Compare commits

...

22 Commits

Author SHA1 Message Date
Alex P 0e1c896aa2 Fix: go lint errors 2025-08-22 23:23:07 +00:00
Alex P 0ed84257f6 Improvements, Fixes: enhanced audio metrics (including prometheus format), fixed lint errors 2025-08-22 23:20:22 +00:00
Alex P 32055f5762 Cleanup: remove polling fallback for /audio/mute status 2025-08-22 22:54:05 +00:00
Alex P 97bcb3c1ea Fix: linter errors 2025-08-22 22:29:48 +00:00
Alex P 6ecb829334 Fix: linter errors 2025-08-22 22:28:15 +00:00
Alex P e360348829 Fix: linter errors 2025-08-22 22:26:15 +00:00
Alex P 1e1677b35a Fix: linter errors 2025-08-22 22:23:50 +00:00
Alex P 3c1e9b8dc2 Fix: audio subprocess handling, avg atency audio metric 2025-08-22 22:21:41 +00:00
Alex P 62d4ec2f89 Fix: audio subprocess handling 2025-08-22 22:17:27 +00:00
Alex P aeb7a12c72 Fix: linting errors 2025-08-22 22:07:35 +00:00
Alex P 671d875890 Fix: literal /home/vscode in cache paths 2025-08-22 21:49:15 +00:00
Alex P 7129bd5521 Fix: workflow indentation 2025-08-22 21:43:51 +00:00
Alex P bd4fbef6dc Tweak: steps order 2025-08-22 21:43:17 +00:00
Alex P b3373e56de Improvement: use cache save/restore actions 2025-08-22 21:41:44 +00:00
Alex P 73e8897fc3 Improvement: Automatically invalidate cache 2025-08-22 21:37:53 +00:00
Alex P de0077a351 Fix: always save cache 2025-08-22 21:34:27 +00:00
Alex P 4875c243d3 Fix: Lint env vars 2025-08-22 21:30:24 +00:00
Alex P 071129a9ec Fix: use absolute path for caching 2025-08-22 21:26:37 +00:00
Alex P dee8a0b5a1 Fix: golangci-lint 2025-08-22 21:21:09 +00:00
Alex P a976ce1da9 Updates: set LDFLAGS and CFLAGS for the lint steps 2025-08-22 21:15:25 +00:00
Alex P d5295d0e4b Updates: golangci-lint workflow 2025-08-22 21:06:40 +00:00
Alex P 423d5775e3 [WIP] Performance Enhancements: move audion processing into a separate process 2025-08-22 12:04:30 +00:00
37 changed files with 3888 additions and 1252 deletions

View File

@ -27,11 +27,64 @@ jobs:
uses: actions/setup-go@fa96338abe5531f6e34c5cc0bbe28c1a533d5505 # v4.2.1
with:
go-version: 1.24.4
- name: Setup build environment variables
id: build-env
run: |
# Extract versions from Makefile
ALSA_VERSION=$(grep '^ALSA_VERSION' Makefile | cut -d'=' -f2 | tr -d ' ')
OPUS_VERSION=$(grep '^OPUS_VERSION' Makefile | cut -d'=' -f2 | tr -d ' ')
# Get rv1106-system latest commit
RV1106_COMMIT=$(git ls-remote https://github.com/jetkvm/rv1106-system.git HEAD | cut -f1)
# Set environment variables
echo "ALSA_VERSION=$ALSA_VERSION" >> $GITHUB_ENV
echo "OPUS_VERSION=$OPUS_VERSION" >> $GITHUB_ENV
echo "RV1106_COMMIT=$RV1106_COMMIT" >> $GITHUB_ENV
# Set outputs for use in other steps
echo "alsa_version=$ALSA_VERSION" >> $GITHUB_OUTPUT
echo "opus_version=$OPUS_VERSION" >> $GITHUB_OUTPUT
echo "rv1106_commit=$RV1106_COMMIT" >> $GITHUB_OUTPUT
# Set resolved cache path
CACHE_PATH="$HOME/.jetkvm/audio-libs"
echo "CACHE_PATH=$CACHE_PATH" >> $GITHUB_ENV
echo "cache_path=$CACHE_PATH" >> $GITHUB_OUTPUT
echo "Extracted ALSA version: $ALSA_VERSION"
echo "Extracted Opus version: $OPUS_VERSION"
echo "Latest rv1106-system commit: $RV1106_COMMIT"
echo "Cache path: $CACHE_PATH"
- name: Restore audio dependencies cache
id: cache-audio-deps
uses: actions/cache/restore@v4
with:
path: ${{ steps.build-env.outputs.cache_path }}
key: audio-deps-${{ runner.os }}-alsa-${{ steps.build-env.outputs.alsa_version }}-opus-${{ steps.build-env.outputs.opus_version }}-rv1106-${{ steps.build-env.outputs.rv1106_commit }}
- name: Setup development environment
if: steps.cache-audio-deps.outputs.cache-hit != 'true'
run: make dev_env
env:
ALSA_VERSION: ${{ env.ALSA_VERSION }}
OPUS_VERSION: ${{ env.OPUS_VERSION }}
- name: Create empty resource directory
run: |
mkdir -p static && touch static/.gitkeep
- name: Save audio dependencies cache
if: always() && steps.cache-audio-deps.outputs.cache-hit != 'true'
uses: actions/cache/save@v4
with:
path: ${{ steps.build-env.outputs.cache_path }}
key: ${{ steps.cache-audio-deps.outputs.cache-primary-key }}
- name: Lint
uses: golangci/golangci-lint-action@1481404843c368bc19ca9406f87d6e0fc97bdcfd # v7.0.0
with:
args: --verbose
version: v2.0.2
env:
CGO_ENABLED: 1
ALSA_VERSION: ${{ env.ALSA_VERSION }}
OPUS_VERSION: ${{ env.OPUS_VERSION }}
CGO_CFLAGS: "-I${{ steps.build-env.outputs.cache_path }}/alsa-lib-${{ steps.build-env.outputs.alsa_version }}/include -I${{ steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }}/include -I${{ steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }}/celt"
CGO_LDFLAGS: "-L${{ steps.build-env.outputs.cache_path }}/alsa-lib-${{ steps.build-env.outputs.alsa_version }}/src/.libs -lasound -L${{ steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }}/.libs -lopus -lm -ldl -static"

View File

@ -11,6 +11,7 @@ import (
func main() {
versionPtr := flag.Bool("version", false, "print version and exit")
versionJsonPtr := flag.Bool("version-json", false, "print version as json and exit")
audioServerPtr := flag.Bool("audio-server", false, "Run as audio server subprocess")
flag.Parse()
if *versionPtr || *versionJsonPtr {
@ -23,5 +24,5 @@ func main() {
return
}
kvm.Main()
kvm.Main(*audioServerPtr)
}

View File

@ -159,8 +159,8 @@ else
msg_info "▶ Building development binary"
make build_dev
# Kill any existing instances of the application
ssh "${REMOTE_USER}@${REMOTE_HOST}" "killall jetkvm_app_debug || true"
# Kill any existing instances of the application (specific cleanup)
ssh "${REMOTE_USER}@${REMOTE_HOST}" "killall jetkvm_app || true; killall jetkvm_native || true; killall jetkvm_app_debug || true; sleep 2"
# Copy the binary to the remote host
ssh "${REMOTE_USER}@${REMOTE_HOST}" "cat > ${REMOTE_PATH}/jetkvm_app_debug" < bin/jetkvm_app
@ -180,18 +180,18 @@ set -e
# Set the library path to include the directory where librockit.so is located
export LD_LIBRARY_PATH=/oem/usr/lib:\$LD_LIBRARY_PATH
# Check if production jetkvm_app is running and save its state
PROD_APP_RUNNING=false
if pgrep -f "/userdata/jetkvm/bin/jetkvm_app" > /dev/null; then
PROD_APP_RUNNING=true
echo "Production jetkvm_app is running, will restore after development session"
else
echo "No production jetkvm_app detected"
fi
# Kill any existing instances of the application
pkill -f "/userdata/jetkvm/bin/jetkvm_app" || true
# Kill any existing instances of the application (specific cleanup)
killall jetkvm_app || true
killall jetkvm_native || true
killall jetkvm_app_debug || true
sleep 2
# Verify no processes are using port 80
if netstat -tlnp | grep :80 > /dev/null 2>&1; then
echo "Warning: Port 80 still in use, attempting to free it..."
fuser -k 80/tcp || true
sleep 1
fi
# Navigate to the directory where the binary will be stored
cd "${REMOTE_PATH}"
@ -199,29 +199,7 @@ cd "${REMOTE_PATH}"
# Make the new binary executable
chmod +x jetkvm_app_debug
# Create a cleanup script that will restore the production app
cat > /tmp/restore_jetkvm.sh << RESTORE_EOF
#!/bin/ash
set -e
export LD_LIBRARY_PATH=/oem/usr/lib:\$LD_LIBRARY_PATH
cd ${REMOTE_PATH}
if [ "$PROD_APP_RUNNING" = "true" ]; then
echo "Restoring production jetkvm_app..."
killall jetkvm_app_debug || true
nohup /userdata/jetkvm/bin/jetkvm_app > /tmp/jetkvm_app.log 2>&1 &
echo "Production jetkvm_app restored"
else
echo "No production app was running before, not restoring"
fi
RESTORE_EOF
chmod +x /tmp/restore_jetkvm.sh
# Set up signal handler to restore production app on exit
trap '/tmp/restore_jetkvm.sh' EXIT INT TERM
# Run the application in the foreground
echo "Starting development jetkvm_app_debug..."
# Run the application in the background
PION_LOG_TRACE=${LOG_TRACE_SCOPES} ./jetkvm_app_debug | tee -a /tmp/jetkvm_app_debug.log
EOF
fi

View File

@ -14,7 +14,7 @@ const (
// Input RPC Direct Handlers
// This module provides optimized direct handlers for high-frequency input events,
// bypassing the reflection-based RPC system for improved performance.
//
//
// Performance benefits:
// - Eliminates reflection overhead (~2-3ms per call)
// - Reduces memory allocations
@ -214,4 +214,4 @@ func isInputMethod(method string) bool {
default:
return false
}
}
}

View File

@ -1,13 +1,51 @@
package audio
// StartAudioStreaming launches the in-process audio stream and delivers Opus frames to the provided callback.
// This is now a wrapper around the non-blocking audio implementation for backward compatibility.
func StartAudioStreaming(send func([]byte)) error {
return StartNonBlockingAudioStreaming(send)
import (
"os"
"strings"
)
// isAudioServerProcess detects if we're running as the audio server subprocess
func isAudioServerProcess() bool {
for _, arg := range os.Args {
if strings.Contains(arg, "--audio-server") {
return true
}
}
return false
}
// StopAudioStreaming stops the in-process audio stream.
// This is now a wrapper around the non-blocking audio implementation for backward compatibility.
func StopAudioStreaming() {
StopNonBlockingAudioStreaming()
// StartAudioStreaming launches the audio stream.
// In audio server subprocess: uses CGO-based audio streaming
// In main process: this should not be called (use StartAudioRelay instead)
func StartAudioStreaming(send func([]byte)) error {
if isAudioServerProcess() {
// Audio server subprocess: use CGO audio processing
return StartAudioOutputStreaming(send)
} else {
// Main process: should use relay system instead
// This is kept for backward compatibility but not recommended
return StartAudioOutputStreaming(send)
}
}
// StopAudioStreaming stops the audio stream.
func StopAudioStreaming() {
if isAudioServerProcess() {
// Audio server subprocess: stop CGO audio processing
StopAudioOutputStreaming()
} else {
// Main process: stop relay if running
StopAudioRelay()
}
}
// StartNonBlockingAudioStreaming is an alias for backward compatibility
func StartNonBlockingAudioStreaming(send func([]byte)) error {
return StartAudioOutputStreaming(send)
}
// StopNonBlockingAudioStreaming is an alias for backward compatibility
func StopNonBlockingAudioStreaming() {
StopAudioOutputStreaming()
}

View File

@ -28,27 +28,23 @@ type BatchAudioProcessor struct {
// Batch queues and state (atomic for lock-free access)
readQueue chan batchReadRequest
writeQueue chan batchWriteRequest
initialized int32
running int32
threadPinned int32
// Buffers (pre-allocated to avoid allocation overhead)
readBufPool *sync.Pool
writeBufPool *sync.Pool
readBufPool *sync.Pool
}
type BatchAudioStats struct {
// int64 fields MUST be first for ARM32 alignment
BatchedReads int64
BatchedWrites int64
SingleReads int64
SingleWrites int64
BatchedFrames int64
SingleFrames int64
CGOCallsReduced int64
OSThreadPinTime time.Duration // time.Duration is int64 internally
LastBatchTime time.Time
BatchedReads int64
SingleReads int64
BatchedFrames int64
SingleFrames int64
CGOCallsReduced int64
OSThreadPinTime time.Duration // time.Duration is int64 internally
LastBatchTime time.Time
}
type batchReadRequest struct {
@ -57,22 +53,11 @@ type batchReadRequest struct {
timestamp time.Time
}
type batchWriteRequest struct {
buffer []byte
resultChan chan batchWriteResult
timestamp time.Time
}
type batchReadResult struct {
length int
err error
}
type batchWriteResult struct {
written int
err error
}
// NewBatchAudioProcessor creates a new batch audio processor
func NewBatchAudioProcessor(batchSize int, batchDuration time.Duration) *BatchAudioProcessor {
ctx, cancel := context.WithCancel(context.Background())
@ -85,17 +70,11 @@ func NewBatchAudioProcessor(batchSize int, batchDuration time.Duration) *BatchAu
batchSize: batchSize,
batchDuration: batchDuration,
readQueue: make(chan batchReadRequest, batchSize*2),
writeQueue: make(chan batchWriteRequest, batchSize*2),
readBufPool: &sync.Pool{
New: func() interface{} {
return make([]byte, 1500) // Max audio frame size
},
},
writeBufPool: &sync.Pool{
New: func() interface{} {
return make([]byte, 4096) // Max write buffer size
},
},
}
return processor
@ -114,7 +93,6 @@ func (bap *BatchAudioProcessor) Start() error {
// Start batch processing goroutines
go bap.batchReadProcessor()
go bap.batchWriteProcessor()
bap.logger.Info().Int("batch_size", bap.batchSize).
Dur("batch_duration", bap.batchDuration).
@ -175,44 +153,6 @@ func (bap *BatchAudioProcessor) BatchReadEncode(buffer []byte) (int, error) {
}
}
// BatchDecodeWrite performs batched audio decode and write operations
func (bap *BatchAudioProcessor) BatchDecodeWrite(buffer []byte) (int, error) {
if atomic.LoadInt32(&bap.running) == 0 {
// Fallback to single operation if batch processor is not running
atomic.AddInt64(&bap.stats.SingleWrites, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioDecodeWrite(buffer)
}
resultChan := make(chan batchWriteResult, 1)
request := batchWriteRequest{
buffer: buffer,
resultChan: resultChan,
timestamp: time.Now(),
}
select {
case bap.writeQueue <- request:
// Successfully queued
case <-time.After(5 * time.Millisecond):
// Queue is full or blocked, fallback to single operation
atomic.AddInt64(&bap.stats.SingleWrites, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioDecodeWrite(buffer)
}
// Wait for result
select {
case result := <-resultChan:
return result.written, result.err
case <-time.After(50 * time.Millisecond):
// Timeout, fallback to single operation
atomic.AddInt64(&bap.stats.SingleWrites, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioDecodeWrite(buffer)
}
}
// batchReadProcessor processes batched read operations
func (bap *BatchAudioProcessor) batchReadProcessor() {
defer bap.logger.Debug().Msg("batch read processor stopped")
@ -249,42 +189,6 @@ func (bap *BatchAudioProcessor) batchReadProcessor() {
}
}
// batchWriteProcessor processes batched write operations
func (bap *BatchAudioProcessor) batchWriteProcessor() {
defer bap.logger.Debug().Msg("batch write processor stopped")
ticker := time.NewTicker(bap.batchDuration)
defer ticker.Stop()
var batch []batchWriteRequest
batch = make([]batchWriteRequest, 0, bap.batchSize)
for atomic.LoadInt32(&bap.running) == 1 {
select {
case <-bap.ctx.Done():
return
case req := <-bap.writeQueue:
batch = append(batch, req)
if len(batch) >= bap.batchSize {
bap.processBatchWrite(batch)
batch = batch[:0] // Clear slice but keep capacity
}
case <-ticker.C:
if len(batch) > 0 {
bap.processBatchWrite(batch)
batch = batch[:0] // Clear slice but keep capacity
}
}
}
// Process any remaining requests
if len(batch) > 0 {
bap.processBatchWrite(batch)
}
}
// processBatchRead processes a batch of read requests efficiently
func (bap *BatchAudioProcessor) processBatchRead(batch []batchReadRequest) {
if len(batch) == 0 {
@ -328,56 +232,11 @@ func (bap *BatchAudioProcessor) processBatchRead(batch []batchReadRequest) {
bap.stats.LastBatchTime = time.Now()
}
// processBatchWrite processes a batch of write requests efficiently
func (bap *BatchAudioProcessor) processBatchWrite(batch []batchWriteRequest) {
if len(batch) == 0 {
return
}
// Pin to OS thread for the entire batch to minimize thread switching overhead
start := time.Now()
if atomic.CompareAndSwapInt32(&bap.threadPinned, 0, 1) {
runtime.LockOSThread()
defer func() {
runtime.UnlockOSThread()
atomic.StoreInt32(&bap.threadPinned, 0)
bap.stats.OSThreadPinTime += time.Since(start)
}()
}
batchSize := len(batch)
atomic.AddInt64(&bap.stats.BatchedWrites, 1)
atomic.AddInt64(&bap.stats.BatchedFrames, int64(batchSize))
if batchSize > 1 {
atomic.AddInt64(&bap.stats.CGOCallsReduced, int64(batchSize-1))
}
// Process each request in the batch
for _, req := range batch {
written, err := CGOAudioDecodeWrite(req.buffer)
result := batchWriteResult{
written: written,
err: err,
}
// Send result back (non-blocking)
select {
case req.resultChan <- result:
default:
// Requestor timed out, drop result
}
}
bap.stats.LastBatchTime = time.Now()
}
// GetStats returns current batch processor statistics
func (bap *BatchAudioProcessor) GetStats() BatchAudioStats {
return BatchAudioStats{
BatchedReads: atomic.LoadInt64(&bap.stats.BatchedReads),
BatchedWrites: atomic.LoadInt64(&bap.stats.BatchedWrites),
SingleReads: atomic.LoadInt64(&bap.stats.SingleReads),
SingleWrites: atomic.LoadInt64(&bap.stats.SingleWrites),
BatchedFrames: atomic.LoadInt64(&bap.stats.BatchedFrames),
SingleFrames: atomic.LoadInt64(&bap.stats.SingleFrames),
CGOCallsReduced: atomic.LoadInt64(&bap.stats.CGOCallsReduced),
@ -393,7 +252,7 @@ func (bap *BatchAudioProcessor) IsRunning() bool {
// Global batch processor instance
var (
globalBatchProcessor unsafe.Pointer // *BatchAudioProcessor
globalBatchProcessor unsafe.Pointer // *BatchAudioProcessor
batchProcessorInitialized int32
)
@ -444,12 +303,3 @@ func BatchCGOAudioReadEncode(buffer []byte) (int, error) {
}
return CGOAudioReadEncode(buffer)
}
// BatchCGOAudioDecodeWrite is a batched version of CGOAudioDecodeWrite
func BatchCGOAudioDecodeWrite(buffer []byte) (int, error) {
processor := GetBatchAudioProcessor()
if processor != nil && processor.IsRunning() {
return processor.BatchDecodeWrite(buffer)
}
return CGOAudioDecodeWrite(buffer)
}

View File

@ -23,14 +23,18 @@ func NewAudioBufferPool(bufferSize int) *AudioBufferPool {
// Get retrieves a buffer from the pool
func (p *AudioBufferPool) Get() []byte {
return p.pool.Get().([]byte)
if buf := p.pool.Get(); buf != nil {
return *buf.(*[]byte)
}
return make([]byte, 0, 1500) // fallback if pool is empty
}
// Put returns a buffer to the pool
func (p *AudioBufferPool) Put(buf []byte) {
// Reset length but keep capacity for reuse
if cap(buf) >= 1500 { // Only pool buffers of reasonable size
p.pool.Put(buf[:0])
resetBuf := buf[:0]
p.pool.Put(&resetBuf)
}
}
@ -38,7 +42,7 @@ func (p *AudioBufferPool) Put(buf []byte) {
var (
// Pool for 1500-byte audio frame buffers (Opus max frame size)
audioFramePool = NewAudioBufferPool(1500)
// Pool for smaller control buffers
audioControlPool = NewAudioBufferPool(64)
)
@ -61,4 +65,4 @@ func GetAudioControlBuffer() []byte {
// PutAudioControlBuffer returns a buffer to the control pool
func PutAudioControlBuffer(buf []byte) {
audioControlPool.Put(buf)
}
}

View File

@ -8,7 +8,7 @@ import (
)
/*
#cgo CFLAGS: -I${SRCDIR}/../../tools/alsa-opus-includes
#cgo CFLAGS: -I$HOME/.jetkvm/audio-libs/alsa-lib-$ALSA_VERSION/include -I$HOME/.jetkvm/audio-libs/opus-$OPUS_VERSION/include -I$HOME/.jetkvm/audio-libs/opus-$OPUS_VERSION/celt
#cgo LDFLAGS: -L$HOME/.jetkvm/audio-libs/alsa-lib-$ALSA_VERSION/src/.libs -lasound -L$HOME/.jetkvm/audio-libs/opus-$OPUS_VERSION/.libs -lopus -lm -ldl -static
#include <alsa/asoundlib.h>
#include <opus.h>
@ -39,7 +39,7 @@ static volatile int playback_initialized = 0;
static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream) {
int attempts = 3;
int err;
while (attempts-- > 0) {
err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK);
if (err >= 0) {
@ -47,7 +47,7 @@ static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream
snd_pcm_nonblock(*handle, 0);
return 0;
}
if (err == -EBUSY && attempts > 0) {
// Device busy, wait and retry
usleep(50000); // 50ms
@ -63,26 +63,26 @@ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name) {
snd_pcm_hw_params_t *params;
snd_pcm_sw_params_t *sw_params;
int err;
if (!handle) return -1;
// Use stack allocation for better performance
snd_pcm_hw_params_alloca(&params);
snd_pcm_sw_params_alloca(&sw_params);
// Hardware parameters
err = snd_pcm_hw_params_any(handle, params);
if (err < 0) return err;
err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
if (err < 0) return err;
err = snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
if (err < 0) return err;
err = snd_pcm_hw_params_set_channels(handle, params, channels);
if (err < 0) return err;
// Set exact rate for better performance
err = snd_pcm_hw_params_set_rate(handle, params, sample_rate, 0);
if (err < 0) {
@ -91,70 +91,70 @@ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name) {
err = snd_pcm_hw_params_set_rate_near(handle, params, &rate, 0);
if (err < 0) return err;
}
// Optimize buffer sizes for low latency
snd_pcm_uframes_t period_size = frame_size;
err = snd_pcm_hw_params_set_period_size_near(handle, params, &period_size, 0);
if (err < 0) return err;
// Set buffer size to 4 periods for good latency/stability balance
snd_pcm_uframes_t buffer_size = period_size * 4;
err = snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size);
if (err < 0) return err;
err = snd_pcm_hw_params(handle, params);
if (err < 0) return err;
// Software parameters for optimal performance
err = snd_pcm_sw_params_current(handle, sw_params);
if (err < 0) return err;
// Start playback/capture when buffer is period_size frames
err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, period_size);
if (err < 0) return err;
// Allow transfers when at least period_size frames are available
err = snd_pcm_sw_params_set_avail_min(handle, sw_params, period_size);
if (err < 0) return err;
err = snd_pcm_sw_params(handle, sw_params);
if (err < 0) return err;
return snd_pcm_prepare(handle);
}
// Initialize ALSA and Opus encoder with improved safety
int jetkvm_audio_init() {
int err;
// Prevent concurrent initialization
if (__sync_bool_compare_and_swap(&capture_initializing, 0, 1) == 0) {
return -EBUSY; // Already initializing
}
// Check if already initialized
if (capture_initialized) {
capture_initializing = 0;
return 0;
}
// Clean up any existing resources first
if (encoder) {
opus_encoder_destroy(encoder);
encoder = NULL;
if (encoder) {
opus_encoder_destroy(encoder);
encoder = NULL;
}
if (pcm_handle) {
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
if (pcm_handle) {
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
}
// Try to open ALSA capture device
err = safe_alsa_open(&pcm_handle, "hw:1,0", SND_PCM_STREAM_CAPTURE);
if (err < 0) {
capture_initializing = 0;
return -1;
}
// Configure the device
err = configure_alsa_device(pcm_handle, "capture");
if (err < 0) {
@ -163,7 +163,7 @@ int jetkvm_audio_init() {
capture_initializing = 0;
return -1;
}
// Initialize Opus encoder
int opus_err = 0;
encoder = opus_encoder_create(sample_rate, channels, OPUS_APPLICATION_AUDIO, &opus_err);
@ -172,10 +172,10 @@ int jetkvm_audio_init() {
capture_initializing = 0;
return -2;
}
opus_encoder_ctl(encoder, OPUS_SET_BITRATE(opus_bitrate));
opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(opus_complexity));
capture_initialized = 1;
capture_initializing = 0;
return 0;
@ -186,21 +186,21 @@ int jetkvm_audio_read_encode(void *opus_buf) {
short pcm_buffer[1920]; // max 2ch*960
unsigned char *out = (unsigned char*)opus_buf;
int err = 0;
// Safety checks
if (!capture_initialized || !pcm_handle || !encoder || !opus_buf) {
return -1;
}
int pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
// Handle ALSA errors with enhanced recovery
if (pcm_rc < 0) {
if (pcm_rc == -EPIPE) {
// Buffer underrun - try to recover
err = snd_pcm_prepare(pcm_handle);
if (err < 0) return -1;
pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
if (pcm_rc < 0) return -1;
} else if (pcm_rc == -EAGAIN) {
@ -221,12 +221,12 @@ int jetkvm_audio_read_encode(void *opus_buf) {
return -1;
}
}
// If we got fewer frames than expected, pad with silence
if (pcm_rc < frame_size) {
memset(&pcm_buffer[pcm_rc * channels], 0, (frame_size - pcm_rc) * channels * sizeof(short));
}
int nb_bytes = opus_encode(encoder, pcm_buffer, frame_size, out, max_packet_size);
return nb_bytes;
}
@ -234,28 +234,28 @@ int jetkvm_audio_read_encode(void *opus_buf) {
// Initialize ALSA playback with improved safety
int jetkvm_audio_playback_init() {
int err;
// Prevent concurrent initialization
if (__sync_bool_compare_and_swap(&playback_initializing, 0, 1) == 0) {
return -EBUSY; // Already initializing
}
// Check if already initialized
if (playback_initialized) {
playback_initializing = 0;
return 0;
}
// Clean up any existing resources first
if (decoder) {
opus_decoder_destroy(decoder);
decoder = NULL;
if (decoder) {
opus_decoder_destroy(decoder);
decoder = NULL;
}
if (pcm_playback_handle) {
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
if (pcm_playback_handle) {
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
}
// Try to open the USB gadget audio device for playback
err = safe_alsa_open(&pcm_playback_handle, "hw:1,0", SND_PCM_STREAM_PLAYBACK);
if (err < 0) {
@ -266,7 +266,7 @@ int jetkvm_audio_playback_init() {
return -1;
}
}
// Configure the device
err = configure_alsa_device(pcm_playback_handle, "playback");
if (err < 0) {
@ -275,7 +275,7 @@ int jetkvm_audio_playback_init() {
playback_initializing = 0;
return -1;
}
// Initialize Opus decoder
int opus_err = 0;
decoder = opus_decoder_create(sample_rate, channels, &opus_err);
@ -285,7 +285,7 @@ int jetkvm_audio_playback_init() {
playback_initializing = 0;
return -2;
}
playback_initialized = 1;
playback_initializing = 0;
return 0;
@ -296,21 +296,21 @@ int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
short pcm_buffer[1920]; // max 2ch*960
unsigned char *in = (unsigned char*)opus_buf;
int err = 0;
// Safety checks
if (!playback_initialized || !pcm_playback_handle || !decoder || !opus_buf || opus_size <= 0) {
return -1;
}
// Additional bounds checking
if (opus_size > max_packet_size) {
return -1;
}
// Decode Opus to PCM
int pcm_frames = opus_decode(decoder, in, opus_size, pcm_buffer, frame_size, 0);
if (pcm_frames < 0) return -1;
// Write PCM to playback device with enhanced recovery
int pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
if (pcm_rc < 0) {
@ -318,7 +318,7 @@ int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
// Buffer underrun - try to recover
err = snd_pcm_prepare(pcm_playback_handle);
if (err < 0) return -2;
pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
} else if (pcm_rc == -ESTRPIPE) {
// Device suspended, try to resume
@ -333,7 +333,7 @@ int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
}
if (pcm_rc < 0) return -2;
}
return pcm_frames;
}
@ -343,20 +343,20 @@ void jetkvm_audio_playback_close() {
while (playback_initializing) {
usleep(1000); // 1ms
}
// Atomic check and set to prevent double cleanup
if (__sync_bool_compare_and_swap(&playback_initialized, 1, 0) == 0) {
return; // Already cleaned up
}
if (decoder) {
opus_decoder_destroy(decoder);
decoder = NULL;
if (decoder) {
opus_decoder_destroy(decoder);
decoder = NULL;
}
if (pcm_playback_handle) {
if (pcm_playback_handle) {
snd_pcm_drain(pcm_playback_handle);
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
}
}
@ -366,19 +366,19 @@ void jetkvm_audio_close() {
while (capture_initializing) {
usleep(1000); // 1ms
}
capture_initialized = 0;
if (encoder) {
opus_encoder_destroy(encoder);
encoder = NULL;
if (encoder) {
opus_encoder_destroy(encoder);
encoder = NULL;
}
if (pcm_handle) {
if (pcm_handle) {
snd_pcm_drop(pcm_handle); // Drop pending samples
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
}
// Also clean up playback
jetkvm_audio_playback_close();
}
@ -387,15 +387,15 @@ import "C"
// Optimized Go wrappers with reduced overhead
var (
errAudioInitFailed = errors.New("failed to init ALSA/Opus")
errBufferTooSmall = errors.New("buffer too small")
errAudioReadEncode = errors.New("audio read/encode error")
errAudioDecodeWrite = errors.New("audio decode/write error")
errAudioPlaybackInit = errors.New("failed to init ALSA playback/Opus decoder")
errEmptyBuffer = errors.New("empty buffer")
errNilBuffer = errors.New("nil buffer")
errBufferTooLarge = errors.New("buffer too large")
errInvalidBufferPtr = errors.New("invalid buffer pointer")
errAudioInitFailed = errors.New("failed to init ALSA/Opus")
errBufferTooSmall = errors.New("buffer too small")
errAudioReadEncode = errors.New("audio read/encode error")
errAudioDecodeWrite = errors.New("audio decode/write error")
errAudioPlaybackInit = errors.New("failed to init ALSA playback/Opus decoder")
errEmptyBuffer = errors.New("empty buffer")
errNilBuffer = errors.New("nil buffer")
errBufferTooLarge = errors.New("buffer too large")
errInvalidBufferPtr = errors.New("invalid buffer pointer")
)
func cgoAudioInit() error {
@ -416,7 +416,7 @@ func cgoAudioReadEncode(buf []byte) (int, error) {
if len(buf) < 1276 {
return 0, errBufferTooSmall
}
n := C.jetkvm_audio_read_encode(unsafe.Pointer(&buf[0]))
if n < 0 {
return 0, errAudioReadEncode
@ -449,26 +449,27 @@ func cgoAudioDecodeWrite(buf []byte) (int, error) {
if buf == nil {
return 0, errors.New("nil buffer")
}
// Validate buffer size to prevent potential overruns
if len(buf) > 4096 { // Maximum reasonable Opus frame size
return 0, errors.New("buffer too large")
}
// Ensure buffer is not deallocated by keeping a reference
bufPtr := unsafe.Pointer(&buf[0])
if bufPtr == nil {
return 0, errors.New("invalid buffer pointer")
}
// Add recovery mechanism for C function crashes
defer func() {
if r := recover(); r != nil {
// Log the panic but don't crash the entire program
// This should not happen with proper validation, but provides safety
_ = r // Explicitly ignore the panic value
}
}()
n := C.jetkvm_audio_decode_write(bufPtr, C.int(len(buf)))
if n < 0 {
return 0, errors.New("audio decode/write error")
@ -478,10 +479,10 @@ func cgoAudioDecodeWrite(buf []byte) (int, error) {
// Wrapper functions for non-blocking audio manager
var (
CGOAudioInit = cgoAudioInit
CGOAudioClose = cgoAudioClose
CGOAudioReadEncode = cgoAudioReadEncode
CGOAudioPlaybackInit = cgoAudioPlaybackInit
CGOAudioPlaybackClose = cgoAudioPlaybackClose
CGOAudioDecodeWrite = cgoAudioDecodeWrite
CGOAudioInit = cgoAudioInit
CGOAudioClose = cgoAudioClose
CGOAudioReadEncode = cgoAudioReadEncode
CGOAudioPlaybackInit = cgoAudioPlaybackInit
CGOAudioPlaybackClose = cgoAudioPlaybackClose
CGOAudioDecodeWrite = cgoAudioDecodeWrite
)

View File

@ -2,6 +2,7 @@ package audio
import (
"context"
"fmt"
"strings"
"sync"
"time"
@ -249,13 +250,13 @@ func (aeb *AudioEventBroadcaster) startMetricsBroadcasting() {
for range ticker.C {
aeb.mutex.RLock()
subscriberCount := len(aeb.subscribers)
// Early exit if no subscribers to save CPU
if subscriberCount == 0 {
aeb.mutex.RUnlock()
continue
}
// Create a copy for safe iteration
subscribersCopy := make([]*AudioEventSubscriber, 0, subscriberCount)
for _, sub := range aeb.subscribers {
@ -270,7 +271,7 @@ func (aeb *AudioEventBroadcaster) startMetricsBroadcasting() {
activeSubscribers++
}
}
// Skip metrics gathering if no active subscribers
if activeSubscribers == 0 {
continue
@ -286,7 +287,7 @@ func (aeb *AudioEventBroadcaster) startMetricsBroadcasting() {
BytesProcessed: audioMetrics.BytesProcessed,
LastFrameTime: audioMetrics.LastFrameTime.Format("2006-01-02T15:04:05.000Z"),
ConnectionDrops: audioMetrics.ConnectionDrops,
AverageLatency: audioMetrics.AverageLatency.String(),
AverageLatency: fmt.Sprintf("%.1fms", float64(audioMetrics.AverageLatency.Nanoseconds())/1e6),
},
}
aeb.broadcast(audioMetricsEvent)
@ -304,7 +305,7 @@ func (aeb *AudioEventBroadcaster) startMetricsBroadcasting() {
BytesProcessed: micMetrics.BytesProcessed,
LastFrameTime: micMetrics.LastFrameTime.Format("2006-01-02T15:04:05.000Z"),
ConnectionDrops: micMetrics.ConnectionDrops,
AverageLatency: micMetrics.AverageLatency.String(),
AverageLatency: fmt.Sprintf("%.1fms", float64(micMetrics.AverageLatency.Nanoseconds())/1e6),
},
}
aeb.broadcast(micMetricsEvent)
@ -357,9 +358,9 @@ func (aeb *AudioEventBroadcaster) sendToSubscriber(subscriber *AudioEventSubscri
err := wsjson.Write(ctx, subscriber.conn, event)
if err != nil {
// Don't log network errors for closed connections as warnings, they're expected
if strings.Contains(err.Error(), "use of closed network connection") ||
strings.Contains(err.Error(), "connection reset by peer") ||
strings.Contains(err.Error(), "context canceled") {
if strings.Contains(err.Error(), "use of closed network connection") ||
strings.Contains(err.Error(), "connection reset by peer") ||
strings.Contains(err.Error(), "context canceled") {
subscriber.logger.Debug().Err(err).Msg("websocket connection closed during audio event send")
} else {
subscriber.logger.Warn().Err(err).Msg("failed to send audio event to subscriber")

View File

@ -19,21 +19,21 @@ type AudioInputMetrics struct {
LastFrameTime time.Time
}
// AudioInputManager manages microphone input stream from WebRTC to USB gadget
// AudioInputManager manages microphone input stream using IPC mode only
type AudioInputManager struct {
// metrics MUST be first for ARM32 alignment (contains int64 fields)
metrics AudioInputMetrics
inputBuffer chan []byte
logger zerolog.Logger
running int32
ipcManager *AudioInputIPCManager
logger zerolog.Logger
running int32
}
// NewAudioInputManager creates a new audio input manager
// NewAudioInputManager creates a new audio input manager (IPC mode only)
func NewAudioInputManager() *AudioInputManager {
return &AudioInputManager{
inputBuffer: make(chan []byte, 100), // Buffer up to 100 frames
logger: logging.GetDefaultLogger().With().Str("component", "audio-input").Logger(),
ipcManager: NewAudioInputIPCManager(),
logger: logging.GetDefaultLogger().With().Str("component", "audio-input").Logger(),
}
}
@ -45,9 +45,10 @@ func (aim *AudioInputManager) Start() error {
aim.logger.Info().Msg("Starting audio input manager")
// Start the non-blocking audio input stream
err := StartNonBlockingAudioInput(aim.inputBuffer)
// Start the IPC-based audio input
err := aim.ipcManager.Start()
if err != nil {
aim.logger.Error().Err(err).Msg("Failed to start IPC audio input")
atomic.StoreInt32(&aim.running, 0)
return err
}
@ -63,57 +64,114 @@ func (aim *AudioInputManager) Stop() {
aim.logger.Info().Msg("Stopping audio input manager")
// Stop the non-blocking audio input stream
StopNonBlockingAudioInput()
// Drain the input buffer
go func() {
for {
select {
case <-aim.inputBuffer:
// Drain
case <-time.After(100 * time.Millisecond):
return
}
}
}()
// Stop the IPC-based audio input
aim.ipcManager.Stop()
aim.logger.Info().Msg("Audio input manager stopped")
}
// WriteOpusFrame writes an Opus frame to the input buffer
// WriteOpusFrame writes an Opus frame to the audio input system with latency tracking
func (aim *AudioInputManager) WriteOpusFrame(frame []byte) error {
if atomic.LoadInt32(&aim.running) == 0 {
return nil // Not running, ignore
if !aim.IsRunning() {
return nil // Not running, silently drop
}
select {
case aim.inputBuffer <- frame:
atomic.AddInt64(&aim.metrics.FramesSent, 1)
atomic.AddInt64(&aim.metrics.BytesProcessed, int64(len(frame)))
aim.metrics.LastFrameTime = time.Now()
return nil
default:
// Buffer full, drop frame
// Track end-to-end latency from WebRTC to IPC
startTime := time.Now()
err := aim.ipcManager.WriteOpusFrame(frame)
processingTime := time.Since(startTime)
// Log high latency warnings
if processingTime > 10*time.Millisecond {
aim.logger.Warn().
Dur("latency_ms", processingTime).
Msg("High audio processing latency detected")
}
if err != nil {
atomic.AddInt64(&aim.metrics.FramesDropped, 1)
aim.logger.Warn().Msg("Audio input buffer full, dropping frame")
return nil
return err
}
// Update metrics
atomic.AddInt64(&aim.metrics.FramesSent, 1)
atomic.AddInt64(&aim.metrics.BytesProcessed, int64(len(frame)))
aim.metrics.LastFrameTime = time.Now()
aim.metrics.AverageLatency = processingTime
return nil
}
// GetMetrics returns current audio input metrics
func (aim *AudioInputManager) GetMetrics() AudioInputMetrics {
return AudioInputMetrics{
FramesSent: atomic.LoadInt64(&aim.metrics.FramesSent),
FramesDropped: atomic.LoadInt64(&aim.metrics.FramesDropped),
BytesProcessed: atomic.LoadInt64(&aim.metrics.BytesProcessed),
AverageLatency: aim.metrics.AverageLatency,
LastFrameTime: aim.metrics.LastFrameTime,
}
}
// GetMetrics returns current microphone input metrics
func (aim *AudioInputManager) GetMetrics() AudioInputMetrics {
return AudioInputMetrics{
FramesSent: atomic.LoadInt64(&aim.metrics.FramesSent),
FramesDropped: atomic.LoadInt64(&aim.metrics.FramesDropped),
BytesProcessed: atomic.LoadInt64(&aim.metrics.BytesProcessed),
LastFrameTime: aim.metrics.LastFrameTime,
ConnectionDrops: atomic.LoadInt64(&aim.metrics.ConnectionDrops),
AverageLatency: aim.metrics.AverageLatency,
// GetComprehensiveMetrics returns detailed performance metrics across all components
func (aim *AudioInputManager) GetComprehensiveMetrics() map[string]interface{} {
// Get base metrics
baseMetrics := aim.GetMetrics()
// Get detailed IPC metrics
ipcMetrics, detailedStats := aim.ipcManager.GetDetailedMetrics()
comprehensiveMetrics := map[string]interface{}{
"manager": map[string]interface{}{
"frames_sent": baseMetrics.FramesSent,
"frames_dropped": baseMetrics.FramesDropped,
"bytes_processed": baseMetrics.BytesProcessed,
"average_latency_ms": float64(baseMetrics.AverageLatency.Nanoseconds()) / 1e6,
"last_frame_time": baseMetrics.LastFrameTime,
"running": aim.IsRunning(),
},
"ipc": map[string]interface{}{
"frames_sent": ipcMetrics.FramesSent,
"frames_dropped": ipcMetrics.FramesDropped,
"bytes_processed": ipcMetrics.BytesProcessed,
"average_latency_ms": float64(ipcMetrics.AverageLatency.Nanoseconds()) / 1e6,
"last_frame_time": ipcMetrics.LastFrameTime,
},
"detailed": detailedStats,
}
return comprehensiveMetrics
}
// LogPerformanceStats logs current performance statistics
func (aim *AudioInputManager) LogPerformanceStats() {
metrics := aim.GetComprehensiveMetrics()
managerStats := metrics["manager"].(map[string]interface{})
ipcStats := metrics["ipc"].(map[string]interface{})
detailedStats := metrics["detailed"].(map[string]interface{})
aim.logger.Info().
Int64("manager_frames_sent", managerStats["frames_sent"].(int64)).
Int64("manager_frames_dropped", managerStats["frames_dropped"].(int64)).
Float64("manager_latency_ms", managerStats["average_latency_ms"].(float64)).
Int64("ipc_frames_sent", ipcStats["frames_sent"].(int64)).
Int64("ipc_frames_dropped", ipcStats["frames_dropped"].(int64)).
Float64("ipc_latency_ms", ipcStats["average_latency_ms"].(float64)).
Float64("client_drop_rate", detailedStats["client_drop_rate"].(float64)).
Float64("frames_per_second", detailedStats["frames_per_second"].(float64)).
Msg("Audio input performance metrics")
}
// IsRunning returns whether the audio input manager is running
func (aim *AudioInputManager) IsRunning() bool {
return atomic.LoadInt32(&aim.running) == 1
}
// IsReady returns whether the audio input manager is ready to receive frames
// This checks both that it's running and that the IPC connection is established
func (aim *AudioInputManager) IsReady() bool {
if !aim.IsRunning() {
return false
}
return aim.ipcManager.IsReady()
}

View File

@ -0,0 +1,94 @@
package audio
import (
"sync/atomic"
"unsafe"
)
var (
// Global audio input manager instance
globalInputManager unsafe.Pointer // *AudioInputManager
)
// AudioInputInterface defines the common interface for audio input managers
type AudioInputInterface interface {
Start() error
Stop()
WriteOpusFrame(frame []byte) error
IsRunning() bool
GetMetrics() AudioInputMetrics
}
// GetSupervisor returns the audio input supervisor for advanced management
func (m *AudioInputManager) GetSupervisor() *AudioInputSupervisor {
return m.ipcManager.GetSupervisor()
}
// getAudioInputManager returns the audio input manager
func getAudioInputManager() AudioInputInterface {
ptr := atomic.LoadPointer(&globalInputManager)
if ptr == nil {
// Create new manager
newManager := NewAudioInputManager()
if atomic.CompareAndSwapPointer(&globalInputManager, nil, unsafe.Pointer(newManager)) {
return newManager
}
// Another goroutine created it, use that one
ptr = atomic.LoadPointer(&globalInputManager)
}
return (*AudioInputManager)(ptr)
}
// StartAudioInput starts the audio input system using the appropriate manager
func StartAudioInput() error {
manager := getAudioInputManager()
return manager.Start()
}
// StopAudioInput stops the audio input system
func StopAudioInput() {
manager := getAudioInputManager()
manager.Stop()
}
// WriteAudioInputFrame writes an Opus frame to the audio input system
func WriteAudioInputFrame(frame []byte) error {
manager := getAudioInputManager()
return manager.WriteOpusFrame(frame)
}
// IsAudioInputRunning returns whether the audio input system is running
func IsAudioInputRunning() bool {
manager := getAudioInputManager()
return manager.IsRunning()
}
// GetAudioInputMetrics returns current audio input metrics
func GetAudioInputMetrics() AudioInputMetrics {
manager := getAudioInputManager()
return manager.GetMetrics()
}
// GetAudioInputIPCSupervisor returns the IPC supervisor
func GetAudioInputIPCSupervisor() *AudioInputSupervisor {
ptr := atomic.LoadPointer(&globalInputManager)
if ptr == nil {
return nil
}
manager := (*AudioInputManager)(ptr)
return manager.GetSupervisor()
}
// Helper functions
// ResetAudioInputManagers resets the global manager (for testing)
func ResetAudioInputManagers() {
// Stop existing manager first
if ptr := atomic.LoadPointer(&globalInputManager); ptr != nil {
(*AudioInputManager)(ptr).Stop()
}
// Reset pointer
atomic.StorePointer(&globalInputManager, nil)
}

694
internal/audio/input_ipc.go Normal file
View File

@ -0,0 +1,694 @@
package audio
import (
"context"
"encoding/binary"
"fmt"
"io"
"net"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
)
const (
inputMagicNumber uint32 = 0x4A4B4D49 // "JKMI" (JetKVM Microphone Input)
inputSocketName = "audio_input.sock"
maxFrameSize = 4096 // Maximum Opus frame size
writeTimeout = 5 * time.Millisecond // Non-blocking write timeout
maxDroppedFrames = 100 // Maximum consecutive dropped frames before reconnect
)
// InputMessageType represents the type of IPC message
type InputMessageType uint8
const (
InputMessageTypeOpusFrame InputMessageType = iota
InputMessageTypeConfig
InputMessageTypeStop
InputMessageTypeHeartbeat
InputMessageTypeAck
)
// InputIPCMessage represents a message sent over IPC
type InputIPCMessage struct {
Magic uint32
Type InputMessageType
Length uint32
Timestamp int64
Data []byte
}
// InputIPCConfig represents configuration for audio input
type InputIPCConfig struct {
SampleRate int
Channels int
FrameSize int
}
// AudioInputServer handles IPC communication for audio input processing
type AudioInputServer struct {
// Atomic fields must be first for proper alignment on ARM
bufferSize int64 // Current buffer size (atomic)
processingTime int64 // Average processing time in nanoseconds (atomic)
droppedFrames int64 // Dropped frames counter (atomic)
totalFrames int64 // Total frames counter (atomic)
listener net.Listener
conn net.Conn
mtx sync.Mutex
running bool
// Triple-goroutine architecture
messageChan chan *InputIPCMessage // Buffered channel for incoming messages
processChan chan *InputIPCMessage // Buffered channel for processing queue
stopChan chan struct{} // Stop signal for all goroutines
wg sync.WaitGroup // Wait group for goroutine coordination
}
// NewAudioInputServer creates a new audio input server
func NewAudioInputServer() (*AudioInputServer, error) {
socketPath := getInputSocketPath()
// Remove existing socket if any
os.Remove(socketPath)
listener, err := net.Listen("unix", socketPath)
if err != nil {
return nil, fmt.Errorf("failed to create unix socket: %w", err)
}
// Initialize with adaptive buffer size (start with 1000 frames)
initialBufferSize := int64(1000)
return &AudioInputServer{
listener: listener,
messageChan: make(chan *InputIPCMessage, initialBufferSize),
processChan: make(chan *InputIPCMessage, initialBufferSize),
stopChan: make(chan struct{}),
bufferSize: initialBufferSize,
}, nil
}
// Start starts the audio input server
func (ais *AudioInputServer) Start() error {
ais.mtx.Lock()
defer ais.mtx.Unlock()
if ais.running {
return fmt.Errorf("server already running")
}
ais.running = true
// Start triple-goroutine architecture
ais.startReaderGoroutine()
ais.startProcessorGoroutine()
ais.startMonitorGoroutine()
// Accept connections in a goroutine
go ais.acceptConnections()
return nil
}
// Stop stops the audio input server
func (ais *AudioInputServer) Stop() {
ais.mtx.Lock()
defer ais.mtx.Unlock()
if !ais.running {
return
}
ais.running = false
// Signal all goroutines to stop
close(ais.stopChan)
ais.wg.Wait()
if ais.conn != nil {
ais.conn.Close()
ais.conn = nil
}
if ais.listener != nil {
ais.listener.Close()
}
}
// Close closes the server and cleans up resources
func (ais *AudioInputServer) Close() {
ais.Stop()
// Remove socket file
os.Remove(getInputSocketPath())
}
// acceptConnections accepts incoming connections
func (ais *AudioInputServer) acceptConnections() {
for ais.running {
conn, err := ais.listener.Accept()
if err != nil {
if ais.running {
// Only log error if we're still supposed to be running
continue
}
return
}
ais.mtx.Lock()
// Close existing connection if any
if ais.conn != nil {
ais.conn.Close()
}
ais.conn = conn
ais.mtx.Unlock()
// Handle this connection
go ais.handleConnection(conn)
}
}
// handleConnection handles a single client connection
func (ais *AudioInputServer) handleConnection(conn net.Conn) {
defer conn.Close()
// Connection is now handled by the reader goroutine
// Just wait for connection to close or stop signal
for {
select {
case <-ais.stopChan:
return
default:
// Check if connection is still alive
if ais.conn == nil {
return
}
time.Sleep(100 * time.Millisecond)
}
}
}
// readMessage reads a complete message from the connection
func (ais *AudioInputServer) readMessage(conn net.Conn) (*InputIPCMessage, error) {
// Read header (magic + type + length + timestamp)
headerSize := 4 + 1 + 4 + 8 // uint32 + uint8 + uint32 + int64
header := make([]byte, headerSize)
_, err := io.ReadFull(conn, header)
if err != nil {
return nil, err
}
// Parse header
msg := &InputIPCMessage{}
msg.Magic = binary.LittleEndian.Uint32(header[0:4])
msg.Type = InputMessageType(header[4])
msg.Length = binary.LittleEndian.Uint32(header[5:9])
msg.Timestamp = int64(binary.LittleEndian.Uint64(header[9:17]))
// Validate magic number
if msg.Magic != inputMagicNumber {
return nil, fmt.Errorf("invalid magic number: %x", msg.Magic)
}
// Validate message length
if msg.Length > maxFrameSize {
return nil, fmt.Errorf("message too large: %d bytes", msg.Length)
}
// Read data if present
if msg.Length > 0 {
msg.Data = make([]byte, msg.Length)
_, err = io.ReadFull(conn, msg.Data)
if err != nil {
return nil, err
}
}
return msg, nil
}
// processMessage processes a received message
func (ais *AudioInputServer) processMessage(msg *InputIPCMessage) error {
switch msg.Type {
case InputMessageTypeOpusFrame:
return ais.processOpusFrame(msg.Data)
case InputMessageTypeConfig:
return ais.processConfig(msg.Data)
case InputMessageTypeStop:
return fmt.Errorf("stop message received")
case InputMessageTypeHeartbeat:
return ais.sendAck()
default:
return fmt.Errorf("unknown message type: %d", msg.Type)
}
}
// processOpusFrame processes an Opus audio frame
func (ais *AudioInputServer) processOpusFrame(data []byte) error {
if len(data) == 0 {
return nil // Empty frame, ignore
}
// Process the Opus frame using CGO
_, err := CGOAudioDecodeWrite(data)
return err
}
// processConfig processes a configuration update
func (ais *AudioInputServer) processConfig(data []byte) error {
// Acknowledge configuration receipt
return ais.sendAck()
}
// sendAck sends an acknowledgment message
func (ais *AudioInputServer) sendAck() error {
ais.mtx.Lock()
defer ais.mtx.Unlock()
if ais.conn == nil {
return fmt.Errorf("no connection")
}
msg := &InputIPCMessage{
Magic: inputMagicNumber,
Type: InputMessageTypeAck,
Length: 0,
Timestamp: time.Now().UnixNano(),
}
return ais.writeMessage(ais.conn, msg)
}
// writeMessage writes a message to the connection
func (ais *AudioInputServer) writeMessage(conn net.Conn, msg *InputIPCMessage) error {
// Prepare header
headerSize := 4 + 1 + 4 + 8
header := make([]byte, headerSize)
binary.LittleEndian.PutUint32(header[0:4], msg.Magic)
header[4] = byte(msg.Type)
binary.LittleEndian.PutUint32(header[5:9], msg.Length)
binary.LittleEndian.PutUint64(header[9:17], uint64(msg.Timestamp))
// Write header
_, err := conn.Write(header)
if err != nil {
return err
}
// Write data if present
if msg.Length > 0 && msg.Data != nil {
_, err = conn.Write(msg.Data)
if err != nil {
return err
}
}
return nil
}
// AudioInputClient handles IPC communication from the main process
type AudioInputClient struct {
// Atomic fields must be first for proper alignment on ARM
droppedFrames int64 // Atomic counter for dropped frames
totalFrames int64 // Atomic counter for total frames
conn net.Conn
mtx sync.Mutex
running bool
}
// NewAudioInputClient creates a new audio input client
func NewAudioInputClient() *AudioInputClient {
return &AudioInputClient{}
}
// Connect connects to the audio input server
func (aic *AudioInputClient) Connect() error {
aic.mtx.Lock()
defer aic.mtx.Unlock()
if aic.running {
return nil // Already connected
}
socketPath := getInputSocketPath()
// Try connecting multiple times as the server might not be ready
// Reduced retry count and delay for faster startup
for i := 0; i < 10; i++ {
conn, err := net.Dial("unix", socketPath)
if err == nil {
aic.conn = conn
aic.running = true
return nil
}
// Exponential backoff starting at 50ms
delay := time.Duration(50*(1<<uint(i/3))) * time.Millisecond
if delay > 500*time.Millisecond {
delay = 500 * time.Millisecond
}
time.Sleep(delay)
}
return fmt.Errorf("failed to connect to audio input server")
}
// Disconnect disconnects from the audio input server
func (aic *AudioInputClient) Disconnect() {
aic.mtx.Lock()
defer aic.mtx.Unlock()
if !aic.running {
return
}
aic.running = false
if aic.conn != nil {
// Send stop message
msg := &InputIPCMessage{
Magic: inputMagicNumber,
Type: InputMessageTypeStop,
Length: 0,
Timestamp: time.Now().UnixNano(),
}
_ = aic.writeMessage(msg) // Ignore errors during shutdown
aic.conn.Close()
aic.conn = nil
}
}
// SendFrame sends an Opus frame to the audio input server
func (aic *AudioInputClient) SendFrame(frame []byte) error {
aic.mtx.Lock()
defer aic.mtx.Unlock()
if !aic.running || aic.conn == nil {
return fmt.Errorf("not connected")
}
if len(frame) == 0 {
return nil // Empty frame, ignore
}
if len(frame) > maxFrameSize {
return fmt.Errorf("frame too large: %d bytes", len(frame))
}
msg := &InputIPCMessage{
Magic: inputMagicNumber,
Type: InputMessageTypeOpusFrame,
Length: uint32(len(frame)),
Timestamp: time.Now().UnixNano(),
Data: frame,
}
return aic.writeMessage(msg)
}
// SendConfig sends a configuration update to the audio input server
func (aic *AudioInputClient) SendConfig(config InputIPCConfig) error {
aic.mtx.Lock()
defer aic.mtx.Unlock()
if !aic.running || aic.conn == nil {
return fmt.Errorf("not connected")
}
// Serialize config (simple binary format)
data := make([]byte, 12) // 3 * int32
binary.LittleEndian.PutUint32(data[0:4], uint32(config.SampleRate))
binary.LittleEndian.PutUint32(data[4:8], uint32(config.Channels))
binary.LittleEndian.PutUint32(data[8:12], uint32(config.FrameSize))
msg := &InputIPCMessage{
Magic: inputMagicNumber,
Type: InputMessageTypeConfig,
Length: uint32(len(data)),
Timestamp: time.Now().UnixNano(),
Data: data,
}
return aic.writeMessage(msg)
}
// SendHeartbeat sends a heartbeat message
func (aic *AudioInputClient) SendHeartbeat() error {
aic.mtx.Lock()
defer aic.mtx.Unlock()
if !aic.running || aic.conn == nil {
return fmt.Errorf("not connected")
}
msg := &InputIPCMessage{
Magic: inputMagicNumber,
Type: InputMessageTypeHeartbeat,
Length: 0,
Timestamp: time.Now().UnixNano(),
}
return aic.writeMessage(msg)
}
// writeMessage writes a message to the server
func (aic *AudioInputClient) writeMessage(msg *InputIPCMessage) error {
// Increment total frames counter
atomic.AddInt64(&aic.totalFrames, 1)
// Prepare header
headerSize := 4 + 1 + 4 + 8
header := make([]byte, headerSize)
binary.LittleEndian.PutUint32(header[0:4], msg.Magic)
header[4] = byte(msg.Type)
binary.LittleEndian.PutUint32(header[5:9], msg.Length)
binary.LittleEndian.PutUint64(header[9:17], uint64(msg.Timestamp))
// Use non-blocking write with timeout
ctx, cancel := context.WithTimeout(context.Background(), writeTimeout)
defer cancel()
// Create a channel to signal write completion
done := make(chan error, 1)
go func() {
// Write header
_, err := aic.conn.Write(header)
if err != nil {
done <- err
return
}
// Write data if present
if msg.Length > 0 && msg.Data != nil {
_, err = aic.conn.Write(msg.Data)
if err != nil {
done <- err
return
}
}
done <- nil
}()
// Wait for completion or timeout
select {
case err := <-done:
if err != nil {
atomic.AddInt64(&aic.droppedFrames, 1)
return err
}
return nil
case <-ctx.Done():
// Timeout occurred - drop frame to prevent blocking
atomic.AddInt64(&aic.droppedFrames, 1)
return fmt.Errorf("write timeout - frame dropped")
}
}
// IsConnected returns whether the client is connected
func (aic *AudioInputClient) IsConnected() bool {
aic.mtx.Lock()
defer aic.mtx.Unlock()
return aic.running && aic.conn != nil
}
// GetFrameStats returns frame statistics
func (aic *AudioInputClient) GetFrameStats() (total, dropped int64) {
return atomic.LoadInt64(&aic.totalFrames), atomic.LoadInt64(&aic.droppedFrames)
}
// GetDropRate returns the current frame drop rate as a percentage
func (aic *AudioInputClient) GetDropRate() float64 {
total := atomic.LoadInt64(&aic.totalFrames)
dropped := atomic.LoadInt64(&aic.droppedFrames)
if total == 0 {
return 0.0
}
return float64(dropped) / float64(total) * 100.0
}
// ResetStats resets frame statistics
func (aic *AudioInputClient) ResetStats() {
atomic.StoreInt64(&aic.totalFrames, 0)
atomic.StoreInt64(&aic.droppedFrames, 0)
}
// startReaderGoroutine starts the message reader goroutine
func (ais *AudioInputServer) startReaderGoroutine() {
ais.wg.Add(1)
go func() {
defer ais.wg.Done()
for {
select {
case <-ais.stopChan:
return
default:
if ais.conn != nil {
msg, err := ais.readMessage(ais.conn)
if err != nil {
continue // Connection error, retry
}
// Send to message channel with non-blocking write
select {
case ais.messageChan <- msg:
atomic.AddInt64(&ais.totalFrames, 1)
default:
// Channel full, drop message
atomic.AddInt64(&ais.droppedFrames, 1)
}
}
}
}
}()
}
// startProcessorGoroutine starts the message processor goroutine
func (ais *AudioInputServer) startProcessorGoroutine() {
ais.wg.Add(1)
go func() {
defer ais.wg.Done()
for {
select {
case <-ais.stopChan:
return
case msg := <-ais.messageChan:
// Intelligent frame dropping: prioritize recent frames
if msg.Type == InputMessageTypeOpusFrame {
// Check if processing queue is getting full
queueLen := len(ais.processChan)
bufferSize := int(atomic.LoadInt64(&ais.bufferSize))
if queueLen > bufferSize*3/4 {
// Drop oldest frames, keep newest
select {
case <-ais.processChan: // Remove oldest
atomic.AddInt64(&ais.droppedFrames, 1)
default:
}
}
}
// Send to processing queue
select {
case ais.processChan <- msg:
default:
// Processing queue full, drop frame
atomic.AddInt64(&ais.droppedFrames, 1)
}
}
}
}()
}
// startMonitorGoroutine starts the performance monitoring goroutine
func (ais *AudioInputServer) startMonitorGoroutine() {
ais.wg.Add(1)
go func() {
defer ais.wg.Done()
ticker := time.NewTicker(100 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-ais.stopChan:
return
case <-ticker.C:
// Process frames from processing queue
for {
select {
case msg := <-ais.processChan:
start := time.Now()
err := ais.processMessage(msg)
processingTime := time.Since(start).Nanoseconds()
// Calculate end-to-end latency using message timestamp
if msg.Type == InputMessageTypeOpusFrame && msg.Timestamp > 0 {
msgTime := time.Unix(0, msg.Timestamp)
endToEndLatency := time.Since(msgTime).Nanoseconds()
// Use exponential moving average for end-to-end latency tracking
currentAvg := atomic.LoadInt64(&ais.processingTime)
// Weight: 90% historical, 10% current (for smoother averaging)
newAvg := (currentAvg*9 + endToEndLatency) / 10
atomic.StoreInt64(&ais.processingTime, newAvg)
} else {
// Fallback to processing time only
currentAvg := atomic.LoadInt64(&ais.processingTime)
newAvg := (currentAvg + processingTime) / 2
atomic.StoreInt64(&ais.processingTime, newAvg)
}
if err != nil {
atomic.AddInt64(&ais.droppedFrames, 1)
}
default:
// No more messages to process
goto adaptiveBuffering
}
}
adaptiveBuffering:
// Adaptive buffer sizing based on processing time
avgTime := atomic.LoadInt64(&ais.processingTime)
currentSize := atomic.LoadInt64(&ais.bufferSize)
if avgTime > 10*1000*1000 { // > 10ms processing time
// Increase buffer size
newSize := currentSize * 2
if newSize > 1000 {
newSize = 1000
}
atomic.StoreInt64(&ais.bufferSize, newSize)
} else if avgTime < 1*1000*1000 { // < 1ms processing time
// Decrease buffer size
newSize := currentSize / 2
if newSize < 50 {
newSize = 50
}
atomic.StoreInt64(&ais.bufferSize, newSize)
}
}
}
}()
}
// GetServerStats returns server performance statistics
func (ais *AudioInputServer) GetServerStats() (total, dropped int64, avgProcessingTime time.Duration, bufferSize int64) {
return atomic.LoadInt64(&ais.totalFrames),
atomic.LoadInt64(&ais.droppedFrames),
time.Duration(atomic.LoadInt64(&ais.processingTime)),
atomic.LoadInt64(&ais.bufferSize)
}
// Helper functions
// getInputSocketPath returns the path to the input socket
func getInputSocketPath() string {
if path := os.Getenv("JETKVM_AUDIO_INPUT_SOCKET"); path != "" {
return path
}
return filepath.Join("/var/run", inputSocketName)
}

View File

@ -0,0 +1,197 @@
package audio
import (
"sync/atomic"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// AudioInputIPCManager manages microphone input using IPC when enabled
type AudioInputIPCManager struct {
// metrics MUST be first for ARM32 alignment (contains int64 fields)
metrics AudioInputMetrics
supervisor *AudioInputSupervisor
logger zerolog.Logger
running int32
}
// NewAudioInputIPCManager creates a new IPC-based audio input manager
func NewAudioInputIPCManager() *AudioInputIPCManager {
return &AudioInputIPCManager{
supervisor: NewAudioInputSupervisor(),
logger: logging.GetDefaultLogger().With().Str("component", "audio-input-ipc").Logger(),
}
}
// Start starts the IPC-based audio input system
func (aim *AudioInputIPCManager) Start() error {
if !atomic.CompareAndSwapInt32(&aim.running, 0, 1) {
return nil // Already running
}
aim.logger.Info().Msg("Starting IPC-based audio input system")
// Start the supervisor which will launch the subprocess
err := aim.supervisor.Start()
if err != nil {
atomic.StoreInt32(&aim.running, 0)
return err
}
// Send initial configuration
config := InputIPCConfig{
SampleRate: 48000,
Channels: 2,
FrameSize: 960, // 20ms at 48kHz
}
// Wait briefly for the subprocess to be ready (reduced from 1 second)
time.Sleep(200 * time.Millisecond)
err = aim.supervisor.SendConfig(config)
if err != nil {
aim.logger.Warn().Err(err).Msg("Failed to send initial config to audio input server")
// Don't fail startup for config errors
}
aim.logger.Info().Msg("IPC-based audio input system started")
return nil
}
// Stop stops the IPC-based audio input system
func (aim *AudioInputIPCManager) Stop() {
if !atomic.CompareAndSwapInt32(&aim.running, 1, 0) {
return // Already stopped
}
aim.logger.Info().Msg("Stopping IPC-based audio input system")
// Stop the supervisor
aim.supervisor.Stop()
aim.logger.Info().Msg("IPC-based audio input system stopped")
}
// WriteOpusFrame sends an Opus frame to the audio input server via IPC
func (aim *AudioInputIPCManager) WriteOpusFrame(frame []byte) error {
if atomic.LoadInt32(&aim.running) == 0 {
return nil // Not running, silently ignore
}
if len(frame) == 0 {
return nil // Empty frame, ignore
}
// Start latency measurement
startTime := time.Now()
// Update metrics
atomic.AddInt64(&aim.metrics.FramesSent, 1)
atomic.AddInt64(&aim.metrics.BytesProcessed, int64(len(frame)))
aim.metrics.LastFrameTime = startTime
// Send frame via IPC
err := aim.supervisor.SendFrame(frame)
if err != nil {
// Count as dropped frame
atomic.AddInt64(&aim.metrics.FramesDropped, 1)
aim.logger.Debug().Err(err).Msg("Failed to send frame via IPC")
return err
}
// Calculate and update latency (end-to-end IPC transmission time)
latency := time.Since(startTime)
aim.updateLatencyMetrics(latency)
return nil
}
// IsRunning returns whether the IPC manager is running
func (aim *AudioInputIPCManager) IsRunning() bool {
return atomic.LoadInt32(&aim.running) == 1
}
// IsReady returns whether the IPC manager is ready to receive frames
// This checks that the supervisor is connected to the audio input server
func (aim *AudioInputIPCManager) IsReady() bool {
if !aim.IsRunning() {
return false
}
return aim.supervisor.IsConnected()
}
// GetMetrics returns current metrics
func (aim *AudioInputIPCManager) GetMetrics() AudioInputMetrics {
return AudioInputMetrics{
FramesSent: atomic.LoadInt64(&aim.metrics.FramesSent),
FramesDropped: atomic.LoadInt64(&aim.metrics.FramesDropped),
BytesProcessed: atomic.LoadInt64(&aim.metrics.BytesProcessed),
ConnectionDrops: atomic.LoadInt64(&aim.metrics.ConnectionDrops),
AverageLatency: aim.metrics.AverageLatency,
LastFrameTime: aim.metrics.LastFrameTime,
}
}
// updateLatencyMetrics updates the latency metrics with exponential moving average
func (aim *AudioInputIPCManager) updateLatencyMetrics(latency time.Duration) {
// Use exponential moving average for smooth latency calculation
currentAvg := aim.metrics.AverageLatency
if currentAvg == 0 {
aim.metrics.AverageLatency = latency
} else {
// EMA with alpha = 0.1 for smooth averaging
aim.metrics.AverageLatency = time.Duration(float64(currentAvg)*0.9 + float64(latency)*0.1)
}
}
// GetDetailedMetrics returns comprehensive performance metrics
func (aim *AudioInputIPCManager) GetDetailedMetrics() (AudioInputMetrics, map[string]interface{}) {
metrics := aim.GetMetrics()
// Get client frame statistics
client := aim.supervisor.GetClient()
totalFrames, droppedFrames := int64(0), int64(0)
dropRate := 0.0
if client != nil {
totalFrames, droppedFrames = client.GetFrameStats()
dropRate = client.GetDropRate()
}
// Get server statistics if available
serverStats := make(map[string]interface{})
if aim.supervisor.IsRunning() {
serverStats["status"] = "running"
} else {
serverStats["status"] = "stopped"
}
detailedStats := map[string]interface{}{
"client_total_frames": totalFrames,
"client_dropped_frames": droppedFrames,
"client_drop_rate": dropRate,
"server_stats": serverStats,
"ipc_latency_ms": float64(metrics.AverageLatency.Nanoseconds()) / 1e6,
"frames_per_second": aim.calculateFrameRate(),
}
return metrics, detailedStats
}
// calculateFrameRate calculates the current frame rate
func (aim *AudioInputIPCManager) calculateFrameRate() float64 {
framesSent := atomic.LoadInt64(&aim.metrics.FramesSent)
if framesSent == 0 {
return 0.0
}
// Return typical Opus frame rate
return 50.0
}
// GetSupervisor returns the supervisor for advanced operations
func (aim *AudioInputIPCManager) GetSupervisor() *AudioInputSupervisor {
return aim.supervisor
}

View File

@ -0,0 +1,72 @@
package audio
import (
"context"
"os"
"os/signal"
"syscall"
"time"
"github.com/jetkvm/kvm/internal/logging"
)
// IsAudioInputServerProcess detects if we're running as the audio input server subprocess
func IsAudioInputServerProcess() bool {
return os.Getenv("JETKVM_AUDIO_INPUT_SERVER") == "true"
}
// RunAudioInputServer runs the audio input server subprocess
// This should be called from main() when the subprocess is detected
func RunAudioInputServer() error {
logger := logging.GetDefaultLogger().With().Str("component", "audio-input-server").Logger()
logger.Info().Msg("Starting audio input server subprocess")
// Initialize CGO audio system
err := CGOAudioPlaybackInit()
if err != nil {
logger.Error().Err(err).Msg("Failed to initialize CGO audio playback")
return err
}
defer CGOAudioPlaybackClose()
// Create and start the IPC server
server, err := NewAudioInputServer()
if err != nil {
logger.Error().Err(err).Msg("Failed to create audio input server")
return err
}
defer server.Close()
err = server.Start()
if err != nil {
logger.Error().Err(err).Msg("Failed to start audio input server")
return err
}
logger.Info().Msg("Audio input server started, waiting for connections")
// Set up signal handling for graceful shutdown
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, syscall.SIGINT, syscall.SIGTERM)
// Wait for shutdown signal
select {
case sig := <-sigChan:
logger.Info().Str("signal", sig.String()).Msg("Received shutdown signal")
case <-ctx.Done():
logger.Info().Msg("Context cancelled")
}
// Graceful shutdown
logger.Info().Msg("Shutting down audio input server")
server.Stop()
// Give some time for cleanup
time.Sleep(100 * time.Millisecond)
logger.Info().Msg("Audio input server subprocess stopped")
return nil
}

View File

@ -0,0 +1,259 @@
package audio
import (
"context"
"fmt"
"os"
"os/exec"
"sync"
"syscall"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// AudioInputSupervisor manages the audio input server subprocess
type AudioInputSupervisor struct {
cmd *exec.Cmd
cancel context.CancelFunc
mtx sync.Mutex
running bool
logger zerolog.Logger
client *AudioInputClient
processMonitor *ProcessMonitor
}
// NewAudioInputSupervisor creates a new audio input supervisor
func NewAudioInputSupervisor() *AudioInputSupervisor {
return &AudioInputSupervisor{
logger: logging.GetDefaultLogger().With().Str("component", "audio-input-supervisor").Logger(),
client: NewAudioInputClient(),
processMonitor: GetProcessMonitor(),
}
}
// Start starts the audio input server subprocess
func (ais *AudioInputSupervisor) Start() error {
ais.mtx.Lock()
defer ais.mtx.Unlock()
if ais.running {
return fmt.Errorf("audio input supervisor already running")
}
// Create context for subprocess management
ctx, cancel := context.WithCancel(context.Background())
ais.cancel = cancel
// Get current executable path
execPath, err := os.Executable()
if err != nil {
return fmt.Errorf("failed to get executable path: %w", err)
}
// Create command for audio input server subprocess
cmd := exec.CommandContext(ctx, execPath)
cmd.Env = append(os.Environ(),
"JETKVM_AUDIO_INPUT_SERVER=true", // Flag to indicate this is the input server process
"JETKVM_AUDIO_INPUT_IPC=true", // Enable IPC mode
)
// Set process group to allow clean termination
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
}
ais.cmd = cmd
ais.running = true
// Start the subprocess
err = cmd.Start()
if err != nil {
ais.running = false
cancel()
return fmt.Errorf("failed to start audio input server: %w", err)
}
ais.logger.Info().Int("pid", cmd.Process.Pid).Msg("Audio input server subprocess started")
// Add process to monitoring
ais.processMonitor.AddProcess(cmd.Process.Pid, "audio-input-server")
// Monitor the subprocess in a goroutine
go ais.monitorSubprocess()
// Connect client to the server
go ais.connectClient()
return nil
}
// Stop stops the audio input server subprocess
func (ais *AudioInputSupervisor) Stop() {
ais.mtx.Lock()
defer ais.mtx.Unlock()
if !ais.running {
return
}
ais.running = false
// Disconnect client first
if ais.client != nil {
ais.client.Disconnect()
}
// Cancel context to signal subprocess to stop
if ais.cancel != nil {
ais.cancel()
}
// Try graceful termination first
if ais.cmd != nil && ais.cmd.Process != nil {
ais.logger.Info().Int("pid", ais.cmd.Process.Pid).Msg("Stopping audio input server subprocess")
// Send SIGTERM
err := ais.cmd.Process.Signal(syscall.SIGTERM)
if err != nil {
ais.logger.Warn().Err(err).Msg("Failed to send SIGTERM to audio input server")
}
// Wait for graceful shutdown with timeout
done := make(chan error, 1)
go func() {
done <- ais.cmd.Wait()
}()
select {
case <-done:
ais.logger.Info().Msg("Audio input server subprocess stopped gracefully")
case <-time.After(5 * time.Second):
// Force kill if graceful shutdown failed
ais.logger.Warn().Msg("Audio input server subprocess did not stop gracefully, force killing")
err := ais.cmd.Process.Kill()
if err != nil {
ais.logger.Error().Err(err).Msg("Failed to kill audio input server subprocess")
}
}
}
ais.cmd = nil
ais.cancel = nil
}
// IsRunning returns whether the supervisor is running
func (ais *AudioInputSupervisor) IsRunning() bool {
ais.mtx.Lock()
defer ais.mtx.Unlock()
return ais.running
}
// IsConnected returns whether the client is connected to the audio input server
func (ais *AudioInputSupervisor) IsConnected() bool {
if !ais.IsRunning() {
return false
}
return ais.client.IsConnected()
}
// GetClient returns the IPC client for sending audio frames
func (ais *AudioInputSupervisor) GetClient() *AudioInputClient {
return ais.client
}
// GetProcessMetrics returns current process metrics if the process is running
func (ais *AudioInputSupervisor) GetProcessMetrics() *ProcessMetrics {
ais.mtx.Lock()
defer ais.mtx.Unlock()
if ais.cmd == nil || ais.cmd.Process == nil {
return nil
}
pid := ais.cmd.Process.Pid
metrics := ais.processMonitor.GetCurrentMetrics()
for _, metric := range metrics {
if metric.PID == pid {
return &metric
}
}
return nil
}
// monitorSubprocess monitors the subprocess and handles unexpected exits
func (ais *AudioInputSupervisor) monitorSubprocess() {
if ais.cmd == nil {
return
}
pid := ais.cmd.Process.Pid
err := ais.cmd.Wait()
// Remove process from monitoring
ais.processMonitor.RemoveProcess(pid)
ais.mtx.Lock()
defer ais.mtx.Unlock()
if ais.running {
// Unexpected exit
if err != nil {
ais.logger.Error().Err(err).Msg("Audio input server subprocess exited unexpectedly")
} else {
ais.logger.Warn().Msg("Audio input server subprocess exited unexpectedly")
}
// Disconnect client
if ais.client != nil {
ais.client.Disconnect()
}
// Mark as not running
ais.running = false
ais.cmd = nil
ais.logger.Info().Msg("Audio input server subprocess monitoring stopped")
}
}
// connectClient attempts to connect the client to the server
func (ais *AudioInputSupervisor) connectClient() {
// Wait briefly for the server to start (reduced from 500ms)
time.Sleep(100 * time.Millisecond)
err := ais.client.Connect()
if err != nil {
ais.logger.Error().Err(err).Msg("Failed to connect to audio input server")
return
}
ais.logger.Info().Msg("Connected to audio input server")
}
// SendFrame sends an audio frame to the subprocess (convenience method)
func (ais *AudioInputSupervisor) SendFrame(frame []byte) error {
if ais.client == nil {
return fmt.Errorf("client not initialized")
}
if !ais.client.IsConnected() {
return fmt.Errorf("client not connected")
}
return ais.client.SendFrame(frame)
}
// SendConfig sends a configuration update to the subprocess (convenience method)
func (ais *AudioInputSupervisor) SendConfig(config InputIPCConfig) error {
if ais.client == nil {
return fmt.Errorf("client not initialized")
}
if !ais.client.IsConnected() {
return fmt.Errorf("client not connected")
}
return ais.client.SendConfig(config)
}

128
internal/audio/ipc.go Normal file
View File

@ -0,0 +1,128 @@
package audio
import (
"encoding/binary"
"fmt"
"io"
"net"
"os"
"path/filepath"
"sync"
"time"
)
const (
magicNumber uint32 = 0x4A4B564D // "JKVM"
socketName = "audio_output.sock"
)
type AudioServer struct {
listener net.Listener
conn net.Conn
mtx sync.Mutex
}
func NewAudioServer() (*AudioServer, error) {
socketPath := filepath.Join("/var/run", socketName)
// Remove existing socket if any
os.Remove(socketPath)
listener, err := net.Listen("unix", socketPath)
if err != nil {
return nil, fmt.Errorf("failed to create unix socket: %w", err)
}
return &AudioServer{listener: listener}, nil
}
func (s *AudioServer) Start() error {
conn, err := s.listener.Accept()
if err != nil {
return fmt.Errorf("failed to accept connection: %w", err)
}
s.conn = conn
return nil
}
func (s *AudioServer) Close() error {
if s.conn != nil {
s.conn.Close()
}
return s.listener.Close()
}
func (s *AudioServer) SendFrame(frame []byte) error {
s.mtx.Lock()
defer s.mtx.Unlock()
if s.conn == nil {
return fmt.Errorf("no client connected")
}
// Write magic number
if err := binary.Write(s.conn, binary.BigEndian, magicNumber); err != nil {
return fmt.Errorf("failed to write magic number: %w", err)
}
// Write frame size
if err := binary.Write(s.conn, binary.BigEndian, uint32(len(frame))); err != nil {
return fmt.Errorf("failed to write frame size: %w", err)
}
// Write frame data
if _, err := s.conn.Write(frame); err != nil {
return fmt.Errorf("failed to write frame data: %w", err)
}
return nil
}
type AudioClient struct {
conn net.Conn
mtx sync.Mutex
}
func NewAudioClient() (*AudioClient, error) {
socketPath := filepath.Join("/var/run", socketName)
// Try connecting multiple times as the server might not be ready
for i := 0; i < 5; i++ {
conn, err := net.Dial("unix", socketPath)
if err == nil {
return &AudioClient{conn: conn}, nil
}
time.Sleep(time.Second)
}
return nil, fmt.Errorf("failed to connect to audio server")
}
func (c *AudioClient) Close() error {
return c.conn.Close()
}
func (c *AudioClient) ReceiveFrame() ([]byte, error) {
c.mtx.Lock()
defer c.mtx.Unlock()
// Read magic number
var magic uint32
if err := binary.Read(c.conn, binary.BigEndian, &magic); err != nil {
return nil, fmt.Errorf("failed to read magic number: %w", err)
}
if magic != magicNumber {
return nil, fmt.Errorf("invalid magic number: %x", magic)
}
// Read frame size
var size uint32
if err := binary.Read(c.conn, binary.BigEndian, &size); err != nil {
return nil, fmt.Errorf("failed to read frame size: %w", err)
}
// Read frame data
frame := make([]byte, size)
if _, err := io.ReadFull(c.conn, frame); err != nil {
return nil, fmt.Errorf("failed to read frame data: %w", err)
}
return frame, nil
}

410
internal/audio/metrics.go Normal file
View File

@ -0,0 +1,410 @@
package audio
import (
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
)
var (
// Audio output metrics
audioFramesReceivedTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_audio_frames_received_total",
Help: "Total number of audio frames received",
},
)
audioFramesDroppedTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_audio_frames_dropped_total",
Help: "Total number of audio frames dropped",
},
)
audioBytesProcessedTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_audio_bytes_processed_total",
Help: "Total number of audio bytes processed",
},
)
audioConnectionDropsTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_audio_connection_drops_total",
Help: "Total number of audio connection drops",
},
)
audioAverageLatencySeconds = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_average_latency_seconds",
Help: "Average audio latency in seconds",
},
)
audioLastFrameTimestamp = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_last_frame_timestamp_seconds",
Help: "Timestamp of the last audio frame received",
},
)
// Microphone input metrics
microphoneFramesSentTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_microphone_frames_sent_total",
Help: "Total number of microphone frames sent",
},
)
microphoneFramesDroppedTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_microphone_frames_dropped_total",
Help: "Total number of microphone frames dropped",
},
)
microphoneBytesProcessedTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_microphone_bytes_processed_total",
Help: "Total number of microphone bytes processed",
},
)
microphoneConnectionDropsTotal = promauto.NewCounter(
prometheus.CounterOpts{
Name: "jetkvm_microphone_connection_drops_total",
Help: "Total number of microphone connection drops",
},
)
microphoneAverageLatencySeconds = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_average_latency_seconds",
Help: "Average microphone latency in seconds",
},
)
microphoneLastFrameTimestamp = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_last_frame_timestamp_seconds",
Help: "Timestamp of the last microphone frame sent",
},
)
// Audio subprocess process metrics
audioProcessCpuPercent = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_process_cpu_percent",
Help: "CPU usage percentage of audio output subprocess",
},
)
audioProcessMemoryPercent = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_process_memory_percent",
Help: "Memory usage percentage of audio output subprocess",
},
)
audioProcessMemoryRssBytes = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_process_memory_rss_bytes",
Help: "RSS memory usage in bytes of audio output subprocess",
},
)
audioProcessMemoryVmsBytes = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_process_memory_vms_bytes",
Help: "VMS memory usage in bytes of audio output subprocess",
},
)
audioProcessRunning = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_process_running",
Help: "Whether audio output subprocess is running (1=running, 0=stopped)",
},
)
// Microphone subprocess process metrics
microphoneProcessCpuPercent = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_process_cpu_percent",
Help: "CPU usage percentage of microphone input subprocess",
},
)
microphoneProcessMemoryPercent = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_process_memory_percent",
Help: "Memory usage percentage of microphone input subprocess",
},
)
microphoneProcessMemoryRssBytes = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_process_memory_rss_bytes",
Help: "RSS memory usage in bytes of microphone input subprocess",
},
)
microphoneProcessMemoryVmsBytes = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_process_memory_vms_bytes",
Help: "VMS memory usage in bytes of microphone input subprocess",
},
)
microphoneProcessRunning = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_process_running",
Help: "Whether microphone input subprocess is running (1=running, 0=stopped)",
},
)
// Audio configuration metrics
audioConfigQuality = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_config_quality",
Help: "Current audio quality setting (0=Low, 1=Medium, 2=High, 3=Ultra)",
},
)
audioConfigBitrate = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_config_bitrate_kbps",
Help: "Current audio bitrate in kbps",
},
)
audioConfigSampleRate = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_config_sample_rate_hz",
Help: "Current audio sample rate in Hz",
},
)
audioConfigChannels = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_audio_config_channels",
Help: "Current audio channel count",
},
)
microphoneConfigQuality = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_config_quality",
Help: "Current microphone quality setting (0=Low, 1=Medium, 2=High, 3=Ultra)",
},
)
microphoneConfigBitrate = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_config_bitrate_kbps",
Help: "Current microphone bitrate in kbps",
},
)
microphoneConfigSampleRate = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_config_sample_rate_hz",
Help: "Current microphone sample rate in Hz",
},
)
microphoneConfigChannels = promauto.NewGauge(
prometheus.GaugeOpts{
Name: "jetkvm_microphone_config_channels",
Help: "Current microphone channel count",
},
)
// Metrics update tracking
metricsUpdateMutex sync.RWMutex
lastMetricsUpdate time.Time
// Counter value tracking (since prometheus counters don't have Get() method)
audioFramesReceivedValue int64
audioFramesDroppedValue int64
audioBytesProcessedValue int64
audioConnectionDropsValue int64
micFramesSentValue int64
micFramesDroppedValue int64
micBytesProcessedValue int64
micConnectionDropsValue int64
)
// UpdateAudioMetrics updates Prometheus metrics with current audio data
func UpdateAudioMetrics(metrics AudioMetrics) {
metricsUpdateMutex.Lock()
defer metricsUpdateMutex.Unlock()
// Update counters with delta values
if metrics.FramesReceived > audioFramesReceivedValue {
audioFramesReceivedTotal.Add(float64(metrics.FramesReceived - audioFramesReceivedValue))
audioFramesReceivedValue = metrics.FramesReceived
}
if metrics.FramesDropped > audioFramesDroppedValue {
audioFramesDroppedTotal.Add(float64(metrics.FramesDropped - audioFramesDroppedValue))
audioFramesDroppedValue = metrics.FramesDropped
}
if metrics.BytesProcessed > audioBytesProcessedValue {
audioBytesProcessedTotal.Add(float64(metrics.BytesProcessed - audioBytesProcessedValue))
audioBytesProcessedValue = metrics.BytesProcessed
}
if metrics.ConnectionDrops > audioConnectionDropsValue {
audioConnectionDropsTotal.Add(float64(metrics.ConnectionDrops - audioConnectionDropsValue))
audioConnectionDropsValue = metrics.ConnectionDrops
}
// Update gauges
audioAverageLatencySeconds.Set(float64(metrics.AverageLatency.Nanoseconds()) / 1e9)
if !metrics.LastFrameTime.IsZero() {
audioLastFrameTimestamp.Set(float64(metrics.LastFrameTime.Unix()))
}
lastMetricsUpdate = time.Now()
}
// UpdateMicrophoneMetrics updates Prometheus metrics with current microphone data
func UpdateMicrophoneMetrics(metrics AudioInputMetrics) {
metricsUpdateMutex.Lock()
defer metricsUpdateMutex.Unlock()
// Update counters with delta values
if metrics.FramesSent > micFramesSentValue {
microphoneFramesSentTotal.Add(float64(metrics.FramesSent - micFramesSentValue))
micFramesSentValue = metrics.FramesSent
}
if metrics.FramesDropped > micFramesDroppedValue {
microphoneFramesDroppedTotal.Add(float64(metrics.FramesDropped - micFramesDroppedValue))
micFramesDroppedValue = metrics.FramesDropped
}
if metrics.BytesProcessed > micBytesProcessedValue {
microphoneBytesProcessedTotal.Add(float64(metrics.BytesProcessed - micBytesProcessedValue))
micBytesProcessedValue = metrics.BytesProcessed
}
if metrics.ConnectionDrops > micConnectionDropsValue {
microphoneConnectionDropsTotal.Add(float64(metrics.ConnectionDrops - micConnectionDropsValue))
micConnectionDropsValue = metrics.ConnectionDrops
}
// Update gauges
microphoneAverageLatencySeconds.Set(float64(metrics.AverageLatency.Nanoseconds()) / 1e9)
if !metrics.LastFrameTime.IsZero() {
microphoneLastFrameTimestamp.Set(float64(metrics.LastFrameTime.Unix()))
}
lastMetricsUpdate = time.Now()
}
// UpdateAudioProcessMetrics updates Prometheus metrics with audio subprocess data
func UpdateAudioProcessMetrics(metrics ProcessMetrics, isRunning bool) {
metricsUpdateMutex.Lock()
defer metricsUpdateMutex.Unlock()
audioProcessCpuPercent.Set(metrics.CPUPercent)
audioProcessMemoryPercent.Set(metrics.MemoryPercent)
audioProcessMemoryRssBytes.Set(float64(metrics.MemoryRSS))
audioProcessMemoryVmsBytes.Set(float64(metrics.MemoryVMS))
if isRunning {
audioProcessRunning.Set(1)
} else {
audioProcessRunning.Set(0)
}
lastMetricsUpdate = time.Now()
}
// UpdateMicrophoneProcessMetrics updates Prometheus metrics with microphone subprocess data
func UpdateMicrophoneProcessMetrics(metrics ProcessMetrics, isRunning bool) {
metricsUpdateMutex.Lock()
defer metricsUpdateMutex.Unlock()
microphoneProcessCpuPercent.Set(metrics.CPUPercent)
microphoneProcessMemoryPercent.Set(metrics.MemoryPercent)
microphoneProcessMemoryRssBytes.Set(float64(metrics.MemoryRSS))
microphoneProcessMemoryVmsBytes.Set(float64(metrics.MemoryVMS))
if isRunning {
microphoneProcessRunning.Set(1)
} else {
microphoneProcessRunning.Set(0)
}
lastMetricsUpdate = time.Now()
}
// UpdateAudioConfigMetrics updates Prometheus metrics with audio configuration
func UpdateAudioConfigMetrics(config AudioConfig) {
metricsUpdateMutex.Lock()
defer metricsUpdateMutex.Unlock()
audioConfigQuality.Set(float64(config.Quality))
audioConfigBitrate.Set(float64(config.Bitrate))
audioConfigSampleRate.Set(float64(config.SampleRate))
audioConfigChannels.Set(float64(config.Channels))
lastMetricsUpdate = time.Now()
}
// UpdateMicrophoneConfigMetrics updates Prometheus metrics with microphone configuration
func UpdateMicrophoneConfigMetrics(config AudioConfig) {
metricsUpdateMutex.Lock()
defer metricsUpdateMutex.Unlock()
microphoneConfigQuality.Set(float64(config.Quality))
microphoneConfigBitrate.Set(float64(config.Bitrate))
microphoneConfigSampleRate.Set(float64(config.SampleRate))
microphoneConfigChannels.Set(float64(config.Channels))
lastMetricsUpdate = time.Now()
}
// GetLastMetricsUpdate returns the timestamp of the last metrics update
func GetLastMetricsUpdate() time.Time {
metricsUpdateMutex.RLock()
defer metricsUpdateMutex.RUnlock()
return lastMetricsUpdate
}
// StartMetricsUpdater starts a goroutine that periodically updates Prometheus metrics
func StartMetricsUpdater() {
go func() {
ticker := time.NewTicker(5 * time.Second) // Update every 5 seconds
defer ticker.Stop()
for range ticker.C {
// Update audio output metrics
audioMetrics := GetAudioMetrics()
UpdateAudioMetrics(audioMetrics)
// Update microphone input metrics
micMetrics := GetAudioInputMetrics()
UpdateMicrophoneMetrics(micMetrics)
// Update microphone subprocess process metrics
if inputSupervisor := GetAudioInputIPCSupervisor(); inputSupervisor != nil {
if processMetrics := inputSupervisor.GetProcessMetrics(); processMetrics != nil {
UpdateMicrophoneProcessMetrics(*processMetrics, inputSupervisor.IsRunning())
}
}
// Update audio configuration metrics
audioConfig := GetAudioConfig()
UpdateAudioConfigMetrics(audioConfig)
micConfig := GetMicrophoneConfig()
UpdateMicrophoneConfigMetrics(micConfig)
}
}()
}

View File

@ -10,10 +10,10 @@ import (
// with reduced contention using atomic operations and conditional locking
type MicrophoneContentionManager struct {
// Atomic fields (must be 64-bit aligned on 32-bit systems)
lastOpNano int64 // Unix nanoseconds of last operation
cooldownNanos int64 // Cooldown duration in nanoseconds
operationID int64 // Incremental operation ID for tracking
lastOpNano int64 // Unix nanoseconds of last operation
cooldownNanos int64 // Cooldown duration in nanoseconds
operationID int64 // Incremental operation ID for tracking
// Lock-free state flags (using atomic.Pointer for lock-free updates)
lockPtr unsafe.Pointer // *sync.Mutex - conditionally allocated
}
@ -27,61 +27,61 @@ func NewMicrophoneContentionManager(cooldown time.Duration) *MicrophoneContentio
// OperationResult represents the result of attempting a microphone operation
type OperationResult struct {
Allowed bool
Allowed bool
RemainingCooldown time.Duration
OperationID int64
OperationID int64
}
// TryOperation attempts to perform a microphone operation with optimized contention handling
func (mcm *MicrophoneContentionManager) TryOperation() OperationResult {
now := time.Now().UnixNano()
cooldown := atomic.LoadInt64(&mcm.cooldownNanos)
// Fast path: check if we're clearly outside cooldown period using atomic read
lastOp := atomic.LoadInt64(&mcm.lastOpNano)
elapsed := now - lastOp
if elapsed >= cooldown {
// Attempt atomic update without locking
if atomic.CompareAndSwapInt64(&mcm.lastOpNano, lastOp, now) {
opID := atomic.AddInt64(&mcm.operationID, 1)
return OperationResult{
Allowed: true,
Allowed: true,
RemainingCooldown: 0,
OperationID: opID,
OperationID: opID,
}
}
}
// Slow path: potential contention, check remaining cooldown
currentLastOp := atomic.LoadInt64(&mcm.lastOpNano)
currentElapsed := now - currentLastOp
if currentElapsed >= cooldown {
// Race condition: another operation might have updated lastOpNano
// Try once more with CAS
if atomic.CompareAndSwapInt64(&mcm.lastOpNano, currentLastOp, now) {
opID := atomic.AddInt64(&mcm.operationID, 1)
return OperationResult{
Allowed: true,
Allowed: true,
RemainingCooldown: 0,
OperationID: opID,
OperationID: opID,
}
}
// If CAS failed, fall through to cooldown calculation
currentLastOp = atomic.LoadInt64(&mcm.lastOpNano)
currentElapsed = now - currentLastOp
}
remaining := time.Duration(cooldown - currentElapsed)
if remaining < 0 {
remaining = 0
}
return OperationResult{
Allowed: false,
Allowed: false,
RemainingCooldown: remaining,
OperationID: atomic.LoadInt64(&mcm.operationID),
OperationID: atomic.LoadInt64(&mcm.operationID),
}
}
@ -127,20 +127,20 @@ func GetMicrophoneContentionManager() *MicrophoneContentionManager {
if ptr != nil {
return (*MicrophoneContentionManager)(ptr)
}
// Initialize on first use
if atomic.CompareAndSwapInt32(&micContentionInitialized, 0, 1) {
manager := NewMicrophoneContentionManager(200 * time.Millisecond)
atomic.StorePointer(&globalMicContentionManager, unsafe.Pointer(manager))
return manager
}
// Another goroutine initialized it, try again
ptr = atomic.LoadPointer(&globalMicContentionManager)
if ptr != nil {
return (*MicrophoneContentionManager)(ptr)
}
// Fallback: create a new manager (should rarely happen)
return NewMicrophoneContentionManager(200 * time.Millisecond)
}
@ -155,4 +155,4 @@ func TryMicrophoneOperation() OperationResult {
func SetMicrophoneCooldown(cooldown time.Duration) {
manager := GetMicrophoneContentionManager()
manager.SetCooldown(cooldown)
}
}

View File

@ -1,115 +0,0 @@
package audio
import (
"sync/atomic"
"unsafe"
)
var (
// Use unsafe.Pointer for atomic operations instead of mutex
globalNonBlockingManager unsafe.Pointer // *NonBlockingAudioManager
)
// loadManager atomically loads the global manager
func loadManager() *NonBlockingAudioManager {
ptr := atomic.LoadPointer(&globalNonBlockingManager)
if ptr == nil {
return nil
}
return (*NonBlockingAudioManager)(ptr)
}
// storeManager atomically stores the global manager
func storeManager(manager *NonBlockingAudioManager) {
atomic.StorePointer(&globalNonBlockingManager, unsafe.Pointer(manager))
}
// compareAndSwapManager atomically compares and swaps the global manager
func compareAndSwapManager(old, new *NonBlockingAudioManager) bool {
return atomic.CompareAndSwapPointer(&globalNonBlockingManager,
unsafe.Pointer(old), unsafe.Pointer(new))
}
// StartNonBlockingAudioStreaming starts the non-blocking audio streaming system
func StartNonBlockingAudioStreaming(send func([]byte)) error {
manager := loadManager()
if manager != nil && manager.IsOutputRunning() {
return nil // Already running, this is not an error
}
if manager == nil {
newManager := NewNonBlockingAudioManager()
if !compareAndSwapManager(nil, newManager) {
// Another goroutine created manager, use it
manager = loadManager()
} else {
manager = newManager
}
}
return manager.StartAudioOutput(send)
}
// StartNonBlockingAudioInput starts the non-blocking audio input system
func StartNonBlockingAudioInput(receiveChan <-chan []byte) error {
manager := loadManager()
if manager == nil {
newManager := NewNonBlockingAudioManager()
if !compareAndSwapManager(nil, newManager) {
// Another goroutine created manager, use it
manager = loadManager()
} else {
manager = newManager
}
}
// Check if input is already running to avoid unnecessary operations
if manager.IsInputRunning() {
return nil // Already running, this is not an error
}
return manager.StartAudioInput(receiveChan)
}
// StopNonBlockingAudioStreaming stops the non-blocking audio streaming system
func StopNonBlockingAudioStreaming() {
manager := loadManager()
if manager != nil {
manager.Stop()
storeManager(nil)
}
}
// StopNonBlockingAudioInput stops only the audio input without affecting output
func StopNonBlockingAudioInput() {
manager := loadManager()
if manager != nil && manager.IsInputRunning() {
manager.StopAudioInput()
// If both input and output are stopped, recreate manager to ensure clean state
if !manager.IsRunning() {
storeManager(nil)
}
}
}
// GetNonBlockingAudioStats returns statistics from the non-blocking audio system
func GetNonBlockingAudioStats() NonBlockingAudioStats {
manager := loadManager()
if manager != nil {
return manager.GetStats()
}
return NonBlockingAudioStats{}
}
// IsNonBlockingAudioRunning returns true if the non-blocking audio system is running
func IsNonBlockingAudioRunning() bool {
manager := loadManager()
return manager != nil && manager.IsRunning()
}
// IsNonBlockingAudioInputRunning returns true if the non-blocking audio input is running
func IsNonBlockingAudioInputRunning() bool {
manager := loadManager()
return manager != nil && manager.IsInputRunning()
}

View File

@ -1,564 +0,0 @@
package audio
import (
"context"
"errors"
// "runtime" // removed: no longer directly pinning OS thread here; batching handles it
"sync"
"sync/atomic"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// NonBlockingAudioManager manages audio operations in separate worker threads
// to prevent blocking of mouse/keyboard operations
type NonBlockingAudioManager struct {
// Statistics - MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
stats NonBlockingAudioStats
// Control
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
logger *zerolog.Logger
// Audio output (capture from device, send to WebRTC)
outputSendFunc func([]byte)
outputWorkChan chan audioWorkItem
outputResultChan chan audioResult
// Audio input (receive from WebRTC, playback to device)
inputReceiveChan <-chan []byte
inputWorkChan chan audioWorkItem
inputResultChan chan audioResult
// Worker threads and flags - int32 fields grouped together
outputRunning int32
inputRunning int32
outputWorkerRunning int32
inputWorkerRunning int32
}
type audioWorkItem struct {
workType audioWorkType
data []byte
resultChan chan audioResult
}
type audioWorkType int
const (
audioWorkInit audioWorkType = iota
audioWorkReadEncode
audioWorkDecodeWrite
audioWorkClose
)
type audioResult struct {
success bool
data []byte
length int
err error
}
type NonBlockingAudioStats struct {
// int64 fields MUST be first for ARM32 alignment
OutputFramesProcessed int64
OutputFramesDropped int64
InputFramesProcessed int64
InputFramesDropped int64
WorkerErrors int64
// time.Time is int64 internally, so it's also aligned
LastProcessTime time.Time
}
// NewNonBlockingAudioManager creates a new non-blocking audio manager
func NewNonBlockingAudioManager() *NonBlockingAudioManager {
ctx, cancel := context.WithCancel(context.Background())
logger := logging.GetDefaultLogger().With().Str("component", "nonblocking-audio").Logger()
return &NonBlockingAudioManager{
ctx: ctx,
cancel: cancel,
logger: &logger,
outputWorkChan: make(chan audioWorkItem, 10), // Buffer for work items
outputResultChan: make(chan audioResult, 10), // Buffer for results
inputWorkChan: make(chan audioWorkItem, 10),
inputResultChan: make(chan audioResult, 10),
}
}
// StartAudioOutput starts non-blocking audio output (capture and encode)
func (nam *NonBlockingAudioManager) StartAudioOutput(sendFunc func([]byte)) error {
if !atomic.CompareAndSwapInt32(&nam.outputRunning, 0, 1) {
return ErrAudioAlreadyRunning
}
nam.outputSendFunc = sendFunc
// Enable batch audio processing for performance
EnableBatchAudioProcessing()
// Start the blocking worker thread
nam.wg.Add(1)
go nam.outputWorkerThread()
// Start the non-blocking coordinator
nam.wg.Add(1)
go nam.outputCoordinatorThread()
nam.logger.Info().Msg("non-blocking audio output started with batch processing")
return nil
}
// StartAudioInput starts non-blocking audio input (receive and decode)
func (nam *NonBlockingAudioManager) StartAudioInput(receiveChan <-chan []byte) error {
if !atomic.CompareAndSwapInt32(&nam.inputRunning, 0, 1) {
return ErrAudioAlreadyRunning
}
nam.inputReceiveChan = receiveChan
// Enable batch audio processing for performance
EnableBatchAudioProcessing()
// Start the blocking worker thread
nam.wg.Add(1)
go nam.inputWorkerThread()
// Start the non-blocking coordinator
nam.wg.Add(1)
go nam.inputCoordinatorThread()
nam.logger.Info().Msg("non-blocking audio input started with batch processing")
return nil
}
// outputWorkerThread handles all blocking audio output operations
func (nam *NonBlockingAudioManager) outputWorkerThread() {
defer nam.wg.Done()
defer atomic.StoreInt32(&nam.outputWorkerRunning, 0)
atomic.StoreInt32(&nam.outputWorkerRunning, 1)
nam.logger.Debug().Msg("output worker thread started")
// Initialize audio in worker thread
if err := CGOAudioInit(); err != nil {
nam.logger.Error().Err(err).Msg("failed to initialize audio in worker thread")
return
}
defer CGOAudioClose()
// Use buffer pool to avoid allocations
buf := GetAudioFrameBuffer()
defer PutAudioFrameBuffer(buf)
for {
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("output worker thread stopping")
return
case workItem := <-nam.outputWorkChan:
switch workItem.workType {
case audioWorkReadEncode:
n, err := BatchCGOAudioReadEncode(buf)
result := audioResult{
success: err == nil,
length: n,
err: err,
}
if err == nil && n > 0 {
// Get buffer from pool and copy data
resultBuf := GetAudioFrameBuffer()
copy(resultBuf[:n], buf[:n])
result.data = resultBuf[:n]
}
// Send result back (non-blocking)
select {
case workItem.resultChan <- result:
case <-nam.ctx.Done():
return
default:
// Drop result if coordinator is not ready
if result.data != nil {
PutAudioFrameBuffer(result.data)
}
atomic.AddInt64(&nam.stats.OutputFramesDropped, 1)
}
case audioWorkClose:
nam.logger.Debug().Msg("output worker received close signal")
return
}
}
}
}
// outputCoordinatorThread coordinates audio output without blocking
func (nam *NonBlockingAudioManager) outputCoordinatorThread() {
defer nam.wg.Done()
defer atomic.StoreInt32(&nam.outputRunning, 0)
nam.logger.Debug().Msg("output coordinator thread started")
ticker := time.NewTicker(20 * time.Millisecond) // Match frame timing
defer ticker.Stop()
pendingWork := false
resultChan := make(chan audioResult, 1)
for atomic.LoadInt32(&nam.outputRunning) == 1 {
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("output coordinator stopping")
return
case <-ticker.C:
// Only submit work if worker is ready and no pending work
if !pendingWork && atomic.LoadInt32(&nam.outputWorkerRunning) == 1 {
if IsAudioMuted() {
continue // Skip when muted
}
workItem := audioWorkItem{
workType: audioWorkReadEncode,
resultChan: resultChan,
}
// Submit work (non-blocking)
select {
case nam.outputWorkChan <- workItem:
pendingWork = true
default:
// Worker is busy, drop this frame
atomic.AddInt64(&nam.stats.OutputFramesDropped, 1)
}
}
case result := <-resultChan:
pendingWork = false
nam.stats.LastProcessTime = time.Now()
if result.success && result.data != nil && result.length > 0 {
// Send to WebRTC (non-blocking)
if nam.outputSendFunc != nil {
nam.outputSendFunc(result.data)
atomic.AddInt64(&nam.stats.OutputFramesProcessed, 1)
RecordFrameReceived(result.length)
}
// Return buffer to pool after use
PutAudioFrameBuffer(result.data)
} else if result.success && result.length == 0 {
// No data available - this is normal, not an error
// Just continue without logging or counting as error
} else {
atomic.AddInt64(&nam.stats.OutputFramesDropped, 1)
atomic.AddInt64(&nam.stats.WorkerErrors, 1)
if result.err != nil {
nam.logger.Warn().Err(result.err).Msg("audio output worker error")
}
// Clean up buffer if present
if result.data != nil {
PutAudioFrameBuffer(result.data)
}
RecordFrameDropped()
}
}
}
// Signal worker to close
select {
case nam.outputWorkChan <- audioWorkItem{workType: audioWorkClose}:
case <-time.After(100 * time.Millisecond):
nam.logger.Warn().Msg("timeout signaling output worker to close")
}
nam.logger.Info().Msg("output coordinator thread stopped")
}
// inputWorkerThread handles all blocking audio input operations
func (nam *NonBlockingAudioManager) inputWorkerThread() {
defer nam.wg.Done()
// Cleanup CGO resources properly to avoid double-close scenarios
// The outputWorkerThread's CGOAudioClose() will handle all cleanup
atomic.StoreInt32(&nam.inputWorkerRunning, 0)
atomic.StoreInt32(&nam.inputWorkerRunning, 1)
nam.logger.Debug().Msg("input worker thread started")
// Initialize audio playback in worker thread
if err := CGOAudioPlaybackInit(); err != nil {
nam.logger.Error().Err(err).Msg("failed to initialize audio playback in worker thread")
return
}
// Ensure CGO cleanup happens even if we exit unexpectedly
cgoInitialized := true
defer func() {
if cgoInitialized {
nam.logger.Debug().Msg("cleaning up CGO audio playback")
// Add extra safety: ensure no more CGO calls can happen
atomic.StoreInt32(&nam.inputWorkerRunning, 0)
// Note: Don't call CGOAudioPlaybackClose() here to avoid double-close
// The outputWorkerThread's CGOAudioClose() will handle all cleanup
}
}()
for {
// If coordinator has stopped, exit worker loop
if atomic.LoadInt32(&nam.inputRunning) == 0 {
return
}
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("input worker thread stopping due to context cancellation")
return
case workItem := <-nam.inputWorkChan:
switch workItem.workType {
case audioWorkDecodeWrite:
// Check if we're still supposed to be running before processing
if atomic.LoadInt32(&nam.inputWorkerRunning) == 0 || atomic.LoadInt32(&nam.inputRunning) == 0 {
nam.logger.Debug().Msg("input worker stopping, ignoring decode work")
// Do not send to resultChan; coordinator may have exited
return
}
// Validate input data before CGO call
if workItem.data == nil || len(workItem.data) == 0 {
result := audioResult{
success: false,
err: errors.New("invalid audio data"),
}
// Check if coordinator is still running before sending result
if atomic.LoadInt32(&nam.inputRunning) == 1 {
select {
case workItem.resultChan <- result:
case <-nam.ctx.Done():
return
case <-time.After(10 * time.Millisecond):
// Timeout - coordinator may have stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
} else {
// Coordinator has stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
continue
}
// Perform blocking CGO operation with panic recovery
var result audioResult
func() {
defer func() {
if r := recover(); r != nil {
nam.logger.Error().Interface("panic", r).Msg("CGO decode write panic recovered")
result = audioResult{
success: false,
err: errors.New("CGO decode write panic"),
}
}
}()
// Double-check we're still running before CGO call
if atomic.LoadInt32(&nam.inputWorkerRunning) == 0 {
result = audioResult{success: false, err: errors.New("worker shutting down")}
return
}
n, err := BatchCGOAudioDecodeWrite(workItem.data)
result = audioResult{
success: err == nil,
length: n,
err: err,
}
}()
// Send result back (non-blocking) - check if coordinator is still running
if atomic.LoadInt32(&nam.inputRunning) == 1 {
select {
case workItem.resultChan <- result:
case <-nam.ctx.Done():
return
case <-time.After(10 * time.Millisecond):
// Timeout - coordinator may have stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
} else {
// Coordinator has stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
case audioWorkClose:
nam.logger.Debug().Msg("input worker received close signal")
return
}
}
}
}
// inputCoordinatorThread coordinates audio input without blocking
func (nam *NonBlockingAudioManager) inputCoordinatorThread() {
defer nam.wg.Done()
defer atomic.StoreInt32(&nam.inputRunning, 0)
nam.logger.Debug().Msg("input coordinator thread started")
resultChan := make(chan audioResult, 1)
// Do not close resultChan to avoid races with worker sends during shutdown
for atomic.LoadInt32(&nam.inputRunning) == 1 {
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("input coordinator stopping")
return
case frame := <-nam.inputReceiveChan:
if len(frame) == 0 {
continue
}
// Submit work to worker (non-blocking)
if atomic.LoadInt32(&nam.inputWorkerRunning) == 1 {
workItem := audioWorkItem{
workType: audioWorkDecodeWrite,
data: frame,
resultChan: resultChan,
}
select {
case nam.inputWorkChan <- workItem:
// Wait for result with timeout and context cancellation
select {
case result := <-resultChan:
if result.success {
atomic.AddInt64(&nam.stats.InputFramesProcessed, 1)
} else {
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
atomic.AddInt64(&nam.stats.WorkerErrors, 1)
if result.err != nil {
nam.logger.Warn().Err(result.err).Msg("audio input worker error")
}
}
case <-nam.ctx.Done():
nam.logger.Debug().Msg("input coordinator stopping during result wait")
return
case <-time.After(50 * time.Millisecond):
// Timeout waiting for result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
nam.logger.Warn().Msg("timeout waiting for input worker result")
// Drain any pending result to prevent worker blocking
select {
case <-resultChan:
default:
}
}
default:
// Worker is busy, drop this frame
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
}
case <-time.After(250 * time.Millisecond):
// Periodic timeout to prevent blocking
continue
}
}
// Avoid sending close signals or touching channels here; inputRunning=0 will stop worker via checks
nam.logger.Info().Msg("input coordinator thread stopped")
}
// Stop stops all audio operations
func (nam *NonBlockingAudioManager) Stop() {
nam.logger.Info().Msg("stopping non-blocking audio manager")
// Signal all threads to stop
nam.cancel()
// Stop coordinators
atomic.StoreInt32(&nam.outputRunning, 0)
atomic.StoreInt32(&nam.inputRunning, 0)
// Wait for all goroutines to finish
nam.wg.Wait()
// Disable batch processing to free resources
DisableBatchAudioProcessing()
nam.logger.Info().Msg("non-blocking audio manager stopped")
}
// StopAudioInput stops only the audio input operations
func (nam *NonBlockingAudioManager) StopAudioInput() {
nam.logger.Info().Msg("stopping audio input")
// Stop only the input coordinator
atomic.StoreInt32(&nam.inputRunning, 0)
// Drain the receive channel to prevent blocking senders
go func() {
for {
select {
case <-nam.inputReceiveChan:
// Drain any remaining frames
case <-time.After(100 * time.Millisecond):
return
}
}
}()
// Wait for the worker to actually stop to prevent race conditions
timeout := time.After(2 * time.Second)
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-timeout:
nam.logger.Warn().Msg("timeout waiting for input worker to stop")
return
case <-ticker.C:
if atomic.LoadInt32(&nam.inputWorkerRunning) == 0 {
nam.logger.Info().Msg("audio input stopped successfully")
// Close ALSA playback resources now that input worker has stopped
CGOAudioPlaybackClose()
return
}
}
}
}
// GetStats returns current statistics
func (nam *NonBlockingAudioManager) GetStats() NonBlockingAudioStats {
return NonBlockingAudioStats{
OutputFramesProcessed: atomic.LoadInt64(&nam.stats.OutputFramesProcessed),
OutputFramesDropped: atomic.LoadInt64(&nam.stats.OutputFramesDropped),
InputFramesProcessed: atomic.LoadInt64(&nam.stats.InputFramesProcessed),
InputFramesDropped: atomic.LoadInt64(&nam.stats.InputFramesDropped),
WorkerErrors: atomic.LoadInt64(&nam.stats.WorkerErrors),
LastProcessTime: nam.stats.LastProcessTime,
}
}
// IsRunning returns true if any audio operations are running
func (nam *NonBlockingAudioManager) IsRunning() bool {
return atomic.LoadInt32(&nam.outputRunning) == 1 || atomic.LoadInt32(&nam.inputRunning) == 1
}
// IsInputRunning returns true if audio input is running
func (nam *NonBlockingAudioManager) IsInputRunning() bool {
return atomic.LoadInt32(&nam.inputRunning) == 1
}
// IsOutputRunning returns true if audio output is running
func (nam *NonBlockingAudioManager) IsOutputRunning() bool {
return atomic.LoadInt32(&nam.outputRunning) == 1
}

View File

@ -0,0 +1,94 @@
package audio
import (
"context"
"sync/atomic"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
var (
outputStreamingRunning int32
outputStreamingCancel context.CancelFunc
outputStreamingLogger *zerolog.Logger
)
func getOutputStreamingLogger() *zerolog.Logger {
if outputStreamingLogger == nil {
logger := logging.GetDefaultLogger().With().Str("component", "audio-output").Logger()
outputStreamingLogger = &logger
}
return outputStreamingLogger
}
// StartAudioOutputStreaming starts audio output streaming (capturing system audio)
func StartAudioOutputStreaming(send func([]byte)) error {
if !atomic.CompareAndSwapInt32(&outputStreamingRunning, 0, 1) {
return ErrAudioAlreadyRunning
}
// Initialize CGO audio capture
if err := CGOAudioInit(); err != nil {
atomic.StoreInt32(&outputStreamingRunning, 0)
return err
}
ctx, cancel := context.WithCancel(context.Background())
outputStreamingCancel = cancel
// Start audio capture loop
go func() {
defer func() {
CGOAudioClose()
atomic.StoreInt32(&outputStreamingRunning, 0)
getOutputStreamingLogger().Info().Msg("Audio output streaming stopped")
}()
getOutputStreamingLogger().Info().Msg("Audio output streaming started")
buffer := make([]byte, MaxAudioFrameSize)
for {
select {
case <-ctx.Done():
return
default:
// Capture audio frame
n, err := CGOAudioReadEncode(buffer)
if err != nil {
getOutputStreamingLogger().Warn().Err(err).Msg("Failed to read/encode audio")
continue
}
if n > 0 {
// Send frame to callback
frame := make([]byte, n)
copy(frame, buffer[:n])
send(frame)
RecordFrameReceived(n)
}
// Small delay to prevent busy waiting
time.Sleep(10 * time.Millisecond)
}
}
}()
return nil
}
// StopAudioOutputStreaming stops audio output streaming
func StopAudioOutputStreaming() {
if atomic.LoadInt32(&outputStreamingRunning) == 0 {
return
}
if outputStreamingCancel != nil {
outputStreamingCancel()
outputStreamingCancel = nil
}
// Wait for streaming to stop
for atomic.LoadInt32(&outputStreamingRunning) == 1 {
time.Sleep(10 * time.Millisecond)
}
}

View File

@ -0,0 +1,263 @@
package audio
import (
"bufio"
"fmt"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// ProcessMetrics represents CPU and memory usage metrics for a process
type ProcessMetrics struct {
PID int `json:"pid"`
CPUPercent float64 `json:"cpu_percent"`
MemoryRSS int64 `json:"memory_rss_bytes"`
MemoryVMS int64 `json:"memory_vms_bytes"`
MemoryPercent float64 `json:"memory_percent"`
Timestamp time.Time `json:"timestamp"`
ProcessName string `json:"process_name"`
}
// ProcessMonitor monitors CPU and memory usage of processes
type ProcessMonitor struct {
logger zerolog.Logger
mutex sync.RWMutex
monitoredPIDs map[int]*processState
running bool
stopChan chan struct{}
metricsChan chan ProcessMetrics
updateInterval time.Duration
}
// processState tracks the state needed for CPU calculation
type processState struct {
name string
lastCPUTime int64
lastSysTime int64
lastUserTime int64
lastSample time.Time
}
// NewProcessMonitor creates a new process monitor
func NewProcessMonitor() *ProcessMonitor {
return &ProcessMonitor{
logger: logging.GetDefaultLogger().With().Str("component", "process-monitor").Logger(),
monitoredPIDs: make(map[int]*processState),
stopChan: make(chan struct{}),
metricsChan: make(chan ProcessMetrics, 100),
updateInterval: 2 * time.Second, // Update every 2 seconds
}
}
// Start begins monitoring processes
func (pm *ProcessMonitor) Start() {
pm.mutex.Lock()
defer pm.mutex.Unlock()
if pm.running {
return
}
pm.running = true
go pm.monitorLoop()
pm.logger.Info().Msg("Process monitor started")
}
// Stop stops monitoring processes
func (pm *ProcessMonitor) Stop() {
pm.mutex.Lock()
defer pm.mutex.Unlock()
if !pm.running {
return
}
pm.running = false
close(pm.stopChan)
pm.logger.Info().Msg("Process monitor stopped")
}
// AddProcess adds a process to monitor
func (pm *ProcessMonitor) AddProcess(pid int, name string) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
pm.monitoredPIDs[pid] = &processState{
name: name,
lastSample: time.Now(),
}
pm.logger.Info().Int("pid", pid).Str("name", name).Msg("Added process to monitor")
}
// RemoveProcess removes a process from monitoring
func (pm *ProcessMonitor) RemoveProcess(pid int) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
delete(pm.monitoredPIDs, pid)
pm.logger.Info().Int("pid", pid).Msg("Removed process from monitor")
}
// GetMetricsChan returns the channel for receiving metrics
func (pm *ProcessMonitor) GetMetricsChan() <-chan ProcessMetrics {
return pm.metricsChan
}
// GetCurrentMetrics returns current metrics for all monitored processes
func (pm *ProcessMonitor) GetCurrentMetrics() []ProcessMetrics {
pm.mutex.RLock()
defer pm.mutex.RUnlock()
var metrics []ProcessMetrics
for pid, state := range pm.monitoredPIDs {
if metric, err := pm.collectMetrics(pid, state); err == nil {
metrics = append(metrics, metric)
}
}
return metrics
}
// monitorLoop is the main monitoring loop
func (pm *ProcessMonitor) monitorLoop() {
ticker := time.NewTicker(pm.updateInterval)
defer ticker.Stop()
for {
select {
case <-pm.stopChan:
return
case <-ticker.C:
pm.collectAllMetrics()
}
}
}
// collectAllMetrics collects metrics for all monitored processes
func (pm *ProcessMonitor) collectAllMetrics() {
pm.mutex.RLock()
pids := make(map[int]*processState)
for pid, state := range pm.monitoredPIDs {
pids[pid] = state
}
pm.mutex.RUnlock()
for pid, state := range pids {
if metric, err := pm.collectMetrics(pid, state); err == nil {
select {
case pm.metricsChan <- metric:
default:
// Channel full, skip this metric
}
} else {
// Process might have died, remove it
pm.RemoveProcess(pid)
}
}
}
// collectMetrics collects metrics for a specific process
func (pm *ProcessMonitor) collectMetrics(pid int, state *processState) (ProcessMetrics, error) {
now := time.Now()
metric := ProcessMetrics{
PID: pid,
Timestamp: now,
ProcessName: state.name,
}
// Read /proc/[pid]/stat for CPU and memory info
statPath := fmt.Sprintf("/proc/%d/stat", pid)
statData, err := os.ReadFile(statPath)
if err != nil {
return metric, fmt.Errorf("failed to read stat file: %w", err)
}
// Parse stat file
fields := strings.Fields(string(statData))
if len(fields) < 24 {
return metric, fmt.Errorf("invalid stat file format")
}
// Extract CPU times (fields 13, 14 are utime, stime in clock ticks)
utime, _ := strconv.ParseInt(fields[13], 10, 64)
stime, _ := strconv.ParseInt(fields[14], 10, 64)
totalCPUTime := utime + stime
// Extract memory info (field 22 is vsize, field 23 is rss in pages)
vsize, _ := strconv.ParseInt(fields[22], 10, 64)
rss, _ := strconv.ParseInt(fields[23], 10, 64)
// Convert RSS from pages to bytes (assuming 4KB pages)
pageSize := int64(4096)
metric.MemoryRSS = rss * pageSize
metric.MemoryVMS = vsize
// Calculate CPU percentage
if !state.lastSample.IsZero() {
timeDelta := now.Sub(state.lastSample).Seconds()
cpuDelta := float64(totalCPUTime - state.lastCPUTime)
// Convert from clock ticks to seconds (assuming 100 Hz)
clockTicks := 100.0
cpuSeconds := cpuDelta / clockTicks
if timeDelta > 0 {
metric.CPUPercent = (cpuSeconds / timeDelta) * 100.0
}
}
// Calculate memory percentage (RSS / total system memory)
if totalMem := pm.getTotalMemory(); totalMem > 0 {
metric.MemoryPercent = float64(metric.MemoryRSS) / float64(totalMem) * 100.0
}
// Update state for next calculation
state.lastCPUTime = totalCPUTime
state.lastUserTime = utime
state.lastSysTime = stime
state.lastSample = now
return metric, nil
}
// getTotalMemory returns total system memory in bytes
func (pm *ProcessMonitor) getTotalMemory() int64 {
file, err := os.Open("/proc/meminfo")
if err != nil {
return 0
}
defer file.Close()
scanner := bufio.NewScanner(file)
for scanner.Scan() {
line := scanner.Text()
if strings.HasPrefix(line, "MemTotal:") {
fields := strings.Fields(line)
if len(fields) >= 2 {
if kb, err := strconv.ParseInt(fields[1], 10, 64); err == nil {
return kb * 1024 // Convert KB to bytes
}
}
break
}
}
return 0
}
// Global process monitor instance
var globalProcessMonitor *ProcessMonitor
var processMonitorOnce sync.Once
// GetProcessMonitor returns the global process monitor instance
func GetProcessMonitor() *ProcessMonitor {
processMonitorOnce.Do(func() {
globalProcessMonitor = NewProcessMonitor()
globalProcessMonitor.Start()
})
return globalProcessMonitor
}

196
internal/audio/relay.go Normal file
View File

@ -0,0 +1,196 @@
package audio
import (
"context"
"sync"
"github.com/jetkvm/kvm/internal/logging"
"github.com/pion/webrtc/v4/pkg/media"
"github.com/rs/zerolog"
)
// AudioRelay handles forwarding audio frames from the audio server subprocess
// to WebRTC without any CGO audio processing. This runs in the main process.
type AudioRelay struct {
client *AudioClient
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
logger *zerolog.Logger
running bool
mutex sync.RWMutex
// WebRTC integration
audioTrack AudioTrackWriter
config AudioConfig
muted bool
// Statistics
framesRelayed int64
framesDropped int64
}
// AudioTrackWriter interface for WebRTC audio track
type AudioTrackWriter interface {
WriteSample(sample media.Sample) error
}
// NewAudioRelay creates a new audio relay for the main process
func NewAudioRelay() *AudioRelay {
ctx, cancel := context.WithCancel(context.Background())
logger := logging.GetDefaultLogger().With().Str("component", "audio-relay").Logger()
return &AudioRelay{
ctx: ctx,
cancel: cancel,
logger: &logger,
}
}
// Start begins the audio relay process
func (r *AudioRelay) Start(audioTrack AudioTrackWriter, config AudioConfig) error {
r.mutex.Lock()
defer r.mutex.Unlock()
if r.running {
return nil // Already running
}
// Create audio client to connect to subprocess
client, err := NewAudioClient()
if err != nil {
return err
}
r.client = client
r.audioTrack = audioTrack
r.config = config
// Start relay goroutine
r.wg.Add(1)
go r.relayLoop()
r.running = true
r.logger.Info().Msg("Audio relay started")
return nil
}
// Stop stops the audio relay
func (r *AudioRelay) Stop() {
r.mutex.Lock()
defer r.mutex.Unlock()
if !r.running {
return
}
r.cancel()
r.wg.Wait()
if r.client != nil {
r.client.Close()
r.client = nil
}
r.running = false
r.logger.Info().Msg("Audio relay stopped")
}
// SetMuted sets the mute state
func (r *AudioRelay) SetMuted(muted bool) {
r.mutex.Lock()
defer r.mutex.Unlock()
r.muted = muted
}
// IsMuted returns the current mute state (checks both relay and global mute)
func (r *AudioRelay) IsMuted() bool {
r.mutex.RLock()
defer r.mutex.RUnlock()
return r.muted || IsAudioMuted()
}
// GetStats returns relay statistics
func (r *AudioRelay) GetStats() (framesRelayed, framesDropped int64) {
r.mutex.RLock()
defer r.mutex.RUnlock()
return r.framesRelayed, r.framesDropped
}
// UpdateTrack updates the WebRTC audio track for the relay
func (r *AudioRelay) UpdateTrack(audioTrack AudioTrackWriter) {
r.mutex.Lock()
defer r.mutex.Unlock()
r.audioTrack = audioTrack
}
// relayLoop is the main relay loop that forwards frames from subprocess to WebRTC
func (r *AudioRelay) relayLoop() {
defer r.wg.Done()
r.logger.Debug().Msg("Audio relay loop started")
for {
select {
case <-r.ctx.Done():
r.logger.Debug().Msg("Audio relay loop stopping")
return
default:
// Receive frame from audio server subprocess
frame, err := r.client.ReceiveFrame()
if err != nil {
r.logger.Error().Err(err).Msg("Failed to receive audio frame")
r.incrementDropped()
continue
}
// Forward frame to WebRTC
if err := r.forwardToWebRTC(frame); err != nil {
r.logger.Warn().Err(err).Msg("Failed to forward frame to WebRTC")
r.incrementDropped()
} else {
r.incrementRelayed()
}
}
}
}
// forwardToWebRTC forwards a frame to the WebRTC audio track
func (r *AudioRelay) forwardToWebRTC(frame []byte) error {
r.mutex.RLock()
audioTrack := r.audioTrack
config := r.config
muted := r.muted
r.mutex.RUnlock()
if audioTrack == nil {
return nil // No audio track available
}
// Prepare sample data
var sampleData []byte
if muted {
// Send silence when muted
sampleData = make([]byte, len(frame))
} else {
sampleData = frame
}
// Write sample to WebRTC track
return audioTrack.WriteSample(media.Sample{
Data: sampleData,
Duration: config.FrameSize,
})
}
// incrementRelayed atomically increments the relayed frames counter
func (r *AudioRelay) incrementRelayed() {
r.mutex.Lock()
r.framesRelayed++
r.mutex.Unlock()
}
// incrementDropped atomically increments the dropped frames counter
func (r *AudioRelay) incrementDropped() {
r.mutex.Lock()
r.framesDropped++
r.mutex.Unlock()
}

109
internal/audio/relay_api.go Normal file
View File

@ -0,0 +1,109 @@
package audio
import (
"sync"
)
// Global relay instance for the main process
var (
globalRelay *AudioRelay
relayMutex sync.RWMutex
)
// StartAudioRelay starts the audio relay system for the main process
// This replaces the CGO-based audio system when running in main process mode
// audioTrack can be nil initially and updated later via UpdateAudioRelayTrack
func StartAudioRelay(audioTrack AudioTrackWriter) error {
relayMutex.Lock()
defer relayMutex.Unlock()
if globalRelay != nil {
return nil // Already running
}
// Create new relay
relay := NewAudioRelay()
// Get current audio config
config := GetAudioConfig()
// Start the relay (audioTrack can be nil initially)
if err := relay.Start(audioTrack, config); err != nil {
return err
}
globalRelay = relay
return nil
}
// StopAudioRelay stops the audio relay system
func StopAudioRelay() {
relayMutex.Lock()
defer relayMutex.Unlock()
if globalRelay != nil {
globalRelay.Stop()
globalRelay = nil
}
}
// SetAudioRelayMuted sets the mute state for the audio relay
func SetAudioRelayMuted(muted bool) {
relayMutex.RLock()
defer relayMutex.RUnlock()
if globalRelay != nil {
globalRelay.SetMuted(muted)
}
}
// IsAudioRelayMuted returns the current mute state of the audio relay
func IsAudioRelayMuted() bool {
relayMutex.RLock()
defer relayMutex.RUnlock()
if globalRelay != nil {
return globalRelay.IsMuted()
}
return false
}
// GetAudioRelayStats returns statistics from the audio relay
func GetAudioRelayStats() (framesRelayed, framesDropped int64) {
relayMutex.RLock()
defer relayMutex.RUnlock()
if globalRelay != nil {
return globalRelay.GetStats()
}
return 0, 0
}
// IsAudioRelayRunning returns whether the audio relay is currently running
func IsAudioRelayRunning() bool {
relayMutex.RLock()
defer relayMutex.RUnlock()
return globalRelay != nil
}
// UpdateAudioRelayTrack updates the WebRTC audio track for the relay
func UpdateAudioRelayTrack(audioTrack AudioTrackWriter) error {
relayMutex.Lock()
defer relayMutex.Unlock()
if globalRelay == nil {
// No relay running, start one with the provided track
relay := NewAudioRelay()
config := GetAudioConfig()
if err := relay.Start(audioTrack, config); err != nil {
return err
}
globalRelay = relay
return nil
}
// Update the track in the existing relay
globalRelay.UpdateTrack(audioTrack)
return nil
}

View File

@ -0,0 +1,429 @@
//go:build cgo
// +build cgo
package audio
import (
"context"
"fmt"
"os"
"os/exec"
"sync"
"sync/atomic"
"syscall"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
const (
// Maximum number of restart attempts within the restart window
maxRestartAttempts = 5
// Time window for counting restart attempts
restartWindow = 5 * time.Minute
// Delay between restart attempts
restartDelay = 2 * time.Second
// Maximum restart delay (exponential backoff)
maxRestartDelay = 30 * time.Second
)
// AudioServerSupervisor manages the audio server subprocess lifecycle
type AudioServerSupervisor struct {
ctx context.Context
cancel context.CancelFunc
logger *zerolog.Logger
mutex sync.RWMutex
running int32
// Process management
cmd *exec.Cmd
processPID int
// Restart management
restartAttempts []time.Time
lastExitCode int
lastExitTime time.Time
// Channels for coordination
processDone chan struct{}
stopChan chan struct{}
// Process monitoring
processMonitor *ProcessMonitor
// Callbacks
onProcessStart func(pid int)
onProcessExit func(pid int, exitCode int, crashed bool)
onRestart func(attempt int, delay time.Duration)
}
// NewAudioServerSupervisor creates a new audio server supervisor
func NewAudioServerSupervisor() *AudioServerSupervisor {
ctx, cancel := context.WithCancel(context.Background())
logger := logging.GetDefaultLogger().With().Str("component", "audio-supervisor").Logger()
return &AudioServerSupervisor{
ctx: ctx,
cancel: cancel,
logger: &logger,
processDone: make(chan struct{}),
stopChan: make(chan struct{}),
processMonitor: GetProcessMonitor(),
}
}
// SetCallbacks sets optional callbacks for process lifecycle events
func (s *AudioServerSupervisor) SetCallbacks(
onStart func(pid int),
onExit func(pid int, exitCode int, crashed bool),
onRestart func(attempt int, delay time.Duration),
) {
s.mutex.Lock()
defer s.mutex.Unlock()
s.onProcessStart = onStart
s.onProcessExit = onExit
s.onRestart = onRestart
}
// Start begins supervising the audio server process
func (s *AudioServerSupervisor) Start() error {
if !atomic.CompareAndSwapInt32(&s.running, 0, 1) {
return fmt.Errorf("supervisor already running")
}
s.logger.Info().Msg("starting audio server supervisor")
// Start the supervision loop
go s.supervisionLoop()
return nil
}
// Stop gracefully stops the audio server and supervisor
func (s *AudioServerSupervisor) Stop() error {
if !atomic.CompareAndSwapInt32(&s.running, 1, 0) {
return nil // Already stopped
}
s.logger.Info().Msg("stopping audio server supervisor")
// Signal stop and wait for cleanup
close(s.stopChan)
s.cancel()
// Wait for process to exit
select {
case <-s.processDone:
s.logger.Info().Msg("audio server process stopped gracefully")
case <-time.After(10 * time.Second):
s.logger.Warn().Msg("audio server process did not stop gracefully, forcing termination")
s.forceKillProcess()
}
return nil
}
// IsRunning returns true if the supervisor is running
func (s *AudioServerSupervisor) IsRunning() bool {
return atomic.LoadInt32(&s.running) == 1
}
// GetProcessPID returns the current process PID (0 if not running)
func (s *AudioServerSupervisor) GetProcessPID() int {
s.mutex.RLock()
defer s.mutex.RUnlock()
return s.processPID
}
// GetLastExitInfo returns information about the last process exit
func (s *AudioServerSupervisor) GetLastExitInfo() (exitCode int, exitTime time.Time) {
s.mutex.RLock()
defer s.mutex.RUnlock()
return s.lastExitCode, s.lastExitTime
}
// GetProcessMetrics returns current process metrics if the process is running
func (s *AudioServerSupervisor) GetProcessMetrics() *ProcessMetrics {
s.mutex.RLock()
pid := s.processPID
s.mutex.RUnlock()
if pid == 0 {
return nil
}
metrics := s.processMonitor.GetCurrentMetrics()
for _, metric := range metrics {
if metric.PID == pid {
return &metric
}
}
return nil
}
// supervisionLoop is the main supervision loop
func (s *AudioServerSupervisor) supervisionLoop() {
defer func() {
close(s.processDone)
s.logger.Info().Msg("audio server supervision ended")
}()
for atomic.LoadInt32(&s.running) == 1 {
select {
case <-s.stopChan:
s.logger.Info().Msg("received stop signal")
s.terminateProcess()
return
case <-s.ctx.Done():
s.logger.Info().Msg("context cancelled")
s.terminateProcess()
return
default:
// Start or restart the process
if err := s.startProcess(); err != nil {
s.logger.Error().Err(err).Msg("failed to start audio server process")
// Check if we should attempt restart
if !s.shouldRestart() {
s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
return
}
delay := s.calculateRestartDelay()
s.logger.Warn().Dur("delay", delay).Msg("retrying process start after delay")
if s.onRestart != nil {
s.onRestart(len(s.restartAttempts), delay)
}
select {
case <-time.After(delay):
case <-s.stopChan:
return
case <-s.ctx.Done():
return
}
continue
}
// Wait for process to exit
s.waitForProcessExit()
// Check if we should restart
if !s.shouldRestart() {
s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
return
}
// Calculate restart delay
delay := s.calculateRestartDelay()
s.logger.Info().Dur("delay", delay).Msg("restarting audio server process after delay")
if s.onRestart != nil {
s.onRestart(len(s.restartAttempts), delay)
}
// Wait for restart delay
select {
case <-time.After(delay):
case <-s.stopChan:
return
case <-s.ctx.Done():
return
}
}
}
}
// startProcess starts the audio server process
func (s *AudioServerSupervisor) startProcess() error {
execPath, err := os.Executable()
if err != nil {
return fmt.Errorf("failed to get executable path: %w", err)
}
s.mutex.Lock()
defer s.mutex.Unlock()
// Create new command
s.cmd = exec.CommandContext(s.ctx, execPath, "--audio-server")
s.cmd.Stdout = os.Stdout
s.cmd.Stderr = os.Stderr
// Start the process
if err := s.cmd.Start(); err != nil {
return fmt.Errorf("failed to start process: %w", err)
}
s.processPID = s.cmd.Process.Pid
s.logger.Info().Int("pid", s.processPID).Msg("audio server process started")
// Add process to monitoring
s.processMonitor.AddProcess(s.processPID, "audio-server")
if s.onProcessStart != nil {
s.onProcessStart(s.processPID)
}
return nil
}
// waitForProcessExit waits for the current process to exit and logs the result
func (s *AudioServerSupervisor) waitForProcessExit() {
s.mutex.RLock()
cmd := s.cmd
pid := s.processPID
s.mutex.RUnlock()
if cmd == nil {
return
}
// Wait for process to exit
err := cmd.Wait()
s.mutex.Lock()
s.lastExitTime = time.Now()
s.processPID = 0
var exitCode int
var crashed bool
if err != nil {
if exitError, ok := err.(*exec.ExitError); ok {
exitCode = exitError.ExitCode()
crashed = exitCode != 0
} else {
// Process was killed or other error
exitCode = -1
crashed = true
}
} else {
exitCode = 0
crashed = false
}
s.lastExitCode = exitCode
s.mutex.Unlock()
// Remove process from monitoring
s.processMonitor.RemoveProcess(pid)
if crashed {
s.logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msg("audio server process crashed")
s.recordRestartAttempt()
} else {
s.logger.Info().Int("pid", pid).Msg("audio server process exited gracefully")
}
if s.onProcessExit != nil {
s.onProcessExit(pid, exitCode, crashed)
}
}
// terminateProcess gracefully terminates the current process
func (s *AudioServerSupervisor) terminateProcess() {
s.mutex.RLock()
cmd := s.cmd
pid := s.processPID
s.mutex.RUnlock()
if cmd == nil || cmd.Process == nil {
return
}
s.logger.Info().Int("pid", pid).Msg("terminating audio server process")
// Send SIGTERM first
if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
s.logger.Warn().Err(err).Int("pid", pid).Msg("failed to send SIGTERM")
}
// Wait for graceful shutdown
done := make(chan struct{})
go func() {
_ = cmd.Wait()
close(done)
}()
select {
case <-done:
s.logger.Info().Int("pid", pid).Msg("audio server process terminated gracefully")
case <-time.After(5 * time.Second):
s.logger.Warn().Int("pid", pid).Msg("process did not terminate gracefully, sending SIGKILL")
s.forceKillProcess()
}
}
// forceKillProcess forcefully kills the current process
func (s *AudioServerSupervisor) forceKillProcess() {
s.mutex.RLock()
cmd := s.cmd
pid := s.processPID
s.mutex.RUnlock()
if cmd == nil || cmd.Process == nil {
return
}
s.logger.Warn().Int("pid", pid).Msg("force killing audio server process")
if err := cmd.Process.Kill(); err != nil {
s.logger.Error().Err(err).Int("pid", pid).Msg("failed to kill process")
}
}
// shouldRestart determines if the process should be restarted
func (s *AudioServerSupervisor) shouldRestart() bool {
if atomic.LoadInt32(&s.running) == 0 {
return false // Supervisor is stopping
}
s.mutex.RLock()
defer s.mutex.RUnlock()
// Clean up old restart attempts outside the window
now := time.Now()
var recentAttempts []time.Time
for _, attempt := range s.restartAttempts {
if now.Sub(attempt) < restartWindow {
recentAttempts = append(recentAttempts, attempt)
}
}
s.restartAttempts = recentAttempts
return len(s.restartAttempts) < maxRestartAttempts
}
// recordRestartAttempt records a restart attempt
func (s *AudioServerSupervisor) recordRestartAttempt() {
s.mutex.Lock()
defer s.mutex.Unlock()
s.restartAttempts = append(s.restartAttempts, time.Now())
}
// calculateRestartDelay calculates the delay before next restart attempt
func (s *AudioServerSupervisor) calculateRestartDelay() time.Duration {
s.mutex.RLock()
defer s.mutex.RUnlock()
// Exponential backoff based on recent restart attempts
attempts := len(s.restartAttempts)
if attempts == 0 {
return restartDelay
}
// Calculate exponential backoff: 2^attempts * base delay
delay := restartDelay
for i := 0; i < attempts && delay < maxRestartDelay; i++ {
delay *= 2
}
if delay > maxRestartDelay {
delay = maxRestartDelay
}
return delay
}

162
main.go
View File

@ -2,6 +2,7 @@ package kvm
import (
"context"
"fmt"
"net/http"
"os"
"os/signal"
@ -10,12 +11,127 @@ import (
"github.com/gwatts/rootcerts"
"github.com/jetkvm/kvm/internal/audio"
"github.com/pion/webrtc/v4/pkg/media"
)
var appCtx context.Context
var (
appCtx context.Context
isAudioServer bool
audioProcessDone chan struct{}
audioSupervisor *audio.AudioServerSupervisor
)
func Main() {
func runAudioServer() {
logger.Info().Msg("Starting audio server subprocess")
// Create audio server
server, err := audio.NewAudioServer()
if err != nil {
logger.Error().Err(err).Msg("failed to create audio server")
os.Exit(1)
}
defer server.Close()
// Start accepting connections
if err := server.Start(); err != nil {
logger.Error().Err(err).Msg("failed to start audio server")
os.Exit(1)
}
// Initialize audio processing
err = audio.StartNonBlockingAudioStreaming(func(frame []byte) {
if err := server.SendFrame(frame); err != nil {
logger.Warn().Err(err).Msg("failed to send audio frame")
audio.RecordFrameDropped()
}
})
if err != nil {
logger.Error().Err(err).Msg("failed to start audio processing")
os.Exit(1)
}
// Wait for termination signal
sigs := make(chan os.Signal, 1)
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
<-sigs
// Cleanup
audio.StopNonBlockingAudioStreaming()
logger.Info().Msg("Audio server subprocess stopped")
}
func startAudioSubprocess() error {
// Create audio server supervisor
audioSupervisor = audio.NewAudioServerSupervisor()
// Set up callbacks for process lifecycle events
audioSupervisor.SetCallbacks(
// onProcessStart
func(pid int) {
logger.Info().Int("pid", pid).Msg("audio server process started")
// Start audio relay system for main process without a track initially
// The track will be updated when a WebRTC session is created
if err := audio.StartAudioRelay(nil); err != nil {
logger.Error().Err(err).Msg("failed to start audio relay")
}
},
// onProcessExit
func(pid int, exitCode int, crashed bool) {
if crashed {
logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msg("audio server process crashed")
} else {
logger.Info().Int("pid", pid).Msg("audio server process exited gracefully")
}
// Stop audio relay when process exits
audio.StopAudioRelay()
},
// onRestart
func(attempt int, delay time.Duration) {
logger.Warn().Int("attempt", attempt).Dur("delay", delay).Msg("restarting audio server process")
},
)
// Start the supervisor
if err := audioSupervisor.Start(); err != nil {
return fmt.Errorf("failed to start audio supervisor: %w", err)
}
// Monitor supervisor and handle cleanup
go func() {
defer close(audioProcessDone)
// Wait for supervisor to stop
for audioSupervisor.IsRunning() {
time.Sleep(100 * time.Millisecond)
}
logger.Info().Msg("audio supervisor stopped")
}()
return nil
}
func Main(audioServer bool) {
// Initialize channel and set audio server flag
isAudioServer = audioServer
audioProcessDone = make(chan struct{})
// If running as audio server, only initialize audio processing
if isAudioServer {
runAudioServer()
return
}
// If running as audio input server, only initialize audio input processing
if audio.IsAudioInputServerProcess() {
err := audio.RunAudioInputServer()
if err != nil {
logger.Error().Err(err).Msg("audio input server failed")
os.Exit(1)
}
return
}
LoadConfig()
var cancel context.CancelFunc
@ -80,30 +196,10 @@ func Main() {
// initialize usb gadget
initUsbGadget()
// Start non-blocking audio streaming and deliver Opus frames to WebRTC
err = audio.StartNonBlockingAudioStreaming(func(frame []byte) {
// Deliver Opus frame to WebRTC audio track if session is active
if currentSession != nil {
config := audio.GetAudioConfig()
var sampleData []byte
if audio.IsAudioMuted() {
sampleData = make([]byte, len(frame)) // silence
} else {
sampleData = frame
}
if err := currentSession.AudioTrack.WriteSample(media.Sample{
Data: sampleData,
Duration: config.FrameSize,
}); err != nil {
logger.Warn().Err(err).Msg("error writing audio sample")
audio.RecordFrameDropped()
}
} else {
audio.RecordFrameDropped()
}
})
// Start audio subprocess
err = startAudioSubprocess()
if err != nil {
logger.Warn().Err(err).Msg("failed to start non-blocking audio streaming")
logger.Warn().Err(err).Msg("failed to start audio subprocess")
}
// Initialize session provider for audio events
@ -163,8 +259,18 @@ func Main() {
<-sigs
logger.Info().Msg("JetKVM Shutting Down")
// Stop non-blocking audio manager
audio.StopNonBlockingAudioStreaming()
// Stop audio subprocess and wait for cleanup
if !isAudioServer {
if audioSupervisor != nil {
logger.Info().Msg("stopping audio supervisor")
if err := audioSupervisor.Stop(); err != nil {
logger.Error().Err(err).Msg("failed to stop audio supervisor")
}
}
<-audioProcessDone
} else {
audio.StopNonBlockingAudioStreaming()
}
//if fuseServer != nil {
// err := setMassStorageImage(" ")
// if err != nil {

View File

@ -1,6 +1,7 @@
package kvm
import (
"github.com/jetkvm/kvm/internal/audio"
"github.com/prometheus/client_golang/prometheus"
versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version"
"github.com/prometheus/common/version"
@ -10,4 +11,7 @@ func initPrometheus() {
// A Prometheus metrics endpoint.
version.Version = builtAppVersion
prometheus.MustRegister(versioncollector.NewCollector("jetkvm"))
// Start audio metrics collection
audio.StartMetricsUpdater()
}

2
resource/dev_test.sh Normal file → Executable file
View File

@ -1,4 +1,4 @@
#!/bin/sh
#!/bin/bash
JSON_OUTPUT=false
GET_COMMANDS=false
if [ "$1" = "-json" ]; then

0
tools/build_audio_deps.sh Normal file → Executable file
View File

0
tools/setup_rv1106_toolchain.sh Normal file → Executable file
View File

View File

@ -2,7 +2,7 @@ import { MdOutlineContentPasteGo, MdVolumeOff, MdVolumeUp, MdGraphicEq } from "r
import { LuCable, LuHardDrive, LuMaximize, LuSettings, LuSignal } from "react-icons/lu";
import { FaKeyboard } from "react-icons/fa6";
import { Popover, PopoverButton, PopoverPanel } from "@headlessui/react";
import { Fragment, useCallback, useEffect, useRef, useState } from "react";
import { Fragment, useCallback, useRef } from "react";
import { CommandLineIcon } from "@heroicons/react/20/solid";
import { Button } from "@components/Button";
@ -21,7 +21,7 @@ import ExtensionPopover from "@/components/popovers/ExtensionPopover";
import AudioControlPopover from "@/components/popovers/AudioControlPopover";
import { useDeviceUiNavigation } from "@/hooks/useAppNavigation";
import { useAudioEvents } from "@/hooks/useAudioEvents";
import api from "@/api";
// Type for microphone error
interface MicrophoneError {
@ -83,35 +83,10 @@ export default function Actionbar({
);
// Use WebSocket-based audio events for real-time updates
const { audioMuted, isConnected } = useAudioEvents();
const { audioMuted } = useAudioEvents();
// Fallback to polling if WebSocket is not connected
const [fallbackMuted, setFallbackMuted] = useState(false);
useEffect(() => {
if (!isConnected) {
// Load initial state
api.GET("/audio/mute").then(async resp => {
if (resp.ok) {
const data = await resp.json();
setFallbackMuted(!!data.muted);
}
});
// Fallback polling when WebSocket is not available
const interval = setInterval(async () => {
const resp = await api.GET("/audio/mute");
if (resp.ok) {
const data = await resp.json();
setFallbackMuted(!!data.muted);
}
}, 1000);
return () => clearInterval(interval);
}
}, [isConnected]);
// Use WebSocket data when available, fallback to polling data otherwise
const isMuted = isConnected && audioMuted !== null ? audioMuted : fallbackMuted;
// Use WebSocket data exclusively - no polling fallback
const isMuted = audioMuted ?? false; // Default to false if WebSocket data not available yet
return (
<Container className="border-b border-b-slate-800/20 bg-white dark:border-b-slate-300/20 dark:bg-slate-900">

View File

@ -1,6 +1,6 @@
import { useEffect, useState } from "react";
import { MdGraphicEq, MdSignalWifi4Bar, MdError, MdMic } from "react-icons/md";
import { LuActivity, LuClock, LuHardDrive, LuSettings } from "react-icons/lu";
import { LuActivity, LuClock, LuHardDrive, LuSettings, LuCpu, LuMemoryStick } from "react-icons/lu";
import { AudioLevelMeter } from "@components/AudioLevelMeter";
import { cx } from "@/cva.config";
@ -27,6 +27,14 @@ interface MicrophoneMetrics {
average_latency: string;
}
interface ProcessMetrics {
cpu_percent: number;
memory_percent: number;
memory_rss: number;
memory_vms: number;
running: boolean;
}
interface AudioConfig {
Quality: number;
Bitrate: number;
@ -55,6 +63,16 @@ export default function AudioMetricsDashboard() {
const [fallbackMicrophoneMetrics, setFallbackMicrophoneMetrics] = useState<MicrophoneMetrics | null>(null);
const [fallbackConnected, setFallbackConnected] = useState(false);
// Process metrics state
const [audioProcessMetrics, setAudioProcessMetrics] = useState<ProcessMetrics | null>(null);
const [microphoneProcessMetrics, setMicrophoneProcessMetrics] = useState<ProcessMetrics | null>(null);
// Historical data for histograms (last 60 data points, ~1 minute at 1s intervals)
const [audioCpuHistory, setAudioCpuHistory] = useState<number[]>([]);
const [audioMemoryHistory, setAudioMemoryHistory] = useState<number[]>([]);
const [micCpuHistory, setMicCpuHistory] = useState<number[]>([]);
const [micMemoryHistory, setMicMemoryHistory] = useState<number[]>([]);
// Configuration state (these don't change frequently, so we can load them once)
const [config, setConfig] = useState<AudioConfig | null>(null);
const [microphoneConfig, setMicrophoneConfig] = useState<AudioConfig | null>(null);
@ -124,6 +142,29 @@ export default function AudioMetricsDashboard() {
setFallbackConnected(false);
}
// Load audio process metrics
try {
const audioProcessResp = await api.GET("/audio/process-metrics");
if (audioProcessResp.ok) {
const audioProcessData = await audioProcessResp.json();
setAudioProcessMetrics(audioProcessData);
// Update historical data for histograms (keep last 60 points)
if (audioProcessData.running) {
setAudioCpuHistory(prev => {
const newHistory = [...prev, audioProcessData.cpu_percent];
return newHistory.slice(-60); // Keep last 60 data points
});
setAudioMemoryHistory(prev => {
const newHistory = [...prev, audioProcessData.memory_percent];
return newHistory.slice(-60);
});
}
}
} catch (audioProcessError) {
console.debug("Audio process metrics not available:", audioProcessError);
}
// Load microphone metrics
try {
const micResp = await api.GET("/microphone/metrics");
@ -135,6 +176,29 @@ export default function AudioMetricsDashboard() {
// Microphone metrics might not be available, that's okay
console.debug("Microphone metrics not available:", micError);
}
// Load microphone process metrics
try {
const micProcessResp = await api.GET("/microphone/process-metrics");
if (micProcessResp.ok) {
const micProcessData = await micProcessResp.json();
setMicrophoneProcessMetrics(micProcessData);
// Update historical data for histograms (keep last 60 points)
if (micProcessData.running) {
setMicCpuHistory(prev => {
const newHistory = [...prev, micProcessData.cpu_percent];
return newHistory.slice(-60); // Keep last 60 data points
});
setMicMemoryHistory(prev => {
const newHistory = [...prev, micProcessData.memory_percent];
return newHistory.slice(-60);
});
}
}
} catch (micProcessError) {
console.debug("Microphone process metrics not available:", micProcessError);
}
} catch (error) {
console.error("Failed to load audio data:", error);
setFallbackConnected(false);
@ -158,6 +222,18 @@ export default function AudioMetricsDashboard() {
return ((metrics.frames_dropped / metrics.frames_received) * 100);
};
const formatMemory = (bytes: number) => {
if (bytes === 0) return "0 MB";
const mb = bytes / (1024 * 1024);
if (mb < 1024) {
return `${mb.toFixed(1)} MB`;
}
const gb = mb / 1024;
return `${gb.toFixed(2)} GB`;
};
const getQualityColor = (quality: number) => {
switch (quality) {
case 0: return "text-yellow-600 dark:text-yellow-400";
@ -168,6 +244,53 @@ export default function AudioMetricsDashboard() {
}
};
// Histogram component for displaying historical data
const Histogram = ({ data, title, unit, color }: {
data: number[],
title: string,
unit: string,
color: string
}) => {
if (data.length === 0) return null;
const maxValue = Math.max(...data, 1); // Avoid division by zero
const minValue = Math.min(...data);
const range = maxValue - minValue;
return (
<div className="space-y-2">
<div className="flex items-center justify-between">
<span className="text-sm font-medium text-slate-700 dark:text-slate-300">
{title}
</span>
<span className="text-xs text-slate-500 dark:text-slate-400">
{data.length > 0 ? `${data[data.length - 1].toFixed(1)}${unit}` : `0${unit}`}
</span>
</div>
<div className="flex items-end gap-0.5 h-16 bg-slate-50 dark:bg-slate-800 rounded p-2">
{data.slice(-30).map((value, index) => { // Show last 30 points
const height = range > 0 ? ((value - minValue) / range) * 100 : 0;
return (
<div
key={index}
className={cx(
"flex-1 rounded-sm transition-all duration-200",
color
)}
style={{ height: `${Math.max(height, 2)}%` }}
title={`${value.toFixed(1)}${unit}`}
/>
);
})}
</div>
<div className="flex justify-between text-xs text-slate-400 dark:text-slate-500">
<span>{minValue.toFixed(1)}{unit}</span>
<span>{maxValue.toFixed(1)}{unit}</span>
</div>
</div>
);
};
return (
<div className="space-y-4">
{/* Header */}
@ -266,6 +389,97 @@ export default function AudioMetricsDashboard() {
)}
</div>
{/* Subprocess Resource Usage - Histogram View */}
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
{/* Audio Output Subprocess */}
{audioProcessMetrics && (
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-3 flex items-center gap-2">
<LuCpu className="h-4 w-4 text-blue-600 dark:text-blue-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Output Process
</span>
<div className={cx(
"h-2 w-2 rounded-full ml-auto",
audioProcessMetrics.running ? "bg-green-500" : "bg-red-500"
)} />
</div>
<div className="space-y-4">
<Histogram
data={audioCpuHistory}
title="CPU Usage"
unit="%"
color="bg-blue-500 dark:bg-blue-400"
/>
<Histogram
data={audioMemoryHistory}
title="Memory Usage"
unit="%"
color="bg-purple-500 dark:bg-purple-400"
/>
<div className="grid grid-cols-2 gap-2 text-xs">
<div className="text-center p-2 bg-slate-50 dark:bg-slate-800 rounded">
<div className="font-medium text-slate-900 dark:text-slate-100">
{formatMemory(audioProcessMetrics.memory_rss)}
</div>
<div className="text-slate-500 dark:text-slate-400">RSS</div>
</div>
<div className="text-center p-2 bg-slate-50 dark:bg-slate-800 rounded">
<div className="font-medium text-slate-900 dark:text-slate-100">
{formatMemory(audioProcessMetrics.memory_vms)}
</div>
<div className="text-slate-500 dark:text-slate-400">VMS</div>
</div>
</div>
</div>
</div>
)}
{/* Microphone Input Subprocess */}
{microphoneProcessMetrics && (
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-3 flex items-center gap-2">
<LuMemoryStick className="h-4 w-4 text-green-600 dark:text-green-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Microphone Input Process
</span>
<div className={cx(
"h-2 w-2 rounded-full ml-auto",
microphoneProcessMetrics.running ? "bg-green-500" : "bg-red-500"
)} />
</div>
<div className="space-y-4">
<Histogram
data={micCpuHistory}
title="CPU Usage"
unit="%"
color="bg-green-500 dark:bg-green-400"
/>
<Histogram
data={micMemoryHistory}
title="Memory Usage"
unit="%"
color="bg-orange-500 dark:bg-orange-400"
/>
<div className="grid grid-cols-2 gap-2 text-xs">
<div className="text-center p-2 bg-slate-50 dark:bg-slate-800 rounded">
<div className="font-medium text-slate-900 dark:text-slate-100">
{formatMemory(microphoneProcessMetrics.memory_rss)}
</div>
<div className="text-slate-500 dark:text-slate-400">RSS</div>
</div>
<div className="text-center p-2 bg-slate-50 dark:bg-slate-800 rounded">
<div className="font-medium text-slate-900 dark:text-slate-100">
{formatMemory(microphoneProcessMetrics.memory_vms)}
</div>
<div className="text-slate-500 dark:text-slate-400">VMS</div>
</div>
</div>
</div>
</div>
)}
</div>
{/* Performance Metrics */}
{metrics && (
<div className="space-y-3">
@ -412,6 +626,41 @@ export default function AudioMetricsDashboard() {
/>
</div>
)}
{/* Microphone Connection Health */}
<div className="mt-3 rounded-md bg-slate-50 p-2 dark:bg-slate-700">
<div className="mb-2 flex items-center gap-2">
<MdSignalWifi4Bar className="h-3 w-3 text-purple-600 dark:text-purple-400" />
<span className="text-sm font-medium text-slate-900 dark:text-slate-100">
Connection Health
</span>
</div>
<div className="space-y-2">
<div className="flex justify-between">
<span className="text-xs text-slate-500 dark:text-slate-400">
Connection Drops:
</span>
<span className={cx(
"text-xs font-medium",
microphoneMetrics.connection_drops > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(microphoneMetrics.connection_drops)}
</span>
</div>
{microphoneMetrics.average_latency && (
<div className="flex justify-between">
<span className="text-xs text-slate-500 dark:text-slate-400">
Avg Latency:
</span>
<span className="text-xs font-medium text-slate-900 dark:text-slate-100">
{microphoneMetrics.average_latency}
</span>
</div>
)}
</div>
</div>
</div>
)}

View File

@ -41,23 +41,7 @@ interface AudioConfig {
FrameSize: string;
}
interface AudioMetrics {
frames_received: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
interface MicrophoneMetrics {
frames_sent: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
@ -94,11 +78,7 @@ export default function AudioControlPopover({ microphone, open }: AudioControlPo
isConnected: wsConnected
} = useAudioEvents();
// Fallback state for when WebSocket is not connected
const [fallbackMuted, setFallbackMuted] = useState(false);
const [fallbackMetrics, setFallbackMetrics] = useState<AudioMetrics | null>(null);
const [fallbackMicMetrics, setFallbackMicMetrics] = useState<MicrophoneMetrics | null>(null);
const [fallbackConnected, setFallbackConnected] = useState(false);
// WebSocket-only implementation - no fallback polling
// Microphone state from props
const {
@ -115,11 +95,11 @@ export default function AudioControlPopover({ microphone, open }: AudioControlPo
isToggling,
} = microphone;
// Use WebSocket data when available, fallback to polling data otherwise
const isMuted = wsConnected && audioMuted !== null ? audioMuted : fallbackMuted;
const metrics = wsConnected && audioMetrics !== null ? audioMetrics : fallbackMetrics;
const micMetrics = wsConnected && microphoneMetrics !== null ? microphoneMetrics : fallbackMicMetrics;
const isConnected = wsConnected ? wsConnected : fallbackConnected;
// Use WebSocket data exclusively - no polling fallback
const isMuted = audioMuted ?? false;
const metrics = audioMetrics;
const micMetrics = microphoneMetrics;
const isConnected = wsConnected;
// Audio level monitoring - enable only when popover is open and microphone is active to save resources
const analysisEnabled = (open ?? true) && isMicrophoneActive;
@ -150,34 +130,15 @@ export default function AudioControlPopover({ microphone, open }: AudioControlPo
}
}, [configsLoaded]);
// Optimize fallback polling - only run when WebSocket is not connected
// WebSocket-only implementation - sync microphone state when needed
useEffect(() => {
if (!wsConnected && !configsLoaded) {
// Load state once if configs aren't loaded yet
loadAudioState();
}
if (!wsConnected) {
loadAudioMetrics();
loadMicrophoneMetrics();
// Reduced frequency for fallback polling (every 3 seconds instead of 2)
const metricsInterval = setInterval(() => {
if (!wsConnected) { // Double-check to prevent unnecessary requests
loadAudioMetrics();
loadMicrophoneMetrics();
}
}, 3000);
return () => clearInterval(metricsInterval);
}
// Always sync microphone state, but debounce it
const syncTimeout = setTimeout(() => {
syncMicrophoneState();
}, 500);
return () => clearTimeout(syncTimeout);
}, [wsConnected, syncMicrophoneState, configsLoaded]);
}, [syncMicrophoneState]);
const loadAudioConfigurations = async () => {
try {
@ -203,60 +164,14 @@ export default function AudioControlPopover({ microphone, open }: AudioControlPo
}
};
const loadAudioState = async () => {
try {
// Load mute state only (configurations are loaded separately)
const muteResp = await api.GET("/audio/mute");
if (muteResp.ok) {
const muteData = await muteResp.json();
setFallbackMuted(!!muteData.muted);
}
} catch (error) {
console.error("Failed to load audio state:", error);
}
};
const loadAudioMetrics = async () => {
try {
const resp = await api.GET("/audio/metrics");
if (resp.ok) {
const data = await resp.json();
setFallbackMetrics(data);
// Consider connected if API call succeeds, regardless of frame count
setFallbackConnected(true);
} else {
setFallbackConnected(false);
}
} catch (error) {
console.error("Failed to load audio metrics:", error);
setFallbackConnected(false);
}
};
const loadMicrophoneMetrics = async () => {
try {
const resp = await api.GET("/microphone/metrics");
if (resp.ok) {
const data = await resp.json();
setFallbackMicMetrics(data);
}
} catch (error) {
console.error("Failed to load microphone metrics:", error);
}
};
const handleToggleMute = async () => {
setIsLoading(true);
try {
const resp = await api.POST("/audio/mute", { muted: !isMuted });
if (resp.ok) {
// WebSocket will handle the state update, but update fallback for immediate feedback
if (!wsConnected) {
setFallbackMuted(!isMuted);
}
if (!resp.ok) {
console.error("Failed to toggle mute:", resp.statusText);
}
// WebSocket will handle the state update automatically
} catch (error) {
console.error("Failed to toggle mute:", error);
} finally {

View File

@ -62,7 +62,7 @@ export interface UseAudioEventsReturn {
}
// Global subscription management to prevent multiple subscriptions per WebSocket connection
let globalSubscriptionState = {
const globalSubscriptionState = {
isSubscribed: false,
subscriberCount: 0,
connectionId: null as string | null

View File

@ -858,11 +858,15 @@ export function useMicrophone() {
}, [microphoneSender, peerConnection]);
const startMicrophoneDebounced = useCallback((deviceId?: string) => {
debouncedOperation(() => startMicrophone(deviceId).then(() => {}), "start");
debouncedOperation(async () => {
await startMicrophone(deviceId).catch(console.error);
}, "start");
}, [startMicrophone, debouncedOperation]);
const stopMicrophoneDebounced = useCallback(() => {
debouncedOperation(() => stopMicrophone().then(() => {}), "stop");
debouncedOperation(async () => {
await stopMicrophone().catch(console.error);
}, "stop");
}, [stopMicrophone, debouncedOperation]);
// Make debug functions available globally for console access

129
web.go
View File

@ -159,10 +159,6 @@ func setupRouter() *gin.Engine {
protected.POST("/storage/upload", handleUploadHttp)
}
protected.GET("/audio/mute", func(c *gin.Context) {
c.JSON(200, gin.H{"muted": audio.IsAudioMuted()})
})
protected.POST("/audio/mute", func(c *gin.Context) {
type muteReq struct {
Muted bool `json:"muted"`
@ -173,6 +169,8 @@ func setupRouter() *gin.Engine {
return
}
audio.SetAudioMuted(req.Muted)
// Also set relay mute state if in main process
audio.SetAudioRelayMuted(req.Muted)
// Broadcast audio mute state change via WebSocket
broadcaster := audio.GetAudioEventBroadcaster()
@ -221,7 +219,7 @@ func setupRouter() *gin.Engine {
"bytes_processed": metrics.BytesProcessed,
"last_frame_time": metrics.LastFrameTime,
"connection_drops": metrics.ConnectionDrops,
"average_latency": metrics.AverageLatency.String(),
"average_latency": fmt.Sprintf("%.1fms", float64(metrics.AverageLatency.Nanoseconds())/1e6),
})
})
@ -286,18 +284,18 @@ func setupRouter() *gin.Engine {
// Optimized server-side cooldown using atomic operations
opResult := audio.TryMicrophoneOperation()
if !opResult.Allowed {
running := currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning()
running := currentSession.AudioInputManager.IsRunning()
c.JSON(200, gin.H{
"status": "cooldown",
"running": running,
"cooldown_ms_remaining": opResult.RemainingCooldown.Milliseconds(),
"operation_id": opResult.OperationID,
"status": "cooldown",
"running": running,
"cooldown_ms_remaining": opResult.RemainingCooldown.Milliseconds(),
"operation_id": opResult.OperationID,
})
return
}
// Check if already running before attempting to start
if currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning() {
if currentSession.AudioInputManager.IsRunning() {
c.JSON(200, gin.H{
"status": "already running",
"running": true,
@ -312,7 +310,7 @@ func setupRouter() *gin.Engine {
// Check if it's already running after the failed start attempt
// This handles race conditions where another request started it
if currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning() {
if currentSession.AudioInputManager.IsRunning() {
c.JSON(200, gin.H{
"status": "started by concurrent request",
"running": true,
@ -348,18 +346,18 @@ func setupRouter() *gin.Engine {
// Optimized server-side cooldown using atomic operations
opResult := audio.TryMicrophoneOperation()
if !opResult.Allowed {
running := currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning()
running := currentSession.AudioInputManager.IsRunning()
c.JSON(200, gin.H{
"status": "cooldown",
"running": running,
"cooldown_ms_remaining": opResult.RemainingCooldown.Milliseconds(),
"operation_id": opResult.OperationID,
"status": "cooldown",
"running": running,
"cooldown_ms_remaining": opResult.RemainingCooldown.Milliseconds(),
"operation_id": opResult.OperationID,
})
return
}
// Check if already stopped before attempting to stop
if !currentSession.AudioInputManager.IsRunning() && !audio.IsNonBlockingAudioInputRunning() {
if !currentSession.AudioInputManager.IsRunning() {
c.JSON(200, gin.H{
"status": "already stopped",
"running": false,
@ -369,7 +367,7 @@ func setupRouter() *gin.Engine {
currentSession.AudioInputManager.Stop()
// AudioInputManager.Stop() already coordinates a clean stop via StopNonBlockingAudioInput()
// AudioInputManager.Stop() already coordinates a clean stop via IPC audio input system
// so we don't need to call it again here
// Broadcast microphone state change via WebSocket
@ -408,7 +406,7 @@ func setupRouter() *gin.Engine {
"bytes_processed": 0,
"last_frame_time": "",
"connection_drops": 0,
"average_latency": "0s",
"average_latency": "0.0ms",
})
return
}
@ -420,7 +418,88 @@ func setupRouter() *gin.Engine {
"bytes_processed": metrics.BytesProcessed,
"last_frame_time": metrics.LastFrameTime.Format("2006-01-02T15:04:05.000Z"),
"connection_drops": metrics.ConnectionDrops,
"average_latency": metrics.AverageLatency.String(),
"average_latency": fmt.Sprintf("%.1fms", float64(metrics.AverageLatency.Nanoseconds())/1e6),
})
})
// Audio subprocess process metrics endpoints
protected.GET("/audio/process-metrics", func(c *gin.Context) {
// Access the global audio supervisor from main.go
if audioSupervisor == nil {
c.JSON(200, gin.H{
"cpu_percent": 0.0,
"memory_percent": 0.0,
"memory_rss": 0,
"memory_vms": 0,
"running": false,
})
return
}
metrics := audioSupervisor.GetProcessMetrics()
if metrics == nil {
c.JSON(200, gin.H{
"cpu_percent": 0.0,
"memory_percent": 0.0,
"memory_rss": 0,
"memory_vms": 0,
"running": false,
})
return
}
c.JSON(200, gin.H{
"cpu_percent": metrics.CPUPercent,
"memory_percent": metrics.MemoryPercent,
"memory_rss": metrics.MemoryRSS,
"memory_vms": metrics.MemoryVMS,
"running": true,
})
})
protected.GET("/microphone/process-metrics", func(c *gin.Context) {
if currentSession == nil || currentSession.AudioInputManager == nil {
c.JSON(200, gin.H{
"cpu_percent": 0.0,
"memory_percent": 0.0,
"memory_rss": 0,
"memory_vms": 0,
"running": false,
})
return
}
// Get the supervisor from the audio input manager
supervisor := currentSession.AudioInputManager.GetSupervisor()
if supervisor == nil {
c.JSON(200, gin.H{
"cpu_percent": 0.0,
"memory_percent": 0.0,
"memory_rss": 0,
"memory_vms": 0,
"running": false,
})
return
}
metrics := supervisor.GetProcessMetrics()
if metrics == nil {
c.JSON(200, gin.H{
"cpu_percent": 0.0,
"memory_percent": 0.0,
"memory_rss": 0,
"memory_vms": 0,
"running": false,
})
return
}
c.JSON(200, gin.H{
"cpu_percent": metrics.CPUPercent,
"memory_percent": metrics.MemoryPercent,
"memory_rss": metrics.MemoryRSS,
"memory_vms": metrics.MemoryVMS,
"running": true,
})
})
@ -437,9 +516,8 @@ func setupRouter() *gin.Engine {
logger.Info().Msg("forcing microphone state reset")
// Force stop both the AudioInputManager and NonBlockingAudioManager
// Force stop the AudioInputManager
currentSession.AudioInputManager.Stop()
audio.StopNonBlockingAudioInput()
// Wait a bit to ensure everything is stopped
time.Sleep(100 * time.Millisecond)
@ -449,9 +527,8 @@ func setupRouter() *gin.Engine {
broadcaster.BroadcastMicrophoneStateChanged(false, true)
c.JSON(200, gin.H{
"status": "reset",
"audio_input_running": currentSession.AudioInputManager.IsRunning(),
"nonblocking_input_running": audio.IsNonBlockingAudioInputRunning(),
"status": "reset",
"audio_input_running": currentSession.AudioInputManager.IsRunning(),
})
})

View File

@ -30,10 +30,13 @@ type Session struct {
AudioInputManager *audio.AudioInputManager
shouldUmountVirtualMedia bool
// Microphone operation cooldown to mitigate rapid start/stop races
micOpMu sync.Mutex
lastMicOp time.Time
micCooldown time.Duration
// Microphone operation throttling
micCooldown time.Duration
// Audio frame processing
audioFrameChan chan []byte
audioStopChan chan struct{}
audioWg sync.WaitGroup
}
type SessionConfig struct {
@ -118,8 +121,14 @@ func newSession(config SessionConfig) (*Session, error) {
session := &Session{
peerConnection: peerConnection,
AudioInputManager: audio.NewAudioInputManager(),
micCooldown: 100 * time.Millisecond,
audioFrameChan: make(chan []byte, 1000),
audioStopChan: make(chan struct{}),
}
// Start audio processing goroutine
session.startAudioProcessor(*logger)
peerConnection.OnDataChannel(func(d *webrtc.DataChannel) {
scopedLogger.Info().Str("label", d.Label()).Uint16("id", *d.ID()).Msg("New DataChannel")
switch d.Label() {
@ -155,6 +164,11 @@ func newSession(config SessionConfig) (*Session, error) {
return nil, err
}
// Update the audio relay with the new WebRTC audio track
if err := audio.UpdateAudioRelayTrack(session.AudioTrack); err != nil {
scopedLogger.Warn().Err(err).Msg("Failed to update audio relay track")
}
videoRtpSender, err := peerConnection.AddTrack(session.VideoTrack)
if err != nil {
return nil, err
@ -190,10 +204,14 @@ func newSession(config SessionConfig) (*Session, error) {
// Extract Opus payload from RTP packet
opusPayload := rtpPacket.Payload
if len(opusPayload) > 0 && session.AudioInputManager != nil {
err := session.AudioInputManager.WriteOpusFrame(opusPayload)
if err != nil {
scopedLogger.Warn().Err(err).Msg("Failed to write Opus frame to audio input manager")
if len(opusPayload) > 0 {
// Send to buffered channel for processing
select {
case session.audioFrameChan <- opusPayload:
// Frame sent successfully
default:
// Channel is full, drop the frame
scopedLogger.Warn().Msg("Audio frame channel full, dropping frame")
}
}
}
@ -245,7 +263,8 @@ func newSession(config SessionConfig) (*Session, error) {
err := rpcUnmountImage()
scopedLogger.Warn().Err(err).Msg("unmount image failed on connection close")
}
// Stop audio input manager
// Stop audio processing and input manager
session.stopAudioProcessor()
if session.AudioInputManager != nil {
session.AudioInputManager.Stop()
}
@ -262,6 +281,43 @@ func newSession(config SessionConfig) (*Session, error) {
return session, nil
}
// startAudioProcessor starts the dedicated audio processing goroutine
func (s *Session) startAudioProcessor(logger zerolog.Logger) {
s.audioWg.Add(1)
go func() {
defer s.audioWg.Done()
logger.Debug().Msg("Audio processor goroutine started")
for {
select {
case frame := <-s.audioFrameChan:
if s.AudioInputManager != nil {
// Check if audio input manager is ready before processing frames
if s.AudioInputManager.IsReady() {
err := s.AudioInputManager.WriteOpusFrame(frame)
if err != nil {
logger.Warn().Err(err).Msg("Failed to write Opus frame to audio input manager")
}
} else {
// Audio input manager not ready, drop frame silently
// This prevents the "client not connected" errors during startup
logger.Debug().Msg("Audio input manager not ready, dropping frame")
}
}
case <-s.audioStopChan:
logger.Debug().Msg("Audio processor goroutine stopping")
return
}
}
}()
}
// stopAudioProcessor stops the audio processing goroutine
func (s *Session) stopAudioProcessor() {
close(s.audioStopChan)
s.audioWg.Wait()
}
func drainRtpSender(rtpSender *webrtc.RTPSender) {
// Lock to OS thread to isolate RTCP processing
runtime.LockOSThread()