mirror of https://github.com/jetkvm/kvm.git
feat(audio): enhance error handling and add device health monitoring
- Implement robust error recovery with progressive backoff in audio streaming
- Add comprehensive device health monitoring system
- Improve ALSA device handling with enhanced retry logic
- Refactor IPC message handling to use shared pools
- Add validation utilities for audio frames and configuration
- Introduce atomic utilities for thread-safe metrics tracking
- Update latency histogram to use configurable buckets
- Add documentation for new metrics and configuration options
This commit is contained in:
parent
e4ed2b8fad
commit
b1f85db7de
|
@ -45,7 +45,7 @@ func DefaultOptimizerConfig() OptimizerConfig {
|
||||||
CooldownPeriod: GetConfig().CooldownPeriod,
|
CooldownPeriod: GetConfig().CooldownPeriod,
|
||||||
Aggressiveness: GetConfig().OptimizerAggressiveness,
|
Aggressiveness: GetConfig().OptimizerAggressiveness,
|
||||||
RollbackThreshold: GetConfig().RollbackThreshold,
|
RollbackThreshold: GetConfig().RollbackThreshold,
|
||||||
StabilityPeriod: 10 * time.Second,
|
StabilityPeriod: GetConfig().AdaptiveOptimizerStability,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,204 @@
|
||||||
|
package audio
|
||||||
|
|
||||||
|
import (
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// AtomicCounter is an int64 counter whose operations all go through
// sync/atomic, so it can be shared between goroutines without extra locking.
// The zero value is a usable counter holding 0.
type AtomicCounter struct {
	// value is the only field, so it sits at offset 0 of the struct; sync/atomic
	// documents that the first word of an allocated struct is 64-bit aligned,
	// which the 64-bit atomic functions require on 32-bit platforms.
	value int64
}

// NewAtomicCounter returns a pointer to a fresh counter holding 0.
func NewAtomicCounter() *AtomicCounter {
	return &AtomicCounter{}
}

// Add atomically adds delta (which may be negative) to the counter and
// returns the resulting value.
func (c *AtomicCounter) Add(delta int64) int64 {
	return atomic.AddInt64(&c.value, delta)
}

// Increment atomically adds 1 to the counter and returns the resulting value.
func (c *AtomicCounter) Increment() int64 {
	return atomic.AddInt64(&c.value, 1)
}

// Load atomically reads the current counter value.
func (c *AtomicCounter) Load() int64 {
	return atomic.LoadInt64(&c.value)
}

// Store atomically overwrites the counter with value.
func (c *AtomicCounter) Store(value int64) {
	atomic.StoreInt64(&c.value, value)
}

// Reset atomically sets the counter back to 0.
func (c *AtomicCounter) Reset() {
	atomic.StoreInt64(&c.value, 0)
}

// Swap atomically replaces the counter value with new and returns the value
// that was stored before the swap.
func (c *AtomicCounter) Swap(new int64) int64 {
	return atomic.SwapInt64(&c.value, new)
}

// CompareAndSwap atomically sets the counter to new only if it currently
// holds old. It reports whether the swap took place. It exists so callers can
// build lock-free read-modify-write loops without touching the unexported
// field directly.
func (c *AtomicCounter) CompareAndSwap(old, new int64) bool {
	return atomic.CompareAndSwapInt64(&c.value, old, new)
}
|
||||||
|
|
||||||
|
// FrameMetrics provides common frame tracking metrics
|
||||||
|
type FrameMetrics struct {
|
||||||
|
Total *AtomicCounter
|
||||||
|
Dropped *AtomicCounter
|
||||||
|
Bytes *AtomicCounter
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFrameMetrics creates a new frame metrics tracker
|
||||||
|
func NewFrameMetrics() *FrameMetrics {
|
||||||
|
return &FrameMetrics{
|
||||||
|
Total: NewAtomicCounter(),
|
||||||
|
Dropped: NewAtomicCounter(),
|
||||||
|
Bytes: NewAtomicCounter(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordFrame atomically records a successful frame with its size
|
||||||
|
func (fm *FrameMetrics) RecordFrame(size int64) {
|
||||||
|
fm.Total.Increment()
|
||||||
|
fm.Bytes.Add(size)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordDrop atomically records a dropped frame
|
||||||
|
func (fm *FrameMetrics) RecordDrop() {
|
||||||
|
fm.Dropped.Increment()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetStats returns current metrics values
|
||||||
|
func (fm *FrameMetrics) GetStats() (total, dropped, bytes int64) {
|
||||||
|
return fm.Total.Load(), fm.Dropped.Load(), fm.Bytes.Load()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset resets all metrics to zero
|
||||||
|
func (fm *FrameMetrics) Reset() {
|
||||||
|
fm.Total.Reset()
|
||||||
|
fm.Dropped.Reset()
|
||||||
|
fm.Bytes.Reset()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetDropRate calculates the drop rate as a percentage
|
||||||
|
func (fm *FrameMetrics) GetDropRate() float64 {
|
||||||
|
total := fm.Total.Load()
|
||||||
|
if total == 0 {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
dropped := fm.Dropped.Load()
|
||||||
|
return float64(dropped) / float64(total) * 100.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// LatencyTracker provides atomic latency tracking
|
||||||
|
type LatencyTracker struct {
|
||||||
|
current *AtomicCounter
|
||||||
|
min *AtomicCounter
|
||||||
|
max *AtomicCounter
|
||||||
|
average *AtomicCounter
|
||||||
|
samples *AtomicCounter
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewLatencyTracker creates a new latency tracker
|
||||||
|
func NewLatencyTracker() *LatencyTracker {
|
||||||
|
lt := &LatencyTracker{
|
||||||
|
current: NewAtomicCounter(),
|
||||||
|
min: NewAtomicCounter(),
|
||||||
|
max: NewAtomicCounter(),
|
||||||
|
average: NewAtomicCounter(),
|
||||||
|
samples: NewAtomicCounter(),
|
||||||
|
}
|
||||||
|
// Initialize min to max value so first measurement sets it properly
|
||||||
|
lt.min.Store(int64(^uint64(0) >> 1)) // Max int64
|
||||||
|
return lt
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordLatency atomically records a new latency measurement
|
||||||
|
func (lt *LatencyTracker) RecordLatency(latency time.Duration) {
|
||||||
|
latencyNanos := latency.Nanoseconds()
|
||||||
|
lt.current.Store(latencyNanos)
|
||||||
|
lt.samples.Increment()
|
||||||
|
|
||||||
|
// Update min
|
||||||
|
for {
|
||||||
|
oldMin := lt.min.Load()
|
||||||
|
if latencyNanos >= oldMin {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if atomic.CompareAndSwapInt64(<.min.value, oldMin, latencyNanos) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update max
|
||||||
|
for {
|
||||||
|
oldMax := lt.max.Load()
|
||||||
|
if latencyNanos <= oldMax {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
if atomic.CompareAndSwapInt64(<.max.value, oldMax, latencyNanos) {
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update average using exponential moving average
|
||||||
|
oldAvg := lt.average.Load()
|
||||||
|
newAvg := (oldAvg*7 + latencyNanos) / 8 // 87.5% weight to old average
|
||||||
|
lt.average.Store(newAvg)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetLatencyStats returns current latency statistics
|
||||||
|
func (lt *LatencyTracker) GetLatencyStats() (current, min, max, average time.Duration, samples int64) {
|
||||||
|
return time.Duration(lt.current.Load()),
|
||||||
|
time.Duration(lt.min.Load()),
|
||||||
|
time.Duration(lt.max.Load()),
|
||||||
|
time.Duration(lt.average.Load()),
|
||||||
|
lt.samples.Load()
|
||||||
|
}
|
||||||
|
|
||||||
|
// PoolMetrics provides common pool performance metrics
|
||||||
|
type PoolMetrics struct {
|
||||||
|
Hits *AtomicCounter
|
||||||
|
Misses *AtomicCounter
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewPoolMetrics creates a new pool metrics tracker
|
||||||
|
func NewPoolMetrics() *PoolMetrics {
|
||||||
|
return &PoolMetrics{
|
||||||
|
Hits: NewAtomicCounter(),
|
||||||
|
Misses: NewAtomicCounter(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordHit atomically records a pool hit
|
||||||
|
func (pm *PoolMetrics) RecordHit() {
|
||||||
|
pm.Hits.Increment()
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordMiss atomically records a pool miss
|
||||||
|
func (pm *PoolMetrics) RecordMiss() {
|
||||||
|
pm.Misses.Increment()
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHitRate calculates the hit rate as a percentage
|
||||||
|
func (pm *PoolMetrics) GetHitRate() float64 {
|
||||||
|
hits := pm.Hits.Load()
|
||||||
|
misses := pm.Misses.Load()
|
||||||
|
total := hits + misses
|
||||||
|
if total == 0 {
|
||||||
|
return 0.0
|
||||||
|
}
|
||||||
|
return float64(hits) / float64(total) * 100.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetStats returns hit and miss counts
|
||||||
|
func (pm *PoolMetrics) GetStats() (hits, misses int64, hitRate float64) {
|
||||||
|
hits = pm.Hits.Load()
|
||||||
|
misses = pm.Misses.Load()
|
||||||
|
hitRate = pm.GetHitRate()
|
||||||
|
return
|
||||||
|
}
|
|
@ -61,12 +61,15 @@ static volatile int capture_initialized = 0;
|
||||||
static volatile int playback_initializing = 0;
|
static volatile int playback_initializing = 0;
|
||||||
static volatile int playback_initialized = 0;
|
static volatile int playback_initialized = 0;
|
||||||
|
|
||||||
// Safe ALSA device opening with retry logic
|
// Enhanced ALSA device opening with exponential backoff retry logic
|
||||||
static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream) {
|
static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream) {
|
||||||
int attempts = 3;
|
int max_attempts = 5; // Increased from 3 to 5
|
||||||
|
int attempt = 0;
|
||||||
int err;
|
int err;
|
||||||
|
int backoff_us = sleep_microseconds; // Start with base sleep time
|
||||||
|
const int max_backoff_us = 500000; // Max 500ms backoff
|
||||||
|
|
||||||
while (attempts-- > 0) {
|
while (attempt < max_attempts) {
|
||||||
err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK);
|
err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK);
|
||||||
if (err >= 0) {
|
if (err >= 0) {
|
||||||
// Switch to blocking mode after successful open
|
// Switch to blocking mode after successful open
|
||||||
|
@ -74,12 +77,26 @@ static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (err == -EBUSY && attempts > 0) {
|
attempt++;
|
||||||
// Device busy, wait and retry
|
if (attempt >= max_attempts) break;
|
||||||
usleep(sleep_microseconds); // 50ms
|
|
||||||
continue;
|
// Enhanced error handling with specific retry strategies
|
||||||
|
if (err == -EBUSY || err == -EAGAIN) {
|
||||||
|
// Device busy or temporarily unavailable - retry with backoff
|
||||||
|
usleep(backoff_us);
|
||||||
|
backoff_us = (backoff_us * 2 < max_backoff_us) ? backoff_us * 2 : max_backoff_us;
|
||||||
|
} else if (err == -ENODEV || err == -ENOENT) {
|
||||||
|
// Device not found - longer wait as device might be initializing
|
||||||
|
usleep(backoff_us * 2);
|
||||||
|
backoff_us = (backoff_us * 2 < max_backoff_us) ? backoff_us * 2 : max_backoff_us;
|
||||||
|
} else if (err == -EPERM || err == -EACCES) {
|
||||||
|
// Permission denied - shorter wait, likely persistent issue
|
||||||
|
usleep(backoff_us / 2);
|
||||||
|
} else {
|
||||||
|
// Other errors - standard backoff
|
||||||
|
usleep(backoff_us);
|
||||||
|
backoff_us = (backoff_us * 2 < max_backoff_us) ? backoff_us * 2 : max_backoff_us;
|
||||||
}
|
}
|
||||||
break;
|
|
||||||
}
|
}
|
||||||
return err;
|
return err;
|
||||||
}
|
}
|
||||||
|
@ -217,43 +234,90 @@ int jetkvm_audio_init() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Read and encode one frame with enhanced error handling
|
// Read and encode one frame with robust error handling and recovery
|
||||||
int jetkvm_audio_read_encode(void *opus_buf) {
|
int jetkvm_audio_read_encode(void *opus_buf) {
|
||||||
short pcm_buffer[1920]; // max 2ch*960
|
short pcm_buffer[1920]; // max 2ch*960
|
||||||
unsigned char *out = (unsigned char*)opus_buf;
|
unsigned char *out = (unsigned char*)opus_buf;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
int recovery_attempts = 0;
|
||||||
|
const int max_recovery_attempts = 3;
|
||||||
|
|
||||||
// Safety checks
|
// Safety checks
|
||||||
if (!capture_initialized || !pcm_handle || !encoder || !opus_buf) {
|
if (!capture_initialized || !pcm_handle || !encoder || !opus_buf) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
retry_read:
|
||||||
|
;
|
||||||
int pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
|
int pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
|
||||||
|
|
||||||
// Handle ALSA errors with enhanced recovery
|
// Handle ALSA errors with robust recovery strategies
|
||||||
if (pcm_rc < 0) {
|
if (pcm_rc < 0) {
|
||||||
if (pcm_rc == -EPIPE) {
|
if (pcm_rc == -EPIPE) {
|
||||||
// Buffer underrun - try to recover
|
// Buffer underrun - implement progressive recovery
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts > max_recovery_attempts) {
|
||||||
|
return -1; // Give up after max attempts
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to recover with prepare
|
||||||
|
err = snd_pcm_prepare(pcm_handle);
|
||||||
|
if (err < 0) {
|
||||||
|
// If prepare fails, try drop and prepare
|
||||||
|
snd_pcm_drop(pcm_handle);
|
||||||
err = snd_pcm_prepare(pcm_handle);
|
err = snd_pcm_prepare(pcm_handle);
|
||||||
if (err < 0) return -1;
|
if (err < 0) return -1;
|
||||||
|
}
|
||||||
|
|
||||||
pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
|
// Wait before retry to allow device to stabilize
|
||||||
if (pcm_rc < 0) return -1;
|
usleep(sleep_microseconds * recovery_attempts);
|
||||||
|
goto retry_read;
|
||||||
} else if (pcm_rc == -EAGAIN) {
|
} else if (pcm_rc == -EAGAIN) {
|
||||||
// No data available - return 0 to indicate no frame
|
// No data available - return 0 to indicate no frame
|
||||||
return 0;
|
return 0;
|
||||||
} else if (pcm_rc == -ESTRPIPE) {
|
} else if (pcm_rc == -ESTRPIPE) {
|
||||||
// Device suspended, try to resume
|
// Device suspended, implement robust resume logic
|
||||||
while ((err = snd_pcm_resume(pcm_handle)) == -EAGAIN) {
|
recovery_attempts++;
|
||||||
usleep(sleep_microseconds); // Use centralized constant
|
if (recovery_attempts > max_recovery_attempts) {
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to resume with timeout
|
||||||
|
int resume_attempts = 0;
|
||||||
|
while ((err = snd_pcm_resume(pcm_handle)) == -EAGAIN && resume_attempts < 10) {
|
||||||
|
usleep(sleep_microseconds);
|
||||||
|
resume_attempts++;
|
||||||
}
|
}
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
|
// Resume failed, try prepare as fallback
|
||||||
err = snd_pcm_prepare(pcm_handle);
|
err = snd_pcm_prepare(pcm_handle);
|
||||||
if (err < 0) return -1;
|
if (err < 0) return -1;
|
||||||
}
|
}
|
||||||
return 0; // Skip this frame
|
// Wait before retry to allow device to stabilize
|
||||||
|
usleep(sleep_microseconds * recovery_attempts);
|
||||||
|
return 0; // Skip this frame but don't fail
|
||||||
|
} else if (pcm_rc == -ENODEV) {
|
||||||
|
// Device disconnected - critical error
|
||||||
|
return -1;
|
||||||
|
} else if (pcm_rc == -EIO) {
|
||||||
|
// I/O error - try recovery once
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts <= max_recovery_attempts) {
|
||||||
|
snd_pcm_drop(pcm_handle);
|
||||||
|
err = snd_pcm_prepare(pcm_handle);
|
||||||
|
if (err >= 0) {
|
||||||
|
usleep(sleep_microseconds);
|
||||||
|
goto retry_read;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -1;
|
||||||
} else {
|
} else {
|
||||||
// Other error - return error code
|
// Other errors - limited retry for transient issues
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts <= 1 && (pcm_rc == -EINTR || pcm_rc == -EBUSY)) {
|
||||||
|
usleep(sleep_microseconds / 2);
|
||||||
|
goto retry_read;
|
||||||
|
}
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -327,11 +391,13 @@ int jetkvm_audio_playback_init() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode Opus and write PCM with enhanced error handling
|
// Decode Opus and write PCM with robust error handling and recovery
|
||||||
int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
|
int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
|
||||||
short pcm_buffer[1920]; // max 2ch*960
|
short pcm_buffer[1920]; // max 2ch*960
|
||||||
unsigned char *in = (unsigned char*)opus_buf;
|
unsigned char *in = (unsigned char*)opus_buf;
|
||||||
int err = 0;
|
int err = 0;
|
||||||
|
int recovery_attempts = 0;
|
||||||
|
const int max_recovery_attempts = 3;
|
||||||
|
|
||||||
// Safety checks
|
// Safety checks
|
||||||
if (!playback_initialized || !pcm_playback_handle || !decoder || !opus_buf || opus_size <= 0) {
|
if (!playback_initialized || !pcm_playback_handle || !decoder || !opus_buf || opus_size <= 0) {
|
||||||
|
@ -343,31 +409,91 @@ int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Decode Opus to PCM
|
// Decode Opus to PCM with error handling
|
||||||
int pcm_frames = opus_decode(decoder, in, opus_size, pcm_buffer, frame_size, 0);
|
int pcm_frames = opus_decode(decoder, in, opus_size, pcm_buffer, frame_size, 0);
|
||||||
|
if (pcm_frames < 0) {
|
||||||
|
// Try packet loss concealment on decode error
|
||||||
|
pcm_frames = opus_decode(decoder, NULL, 0, pcm_buffer, frame_size, 0);
|
||||||
if (pcm_frames < 0) return -1;
|
if (pcm_frames < 0) return -1;
|
||||||
|
}
|
||||||
|
|
||||||
// Write PCM to playback device with enhanced recovery
|
retry_write:
|
||||||
|
;
|
||||||
|
// Write PCM to playback device with robust recovery
|
||||||
int pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
|
int pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
|
||||||
if (pcm_rc < 0) {
|
if (pcm_rc < 0) {
|
||||||
if (pcm_rc == -EPIPE) {
|
if (pcm_rc == -EPIPE) {
|
||||||
// Buffer underrun - try to recover
|
// Buffer underrun - implement progressive recovery
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts > max_recovery_attempts) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to recover with prepare
|
||||||
|
err = snd_pcm_prepare(pcm_playback_handle);
|
||||||
|
if (err < 0) {
|
||||||
|
// If prepare fails, try drop and prepare
|
||||||
|
snd_pcm_drop(pcm_playback_handle);
|
||||||
err = snd_pcm_prepare(pcm_playback_handle);
|
err = snd_pcm_prepare(pcm_playback_handle);
|
||||||
if (err < 0) return -2;
|
if (err < 0) return -2;
|
||||||
|
}
|
||||||
|
|
||||||
pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
|
// Wait before retry to allow device to stabilize
|
||||||
|
usleep(sleep_microseconds * recovery_attempts);
|
||||||
|
goto retry_write;
|
||||||
} else if (pcm_rc == -ESTRPIPE) {
|
} else if (pcm_rc == -ESTRPIPE) {
|
||||||
// Device suspended, try to resume
|
// Device suspended, implement robust resume logic
|
||||||
while ((err = snd_pcm_resume(pcm_playback_handle)) == -EAGAIN) {
|
recovery_attempts++;
|
||||||
usleep(sleep_microseconds); // Use centralized constant
|
if (recovery_attempts > max_recovery_attempts) {
|
||||||
|
return -2;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to resume with timeout
|
||||||
|
int resume_attempts = 0;
|
||||||
|
while ((err = snd_pcm_resume(pcm_playback_handle)) == -EAGAIN && resume_attempts < 10) {
|
||||||
|
usleep(sleep_microseconds);
|
||||||
|
resume_attempts++;
|
||||||
}
|
}
|
||||||
if (err < 0) {
|
if (err < 0) {
|
||||||
|
// Resume failed, try prepare as fallback
|
||||||
err = snd_pcm_prepare(pcm_playback_handle);
|
err = snd_pcm_prepare(pcm_playback_handle);
|
||||||
if (err < 0) return -2;
|
if (err < 0) return -2;
|
||||||
}
|
}
|
||||||
return 0; // Skip this frame
|
// Wait before retry to allow device to stabilize
|
||||||
|
usleep(sleep_microseconds * recovery_attempts);
|
||||||
|
return 0; // Skip this frame but don't fail
|
||||||
|
} else if (pcm_rc == -ENODEV) {
|
||||||
|
// Device disconnected - critical error
|
||||||
|
return -2;
|
||||||
|
} else if (pcm_rc == -EIO) {
|
||||||
|
// I/O error - try recovery once
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts <= max_recovery_attempts) {
|
||||||
|
snd_pcm_drop(pcm_playback_handle);
|
||||||
|
err = snd_pcm_prepare(pcm_playback_handle);
|
||||||
|
if (err >= 0) {
|
||||||
|
usleep(sleep_microseconds);
|
||||||
|
goto retry_write;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return -2;
|
||||||
|
} else if (pcm_rc == -EAGAIN) {
|
||||||
|
// Device not ready - brief wait and retry
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts <= max_recovery_attempts) {
|
||||||
|
usleep(sleep_microseconds / 4);
|
||||||
|
goto retry_write;
|
||||||
|
}
|
||||||
|
return -2;
|
||||||
|
} else {
|
||||||
|
// Other errors - limited retry for transient issues
|
||||||
|
recovery_attempts++;
|
||||||
|
if (recovery_attempts <= 1 && (pcm_rc == -EINTR || pcm_rc == -EBUSY)) {
|
||||||
|
usleep(sleep_microseconds / 2);
|
||||||
|
goto retry_write;
|
||||||
|
}
|
||||||
|
return -2;
|
||||||
}
|
}
|
||||||
if (pcm_rc < 0) return -2;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
return pcm_frames;
|
return pcm_frames;
|
||||||
|
|
|
@ -1540,6 +1540,49 @@ type AudioConfigConstants struct {
|
||||||
// Impact: Prevents excessive channel counts that could impact performance.
|
// Impact: Prevents excessive channel counts that could impact performance.
|
||||||
// Default 8 channels provides reasonable upper bound for multi-channel audio.
|
// Default 8 channels provides reasonable upper bound for multi-channel audio.
|
||||||
MaxChannels int
|
MaxChannels int
|
||||||
|
|
||||||
|
// Device Health Monitoring Configuration
|
||||||
|
// Used in: device_health.go for proactive device monitoring and recovery
|
||||||
|
// Impact: Controls health check frequency and recovery thresholds
|
||||||
|
|
||||||
|
// HealthCheckIntervalMS defines interval between device health checks in milliseconds.
|
||||||
|
// Used in: DeviceHealthMonitor for periodic health assessment
|
||||||
|
// Impact: Lower values provide faster detection but increase CPU usage.
|
||||||
|
// Default 5000ms (5s) provides good balance between responsiveness and overhead.
|
||||||
|
HealthCheckIntervalMS int
|
||||||
|
|
||||||
|
// HealthRecoveryThreshold defines number of consecutive successful operations
|
||||||
|
// required to mark a device as healthy after being unhealthy.
|
||||||
|
// Used in: DeviceHealthMonitor for recovery state management
|
||||||
|
// Impact: Higher values prevent premature recovery declarations.
|
||||||
|
// Default 3 consecutive successes ensures stable recovery.
|
||||||
|
HealthRecoveryThreshold int
|
||||||
|
|
||||||
|
// HealthLatencyThresholdMS defines maximum acceptable latency in milliseconds
|
||||||
|
// before considering a device unhealthy.
|
||||||
|
// Used in: DeviceHealthMonitor for latency-based health assessment
|
||||||
|
// Impact: Lower values trigger recovery sooner but may cause false positives.
|
||||||
|
// Default 100ms provides reasonable threshold for real-time audio.
|
||||||
|
HealthLatencyThresholdMS int
|
||||||
|
|
||||||
|
// HealthErrorRateLimit defines maximum error rate (0.0-1.0) before
|
||||||
|
// considering a device unhealthy.
|
||||||
|
// Used in: DeviceHealthMonitor for error rate assessment
|
||||||
|
// Impact: Lower values trigger recovery sooner for error-prone devices.
|
||||||
|
// Default 0.1 (10%) allows some transient errors while detecting problems.
|
||||||
|
HealthErrorRateLimit float64
|
||||||
|
|
||||||
|
// Latency Histogram Bucket Configuration
|
||||||
|
// Used in: LatencyHistogram for granular latency measurement buckets
|
||||||
|
// Impact: Defines the boundaries for latency distribution analysis
|
||||||
|
LatencyBucket10ms time.Duration // 10ms latency bucket
|
||||||
|
LatencyBucket25ms time.Duration // 25ms latency bucket
|
||||||
|
LatencyBucket50ms time.Duration // 50ms latency bucket
|
||||||
|
LatencyBucket100ms time.Duration // 100ms latency bucket
|
||||||
|
LatencyBucket250ms time.Duration // 250ms latency bucket
|
||||||
|
LatencyBucket500ms time.Duration // 500ms latency bucket
|
||||||
|
LatencyBucket1s time.Duration // 1s latency bucket
|
||||||
|
LatencyBucket2s time.Duration // 2s latency bucket
|
||||||
}
|
}
|
||||||
|
|
||||||
// DefaultAudioConfig returns the default configuration constants
|
// DefaultAudioConfig returns the default configuration constants
|
||||||
|
@ -2563,6 +2606,22 @@ func DefaultAudioConfig() *AudioConfigConstants {
|
||||||
MinSampleRate: 8000, // 8kHz minimum sample rate
|
MinSampleRate: 8000, // 8kHz minimum sample rate
|
||||||
MaxSampleRate: 48000, // 48kHz maximum sample rate
|
MaxSampleRate: 48000, // 48kHz maximum sample rate
|
||||||
MaxChannels: 8, // 8 maximum audio channels
|
MaxChannels: 8, // 8 maximum audio channels
|
||||||
|
|
||||||
|
// Device Health Monitoring Configuration
|
||||||
|
HealthCheckIntervalMS: 5000, // 5000ms (5s) health check interval
|
||||||
|
HealthRecoveryThreshold: 3, // 3 consecutive successes for recovery
|
||||||
|
HealthLatencyThresholdMS: 100, // 100ms latency threshold for health
|
||||||
|
HealthErrorRateLimit: 0.1, // 10% error rate limit for health
|
||||||
|
|
||||||
|
// Latency Histogram Bucket Configuration
|
||||||
|
LatencyBucket10ms: 10 * time.Millisecond, // 10ms latency bucket
|
||||||
|
LatencyBucket25ms: 25 * time.Millisecond, // 25ms latency bucket
|
||||||
|
LatencyBucket50ms: 50 * time.Millisecond, // 50ms latency bucket
|
||||||
|
LatencyBucket100ms: 100 * time.Millisecond, // 100ms latency bucket
|
||||||
|
LatencyBucket250ms: 250 * time.Millisecond, // 250ms latency bucket
|
||||||
|
LatencyBucket500ms: 500 * time.Millisecond, // 500ms latency bucket
|
||||||
|
LatencyBucket1s: 1 * time.Second, // 1s latency bucket
|
||||||
|
LatencyBucket2s: 2 * time.Second, // 2s latency bucket
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,514 @@
|
||||||
|
package audio
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"sync"
|
||||||
|
"sync/atomic"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/jetkvm/kvm/internal/logging"
|
||||||
|
"github.com/rs/zerolog"
|
||||||
|
)
|
||||||
|
|
||||||
|
// DeviceHealthStatus represents the health status of an audio device.
type DeviceHealthStatus int

// Health states in declaration order. DeviceHealthUnknown is the zero value.
const (
	DeviceHealthUnknown DeviceHealthStatus = iota
	DeviceHealthHealthy
	DeviceHealthDegraded
	DeviceHealthFailing
	DeviceHealthCritical
)

// String returns the lowercase name of the status. DeviceHealthUnknown and any
// value outside the declared constants both yield "unknown".
func (s DeviceHealthStatus) String() string {
	switch s {
	case DeviceHealthHealthy:
		return "healthy"
	case DeviceHealthDegraded:
		return "degraded"
	case DeviceHealthFailing:
		return "failing"
	case DeviceHealthCritical:
		return "critical"
	default:
		return "unknown"
	}
}
|
||||||
|
|
||||||
|
// DeviceHealthMetrics tracks health-related metrics for audio devices
|
||||||
|
type DeviceHealthMetrics struct {
|
||||||
|
// Error tracking
|
||||||
|
ConsecutiveErrors int64 `json:"consecutive_errors"`
|
||||||
|
TotalErrors int64 `json:"total_errors"`
|
||||||
|
LastErrorTime time.Time `json:"last_error_time"`
|
||||||
|
ErrorRate float64 `json:"error_rate"` // errors per minute
|
||||||
|
|
||||||
|
// Performance metrics
|
||||||
|
AverageLatency time.Duration `json:"average_latency"`
|
||||||
|
MaxLatency time.Duration `json:"max_latency"`
|
||||||
|
LatencySpikes int64 `json:"latency_spikes"`
|
||||||
|
Underruns int64 `json:"underruns"`
|
||||||
|
Overruns int64 `json:"overruns"`
|
||||||
|
|
||||||
|
// Device availability
|
||||||
|
LastSuccessfulOp time.Time `json:"last_successful_op"`
|
||||||
|
DeviceDisconnects int64 `json:"device_disconnects"`
|
||||||
|
RecoveryAttempts int64 `json:"recovery_attempts"`
|
||||||
|
SuccessfulRecoveries int64 `json:"successful_recoveries"`
|
||||||
|
|
||||||
|
// Health assessment
|
||||||
|
CurrentStatus DeviceHealthStatus `json:"current_status"`
|
||||||
|
StatusLastChanged time.Time `json:"status_last_changed"`
|
||||||
|
HealthScore float64 `json:"health_score"` // 0.0 to 1.0
|
||||||
|
}
|
||||||
|
|
||||||
|
// DeviceHealthMonitor monitors the health of audio devices and triggers recovery
|
||||||
|
type DeviceHealthMonitor struct {
|
||||||
|
// Atomic fields first for ARM32 alignment
|
||||||
|
running int32
|
||||||
|
monitoringEnabled int32
|
||||||
|
|
||||||
|
// Configuration
|
||||||
|
checkInterval time.Duration
|
||||||
|
recoveryThreshold int
|
||||||
|
latencyThreshold time.Duration
|
||||||
|
errorRateLimit float64 // max errors per minute
|
||||||
|
|
||||||
|
// State tracking
|
||||||
|
captureMetrics *DeviceHealthMetrics
|
||||||
|
playbackMetrics *DeviceHealthMetrics
|
||||||
|
mutex sync.RWMutex
|
||||||
|
|
||||||
|
// Control channels
|
||||||
|
ctx context.Context
|
||||||
|
cancel context.CancelFunc
|
||||||
|
stopChan chan struct{}
|
||||||
|
doneChan chan struct{}
|
||||||
|
|
||||||
|
// Recovery callbacks
|
||||||
|
recoveryCallbacks map[string]func() error
|
||||||
|
callbackMutex sync.RWMutex
|
||||||
|
|
||||||
|
// Logging
|
||||||
|
logger zerolog.Logger
|
||||||
|
config *AudioConfigConstants
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewDeviceHealthMonitor creates a new device health monitor
|
||||||
|
func NewDeviceHealthMonitor() *DeviceHealthMonitor {
|
||||||
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
config := GetConfig()
|
||||||
|
|
||||||
|
return &DeviceHealthMonitor{
|
||||||
|
checkInterval: time.Duration(config.HealthCheckIntervalMS) * time.Millisecond,
|
||||||
|
recoveryThreshold: config.HealthRecoveryThreshold,
|
||||||
|
latencyThreshold: time.Duration(config.HealthLatencyThresholdMS) * time.Millisecond,
|
||||||
|
errorRateLimit: config.HealthErrorRateLimit,
|
||||||
|
captureMetrics: &DeviceHealthMetrics{
|
||||||
|
CurrentStatus: DeviceHealthUnknown,
|
||||||
|
HealthScore: 1.0,
|
||||||
|
},
|
||||||
|
playbackMetrics: &DeviceHealthMetrics{
|
||||||
|
CurrentStatus: DeviceHealthUnknown,
|
||||||
|
HealthScore: 1.0,
|
||||||
|
},
|
||||||
|
ctx: ctx,
|
||||||
|
cancel: cancel,
|
||||||
|
stopChan: make(chan struct{}),
|
||||||
|
doneChan: make(chan struct{}),
|
||||||
|
recoveryCallbacks: make(map[string]func() error),
|
||||||
|
logger: logging.GetDefaultLogger().With().Str("component", "device-health-monitor").Logger(),
|
||||||
|
config: config,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Start begins health monitoring
|
||||||
|
func (dhm *DeviceHealthMonitor) Start() error {
|
||||||
|
if !atomic.CompareAndSwapInt32(&dhm.running, 0, 1) {
|
||||||
|
return fmt.Errorf("device health monitor already running")
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.logger.Info().Msg("starting device health monitor")
|
||||||
|
atomic.StoreInt32(&dhm.monitoringEnabled, 1)
|
||||||
|
|
||||||
|
go dhm.monitoringLoop()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Stop stops health monitoring
|
||||||
|
func (dhm *DeviceHealthMonitor) Stop() {
|
||||||
|
if !atomic.CompareAndSwapInt32(&dhm.running, 1, 0) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.logger.Info().Msg("stopping device health monitor")
|
||||||
|
atomic.StoreInt32(&dhm.monitoringEnabled, 0)
|
||||||
|
|
||||||
|
close(dhm.stopChan)
|
||||||
|
dhm.cancel()
|
||||||
|
|
||||||
|
// Wait for monitoring loop to finish
|
||||||
|
select {
|
||||||
|
case <-dhm.doneChan:
|
||||||
|
dhm.logger.Info().Msg("device health monitor stopped")
|
||||||
|
case <-time.After(time.Duration(dhm.config.SupervisorTimeout)):
|
||||||
|
dhm.logger.Warn().Msg("device health monitor stop timeout")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RegisterRecoveryCallback registers a recovery function for a specific component
|
||||||
|
func (dhm *DeviceHealthMonitor) RegisterRecoveryCallback(component string, callback func() error) {
|
||||||
|
dhm.callbackMutex.Lock()
|
||||||
|
defer dhm.callbackMutex.Unlock()
|
||||||
|
dhm.recoveryCallbacks[component] = callback
|
||||||
|
dhm.logger.Info().Str("component", component).Msg("registered recovery callback")
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordError records an error for health tracking
|
||||||
|
func (dhm *DeviceHealthMonitor) RecordError(deviceType string, err error) {
|
||||||
|
if atomic.LoadInt32(&dhm.monitoringEnabled) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.mutex.Lock()
|
||||||
|
defer dhm.mutex.Unlock()
|
||||||
|
|
||||||
|
var metrics *DeviceHealthMetrics
|
||||||
|
switch deviceType {
|
||||||
|
case "capture":
|
||||||
|
metrics = dhm.captureMetrics
|
||||||
|
case "playback":
|
||||||
|
metrics = dhm.playbackMetrics
|
||||||
|
default:
|
||||||
|
dhm.logger.Warn().Str("device_type", deviceType).Msg("unknown device type for error recording")
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
atomic.AddInt64(&metrics.ConsecutiveErrors, 1)
|
||||||
|
atomic.AddInt64(&metrics.TotalErrors, 1)
|
||||||
|
metrics.LastErrorTime = time.Now()
|
||||||
|
|
||||||
|
// Update error rate (errors per minute)
|
||||||
|
if !metrics.LastErrorTime.IsZero() {
|
||||||
|
timeSinceFirst := time.Since(metrics.LastErrorTime)
|
||||||
|
if timeSinceFirst > 0 {
|
||||||
|
metrics.ErrorRate = float64(metrics.TotalErrors) / timeSinceFirst.Minutes()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.logger.Debug().
|
||||||
|
Str("device_type", deviceType).
|
||||||
|
Err(err).
|
||||||
|
Int64("consecutive_errors", metrics.ConsecutiveErrors).
|
||||||
|
Float64("error_rate", metrics.ErrorRate).
|
||||||
|
Msg("recorded device error")
|
||||||
|
|
||||||
|
// Trigger immediate health assessment
|
||||||
|
dhm.assessDeviceHealth(deviceType, metrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordSuccess records a successful operation
|
||||||
|
func (dhm *DeviceHealthMonitor) RecordSuccess(deviceType string) {
|
||||||
|
if atomic.LoadInt32(&dhm.monitoringEnabled) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.mutex.Lock()
|
||||||
|
defer dhm.mutex.Unlock()
|
||||||
|
|
||||||
|
var metrics *DeviceHealthMetrics
|
||||||
|
switch deviceType {
|
||||||
|
case "capture":
|
||||||
|
metrics = dhm.captureMetrics
|
||||||
|
case "playback":
|
||||||
|
metrics = dhm.playbackMetrics
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset consecutive errors on success
|
||||||
|
atomic.StoreInt64(&metrics.ConsecutiveErrors, 0)
|
||||||
|
metrics.LastSuccessfulOp = time.Now()
|
||||||
|
|
||||||
|
// Improve health score gradually
|
||||||
|
if metrics.HealthScore < 1.0 {
|
||||||
|
metrics.HealthScore = min(1.0, metrics.HealthScore+0.1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordLatency records operation latency for health assessment
|
||||||
|
func (dhm *DeviceHealthMonitor) RecordLatency(deviceType string, latency time.Duration) {
|
||||||
|
if atomic.LoadInt32(&dhm.monitoringEnabled) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.mutex.Lock()
|
||||||
|
defer dhm.mutex.Unlock()
|
||||||
|
|
||||||
|
var metrics *DeviceHealthMetrics
|
||||||
|
switch deviceType {
|
||||||
|
case "capture":
|
||||||
|
metrics = dhm.captureMetrics
|
||||||
|
case "playback":
|
||||||
|
metrics = dhm.playbackMetrics
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update latency metrics
|
||||||
|
if metrics.AverageLatency == 0 {
|
||||||
|
metrics.AverageLatency = latency
|
||||||
|
} else {
|
||||||
|
// Exponential moving average
|
||||||
|
metrics.AverageLatency = time.Duration(float64(metrics.AverageLatency)*0.9 + float64(latency)*0.1)
|
||||||
|
}
|
||||||
|
|
||||||
|
if latency > metrics.MaxLatency {
|
||||||
|
metrics.MaxLatency = latency
|
||||||
|
}
|
||||||
|
|
||||||
|
// Track latency spikes
|
||||||
|
if latency > dhm.latencyThreshold {
|
||||||
|
atomic.AddInt64(&metrics.LatencySpikes, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordUnderrun records an audio underrun event
|
||||||
|
func (dhm *DeviceHealthMonitor) RecordUnderrun(deviceType string) {
|
||||||
|
if atomic.LoadInt32(&dhm.monitoringEnabled) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.mutex.Lock()
|
||||||
|
defer dhm.mutex.Unlock()
|
||||||
|
|
||||||
|
var metrics *DeviceHealthMetrics
|
||||||
|
switch deviceType {
|
||||||
|
case "capture":
|
||||||
|
metrics = dhm.captureMetrics
|
||||||
|
case "playback":
|
||||||
|
metrics = dhm.playbackMetrics
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
atomic.AddInt64(&metrics.Underruns, 1)
|
||||||
|
dhm.logger.Debug().Str("device_type", deviceType).Msg("recorded audio underrun")
|
||||||
|
}
|
||||||
|
|
||||||
|
// RecordOverrun records an audio overrun event
|
||||||
|
func (dhm *DeviceHealthMonitor) RecordOverrun(deviceType string) {
|
||||||
|
if atomic.LoadInt32(&dhm.monitoringEnabled) == 0 {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
dhm.mutex.Lock()
|
||||||
|
defer dhm.mutex.Unlock()
|
||||||
|
|
||||||
|
var metrics *DeviceHealthMetrics
|
||||||
|
switch deviceType {
|
||||||
|
case "capture":
|
||||||
|
metrics = dhm.captureMetrics
|
||||||
|
case "playback":
|
||||||
|
metrics = dhm.playbackMetrics
|
||||||
|
default:
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
atomic.AddInt64(&metrics.Overruns, 1)
|
||||||
|
dhm.logger.Debug().Str("device_type", deviceType).Msg("recorded audio overrun")
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetHealthMetrics returns current health metrics
|
||||||
|
func (dhm *DeviceHealthMonitor) GetHealthMetrics() (capture, playback DeviceHealthMetrics) {
|
||||||
|
dhm.mutex.RLock()
|
||||||
|
defer dhm.mutex.RUnlock()
|
||||||
|
return *dhm.captureMetrics, *dhm.playbackMetrics
|
||||||
|
}
|
||||||
|
|
||||||
|
// monitoringLoop runs the main health monitoring loop
|
||||||
|
func (dhm *DeviceHealthMonitor) monitoringLoop() {
|
||||||
|
defer close(dhm.doneChan)
|
||||||
|
|
||||||
|
ticker := time.NewTicker(dhm.checkInterval)
|
||||||
|
defer ticker.Stop()
|
||||||
|
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-dhm.stopChan:
|
||||||
|
return
|
||||||
|
case <-dhm.ctx.Done():
|
||||||
|
return
|
||||||
|
case <-ticker.C:
|
||||||
|
dhm.performHealthCheck()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// performHealthCheck performs a comprehensive health check
|
||||||
|
func (dhm *DeviceHealthMonitor) performHealthCheck() {
|
||||||
|
dhm.mutex.Lock()
|
||||||
|
defer dhm.mutex.Unlock()
|
||||||
|
|
||||||
|
// Assess health for both devices
|
||||||
|
dhm.assessDeviceHealth("capture", dhm.captureMetrics)
|
||||||
|
dhm.assessDeviceHealth("playback", dhm.playbackMetrics)
|
||||||
|
|
||||||
|
// Check if recovery is needed
|
||||||
|
dhm.checkRecoveryNeeded("capture", dhm.captureMetrics)
|
||||||
|
dhm.checkRecoveryNeeded("playback", dhm.playbackMetrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
// assessDeviceHealth assesses the health status of a device
|
||||||
|
func (dhm *DeviceHealthMonitor) assessDeviceHealth(deviceType string, metrics *DeviceHealthMetrics) {
|
||||||
|
previousStatus := metrics.CurrentStatus
|
||||||
|
newStatus := dhm.calculateHealthStatus(metrics)
|
||||||
|
|
||||||
|
if newStatus != previousStatus {
|
||||||
|
metrics.CurrentStatus = newStatus
|
||||||
|
metrics.StatusLastChanged = time.Now()
|
||||||
|
dhm.logger.Info().
|
||||||
|
Str("device_type", deviceType).
|
||||||
|
Str("previous_status", previousStatus.String()).
|
||||||
|
Str("new_status", newStatus.String()).
|
||||||
|
Float64("health_score", metrics.HealthScore).
|
||||||
|
Msg("device health status changed")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update health score
|
||||||
|
metrics.HealthScore = dhm.calculateHealthScore(metrics)
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculateHealthStatus determines health status based on metrics
|
||||||
|
func (dhm *DeviceHealthMonitor) calculateHealthStatus(metrics *DeviceHealthMetrics) DeviceHealthStatus {
|
||||||
|
consecutiveErrors := atomic.LoadInt64(&metrics.ConsecutiveErrors)
|
||||||
|
totalErrors := atomic.LoadInt64(&metrics.TotalErrors)
|
||||||
|
|
||||||
|
// Critical: Too many consecutive errors or device disconnected recently
|
||||||
|
if consecutiveErrors >= int64(dhm.recoveryThreshold) {
|
||||||
|
return DeviceHealthCritical
|
||||||
|
}
|
||||||
|
|
||||||
|
// Critical: No successful operations in a long time
|
||||||
|
if !metrics.LastSuccessfulOp.IsZero() && time.Since(metrics.LastSuccessfulOp) > time.Duration(dhm.config.SupervisorTimeout) {
|
||||||
|
return DeviceHealthCritical
|
||||||
|
}
|
||||||
|
|
||||||
|
// Failing: High error rate or frequent latency spikes
|
||||||
|
if metrics.ErrorRate > dhm.errorRateLimit || atomic.LoadInt64(&metrics.LatencySpikes) > int64(dhm.config.MaxDroppedFrames) {
|
||||||
|
return DeviceHealthFailing
|
||||||
|
}
|
||||||
|
|
||||||
|
// Degraded: Some errors or performance issues
|
||||||
|
if consecutiveErrors > 0 || totalErrors > int64(dhm.config.MaxDroppedFrames/2) || metrics.AverageLatency > dhm.latencyThreshold {
|
||||||
|
return DeviceHealthDegraded
|
||||||
|
}
|
||||||
|
|
||||||
|
// Healthy: No significant issues
|
||||||
|
return DeviceHealthHealthy
|
||||||
|
}
|
||||||
|
|
||||||
|
// calculateHealthScore calculates a numeric health score (0.0 to 1.0)
|
||||||
|
func (dhm *DeviceHealthMonitor) calculateHealthScore(metrics *DeviceHealthMetrics) float64 {
|
||||||
|
score := 1.0
|
||||||
|
|
||||||
|
// Penalize consecutive errors
|
||||||
|
consecutiveErrors := atomic.LoadInt64(&metrics.ConsecutiveErrors)
|
||||||
|
if consecutiveErrors > 0 {
|
||||||
|
score -= float64(consecutiveErrors) * 0.1
|
||||||
|
}
|
||||||
|
|
||||||
|
// Penalize high error rate
|
||||||
|
if metrics.ErrorRate > 0 {
|
||||||
|
score -= min(0.5, metrics.ErrorRate/dhm.errorRateLimit*0.5)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Penalize high latency
|
||||||
|
if metrics.AverageLatency > dhm.latencyThreshold {
|
||||||
|
excess := float64(metrics.AverageLatency-dhm.latencyThreshold) / float64(dhm.latencyThreshold)
|
||||||
|
score -= min(0.3, excess*0.3)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Penalize underruns/overruns
|
||||||
|
underruns := atomic.LoadInt64(&metrics.Underruns)
|
||||||
|
overruns := atomic.LoadInt64(&metrics.Overruns)
|
||||||
|
if underruns+overruns > 0 {
|
||||||
|
score -= min(0.2, float64(underruns+overruns)*0.01)
|
||||||
|
}
|
||||||
|
|
||||||
|
return max(0.0, score)
|
||||||
|
}
|
||||||
|
|
||||||
|
// checkRecoveryNeeded checks if recovery is needed and triggers it
|
||||||
|
func (dhm *DeviceHealthMonitor) checkRecoveryNeeded(deviceType string, metrics *DeviceHealthMetrics) {
|
||||||
|
if metrics.CurrentStatus == DeviceHealthCritical {
|
||||||
|
dhm.triggerRecovery(deviceType, metrics)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// triggerRecovery triggers recovery for a device
|
||||||
|
func (dhm *DeviceHealthMonitor) triggerRecovery(deviceType string, metrics *DeviceHealthMetrics) {
|
||||||
|
atomic.AddInt64(&metrics.RecoveryAttempts, 1)
|
||||||
|
|
||||||
|
dhm.logger.Warn().
|
||||||
|
Str("device_type", deviceType).
|
||||||
|
Str("status", metrics.CurrentStatus.String()).
|
||||||
|
Int64("consecutive_errors", atomic.LoadInt64(&metrics.ConsecutiveErrors)).
|
||||||
|
Float64("error_rate", metrics.ErrorRate).
|
||||||
|
Msg("triggering device recovery")
|
||||||
|
|
||||||
|
// Try registered recovery callbacks
|
||||||
|
dhm.callbackMutex.RLock()
|
||||||
|
defer dhm.callbackMutex.RUnlock()
|
||||||
|
|
||||||
|
for component, callback := range dhm.recoveryCallbacks {
|
||||||
|
if callback != nil {
|
||||||
|
go func(comp string, cb func() error) {
|
||||||
|
if err := cb(); err != nil {
|
||||||
|
dhm.logger.Error().
|
||||||
|
Str("component", comp).
|
||||||
|
Str("device_type", deviceType).
|
||||||
|
Err(err).
|
||||||
|
Msg("recovery callback failed")
|
||||||
|
} else {
|
||||||
|
atomic.AddInt64(&metrics.SuccessfulRecoveries, 1)
|
||||||
|
dhm.logger.Info().
|
||||||
|
Str("component", comp).
|
||||||
|
Str("device_type", deviceType).
|
||||||
|
Msg("recovery callback succeeded")
|
||||||
|
}
|
||||||
|
}(component, callback)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Global device health monitor instance
|
||||||
|
var (
|
||||||
|
globalDeviceHealthMonitor *DeviceHealthMonitor
|
||||||
|
deviceHealthOnce sync.Once
|
||||||
|
)
|
||||||
|
|
||||||
|
// GetDeviceHealthMonitor returns the global device health monitor
|
||||||
|
func GetDeviceHealthMonitor() *DeviceHealthMonitor {
|
||||||
|
deviceHealthOnce.Do(func() {
|
||||||
|
globalDeviceHealthMonitor = NewDeviceHealthMonitor()
|
||||||
|
})
|
||||||
|
return globalDeviceHealthMonitor
|
||||||
|
}
|
||||||
|
|
||||||
|
// min returns the smaller of two float64 values. When neither compares less
// than the other (including when either is NaN), b is returned.
func min(a, b float64) float64 {
	smaller := b
	if a < b {
		smaller = a
	}
	return smaller
}
|
||||||
|
|
||||||
|
// max returns the larger of two float64 values. When neither compares greater
// than the other (including when either is NaN), b is returned.
func max(a, b float64) float64 {
	larger := b
	if a > b {
		larger = a
	}
	return larger
}
|
|
@ -93,18 +93,18 @@ type BufferPoolEfficiencyTracker struct {
|
||||||
|
|
||||||
// NewLatencyHistogram creates a new latency histogram with predefined buckets
|
// NewLatencyHistogram creates a new latency histogram with predefined buckets
|
||||||
func NewLatencyHistogram(maxSamples int, logger zerolog.Logger) *LatencyHistogram {
|
func NewLatencyHistogram(maxSamples int, logger zerolog.Logger) *LatencyHistogram {
|
||||||
// Define latency buckets: 1ms, 5ms, 10ms, 25ms, 50ms, 100ms, 250ms, 500ms, 1s, 2s+
|
// Define latency buckets using configuration constants
|
||||||
buckets := []int64{
|
buckets := []int64{
|
||||||
int64(1 * time.Millisecond),
|
int64(1 * time.Millisecond),
|
||||||
int64(5 * time.Millisecond),
|
int64(5 * time.Millisecond),
|
||||||
int64(10 * time.Millisecond),
|
int64(GetConfig().LatencyBucket10ms),
|
||||||
int64(25 * time.Millisecond),
|
int64(GetConfig().LatencyBucket25ms),
|
||||||
int64(50 * time.Millisecond),
|
int64(GetConfig().LatencyBucket50ms),
|
||||||
int64(100 * time.Millisecond),
|
int64(GetConfig().LatencyBucket100ms),
|
||||||
int64(250 * time.Millisecond),
|
int64(GetConfig().LatencyBucket250ms),
|
||||||
int64(500 * time.Millisecond),
|
int64(GetConfig().LatencyBucket500ms),
|
||||||
int64(1 * time.Second),
|
int64(GetConfig().LatencyBucket1s),
|
||||||
int64(2 * time.Second),
|
int64(GetConfig().LatencyBucket2s),
|
||||||
}
|
}
|
||||||
|
|
||||||
return &LatencyHistogram{
|
return &LatencyHistogram{
|
||||||
|
|
|
@ -10,10 +10,10 @@ import (
|
||||||
|
|
||||||
// AudioInputMetrics holds metrics for microphone input
|
// AudioInputMetrics holds metrics for microphone input
|
||||||
type AudioInputMetrics struct {
|
type AudioInputMetrics struct {
|
||||||
FramesSent int64
|
FramesSent int64 // Total frames sent
|
||||||
FramesDropped int64
|
FramesDropped int64 // Total frames dropped
|
||||||
BytesProcessed int64
|
BytesProcessed int64 // Total bytes processed
|
||||||
ConnectionDrops int64
|
ConnectionDrops int64 // Connection drops
|
||||||
AverageLatency time.Duration // time.Duration is int64
|
AverageLatency time.Duration // time.Duration is int64
|
||||||
LastFrameTime time.Time
|
LastFrameTime time.Time
|
||||||
}
|
}
|
||||||
|
|
|
@ -13,6 +13,7 @@ import (
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
"github.com/jetkvm/kvm/internal/logging"
|
"github.com/jetkvm/kvm/internal/logging"
|
||||||
|
"github.com/rs/zerolog"
|
||||||
)
|
)
|
||||||
|
|
||||||
var (
|
var (
|
||||||
|
@ -99,16 +100,15 @@ var globalMessagePool = &MessagePool{
|
||||||
|
|
||||||
var messagePoolInitOnce sync.Once
|
var messagePoolInitOnce sync.Once
|
||||||
|
|
||||||
// initializeMessagePool initializes the message pool with pre-allocated messages
|
// initializeMessagePool initializes the global message pool with pre-allocated messages
|
||||||
func initializeMessagePool() {
|
func initializeMessagePool() {
|
||||||
messagePoolInitOnce.Do(func() {
|
messagePoolInitOnce.Do(func() {
|
||||||
// Pre-allocate 30% of pool size for immediate availability
|
preallocSize := messagePoolSize / 4 // 25% pre-allocated for immediate use
|
||||||
preallocSize := messagePoolSize * GetConfig().InputPreallocPercentage / 100
|
|
||||||
globalMessagePool.preallocSize = preallocSize
|
globalMessagePool.preallocSize = preallocSize
|
||||||
globalMessagePool.maxPoolSize = messagePoolSize * GetConfig().PoolGrowthMultiplier // Allow growth up to 2x
|
globalMessagePool.maxPoolSize = messagePoolSize * GetConfig().PoolGrowthMultiplier // Allow growth up to 2x
|
||||||
globalMessagePool.preallocated = make([]*OptimizedIPCMessage, 0, preallocSize)
|
globalMessagePool.preallocated = make([]*OptimizedIPCMessage, 0, preallocSize)
|
||||||
|
|
||||||
// Pre-allocate messages to reduce initial allocation overhead
|
// Pre-allocate messages for immediate use
|
||||||
for i := 0; i < preallocSize; i++ {
|
for i := 0; i < preallocSize; i++ {
|
||||||
msg := &OptimizedIPCMessage{
|
msg := &OptimizedIPCMessage{
|
||||||
data: make([]byte, 0, maxFrameSize),
|
data: make([]byte, 0, maxFrameSize),
|
||||||
|
@ -116,7 +116,7 @@ func initializeMessagePool() {
|
||||||
globalMessagePool.preallocated = append(globalMessagePool.preallocated, msg)
|
globalMessagePool.preallocated = append(globalMessagePool.preallocated, msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fill the channel pool with remaining messages
|
// Fill the channel with remaining messages
|
||||||
for i := preallocSize; i < messagePoolSize; i++ {
|
for i := preallocSize; i < messagePoolSize; i++ {
|
||||||
globalMessagePool.pool <- &OptimizedIPCMessage{
|
globalMessagePool.pool <- &OptimizedIPCMessage{
|
||||||
data: make([]byte, 0, maxFrameSize),
|
data: make([]byte, 0, maxFrameSize),
|
||||||
|
@ -488,33 +488,13 @@ func (ais *AudioInputServer) sendAck() error {
|
||||||
return ais.writeMessage(ais.conn, msg)
|
return ais.writeMessage(ais.conn, msg)
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeMessage writes a message to the connection using optimized buffers
|
// Global shared message pool for input IPC server
|
||||||
|
var globalInputServerMessagePool = NewGenericMessagePool(messagePoolSize)
|
||||||
|
|
||||||
|
// writeMessage writes a message to the connection using shared common utilities
|
||||||
func (ais *AudioInputServer) writeMessage(conn net.Conn, msg *InputIPCMessage) error {
|
func (ais *AudioInputServer) writeMessage(conn net.Conn, msg *InputIPCMessage) error {
|
||||||
// Get optimized message from pool for header preparation
|
// Use shared WriteIPCMessage function with global message pool
|
||||||
optMsg := globalMessagePool.Get()
|
return WriteIPCMessage(conn, msg, globalInputServerMessagePool, &ais.droppedFrames)
|
||||||
defer globalMessagePool.Put(optMsg)
|
|
||||||
|
|
||||||
// Prepare header in pre-allocated buffer
|
|
||||||
binary.LittleEndian.PutUint32(optMsg.header[0:4], msg.Magic)
|
|
||||||
optMsg.header[4] = byte(msg.Type)
|
|
||||||
binary.LittleEndian.PutUint32(optMsg.header[5:9], msg.Length)
|
|
||||||
binary.LittleEndian.PutUint64(optMsg.header[9:17], uint64(msg.Timestamp))
|
|
||||||
|
|
||||||
// Write header
|
|
||||||
_, err := conn.Write(optMsg.header[:])
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Write data if present
|
|
||||||
if msg.Length > 0 && msg.Data != nil {
|
|
||||||
_, err = conn.Write(msg.Data)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// AudioInputClient handles IPC communication from the main process
|
// AudioInputClient handles IPC communication from the main process
|
||||||
|
@ -706,21 +686,15 @@ func (aic *AudioInputClient) SendHeartbeat() error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// writeMessage writes a message to the server
|
// writeMessage writes a message to the server
|
||||||
|
// Global shared message pool for input IPC clients
|
||||||
|
var globalInputMessagePool = NewGenericMessagePool(messagePoolSize)
|
||||||
|
|
||||||
func (aic *AudioInputClient) writeMessage(msg *InputIPCMessage) error {
|
func (aic *AudioInputClient) writeMessage(msg *InputIPCMessage) error {
|
||||||
// Increment total frames counter
|
// Increment total frames counter
|
||||||
atomic.AddInt64(&aic.totalFrames, 1)
|
atomic.AddInt64(&aic.totalFrames, 1)
|
||||||
|
|
||||||
// Use common write function with shared message pool
|
// Use shared WriteIPCMessage function with global message pool
|
||||||
sharedPool := &GenericMessagePool{
|
return WriteIPCMessage(aic.conn, msg, globalInputMessagePool, &aic.droppedFrames)
|
||||||
pool: make(chan *OptimizedMessage, messagePoolSize),
|
|
||||||
hitCount: globalMessagePool.hitCount,
|
|
||||||
missCount: globalMessagePool.missCount,
|
|
||||||
preallocated: make([]*OptimizedMessage, 0),
|
|
||||||
preallocSize: messagePoolSize / 4,
|
|
||||||
maxPoolSize: messagePoolSize,
|
|
||||||
}
|
|
||||||
|
|
||||||
return WriteIPCMessage(aic.conn, msg, sharedPool, &aic.droppedFrames)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// IsConnected returns whether the client is connected
|
// IsConnected returns whether the client is connected
|
||||||
|
@ -752,6 +726,17 @@ func (ais *AudioInputServer) startReaderGoroutine() {
|
||||||
ais.wg.Add(1)
|
ais.wg.Add(1)
|
||||||
go func() {
|
go func() {
|
||||||
defer ais.wg.Done()
|
defer ais.wg.Done()
|
||||||
|
|
||||||
|
// Enhanced error tracking and recovery
|
||||||
|
var consecutiveErrors int
|
||||||
|
var lastErrorTime time.Time
|
||||||
|
maxConsecutiveErrors := GetConfig().MaxConsecutiveErrors
|
||||||
|
errorResetWindow := GetConfig().RestartWindow // Use existing restart window
|
||||||
|
baseBackoffDelay := GetConfig().RetryDelay
|
||||||
|
maxBackoffDelay := GetConfig().MaxRetryDelay
|
||||||
|
|
||||||
|
logger := logging.GetDefaultLogger().With().Str("component", "audio-input-reader").Logger()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ais.stopChan:
|
case <-ais.stopChan:
|
||||||
|
@ -760,8 +745,55 @@ func (ais *AudioInputServer) startReaderGoroutine() {
|
||||||
if ais.conn != nil {
|
if ais.conn != nil {
|
||||||
msg, err := ais.readMessage(ais.conn)
|
msg, err := ais.readMessage(ais.conn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
continue // Connection error, retry
|
// Enhanced error handling with progressive backoff
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
// Reset error counter if enough time has passed
|
||||||
|
if now.Sub(lastErrorTime) > errorResetWindow {
|
||||||
|
consecutiveErrors = 0
|
||||||
}
|
}
|
||||||
|
|
||||||
|
consecutiveErrors++
|
||||||
|
lastErrorTime = now
|
||||||
|
|
||||||
|
// Log error with context
|
||||||
|
logger.Warn().Err(err).
|
||||||
|
Int("consecutive_errors", consecutiveErrors).
|
||||||
|
Msg("Failed to read message from input connection")
|
||||||
|
|
||||||
|
// Progressive backoff based on error count
|
||||||
|
if consecutiveErrors > 1 {
|
||||||
|
backoffDelay := time.Duration(consecutiveErrors-1) * baseBackoffDelay
|
||||||
|
if backoffDelay > maxBackoffDelay {
|
||||||
|
backoffDelay = maxBackoffDelay
|
||||||
|
}
|
||||||
|
time.Sleep(backoffDelay)
|
||||||
|
}
|
||||||
|
|
||||||
|
// If too many consecutive errors, close connection to force reconnect
|
||||||
|
if consecutiveErrors >= maxConsecutiveErrors {
|
||||||
|
logger.Error().
|
||||||
|
Int("consecutive_errors", consecutiveErrors).
|
||||||
|
Msg("Too many consecutive read errors, closing connection")
|
||||||
|
|
||||||
|
ais.mtx.Lock()
|
||||||
|
if ais.conn != nil {
|
||||||
|
ais.conn.Close()
|
||||||
|
ais.conn = nil
|
||||||
|
}
|
||||||
|
ais.mtx.Unlock()
|
||||||
|
|
||||||
|
consecutiveErrors = 0 // Reset for next connection
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset error counter on successful read
|
||||||
|
if consecutiveErrors > 0 {
|
||||||
|
consecutiveErrors = 0
|
||||||
|
logger.Info().Msg("Input connection recovered")
|
||||||
|
}
|
||||||
|
|
||||||
// Send to message channel with non-blocking write
|
// Send to message channel with non-blocking write
|
||||||
select {
|
select {
|
||||||
case ais.messageChan <- msg:
|
case ais.messageChan <- msg:
|
||||||
|
@ -769,7 +801,11 @@ func (ais *AudioInputServer) startReaderGoroutine() {
|
||||||
default:
|
default:
|
||||||
// Channel full, drop message
|
// Channel full, drop message
|
||||||
atomic.AddInt64(&ais.droppedFrames, 1)
|
atomic.AddInt64(&ais.droppedFrames, 1)
|
||||||
|
logger.Warn().Msg("Message channel full, dropping frame")
|
||||||
}
|
}
|
||||||
|
} else {
|
||||||
|
// No connection, wait briefly before checking again
|
||||||
|
time.Sleep(GetConfig().DefaultSleepDuration)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -794,12 +830,73 @@ func (ais *AudioInputServer) startProcessorGoroutine() {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
|
// Enhanced error tracking for processing
|
||||||
|
var processingErrors int
|
||||||
|
var lastProcessingError time.Time
|
||||||
|
maxProcessingErrors := GetConfig().MaxConsecutiveErrors
|
||||||
|
errorResetWindow := GetConfig().RestartWindow
|
||||||
|
|
||||||
defer ais.wg.Done()
|
defer ais.wg.Done()
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ais.stopChan:
|
case <-ais.stopChan:
|
||||||
return
|
return
|
||||||
case msg := <-ais.messageChan:
|
case msg := <-ais.messageChan:
|
||||||
|
// Process message with error handling
|
||||||
|
start := time.Now()
|
||||||
|
err := ais.processMessageWithRecovery(msg, logger)
|
||||||
|
processingTime := time.Since(start)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
// Track processing errors
|
||||||
|
now := time.Now()
|
||||||
|
if now.Sub(lastProcessingError) > errorResetWindow {
|
||||||
|
processingErrors = 0
|
||||||
|
}
|
||||||
|
|
||||||
|
processingErrors++
|
||||||
|
lastProcessingError = now
|
||||||
|
|
||||||
|
logger.Warn().Err(err).
|
||||||
|
Int("processing_errors", processingErrors).
|
||||||
|
Dur("processing_time", processingTime).
|
||||||
|
Msg("Failed to process input message")
|
||||||
|
|
||||||
|
// If too many processing errors, drop frames more aggressively
|
||||||
|
if processingErrors >= maxProcessingErrors {
|
||||||
|
logger.Error().
|
||||||
|
Int("processing_errors", processingErrors).
|
||||||
|
Msg("Too many processing errors, entering aggressive drop mode")
|
||||||
|
|
||||||
|
// Clear processing queue to recover
|
||||||
|
for len(ais.processChan) > 0 {
|
||||||
|
select {
|
||||||
|
case <-ais.processChan:
|
||||||
|
atomic.AddInt64(&ais.droppedFrames, 1)
|
||||||
|
default:
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
processingErrors = 0 // Reset after clearing queue
|
||||||
|
}
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
// Reset error counter on successful processing
|
||||||
|
if processingErrors > 0 {
|
||||||
|
processingErrors = 0
|
||||||
|
logger.Info().Msg("Input processing recovered")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Update processing time metrics
|
||||||
|
atomic.StoreInt64(&ais.processingTime, processingTime.Nanoseconds())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
}
|
||||||
|
|
||||||
|
// processMessageWithRecovery processes a message with enhanced error recovery
|
||||||
|
func (ais *AudioInputServer) processMessageWithRecovery(msg *InputIPCMessage, logger zerolog.Logger) error {
|
||||||
// Intelligent frame dropping: prioritize recent frames
|
// Intelligent frame dropping: prioritize recent frames
|
||||||
if msg.Type == InputMessageTypeOpusFrame {
|
if msg.Type == InputMessageTypeOpusFrame {
|
||||||
// Check if processing queue is getting full
|
// Check if processing queue is getting full
|
||||||
|
@ -811,21 +908,25 @@ func (ais *AudioInputServer) startProcessorGoroutine() {
|
||||||
select {
|
select {
|
||||||
case <-ais.processChan: // Remove oldest
|
case <-ais.processChan: // Remove oldest
|
||||||
atomic.AddInt64(&ais.droppedFrames, 1)
|
atomic.AddInt64(&ais.droppedFrames, 1)
|
||||||
|
logger.Debug().Msg("Dropped oldest frame to make room")
|
||||||
default:
|
default:
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Send to processing queue
|
// Send to processing queue with timeout
|
||||||
select {
|
select {
|
||||||
case ais.processChan <- msg:
|
case ais.processChan <- msg:
|
||||||
default:
|
return nil
|
||||||
// Processing queue full, drop frame
|
case <-time.After(GetConfig().WriteTimeout):
|
||||||
|
// Processing queue full and timeout reached, drop frame
|
||||||
atomic.AddInt64(&ais.droppedFrames, 1)
|
atomic.AddInt64(&ais.droppedFrames, 1)
|
||||||
|
return fmt.Errorf("processing queue timeout")
|
||||||
|
default:
|
||||||
|
// Processing queue full, drop frame immediately
|
||||||
|
atomic.AddInt64(&ais.droppedFrames, 1)
|
||||||
|
return fmt.Errorf("processing queue full")
|
||||||
}
|
}
|
||||||
}
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// startMonitorGoroutine starts the performance monitoring goroutine
|
// startMonitorGoroutine starts the performance monitoring goroutine
|
||||||
|
|
|
@ -1,7 +1,6 @@
|
||||||
package audio
|
package audio
|
||||||
|
|
||||||
import (
|
import (
|
||||||
"context"
|
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
@ -65,59 +64,8 @@ func (msg *OutputIPCMessage) GetData() []byte {
|
||||||
return msg.Data
|
return msg.Data
|
||||||
}
|
}
|
||||||
|
|
||||||
// OutputOptimizedMessage represents a pre-allocated message for zero-allocation operations
|
// Global shared message pool for output IPC client header reading
|
||||||
type OutputOptimizedMessage struct {
|
var globalOutputClientMessagePool = NewGenericMessagePool(GetConfig().OutputMessagePoolSize)
|
||||||
header [17]byte // Pre-allocated header buffer (using constant value since array size must be compile-time constant)
|
|
||||||
data []byte // Reusable data buffer
|
|
||||||
}
|
|
||||||
|
|
||||||
// OutputMessagePool manages pre-allocated messages for zero-allocation IPC
|
|
||||||
type OutputMessagePool struct {
|
|
||||||
pool chan *OutputOptimizedMessage
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewOutputMessagePool creates a new message pool
|
|
||||||
func NewOutputMessagePool(size int) *OutputMessagePool {
|
|
||||||
pool := &OutputMessagePool{
|
|
||||||
pool: make(chan *OutputOptimizedMessage, size),
|
|
||||||
}
|
|
||||||
|
|
||||||
// Pre-allocate messages
|
|
||||||
for i := 0; i < size; i++ {
|
|
||||||
msg := &OutputOptimizedMessage{
|
|
||||||
data: make([]byte, GetConfig().OutputMaxFrameSize),
|
|
||||||
}
|
|
||||||
pool.pool <- msg
|
|
||||||
}
|
|
||||||
|
|
||||||
return pool
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get retrieves a message from the pool
|
|
||||||
func (p *OutputMessagePool) Get() *OutputOptimizedMessage {
|
|
||||||
select {
|
|
||||||
case msg := <-p.pool:
|
|
||||||
return msg
|
|
||||||
default:
|
|
||||||
// Pool exhausted, create new message
|
|
||||||
return &OutputOptimizedMessage{
|
|
||||||
data: make([]byte, GetConfig().OutputMaxFrameSize),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Put returns a message to the pool
|
|
||||||
func (p *OutputMessagePool) Put(msg *OutputOptimizedMessage) {
|
|
||||||
select {
|
|
||||||
case p.pool <- msg:
|
|
||||||
// Successfully returned to pool
|
|
||||||
default:
|
|
||||||
// Pool full, let GC handle it
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Global message pool for output IPC
|
|
||||||
var globalOutputMessagePool = NewOutputMessagePool(GetConfig().OutputMessagePoolSize)
|
|
||||||
|
|
||||||
type AudioOutputServer struct {
|
type AudioOutputServer struct {
|
||||||
// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
|
// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
|
||||||
|
@ -341,6 +289,9 @@ func (s *AudioOutputServer) SendFrame(frame []byte) error {
|
||||||
}
|
}
|
||||||
|
|
||||||
// sendFrameToClient sends frame data directly to the connected client
|
// sendFrameToClient sends frame data directly to the connected client
|
||||||
|
// globalOutputServerMessagePool is the package-level shared message pool for
// the output IPC server. Its initializer calls NewGenericMessagePool with
// GetConfig().OutputMessagePoolSize; sendFrameToClient passes it to
// WriteIPCMessage.
|
||||||
|
var globalOutputServerMessagePool = NewGenericMessagePool(GetConfig().OutputMessagePoolSize)
|
||||||
|
|
||||||
func (s *AudioOutputServer) sendFrameToClient(frame []byte) error {
|
func (s *AudioOutputServer) sendFrameToClient(frame []byte) error {
|
||||||
s.mtx.Lock()
|
s.mtx.Lock()
|
||||||
defer s.mtx.Unlock()
|
defer s.mtx.Unlock()
|
||||||
|
@ -351,59 +302,28 @@ func (s *AudioOutputServer) sendFrameToClient(frame []byte) error {
|
||||||
|
|
||||||
start := time.Now()
|
start := time.Now()
|
||||||
|
|
||||||
// Get optimized message from pool
|
// Create output IPC message
|
||||||
optMsg := globalOutputMessagePool.Get()
|
msg := &OutputIPCMessage{
|
||||||
defer globalOutputMessagePool.Put(optMsg)
|
Magic: outputMagicNumber,
|
||||||
|
Type: OutputMessageTypeOpusFrame,
|
||||||
// Prepare header in pre-allocated buffer
|
Length: uint32(len(frame)),
|
||||||
binary.LittleEndian.PutUint32(optMsg.header[0:4], outputMagicNumber)
|
Timestamp: start.UnixNano(),
|
||||||
optMsg.header[4] = byte(OutputMessageTypeOpusFrame)
|
Data: frame,
|
||||||
binary.LittleEndian.PutUint32(optMsg.header[5:9], uint32(len(frame)))
|
|
||||||
binary.LittleEndian.PutUint64(optMsg.header[9:17], uint64(start.UnixNano()))
|
|
||||||
|
|
||||||
// Use non-blocking write with timeout
|
|
||||||
ctx, cancel := context.WithTimeout(context.Background(), GetConfig().OutputWriteTimeout)
|
|
||||||
defer cancel()
|
|
||||||
|
|
||||||
// Create a channel to signal write completion
|
|
||||||
done := make(chan error, 1)
|
|
||||||
go func() {
|
|
||||||
// Write header using pre-allocated buffer
|
|
||||||
_, err := s.conn.Write(optMsg.header[:])
|
|
||||||
if err != nil {
|
|
||||||
done <- err
|
|
||||||
return
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Write frame data
|
// Use shared WriteIPCMessage function
|
||||||
if len(frame) > 0 {
|
err := WriteIPCMessage(s.conn, msg, globalOutputServerMessagePool, &s.droppedFrames)
|
||||||
_, err = s.conn.Write(frame)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
done <- err
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
done <- nil
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Wait for completion or timeout
|
|
||||||
select {
|
|
||||||
case err := <-done:
|
|
||||||
if err != nil {
|
|
||||||
atomic.AddInt64(&s.droppedFrames, 1)
|
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
// Record latency for monitoring
|
// Record latency for monitoring
|
||||||
if s.latencyMonitor != nil {
|
if s.latencyMonitor != nil {
|
||||||
writeLatency := time.Since(start)
|
writeLatency := time.Since(start)
|
||||||
s.latencyMonitor.RecordLatency(writeLatency, "ipc_write")
|
s.latencyMonitor.RecordLatency(writeLatency, "ipc_write")
|
||||||
}
|
}
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
case <-ctx.Done():
|
|
||||||
// Timeout occurred - drop frame to prevent blocking
|
|
||||||
atomic.AddInt64(&s.droppedFrames, 1)
|
|
||||||
return fmt.Errorf("write timeout after %v - frame dropped to prevent blocking", GetConfig().OutputWriteTimeout)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// GetServerStats returns server performance statistics
|
// GetServerStats returns server performance statistics
|
||||||
|
@ -495,8 +415,8 @@ func (c *AudioOutputClient) ReceiveFrame() ([]byte, error) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get optimized message from pool for header reading
|
// Get optimized message from pool for header reading
|
||||||
optMsg := globalOutputMessagePool.Get()
|
optMsg := globalOutputClientMessagePool.Get()
|
||||||
defer globalOutputMessagePool.Put(optMsg)
|
defer globalOutputClientMessagePool.Put(optMsg)
|
||||||
|
|
||||||
// Read header
|
// Read header
|
||||||
if _, err := io.ReadFull(c.conn, optMsg.header[:]); err != nil {
|
if _, err := io.ReadFull(c.conn, optMsg.header[:]); err != nil {
|
||||||
|
|
|
@ -321,17 +321,61 @@ func StartAudioOutputStreaming(send func([]byte)) error {
|
||||||
getOutputStreamingLogger().Info().Str("socket_path", getOutputSocketPath()).Msg("Audio output streaming started, connected to output server")
|
getOutputStreamingLogger().Info().Str("socket_path", getOutputSocketPath()).Msg("Audio output streaming started, connected to output server")
|
||||||
buffer := make([]byte, GetMaxAudioFrameSize())
|
buffer := make([]byte, GetMaxAudioFrameSize())
|
||||||
|
|
||||||
|
consecutiveErrors := 0
|
||||||
|
maxConsecutiveErrors := GetConfig().MaxConsecutiveErrors
|
||||||
|
errorBackoffDelay := GetConfig().RetryDelay
|
||||||
|
maxErrorBackoff := GetConfig().MaxRetryDelay
|
||||||
|
|
||||||
for {
|
for {
|
||||||
select {
|
select {
|
||||||
case <-ctx.Done():
|
case <-ctx.Done():
|
||||||
return
|
return
|
||||||
default:
|
default:
|
||||||
// Capture audio frame
|
// Capture audio frame with enhanced error handling
|
||||||
n, err := CGOAudioReadEncode(buffer)
|
n, err := CGOAudioReadEncode(buffer)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
getOutputStreamingLogger().Warn().Err(err).Msg("Failed to read/encode audio")
|
consecutiveErrors++
|
||||||
|
getOutputStreamingLogger().Warn().
|
||||||
|
Err(err).
|
||||||
|
Int("consecutive_errors", consecutiveErrors).
|
||||||
|
Msg("Failed to read/encode audio")
|
||||||
|
|
||||||
|
// Implement progressive backoff for consecutive errors
|
||||||
|
if consecutiveErrors >= maxConsecutiveErrors {
|
||||||
|
getOutputStreamingLogger().Error().
|
||||||
|
Int("consecutive_errors", consecutiveErrors).
|
||||||
|
Msg("Too many consecutive audio errors, attempting recovery")
|
||||||
|
|
||||||
|
// Try to reinitialize audio system
|
||||||
|
CGOAudioClose()
|
||||||
|
time.Sleep(errorBackoffDelay)
|
||||||
|
if initErr := CGOAudioInit(); initErr != nil {
|
||||||
|
getOutputStreamingLogger().Error().
|
||||||
|
Err(initErr).
|
||||||
|
Msg("Failed to reinitialize audio system")
|
||||||
|
// Exponential backoff for reinitialization failures
|
||||||
|
errorBackoffDelay = time.Duration(float64(errorBackoffDelay) * GetConfig().BackoffMultiplier)
|
||||||
|
if errorBackoffDelay > maxErrorBackoff {
|
||||||
|
errorBackoffDelay = maxErrorBackoff
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
getOutputStreamingLogger().Info().Msg("Audio system reinitialized successfully")
|
||||||
|
consecutiveErrors = 0
|
||||||
|
errorBackoffDelay = GetConfig().RetryDelay // Reset backoff
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Brief delay for transient errors
|
||||||
|
time.Sleep(GetConfig().ShortSleepDuration)
|
||||||
|
}
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Success - reset error counters
|
||||||
|
if consecutiveErrors > 0 {
|
||||||
|
consecutiveErrors = 0
|
||||||
|
errorBackoffDelay = GetConfig().RetryDelay
|
||||||
|
}
|
||||||
|
|
||||||
if n > 0 {
|
if n > 0 {
|
||||||
// Get frame buffer from pool to reduce allocations
|
// Get frame buffer from pool to reduce allocations
|
||||||
frame := GetAudioFrameBuffer()
|
frame := GetAudioFrameBuffer()
|
||||||
|
|
|
@ -0,0 +1,281 @@
|
||||||
|
package audio
|
||||||
|
|
||||||
|
import (
|
||||||
|
"errors"
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Sentinel errors returned (usually wrapped with %w) by the validation
// helpers in this file. Compare against them with errors.Is.
var (
	// ErrInvalidFrameLength reports a frame whose length is not acceptable.
	ErrInvalidFrameLength = errors.New("invalid frame length")
	// ErrFrameDataCorrupted reports frame contents that look corrupted.
	ErrFrameDataCorrupted = errors.New("frame data appears corrupted")
	// ErrBufferAlignment reports a buffer with an invalid alignment.
	ErrBufferAlignment = errors.New("buffer alignment invalid")
	// ErrInvalidSampleFormat reports data that does not fit the sample format.
	ErrInvalidSampleFormat = errors.New("invalid sample format")
	// ErrInvalidTimestamp reports an invalid timestamp.
	ErrInvalidTimestamp = errors.New("invalid timestamp")
	// ErrConfigurationMismatch reports a configuration mismatch.
	ErrConfigurationMismatch = errors.New("configuration mismatch")
	// ErrResourceExhaustion reports that resource exhaustion was detected.
	ErrResourceExhaustion = errors.New("resource exhaustion detected")
	// ErrInvalidPointer reports a nil or otherwise invalid pointer argument.
	ErrInvalidPointer = errors.New("invalid pointer")
	// ErrBufferOverflow reports a range that exceeds its buffer.
	ErrBufferOverflow = errors.New("buffer overflow detected")
	// ErrInvalidState reports an object or argument in an invalid state.
	ErrInvalidState = errors.New("invalid state")
)
|
||||||
|
|
||||||
|
// ValidationLevel selects how much validation work is performed.
type ValidationLevel int

// Validation levels, ordered from least to most thorough.
const (
	// ValidationMinimal performs only critical safety checks.
	ValidationMinimal ValidationLevel = iota
	// ValidationStandard is the standard validation for production.
	ValidationStandard
	// ValidationStrict is comprehensive validation for debugging.
	ValidationStrict
)
|
||||||
|
|
||||||
|
// ValidationConfig controls validation behavior
|
||||||
|
type ValidationConfig struct {
|
||||||
|
Level ValidationLevel
|
||||||
|
EnableRangeChecks bool
|
||||||
|
EnableAlignmentCheck bool
|
||||||
|
EnableDataIntegrity bool
|
||||||
|
MaxValidationTime time.Duration
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetValidationConfig returns the current validation configuration
|
||||||
|
func GetValidationConfig() ValidationConfig {
|
||||||
|
config := GetConfig()
|
||||||
|
return ValidationConfig{
|
||||||
|
Level: ValidationStandard,
|
||||||
|
EnableRangeChecks: true,
|
||||||
|
EnableAlignmentCheck: true,
|
||||||
|
EnableDataIntegrity: false, // Disabled by default for performance
|
||||||
|
MaxValidationTime: time.Duration(config.ValidationTimeoutMS) * time.Millisecond,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateAudioFrameFast performs minimal validation for performance-critical paths
|
||||||
|
func ValidateAudioFrameFast(data []byte) error {
|
||||||
|
if len(data) == 0 {
|
||||||
|
return ErrInvalidFrameData
|
||||||
|
}
|
||||||
|
|
||||||
|
// Quick bounds check using config constants
|
||||||
|
maxSize := GetConfig().MaxAudioFrameSize
|
||||||
|
if len(data) > maxSize {
|
||||||
|
return fmt.Errorf("%w: frame size %d exceeds maximum %d", ErrInvalidFrameSize, len(data), maxSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateAudioFrameComprehensive performs thorough validation
|
||||||
|
func ValidateAudioFrameComprehensive(data []byte, expectedSampleRate int, expectedChannels int) error {
|
||||||
|
validationConfig := GetValidationConfig()
|
||||||
|
start := time.Now()
|
||||||
|
|
||||||
|
// Timeout protection for validation
|
||||||
|
defer func() {
|
||||||
|
if time.Since(start) > validationConfig.MaxValidationTime {
|
||||||
|
// Log validation timeout but don't fail
|
||||||
|
getValidationLogger().Warn().Dur("duration", time.Since(start)).Msg("validation timeout exceeded")
|
||||||
|
}
|
||||||
|
}()
|
||||||
|
|
||||||
|
// Basic validation first
|
||||||
|
if err := ValidateAudioFrameFast(data); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Range validation
|
||||||
|
if validationConfig.EnableRangeChecks {
|
||||||
|
config := GetConfig()
|
||||||
|
if len(data) < config.MinAudioFrameSize {
|
||||||
|
return fmt.Errorf("%w: frame size %d below minimum %d", ErrInvalidFrameSize, len(data), config.MinAudioFrameSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate frame length matches expected sample format
|
||||||
|
expectedFrameSize := (expectedSampleRate * expectedChannels * 2) / 1000 * int(config.AudioQualityMediumFrameSize/time.Millisecond)
|
||||||
|
if abs(len(data)-expectedFrameSize) > config.FrameSizeTolerance {
|
||||||
|
return fmt.Errorf("%w: frame size %d doesn't match expected %d (±%d)", ErrInvalidFrameLength, len(data), expectedFrameSize, config.FrameSizeTolerance)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Alignment validation for ARM32 compatibility
|
||||||
|
if validationConfig.EnableAlignmentCheck {
|
||||||
|
if uintptr(unsafe.Pointer(&data[0]))%4 != 0 {
|
||||||
|
return fmt.Errorf("%w: buffer not 4-byte aligned for ARM32", ErrBufferAlignment)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Data integrity checks (expensive, only for debugging)
|
||||||
|
if validationConfig.EnableDataIntegrity && validationConfig.Level == ValidationStrict {
|
||||||
|
if err := validateAudioDataIntegrity(data, expectedChannels); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateZeroCopyFrameEnhanced performs enhanced zero-copy frame validation
|
||||||
|
func ValidateZeroCopyFrameEnhanced(frame *ZeroCopyAudioFrame) error {
|
||||||
|
if frame == nil {
|
||||||
|
return fmt.Errorf("%w: frame is nil", ErrInvalidPointer)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check reference count validity
|
||||||
|
frame.mutex.RLock()
|
||||||
|
refCount := frame.refCount
|
||||||
|
length := frame.length
|
||||||
|
capacity := frame.capacity
|
||||||
|
frame.mutex.RUnlock()
|
||||||
|
|
||||||
|
if refCount <= 0 {
|
||||||
|
return fmt.Errorf("%w: invalid reference count %d", ErrInvalidState, refCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
if length < 0 || capacity < 0 {
|
||||||
|
return fmt.Errorf("%w: negative length (%d) or capacity (%d)", ErrInvalidState, length, capacity)
|
||||||
|
}
|
||||||
|
|
||||||
|
if length > capacity {
|
||||||
|
return fmt.Errorf("%w: length %d exceeds capacity %d", ErrBufferOverflow, length, capacity)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate the underlying data
|
||||||
|
data := frame.Data()
|
||||||
|
return ValidateAudioFrameFast(data)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateBufferBounds performs bounds checking with overflow protection
|
||||||
|
func ValidateBufferBounds(buffer []byte, offset, length int) error {
|
||||||
|
if buffer == nil {
|
||||||
|
return fmt.Errorf("%w: buffer is nil", ErrInvalidPointer)
|
||||||
|
}
|
||||||
|
|
||||||
|
if offset < 0 {
|
||||||
|
return fmt.Errorf("%w: negative offset %d", ErrInvalidState, offset)
|
||||||
|
}
|
||||||
|
|
||||||
|
if length < 0 {
|
||||||
|
return fmt.Errorf("%w: negative length %d", ErrInvalidState, length)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for integer overflow
|
||||||
|
if offset > len(buffer) {
|
||||||
|
return fmt.Errorf("%w: offset %d exceeds buffer length %d", ErrBufferOverflow, offset, len(buffer))
|
||||||
|
}
|
||||||
|
|
||||||
|
// Safe addition check for overflow
|
||||||
|
if offset+length < offset || offset+length > len(buffer) {
|
||||||
|
return fmt.Errorf("%w: range [%d:%d] exceeds buffer length %d", ErrBufferOverflow, offset, offset+length, len(buffer))
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateAudioConfiguration performs comprehensive configuration validation
|
||||||
|
func ValidateAudioConfiguration(config AudioConfig) error {
|
||||||
|
if err := ValidateAudioQuality(config.Quality); err != nil {
|
||||||
|
return fmt.Errorf("quality validation failed: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
configConstants := GetConfig()
|
||||||
|
|
||||||
|
// Validate bitrate ranges
|
||||||
|
if config.Bitrate < configConstants.MinBitrate || config.Bitrate > configConstants.MaxBitrate {
|
||||||
|
return fmt.Errorf("%w: bitrate %d outside valid range [%d, %d]", ErrInvalidConfiguration, config.Bitrate, configConstants.MinBitrate, configConstants.MaxBitrate)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate sample rate
|
||||||
|
validSampleRates := []int{8000, 12000, 16000, 24000, 48000}
|
||||||
|
validSampleRate := false
|
||||||
|
for _, rate := range validSampleRates {
|
||||||
|
if config.SampleRate == rate {
|
||||||
|
validSampleRate = true
|
||||||
|
break
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if !validSampleRate {
|
||||||
|
return fmt.Errorf("%w: sample rate %d not in supported rates %v", ErrInvalidSampleRate, config.SampleRate, validSampleRates)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate channels
|
||||||
|
if config.Channels < 1 || config.Channels > configConstants.MaxChannels {
|
||||||
|
return fmt.Errorf("%w: channels %d outside valid range [1, %d]", ErrInvalidChannels, config.Channels, configConstants.MaxChannels)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate frame size
|
||||||
|
if config.FrameSize < configConstants.MinFrameSize || config.FrameSize > configConstants.MaxFrameSize {
|
||||||
|
return fmt.Errorf("%w: frame size %v outside valid range [%v, %v]", ErrInvalidConfiguration, config.FrameSize, configConstants.MinFrameSize, configConstants.MaxFrameSize)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// ValidateResourceLimits checks if system resources are within acceptable limits
|
||||||
|
func ValidateResourceLimits() error {
|
||||||
|
config := GetConfig()
|
||||||
|
|
||||||
|
// Check buffer pool sizes
|
||||||
|
framePoolStats := GetAudioBufferPoolStats()
|
||||||
|
if framePoolStats.FramePoolSize > int64(config.MaxPoolSize*2) {
|
||||||
|
return fmt.Errorf("%w: frame pool size %d exceeds safe limit %d", ErrResourceExhaustion, framePoolStats.FramePoolSize, config.MaxPoolSize*2)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check zero-copy pool allocation count
|
||||||
|
zeroCopyStats := GetGlobalZeroCopyPoolStats()
|
||||||
|
if zeroCopyStats.AllocationCount > int64(config.MaxPoolSize*3) {
|
||||||
|
return fmt.Errorf("%w: zero-copy allocations %d exceed safe limit %d", ErrResourceExhaustion, zeroCopyStats.AllocationCount, config.MaxPoolSize*3)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// validateAudioDataIntegrity performs expensive data integrity checks
|
||||||
|
func validateAudioDataIntegrity(data []byte, channels int) error {
|
||||||
|
if len(data)%2 != 0 {
|
||||||
|
return fmt.Errorf("%w: odd number of bytes for 16-bit samples", ErrInvalidSampleFormat)
|
||||||
|
}
|
||||||
|
|
||||||
|
if len(data)%(channels*2) != 0 {
|
||||||
|
return fmt.Errorf("%w: data length %d not aligned to channel count %d", ErrInvalidSampleFormat, len(data), channels)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for obvious corruption patterns (all zeros, all max values)
|
||||||
|
sampleCount := len(data) / 2
|
||||||
|
zeroCount := 0
|
||||||
|
maxCount := 0
|
||||||
|
|
||||||
|
for i := 0; i < len(data); i += 2 {
|
||||||
|
sample := int16(data[i]) | int16(data[i+1])<<8
|
||||||
|
if sample == 0 {
|
||||||
|
zeroCount++
|
||||||
|
} else if sample == 32767 || sample == -32768 {
|
||||||
|
maxCount++
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Flag suspicious patterns
|
||||||
|
if zeroCount > sampleCount*9/10 {
|
||||||
|
return fmt.Errorf("%w: %d%% zero samples suggests silence or corruption", ErrFrameDataCorrupted, (zeroCount*100)/sampleCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
if maxCount > sampleCount/10 {
|
||||||
|
return fmt.Errorf("%w: %d%% max-value samples suggests clipping or corruption", ErrFrameDataCorrupted, (maxCount*100)/sampleCount)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// abs returns the absolute value of x.
//
// The most negative int has no positive counterpart, so negating it wraps
// back to itself. Returning that negative value would let callers that
// compare abs(...) against a tolerance accept an enormous difference, so for
// that single input abs saturates to the largest int instead.
func abs(x int) int {
	const maxInt = int(^uint(0) >> 1)

	switch {
	case x >= 0:
		return x
	case x == -maxInt-1:
		return maxInt
	default:
		return -x
	}
}
|
||||||
|
|
||||||
|
// getValidationLogger returns a logger for validation operations
|
||||||
|
func getValidationLogger() *zerolog.Logger {
|
||||||
|
logger := logging.GetDefaultLogger().With().Str("component", "audio-validation").Logger()
|
||||||
|
return &logger
|
||||||
|
}
|
Loading…
Reference in New Issue