[WIP] Updates: simplify audio system

This commit is contained in:
Alex P 2025-09-30 09:08:55 +00:00
parent 6c6a1def28
commit 680607e82e
5 changed files with 55 additions and 315 deletions

View File

@ -15,14 +15,9 @@ func ensureAudioControlService() *audio.AudioControlService {
sessionProvider := &SessionProviderImpl{}
audioControlService = audio.NewAudioControlService(sessionProvider, logger)
// Set up RPC callback functions for the audio package
// Set up RPC callback function for the audio package
audio.SetRPCCallbacks(
func() *audio.AudioControlService { return audioControlService },
func() audio.AudioConfig { return audioControlService.GetCurrentAudioQuality() },
func(quality audio.AudioQuality) error {
audioControlService.SetAudioQuality(quality)
return nil
},
)
}
return audioControlService

View File

@ -6,7 +6,7 @@
// Key components: output/input pipelines with Opus codec, buffer management,
// zero-copy frame pools, IPC communication, and process supervision.
//
// Supports four quality presets (Low/Medium/High/Ultra) with configurable bitrates.
// Optimized for S16_LE @ 48kHz stereo HDMI audio with minimal CPU usage.
// All APIs are thread-safe with comprehensive error handling and metrics collection.
//
// # Performance Characteristics
@ -14,13 +14,12 @@
// Designed for embedded ARM systems with limited resources:
// - Sub-50ms end-to-end latency under normal conditions
// - Memory usage scales with buffer configuration
// - CPU usage optimized through zero-copy operations
// - Network bandwidth adapts to quality settings
// - CPU usage optimized through zero-copy operations and complexity=1 Opus
// - Fixed optimal configuration (96 kbps output, 48 kbps input)
//
// # Usage Example
//
// config := GetAudioConfig()
// SetAudioQuality(AudioQualityHigh)
//
// // Audio output will automatically start when frames are received
package audio
@ -42,23 +41,13 @@ func GetMaxAudioFrameSize() int {
return Config.MaxAudioFrameSize
}
// AudioQuality identifies one of the audio quality presets.
// It is an int so that callers holding a plain integer (for example the
// RPCAudioQuality handler, which does AudioQuality(quality)) can convert
// directly; the numeric values below are therefore part of that contract.
type AudioQuality int
// iota numbers the presets in declaration order:
// AudioQualityLow = 0, AudioQualityMedium = 1, AudioQualityHigh = 2,
// AudioQualityUltra = 3. Reordering these constants changes their values.
const (
AudioQualityLow AudioQuality = iota
AudioQualityMedium
AudioQualityHigh
AudioQualityUltra
)
// AudioConfig holds configuration for audio processing
// AudioConfig holds the optimal audio configuration
// All settings are fixed for S16_LE @ 48kHz HDMI audio
type AudioConfig struct {
Quality AudioQuality
Bitrate int // kbps
SampleRate int // Hz
Channels int
FrameSize time.Duration // ms
Bitrate int // kbps (96 for output, 48 for input)
SampleRate int // Hz (always 48000)
Channels int // 2 for output (stereo), 1 for input (mono)
FrameSize time.Duration // ms (always 20ms)
}
// AudioMetrics tracks audio performance metrics
@ -72,195 +61,29 @@ type AudioMetrics struct {
}
var (
// Optimal configuration for audio output (HDMI → client)
currentConfig = AudioConfig{
Quality: AudioQualityMedium,
Bitrate: Config.AudioQualityMediumOutputBitrate,
Bitrate: Config.OptimalOutputBitrate,
SampleRate: Config.SampleRate,
Channels: Config.Channels,
FrameSize: Config.AudioQualityMediumFrameSize,
FrameSize: 20 * time.Millisecond,
}
// Optimal configuration for microphone input (client → target)
currentMicrophoneConfig = AudioConfig{
Quality: AudioQualityMedium,
Bitrate: Config.AudioQualityMediumInputBitrate,
Bitrate: Config.OptimalInputBitrate,
SampleRate: Config.SampleRate,
Channels: 1,
FrameSize: Config.AudioQualityMediumFrameSize,
FrameSize: 20 * time.Millisecond,
}
metrics AudioMetrics
)
// qualityPresets defines the base quality configurations, keyed by quality
// level. Each entry carries both the output and the input (microphone)
// bitrate; GetAudioQualityPresets and GetMicrophoneQualityPresets pick the
// one they need.
//
// The values are read from Config once, when this package-level variable is
// initialized; they are not re-read on later calls.
var qualityPresets = map[AudioQuality]struct {
// Bitrates are copied into AudioConfig.Bitrate, which is documented as kbps.
outputBitrate, inputBitrate int
sampleRate, channels int
frameSize time.Duration
}{
AudioQualityLow: {
outputBitrate: Config.AudioQualityLowOutputBitrate, inputBitrate: Config.AudioQualityLowInputBitrate,
sampleRate: Config.AudioQualityLowSampleRate, channels: Config.AudioQualityLowChannels,
frameSize: Config.AudioQualityLowFrameSize,
},
AudioQualityMedium: {
outputBitrate: Config.AudioQualityMediumOutputBitrate, inputBitrate: Config.AudioQualityMediumInputBitrate,
sampleRate: Config.AudioQualityMediumSampleRate, channels: Config.AudioQualityMediumChannels,
frameSize: Config.AudioQualityMediumFrameSize,
},
// High and Ultra use the shared Config.SampleRate rather than a
// per-quality sample rate field like Low and Medium do.
AudioQualityHigh: {
outputBitrate: Config.AudioQualityHighOutputBitrate, inputBitrate: Config.AudioQualityHighInputBitrate,
sampleRate: Config.SampleRate, channels: Config.AudioQualityHighChannels,
frameSize: Config.AudioQualityHighFrameSize,
},
AudioQualityUltra: {
outputBitrate: Config.AudioQualityUltraOutputBitrate, inputBitrate: Config.AudioQualityUltraInputBitrate,
sampleRate: Config.SampleRate, channels: Config.AudioQualityUltraChannels,
frameSize: Config.AudioQualityUltraFrameSize,
},
}
// GetAudioQualityPresets returns the predefined audio output configuration
// for every quality level in qualityPresets. A new map is built on each call,
// so callers may modify the result freely.
func GetAudioQualityPresets() map[AudioQuality]AudioConfig {
	presets := make(map[AudioQuality]AudioConfig, len(qualityPresets))
	for level, base := range qualityPresets {
		// Output side: take the output bitrate and the preset's own
		// sample rate and channel count.
		presets[level] = AudioConfig{
			Quality:    level,
			Bitrate:    base.outputBitrate,
			SampleRate: base.sampleRate,
			Channels:   base.channels,
			FrameSize:  base.frameSize,
		}
	}
	return presets
}
// GetMicrophoneQualityPresets returns the predefined microphone input
// configuration for every quality level in qualityPresets. A new map is built
// on each call.
func GetMicrophoneQualityPresets() map[AudioQuality]AudioConfig {
	presets := make(map[AudioQuality]AudioConfig, len(qualityPresets))
	for level, base := range qualityPresets {
		// The low preset has a dedicated microphone sample rate; every
		// other level reuses the preset's sample rate.
		micSampleRate := base.sampleRate
		if level == AudioQualityLow {
			micSampleRate = Config.AudioQualityMicLowSampleRate
		}

		presets[level] = AudioConfig{
			Quality:    level,
			Bitrate:    base.inputBitrate,
			SampleRate: micSampleRate,
			Channels:   1, // Microphone is always mono
			FrameSize:  base.frameSize,
		}
	}
	return presets
}
// SetAudioQuality switches the audio output to the given quality preset.
//
// An invalid quality is logged as a warning and otherwise ignored; the current
// configuration is left untouched. For a valid quality, currentConfig is
// replaced (no lock is taken in this function), the matching Opus encoder
// parameters are stored on the output supervisor, and, if the supervisor is
// connected, the new configuration is pushed to the running subprocess over
// IPC. If that push fails, the subprocess is stopped and started again.
func SetAudioQuality(quality AudioQuality) {
	// Validate audio quality parameter
	if err := ValidateAudioQuality(quality); err != nil {
		// Log validation error but don't fail - maintain backward compatibility
		warnLog := logging.GetDefaultLogger().With().Str("component", "audio").Logger()
		warnLog.Warn().Err(err).Int("quality", int(quality)).Msg("invalid audio quality, using current config")
		return
	}

	config, found := GetAudioQualityPresets()[quality]
	if !found {
		return
	}
	currentConfig = config

	// Pick the Opus encoder parameters that belong to this quality level.
	var complexity, vbr, signalType, bandwidth, dtx int
	switch quality {
	case AudioQualityLow:
		complexity = Config.AudioQualityLowOpusComplexity
		vbr = Config.AudioQualityLowOpusVBR
		signalType = Config.AudioQualityLowOpusSignalType
		bandwidth = Config.AudioQualityLowOpusBandwidth
		dtx = Config.AudioQualityLowOpusDTX
	case AudioQualityHigh:
		complexity = Config.AudioQualityHighOpusComplexity
		vbr = Config.AudioQualityHighOpusVBR
		signalType = Config.AudioQualityHighOpusSignalType
		bandwidth = Config.AudioQualityHighOpusBandwidth
		dtx = Config.AudioQualityHighOpusDTX
	case AudioQualityUltra:
		complexity = Config.AudioQualityUltraOpusComplexity
		vbr = Config.AudioQualityUltraOpusVBR
		signalType = Config.AudioQualityUltraOpusSignalType
		bandwidth = Config.AudioQualityUltraOpusBandwidth
		dtx = Config.AudioQualityUltraOpusDTX
	default:
		// AudioQualityMedium, which is also the fallback for any other value.
		complexity = Config.AudioQualityMediumOpusComplexity
		vbr = Config.AudioQualityMediumOpusVBR
		signalType = Config.AudioQualityMediumOpusSignalType
		bandwidth = Config.AudioQualityMediumOpusBandwidth
		dtx = Config.AudioQualityMediumOpusDTX
	}

	// Update audio output subprocess configuration dynamically without restart
	logger := logging.GetDefaultLogger().With().Str("component", "audio").Logger()
	logger.Info().Int("quality", int(quality)).Msg("updating audio output quality settings dynamically")

	supervisor := GetAudioOutputSupervisor()
	if supervisor == nil {
		return
	}

	// Set new OPUS configuration for future restarts (bitrate in bps).
	bitrateBps := config.Bitrate * 1000
	supervisor.SetOpusConfig(bitrateBps, complexity, vbr, signalType, bandwidth, dtx)

	if !supervisor.IsConnected() {
		logger.Info().Bool("supervisor_running", supervisor.IsRunning()).Msg("audio output subprocess not connected, configuration will apply on next start")
		return
	}

	// Send dynamic configuration update to running subprocess via IPC.
	// The frame size goes over the wire in samples, not milliseconds.
	samplesPerFrame := int(config.FrameSize.Milliseconds() * int64(config.SampleRate) / 1000)
	opusConfig := UnifiedIPCOpusConfig{
		SampleRate: config.SampleRate,
		Channels:   config.Channels,
		FrameSize:  samplesPerFrame,
		Bitrate:    bitrateBps,
		Complexity: complexity,
		VBR:        vbr,
		SignalType: signalType,
		Bandwidth:  bandwidth,
		DTX:        dtx,
	}
	logger.Info().Interface("opusConfig", opusConfig).Msg("sending Opus configuration to audio output subprocess")

	if sendErr := supervisor.SendOpusConfig(opusConfig); sendErr != nil {
		logger.Warn().Err(sendErr).Msg("failed to send dynamic Opus config update via IPC, falling back to subprocess restart")
		// Fallback to subprocess restart if IPC update fails
		supervisor.Stop()
		if startErr := supervisor.Start(); startErr != nil {
			logger.Error().Err(startErr).Msg("failed to restart audio output subprocess after IPC update failure")
		}
		return
	}

	logger.Info().Msg("audio output quality updated dynamically via IPC")

	// Post-update cleanup runs in the background so the caller is not
	// blocked by the settle delay.
	go func() {
		time.Sleep(Config.QualityChangeSettleDelay) // Wait for quality change to settle
		// Reset audio input server stats to clear persistent warnings
		ResetGlobalAudioInputServerStats()
		// Attempt recovery if there are still issues
		time.Sleep(1 * time.Second)
		RecoverGlobalAudioInputServer()
	}()
}
// GetAudioConfig returns the current optimal audio configuration.
// The package-level currentConfig is returned by value, so the caller
// receives a copy and cannot modify the package state through it.
func GetAudioConfig() AudioConfig {
return currentConfig
}
// GetMicrophoneConfig returns the current optimal microphone configuration.
// The package-level currentMicrophoneConfig is returned by value, so the
// caller receives a copy and cannot modify the package state through it.
func GetMicrophoneConfig() AudioConfig {
return currentMicrophoneConfig
}

View File

@ -7,22 +7,14 @@ import (
// RPC wrapper functions for audio control
// These functions bridge the RPC layer to the AudioControlService
// These variables will be set by the main package to provide access to the global service
// This variable will be set by the main package to provide access to the global service
var (
getAudioControlServiceFunc func() *AudioControlService
getAudioQualityFunc func() AudioConfig
setAudioQualityFunc func(AudioQuality) error
)
// SetRPCCallbacks sets the callback functions for RPC operations
func SetRPCCallbacks(
getService func() *AudioControlService,
getQuality func() AudioConfig,
setQuality func(AudioQuality) error,
) {
// SetRPCCallbacks sets the callback function for RPC operations
func SetRPCCallbacks(getService func() *AudioControlService) {
getAudioControlServiceFunc = getService
getAudioQualityFunc = getQuality
setAudioQualityFunc = setQuality
}
// RPCAudioMute handles audio mute/unmute RPC requests
@ -37,30 +29,11 @@ func RPCAudioMute(muted bool) error {
return service.MuteAudio(muted)
}
// RPCAudioQuality handles audio quality change RPC requests
// RPCAudioQuality returns the current config for backward compatibility.
//
// Deprecated: quality is now fixed at optimal settings.
func RPCAudioQuality(quality int) (map[string]any, error) {
if getAudioQualityFunc == nil || setAudioQualityFunc == nil {
return nil, fmt.Errorf("audio quality functions not available")
}
// Convert int to AudioQuality type
audioQuality := AudioQuality(quality)
// Get current audio quality configuration
currentConfig := getAudioQualityFunc()
// Set new quality if different
if currentConfig.Quality != audioQuality {
err := setAudioQualityFunc(audioQuality)
if err != nil {
return nil, fmt.Errorf("failed to set audio quality: %w", err)
}
// Get updated config after setting
newConfig := getAudioQualityFunc()
return map[string]any{"config": newConfig}, nil
}
// Return current config if no change needed
// Quality is now fixed - return current optimal configuration
currentConfig := GetAudioConfig()
return map[string]any{"config": currentConfig}, nil
}
@ -100,21 +73,15 @@ func RPCAudioStatus() (map[string]interface{}, error) {
return service.GetAudioStatus(), nil
}
// RPCAudioQualityPresets handles audio quality presets RPC requests (read-only)
// RPCAudioQualityPresets returns the single optimal configuration.
// Kept for backward compatibility with UI.
//
// Deprecated: audio quality is fixed, so there are no presets to choose from.
func RPCAudioQualityPresets() (map[string]any, error) {
if getAudioControlServiceFunc == nil || getAudioQualityFunc == nil {
return nil, fmt.Errorf("audio control service not available")
}
service := getAudioControlServiceFunc()
if service == nil {
return nil, fmt.Errorf("audio control service not initialized")
}
presets := service.GetAudioQualityPresets()
current := getAudioQualityFunc()
// The single optimal configuration is reported as the current config
current := GetAudioConfig()
// Return empty presets map (UI will handle this gracefully)
return map[string]any{
"presets": presets,
"presets": map[string]any{},
"current": current,
}, nil
}

14
main.go
View File

@ -36,15 +36,15 @@ func startAudioSubprocess() error {
audioInputSupervisor := audio.NewAudioInputSupervisor()
audio.SetAudioInputSupervisor(audioInputSupervisor)
// Set default OPUS configuration for audio input supervisor (low quality for single-core RV1106)
// Set optimal OPUS configuration for audio input supervisor (48 kbps mono mic)
audioConfig := audio.Config
audioInputSupervisor.SetOpusConfig(
audioConfig.AudioQualityLowInputBitrate*1000, // Convert kbps to bps
audioConfig.AudioQualityLowOpusComplexity,
audioConfig.AudioQualityLowOpusVBR,
audioConfig.AudioQualityLowOpusSignalType,
audioConfig.AudioQualityLowOpusBandwidth,
audioConfig.AudioQualityLowOpusDTX,
audioConfig.OptimalInputBitrate*1000, // Convert kbps to bps (48 kbps)
audioConfig.OptimalOpusComplexity, // Complexity 1 for minimal CPU
audioConfig.OptimalOpusVBR, // VBR enabled
audioConfig.OptimalOpusSignalType, // MUSIC signal type
audioConfig.OptimalOpusBandwidth, // WIDEBAND for 48kHz
audioConfig.OptimalOpusDTX, // DTX disabled
)
// Note: Audio input supervisor is NOT started here - it will be started on-demand

View File

@ -180,34 +180,7 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
}
};
// Asks the device to switch audio quality via the "audioQuality" RPC and, on
// success, stores the config returned by the device in local state.
// The loading flag is set for the whole round-trip and always cleared in
// `finally`; any failure is surfaced to the user as an error notification.
const handleQualityChange = async (quality: number) => {
setIsLoading(true);
try {
// Use RPC for device communication - works for both local and cloud
if (rpcDataChannel?.readyState !== "open") {
throw new Error("Device connection not available");
}
// Wrap the callback-style `send` in a Promise so it can be awaited and so
// an RPC error lands in the catch block below.
await new Promise<void>((resolve, reject) => {
send("audioQuality", { quality }, (resp: JsonRpcResponse) => {
if ("error" in resp) {
reject(new Error(resp.error.message));
} else {
// Update local state with response
// (only when the result is an object that actually carries a `config` key)
if ("result" in resp && resp.result && typeof resp.result === 'object' && 'config' in resp.result) {
setCurrentConfig(resp.result.config as AudioConfig);
}
resolve();
}
});
});
} catch (error) {
// Non-Error throwables fall back to a generic message.
const errorMessage = error instanceof Error ? error.message : "Failed to change audio quality";
notifications.error(errorMessage);
} finally {
setIsLoading(false);
}
};
// Quality change handler removed - quality is now fixed at optimal settings
const handleToggleMicrophoneEnable = async () => {
const now = Date.now();
@ -447,41 +420,23 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
</button>
</div>
{/* Quality Settings */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<MdGraphicEq className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Output Quality
</span>
</div>
<div className="grid grid-cols-2 gap-2">
{Object.entries(audioQualityService.getQualityLabels()).map(([quality, label]) => (
<button
key={quality}
onClick={() => handleQualityChange(parseInt(quality))}
disabled={isLoading}
className={cx(
"rounded-md border px-3 py-2 text-sm font-medium transition-colors",
currentConfig?.Quality === parseInt(quality)
? "border-blue-500 bg-blue-50 text-blue-700 dark:bg-blue-900/20 dark:text-blue-300"
: "border-slate-200 bg-white text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600",
isLoading && "opacity-50 cursor-not-allowed"
)}
>
{label}
</button>
))}
</div>
{currentConfig && (
<div className="text-xs text-slate-600 dark:text-slate-400 mt-2">
Bitrate: {currentConfig.Bitrate}kbps |
Sample Rate: {currentConfig.SampleRate}Hz
{/* Audio Quality Info (fixed optimal configuration) */}
{currentConfig && (
<div className="space-y-2 rounded-md bg-slate-50 p-3 dark:bg-slate-800">
<div className="flex items-center gap-2">
<MdGraphicEq className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Configuration
</span>
</div>
)}
</div>
<div className="text-sm text-slate-600 dark:text-slate-400">
Optimized for S16_LE @ 48kHz stereo HDMI audio
</div>
<div className="text-xs text-slate-500 dark:text-slate-500">
Bitrate: {currentConfig.Bitrate} kbps | Sample Rate: {currentConfig.SampleRate} Hz | Channels: {currentConfig.Channels}
</div>
</div>
)}