feat(audio): implement dynamic Opus config updates and optimize audio params

Add support for dynamic Opus encoder configuration updates without requiring subprocess restart. This allows quality changes to be applied immediately while maintaining audio stream continuity.

Optimize audio quality parameters to reduce CPU load and prevent mouse lag on RV1106 devices. Lower bitrates and complexity while adjusting signal types and bandwidths for better performance.

Add build tags for CGO requirements and improve audio input supervisor behavior to check for existing processes before starting new ones.
This commit is contained in:
Alex P 2025-09-01 08:02:43 +00:00
parent 6adcc26ff2
commit fd7608384a
7 changed files with 219 additions and 44 deletions

View File

@ -1,3 +1,6 @@
//go:build cgo
// +build cgo
// Package audio provides real-time audio processing for JetKVM with low-latency streaming.
//
// Key components: output/input pipelines with Opus codec, adaptive buffer management,
@ -283,20 +286,41 @@ func SetMicrophoneQuality(quality AudioQuality) {
dtx = GetConfig().AudioQualityMediumOpusDTX
}
// Restart audio input subprocess with new OPUS configuration
// Update audio input subprocess configuration dynamically without restart
if supervisor := GetAudioInputSupervisor(); supervisor != nil {
logger := logging.GetDefaultLogger().With().Str("component", "audio").Logger()
logger.Info().Int("quality", int(quality)).Msg("restarting audio input subprocess with new quality settings")
logger.Info().Int("quality", int(quality)).Msg("updating audio input subprocess quality settings dynamically")
// Set new OPUS configuration
// Set new OPUS configuration for future restarts
supervisor.SetOpusConfig(config.Bitrate*1000, complexity, vbr, signalType, bandwidth, dtx)
// Stop current subprocess
supervisor.Stop()
// Send dynamic configuration update to running subprocess
if supervisor.IsConnected() {
// Convert AudioConfig to InputIPCOpusConfig with complete Opus parameters
opusConfig := InputIPCOpusConfig{
SampleRate: config.SampleRate,
Channels: config.Channels,
FrameSize: int(config.FrameSize.Milliseconds() * int64(config.SampleRate) / 1000), // Convert ms to samples
Bitrate: config.Bitrate * 1000, // Convert kbps to bps
Complexity: complexity,
VBR: vbr,
SignalType: signalType,
Bandwidth: bandwidth,
DTX: dtx,
}
// Start subprocess with new configuration
if err := supervisor.Start(); err != nil {
logger.Error().Err(err).Msg("failed to restart audio input subprocess")
if err := supervisor.SendOpusConfig(opusConfig); err != nil {
logger.Warn().Err(err).Msg("failed to send dynamic Opus config update, subprocess may need restart")
// Fallback to restart if dynamic update fails
supervisor.Stop()
if err := supervisor.Start(); err != nil {
logger.Error().Err(err).Msg("failed to restart audio input subprocess after config update failure")
}
} else {
logger.Info().Msg("audio input quality updated dynamically with complete Opus configuration")
}
} else {
logger.Info().Msg("audio input subprocess not connected, configuration will apply on next start")
}
}
}

View File

@ -1555,35 +1555,35 @@ func DefaultAudioConfig() *AudioConfigConstants {
MaxPacketSize: 4000,
// Audio Quality Bitrates - Optimized for RV1106 SoC and KVM layer compatibility
// Low quality increased to 48/24 kbps for better audio quality while maintaining efficiency
AudioQualityLowOutputBitrate: 48,
AudioQualityLowInputBitrate: 24,
AudioQualityMediumOutputBitrate: 64,
AudioQualityMediumInputBitrate: 32,
// Reduced bitrates to minimize CPU load and prevent mouse lag
AudioQualityLowOutputBitrate: 32,
AudioQualityLowInputBitrate: 16,
AudioQualityMediumOutputBitrate: 48,
AudioQualityMediumInputBitrate: 24,
// AudioQualityHighOutputBitrate defines bitrate for high-quality output.
// Used in: Professional applications requiring good audio fidelity on RV1106
// Impact: Balanced quality optimized for single-core ARM performance.
// Reduced to 96kbps for RV1106 compatibility and KVM layer stability.
AudioQualityHighOutputBitrate: 96,
// Reduced to 64kbps for RV1106 compatibility and minimal CPU overhead.
AudioQualityHighOutputBitrate: 64,
// AudioQualityHighInputBitrate defines bitrate for high-quality input.
// Used in: High-quality microphone input optimized for RV1106
// Impact: Clear voice reproduction without overwhelming single-core CPU.
// Reduced to 48kbps for optimal RV1106 performance.
AudioQualityHighInputBitrate: 48,
// Reduced to 32kbps for optimal RV1106 performance without lag.
AudioQualityHighInputBitrate: 32,
// AudioQualityUltraOutputBitrate defines bitrate for ultra-quality output.
// Used in: Maximum quality while ensuring RV1106 stability
// Impact: Best possible quality without interfering with KVM operations.
// Optimized to 128kbps for RV1106 maximum performance threshold.
AudioQualityUltraOutputBitrate: 128,
// Reduced to 96kbps for RV1106 maximum performance without mouse lag.
AudioQualityUltraOutputBitrate: 96,
// AudioQualityUltraInputBitrate defines bitrate for ultra-quality input.
// Used in: Premium microphone input optimized for RV1106 constraints
// Impact: Excellent voice quality within RV1106 processing limits.
// Optimized to 64kbps for stable RV1106 operation.
AudioQualityUltraInputBitrate: 64,
// Reduced to 48kbps for stable RV1106 operation without lag.
AudioQualityUltraInputBitrate: 48,
// Audio Quality Sample Rates - Sampling frequencies for different quality levels
// Used in: Audio capture, processing, and format negotiation
@ -1647,31 +1647,31 @@ func DefaultAudioConfig() *AudioConfigConstants {
// Impact: Controls encoding complexity, VBR, signal type, bandwidth, and DTX
// Low Quality OPUS Parameters - Optimized for RV1106 minimal CPU usage
AudioQualityLowOpusComplexity: 2, // Slightly increased complexity for better quality
AudioQualityLowOpusComplexity: 0, // Minimum complexity to reduce CPU load
AudioQualityLowOpusVBR: 1, // VBR for better quality at same bitrate
AudioQualityLowOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC for better general audio
AudioQualityLowOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for improved range
AudioQualityLowOpusDTX: 0, // Disable DTX to prevent audio interruptions
AudioQualityLowOpusSignalType: 3001, // OPUS_SIGNAL_VOICE for lower complexity
AudioQualityLowOpusBandwidth: 1101, // OPUS_BANDWIDTH_NARROWBAND for efficiency
AudioQualityLowOpusDTX: 1, // Enable DTX to reduce processing when silent
// Medium Quality OPUS Parameters - Balanced for RV1106 performance
AudioQualityMediumOpusComplexity: 3, // Reduced complexity for RV1106 stability
AudioQualityMediumOpusComplexity: 1, // Very low complexity for RV1106 stability
AudioQualityMediumOpusVBR: 1, // VBR for optimal quality
AudioQualityMediumOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
AudioQualityMediumOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for balanced range
AudioQualityMediumOpusDTX: 0, // Disable DTX for consistent quality
AudioQualityMediumOpusSignalType: 3001, // OPUS_SIGNAL_VOICE for efficiency
AudioQualityMediumOpusBandwidth: 1102, // OPUS_BANDWIDTH_MEDIUMBAND for balance
AudioQualityMediumOpusDTX: 1, // Enable DTX for CPU savings
// High Quality OPUS Parameters - Optimized for RV1106 high performance
AudioQualityHighOpusComplexity: 5, // Moderate complexity for RV1106 limits
AudioQualityHighOpusComplexity: 2, // Low complexity for RV1106 limits
AudioQualityHighOpusVBR: 1, // VBR for optimal quality
AudioQualityHighOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
AudioQualityHighOpusBandwidth: 1104, // OPUS_BANDWIDTH_SUPERWIDEBAND for good range
AudioQualityHighOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for good range
AudioQualityHighOpusDTX: 0, // Disable DTX for consistent quality
// Ultra Quality OPUS Parameters - Maximum RV1106 performance without KVM interference
AudioQualityUltraOpusComplexity: 6, // Conservative complexity for RV1106 stability
AudioQualityUltraOpusComplexity: 3, // Moderate complexity for RV1106 stability
AudioQualityUltraOpusVBR: 1, // VBR for optimal quality
AudioQualityUltraOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
AudioQualityUltraOpusBandwidth: 1104, // OPUS_BANDWIDTH_SUPERWIDEBAND for stability
AudioQualityUltraOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for stability
AudioQualityUltraOpusDTX: 0, // Disable DTX for maximum quality
// CGO Audio Constants - Optimized for RV1106 native audio processing

View File

@ -208,7 +208,30 @@ func (aim *AudioInputManager) LogPerformanceStats() {
Msg("Audio input performance metrics")
}
// Note: IsRunning() is inherited from BaseAudioManager
// IsRunning reports whether the audio input manager should be treated as running.
//
// The embedded BaseAudioManager's running flag is consulted first. When that
// flag is unset, the IPC manager's supervisor (if both exist) is asked whether
// an audio input server process already exists, so that a duplicate subprocess
// is not created next to it.
//
// Side effect: when an existing process is found, the event is logged and the
// manager's running flag is set to true before returning.
func (aim *AudioInputManager) IsRunning() bool {
	// Fast path: the manager already considers itself running.
	if aim.BaseAudioManager.IsRunning() {
		return true
	}

	// Without an IPC manager or a supervisor there is nothing left to probe.
	if aim.ipcManager == nil {
		return false
	}
	sup := aim.ipcManager.GetSupervisor()
	if sup == nil {
		return false
	}

	pid, found := sup.HasExistingProcess()
	if !found {
		return false
	}

	aim.logger.Info().Int("existing_pid", pid).Msg("Found existing audio input server process")
	// Bring the internal flag in line with the process that was discovered.
	aim.setRunning(true)
	return true
}
// IsReady returns whether the audio input manager is ready to receive frames
// This checks both that it's running and that the IPC connection is established

View File

@ -36,6 +36,7 @@ type InputMessageType uint8
const (
InputMessageTypeOpusFrame InputMessageType = iota
InputMessageTypeConfig
InputMessageTypeOpusConfig
InputMessageTypeStop
InputMessageTypeHeartbeat
InputMessageTypeAck
@ -203,6 +204,19 @@ type InputIPCConfig struct {
FrameSize int
}
// InputIPCOpusConfig carries a complete Opus encoder configuration across the
// audio input IPC channel. The nine fields are transmitted in declaration
// order, each as a 32-bit little-endian integer (36 bytes in total).
type InputIPCOpusConfig struct {
	// Stream format: sample rate, channel count, and frame size in samples.
	SampleRate, Channels, FrameSize int

	// Encoder tuning: bitrate in bits per second, encoder complexity, and the
	// VBR switch (1 enables variable bitrate).
	Bitrate, Complexity, VBR int

	// Opus signal type (e.g. 3001 voice, 3002 music), bandwidth (e.g. 1101
	// narrowband through 1104 superwideband), and the DTX switch (1 enables
	// discontinuous transmission).
	SignalType, Bandwidth, DTX int
}
// AudioInputServer handles IPC communication for audio input processing
type AudioInputServer struct {
// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
@ -462,6 +476,8 @@ func (ais *AudioInputServer) processMessage(msg *InputIPCMessage) error {
return ais.processOpusFrame(msg.Data)
case InputMessageTypeConfig:
return ais.processConfig(msg.Data)
case InputMessageTypeOpusConfig:
return ais.processOpusConfig(msg.Data)
case InputMessageTypeStop:
return fmt.Errorf("stop message received")
case InputMessageTypeHeartbeat:
@ -507,6 +523,50 @@ func (ais *AudioInputServer) processConfig(data []byte) error {
return ais.sendAck()
}
// processOpusConfig applies a complete Opus encoder configuration update that
// arrived over IPC.
//
// data must hold exactly nine little-endian 32-bit integers (36 bytes) in the
// order SampleRate, Channels, FrameSize, Bitrate, Complexity, VBR, SignalType,
// Bandwidth, DTX. Every field is decoded as a signed int32 so that negative
// values (for example libopus's OPUS_AUTO, -1000) survive the round trip on
// platforms where int is 64 bits wide; decoding through uint32 alone would
// turn them into large positive numbers there.
//
// Only Bitrate, Complexity, VBR, SignalType, Bandwidth and DTX are forwarded
// to the encoder; SampleRate, Channels and FrameSize are decoded and logged
// but not applied here.
//
// It returns an error when the payload size is wrong or when the encoder
// rejects the parameters; otherwise it returns the result of sending an ack.
func (ais *AudioInputServer) processOpusConfig(data []byte) error {
	logger := logging.GetDefaultLogger().With().Str("component", AudioInputServerComponent).Logger()

	const (
		fieldCount  = 9
		fieldWidth  = 4 // bytes per int32
		payloadSize = fieldCount * fieldWidth
	)

	// Validate configuration data size (9 * int32 = 36 bytes)
	if len(data) != payloadSize {
		return fmt.Errorf("invalid Opus configuration data size: expected %d bytes, got %d", payloadSize, len(data))
	}

	// field decodes the i-th signed 32-bit little-endian integer of the payload.
	field := func(i int) int {
		off := i * fieldWidth
		return int(int32(binary.LittleEndian.Uint32(data[off : off+fieldWidth])))
	}

	// Deserialize Opus configuration
	config := InputIPCOpusConfig{
		SampleRate: field(0),
		Channels:   field(1),
		FrameSize:  field(2),
		Bitrate:    field(3),
		Complexity: field(4),
		VBR:        field(5),
		SignalType: field(6),
		Bandwidth:  field(7),
		DTX:        field(8),
	}

	logger.Info().Interface("config", config).Msg("applying dynamic Opus encoder configuration")

	// Apply the Opus encoder configuration dynamically
	err := CGOUpdateOpusEncoderParams(
		config.Bitrate,
		config.Complexity,
		config.VBR,
		0, // VBR constraint - using default
		config.SignalType,
		config.Bandwidth,
		config.DTX,
	)
	if err != nil {
		logger.Error().Err(err).Msg("failed to apply Opus encoder configuration")
		return fmt.Errorf("failed to apply Opus configuration: %w", err)
	}

	logger.Info().Msg("Opus encoder configuration applied successfully")
	return ais.sendAck()
}
// sendAck sends an acknowledgment message
func (ais *AudioInputServer) sendAck() error {
ais.mtx.Lock()
@ -725,6 +785,44 @@ func (aic *AudioInputClient) SendConfig(config InputIPCConfig) error {
return aic.writeMessage(msg)
}
// SendOpusConfig transmits a complete Opus encoder configuration update to the
// audio input server.
//
// The client mutex is held for the whole call. An error is returned when the
// client is not running or has no connection, or when SampleRate, Channels,
// FrameSize or Bitrate is not strictly positive. Otherwise the nine fields are
// packed as 32-bit little-endian integers (36 bytes) into an
// InputMessageTypeOpusConfig message and the result of writing it is returned.
func (aic *AudioInputClient) SendOpusConfig(config InputIPCOpusConfig) error {
	aic.mtx.Lock()
	defer aic.mtx.Unlock()

	if !(aic.running && aic.conn != nil) {
		return fmt.Errorf("not connected to audio input server")
	}

	// Validate configuration parameters: these four must all be positive.
	for _, required := range [...]int{config.SampleRate, config.Channels, config.FrameSize, config.Bitrate} {
		if required <= 0 {
			return fmt.Errorf("invalid Opus configuration: SampleRate=%d, Channels=%d, FrameSize=%d, Bitrate=%d",
				config.SampleRate, config.Channels, config.FrameSize, config.Bitrate)
		}
	}

	// Serialize Opus configuration (9 * int32 = 36 bytes), in wire order.
	wire := [...]int{
		config.SampleRate,
		config.Channels,
		config.FrameSize,
		config.Bitrate,
		config.Complexity,
		config.VBR,
		config.SignalType,
		config.Bandwidth,
		config.DTX,
	}
	payload := make([]byte, 4*len(wire))
	for idx, value := range wire {
		binary.LittleEndian.PutUint32(payload[4*idx:], uint32(value))
	}

	return aic.writeMessage(&InputIPCMessage{
		Magic:     inputMagicNumber,
		Type:      InputMessageTypeOpusConfig,
		Length:    uint32(len(payload)),
		Timestamp: time.Now().UnixNano(),
		Data:      payload,
	})
}
// SendHeartbeat sends a heartbeat message
func (aic *AudioInputClient) SendHeartbeat() error {
aic.mtx.Lock()

View File

@ -1,5 +1,14 @@
//go:build cgo
// +build cgo
package audio
/*
#cgo pkg-config: alsa
#cgo LDFLAGS: -lopus
*/
import "C"
import (
"context"
"os"
@ -63,13 +72,16 @@ func RunAudioInputServer() error {
StartAdaptiveBuffering()
defer StopAdaptiveBuffering()
// Initialize CGO audio system
// Initialize CGO audio playback (optional for input server)
// This is used for audio loopback/monitoring features
err := CGOAudioPlaybackInit()
if err != nil {
logger.Error().Err(err).Msg("failed to initialize CGO audio playback")
return err
logger.Warn().Err(err).Msg("failed to initialize CGO audio playback - audio monitoring disabled")
// Continue without playback - input functionality doesn't require it
} else {
defer CGOAudioPlaybackClose()
logger.Debug().Msg("CGO audio playback initialized successfully")
}
defer CGOAudioPlaybackClose()
// Create and start the IPC server
server, err := NewAudioInputServer()

View File

@ -283,6 +283,19 @@ func (ais *AudioInputSupervisor) SendConfig(config InputIPCConfig) error {
return ais.client.SendConfig(config)
}
// SendOpusConfig forwards a complete Opus encoder configuration to the audio
// input server through the supervisor's IPC client. It fails without sending
// anything when the client has not been created or reports that it is not
// connected; otherwise it returns the client's send result.
func (ais *AudioInputSupervisor) SendOpusConfig(config InputIPCOpusConfig) error {
	switch {
	case ais.client == nil:
		return fmt.Errorf("client not initialized")
	case !ais.client.IsConnected():
		return fmt.Errorf("client not connected")
	}
	return ais.client.SendOpusConfig(config)
}
// findExistingAudioInputProcess checks if there's already an audio input server process running
func (ais *AudioInputSupervisor) findExistingAudioInputProcess() (int, error) {
// Get current executable path
@ -331,3 +344,10 @@ func (ais *AudioInputSupervisor) isProcessRunning(pid int) bool {
err = process.Signal(syscall.Signal(0))
return err == nil
}
// HasExistingProcess is the exported wrapper around
// findExistingAudioInputProcess. It returns the PID produced by that lookup
// together with a flag that is true only when the lookup returned no error.
// When the flag is false the PID is whatever the failed lookup returned and
// should be ignored by callers.
func (ais *AudioInputSupervisor) HasExistingProcess() (int, bool) {
	pid, lookupErr := ais.findExistingAudioInputProcess()
	if lookupErr != nil {
		return pid, false
	}
	return pid, true
}

10
main.go
View File

@ -44,7 +44,8 @@ func startAudioSubprocess() error {
// Set the global supervisor for access from audio package
audio.SetAudioOutputSupervisor(audioSupervisor)
// Create and register audio input supervisor
// Create and register audio input supervisor (but don't start it)
// Audio input will be started on-demand through the UI
audioInputSupervisor := audio.NewAudioInputSupervisor()
audio.SetAudioInputSupervisor(audioInputSupervisor)
@ -59,11 +60,8 @@ func startAudioSubprocess() error {
config.AudioQualityLowOpusDTX,
)
// Start audio input supervisor
if err := audioInputSupervisor.Start(); err != nil {
logger.Error().Err(err).Msg("failed to start audio input supervisor")
// Continue execution as audio input is not critical for basic functionality
}
// Note: Audio input supervisor is NOT started here - it will be started on-demand
// when the user activates microphone input through the UI
// Set up callbacks for process lifecycle events
audioSupervisor.SetCallbacks(