diff --git a/internal/audio/audio.go b/internal/audio/audio.go index e6f46743..f1de66bf 100644 --- a/internal/audio/audio.go +++ b/internal/audio/audio.go @@ -1,3 +1,6 @@ +//go:build cgo +// +build cgo + // Package audio provides real-time audio processing for JetKVM with low-latency streaming. // // Key components: output/input pipelines with Opus codec, adaptive buffer management, @@ -283,20 +286,41 @@ func SetMicrophoneQuality(quality AudioQuality) { dtx = GetConfig().AudioQualityMediumOpusDTX } - // Restart audio input subprocess with new OPUS configuration + // Update audio input subprocess configuration dynamically without restart if supervisor := GetAudioInputSupervisor(); supervisor != nil { logger := logging.GetDefaultLogger().With().Str("component", "audio").Logger() - logger.Info().Int("quality", int(quality)).Msg("restarting audio input subprocess with new quality settings") + logger.Info().Int("quality", int(quality)).Msg("updating audio input subprocess quality settings dynamically") - // Set new OPUS configuration + // Set new OPUS configuration for future restarts supervisor.SetOpusConfig(config.Bitrate*1000, complexity, vbr, signalType, bandwidth, dtx) - // Stop current subprocess - supervisor.Stop() + // Send dynamic configuration update to running subprocess + if supervisor.IsConnected() { + // Convert AudioConfig to InputIPCOpusConfig with complete Opus parameters + opusConfig := InputIPCOpusConfig{ + SampleRate: config.SampleRate, + Channels: config.Channels, + FrameSize: int(config.FrameSize.Milliseconds() * int64(config.SampleRate) / 1000), // Convert ms to samples + Bitrate: config.Bitrate * 1000, // Convert kbps to bps + Complexity: complexity, + VBR: vbr, + SignalType: signalType, + Bandwidth: bandwidth, + DTX: dtx, + } - // Start subprocess with new configuration - if err := supervisor.Start(); err != nil { - logger.Error().Err(err).Msg("failed to restart audio input subprocess") + if err := supervisor.SendOpusConfig(opusConfig); err != nil { + logger.Warn().Err(err).Msg("failed to send dynamic Opus config update, subprocess may need restart") + // Fallback to restart if dynamic update fails + supervisor.Stop() + if err := supervisor.Start(); err != nil { + logger.Error().Err(err).Msg("failed to restart audio input subprocess after config update failure") + } + } else { + logger.Info().Msg("audio input quality updated dynamically with complete Opus configuration") + } + } else { + logger.Info().Msg("audio input subprocess not connected, configuration will apply on next start") } } } diff --git a/internal/audio/config_constants.go b/internal/audio/config_constants.go index 0b50edfe..fc84b895 100644 --- a/internal/audio/config_constants.go +++ b/internal/audio/config_constants.go @@ -1555,35 +1555,35 @@ func DefaultAudioConfig() *AudioConfigConstants { MaxPacketSize: 4000, // Audio Quality Bitrates - Optimized for RV1106 SoC and KVM layer compatibility - // Low quality increased to 48/24 kbps for better audio quality while maintaining efficiency - AudioQualityLowOutputBitrate: 48, - AudioQualityLowInputBitrate: 24, - AudioQualityMediumOutputBitrate: 64, - AudioQualityMediumInputBitrate: 32, + // Reduced bitrates to minimize CPU load and prevent mouse lag + AudioQualityLowOutputBitrate: 32, + AudioQualityLowInputBitrate: 16, + AudioQualityMediumOutputBitrate: 48, + AudioQualityMediumInputBitrate: 24, // AudioQualityHighOutputBitrate defines bitrate for high-quality output. // Used in: Professional applications requiring good audio fidelity on RV1106 // Impact: Balanced quality optimized for single-core ARM performance. - // Reduced to 96kbps for RV1106 compatibility and KVM layer stability. - AudioQualityHighOutputBitrate: 96, + // Reduced to 64kbps for RV1106 compatibility and minimal CPU overhead. + AudioQualityHighOutputBitrate: 64, // AudioQualityHighInputBitrate defines bitrate for high-quality input. // Used in: High-quality microphone input optimized for RV1106 // Impact: Clear voice reproduction without overwhelming single-core CPU. - // Reduced to 48kbps for optimal RV1106 performance. - AudioQualityHighInputBitrate: 48, + // Reduced to 32kbps for optimal RV1106 performance without lag. + AudioQualityHighInputBitrate: 32, // AudioQualityUltraOutputBitrate defines bitrate for ultra-quality output. // Used in: Maximum quality while ensuring RV1106 stability // Impact: Best possible quality without interfering with KVM operations. - // Optimized to 128kbps for RV1106 maximum performance threshold. - AudioQualityUltraOutputBitrate: 128, + // Reduced to 96kbps for RV1106 maximum performance without mouse lag. + AudioQualityUltraOutputBitrate: 96, // AudioQualityUltraInputBitrate defines bitrate for ultra-quality input. // Used in: Premium microphone input optimized for RV1106 constraints // Impact: Excellent voice quality within RV1106 processing limits. - // Optimized to 64kbps for stable RV1106 operation. - AudioQualityUltraInputBitrate: 64, + // Reduced to 48kbps for stable RV1106 operation without lag. + AudioQualityUltraInputBitrate: 48, // Audio Quality Sample Rates - Sampling frequencies for different quality levels // Used in: Audio capture, processing, and format negotiation @@ -1647,31 +1647,31 @@ func DefaultAudioConfig() *AudioConfigConstants { // Impact: Controls encoding complexity, VBR, signal type, bandwidth, and DTX // Low Quality OPUS Parameters - Optimized for RV1106 minimal CPU usage - AudioQualityLowOpusComplexity: 2, // Slightly increased complexity for better quality + AudioQualityLowOpusComplexity: 0, // Minimum complexity to reduce CPU load AudioQualityLowOpusVBR: 1, // VBR for better quality at same bitrate - AudioQualityLowOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC for better general audio - AudioQualityLowOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for improved range - AudioQualityLowOpusDTX: 0, // Disable DTX to prevent audio interruptions + AudioQualityLowOpusSignalType: 3001, // OPUS_SIGNAL_VOICE for lower complexity + AudioQualityLowOpusBandwidth: 1101, // OPUS_BANDWIDTH_NARROWBAND for efficiency + AudioQualityLowOpusDTX: 1, // Enable DTX to reduce processing when silent // Medium Quality OPUS Parameters - Balanced for RV1106 performance - AudioQualityMediumOpusComplexity: 3, // Reduced complexity for RV1106 stability + AudioQualityMediumOpusComplexity: 1, // Very low complexity for RV1106 stability AudioQualityMediumOpusVBR: 1, // VBR for optimal quality - AudioQualityMediumOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC - AudioQualityMediumOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for balanced range - AudioQualityMediumOpusDTX: 0, // Disable DTX for consistent quality + AudioQualityMediumOpusSignalType: 3001, // OPUS_SIGNAL_VOICE for efficiency + AudioQualityMediumOpusBandwidth: 1102, // OPUS_BANDWIDTH_MEDIUMBAND for balance + AudioQualityMediumOpusDTX: 1, // Enable DTX for CPU savings // High Quality OPUS Parameters - Optimized for RV1106 high performance - AudioQualityHighOpusComplexity: 5, // Moderate complexity for RV1106 limits + AudioQualityHighOpusComplexity: 2, // Low complexity for RV1106 limits AudioQualityHighOpusVBR: 1, // VBR for optimal quality AudioQualityHighOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC - AudioQualityHighOpusBandwidth: 1104, // OPUS_BANDWIDTH_SUPERWIDEBAND for good range + AudioQualityHighOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for good range AudioQualityHighOpusDTX: 0, // Disable DTX for consistent quality // Ultra Quality OPUS Parameters - Maximum RV1106 performance without KVM interference - AudioQualityUltraOpusComplexity: 6, // Conservative complexity for RV1106 stability + AudioQualityUltraOpusComplexity: 3, // Moderate complexity for RV1106 stability AudioQualityUltraOpusVBR: 1, // VBR for optimal quality AudioQualityUltraOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC - AudioQualityUltraOpusBandwidth: 1104, // OPUS_BANDWIDTH_SUPERWIDEBAND for stability + AudioQualityUltraOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for stability AudioQualityUltraOpusDTX: 0, // Disable DTX for maximum quality // CGO Audio Constants - Optimized for RV1106 native audio processing diff --git a/internal/audio/input.go b/internal/audio/input.go index 68de2677..0c0c505b 100644 --- a/internal/audio/input.go +++ b/internal/audio/input.go @@ -208,7 +208,30 @@ func (aim *AudioInputManager) LogPerformanceStats() { Msg("Audio input performance metrics") } -// Note: IsRunning() is inherited from BaseAudioManager +// IsRunning returns whether the audio input manager is running +// This checks both the internal state and existing system processes +func (aim *AudioInputManager) IsRunning() bool { + // First check internal state + if aim.BaseAudioManager.IsRunning() { + return true + } + + // If internal state says not running, check for existing system processes + // This prevents duplicate subprocess creation when a process already exists + if aim.ipcManager != nil { + supervisor := aim.ipcManager.GetSupervisor() + if supervisor != nil { + if existingPID, exists := supervisor.HasExistingProcess(); exists { + aim.logger.Info().Int("existing_pid", existingPID).Msg("Found existing audio input server process") + // Update internal state to reflect reality + aim.setRunning(true) + return true + } + } + } + + return false +} // IsReady returns whether the audio input manager is ready to receive frames // This checks both that it's running and that the IPC connection is established diff --git a/internal/audio/input_ipc.go b/internal/audio/input_ipc.go index d56eac63..2142f206 100644 --- a/internal/audio/input_ipc.go +++ b/internal/audio/input_ipc.go @@ -36,6 +36,7 @@ type InputMessageType uint8 const ( InputMessageTypeOpusFrame InputMessageType = iota InputMessageTypeConfig + InputMessageTypeOpusConfig InputMessageTypeStop InputMessageTypeHeartbeat InputMessageTypeAck @@ -203,6 +204,19 @@ type InputIPCConfig struct { FrameSize int } +// InputIPCOpusConfig contains complete Opus encoder configuration +type InputIPCOpusConfig struct { + SampleRate int + Channels int + FrameSize int + Bitrate int + Complexity int + VBR int + SignalType int + Bandwidth int + DTX int +} + // AudioInputServer handles IPC communication for audio input processing type AudioInputServer struct { // Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) @@ -462,6 +476,8 @@ func (ais *AudioInputServer) processMessage(msg *InputIPCMessage) error { return ais.processOpusFrame(msg.Data) case InputMessageTypeConfig: return ais.processConfig(msg.Data) + case InputMessageTypeOpusConfig: + return ais.processOpusConfig(msg.Data) case InputMessageTypeStop: return fmt.Errorf("stop message received") case InputMessageTypeHeartbeat: @@ -507,6 +523,50 @@ func (ais *AudioInputServer) processConfig(data []byte) error { return ais.sendAck() } +// processOpusConfig processes a complete Opus encoder configuration update +func (ais *AudioInputServer) processOpusConfig(data []byte) error { + logger := logging.GetDefaultLogger().With().Str("component", AudioInputServerComponent).Logger() + + // Validate configuration data size (9 * int32 = 36 bytes) + if len(data) != 36 { + return fmt.Errorf("invalid Opus configuration data size: expected 36 bytes, got %d", len(data)) + } + + // Deserialize Opus configuration + config := InputIPCOpusConfig{ + SampleRate: int(binary.LittleEndian.Uint32(data[0:4])), + Channels: int(binary.LittleEndian.Uint32(data[4:8])), + FrameSize: int(binary.LittleEndian.Uint32(data[8:12])), + Bitrate: int(binary.LittleEndian.Uint32(data[12:16])), + Complexity: int(binary.LittleEndian.Uint32(data[16:20])), + VBR: int(binary.LittleEndian.Uint32(data[20:24])), + SignalType: int(binary.LittleEndian.Uint32(data[24:28])), + Bandwidth: int(binary.LittleEndian.Uint32(data[28:32])), + DTX: int(binary.LittleEndian.Uint32(data[32:36])), + } + + logger.Info().Interface("config", config).Msg("applying dynamic Opus encoder configuration") + + // Apply the Opus encoder configuration dynamically + err := CGOUpdateOpusEncoderParams( + config.Bitrate, + config.Complexity, + config.VBR, + 0, // VBR constraint - using default + config.SignalType, + config.Bandwidth, + config.DTX, + ) + + if err != nil { + logger.Error().Err(err).Msg("failed to apply Opus encoder configuration") + return fmt.Errorf("failed to apply Opus configuration: %w", err) + } + + logger.Info().Msg("Opus encoder configuration applied successfully") + return ais.sendAck() +} + // sendAck sends an acknowledgment message func (ais *AudioInputServer) sendAck() error { ais.mtx.Lock() @@ -725,6 +785,44 @@ func (aic *AudioInputClient) SendConfig(config InputIPCConfig) error { return aic.writeMessage(msg) } +// SendOpusConfig sends a complete Opus encoder configuration update to the audio input server +func (aic *AudioInputClient) SendOpusConfig(config InputIPCOpusConfig) error { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if !aic.running || aic.conn == nil { + return fmt.Errorf("not connected to audio input server") + } + + // Validate configuration parameters + if config.SampleRate <= 0 || config.Channels <= 0 || config.FrameSize <= 0 || config.Bitrate <= 0 { + return fmt.Errorf("invalid Opus configuration: SampleRate=%d, Channels=%d, FrameSize=%d, Bitrate=%d", + config.SampleRate, config.Channels, config.FrameSize, config.Bitrate) + } + + // Serialize Opus configuration (9 * int32 = 36 bytes) + data := make([]byte, 36) + binary.LittleEndian.PutUint32(data[0:4], uint32(config.SampleRate)) + binary.LittleEndian.PutUint32(data[4:8], uint32(config.Channels)) + binary.LittleEndian.PutUint32(data[8:12], uint32(config.FrameSize)) + binary.LittleEndian.PutUint32(data[12:16], uint32(config.Bitrate)) + binary.LittleEndian.PutUint32(data[16:20], uint32(config.Complexity)) + binary.LittleEndian.PutUint32(data[20:24], uint32(config.VBR)) + binary.LittleEndian.PutUint32(data[24:28], uint32(config.SignalType)) + binary.LittleEndian.PutUint32(data[28:32], uint32(config.Bandwidth)) + binary.LittleEndian.PutUint32(data[32:36], uint32(config.DTX)) + + msg := &InputIPCMessage{ + Magic: inputMagicNumber, + Type: InputMessageTypeOpusConfig, + Length: uint32(len(data)), + Timestamp: time.Now().UnixNano(), + Data: data, + } + + return aic.writeMessage(msg) +} + // SendHeartbeat sends a heartbeat message func (aic *AudioInputClient) SendHeartbeat() error { aic.mtx.Lock() diff --git a/internal/audio/input_server_main.go b/internal/audio/input_server_main.go index 0cc8b11b..808c1667 100644 --- a/internal/audio/input_server_main.go +++ b/internal/audio/input_server_main.go @@ -1,5 +1,14 @@ +//go:build cgo +// +build cgo + package audio +/* +#cgo pkg-config: alsa +#cgo LDFLAGS: -lopus +*/ +import "C" + import ( "context" "os" @@ -63,13 +72,16 @@ func RunAudioInputServer() error { StartAdaptiveBuffering() defer StopAdaptiveBuffering() - // Initialize CGO audio system + // Initialize CGO audio playback (optional for input server) + // This is used for audio loopback/monitoring features err := CGOAudioPlaybackInit() if err != nil { - logger.Error().Err(err).Msg("failed to initialize CGO audio playback") - return err + logger.Warn().Err(err).Msg("failed to initialize CGO audio playback - audio monitoring disabled") + // Continue without playback - input functionality doesn't require it + } else { + defer CGOAudioPlaybackClose() + logger.Debug().Msg("CGO audio playback initialized successfully") } - defer CGOAudioPlaybackClose() // Create and start the IPC server server, err := NewAudioInputServer() diff --git a/internal/audio/input_supervisor.go b/internal/audio/input_supervisor.go index 70587eef..fa52d4ba 100644 --- a/internal/audio/input_supervisor.go +++ b/internal/audio/input_supervisor.go @@ -283,6 +283,19 @@ func (ais *AudioInputSupervisor) SendConfig(config InputIPCConfig) error { return ais.client.SendConfig(config) } +// SendOpusConfig sends a complete Opus encoder configuration to the audio input server +func (ais *AudioInputSupervisor) SendOpusConfig(config InputIPCOpusConfig) error { + if ais.client == nil { + return fmt.Errorf("client not initialized") + } + + if !ais.client.IsConnected() { + return fmt.Errorf("client not connected") + } + + return ais.client.SendOpusConfig(config) +} + // findExistingAudioInputProcess checks if there's already an audio input server process running func (ais *AudioInputSupervisor) findExistingAudioInputProcess() (int, error) { // Get current executable path @@ -331,3 +344,10 @@ func (ais *AudioInputSupervisor) isProcessRunning(pid int) bool { err = process.Signal(syscall.Signal(0)) return err == nil } + +// HasExistingProcess checks if there's already an audio input server process running +// This is a public wrapper around findExistingAudioInputProcess for external access +func (ais *AudioInputSupervisor) HasExistingProcess() (int, bool) { + pid, err := ais.findExistingAudioInputProcess() + return pid, err == nil +} diff --git a/main.go b/main.go index 4f85110d..412d6eb3 100644 --- a/main.go +++ b/main.go @@ -44,7 +44,8 @@ func startAudioSubprocess() error { // Set the global supervisor for access from audio package audio.SetAudioOutputSupervisor(audioSupervisor) - // Create and register audio input supervisor + // Create and register audio input supervisor (but don't start it) + // Audio input will be started on-demand through the UI audioInputSupervisor := audio.NewAudioInputSupervisor() audio.SetAudioInputSupervisor(audioInputSupervisor) @@ -59,11 +60,8 @@ func startAudioSubprocess() error { config.AudioQualityLowOpusDTX, ) - // Start audio input supervisor - if err := audioInputSupervisor.Start(); err != nil { - logger.Error().Err(err).Msg("failed to start audio input supervisor") - // Continue execution as audio input is not critical for basic functionality - } + // Note: Audio input supervisor is NOT started here - it will be started on-demand + // when the user activates microphone input through the UI // Set up callbacks for process lifecycle events audioSupervisor.SetCallbacks(