mirror of https://github.com/jetkvm/kvm.git
feat(audio): implement dynamic Opus config updates and optimize audio params
Add support for dynamic Opus encoder configuration updates without requiring subprocess restart. This allows quality changes to be applied immediately while maintaining audio stream continuity. Optimize audio quality parameters to reduce CPU load and prevent mouse lag on RV1106 devices. Lower bitrates and complexity while adjusting signal types and bandwidths for better performance. Add build tags for CGO requirements and improve audio input supervisor behavior to check for existing processes before starting new ones.
This commit is contained in:
parent
6adcc26ff2
commit
fd7608384a
|
@ -1,3 +1,6 @@
|
|||
//go:build cgo
|
||||
// +build cgo
|
||||
|
||||
// Package audio provides real-time audio processing for JetKVM with low-latency streaming.
|
||||
//
|
||||
// Key components: output/input pipelines with Opus codec, adaptive buffer management,
|
||||
|
@ -283,20 +286,41 @@ func SetMicrophoneQuality(quality AudioQuality) {
|
|||
dtx = GetConfig().AudioQualityMediumOpusDTX
|
||||
}
|
||||
|
||||
// Restart audio input subprocess with new OPUS configuration
|
||||
// Update audio input subprocess configuration dynamically without restart
|
||||
if supervisor := GetAudioInputSupervisor(); supervisor != nil {
|
||||
logger := logging.GetDefaultLogger().With().Str("component", "audio").Logger()
|
||||
logger.Info().Int("quality", int(quality)).Msg("restarting audio input subprocess with new quality settings")
|
||||
logger.Info().Int("quality", int(quality)).Msg("updating audio input subprocess quality settings dynamically")
|
||||
|
||||
// Set new OPUS configuration
|
||||
// Set new OPUS configuration for future restarts
|
||||
supervisor.SetOpusConfig(config.Bitrate*1000, complexity, vbr, signalType, bandwidth, dtx)
|
||||
|
||||
// Stop current subprocess
|
||||
supervisor.Stop()
|
||||
// Send dynamic configuration update to running subprocess
|
||||
if supervisor.IsConnected() {
|
||||
// Convert AudioConfig to InputIPCOpusConfig with complete Opus parameters
|
||||
opusConfig := InputIPCOpusConfig{
|
||||
SampleRate: config.SampleRate,
|
||||
Channels: config.Channels,
|
||||
FrameSize: int(config.FrameSize.Milliseconds() * int64(config.SampleRate) / 1000), // Convert ms to samples
|
||||
Bitrate: config.Bitrate * 1000, // Convert kbps to bps
|
||||
Complexity: complexity,
|
||||
VBR: vbr,
|
||||
SignalType: signalType,
|
||||
Bandwidth: bandwidth,
|
||||
DTX: dtx,
|
||||
}
|
||||
|
||||
// Start subprocess with new configuration
|
||||
if err := supervisor.SendOpusConfig(opusConfig); err != nil {
|
||||
logger.Warn().Err(err).Msg("failed to send dynamic Opus config update, subprocess may need restart")
|
||||
// Fallback to restart if dynamic update fails
|
||||
supervisor.Stop()
|
||||
if err := supervisor.Start(); err != nil {
|
||||
logger.Error().Err(err).Msg("failed to restart audio input subprocess")
|
||||
logger.Error().Err(err).Msg("failed to restart audio input subprocess after config update failure")
|
||||
}
|
||||
} else {
|
||||
logger.Info().Msg("audio input quality updated dynamically with complete Opus configuration")
|
||||
}
|
||||
} else {
|
||||
logger.Info().Msg("audio input subprocess not connected, configuration will apply on next start")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1555,35 +1555,35 @@ func DefaultAudioConfig() *AudioConfigConstants {
|
|||
MaxPacketSize: 4000,
|
||||
|
||||
// Audio Quality Bitrates - Optimized for RV1106 SoC and KVM layer compatibility
|
||||
// Low quality increased to 48/24 kbps for better audio quality while maintaining efficiency
|
||||
AudioQualityLowOutputBitrate: 48,
|
||||
AudioQualityLowInputBitrate: 24,
|
||||
AudioQualityMediumOutputBitrate: 64,
|
||||
AudioQualityMediumInputBitrate: 32,
|
||||
// Reduced bitrates to minimize CPU load and prevent mouse lag
|
||||
AudioQualityLowOutputBitrate: 32,
|
||||
AudioQualityLowInputBitrate: 16,
|
||||
AudioQualityMediumOutputBitrate: 48,
|
||||
AudioQualityMediumInputBitrate: 24,
|
||||
|
||||
// AudioQualityHighOutputBitrate defines bitrate for high-quality output.
|
||||
// Used in: Professional applications requiring good audio fidelity on RV1106
|
||||
// Impact: Balanced quality optimized for single-core ARM performance.
|
||||
// Reduced to 96kbps for RV1106 compatibility and KVM layer stability.
|
||||
AudioQualityHighOutputBitrate: 96,
|
||||
// Reduced to 64kbps for RV1106 compatibility and minimal CPU overhead.
|
||||
AudioQualityHighOutputBitrate: 64,
|
||||
|
||||
// AudioQualityHighInputBitrate defines bitrate for high-quality input.
|
||||
// Used in: High-quality microphone input optimized for RV1106
|
||||
// Impact: Clear voice reproduction without overwhelming single-core CPU.
|
||||
// Reduced to 48kbps for optimal RV1106 performance.
|
||||
AudioQualityHighInputBitrate: 48,
|
||||
// Reduced to 32kbps for optimal RV1106 performance without lag.
|
||||
AudioQualityHighInputBitrate: 32,
|
||||
|
||||
// AudioQualityUltraOutputBitrate defines bitrate for ultra-quality output.
|
||||
// Used in: Maximum quality while ensuring RV1106 stability
|
||||
// Impact: Best possible quality without interfering with KVM operations.
|
||||
// Optimized to 128kbps for RV1106 maximum performance threshold.
|
||||
AudioQualityUltraOutputBitrate: 128,
|
||||
// Reduced to 96kbps for RV1106 maximum performance without mouse lag.
|
||||
AudioQualityUltraOutputBitrate: 96,
|
||||
|
||||
// AudioQualityUltraInputBitrate defines bitrate for ultra-quality input.
|
||||
// Used in: Premium microphone input optimized for RV1106 constraints
|
||||
// Impact: Excellent voice quality within RV1106 processing limits.
|
||||
// Optimized to 64kbps for stable RV1106 operation.
|
||||
AudioQualityUltraInputBitrate: 64,
|
||||
// Reduced to 48kbps for stable RV1106 operation without lag.
|
||||
AudioQualityUltraInputBitrate: 48,
|
||||
|
||||
// Audio Quality Sample Rates - Sampling frequencies for different quality levels
|
||||
// Used in: Audio capture, processing, and format negotiation
|
||||
|
@ -1647,31 +1647,31 @@ func DefaultAudioConfig() *AudioConfigConstants {
|
|||
// Impact: Controls encoding complexity, VBR, signal type, bandwidth, and DTX
|
||||
|
||||
// Low Quality OPUS Parameters - Optimized for RV1106 minimal CPU usage
|
||||
AudioQualityLowOpusComplexity: 2, // Slightly increased complexity for better quality
|
||||
AudioQualityLowOpusComplexity: 0, // Minimum complexity to reduce CPU load
|
||||
AudioQualityLowOpusVBR: 1, // VBR for better quality at same bitrate
|
||||
AudioQualityLowOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC for better general audio
|
||||
AudioQualityLowOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for improved range
|
||||
AudioQualityLowOpusDTX: 0, // Disable DTX to prevent audio interruptions
|
||||
AudioQualityLowOpusSignalType: 3001, // OPUS_SIGNAL_VOICE for lower complexity
|
||||
AudioQualityLowOpusBandwidth: 1101, // OPUS_BANDWIDTH_NARROWBAND for efficiency
|
||||
AudioQualityLowOpusDTX: 1, // Enable DTX to reduce processing when silent
|
||||
|
||||
// Medium Quality OPUS Parameters - Balanced for RV1106 performance
|
||||
AudioQualityMediumOpusComplexity: 3, // Reduced complexity for RV1106 stability
|
||||
AudioQualityMediumOpusComplexity: 1, // Very low complexity for RV1106 stability
|
||||
AudioQualityMediumOpusVBR: 1, // VBR for optimal quality
|
||||
AudioQualityMediumOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
|
||||
AudioQualityMediumOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for balanced range
|
||||
AudioQualityMediumOpusDTX: 0, // Disable DTX for consistent quality
|
||||
AudioQualityMediumOpusSignalType: 3001, // OPUS_SIGNAL_VOICE for efficiency
|
||||
AudioQualityMediumOpusBandwidth: 1102, // OPUS_BANDWIDTH_MEDIUMBAND for balance
|
||||
AudioQualityMediumOpusDTX: 1, // Enable DTX for CPU savings
|
||||
|
||||
// High Quality OPUS Parameters - Optimized for RV1106 high performance
|
||||
AudioQualityHighOpusComplexity: 5, // Moderate complexity for RV1106 limits
|
||||
AudioQualityHighOpusComplexity: 2, // Low complexity for RV1106 limits
|
||||
AudioQualityHighOpusVBR: 1, // VBR for optimal quality
|
||||
AudioQualityHighOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
|
||||
AudioQualityHighOpusBandwidth: 1104, // OPUS_BANDWIDTH_SUPERWIDEBAND for good range
|
||||
AudioQualityHighOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for good range
|
||||
AudioQualityHighOpusDTX: 0, // Disable DTX for consistent quality
|
||||
|
||||
// Ultra Quality OPUS Parameters - Maximum RV1106 performance without KVM interference
|
||||
AudioQualityUltraOpusComplexity: 6, // Conservative complexity for RV1106 stability
|
||||
AudioQualityUltraOpusComplexity: 3, // Moderate complexity for RV1106 stability
|
||||
AudioQualityUltraOpusVBR: 1, // VBR for optimal quality
|
||||
AudioQualityUltraOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
|
||||
AudioQualityUltraOpusBandwidth: 1104, // OPUS_BANDWIDTH_SUPERWIDEBAND for stability
|
||||
AudioQualityUltraOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND for stability
|
||||
AudioQualityUltraOpusDTX: 0, // Disable DTX for maximum quality
|
||||
|
||||
// CGO Audio Constants - Optimized for RV1106 native audio processing
|
||||
|
|
|
@ -208,7 +208,30 @@ func (aim *AudioInputManager) LogPerformanceStats() {
|
|||
Msg("Audio input performance metrics")
|
||||
}
|
||||
|
||||
// Note: IsRunning() is inherited from BaseAudioManager
|
||||
// IsRunning returns whether the audio input manager is running
|
||||
// This checks both the internal state and existing system processes
|
||||
func (aim *AudioInputManager) IsRunning() bool {
|
||||
// First check internal state
|
||||
if aim.BaseAudioManager.IsRunning() {
|
||||
return true
|
||||
}
|
||||
|
||||
// If internal state says not running, check for existing system processes
|
||||
// This prevents duplicate subprocess creation when a process already exists
|
||||
if aim.ipcManager != nil {
|
||||
supervisor := aim.ipcManager.GetSupervisor()
|
||||
if supervisor != nil {
|
||||
if existingPID, exists := supervisor.HasExistingProcess(); exists {
|
||||
aim.logger.Info().Int("existing_pid", existingPID).Msg("Found existing audio input server process")
|
||||
// Update internal state to reflect reality
|
||||
aim.setRunning(true)
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// IsReady returns whether the audio input manager is ready to receive frames
|
||||
// This checks both that it's running and that the IPC connection is established
|
||||
|
|
|
@ -36,6 +36,7 @@ type InputMessageType uint8
|
|||
const (
|
||||
InputMessageTypeOpusFrame InputMessageType = iota
|
||||
InputMessageTypeConfig
|
||||
InputMessageTypeOpusConfig
|
||||
InputMessageTypeStop
|
||||
InputMessageTypeHeartbeat
|
||||
InputMessageTypeAck
|
||||
|
@ -203,6 +204,19 @@ type InputIPCConfig struct {
|
|||
FrameSize int
|
||||
}
|
||||
|
||||
// InputIPCOpusConfig carries a complete Opus encoder configuration for the
// audio input subprocess. It travels over IPC as nine 32-bit little-endian
// values in field declaration order, so the order of the fields below must
// not change.
type InputIPCOpusConfig struct {
	// Stream format: sample rate, channel count, and frame size (callers
	// compute the frame size in samples, not milliseconds).
	SampleRate, Channels, FrameSize int

	// Bitrate is the target bitrate in bits per second.
	Bitrate int

	// Encoder tuning. Complexity is the Opus complexity level; VBR and DTX
	// are 0/1 flags; SignalType and Bandwidth hold Opus constants (for
	// example 3001 voice / 3002 music, and 1101 narrowband .. 1103 wideband).
	Complexity, VBR, SignalType, Bandwidth, DTX int
}
|
||||
|
||||
// AudioInputServer handles IPC communication for audio input processing
|
||||
type AudioInputServer struct {
|
||||
// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
|
||||
|
@ -462,6 +476,8 @@ func (ais *AudioInputServer) processMessage(msg *InputIPCMessage) error {
|
|||
return ais.processOpusFrame(msg.Data)
|
||||
case InputMessageTypeConfig:
|
||||
return ais.processConfig(msg.Data)
|
||||
case InputMessageTypeOpusConfig:
|
||||
return ais.processOpusConfig(msg.Data)
|
||||
case InputMessageTypeStop:
|
||||
return fmt.Errorf("stop message received")
|
||||
case InputMessageTypeHeartbeat:
|
||||
|
@ -507,6 +523,50 @@ func (ais *AudioInputServer) processConfig(data []byte) error {
|
|||
return ais.sendAck()
|
||||
}
|
||||
|
||||
// processOpusConfig processes a complete Opus encoder configuration update
|
||||
func (ais *AudioInputServer) processOpusConfig(data []byte) error {
|
||||
logger := logging.GetDefaultLogger().With().Str("component", AudioInputServerComponent).Logger()
|
||||
|
||||
// Validate configuration data size (9 * int32 = 36 bytes)
|
||||
if len(data) != 36 {
|
||||
return fmt.Errorf("invalid Opus configuration data size: expected 36 bytes, got %d", len(data))
|
||||
}
|
||||
|
||||
// Deserialize Opus configuration
|
||||
config := InputIPCOpusConfig{
|
||||
SampleRate: int(binary.LittleEndian.Uint32(data[0:4])),
|
||||
Channels: int(binary.LittleEndian.Uint32(data[4:8])),
|
||||
FrameSize: int(binary.LittleEndian.Uint32(data[8:12])),
|
||||
Bitrate: int(binary.LittleEndian.Uint32(data[12:16])),
|
||||
Complexity: int(binary.LittleEndian.Uint32(data[16:20])),
|
||||
VBR: int(binary.LittleEndian.Uint32(data[20:24])),
|
||||
SignalType: int(binary.LittleEndian.Uint32(data[24:28])),
|
||||
Bandwidth: int(binary.LittleEndian.Uint32(data[28:32])),
|
||||
DTX: int(binary.LittleEndian.Uint32(data[32:36])),
|
||||
}
|
||||
|
||||
logger.Info().Interface("config", config).Msg("applying dynamic Opus encoder configuration")
|
||||
|
||||
// Apply the Opus encoder configuration dynamically
|
||||
err := CGOUpdateOpusEncoderParams(
|
||||
config.Bitrate,
|
||||
config.Complexity,
|
||||
config.VBR,
|
||||
0, // VBR constraint - using default
|
||||
config.SignalType,
|
||||
config.Bandwidth,
|
||||
config.DTX,
|
||||
)
|
||||
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("failed to apply Opus encoder configuration")
|
||||
return fmt.Errorf("failed to apply Opus configuration: %w", err)
|
||||
}
|
||||
|
||||
logger.Info().Msg("Opus encoder configuration applied successfully")
|
||||
return ais.sendAck()
|
||||
}
|
||||
|
||||
// sendAck sends an acknowledgment message
|
||||
func (ais *AudioInputServer) sendAck() error {
|
||||
ais.mtx.Lock()
|
||||
|
@ -725,6 +785,44 @@ func (aic *AudioInputClient) SendConfig(config InputIPCConfig) error {
|
|||
return aic.writeMessage(msg)
|
||||
}
|
||||
|
||||
// SendOpusConfig sends a complete Opus encoder configuration update to the audio input server
|
||||
func (aic *AudioInputClient) SendOpusConfig(config InputIPCOpusConfig) error {
|
||||
aic.mtx.Lock()
|
||||
defer aic.mtx.Unlock()
|
||||
|
||||
if !aic.running || aic.conn == nil {
|
||||
return fmt.Errorf("not connected to audio input server")
|
||||
}
|
||||
|
||||
// Validate configuration parameters
|
||||
if config.SampleRate <= 0 || config.Channels <= 0 || config.FrameSize <= 0 || config.Bitrate <= 0 {
|
||||
return fmt.Errorf("invalid Opus configuration: SampleRate=%d, Channels=%d, FrameSize=%d, Bitrate=%d",
|
||||
config.SampleRate, config.Channels, config.FrameSize, config.Bitrate)
|
||||
}
|
||||
|
||||
// Serialize Opus configuration (9 * int32 = 36 bytes)
|
||||
data := make([]byte, 36)
|
||||
binary.LittleEndian.PutUint32(data[0:4], uint32(config.SampleRate))
|
||||
binary.LittleEndian.PutUint32(data[4:8], uint32(config.Channels))
|
||||
binary.LittleEndian.PutUint32(data[8:12], uint32(config.FrameSize))
|
||||
binary.LittleEndian.PutUint32(data[12:16], uint32(config.Bitrate))
|
||||
binary.LittleEndian.PutUint32(data[16:20], uint32(config.Complexity))
|
||||
binary.LittleEndian.PutUint32(data[20:24], uint32(config.VBR))
|
||||
binary.LittleEndian.PutUint32(data[24:28], uint32(config.SignalType))
|
||||
binary.LittleEndian.PutUint32(data[28:32], uint32(config.Bandwidth))
|
||||
binary.LittleEndian.PutUint32(data[32:36], uint32(config.DTX))
|
||||
|
||||
msg := &InputIPCMessage{
|
||||
Magic: inputMagicNumber,
|
||||
Type: InputMessageTypeOpusConfig,
|
||||
Length: uint32(len(data)),
|
||||
Timestamp: time.Now().UnixNano(),
|
||||
Data: data,
|
||||
}
|
||||
|
||||
return aic.writeMessage(msg)
|
||||
}
|
||||
|
||||
// SendHeartbeat sends a heartbeat message
|
||||
func (aic *AudioInputClient) SendHeartbeat() error {
|
||||
aic.mtx.Lock()
|
||||
|
|
|
@ -1,5 +1,14 @@
|
|||
//go:build cgo
|
||||
// +build cgo
|
||||
|
||||
package audio
|
||||
|
||||
/*
|
||||
#cgo pkg-config: alsa
|
||||
#cgo LDFLAGS: -lopus
|
||||
*/
|
||||
import "C"
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
@ -63,13 +72,16 @@ func RunAudioInputServer() error {
|
|||
StartAdaptiveBuffering()
|
||||
defer StopAdaptiveBuffering()
|
||||
|
||||
// Initialize CGO audio system
|
||||
// Initialize CGO audio playback (optional for input server)
|
||||
// This is used for audio loopback/monitoring features
|
||||
err := CGOAudioPlaybackInit()
|
||||
if err != nil {
|
||||
logger.Error().Err(err).Msg("failed to initialize CGO audio playback")
|
||||
return err
|
||||
}
|
||||
logger.Warn().Err(err).Msg("failed to initialize CGO audio playback - audio monitoring disabled")
|
||||
// Continue without playback - input functionality doesn't require it
|
||||
} else {
|
||||
defer CGOAudioPlaybackClose()
|
||||
logger.Debug().Msg("CGO audio playback initialized successfully")
|
||||
}
|
||||
|
||||
// Create and start the IPC server
|
||||
server, err := NewAudioInputServer()
|
||||
|
|
|
@ -283,6 +283,19 @@ func (ais *AudioInputSupervisor) SendConfig(config InputIPCConfig) error {
|
|||
return ais.client.SendConfig(config)
|
||||
}
|
||||
|
||||
// SendOpusConfig sends a complete Opus encoder configuration to the audio input server
|
||||
func (ais *AudioInputSupervisor) SendOpusConfig(config InputIPCOpusConfig) error {
|
||||
if ais.client == nil {
|
||||
return fmt.Errorf("client not initialized")
|
||||
}
|
||||
|
||||
if !ais.client.IsConnected() {
|
||||
return fmt.Errorf("client not connected")
|
||||
}
|
||||
|
||||
return ais.client.SendOpusConfig(config)
|
||||
}
|
||||
|
||||
// findExistingAudioInputProcess checks if there's already an audio input server process running
|
||||
func (ais *AudioInputSupervisor) findExistingAudioInputProcess() (int, error) {
|
||||
// Get current executable path
|
||||
|
@ -331,3 +344,10 @@ func (ais *AudioInputSupervisor) isProcessRunning(pid int) bool {
|
|||
err = process.Signal(syscall.Signal(0))
|
||||
return err == nil
|
||||
}
|
||||
|
||||
// HasExistingProcess checks if there's already an audio input server process running
|
||||
// This is a public wrapper around findExistingAudioInputProcess for external access
|
||||
func (ais *AudioInputSupervisor) HasExistingProcess() (int, bool) {
|
||||
pid, err := ais.findExistingAudioInputProcess()
|
||||
return pid, err == nil
|
||||
}
|
||||
|
|
10
main.go
10
main.go
|
@ -44,7 +44,8 @@ func startAudioSubprocess() error {
|
|||
// Set the global supervisor for access from audio package
|
||||
audio.SetAudioOutputSupervisor(audioSupervisor)
|
||||
|
||||
// Create and register audio input supervisor
|
||||
// Create and register audio input supervisor (but don't start it)
|
||||
// Audio input will be started on-demand through the UI
|
||||
audioInputSupervisor := audio.NewAudioInputSupervisor()
|
||||
audio.SetAudioInputSupervisor(audioInputSupervisor)
|
||||
|
||||
|
@ -59,11 +60,8 @@ func startAudioSubprocess() error {
|
|||
config.AudioQualityLowOpusDTX,
|
||||
)
|
||||
|
||||
// Start audio input supervisor
|
||||
if err := audioInputSupervisor.Start(); err != nil {
|
||||
logger.Error().Err(err).Msg("failed to start audio input supervisor")
|
||||
// Continue execution as audio input is not critical for basic functionality
|
||||
}
|
||||
// Note: Audio input supervisor is NOT started here - it will be started on-demand
|
||||
// when the user activates microphone input through the UI
|
||||
|
||||
// Set up callbacks for process lifecycle events
|
||||
audioSupervisor.SetCallbacks(
|
||||
|
|
Loading…
Reference in New Issue