diff --git a/.devcontainer/install_audio_deps.sh b/.devcontainer/install_audio_deps.sh index 0a3d8043..79c45cbe 100755 --- a/.devcontainer/install_audio_deps.sh +++ b/.devcontainer/install_audio_deps.sh @@ -15,6 +15,7 @@ function use_sudo() { # Accept version parameters or use defaults ALSA_VERSION="${1:-1.2.14}" OPUS_VERSION="${2:-1.5.2}" +SPEEXDSP_VERSION="${3:-1.2.1}" AUDIO_LIBS_DIR="/opt/jetkvm-audio-libs" BUILDKIT_PATH="/opt/jetkvm-native-buildkit" @@ -29,12 +30,14 @@ cd "$AUDIO_LIBS_DIR" # Download sources [ -f alsa-lib-${ALSA_VERSION}.tar.bz2 ] || wget -N https://www.alsa-project.org/files/pub/lib/alsa-lib-${ALSA_VERSION}.tar.bz2 [ -f opus-${OPUS_VERSION}.tar.gz ] || wget -N https://downloads.xiph.org/releases/opus/opus-${OPUS_VERSION}.tar.gz +[ -f speexdsp-${SPEEXDSP_VERSION}.tar.gz ] || wget -N https://ftp.osuosl.org/pub/xiph/releases/speex/speexdsp-${SPEEXDSP_VERSION}.tar.gz # Extract [ -d alsa-lib-${ALSA_VERSION} ] || tar xf alsa-lib-${ALSA_VERSION}.tar.bz2 [ -d opus-${OPUS_VERSION} ] || tar xf opus-${OPUS_VERSION}.tar.gz +[ -d speexdsp-${SPEEXDSP_VERSION} ] || tar xf speexdsp-${SPEEXDSP_VERSION}.tar.gz -# Optimization flags for ARM Cortex-A7 with NEON (simplified to avoid FD_SETSIZE issues) +# ARM Cortex-A7 optimization flags with NEON support OPTIM_CFLAGS="-O2 -mfpu=neon -mtune=cortex-a7 -mfloat-abi=hard" export CC="${CROSS_PREFIX}-gcc" @@ -45,7 +48,7 @@ export CXXFLAGS="$OPTIM_CFLAGS" cd alsa-lib-${ALSA_VERSION} if [ ! -f .built ]; then chown -R $(whoami):$(whoami) . - # Use minimal ALSA configuration to avoid FD_SETSIZE issues in devcontainer + # Minimal ALSA configuration for audio capture/playback CFLAGS="$OPTIM_CFLAGS" ./configure --host $BUILDKIT_FLAVOR \ --enable-static=yes --enable-shared=no \ --with-pcm-plugins=plug,rate,linear,copy \ @@ -68,4 +71,18 @@ if [ ! -f .built ]; then fi cd .. -echo "ALSA and Opus built in $AUDIO_LIBS_DIR" +# Build SpeexDSP +cd speexdsp-${SPEEXDSP_VERSION} +if [ ! -f .built ]; then + chown -R $(whoami):$(whoami) . + # NEON-optimized high-quality resampler + CFLAGS="$OPTIM_CFLAGS" ./configure --host $BUILDKIT_FLAVOR \ + --enable-static=yes --enable-shared=no \ + --enable-neon \ + --disable-examples + make -j$(nproc) + touch .built +fi +cd .. + +echo "ALSA, Opus, and SpeexDSP built in $AUDIO_LIBS_DIR" diff --git a/audio.go b/audio.go index efbcc6dc..fbdff99a 100644 --- a/audio.go +++ b/audio.go @@ -30,9 +30,9 @@ var ( func getAlsaDevice(source string) string { if source == "hdmi" { - return "plughw:0,0" + return "hw:0,0" // TC358743 HDMI audio } - return "plughw:1,0" + return "hw:1,0" // USB Audio Gadget } func initAudio() { @@ -67,15 +67,6 @@ func getAudioConfig() audio.AudioConfig { audioLogger.Warn().Int("buffer_periods", config.AudioBufferPeriods).Msg("Invalid buffer periods, using default") } - switch config.AudioSampleRate { - case 8000, 12000, 16000, 24000, 48000: - cfg.SampleRate = uint32(config.AudioSampleRate) - default: - if config.AudioSampleRate != 0 { - audioLogger.Warn().Int("sample_rate", config.AudioSampleRate).Msg("Invalid sample rate, using default") - } - } - if config.AudioPacketLossPerc >= 0 && config.AudioPacketLossPerc <= 100 { cfg.PacketLossPerc = uint8(config.AudioPacketLossPerc) } else if config.AudioPacketLossPerc != 0 { @@ -105,22 +96,29 @@ func startAudio() error { ensureConfigLoaded() var outputErr, inputErr error + + // Start output audio if enabled and track is available if audioOutputEnabled.Load() && currentAudioTrack != nil { outputErr = startOutputAudioUnderMutex(getAlsaDevice(config.AudioOutputSource)) } + // Start input audio if enabled and USB audio device is configured if audioInputEnabled.Load() && config.UsbDevices != nil && config.UsbDevices.Audio { inputErr = startInputAudioUnderMutex(getAlsaDevice("usb")) } - if outputErr != nil || inputErr != nil { - if outputErr != nil && inputErr != nil { - return fmt.Errorf("audio start failed - output: %w, input: %v", outputErr, inputErr) + // Return combined errors if any + if outputErr != nil && inputErr != nil { + return fmt.Errorf("audio start failed - output: %w, input: %v", outputErr, inputErr) + } + return firstError(outputErr, inputErr) +} + +func firstError(errs ...error) error { + for _, err := range errs { + if err != nil { + return err } - if outputErr != nil { - return outputErr - } - return inputErr } return nil } @@ -291,15 +289,8 @@ func SetAudioInputEnabled(enabled bool) error { return nil } -// SetAudioOutputSource switches between HDMI (hw:0,0) and USB (hw:1,0) audio capture. -// -// The function returns immediately after updating and persisting the config change, -// while the actual audio device switch happens asynchronously in the background: -// - Config save is synchronous to ensure the change persists even if the process crashes -// - Audio restart is async to avoid blocking the RPC caller during ALSA reconfiguration -// -// Note: The HDMI audio device (hw:0,0) can take 30-60 seconds to initialize due to -// TC358743 hardware characteristics. Callers receive success before audio actually switches. +// SetAudioOutputSource switches between HDMI and USB audio capture. +// Config is saved synchronously, audio restarts asynchronously. func SetAudioOutputSource(source string) error { if source != "hdmi" && source != "usb" { return fmt.Errorf("invalid audio source: %s (must be 'hdmi' or 'usb')", source) @@ -399,7 +390,6 @@ func handleInputTrackForSession(track *webrtc.TrackRemote) { } } -// processInputPacket handles writing audio data to the input source func processInputPacket(opusData []byte) error { inputSourceMutex.Lock() defer inputSourceMutex.Unlock() @@ -409,14 +399,14 @@ func processInputPacket(opusData []byte) error { return nil } - // Ensure source is connected + // Lazy connect on first use if !(*source).IsConnected() { if err := (*source).Connect(); err != nil { return err } } - // Write the message + // Write opus data, disconnect on error if err := (*source).WriteMessage(0, opusData); err != nil { (*source).Disconnect() return err diff --git a/config.go b/config.go index c7bc27a8..c68e2295 100644 --- a/config.go +++ b/config.go @@ -122,7 +122,6 @@ type Config struct { AudioDTXEnabled bool `json:"audio_dtx_enabled"` AudioFECEnabled bool `json:"audio_fec_enabled"` AudioBufferPeriods int `json:"audio_buffer_periods"` // 2-24 - AudioSampleRate int `json:"audio_sample_rate"` // Hz (Opus: 8k, 12k, 16k, 24k, 48k) AudioPacketLossPerc int `json:"audio_packet_loss_perc"` // 0-100 NativeMaxRestart uint `json:"native_max_restart_attempts"` } @@ -218,7 +217,6 @@ func getDefaultConfig() Config { AudioDTXEnabled: true, AudioFECEnabled: true, AudioBufferPeriods: 12, - AudioSampleRate: 48000, AudioPacketLossPerc: 20, } } @@ -298,7 +296,6 @@ func LoadConfig() { loadedConfig.AudioDTXEnabled = defaults.AudioDTXEnabled loadedConfig.AudioFECEnabled = defaults.AudioFECEnabled loadedConfig.AudioBufferPeriods = defaults.AudioBufferPeriods - loadedConfig.AudioSampleRate = defaults.AudioSampleRate loadedConfig.AudioPacketLossPerc = defaults.AudioPacketLossPerc } diff --git a/internal/audio/c/audio.c b/internal/audio/c/audio.c index 8ea6c04c..0e74d80e 100644 --- a/internal/audio/c/audio.c +++ b/internal/audio/c/audio.c @@ -3,20 +3,22 @@ * * Bidirectional audio processing optimized for ARM NEON SIMD: * - OUTPUT PATH: TC358743 HDMI or USB Gadget audio → Client speakers - * Pipeline: ALSA plughw:0,0 or plughw:1,0 capture → Opus encode (192kbps, FEC enabled) + * Pipeline: ALSA hw:0,0 or hw:1,0 capture → SpeexDSP resample → Opus encode (192kbps, FEC enabled) * * - INPUT PATH: Client microphone → Device speakers - * Pipeline: Opus decode (with FEC) → ALSA plughw:1,0 playback + * Pipeline: Opus decode (with FEC) → ALSA hw:1,0 playback * * Key features: * - ARM NEON SIMD optimization for all audio operations + * - SpeexDSP high-quality resampling (SPEEX_RESAMPLER_QUALITY_DESKTOP) * - Opus in-band FEC for packet loss resilience - * - S16_LE stereo, 20ms frames (sample rate configurable: 8k/12k/16k/24k/48kHz) - * - ALSA plughw layer provides automatic rate conversion from hardware to Opus rate + * - S16_LE stereo, 20ms frames at 48kHz (hardware rate auto-negotiated) + * - Direct hardware access with userspace resampling (no ALSA plugin layer) */ #include #include +#include #include #include #include @@ -45,12 +47,15 @@ static const char *alsa_playback_device = NULL; static OpusEncoder *encoder = NULL; static OpusDecoder *decoder = NULL; +static SpeexResamplerState *capture_resampler = NULL; -// Audio format (S16_LE @ 48kHz) -static uint32_t sample_rate = 48000; +// Audio format - Opus always uses 48kHz for WebRTC (RFC 7587) +static const uint32_t opus_sample_rate = 48000; // Fixed: Opus RTP clock rate +static uint32_t hardware_sample_rate = 48000; // Hardware-negotiated rate static uint8_t capture_channels = 2; // OUTPUT: Audio source (HDMI or USB) → client (stereo by default) static uint8_t playback_channels = 1; // INPUT: Client mono mic → device (always mono for USB audio gadget) -static uint16_t frame_size = 960; // 20ms frames at 48kHz +static const uint16_t opus_frame_size = 960; // 20ms frames at 48kHz (fixed) +static uint16_t hardware_frame_size = 960; // 20ms frames at hardware rate static uint32_t opus_bitrate = 192000; static uint8_t opus_complexity = 8; @@ -105,34 +110,50 @@ void update_audio_constants(uint32_t bitrate, uint8_t complexity, uint32_t sr, uint8_t ch, uint16_t fs, uint16_t max_pkt, uint32_t sleep_us, uint8_t max_attempts, uint32_t max_backoff, uint8_t dtx_enabled, uint8_t fec_enabled, uint8_t buf_periods, uint8_t pkt_loss_perc) { + // Validate and set bitrate (64-256 kbps range) opus_bitrate = (bitrate >= 64000 && bitrate <= 256000) ? bitrate : 192000; + + // Set complexity (0-10 range) opus_complexity = (complexity <= 10) ? complexity : 5; - sample_rate = sr > 0 ? sr : 48000; + + // Set channel count (mono or stereo) capture_channels = (ch == 1 || ch == 2) ? ch : 2; - frame_size = fs > 0 ? fs : 960; + + // Set packet and timing parameters max_packet_size = max_pkt > 0 ? max_pkt : 1500; sleep_microseconds = sleep_us > 0 ? sleep_us : 1000; sleep_milliseconds = sleep_microseconds / 1000; max_attempts_global = max_attempts > 0 ? max_attempts : 5; max_backoff_us_global = max_backoff > 0 ? max_backoff : 500000; + + // Set codec features opus_dtx_enabled = dtx_enabled ? 1 : 0; opus_fec_enabled = fec_enabled ? 1 : 0; + + // Set buffer configuration buffer_period_count = (buf_periods >= 2 && buf_periods <= 24) ? buf_periods : 12; opus_packet_loss_perc = (pkt_loss_perc <= 100) ? pkt_loss_perc : 20; + + // Note: sr and fs parameters ignored - Opus always uses 48kHz with 960 samples } void update_audio_decoder_constants(uint32_t sr, uint8_t ch, uint16_t fs, uint16_t max_pkt, uint32_t sleep_us, uint8_t max_attempts, uint32_t max_backoff, uint8_t buf_periods) { - sample_rate = sr > 0 ? sr : 48000; + // Set playback channels (mono or stereo) playback_channels = (ch == 1 || ch == 2) ? ch : 2; - frame_size = fs > 0 ? fs : 960; + + // Set packet and timing parameters max_packet_size = max_pkt > 0 ? max_pkt : 1500; sleep_microseconds = sleep_us > 0 ? sleep_us : 1000; sleep_milliseconds = sleep_microseconds / 1000; max_attempts_global = max_attempts > 0 ? max_attempts : 5; max_backoff_us_global = max_backoff > 0 ? max_backoff : 500000; + + // Set buffer configuration buffer_period_count = (buf_periods >= 2 && buf_periods <= 24) ? buf_periods : 12; + + // Note: sr and fs parameters ignored - always 48kHz with 960 samples } /** @@ -140,19 +161,19 @@ void update_audio_decoder_constants(uint32_t sr, uint8_t ch, uint16_t fs, uint16 * Must be called before jetkvm_audio_capture_init or jetkvm_audio_playback_init * * Device mapping (set via ALSA_CAPTURE_DEVICE/ALSA_PLAYBACK_DEVICE): - * plughw:0,0 = TC358743 HDMI audio with rate conversion (for OUTPUT path capture) - * plughw:1,0 = USB Audio Gadget with rate conversion (for OUTPUT path capture or INPUT path playback) + * hw:0,0 = TC358743 HDMI audio (direct hardware access, SpeexDSP resampling) + * hw:1,0 = USB Audio Gadget (direct hardware access, SpeexDSP resampling) */ static void init_alsa_devices_from_env(void) { // Always read from environment to support device switching alsa_capture_device = getenv("ALSA_CAPTURE_DEVICE"); if (alsa_capture_device == NULL || alsa_capture_device[0] == '\0') { - alsa_capture_device = "plughw:1,0"; // Default: USB gadget audio for capture with rate conversion + alsa_capture_device = "hw:1,0"; // Default: USB gadget audio for capture } alsa_playback_device = getenv("ALSA_PLAYBACK_DEVICE"); if (alsa_playback_device == NULL || alsa_playback_device[0] == '\0') { - alsa_playback_device = "plughw:1,0"; // Default: USB gadget audio for playback with rate conversion + alsa_playback_device = "hw:1,0"; // Default: USB gadget audio for playback } } @@ -197,7 +218,7 @@ static volatile sig_atomic_t playback_initialized = 0; * Open ALSA device with exponential backoff retry * @return 0 on success, negative error code on failure */ -// Helper: High-precision sleep using nanosleep (better than usleep) +// High-precision sleep using nanosleep static inline void precise_sleep_us(uint32_t microseconds) { struct timespec ts = { .tv_sec = microseconds / 1000000, @@ -220,12 +241,12 @@ static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream attempt++; - // Apply different sleep strategies based on error type + // Apply sleep strategy based on error type if (err == -EPERM || err == -EACCES) { precise_sleep_us(backoff_us >> 1); // Shorter wait for permission errors } else { precise_sleep_us(backoff_us); - // Exponential backoff for all retry-worthy errors + // Exponential backoff for retry-worthy errors if (err == -EBUSY || err == -EAGAIN || err == -ENODEV || err == -ENOENT) { backoff_us = (backoff_us < 50000) ? (backoff_us << 1) : 50000; } @@ -345,12 +366,12 @@ static int handle_alsa_error(snd_pcm_t *handle, snd_pcm_t **valid_handle, } /** - * Configure ALSA device (S16_LE @ variable rate with optimized buffering) + * Configure ALSA device (S16_LE @ hardware-negotiated rate with optimized buffering) * @param handle ALSA PCM handle * @param device_name Device name for logging * @param num_channels Number of channels (1=mono, 2=stereo) - * @param actual_rate_out Pointer to store the actual rate the device was configured to use - * @param actual_frame_size_out Pointer to store the actual frame size (samples per channel) + * @param actual_rate_out Pointer to store the actual hardware-negotiated rate + * @param actual_frame_size_out Pointer to store the actual frame size at hardware rate * @return 0 on success, negative error code on failure */ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name, uint8_t num_channels, @@ -368,23 +389,43 @@ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name, uin if (err < 0) return err; err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); - if (err < 0) return err; + if (err < 0) { + fprintf(stderr, "ERROR: %s: Failed to set access mode: %s\n", device_name, snd_strerror(err)); + fflush(stderr); + return err; + } err = snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE); - if (err < 0) return err; + if (err < 0) { + fprintf(stderr, "ERROR: %s: Failed to set format S16_LE: %s\n", device_name, snd_strerror(err)); + fflush(stderr); + return err; + } err = snd_pcm_hw_params_set_channels(handle, params, num_channels); + if (err < 0) { + fprintf(stderr, "ERROR: %s: Failed to set %u channels: %s\n", device_name, num_channels, snd_strerror(err)); + fflush(stderr); + return err; + } + + // Disable ALSA resampling - we handle it with SpeexDSP + err = snd_pcm_hw_params_set_rate_resample(handle, params, 0); + if (err < 0) { + fprintf(stderr, "ERROR: %s: Failed to disable ALSA resampling: %s\n", device_name, snd_strerror(err)); + fflush(stderr); + return err; + } + + // Try to set 48kHz first (preferred), then let hardware negotiate + unsigned int requested_rate = opus_sample_rate; + err = snd_pcm_hw_params_set_rate_near(handle, params, &requested_rate, 0); if (err < 0) return err; - err = snd_pcm_hw_params_set_rate_resample(handle, params, 1); - if (err < 0) return err; + // Calculate frame size for this hardware rate (20ms) + uint16_t hw_frame_size = requested_rate / 50; - err = snd_pcm_hw_params_set_rate(handle, params, sample_rate, 0); - if (err < 0) return err; - - uint16_t actual_frame_size = frame_size; - - snd_pcm_uframes_t period_size = actual_frame_size; + snd_pcm_uframes_t period_size = hw_frame_size; if (period_size < 64) period_size = 64; err = snd_pcm_hw_params_set_period_size_near(handle, params, &period_size, 0); @@ -399,12 +440,17 @@ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name, uin unsigned int verified_rate = 0; err = snd_pcm_hw_params_get_rate(params, &verified_rate, 0); - if (err < 0 || verified_rate != sample_rate) { - fprintf(stderr, "WARNING: %s: Rate verification failed - expected %u Hz, got %u Hz\n", - device_name, sample_rate, verified_rate); + if (err < 0) { + fprintf(stderr, "ERROR: %s: Failed to get rate: %s\n", + device_name, snd_strerror(err)); fflush(stderr); + return err; } + fprintf(stderr, "INFO: %s: Hardware negotiated %u Hz (Opus uses %u Hz with SpeexDSP resampling)\n", + device_name, verified_rate, opus_sample_rate); + fflush(stderr); + err = snd_pcm_sw_params_current(handle, sw_params); if (err < 0) return err; @@ -420,8 +466,8 @@ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name, uin err = snd_pcm_prepare(handle); if (err < 0) return err; - if (actual_rate_out) *actual_rate_out = sample_rate; - if (actual_frame_size_out) *actual_frame_size_out = actual_frame_size; + if (actual_rate_out) *actual_rate_out = verified_rate; + if (actual_frame_size_out) *actual_frame_size_out = hw_frame_size; return 0; } @@ -430,9 +476,9 @@ static int configure_alsa_device(snd_pcm_t *handle, const char *device_name, uin /** * Initialize OUTPUT path (HDMI or USB Gadget audio capture → Opus encoder) - * Opens ALSA capture device from ALSA_CAPTURE_DEVICE env (default: plughw:1,0, set to plughw:0,0 for HDMI) + * Opens ALSA capture device from ALSA_CAPTURE_DEVICE env (default: hw:1,0, set to hw:0,0 for HDMI) * and creates Opus encoder with optimized settings - * @return 0 on success, -EBUSY if initializing, -1/-2/-3 on errors + * @return 0 on success, -EBUSY if initializing, -1/-2/-3/-4 on errors */ int jetkvm_audio_capture_init() { int err; @@ -492,13 +538,65 @@ int jetkvm_audio_capture_init() { return -2; } + // Store hardware-negotiated values + hardware_sample_rate = actual_rate; + hardware_frame_size = actual_frame_size; + + // Validate hardware frame size + if (hardware_frame_size > 3840) { + fprintf(stderr, "ERROR: capture: Hardware frame size %u exceeds buffer capacity 3840\n", + hardware_frame_size); + fflush(stderr); + snd_pcm_t *handle = pcm_capture_handle; + pcm_capture_handle = NULL; + snd_pcm_close(handle); + atomic_store(&capture_stop_requested, 0); + capture_initializing = 0; + return -4; + } + + // Clean up any existing resampler before creating new one (prevents memory leak on re-init) + if (capture_resampler) { + speex_resampler_destroy(capture_resampler); + capture_resampler = NULL; + } + + // Initialize Speex resampler if hardware rate != 48kHz + if (hardware_sample_rate != opus_sample_rate) { + int speex_err = 0; + capture_resampler = speex_resampler_init(capture_channels, hardware_sample_rate, + opus_sample_rate, SPEEX_RESAMPLER_QUALITY_DESKTOP, + &speex_err); + if (!capture_resampler || speex_err != 0) { + fprintf(stderr, "ERROR: capture: Failed to create SpeexDSP resampler (%u Hz → %u Hz): %d\n", + hardware_sample_rate, opus_sample_rate, speex_err); + fflush(stderr); + snd_pcm_t *handle = pcm_capture_handle; + pcm_capture_handle = NULL; + snd_pcm_close(handle); + atomic_store(&capture_stop_requested, 0); + capture_initializing = 0; + return -3; + } + fprintf(stderr, "INFO: capture: SpeexDSP resampler initialized (%u Hz → %u Hz)\n", + hardware_sample_rate, opus_sample_rate); + fflush(stderr); + } else { + fprintf(stderr, "INFO: capture: No resampling needed (hardware = Opus = %u Hz)\n", opus_sample_rate); + fflush(stderr); + } + fprintf(stderr, "INFO: capture: Initializing Opus encoder at %u Hz, %u channels, frame size %u\n", - actual_rate, capture_channels, actual_frame_size); + opus_sample_rate, capture_channels, opus_frame_size); fflush(stderr); int opus_err = 0; - encoder = opus_encoder_create(actual_rate, capture_channels, OPUS_APPLICATION_AUDIO, &opus_err); + encoder = opus_encoder_create(opus_sample_rate, capture_channels, OPUS_APPLICATION_AUDIO, &opus_err); if (!encoder || opus_err != OPUS_OK) { + if (capture_resampler) { + speex_resampler_destroy(capture_resampler); + capture_resampler = NULL; + } if (pcm_capture_handle) { snd_pcm_t *handle = pcm_capture_handle; pcm_capture_handle = NULL; @@ -506,20 +604,29 @@ int jetkvm_audio_capture_init() { } atomic_store(&capture_stop_requested, 0); capture_initializing = 0; - return -3; + return -4; } - opus_encoder_ctl(encoder, OPUS_SET_BITRATE(opus_bitrate)); - opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(opus_complexity)); - opus_encoder_ctl(encoder, OPUS_SET_VBR(OPUS_VBR)); - opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(OPUS_VBR_CONSTRAINT)); - opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_TYPE)); - opus_encoder_ctl(encoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH)); - opus_encoder_ctl(encoder, OPUS_SET_DTX(opus_dtx_enabled)); - opus_encoder_ctl(encoder, OPUS_SET_LSB_DEPTH(OPUS_LSB_DEPTH)); + #define OPUS_CTL_WARN(call, desc) do { \ + int _err = call; \ + if (_err != OPUS_OK) { \ + fprintf(stderr, "WARN: capture: Failed to set " desc ": %s\n", opus_strerror(_err)); \ + fflush(stderr); \ + } \ + } while(0) - opus_encoder_ctl(encoder, OPUS_SET_INBAND_FEC(opus_fec_enabled)); - opus_encoder_ctl(encoder, OPUS_SET_PACKET_LOSS_PERC(opus_packet_loss_perc)); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_BITRATE(opus_bitrate)), "bitrate"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(opus_complexity)), "complexity"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_VBR(OPUS_VBR)), "VBR mode"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(OPUS_VBR_CONSTRAINT)), "VBR constraint"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(OPUS_SIGNAL_TYPE)), "signal type"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_BANDWIDTH(OPUS_BANDWIDTH)), "bandwidth"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_DTX(opus_dtx_enabled)), "DTX"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_LSB_DEPTH(OPUS_LSB_DEPTH)), "LSB depth"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_INBAND_FEC(opus_fec_enabled)), "FEC"); + OPUS_CTL_WARN(opus_encoder_ctl(encoder, OPUS_SET_PACKET_LOSS_PERC(opus_packet_loss_perc)), "packet loss percentage"); + + #undef OPUS_CTL_WARN capture_initialized = 1; atomic_store(&capture_stop_requested, 0); @@ -528,12 +635,14 @@ int jetkvm_audio_capture_init() { } /** - * Read HDMI audio, encode to Opus (OUTPUT path hot function) + * Read HDMI audio, resample with SpeexDSP, encode to Opus (OUTPUT path hot function) * @param opus_buf Output buffer for encoded Opus packet * @return >0 = Opus packet size in bytes, -1 = error */ __attribute__((hot)) int jetkvm_audio_read_encode(void * __restrict__ opus_buf) { - static short CACHE_ALIGN pcm_buffer[960 * 2]; // Cache-aligned + // Two buffers: hardware buffer + resampled buffer (at 48kHz) + static short CACHE_ALIGN pcm_hw_buffer[3840 * 2]; // Max 192kHz @ 20ms * 2 channels + static short CACHE_ALIGN pcm_opus_buffer[960 * 2]; // 48kHz @ 20ms * 2 channels unsigned char * __restrict__ out = (unsigned char*)opus_buf; int32_t pcm_rc, nb_bytes; int32_t err = 0; @@ -545,8 +654,8 @@ __attribute__((hot)) int jetkvm_audio_read_encode(void * __restrict__ opus_buf) } SIMD_PREFETCH(out, 1, 0); - SIMD_PREFETCH(pcm_buffer, 0, 0); - SIMD_PREFETCH(pcm_buffer + 64, 0, 1); + SIMD_PREFETCH(pcm_hw_buffer, 0, 0); + SIMD_PREFETCH(pcm_hw_buffer + 64, 0, 1); // Acquire mutex to protect against concurrent close pthread_mutex_lock(&capture_mutex); @@ -564,7 +673,8 @@ retry_read: snd_pcm_t *handle = pcm_capture_handle; - pcm_rc = snd_pcm_readi(handle, pcm_buffer, frame_size); + // Read from hardware at hardware sample rate + pcm_rc = snd_pcm_readi(handle, pcm_hw_buffer, hardware_frame_size); if (handle != pcm_capture_handle) { pthread_mutex_unlock(&capture_mutex); @@ -585,9 +695,29 @@ retry_read: } // Zero-pad if we got a short read - if (__builtin_expect(pcm_rc < frame_size, 0)) { - uint32_t remaining_samples = (frame_size - pcm_rc) * capture_channels; - simd_clear_samples_s16(&pcm_buffer[pcm_rc * capture_channels], remaining_samples); + if (__builtin_expect(pcm_rc < hardware_frame_size, 0)) { + uint32_t remaining_samples = (hardware_frame_size - pcm_rc) * capture_channels; + simd_clear_samples_s16(&pcm_hw_buffer[pcm_rc * capture_channels], remaining_samples); + } + + // Resample to 48kHz if needed + short *pcm_to_encode; + if (capture_resampler) { + spx_uint32_t in_len = hardware_frame_size; + spx_uint32_t out_len = opus_frame_size; + int res_err = speex_resampler_process_interleaved_int(capture_resampler, + pcm_hw_buffer, &in_len, + pcm_opus_buffer, &out_len); + if (res_err != 0 || out_len != opus_frame_size) { + fprintf(stderr, "ERROR: capture: Resampling failed (err=%d, out_len=%u, expected=%u)\n", + res_err, out_len, opus_frame_size); + fflush(stderr); + pthread_mutex_unlock(&capture_mutex); + return -1; + } + pcm_to_encode = pcm_opus_buffer; + } else { + pcm_to_encode = pcm_hw_buffer; } OpusEncoder *enc = encoder; @@ -596,7 +726,12 @@ retry_read: return -1; } - nb_bytes = opus_encode(enc, pcm_buffer, frame_size, out, max_packet_size); + nb_bytes = opus_encode(enc, pcm_to_encode, opus_frame_size, out, max_packet_size); + + if (__builtin_expect(nb_bytes < 0, 0)) { + fprintf(stderr, "ERROR: capture: Opus encoding failed: %s\n", opus_strerror(nb_bytes)); + fflush(stderr); + } pthread_mutex_unlock(&capture_mutex); return nb_bytes; @@ -606,7 +741,7 @@ retry_read: /** * Initialize INPUT path (Opus decoder → device speakers) - * Opens ALSA playback device from ALSA_PLAYBACK_DEVICE env (default: plughw:1,0) + * Opens ALSA playback device from ALSA_PLAYBACK_DEVICE env (default: hw:1,0) * and creates Opus decoder. Returns immediately on device open failure (no fallback). * @return 0 on success, -EBUSY if initializing, -1/-2 on errors */ @@ -731,10 +866,10 @@ __attribute__((hot)) int jetkvm_audio_decode_write(void * __restrict__ opus_buf, // Decode Opus packet to PCM (FEC automatically applied if embedded in packet) // decode_fec=0 means normal decode (FEC data is used automatically when present) - pcm_frames = opus_decode(dec, in, opus_size, pcm_buffer, frame_size, 0); + pcm_frames = opus_decode(dec, in, opus_size, pcm_buffer, opus_frame_size, 0); if (__builtin_expect(pcm_frames < 0, 0)) { - pcm_frames = opus_decode(dec, NULL, 0, pcm_buffer, frame_size, 1); + pcm_frames = opus_decode(dec, NULL, 0, pcm_buffer, opus_frame_size, 1); if (pcm_frames < 0) { pthread_mutex_unlock(&playback_mutex); @@ -810,6 +945,13 @@ static void close_audio_stream(atomic_int *stop_requested, volatile int *initial *pcm_handle = NULL; *codec = NULL; + // Clean up resampler inside mutex to prevent race with encoding thread + if (mutex == &capture_mutex && capture_resampler) { + SpeexResamplerState *res = capture_resampler; + capture_resampler = NULL; + speex_resampler_destroy(res); + } + pthread_mutex_unlock(mutex); if (handle_to_close) { diff --git a/internal/audio/cgo_source.go b/internal/audio/cgo_source.go index 1f6e0905..21e7ad83 100644 --- a/internal/audio/cgo_source.go +++ b/internal/audio/cgo_source.go @@ -3,8 +3,8 @@ package audio /* -#cgo CFLAGS: -O3 -ffast-math -I/opt/jetkvm-audio-libs/alsa-lib-1.2.14/include -I/opt/jetkvm-audio-libs/opus-1.5.2/include -#cgo LDFLAGS: /opt/jetkvm-audio-libs/alsa-lib-1.2.14/src/.libs/libasound.a /opt/jetkvm-audio-libs/opus-1.5.2/.libs/libopus.a -lm -ldl -lpthread +#cgo CFLAGS: -O3 -ffast-math -I/opt/jetkvm-audio-libs/alsa-lib-1.2.14/include -I/opt/jetkvm-audio-libs/opus-1.5.2/include -I/opt/jetkvm-audio-libs/speexdsp-1.2.1/include +#cgo LDFLAGS: /opt/jetkvm-audio-libs/alsa-lib-1.2.14/src/.libs/libasound.a /opt/jetkvm-audio-libs/opus-1.5.2/.libs/libopus.a /opt/jetkvm-audio-libs/speexdsp-1.2.1/libspeexdsp/.libs/libspeexdsp.a -lm -ldl -lpthread #include #include "c/audio.c" @@ -83,16 +83,10 @@ func (c *CgoSource) Connect() error { func (c *CgoSource) connectOutput() error { os.Setenv("ALSA_CAPTURE_DEVICE", c.alsaDevice) - // Using plughw: enables ALSA rate conversion plugin - // USB Gadget hardware is fixed at 48kHz (configfs hardcoded), so keep it at 48kHz - // HDMI can use configured rate - plughw resamples from hardware rate to Opus rate - sampleRate := c.config.SampleRate - if c.alsaDevice == "plughw:1,0" { - sampleRate = 48000 - } else if sampleRate == 0 { - sampleRate = 48000 - } - frameSize := uint16(sampleRate * 20 / 1000) + // Opus uses fixed 48kHz sample rate (RFC 7587) + // SpeexDSP handles any hardware rate conversion + const sampleRate = 48000 + const frameSize = 960 // 20ms at 48kHz c.logger.Debug(). Uint16("bitrate_kbps", c.config.Bitrate). @@ -101,7 +95,7 @@ func (c *CgoSource) connectOutput() error { Bool("fec", c.config.FECEnabled). Uint8("buffer_periods", c.config.BufferPeriods). Uint32("sample_rate", sampleRate). - Uint16("frame_size", frameSize). + Uint16("frame_size", uint16(frameSize)). Uint8("packet_loss_perc", c.config.PacketLossPerc). Msg("Initializing audio capture") @@ -134,15 +128,14 @@ func (c *CgoSource) connectOutput() error { func (c *CgoSource) connectInput() error { os.Setenv("ALSA_PLAYBACK_DEVICE", c.alsaDevice) - // USB Audio Gadget (hw:1,0) is hardcoded to 48kHz in usbgadget/config.go - // Always use 48kHz for input path regardless of UI configuration + // USB Audio Gadget uses fixed 48kHz sample rate const inputSampleRate = 48000 - frameSize := uint16(inputSampleRate * 20 / 1000) + const frameSize = 960 // 20ms at 48kHz C.update_audio_decoder_constants( C.uint(inputSampleRate), - C.uchar(1), - C.ushort(frameSize), + C.uchar(1), // Mono for USB audio gadget + C.ushort(uint16(frameSize)), C.ushort(1500), C.uint(1000), C.uchar(5), diff --git a/internal/audio/source.go b/internal/audio/source.go index e323b611..fcc19e62 100644 --- a/internal/audio/source.go +++ b/internal/audio/source.go @@ -10,7 +10,6 @@ type AudioConfig struct { BufferPeriods uint8 DTXEnabled bool FECEnabled bool - SampleRate uint32 PacketLossPerc uint8 } @@ -21,7 +20,6 @@ func DefaultAudioConfig() AudioConfig { BufferPeriods: 12, DTXEnabled: true, FECEnabled: true, - SampleRate: 48000, PacketLossPerc: 0, } } diff --git a/jsonrpc.go b/jsonrpc.go index 0865fe6d..cb3e49b2 100644 --- a/jsonrpc.go +++ b/jsonrpc.go @@ -982,7 +982,6 @@ type AudioConfigResponse struct { DTXEnabled bool `json:"dtx_enabled"` FECEnabled bool `json:"fec_enabled"` BufferPeriods int `json:"buffer_periods"` - SampleRate int `json:"sample_rate"` PacketLossPerc int `json:"packet_loss_perc"` } @@ -995,12 +994,11 @@ func rpcGetAudioConfig() (AudioConfigResponse, error) { DTXEnabled: cfg.DTXEnabled, FECEnabled: cfg.FECEnabled, BufferPeriods: int(cfg.BufferPeriods), - SampleRate: int(cfg.SampleRate), PacketLossPerc: int(cfg.PacketLossPerc), }, nil } -func rpcSetAudioConfig(bitrate int, complexity int, dtxEnabled bool, fecEnabled bool, bufferPeriods int, sampleRate int, packetLossPerc int) error { +func rpcSetAudioConfig(bitrate int, complexity int, dtxEnabled bool, fecEnabled bool, bufferPeriods int, packetLossPerc int) error { ensureConfigLoaded() if bitrate < 64 || bitrate > 256 { @@ -1012,10 +1010,6 @@ func rpcSetAudioConfig(bitrate int, complexity int, dtxEnabled bool, fecEnabled if bufferPeriods < 2 || bufferPeriods > 24 { return fmt.Errorf("buffer periods must be between 2 and 24") } - validSampleRates := map[int]bool{8000: true, 12000: true, 16000: true, 24000: true, 48000: true} - if !validSampleRates[sampleRate] { - return fmt.Errorf("sample rate must be one of: 8000, 12000, 16000, 24000, 48000 Hz") - } if packetLossPerc < 0 || packetLossPerc > 100 { return fmt.Errorf("packet loss percentage must be between 0 and 100") } @@ -1025,7 +1019,6 @@ func rpcSetAudioConfig(bitrate int, complexity int, dtxEnabled bool, fecEnabled config.AudioDTXEnabled = dtxEnabled config.AudioFECEnabled = fecEnabled config.AudioBufferPeriods = bufferPeriods - config.AudioSampleRate = sampleRate config.AudioPacketLossPerc = packetLossPerc return SaveConfig() @@ -1380,7 +1373,7 @@ var rpcHandlers = map[string]RPCHandler{ "setAudioOutputSource": {Func: rpcSetAudioOutputSource, Params: []string{"source"}}, "refreshHdmiConnection": {Func: rpcRefreshHdmiConnection}, "getAudioConfig": {Func: rpcGetAudioConfig}, - "setAudioConfig": {Func: rpcSetAudioConfig, Params: []string{"bitrate", "complexity", "dtxEnabled", "fecEnabled", "bufferPeriods", "sampleRate", "packetLossPerc"}}, + "setAudioConfig": {Func: rpcSetAudioConfig, Params: []string{"bitrate", "complexity", "dtxEnabled", "fecEnabled", "bufferPeriods", "packetLossPerc"}}, "restartAudioOutput": {Func: rpcRestartAudioOutput}, "getAudioInputAutoEnable": {Func: rpcGetAudioInputAutoEnable}, "setAudioInputAutoEnable": {Func: rpcSetAudioInputAutoEnable, Params: []string{"enabled"}}, diff --git a/scripts/dev_deploy.sh b/scripts/dev_deploy.sh index 96e7cf60..652d41eb 100755 --- a/scripts/dev_deploy.sh +++ b/scripts/dev_deploy.sh @@ -260,18 +260,20 @@ fi if [ "$INSTALL_APP" = true ] then msg_info "▶ Building release binary" + # Build audio dependencies and release binary + do_make build_audio_deps do_make build_release \ SKIP_NATIVE_IF_EXISTS=${SKIP_NATIVE_BUILD} \ SKIP_UI_BUILD=${SKIP_UI_BUILD_RELEASE} \ ENABLE_SYNC_TRACE=${ENABLE_SYNC_TRACE} - # Copy the binary to the remote host as if we were the OTA updater. + # Deploy as OTA update and reboot sshdev "cat > /userdata/jetkvm/jetkvm_app.update" < bin/jetkvm_app - - # Reboot the device, the new app will be deployed by the startup process. sshdev "reboot" else msg_info "▶ Building development binary" + # Build audio dependencies and development binary + do_make build_audio_deps do_make build_dev \ SKIP_NATIVE_IF_EXISTS=${SKIP_NATIVE_BUILD} \ SKIP_UI_BUILD=${SKIP_UI_BUILD_RELEASE} \ diff --git a/ui/src/routes/devices.$id.settings.audio.tsx b/ui/src/routes/devices.$id.settings.audio.tsx index c33d4752..011f8a75 100644 --- a/ui/src/routes/devices.$id.settings.audio.tsx +++ b/ui/src/routes/devices.$id.settings.audio.tsx @@ -16,7 +16,6 @@ interface AudioConfigResult { dtx_enabled: boolean; fec_enabled: boolean; buffer_periods: number; - sample_rate: number; packet_loss_perc: number; } @@ -54,8 +53,6 @@ export default function SettingsAudioRoute() { setAudioFECEnabled, audioBufferPeriods, setAudioBufferPeriods, - audioSampleRate, - setAudioSampleRate, audioPacketLossPerc, setAudioPacketLossPerc, } = useSettingsStore(); @@ -84,10 +81,9 @@ export default function SettingsAudioRoute() { setAudioDTXEnabled(config.dtx_enabled); setAudioFECEnabled(config.fec_enabled); setAudioBufferPeriods(config.buffer_periods); - setAudioSampleRate(config.sample_rate); setAudioPacketLossPerc(config.packet_loss_perc); }); - }, [send, setAudioOutputEnabled, setAudioInputAutoEnable, setAudioOutputSource, setAudioBitrate, setAudioComplexity, setAudioDTXEnabled, setAudioFECEnabled, setAudioBufferPeriods, setAudioSampleRate, setAudioPacketLossPerc]); + }, [send, setAudioOutputEnabled, setAudioInputAutoEnable, setAudioOutputSource, setAudioBitrate, setAudioComplexity, setAudioDTXEnabled, setAudioFECEnabled, setAudioBufferPeriods, setAudioPacketLossPerc]); const handleAudioOutputEnabledChange = (enabled: boolean) => { send("setAudioOutputEnabled", { enabled }, (resp: JsonRpcResponse) => { @@ -138,11 +134,10 @@ export default function SettingsAudioRoute() { dtxEnabled: audioDTXEnabled, fecEnabled: audioFECEnabled, bufferPeriods: audioBufferPeriods, - sampleRate: audioSampleRate, packetLossPerc: audioPacketLossPerc, }); - const handleAudioConfigChange = (updates: Partial) => { + const handleAudioConfigChange = (updates: Partial>) => { const config = { ...getCurrentConfig(), ...updates }; send("setAudioConfig", config, (resp: JsonRpcResponse) => { @@ -153,7 +148,6 @@ export default function SettingsAudioRoute() { setAudioDTXEnabled(config.dtxEnabled); setAudioFECEnabled(config.fecEnabled); setAudioBufferPeriods(config.bufferPeriods); - setAudioSampleRate(config.sampleRate); setAudioPacketLossPerc(config.packetLossPerc); notifications.success(m.audio_settings_config_updated()); }); @@ -283,24 +277,6 @@ export default function SettingsAudioRoute() { /> - - handleAudioConfigChange({ sampleRate: parseInt(e.target.value) })} - /> - -