/**
 * Soft-clip 16-bit PCM samples in place (internal/audio/c/audio.c).
 *
 * Purpose: tame sharp transients before Opus encoding. Samples whose
 * magnitude is at or below the knee (30720 = 0.9375 * 32768) pass through
 * unchanged; the part of the magnitude above the knee is compressed 4:1:
 *
 *     |out| = min(|in|, 30720) + (max(|in| - 30720, 0) >> 2)
 *
 * The curve is piecewise linear (continuous at the knee) and odd-symmetric,
 * i.e. clip(-x) == -clip(x), so it adds no DC offset. The output magnitude
 * never exceeds 30720 + (2048 >> 2) = 31232, therefore no final clamp to the
 * int16 range is required.
 *
 * All arithmetic is done on the unsigned magnitude, which avoids right-
 * shifting negative values (implementation-defined in C) and keeps the NEON
 * path and the scalar tail bit-identical.
 *
 * Intended call site, immediately before opus_encode():
 *     simd_soft_clip_s16(pcm_to_encode, opus_frame_size * capture_channels);
 *
 * @param buffer  Samples to process; modified in place.
 * @param samples Number of int16 values in buffer (not frames). 0 is a no-op.
 */
static inline void simd_soft_clip_s16(short * __restrict__ buffer, uint32_t samples) {
    enum { SOFT_CLIP_KNEE = 30720 };  // 0.9375 * 32768

    uint32_t i = 0;

#if defined(__ARM_NEON) || defined(__ARM_NEON__)
    // Vector path: 8 samples per iteration.
    const uint16x8_t knee = vdupq_n_u16(SOFT_CLIP_KNEE);
    const uint32_t simd_samples = samples & ~7U;

    for (; i < simd_samples; i += 8) {
        const int16x8_t in = vld1q_s16(&buffer[i]);

        // All-ones in negative lanes, zero elsewhere.
        const int16x8_t sign = vshrq_n_s16(in, 15);

        // |in| viewed as unsigned: vabsq_s16(-32768) wraps to 0x8000, which
        // is exactly 32768 once reinterpreted as uint16.
        const uint16x8_t mag = vreinterpretq_u16_s16(vabsq_s16(in));

        // Saturating subtract yields max(|in| - knee, 0) without a select.
        const uint16x8_t excess = vshrq_n_u16(vqsubq_u16(mag, knee), 2);
        const int16x8_t out_mag =
            vreinterpretq_s16_u16(vaddq_u16(vminq_u16(mag, knee), excess));

        // Conditional negate: (m ^ sign) - sign restores the original sign.
        vst1q_s16(&buffer[i], vsubq_s16(veorq_s16(out_mag, sign), sign));
    }
#endif

    // Scalar path: tail samples (or every sample when NEON is unavailable).
    for (; i < samples; i++) {
        const int32_t in = buffer[i];
        int32_t mag = (in < 0) ? -in : in;

        if (mag > SOFT_CLIP_KNEE) {
            mag = SOFT_CLIP_KNEE + ((mag - SOFT_CLIP_KNEE) >> 2);
        }
        buffer[i] = (short)((in < 0) ? -mag : mag);
    }
}