From 9e6ffb34e370e24b87240c5f71fc9d99c3eba102 Mon Sep 17 00:00:00 2001
From: Alex P <dtk077@gmail.com>
Date: Fri, 21 Nov 2025 20:23:10 +0200
Subject: [PATCH] Add ARM NEON soft-clipper to prevent clipping on sharp
 transients
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements SIMD-optimized soft-clipping before Opus encoding to prevent
digital clipping distortion on sharp transient attacks (e.g., plastic cup
impacts, percussive sounds). Samples beyond ±30720 (~94% of max amplitude)
keep only a quarter of their excess over that threshold (a 4:1 linear
knee), which preserves audio quality while eliminating crackles and pops.

Processes 8 samples per iteration using ARM NEON intrinsics for optimal
performance on the ARM Cortex-A7 platform.
---
 internal/audio/c/audio.c | 58 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/internal/audio/c/audio.c b/internal/audio/c/audio.c
index 20865010..d0241aac 100644
--- a/internal/audio/c/audio.c
+++ b/internal/audio/c/audio.c
@@ -206,6 +206,61 @@ static inline void simd_clear_samples_s16(short * __restrict__ buffer, uint32_t
     }
 }
 
+/**
+ * Soft-clip 16-bit PCM samples in place using a piecewise-linear 4:1 knee.
+ * Samples within [-30720, 30720] pass through unchanged. Beyond that
+ * threshold (0.9375 of full scale) only a quarter of the excess is kept:
+ *     out = t + ((in - t) >> 2),  t = +30720 or -30720
+ * The result always lies in [-31232, 31231], so it cannot leave the int16
+ * range and no separate clamp is required. Both paths shift arithmetically
+ * (rounding toward negative infinity) and therefore agree bit for bit; the
+ * scalar path relies on the compiler doing so for negative values.
+ *
+ * buffer:  samples to process, overwritten with the result
+ * samples: number of entries in buffer; 8 are processed per NEON iteration
+ */
+static inline void simd_soft_clip_s16(short * __restrict__ buffer, uint32_t samples) {
+	const int16_t threshold = 30720;  // 0.9375 * 32768
+	const int16x8_t thresh_pos = vdupq_n_s16(threshold);
+	const int16x8_t thresh_neg = vdupq_n_s16(-threshold);
+
+	uint32_t i = 0;
+	const uint32_t simd_samples = samples & ~7U;  // largest multiple of 8
+
+	// NEON: 8 samples per iteration
+	for (; i < simd_samples; i += 8) {
+		const int16x8_t in = vld1q_s16(&buffer[i]);
+
+		// Per-lane masks: all ones where the sample is past a threshold
+		const uint16x8_t exceeds_pos = vcgtq_s16(in, thresh_pos);
+		const uint16x8_t exceeds_neg = vcltq_s16(in, thresh_neg);
+
+		// threshold + excess/4; wrapped values in unselected lanes are discarded
+		const int16x8_t soft_pos =
+			vaddq_s16(thresh_pos, vshrq_n_s16(vsubq_s16(in, thresh_pos), 2));
+		const int16x8_t soft_neg =
+			vaddq_s16(thresh_neg, vshrq_n_s16(vsubq_s16(in, thresh_neg), 2));
+
+		// The two masks are mutually exclusive, so the selection order is free
+		int16x8_t out = vbslq_s16(exceeds_pos, soft_pos, in);
+		out = vbslq_s16(exceeds_neg, soft_neg, out);
+
+		vst1q_s16(&buffer[i], out);
+	}
+
+	// Scalar: remaining samples (fewer than 8)
+	for (; i < samples; i++) {
+		int32_t sample = buffer[i];
+		if (sample > threshold) {
+			sample = threshold + ((sample - threshold) >> 2);
+		} else if (sample < -threshold) {
+			// Negative excess: arithmetic right shift, same as the NEON path
+			sample = -threshold + ((sample + threshold) >> 2);
+		}
+		buffer[i] = (short)sample;
+	}
+}
+
 // INITIALIZATION STATE TRACKING
 
 static volatile sig_atomic_t capture_initializing = 0;
@@ -755,6 +810,9 @@ retry_read:
 		return -1;
 	}
 
+	// Apply soft-clipping to prevent digital clipping distortion on sharp transients
+	simd_soft_clip_s16(pcm_to_encode, opus_frame_size * capture_channels);
+
 	nb_bytes = opus_encode(enc, pcm_to_encode, opus_frame_size, out, max_packet_size);
 
 	if (__builtin_expect(nb_bytes < 0, 0)) {