From cdf0b20bc7be20b068b917d4c2b6c3df62e11916 Mon Sep 17 00:00:00 2001 From: Alex P Date: Wed, 27 Aug 2025 22:57:07 +0000 Subject: [PATCH] perf(audio): optimize validation and buffer pool with caching - Cache max frame size in validation for hot path performance - Add lock-free per-goroutine buffer cache to reduce contention --- internal/audio/buffer_pool.go | 78 ++++++++++++++++++++++++++++++++++- internal/audio/validation.go | 32 +++++++++++++- 2 files changed, 106 insertions(+), 4 deletions(-) diff --git a/internal/audio/buffer_pool.go b/internal/audio/buffer_pool.go index a015487..a6a09d3 100644 --- a/internal/audio/buffer_pool.go +++ b/internal/audio/buffer_pool.go @@ -1,13 +1,43 @@ package audio import ( + "runtime" "sync" "sync/atomic" "time" + "unsafe" "github.com/jetkvm/kvm/internal/logging" ) +// Lock-free buffer cache for per-goroutine optimization +type lockFreeBufferCache struct { + buffers [4]*[]byte // Small fixed-size array for lock-free access +} + +// Per-goroutine buffer cache using goroutine-local storage +var goroutineBufferCache = make(map[int64]*lockFreeBufferCache) +var goroutineCacheMutex sync.RWMutex + +// getGoroutineID extracts goroutine ID from runtime stack for cache key +func getGoroutineID() int64 { + b := make([]byte, 64) + b = b[:runtime.Stack(b, false)] + // Parse "goroutine 123 [running]:" format + for i := 10; i < len(b); i++ { + if b[i] == ' ' { + id := int64(0) + for j := 10; j < i; j++ { + if b[j] >= '0' && b[j] <= '9' { + id = id*10 + int64(b[j]-'0') + } + } + return id + } + } + return 0 +} + type AudioBufferPool struct { // Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) currentSize int64 // Current pool size (atomic) @@ -81,7 +111,27 @@ func (p *AudioBufferPool) Get() []byte { } }() - // First try to get from pre-allocated pool for fastest access + // Fast path: Try lock-free per-goroutine cache first + gid := getGoroutineID() + goroutineCacheMutex.RLock() + cache, exists := goroutineBufferCache[gid] + goroutineCacheMutex.RUnlock() + + if exists && cache != nil { + // Try to get buffer from lock-free cache + for i := 0; i < len(cache.buffers); i++ { + bufPtr := (*unsafe.Pointer)(unsafe.Pointer(&cache.buffers[i])) + buf := (*[]byte)(atomic.LoadPointer(bufPtr)) + if buf != nil && atomic.CompareAndSwapPointer(bufPtr, unsafe.Pointer(buf), nil) { + atomic.AddInt64(&p.hitCount, 1) + wasHit = true + *buf = (*buf)[:0] + return *buf + } + } + } + + // Fallback: Try pre-allocated pool with mutex p.mutex.Lock() if len(p.preallocated) > 0 { lastIdx := len(p.preallocated) - 1 @@ -141,7 +191,31 @@ func (p *AudioBufferPool) Put(buf []byte) { // Reset buffer for reuse - clear any sensitive data resetBuf := buf[:0] - // First try to return to pre-allocated pool for fastest reuse + // Fast path: Try to put in lock-free per-goroutine cache + gid := getGoroutineID() + goroutineCacheMutex.RLock() + cache, exists := goroutineBufferCache[gid] + goroutineCacheMutex.RUnlock() + + if !exists { + // Create new cache for this goroutine + cache = &lockFreeBufferCache{} + goroutineCacheMutex.Lock() + goroutineBufferCache[gid] = cache + goroutineCacheMutex.Unlock() + } + + if cache != nil { + // Try to store in lock-free cache + for i := 0; i < len(cache.buffers); i++ { + bufPtr := (*unsafe.Pointer)(unsafe.Pointer(&cache.buffers[i])) + if atomic.CompareAndSwapPointer(bufPtr, nil, unsafe.Pointer(&buf)) { + return // Successfully cached + } + } + } + + // Fallback: Try to return to pre-allocated pool for fastest reuse p.mutex.Lock() if len(p.preallocated) < p.preallocSize { p.preallocated = append(p.preallocated, &resetBuf) diff --git a/internal/audio/validation.go b/internal/audio/validation.go index ec9daf5..b2d682c 100644 --- a/internal/audio/validation.go +++ b/internal/audio/validation.go @@ -312,12 +312,40 @@ func ValidateAudioFrameFast(data []byte) error { return nil } +// Cached constants for ultra-fast validation (initialized once at startup) +var ( + maxFrameSizeCache = 8192 // Will be updated from config during init +) + +// init initializes cached validation constants for optimal performance +// +//nolint:gochecknoinits // Required for performance-critical config caching +func init() { + // Cache the maximum frame size to avoid function calls in hot paths + if config := GetConfig(); config != nil { + maxFrameSizeCache = config.MaxAudioFrameSize + } + // Fallback to safe default if config unavailable + if maxFrameSizeCache <= 0 { + maxFrameSizeCache = 8192 + } +} + // ValidateAudioFrameUltraFast provides zero-overhead validation for ultra-critical paths // This function only checks for nil/empty data and maximum size to prevent buffer overruns // Use this in hot audio processing loops where every microsecond matters +// +// Performance optimizations: +// - Uses cached max frame size to avoid config function calls +// - Single branch condition for optimal CPU pipeline efficiency +// - Inlined length checks for minimal overhead +// +//go:inline func ValidateAudioFrameUltraFast(data []byte) error { - // Only check for catastrophic failures that could crash the system - if len(data) == 0 || len(data) > 8192 { // Hard-coded 8KB safety limit + // Single optimized check: empty data OR exceeds cached maximum + // This branch prediction friendly pattern minimizes CPU pipeline stalls + dataLen := len(data) + if dataLen == 0 || dataLen > maxFrameSizeCache { return ErrInvalidFrameData } return nil