mirror of https://github.com/jetkvm/kvm.git
perf(audio): optimize validation and buffer pool with caching
- Cache max frame size in validation for hot path performance
- Add lock-free per-goroutine buffer cache to reduce contention
This commit is contained in:
parent
25363cef90
commit
cdf0b20bc7
|
@ -1,13 +1,43 @@
|
||||||
package audio
|
package audio
|
||||||
|
|
||||||
import (
|
import (
|
||||||
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"sync/atomic"
|
"sync/atomic"
|
||||||
"time"
|
"time"
|
||||||
|
"unsafe"
|
||||||
|
|
||||||
"github.com/jetkvm/kvm/internal/logging"
|
"github.com/jetkvm/kvm/internal/logging"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// lockFreeBufferCache is a small fixed-size set of cached buffers belonging to
// one goroutine.
//
// Each slot holds a pointer to a buffer, or nil when the slot is empty. The
// struct itself performs no synchronization: AudioBufferPool.Get and
// AudioBufferPool.Put claim and fill slots with atomic pointer operations
// (LoadPointer / CompareAndSwapPointer) on the individual array elements.
type lockFreeBufferCache struct {
	buffers [4]*[]byte // nil slot == empty; accessed atomically by Get/Put
}

// Registry of per-goroutine buffer caches.
//
// Go has no goroutine-local storage, so the caches live in a package-level map
// keyed by the goroutine ID returned by getGoroutineID. Every access to the map
// must hold goroutineCacheMutex (read lock for lookups, write lock for inserts).
//
// NOTE(review): nothing visible in this file removes entries, so the map may
// grow as goroutines come and go — confirm whether cleanup exists elsewhere.
var (
	goroutineBufferCache = make(map[int64]*lockFreeBufferCache)
	goroutineCacheMutex  sync.RWMutex
)
|
||||||
|
|
||||||
|
// getGoroutineID returns the numeric ID of the calling goroutine, or 0 if the
// ID cannot be determined.
//
// The ID is parsed from the header line that runtime.Stack writes for the
// current goroutine, which has the form "goroutine 123 [running]:". The
// result is used as the key into the per-goroutine buffer cache.
func getGoroutineID() int64 {
	const prefix = "goroutine "

	// 64 bytes is enough for the header line; the rest of the trace is
	// truncated by runtime.Stack and is not needed.
	var stack [64]byte
	header := stack[:runtime.Stack(stack[:], false)]

	// Verify the expected prefix instead of assuming a fixed offset.
	if len(header) < len(prefix) || string(header[:len(prefix)]) != prefix {
		return 0
	}

	// Accumulate decimal digits and stop at the first non-digit (the space
	// that precedes "[running]").
	var id int64
	for _, c := range header[len(prefix):] {
		if c < '0' || c > '9' {
			break
		}
		id = id*10 + int64(c-'0')
	}
	return id
}
|
||||||
|
|
||||||
type AudioBufferPool struct {
|
type AudioBufferPool struct {
|
||||||
// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
|
// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
|
||||||
currentSize int64 // Current pool size (atomic)
|
currentSize int64 // Current pool size (atomic)
|
||||||
|
@ -81,7 +111,27 @@ func (p *AudioBufferPool) Get() []byte {
|
||||||
}
|
}
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// First try to get from pre-allocated pool for fastest access
|
// Fast path: Try lock-free per-goroutine cache first
|
||||||
|
gid := getGoroutineID()
|
||||||
|
goroutineCacheMutex.RLock()
|
||||||
|
cache, exists := goroutineBufferCache[gid]
|
||||||
|
goroutineCacheMutex.RUnlock()
|
||||||
|
|
||||||
|
if exists && cache != nil {
|
||||||
|
// Try to get buffer from lock-free cache
|
||||||
|
for i := 0; i < len(cache.buffers); i++ {
|
||||||
|
bufPtr := (*unsafe.Pointer)(unsafe.Pointer(&cache.buffers[i]))
|
||||||
|
buf := (*[]byte)(atomic.LoadPointer(bufPtr))
|
||||||
|
if buf != nil && atomic.CompareAndSwapPointer(bufPtr, unsafe.Pointer(buf), nil) {
|
||||||
|
atomic.AddInt64(&p.hitCount, 1)
|
||||||
|
wasHit = true
|
||||||
|
*buf = (*buf)[:0]
|
||||||
|
return *buf
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: Try pre-allocated pool with mutex
|
||||||
p.mutex.Lock()
|
p.mutex.Lock()
|
||||||
if len(p.preallocated) > 0 {
|
if len(p.preallocated) > 0 {
|
||||||
lastIdx := len(p.preallocated) - 1
|
lastIdx := len(p.preallocated) - 1
|
||||||
|
@ -141,7 +191,31 @@ func (p *AudioBufferPool) Put(buf []byte) {
|
||||||
// Reset buffer for reuse - clear any sensitive data
|
// Reset buffer for reuse - clear any sensitive data
|
||||||
resetBuf := buf[:0]
|
resetBuf := buf[:0]
|
||||||
|
|
||||||
// First try to return to pre-allocated pool for fastest reuse
|
// Fast path: Try to put in lock-free per-goroutine cache
|
||||||
|
gid := getGoroutineID()
|
||||||
|
goroutineCacheMutex.RLock()
|
||||||
|
cache, exists := goroutineBufferCache[gid]
|
||||||
|
goroutineCacheMutex.RUnlock()
|
||||||
|
|
||||||
|
if !exists {
|
||||||
|
// Create new cache for this goroutine
|
||||||
|
cache = &lockFreeBufferCache{}
|
||||||
|
goroutineCacheMutex.Lock()
|
||||||
|
goroutineBufferCache[gid] = cache
|
||||||
|
goroutineCacheMutex.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
if cache != nil {
|
||||||
|
// Try to store in lock-free cache
|
||||||
|
for i := 0; i < len(cache.buffers); i++ {
|
||||||
|
bufPtr := (*unsafe.Pointer)(unsafe.Pointer(&cache.buffers[i]))
|
||||||
|
if atomic.CompareAndSwapPointer(bufPtr, nil, unsafe.Pointer(&buf)) {
|
||||||
|
return // Successfully cached
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Fallback: Try to return to pre-allocated pool for fastest reuse
|
||||||
p.mutex.Lock()
|
p.mutex.Lock()
|
||||||
if len(p.preallocated) < p.preallocSize {
|
if len(p.preallocated) < p.preallocSize {
|
||||||
p.preallocated = append(p.preallocated, &resetBuf)
|
p.preallocated = append(p.preallocated, &resetBuf)
|
||||||
|
|
|
@ -312,12 +312,40 @@ func ValidateAudioFrameFast(data []byte) error {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Cached constants for ultra-fast validation (initialized once at startup)
|
||||||
|
var (
|
||||||
|
maxFrameSizeCache = 8192 // Will be updated from config during init
|
||||||
|
)
|
||||||
|
|
||||||
|
// init initializes cached validation constants for optimal performance
|
||||||
|
//
|
||||||
|
//nolint:gochecknoinits // Required for performance-critical config caching
|
||||||
|
func init() {
|
||||||
|
// Cache the maximum frame size to avoid function calls in hot paths
|
||||||
|
if config := GetConfig(); config != nil {
|
||||||
|
maxFrameSizeCache = config.MaxAudioFrameSize
|
||||||
|
}
|
||||||
|
// Fallback to safe default if config unavailable
|
||||||
|
if maxFrameSizeCache <= 0 {
|
||||||
|
maxFrameSizeCache = 8192
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// ValidateAudioFrameUltraFast provides zero-overhead validation for ultra-critical paths
|
// ValidateAudioFrameUltraFast provides zero-overhead validation for ultra-critical paths
|
||||||
// This function only checks for nil/empty data and maximum size to prevent buffer overruns
|
// This function only checks for nil/empty data and maximum size to prevent buffer overruns
|
||||||
// Use this in hot audio processing loops where every microsecond matters
|
// Use this in hot audio processing loops where every microsecond matters
|
||||||
|
//
|
||||||
|
// Performance optimizations:
|
||||||
|
// - Uses cached max frame size to avoid config function calls
|
||||||
|
// - Single branch condition for optimal CPU pipeline efficiency
|
||||||
|
// - Inlined length checks for minimal overhead
|
||||||
|
//
|
||||||
|
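// Note: Go has no //go:inline compiler directive; the compiler decides on its own whether to inline this function.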
|
||||||
func ValidateAudioFrameUltraFast(data []byte) error {
|
func ValidateAudioFrameUltraFast(data []byte) error {
|
||||||
// Only check for catastrophic failures that could crash the system
|
// Single optimized check: empty data OR exceeds cached maximum
|
||||||
if len(data) == 0 || len(data) > 8192 { // Hard-coded 8KB safety limit
|
// This branch prediction friendly pattern minimizes CPU pipeline stalls
|
||||||
|
dataLen := len(data)
|
||||||
|
if dataLen == 0 || dataLen > maxFrameSizeCache {
|
||||||
return ErrInvalidFrameData
|
return ErrInvalidFrameData
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
|
|
Loading…
Reference in New Issue