refactor(audio): improve memory management with atomic operations and chunk allocation

- Replace mutex-protected refCount with atomic operations in ZeroCopyFramePool
- Implement chunk-based allocation in AudioBufferPool to reduce allocations
- Add proper reference counting with atomic operations in ZeroCopyAudioFrame
- Optimize buffer pool sizing based on buffer size
2025-09-08 08:25:42 +00:00 · 2025-09-08 08:25:42 +00:00 · 323d2587b7
parent a6913bf33b
commit 323d2587b7
4 changed files with 127 additions and 74 deletions
--- a/internal/audio/util_buffer_pool.go
+++ b/internal/audio/util_buffer_pool.go
@ -354,6 +354,12 @@ type AudioBufferPool struct {
 	// Memory optimization fields
 	preallocated []*[]byte // Pre-allocated buffers for immediate use
 	preallocSize int       // Number of pre-allocated buffers
 	// Chunk-based allocation optimization
 	chunkSize     int      // Size of each memory chunk
 	chunks        [][]byte // Pre-allocated memory chunks
 	chunkOffsets  []int    // Current offset in each chunk
 	chunkMutex    sync.Mutex // Protects chunk allocation
 }
 func NewAudioBufferPool(bufferSize int) *AudioBufferPool {
@ -379,29 +385,74 @@ func NewAudioBufferPool(bufferSize int) *AudioBufferPool {
 		preallocSize = minPrealloc
 	}
-	// Pre-allocate with exact capacity to avoid slice growth
+	// Calculate max pool size based on buffer size to prevent memory bloat
-	preallocated := make([]*[]byte, 0, preallocSize)
+	maxPoolSize := 256 // Default
 	if bufferSize > 8192 {
 		maxPoolSize = 64 // Much smaller for very large buffers
 	} else if bufferSize > 4096 {
 		maxPoolSize = 128 // Smaller for large buffers
 	} else if bufferSize > 1024 {
 		maxPoolSize = 192 // Medium for medium buffers
 	}
 	// Calculate chunk size - allocate larger chunks to reduce allocation frequency
 	chunkSize := bufferSize * 64 // Each chunk holds 64 buffers worth of memory
 	if chunkSize < 64*1024 {
 		chunkSize = 64 * 1024 // Minimum 64KB chunks
 	}
 	p := &AudioBufferPool{
 		bufferSize:   bufferSize,
 		maxPoolSize:  maxPoolSize,
 		preallocated: make([]*[]byte, 0, preallocSize),
 		preallocSize: preallocSize,
 		chunkSize:    chunkSize,
 		chunks:       make([][]byte, 0, 4), // Start with capacity for 4 chunks
 		chunkOffsets: make([]int, 0, 4),
 	}
 	// Configure sync.Pool with optimized allocation
 	p.pool.New = func() interface{} {
 		// Use chunk-based allocation instead of individual make()
 		buf := p.allocateFromChunk()
 		return &buf
 	}
 	// Pre-allocate buffers with optimized capacity
 	for i := 0; i < preallocSize; i++ {
-		// Use exact buffer size to prevent over-allocation
+		// Use chunk-based allocation to prevent over-allocation
-		buf := make([]byte, 0, bufferSize)
+		buf := p.allocateFromChunk()
-		preallocated = append(preallocated, &buf)
+		p.preallocated = append(p.preallocated, &buf)
 	}
-	return &AudioBufferPool{
+	return p
 		bufferSize:   bufferSize,
 		maxPoolSize:  GetConfig().MaxPoolSize * 2, // Double the max pool size for better buffering
 		preallocated: preallocated,
 		preallocSize: preallocSize,
 		pool: sync.Pool{
 			New: func() interface{} {
 				// Allocate exact size to minimize memory waste
 				buf := make([]byte, 0, bufferSize)
 				return &buf
 			},
 		},
 }
 // allocateFromChunk carves a buffer out of a shared memory chunk instead of
 // allocating it individually. Chunks are created lazily: a new one is
 // allocated only when no tracked chunk has room for another buffer.
 //
 // The returned slice has length 0 and capacity p.bufferSize. The capacity is
 // capped with a full slice expression so an append past it reallocates rather
 // than writing into the neighbouring buffer of the same chunk.
 //
 // Exhausted chunks are removed from p.chunks. The pool never resets chunk
 // offsets, so a full chunk can never serve another buffer; keeping it
 // referenced would only pin its memory after all of its buffers are gone.
 //
 // The method takes p.chunkMutex for the duration of the call.
 func (p *AudioBufferPool) allocateFromChunk() []byte {
 	p.chunkMutex.Lock()
 	defer p.chunkMutex.Unlock()

 	// Try to allocate from a chunk that still has room.
 	for i := range p.chunks {
 		start := p.chunkOffsets[i]
 		end := start + p.bufferSize
 		if end <= len(p.chunks[i]) {
 			p.chunkOffsets[i] = end
 			// Zero length, capacity limited to exactly one buffer.
 			return p.chunks[i][start:start:end]
 		}
 	}

 	// Every tracked chunk is exhausted. Forget them so the garbage collector
 	// can reclaim each one once the buffers carved from it are unreachable.
 	for i := range p.chunks {
 		p.chunks[i] = nil
 	}
 	p.chunks = p.chunks[:0]
 	p.chunkOffsets = p.chunkOffsets[:0]

 	// A chunk must hold at least one buffer; otherwise the slice below would
 	// be out of range.
 	size := p.chunkSize
 	if size < p.bufferSize {
 		size = p.bufferSize
 	}

 	newChunk := make([]byte, size)
 	p.chunks = append(p.chunks, newChunk)
 	p.chunkOffsets = append(p.chunkOffsets, p.bufferSize)

 	// Hand out the first buffer of the new chunk.
 	return newChunk[0:0:p.bufferSize]
 }
 func (p *AudioBufferPool) Get() []byte {
@ -459,10 +510,10 @@ func (p *AudioBufferPool) Get() []byte {
 		// Buffer too small, fall through to allocation
 	}
-	// Pool miss - allocate new buffer with exact capacity
+	// Pool miss - allocate new buffer from chunk
 	// Direct miss count update to avoid sampling complexity in critical path
 	atomic.AddInt64(&p.missCount, 1)
-	return make([]byte, 0, p.bufferSize)
+	return p.allocateFromChunk()
 }
 func (p *AudioBufferPool) Put(buf []byte) {
--- a/internal/audio/zero_copy.go
+++ b/internal/audio/zero_copy.go
@ -147,7 +147,7 @@ func (p *ZeroCopyFramePool) Get() *ZeroCopyAudioFrame {
 		// If we've allocated too many frames, force pool reuse
 		frame := p.pool.Get().(*ZeroCopyAudioFrame)
 		frame.mutex.Lock()
-		frame.refCount = 1
+		atomic.StoreInt32(&frame.refCount, 1)
 		frame.length = 0
 		frame.data = frame.data[:0]
 		frame.mutex.Unlock()
@ -163,11 +163,12 @@ func (p *ZeroCopyFramePool) Get() *ZeroCopyAudioFrame {
 		p.mutex.Unlock()
 		frame.mutex.Lock()
-		frame.refCount = 1
+		atomic.StoreInt32(&frame.refCount, 1)
 		frame.length = 0
 		frame.data = frame.data[:0]
 		frame.mutex.Unlock()
 		atomic.AddInt64(&p.hitCount, 1)
 		return frame
 	}
 	p.mutex.Unlock()
@ -175,7 +176,7 @@ func (p *ZeroCopyFramePool) Get() *ZeroCopyAudioFrame {
 	// Try sync.Pool next and track allocation
 	frame := p.pool.Get().(*ZeroCopyAudioFrame)
 	frame.mutex.Lock()
-	frame.refCount = 1
+	atomic.StoreInt32(&frame.refCount, 1)
 	frame.length = 0
 	frame.data = frame.data[:0]
 	frame.mutex.Unlock()
@ -191,10 +192,9 @@ func (p *ZeroCopyFramePool) Put(frame *ZeroCopyAudioFrame) {
 		return
 	}
 	// Reset frame state for reuse
 	frame.mutex.Lock()
-	frame.refCount--
+	atomic.StoreInt32(&frame.refCount, 0)
 	if frame.refCount <= 0 {
 		frame.refCount = 0
 	frame.length = 0
 	frame.data = frame.data[:0]
 	frame.mutex.Unlock()
@ -219,16 +219,8 @@ func (p *ZeroCopyFramePool) Put(frame *ZeroCopyAudioFrame) {
 	// Return to sync.Pool
 	p.pool.Put(frame)
 		// Metrics collection removed
 		if false {
 	atomic.AddInt64(&p.counter, 1)
 }
 	} else {
 		frame.mutex.Unlock()
 	}
 	// Metrics recording removed - granular metrics collector was unused
 }
 // Data returns the frame data as a slice (zero-copy view)
 func (f *ZeroCopyAudioFrame) Data() []byte {
@ -271,18 +263,28 @@ func (f *ZeroCopyAudioFrame) SetDataDirect(data []byte) {
 	f.pooled = false // Direct assignment means we can't pool this frame
 }
-// AddRef increments the reference count for shared access
+// AddRef increments the reference count atomically
 func (f *ZeroCopyAudioFrame) AddRef() {
-	f.mutex.Lock()
+	atomic.AddInt32(&f.refCount, 1)
 	f.refCount++
 	f.mutex.Unlock()
 }
-// Release decrements the reference count
+// Release decrements the reference count atomically
-func (f *ZeroCopyAudioFrame) Release() {
+// Returns true if this was the final reference
-	f.mutex.Lock()
+func (f *ZeroCopyAudioFrame) Release() bool {
-	f.refCount--
+	newCount := atomic.AddInt32(&f.refCount, -1)
-	f.mutex.Unlock()
+	if newCount == 0 {
 		// Final reference released, return to pool if pooled
 		if f.pooled {
 			globalZeroCopyPool.Put(f)
 		}
 		return true
 	}
 	return false
 }
 // RefCount reports how many references are currently recorded on the frame.
 // The counter is read with a single atomic load; no lock is taken.
 func (f *ZeroCopyAudioFrame) RefCount() int32 {
 	current := atomic.LoadInt32(&f.refCount)
 	return current
 }
 // Length returns the current data length