mirror of https://github.com/jetkvm/kvm.git
482 lines
12 KiB
Go
482 lines
12 KiB
Go
//go:build cgo
|
|
// +build cgo
|
|
|
|
package audio
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"os"
|
|
"os/exec"
|
|
"sync"
|
|
"sync/atomic"
|
|
"syscall"
|
|
"time"
|
|
|
|
"github.com/jetkvm/kvm/internal/logging"
|
|
"github.com/rs/zerolog"
|
|
)
|
|
|
|
// Restart configuration is now retrieved from centralized config
|
|
func getMaxRestartAttempts() int {
|
|
return GetConfig().MaxRestartAttempts
|
|
}
|
|
|
|
func getRestartWindow() time.Duration {
|
|
return GetConfig().RestartWindow
|
|
}
|
|
|
|
func getRestartDelay() time.Duration {
|
|
return GetConfig().RestartDelay
|
|
}
|
|
|
|
func getMaxRestartDelay() time.Duration {
|
|
return GetConfig().MaxRestartDelay
|
|
}
|
|
|
|
// AudioOutputSupervisor manages the audio output server subprocess lifecycle
|
|
type AudioOutputSupervisor struct {
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
logger *zerolog.Logger
|
|
mutex sync.RWMutex
|
|
running int32
|
|
|
|
// Process management
|
|
cmd *exec.Cmd
|
|
processPID int
|
|
|
|
// Restart management
|
|
restartAttempts []time.Time
|
|
lastExitCode int
|
|
lastExitTime time.Time
|
|
|
|
// Channels for coordination
|
|
processDone chan struct{}
|
|
stopChan chan struct{}
|
|
stopChanClosed bool // Track if stopChan is closed
|
|
processDoneClosed bool // Track if processDone is closed
|
|
|
|
// Process monitoring
|
|
processMonitor *ProcessMonitor
|
|
|
|
// Callbacks
|
|
onProcessStart func(pid int)
|
|
onProcessExit func(pid int, exitCode int, crashed bool)
|
|
onRestart func(attempt int, delay time.Duration)
|
|
}
|
|
|
|
// NewAudioOutputSupervisor creates a new audio output server supervisor
|
|
func NewAudioOutputSupervisor() *AudioOutputSupervisor {
|
|
ctx, cancel := context.WithCancel(context.Background())
|
|
logger := logging.GetDefaultLogger().With().Str("component", AudioOutputSupervisorComponent).Logger()
|
|
|
|
return &AudioOutputSupervisor{
|
|
ctx: ctx,
|
|
cancel: cancel,
|
|
logger: &logger,
|
|
processDone: make(chan struct{}),
|
|
stopChan: make(chan struct{}),
|
|
processMonitor: GetProcessMonitor(),
|
|
}
|
|
}
|
|
|
|
// SetCallbacks sets optional callbacks for process lifecycle events
|
|
func (s *AudioOutputSupervisor) SetCallbacks(
|
|
onStart func(pid int),
|
|
onExit func(pid int, exitCode int, crashed bool),
|
|
onRestart func(attempt int, delay time.Duration),
|
|
) {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
s.onProcessStart = onStart
|
|
s.onProcessExit = onExit
|
|
s.onRestart = onRestart
|
|
}
|
|
|
|
// Start begins supervising the audio output server process
|
|
func (s *AudioOutputSupervisor) Start() error {
|
|
if !atomic.CompareAndSwapInt32(&s.running, 0, 1) {
|
|
return fmt.Errorf("audio output supervisor is already running")
|
|
}
|
|
|
|
s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("starting component")
|
|
|
|
// Recreate channels in case they were closed by a previous Stop() call
|
|
s.mutex.Lock()
|
|
s.processDone = make(chan struct{})
|
|
s.stopChan = make(chan struct{})
|
|
s.stopChanClosed = false // Reset channel closed flag
|
|
s.processDoneClosed = false // Reset channel closed flag
|
|
// Recreate context as well since it might have been cancelled
|
|
s.ctx, s.cancel = context.WithCancel(context.Background())
|
|
// Reset restart tracking on start
|
|
s.restartAttempts = s.restartAttempts[:0]
|
|
s.lastExitCode = 0
|
|
s.lastExitTime = time.Time{}
|
|
s.mutex.Unlock()
|
|
|
|
// Start the supervision loop
|
|
go s.supervisionLoop()
|
|
|
|
s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component started successfully")
|
|
return nil
|
|
}
|
|
|
|
// Stop gracefully stops the audio server and supervisor
|
|
func (s *AudioOutputSupervisor) Stop() {
|
|
if !atomic.CompareAndSwapInt32(&s.running, 1, 0) {
|
|
return // Already stopped
|
|
}
|
|
|
|
s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("stopping component")
|
|
|
|
// Signal stop and wait for cleanup
|
|
s.mutex.Lock()
|
|
if !s.stopChanClosed {
|
|
close(s.stopChan)
|
|
s.stopChanClosed = true
|
|
}
|
|
s.mutex.Unlock()
|
|
s.cancel()
|
|
|
|
// Wait for process to exit
|
|
select {
|
|
case <-s.processDone:
|
|
s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component stopped gracefully")
|
|
case <-time.After(GetConfig().SupervisorTimeout):
|
|
s.logger.Warn().Str("component", AudioOutputSupervisorComponent).Msg("component did not stop gracefully, forcing termination")
|
|
s.forceKillProcess()
|
|
}
|
|
|
|
s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component stopped")
|
|
}
|
|
|
|
// IsRunning returns true if the supervisor is running
|
|
func (s *AudioOutputSupervisor) IsRunning() bool {
|
|
return atomic.LoadInt32(&s.running) == 1
|
|
}
|
|
|
|
// GetProcessPID returns the current process PID (0 if not running)
|
|
func (s *AudioOutputSupervisor) GetProcessPID() int {
|
|
s.mutex.RLock()
|
|
defer s.mutex.RUnlock()
|
|
return s.processPID
|
|
}
|
|
|
|
// GetLastExitInfo returns information about the last process exit
|
|
func (s *AudioOutputSupervisor) GetLastExitInfo() (exitCode int, exitTime time.Time) {
|
|
s.mutex.RLock()
|
|
defer s.mutex.RUnlock()
|
|
return s.lastExitCode, s.lastExitTime
|
|
}
|
|
|
|
// GetProcessMetrics returns current process metrics if the process is running
|
|
func (s *AudioOutputSupervisor) GetProcessMetrics() *ProcessMetrics {
|
|
s.mutex.RLock()
|
|
pid := s.processPID
|
|
s.mutex.RUnlock()
|
|
|
|
if pid == 0 {
|
|
// Return default metrics when no process is running
|
|
return &ProcessMetrics{
|
|
PID: 0,
|
|
CPUPercent: 0.0,
|
|
MemoryRSS: 0,
|
|
MemoryVMS: 0,
|
|
MemoryPercent: 0.0,
|
|
Timestamp: time.Now(),
|
|
ProcessName: "audio-output-server",
|
|
}
|
|
}
|
|
|
|
metrics := s.processMonitor.GetCurrentMetrics()
|
|
for _, metric := range metrics {
|
|
if metric.PID == pid {
|
|
return &metric
|
|
}
|
|
}
|
|
|
|
// Return default metrics if process not found in monitor
|
|
return &ProcessMetrics{
|
|
PID: pid,
|
|
CPUPercent: 0.0,
|
|
MemoryRSS: 0,
|
|
MemoryVMS: 0,
|
|
MemoryPercent: 0.0,
|
|
Timestamp: time.Now(),
|
|
ProcessName: "audio-output-server",
|
|
}
|
|
}
|
|
|
|
// supervisionLoop is the main supervision loop
|
|
func (s *AudioOutputSupervisor) supervisionLoop() {
|
|
defer func() {
|
|
s.mutex.Lock()
|
|
if !s.processDoneClosed {
|
|
close(s.processDone)
|
|
s.processDoneClosed = true
|
|
}
|
|
s.mutex.Unlock()
|
|
s.logger.Info().Msg("audio server supervision ended")
|
|
}()
|
|
|
|
for atomic.LoadInt32(&s.running) == 1 {
|
|
select {
|
|
case <-s.stopChan:
|
|
s.logger.Info().Msg("received stop signal")
|
|
s.terminateProcess()
|
|
return
|
|
case <-s.ctx.Done():
|
|
s.logger.Info().Msg("context cancelled")
|
|
s.terminateProcess()
|
|
return
|
|
default:
|
|
// Start or restart the process
|
|
if err := s.startProcess(); err != nil {
|
|
s.logger.Error().Err(err).Msg("failed to start audio server process")
|
|
|
|
// Check if we should attempt restart
|
|
if !s.shouldRestart() {
|
|
s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
|
|
return
|
|
}
|
|
|
|
delay := s.calculateRestartDelay()
|
|
s.logger.Warn().Dur("delay", delay).Msg("retrying process start after delay")
|
|
|
|
if s.onRestart != nil {
|
|
s.onRestart(len(s.restartAttempts), delay)
|
|
}
|
|
|
|
select {
|
|
case <-time.After(delay):
|
|
case <-s.stopChan:
|
|
return
|
|
case <-s.ctx.Done():
|
|
return
|
|
}
|
|
continue
|
|
}
|
|
|
|
// Wait for process to exit
|
|
s.waitForProcessExit()
|
|
|
|
// Check if we should restart
|
|
if !s.shouldRestart() {
|
|
s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor")
|
|
return
|
|
}
|
|
|
|
// Calculate restart delay
|
|
delay := s.calculateRestartDelay()
|
|
s.logger.Info().Dur("delay", delay).Msg("restarting audio server process after delay")
|
|
|
|
if s.onRestart != nil {
|
|
s.onRestart(len(s.restartAttempts), delay)
|
|
}
|
|
|
|
// Wait for restart delay
|
|
select {
|
|
case <-time.After(delay):
|
|
case <-s.stopChan:
|
|
return
|
|
case <-s.ctx.Done():
|
|
return
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// startProcess starts the audio server process
|
|
func (s *AudioOutputSupervisor) startProcess() error {
|
|
execPath, err := os.Executable()
|
|
if err != nil {
|
|
return fmt.Errorf("failed to get executable path: %w", err)
|
|
}
|
|
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
// Create new command
|
|
s.cmd = exec.CommandContext(s.ctx, execPath, "--audio-output-server")
|
|
s.cmd.Stdout = os.Stdout
|
|
s.cmd.Stderr = os.Stderr
|
|
|
|
// Start the process
|
|
if err := s.cmd.Start(); err != nil {
|
|
return fmt.Errorf("failed to start audio output server process: %w", err)
|
|
}
|
|
|
|
s.processPID = s.cmd.Process.Pid
|
|
s.logger.Info().Int("pid", s.processPID).Msg("audio server process started")
|
|
|
|
// Add process to monitoring
|
|
s.processMonitor.AddProcess(s.processPID, "audio-output-server")
|
|
|
|
if s.onProcessStart != nil {
|
|
s.onProcessStart(s.processPID)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// waitForProcessExit waits for the current process to exit and logs the result
|
|
func (s *AudioOutputSupervisor) waitForProcessExit() {
|
|
s.mutex.RLock()
|
|
cmd := s.cmd
|
|
pid := s.processPID
|
|
s.mutex.RUnlock()
|
|
|
|
if cmd == nil {
|
|
return
|
|
}
|
|
|
|
// Wait for process to exit
|
|
err := cmd.Wait()
|
|
|
|
s.mutex.Lock()
|
|
s.lastExitTime = time.Now()
|
|
s.processPID = 0
|
|
|
|
var exitCode int
|
|
var crashed bool
|
|
|
|
if err != nil {
|
|
if exitError, ok := err.(*exec.ExitError); ok {
|
|
exitCode = exitError.ExitCode()
|
|
crashed = exitCode != 0
|
|
} else {
|
|
// Process was killed or other error
|
|
exitCode = -1
|
|
crashed = true
|
|
}
|
|
} else {
|
|
exitCode = 0
|
|
crashed = false
|
|
}
|
|
|
|
s.lastExitCode = exitCode
|
|
s.mutex.Unlock()
|
|
|
|
// Remove process from monitoring
|
|
s.processMonitor.RemoveProcess(pid)
|
|
|
|
if crashed {
|
|
s.logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msg("audio server process crashed")
|
|
s.recordRestartAttempt()
|
|
} else {
|
|
s.logger.Info().Int("pid", pid).Msg("audio server process exited gracefully")
|
|
}
|
|
|
|
if s.onProcessExit != nil {
|
|
s.onProcessExit(pid, exitCode, crashed)
|
|
}
|
|
}
|
|
|
|
// terminateProcess gracefully terminates the current process
|
|
func (s *AudioOutputSupervisor) terminateProcess() {
|
|
s.mutex.RLock()
|
|
cmd := s.cmd
|
|
pid := s.processPID
|
|
s.mutex.RUnlock()
|
|
|
|
if cmd == nil || cmd.Process == nil {
|
|
return
|
|
}
|
|
|
|
s.logger.Info().Int("pid", pid).Msg("terminating audio server process")
|
|
|
|
// Send SIGTERM first
|
|
if err := cmd.Process.Signal(syscall.SIGTERM); err != nil {
|
|
s.logger.Warn().Err(err).Int("pid", pid).Msg("failed to send SIGTERM")
|
|
}
|
|
|
|
// Wait for graceful shutdown
|
|
done := make(chan struct{})
|
|
go func() {
|
|
_ = cmd.Wait()
|
|
close(done)
|
|
}()
|
|
|
|
select {
|
|
case <-done:
|
|
s.logger.Info().Int("pid", pid).Msg("audio server process terminated gracefully")
|
|
case <-time.After(GetConfig().OutputSupervisorTimeout):
|
|
s.logger.Warn().Int("pid", pid).Msg("process did not terminate gracefully, sending SIGKILL")
|
|
s.forceKillProcess()
|
|
}
|
|
}
|
|
|
|
// forceKillProcess forcefully kills the current process
|
|
func (s *AudioOutputSupervisor) forceKillProcess() {
|
|
s.mutex.RLock()
|
|
cmd := s.cmd
|
|
pid := s.processPID
|
|
s.mutex.RUnlock()
|
|
|
|
if cmd == nil || cmd.Process == nil {
|
|
return
|
|
}
|
|
|
|
s.logger.Warn().Int("pid", pid).Msg("force killing audio server process")
|
|
if err := cmd.Process.Kill(); err != nil {
|
|
s.logger.Error().Err(err).Int("pid", pid).Msg("failed to kill process")
|
|
}
|
|
}
|
|
|
|
// shouldRestart determines if the process should be restarted
|
|
func (s *AudioOutputSupervisor) shouldRestart() bool {
|
|
if atomic.LoadInt32(&s.running) == 0 {
|
|
return false // Supervisor is stopping
|
|
}
|
|
|
|
s.mutex.RLock()
|
|
defer s.mutex.RUnlock()
|
|
|
|
// Clean up old restart attempts outside the window
|
|
now := time.Now()
|
|
var recentAttempts []time.Time
|
|
for _, attempt := range s.restartAttempts {
|
|
if now.Sub(attempt) < getRestartWindow() {
|
|
recentAttempts = append(recentAttempts, attempt)
|
|
}
|
|
}
|
|
s.restartAttempts = recentAttempts
|
|
|
|
return len(s.restartAttempts) < getMaxRestartAttempts()
|
|
}
|
|
|
|
// recordRestartAttempt records a restart attempt
|
|
func (s *AudioOutputSupervisor) recordRestartAttempt() {
|
|
s.mutex.Lock()
|
|
defer s.mutex.Unlock()
|
|
|
|
s.restartAttempts = append(s.restartAttempts, time.Now())
|
|
}
|
|
|
|
// calculateRestartDelay calculates the delay before next restart attempt
|
|
func (s *AudioOutputSupervisor) calculateRestartDelay() time.Duration {
|
|
s.mutex.RLock()
|
|
defer s.mutex.RUnlock()
|
|
|
|
// Exponential backoff based on recent restart attempts
|
|
attempts := len(s.restartAttempts)
|
|
if attempts == 0 {
|
|
return getRestartDelay()
|
|
}
|
|
|
|
// Calculate exponential backoff: 2^attempts * base delay
|
|
delay := getRestartDelay()
|
|
for i := 0; i < attempts && delay < getMaxRestartDelay(); i++ {
|
|
delay *= 2
|
|
}
|
|
|
|
if delay > getMaxRestartDelay() {
|
|
delay = getMaxRestartDelay()
|
|
}
|
|
|
|
return delay
|
|
}
|