//go:build cgo // +build cgo package audio import ( "fmt" "os" "os/exec" "strconv" "sync/atomic" "time" "github.com/rs/zerolog" ) // Component name constants for logging const ( AudioOutputSupervisorComponent = "audio-output-supervisor" ) // Restart configuration is now retrieved from centralized config func getMaxRestartAttempts() int { return GetConfig().MaxRestartAttempts } func getRestartWindow() time.Duration { return GetConfig().RestartWindow } func getRestartDelay() time.Duration { return GetConfig().RestartDelay } func getMaxRestartDelay() time.Duration { return GetConfig().MaxRestartDelay } // AudioOutputSupervisor manages the audio output server subprocess lifecycle type AudioOutputSupervisor struct { *BaseSupervisor // Restart management restartAttempts []time.Time // Environment variables for OPUS configuration opusEnv []string // Callbacks onProcessStart func(pid int) onProcessExit func(pid int, exitCode int, crashed bool) onRestart func(attempt int, delay time.Duration) } // NewAudioOutputSupervisor creates a new audio output server supervisor func NewAudioOutputSupervisor() *AudioOutputSupervisor { return &AudioOutputSupervisor{ BaseSupervisor: NewBaseSupervisor("audio-output-supervisor"), restartAttempts: make([]time.Time, 0), } } // SetCallbacks sets optional callbacks for process lifecycle events func (s *AudioOutputSupervisor) SetCallbacks( onStart func(pid int), onExit func(pid int, exitCode int, crashed bool), onRestart func(attempt int, delay time.Duration), ) { s.mutex.Lock() defer s.mutex.Unlock() s.onProcessStart = onStart s.onProcessExit = onExit s.onRestart = onRestart } // SetOpusConfig sets OPUS configuration parameters as environment variables // for the audio output subprocess func (s *AudioOutputSupervisor) SetOpusConfig(bitrate, complexity, vbr, signalType, bandwidth, dtx int) { s.mutex.Lock() defer s.mutex.Unlock() // Store OPUS parameters as environment variables s.opusEnv = []string{ "JETKVM_OPUS_BITRATE=" + strconv.Itoa(bitrate), "JETKVM_OPUS_COMPLEXITY=" + strconv.Itoa(complexity), "JETKVM_OPUS_VBR=" + strconv.Itoa(vbr), "JETKVM_OPUS_SIGNAL_TYPE=" + strconv.Itoa(signalType), "JETKVM_OPUS_BANDWIDTH=" + strconv.Itoa(bandwidth), "JETKVM_OPUS_DTX=" + strconv.Itoa(dtx), } } // Start begins supervising the audio output server process func (s *AudioOutputSupervisor) Start() error { if !atomic.CompareAndSwapInt32(&s.running, 0, 1) { return fmt.Errorf("audio output supervisor is already running") } s.logSupervisorStart() s.createContext() // Recreate channels in case they were closed by a previous Stop() call s.initializeChannels() // Reset restart tracking on start s.mutex.Lock() s.restartAttempts = s.restartAttempts[:0] s.mutex.Unlock() // Start the supervision loop go s.supervisionLoop() s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component started successfully") return nil } // Stop gracefully stops the audio server and supervisor func (s *AudioOutputSupervisor) Stop() { if !atomic.CompareAndSwapInt32(&s.running, 1, 0) { return // Already stopped } s.logSupervisorStop() // Signal stop and wait for cleanup s.closeStopChan() s.cancelContext() // Wait for process to exit select { case <-s.processDone: s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component stopped gracefully") case <-time.After(GetConfig().OutputSupervisorTimeout): s.logger.Warn().Str("component", AudioOutputSupervisorComponent).Msg("component did not stop gracefully, forcing termination") s.forceKillProcess("audio output server") } s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component stopped") } // supervisionLoop is the main supervision loop func (s *AudioOutputSupervisor) supervisionLoop() { defer func() { s.closeProcessDone() s.logger.Info().Msg("audio server supervision ended") }() for atomic.LoadInt32(&s.running) == 1 { select { case <-s.stopChan: s.logger.Info().Msg("received stop signal") s.terminateProcess(GetConfig().OutputSupervisorTimeout, "audio output server") return case <-s.ctx.Done(): s.logger.Info().Msg("context cancelled") s.terminateProcess(GetConfig().OutputSupervisorTimeout, "audio output server") return default: // Start or restart the process if err := s.startProcess(); err != nil { // Only log start errors if error level enabled to reduce overhead if s.logger.GetLevel() <= zerolog.ErrorLevel { s.logger.Error().Err(err).Msg("failed to start audio server process") } // Check if we should attempt restart if !s.shouldRestart() { // Only log critical errors to reduce overhead if s.logger.GetLevel() <= zerolog.ErrorLevel { s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor") } return } delay := s.calculateRestartDelay() // Sample logging to reduce overhead - log every 5th restart attempt if len(s.restartAttempts)%5 == 0 && s.logger.GetLevel() <= zerolog.WarnLevel { s.logger.Warn().Dur("delay", delay).Int("attempt", len(s.restartAttempts)).Msg("retrying process start after delay") } if s.onRestart != nil { s.onRestart(len(s.restartAttempts), delay) } select { case <-time.After(delay): case <-s.stopChan: return case <-s.ctx.Done(): return } continue } // Wait for process to exit s.waitForProcessExit() // Check if we should restart if !s.shouldRestart() { s.logger.Error().Msg("maximum restart attempts exceeded, stopping supervisor") return } // Calculate restart delay delay := s.calculateRestartDelay() s.logger.Info().Dur("delay", delay).Msg("restarting audio server process after delay") if s.onRestart != nil { s.onRestart(len(s.restartAttempts), delay) } // Wait for restart delay select { case <-time.After(delay): case <-s.stopChan: return case <-s.ctx.Done(): return } } } } // startProcess starts the audio server process func (s *AudioOutputSupervisor) startProcess() error { execPath, err := os.Executable() if err != nil { return fmt.Errorf("failed to get executable path: %w", err) } s.mutex.Lock() defer s.mutex.Unlock() // Build command arguments (only subprocess flag) args := []string{"--audio-output-server"} // Create new command s.cmd = exec.CommandContext(s.ctx, execPath, args...) s.cmd.Stdout = os.Stdout s.cmd.Stderr = os.Stderr // Set environment variables for OPUS configuration s.cmd.Env = append(os.Environ(), s.opusEnv...) // Start the process if err := s.cmd.Start(); err != nil { return fmt.Errorf("failed to start audio output server process: %w", err) } s.processPID = s.cmd.Process.Pid s.logger.Info().Int("pid", s.processPID).Strs("args", args).Strs("opus_env", s.opusEnv).Msg("audio server process started") // Add process to monitoring s.processMonitor.AddProcess(s.processPID, "audio-output-server") if s.onProcessStart != nil { s.onProcessStart(s.processPID) } return nil } // waitForProcessExit waits for the current process to exit and handles restart logic func (s *AudioOutputSupervisor) waitForProcessExit() { s.mutex.RLock() pid := s.processPID s.mutex.RUnlock() // Use base supervisor's waitForProcessExit s.BaseSupervisor.waitForProcessExit("audio output server") // Handle output-specific logic (restart tracking and callbacks) s.mutex.RLock() exitCode := s.lastExitCode s.mutex.RUnlock() crashed := exitCode != 0 if crashed { s.recordRestartAttempt() } if s.onProcessExit != nil { s.onProcessExit(pid, exitCode, crashed) } } // shouldRestart determines if the process should be restarted func (s *AudioOutputSupervisor) shouldRestart() bool { if atomic.LoadInt32(&s.running) == 0 { return false // Supervisor is stopping } s.mutex.RLock() defer s.mutex.RUnlock() // Clean up old restart attempts outside the window now := time.Now() var recentAttempts []time.Time for _, attempt := range s.restartAttempts { if now.Sub(attempt) < getRestartWindow() { recentAttempts = append(recentAttempts, attempt) } } s.restartAttempts = recentAttempts return len(s.restartAttempts) < getMaxRestartAttempts() } // recordRestartAttempt records a restart attempt func (s *AudioOutputSupervisor) recordRestartAttempt() { s.mutex.Lock() defer s.mutex.Unlock() s.restartAttempts = append(s.restartAttempts, time.Now()) } // calculateRestartDelay calculates the delay before next restart attempt func (s *AudioOutputSupervisor) calculateRestartDelay() time.Duration { s.mutex.RLock() defer s.mutex.RUnlock() // Exponential backoff based on recent restart attempts attempts := len(s.restartAttempts) if attempts == 0 { return getRestartDelay() } // Calculate exponential backoff: 2^attempts * base delay delay := getRestartDelay() for i := 0; i < attempts && delay < getMaxRestartDelay(); i++ { delay *= 2 } if delay > getMaxRestartDelay() { delay = getMaxRestartDelay() } return delay }