[WIP] Improve the audio input activation/deactivation process so it is faster

This commit is contained in:
Alex P 2025-09-07 16:17:06 +00:00
parent e27f1cfa59
commit 7d39a2741e
4 changed files with 89 additions and 49 deletions

View File

@ -9,6 +9,11 @@ var audioMuteState struct {
mu sync.RWMutex
}
// microphoneMuteState holds the microphone mute flag, guarded by an RWMutex.
// NOTE(review): mirrors the audioMuteState struct above; presumably read on the
// hot audio-input frame path (hence RWMutex rather than Mutex) — confirm
// against the WriteOpusFrame callers that check IsMicrophoneMuted.
var microphoneMuteState struct {
muted bool
mu sync.RWMutex
}
func SetAudioMuted(muted bool) {
audioMuteState.mu.Lock()
audioMuteState.muted = muted
@ -20,3 +25,15 @@ func IsAudioMuted() bool {
defer audioMuteState.mu.RUnlock()
return audioMuteState.muted
}
// SetMicrophoneMuted records whether microphone data flow is muted.
// Safe for concurrent use; pairs with IsMicrophoneMuted.
func SetMicrophoneMuted(muted bool) {
	microphoneMuteState.mu.Lock()
	defer microphoneMuteState.mu.Unlock()
	microphoneMuteState.muted = muted
}
// IsMicrophoneMuted reports whether microphone data flow is currently muted.
// Safe for concurrent use; pairs with SetMicrophoneMuted.
func IsMicrophoneMuted() bool {
	microphoneMuteState.mu.RLock()
	muted := microphoneMuteState.muted
	microphoneMuteState.mu.RUnlock()
	return muted
}

View File

@ -119,29 +119,42 @@ func (s *AudioControlService) StopMicrophone() error {
return nil
}
// MuteMicrophone sets the microphone mute state by controlling the microphone process // MuteMicrophone sets the microphone mute state by controlling data flow (like audio output)
func (s *AudioControlService) MuteMicrophone(muted bool) error { func (s *AudioControlService) MuteMicrophone(muted bool) error {
if muted { if muted {
// Mute: Stop microphone process // Mute: Control data flow, don't stop subprocess (like audio output)
err := s.StopMicrophone() SetMicrophoneMuted(true)
if err != nil { s.logger.Info().Msg("microphone muted (data flow disabled)")
s.logger.Error().Err(err).Msg("failed to stop microphone during mute")
return err
}
s.logger.Info().Msg("microphone muted (process stopped)")
} else { } else {
// Unmute: Start microphone process // Unmute: Ensure subprocess is running, then enable data flow
err := s.StartMicrophone() if !s.sessionProvider.IsSessionActive() {
if err != nil { return errors.New("no active session for microphone unmute")
s.logger.Error().Err(err).Msg("failed to start microphone during unmute")
return err
} }
s.logger.Info().Msg("microphone unmuted (process started)")
audioInputManager := s.sessionProvider.GetAudioInputManager()
if audioInputManager == nil {
return errors.New("audio input manager not available")
}
// Start subprocess if not already running (async, non-blocking)
if !audioInputManager.IsRunning() {
go func() {
if err := audioInputManager.Start(); err != nil {
s.logger.Error().Err(err).Msg("failed to start microphone during unmute")
}
}()
}
// Enable data flow immediately
SetMicrophoneMuted(false)
s.logger.Info().Msg("microphone unmuted (data flow enabled)")
} }
// Broadcast microphone mute state change via WebSocket // Broadcast microphone state change via WebSocket
broadcaster := GetAudioEventBroadcaster() broadcaster := GetAudioEventBroadcaster()
broadcaster.BroadcastAudioDeviceChanged(!muted, "microphone_mute_changed") sessionActive := s.sessionProvider.IsSessionActive()
// With the new approach, "running" means "not muted"
broadcaster.BroadcastMicrophoneStateChanged(!muted, sessionActive)
return nil return nil
} }
@ -254,16 +267,13 @@ func (s *AudioControlService) IsAudioOutputActive() bool {
return !IsAudioMuted() && IsAudioRelayRunning() return !IsAudioMuted() && IsAudioRelayRunning()
} }
// IsMicrophoneActive returns whether the microphone subprocess is running // IsMicrophoneActive returns whether the microphone is active (not muted)
func (s *AudioControlService) IsMicrophoneActive() bool { func (s *AudioControlService) IsMicrophoneActive() bool {
if !s.sessionProvider.IsSessionActive() { if !s.sessionProvider.IsSessionActive() {
return false return false
} }
audioInputManager := s.sessionProvider.GetAudioInputManager() // With the new unified approach, microphone "active" means "not muted"
if audioInputManager == nil { // This matches how audio output works - active means not muted
return false return !IsMicrophoneMuted()
}
return audioInputManager.IsRunning()
} }

View File

@ -91,6 +91,11 @@ func (aim *AudioInputManager) WriteOpusFrame(frame []byte) error {
return nil // Not running, silently drop return nil // Not running, silently drop
} }
// Check mute state - drop frames if microphone is muted (like audio output)
if IsMicrophoneMuted() {
return nil // Muted, silently drop
}
// Use ultra-fast validation for critical audio path // Use ultra-fast validation for critical audio path
if err := ValidateAudioFrame(frame); err != nil { if err := ValidateAudioFrame(frame); err != nil {
aim.logComponentError(AudioInputManagerComponent, err, "Frame validation failed") aim.logComponentError(AudioInputManagerComponent, err, "Frame validation failed")
@ -128,6 +133,11 @@ func (aim *AudioInputManager) WriteOpusFrameZeroCopy(frame *ZeroCopyAudioFrame)
return nil // Not running, silently drop return nil // Not running, silently drop
} }
// Check mute state - drop frames if microphone is muted (like audio output)
if IsMicrophoneMuted() {
return nil // Muted, silently drop
}
if frame == nil { if frame == nil {
atomic.AddInt64(&aim.metrics.FramesDropped, 1) atomic.AddInt64(&aim.metrics.FramesDropped, 1)
return nil return nil

View File

@ -61,14 +61,14 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
// Use WebSocket-based audio events for real-time updates // Use WebSocket-based audio events for real-time updates
const { const {
audioMuted, audioMuted,
microphoneState,
isConnected: wsConnected isConnected: wsConnected
} = useAudioEvents(); } = useAudioEvents();
// WebSocket-only implementation - no fallback polling // WebSocket-only implementation - no fallback polling
// Microphone state from props // Microphone state from props (keeping hook for legacy device operations)
const { const {
isMicrophoneActive,
startMicrophone, startMicrophone,
stopMicrophone, stopMicrophone,
syncMicrophoneState, syncMicrophoneState,
@ -82,6 +82,9 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
const isMuted = audioMuted ?? false; const isMuted = audioMuted ?? false;
const isConnected = wsConnected; const isConnected = wsConnected;
// Use WebSocket microphone state instead of hook state for real-time updates
const isMicrophoneActiveFromWS = microphoneState?.running ?? false;
// Audio devices // Audio devices
@ -206,24 +209,29 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
} }
setLastClickTime(now); setLastClickTime(now);
setIsLoading(true);
try { try {
if (isMicrophoneActive) { if (isMicrophoneActiveFromWS) {
// Microphone is active: stop the microphone process and WebRTC tracks // Mute: Use unified microphone mute API (like audio output)
const result = await stopMicrophone(); const resp = await api.POST("/microphone/mute", { muted: true });
if (!result.success && result.error) { if (!resp.ok) {
notifications.error(result.error.message); throw new Error(`Failed to mute microphone: ${resp.status}`);
} }
// WebSocket will handle the state update automatically
} else { } else {
// Microphone is inactive: start the microphone process and WebRTC tracks // Unmute: Use unified microphone mute API (like audio output)
const result = await startMicrophone(selectedInputDevice); const resp = await api.POST("/microphone/mute", { muted: false });
if (!result.success && result.error) { if (!resp.ok) {
notifications.error(result.error.message); throw new Error(`Failed to unmute microphone: ${resp.status}`);
} }
// WebSocket will handle the state update automatically
} }
} catch (error) { } catch (error) {
const errorMessage = error instanceof Error ? error.message : "Failed to toggle microphone"; const errorMessage = error instanceof Error ? error.message : "Failed to toggle microphone";
notifications.error(errorMessage); notifications.error(errorMessage);
} finally {
setIsLoading(false);
} }
}; };
@ -232,7 +240,7 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
setSelectedInputDevice(deviceId); setSelectedInputDevice(deviceId);
// If microphone is currently active (unmuted), restart it with the new device // If microphone is currently active (unmuted), restart it with the new device
if (isMicrophoneActive) { if (isMicrophoneActiveFromWS) {
try { try {
// Stop current microphone // Stop current microphone
await stopMicrophone(); await stopMicrophone();
@ -317,26 +325,21 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
<div className="flex items-center justify-between rounded-lg bg-slate-50 p-3 dark:bg-slate-700"> <div className="flex items-center justify-between rounded-lg bg-slate-50 p-3 dark:bg-slate-700">
<div className="flex items-center gap-3"> <div className="flex items-center gap-3">
{isMicrophoneActive ? ( {isMicrophoneActiveFromWS ? (
<MdMic className="h-5 w-5 text-green-500" /> <MdMic className="h-5 w-5 text-green-500" />
) : ( ) : (
<MdMicOff className="h-5 w-5 text-red-500" /> <MdMicOff className="h-5 w-5 text-red-500" />
)} )}
<span className="font-medium text-slate-900 dark:text-slate-100"> <span className="font-medium text-slate-900 dark:text-slate-100">
{isMicrophoneActive ? "Unmuted" : "Muted"} {isMicrophoneActiveFromWS ? "Unmuted" : "Muted"}
</span> </span>
</div> </div>
<Button <Button
size="SM" size="SM"
theme={isMicrophoneActive ? "danger" : "primary"} theme={isMicrophoneActiveFromWS ? "danger" : "primary"}
text={ text={isMicrophoneActiveFromWS ? "Disable" : "Enable"}
isStarting ? "Enabling..." :
isStopping ? "Disabling..." :
isMicrophoneActive ? "Disable" : "Enable"
}
onClick={handleToggleMicrophoneMute} onClick={handleToggleMicrophoneMute}
disabled={isStarting || isStopping || isToggling} disabled={isLoading}
loading={isStarting || isStopping}
/> />
</div> </div>
@ -378,7 +381,7 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
</option> </option>
))} ))}
</select> </select>
{isMicrophoneActive && ( {isMicrophoneActiveFromWS && (
<p className="text-xs text-slate-500 dark:text-slate-400"> <p className="text-xs text-slate-500 dark:text-slate-400">
Changing device will restart the microphone Changing device will restart the microphone
</p> </p>
@ -415,7 +418,7 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
</div> </div>
{/* Microphone Quality Settings */} {/* Microphone Quality Settings */}
{isMicrophoneActive && ( {isMicrophoneActiveFromWS && (
<div className="space-y-3"> <div className="space-y-3">
<div className="flex items-center gap-2"> <div className="flex items-center gap-2">
<MdMic className="h-4 w-4 text-slate-600 dark:text-slate-400" /> <MdMic className="h-4 w-4 text-slate-600 dark:text-slate-400" />
@ -429,13 +432,13 @@ export default function AudioControlPopover({ microphone }: AudioControlPopoverP
<button <button
key={`mic-${quality}`} key={`mic-${quality}`}
onClick={() => handleMicrophoneQualityChange(parseInt(quality))} onClick={() => handleMicrophoneQualityChange(parseInt(quality))}
disabled={isStarting || isStopping || isToggling} disabled={isLoading}
className={cx( className={cx(
"rounded-md border px-3 py-2 text-sm font-medium transition-colors", "rounded-md border px-3 py-2 text-sm font-medium transition-colors",
currentMicrophoneConfig?.Quality === parseInt(quality) currentMicrophoneConfig?.Quality === parseInt(quality)
? "border-green-500 bg-green-50 text-green-700 dark:bg-green-900/20 dark:text-green-300" ? "border-green-500 bg-green-50 text-green-700 dark:bg-green-900/20 dark:text-green-300"
: "border-slate-200 bg-white text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600", : "border-slate-200 bg-white text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600",
(isStarting || isStopping || isToggling) && "opacity-50 cursor-not-allowed" isLoading && "opacity-50 cursor-not-allowed"
)} )}
> >
{label} {label}