fix: clear transfer blacklist on primary disconnect to enable grace period promotion

When a primary session disconnects accidentally (not intentional logout), the
60-second transfer blacklist from previous role transfers was blocking observer
sessions from being promoted after the grace period expires (~10s).

The blacklist is intended to prevent immediate re-promotion during manual
transfers (user-initiated), but should not interfere with emergency promotion
after accidental disconnects (system-initiated).

Changes:
- Clear all transfer blacklist entries when primary enters grace period
- Add logging to track blacklist clearing for debugging
- Preserve blacklist during intentional logout to maintain manual transfer protection

This ensures observers are promoted as soon as the grace period expires (~10s)
instead of waiting for the transfer blacklist to expire (~40-60s).
This commit is contained in:
Alex P 2025-10-09 12:55:25 +03:00
parent b388bc3c62
commit 57f4be2846
1 changed file with 119 additions and 38 deletions

View File

@ -428,13 +428,30 @@ func (sm *SessionManager) RemoveSession(sessionID string) {
// Remove from queue if present // Remove from queue if present
sm.removeFromQueue(sessionID) sm.removeFromQueue(sessionID)
// Add a grace period for reconnection for all sessions // Check if this session was marked for immediate removal (intentional logout)
// Use configured grace period or default to 10 seconds isIntentionalLogout := false
if graceTime, exists := sm.reconnectGrace[sessionID]; exists {
// If grace period is already expired, this was intentional logout
if time.Now().After(graceTime) {
isIntentionalLogout = true
sm.logger.Info().
Str("sessionID", sessionID).
Msg("Detected intentional logout - skipping grace period")
delete(sm.reconnectGrace, sessionID)
delete(sm.reconnectInfo, sessionID)
}
}
// Determine grace period duration (used for logging even if intentional logout)
gracePeriod := 10 gracePeriod := 10
if currentSessionSettings != nil && currentSessionSettings.ReconnectGrace > 0 { if currentSessionSettings != nil && currentSessionSettings.ReconnectGrace > 0 {
gracePeriod = currentSessionSettings.ReconnectGrace gracePeriod = currentSessionSettings.ReconnectGrace
} }
// Only add grace period if this is NOT an intentional logout
if !isIntentionalLogout {
// Add a grace period for reconnection for all sessions
// Limit grace period entries to prevent memory exhaustion (DoS protection) // Limit grace period entries to prevent memory exhaustion (DoS protection)
const maxGraceEntries = 10 // Reduced from 20 to limit memory usage const maxGraceEntries = 10 // Reduced from 20 to limit memory usage
for len(sm.reconnectGrace) >= maxGraceEntries { for len(sm.reconnectGrace) >= maxGraceEntries {
@ -466,22 +483,46 @@ func (sm *SessionManager) RemoveSession(sessionID string) {
Nickname: session.Nickname, Nickname: session.Nickname,
CreatedAt: session.CreatedAt, CreatedAt: session.CreatedAt,
} }
}
// If this was the primary session, clear primary slot and track for grace period // If this was the primary session, clear primary slot and track for grace period
if wasPrimary { if wasPrimary {
if isIntentionalLogout {
// Intentional logout: clear immediately and promote right away
sm.primarySessionID = ""
sm.lastPrimaryID = ""
sm.logger.Info().
Str("sessionID", sessionID).
Int("remainingSessions", len(sm.sessions)).
Msg("Primary session removed via intentional logout - immediate promotion")
} else {
// Accidental disconnect: use grace period
sm.lastPrimaryID = sessionID // Remember this was the primary for grace period sm.lastPrimaryID = sessionID // Remember this was the primary for grace period
sm.primarySessionID = "" // Clear primary slot so other sessions can be promoted sm.primarySessionID = "" // Clear primary slot so other sessions can be promoted
// Clear all blacklists to allow emergency promotion after grace period expires
// The blacklist is meant to prevent immediate re-promotion during manual transfers,
// but should not block emergency promotion after accidental disconnects
if len(sm.transferBlacklist) > 0 {
sm.logger.Info().
Int("clearedBlacklistEntries", len(sm.transferBlacklist)).
Str("disconnectedPrimaryID", sessionID).
Msg("Clearing transfer blacklist to allow grace period promotion")
sm.transferBlacklist = make([]TransferBlacklistEntry, 0)
}
sm.logger.Info(). sm.logger.Info().
Str("sessionID", sessionID). Str("sessionID", sessionID).
Dur("gracePeriod", time.Duration(gracePeriod)*time.Second). Dur("gracePeriod", time.Duration(gracePeriod)*time.Second).
Int("remainingSessions", len(sm.sessions)). Int("remainingSessions", len(sm.sessions)).
Msg("Primary session removed, grace period active") Msg("Primary session removed, grace period active")
}
// Immediate promotion check: if there are observers waiting, trigger validation // Trigger validation for potential promotion
// This allows immediate promotion while still respecting grace period protection
if len(sm.sessions) > 0 { if len(sm.sessions) > 0 {
sm.logger.Debug(). sm.logger.Debug().
Str("removedPrimaryID", sessionID). Str("removedPrimaryID", sessionID).
Bool("intentionalLogout", isIntentionalLogout).
Int("remainingSessions", len(sm.sessions)). Int("remainingSessions", len(sm.sessions)).
Msg("Triggering immediate validation for potential promotion") Msg("Triggering immediate validation for potential promotion")
sm.validateSinglePrimary() sm.validateSinglePrimary()
@ -525,6 +566,28 @@ func (sm *SessionManager) IsInGracePeriod(sessionID string) bool {
return false return false
} }
// ClearGracePeriod flags a session as an intentional logout/disconnect so that it
// is not given a reconnection grace period.
//
// Stored reconnect info for the session is dropped and its grace deadline is
// replaced with a timestamp one second in the past. RemoveSession treats an
// already-expired deadline as the marker for an intentional logout and skips the
// grace period; no promotion is performed by this method itself.
func (sm *SessionManager) ClearGracePeriod(sessionID string) {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	// Forget reconnect details for this session.
	delete(sm.reconnectInfo, sessionID)

	// Overwrite any existing deadline with one that has already passed. The
	// expired entry is the "immediate removal" signal read by RemoveSession.
	expiredDeadline := time.Now().Add(-time.Second)
	sm.reconnectGrace[sessionID] = expiredDeadline

	sm.logger.Info().
		Str("sessionID", sessionID).
		Str("lastPrimaryID", sm.lastPrimaryID).
		Str("primarySessionID", sm.primarySessionID).
		Msg("Marked session for immediate removal (intentional logout)")
}
// isSessionBlacklisted checks if a session was recently demoted via transfer and should not become primary // isSessionBlacklisted checks if a session was recently demoted via transfer and should not become primary
func (sm *SessionManager) isSessionBlacklisted(sessionID string) bool { func (sm *SessionManager) isSessionBlacklisted(sessionID string) bool {
now := time.Now() now := time.Now()
@ -1309,6 +1372,7 @@ func (sm *SessionManager) findMostTrustedSessionForEmergency() string {
bestSessionID := "" bestSessionID := ""
bestScore := -1 bestScore := -1
// First pass: try to find observers or queued sessions (preferred)
for sessionID, session := range sm.sessions { for sessionID, session := range sm.sessions {
// Skip if blacklisted, primary, or not eligible modes // Skip if blacklisted, primary, or not eligible modes
if sm.isSessionBlacklisted(sessionID) || if sm.isSessionBlacklisted(sessionID) ||
@ -1324,6 +1388,23 @@ func (sm *SessionManager) findMostTrustedSessionForEmergency() string {
} }
} }
// If no observers/queued found, try pending sessions as last resort
if bestSessionID == "" {
for sessionID, session := range sm.sessions {
if sm.isSessionBlacklisted(sessionID) || session.Mode == SessionModePrimary {
continue
}
if session.Mode == SessionModePending {
score := sm.getSessionTrustScore(sessionID)
if score > bestScore {
bestScore = score
bestSessionID = sessionID
}
}
}
}
// Log the selection decision for audit trail // Log the selection decision for audit trail
if bestSessionID != "" { if bestSessionID != "" {
sm.logger.Info(). sm.logger.Info().