mirror of https://github.com/jetkvm/kvm.git
refactor(cgo-rpc): rewrite the monitor process logic
This commit is contained in:
parent
9c0ac4e3d0
commit
f3b854c407
|
|
@ -120,15 +120,20 @@ type NativeProxy struct {
|
||||||
videoStreamListener net.Listener
|
videoStreamListener net.Listener
|
||||||
binaryPath string
|
binaryPath string
|
||||||
|
|
||||||
client *GRPCClient
|
startMu sync.Mutex // mutex for the start process (context and isStopped)
|
||||||
clientMu sync.RWMutex
|
ctx context.Context
|
||||||
cmd *cmdWrapper
|
cancel context.CancelFunc
|
||||||
logger *zerolog.Logger
|
|
||||||
ready chan struct{}
|
client *GRPCClient
|
||||||
options *nativeProxyOptions
|
clientMu sync.RWMutex // mutex for the client
|
||||||
restartMu sync.Mutex
|
|
||||||
restarts uint
|
cmd *cmdWrapper
|
||||||
stopped bool
|
cmdMu sync.Mutex // mutex for the cmd
|
||||||
|
|
||||||
|
logger *zerolog.Logger
|
||||||
|
options *nativeProxyOptions
|
||||||
|
restarts uint
|
||||||
|
stopped bool
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewNativeProxy creates a new NativeProxy that spawns a separate process
|
// NewNativeProxy creates a new NativeProxy that spawns a separate process
|
||||||
|
|
@ -147,7 +152,6 @@ func NewNativeProxy(opts NativeOptions) (*NativeProxy, error) {
|
||||||
videoStreamUnixSocket: proxyOptions.VideoStreamUnixSocket,
|
videoStreamUnixSocket: proxyOptions.VideoStreamUnixSocket,
|
||||||
binaryPath: exePath,
|
binaryPath: exePath,
|
||||||
logger: nativeLogger,
|
logger: nativeLogger,
|
||||||
ready: make(chan struct{}),
|
|
||||||
options: proxyOptions,
|
options: proxyOptions,
|
||||||
restarts: 0,
|
restarts: 0,
|
||||||
}
|
}
|
||||||
|
|
@ -264,10 +268,8 @@ func (p *NativeProxy) handleVideoFrame(conn net.Conn) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// it should be only called by start() method, as it isn't thread-safe
|
||||||
func (p *NativeProxy) setUpGRPCClient() error {
|
func (p *NativeProxy) setUpGRPCClient() error {
|
||||||
p.clientMu.Lock()
|
|
||||||
defer p.clientMu.Unlock()
|
|
||||||
|
|
||||||
// wait until handshake completed
|
// wait until handshake completed
|
||||||
select {
|
select {
|
||||||
case <-p.cmd.stdoutHandler.handshakeCh:
|
case <-p.cmd.stdoutHandler.handshakeCh:
|
||||||
|
|
@ -306,13 +308,13 @@ func (p *NativeProxy) setUpGRPCClient() error {
|
||||||
go p.options.OnNativeRestart()
|
go p.options.OnNativeRestart()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Start monitoring process for crashes
|
|
||||||
go p.monitorProcess(client.getContext())
|
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (p *NativeProxy) start() error {
|
func (p *NativeProxy) doStart() error {
|
||||||
|
p.cmdMu.Lock()
|
||||||
|
defer p.cmdMu.Unlock()
|
||||||
|
|
||||||
// lock OS thread to prevent the process from being moved to a different thread
|
// lock OS thread to prevent the process from being moved to a different thread
|
||||||
// see also https://go.dev/issue/27505
|
// see also https://go.dev/issue/27505
|
||||||
runtime.LockOSThread()
|
runtime.LockOSThread()
|
||||||
|
|
@ -345,8 +347,10 @@ func (p *NativeProxy) start() error {
|
||||||
|
|
||||||
// Start starts the native process
|
// Start starts the native process
|
||||||
func (p *NativeProxy) Start() error {
|
func (p *NativeProxy) Start() error {
|
||||||
p.restartMu.Lock()
|
p.startMu.Lock()
|
||||||
defer p.restartMu.Unlock()
|
defer p.startMu.Unlock()
|
||||||
|
|
||||||
|
p.ctx, p.cancel = context.WithCancel(context.Background())
|
||||||
|
|
||||||
if p.stopped {
|
if p.stopped {
|
||||||
return fmt.Errorf("proxy is stopped")
|
return fmt.Errorf("proxy is stopped")
|
||||||
|
|
@ -356,51 +360,46 @@ func (p *NativeProxy) Start() error {
|
||||||
return fmt.Errorf("failed to start video stream listener: %w", err)
|
return fmt.Errorf("failed to start video stream listener: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
if err := p.start(); err != nil {
|
if err := p.doStart(); err != nil {
|
||||||
return fmt.Errorf("failed to start native process: %w", err)
|
return fmt.Errorf("failed to start native process: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
close(p.ready)
|
go p.monitorProcess()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// monitorProcess monitors the native process and restarts it if it crashes
|
// monitorProcess monitors the native process and restarts it if it crashes
|
||||||
func (p *NativeProxy) monitorProcess(ctx context.Context) {
|
func (p *NativeProxy) monitorProcess() {
|
||||||
for {
|
for {
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
p.restartMu.Lock()
|
|
||||||
cmd := p.cmd
|
|
||||||
stopped := p.stopped
|
|
||||||
p.restartMu.Unlock()
|
|
||||||
|
|
||||||
if stopped {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
if cmd == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
err := cmd.Wait()
|
|
||||||
// check if the context is done
|
|
||||||
// if yes, it means that there should be already one restart in progress
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
p.restartMu.Lock()
|
|
||||||
if p.stopped {
|
if p.stopped {
|
||||||
p.restartMu.Unlock()
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
p.restartMu.Unlock()
|
|
||||||
|
select {
|
||||||
|
case <-p.ctx.Done():
|
||||||
|
p.logger.Trace().Msg("context done, stopping monitor process [before wait]")
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
p.cmdMu.Lock()
|
||||||
|
err := fmt.Errorf("native process not started")
|
||||||
|
if p.cmd != nil {
|
||||||
|
err = p.cmd.Wait()
|
||||||
|
}
|
||||||
|
p.cmdMu.Unlock()
|
||||||
|
|
||||||
|
if p.stopped {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-p.ctx.Done():
|
||||||
|
p.logger.Trace().Msg("context done, stopping monitor process [after wait]")
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
p.logger.Warn().Err(err).Msg("native process exited, restarting ...")
|
p.logger.Warn().Err(err).Msg("native process exited, restarting ...")
|
||||||
|
|
||||||
|
|
@ -419,9 +418,6 @@ func (p *NativeProxy) monitorProcess(ctx context.Context) {
|
||||||
|
|
||||||
// restartProcess restarts the native process
|
// restartProcess restarts the native process
|
||||||
func (p *NativeProxy) restartProcess() error {
|
func (p *NativeProxy) restartProcess() error {
|
||||||
p.restartMu.Lock()
|
|
||||||
defer p.restartMu.Unlock()
|
|
||||||
|
|
||||||
p.restarts++
|
p.restarts++
|
||||||
logger := p.logger.With().Uint("attempt", p.restarts).Uint("maxAttempts", p.options.MaxRestartAttempts).Logger()
|
logger := p.logger.With().Uint("attempt", p.restarts).Uint("maxAttempts", p.options.MaxRestartAttempts).Logger()
|
||||||
|
|
||||||
|
|
@ -446,7 +442,7 @@ func (p *NativeProxy) restartProcess() error {
|
||||||
logger.Info().Msg("gRPC client closed")
|
logger.Info().Msg("gRPC client closed")
|
||||||
|
|
||||||
logger.Info().Msg("attempting to restart native process")
|
logger.Info().Msg("attempting to restart native process")
|
||||||
if err := p.start(); err != nil {
|
if err := p.doStart(); err != nil {
|
||||||
logger.Error().Err(err).Msg("failed to start native process")
|
logger.Error().Err(err).Msg("failed to start native process")
|
||||||
return fmt.Errorf("failed to start native process: %w", err)
|
return fmt.Errorf("failed to start native process: %w", err)
|
||||||
}
|
}
|
||||||
|
|
@ -457,17 +453,11 @@ func (p *NativeProxy) restartProcess() error {
|
||||||
|
|
||||||
// Stop stops the native process
|
// Stop stops the native process
|
||||||
func (p *NativeProxy) Stop() error {
|
func (p *NativeProxy) Stop() error {
|
||||||
p.restartMu.Lock()
|
p.startMu.Lock()
|
||||||
defer p.restartMu.Unlock()
|
defer p.startMu.Unlock()
|
||||||
|
|
||||||
p.stopped = true
|
p.stopped = true
|
||||||
|
|
||||||
p.clientMu.Lock()
|
|
||||||
if err := p.client.Close(); err != nil {
|
|
||||||
p.logger.Warn().Err(err).Msg("failed to close IPC client")
|
|
||||||
}
|
|
||||||
p.clientMu.Unlock()
|
|
||||||
|
|
||||||
if p.cmd.Process != nil {
|
if p.cmd.Process != nil {
|
||||||
if err := p.cmd.Process.Kill(); err != nil {
|
if err := p.cmd.Process.Kill(); err != nil {
|
||||||
return fmt.Errorf("failed to kill native process: %w", err)
|
return fmt.Errorf("failed to kill native process: %w", err)
|
||||||
|
|
|
||||||
|
|
@ -10,6 +10,7 @@ import (
|
||||||
|
|
||||||
"github.com/caarlos0/env/v11"
|
"github.com/caarlos0/env/v11"
|
||||||
"github.com/erikdubbelboer/gspt"
|
"github.com/erikdubbelboer/gspt"
|
||||||
|
"github.com/rs/zerolog"
|
||||||
)
|
)
|
||||||
|
|
||||||
// Native Process
|
// Native Process
|
||||||
|
|
@ -21,6 +22,10 @@ var (
|
||||||
lastProcTitle string
|
lastProcTitle string
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
DebugModeFile = "/userdata/jetkvm/.native-debug-mode"
|
||||||
|
)
|
||||||
|
|
||||||
func setProcTitle(status string) {
|
func setProcTitle(status string) {
|
||||||
lastProcTitle = status
|
lastProcTitle = status
|
||||||
if status != "" {
|
if status != "" {
|
||||||
|
|
@ -30,6 +35,21 @@ func setProcTitle(status string) {
|
||||||
gspt.SetProcTitle(title)
|
gspt.SetProcTitle(title)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func monitorCrashSignal(logger *zerolog.Logger, nativeInstance NativeInterface) {
|
||||||
|
logger.Info().Msg("DEBUG mode: will crash the process on SIGHUP signal")
|
||||||
|
|
||||||
|
sigChan := make(chan os.Signal, 1)
|
||||||
|
signal.Notify(sigChan, syscall.SIGHUP)
|
||||||
|
|
||||||
|
// non-blocking receive
|
||||||
|
select {
|
||||||
|
case <-sigChan:
|
||||||
|
logger.Info().Msg("received SIGHUP signal, emulating crash")
|
||||||
|
nativeInstance.DoNotUseThisIsForCrashTestingOnly()
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// RunNativeProcess runs the native process mode
|
// RunNativeProcess runs the native process mode
|
||||||
func RunNativeProcess(binaryName string) {
|
func RunNativeProcess(binaryName string) {
|
||||||
logger := nativeLogger.With().Int("pid", os.Getpid()).Logger()
|
logger := nativeLogger.With().Int("pid", os.Getpid()).Logger()
|
||||||
|
|
@ -85,18 +105,10 @@ func RunNativeProcess(binaryName string) {
|
||||||
logger.Fatal().Err(err).Msg("failed to write handshake message to stdout")
|
logger.Fatal().Err(err).Msg("failed to write handshake message to stdout")
|
||||||
}
|
}
|
||||||
|
|
||||||
go func() {
|
if _, err := os.Stat(DebugModeFile); err == nil {
|
||||||
sigChan := make(chan os.Signal, 1)
|
logger.Info().Msg("DEBUG mode: enabled")
|
||||||
signal.Notify(sigChan, syscall.SIGHUP)
|
go monitorCrashSignal(&logger, nativeInstance)
|
||||||
|
}
|
||||||
// non-blocking receive
|
|
||||||
select {
|
|
||||||
case <-sigChan:
|
|
||||||
logger.Info().Msg("received SIGHUP signal, emulating crash")
|
|
||||||
nativeInstance.DoNotUseThisIsForCrashTestingOnly()
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}()
|
|
||||||
|
|
||||||
// Set up signal handling
|
// Set up signal handling
|
||||||
sigChan := make(chan os.Signal, 1)
|
sigChan := make(chan os.Signal, 1)
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue