Alex 2025-08-14 09:51:53 +00:00 committed by GitHub
commit d7db8f999f
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
57 changed files with 7767 additions and 477 deletions


@ -1,10 +1,15 @@
{
  "name": "JetKVM",
  "image": "mcr.microsoft.com/devcontainers/base:ubuntu-22.04",
  "runArgs": ["--platform=linux/amd64"],
  "features": {
    "ghcr.io/devcontainers/features/node:1": {
      // Should match what is defined in ui/package.json
      "version": "22.15.0"
    },
    "ghcr.io/devcontainers/features/go:1": {
      // Should match what is defined in go.mod
      "version": "latest"
    }
  },
  "mounts": [

.gitignore (vendored, 6 changes)

@ -1,6 +1,12 @@
bin/*
static/*
.vscode/
tmp/
.devcontainer/devcontainer-lock.json
.idea
.DS_Store
*.log
*.tmp
*.code-workspace
device-tests.tar.gz


@ -1,4 +1,7 @@
version: "2" version: "2"
run:
build-tags:
- nolint
linters: linters:
enable: enable:
- forbidigo - forbidigo


@ -11,21 +11,39 @@
</div>

# JetKVM Development Guide

Welcome to JetKVM development! This guide will help you get started quickly, whether you're fixing bugs, adding features, or just exploring the codebase.

## Get Started

### Prerequisites

- **A JetKVM device** (for full development)
- **[Go 1.24.4+](https://go.dev/doc/install)** and **[Node.js 22.15.0](https://nodejs.org/en/download/)**
- **[Git](https://git-scm.com/downloads)** for version control
- **[SSH access](https://jetkvm.com/docs/advanced-usage/developing#developer-mode)** to your JetKVM device
- **Audio build dependencies:** the audio pipeline is now fully in-process, using CGO, ALSA, and Opus. Run the provided scripts in `tools/` to set up the cross-compiler and build static ALSA/Opus libraries for ARM (see below).
### Development Environment

**Recommended:** Development is best done on **Linux** or **macOS**.
#### Apple Silicon (M1/M2/M3) Mac Users

If you are developing on an Apple Silicon Mac, use a devcontainer to ensure compatibility with the JetKVM build environment (which targets linux/amd64 and ARM). There are three main options:

- **VS Code Dev Containers**: Open the project in VS Code and use the built-in Dev Containers support. The configuration is in `.devcontainer/devcontainer.json`.
- **Devpod**: [Devpod](https://devpod.sh/) is a fast, open-source tool for running devcontainers anywhere. If you use Devpod, go to **Settings → Experimental → Additional Environment Variables** and add:
  - `DOCKER_DEFAULT_PLATFORM=linux/amd64`

  This ensures all builds run in the correct architecture.
- **devcontainer CLI**: You can also use the [devcontainer CLI](https://github.com/devcontainers/cli) to launch the devcontainer from the terminal.

This approach ensures compatibility with all shell scripts, build tools, and cross-compilation steps used in the project.
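As a quick illustration, here is a minimal sketch of launching the devcontainer from a terminal (assuming the devcontainer CLI and Docker are installed; the variable mirrors the Devpod setting above):

```bash
# Force linux/amd64 for all container builds, then bring the devcontainer up
export DOCKER_DEFAULT_PLATFORM=linux/amd64
devcontainer up --workspace-folder .
```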
If you're using Windows, we strongly recommend using **WSL (Windows Subsystem for Linux)** for the best development experience:

- [Install WSL on Windows](https://docs.microsoft.com/en-us/windows/wsl/install)
@ -33,6 +51,7 @@ If you're using Windows, we strongly recommend using **WSL (Windows Subsystem fo
This ensures compatibility with shell scripts and build tools used in the project.
### Project Setup

1. **Clone the repository:**
@ -46,16 +65,25 @@ This ensures compatibility with shell scripts and build tools used in the projec
go version && node --version
```
3. **Set up the cross-compiler and audio dependencies:**
```bash
make dev_env
# This will run tools/setup_rv1106_toolchain.sh and tools/build_audio_deps.sh
# It will clone the cross-compiler and build ALSA/Opus static libs in $HOME/.jetkvm
#
# Note: this step is required for the new in-process audio pipeline. If you skip it, audio will not work.
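#
# To verify the setup afterwards (directory names as created by the Makefile;
# output shown is illustrative):
#   ls ~/.jetkvm
#   audio-libs  rv1106-system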
```
4. **Find your JetKVM IP address** (check your router or device screen)
5. **Deploy and test:**
```bash
./dev_deploy.sh -r 192.168.1.100  # Replace with your device IP
```
6. **Open in browser:** `http://192.168.1.100`
That's it! You're now running your own development version of JetKVM, **with in-process audio streaming for the first time.**

---
@ -71,13 +99,15 @@ npm install
Now edit files in `ui/src/` and see changes live in your browser!
### Modify the backend (including audio)
```bash
# Edit Go files (config.go, web.go, internal/audio, etc.)
./dev_deploy.sh -r 192.168.1.100 --skip-ui-build
```
### Run tests

```bash
@ -93,21 +123,26 @@ tail -f /var/log/jetkvm.log
---

## Project Layout

```
/kvm/
├── main.go                 # App entry point
├── config.go               # Settings & configuration
├── web.go                  # API endpoints
├── ui/                     # React frontend
│   ├── src/routes/         # Pages (login, settings, etc.)
│   └── src/components/     # UI components
├── internal/               # Internal Go packages
│   └── audio/              # In-process audio pipeline (CGO, ALSA, Opus) [NEW]
├── tools/                  # Toolchain and audio dependency setup scripts
└── Makefile                # Build and dev automation (see audio targets)
```
**Key files for beginners:**

- `internal/audio/` - [NEW] In-process audio pipeline (CGO, ALSA, Opus)
- `web.go` - Add new API endpoints here
- `config.go` - Add new settings here
- `ui/src/routes/` - Add new pages here
@ -136,9 +171,10 @@ npm install
./dev_device.sh <YOUR_DEVICE_IP>
```
### Quick Backend Changes

*Best for: API, backend, or audio pipeline changes*
```bash
# Skip frontend build for faster deployment
@ -206,7 +242,8 @@ curl -X POST http://<IP>/auth/password-local \
---

## Common Issues & Solutions

### "Build failed" or "Permission denied"
@ -218,6 +255,8 @@ ssh root@<IP> chmod +x /userdata/jetkvm/bin/jetkvm_app_debug
go clean -modcache
go mod tidy
make build_dev
# If you see errors about missing ALSA/Opus or toolchain, run:
make dev_env # Required for new audio support
```

### "Can't connect to device"
@ -230,6 +269,15 @@ ping <IP>
ssh root@<IP> echo "Connection OK"
```
### "Audio not working"
```bash
# Make sure you have run:
make dev_env
# If you see errors about ALSA/Opus, check logs and re-run the setup scripts in tools/.
```
### "Frontend not updating" ### "Frontend not updating"
```bash ```bash
@ -244,18 +292,21 @@ npm install
## Next Steps

### Adding a New Feature

1. **Backend:** Add API endpoint in `web.go` or extend audio in `internal/audio/` (see the sketch below)
2. **Config:** Add settings in `config.go`
3. **Frontend:** Add UI in `ui/src/routes/`
4. **Test:** Deploy and test with `./dev_deploy.sh`
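For step 1, a minimal sketch of what a new endpoint could look like (hypothetical handler and route; the project uses Gin, as visible in `cloud.go`, and the `audio` helpers are the ones added in `internal/audio/`):

```go
// in web.go (sketch, not actual project code)
func handleAudioStatus(c *gin.Context) {
	c.JSON(200, gin.H{
		"muted":   audio.IsAudioMuted(),
		"metrics": audio.GetAudioMetrics(),
	})
}

// registered alongside the existing routes, e.g.:
// router.GET("/audio/status", handleAudioStatus)
```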
### Code Style

- **Go:** Follow standard Go conventions
- **TypeScript:** Use TypeScript for type safety
- **React:** Keep components small and reusable
- **Audio/CGO:** Keep C/Go integration minimal, robust, and well-documented. Use zerolog for all logging.
### Environment Variables


@ -1,3 +1,20 @@
# --- JetKVM Audio/Toolchain Dev Environment Setup ---
.PHONY: setup_toolchain build_audio_deps dev_env

# Clone the rv1106-system toolchain to $HOME/.jetkvm/rv1106-system
setup_toolchain:
	bash tools/setup_rv1106_toolchain.sh

# Build ALSA and Opus static libs for ARM in $HOME/.jetkvm/audio-libs
build_audio_deps: setup_toolchain
	bash tools/build_audio_deps.sh $(ALSA_VERSION) $(OPUS_VERSION)

# Prepare everything needed for local development (toolchain + audio deps)
dev_env: build_audio_deps
	@echo "Development environment ready."

JETKVM_HOME ?= $(HOME)/.jetkvm
TOOLCHAIN_DIR ?= $(JETKVM_HOME)/rv1106-system
AUDIO_LIBS_DIR ?= $(JETKVM_HOME)/audio-libs
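# Example usage (hypothetical invocations; the ?= defaults above and the
# version variables below can be overridden on the command line):
#   make dev_env
#   make dev_env JETKVM_HOME=/opt/jetkvm
#   make build_audio_deps ALSA_VERSION=1.2.14 OPUS_VERSION=1.5.2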
BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD)
BUILDDATE ?= $(shell date -u +%FT%T%z)
BUILDTS ?= $(shell date -u +%s)
@ -5,6 +22,13 @@ REVISION ?= $(shell git rev-parse HEAD)
VERSION_DEV ?= 0.4.7-dev$(shell date +%Y%m%d%H%M)
VERSION ?= 0.4.6
# Audio library versions
ALSA_VERSION ?= 1.2.14
OPUS_VERSION ?= 1.5.2
# Optimization flags for ARM Cortex-A7 with NEON
OPTIM_CFLAGS := -O3 -mcpu=cortex-a7 -mfpu=neon -mfloat-abi=hard -ftree-vectorize -ffast-math -funroll-loops
PROMETHEUS_TAG := github.com/prometheus/common/version
KVM_PKG_NAME := github.com/jetkvm/kvm
@ -25,9 +49,14 @@ TEST_DIRS := $(shell find . -name "*_test.go" -type f -exec dirname {} \; | sort
hash_resource:
	@shasum -a 256 resource/jetkvm_native | cut -d ' ' -f 1 > resource/jetkvm_native.sha256

build_dev: build_audio_deps hash_resource
	@echo "Building..."
	GOOS=linux GOARCH=arm GOARM=7 \
	CC=$(TOOLCHAIN_DIR)/tools/linux/toolchain/arm-rockchip830-linux-uclibcgnueabihf/bin/arm-rockchip830-linux-uclibcgnueabihf-gcc \
	CGO_ENABLED=1 \
	CGO_CFLAGS="$(OPTIM_CFLAGS) -I$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/celt" \
	CGO_LDFLAGS="-L$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/src/.libs -lasound -L$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/.libs -lopus -lm -ldl -static" \
	go build \
	-ldflags="$(GO_LDFLAGS) -X $(KVM_PKG_NAME).builtAppVersion=$(VERSION_DEV)" \
	$(GO_RELEASE_BUILD_ARGS) \
	-o $(BIN_DIR)/jetkvm_app cmd/main.go
@ -40,7 +69,7 @@ build_gotestsum:
	$(GO_CMD) install gotest.tools/gotestsum@latest
	cp $(shell $(GO_CMD) env GOPATH)/bin/linux_arm/gotestsum $(BIN_DIR)/gotestsum

build_dev_test: build_audio_deps build_test2json build_gotestsum
	# collect all directories that contain tests
	@echo "Building tests for devices ..."
	@rm -rf $(BIN_DIR)/tests && mkdir -p $(BIN_DIR)/tests
@ -50,7 +79,12 @@ build_dev_test: build_test2json build_gotestsum
	test_pkg_name=$$(echo $$test | sed 's/^.\///g'); \
	test_pkg_full_name=$(KVM_PKG_NAME)/$$(echo $$test | sed 's/^.\///g'); \
	test_filename=$$(echo $$test_pkg_name | sed 's/\//__/g')_test; \
	GOOS=linux GOARCH=arm GOARM=7 \
	CC=$(TOOLCHAIN_DIR)/tools/linux/toolchain/arm-rockchip830-linux-uclibcgnueabihf/bin/arm-rockchip830-linux-uclibcgnueabihf-gcc \
	CGO_ENABLED=1 \
	CGO_CFLAGS="$(OPTIM_CFLAGS) -I$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/celt" \
	CGO_LDFLAGS="-L$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/src/.libs -lasound -L$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/.libs -lopus -lm -ldl -static" \
	go test -v \
	-ldflags="$(GO_LDFLAGS) -X $(KVM_PKG_NAME).builtAppVersion=$(VERSION_DEV)" \
	$(GO_BUILD_ARGS) \
	-c -o $(BIN_DIR)/tests/$$test_filename $$test; \
@ -70,9 +104,14 @@ dev_release: frontend build_dev
	rclone copyto bin/jetkvm_app r2://jetkvm-update/app/$(VERSION_DEV)/jetkvm_app
	rclone copyto bin/jetkvm_app.sha256 r2://jetkvm-update/app/$(VERSION_DEV)/jetkvm_app.sha256

build_release: frontend build_audio_deps hash_resource
	@echo "Building release..."
	GOOS=linux GOARCH=arm GOARM=7 \
	CC=$(TOOLCHAIN_DIR)/tools/linux/toolchain/arm-rockchip830-linux-uclibcgnueabihf/bin/arm-rockchip830-linux-uclibcgnueabihf-gcc \
	CGO_ENABLED=1 \
	CGO_CFLAGS="$(OPTIM_CFLAGS) -I$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/celt" \
	CGO_LDFLAGS="-L$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/src/.libs -lasound -L$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/.libs -lopus -lm -ldl -static" \
	go build \
	-ldflags="$(GO_LDFLAGS) -X $(KVM_PKG_NAME).builtAppVersion=$(VERSION)" \
	$(GO_RELEASE_BUILD_ARGS) \
	-o bin/jetkvm_app cmd/main.go


@ -11,13 +11,20 @@
</div>

JetKVM is a high-performance, open-source KVM over IP (Keyboard, Video, Mouse, **Audio**) solution designed for efficient remote management of computers, servers, and workstations. Whether you're dealing with boot failures, installing a new operating system, adjusting BIOS settings, or simply taking control of a machine from afar, JetKVM provides the tools to get it done effectively.

## Features

- **Ultra-low Latency** - 1080p@60FPS video with 30-60ms latency using H.264 encoding. Smooth mouse, keyboard, and audio for responsive remote control.
- **First-Class Audio Support** - JetKVM now supports in-process, low-latency audio streaming using ALSA and Opus, fully integrated via CGO. No external audio binaries or IPC required: audio is delivered directly from the device to your browser.
- **Free & Optional Remote Access** - Remote management via JetKVM Cloud using WebRTC.
- **Open-source software** - Written in Golang (with CGO for audio) on Linux. Easily customizable through SSH access to the JetKVM device.

## Contributing
@ -31,20 +38,23 @@ The best place to search for answers is our [Documentation](https://jetkvm.com/d
If you've found an issue and want to report it, please check our [Issues](https://github.com/jetkvm/kvm/issues) page. Make sure the description contains information about the firmware version you're using, your platform, and a clear explanation of the steps to reproduce the issue.

# Development

JetKVM is written in Go & TypeScript, with some C for low-level integration. **Audio support is now fully in-process using CGO, ALSA, and Opus; no external audio binaries are required.**

The project contains two main parts: the backend software (Go, CGO) that runs on the KVM device, and the frontend software (React/TypeScript) that is served by the KVM device and the cloud.

For comprehensive development information, including setup, testing, debugging, and contribution guidelines, see **[DEVELOPMENT.md](DEVELOPMENT.md)**.

For quick device development, use the `./dev_deploy.sh` script. It will build the frontend and backend and deploy them to the local KVM device. Run `./dev_deploy.sh --help` for more information.

## Backend

The backend is written in Go and is responsible for KVM device management, audio/video streaming, the cloud API, and the cloud web. **Audio is now captured and encoded in-process using ALSA and Opus via CGO, with no external processes or IPC.**

## Frontend

The frontend is written in React and TypeScript and is served by the KVM device. It has three build targets: `device`, `development`, and `production`. Development is used for the cloud version on your local machine, device is used for building the frontend for the KVM device, and production is used for building the frontend for the cloud.


@ -39,7 +39,8 @@ const (
// should be lower than the websocket response timeout set in cloud-api
CloudOidcRequestTimeout = 10 * time.Second

// WebsocketPingInterval is the interval at which the websocket client sends ping messages to the cloud
// Increased to 30 seconds for constrained environments to reduce overhead
WebsocketPingInterval = 30 * time.Second
)

var (
@ -447,35 +448,70 @@ func handleSessionRequest(
		}
	}

	var session *Session
	var err error
	var sd string

	// Check if we have an existing session
	if currentSession != nil {
		scopedLogger.Info().Msg("existing session detected, creating new session and notifying old session")

		// Always create a new session when there's an existing one
		// This ensures the "otherSessionConnected" prompt is shown
		session, err = newSession(SessionConfig{
			ws:         c,
			IsCloud:    isCloudConnection,
			LocalIP:    req.IP,
			ICEServers: req.ICEServers,
			Logger:     scopedLogger,
		})
		if err != nil {
			_ = wsjson.Write(context.Background(), c, gin.H{"error": err})
			return err
		}

		sd, err = session.ExchangeOffer(req.Sd)
		if err != nil {
			_ = wsjson.Write(context.Background(), c, gin.H{"error": err})
			return err
		}

		// Notify the old session about the takeover
		writeJSONRPCEvent("otherSessionConnected", nil, currentSession)
		peerConn := currentSession.peerConnection
		go func() {
			time.Sleep(1 * time.Second)
			_ = peerConn.Close()
		}()

		currentSession = session
		scopedLogger.Info().Interface("session", session).Msg("new session created, old session notified")
	} else {
		// No existing session, create a new one
		scopedLogger.Info().Msg("creating new session")
		session, err = newSession(SessionConfig{
			ws:         c,
			IsCloud:    isCloudConnection,
			LocalIP:    req.IP,
			ICEServers: req.ICEServers,
			Logger:     scopedLogger,
		})
		if err != nil {
			_ = wsjson.Write(context.Background(), c, gin.H{"error": err})
			return err
		}

		sd, err = session.ExchangeOffer(req.Sd)
		if err != nil {
			_ = wsjson.Write(context.Background(), c, gin.H{"error": err})
			return err
		}

		currentSession = session
		cloudLogger.Trace().Interface("session", session).Msg("new session accepted")
	}

	_ = wsjson.Write(context.Background(), c, gin.H{"type": "answer", "data": sd})
	return nil
}

View File

@ -137,6 +137,7 @@ var defaultConfig = &Config{
	RelativeMouse: true,
	Keyboard:      true,
	MassStorage:   true,
	Audio:         true,
},
NetworkConfig:   &network.NetworkConfig{},
DefaultLogLevel: "INFO",


@ -180,8 +180,17 @@ set -e
# Set the library path to include the directory where librockit.so is located
export LD_LIBRARY_PATH=/oem/usr/lib:\$LD_LIBRARY_PATH
# Check if production jetkvm_app is running and save its state
PROD_APP_RUNNING=false
if pgrep -f "/userdata/jetkvm/bin/jetkvm_app" > /dev/null; then
PROD_APP_RUNNING=true
echo "Production jetkvm_app is running, will restore after development session"
else
echo "No production jetkvm_app detected"
fi
# Kill any existing instances of the application
pkill -f "/userdata/jetkvm/bin/jetkvm_app" || true
killall jetkvm_app_debug || true

# Navigate to the directory where the binary will be stored
@ -190,7 +199,29 @@ cd "${REMOTE_PATH}"
# Make the new binary executable
chmod +x jetkvm_app_debug

# Create a cleanup script that will restore the production app
cat > /tmp/restore_jetkvm.sh << RESTORE_EOF
#!/bin/ash
set -e
export LD_LIBRARY_PATH=/oem/usr/lib:\$LD_LIBRARY_PATH
cd ${REMOTE_PATH}
if [ "$PROD_APP_RUNNING" = "true" ]; then
echo "Restoring production jetkvm_app..."
killall jetkvm_app_debug || true
nohup /userdata/jetkvm/bin/jetkvm_app > /tmp/jetkvm_app.log 2>&1 &
echo "Production jetkvm_app restored"
else
echo "No production app was running before, not restoring"
fi
RESTORE_EOF
chmod +x /tmp/restore_jetkvm.sh
# Set up signal handler to restore production app on exit
trap '/tmp/restore_jetkvm.sh' EXIT INT TERM
# Run the application in the foreground
echo "Starting development jetkvm_app_debug..."
PION_LOG_TRACE=${LOG_TRACE_SCOPES} ./jetkvm_app_debug | tee -a /tmp/jetkvm_app_debug.log
EOF
fi


@ -372,11 +372,8 @@ func startBacklightTickers() {
	dimTicker = time.NewTicker(time.Duration(config.DisplayDimAfterSec) * time.Second)
	go func() {
		for range dimTicker.C {
			tick_displayDim()
		}
	}()
}
@ -386,11 +383,8 @@ func startBacklightTickers() {
	offTicker = time.NewTicker(time.Duration(config.DisplayOffAfterSec) * time.Second)
	go func() {
		for range offTicker.C {
			tick_displayOff()
		}
	}()
}
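The ticker refactor above is behavior-preserving: ranging over a ticker's channel receives each tick, and since a `time.Ticker`'s channel is never closed, the loop runs for the lifetime of the goroutine, exactly like the old single-case `select`. A standalone sketch:

```go
t := time.NewTicker(time.Second)
defer t.Stop()
for range t.C { // same semantics as: for { select { case <-t.C: ... } }
	doWork() // hypothetical work function
}
```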

input_rpc.go (new file, 217 lines)

@ -0,0 +1,217 @@
package kvm
import (
"fmt"
)
// Constants for input validation
const (
// MaxKeyboardKeys defines the maximum number of simultaneous key presses
// This matches the USB HID keyboard report specification
MaxKeyboardKeys = 6
)
// Input RPC Direct Handlers
// This module provides optimized direct handlers for high-frequency input events,
// bypassing the reflection-based RPC system for improved performance.
//
// Performance benefits:
// - Eliminates reflection overhead (~2-3ms per call)
// - Reduces memory allocations
// - Optimizes parameter parsing and validation
// - Provides faster code path for input methods
//
// The handlers maintain full compatibility with existing RPC interface
// while providing significant latency improvements for input events.
// Common validation helpers for parameter parsing
// These reduce code duplication and provide consistent error messages
// validateFloat64Param extracts and validates a float64 parameter from the params map
func validateFloat64Param(params map[string]interface{}, paramName, methodName string, min, max float64) (float64, error) {
value, ok := params[paramName].(float64)
if !ok {
return 0, fmt.Errorf("%s: %s parameter must be a number, got %T", methodName, paramName, params[paramName])
}
if value < min || value > max {
return 0, fmt.Errorf("%s: %s value %v out of range [%v to %v]", methodName, paramName, value, min, max)
}
return value, nil
}
// validateKeysArray extracts and validates a keys array parameter
func validateKeysArray(params map[string]interface{}, methodName string) ([]uint8, error) {
keysInterface, ok := params["keys"].([]interface{})
if !ok {
return nil, fmt.Errorf("%s: keys parameter must be an array, got %T", methodName, params["keys"])
}
if len(keysInterface) > MaxKeyboardKeys {
return nil, fmt.Errorf("%s: too many keys (%d), maximum is %d", methodName, len(keysInterface), MaxKeyboardKeys)
}
keys := make([]uint8, len(keysInterface))
for i, keyInterface := range keysInterface {
keyFloat, ok := keyInterface.(float64)
if !ok {
return nil, fmt.Errorf("%s: key at index %d must be a number, got %T", methodName, i, keyInterface)
}
if keyFloat < 0 || keyFloat > 255 {
return nil, fmt.Errorf("%s: key at index %d value %v out of range [0-255]", methodName, i, keyFloat)
}
keys[i] = uint8(keyFloat)
}
return keys, nil
}
// Input parameter structures for direct RPC handlers
// These mirror the original RPC method signatures but provide
// optimized parsing from JSON map parameters.
// KeyboardReportParams represents parameters for keyboard HID report
// Matches rpcKeyboardReport(modifier uint8, keys []uint8)
type KeyboardReportParams struct {
Modifier uint8 `json:"modifier"` // Keyboard modifier keys (Ctrl, Alt, Shift, etc.)
Keys []uint8 `json:"keys"` // Array of pressed key codes (up to 6 keys)
}
// AbsMouseReportParams represents parameters for absolute mouse positioning
// Matches rpcAbsMouseReport(x, y int, buttons uint8)
type AbsMouseReportParams struct {
X int `json:"x"` // Absolute X coordinate (0-32767)
Y int `json:"y"` // Absolute Y coordinate (0-32767)
Buttons uint8 `json:"buttons"` // Mouse button state bitmask
}
// RelMouseReportParams represents parameters for relative mouse movement
// Matches rpcRelMouseReport(dx, dy int8, buttons uint8)
type RelMouseReportParams struct {
Dx int8 `json:"dx"` // Relative X movement delta (-127 to +127)
Dy int8 `json:"dy"` // Relative Y movement delta (-127 to +127)
Buttons uint8 `json:"buttons"` // Mouse button state bitmask
}
// WheelReportParams represents parameters for mouse wheel events
// Matches rpcWheelReport(wheelY int8)
type WheelReportParams struct {
WheelY int8 `json:"wheelY"` // Wheel scroll delta (-127 to +127)
}
// Direct handler for keyboard reports
// Optimized path that bypasses reflection for keyboard input events
func handleKeyboardReportDirect(params map[string]interface{}) (interface{}, error) {
// Extract and validate modifier parameter
modifierFloat, err := validateFloat64Param(params, "modifier", "keyboardReport", 0, 255)
if err != nil {
return nil, err
}
modifier := uint8(modifierFloat)
// Extract and validate keys array
keys, err := validateKeysArray(params, "keyboardReport")
if err != nil {
return nil, err
}
return nil, rpcKeyboardReport(modifier, keys)
}
// Direct handler for absolute mouse reports
// Optimized path that bypasses reflection for absolute mouse positioning
func handleAbsMouseReportDirect(params map[string]interface{}) (interface{}, error) {
// Extract and validate x coordinate
xFloat, err := validateFloat64Param(params, "x", "absMouseReport", 0, 32767)
if err != nil {
return nil, err
}
x := int(xFloat)
// Extract and validate y coordinate
yFloat, err := validateFloat64Param(params, "y", "absMouseReport", 0, 32767)
if err != nil {
return nil, err
}
y := int(yFloat)
// Extract and validate buttons
buttonsFloat, err := validateFloat64Param(params, "buttons", "absMouseReport", 0, 255)
if err != nil {
return nil, err
}
buttons := uint8(buttonsFloat)
return nil, rpcAbsMouseReport(x, y, buttons)
}
// Direct handler for relative mouse reports
// Optimized path that bypasses reflection for relative mouse movement
func handleRelMouseReportDirect(params map[string]interface{}) (interface{}, error) {
// Extract and validate dx (relative X movement)
dxFloat, err := validateFloat64Param(params, "dx", "relMouseReport", -127, 127)
if err != nil {
return nil, err
}
dx := int8(dxFloat)
// Extract and validate dy (relative Y movement)
dyFloat, err := validateFloat64Param(params, "dy", "relMouseReport", -127, 127)
if err != nil {
return nil, err
}
dy := int8(dyFloat)
// Extract and validate buttons
buttonsFloat, err := validateFloat64Param(params, "buttons", "relMouseReport", 0, 255)
if err != nil {
return nil, err
}
buttons := uint8(buttonsFloat)
return nil, rpcRelMouseReport(dx, dy, buttons)
}
// Direct handler for wheel reports
// Optimized path that bypasses reflection for mouse wheel events
func handleWheelReportDirect(params map[string]interface{}) (interface{}, error) {
// Extract and validate wheelY (scroll delta)
wheelYFloat, err := validateFloat64Param(params, "wheelY", "wheelReport", -127, 127)
if err != nil {
return nil, err
}
wheelY := int8(wheelYFloat)
return nil, rpcWheelReport(wheelY)
}
// handleInputRPCDirect routes input method calls to their optimized direct handlers
// This is the main entry point for the fast path that bypasses reflection.
// It provides significant performance improvements for high-frequency input events.
//
// Performance monitoring: Consider adding metrics collection here to track
// latency improvements and call frequency for production monitoring.
func handleInputRPCDirect(method string, params map[string]interface{}) (interface{}, error) {
switch method {
case "keyboardReport":
return handleKeyboardReportDirect(params)
case "absMouseReport":
return handleAbsMouseReportDirect(params)
case "relMouseReport":
return handleRelMouseReportDirect(params)
case "wheelReport":
return handleWheelReportDirect(params)
default:
// This should never happen if isInputMethod is correctly implemented
return nil, fmt.Errorf("handleInputRPCDirect: unsupported method '%s'", method)
}
}
// isInputMethod determines if a given RPC method should use the optimized direct path
// Returns true for input-related methods that have direct handlers implemented.
// This function must be kept in sync with handleInputRPCDirect.
func isInputMethod(method string) bool {
switch method {
case "keyboardReport", "absMouseReport", "relMouseReport", "wheelReport":
return true
default:
return false
}
}
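A sketch of how an RPC dispatcher might wire in this fast path before falling back to the existing reflection-based handler (the request shape and fallback name are hypothetical):

```go
func dispatchRPC(method string, params map[string]interface{}) (interface{}, error) {
	if isInputMethod(method) {
		// Fast path: no reflection for high-frequency input events
		return handleInputRPCDirect(method, params)
	}
	// Hypothetical reflection-based fallback for all other methods
	return handleGenericRPC(method, params)
}
```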

input_rpc_test.go (new file, 560 lines)

@ -0,0 +1,560 @@
package kvm
import (
"testing"
"github.com/stretchr/testify/assert"
)
// Test validateFloat64Param function
func TestValidateFloat64Param(t *testing.T) {
tests := []struct {
name string
params map[string]interface{}
paramName string
methodName string
min float64
max float64
expected float64
expectError bool
}{
{
name: "valid parameter",
params: map[string]interface{}{"test": 50.0},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 50.0,
expectError: false,
},
{
name: "parameter at minimum boundary",
params: map[string]interface{}{"test": 0.0},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 0.0,
expectError: false,
},
{
name: "parameter at maximum boundary",
params: map[string]interface{}{"test": 100.0},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 100.0,
expectError: false,
},
{
name: "parameter below minimum",
params: map[string]interface{}{"test": -1.0},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 0,
expectError: true,
},
{
name: "parameter above maximum",
params: map[string]interface{}{"test": 101.0},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 0,
expectError: true,
},
{
name: "wrong parameter type",
params: map[string]interface{}{"test": "not a number"},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 0,
expectError: true,
},
{
name: "missing parameter",
params: map[string]interface{}{},
paramName: "test",
methodName: "testMethod",
min: 0,
max: 100,
expected: 0,
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := validateFloat64Param(tt.params, tt.paramName, tt.methodName, tt.min, tt.max)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.Equal(t, tt.expected, result)
}
})
}
}
// Test validateKeysArray function
func TestValidateKeysArray(t *testing.T) {
tests := []struct {
name string
params map[string]interface{}
methodName string
expected []uint8
expectError bool
}{
{
name: "valid keys array",
params: map[string]interface{}{"keys": []interface{}{65.0, 66.0, 67.0}},
methodName: "testMethod",
expected: []uint8{65, 66, 67},
expectError: false,
},
{
name: "empty keys array",
params: map[string]interface{}{"keys": []interface{}{}},
methodName: "testMethod",
expected: []uint8{},
expectError: false,
},
{
name: "maximum keys array",
params: map[string]interface{}{"keys": []interface{}{1.0, 2.0, 3.0, 4.0, 5.0, 6.0}},
methodName: "testMethod",
expected: []uint8{1, 2, 3, 4, 5, 6},
expectError: false,
},
{
name: "too many keys",
params: map[string]interface{}{"keys": []interface{}{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}},
methodName: "testMethod",
expected: nil,
expectError: true,
},
{
name: "invalid key type",
params: map[string]interface{}{"keys": []interface{}{"not a number"}},
methodName: "testMethod",
expected: nil,
expectError: true,
},
{
name: "key value out of range (negative)",
params: map[string]interface{}{"keys": []interface{}{-1.0}},
methodName: "testMethod",
expected: nil,
expectError: true,
},
{
name: "key value out of range (too high)",
params: map[string]interface{}{"keys": []interface{}{256.0}},
methodName: "testMethod",
expected: nil,
expectError: true,
},
{
name: "wrong parameter type",
params: map[string]interface{}{"keys": "not an array"},
methodName: "testMethod",
expected: nil,
expectError: true,
},
{
name: "missing keys parameter",
params: map[string]interface{}{},
methodName: "testMethod",
expected: nil,
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result, err := validateKeysArray(tt.params, tt.methodName)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
assert.Equal(t, tt.expected, result)
}
})
}
}
// Test handleKeyboardReportDirect function
func TestHandleKeyboardReportDirect(t *testing.T) {
tests := []struct {
name string
params map[string]interface{}
expectError bool
}{
{
name: "valid keyboard report",
params: map[string]interface{}{
"modifier": 2.0, // Shift key
"keys": []interface{}{65.0, 66.0}, // A, B keys
},
expectError: false,
},
{
name: "empty keys array",
params: map[string]interface{}{
"modifier": 0.0,
"keys": []interface{}{},
},
expectError: false,
},
{
name: "invalid modifier",
params: map[string]interface{}{
"modifier": 256.0, // Out of range
"keys": []interface{}{65.0},
},
expectError: true,
},
{
name: "invalid keys",
params: map[string]interface{}{
"modifier": 0.0,
"keys": []interface{}{1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0}, // Too many keys
},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := handleKeyboardReportDirect(tt.params)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
// Test handleAbsMouseReportDirect function
func TestHandleAbsMouseReportDirect(t *testing.T) {
tests := []struct {
name string
params map[string]interface{}
expectError bool
}{
{
name: "valid absolute mouse report",
params: map[string]interface{}{
"x": 1000.0,
"y": 500.0,
"buttons": 1.0, // Left button
},
expectError: false,
},
{
name: "boundary values",
params: map[string]interface{}{
"x": 0.0,
"y": 32767.0,
"buttons": 255.0,
},
expectError: false,
},
{
name: "invalid x coordinate",
params: map[string]interface{}{
"x": -1.0, // Out of range
"y": 500.0,
"buttons": 0.0,
},
expectError: true,
},
{
name: "invalid y coordinate",
params: map[string]interface{}{
"x": 1000.0,
"y": 32768.0, // Out of range
"buttons": 0.0,
},
expectError: true,
},
{
name: "invalid buttons",
params: map[string]interface{}{
"x": 1000.0,
"y": 500.0,
"buttons": 256.0, // Out of range
},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := handleAbsMouseReportDirect(tt.params)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
// Test handleRelMouseReportDirect function
func TestHandleRelMouseReportDirect(t *testing.T) {
tests := []struct {
name string
params map[string]interface{}
expectError bool
}{
{
name: "valid relative mouse report",
params: map[string]interface{}{
"dx": 10.0,
"dy": -5.0,
"buttons": 2.0, // Right button
},
expectError: false,
},
{
name: "boundary values",
params: map[string]interface{}{
"dx": -127.0,
"dy": 127.0,
"buttons": 0.0,
},
expectError: false,
},
{
name: "invalid dx",
params: map[string]interface{}{
"dx": -128.0, // Out of range
"dy": 0.0,
"buttons": 0.0,
},
expectError: true,
},
{
name: "invalid dy",
params: map[string]interface{}{
"dx": 0.0,
"dy": 128.0, // Out of range
"buttons": 0.0,
},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := handleRelMouseReportDirect(tt.params)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
// Test handleWheelReportDirect function
func TestHandleWheelReportDirect(t *testing.T) {
tests := []struct {
name string
params map[string]interface{}
expectError bool
}{
{
name: "valid wheel report",
params: map[string]interface{}{
"wheelY": 3.0,
},
expectError: false,
},
{
name: "boundary values",
params: map[string]interface{}{
"wheelY": -127.0,
},
expectError: false,
},
{
name: "invalid wheelY",
params: map[string]interface{}{
"wheelY": 128.0, // Out of range
},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := handleWheelReportDirect(tt.params)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
// Test handleInputRPCDirect function
func TestHandleInputRPCDirect(t *testing.T) {
tests := []struct {
name string
method string
params map[string]interface{}
expectError bool
}{
{
name: "keyboard report",
method: "keyboardReport",
params: map[string]interface{}{
"modifier": 0.0,
"keys": []interface{}{65.0},
},
expectError: false,
},
{
name: "absolute mouse report",
method: "absMouseReport",
params: map[string]interface{}{
"x": 1000.0,
"y": 500.0,
"buttons": 1.0,
},
expectError: false,
},
{
name: "relative mouse report",
method: "relMouseReport",
params: map[string]interface{}{
"dx": 10.0,
"dy": -5.0,
"buttons": 2.0,
},
expectError: false,
},
{
name: "wheel report",
method: "wheelReport",
params: map[string]interface{}{
"wheelY": 3.0,
},
expectError: false,
},
{
name: "unknown method",
method: "unknownMethod",
params: map[string]interface{}{},
expectError: true,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
_, err := handleInputRPCDirect(tt.method, tt.params)
if tt.expectError {
assert.Error(t, err)
} else {
assert.NoError(t, err)
}
})
}
}
// Test isInputMethod function
func TestIsInputMethod(t *testing.T) {
tests := []struct {
name string
method string
expected bool
}{
{
name: "keyboard report method",
method: "keyboardReport",
expected: true,
},
{
name: "absolute mouse report method",
method: "absMouseReport",
expected: true,
},
{
name: "relative mouse report method",
method: "relMouseReport",
expected: true,
},
{
name: "wheel report method",
method: "wheelReport",
expected: true,
},
{
name: "non-input method",
method: "someOtherMethod",
expected: false,
},
{
name: "empty method",
method: "",
expected: false,
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
result := isInputMethod(tt.method)
assert.Equal(t, tt.expected, result)
})
}
}
// Benchmark tests to verify performance improvements
func BenchmarkValidateFloat64Param(b *testing.B) {
params := map[string]interface{}{"test": 50.0}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = validateFloat64Param(params, "test", "benchmarkMethod", 0, 100)
}
}
func BenchmarkValidateKeysArray(b *testing.B) {
params := map[string]interface{}{"keys": []interface{}{65.0, 66.0, 67.0}}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = validateKeysArray(params, "benchmarkMethod")
}
}
func BenchmarkHandleKeyboardReportDirect(b *testing.B) {
params := map[string]interface{}{
"modifier": 2.0,
"keys": []interface{}{65.0, 66.0},
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = handleKeyboardReportDirect(params)
}
}
func BenchmarkHandleInputRPCDirect(b *testing.B) {
params := map[string]interface{}{
"modifier": 2.0,
"keys": []interface{}{65.0, 66.0},
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
_, _ = handleInputRPCDirect("keyboardReport", params)
}
}
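The suite runs with the standard Go tooling (flags below are illustrative; note that the positive-path cases exercise the underlying `rpc*` HID functions, and the Makefile's `build_dev_test` target cross-compiles these tests for the device):

```bash
go test -v -run TestValidateFloat64Param ./...
go test -bench . -run '^$' -benchmem ./...
```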

internal/audio/api.go (new file, 13 lines)

@ -0,0 +1,13 @@
package audio
// StartAudioStreaming launches the in-process audio stream and delivers Opus frames to the provided callback.
// This is now a wrapper around the non-blocking audio implementation for backward compatibility.
func StartAudioStreaming(send func([]byte)) error {
return StartNonBlockingAudioStreaming(send)
}
// StopAudioStreaming stops the in-process audio stream.
// This is now a wrapper around the non-blocking audio implementation for backward compatibility.
func StopAudioStreaming() {
StopNonBlockingAudioStreaming()
}
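A minimal consumption sketch (the transport callback is hypothetical; only the two exported functions above are real):

```go
err := audio.StartAudioStreaming(func(frame []byte) {
	// frame is one encoded Opus packet; hand it to the transport,
	// e.g. a WebRTC audio track (sendToTrack is hypothetical)
	sendToTrack(frame)
})
if err != nil {
	panic(err) // sketch only; real code should log the error
}
defer audio.StopAudioStreaming()
```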

internal/audio/audio.go (new file, 185 lines)

@ -0,0 +1,185 @@
package audio
import (
"errors"
"sync/atomic"
"time"
// Explicit import for CGO audio stream glue
)
var (
ErrAudioAlreadyRunning = errors.New("audio already running")
)
const MaxAudioFrameSize = 1500
// AudioQuality represents different audio quality presets
type AudioQuality int
const (
AudioQualityLow AudioQuality = iota
AudioQualityMedium
AudioQualityHigh
AudioQualityUltra
)
// AudioConfig holds configuration for audio processing
type AudioConfig struct {
Quality AudioQuality
Bitrate int // kbps
SampleRate int // Hz
Channels int
FrameSize time.Duration // ms
}
// AudioMetrics tracks audio performance metrics
// Note: 64-bit fields must be first for proper alignment on 32-bit ARM
type AudioMetrics struct {
FramesReceived int64
FramesDropped int64
BytesProcessed int64
ConnectionDrops int64
LastFrameTime time.Time
AverageLatency time.Duration
}
var (
currentConfig = AudioConfig{
Quality: AudioQualityMedium,
Bitrate: 64,
SampleRate: 48000,
Channels: 2,
FrameSize: 20 * time.Millisecond,
}
currentMicrophoneConfig = AudioConfig{
Quality: AudioQualityMedium,
Bitrate: 32,
SampleRate: 48000,
Channels: 1,
FrameSize: 20 * time.Millisecond,
}
metrics AudioMetrics
)
// GetAudioQualityPresets returns predefined quality configurations
func GetAudioQualityPresets() map[AudioQuality]AudioConfig {
return map[AudioQuality]AudioConfig{
AudioQualityLow: {
Quality: AudioQualityLow,
Bitrate: 32,
SampleRate: 22050,
Channels: 1,
FrameSize: 40 * time.Millisecond,
},
AudioQualityMedium: {
Quality: AudioQualityMedium,
Bitrate: 64,
SampleRate: 44100,
Channels: 2,
FrameSize: 20 * time.Millisecond,
},
AudioQualityHigh: {
Quality: AudioQualityHigh,
Bitrate: 128,
SampleRate: 48000,
Channels: 2,
FrameSize: 20 * time.Millisecond,
},
AudioQualityUltra: {
Quality: AudioQualityUltra,
Bitrate: 192,
SampleRate: 48000,
Channels: 2,
FrameSize: 10 * time.Millisecond,
},
}
}
// GetMicrophoneQualityPresets returns predefined quality configurations for microphone input
func GetMicrophoneQualityPresets() map[AudioQuality]AudioConfig {
return map[AudioQuality]AudioConfig{
AudioQualityLow: {
Quality: AudioQualityLow,
Bitrate: 16,
SampleRate: 16000,
Channels: 1,
FrameSize: 40 * time.Millisecond,
},
AudioQualityMedium: {
Quality: AudioQualityMedium,
Bitrate: 32,
SampleRate: 22050,
Channels: 1,
FrameSize: 20 * time.Millisecond,
},
AudioQualityHigh: {
Quality: AudioQualityHigh,
Bitrate: 64,
SampleRate: 44100,
Channels: 1,
FrameSize: 20 * time.Millisecond,
},
AudioQualityUltra: {
Quality: AudioQualityUltra,
Bitrate: 96,
SampleRate: 48000,
Channels: 1,
FrameSize: 10 * time.Millisecond,
},
}
}
// SetAudioQuality updates the current audio quality configuration
func SetAudioQuality(quality AudioQuality) {
presets := GetAudioQualityPresets()
if config, exists := presets[quality]; exists {
currentConfig = config
}
}
// GetAudioConfig returns the current audio configuration
func GetAudioConfig() AudioConfig {
return currentConfig
}
// SetMicrophoneQuality updates the current microphone quality configuration
func SetMicrophoneQuality(quality AudioQuality) {
presets := GetMicrophoneQualityPresets()
if config, exists := presets[quality]; exists {
currentMicrophoneConfig = config
}
}
// GetMicrophoneConfig returns the current microphone configuration
func GetMicrophoneConfig() AudioConfig {
return currentMicrophoneConfig
}
// GetAudioMetrics returns current audio metrics
func GetAudioMetrics() AudioMetrics {
return AudioMetrics{
FramesReceived: atomic.LoadInt64(&metrics.FramesReceived),
FramesDropped: atomic.LoadInt64(&metrics.FramesDropped),
BytesProcessed: atomic.LoadInt64(&metrics.BytesProcessed),
LastFrameTime: metrics.LastFrameTime,
ConnectionDrops: atomic.LoadInt64(&metrics.ConnectionDrops),
AverageLatency: metrics.AverageLatency,
}
}
// RecordFrameReceived increments the frames received counter
func RecordFrameReceived(bytes int) {
atomic.AddInt64(&metrics.FramesReceived, 1)
atomic.AddInt64(&metrics.BytesProcessed, int64(bytes))
metrics.LastFrameTime = time.Now()
}
// RecordFrameDropped increments the frames dropped counter
func RecordFrameDropped() {
atomic.AddInt64(&metrics.FramesDropped, 1)
}
// RecordConnectionDrop increments the connection drops counter
func RecordConnectionDrop() {
atomic.AddInt64(&metrics.ConnectionDrops, 1)
}
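A short usage sketch tying the configuration and metrics APIs together (the commented values reflect the presets defined above):

```go
audio.SetAudioQuality(audio.AudioQualityHigh) // 128 kbps, 48 kHz, stereo, 20 ms frames
cfg := audio.GetAudioConfig()
fmt.Printf("bitrate=%d kbps, sample_rate=%d Hz, channels=%d\n", cfg.Bitrate, cfg.SampleRate, cfg.Channels)

m := audio.GetAudioMetrics()
fmt.Printf("frames received=%d, dropped=%d\n", m.FramesReceived, m.FramesDropped)
```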


@ -0,0 +1,26 @@
package audio
import (
"sync"
"github.com/jetkvm/kvm/internal/logging"
)
var audioMuteState struct {
muted bool
mu sync.RWMutex
}
func SetAudioMuted(muted bool) {
audioMuteState.mu.Lock()
prev := audioMuteState.muted
audioMuteState.muted = muted
logging.GetDefaultLogger().Info().Str("component", "audio").Msgf("SetAudioMuted: prev=%v, new=%v", prev, muted)
audioMuteState.mu.Unlock()
}
func IsAudioMuted() bool {
audioMuteState.mu.RLock()
defer audioMuteState.mu.RUnlock()
return audioMuteState.muted
}
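Callers toggle and query the flag like so (sketch; the frame-forwarding policy is hypothetical):

```go
audio.SetAudioMuted(true)
if audio.IsAudioMuted() {
	// e.g. skip forwarding Opus frames while muted
}
```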


@ -0,0 +1,455 @@
//go:build cgo
package audio
import (
"context"
"runtime"
"sync"
"sync/atomic"
"time"
"unsafe"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// BatchAudioProcessor manages batched CGO operations to reduce syscall overhead
type BatchAudioProcessor struct {
// Statistics - MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
stats BatchAudioStats
// Control
ctx context.Context
cancel context.CancelFunc
logger *zerolog.Logger
batchSize int
batchDuration time.Duration
// Batch queues and state (atomic for lock-free access)
readQueue chan batchReadRequest
writeQueue chan batchWriteRequest
initialized int32
running int32
threadPinned int32
// Buffers (pre-allocated to avoid allocation overhead)
readBufPool *sync.Pool
writeBufPool *sync.Pool
}
type BatchAudioStats struct {
// int64 fields MUST be first for ARM32 alignment
BatchedReads int64
BatchedWrites int64
SingleReads int64
SingleWrites int64
BatchedFrames int64
SingleFrames int64
CGOCallsReduced int64
OSThreadPinTime time.Duration // time.Duration is int64 internally
LastBatchTime time.Time
}
type batchReadRequest struct {
buffer []byte
resultChan chan batchReadResult
timestamp time.Time
}
type batchWriteRequest struct {
buffer []byte
resultChan chan batchWriteResult
timestamp time.Time
}
type batchReadResult struct {
length int
err error
}
type batchWriteResult struct {
written int
err error
}
// NewBatchAudioProcessor creates a new batch audio processor
func NewBatchAudioProcessor(batchSize int, batchDuration time.Duration) *BatchAudioProcessor {
ctx, cancel := context.WithCancel(context.Background())
logger := logging.GetDefaultLogger().With().Str("component", "batch-audio").Logger()
processor := &BatchAudioProcessor{
ctx: ctx,
cancel: cancel,
logger: &logger,
batchSize: batchSize,
batchDuration: batchDuration,
readQueue: make(chan batchReadRequest, batchSize*2),
writeQueue: make(chan batchWriteRequest, batchSize*2),
readBufPool: &sync.Pool{
New: func() interface{} {
return make([]byte, 1500) // Max audio frame size
},
},
writeBufPool: &sync.Pool{
New: func() interface{} {
return make([]byte, 4096) // Max write buffer size
},
},
}
return processor
}
// Start initializes and starts the batch processor
func (bap *BatchAudioProcessor) Start() error {
if !atomic.CompareAndSwapInt32(&bap.running, 0, 1) {
return nil // Already running
}
// Initialize CGO resources once per processor lifecycle
if !atomic.CompareAndSwapInt32(&bap.initialized, 0, 1) {
return nil // Already initialized
}
// Start batch processing goroutines
go bap.batchReadProcessor()
go bap.batchWriteProcessor()
bap.logger.Info().Int("batch_size", bap.batchSize).
Dur("batch_duration", bap.batchDuration).
Msg("batch audio processor started")
return nil
}
// Stop cleanly shuts down the batch processor
func (bap *BatchAudioProcessor) Stop() {
if !atomic.CompareAndSwapInt32(&bap.running, 1, 0) {
return // Already stopped
}
bap.cancel()
// Wait for processing to complete
time.Sleep(bap.batchDuration + 10*time.Millisecond)
bap.logger.Info().Msg("batch audio processor stopped")
}
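// Usage sketch (not part of this file; values are illustrative):
//
//	bap := NewBatchAudioProcessor(8, 20*time.Millisecond) // batch up to 8 requests, flush every 20ms
//	_ = bap.Start()
//	defer bap.Stop()
//	buf := make([]byte, MaxAudioFrameSize)
//	n, err := bap.BatchReadEncode(buf) // transparently falls back to a single CGO call on timeout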
// BatchReadEncode performs batched audio read and encode operations
func (bap *BatchAudioProcessor) BatchReadEncode(buffer []byte) (int, error) {
if atomic.LoadInt32(&bap.running) == 0 {
// Fallback to single operation if batch processor is not running
atomic.AddInt64(&bap.stats.SingleReads, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioReadEncode(buffer)
}
resultChan := make(chan batchReadResult, 1)
request := batchReadRequest{
buffer: buffer,
resultChan: resultChan,
timestamp: time.Now(),
}
select {
case bap.readQueue <- request:
// Successfully queued
case <-time.After(5 * time.Millisecond):
// Queue is full or blocked, fallback to single operation
atomic.AddInt64(&bap.stats.SingleReads, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioReadEncode(buffer)
}
// Wait for result
select {
case result := <-resultChan:
return result.length, result.err
case <-time.After(50 * time.Millisecond):
// Timeout, fallback to single operation
atomic.AddInt64(&bap.stats.SingleReads, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioReadEncode(buffer)
}
}
// BatchDecodeWrite performs batched audio decode and write operations
func (bap *BatchAudioProcessor) BatchDecodeWrite(buffer []byte) (int, error) {
if atomic.LoadInt32(&bap.running) == 0 {
// Fallback to single operation if batch processor is not running
atomic.AddInt64(&bap.stats.SingleWrites, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioDecodeWrite(buffer)
}
resultChan := make(chan batchWriteResult, 1)
request := batchWriteRequest{
buffer: buffer,
resultChan: resultChan,
timestamp: time.Now(),
}
select {
case bap.writeQueue <- request:
// Successfully queued
case <-time.After(5 * time.Millisecond):
// Queue is full or blocked, fallback to single operation
atomic.AddInt64(&bap.stats.SingleWrites, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioDecodeWrite(buffer)
}
// Wait for result
select {
case result := <-resultChan:
return result.written, result.err
case <-time.After(50 * time.Millisecond):
// Timeout, fallback to single operation
atomic.AddInt64(&bap.stats.SingleWrites, 1)
atomic.AddInt64(&bap.stats.SingleFrames, 1)
return CGOAudioDecodeWrite(buffer)
}
}
// batchReadProcessor processes batched read operations
func (bap *BatchAudioProcessor) batchReadProcessor() {
defer bap.logger.Debug().Msg("batch read processor stopped")
ticker := time.NewTicker(bap.batchDuration)
defer ticker.Stop()
var batch []batchReadRequest
batch = make([]batchReadRequest, 0, bap.batchSize)
for atomic.LoadInt32(&bap.running) == 1 {
select {
case <-bap.ctx.Done():
return
case req := <-bap.readQueue:
batch = append(batch, req)
if len(batch) >= bap.batchSize {
bap.processBatchRead(batch)
batch = batch[:0] // Clear slice but keep capacity
}
case <-ticker.C:
if len(batch) > 0 {
bap.processBatchRead(batch)
batch = batch[:0] // Clear slice but keep capacity
}
}
}
// Process any remaining requests
if len(batch) > 0 {
bap.processBatchRead(batch)
}
}
// batchWriteProcessor processes batched write operations
func (bap *BatchAudioProcessor) batchWriteProcessor() {
defer bap.logger.Debug().Msg("batch write processor stopped")
ticker := time.NewTicker(bap.batchDuration)
defer ticker.Stop()
var batch []batchWriteRequest
batch = make([]batchWriteRequest, 0, bap.batchSize)
for atomic.LoadInt32(&bap.running) == 1 {
select {
case <-bap.ctx.Done():
return
case req := <-bap.writeQueue:
batch = append(batch, req)
if len(batch) >= bap.batchSize {
bap.processBatchWrite(batch)
batch = batch[:0] // Clear slice but keep capacity
}
case <-ticker.C:
if len(batch) > 0 {
bap.processBatchWrite(batch)
batch = batch[:0] // Clear slice but keep capacity
}
}
}
// Process any remaining requests
if len(batch) > 0 {
bap.processBatchWrite(batch)
}
}
// processBatchRead processes a batch of read requests efficiently
func (bap *BatchAudioProcessor) processBatchRead(batch []batchReadRequest) {
if len(batch) == 0 {
return
}
// Pin to OS thread for the entire batch to minimize thread switching overhead
start := time.Now()
if atomic.CompareAndSwapInt32(&bap.threadPinned, 0, 1) {
runtime.LockOSThread()
defer func() {
runtime.UnlockOSThread()
atomic.StoreInt32(&bap.threadPinned, 0)
bap.stats.OSThreadPinTime += time.Since(start)
}()
}
batchSize := len(batch)
atomic.AddInt64(&bap.stats.BatchedReads, 1)
atomic.AddInt64(&bap.stats.BatchedFrames, int64(batchSize))
if batchSize > 1 {
atomic.AddInt64(&bap.stats.CGOCallsReduced, int64(batchSize-1))
}
// Process each request in the batch
for _, req := range batch {
length, err := CGOAudioReadEncode(req.buffer)
result := batchReadResult{
length: length,
err: err,
}
// Send result back (non-blocking)
select {
case req.resultChan <- result:
default:
// Requestor timed out, drop result
}
}
bap.stats.LastBatchTime = time.Now()
}
// processBatchWrite processes a batch of write requests efficiently
func (bap *BatchAudioProcessor) processBatchWrite(batch []batchWriteRequest) {
if len(batch) == 0 {
return
}
// Pin to OS thread for the entire batch to minimize thread switching overhead
start := time.Now()
if atomic.CompareAndSwapInt32(&bap.threadPinned, 0, 1) {
runtime.LockOSThread()
defer func() {
runtime.UnlockOSThread()
atomic.StoreInt32(&bap.threadPinned, 0)
bap.stats.OSThreadPinTime += time.Since(start)
}()
}
batchSize := len(batch)
atomic.AddInt64(&bap.stats.BatchedWrites, 1)
atomic.AddInt64(&bap.stats.BatchedFrames, int64(batchSize))
if batchSize > 1 {
atomic.AddInt64(&bap.stats.CGOCallsReduced, int64(batchSize-1))
}
// Process each request in the batch
for _, req := range batch {
written, err := CGOAudioDecodeWrite(req.buffer)
result := batchWriteResult{
written: written,
err: err,
}
// Send result back (non-blocking)
select {
case req.resultChan <- result:
default:
// Requestor timed out, drop result
}
}
bap.stats.LastBatchTime = time.Now()
}
// GetStats returns current batch processor statistics
func (bap *BatchAudioProcessor) GetStats() BatchAudioStats {
return BatchAudioStats{
BatchedReads: atomic.LoadInt64(&bap.stats.BatchedReads),
BatchedWrites: atomic.LoadInt64(&bap.stats.BatchedWrites),
SingleReads: atomic.LoadInt64(&bap.stats.SingleReads),
SingleWrites: atomic.LoadInt64(&bap.stats.SingleWrites),
BatchedFrames: atomic.LoadInt64(&bap.stats.BatchedFrames),
SingleFrames: atomic.LoadInt64(&bap.stats.SingleFrames),
CGOCallsReduced: atomic.LoadInt64(&bap.stats.CGOCallsReduced),
OSThreadPinTime: bap.stats.OSThreadPinTime,
LastBatchTime: bap.stats.LastBatchTime,
}
}
// IsRunning returns whether the batch processor is running
func (bap *BatchAudioProcessor) IsRunning() bool {
return atomic.LoadInt32(&bap.running) == 1
}
// Global batch processor instance
var (
globalBatchProcessor unsafe.Pointer // *BatchAudioProcessor
batchProcessorInitialized int32
)
// GetBatchAudioProcessor returns the global batch processor instance
func GetBatchAudioProcessor() *BatchAudioProcessor {
ptr := atomic.LoadPointer(&globalBatchProcessor)
if ptr != nil {
return (*BatchAudioProcessor)(ptr)
}
// Initialize on first use
if atomic.CompareAndSwapInt32(&batchProcessorInitialized, 0, 1) {
processor := NewBatchAudioProcessor(4, 5*time.Millisecond) // 4 frames per batch, 5ms timeout
atomic.StorePointer(&globalBatchProcessor, unsafe.Pointer(processor))
return processor
}
// Another goroutine initialized it, try again
ptr = atomic.LoadPointer(&globalBatchProcessor)
if ptr != nil {
return (*BatchAudioProcessor)(ptr)
}
// Fallback: create a new processor (should rarely happen)
return NewBatchAudioProcessor(4, 5*time.Millisecond)
}
// EnableBatchAudioProcessing enables the global batch processor
func EnableBatchAudioProcessing() error {
processor := GetBatchAudioProcessor()
return processor.Start()
}
// DisableBatchAudioProcessing disables the global batch processor
func DisableBatchAudioProcessing() {
ptr := atomic.LoadPointer(&globalBatchProcessor)
if ptr != nil {
processor := (*BatchAudioProcessor)(ptr)
processor.Stop()
}
}
// BatchCGOAudioReadEncode is a batched version of CGOAudioReadEncode
func BatchCGOAudioReadEncode(buffer []byte) (int, error) {
processor := GetBatchAudioProcessor()
if processor != nil && processor.IsRunning() {
return processor.BatchReadEncode(buffer)
}
return CGOAudioReadEncode(buffer)
}
// BatchCGOAudioDecodeWrite is a batched version of CGOAudioDecodeWrite
func BatchCGOAudioDecodeWrite(buffer []byte) (int, error) {
processor := GetBatchAudioProcessor()
if processor != nil && processor.IsRunning() {
return processor.BatchDecodeWrite(buffer)
}
return CGOAudioDecodeWrite(buffer)
}
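Taken together, the intended call pattern is: enable batching once, then call the Batch* wrappers from the hot path and let them fall back to single CGO calls on timeout. A minimal sketch (the 1500-byte buffer size is an assumption matching the frame pool, not part of a public contract):

    // captureOneFrameSketch grabs one encoded Opus frame via the batch path.
    func captureOneFrameSketch() ([]byte, error) {
        // Assumed safe to call repeatedly; Start() guards itself with a CAS.
        if err := EnableBatchAudioProcessing(); err != nil {
            return nil, err
        }
        buf := make([]byte, 1500) // assumed worst-case Opus packet size
        n, err := BatchCGOAudioReadEncode(buf)
        if err != nil {
            return nil, err
        }
        return buf[:n], nil // n == 0 means no frame was available
    }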


@ -0,0 +1,64 @@
package audio
import (
"sync"
)
// AudioBufferPool manages reusable audio buffers to reduce allocations
type AudioBufferPool struct {
bufferSize int
pool sync.Pool
}
// NewAudioBufferPool creates a new buffer pool for audio frames
func NewAudioBufferPool(bufferSize int) *AudioBufferPool {
return &AudioBufferPool{
bufferSize: bufferSize,
pool: sync.Pool{
New: func() interface{} {
// Pre-allocate buffer with specified size
return make([]byte, bufferSize)
},
},
}
}
// Get retrieves a buffer from the pool
func (p *AudioBufferPool) Get() []byte {
return p.pool.Get().([]byte)
}
// Put returns a buffer to the pool
func (p *AudioBufferPool) Put(buf []byte) {
// Only re-pool buffers that match this pool's allocation size, and restore
// the full length so Get always hands back a ready-to-use slice. (Re-pooling
// buf[:0] would make later Gets fail the minimum-length checks in the CGO
// wrappers, and a fixed 1500-byte threshold would silently discard every
// 64-byte control buffer.)
if cap(buf) >= p.bufferSize {
p.pool.Put(buf[:p.bufferSize])
}
}
// Global buffer pools for different audio operations
var (
// Pool for 1500-byte audio frame buffers (Opus max frame size)
audioFramePool = NewAudioBufferPool(1500)
// Pool for smaller control buffers
audioControlPool = NewAudioBufferPool(64)
)
// GetAudioFrameBuffer gets a reusable buffer for audio frames
func GetAudioFrameBuffer() []byte {
return audioFramePool.Get()
}
// PutAudioFrameBuffer returns a buffer to the frame pool
func PutAudioFrameBuffer(buf []byte) {
audioFramePool.Put(buf)
}
// GetAudioControlBuffer gets a reusable buffer for control data
func GetAudioControlBuffer() []byte {
return audioControlPool.Get()
}
// PutAudioControlBuffer returns a buffer to the control pool
func PutAudioControlBuffer(buf []byte) {
audioControlPool.Put(buf)
}
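A sketch of the intended borrow/return cycle (fillFrame is a hypothetical stand-in for whatever produces the frame data):

    // withPooledFrameSketch borrows a frame buffer for one encode cycle and
    // returns it to the pool afterwards, even on error.
    func withPooledFrameSketch(fillFrame func([]byte) (int, error)) error {
        buf := GetAudioFrameBuffer()
        defer PutAudioFrameBuffer(buf)
        n, err := fillFrame(buf)
        if err != nil {
            return err
        }
        _ = buf[:n] // the frame would be consumed here, before Put runs
        return nil
    }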

487 internal/audio/cgo_audio.go Normal file

@ -0,0 +1,487 @@
//go:build cgo
package audio
import (
"errors"
"unsafe"
)
/*
#cgo CFLAGS: -I${SRCDIR}/../../tools/alsa-opus-includes
#cgo LDFLAGS: -L$HOME/.jetkvm/audio-libs/alsa-lib-$ALSA_VERSION/src/.libs -lasound -L$HOME/.jetkvm/audio-libs/opus-$OPUS_VERSION/.libs -lopus -lm -ldl -static
#include <alsa/asoundlib.h>
#include <opus.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
// C state for ALSA/Opus with safety flags
static snd_pcm_t *pcm_handle = NULL;
static snd_pcm_t *pcm_playback_handle = NULL;
static OpusEncoder *encoder = NULL;
static OpusDecoder *decoder = NULL;
static int opus_bitrate = 64000;
static int opus_complexity = 5;
static int sample_rate = 48000;
static int channels = 2;
static int frame_size = 960; // 20ms for 48kHz
static int max_packet_size = 1500;
// State tracking to prevent race conditions during rapid start/stop
static volatile int capture_initializing = 0;
static volatile int capture_initialized = 0;
static volatile int playback_initializing = 0;
static volatile int playback_initialized = 0;
// Safe ALSA device opening with retry logic
static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream) {
int attempts = 3;
int err;
while (attempts-- > 0) {
err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK);
if (err >= 0) {
// Switch to blocking mode after successful open
snd_pcm_nonblock(*handle, 0);
return 0;
}
if (err == -EBUSY && attempts > 0) {
// Device busy, wait and retry
usleep(50000); // 50ms
continue;
}
break;
}
return err;
}
// Optimized ALSA configuration with stack allocation and performance tuning
static int configure_alsa_device(snd_pcm_t *handle, const char *device_name) {
snd_pcm_hw_params_t *params;
snd_pcm_sw_params_t *sw_params;
int err;
if (!handle) return -1;
// Use stack allocation for better performance
snd_pcm_hw_params_alloca(&params);
snd_pcm_sw_params_alloca(&sw_params);
// Hardware parameters
err = snd_pcm_hw_params_any(handle, params);
if (err < 0) return err;
err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED);
if (err < 0) return err;
err = snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE);
if (err < 0) return err;
err = snd_pcm_hw_params_set_channels(handle, params, channels);
if (err < 0) return err;
// Set exact rate for better performance
err = snd_pcm_hw_params_set_rate(handle, params, sample_rate, 0);
if (err < 0) {
// Fallback to near rate if exact fails
unsigned int rate = sample_rate;
err = snd_pcm_hw_params_set_rate_near(handle, params, &rate, 0);
if (err < 0) return err;
}
// Optimize buffer sizes for low latency
snd_pcm_uframes_t period_size = frame_size;
err = snd_pcm_hw_params_set_period_size_near(handle, params, &period_size, 0);
if (err < 0) return err;
// Set buffer size to 4 periods for good latency/stability balance
snd_pcm_uframes_t buffer_size = period_size * 4;
err = snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size);
if (err < 0) return err;
err = snd_pcm_hw_params(handle, params);
if (err < 0) return err;
// Software parameters for optimal performance
err = snd_pcm_sw_params_current(handle, sw_params);
if (err < 0) return err;
// Start playback/capture when buffer is period_size frames
err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, period_size);
if (err < 0) return err;
// Allow transfers when at least period_size frames are available
err = snd_pcm_sw_params_set_avail_min(handle, sw_params, period_size);
if (err < 0) return err;
err = snd_pcm_sw_params(handle, sw_params);
if (err < 0) return err;
return snd_pcm_prepare(handle);
}
// Initialize ALSA and Opus encoder with improved safety
int jetkvm_audio_init() {
int err;
// Prevent concurrent initialization
if (!__sync_bool_compare_and_swap(&capture_initializing, 0, 1)) {
return -EBUSY; // Already initializing
}
// Check if already initialized
if (capture_initialized) {
capture_initializing = 0;
return 0;
}
// Clean up any existing resources first
if (encoder) {
opus_encoder_destroy(encoder);
encoder = NULL;
}
if (pcm_handle) {
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
}
// Try to open ALSA capture device
err = safe_alsa_open(&pcm_handle, "hw:1,0", SND_PCM_STREAM_CAPTURE);
if (err < 0) {
capture_initializing = 0;
return -1;
}
// Configure the device
err = configure_alsa_device(pcm_handle, "capture");
if (err < 0) {
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
capture_initializing = 0;
return -1;
}
// Initialize Opus encoder
int opus_err = 0;
encoder = opus_encoder_create(sample_rate, channels, OPUS_APPLICATION_AUDIO, &opus_err);
if (!encoder || opus_err != OPUS_OK) {
if (pcm_handle) { snd_pcm_close(pcm_handle); pcm_handle = NULL; }
capture_initializing = 0;
return -2;
}
opus_encoder_ctl(encoder, OPUS_SET_BITRATE(opus_bitrate));
opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(opus_complexity));
capture_initialized = 1;
capture_initializing = 0;
return 0;
}
// Read and encode one frame with enhanced error handling
int jetkvm_audio_read_encode(void *opus_buf) {
short pcm_buffer[1920]; // max 2ch*960
unsigned char *out = (unsigned char*)opus_buf;
int err = 0;
// Safety checks
if (!capture_initialized || !pcm_handle || !encoder || !opus_buf) {
return -1;
}
int pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
// Handle ALSA errors with enhanced recovery
if (pcm_rc < 0) {
if (pcm_rc == -EPIPE) {
// Buffer underrun - try to recover
err = snd_pcm_prepare(pcm_handle);
if (err < 0) return -1;
pcm_rc = snd_pcm_readi(pcm_handle, pcm_buffer, frame_size);
if (pcm_rc < 0) return -1;
} else if (pcm_rc == -EAGAIN) {
// No data available - return 0 to indicate no frame
return 0;
} else if (pcm_rc == -ESTRPIPE) {
// Device suspended, try to resume
while ((err = snd_pcm_resume(pcm_handle)) == -EAGAIN) {
usleep(1000); // 1ms
}
if (err < 0) {
err = snd_pcm_prepare(pcm_handle);
if (err < 0) return -1;
}
return 0; // Skip this frame
} else {
// Other error - return error code
return -1;
}
}
// If we got fewer frames than expected, pad with silence
if (pcm_rc < frame_size) {
memset(&pcm_buffer[pcm_rc * channels], 0, (frame_size - pcm_rc) * channels * sizeof(short));
}
int nb_bytes = opus_encode(encoder, pcm_buffer, frame_size, out, max_packet_size);
return nb_bytes;
}
// Initialize ALSA playback with improved safety
int jetkvm_audio_playback_init() {
int err;
// Prevent concurrent initialization
if (!__sync_bool_compare_and_swap(&playback_initializing, 0, 1)) {
return -EBUSY; // Already initializing
}
// Check if already initialized
if (playback_initialized) {
playback_initializing = 0;
return 0;
}
// Clean up any existing resources first
if (decoder) {
opus_decoder_destroy(decoder);
decoder = NULL;
}
if (pcm_playback_handle) {
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
}
// Try to open the USB gadget audio device for playback
err = safe_alsa_open(&pcm_playback_handle, "hw:1,0", SND_PCM_STREAM_PLAYBACK);
if (err < 0) {
// Fallback to default device
err = safe_alsa_open(&pcm_playback_handle, "default", SND_PCM_STREAM_PLAYBACK);
if (err < 0) {
playback_initializing = 0;
return -1;
}
}
// Configure the device
err = configure_alsa_device(pcm_playback_handle, "playback");
if (err < 0) {
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
playback_initializing = 0;
return -1;
}
// Initialize Opus decoder
int opus_err = 0;
decoder = opus_decoder_create(sample_rate, channels, &opus_err);
if (!decoder || opus_err != OPUS_OK) {
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
playback_initializing = 0;
return -2;
}
playback_initialized = 1;
playback_initializing = 0;
return 0;
}
// Decode Opus and write PCM with enhanced error handling
int jetkvm_audio_decode_write(void *opus_buf, int opus_size) {
short pcm_buffer[1920]; // max 2ch*960
unsigned char *in = (unsigned char*)opus_buf;
int err = 0;
// Safety checks
if (!playback_initialized || !pcm_playback_handle || !decoder || !opus_buf || opus_size <= 0) {
return -1;
}
// Additional bounds checking
if (opus_size > max_packet_size) {
return -1;
}
// Decode Opus to PCM
int pcm_frames = opus_decode(decoder, in, opus_size, pcm_buffer, frame_size, 0);
if (pcm_frames < 0) return -1;
// Write PCM to playback device with enhanced recovery
int pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
if (pcm_rc < 0) {
if (pcm_rc == -EPIPE) {
// Buffer underrun - try to recover
err = snd_pcm_prepare(pcm_playback_handle);
if (err < 0) return -2;
pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames);
} else if (pcm_rc == -ESTRPIPE) {
// Device suspended, try to resume
while ((err = snd_pcm_resume(pcm_playback_handle)) == -EAGAIN) {
usleep(1000); // 1ms
}
if (err < 0) {
err = snd_pcm_prepare(pcm_playback_handle);
if (err < 0) return -2;
}
return 0; // Skip this frame
}
if (pcm_rc < 0) return -2;
}
return pcm_frames;
}
// Safe playback cleanup with double-close protection
void jetkvm_audio_playback_close() {
// Wait for any ongoing operations to complete
while (playback_initializing) {
usleep(1000); // 1ms
}
// Atomic check and set to prevent double cleanup
if (!__sync_bool_compare_and_swap(&playback_initialized, 1, 0)) {
return; // Already cleaned up
}
if (decoder) {
opus_decoder_destroy(decoder);
decoder = NULL;
}
if (pcm_playback_handle) {
snd_pcm_drain(pcm_playback_handle);
snd_pcm_close(pcm_playback_handle);
pcm_playback_handle = NULL;
}
}
// Safe capture cleanup
void jetkvm_audio_close() {
// Wait for any ongoing operations to complete
while (capture_initializing) {
usleep(1000); // 1ms
}
capture_initialized = 0;
if (encoder) {
opus_encoder_destroy(encoder);
encoder = NULL;
}
if (pcm_handle) {
snd_pcm_drop(pcm_handle); // Drop pending samples
snd_pcm_close(pcm_handle);
pcm_handle = NULL;
}
// Also clean up playback
jetkvm_audio_playback_close();
}
*/
import "C"
// Optimized Go wrappers with reduced overhead
var (
errAudioInitFailed = errors.New("failed to init ALSA/Opus")
errBufferTooSmall = errors.New("buffer too small")
errAudioReadEncode = errors.New("audio read/encode error")
errAudioDecodeWrite = errors.New("audio decode/write error")
errAudioPlaybackInit = errors.New("failed to init ALSA playback/Opus decoder")
errEmptyBuffer = errors.New("empty buffer")
errBufferTooLarge = errors.New("buffer too large")
)
func cgoAudioInit() error {
ret := C.jetkvm_audio_init()
if ret != 0 {
return errAudioInitFailed
}
return nil
}
func cgoAudioClose() {
C.jetkvm_audio_close()
}
// Optimized read and encode with pre-allocated error objects and reduced checks
func cgoAudioReadEncode(buf []byte) (int, error) {
// Fast path: require room for a worst-case single Opus packet
// (1275 payload bytes plus the TOC byte; the frame pool allocates 1500)
if len(buf) < 1276 {
return 0, errBufferTooSmall
}
n := C.jetkvm_audio_read_encode(unsafe.Pointer(&buf[0]))
if n < 0 {
return 0, errAudioReadEncode
}
if n == 0 {
return 0, nil // No data available
}
return int(n), nil
}
// Go wrappers for audio playback (microphone input)
func cgoAudioPlaybackInit() error {
if ret := C.jetkvm_audio_playback_init(); ret != 0 {
return errAudioPlaybackInit
}
return nil
}
func cgoAudioPlaybackClose() {
C.jetkvm_audio_playback_close()
}
// Decodes Opus frame and writes to playback device
func cgoAudioDecodeWrite(buf []byte) (n int, err error) {
// Validate before taking the address of buf[0]; len() is zero for a nil
// slice, so this single check also rejects nil buffers
if len(buf) == 0 {
return 0, errEmptyBuffer
}
// Validate buffer size to prevent potential overruns
if len(buf) > 4096 { // Maximum reasonable Opus frame size
return 0, errBufferTooLarge
}
// Recover from any panic in the C call path and surface it as an error
// instead of crashing the process (or silently returning a nil error)
defer func() {
if r := recover(); r != nil {
n = 0
err = errAudioDecodeWrite
}
}()
rc := C.jetkvm_audio_decode_write(unsafe.Pointer(&buf[0]), C.int(len(buf)))
if rc < 0 {
return 0, errAudioDecodeWrite
}
return int(rc), nil
}
// Wrapper functions for non-blocking audio manager
var (
CGOAudioInit = cgoAudioInit
CGOAudioClose = cgoAudioClose
CGOAudioReadEncode = cgoAudioReadEncode
CGOAudioPlaybackInit = cgoAudioPlaybackInit
CGOAudioPlaybackClose = cgoAudioPlaybackClose
CGOAudioDecodeWrite = cgoAudioDecodeWrite
)
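On-device (cgo) builds can drive these wrappers directly, though production code goes through the non-blocking manager instead; a sketch of the raw loop, assuming the 1500-byte buffer convention used elsewhere in the package:

    // captureLoopSketch reads and encodes a fixed number of frames.
    // A zero return from CGOAudioReadEncode means "no data yet" (ALSA
    // reported -EAGAIN), which is not an error.
    func captureLoopSketch(frames int) error {
        if err := CGOAudioInit(); err != nil {
            return err
        }
        defer CGOAudioClose()
        buf := make([]byte, 1500)
        for i := 0; i < frames; i++ {
            n, err := CGOAudioReadEncode(buf)
            if err != nil {
                return err
            }
            if n > 0 {
                _ = buf[:n] // one ~20ms Opus packet, ready to send
            }
        }
        return nil
    }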


@ -0,0 +1,42 @@
//go:build !cgo
package audio
import "errors"
// Stub implementations for linting (no CGO dependencies)
func cgoAudioInit() error {
return errors.New("audio not available in lint mode")
}
func cgoAudioClose() {
// No-op
}
func cgoAudioReadEncode(buf []byte) (int, error) {
return 0, errors.New("audio not available in lint mode")
}
func cgoAudioPlaybackInit() error {
return errors.New("audio not available in lint mode")
}
func cgoAudioPlaybackClose() {
// No-op
}
func cgoAudioDecodeWrite(buf []byte) (int, error) {
return 0, errors.New("audio not available in lint mode")
}
// Uppercase aliases for external API compatibility
var (
CGOAudioInit = cgoAudioInit
CGOAudioClose = cgoAudioClose
CGOAudioReadEncode = cgoAudioReadEncode
CGOAudioPlaybackInit = cgoAudioPlaybackInit
CGOAudioPlaybackClose = cgoAudioPlaybackClose
CGOAudioDecodeWrite = cgoAudioDecodeWrite
)

371 internal/audio/events.go Normal file

@ -0,0 +1,371 @@
package audio
import (
"context"
"strings"
"sync"
"time"
"github.com/coder/websocket"
"github.com/coder/websocket/wsjson"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// AudioEventType represents different types of audio events
type AudioEventType string
const (
AudioEventMuteChanged AudioEventType = "audio-mute-changed"
AudioEventMetricsUpdate AudioEventType = "audio-metrics-update"
AudioEventMicrophoneState AudioEventType = "microphone-state-changed"
AudioEventMicrophoneMetrics AudioEventType = "microphone-metrics-update"
)
// AudioEvent represents a WebSocket audio event
type AudioEvent struct {
Type AudioEventType `json:"type"`
Data interface{} `json:"data"`
}
// AudioMuteData represents audio mute state change data
type AudioMuteData struct {
Muted bool `json:"muted"`
}
// AudioMetricsData represents audio metrics data
type AudioMetricsData struct {
FramesReceived int64 `json:"frames_received"`
FramesDropped int64 `json:"frames_dropped"`
BytesProcessed int64 `json:"bytes_processed"`
LastFrameTime string `json:"last_frame_time"`
ConnectionDrops int64 `json:"connection_drops"`
AverageLatency string `json:"average_latency"`
}
// MicrophoneStateData represents microphone state data
type MicrophoneStateData struct {
Running bool `json:"running"`
SessionActive bool `json:"session_active"`
}
// MicrophoneMetricsData represents microphone metrics data
type MicrophoneMetricsData struct {
FramesSent int64 `json:"frames_sent"`
FramesDropped int64 `json:"frames_dropped"`
BytesProcessed int64 `json:"bytes_processed"`
LastFrameTime string `json:"last_frame_time"`
ConnectionDrops int64 `json:"connection_drops"`
AverageLatency string `json:"average_latency"`
}
// AudioEventSubscriber represents a WebSocket connection subscribed to audio events
type AudioEventSubscriber struct {
conn *websocket.Conn
ctx context.Context
logger *zerolog.Logger
}
// AudioEventBroadcaster manages audio event subscriptions and broadcasting
type AudioEventBroadcaster struct {
subscribers map[string]*AudioEventSubscriber
mutex sync.RWMutex
logger *zerolog.Logger
}
var (
audioEventBroadcaster *AudioEventBroadcaster
audioEventOnce sync.Once
)
// InitializeAudioEventBroadcaster initializes the global audio event broadcaster
func InitializeAudioEventBroadcaster() {
audioEventOnce.Do(initAudioEventBroadcaster)
}
// GetAudioEventBroadcaster returns the singleton audio event broadcaster
func GetAudioEventBroadcaster() *AudioEventBroadcaster {
audioEventOnce.Do(initAudioEventBroadcaster)
return audioEventBroadcaster
}
// initAudioEventBroadcaster builds the singleton and starts the metrics
// broadcasting goroutine; audioEventOnce guarantees it runs exactly once
func initAudioEventBroadcaster() {
l := logging.GetDefaultLogger().With().Str("component", "audio-events").Logger()
audioEventBroadcaster = &AudioEventBroadcaster{
subscribers: make(map[string]*AudioEventSubscriber),
logger: &l,
}
go audioEventBroadcaster.startMetricsBroadcasting()
}
// Subscribe adds a WebSocket connection to receive audio events
func (aeb *AudioEventBroadcaster) Subscribe(connectionID string, conn *websocket.Conn, ctx context.Context, logger *zerolog.Logger) {
aeb.mutex.Lock()
defer aeb.mutex.Unlock()
// Check if there's already a subscription for this connectionID
if _, exists := aeb.subscribers[connectionID]; exists {
aeb.logger.Debug().Str("connectionID", connectionID).Msg("duplicate audio events subscription detected; replacing existing entry")
// Do NOT close the existing WebSocket connection here because it's shared
// with the signaling channel. Just replace the subscriber map entry.
delete(aeb.subscribers, connectionID)
}
aeb.subscribers[connectionID] = &AudioEventSubscriber{
conn: conn,
ctx: ctx,
logger: logger,
}
aeb.logger.Info().Str("connectionID", connectionID).Msg("audio events subscription added")
// Send initial state to new subscriber
go aeb.sendInitialState(connectionID)
}
// Unsubscribe removes a WebSocket connection from audio events
func (aeb *AudioEventBroadcaster) Unsubscribe(connectionID string) {
aeb.mutex.Lock()
defer aeb.mutex.Unlock()
delete(aeb.subscribers, connectionID)
aeb.logger.Info().Str("connectionID", connectionID).Msg("audio events subscription removed")
}
// BroadcastAudioMuteChanged broadcasts audio mute state changes
func (aeb *AudioEventBroadcaster) BroadcastAudioMuteChanged(muted bool) {
event := AudioEvent{
Type: AudioEventMuteChanged,
Data: AudioMuteData{Muted: muted},
}
aeb.broadcast(event)
}
// BroadcastMicrophoneStateChanged broadcasts microphone state changes
func (aeb *AudioEventBroadcaster) BroadcastMicrophoneStateChanged(running, sessionActive bool) {
event := AudioEvent{
Type: AudioEventMicrophoneState,
Data: MicrophoneStateData{
Running: running,
SessionActive: sessionActive,
},
}
aeb.broadcast(event)
}
// sendInitialState sends current audio state to a new subscriber
func (aeb *AudioEventBroadcaster) sendInitialState(connectionID string) {
aeb.mutex.RLock()
subscriber, exists := aeb.subscribers[connectionID]
aeb.mutex.RUnlock()
if !exists {
return
}
// Send current audio mute state
muteEvent := AudioEvent{
Type: AudioEventMuteChanged,
Data: AudioMuteData{Muted: IsAudioMuted()},
}
aeb.sendToSubscriber(subscriber, muteEvent)
// Send current microphone state using session provider
sessionProvider := GetSessionProvider()
sessionActive := sessionProvider.IsSessionActive()
var running bool
if sessionActive {
if inputManager := sessionProvider.GetAudioInputManager(); inputManager != nil {
running = inputManager.IsRunning()
}
}
micStateEvent := AudioEvent{
Type: AudioEventMicrophoneState,
Data: MicrophoneStateData{
Running: running,
SessionActive: sessionActive,
},
}
aeb.sendToSubscriber(subscriber, micStateEvent)
// Send current metrics
aeb.sendCurrentMetrics(subscriber)
}
// sendCurrentMetrics sends current audio and microphone metrics to a subscriber
func (aeb *AudioEventBroadcaster) sendCurrentMetrics(subscriber *AudioEventSubscriber) {
// Send audio metrics
audioMetrics := GetAudioMetrics()
audioMetricsEvent := AudioEvent{
Type: AudioEventMetricsUpdate,
Data: AudioMetricsData{
FramesReceived: audioMetrics.FramesReceived,
FramesDropped: audioMetrics.FramesDropped,
BytesProcessed: audioMetrics.BytesProcessed,
LastFrameTime: audioMetrics.LastFrameTime.UTC().Format("2006-01-02T15:04:05.000Z"),
ConnectionDrops: audioMetrics.ConnectionDrops,
AverageLatency: audioMetrics.AverageLatency.String(),
},
}
aeb.sendToSubscriber(subscriber, audioMetricsEvent)
// Send microphone metrics using session provider
sessionProvider := GetSessionProvider()
if sessionProvider.IsSessionActive() {
if inputManager := sessionProvider.GetAudioInputManager(); inputManager != nil {
micMetrics := inputManager.GetMetrics()
micMetricsEvent := AudioEvent{
Type: AudioEventMicrophoneMetrics,
Data: MicrophoneMetricsData{
FramesSent: micMetrics.FramesSent,
FramesDropped: micMetrics.FramesDropped,
BytesProcessed: micMetrics.BytesProcessed,
LastFrameTime: micMetrics.LastFrameTime.UTC().Format("2006-01-02T15:04:05.000Z"),
ConnectionDrops: micMetrics.ConnectionDrops,
AverageLatency: micMetrics.AverageLatency.String(),
},
}
aeb.sendToSubscriber(subscriber, micMetricsEvent)
}
}
}
// startMetricsBroadcasting starts a goroutine that periodically broadcasts metrics
func (aeb *AudioEventBroadcaster) startMetricsBroadcasting() {
// Use 5-second interval instead of 2 seconds for constrained environments
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for range ticker.C {
aeb.mutex.RLock()
subscriberCount := len(aeb.subscribers)
// Early exit if no subscribers to save CPU
if subscriberCount == 0 {
aeb.mutex.RUnlock()
continue
}
// Create a copy for safe iteration
subscribersCopy := make([]*AudioEventSubscriber, 0, subscriberCount)
for _, sub := range aeb.subscribers {
subscribersCopy = append(subscribersCopy, sub)
}
aeb.mutex.RUnlock()
// Pre-check for cancelled contexts to avoid unnecessary work
activeSubscribers := 0
for _, sub := range subscribersCopy {
if sub.ctx.Err() == nil {
activeSubscribers++
}
}
// Skip metrics gathering if no active subscribers
if activeSubscribers == 0 {
continue
}
// Broadcast audio metrics
audioMetrics := GetAudioMetrics()
audioMetricsEvent := AudioEvent{
Type: AudioEventMetricsUpdate,
Data: AudioMetricsData{
FramesReceived: audioMetrics.FramesReceived,
FramesDropped: audioMetrics.FramesDropped,
BytesProcessed: audioMetrics.BytesProcessed,
LastFrameTime: audioMetrics.LastFrameTime.UTC().Format("2006-01-02T15:04:05.000Z"),
ConnectionDrops: audioMetrics.ConnectionDrops,
AverageLatency: audioMetrics.AverageLatency.String(),
},
}
aeb.broadcast(audioMetricsEvent)
// Broadcast microphone metrics if available using session provider
sessionProvider := GetSessionProvider()
if sessionProvider.IsSessionActive() {
if inputManager := sessionProvider.GetAudioInputManager(); inputManager != nil {
micMetrics := inputManager.GetMetrics()
micMetricsEvent := AudioEvent{
Type: AudioEventMicrophoneMetrics,
Data: MicrophoneMetricsData{
FramesSent: micMetrics.FramesSent,
FramesDropped: micMetrics.FramesDropped,
BytesProcessed: micMetrics.BytesProcessed,
LastFrameTime: micMetrics.LastFrameTime.UTC().Format("2006-01-02T15:04:05.000Z"),
ConnectionDrops: micMetrics.ConnectionDrops,
AverageLatency: micMetrics.AverageLatency.String(),
},
}
aeb.broadcast(micMetricsEvent)
}
}
}
}
// broadcast sends an event to all subscribers
func (aeb *AudioEventBroadcaster) broadcast(event AudioEvent) {
aeb.mutex.RLock()
// Create a copy of subscribers to avoid holding the lock during sending
subscribersCopy := make(map[string]*AudioEventSubscriber)
for id, sub := range aeb.subscribers {
subscribersCopy[id] = sub
}
aeb.mutex.RUnlock()
// Track failed subscribers to remove them after sending
var failedSubscribers []string
// Send to all subscribers without holding the lock
for connectionID, subscriber := range subscribersCopy {
if !aeb.sendToSubscriber(subscriber, event) {
failedSubscribers = append(failedSubscribers, connectionID)
}
}
// Remove failed subscribers if any
if len(failedSubscribers) > 0 {
aeb.mutex.Lock()
for _, connectionID := range failedSubscribers {
delete(aeb.subscribers, connectionID)
aeb.logger.Warn().Str("connectionID", connectionID).Msg("removed failed audio events subscriber")
}
aeb.mutex.Unlock()
}
}
// sendToSubscriber sends an event to a specific subscriber
func (aeb *AudioEventBroadcaster) sendToSubscriber(subscriber *AudioEventSubscriber, event AudioEvent) bool {
// Check if subscriber context is already cancelled
if subscriber.ctx.Err() != nil {
return false
}
ctx, cancel := context.WithTimeout(subscriber.ctx, 2*time.Second)
defer cancel()
err := wsjson.Write(ctx, subscriber.conn, event)
if err != nil {
// Don't log network errors for closed connections as warnings, they're expected
if strings.Contains(err.Error(), "use of closed network connection") ||
strings.Contains(err.Error(), "connection reset by peer") ||
strings.Contains(err.Error(), "context canceled") {
subscriber.logger.Debug().Err(err).Msg("websocket connection closed during audio event send")
} else {
subscriber.logger.Warn().Err(err).Msg("failed to send audio event to subscriber")
}
return false
}
return true
}
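From the signaling side, wiring a WebSocket into the broadcaster takes one call in each direction; a sketch (connection setup elided; connectionID is any identifier that stays stable for the socket's lifetime):

    // handleAudioEventsSketch subscribes a signaling WebSocket to audio
    // events and unsubscribes when the connection's context ends.
    func handleAudioEventsSketch(ctx context.Context, connectionID string, conn *websocket.Conn, logger *zerolog.Logger) {
        broadcaster := GetAudioEventBroadcaster()
        broadcaster.Subscribe(connectionID, conn, ctx, logger)
        defer broadcaster.Unsubscribe(connectionID)
        <-ctx.Done() // events are pushed by the broadcaster's goroutines
    }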

119 internal/audio/input.go Normal file

@ -0,0 +1,119 @@
package audio
import (
"sync/atomic"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// AudioInputMetrics holds metrics for microphone input
// Note: int64 fields must be 64-bit aligned for atomic operations on ARM
type AudioInputMetrics struct {
FramesSent int64 // Must be first for alignment
FramesDropped int64
BytesProcessed int64
ConnectionDrops int64
AverageLatency time.Duration // time.Duration is int64
LastFrameTime time.Time
}
// AudioInputManager manages microphone input stream from WebRTC to USB gadget
type AudioInputManager struct {
// metrics MUST be first for ARM32 alignment (contains int64 fields)
metrics AudioInputMetrics
inputBuffer chan []byte
logger zerolog.Logger
running int32
}
// NewAudioInputManager creates a new audio input manager
func NewAudioInputManager() *AudioInputManager {
return &AudioInputManager{
inputBuffer: make(chan []byte, 100), // Buffer up to 100 frames
logger: logging.GetDefaultLogger().With().Str("component", "audio-input").Logger(),
}
}
// Start begins processing microphone input
func (aim *AudioInputManager) Start() error {
if !atomic.CompareAndSwapInt32(&aim.running, 0, 1) {
return nil // Already running
}
aim.logger.Info().Msg("Starting audio input manager")
// Start the non-blocking audio input stream
err := StartNonBlockingAudioInput(aim.inputBuffer)
if err != nil {
atomic.StoreInt32(&aim.running, 0)
return err
}
return nil
}
// Stop stops processing microphone input
func (aim *AudioInputManager) Stop() {
if !atomic.CompareAndSwapInt32(&aim.running, 1, 0) {
return // Already stopped
}
aim.logger.Info().Msg("Stopping audio input manager")
// Stop the non-blocking audio input stream
StopNonBlockingAudioInput()
// Drain the input buffer
go func() {
for {
select {
case <-aim.inputBuffer:
// Drain
case <-time.After(100 * time.Millisecond):
return
}
}
}()
aim.logger.Info().Msg("Audio input manager stopped")
}
// WriteOpusFrame writes an Opus frame to the input buffer
func (aim *AudioInputManager) WriteOpusFrame(frame []byte) error {
if atomic.LoadInt32(&aim.running) == 0 {
return nil // Not running, ignore
}
select {
case aim.inputBuffer <- frame:
atomic.AddInt64(&aim.metrics.FramesSent, 1)
atomic.AddInt64(&aim.metrics.BytesProcessed, int64(len(frame)))
aim.metrics.LastFrameTime = time.Now()
return nil
default:
// Buffer full, drop frame
atomic.AddInt64(&aim.metrics.FramesDropped, 1)
aim.logger.Warn().Msg("Audio input buffer full, dropping frame")
return nil
}
}
// GetMetrics returns current microphone input metrics
func (aim *AudioInputManager) GetMetrics() AudioInputMetrics {
return AudioInputMetrics{
FramesSent: atomic.LoadInt64(&aim.metrics.FramesSent),
FramesDropped: atomic.LoadInt64(&aim.metrics.FramesDropped),
BytesProcessed: atomic.LoadInt64(&aim.metrics.BytesProcessed),
LastFrameTime: aim.metrics.LastFrameTime,
ConnectionDrops: atomic.LoadInt64(&aim.metrics.ConnectionDrops),
AverageLatency: aim.metrics.AverageLatency,
}
}
// IsRunning returns whether the audio input manager is running
func (aim *AudioInputManager) IsRunning() bool {
return atomic.LoadInt32(&aim.running) == 1
}
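The manager sits between a WebRTC audio track and the USB gadget; a sketch of the feeding side (nextOpusFrame is a hypothetical source of depacketized Opus payloads):

    // feedMicrophoneSketch pumps Opus frames into the input manager until
    // the source runs dry. WriteOpusFrame never blocks: when the buffer is
    // full it drops the frame and increments FramesDropped instead.
    func feedMicrophoneSketch(aim *AudioInputManager, nextOpusFrame func() ([]byte, bool)) error {
        if err := aim.Start(); err != nil {
            return err
        }
        defer aim.Stop()
        for {
            frame, ok := nextOpusFrame()
            if !ok {
                return nil
            }
            if err := aim.WriteOpusFrame(frame); err != nil {
                return err
            }
        }
    }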


@ -0,0 +1,158 @@
package audio
import (
"sync/atomic"
"time"
"unsafe"
)
// MicrophoneContentionManager provides optimized microphone operation locking
// with reduced contention using atomic operations and conditional locking
type MicrophoneContentionManager struct {
// Atomic fields (must be 64-bit aligned on 32-bit systems)
lastOpNano int64 // Unix nanoseconds of last operation
cooldownNanos int64 // Cooldown duration in nanoseconds
operationID int64 // Incremental operation ID for tracking
// Lock-free state flags (using atomic.Pointer for lock-free updates)
lockPtr unsafe.Pointer // *sync.Mutex - conditionally allocated
}
// NewMicrophoneContentionManager creates a new microphone contention manager
func NewMicrophoneContentionManager(cooldown time.Duration) *MicrophoneContentionManager {
return &MicrophoneContentionManager{
cooldownNanos: int64(cooldown),
}
}
// OperationResult represents the result of attempting a microphone operation
type OperationResult struct {
Allowed bool
RemainingCooldown time.Duration
OperationID int64
}
// TryOperation attempts to perform a microphone operation with optimized contention handling
func (mcm *MicrophoneContentionManager) TryOperation() OperationResult {
now := time.Now().UnixNano()
cooldown := atomic.LoadInt64(&mcm.cooldownNanos)
// Fast path: check if we're clearly outside cooldown period using atomic read
lastOp := atomic.LoadInt64(&mcm.lastOpNano)
elapsed := now - lastOp
if elapsed >= cooldown {
// Attempt atomic update without locking
if atomic.CompareAndSwapInt64(&mcm.lastOpNano, lastOp, now) {
opID := atomic.AddInt64(&mcm.operationID, 1)
return OperationResult{
Allowed: true,
RemainingCooldown: 0,
OperationID: opID,
}
}
}
// Slow path: potential contention, check remaining cooldown
currentLastOp := atomic.LoadInt64(&mcm.lastOpNano)
currentElapsed := now - currentLastOp
if currentElapsed >= cooldown {
// Race condition: another operation might have updated lastOpNano
// Try once more with CAS
if atomic.CompareAndSwapInt64(&mcm.lastOpNano, currentLastOp, now) {
opID := atomic.AddInt64(&mcm.operationID, 1)
return OperationResult{
Allowed: true,
RemainingCooldown: 0,
OperationID: opID,
}
}
// If CAS failed, fall through to cooldown calculation
currentLastOp = atomic.LoadInt64(&mcm.lastOpNano)
currentElapsed = now - currentLastOp
}
remaining := time.Duration(cooldown - currentElapsed)
if remaining < 0 {
remaining = 0
}
return OperationResult{
Allowed: false,
RemainingCooldown: remaining,
OperationID: atomic.LoadInt64(&mcm.operationID),
}
}
// SetCooldown updates the cooldown duration atomically
func (mcm *MicrophoneContentionManager) SetCooldown(cooldown time.Duration) {
atomic.StoreInt64(&mcm.cooldownNanos, int64(cooldown))
}
// GetCooldown returns the current cooldown duration
func (mcm *MicrophoneContentionManager) GetCooldown() time.Duration {
return time.Duration(atomic.LoadInt64(&mcm.cooldownNanos))
}
// GetLastOperationTime returns the time of the last operation
func (mcm *MicrophoneContentionManager) GetLastOperationTime() time.Time {
nanos := atomic.LoadInt64(&mcm.lastOpNano)
if nanos == 0 {
return time.Time{}
}
return time.Unix(0, nanos)
}
// GetOperationCount returns the total number of successful operations
func (mcm *MicrophoneContentionManager) GetOperationCount() int64 {
return atomic.LoadInt64(&mcm.operationID)
}
// Reset resets the contention manager state
func (mcm *MicrophoneContentionManager) Reset() {
atomic.StoreInt64(&mcm.lastOpNano, 0)
atomic.StoreInt64(&mcm.operationID, 0)
}
// Global instance for microphone contention management
var (
globalMicContentionManager unsafe.Pointer // *MicrophoneContentionManager
micContentionInitialized int32
)
// GetMicrophoneContentionManager returns the global microphone contention manager
func GetMicrophoneContentionManager() *MicrophoneContentionManager {
ptr := atomic.LoadPointer(&globalMicContentionManager)
if ptr != nil {
return (*MicrophoneContentionManager)(ptr)
}
// Initialize on first use
if atomic.CompareAndSwapInt32(&micContentionInitialized, 0, 1) {
manager := NewMicrophoneContentionManager(200 * time.Millisecond)
atomic.StorePointer(&globalMicContentionManager, unsafe.Pointer(manager))
return manager
}
// Another goroutine initialized it, try again
ptr = atomic.LoadPointer(&globalMicContentionManager)
if ptr != nil {
return (*MicrophoneContentionManager)(ptr)
}
// Fallback: create a new manager (should rarely happen)
return NewMicrophoneContentionManager(200 * time.Millisecond)
}
// TryMicrophoneOperation provides a convenient global function for microphone operations
func TryMicrophoneOperation() OperationResult {
manager := GetMicrophoneContentionManager()
return manager.TryOperation()
}
// SetMicrophoneCooldown updates the global microphone cooldown
func SetMicrophoneCooldown(cooldown time.Duration) {
manager := GetMicrophoneContentionManager()
manager.SetCooldown(cooldown)
}
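Callers are expected to gate start/stop requests through TryOperation rather than serializing them with a mutex; a sketch of a handler-side check (startMicrophone is a hypothetical function doing the real work):

    // tryStartMicrophoneSketch debounces rapid microphone toggles. When the
    // cooldown has not elapsed it reports how long the caller should wait.
    func tryStartMicrophoneSketch(startMicrophone func() error) (time.Duration, error) {
        result := TryMicrophoneOperation()
        if !result.Allowed {
            return result.RemainingCooldown, nil // retry after this delay
        }
        return 0, startMicrophone()
    }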


@ -0,0 +1,115 @@
package audio
import (
"sync/atomic"
"unsafe"
)
var (
// Use unsafe.Pointer for atomic operations instead of mutex
globalNonBlockingManager unsafe.Pointer // *NonBlockingAudioManager
)
// loadManager atomically loads the global manager
func loadManager() *NonBlockingAudioManager {
ptr := atomic.LoadPointer(&globalNonBlockingManager)
if ptr == nil {
return nil
}
return (*NonBlockingAudioManager)(ptr)
}
// storeManager atomically stores the global manager
func storeManager(manager *NonBlockingAudioManager) {
atomic.StorePointer(&globalNonBlockingManager, unsafe.Pointer(manager))
}
// compareAndSwapManager atomically compares and swaps the global manager
func compareAndSwapManager(old, new *NonBlockingAudioManager) bool {
return atomic.CompareAndSwapPointer(&globalNonBlockingManager,
unsafe.Pointer(old), unsafe.Pointer(new))
}
// StartNonBlockingAudioStreaming starts the non-blocking audio streaming system
func StartNonBlockingAudioStreaming(send func([]byte)) error {
manager := loadManager()
if manager != nil && manager.IsOutputRunning() {
return nil // Already running, this is not an error
}
if manager == nil {
newManager := NewNonBlockingAudioManager()
if !compareAndSwapManager(nil, newManager) {
// Another goroutine created manager, use it
manager = loadManager()
} else {
manager = newManager
}
}
return manager.StartAudioOutput(send)
}
// StartNonBlockingAudioInput starts the non-blocking audio input system
func StartNonBlockingAudioInput(receiveChan <-chan []byte) error {
manager := loadManager()
if manager == nil {
newManager := NewNonBlockingAudioManager()
if !compareAndSwapManager(nil, newManager) {
// Another goroutine created manager, use it
manager = loadManager()
} else {
manager = newManager
}
}
// Check if input is already running to avoid unnecessary operations
if manager.IsInputRunning() {
return nil // Already running, this is not an error
}
return manager.StartAudioInput(receiveChan)
}
// StopNonBlockingAudioStreaming stops the non-blocking audio streaming system
func StopNonBlockingAudioStreaming() {
manager := loadManager()
if manager != nil {
manager.Stop()
storeManager(nil)
}
}
// StopNonBlockingAudioInput stops only the audio input without affecting output
func StopNonBlockingAudioInput() {
manager := loadManager()
if manager != nil && manager.IsInputRunning() {
manager.StopAudioInput()
// If both input and output are stopped, recreate manager to ensure clean state
if !manager.IsRunning() {
storeManager(nil)
}
}
}
// GetNonBlockingAudioStats returns statistics from the non-blocking audio system
func GetNonBlockingAudioStats() NonBlockingAudioStats {
manager := loadManager()
if manager != nil {
return manager.GetStats()
}
return NonBlockingAudioStats{}
}
// IsNonBlockingAudioRunning returns true if the non-blocking audio system is running
func IsNonBlockingAudioRunning() bool {
manager := loadManager()
return manager != nil && manager.IsRunning()
}
// IsNonBlockingAudioInputRunning returns true if the non-blocking audio input is running
func IsNonBlockingAudioInputRunning() bool {
manager := loadManager()
return manager != nil && manager.IsInputRunning()
}
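End to end, the main package only touches these package-level helpers; a sketch of the output wiring (sendToTrack is a hypothetical callback that forwards a frame to the WebRTC audio track):

    // runAudioOutputSketch starts audio output, lets frames flow to the
    // send callback, and tears everything down on shutdown.
    func runAudioOutputSketch(sendToTrack func([]byte), shutdown <-chan struct{}) error {
        if err := StartNonBlockingAudioStreaming(sendToTrack); err != nil {
            return err
        }
        <-shutdown
        StopNonBlockingAudioStreaming()
        return nil
    }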


@ -0,0 +1,564 @@
package audio
import (
"context"
"errors"
// "runtime" // removed: no longer directly pinning OS thread here; batching handles it
"sync"
"sync/atomic"
"time"
"github.com/jetkvm/kvm/internal/logging"
"github.com/rs/zerolog"
)
// NonBlockingAudioManager manages audio operations in separate worker threads
// to prevent blocking of mouse/keyboard operations
type NonBlockingAudioManager struct {
// Statistics - MUST be first for ARM32 alignment (int64 fields need 8-byte alignment)
stats NonBlockingAudioStats
// Control
ctx context.Context
cancel context.CancelFunc
wg sync.WaitGroup
logger *zerolog.Logger
// Audio output (capture from device, send to WebRTC)
outputSendFunc func([]byte)
outputWorkChan chan audioWorkItem
outputResultChan chan audioResult
// Audio input (receive from WebRTC, playback to device)
inputReceiveChan <-chan []byte
inputWorkChan chan audioWorkItem
inputResultChan chan audioResult
// Worker threads and flags - int32 fields grouped together
outputRunning int32
inputRunning int32
outputWorkerRunning int32
inputWorkerRunning int32
}
type audioWorkItem struct {
workType audioWorkType
data []byte
resultChan chan audioResult
}
type audioWorkType int
const (
audioWorkInit audioWorkType = iota
audioWorkReadEncode
audioWorkDecodeWrite
audioWorkClose
)
type audioResult struct {
success bool
data []byte
length int
err error
}
type NonBlockingAudioStats struct {
// int64 fields MUST be first for ARM32 alignment
OutputFramesProcessed int64
OutputFramesDropped int64
InputFramesProcessed int64
InputFramesDropped int64
WorkerErrors int64
// time.Time is int64 internally, so it's also aligned
LastProcessTime time.Time
}
// NewNonBlockingAudioManager creates a new non-blocking audio manager
func NewNonBlockingAudioManager() *NonBlockingAudioManager {
ctx, cancel := context.WithCancel(context.Background())
logger := logging.GetDefaultLogger().With().Str("component", "nonblocking-audio").Logger()
return &NonBlockingAudioManager{
ctx: ctx,
cancel: cancel,
logger: &logger,
outputWorkChan: make(chan audioWorkItem, 10), // Buffer for work items
outputResultChan: make(chan audioResult, 10), // Buffer for results
inputWorkChan: make(chan audioWorkItem, 10),
inputResultChan: make(chan audioResult, 10),
}
}
// StartAudioOutput starts non-blocking audio output (capture and encode)
func (nam *NonBlockingAudioManager) StartAudioOutput(sendFunc func([]byte)) error {
if !atomic.CompareAndSwapInt32(&nam.outputRunning, 0, 1) {
return ErrAudioAlreadyRunning
}
nam.outputSendFunc = sendFunc
// Enable batch audio processing for performance
EnableBatchAudioProcessing()
// Start the blocking worker thread
nam.wg.Add(1)
go nam.outputWorkerThread()
// Start the non-blocking coordinator
nam.wg.Add(1)
go nam.outputCoordinatorThread()
nam.logger.Info().Msg("non-blocking audio output started with batch processing")
return nil
}
// StartAudioInput starts non-blocking audio input (receive and decode)
func (nam *NonBlockingAudioManager) StartAudioInput(receiveChan <-chan []byte) error {
if !atomic.CompareAndSwapInt32(&nam.inputRunning, 0, 1) {
return ErrAudioAlreadyRunning
}
nam.inputReceiveChan = receiveChan
// Enable batch audio processing for performance
EnableBatchAudioProcessing()
// Start the blocking worker thread
nam.wg.Add(1)
go nam.inputWorkerThread()
// Start the non-blocking coordinator
nam.wg.Add(1)
go nam.inputCoordinatorThread()
nam.logger.Info().Msg("non-blocking audio input started with batch processing")
return nil
}
// outputWorkerThread handles all blocking audio output operations
func (nam *NonBlockingAudioManager) outputWorkerThread() {
defer nam.wg.Done()
defer atomic.StoreInt32(&nam.outputWorkerRunning, 0)
atomic.StoreInt32(&nam.outputWorkerRunning, 1)
nam.logger.Debug().Msg("output worker thread started")
// Initialize audio in worker thread
if err := CGOAudioInit(); err != nil {
nam.logger.Error().Err(err).Msg("failed to initialize audio in worker thread")
return
}
defer CGOAudioClose()
// Use buffer pool to avoid allocations
buf := GetAudioFrameBuffer()
defer PutAudioFrameBuffer(buf)
for {
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("output worker thread stopping")
return
case workItem := <-nam.outputWorkChan:
switch workItem.workType {
case audioWorkReadEncode:
n, err := BatchCGOAudioReadEncode(buf)
result := audioResult{
success: err == nil,
length: n,
err: err,
}
if err == nil && n > 0 {
// Get buffer from pool and copy data
resultBuf := GetAudioFrameBuffer()
copy(resultBuf[:n], buf[:n])
result.data = resultBuf[:n]
}
// Send result back (non-blocking)
select {
case workItem.resultChan <- result:
case <-nam.ctx.Done():
return
default:
// Drop result if coordinator is not ready
if result.data != nil {
PutAudioFrameBuffer(result.data)
}
atomic.AddInt64(&nam.stats.OutputFramesDropped, 1)
}
case audioWorkClose:
nam.logger.Debug().Msg("output worker received close signal")
return
}
}
}
}
// outputCoordinatorThread coordinates audio output without blocking
func (nam *NonBlockingAudioManager) outputCoordinatorThread() {
defer nam.wg.Done()
defer atomic.StoreInt32(&nam.outputRunning, 0)
nam.logger.Debug().Msg("output coordinator thread started")
ticker := time.NewTicker(20 * time.Millisecond) // Match frame timing
defer ticker.Stop()
pendingWork := false
resultChan := make(chan audioResult, 1)
for atomic.LoadInt32(&nam.outputRunning) == 1 {
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("output coordinator stopping")
return
case <-ticker.C:
// Only submit work if worker is ready and no pending work
if !pendingWork && atomic.LoadInt32(&nam.outputWorkerRunning) == 1 {
if IsAudioMuted() {
continue // Skip when muted
}
workItem := audioWorkItem{
workType: audioWorkReadEncode,
resultChan: resultChan,
}
// Submit work (non-blocking)
select {
case nam.outputWorkChan <- workItem:
pendingWork = true
default:
// Worker is busy, drop this frame
atomic.AddInt64(&nam.stats.OutputFramesDropped, 1)
}
}
case result := <-resultChan:
pendingWork = false
nam.stats.LastProcessTime = time.Now()
if result.success && result.data != nil && result.length > 0 {
// Send to WebRTC (non-blocking)
if nam.outputSendFunc != nil {
nam.outputSendFunc(result.data)
atomic.AddInt64(&nam.stats.OutputFramesProcessed, 1)
RecordFrameReceived(result.length)
}
// Return buffer to pool after use
PutAudioFrameBuffer(result.data)
} else if result.success && result.length == 0 {
// No data available - this is normal, not an error
// Just continue without logging or counting as error
} else {
atomic.AddInt64(&nam.stats.OutputFramesDropped, 1)
atomic.AddInt64(&nam.stats.WorkerErrors, 1)
if result.err != nil {
nam.logger.Warn().Err(result.err).Msg("audio output worker error")
}
// Clean up buffer if present
if result.data != nil {
PutAudioFrameBuffer(result.data)
}
RecordFrameDropped()
}
}
}
// Signal worker to close
select {
case nam.outputWorkChan <- audioWorkItem{workType: audioWorkClose}:
case <-time.After(100 * time.Millisecond):
nam.logger.Warn().Msg("timeout signaling output worker to close")
}
nam.logger.Info().Msg("output coordinator thread stopped")
}
// inputWorkerThread handles all blocking audio input operations
func (nam *NonBlockingAudioManager) inputWorkerThread() {
defer nam.wg.Done()
// Mark the worker as running; the deferred cleanup below resets the flag.
// CGO playback resources are deliberately not closed inside this function to
// avoid double-close scenarios: StopAudioInput calls CGOAudioPlaybackClose
// once the worker has fully stopped
atomic.StoreInt32(&nam.inputWorkerRunning, 1)
nam.logger.Debug().Msg("input worker thread started")
// Initialize audio playback in worker thread
if err := CGOAudioPlaybackInit(); err != nil {
nam.logger.Error().Err(err).Msg("failed to initialize audio playback in worker thread")
return
}
// Ensure CGO cleanup happens even if we exit unexpectedly
cgoInitialized := true
defer func() {
if cgoInitialized {
nam.logger.Debug().Msg("cleaning up CGO audio playback")
// Add extra safety: ensure no more CGO calls can happen
atomic.StoreInt32(&nam.inputWorkerRunning, 0)
// Note: Don't call CGOAudioPlaybackClose() here to avoid double-close
// The outputWorkerThread's CGOAudioClose() will handle all cleanup
}
}()
for {
// If coordinator has stopped, exit worker loop
if atomic.LoadInt32(&nam.inputRunning) == 0 {
return
}
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("input worker thread stopping due to context cancellation")
return
case workItem := <-nam.inputWorkChan:
switch workItem.workType {
case audioWorkDecodeWrite:
// Check if we're still supposed to be running before processing
if atomic.LoadInt32(&nam.inputWorkerRunning) == 0 || atomic.LoadInt32(&nam.inputRunning) == 0 {
nam.logger.Debug().Msg("input worker stopping, ignoring decode work")
// Do not send to resultChan; coordinator may have exited
return
}
// Validate input data before the CGO call (len is 0 for a nil slice)
if len(workItem.data) == 0 {
result := audioResult{
success: false,
err: errors.New("invalid audio data"),
}
// Check if coordinator is still running before sending result
if atomic.LoadInt32(&nam.inputRunning) == 1 {
select {
case workItem.resultChan <- result:
case <-nam.ctx.Done():
return
case <-time.After(10 * time.Millisecond):
// Timeout - coordinator may have stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
} else {
// Coordinator has stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
continue
}
// Perform blocking CGO operation with panic recovery
var result audioResult
func() {
defer func() {
if r := recover(); r != nil {
nam.logger.Error().Interface("panic", r).Msg("CGO decode write panic recovered")
result = audioResult{
success: false,
err: errors.New("CGO decode write panic"),
}
}
}()
// Double-check we're still running before CGO call
if atomic.LoadInt32(&nam.inputWorkerRunning) == 0 {
result = audioResult{success: false, err: errors.New("worker shutting down")}
return
}
n, err := BatchCGOAudioDecodeWrite(workItem.data)
result = audioResult{
success: err == nil,
length: n,
err: err,
}
}()
// Send result back (non-blocking) - check if coordinator is still running
if atomic.LoadInt32(&nam.inputRunning) == 1 {
select {
case workItem.resultChan <- result:
case <-nam.ctx.Done():
return
case <-time.After(10 * time.Millisecond):
// Timeout - coordinator may have stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
} else {
// Coordinator has stopped, drop result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
case audioWorkClose:
nam.logger.Debug().Msg("input worker received close signal")
return
}
}
}
}
// inputCoordinatorThread coordinates audio input without blocking
func (nam *NonBlockingAudioManager) inputCoordinatorThread() {
defer nam.wg.Done()
defer atomic.StoreInt32(&nam.inputRunning, 0)
nam.logger.Debug().Msg("input coordinator thread started")
resultChan := make(chan audioResult, 1)
// Do not close resultChan to avoid races with worker sends during shutdown
for atomic.LoadInt32(&nam.inputRunning) == 1 {
select {
case <-nam.ctx.Done():
nam.logger.Debug().Msg("input coordinator stopping")
return
case frame := <-nam.inputReceiveChan:
if len(frame) == 0 {
continue
}
// Submit work to worker (non-blocking)
if atomic.LoadInt32(&nam.inputWorkerRunning) == 1 {
workItem := audioWorkItem{
workType: audioWorkDecodeWrite,
data: frame,
resultChan: resultChan,
}
select {
case nam.inputWorkChan <- workItem:
// Wait for result with timeout and context cancellation
select {
case result := <-resultChan:
if result.success {
atomic.AddInt64(&nam.stats.InputFramesProcessed, 1)
} else {
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
atomic.AddInt64(&nam.stats.WorkerErrors, 1)
if result.err != nil {
nam.logger.Warn().Err(result.err).Msg("audio input worker error")
}
}
case <-nam.ctx.Done():
nam.logger.Debug().Msg("input coordinator stopping during result wait")
return
case <-time.After(50 * time.Millisecond):
// Timeout waiting for result
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
nam.logger.Warn().Msg("timeout waiting for input worker result")
// Drain any pending result to prevent worker blocking
select {
case <-resultChan:
default:
}
}
default:
// Worker is busy, drop this frame
atomic.AddInt64(&nam.stats.InputFramesDropped, 1)
}
}
case <-time.After(250 * time.Millisecond):
// Periodic timeout to prevent blocking
continue
}
}
// Avoid sending close signals or touching channels here; inputRunning=0 will stop worker via checks
nam.logger.Info().Msg("input coordinator thread stopped")
}
// Stop stops all audio operations
func (nam *NonBlockingAudioManager) Stop() {
nam.logger.Info().Msg("stopping non-blocking audio manager")
// Signal all threads to stop
nam.cancel()
// Stop coordinators
atomic.StoreInt32(&nam.outputRunning, 0)
atomic.StoreInt32(&nam.inputRunning, 0)
// Wait for all goroutines to finish
nam.wg.Wait()
// Disable batch processing to free resources
DisableBatchAudioProcessing()
nam.logger.Info().Msg("non-blocking audio manager stopped")
}
// StopAudioInput stops only the audio input operations
func (nam *NonBlockingAudioManager) StopAudioInput() {
nam.logger.Info().Msg("stopping audio input")
// Stop only the input coordinator
atomic.StoreInt32(&nam.inputRunning, 0)
// Drain the receive channel to prevent blocking senders
go func() {
for {
select {
case <-nam.inputReceiveChan:
// Drain any remaining frames
case <-time.After(100 * time.Millisecond):
return
}
}
}()
// Wait for the worker to actually stop to prevent race conditions
timeout := time.After(2 * time.Second)
ticker := time.NewTicker(10 * time.Millisecond)
defer ticker.Stop()
for {
select {
case <-timeout:
nam.logger.Warn().Msg("timeout waiting for input worker to stop")
return
case <-ticker.C:
if atomic.LoadInt32(&nam.inputWorkerRunning) == 0 {
nam.logger.Info().Msg("audio input stopped successfully")
// Close ALSA playback resources now that input worker has stopped
CGOAudioPlaybackClose()
return
}
}
}
}
// GetStats returns current statistics
func (nam *NonBlockingAudioManager) GetStats() NonBlockingAudioStats {
return NonBlockingAudioStats{
OutputFramesProcessed: atomic.LoadInt64(&nam.stats.OutputFramesProcessed),
OutputFramesDropped: atomic.LoadInt64(&nam.stats.OutputFramesDropped),
InputFramesProcessed: atomic.LoadInt64(&nam.stats.InputFramesProcessed),
InputFramesDropped: atomic.LoadInt64(&nam.stats.InputFramesDropped),
WorkerErrors: atomic.LoadInt64(&nam.stats.WorkerErrors),
LastProcessTime: nam.stats.LastProcessTime,
}
}
// IsRunning returns true if any audio operations are running
func (nam *NonBlockingAudioManager) IsRunning() bool {
return atomic.LoadInt32(&nam.outputRunning) == 1 || atomic.LoadInt32(&nam.inputRunning) == 1
}
// IsInputRunning returns true if audio input is running
func (nam *NonBlockingAudioManager) IsInputRunning() bool {
return atomic.LoadInt32(&nam.inputRunning) == 1
}
// IsOutputRunning returns true if audio output is running
func (nam *NonBlockingAudioManager) IsOutputRunning() bool {
return atomic.LoadInt32(&nam.outputRunning) == 1
}
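End to end, the manager is driven through the package-level helpers that main.go (further down in this diff) uses. A minimal lifecycle sketch; note it only builds for the device target, since the audio package pulls in CGO, ALSA, and Opus:

```go
package main

import (
	"log"
	"time"

	"github.com/jetkvm/kvm/internal/audio"
)

func main() {
	// Start streaming; the callback receives one encoded Opus frame at a time.
	err := audio.StartNonBlockingAudioStreaming(func(frame []byte) {
		_ = frame // hand off to a transport (WebRTC in main.go below)
	})
	if err != nil {
		log.Fatal(err)
	}
	time.Sleep(time.Second) // stream for a moment

	// Cancels the context, stops coordinators, and waits for all workers.
	audio.StopNonBlockingAudioStreaming()
}
```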

30
internal/audio/session.go Normal file
View File

@ -0,0 +1,30 @@
package audio
// SessionProvider interface abstracts session management for audio events
type SessionProvider interface {
IsSessionActive() bool
GetAudioInputManager() *AudioInputManager
}
// DefaultSessionProvider is a no-op implementation
type DefaultSessionProvider struct{}
func (d *DefaultSessionProvider) IsSessionActive() bool {
return false
}
func (d *DefaultSessionProvider) GetAudioInputManager() *AudioInputManager {
return nil
}
var sessionProvider SessionProvider = &DefaultSessionProvider{}
// SetSessionProvider allows the main package to inject session management
func SetSessionProvider(provider SessionProvider) {
sessionProvider = provider
}
// GetSessionProvider returns the current session provider
func GetSessionProvider() SessionProvider {
return sessionProvider
}
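This indirection keeps the audio package from importing the main package. Anything that satisfies the two methods can be injected; the production wiring lands in session_provider.go later in this diff, and a hypothetical test double looks like this:

```go
package audio_test

import "github.com/jetkvm/kvm/internal/audio"

// fakeProvider is a hypothetical test double that always reports an active session.
type fakeProvider struct{}

func (fakeProvider) IsSessionActive() bool                          { return true }
func (fakeProvider) GetAudioInputManager() *audio.AudioInputManager { return nil }

func Example() {
	audio.SetSessionProvider(fakeProvider{})
	_ = audio.GetSessionProvider().IsSessionActive() // true
}
```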

View File

@ -59,6 +59,23 @@ var defaultGadgetConfig = map[string]gadgetConfigItem{
// mass storage // mass storage
"mass_storage_base": massStorageBaseConfig, "mass_storage_base": massStorageBaseConfig,
"mass_storage_lun0": massStorageLun0Config, "mass_storage_lun0": massStorageLun0Config,
// audio
"audio": {
order: 4000,
device: "uac1.usb0",
path: []string{"functions", "uac1.usb0"},
configPath: []string{"uac1.usb0"},
attrs: gadgetAttributes{
"p_chmask": "3",
"p_srate": "48000",
"p_ssize": "2",
"p_volume_present": "0",
"c_chmask": "3",
"c_srate": "48000",
"c_ssize": "2",
"c_volume_present": "0",
},
},
} }
func (u *UsbGadget) isGadgetConfigItemEnabled(itemKey string) bool { func (u *UsbGadget) isGadgetConfigItemEnabled(itemKey string) bool {
@ -73,6 +90,8 @@ func (u *UsbGadget) isGadgetConfigItemEnabled(itemKey string) bool {
return u.enabledDevices.MassStorage return u.enabledDevices.MassStorage
case "mass_storage_lun0": case "mass_storage_lun0":
return u.enabledDevices.MassStorage return u.enabledDevices.MassStorage
case "audio":
return u.enabledDevices.Audio
default: default:
return true return true
} }
@ -182,6 +201,9 @@ func (u *UsbGadget) Init() error {
return u.logError("unable to initialize USB stack", err) return u.logError("unable to initialize USB stack", err)
} }
// Pre-open HID files to reduce input latency
u.PreOpenHidFiles()
return nil return nil
} }
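A note on the new `audio` entry above, for readers unfamiliar with the Linux UAC1 gadget function: `p_*` attributes describe the playback stream (host to device) and `c_*` the capture stream (device to host); `chmask` is a channel bitmask (3 = two channels), `srate` is the sample rate in Hz, and `ssize` is the sample size in bytes (2 = 16-bit). The gadget framework ends up doing the equivalent of these configfs writes (paths illustrative, assuming the standard configfs mount):

```sh
# Illustrative only; the usbgadget package performs these writes itself.
cd /sys/kernel/config/usb_gadget/<gadget>
mkdir -p functions/uac1.usb0
echo 3     > functions/uac1.usb0/p_chmask   # stereo playback (host -> device)
echo 48000 > functions/uac1.usb0/p_srate
echo 2     > functions/uac1.usb0/p_ssize    # 16-bit samples
echo 3     > functions/uac1.usb0/c_chmask   # stereo capture (device -> host)
echo 48000 > functions/uac1.usb0/c_srate
echo 2     > functions/uac1.usb0/c_ssize
ln -s functions/uac1.usb0 configs/<config>/
```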

View File

@ -203,8 +203,7 @@ func (u *UsbGadget) keyboardWriteHidFile(data []byte) error {
_, err := u.keyboardHidFile.Write(data) _, err := u.keyboardHidFile.Write(data)
if err != nil { if err != nil {
u.logWithSuppression("keyboardWriteHidFile", 100, u.log, err, "failed to write to hidg0") u.logWithSuppression("keyboardWriteHidFile", 100, u.log, err, "failed to write to hidg0")
u.keyboardHidFile.Close() // Keep file open on write errors to reduce I/O overhead
u.keyboardHidFile = nil
return err return err
} }
u.resetLogSuppressionCounter("keyboardWriteHidFile") u.resetLogSuppressionCounter("keyboardWriteHidFile")

View File

@ -77,8 +77,7 @@ func (u *UsbGadget) absMouseWriteHidFile(data []byte) error {
_, err := u.absMouseHidFile.Write(data) _, err := u.absMouseHidFile.Write(data)
if err != nil { if err != nil {
u.logWithSuppression("absMouseWriteHidFile", 100, u.log, err, "failed to write to hidg1") u.logWithSuppression("absMouseWriteHidFile", 100, u.log, err, "failed to write to hidg1")
u.absMouseHidFile.Close() // Keep file open on write errors to reduce I/O overhead
u.absMouseHidFile = nil
return err return err
} }
u.resetLogSuppressionCounter("absMouseWriteHidFile") u.resetLogSuppressionCounter("absMouseWriteHidFile")

View File

@ -60,15 +60,14 @@ func (u *UsbGadget) relMouseWriteHidFile(data []byte) error {
var err error var err error
u.relMouseHidFile, err = os.OpenFile("/dev/hidg2", os.O_RDWR, 0666) u.relMouseHidFile, err = os.OpenFile("/dev/hidg2", os.O_RDWR, 0666)
if err != nil { if err != nil {
return fmt.Errorf("failed to open hidg1: %w", err) return fmt.Errorf("failed to open hidg2: %w", err)
} }
} }
_, err := u.relMouseHidFile.Write(data) _, err := u.relMouseHidFile.Write(data)
if err != nil { if err != nil {
u.logWithSuppression("relMouseWriteHidFile", 100, u.log, err, "failed to write to hidg2") u.logWithSuppression("relMouseWriteHidFile", 100, u.log, err, "failed to write to hidg2")
u.relMouseHidFile.Close() // Keep file open on write errors to reduce I/O overhead
u.relMouseHidFile = nil
return err return err
} }
u.resetLogSuppressionCounter("relMouseWriteHidFile") u.resetLogSuppressionCounter("relMouseWriteHidFile")

View File

@ -19,6 +19,7 @@ type Devices struct {
RelativeMouse bool `json:"relative_mouse"` RelativeMouse bool `json:"relative_mouse"`
Keyboard bool `json:"keyboard"` Keyboard bool `json:"keyboard"`
MassStorage bool `json:"mass_storage"` MassStorage bool `json:"mass_storage"`
Audio bool `json:"audio"`
} }
// Config is a struct that represents the customizations for a USB gadget. // Config is a struct that represents the customizations for a USB gadget.
@ -94,6 +95,33 @@ func NewUsbGadget(name string, enabledDevices *Devices, config *Config, logger *
return newUsbGadget(name, defaultGadgetConfig, enabledDevices, config, logger) return newUsbGadget(name, defaultGadgetConfig, enabledDevices, config, logger)
} }
// PreOpenHidFiles opens all HID files to reduce input latency
func (u *UsbGadget) PreOpenHidFiles() {
if u.enabledDevices.Keyboard {
if err := u.openKeyboardHidFile(); err != nil {
u.log.Debug().Err(err).Msg("failed to pre-open keyboard HID file")
}
}
if u.enabledDevices.AbsoluteMouse {
if u.absMouseHidFile == nil {
var err error
u.absMouseHidFile, err = os.OpenFile("/dev/hidg1", os.O_RDWR, 0666)
if err != nil {
u.log.Debug().Err(err).Msg("failed to pre-open absolute mouse HID file")
}
}
}
if u.enabledDevices.RelativeMouse {
if u.relMouseHidFile == nil {
var err error
u.relMouseHidFile, err = os.OpenFile("/dev/hidg2", os.O_RDWR, 0666)
if err != nil {
u.log.Debug().Err(err).Msg("failed to pre-open relative mouse HID file")
}
}
}
}
func newUsbGadget(name string, configMap map[string]gadgetConfigItem, enabledDevices *Devices, config *Config, logger *zerolog.Logger) *UsbGadget { func newUsbGadget(name string, configMap map[string]gadgetConfigItem, enabledDevices *Devices, config *Config, logger *zerolog.Logger) *UsbGadget {
if logger == nil { if logger == nil {
logger = defaultLogger logger = defaultLogger

View File

@ -18,6 +18,8 @@ import (
"github.com/jetkvm/kvm/internal/usbgadget" "github.com/jetkvm/kvm/internal/usbgadget"
) )
// Direct RPC message handling for optimal input responsiveness
type JSONRPCRequest struct { type JSONRPCRequest struct {
JSONRPC string `json:"jsonrpc"` JSONRPC string `json:"jsonrpc"`
Method string `json:"method"` Method string `json:"method"`
@ -119,6 +121,39 @@ func onRPCMessage(message webrtc.DataChannelMessage, session *Session) {
scopedLogger.Trace().Msg("Received RPC request") scopedLogger.Trace().Msg("Received RPC request")
// Fast path for input methods - bypass reflection for performance
// This optimization reduces latency by 3-6ms per input event by:
// - Eliminating reflection overhead
// - Reducing memory allocations
// - Optimizing parameter parsing and validation
// See input_rpc.go for implementation details
if isInputMethod(request.Method) {
result, err := handleInputRPCDirect(request.Method, request.Params)
if err != nil {
scopedLogger.Error().Err(err).Msg("Error calling direct input handler")
errorResponse := JSONRPCResponse{
JSONRPC: "2.0",
Error: map[string]interface{}{
"code": -32603,
"message": "Internal error",
"data": err.Error(),
},
ID: request.ID,
}
writeJSONRPCResponse(errorResponse, session)
return
}
response := JSONRPCResponse{
JSONRPC: "2.0",
Result: result,
ID: request.ID,
}
writeJSONRPCResponse(response, session)
return
}
// Fallback to reflection-based handler for non-input methods
handler, ok := rpcHandlers[request.Method] handler, ok := rpcHandlers[request.Method]
if !ok { if !ok {
errorResponse := JSONRPCResponse{ errorResponse := JSONRPCResponse{
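`isInputMethod` and `handleInputRPCDirect` live in input_rpc.go, which is not part of this excerpt. Conceptually the fast path is a set-membership check plus a hand-written decoder per method, which is what avoids the reflection cost; a sketch under that assumption (the method names and parameter shapes below are assumptions, not the real API):

```go
package kvm

import (
	"encoding/json"
	"fmt"
)

// Sketch only: the real method set and parameter shapes live in input_rpc.go.
var inputMethods = map[string]struct{}{
	"keyboardReport": {},
	"absMouseReport": {},
	"relMouseReport": {},
	"wheelReport":    {},
}

func isInputMethod(method string) bool {
	_, ok := inputMethods[method]
	return ok
}

func handleInputRPCDirect(method string, params json.RawMessage) (any, error) {
	switch method {
	case "keyboardReport":
		var p struct {
			Modifier byte   `json:"modifier"`
			Keys     []byte `json:"keys"`
		}
		if err := json.Unmarshal(params, &p); err != nil {
			return nil, err
		}
		// ... write the HID report directly, no reflection ...
		return nil, nil
	default:
		return nil, fmt.Errorf("unknown input method %q", method)
	}
}
```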

41
main.go
View File

@ -9,6 +9,8 @@ import (
"time" "time"
"github.com/gwatts/rootcerts" "github.com/gwatts/rootcerts"
"github.com/jetkvm/kvm/internal/audio"
"github.com/pion/webrtc/v4/pkg/media"
) )
var appCtx context.Context var appCtx context.Context
@ -71,12 +73,46 @@ func Main() {
err = ExtractAndRunNativeBin() err = ExtractAndRunNativeBin()
if err != nil { if err != nil {
logger.Warn().Err(err).Msg("failed to extract and run native bin") logger.Warn().Err(err).Msg("failed to extract and run native bin")
//TODO: prepare an error message screen buffer to show on kvm screen // (future) prepare an error message screen buffer to show on kvm screen
} }
}() }()
// initialize usb gadget // initialize usb gadget
initUsbGadget() initUsbGadget()
// Start non-blocking audio streaming and deliver Opus frames to WebRTC
err = audio.StartNonBlockingAudioStreaming(func(frame []byte) {
// Deliver Opus frame to WebRTC audio track if session is active
if currentSession != nil {
config := audio.GetAudioConfig()
var sampleData []byte
if audio.IsAudioMuted() {
sampleData = make([]byte, len(frame)) // silence
} else {
sampleData = frame
}
if err := currentSession.AudioTrack.WriteSample(media.Sample{
Data: sampleData,
Duration: config.FrameSize,
}); err != nil {
logger.Warn().Err(err).Msg("error writing audio sample")
audio.RecordFrameDropped()
}
} else {
audio.RecordFrameDropped()
}
})
if err != nil {
logger.Warn().Err(err).Msg("failed to start non-blocking audio streaming")
}
// Initialize session provider for audio events
initializeAudioSessionProvider()
// Initialize audio event broadcaster for WebSocket-based real-time updates
audio.InitializeAudioEventBroadcaster()
logger.Info().Msg("audio event broadcaster initialized")
if err := setInitialVirtualMediaState(); err != nil { if err := setInitialVirtualMediaState(); err != nil {
logger.Warn().Err(err).Msg("failed to set initial virtual media state") logger.Warn().Err(err).Msg("failed to set initial virtual media state")
} }
@ -126,6 +162,9 @@ func Main() {
signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM)
<-sigs <-sigs
logger.Info().Msg("JetKVM Shutting Down") logger.Info().Msg("JetKVM Shutting Down")
// Stop non-blocking audio manager
audio.StopNonBlockingAudioStreaming()
//if fuseServer != nil { //if fuseServer != nil {
// err := setMassStorageImage(" ") // err := setMassStorageImage(" ")
// if err != nil { // if err != nil {

329
native.go
View File

@ -1,255 +1,46 @@
//go:build linux
package kvm package kvm
import ( import (
"bytes"
"encoding/json"
"errors"
"fmt" "fmt"
"io"
"net"
"os" "os"
"os/exec" "os/exec"
"strings"
"sync" "sync"
"syscall"
"time" "time"
"github.com/jetkvm/kvm/resource" "github.com/rs/zerolog"
"github.com/pion/webrtc/v4/pkg/media"
) )
var ctrlSocketConn net.Conn type nativeOutput struct {
logger *zerolog.Logger
type CtrlAction struct {
Action string `json:"action"`
Seq int32 `json:"seq,omitempty"`
Params map[string]interface{} `json:"params,omitempty"`
} }
type CtrlResponse struct { func (n *nativeOutput) Write(p []byte) (int, error) {
Seq int32 `json:"seq,omitempty"` n.logger.Debug().Str("output", string(p)).Msg("native binary output")
Error string `json:"error,omitempty"` return len(p), nil
Errno int32 `json:"errno,omitempty"`
Result map[string]interface{} `json:"result,omitempty"`
Event string `json:"event,omitempty"`
Data json.RawMessage `json:"data,omitempty"`
} }
type EventHandler func(event CtrlResponse)
var seq int32 = 1
var ongoingRequests = make(map[int32]chan *CtrlResponse)
var lock = &sync.Mutex{}
var ( var (
nativeCmd *exec.Cmd nativeCmd *exec.Cmd
nativeCmdLock = &sync.Mutex{} nativeCmdLock = &sync.Mutex{}
) )
func CallCtrlAction(action string, params map[string]interface{}) (*CtrlResponse, error) { func startNativeBinary(binaryPath string) (*exec.Cmd, error) {
lock.Lock() cmd := exec.Command(binaryPath)
defer lock.Unlock() cmd.SysProcAttr = &syscall.SysProcAttr{
ctrlAction := CtrlAction{ Pdeathsig: syscall.SIGTERM,
Action: action,
Seq: seq,
Params: params,
} }
cmd.Stdout = &nativeOutput{logger: nativeLogger}
cmd.Stderr = &nativeOutput{logger: nativeLogger}
responseChan := make(chan *CtrlResponse) err := cmd.Start()
ongoingRequests[seq] = responseChan
seq++
jsonData, err := json.Marshal(ctrlAction)
if err != nil { if err != nil {
delete(ongoingRequests, ctrlAction.Seq) return nil, err
return nil, fmt.Errorf("error marshaling ctrl action: %w", err)
} }
scopedLogger := nativeLogger.With(). return cmd, nil
Str("action", ctrlAction.Action).
Interface("params", ctrlAction.Params).Logger()
scopedLogger.Debug().Msg("sending ctrl action")
err = WriteCtrlMessage(jsonData)
if err != nil {
delete(ongoingRequests, ctrlAction.Seq)
return nil, ErrorfL(&scopedLogger, "error writing ctrl message", err)
}
select {
case response := <-responseChan:
delete(ongoingRequests, seq)
if response.Error != "" {
return nil, ErrorfL(
&scopedLogger,
"error native response: %s",
errors.New(response.Error),
)
}
return response, nil
case <-time.After(5 * time.Second):
close(responseChan)
delete(ongoingRequests, seq)
return nil, ErrorfL(&scopedLogger, "timeout waiting for response", nil)
}
}
func WriteCtrlMessage(message []byte) error {
if ctrlSocketConn == nil {
return fmt.Errorf("ctrl socket not conn ected")
}
_, err := ctrlSocketConn.Write(message)
return err
}
var nativeCtrlSocketListener net.Listener //nolint:unused
var nativeVideoSocketListener net.Listener //nolint:unused
var ctrlClientConnected = make(chan struct{})
func waitCtrlClientConnected() {
<-ctrlClientConnected
}
func StartNativeSocketServer(socketPath string, handleClient func(net.Conn), isCtrl bool) net.Listener {
scopedLogger := nativeLogger.With().
Str("socket_path", socketPath).
Logger()
// Remove the socket file if it already exists
if _, err := os.Stat(socketPath); err == nil {
if err := os.Remove(socketPath); err != nil {
scopedLogger.Warn().Err(err).Msg("failed to remove existing socket file")
os.Exit(1)
}
}
listener, err := net.Listen("unixpacket", socketPath)
if err != nil {
scopedLogger.Warn().Err(err).Msg("failed to start server")
os.Exit(1)
}
scopedLogger.Info().Msg("server listening")
go func() {
for {
conn, err := listener.Accept()
if err != nil {
scopedLogger.Warn().Err(err).Msg("failed to accept socket")
continue
}
if isCtrl {
// check if the channel is closed
select {
case <-ctrlClientConnected:
scopedLogger.Debug().Msg("ctrl client reconnected")
default:
close(ctrlClientConnected)
scopedLogger.Debug().Msg("first native ctrl socket client connected")
}
}
go handleClient(conn)
}
}()
return listener
}
func StartNativeCtrlSocketServer() {
nativeCtrlSocketListener = StartNativeSocketServer("/var/run/jetkvm_ctrl.sock", handleCtrlClient, true)
nativeLogger.Debug().Msg("native app ctrl sock started")
}
func StartNativeVideoSocketServer() {
nativeVideoSocketListener = StartNativeSocketServer("/var/run/jetkvm_video.sock", handleVideoClient, false)
nativeLogger.Debug().Msg("native app video sock started")
}
func handleCtrlClient(conn net.Conn) {
defer conn.Close()
scopedLogger := nativeLogger.With().
Str("addr", conn.RemoteAddr().String()).
Str("type", "ctrl").
Logger()
scopedLogger.Info().Msg("native ctrl socket client connected")
if ctrlSocketConn != nil {
scopedLogger.Debug().Msg("closing existing native socket connection")
ctrlSocketConn.Close()
}
ctrlSocketConn = conn
// Restore HDMI EDID if applicable
go restoreHdmiEdid()
readBuf := make([]byte, 4096)
for {
n, err := conn.Read(readBuf)
if err != nil {
scopedLogger.Warn().Err(err).Msg("error reading from ctrl sock")
break
}
readMsg := string(readBuf[:n])
ctrlResp := CtrlResponse{}
err = json.Unmarshal([]byte(readMsg), &ctrlResp)
if err != nil {
scopedLogger.Warn().Err(err).Str("data", readMsg).Msg("error parsing ctrl sock msg")
continue
}
scopedLogger.Trace().Interface("data", ctrlResp).Msg("ctrl sock msg")
if ctrlResp.Seq != 0 {
responseChan, ok := ongoingRequests[ctrlResp.Seq]
if ok {
responseChan <- &ctrlResp
}
}
switch ctrlResp.Event {
case "video_input_state":
HandleVideoStateMessage(ctrlResp)
}
}
scopedLogger.Debug().Msg("ctrl sock disconnected")
}
func handleVideoClient(conn net.Conn) {
defer conn.Close()
scopedLogger := nativeLogger.With().
Str("addr", conn.RemoteAddr().String()).
Str("type", "video").
Logger()
scopedLogger.Info().Msg("native video socket client connected")
inboundPacket := make([]byte, maxFrameSize)
lastFrame := time.Now()
for {
n, err := conn.Read(inboundPacket)
if err != nil {
scopedLogger.Warn().Err(err).Msg("error during read")
return
}
now := time.Now()
sinceLastFrame := now.Sub(lastFrame)
lastFrame = now
if currentSession != nil {
err := currentSession.VideoTrack.WriteSample(media.Sample{Data: inboundPacket[:n], Duration: sinceLastFrame})
if err != nil {
scopedLogger.Warn().Err(err).Msg("error writing sample")
}
}
}
} }
func startNativeBinaryWithLock(binaryPath string) (*exec.Cmd, error) { func startNativeBinaryWithLock(binaryPath string) (*exec.Cmd, error) {
@ -351,87 +142,3 @@ func ExtractAndRunNativeBin() error {
return nil return nil
} }
func shouldOverwrite(destPath string, srcHash []byte) bool {
if srcHash == nil {
nativeLogger.Debug().Msg("error reading embedded jetkvm_native.sha256, doing overwriting")
return true
}
dstHash, err := os.ReadFile(destPath + ".sha256")
if err != nil {
nativeLogger.Debug().Msg("error reading existing jetkvm_native.sha256, doing overwriting")
return true
}
return !bytes.Equal(srcHash, dstHash)
}
func getNativeSha256() ([]byte, error) {
version, err := resource.ResourceFS.ReadFile("jetkvm_native.sha256")
if err != nil {
return nil, err
}
return version, nil
}
func GetNativeVersion() (string, error) {
version, err := getNativeSha256()
if err != nil {
return "", err
}
return strings.TrimSpace(string(version)), nil
}
func ensureBinaryUpdated(destPath string) error {
srcFile, err := resource.ResourceFS.Open("jetkvm_native")
if err != nil {
return err
}
defer srcFile.Close()
srcHash, err := getNativeSha256()
if err != nil {
nativeLogger.Debug().Msg("error reading embedded jetkvm_native.sha256, proceeding with update")
srcHash = nil
}
_, err = os.Stat(destPath)
if shouldOverwrite(destPath, srcHash) || err != nil {
nativeLogger.Info().
Interface("hash", srcHash).
Msg("writing jetkvm_native")
_ = os.Remove(destPath)
destFile, err := os.OpenFile(destPath, os.O_CREATE|os.O_RDWR, 0755)
if err != nil {
return err
}
_, err = io.Copy(destFile, srcFile)
destFile.Close()
if err != nil {
return err
}
if srcHash != nil {
err = os.WriteFile(destPath+".sha256", srcHash, 0644)
if err != nil {
return err
}
}
nativeLogger.Info().Msg("jetkvm_native updated")
}
return nil
}
// Restore the HDMI EDID value from the config.
// Called after successful connection to jetkvm_native.
func restoreHdmiEdid() {
if config.EdidString != "" {
nativeLogger.Info().Str("edid", config.EdidString).Msg("Restoring HDMI EDID")
_, err := CallCtrlAction("set_edid", map[string]interface{}{"edid": config.EdidString})
if err != nil {
nativeLogger.Warn().Err(err).Msg("Failed to restore HDMI EDID")
}
}
}

View File

@ -1,57 +0,0 @@
//go:build linux
package kvm
import (
"fmt"
"os/exec"
"sync"
"syscall"
"github.com/rs/zerolog"
)
type nativeOutput struct {
mu *sync.Mutex
logger *zerolog.Event
}
func (w *nativeOutput) Write(p []byte) (n int, err error) {
w.mu.Lock()
defer w.mu.Unlock()
w.logger.Msg(string(p))
return len(p), nil
}
func startNativeBinary(binaryPath string) (*exec.Cmd, error) {
// Run the binary in the background
cmd := exec.Command(binaryPath)
nativeOutputLock := sync.Mutex{}
nativeStdout := &nativeOutput{
mu: &nativeOutputLock,
logger: nativeLogger.Info().Str("pipe", "stdout"),
}
nativeStderr := &nativeOutput{
mu: &nativeOutputLock,
logger: nativeLogger.Info().Str("pipe", "stderr"),
}
// Redirect stdout and stderr to the current process
cmd.Stdout = nativeStdout
cmd.Stderr = nativeStderr
// Set the process group ID so we can kill the process and its children when this process exits
cmd.SysProcAttr = &syscall.SysProcAttr{
Setpgid: true,
Pdeathsig: syscall.SIGKILL,
}
// Start the command
if err := cmd.Start(); err != nil {
return nil, fmt.Errorf("failed to start binary: %w", err)
}
return cmd, nil
}

View File

@ -8,5 +8,9 @@ import (
) )
func startNativeBinary(binaryPath string) (*exec.Cmd, error) { func startNativeBinary(binaryPath string) (*exec.Cmd, error) {
return nil, fmt.Errorf("not supported") return nil, fmt.Errorf("startNativeBinary is only supported on Linux")
}
func ExtractAndRunNativeBin() error {
return fmt.Errorf("ExtractAndRunNativeBin is only supported on Linux")
} }

343
native_shared.go Normal file
View File

@ -0,0 +1,343 @@
package kvm
import (
"bytes"
"encoding/json"
"errors"
"fmt"
"io"
"net"
"os"
"runtime"
"strings"
"sync"
"time"
"github.com/jetkvm/kvm/resource"
"github.com/pion/webrtc/v4/pkg/media"
)
type CtrlAction struct {
Action string `json:"action"`
Seq int32 `json:"seq,omitempty"`
Params map[string]interface{} `json:"params,omitempty"`
}
type CtrlResponse struct {
Seq int32 `json:"seq,omitempty"`
Error string `json:"error,omitempty"`
Errno int32 `json:"errno,omitempty"`
Result map[string]interface{} `json:"result,omitempty"`
Event string `json:"event,omitempty"`
Data json.RawMessage `json:"data,omitempty"`
}
type EventHandler func(event CtrlResponse)
var seq int32 = 1
var ongoingRequests = make(map[int32]chan *CtrlResponse)
var lock = &sync.Mutex{}
var ctrlSocketConn net.Conn
var nativeCtrlSocketListener net.Listener //nolint:unused
var nativeVideoSocketListener net.Listener //nolint:unused
var ctrlClientConnected = make(chan struct{})
func waitCtrlClientConnected() {
<-ctrlClientConnected
}
func CallCtrlAction(action string, params map[string]interface{}) (*CtrlResponse, error) {
lock.Lock()
defer lock.Unlock()
ctrlAction := CtrlAction{
Action: action,
Seq: seq,
Params: params,
}
responseChan := make(chan *CtrlResponse)
ongoingRequests[seq] = responseChan
seq++
jsonData, err := json.Marshal(ctrlAction)
if err != nil {
delete(ongoingRequests, ctrlAction.Seq)
return nil, fmt.Errorf("error marshaling ctrl action: %w", err)
}
scopedLogger := nativeLogger.With().
Str("action", ctrlAction.Action).
Interface("params", ctrlAction.Params).Logger()
scopedLogger.Debug().Msg("sending ctrl action")
err = WriteCtrlMessage(jsonData)
if err != nil {
delete(ongoingRequests, ctrlAction.Seq)
return nil, ErrorfL(&scopedLogger, "error writing ctrl message", err)
}
select {
case response := <-responseChan:
delete(ongoingRequests, ctrlAction.Seq)
if response.Error != "" {
return nil, ErrorfL(
&scopedLogger,
"error native response: %s",
errors.New(response.Error),
)
}
return response, nil
case <-time.After(5 * time.Second):
close(responseChan)
delete(ongoingRequests, ctrlAction.Seq)
return nil, ErrorfL(&scopedLogger, "timeout waiting for response", nil)
}
}
func WriteCtrlMessage(message []byte) error {
if ctrlSocketConn == nil {
return fmt.Errorf("ctrl socket not connected")
}
_, err := ctrlSocketConn.Write(message)
return err
}
func StartNativeSocketServer(socketPath string, handleClient func(net.Conn), isCtrl bool) net.Listener {
scopedLogger := nativeLogger.With().
Str("socket_path", socketPath).
Logger()
// Remove the socket file if it already exists
if _, err := os.Stat(socketPath); err == nil {
if err := os.Remove(socketPath); err != nil {
scopedLogger.Warn().Err(err).Msg("failed to remove existing socket file")
os.Exit(1)
}
}
listener, err := net.Listen("unixpacket", socketPath)
if err != nil {
scopedLogger.Warn().Err(err).Msg("failed to start server")
os.Exit(1)
}
scopedLogger.Info().Msg("server listening")
go func() {
for {
conn, err := listener.Accept()
if err != nil {
scopedLogger.Warn().Err(err).Msg("failed to accept socket")
continue
}
if isCtrl {
// check if the channel is closed
select {
case <-ctrlClientConnected:
scopedLogger.Debug().Msg("ctrl client reconnected")
default:
close(ctrlClientConnected)
scopedLogger.Debug().Msg("first native ctrl socket client connected")
}
}
go handleClient(conn)
}
}()
return listener
}
func StartNativeCtrlSocketServer() {
nativeCtrlSocketListener = StartNativeSocketServer("/var/run/jetkvm_ctrl.sock", handleCtrlClient, true)
nativeLogger.Debug().Msg("native app ctrl sock started")
}
func StartNativeVideoSocketServer() {
nativeVideoSocketListener = StartNativeSocketServer("/var/run/jetkvm_video.sock", handleVideoClient, false)
nativeLogger.Debug().Msg("native app video sock started")
}
func handleCtrlClient(conn net.Conn) {
// Lock to OS thread to isolate blocking socket I/O
runtime.LockOSThread()
defer runtime.UnlockOSThread()
defer conn.Close()
scopedLogger := nativeLogger.With().
Str("addr", conn.RemoteAddr().String()).
Str("type", "ctrl").
Logger()
scopedLogger.Info().Msg("native ctrl socket client connected (OS thread locked)")
if ctrlSocketConn != nil {
scopedLogger.Debug().Msg("closing existing native socket connection")
ctrlSocketConn.Close()
}
ctrlSocketConn = conn
// Restore HDMI EDID if applicable
go restoreHdmiEdid()
readBuf := make([]byte, 4096)
for {
n, err := conn.Read(readBuf)
if err != nil {
scopedLogger.Warn().Err(err).Msg("error reading from ctrl sock")
break
}
readMsg := string(readBuf[:n])
ctrlResp := CtrlResponse{}
err = json.Unmarshal([]byte(readMsg), &ctrlResp)
if err != nil {
scopedLogger.Warn().Err(err).Str("data", readMsg).Msg("error parsing ctrl sock msg")
continue
}
scopedLogger.Trace().Interface("data", ctrlResp).Msg("ctrl sock msg")
if ctrlResp.Seq != 0 {
responseChan, ok := ongoingRequests[ctrlResp.Seq]
if ok {
responseChan <- &ctrlResp
}
}
switch ctrlResp.Event {
case "video_input_state":
HandleVideoStateMessage(ctrlResp)
}
}
scopedLogger.Debug().Msg("ctrl sock disconnected")
}
func handleVideoClient(conn net.Conn) {
// Lock to OS thread to isolate blocking video I/O
runtime.LockOSThread()
defer runtime.UnlockOSThread()
defer conn.Close()
scopedLogger := nativeLogger.With().
Str("addr", conn.RemoteAddr().String()).
Str("type", "video").
Logger()
scopedLogger.Info().Msg("native video socket client connected (OS thread locked)")
inboundPacket := make([]byte, maxVideoFrameSize)
lastFrame := time.Now()
for {
n, err := conn.Read(inboundPacket)
if err != nil {
scopedLogger.Warn().Err(err).Msg("error during read")
return
}
now := time.Now()
sinceLastFrame := now.Sub(lastFrame)
lastFrame = now
if currentSession != nil {
err := currentSession.VideoTrack.WriteSample(media.Sample{Data: inboundPacket[:n], Duration: sinceLastFrame})
if err != nil {
scopedLogger.Warn().Err(err).Msg("error writing sample")
}
}
}
}
func shouldOverwrite(destPath string, srcHash []byte) bool {
if srcHash == nil {
nativeLogger.Debug().Msg("error reading embedded jetkvm_native.sha256, doing overwriting")
return true
}
dstHash, err := os.ReadFile(destPath + ".sha256")
if err != nil {
nativeLogger.Debug().Msg("error reading existing jetkvm_native.sha256, doing overwriting")
return true
}
return !bytes.Equal(srcHash, dstHash)
}
func getNativeSha256() ([]byte, error) {
version, err := resource.ResourceFS.ReadFile("jetkvm_native.sha256")
if err != nil {
return nil, err
}
return version, nil
}
func GetNativeVersion() (string, error) {
version, err := getNativeSha256()
if err != nil {
return "", err
}
return strings.TrimSpace(string(version)), nil
}
func ensureBinaryUpdated(destPath string) error {
// Lock to OS thread for file I/O operations
runtime.LockOSThread()
defer runtime.UnlockOSThread()
srcFile, err := resource.ResourceFS.Open("jetkvm_native")
if err != nil {
return err
}
defer srcFile.Close()
srcHash, err := getNativeSha256()
if err != nil {
nativeLogger.Debug().Msg("error reading embedded jetkvm_native.sha256, proceeding with update")
srcHash = nil
}
_, err = os.Stat(destPath)
if shouldOverwrite(destPath, srcHash) || err != nil {
nativeLogger.Info().
Interface("hash", srcHash).
Msg("writing jetkvm_native")
_ = os.Remove(destPath)
destFile, err := os.OpenFile(destPath, os.O_CREATE|os.O_RDWR, 0755)
if err != nil {
return err
}
_, err = io.Copy(destFile, srcFile)
destFile.Close()
if err != nil {
return err
}
if srcHash != nil {
err = os.WriteFile(destPath+".sha256", srcHash, 0644)
if err != nil {
return err
}
}
nativeLogger.Info().Msg("jetkvm_native updated")
}
return nil
}
// Restore the HDMI EDID value from the config.
// Called after successful connection to jetkvm_native.
func restoreHdmiEdid() {
if config.EdidString != "" {
nativeLogger.Info().Str("edid", config.EdidString).Msg("Restoring HDMI EDID")
_, err := CallCtrlAction("set_edid", map[string]interface{}{"edid": config.EdidString})
if err != nil {
nativeLogger.Warn().Err(err).Msg("Failed to restore HDMI EDID")
}
}
}
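For reference, the wire format implied by the struct tags above: CallCtrlAction writes one JSON object per unixpacket datagram, and the native side answers with a matching `seq` or pushes unsolicited events. Roughly (field values illustrative):

```json
{"action": "set_edid", "seq": 1, "params": {"edid": "00ff..."}}
{"seq": 1, "result": {}}
{"event": "video_input_state", "data": {"ready": true}}
```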

View File

@ -3,6 +3,7 @@ package kvm
import ( import (
"bufio" "bufio"
"io" "io"
"runtime"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@ -141,6 +142,10 @@ func unmountDCControl() error {
var dcState DCPowerState var dcState DCPowerState
func runDCControl() { func runDCControl() {
// Lock to OS thread to isolate DC control serial I/O
runtime.LockOSThread()
defer runtime.UnlockOSThread()
scopedLogger := serialLogger.With().Str("service", "dc_control").Logger() scopedLogger := serialLogger.With().Str("service", "dc_control").Logger()
reader := bufio.NewReader(port) reader := bufio.NewReader(port)
hasRestoreFeature := false hasRestoreFeature := false
@ -290,6 +295,10 @@ func handleSerialChannel(d *webrtc.DataChannel) {
d.OnOpen(func() { d.OnOpen(func() {
go func() { go func() {
// Lock to OS thread to isolate serial I/O
runtime.LockOSThread()
defer runtime.UnlockOSThread()
buf := make([]byte, 1024) buf := make([]byte, 1024)
for { for {
n, err := port.Read(buf) n, err := port.Read(buf)
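The same pinning idiom now appears in the native socket and serial handlers above and in the terminal handler below: lock the goroutine to its OS thread for the lifetime of a blocking read loop. Distilled into a self-contained sketch (the isolation rationale is this PR's, not a general scheduler guarantee):

```go
package main

import (
	"fmt"
	"io"
	"runtime"
	"strings"
)

// readLoop pins its goroutine to one OS thread for the lifetime of a
// blocking read loop, the idiom used by the serial, terminal, and native
// socket handlers in this PR.
func readLoop(r io.Reader, handle func([]byte)) {
	runtime.LockOSThread()
	defer runtime.UnlockOSThread()

	buf := make([]byte, 1024)
	for {
		n, err := r.Read(buf) // any blocking Read
		if err != nil {
			return
		}
		handle(buf[:n])
	}
}

func main() {
	done := make(chan struct{})
	go func() {
		readLoop(strings.NewReader("hello"), func(b []byte) { fmt.Printf("%s\n", b) })
		close(done)
	}()
	<-done
}
```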

24
session_provider.go Normal file
View File

@ -0,0 +1,24 @@
package kvm
import "github.com/jetkvm/kvm/internal/audio"
// KVMSessionProvider implements the audio.SessionProvider interface
type KVMSessionProvider struct{}
// IsSessionActive returns whether there's an active session
func (k *KVMSessionProvider) IsSessionActive() bool {
return currentSession != nil
}
// GetAudioInputManager returns the current session's audio input manager
func (k *KVMSessionProvider) GetAudioInputManager() *audio.AudioInputManager {
if currentSession == nil {
return nil
}
return currentSession.AudioInputManager
}
// initializeAudioSessionProvider sets up the session provider for the audio package
func initializeAudioSessionProvider() {
audio.SetSessionProvider(&KVMSessionProvider{})
}

View File

@ -6,6 +6,7 @@ import (
"io" "io"
"os" "os"
"os/exec" "os/exec"
"runtime"
"github.com/creack/pty" "github.com/creack/pty"
"github.com/pion/webrtc/v4" "github.com/pion/webrtc/v4"
@ -33,6 +34,10 @@ func handleTerminalChannel(d *webrtc.DataChannel) {
} }
go func() { go func() {
// Lock to OS thread to isolate PTY I/O
runtime.LockOSThread()
defer runtime.UnlockOSThread()
buf := make([]byte, 1024) buf := make([]byte, 1024)
for { for {
n, err := ptmx.Read(buf) n, err := ptmx.Read(buf)

51
tools/build_audio_deps.sh Normal file
View File

@ -0,0 +1,51 @@
#!/bin/bash
# tools/build_audio_deps.sh
# Build ALSA and Opus static libs for ARM in $HOME/.jetkvm/audio-libs
set -e
# Accept version parameters or use defaults
ALSA_VERSION="${1:-1.2.14}"
OPUS_VERSION="${2:-1.5.2}"
JETKVM_HOME="$HOME/.jetkvm"
AUDIO_LIBS_DIR="$JETKVM_HOME/audio-libs"
TOOLCHAIN_DIR="$JETKVM_HOME/rv1106-system"
CROSS_PREFIX="$TOOLCHAIN_DIR/tools/linux/toolchain/arm-rockchip830-linux-uclibcgnueabihf/bin/arm-rockchip830-linux-uclibcgnueabihf"
mkdir -p "$AUDIO_LIBS_DIR"
cd "$AUDIO_LIBS_DIR"
# Download sources
[ -f alsa-lib-${ALSA_VERSION}.tar.bz2 ] || wget -N https://www.alsa-project.org/files/pub/lib/alsa-lib-${ALSA_VERSION}.tar.bz2
[ -f opus-${OPUS_VERSION}.tar.gz ] || wget -N https://downloads.xiph.org/releases/opus/opus-${OPUS_VERSION}.tar.gz
# Extract
[ -d alsa-lib-${ALSA_VERSION} ] || tar xf alsa-lib-${ALSA_VERSION}.tar.bz2
[ -d opus-${OPUS_VERSION} ] || tar xf opus-${OPUS_VERSION}.tar.gz
# Optimization flags for ARM Cortex-A7 with NEON
OPTIM_CFLAGS="-O3 -mcpu=cortex-a7 -mfpu=neon -mfloat-abi=hard -ftree-vectorize -ffast-math -funroll-loops"
export CC="${CROSS_PREFIX}-gcc"
export CFLAGS="$OPTIM_CFLAGS"
export CXXFLAGS="$OPTIM_CFLAGS"
# Build ALSA
cd alsa-lib-${ALSA_VERSION}
if [ ! -f .built ]; then
CFLAGS="$OPTIM_CFLAGS" ./configure --host arm-rockchip830-linux-uclibcgnueabihf --enable-static=yes --enable-shared=no --with-pcm-plugins=rate,linear --disable-seq --disable-rawmidi --disable-ucm
make -j$(nproc)
touch .built
fi
cd ..
# Build Opus
cd opus-${OPUS_VERSION}
if [ ! -f .built ]; then
CFLAGS="$OPTIM_CFLAGS" ./configure --host arm-rockchip830-linux-uclibcgnueabihf --enable-static=yes --enable-shared=no --enable-fixed-point
make -j$(nproc)
touch .built
fi
cd ..
echo "ALSA and Opus built in $AUDIO_LIBS_DIR"

15
tools/setup_rv1106_toolchain.sh Normal file
View File

@ -0,0 +1,15 @@
#!/bin/bash
# tools/setup_rv1106_toolchain.sh
# Clone the rv1106-system toolchain to $HOME/.jetkvm/rv1106-system if not already present
set -e
JETKVM_HOME="$HOME/.jetkvm"
TOOLCHAIN_DIR="$JETKVM_HOME/rv1106-system"
REPO_URL="https://github.com/jetkvm/rv1106-system.git"
mkdir -p "$JETKVM_HOME"
if [ ! -d "$TOOLCHAIN_DIR" ]; then
echo "Cloning rv1106-system toolchain to $TOOLCHAIN_DIR ..."
git clone --depth 1 "$REPO_URL" "$TOOLCHAIN_DIR"
else
echo "Toolchain already present at $TOOLCHAIN_DIR"
fi
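A typical bootstrap on a fresh machine runs the two scripts in order. The versions are the script defaults; the CGO exports at the end are a hypothetical illustration of how a build might consume the static libraries, since the real flags belong to the project build system and are not shown in this diff:

```sh
# One-time setup: fetch the cross toolchain, then build static ALSA + Opus.
./tools/setup_rv1106_toolchain.sh
./tools/build_audio_deps.sh 1.2.14 1.5.2

# Hypothetical CGO wiring; the actual flags are defined by the build system.
export CGO_ENABLED=1
export CGO_CFLAGS="-I$HOME/.jetkvm/audio-libs/alsa-lib-1.2.14/include -I$HOME/.jetkvm/audio-libs/opus-1.5.2/include"
export CGO_LDFLAGS="-L$HOME/.jetkvm/audio-libs/alsa-lib-1.2.14/src/.libs -lasound -L$HOME/.jetkvm/audio-libs/opus-1.5.2/.libs -lopus"
```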

View File

@ -1,8 +1,8 @@
import { MdOutlineContentPasteGo } from "react-icons/md"; import { MdOutlineContentPasteGo, MdVolumeOff, MdVolumeUp, MdGraphicEq } from "react-icons/md";
import { LuCable, LuHardDrive, LuMaximize, LuSettings, LuSignal } from "react-icons/lu"; import { LuCable, LuHardDrive, LuMaximize, LuSettings, LuSignal } from "react-icons/lu";
import { FaKeyboard } from "react-icons/fa6"; import { FaKeyboard } from "react-icons/fa6";
import { Popover, PopoverButton, PopoverPanel } from "@headlessui/react"; import { Popover, PopoverButton, PopoverPanel } from "@headlessui/react";
import { Fragment, useCallback, useRef } from "react"; import { Fragment, useCallback, useEffect, useRef, useState } from "react";
import { CommandLineIcon } from "@heroicons/react/20/solid"; import { CommandLineIcon } from "@heroicons/react/20/solid";
import { Button } from "@components/Button"; import { Button } from "@components/Button";
@ -18,12 +18,38 @@ import PasteModal from "@/components/popovers/PasteModal";
import WakeOnLanModal from "@/components/popovers/WakeOnLan/Index"; import WakeOnLanModal from "@/components/popovers/WakeOnLan/Index";
import MountPopopover from "@/components/popovers/MountPopover"; import MountPopopover from "@/components/popovers/MountPopover";
import ExtensionPopover from "@/components/popovers/ExtensionPopover"; import ExtensionPopover from "@/components/popovers/ExtensionPopover";
import AudioControlPopover from "@/components/popovers/AudioControlPopover";
import { useDeviceUiNavigation } from "@/hooks/useAppNavigation"; import { useDeviceUiNavigation } from "@/hooks/useAppNavigation";
import { useAudioEvents } from "@/hooks/useAudioEvents";
import api from "@/api";
// Type for microphone error
interface MicrophoneError {
type: 'permission' | 'device' | 'network' | 'unknown';
message: string;
}
// Type for microphone hook return value
interface MicrophoneHookReturn {
isMicrophoneActive: boolean;
isMicrophoneMuted: boolean;
microphoneStream: MediaStream | null;
startMicrophone: (deviceId?: string) => Promise<{ success: boolean; error?: MicrophoneError }>;
stopMicrophone: () => Promise<{ success: boolean; error?: MicrophoneError }>;
toggleMicrophoneMute: () => Promise<{ success: boolean; error?: MicrophoneError }>;
syncMicrophoneState: () => Promise<void>;
// Loading states
isStarting: boolean;
isStopping: boolean;
isToggling: boolean;
}
export default function Actionbar({ export default function Actionbar({
requestFullscreen, requestFullscreen,
microphone,
}: { }: {
requestFullscreen: () => Promise<void>; requestFullscreen: () => Promise<void>;
microphone: MicrophoneHookReturn;
}) { }) {
const { navigateTo } = useDeviceUiNavigation(); const { navigateTo } = useDeviceUiNavigation();
const virtualKeyboard = useHidStore(state => state.isVirtualKeyboardEnabled); const virtualKeyboard = useHidStore(state => state.isVirtualKeyboardEnabled);
@ -56,6 +82,37 @@ export default function Actionbar({
[setDisableFocusTrap], [setDisableFocusTrap],
); );
// Use WebSocket-based audio events for real-time updates
const { audioMuted, isConnected } = useAudioEvents();
// Fallback to polling if WebSocket is not connected
const [fallbackMuted, setFallbackMuted] = useState(false);
useEffect(() => {
if (!isConnected) {
// Load initial state
api.GET("/audio/mute").then(async resp => {
if (resp.ok) {
const data = await resp.json();
setFallbackMuted(!!data.muted);
}
});
// Fallback polling when WebSocket is not available
const interval = setInterval(async () => {
const resp = await api.GET("/audio/mute");
if (resp.ok) {
const data = await resp.json();
setFallbackMuted(!!data.muted);
}
}, 1000);
return () => clearInterval(interval);
}
}, [isConnected]);
// Use WebSocket data when available, fallback to polling data otherwise
const isMuted = isConnected && audioMuted !== null ? audioMuted : fallbackMuted;
return ( return (
<Container className="border-b border-b-slate-800/20 bg-white dark:border-b-slate-300/20 dark:bg-slate-900"> <Container className="border-b border-b-slate-800/20 bg-white dark:border-b-slate-300/20 dark:bg-slate-900">
<div <div
@ -93,7 +150,7 @@ export default function Actionbar({
"flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0",
)} )}
> >
{({ open }) => { {({ open }: { open: boolean }) => {
checkIfStateChanged(open); checkIfStateChanged(open);
return ( return (
<div className="mx-auto w-full max-w-xl"> <div className="mx-auto w-full max-w-xl">
@ -135,7 +192,7 @@ export default function Actionbar({
"flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0",
)} )}
> >
{({ open }) => { {({ open }: { open: boolean }) => {
checkIfStateChanged(open); checkIfStateChanged(open);
return ( return (
<div className="mx-auto w-full max-w-xl"> <div className="mx-auto w-full max-w-xl">
@ -187,7 +244,7 @@ export default function Actionbar({
"flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0",
)} )}
> >
{({ open }) => { {({ open }: { open: boolean }) => {
checkIfStateChanged(open); checkIfStateChanged(open);
return ( return (
<div className="mx-auto w-full max-w-xl"> <div className="mx-auto w-full max-w-xl">
@ -230,7 +287,7 @@ export default function Actionbar({
"flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0",
)} )}
> >
{({ open }) => { {({ open }: { open: boolean }) => {
checkIfStateChanged(open); checkIfStateChanged(open);
return <ExtensionPopover />; return <ExtensionPopover />;
}} }}
@ -262,6 +319,7 @@ export default function Actionbar({
}} }}
/> />
</div> </div>
<div> <div>
<Button <Button
size="XS" size="XS"
@ -282,6 +340,45 @@ export default function Actionbar({
onClick={() => requestFullscreen()} onClick={() => requestFullscreen()}
/> />
</div> </div>
<Popover>
<PopoverButton as={Fragment}>
<Button
size="XS"
theme="light"
text="Audio"
LeadingIcon={({ className }) => (
<div className="flex items-center">
{isMuted ? (
<MdVolumeOff className={cx(className, "text-red-500")} />
) : (
<MdVolumeUp className={cx(className, "text-green-500")} />
)}
<MdGraphicEq className={cx(className, "ml-1 text-blue-500")} />
</div>
)}
onClick={() => {
setDisableFocusTrap(true);
}}
/>
</PopoverButton>
<PopoverPanel
anchor="bottom end"
transition
className={cx(
"z-10 flex origin-top flex-col overflow-visible!",
"flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0",
)}
>
{({ open }: { open: boolean }) => {
checkIfStateChanged(open);
return (
<div className="mx-auto">
<AudioControlPopover microphone={microphone} open={open} />
</div>
);
}}
</PopoverPanel>
</Popover>
</div> </div>
</div> </div>
</Container> </Container>

View File

@ -0,0 +1,77 @@
import React from 'react';
import clsx from 'clsx';
interface AudioLevelMeterProps {
level: number; // 0-100 percentage
isActive: boolean;
className?: string;
size?: 'sm' | 'md' | 'lg';
showLabel?: boolean;
}
export const AudioLevelMeter: React.FC<AudioLevelMeterProps> = ({
level,
isActive,
className,
size = 'md',
showLabel = true
}) => {
const sizeClasses = {
sm: 'h-1',
md: 'h-2',
lg: 'h-3'
};
const getLevelColor = (level: number) => {
if (level < 20) return 'bg-green-500';
if (level < 60) return 'bg-yellow-500';
return 'bg-red-500';
};
const getTextColor = (level: number) => {
if (level < 20) return 'text-green-600 dark:text-green-400';
if (level < 60) return 'text-yellow-600 dark:text-yellow-400';
return 'text-red-600 dark:text-red-400';
};
return (
<div className={clsx('space-y-1', className)}>
{showLabel && (
<div className="flex justify-between text-xs">
<span className="text-slate-500 dark:text-slate-400">
Microphone Level
</span>
<span className={clsx(
'font-mono',
isActive ? getTextColor(level) : 'text-slate-400 dark:text-slate-500'
)}>
{isActive ? `${Math.round(level)}%` : 'No Signal'}
</span>
</div>
)}
<div className={clsx(
'w-full rounded-full bg-slate-200 dark:bg-slate-700',
sizeClasses[size]
)}>
<div
className={clsx(
'rounded-full transition-all duration-150 ease-out',
sizeClasses[size],
isActive ? getLevelColor(level) : 'bg-slate-300 dark:bg-slate-600'
)}
style={{
width: isActive ? `${Math.min(100, Math.max(2, level))}%` : '0%'
}}
/>
</div>
{/* Peak indicators */}
<div className="flex justify-between text-xs text-slate-400 dark:text-slate-500">
<span>0%</span>
<span>50%</span>
<span>100%</span>
</div>
</div>
);
};

View File

@ -0,0 +1,493 @@
import { useEffect, useState } from "react";
import { MdGraphicEq, MdSignalWifi4Bar, MdError, MdMic } from "react-icons/md";
import { LuActivity, LuClock, LuHardDrive, LuSettings } from "react-icons/lu";
import { AudioLevelMeter } from "@components/AudioLevelMeter";
import { cx } from "@/cva.config";
import { useMicrophone } from "@/hooks/useMicrophone";
import { useAudioLevel } from "@/hooks/useAudioLevel";
import { useAudioEvents } from "@/hooks/useAudioEvents";
import api from "@/api";
interface AudioMetrics {
frames_received: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
interface MicrophoneMetrics {
frames_sent: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
interface AudioConfig {
Quality: number;
Bitrate: number;
SampleRate: number;
Channels: number;
FrameSize: string;
}
const qualityLabels = {
0: "Low",
1: "Medium",
2: "High",
3: "Ultra"
};
export default function AudioMetricsDashboard() {
// Use WebSocket-based audio events for real-time updates
const {
audioMetrics,
microphoneMetrics: wsMicrophoneMetrics,
isConnected: wsConnected
} = useAudioEvents();
// Fallback state for when WebSocket is not connected
const [fallbackMetrics, setFallbackMetrics] = useState<AudioMetrics | null>(null);
const [fallbackMicrophoneMetrics, setFallbackMicrophoneMetrics] = useState<MicrophoneMetrics | null>(null);
const [fallbackConnected, setFallbackConnected] = useState(false);
// Configuration state (these don't change frequently, so we can load them once)
const [config, setConfig] = useState<AudioConfig | null>(null);
const [microphoneConfig, setMicrophoneConfig] = useState<AudioConfig | null>(null);
const [lastUpdate, setLastUpdate] = useState<Date>(new Date());
// Use WebSocket data when available, fallback to polling data otherwise
const metrics = wsConnected && audioMetrics !== null ? audioMetrics : fallbackMetrics;
const microphoneMetrics = wsConnected && wsMicrophoneMetrics !== null ? wsMicrophoneMetrics : fallbackMicrophoneMetrics;
const isConnected = wsConnected || fallbackConnected;
// Microphone state for audio level monitoring
const { isMicrophoneActive, isMicrophoneMuted, microphoneStream } = useMicrophone();
const { audioLevel, isAnalyzing } = useAudioLevel(
isMicrophoneActive ? microphoneStream : null,
{
enabled: isMicrophoneActive,
updateInterval: 120,
});
useEffect(() => {
// Load initial configuration (only once)
loadAudioConfig();
// Set up fallback polling only when WebSocket is not connected
if (!wsConnected) {
loadAudioData();
const interval = setInterval(loadAudioData, 1000);
return () => clearInterval(interval);
}
}, [wsConnected]);
const loadAudioConfig = async () => {
try {
// Load config
const configResp = await api.GET("/audio/quality");
if (configResp.ok) {
const configData = await configResp.json();
setConfig(configData.current);
}
// Load microphone config
try {
const micConfigResp = await api.GET("/microphone/quality");
if (micConfigResp.ok) {
const micConfigData = await micConfigResp.json();
setMicrophoneConfig(micConfigData.current);
}
} catch (micConfigError) {
console.debug("Microphone config not available:", micConfigError);
}
} catch (error) {
console.error("Failed to load audio config:", error);
}
};
const loadAudioData = async () => {
try {
// Load metrics
const metricsResp = await api.GET("/audio/metrics");
if (metricsResp.ok) {
const metricsData = await metricsResp.json();
setFallbackMetrics(metricsData);
// Consider connected if API call succeeds, regardless of frame count
setFallbackConnected(true);
setLastUpdate(new Date());
} else {
setFallbackConnected(false);
}
// Load microphone metrics
try {
const micResp = await api.GET("/microphone/metrics");
if (micResp.ok) {
const micData = await micResp.json();
setFallbackMicrophoneMetrics(micData);
}
} catch (micError) {
// Microphone metrics might not be available, that's okay
console.debug("Microphone metrics not available:", micError);
}
} catch (error) {
console.error("Failed to load audio data:", error);
setFallbackConnected(false);
}
};
const formatBytes = (bytes: number) => {
if (bytes === 0) return "0 B";
const k = 1024;
const sizes = ["B", "KB", "MB", "GB"];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + " " + sizes[i];
};
const formatNumber = (num: number) => {
return new Intl.NumberFormat().format(num);
};
const getDropRate = () => {
if (!metrics || metrics.frames_received === 0) return 0;
return ((metrics.frames_dropped / metrics.frames_received) * 100);
};
const getQualityColor = (quality: number) => {
switch (quality) {
case 0: return "text-yellow-600 dark:text-yellow-400";
case 1: return "text-blue-600 dark:text-blue-400";
case 2: return "text-green-600 dark:text-green-400";
case 3: return "text-purple-600 dark:text-purple-400";
default: return "text-slate-600 dark:text-slate-400";
}
};
return (
<div className="space-y-4">
{/* Header */}
<div className="flex items-center justify-between">
<div className="flex items-center gap-2">
<MdGraphicEq className="h-5 w-5 text-blue-600 dark:text-blue-400" />
<h3 className="text-lg font-semibold text-slate-900 dark:text-slate-100">
Audio Metrics
</h3>
</div>
<div className="flex items-center gap-2">
<div className={cx(
"h-2 w-2 rounded-full",
isConnected ? "bg-green-500" : "bg-red-500"
)} />
<span className="text-xs text-slate-500 dark:text-slate-400">
{isConnected ? "Active" : "Inactive"}
</span>
</div>
</div>
{/* Current Configuration */}
<div className="grid grid-cols-1 md:grid-cols-2 gap-4">
{config && (
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-2 flex items-center gap-2">
<LuSettings className="h-4 w-4 text-blue-600 dark:text-blue-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Output Config
</span>
</div>
<div className="space-y-2 text-sm">
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Quality:</span>
<span className={cx("font-medium", getQualityColor(config.Quality))}>
{qualityLabels[config.Quality as keyof typeof qualityLabels]}
</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Bitrate:</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{config.Bitrate}kbps
</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Sample Rate:</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{config.SampleRate}Hz
</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Channels:</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{config.Channels}
</span>
</div>
</div>
</div>
)}
{microphoneConfig && (
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-2 flex items-center gap-2">
<MdMic className="h-4 w-4 text-green-600 dark:text-green-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Microphone Input Config
</span>
</div>
<div className="space-y-2 text-sm">
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Quality:</span>
<span className={cx("font-medium", getQualityColor(microphoneConfig.Quality))}>
{qualityLabels[microphoneConfig.Quality as keyof typeof qualityLabels]}
</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Bitrate:</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{microphoneConfig.Bitrate}kbps
</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Sample Rate:</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{microphoneConfig.SampleRate}Hz
</span>
</div>
<div className="flex justify-between">
<span className="text-slate-500 dark:text-slate-400">Channels:</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{microphoneConfig.Channels}
</span>
</div>
</div>
</div>
)}
</div>
{/* Performance Metrics */}
{metrics && (
<div className="space-y-3">
{/* Audio Output Frames */}
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-2 flex items-center gap-2">
<LuActivity className="h-4 w-4 text-green-600 dark:text-green-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Output
</span>
</div>
<div className="grid grid-cols-2 gap-3">
<div className="text-center">
<div className="text-2xl font-bold text-green-600 dark:text-green-400">
{formatNumber(metrics.frames_received)}
</div>
<div className="text-xs text-slate-500 dark:text-slate-400">
Frames Received
</div>
</div>
<div className="text-center">
<div className={cx(
"text-2xl font-bold",
metrics.frames_dropped > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(metrics.frames_dropped)}
</div>
<div className="text-xs text-slate-500 dark:text-slate-400">
Frames Dropped
</div>
</div>
</div>
{/* Drop Rate */}
<div className="mt-3 rounded-md bg-slate-50 p-2 dark:bg-slate-700">
<div className="flex items-center justify-between">
<span className="text-sm text-slate-600 dark:text-slate-400">
Drop Rate
</span>
<span className={cx(
"font-bold",
getDropRate() > 5
? "text-red-600 dark:text-red-400"
: getDropRate() > 1
? "text-yellow-600 dark:text-yellow-400"
: "text-green-600 dark:text-green-400"
)}>
{getDropRate().toFixed(2)}%
</span>
</div>
<div className="mt-1 h-2 w-full rounded-full bg-slate-200 dark:bg-slate-600">
<div
className={cx(
"h-2 rounded-full transition-all duration-300",
getDropRate() > 5
? "bg-red-500"
: getDropRate() > 1
? "bg-yellow-500"
: "bg-green-500"
)}
style={{ width: `${Math.min(getDropRate(), 100)}%` }}
/>
</div>
</div>
</div>
{/* Microphone Input Metrics */}
{microphoneMetrics && (
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-2 flex items-center gap-2">
<MdMic className="h-4 w-4 text-orange-600 dark:text-orange-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Microphone Input
</span>
</div>
<div className="grid grid-cols-2 gap-3">
<div className="text-center">
<div className="text-2xl font-bold text-orange-600 dark:text-orange-400">
{formatNumber(microphoneMetrics.frames_sent)}
</div>
<div className="text-xs text-slate-500 dark:text-slate-400">
Frames Sent
</div>
</div>
<div className="text-center">
<div className={cx(
"text-2xl font-bold",
microphoneMetrics.frames_dropped > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(microphoneMetrics.frames_dropped)}
</div>
<div className="text-xs text-slate-500 dark:text-slate-400">
Frames Dropped
</div>
</div>
</div>
{/* Microphone Drop Rate */}
<div className="mt-3 rounded-md bg-slate-50 p-2 dark:bg-slate-700">
<div className="flex items-center justify-between">
<span className="text-sm text-slate-600 dark:text-slate-400">
Drop Rate
</span>
<span className={cx(
"font-bold",
(microphoneMetrics.frames_sent > 0 ? (microphoneMetrics.frames_dropped / microphoneMetrics.frames_sent) * 100 : 0) > 5
? "text-red-600 dark:text-red-400"
: (microphoneMetrics.frames_sent > 0 ? (microphoneMetrics.frames_dropped / microphoneMetrics.frames_sent) * 100 : 0) > 1
? "text-yellow-600 dark:text-yellow-400"
: "text-green-600 dark:text-green-400"
)}>
{microphoneMetrics.frames_sent > 0 ? ((microphoneMetrics.frames_dropped / microphoneMetrics.frames_sent) * 100).toFixed(2) : "0.00"}%
</span>
</div>
<div className="mt-1 h-2 w-full rounded-full bg-slate-200 dark:bg-slate-600">
<div
className={cx(
"h-2 rounded-full transition-all duration-300",
(microphoneMetrics.frames_sent > 0 ? (microphoneMetrics.frames_dropped / microphoneMetrics.frames_sent) * 100 : 0) > 5
? "bg-red-500"
: (microphoneMetrics.frames_sent > 0 ? (microphoneMetrics.frames_dropped / microphoneMetrics.frames_sent) * 100 : 0) > 1
? "bg-yellow-500"
: "bg-green-500"
)}
style={{
width: `${Math.min(microphoneMetrics.frames_sent > 0 ? (microphoneMetrics.frames_dropped / microphoneMetrics.frames_sent) * 100 : 0, 100)}%`
}}
/>
</div>
</div>
{/* Microphone Audio Level */}
{isMicrophoneActive && (
<div className="mt-3 rounded-md bg-slate-50 p-2 dark:bg-slate-700">
<AudioLevelMeter
level={audioLevel}
isActive={isMicrophoneActive && !isMicrophoneMuted && isAnalyzing}
size="sm"
showLabel={true}
/>
</div>
)}
</div>
)}
{/* Data Transfer */}
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-2 flex items-center gap-2">
<LuHardDrive className="h-4 w-4 text-blue-600 dark:text-blue-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Data Transfer
</span>
</div>
<div className="text-center">
<div className="text-2xl font-bold text-blue-600 dark:text-blue-400">
{formatBytes(metrics.bytes_processed)}
</div>
<div className="text-xs text-slate-500 dark:text-slate-400">
Total Processed
</div>
</div>
</div>
{/* Connection Health */}
<div className="rounded-lg border border-slate-200 p-3 dark:border-slate-700">
<div className="mb-2 flex items-center gap-2">
<MdSignalWifi4Bar className="h-4 w-4 text-purple-600 dark:text-purple-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Connection Health
</span>
</div>
<div className="space-y-2">
<div className="flex justify-between">
<span className="text-sm text-slate-500 dark:text-slate-400">
Connection Drops:
</span>
<span className={cx(
"font-medium",
metrics.connection_drops > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(metrics.connection_drops)}
</span>
</div>
{metrics.average_latency && (
<div className="flex justify-between">
<span className="text-sm text-slate-500 dark:text-slate-400">
Avg Latency:
</span>
<span className="font-medium text-slate-900 dark:text-slate-100">
{metrics.average_latency}
</span>
</div>
)}
</div>
</div>
</div>
)}
{/* Last Update */}
<div className="flex items-center justify-center gap-2 text-xs text-slate-500 dark:text-slate-400">
<LuClock className="h-3 w-3" />
<span>Last updated: {lastUpdate.toLocaleTimeString()}</span>
</div>
{/* No Data State */}
{!metrics && (
<div className="flex flex-col items-center justify-center py-8 text-center">
<MdError className="h-12 w-12 text-slate-400 dark:text-slate-600" />
<h3 className="mt-2 text-sm font-medium text-slate-900 dark:text-slate-100">
No Audio Data
</h3>
<p className="mt-1 text-sm text-slate-500 dark:text-slate-400">
Audio metrics will appear when audio streaming is active.
</p>
</div>
)}
</div>
);
}

View File

@@ -25,7 +25,32 @@ import {
PointerLockBar,
} from "./VideoOverlay";
// Type for microphone error
interface MicrophoneError {
type: 'permission' | 'device' | 'network' | 'unknown';
message: string;
}
// Interface for microphone hook return type
interface MicrophoneHookReturn {
isMicrophoneActive: boolean;
isMicrophoneMuted: boolean;
microphoneStream: MediaStream | null;
startMicrophone: (deviceId?: string) => Promise<{ success: boolean; error?: MicrophoneError }>;
stopMicrophone: () => Promise<{ success: boolean; error?: MicrophoneError }>;
toggleMicrophoneMute: () => Promise<{ success: boolean; error?: MicrophoneError }>;
syncMicrophoneState: () => Promise<void>;
// Loading states
isStarting: boolean;
isStopping: boolean;
isToggling: boolean;
}
interface WebRTCVideoProps {
microphone: MicrophoneHookReturn;
}
-export default function WebRTCVideo() {
+export default function WebRTCVideo({ microphone }: WebRTCVideoProps) {
// Video and stream related refs and states
const videoElm = useRef<HTMLVideoElement>(null);
const mediaStream = useRTCStore(state => state.mediaStream);
@@ -675,7 +700,7 @@ export default function WebRTCVideo() {
disabled={peerConnection?.connectionState !== "connected"}
className="contents"
>
-<Actionbar requestFullscreen={requestFullscreen} />
+<Actionbar requestFullscreen={requestFullscreen} microphone={microphone} />
<MacroBar />
</fieldset>
</div>
@ -705,7 +730,7 @@ export default function WebRTCVideo() {
controls={false} controls={false}
onPlaying={onVideoPlaying} onPlaying={onVideoPlaying}
onPlay={onVideoPlaying} onPlay={onVideoPlaying}
muted muted={false}
playsInline playsInline
disablePictureInPicture disablePictureInPicture
controlsList="nofullscreen" controlsList="nofullscreen"

View File

@@ -0,0 +1,841 @@
import { useEffect, useState } from "react";
import { MdVolumeOff, MdVolumeUp, MdGraphicEq, MdMic, MdMicOff, MdRefresh } from "react-icons/md";
import { LuActivity, LuSettings, LuSignal } from "react-icons/lu";
import { Button } from "@components/Button";
import { AudioLevelMeter } from "@components/AudioLevelMeter";
import { cx } from "@/cva.config";
import { useUiStore } from "@/hooks/stores";
import { useAudioDevices } from "@/hooks/useAudioDevices";
import { useAudioLevel } from "@/hooks/useAudioLevel";
import { useAudioEvents } from "@/hooks/useAudioEvents";
import api from "@/api";
import notifications from "@/notifications";
// Type for microphone error
interface MicrophoneError {
type: 'permission' | 'device' | 'network' | 'unknown';
message: string;
}
// Type for microphone hook return value
interface MicrophoneHookReturn {
isMicrophoneActive: boolean;
isMicrophoneMuted: boolean;
microphoneStream: MediaStream | null;
startMicrophone: (deviceId?: string) => Promise<{ success: boolean; error?: MicrophoneError }>;
stopMicrophone: () => Promise<{ success: boolean; error?: MicrophoneError }>;
toggleMicrophoneMute: () => Promise<{ success: boolean; error?: MicrophoneError }>;
syncMicrophoneState: () => Promise<void>;
// Loading states
isStarting: boolean;
isStopping: boolean;
isToggling: boolean;
}
interface AudioConfig {
Quality: number;
Bitrate: number;
SampleRate: number;
Channels: number;
FrameSize: string;
}
interface AudioMetrics {
frames_received: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
interface MicrophoneMetrics {
frames_sent: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
const qualityLabels = {
0: "Low (32kbps)",
1: "Medium (64kbps)",
2: "High (128kbps)",
3: "Ultra (256kbps)"
};
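// The numeric keys above are the quality preset indices the backend expects.
// Illustrative call (mirrors handleQualityChange below):
//   await api.POST("/audio/quality", { quality: 2 }); // request "High (128kbps)"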
interface AudioControlPopoverProps {
microphone: MicrophoneHookReturn;
open?: boolean; // whether the popover is open (controls analysis)
}
export default function AudioControlPopover({ microphone, open }: AudioControlPopoverProps) {
const [currentConfig, setCurrentConfig] = useState<AudioConfig | null>(null);
const [currentMicrophoneConfig, setCurrentMicrophoneConfig] = useState<AudioConfig | null>(null);
const [showAdvanced, setShowAdvanced] = useState(false);
const [isLoading, setIsLoading] = useState(false);
// Add cache flags to prevent unnecessary API calls
const [configsLoaded, setConfigsLoaded] = useState(false);
// Add cooldown to prevent rapid clicking
const [lastClickTime, setLastClickTime] = useState(0);
const CLICK_COOLDOWN = 500; // 500ms cooldown between clicks
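// With this cooldown, a double-click on Start/Stop produces a single backend
// request; the second click lands inside the 500ms window and is ignored.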
// Use WebSocket-based audio events for real-time updates
const {
audioMuted,
audioMetrics,
microphoneMetrics,
isConnected: wsConnected
} = useAudioEvents();
// Fallback state for when WebSocket is not connected
const [fallbackMuted, setFallbackMuted] = useState(false);
const [fallbackMetrics, setFallbackMetrics] = useState<AudioMetrics | null>(null);
const [fallbackMicMetrics, setFallbackMicMetrics] = useState<MicrophoneMetrics | null>(null);
const [fallbackConnected, setFallbackConnected] = useState(false);
// Microphone state from props
const {
isMicrophoneActive,
isMicrophoneMuted,
microphoneStream,
startMicrophone,
stopMicrophone,
toggleMicrophoneMute,
syncMicrophoneState,
// Loading states
isStarting,
isStopping,
isToggling,
} = microphone;
// Use WebSocket data when available, fallback to polling data otherwise
const isMuted = wsConnected && audioMuted !== null ? audioMuted : fallbackMuted;
const metrics = wsConnected && audioMetrics !== null ? audioMetrics : fallbackMetrics;
const micMetrics = wsConnected && microphoneMetrics !== null ? microphoneMetrics : fallbackMicMetrics;
const isConnected = wsConnected || fallbackConnected;
// Audio level monitoring - enable only when popover is open and microphone is active to save resources
const analysisEnabled = (open ?? true) && isMicrophoneActive;
const { audioLevel, isAnalyzing } = useAudioLevel(analysisEnabled ? microphoneStream : null, {
enabled: analysisEnabled,
updateInterval: 120, // 8-10 fps to reduce CPU without losing UX quality
});
// Audio devices
const {
audioInputDevices,
audioOutputDevices,
selectedInputDevice,
selectedOutputDevice,
setSelectedInputDevice,
setSelectedOutputDevice,
isLoading: devicesLoading,
error: devicesError,
refreshDevices
} = useAudioDevices();
const { toggleSidebarView } = useUiStore();
// Load initial configurations once - cache to prevent repeated calls
useEffect(() => {
if (!configsLoaded) {
loadAudioConfigurations();
}
}, [configsLoaded]);
// Optimize fallback polling - only run when WebSocket is not connected
useEffect(() => {
if (!wsConnected && !configsLoaded) {
// Load state once if configs aren't loaded yet
loadAudioState();
}
// Always sync microphone state, but debounce it; scheduled before the fallback
// branch so it runs even while the WebSocket is down
const syncTimeout = setTimeout(() => {
syncMicrophoneState();
}, 500);
if (!wsConnected) {
loadAudioMetrics();
loadMicrophoneMetrics();
// Reduced frequency for fallback polling (every 3 seconds instead of 2)
const metricsInterval = setInterval(() => {
if (!wsConnected) { // Double-check to prevent unnecessary requests
loadAudioMetrics();
loadMicrophoneMetrics();
}
}, 3000);
return () => {
clearInterval(metricsInterval);
clearTimeout(syncTimeout);
};
}
return () => clearTimeout(syncTimeout);
}, [wsConnected, syncMicrophoneState, configsLoaded]);
const loadAudioConfigurations = async () => {
try {
// Parallel loading for better performance
const [qualityResp, micQualityResp] = await Promise.all([
api.GET("/audio/quality"),
api.GET("/microphone/quality")
]);
if (qualityResp.ok) {
const qualityData = await qualityResp.json();
setCurrentConfig(qualityData.current);
}
if (micQualityResp.ok) {
const micQualityData = await micQualityResp.json();
setCurrentMicrophoneConfig(micQualityData.current);
}
setConfigsLoaded(true);
} catch (error) {
console.error("Failed to load audio configurations:", error);
}
};
const loadAudioState = async () => {
try {
// Load mute state only (configurations are loaded separately)
const muteResp = await api.GET("/audio/mute");
if (muteResp.ok) {
const muteData = await muteResp.json();
setFallbackMuted(!!muteData.muted);
}
} catch (error) {
console.error("Failed to load audio state:", error);
}
};
const loadAudioMetrics = async () => {
try {
const resp = await api.GET("/audio/metrics");
if (resp.ok) {
const data = await resp.json();
setFallbackMetrics(data);
// Consider connected if API call succeeds, regardless of frame count
setFallbackConnected(true);
} else {
setFallbackConnected(false);
}
} catch (error) {
console.error("Failed to load audio metrics:", error);
setFallbackConnected(false);
}
};
const loadMicrophoneMetrics = async () => {
try {
const resp = await api.GET("/microphone/metrics");
if (resp.ok) {
const data = await resp.json();
setFallbackMicMetrics(data);
}
} catch (error) {
console.error("Failed to load microphone metrics:", error);
}
};
const handleToggleMute = async () => {
setIsLoading(true);
try {
const resp = await api.POST("/audio/mute", { muted: !isMuted });
if (resp.ok) {
// WebSocket will handle the state update, but update fallback for immediate feedback
if (!wsConnected) {
setFallbackMuted(!isMuted);
}
}
} catch (error) {
console.error("Failed to toggle mute:", error);
} finally {
setIsLoading(false);
}
};
const handleQualityChange = async (quality: number) => {
setIsLoading(true);
try {
const resp = await api.POST("/audio/quality", { quality });
if (resp.ok) {
const data = await resp.json();
setCurrentConfig(data.config);
}
} catch (error) {
console.error("Failed to change audio quality:", error);
} finally {
setIsLoading(false);
}
};
const handleMicrophoneQualityChange = async (quality: number) => {
try {
const resp = await api.POST("/microphone/quality", { quality });
if (resp.ok) {
const data = await resp.json();
setCurrentMicrophoneConfig(data.config);
}
} catch (error) {
console.error("Failed to change microphone quality:", error);
}
};
const handleToggleMicrophone = async () => {
const now = Date.now();
// Prevent rapid clicking - if any operation is in progress or within cooldown, ignore the click
if (isStarting || isStopping || isToggling || (now - lastClickTime < CLICK_COOLDOWN)) {
console.log("Microphone operation already in progress or within cooldown, ignoring click");
return;
}
setLastClickTime(now);
try {
const result = isMicrophoneActive ? await stopMicrophone() : await startMicrophone(selectedInputDevice);
if (!result.success && result.error) {
notifications.error(result.error.message);
}
} catch (error) {
console.error("Failed to toggle microphone:", error);
notifications.error("An unexpected error occurred");
}
};
const handleToggleMicrophoneMute = async () => {
const now = Date.now();
// Prevent rapid clicking - if any operation is in progress or within cooldown, ignore the click
if (isStarting || isStopping || isToggling || (now - lastClickTime < CLICK_COOLDOWN)) {
console.log("Microphone operation already in progress or within cooldown, ignoring mute toggle");
return;
}
setLastClickTime(now);
try {
const result = await toggleMicrophoneMute();
if (!result.success && result.error) {
notifications.error(result.error.message);
}
} catch (error) {
console.error("Failed to toggle microphone mute:", error);
notifications.error("Failed to toggle microphone mute");
}
};
// Handle microphone device change
const handleMicrophoneDeviceChange = async (deviceId: string) => {
setSelectedInputDevice(deviceId);
// If microphone is currently active, restart it with the new device
if (isMicrophoneActive) {
try {
// Stop current microphone
await stopMicrophone();
// Start with new device
const result = await startMicrophone(deviceId);
if (!result.success && result.error) {
notifications.error(result.error.message);
}
} catch (error) {
console.error("Failed to change microphone device:", error);
notifications.error("Failed to change microphone device");
}
}
};
const handleAudioOutputDeviceChange = async (deviceId: string) => {
setSelectedOutputDevice(deviceId);
// Find the video element and set the audio output device
const videoElement = document.querySelector('video');
if (videoElement && 'setSinkId' in videoElement) {
try {
await (videoElement as HTMLVideoElement & { setSinkId: (deviceId: string) => Promise<void> }).setSinkId(deviceId);
console.log('Audio output device changed to:', deviceId);
} catch (error: unknown) {
console.error('Failed to change audio output device:', error);
}
} else {
console.warn('setSinkId not supported or video element not found');
}
};
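// Note: HTMLMediaElement.setSinkId() is not implemented by every browser (Safari
// in particular lagged behind), hence the feature check and console warning above
// rather than a hard failure.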
const formatBytes = (bytes: number) => {
if (bytes === 0) return "0 B";
const k = 1024;
const sizes = ["B", "KB", "MB", "GB"];
const i = Math.floor(Math.log(bytes) / Math.log(k));
return parseFloat((bytes / Math.pow(k, i)).toFixed(2)) + " " + sizes[i];
};
const formatNumber = (num: number) => {
return new Intl.NumberFormat().format(num);
};
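// Worked examples for the two formatters above (formatNumber is locale-dependent):
//   formatBytes(0)        -> "0 B"
//   formatBytes(1536)     -> "1.5 KB"
//   formatBytes(1048576)  -> "1 MB"
//   formatNumber(1234567) -> "1,234,567" (en-US)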
return (
<div className="w-full max-w-md rounded-lg border border-slate-200 bg-white p-4 shadow-lg dark:border-slate-700 dark:bg-slate-800">
<div className="space-y-4">
{/* Header */}
<div className="flex items-center justify-between">
<h3 className="text-lg font-semibold text-slate-900 dark:text-slate-100">
Audio Controls
</h3>
<div className="flex items-center gap-2">
<div className={cx(
"h-2 w-2 rounded-full",
isConnected ? "bg-green-500" : "bg-red-500"
)} />
<span className="text-xs text-slate-500 dark:text-slate-400">
{isConnected ? "Connected" : "Disconnected"}
</span>
</div>
</div>
{/* Mute Control */}
<div className="flex items-center justify-between rounded-lg bg-slate-50 p-3 dark:bg-slate-700">
<div className="flex items-center gap-3">
{isMuted ? (
<MdVolumeOff className="h-5 w-5 text-red-500" />
) : (
<MdVolumeUp className="h-5 w-5 text-green-500" />
)}
<span className="font-medium text-slate-900 dark:text-slate-100">
{isMuted ? "Muted" : "Unmuted"}
</span>
</div>
<Button
size="SM"
theme={isMuted ? "danger" : "primary"}
text={isMuted ? "Unmute" : "Mute"}
onClick={handleToggleMute}
disabled={isLoading}
/>
</div>
{/* Microphone Control */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<MdMic className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Microphone Input
</span>
</div>
<div className="flex items-center justify-between rounded-lg bg-slate-50 p-3 dark:bg-slate-700">
<div className="flex items-center gap-3">
{isMicrophoneActive ? (
isMicrophoneMuted ? (
<MdMicOff className="h-5 w-5 text-yellow-500" />
) : (
<MdMic className="h-5 w-5 text-green-500" />
)
) : (
<MdMicOff className="h-5 w-5 text-red-500" />
)}
<span className="font-medium text-slate-900 dark:text-slate-100">
{!isMicrophoneActive
? "Inactive"
: isMicrophoneMuted
? "Muted"
: "Active"
}
</span>
</div>
<div className="flex gap-2">
<Button
size="SM"
theme={isMicrophoneActive ? "danger" : "primary"}
text={
isStarting ? "Starting..." :
isStopping ? "Stopping..." :
isMicrophoneActive ? "Stop" : "Start"
}
onClick={handleToggleMicrophone}
disabled={isStarting || isStopping || isToggling}
loading={isStarting || isStopping}
/>
{isMicrophoneActive && (
<Button
size="SM"
theme={isMicrophoneMuted ? "danger" : "light"}
text={
isToggling ? (isMicrophoneMuted ? "Unmuting..." : "Muting...") :
isMicrophoneMuted ? "Unmute" : "Mute"
}
onClick={handleToggleMicrophoneMute}
disabled={isStarting || isStopping || isToggling}
loading={isToggling}
/>
)}
</div>
</div>
{/* Audio Level Meter */}
{isMicrophoneActive && (
<div className="rounded-lg bg-slate-50 p-3 dark:bg-slate-700">
<AudioLevelMeter
level={audioLevel}
isActive={isMicrophoneActive && !isMicrophoneMuted && isAnalyzing}
size="md"
showLabel={true}
/>
{/* Debug information */}
<div className="mt-2 text-xs text-slate-500 dark:text-slate-400">
<div className="grid grid-cols-2 gap-1">
<span>Stream: {microphoneStream ? '✓' : '✗'}</span>
<span>Analyzing: {isAnalyzing ? '✓' : '✗'}</span>
<span>Active: {isMicrophoneActive ? '✓' : '✗'}</span>
<span>Muted: {isMicrophoneMuted ? '✓' : '✗'}</span>
</div>
{microphoneStream && (
<div className="mt-1">
Tracks: {microphoneStream.getAudioTracks().length}
{microphoneStream.getAudioTracks().length > 0 && (
<span className="ml-2">
(Enabled: {microphoneStream.getAudioTracks().filter((t: MediaStreamTrack) => t.enabled).length})
</span>
)}
</div>
)}
<button
onClick={syncMicrophoneState}
className="mt-1 text-blue-500 hover:text-blue-600 dark:text-blue-400 dark:hover:text-blue-300"
>
Sync State
</button>
</div>
</div>
)}
</div>
{/* Device Selection */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<MdMic className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Devices
</span>
{devicesLoading && (
<div className="h-3 w-3 animate-spin rounded-full border border-slate-300 border-t-slate-600 dark:border-slate-600 dark:border-t-slate-300" />
)}
</div>
{devicesError && (
<div className="rounded-md bg-red-50 p-2 text-xs text-red-600 dark:bg-red-900/20 dark:text-red-400">
{devicesError}
</div>
)}
{/* Microphone Selection */}
<div className="space-y-2">
<label className="text-sm font-medium text-slate-700 dark:text-slate-300">
Microphone
</label>
<select
value={selectedInputDevice}
onChange={(e) => handleMicrophoneDeviceChange(e.target.value)}
disabled={devicesLoading}
className="w-full rounded-md border border-slate-200 bg-white px-3 py-2 text-sm text-slate-700 focus:border-blue-500 focus:outline-none focus:ring-1 focus:ring-blue-500 disabled:bg-slate-50 disabled:text-slate-500 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:focus:border-blue-400 dark:disabled:bg-slate-800"
>
{audioInputDevices.map((device) => (
<option key={device.deviceId} value={device.deviceId}>
{device.label}
</option>
))}
</select>
{isMicrophoneActive && (
<p className="text-xs text-slate-500 dark:text-slate-400">
Changing device will restart the microphone
</p>
)}
</div>
{/* Speaker Selection */}
<div className="space-y-2">
<label className="text-sm font-medium text-slate-700 dark:text-slate-300">
Speaker
</label>
<select
value={selectedOutputDevice}
onChange={(e) => handleAudioOutputDeviceChange(e.target.value)}
disabled={devicesLoading}
className="w-full rounded-md border border-slate-200 bg-white px-3 py-2 text-sm text-slate-700 focus:border-blue-500 focus:outline-none focus:ring-1 focus:ring-blue-500 disabled:bg-slate-50 disabled:text-slate-500 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:focus:border-blue-400 dark:disabled:bg-slate-800"
>
{audioOutputDevices.map((device) => (
<option key={device.deviceId} value={device.deviceId}>
{device.label}
</option>
))}
</select>
</div>
<button
onClick={refreshDevices}
disabled={devicesLoading}
className="flex w-full items-center justify-center gap-2 rounded-md border border-slate-200 px-3 py-2 text-sm font-medium text-slate-700 hover:bg-slate-50 disabled:opacity-50 dark:border-slate-600 dark:text-slate-300 dark:hover:bg-slate-700"
>
<MdRefresh className={cx("h-4 w-4", devicesLoading && "animate-spin")} />
Refresh Devices
</button>
</div>
{/* Microphone Quality Settings */}
{isMicrophoneActive && (
<div className="space-y-3">
<div className="flex items-center gap-2">
<MdMic className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Microphone Quality
</span>
</div>
<div className="grid grid-cols-2 gap-2">
{Object.entries(qualityLabels).map(([quality, label]) => (
<button
key={`mic-${quality}`}
onClick={() => handleMicrophoneQualityChange(parseInt(quality))}
disabled={isStarting || isStopping || isToggling}
className={cx(
"rounded-md border px-3 py-2 text-sm font-medium transition-colors",
currentMicrophoneConfig?.Quality === parseInt(quality)
? "border-green-500 bg-green-50 text-green-700 dark:bg-green-900/20 dark:text-green-300"
: "border-slate-200 bg-white text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600",
(isStarting || isStopping || isToggling) && "opacity-50 cursor-not-allowed"
)}
>
{label}
</button>
))}
</div>
{currentMicrophoneConfig && (
<div className="rounded-md bg-green-50 p-2 text-xs text-green-600 dark:bg-green-900/20 dark:text-green-400">
<div className="grid grid-cols-2 gap-1">
<span>Sample Rate: {currentMicrophoneConfig.SampleRate}Hz</span>
<span>Channels: {currentMicrophoneConfig.Channels}</span>
<span>Bitrate: {currentMicrophoneConfig.Bitrate}kbps</span>
<span>Frame: {currentMicrophoneConfig.FrameSize}</span>
</div>
</div>
)}
</div>
)}
{/* Quality Settings */}
<div className="space-y-3">
<div className="flex items-center gap-2">
<MdGraphicEq className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Audio Output Quality
</span>
</div>
<div className="grid grid-cols-2 gap-2">
{Object.entries(qualityLabels).map(([quality, label]) => (
<button
key={quality}
onClick={() => handleQualityChange(parseInt(quality))}
disabled={isLoading}
className={cx(
"rounded-md border px-3 py-2 text-sm font-medium transition-colors",
currentConfig?.Quality === parseInt(quality)
? "border-blue-500 bg-blue-50 text-blue-700 dark:bg-blue-900/20 dark:text-blue-300"
: "border-slate-200 bg-white text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600",
isLoading && "opacity-50 cursor-not-allowed"
)}
>
{label}
</button>
))}
</div>
{currentConfig && (
<div className="rounded-md bg-slate-50 p-2 text-xs text-slate-600 dark:bg-slate-700 dark:text-slate-400">
<div className="grid grid-cols-2 gap-1">
<span>Sample Rate: {currentConfig.SampleRate}Hz</span>
<span>Channels: {currentConfig.Channels}</span>
<span>Bitrate: {currentConfig.Bitrate}kbps</span>
<span>Frame: {currentConfig.FrameSize}</span>
</div>
</div>
)}
</div>
{/* Advanced Controls Toggle */}
<button
onClick={() => setShowAdvanced(!showAdvanced)}
className="flex w-full items-center justify-between rounded-md border border-slate-200 p-2 text-sm font-medium text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:text-slate-300 dark:hover:bg-slate-700"
>
<div className="flex items-center gap-2">
<LuSettings className="h-4 w-4" />
<span>Advanced Metrics</span>
</div>
<span className={cx(
"transition-transform",
showAdvanced ? "rotate-180" : "rotate-0"
)}>
</span>
</button>
{/* Advanced Metrics */}
{showAdvanced && (
<div className="space-y-3 rounded-lg border border-slate-200 p-3 dark:border-slate-600">
<div className="flex items-center gap-2">
<LuActivity className="h-4 w-4 text-slate-600 dark:text-slate-400" />
<span className="font-medium text-slate-900 dark:text-slate-100">
Performance Metrics
</span>
</div>
{metrics ? (
<>
<div className="mb-4">
<h4 className="text-sm font-medium text-slate-700 dark:text-slate-300 mb-2">Audio Output</h4>
<div className="grid grid-cols-2 gap-3 text-xs">
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Frames Received</div>
<div className="font-mono text-green-600 dark:text-green-400">
{formatNumber(metrics.frames_received)}
</div>
</div>
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Frames Dropped</div>
<div className={cx(
"font-mono",
metrics.frames_dropped > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(metrics.frames_dropped)}
</div>
</div>
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Data Processed</div>
<div className="font-mono text-blue-600 dark:text-blue-400">
{formatBytes(metrics.bytes_processed)}
</div>
</div>
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Connection Drops</div>
<div className={cx(
"font-mono",
metrics.connection_drops > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(metrics.connection_drops)}
</div>
</div>
</div>
</div>
{micMetrics && (
<div className="mb-4">
<h4 className="text-sm font-medium text-slate-700 dark:text-slate-300 mb-2">Microphone Input</h4>
<div className="grid grid-cols-2 gap-3 text-xs">
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Frames Sent</div>
<div className="font-mono text-green-600 dark:text-green-400">
{formatNumber(micMetrics.frames_sent)}
</div>
</div>
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Frames Dropped</div>
<div className={cx(
"font-mono",
micMetrics.frames_dropped > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(micMetrics.frames_dropped)}
</div>
</div>
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Data Processed</div>
<div className="font-mono text-blue-600 dark:text-blue-400">
{formatBytes(micMetrics.bytes_processed)}
</div>
</div>
<div className="space-y-1">
<div className="text-slate-500 dark:text-slate-400">Connection Drops</div>
<div className={cx(
"font-mono",
micMetrics.connection_drops > 0
? "text-red-600 dark:text-red-400"
: "text-green-600 dark:text-green-400"
)}>
{formatNumber(micMetrics.connection_drops)}
</div>
</div>
</div>
</div>
)}
{metrics.frames_received > 0 && (
<div className="mt-3 rounded-md bg-slate-50 p-2 dark:bg-slate-700">
<div className="text-xs text-slate-500 dark:text-slate-400">Drop Rate</div>
<div className={cx(
"font-mono text-sm",
((metrics.frames_dropped / metrics.frames_received) * 100) > 5
? "text-red-600 dark:text-red-400"
: ((metrics.frames_dropped / metrics.frames_received) * 100) > 1
? "text-yellow-600 dark:text-yellow-400"
: "text-green-600 dark:text-green-400"
)}>
{((metrics.frames_dropped / metrics.frames_received) * 100).toFixed(2)}%
</div>
</div>
)}
<div className="text-xs text-slate-500 dark:text-slate-400">
Last updated: {new Date().toLocaleTimeString()}
</div>
</>
) : (
<div className="text-center py-4">
<div className="text-sm text-slate-500 dark:text-slate-400">
Loading metrics...
</div>
</div>
)}
</div>
)}
{/* Audio Metrics Dashboard Button */}
<div className="pt-2 border-t border-slate-200 dark:border-slate-600">
<div className="flex justify-center">
<button
onClick={() => {
toggleSidebarView("audio-metrics");
}}
className="flex items-center gap-2 rounded-md border border-slate-200 bg-white px-4 py-2 text-sm font-medium text-slate-700 hover:bg-slate-50 dark:border-slate-600 dark:bg-slate-700 dark:text-slate-300 dark:hover:bg-slate-600 transition-colors"
>
<LuSignal className="h-4 w-4 text-blue-500" />
<span>View Full Audio Metrics</span>
</button>
</div>
</div>
</div>
</div>
);
}

View File

@@ -0,0 +1,16 @@
import SidebarHeader from "@/components/SidebarHeader";
import { useUiStore } from "@/hooks/stores";
import AudioMetricsDashboard from "@/components/AudioMetricsDashboard";
export default function AudioMetricsSidebar() {
const setSidebarView = useUiStore(state => state.setSidebarView);
return (
<>
<SidebarHeader title="Audio Metrics" setSidebarView={setSidebarView} />
<div className="h-full overflow-y-scroll bg-white px-4 py-2 pb-8 dark:bg-slate-900">
<AudioMetricsDashboard />
</div>
</>
);
}

View File

@@ -38,7 +38,7 @@ const appendStatToMap = <T extends { timestamp: number }>(
};
// Constants and types
-export type AvailableSidebarViews = "connection-stats";
+export type AvailableSidebarViews = "connection-stats" | "audio-metrics";
export type AvailableTerminalTypes = "kvm" | "serial" | "none";
export interface User {
@@ -117,6 +117,16 @@ interface RTCState {
mediaStream: MediaStream | null;
setMediaStream: (stream: MediaStream) => void;
// Microphone stream management
microphoneStream: MediaStream | null;
setMicrophoneStream: (stream: MediaStream | null) => void;
microphoneSender: RTCRtpSender | null;
setMicrophoneSender: (sender: RTCRtpSender | null) => void;
isMicrophoneActive: boolean;
setMicrophoneActive: (active: boolean) => void;
isMicrophoneMuted: boolean;
setMicrophoneMuted: (muted: boolean) => void;
videoStreamStats: RTCInboundRtpStreamStats | null;
appendVideoStreamStats: (state: RTCInboundRtpStreamStats) => void;
videoStreamStatsHistory: Map<number, RTCInboundRtpStreamStats>;
@@ -166,6 +176,16 @@ export const useRTCStore = create<RTCState>(set => ({
mediaStream: null,
setMediaStream: stream => set({ mediaStream: stream }),
// Microphone stream management
microphoneStream: null,
setMicrophoneStream: stream => set({ microphoneStream: stream }),
microphoneSender: null,
setMicrophoneSender: sender => set({ microphoneSender: sender }),
isMicrophoneActive: false,
setMicrophoneActive: active => set({ isMicrophoneActive: active }),
isMicrophoneMuted: false,
setMicrophoneMuted: muted => set({ isMicrophoneMuted: muted }),
videoStreamStats: null,
appendVideoStreamStats: stats => set({ videoStreamStats: stats }),
videoStreamStatsHistory: new Map(),

View File

@@ -0,0 +1,107 @@
import { useState, useEffect, useCallback } from 'react';
export interface AudioDevice {
deviceId: string;
label: string;
kind: 'audioinput' | 'audiooutput';
}
export interface UseAudioDevicesReturn {
audioInputDevices: AudioDevice[];
audioOutputDevices: AudioDevice[];
selectedInputDevice: string;
selectedOutputDevice: string;
isLoading: boolean;
error: string | null;
refreshDevices: () => Promise<void>;
setSelectedInputDevice: (deviceId: string) => void;
setSelectedOutputDevice: (deviceId: string) => void;
}
export function useAudioDevices(): UseAudioDevicesReturn {
const [audioInputDevices, setAudioInputDevices] = useState<AudioDevice[]>([]);
const [audioOutputDevices, setAudioOutputDevices] = useState<AudioDevice[]>([]);
const [selectedInputDevice, setSelectedInputDevice] = useState<string>('default');
const [selectedOutputDevice, setSelectedOutputDevice] = useState<string>('default');
const [isLoading, setIsLoading] = useState(false);
const [error, setError] = useState<string | null>(null);
const refreshDevices = useCallback(async () => {
setIsLoading(true);
setError(null);
try {
// Request permissions first to get device labels, then stop the temporary
// stream so the browser's recording indicator does not stay on
const permissionStream = await navigator.mediaDevices.getUserMedia({ audio: true });
permissionStream.getTracks().forEach(track => track.stop());
const devices = await navigator.mediaDevices.enumerateDevices();
const inputDevices: AudioDevice[] = [
{ deviceId: 'default', label: 'Default Microphone', kind: 'audioinput' }
];
const outputDevices: AudioDevice[] = [
{ deviceId: 'default', label: 'Default Speaker', kind: 'audiooutput' }
];
devices.forEach(device => {
if (device.kind === 'audioinput' && device.deviceId !== 'default') {
inputDevices.push({
deviceId: device.deviceId,
label: device.label || `Microphone ${device.deviceId.slice(0, 8)}`,
kind: 'audioinput'
});
} else if (device.kind === 'audiooutput' && device.deviceId !== 'default') {
outputDevices.push({
deviceId: device.deviceId,
label: device.label || `Speaker ${device.deviceId.slice(0, 8)}`,
kind: 'audiooutput'
});
}
});
setAudioInputDevices(inputDevices);
setAudioOutputDevices(outputDevices);
console.log('Audio devices enumerated:', {
inputs: inputDevices.length,
outputs: outputDevices.length
});
} catch (err) {
console.error('Failed to enumerate audio devices:', err);
setError(err instanceof Error ? err.message : 'Failed to access audio devices');
} finally {
setIsLoading(false);
}
}, []);
// Listen for device changes
useEffect(() => {
const handleDeviceChange = () => {
console.log('Audio devices changed, refreshing...');
refreshDevices();
};
navigator.mediaDevices.addEventListener('devicechange', handleDeviceChange);
// Initial load
refreshDevices();
return () => {
navigator.mediaDevices.removeEventListener('devicechange', handleDeviceChange);
};
}, [refreshDevices]);
return {
audioInputDevices,
audioOutputDevices,
selectedInputDevice,
selectedOutputDevice,
isLoading,
error,
refreshDevices,
setSelectedInputDevice,
setSelectedOutputDevice,
};
}
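// Usage sketch (hypothetical consumer, not part of this commit):
//
//   const { audioInputDevices, selectedInputDevice, setSelectedInputDevice } = useAudioDevices();
//   // render audioInputDevices as <option> elements and call
//   // setSelectedInputDevice(deviceId) when the user picks one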

View File

@@ -0,0 +1,282 @@
import { useCallback, useEffect, useRef, useState } from 'react';
import useWebSocket, { ReadyState } from 'react-use-websocket';
// Audio event types matching the backend
export type AudioEventType =
| 'audio-mute-changed'
| 'audio-metrics-update'
| 'microphone-state-changed'
| 'microphone-metrics-update';
// Audio event data interfaces
export interface AudioMuteData {
muted: boolean;
}
export interface AudioMetricsData {
frames_received: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
export interface MicrophoneStateData {
running: boolean;
session_active: boolean;
}
export interface MicrophoneMetricsData {
frames_sent: number;
frames_dropped: number;
bytes_processed: number;
last_frame_time: string;
connection_drops: number;
average_latency: string;
}
// Audio event structure
export interface AudioEvent {
type: AudioEventType;
data: AudioMuteData | AudioMetricsData | MicrophoneStateData | MicrophoneMetricsData;
}
// Hook return type
export interface UseAudioEventsReturn {
// Connection state
connectionState: ReadyState;
isConnected: boolean;
// Audio state
audioMuted: boolean | null;
audioMetrics: AudioMetricsData | null;
// Microphone state
microphoneState: MicrophoneStateData | null;
microphoneMetrics: MicrophoneMetricsData | null;
// Manual subscription control
subscribe: () => void;
unsubscribe: () => void;
}
// Global subscription management to prevent multiple subscriptions per WebSocket connection
const globalSubscriptionState = {
isSubscribed: false,
subscriberCount: 0,
connectionId: null as string | null
};
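// This module-level object is a deliberate singleton: the WebSocket below is
// opened with share: true, so only the first hook instance should send the
// subscribe message, while subscriberCount tracks how many components listen.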
export function useAudioEvents(): UseAudioEventsReturn {
// State for audio data
const [audioMuted, setAudioMuted] = useState<boolean | null>(null);
const [audioMetrics, setAudioMetrics] = useState<AudioMetricsData | null>(null);
const [microphoneState, setMicrophoneState] = useState<MicrophoneStateData | null>(null);
const [microphoneMetrics, setMicrophoneMetricsData] = useState<MicrophoneMetricsData | null>(null);
// Local subscription state
const [isLocallySubscribed, setIsLocallySubscribed] = useState(false);
const subscriptionTimeoutRef = useRef<number | null>(null);
// Get WebSocket URL
const getWebSocketUrl = () => {
const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:';
const host = window.location.host;
return `${protocol}//${host}/webrtc/signaling/client`;
};
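// Example: a session served from https://<device-host> connects to
// wss://<device-host>/webrtc/signaling/client (ws:// over plain HTTP).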
// Shared WebSocket connection using the `share` option for better resource management
const {
sendMessage,
lastMessage,
readyState,
} = useWebSocket(getWebSocketUrl(), {
shouldReconnect: () => true,
reconnectAttempts: 10,
reconnectInterval: 3000,
share: true, // Share the WebSocket connection across multiple hooks
onOpen: () => {
console.log('[AudioEvents] WebSocket connected');
// Reset global state on new connection
globalSubscriptionState.isSubscribed = false;
globalSubscriptionState.connectionId = Math.random().toString(36);
},
onClose: () => {
console.log('[AudioEvents] WebSocket disconnected');
// Reset global state on disconnect
globalSubscriptionState.isSubscribed = false;
globalSubscriptionState.subscriberCount = 0;
globalSubscriptionState.connectionId = null;
},
onError: (event) => {
console.error('[AudioEvents] WebSocket error:', event);
},
});
// Subscribe to audio events
const subscribe = useCallback(() => {
if (readyState === ReadyState.OPEN && !globalSubscriptionState.isSubscribed) {
// Clear any pending subscription timeout
if (subscriptionTimeoutRef.current) {
clearTimeout(subscriptionTimeoutRef.current);
subscriptionTimeoutRef.current = null;
}
// Add a small delay to prevent rapid subscription attempts
subscriptionTimeoutRef.current = setTimeout(() => {
if (readyState === ReadyState.OPEN && !globalSubscriptionState.isSubscribed) {
const subscribeMessage = {
type: 'subscribe-audio-events',
data: {}
};
sendMessage(JSON.stringify(subscribeMessage));
globalSubscriptionState.isSubscribed = true;
console.log('[AudioEvents] Subscribed to audio events');
}
}, 100); // 100ms delay to debounce subscription attempts
}
// Track local subscription regardless of global state
if (!isLocallySubscribed) {
globalSubscriptionState.subscriberCount++;
setIsLocallySubscribed(true);
}
}, [readyState, sendMessage, isLocallySubscribed]);
// Unsubscribe from audio events
const unsubscribe = useCallback(() => {
// Clear any pending subscription timeout
if (subscriptionTimeoutRef.current) {
clearTimeout(subscriptionTimeoutRef.current);
subscriptionTimeoutRef.current = null;
}
if (isLocallySubscribed) {
globalSubscriptionState.subscriberCount--;
setIsLocallySubscribed(false);
// Only send unsubscribe message if this is the last subscriber and connection is still open
if (globalSubscriptionState.subscriberCount <= 0 &&
readyState === ReadyState.OPEN &&
globalSubscriptionState.isSubscribed) {
const unsubscribeMessage = {
type: 'unsubscribe-audio-events',
data: {}
};
sendMessage(JSON.stringify(unsubscribeMessage));
globalSubscriptionState.isSubscribed = false;
globalSubscriptionState.subscriberCount = 0;
console.log('[AudioEvents] Sent unsubscribe message to backend');
}
}
console.log('[AudioEvents] Component unsubscribed from audio events');
}, [readyState, isLocallySubscribed, sendMessage]);
// Handle incoming messages
useEffect(() => {
if (lastMessage !== null) {
try {
const message = JSON.parse(lastMessage.data);
// Handle audio events
if (message.type && message.data) {
const audioEvent = message as AudioEvent;
switch (audioEvent.type) {
case 'audio-mute-changed': {
const muteData = audioEvent.data as AudioMuteData;
setAudioMuted(muteData.muted);
console.log('[AudioEvents] Audio mute changed:', muteData.muted);
break;
}
case 'audio-metrics-update': {
const audioMetricsData = audioEvent.data as AudioMetricsData;
setAudioMetrics(audioMetricsData);
break;
}
case 'microphone-state-changed': {
const micStateData = audioEvent.data as MicrophoneStateData;
setMicrophoneState(micStateData);
console.log('[AudioEvents] Microphone state changed:', micStateData);
break;
}
case 'microphone-metrics-update': {
const micMetricsData = audioEvent.data as MicrophoneMetricsData;
setMicrophoneMetricsData(micMetricsData);
break;
}
default:
// Ignore other message types (WebRTC signaling, etc.)
break;
}
}
} catch (error) {
// Ignore parsing errors for non-JSON messages (like "pong")
if (lastMessage.data !== 'pong') {
console.warn('[AudioEvents] Failed to parse WebSocket message:', error);
}
}
}
}, [lastMessage]);
// Auto-subscribe when connected
useEffect(() => {
if (readyState === ReadyState.OPEN) {
subscribe();
}
// Cleanup subscription on component unmount or connection change
return () => {
if (subscriptionTimeoutRef.current) {
clearTimeout(subscriptionTimeoutRef.current);
subscriptionTimeoutRef.current = null;
}
unsubscribe();
};
}, [readyState, subscribe, unsubscribe]);
// Reset local subscription state on disconnect
useEffect(() => {
if (readyState === ReadyState.CLOSED || readyState === ReadyState.CLOSING) {
setIsLocallySubscribed(false);
if (subscriptionTimeoutRef.current) {
clearTimeout(subscriptionTimeoutRef.current);
subscriptionTimeoutRef.current = null;
}
}
}, [readyState]);
// Cleanup on component unmount
useEffect(() => {
return () => {
unsubscribe();
};
}, [unsubscribe]);
return {
// Connection state
connectionState: readyState,
isConnected: readyState === ReadyState.OPEN && globalSubscriptionState.isSubscribed,
// Audio state
audioMuted,
audioMetrics,
// Microphone state
microphoneState,
microphoneMetrics,
// Manual subscription control
subscribe,
unsubscribe,
};
}
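// Usage sketch (hypothetical consumer, not part of this commit):
//
//   const { isConnected, audioMetrics } = useAudioEvents();
//   // fall back to HTTP polling when isConnected is false, as
//   // AudioControlPopover does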

View File

@@ -0,0 +1,134 @@
import { useEffect, useRef, useState } from 'react';
interface AudioLevelHookResult {
audioLevel: number; // 0-100 percentage
isAnalyzing: boolean;
}
interface AudioLevelOptions {
enabled?: boolean; // Allow external control of analysis
updateInterval?: number; // Throttle updates (default: 100ms for 10fps instead of 60fps)
}
export const useAudioLevel = (
stream: MediaStream | null,
options: AudioLevelOptions = {}
): AudioLevelHookResult => {
const { enabled = true, updateInterval = 100 } = options;
const [audioLevel, setAudioLevel] = useState(0);
const [isAnalyzing, setIsAnalyzing] = useState(false);
const audioContextRef = useRef<AudioContext | null>(null);
const analyserRef = useRef<AnalyserNode | null>(null);
const sourceRef = useRef<MediaStreamAudioSourceNode | null>(null);
const intervalRef = useRef<number | null>(null);
const lastUpdateTimeRef = useRef<number>(0);
useEffect(() => {
if (!stream || !enabled) {
// Clean up when stream is null or disabled
if (intervalRef.current !== null) {
clearInterval(intervalRef.current);
intervalRef.current = null;
}
if (sourceRef.current) {
sourceRef.current.disconnect();
sourceRef.current = null;
}
if (audioContextRef.current) {
audioContextRef.current.close();
audioContextRef.current = null;
}
analyserRef.current = null;
setIsAnalyzing(false);
setAudioLevel(0);
return;
}
const audioTracks = stream.getAudioTracks();
if (audioTracks.length === 0) {
setIsAnalyzing(false);
setAudioLevel(0);
return;
}
try {
// Create audio context and analyser
const audioContext = new (window.AudioContext || (window as Window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext)();
const analyser = audioContext.createAnalyser();
const source = audioContext.createMediaStreamSource(stream);
// Configure analyser - use smaller FFT for better performance
analyser.fftSize = 128; // Reduced from 256 for better performance
analyser.smoothingTimeConstant = 0.8;
// Connect nodes
source.connect(analyser);
// Store references
audioContextRef.current = audioContext;
analyserRef.current = analyser;
sourceRef.current = source;
const dataArray = new Uint8Array(analyser.frequencyBinCount);
const updateLevel = () => {
if (!analyserRef.current) return;
const now = performance.now();
// Throttle updates to reduce CPU usage
if (now - lastUpdateTimeRef.current < updateInterval) {
return;
}
lastUpdateTimeRef.current = now;
analyserRef.current.getByteFrequencyData(dataArray);
// Optimized RMS calculation - process only relevant frequency bands
let sum = 0;
const relevantBins = Math.min(dataArray.length, 32); // Focus on lower frequencies for voice
for (let i = 0; i < relevantBins; i++) {
const value = dataArray[i];
sum += value * value;
}
const rms = Math.sqrt(sum / relevantBins);
// Convert to percentage (0-100) with better scaling
const level = Math.min(100, Math.max(0, (rms / 180) * 100)); // Adjusted scaling for better sensitivity
setAudioLevel(Math.round(level));
};
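// Worked example of the scaling above: byte values of ~90 across the 32 bins
// give rms = 90, so level = min(100, (90 / 180) * 100) = 50, i.e. a half-full meter.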
setIsAnalyzing(true);
// Use setInterval instead of requestAnimationFrame for more predictable timing
intervalRef.current = window.setInterval(updateLevel, updateInterval);
} catch (error) {
console.error('Failed to create audio level analyzer:', error);
setIsAnalyzing(false);
setAudioLevel(0);
}
// Cleanup function
return () => {
if (intervalRef.current !== null) {
clearInterval(intervalRef.current);
intervalRef.current = null;
}
if (sourceRef.current) {
sourceRef.current.disconnect();
sourceRef.current = null;
}
if (audioContextRef.current) {
audioContextRef.current.close();
audioContextRef.current = null;
}
analyserRef.current = null;
setIsAnalyzing(false);
setAudioLevel(0);
};
}, [stream, enabled, updateInterval]);
return { audioLevel, isAnalyzing };
};
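// Usage sketch (mirrors how AudioControlPopover consumes this hook):
//
//   const { audioLevel, isAnalyzing } = useAudioLevel(stream, { enabled: open, updateInterval: 120 });
//   // audioLevel is 0-100 and updates at most once per updateInterval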

View File

@@ -0,0 +1,960 @@
import { useCallback, useEffect, useRef, useState } from "react";
import { useRTCStore } from "@/hooks/stores";
import api from "@/api";
export interface MicrophoneError {
type: 'permission' | 'device' | 'network' | 'unknown';
message: string;
}
export function useMicrophone() {
const {
peerConnection,
microphoneStream,
setMicrophoneStream,
microphoneSender,
setMicrophoneSender,
isMicrophoneActive,
setMicrophoneActive,
isMicrophoneMuted,
setMicrophoneMuted,
} = useRTCStore();
const microphoneStreamRef = useRef<MediaStream | null>(null);
// Loading states
const [isStarting, setIsStarting] = useState(false);
const [isStopping, setIsStopping] = useState(false);
const [isToggling, setIsToggling] = useState(false);
// Add debouncing refs to prevent rapid operations
const lastOperationRef = useRef<number>(0);
const operationTimeoutRef = useRef<number | null>(null);
const OPERATION_DEBOUNCE_MS = 1000; // 1 second debounce
// Debounced operation wrapper
const debouncedOperation = useCallback((operation: () => Promise<void>, operationType: string) => {
const now = Date.now();
const timeSinceLastOp = now - lastOperationRef.current;
if (timeSinceLastOp < OPERATION_DEBOUNCE_MS) {
console.log(`Debouncing ${operationType} operation - too soon (${timeSinceLastOp}ms since last)`);
return;
}
// Clear any pending operation
if (operationTimeoutRef.current) {
clearTimeout(operationTimeoutRef.current);
operationTimeoutRef.current = null;
}
lastOperationRef.current = now;
operation().catch(error => {
console.error(`Debounced ${operationType} operation failed:`, error);
});
}, []);
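// Example of the guard above: two calls to the same operation 300ms apart
// collapse into one, since 300ms < OPERATION_DEBOUNCE_MS (1000ms).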
// Cleanup function to stop microphone stream
const stopMicrophoneStream = useCallback(async () => {
console.log("stopMicrophoneStream called - cleaning up stream");
console.trace("stopMicrophoneStream call stack");
if (microphoneStreamRef.current) {
console.log("Stopping microphone stream:", microphoneStreamRef.current.id);
microphoneStreamRef.current.getTracks().forEach(track => {
track.stop();
});
microphoneStreamRef.current = null;
setMicrophoneStream(null);
console.log("Microphone stream cleared from ref and store");
} else {
console.log("No microphone stream to stop");
}
if (microphoneSender && peerConnection) {
// Instead of removing the track, replace it with null to keep the transceiver
try {
await microphoneSender.replaceTrack(null);
} catch (error) {
console.warn("Failed to replace track with null:", error);
// Fallback to removing the track
peerConnection.removeTrack(microphoneSender);
}
setMicrophoneSender(null);
}
setMicrophoneActive(false);
setMicrophoneMuted(false);
}, [microphoneSender, peerConnection, setMicrophoneStream, setMicrophoneSender, setMicrophoneActive, setMicrophoneMuted]);
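// Design note: replaceTrack(null) keeps the negotiated transceiver alive so a
// later start can reuse it without a new SDP offer/answer round trip, whereas
// removeTrack would force renegotiation.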
// Debug function to check current state (can be called from browser console)
const debugMicrophoneState = useCallback(() => {
const refStream = microphoneStreamRef.current;
const state = {
isMicrophoneActive,
isMicrophoneMuted,
streamInRef: !!refStream,
streamInStore: !!microphoneStream,
senderInStore: !!microphoneSender,
streamId: refStream?.id,
storeStreamId: microphoneStream?.id,
audioTracks: refStream?.getAudioTracks().length || 0,
storeAudioTracks: microphoneStream?.getAudioTracks().length || 0,
audioTrackDetails: refStream?.getAudioTracks().map(track => ({
id: track.id,
label: track.label,
enabled: track.enabled,
readyState: track.readyState,
muted: track.muted
})) || [],
peerConnectionState: peerConnection ? {
connectionState: peerConnection.connectionState,
iceConnectionState: peerConnection.iceConnectionState,
signalingState: peerConnection.signalingState
} : "No peer connection",
streamMatch: refStream === microphoneStream
};
console.log("Microphone Debug State:", state);
// Also check if streams are active
if (refStream) {
console.log("Ref stream active tracks:", refStream.getAudioTracks().filter(t => t.readyState === 'live').length);
}
if (microphoneStream && microphoneStream !== refStream) {
console.log("Store stream active tracks:", microphoneStream.getAudioTracks().filter(t => t.readyState === 'live').length);
}
return state;
}, [isMicrophoneActive, isMicrophoneMuted, microphoneStream, microphoneSender, peerConnection]);
// Make debug function available globally for console access
useEffect(() => {
(window as Window & { debugMicrophoneState?: () => unknown }).debugMicrophoneState = debugMicrophoneState;
return () => {
delete (window as Window & { debugMicrophoneState?: () => unknown }).debugMicrophoneState;
};
}, [debugMicrophoneState]);
const lastSyncRef = useRef<number>(0);
const isStartingRef = useRef<boolean>(false); // Track if we're in the middle of starting
const syncMicrophoneState = useCallback(async () => {
// Debounce sync calls to prevent race conditions
const now = Date.now();
if (now - lastSyncRef.current < 1000) { // Increased debounce time
console.log("Skipping sync - too frequent");
return;
}
lastSyncRef.current = now;
// Don't sync if we're in the middle of starting the microphone
if (isStartingRef.current) {
console.log("Skipping sync - microphone is starting");
return;
}
try {
const response = await api.GET("/microphone/status", {});
if (response.ok) {
const data = await response.json();
const backendRunning = data.running;
// Only sync if there's a significant state difference and we're not in a transition
if (backendRunning !== isMicrophoneActive) {
console.info(`Syncing microphone state: backend=${backendRunning}, frontend=${isMicrophoneActive}`);
// If backend is running but frontend thinks it's not, just update frontend state
if (backendRunning && !isMicrophoneActive) {
console.log("Backend running, updating frontend state to active");
setMicrophoneActive(true);
}
// If backend is not running but frontend thinks it is, clean up and update state
else if (!backendRunning && isMicrophoneActive) {
console.log("Backend not running, cleaning up frontend state");
setMicrophoneActive(false);
// Only clean up stream if we actually have one
if (microphoneStreamRef.current) {
console.log("Cleaning up orphaned stream");
await stopMicrophoneStream();
}
}
}
}
} catch (error) {
console.warn("Failed to sync microphone state:", error);
}
}, [isMicrophoneActive, setMicrophoneActive, stopMicrophoneStream]);
// Start microphone stream
const startMicrophone = useCallback(async (deviceId?: string): Promise<{ success: boolean; error?: MicrophoneError }> => {
// Prevent multiple simultaneous start operations
if (isStarting || isStopping || isToggling) {
console.log("Microphone operation already in progress, skipping start");
return { success: false, error: { type: 'unknown', message: 'Operation already in progress' } };
}
setIsStarting(true);
try {
// Set flag to prevent sync during startup
isStartingRef.current = true;
// Request microphone permission and get stream
const audioConstraints: MediaTrackConstraints = {
echoCancellation: true,
noiseSuppression: true,
autoGainControl: true,
sampleRate: 48000,
channelCount: 1,
};
// Add device ID if specified
if (deviceId && deviceId !== 'default') {
audioConstraints.deviceId = { exact: deviceId };
}
console.log("Requesting microphone with constraints:", audioConstraints);
const stream = await navigator.mediaDevices.getUserMedia({
audio: audioConstraints
});
console.log("Microphone stream created successfully:", {
streamId: stream.id,
audioTracks: stream.getAudioTracks().length,
videoTracks: stream.getVideoTracks().length,
audioTrackDetails: stream.getAudioTracks().map(track => ({
id: track.id,
label: track.label,
enabled: track.enabled,
readyState: track.readyState
}))
});
// Store the stream in both ref and store
microphoneStreamRef.current = stream;
setMicrophoneStream(stream);
// Verify the stream was stored correctly
console.log("Stream storage verification:", {
refSet: !!microphoneStreamRef.current,
refId: microphoneStreamRef.current?.id,
storeWillBeSet: true // Store update is async
});
// Add audio track to peer connection if available
console.log("Peer connection state:", peerConnection ? {
connectionState: peerConnection.connectionState,
iceConnectionState: peerConnection.iceConnectionState,
signalingState: peerConnection.signalingState
} : "No peer connection");
if (peerConnection && stream.getAudioTracks().length > 0) {
const audioTrack = stream.getAudioTracks()[0];
console.log("Starting microphone with audio track:", audioTrack.id, "kind:", audioTrack.kind);
// Find the audio transceiver (should already exist with sendrecv direction)
const transceivers = peerConnection.getTransceivers();
console.log("Available transceivers:", transceivers.map(t => ({
direction: t.direction,
mid: t.mid,
senderTrack: t.sender.track?.kind,
receiverTrack: t.receiver.track?.kind
})));
// Look for an audio transceiver that can send (has sendrecv or sendonly direction)
const audioTransceiver = transceivers.find(transceiver => {
// Check if this transceiver is for audio and can send
const canSend = transceiver.direction === 'sendrecv' || transceiver.direction === 'sendonly';
// For newly created transceivers, we need to check if they're for audio
// We can do this by checking if the sender doesn't have a track yet and direction allows sending
if (canSend && !transceiver.sender.track) {
return true;
}
// For existing transceivers, check if they already have an audio track
if (transceiver.sender.track?.kind === 'audio' || transceiver.receiver.track?.kind === 'audio') {
return canSend;
}
return false;
});
console.log("Found audio transceiver:", audioTransceiver ? {
direction: audioTransceiver.direction,
mid: audioTransceiver.mid,
senderTrack: audioTransceiver.sender.track?.kind,
receiverTrack: audioTransceiver.receiver.track?.kind
} : null);
let sender: RTCRtpSender;
if (audioTransceiver && audioTransceiver.sender) {
// Use the existing audio transceiver's sender
await audioTransceiver.sender.replaceTrack(audioTrack);
sender = audioTransceiver.sender;
console.log("Replaced audio track on existing transceiver");
// Verify the track was set correctly
console.log("Transceiver after track replacement:", {
direction: audioTransceiver.direction,
senderTrack: audioTransceiver.sender.track?.id,
senderTrackKind: audioTransceiver.sender.track?.kind,
senderTrackEnabled: audioTransceiver.sender.track?.enabled,
senderTrackReadyState: audioTransceiver.sender.track?.readyState
});
} else {
// Fallback: add new track if no transceiver found
sender = peerConnection.addTrack(audioTrack, stream);
console.log("Added new audio track to peer connection");
// Find the transceiver that was created for this track
const newTransceiver = peerConnection.getTransceivers().find(t => t.sender === sender);
console.log("New transceiver created:", newTransceiver ? {
direction: newTransceiver.direction,
senderTrack: newTransceiver.sender.track?.id,
senderTrackKind: newTransceiver.sender.track?.kind
} : "Not found");
}
setMicrophoneSender(sender);
console.log("Microphone sender set:", {
senderId: sender,
track: sender.track?.id,
trackKind: sender.track?.kind,
trackEnabled: sender.track?.enabled,
trackReadyState: sender.track?.readyState
});
// Check sender stats to verify audio is being transmitted
setTimeout(async () => {
try {
const stats = await sender.getStats();
console.log("Sender stats after 2 seconds:");
stats.forEach((report, id) => {
if (report.type === 'outbound-rtp' && report.kind === 'audio') {
console.log("Outbound audio RTP stats:", {
id,
packetsSent: report.packetsSent,
bytesSent: report.bytesSent,
timestamp: report.timestamp
});
}
});
} catch (error) {
console.error("Failed to get sender stats:", error);
}
}, 2000);
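// Diagnostic hint: if packetsSent stays at 0 in the stats above, the track is
// attached but no audio is flowing - typically a muted track or a transceiver
// negotiated recvonly.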
}
// Notify backend that microphone is started
console.log("Notifying backend about microphone start...");
// Retry logic for backend failures
let backendSuccess = false;
let lastError: Error | string | null = null;
for (let attempt = 1; attempt <= 3; attempt++) {
try {
// If this is a retry, first try to reset the backend microphone state
if (attempt > 1) {
console.log(`Backend start attempt ${attempt}, first trying to reset backend state...`);
try {
// Try the new reset endpoint first
const resetResp = await api.POST("/microphone/reset", {});
if (resetResp.ok) {
console.log("Backend reset successful");
} else {
// Fallback to stop
await api.POST("/microphone/stop", {});
}
// Wait a bit for the backend to reset
await new Promise(resolve => setTimeout(resolve, 200));
} catch (resetError) {
console.warn("Failed to reset backend state:", resetError);
}
}
const backendResp = await api.POST("/microphone/start", {});
console.log(`Backend response status (attempt ${attempt}):`, backendResp.status, "ok:", backendResp.ok);
if (!backendResp.ok) {
lastError = `Backend returned status ${backendResp.status}`;
console.error(`Backend microphone start failed with status: ${backendResp.status} (attempt ${attempt})`);
// For 500 errors, try again after a short delay
if (backendResp.status === 500 && attempt < 3) {
console.log(`Retrying backend start in 500ms (attempt ${attempt + 1}/3)...`);
await new Promise(resolve => setTimeout(resolve, 500));
continue;
}
} else {
// Success!
const responseData = await backendResp.json();
console.log("Backend response data:", responseData);
if (responseData.status === "already running") {
console.info("Backend microphone was already running");
// If we're on the first attempt and backend says "already running",
// but the frontend thinks it's not active, this might be a stuck state
if (attempt === 1 && !isMicrophoneActive) {
console.warn("Backend reports 'already running' but frontend is not active - possible stuck state");
console.log("Attempting to reset backend state and retry...");
try {
const resetResp = await api.POST("/microphone/reset", {});
if (resetResp.ok) {
console.log("Backend reset successful, retrying start...");
await new Promise(resolve => setTimeout(resolve, 200));
continue; // Retry the start
}
} catch (resetError) {
console.warn("Failed to reset stuck backend state:", resetError);
}
}
}
console.log("Backend microphone start successful");
backendSuccess = true;
break;
}
} catch (error) {
lastError = error instanceof Error ? error : String(error);
console.error(`Backend microphone start threw error (attempt ${attempt}):`, error);
// For network errors, try again after a short delay
if (attempt < 3) {
console.log(`Retrying backend start in 500ms (attempt ${attempt + 1}/3)...`);
await new Promise(resolve => setTimeout(resolve, 500));
continue;
}
}
}
// If all backend attempts failed, cleanup and return error
if (!backendSuccess) {
console.error("All backend start attempts failed, cleaning up stream");
await stopMicrophoneStream();
isStartingRef.current = false;
setIsStarting(false);
return {
success: false,
error: {
type: 'network',
message: `Failed to start microphone on backend after 3 attempts. Last error: ${lastError}`
}
};
}
// Only set active state after backend confirms success
setMicrophoneActive(true);
setMicrophoneMuted(false);
console.log("Microphone state set to active. Verifying state:", {
streamInRef: !!microphoneStreamRef.current,
streamInStore: !!microphoneStream,
isActive: true,
isMuted: false
});
// Don't sync immediately after starting - it causes race conditions
// The sync will happen naturally through other triggers
setTimeout(() => {
// Just verify state after a delay for debugging
console.log("State check after delay:", {
streamInRef: !!microphoneStreamRef.current,
streamInStore: !!microphoneStream,
isActive: isMicrophoneActive,
isMuted: isMicrophoneMuted
});
}, 100);
// Clear the starting flag
isStartingRef.current = false;
setIsStarting(false);
return { success: true };
} catch (error) {
console.error("Failed to start microphone:", error);
let micError: MicrophoneError;
if (error instanceof Error) {
if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') {
micError = {
type: 'permission',
message: 'Microphone permission denied. Please allow microphone access and try again.'
};
} else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') {
micError = {
type: 'device',
message: 'No microphone device found. Please check your microphone connection.'
};
} else {
micError = {
type: 'unknown',
message: error.message || 'Failed to access microphone'
};
}
} else {
micError = {
type: 'unknown',
message: 'Unknown error occurred while accessing microphone'
};
}
// Clear the starting flag on error
isStartingRef.current = false;
setIsStarting(false);
return { success: false, error: micError };
}
}, [peerConnection, setMicrophoneStream, setMicrophoneSender, setMicrophoneActive, setMicrophoneMuted, stopMicrophoneStream, isMicrophoneActive, isMicrophoneMuted, microphoneStream, isStarting, isStopping, isToggling]);
// Reset backend microphone state
const resetBackendMicrophoneState = useCallback(async (): Promise<boolean> => {
try {
console.log("Resetting backend microphone state...");
const response = await api.POST("/microphone/reset", {});
if (response.ok) {
const data = await response.json();
console.log("Backend microphone reset successful:", data);
// Update frontend state to match backend
setMicrophoneActive(false);
setMicrophoneMuted(false);
// Clean up any orphaned streams
if (microphoneStreamRef.current) {
console.log("Cleaning up orphaned stream after reset");
await stopMicrophoneStream();
}
// Wait a bit for everything to settle
await new Promise(resolve => setTimeout(resolve, 200));
// Sync state to ensure consistency
await syncMicrophoneState();
return true;
} else {
console.error("Backend microphone reset failed:", response.status);
return false;
}
} catch (error) {
console.warn("Failed to reset backend microphone state:", error);
// Fallback to old method
try {
console.log("Trying fallback reset method...");
await api.POST("/microphone/stop", {});
await new Promise(resolve => setTimeout(resolve, 300));
return true;
} catch (fallbackError) {
console.error("Fallback reset also failed:", fallbackError);
return false;
}
}
}, [setMicrophoneActive, setMicrophoneMuted, stopMicrophoneStream, syncMicrophoneState]);
// Stop microphone
const stopMicrophone = useCallback(async (): Promise<{ success: boolean; error?: MicrophoneError }> => {
// Prevent multiple simultaneous stop operations
if (isStarting || isStopping || isToggling) {
console.log("Microphone operation already in progress, skipping stop");
return { success: false, error: { type: 'unknown', message: 'Operation already in progress' } };
}
setIsStopping(true);
try {
// First stop the stream
await stopMicrophoneStream();
// Then notify backend that microphone is stopped
try {
await api.POST("/microphone/stop", {});
console.log("Backend notified about microphone stop");
} catch (error) {
console.warn("Failed to notify backend about microphone stop:", error);
}
// Update frontend state immediately
setMicrophoneActive(false);
setMicrophoneMuted(false);
// Sync state after stopping to ensure consistency (with longer delay)
setTimeout(() => syncMicrophoneState(), 500);
setIsStopping(false);
return { success: true };
} catch (error) {
console.error("Failed to stop microphone:", error);
setIsStopping(false);
return {
success: false,
error: {
type: 'unknown',
message: error instanceof Error ? error.message : 'Failed to stop microphone'
}
};
}
}, [stopMicrophoneStream, syncMicrophoneState, setMicrophoneActive, setMicrophoneMuted, isStarting, isStopping, isToggling]);
// Toggle microphone mute
const toggleMicrophoneMute = useCallback(async (): Promise<{ success: boolean; error?: MicrophoneError }> => {
// Prevent multiple simultaneous toggle operations
if (isStarting || isStopping || isToggling) {
console.log("Microphone operation already in progress, skipping toggle");
return { success: false, error: { type: 'unknown', message: 'Operation already in progress' } };
}
setIsToggling(true);
try {
// Use the ref instead of store value to avoid race conditions
const currentStream = microphoneStreamRef.current || microphoneStream;
console.log("Toggle microphone mute - current state:", {
hasRefStream: !!microphoneStreamRef.current,
hasStoreStream: !!microphoneStream,
isActive: isMicrophoneActive,
isMuted: isMicrophoneMuted,
streamId: currentStream?.id,
audioTracks: currentStream?.getAudioTracks().length || 0
});
if (!currentStream || !isMicrophoneActive) {
const errorDetails = {
hasStream: !!currentStream,
isActive: isMicrophoneActive,
storeStream: !!microphoneStream,
refStream: !!microphoneStreamRef.current,
streamId: currentStream?.id,
audioTracks: currentStream?.getAudioTracks().length || 0
};
console.warn("Microphone mute failed: stream or active state missing", errorDetails);
// Provide a more specific error message
let errorMessage = 'Microphone is not active';
if (!currentStream) {
errorMessage = 'No microphone stream found. Please restart the microphone.';
} else if (!isMicrophoneActive) {
errorMessage = 'Microphone is not marked as active. Please restart the microphone.';
}
setIsToggling(false);
return {
success: false,
error: {
type: 'device',
message: errorMessage
}
};
}
const audioTracks = currentStream.getAudioTracks();
if (audioTracks.length === 0) {
setIsToggling(false);
return {
success: false,
error: {
type: 'device',
message: 'No audio tracks found in microphone stream'
}
};
}
const newMutedState = !isMicrophoneMuted;
// Mute/unmute the audio track
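// Disabling the track keeps the sender negotiated; for audio, the browser transmits silence, so unmuting needs no renegotiation.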
audioTracks.forEach(track => {
track.enabled = !newMutedState;
console.log(`Audio track ${track.id} enabled: ${track.enabled}`);
});
setMicrophoneMuted(newMutedState);
// Notify backend about mute state
try {
await api.POST("/microphone/mute", { muted: newMutedState });
} catch (error) {
console.warn("Failed to notify backend about microphone mute:", error);
}
setIsToggling(false);
return { success: true };
} catch (error) {
console.error("Failed to toggle microphone mute:", error);
setIsToggling(false);
return {
success: false,
error: {
type: 'unknown',
message: error instanceof Error ? error.message : 'Failed to toggle microphone mute'
}
};
}
}, [microphoneStream, isMicrophoneActive, isMicrophoneMuted, setMicrophoneMuted, isStarting, isStopping, isToggling]);
// Function to check WebRTC audio transmission stats
const checkAudioTransmissionStats = useCallback(async () => {
if (!microphoneSender) {
console.log("No microphone sender available");
return null;
}
try {
const stats = await microphoneSender.getStats();
const audioStats: {
id: string;
type: string;
kind: string;
packetsSent?: number;
bytesSent?: number;
timestamp?: number;
ssrc?: number;
}[] = [];
stats.forEach((report, id) => {
if (report.type === 'outbound-rtp' && report.kind === 'audio') {
audioStats.push({
id,
type: report.type,
kind: report.kind,
packetsSent: report.packetsSent,
bytesSent: report.bytesSent,
timestamp: report.timestamp,
ssrc: report.ssrc
});
}
});
console.log("Audio transmission stats:", audioStats);
return audioStats;
} catch (error) {
console.error("Failed to get audio transmission stats:", error);
return null;
}
}, [microphoneSender]);
// Comprehensive test function to diagnose microphone issues
const testMicrophoneAudio = useCallback(async () => {
console.log("=== MICROPHONE AUDIO TEST ===");
// 1. Check if we have a stream
const stream = microphoneStreamRef.current;
if (!stream) {
console.log("❌ No microphone stream available");
return;
}
console.log("✅ Microphone stream exists:", stream.id);
// 2. Check audio tracks
const audioTracks = stream.getAudioTracks();
console.log("Audio tracks:", audioTracks.length);
if (audioTracks.length === 0) {
console.log("❌ No audio tracks in stream");
return;
}
const track = audioTracks[0];
console.log("✅ Audio track details:", {
id: track.id,
label: track.label,
enabled: track.enabled,
readyState: track.readyState,
muted: track.muted
});
// 3. Test audio level detection manually
try {
const audioContext = new (window.AudioContext || (window as Window & { webkitAudioContext?: typeof AudioContext }).webkitAudioContext)();
const analyser = audioContext.createAnalyser();
const source = audioContext.createMediaStreamSource(stream);
analyser.fftSize = 256;
source.connect(analyser);
const dataArray = new Uint8Array(analyser.frequencyBinCount);
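// getByteFrequencyData fills this array with per-frequency-bin magnitudes in the 0-255 range.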
console.log("🎤 Testing audio level detection for 5 seconds...");
console.log("Please speak into your microphone now!");
let maxLevel = 0;
let sampleCount = 0;
const testInterval = setInterval(() => {
analyser.getByteFrequencyData(dataArray);
let sum = 0;
for (const value of dataArray) {
sum += value * value;
}
const rms = Math.sqrt(sum / dataArray.length);
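// RMS over frequency-bin magnitudes is a rough loudness proxy for debugging, not a calibrated level.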
const level = Math.min(100, (rms / 255) * 100);
maxLevel = Math.max(maxLevel, level);
sampleCount++;
if (sampleCount % 10 === 0) { // Log every 10th sample
console.log(`Audio level: ${level.toFixed(1)}% (max so far: ${maxLevel.toFixed(1)}%)`);
}
}, 100);
setTimeout(() => {
clearInterval(testInterval);
source.disconnect();
audioContext.close();
console.log("🎤 Audio test completed!");
console.log(`Maximum audio level detected: ${maxLevel.toFixed(1)}%`);
if (maxLevel > 5) {
console.log("✅ Microphone is detecting audio!");
} else {
console.log("❌ No significant audio detected. Check microphone permissions and hardware.");
}
}, 5000);
} catch (error) {
console.error("❌ Failed to test audio level:", error);
}
// 4. Check WebRTC sender
if (microphoneSender) {
console.log("✅ WebRTC sender exists");
console.log("Sender track:", {
id: microphoneSender.track?.id,
kind: microphoneSender.track?.kind,
enabled: microphoneSender.track?.enabled,
readyState: microphoneSender.track?.readyState
});
// Check if sender track matches stream track
if (microphoneSender.track === track) {
console.log("✅ Sender track matches stream track");
} else {
console.log("❌ Sender track does NOT match stream track");
}
} else {
console.log("❌ No WebRTC sender available");
}
// 5. Check peer connection
if (peerConnection) {
console.log("✅ Peer connection exists");
console.log("Connection state:", peerConnection.connectionState);
console.log("ICE connection state:", peerConnection.iceConnectionState);
const transceivers = peerConnection.getTransceivers();
const audioTransceivers = transceivers.filter(t =>
t.sender.track?.kind === 'audio' || t.receiver.track?.kind === 'audio'
);
console.log("Audio transceivers:", audioTransceivers.map(t => ({
direction: t.direction,
senderTrack: t.sender.track?.id,
receiverTrack: t.receiver.track?.id
})));
} else {
console.log("❌ No peer connection available");
}
}, [microphoneSender, peerConnection]);
const startMicrophoneDebounced = useCallback((deviceId?: string) => {
debouncedOperation(() => startMicrophone(deviceId).then(() => {}), "start");
}, [startMicrophone, debouncedOperation]);
const stopMicrophoneDebounced = useCallback(() => {
debouncedOperation(() => stopMicrophone().then(() => {}), "stop");
}, [stopMicrophone, debouncedOperation]);
// Make debug functions available globally for console access
useEffect(() => {
type DebugWindow = Window & {
debugMicrophone?: () => unknown;
checkAudioStats?: () => unknown;
testMicrophoneAudio?: () => unknown;
resetBackendMicrophone?: () => unknown;
};
const win = window as DebugWindow;
win.debugMicrophone = debugMicrophoneState;
win.checkAudioStats = checkAudioTransmissionStats;
win.testMicrophoneAudio = testMicrophoneAudio;
win.resetBackendMicrophone = resetBackendMicrophoneState;
return () => {
delete win.debugMicrophone;
delete win.checkAudioStats;
delete win.testMicrophoneAudio;
delete win.resetBackendMicrophone;
};
}, [debugMicrophoneState, checkAudioTransmissionStats, testMicrophoneAudio, resetBackendMicrophoneState]);
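// Illustrative devtools usage (not part of the hook's API surface):
//   window.debugMicrophone?.();              // dump current hook state
//   await window.checkAudioStats?.();        // outbound-rtp audio stats
//   await window.testMicrophoneAudio?.();    // 5-second local level test
//   await window.resetBackendMicrophone?.(); // force a backend state reset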
// Sync state on mount
useEffect(() => {
syncMicrophoneState();
}, [syncMicrophoneState]);
// Cleanup on unmount - use ref to avoid dependency on stopMicrophoneStream
useEffect(() => {
return () => {
// Clean up stream directly without depending on the callback
const stream = microphoneStreamRef.current;
if (stream) {
console.log("Cleanup: stopping microphone stream on unmount");
stream.getAudioTracks().forEach(track => {
track.stop();
console.log(`Cleanup: stopped audio track ${track.id}`);
});
microphoneStreamRef.current = null;
}
};
}, []); // No dependencies to prevent re-running
return {
isMicrophoneActive,
isMicrophoneMuted,
microphoneStream,
startMicrophone,
stopMicrophone,
toggleMicrophoneMute,
debugMicrophoneState,
// Expose debounced variants for UI handlers
startMicrophoneDebounced,
stopMicrophoneDebounced,
// Expose sync and loading flags for consumers that expect them
syncMicrophoneState,
isStarting,
isStopping,
isToggling,
};
}
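For orientation, here is a minimal sketch of how a component might consume this hook; the MicToggle component below is illustrative only and not part of this commit:

import { useMicrophone } from "@/hooks/useMicrophone";

// Illustrative only: toggle the microphone with the debounced helpers exposed by the hook.
function MicToggle() {
  const {
    isMicrophoneActive,
    isStarting,
    isStopping,
    startMicrophoneDebounced,
    stopMicrophoneDebounced,
  } = useMicrophone();

  const busy = isStarting || isStopping;
  return (
    <button
      disabled={busy}
      onClick={() =>
        isMicrophoneActive ? stopMicrophoneDebounced() : startMicrophoneDebounced()
      }
    >
      {isMicrophoneActive ? "Stop microphone" : "Start microphone"}
    </button>
  );
}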

View File

@@ -33,10 +33,12 @@ import {
useVideoStore,
VideoState,
} from "@/hooks/stores";
import { useMicrophone } from "@/hooks/useMicrophone";
import WebRTCVideo from "@components/WebRTCVideo";
import { checkAuth, isInCloud, isOnDevice } from "@/main";
import DashboardNavbar from "@components/Header";
import ConnectionStatsSidebar from "@/components/sidebar/connectionStats";
import AudioMetricsSidebar from "@/components/sidebar/AudioMetricsSidebar";
import { JsonRpcRequest, useJsonRpc } from "@/hooks/useJsonRpc";
import Terminal from "@components/Terminal";
import { CLOUD_API, DEVICE_API } from "@/ui.config";
@@ -141,6 +143,9 @@ export default function KvmIdRoute() {
const setTransceiver = useRTCStore(state => state.setTransceiver);
const location = useLocation();
// Microphone hook - moved here to prevent unmounting when popover closes
const microphoneHook = useMicrophone();
const isLegacySignalingEnabled = useRef(false);
const [connectionFailed, setConnectionFailed] = useState(false);
@@ -479,6 +484,8 @@ export default function KvmIdRoute() {
};
setTransceiver(pc.addTransceiver("video", { direction: "recvonly" }));
// Add audio transceiver to receive audio from the server and send microphone audio
pc.addTransceiver("audio", { direction: "sendrecv" });
const rpcDataChannel = pc.createDataChannel("rpc");
rpcDataChannel.onopen = () => {
@@ -828,7 +835,7 @@ export default function KvmIdRoute() {
/>
<div className="relative flex h-full w-full overflow-hidden">
- <WebRTCVideo />
+ <WebRTCVideo microphone={microphoneHook} />
<div
style={{ animationDuration: "500ms" }}
className="animate-slideUpFade pointer-events-none absolute inset-0 flex items-center justify-center p-4"
@@ -900,6 +907,22 @@ function SidebarContainer(props: SidebarContainerProps) {
<ConnectionStatsSidebar />
</motion.div>
)}
{sidebarView === "audio-metrics" && (
<motion.div
className="absolute inset-0"
initial={{ opacity: 0 }}
animate={{ opacity: 1 }}
exit={{ opacity: 0 }}
transition={{
duration: 0.5,
ease: "easeInOut",
}}
>
<div className="grid h-full grid-rows-(--grid-headerBody) shadow-xs">
<AudioMetricsSidebar />
</div>
</motion.div>
)}
</AnimatePresence>
</div>
</div>

View File

@@ -17,11 +17,7 @@ export default defineConfig(({ mode, command }) => {
const { JETKVM_PROXY_URL, USE_SSL } = process.env;
const useSSL = USE_SSL === "true";
- const plugins = [
-   tailwindcss(),
-   tsconfigPaths(),
-   react()
- ];
+ const plugins = [tailwindcss(), tsconfigPaths(), react()];
if (useSSL) {
plugins.push(basicSsl());
}
@@ -41,6 +37,8 @@ export default defineConfig(({ mode, command }) => {
"/storage": JETKVM_PROXY_URL,
"/cloud": JETKVM_PROXY_URL,
"/developer": JETKVM_PROXY_URL,
"/microphone": JETKVM_PROXY_URL,
"/audio": JETKVM_PROXY_URL,
}
: undefined,
},

View File

@@ -5,7 +5,7 @@ import (
)
// max frame size for 1080p video, specified in mpp venc setting
- const maxFrameSize = 1920 * 1080 / 2
+ const maxVideoFrameSize = 1920 * 1080 / 2
func writeCtrlAction(action string) error {
actionMessage := map[string]string{

web.go
View File

@@ -14,8 +14,11 @@ import (
"strings"
"time"
"github.com/jetkvm/kvm/internal/audio"
"github.com/coder/websocket"
"github.com/coder/websocket/wsjson"
gin_logger "github.com/gin-contrib/logger"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
@@ -156,6 +159,302 @@ func setupRouter() *gin.Engine {
protected.POST("/storage/upload", handleUploadHttp)
}
protected.GET("/audio/mute", func(c *gin.Context) {
c.JSON(200, gin.H{"muted": audio.IsAudioMuted()})
})
protected.POST("/audio/mute", func(c *gin.Context) {
type muteReq struct {
Muted bool `json:"muted"`
}
var req muteReq
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(400, gin.H{"error": "invalid request"})
return
}
audio.SetAudioMuted(req.Muted)
// Broadcast audio mute state change via WebSocket
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.BroadcastAudioMuteChanged(req.Muted)
c.JSON(200, gin.H{"muted": req.Muted})
})
protected.GET("/audio/quality", func(c *gin.Context) {
config := audio.GetAudioConfig()
presets := audio.GetAudioQualityPresets()
c.JSON(200, gin.H{
"current": config,
"presets": presets,
})
})
protected.POST("/audio/quality", func(c *gin.Context) {
type qualityReq struct {
Quality int `json:"quality"`
}
var req qualityReq
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(400, gin.H{"error": "invalid request"})
return
}
// Validate quality level
if req.Quality < 0 || req.Quality > 3 {
c.JSON(400, gin.H{"error": "invalid quality level (0-3)"})
return
}
audio.SetAudioQuality(audio.AudioQuality(req.Quality))
c.JSON(200, gin.H{
"quality": req.Quality,
"config": audio.GetAudioConfig(),
})
})
protected.GET("/audio/metrics", func(c *gin.Context) {
metrics := audio.GetAudioMetrics()
c.JSON(200, gin.H{
"frames_received": metrics.FramesReceived,
"frames_dropped": metrics.FramesDropped,
"bytes_processed": metrics.BytesProcessed,
"last_frame_time": metrics.LastFrameTime,
"connection_drops": metrics.ConnectionDrops,
"average_latency": metrics.AverageLatency.String(),
})
})
protected.GET("/microphone/quality", func(c *gin.Context) {
config := audio.GetMicrophoneConfig()
presets := audio.GetMicrophoneQualityPresets()
c.JSON(200, gin.H{
"current": config,
"presets": presets,
})
})
protected.POST("/microphone/quality", func(c *gin.Context) {
type qualityReq struct {
Quality int `json:"quality"`
}
var req qualityReq
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(400, gin.H{"error": "invalid request"})
return
}
// Validate quality level
if req.Quality < 0 || req.Quality > 3 {
c.JSON(400, gin.H{"error": "invalid quality level (0-3)"})
return
}
audio.SetMicrophoneQuality(audio.AudioQuality(req.Quality))
c.JSON(200, gin.H{
"quality": req.Quality,
"config": audio.GetMicrophoneConfig(),
})
})
// Microphone API endpoints
protected.GET("/microphone/status", func(c *gin.Context) {
sessionActive := currentSession != nil
var running bool
if sessionActive && currentSession.AudioInputManager != nil {
running = currentSession.AudioInputManager.IsRunning()
}
c.JSON(200, gin.H{
"running": running,
"session_active": sessionActive,
})
})
protected.POST("/microphone/start", func(c *gin.Context) {
if currentSession == nil {
c.JSON(400, gin.H{"error": "no active session"})
return
}
if currentSession.AudioInputManager == nil {
c.JSON(500, gin.H{"error": "audio input manager not available"})
return
}
// Optimized server-side cooldown using atomic operations
opResult := audio.TryMicrophoneOperation()
if !opResult.Allowed {
running := currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning()
c.JSON(200, gin.H{
"status": "cooldown",
"running": running,
"cooldown_ms_remaining": opResult.RemainingCooldown.Milliseconds(),
"operation_id": opResult.OperationID,
})
return
}
// Check if already running before attempting to start
if currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning() {
c.JSON(200, gin.H{
"status": "already running",
"running": true,
})
return
}
err := currentSession.AudioInputManager.Start()
if err != nil {
// Log the error for debugging but don't expose internal details
logger.Warn().Err(err).Msg("failed to start microphone")
// Check if it's already running after the failed start attempt
// This handles race conditions where another request started it
if currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning() {
c.JSON(200, gin.H{
"status": "started by concurrent request",
"running": true,
})
return
}
c.JSON(500, gin.H{"error": "failed to start microphone"})
return
}
// Broadcast microphone state change via WebSocket
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.BroadcastMicrophoneStateChanged(true, true)
c.JSON(200, gin.H{
"status": "started",
"running": currentSession.AudioInputManager.IsRunning(),
})
})
protected.POST("/microphone/stop", func(c *gin.Context) {
if currentSession == nil {
c.JSON(400, gin.H{"error": "no active session"})
return
}
if currentSession.AudioInputManager == nil {
c.JSON(500, gin.H{"error": "audio input manager not available"})
return
}
// Optimized server-side cooldown using atomic operations
opResult := audio.TryMicrophoneOperation()
if !opResult.Allowed {
running := currentSession.AudioInputManager.IsRunning() || audio.IsNonBlockingAudioInputRunning()
c.JSON(200, gin.H{
"status": "cooldown",
"running": running,
"cooldown_ms_remaining": opResult.RemainingCooldown.Milliseconds(),
"operation_id": opResult.OperationID,
})
return
}
// Check if already stopped before attempting to stop
if !currentSession.AudioInputManager.IsRunning() && !audio.IsNonBlockingAudioInputRunning() {
c.JSON(200, gin.H{
"status": "already stopped",
"running": false,
})
return
}
currentSession.AudioInputManager.Stop()
// AudioInputManager.Stop() already coordinates a clean stop via StopNonBlockingAudioInput()
// so we don't need to call it again here
// Broadcast microphone state change via WebSocket
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.BroadcastMicrophoneStateChanged(false, true)
c.JSON(200, gin.H{
"status": "stopped",
"running": currentSession.AudioInputManager.IsRunning(),
})
})
protected.POST("/microphone/mute", func(c *gin.Context) {
var req struct {
Muted bool `json:"muted"`
}
if err := c.ShouldBindJSON(&req); err != nil {
c.JSON(400, gin.H{"error": "invalid request body"})
return
}
// Note: Microphone muting is typically handled at the frontend level
// This endpoint is provided for consistency but doesn't affect backend processing
c.JSON(200, gin.H{
"status": "mute state updated",
"muted": req.Muted,
})
})
protected.GET("/microphone/metrics", func(c *gin.Context) {
if currentSession == nil || currentSession.AudioInputManager == nil {
c.JSON(200, gin.H{
"frames_sent": 0,
"frames_dropped": 0,
"bytes_processed": 0,
"last_frame_time": "",
"connection_drops": 0,
"average_latency": "0s",
})
return
}
metrics := currentSession.AudioInputManager.GetMetrics()
c.JSON(200, gin.H{
"frames_sent": metrics.FramesSent,
"frames_dropped": metrics.FramesDropped,
"bytes_processed": metrics.BytesProcessed,
"last_frame_time": metrics.LastFrameTime.Format("2006-01-02T15:04:05.000Z"),
"connection_drops": metrics.ConnectionDrops,
"average_latency": metrics.AverageLatency.String(),
})
})
protected.POST("/microphone/reset", func(c *gin.Context) {
if currentSession == nil {
c.JSON(400, gin.H{"error": "no active session"})
return
}
if currentSession.AudioInputManager == nil {
c.JSON(500, gin.H{"error": "audio input manager not available"})
return
}
logger.Info().Msg("forcing microphone state reset")
// Force stop both the AudioInputManager and NonBlockingAudioManager
currentSession.AudioInputManager.Stop()
audio.StopNonBlockingAudioInput()
// Wait a bit to ensure everything is stopped
time.Sleep(100 * time.Millisecond)
// Broadcast microphone state change via WebSocket
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.BroadcastMicrophoneStateChanged(false, true)
c.JSON(200, gin.H{
"status": "reset",
"audio_input_running": currentSession.AudioInputManager.IsRunning(),
"nonblocking_input_running": audio.IsNonBlockingAudioInputRunning(),
})
})
// Catch-all route for SPA
r.NoRoute(func(c *gin.Context) {
if c.Request.Method == "GET" && c.NegotiateFormat(gin.MIMEHTML) == gin.MIMEHTML {
@@ -179,26 +478,57 @@ func handleWebRTCSession(c *gin.Context) {
return
}
- session, err := newSession(SessionConfig{})
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": err})
- return
- }
- sd, err := session.ExchangeOffer(req.Sd)
- if err != nil {
- c.JSON(http.StatusInternalServerError, gin.H{"error": err})
- return
- }
+ var session *Session
+ var err error
+ var sd string
+ // Check if we have an existing session
if currentSession != nil {
logger.Info().Msg("existing session detected, creating new session and notifying old session")
// Always create a new session when there's an existing one
// This ensures the "otherSessionConnected" prompt is shown
session, err = newSession(SessionConfig{})
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err})
return
}
sd, err = session.ExchangeOffer(req.Sd)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err})
return
}
// Notify the old session about the takeover
writeJSONRPCEvent("otherSessionConnected", nil, currentSession)
peerConn := currentSession.peerConnection
go func() {
time.Sleep(1 * time.Second)
_ = peerConn.Close()
}()
currentSession = session
logger.Info().Interface("session", session).Msg("new session created, old session notified")
} else {
// No existing session, create a new one
logger.Info().Msg("creating new session")
session, err = newSession(SessionConfig{})
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err})
return
}
sd, err = session.ExchangeOffer(req.Sd)
if err != nil {
c.JSON(http.StatusInternalServerError, gin.H{"error": err})
return
}
currentSession = session
logger.Info().Interface("session", session).Msg("new session accepted")
}
- currentSession = session
c.JSON(http.StatusOK, gin.H{"sd": sd})
}
@@ -267,6 +597,9 @@ func handleWebRTCSignalWsMessages(
if isCloudConnection {
setCloudConnectionState(CloudConnectionStateDisconnected)
}
// Clean up audio event subscription
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.Unsubscribe(connectionID)
cancelRun()
}()
@@ -424,6 +757,14 @@
if err = currentSession.peerConnection.AddICECandidate(candidate); err != nil {
l.Warn().Str("error", err.Error()).Msg("failed to add incoming ICE candidate to our peer connection")
}
} else if message.Type == "subscribe-audio-events" {
l.Info().Msg("client subscribing to audio events")
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.Subscribe(connectionID, wsCon, runCtx, &l)
} else if message.Type == "unsubscribe-audio-events" {
l.Info().Msg("client unsubscribing from audio events")
broadcaster := audio.GetAudioEventBroadcaster()
broadcaster.Unsubscribe(connectionID)
}
}
}
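Taken together, these handlers form a small REST surface over the audio pipeline. Below is a hedged sketch of exercising it from a client; the base URL argument and the fetchJSON helper are assumptions for illustration, not part of this commit:

// Illustrative smoke test against the new audio/microphone endpoints.
async function audioSmokeTest(base: string) {
  const fetchJSON = async (path: string, init?: RequestInit) => {
    const resp = await fetch(base + path, {
      headers: { "Content-Type": "application/json" },
      ...init,
    });
    if (!resp.ok) throw new Error(`${path} -> ${resp.status}`);
    return resp.json();
  };

  console.log(await fetchJSON("/audio/quality"));      // { current, presets }
  await fetchJSON("/audio/quality", {
    method: "POST",
    body: JSON.stringify({ quality: 2 }),              // valid range is 0-3
  });
  console.log(await fetchJSON("/microphone/status"));  // { running, session_active }
  console.log(await fetchJSON("/audio/metrics"));      // frame/byte counters
}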

View File

@@ -5,11 +5,15 @@ import (
"encoding/base64"
"encoding/json"
"net"
"runtime"
"strings"
"sync"
"time"
"github.com/coder/websocket"
"github.com/coder/websocket/wsjson"
"github.com/gin-gonic/gin"
"github.com/jetkvm/kvm/internal/audio"
"github.com/jetkvm/kvm/internal/logging"
"github.com/pion/webrtc/v4"
"github.com/rs/zerolog"
@@ -18,11 +22,18 @@
type Session struct {
peerConnection *webrtc.PeerConnection
VideoTrack *webrtc.TrackLocalStaticSample
AudioTrack *webrtc.TrackLocalStaticSample
ControlChannel *webrtc.DataChannel
RPCChannel *webrtc.DataChannel
HidChannel *webrtc.DataChannel
DiskChannel *webrtc.DataChannel
AudioInputManager *audio.AudioInputManager
shouldUmountVirtualMedia bool
// Microphone operation cooldown to mitigate rapid start/stop races
micOpMu sync.Mutex
lastMicOp time.Time
micCooldown time.Duration
}
type SessionConfig struct {
@@ -104,7 +115,10 @@ func newSession(config SessionConfig) (*Session, error) {
if err != nil {
return nil, err
}
- session := &Session{peerConnection: peerConnection}
+ session := &Session{
+ peerConnection: peerConnection,
+ AudioInputManager: audio.NewAudioInputManager(),
+ }
peerConnection.OnDataChannel(func(d *webrtc.DataChannel) {
scopedLogger.Info().Str("label", d.Label()).Uint16("id", *d.ID()).Msg("New DataChannel")
@@ -136,22 +150,63 @@ func newSession(config SessionConfig) (*Session, error) {
return nil, err
}
- rtpSender, err := peerConnection.AddTrack(session.VideoTrack)
+ session.AudioTrack, err = webrtc.NewTrackLocalStaticSample(webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeOpus}, "audio", "kvm")
if err != nil {
return nil, err
}
videoRtpSender, err := peerConnection.AddTrack(session.VideoTrack)
if err != nil {
return nil, err
}
// Add bidirectional audio transceiver for microphone input
audioTransceiver, err := peerConnection.AddTransceiverFromTrack(session.AudioTrack, webrtc.RTPTransceiverInit{
Direction: webrtc.RTPTransceiverDirectionSendrecv,
})
if err != nil {
return nil, err
}
audioRtpSender := audioTransceiver.Sender()
// Handle incoming audio track (microphone from browser)
peerConnection.OnTrack(func(track *webrtc.TrackRemote, receiver *webrtc.RTPReceiver) {
scopedLogger.Info().Str("codec", track.Codec().MimeType).Str("id", track.ID()).Msg("Got remote track")
if track.Kind() == webrtc.RTPCodecTypeAudio && track.Codec().MimeType == webrtc.MimeTypeOpus {
scopedLogger.Info().Msg("Processing incoming audio track for microphone input")
go func() {
// Lock to OS thread to isolate RTP processing
runtime.LockOSThread()
defer runtime.UnlockOSThread()
for {
rtpPacket, _, err := track.ReadRTP()
if err != nil {
scopedLogger.Debug().Err(err).Msg("Error reading RTP packet from audio track")
return
}
// Extract Opus payload from RTP packet
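// Per RFC 7587 the RTP payload is exactly one Opus packet, so a straight copy of the payload suffices.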
opusPayload := rtpPacket.Payload
if len(opusPayload) > 0 && session.AudioInputManager != nil {
err := session.AudioInputManager.WriteOpusFrame(opusPayload)
if err != nil {
scopedLogger.Warn().Err(err).Msg("Failed to write Opus frame to audio input manager")
}
}
}
}()
}
})
// Read incoming RTCP packets
// Before these packets are returned they are processed by interceptors. For things
// like NACK this needs to be called.
- go func() {
- rtcpBuf := make([]byte, 1500)
- for {
- if _, _, rtcpErr := rtpSender.Read(rtcpBuf); rtcpErr != nil {
- return
- }
- }
- }()
+ go drainRtpSender(videoRtpSender)
+ go drainRtpSender(audioRtpSender)
var isConnected bool
peerConnection.OnICECandidate(func(candidate *webrtc.ICECandidate) {
@@ -190,6 +245,10 @@
err := rpcUnmountImage()
scopedLogger.Warn().Err(err).Msg("unmount image failed on connection close")
}
// Stop audio input manager
if session.AudioInputManager != nil {
session.AudioInputManager.Stop()
}
if isConnected {
isConnected = false
actionSessions--
@@ -203,6 +262,19 @@
return session, nil
}
func drainRtpSender(rtpSender *webrtc.RTPSender) {
// Lock to OS thread to isolate RTCP processing
runtime.LockOSThread()
defer runtime.UnlockOSThread()
rtcpBuf := make([]byte, 1500)
for {
if _, _, err := rtpSender.Read(rtcpBuf); err != nil {
return
}
}
}
var actionSessions = 0
func onActiveSessionsChanged() {