diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a7cb7c77..33d882e6 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,6 +1,10 @@ { "name": "JetKVM", "image": "mcr.microsoft.com/devcontainers/go:1.25-trixie", + "runArgs": [ + "--platform=linux/amd64" + ], + "onCreateCommand": ".devcontainer/install-deps.sh", "features": { "ghcr.io/devcontainers/features/node:1": { // Should match what is defined in ui/package.json @@ -10,7 +14,6 @@ "mounts": [ "source=${localEnv:HOME}/.ssh,target=/home/vscode/.ssh,type=bind,consistency=cached" ], - "onCreateCommand": ".devcontainer/install-deps.sh", "customizations": { "vscode": { "extensions": [ diff --git a/.devcontainer/install-deps.sh b/.devcontainer/install-deps.sh index 4435d25b..94106cc9 100755 --- a/.devcontainer/install-deps.sh +++ b/.devcontainer/install-deps.sh @@ -5,7 +5,7 @@ function sudo() { if [ "$UID" -eq 0 ]; then "$@" else - ${SUDO_PATH} "$@" + ${SUDO_PATH} -E "$@" fi } @@ -16,7 +16,7 @@ sudo apt-get update && \ sudo apt-get install -y --no-install-recommends \ build-essential \ device-tree-compiler \ - gperf g++-multilib gcc-multilib \ + gperf \ libnl-3-dev libdbus-1-dev libelf-dev libmpc-dev dwarves \ bc openssl flex bison libssl-dev python3 python-is-python3 texinfo kmod cmake \ wget zstd \ @@ -30,6 +30,34 @@ pushd "${BUILDKIT_TMPDIR}" > /dev/null wget https://github.com/jetkvm/rv1106-system/releases/download/${BUILDKIT_VERSION}/buildkit.tar.zst && \ sudo mkdir -p /opt/jetkvm-native-buildkit && \ - sudo tar --use-compress-program="unzstd --long=31" -xvf buildkit.tar.zst -C /opt/jetkvm-native-buildkit && \ + sudo tar --use-compress-program="zstd -d --long=31" -xvf buildkit.tar.zst -C /opt/jetkvm-native-buildkit && \ rm buildkit.tar.zst -popd \ No newline at end of file +popd + +# Install audio dependencies (ALSA and Opus) for JetKVM +echo "Installing JetKVM audio dependencies..." 
+SCRIPT_DIR="$(dirname "$(readlink -f "$0")")" +PROJECT_ROOT="$(dirname "${SCRIPT_DIR}")" +AUDIO_DEPS_SCRIPT="${PROJECT_ROOT}/install_audio_deps.sh" + +if [ -f "${AUDIO_DEPS_SCRIPT}" ]; then + echo "Running audio dependencies installation..." + # Pre-create audio libs directory with proper permissions + sudo mkdir -p /opt/jetkvm-audio-libs + sudo chmod 777 /opt/jetkvm-audio-libs + # Run installation script (now it can write without sudo) + bash "${AUDIO_DEPS_SCRIPT}" + echo "Audio dependencies installation completed." + if [ -d "/opt/jetkvm-audio-libs" ]; then + echo "Audio libraries installed in /opt/jetkvm-audio-libs" + # Set recursive permissions for all subdirectories and files + sudo chmod -R 777 /opt/jetkvm-audio-libs + echo "Permissions set to allow all users access to audio libraries" + else + echo "Error: /opt/jetkvm-audio-libs directory not found after installation." + exit 1 + fi +else + echo "Warning: Audio dependencies script not found at ${AUDIO_DEPS_SCRIPT}" + echo "Skipping audio dependencies installation." 
+fi diff --git a/.devcontainer/install_audio_deps.sh b/.devcontainer/install_audio_deps.sh new file mode 100755 index 00000000..8d369db4 --- /dev/null +++ b/.devcontainer/install_audio_deps.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# .devcontainer/install_audio_deps.sh +# Build ALSA and Opus static libs for ARM in /opt/jetkvm-audio-libs +set -e + +# Sudo wrapper function +SUDO_PATH=$(which sudo 2>/dev/null || echo "") +function use_sudo() { + if [ "$UID" -eq 0 ]; then + "$@" + elif [ -n "$SUDO_PATH" ]; then + ${SUDO_PATH} -E "$@" + else + "$@" + fi +} + +# Accept version parameters or use defaults +ALSA_VERSION="${1:-1.2.14}" +OPUS_VERSION="${2:-1.5.2}" + +AUDIO_LIBS_DIR="/opt/jetkvm-audio-libs" +BUILDKIT_PATH="/opt/jetkvm-native-buildkit" +BUILDKIT_FLAVOR="arm-rockchip830-linux-uclibcgnueabihf" +CROSS_PREFIX="$BUILDKIT_PATH/bin/$BUILDKIT_FLAVOR" + +# Create directory with proper permissions +use_sudo mkdir -p "$AUDIO_LIBS_DIR" +use_sudo chmod 777 "$AUDIO_LIBS_DIR" +cd "$AUDIO_LIBS_DIR" + +# Download sources +[ -f alsa-lib-${ALSA_VERSION}.tar.bz2 ] || wget -N https://www.alsa-project.org/files/pub/lib/alsa-lib-${ALSA_VERSION}.tar.bz2 +[ -f opus-${OPUS_VERSION}.tar.gz ] || wget -N https://downloads.xiph.org/releases/opus/opus-${OPUS_VERSION}.tar.gz + +# Extract +[ -d alsa-lib-${ALSA_VERSION} ] || tar xf alsa-lib-${ALSA_VERSION}.tar.bz2 +[ -d opus-${OPUS_VERSION} ] || tar xf opus-${OPUS_VERSION}.tar.gz + +# Optimization flags for ARM Cortex-A7 with NEON (simplified to avoid FD_SETSIZE issues) +OPTIM_CFLAGS="-O2 -mfpu=neon -mtune=cortex-a7 -mfloat-abi=hard" + +export CC="${CROSS_PREFIX}-gcc" +export CFLAGS="$OPTIM_CFLAGS" +export CXXFLAGS="$OPTIM_CFLAGS" + +# Build ALSA +cd alsa-lib-${ALSA_VERSION} +if [ ! -f .built ]; then + chown -R $(whoami):$(whoami) . 
+ # Use minimal ALSA configuration to avoid FD_SETSIZE issues in devcontainer + CFLAGS="$OPTIM_CFLAGS" ./configure --host $BUILDKIT_FLAVOR \ + --enable-static=yes --enable-shared=no \ + --with-pcm-plugins=rate,linear \ + --disable-seq --disable-rawmidi --disable-ucm \ + --disable-python --disable-old-symbols \ + --disable-topology --disable-hwdep --disable-mixer \ + --disable-alisp --disable-aload --disable-resmgr + make -j$(nproc) + touch .built +fi +cd .. + +# Build Opus +cd opus-${OPUS_VERSION} +if [ ! -f .built ]; then + chown -R $(whoami):$(whoami) . + CFLAGS="$OPTIM_CFLAGS" ./configure --host $BUILDKIT_FLAVOR --enable-static=yes --enable-shared=no --enable-fixed-point + make -j$(nproc) + touch .built +fi +cd .. + +echo "ALSA and Opus built in $AUDIO_LIBS_DIR" diff --git a/.github/workflows/golangci-lint.yml b/.github/workflows/golangci-lint.yml index 71c8a087..854841e5 100644 --- a/.github/workflows/golangci-lint.yml +++ b/.github/workflows/golangci-lint.yml @@ -27,11 +27,70 @@ jobs: uses: actions/setup-go@v6 with: go-version: oldstable + - name: Setup build environment variables + id: build-env + run: | + # Extract versions from Makefile + ALSA_VERSION=$(grep '^ALSA_VERSION' Makefile | cut -d'=' -f2 | tr -d ' ') + OPUS_VERSION=$(grep '^OPUS_VERSION' Makefile | cut -d'=' -f2 | tr -d ' ') + + # Define buildkit path + BUILDKIT_PATH="/opt/jetkvm-native-buildkit" + BUILDKIT_FLAVOR="arm-rockchip830-linux-uclibcgnueabihf" + + # Set environment variables + echo "ALSA_VERSION=$ALSA_VERSION" >> $GITHUB_ENV + echo "OPUS_VERSION=$OPUS_VERSION" >> $GITHUB_ENV + echo "BUILDKIT_PATH=$BUILDKIT_PATH" >> $GITHUB_ENV + echo "BUILDKIT_FLAVOR=$BUILDKIT_FLAVOR" >> $GITHUB_ENV + + # Set outputs for use in other steps + echo "alsa_version=$ALSA_VERSION" >> $GITHUB_OUTPUT + echo "opus_version=$OPUS_VERSION" >> $GITHUB_OUTPUT + echo "buildkit_path=$BUILDKIT_PATH" >> $GITHUB_OUTPUT + echo "buildkit_flavor=$BUILDKIT_FLAVOR" >> $GITHUB_OUTPUT + + # Set resolved cache path + 
CACHE_PATH="/opt/jetkvm-audio-libs" + echo "CACHE_PATH=$CACHE_PATH" >> $GITHUB_ENV + echo "cache_path=$CACHE_PATH" >> $GITHUB_OUTPUT + + echo "Extracted ALSA version: $ALSA_VERSION" + echo "Extracted Opus version: $OPUS_VERSION" + echo "Buildkit path: $BUILDKIT_PATH" + echo "Cache path: $CACHE_PATH" + - name: Restore audio dependencies cache + id: cache-audio-deps + uses: actions/cache/restore@v4 + with: + path: ${{ steps.build-env.outputs.cache_path }} + key: audio-deps-${{ runner.os }}-alsa-${{ steps.build-env.outputs.alsa_version }}-opus-${{ steps.build-env.outputs.opus_version }}-buildkit + - name: Setup development environment + if: steps.cache-audio-deps.outputs.cache-hit != 'true' + run: make dev_env + env: + ALSA_VERSION: ${{ env.ALSA_VERSION }} + OPUS_VERSION: ${{ env.OPUS_VERSION }} - name: Create empty resource directory run: | mkdir -p static && touch static/.gitkeep + - name: Save audio dependencies cache + if: always() && steps.cache-audio-deps.outputs.cache-hit != 'true' + uses: actions/cache/save@v4 + with: + path: ${{ steps.build-env.outputs.cache_path }} + key: ${{ steps.cache-audio-deps.outputs.cache-primary-key }} - name: Lint uses: golangci/golangci-lint-action@v8 with: args: --verbose version: v2.1 + env: + CGO_ENABLED: 1 + GOOS: linux + GOARCH: arm + GOARM: 7 + CC: ${{ steps.build-env.outputs.buildkit_path }}/bin/${{ steps.build-env.outputs.buildkit_flavor }}-gcc + PKG_CONFIG_PATH: ${{ steps.build-env.outputs.cache_path }}/alsa-lib-${{ steps.build-env.outputs.alsa_version }}/utils:${{ steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }} + CGO_CFLAGS: "-O3 -mfpu=neon -mtune=cortex-a7 -mfloat-abi=hard -ftree-vectorize -ffast-math -funroll-loops -mvectorize-with-neon-quad -marm -D__ARM_NEON -I${{ steps.build-env.outputs.cache_path }}/alsa-lib-${{ steps.build-env.outputs.alsa_version }}/include -I${{ steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }}/include -I${{ 
steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }}/celt" + CGO_LDFLAGS: "-L${{ steps.build-env.outputs.cache_path }}/alsa-lib-${{ steps.build-env.outputs.alsa_version }}/src/.libs -lasound -L${{ steps.build-env.outputs.cache_path }}/opus-${{ steps.build-env.outputs.opus_version }}/.libs -lopus -lm -ldl -static" diff --git a/.gitignore b/.gitignore index 99b7ce95..59a2217a 100644 --- a/.gitignore +++ b/.gitignore @@ -1,7 +1,13 @@ bin/* static/* +.vscode/ +tmp/ +.devcontainer/devcontainer-lock.json .idea .DS_Store +*.log +*.tmp +*.code-workspace .cache .vite @@ -12,4 +18,12 @@ node_modules # generated during the build process #internal/native/include -#internal/native/lib \ No newline at end of file +#internal/native/lib +internal/audio/bin/ + +# backup files +*.bak + +# core dumps +core +core.* diff --git a/DEVELOPMENT.md b/DEVELOPMENT.md index 80f9f37a..7e0bb229 100644 --- a/DEVELOPMENT.md +++ b/DEVELOPMENT.md @@ -11,21 +11,43 @@ + # JetKVM Development Guide + Welcome to JetKVM development! This guide will help you get started quickly, whether you're fixing bugs, adding features, or just exploring the codebase. ## Get Started + ### Prerequisites - **A JetKVM device** (for full development) - **[Go 1.24.4+](https://go.dev/doc/install)** and **[Node.js 22.15.0](https://nodejs.org/en/download/)** - **[Git](https://git-scm.com/downloads)** for version control - **[SSH access](https://jetkvm.com/docs/advanced-usage/developing#developer-mode)** to your JetKVM device +- **Audio build dependencies:** + - **New:** The audio system uses a dual-subprocess architecture with CGO, ALSA, and Opus integration. The audio dependencies are automatically installed by the devcontainer or can be manually built using `.devcontainer/install_audio_deps.sh`. + ### Development Environment -**Recommended:** Development is best done on **Linux** or **macOS**. +**Recommended:** Development is best done on **Linux** or **macOS**. 
+ +#### Apple Silicon (M1/M2/M3) Mac Users + +If you are developing on an Apple Silicon Mac, you should use a devcontainer to ensure compatibility with the JetKVM build environment (which targets linux/amd64 and ARM). There are two main options: + +- **VS Code Dev Containers**: Open the project in VS Code and use the built-in Dev Containers support. The configuration in `.devcontainer/devcontainer.json` is set to use `linux/amd64` platform. +- **Devpod**: [Devpod](https://devpod.sh/) is a fast, open-source tool for running devcontainers anywhere. If you use Devpod, go to **Settings → Experimental → Additional Environmental Variables** and add: + - `DOCKER_DEFAULT_PLATFORM=linux/amd64` + This ensures all builds run in the correct architecture. +- **devcontainer CLI**: You can also use the [devcontainer CLI](https://github.com/devcontainers/cli) to launch the devcontainer from the terminal. + +**Important:** If you're switching from an ARM64 devcontainer or updating the platform settings, you'll need to rebuild the devcontainer completely: +- In VS Code: Run "Dev Containers: Rebuild Container" from the command palette +- With devcontainer CLI: Use `devcontainer up --build` + +This approach ensures compatibility with all shell scripts, build tools, and cross-compilation steps used in the project. If you're using Windows, we strongly recommend using **WSL (Windows Subsystem for Linux)** for the best development experience: - [Install WSL on Windows](https://docs.microsoft.com/en-us/windows/wsl/install) @@ -33,6 +55,7 @@ If you're using Windows, we strongly recommend using **WSL (Windows Subsystem fo This ensures compatibility with shell scripts and build tools used in the project. + ### Project Setup 1. **Clone the repository:** @@ -46,16 +69,25 @@ This ensures compatibility with shell scripts and build tools used in the projec go version && node --version ``` -3. **Find your JetKVM IP address** (check your router or device screen) +3. 
**Set up the cross-compiler and audio dependencies:** + ```bash + make dev_env + # This will install audio dependencies using .devcontainer/install_audio_deps.sh + # It will build ALSA/Opus static libs in /opt/jetkvm-audio-libs using the buildkit from /opt/jetkvm-native-buildkit + # + # **Note:** This is required for the audio subprocess architecture. If you skip this step, builds will not succeed. + ``` -4. **Deploy and test:** +4. **Find your JetKVM IP address** (check your router or device screen) + +5. **Deploy and test:** ```bash ./dev_deploy.sh -r 192.168.1.100 # Replace with your device IP ``` -5. **Open in browser:** `http://192.168.1.100` +6. **Open in browser:** `http://192.168.1.100` -That's it! You're now running your own development version of JetKVM. +That's it! You're now running your own development version of JetKVM, **with bidirectional audio streaming using the dual-subprocess architecture.** --- @@ -71,13 +103,15 @@ npm install Now edit files in `ui/src/` and see changes live in your browser! -### Modify the backend + +### Modify the backend (including audio) ```bash -# Edit Go files (config.go, web.go, etc.) +# Edit Go files (config.go, web.go, internal/audio, etc.) ./dev_deploy.sh -r 192.168.1.100 --skip-ui-build ``` + ### Run tests ```bash @@ -93,6 +127,7 @@ tail -f /var/log/jetkvm.log --- + ## Project Layout ``` @@ -104,6 +139,7 @@ tail -f /var/log/jetkvm.log │ ├── src/routes/ # Pages (login, settings, etc.) │ └── src/components/ # UI components ├── internal/ # Internal Go packages +│ └── audio/ # Audio Processing Layer (CGO, ALSA, Opus) │ ├── native/ # CGO / Native code glue layer │ ├── native/cgo/ # C files for the native library (HDMI, Touchscreen, etc.) 
│ ├── native/eez/ # EEZ Studio Project files (for Touchscreen) @@ -116,6 +152,7 @@ tail -f /var/log/jetkvm.log **Key files for beginners:** +- `internal/audio/` - [NEW] Dual-subprocess audio architecture (CGO, ALSA, Opus) - `web.go` - Add new API endpoints here - `config.go` - Add new settings here - `ui/src/routes/` - Add new pages here @@ -150,7 +187,7 @@ Please click the `Build` button in EEZ Studio then run `./dev_deploy.sh -r /auth/password-local \ --- -## Common Issues & Solutions + +### Common Issues & Solutions ### "Build failed" or "Permission denied" @@ -230,6 +362,8 @@ ssh root@ chmod +x /userdata/jetkvm/bin/jetkvm_app_debug go clean -modcache go mod tidy make build_dev +# If you see errors about missing ALSA/Opus or toolchain, run: +make dev_env # Required for audio subprocess architecture ``` ### "Can't connect to device" @@ -242,6 +376,15 @@ ping ssh root@ echo "Connection OK" ``` + +### "Audio not working" + +```bash +# Make sure you have run: +make dev_env +# # If you see errors about ALSA/Opus, check logs and re-run: make build_audio_deps +``` + ### "Frontend not updating" ```bash @@ -256,24 +399,27 @@ npm install ## Next Steps + ### Adding a New Feature -1. **Backend:** Add API endpoint in `web.go` +1. **Backend:** Add API endpoint in `web.go` or extend audio in `internal/audio/` 2. **Config:** Add settings in `config.go` 3. **Frontend:** Add UI in `ui/src/routes/` 4. **Test:** Deploy and test with `./dev_deploy.sh` + ### Code Style - **Go:** Follow standard Go conventions - **TypeScript:** Use TypeScript for type safety - **React:** Keep components small and reusable +- **Audio/CGO:** Keep C/Go integration minimal, robust, and well-documented. Use zerolog for all logging. 
### Environment Variables ```bash # Enable debug logging -export LOG_TRACE_SCOPES="jetkvm,cloud,websocket,native,jsonrpc" +export LOG_TRACE_SCOPES="jetkvm,cloud,websocket,native,jsonrpc,audio" # Frontend development export JETKVM_PROXY_URL="ws://" @@ -325,7 +471,7 @@ curl http://api:$JETKVM_PASSWORD@YOUR_DEVICE_IP/developer/pprof/ ```bash # Enable trace logging (useful for debugging) -export LOG_TRACE_SCOPES="jetkvm,cloud,websocket,native,jsonrpc" +export LOG_TRACE_SCOPES="jetkvm,cloud,websocket,native,jsonrpc,audio" # For frontend development export JETKVM_PROXY_URL="ws://" diff --git a/Dockerfile.build b/Dockerfile.build index db433b2d..b588da1a 100644 --- a/Dockerfile.build +++ b/Dockerfile.build @@ -6,6 +6,8 @@ ENV GOPATH=/go ENV PATH=$GOPATH/bin:/usr/local/go/bin:$PATH COPY install-deps.sh /install-deps.sh +COPY install_audio_deps.sh /install_audio_deps.sh + RUN /install-deps.sh # Create build directory @@ -21,4 +23,4 @@ RUN go mod download && go mod verify COPY entrypoint.sh /entrypoint.sh RUN chmod +x /entrypoint.sh -ENTRYPOINT [ "/entrypoint.sh" ] \ No newline at end of file +ENTRYPOINT [ "/entrypoint.sh" ] diff --git a/Makefile b/Makefile index c3554879..0c69d7b8 100644 --- a/Makefile +++ b/Makefile @@ -1,16 +1,55 @@ -BRANCH := $(shell git rev-parse --abbrev-ref HEAD) -BUILDDATE := $(shell date -u +%FT%T%z) -BUILDTS := $(shell date -u +%s) -REVISION := $(shell git rev-parse HEAD) +# Build ALSA and Opus static libs for ARM in /opt/jetkvm-audio-libs +build_audio_deps: + bash .devcontainer/install_audio_deps.sh $(ALSA_VERSION) $(OPUS_VERSION) + +# Prepare everything needed for local development (toolchain + audio deps + Go tools) +dev_env: build_audio_deps + $(CLEAN_GO_CACHE) + @echo "Installing Go development tools..." + go install golang.org/x/tools/cmd/goimports@latest + @echo "Development environment ready." 
+JETKVM_HOME ?= $(HOME)/.jetkvm +BUILDKIT_PATH ?= /opt/jetkvm-native-buildkit +BUILDKIT_FLAVOR ?= arm-rockchip830-linux-uclibcgnueabihf +AUDIO_LIBS_DIR ?= /opt/jetkvm-audio-libs + +BRANCH ?= $(shell git rev-parse --abbrev-ref HEAD) +BUILDDATE ?= $(shell date -u +%FT%T%z) +BUILDTS ?= $(shell date -u +%s) +REVISION ?= $(shell git rev-parse HEAD) VERSION_DEV := 0.4.9-dev$(shell date +%Y%m%d%H%M) VERSION := 0.4.8 + +# Audio library versions +ALSA_VERSION ?= 1.2.14 +OPUS_VERSION ?= 1.5.2 + +# Set PKG_CONFIG_PATH globally for all targets that use CGO with audio libraries +export PKG_CONFIG_PATH := $(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/utils:$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION) + +# Common command to clean Go cache with verbose output for all Go builds +CLEAN_GO_CACHE := @echo "Cleaning Go cache..."; go clean -cache -v + +# Optimization flags for ARM Cortex-A7 with NEON SIMD +OPTIM_CFLAGS := -O3 -mfpu=neon -mtune=cortex-a7 -mfloat-abi=hard -ftree-vectorize -ffast-math -funroll-loops -mvectorize-with-neon-quad -marm -D__ARM_NEON + +# Cross-compilation environment for ARM - exported globally +export GOOS := linux +export GOARCH := arm +export GOARM := 7 +export CC := $(BUILDKIT_PATH)/bin/$(BUILDKIT_FLAVOR)-gcc +export CGO_ENABLED := 1 +export CGO_CFLAGS := $(OPTIM_CFLAGS) -I$(BUILDKIT_PATH)/$(BUILDKIT_FLAVOR)/include -I$(BUILDKIT_PATH)/$(BUILDKIT_FLAVOR)/sysroot/usr/include -I$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/include -I$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/celt +export CGO_LDFLAGS := -L$(BUILDKIT_PATH)/$(BUILDKIT_FLAVOR)/lib -L$(BUILDKIT_PATH)/$(BUILDKIT_FLAVOR)/sysroot/usr/lib -lrockit -lrockchip_mpp -lrga -lpthread -L$(AUDIO_LIBS_DIR)/alsa-lib-$(ALSA_VERSION)/src/.libs -lasound -L$(AUDIO_LIBS_DIR)/opus-$(OPUS_VERSION)/.libs -lopus -lm -ldl + PROMETHEUS_TAG := github.com/prometheus/common/version KVM_PKG_NAME := github.com/jetkvm/kvm BUILDKIT_FLAVOR := arm-rockchip830-linux-uclibcgnueabihf BUILDKIT_PATH ?= 
/opt/jetkvm-native-buildkit SKIP_NATIVE_IF_EXISTS ?= 0 +SKIP_AUDIO_BINARIES_IF_EXISTS ?= 0 SKIP_UI_BUILD ?= 0 GO_BUILD_ARGS := -tags netgo,timetzdata,nomsgpack GO_RELEASE_BUILD_ARGS := -trimpath $(GO_BUILD_ARGS) @@ -49,22 +88,67 @@ build_native: ./scripts/build_cgo.sh; \ fi -build_dev: build_native +# Build audio output C binary (ALSA capture → Opus encode → IPC) +build_audio_output: build_audio_deps + @if [ "$(SKIP_AUDIO_BINARIES_IF_EXISTS)" = "1" ] && [ -f "$(BIN_DIR)/jetkvm_audio_output" ]; then \ + echo "jetkvm_audio_output already exists, skipping build..."; \ + else \ + echo "Building audio output binary..."; \ + mkdir -p $(BIN_DIR); \ + $(CC) $(CGO_CFLAGS) \ + -o $(BIN_DIR)/jetkvm_audio_output \ + internal/audio/c/jetkvm_audio_output.c \ + internal/audio/c/ipc_protocol.c \ + internal/audio/c/audio_common.c \ + internal/audio/c/audio.c \ + $(CGO_LDFLAGS); \ + fi + +# Build audio input C binary (IPC → Opus decode → ALSA playback) +build_audio_input: build_audio_deps + @if [ "$(SKIP_AUDIO_BINARIES_IF_EXISTS)" = "1" ] && [ -f "$(BIN_DIR)/jetkvm_audio_input" ]; then \ + echo "jetkvm_audio_input already exists, skipping build..."; \ + else \ + echo "Building audio input binary..."; \ + mkdir -p $(BIN_DIR); \ + $(CC) $(CGO_CFLAGS) \ + -o $(BIN_DIR)/jetkvm_audio_input \ + internal/audio/c/jetkvm_audio_input.c \ + internal/audio/c/ipc_protocol.c \ + internal/audio/c/audio_common.c \ + internal/audio/c/audio.c \ + $(CGO_LDFLAGS); \ + fi + +# Build both audio binaries and copy to embed location +build_audio_binaries: build_audio_output build_audio_input + @echo "Audio binaries built successfully" + @echo "Copying binaries to embed location..." + @mkdir -p internal/audio/bin + @cp $(BIN_DIR)/jetkvm_audio_output internal/audio/bin/ + @cp $(BIN_DIR)/jetkvm_audio_input internal/audio/bin/ + @echo "Binaries ready for embedding" + +build_dev: build_native build_audio_deps build_audio_binaries + $(CLEAN_GO_CACHE) @echo "Building..." 
- $(GO_CMD) build \ + go build \ -ldflags="$(GO_LDFLAGS) -X $(KVM_PKG_NAME).builtAppVersion=$(VERSION_DEV)" \ $(GO_RELEASE_BUILD_ARGS) \ -o $(BIN_DIR)/jetkvm_app -v cmd/main.go build_test2json: + $(CLEAN_GO_CACHE) $(GO_CMD) build -o $(BIN_DIR)/test2json cmd/test2json build_gotestsum: + $(CLEAN_GO_CACHE) @echo "Building gotestsum..." $(GO_CMD) install gotest.tools/gotestsum@latest cp $(shell $(GO_CMD) env GOPATH)/bin/linux_arm/gotestsum $(BIN_DIR)/gotestsum -build_dev_test: build_test2json build_gotestsum +build_dev_test: build_audio_deps build_test2json build_gotestsum + $(CLEAN_GO_CACHE) # collect all directories that contain tests @echo "Building tests for devices ..." @rm -rf $(BIN_DIR)/tests && mkdir -p $(BIN_DIR)/tests @@ -74,7 +158,7 @@ build_dev_test: build_test2json build_gotestsum test_pkg_name=$$(echo $$test | sed 's/^.\///g'); \ test_pkg_full_name=$(KVM_PKG_NAME)/$$(echo $$test | sed 's/^.\///g'); \ test_filename=$$(echo $$test_pkg_name | sed 's/\//__/g')_test; \ - $(GO_CMD) test -v \ + go test -v \ -ldflags="$(GO_LDFLAGS) -X $(KVM_PKG_NAME).builtAppVersion=$(VERSION_DEV)" \ $(GO_BUILD_ARGS) \ -c -o $(BIN_DIR)/tests/$$test_filename $$test; \ @@ -111,9 +195,10 @@ dev_release: frontend build_dev rclone copyto bin/jetkvm_app r2://jetkvm-update/app/$(VERSION_DEV)/jetkvm_app rclone copyto bin/jetkvm_app.sha256 r2://jetkvm-update/app/$(VERSION_DEV)/jetkvm_app.sha256 -build_release: frontend build_native +build_release: frontend build_native build_audio_deps build_audio_binaries + $(CLEAN_GO_CACHE) @echo "Building release..." - $(GO_CMD) build \ + go build \ -ldflags="$(GO_LDFLAGS) -X $(KVM_PKG_NAME).builtAppVersion=$(VERSION)" \ $(GO_RELEASE_BUILD_ARGS) \ -o bin/jetkvm_app cmd/main.go @@ -127,4 +212,39 @@ release: @echo "Uploading release..." 
@shasum -a 256 bin/jetkvm_app | cut -d ' ' -f 1 > bin/jetkvm_app.sha256 rclone copyto bin/jetkvm_app r2://jetkvm-update/app/$(VERSION)/jetkvm_app - rclone copyto bin/jetkvm_app.sha256 r2://jetkvm-update/app/$(VERSION)/jetkvm_app.sha256 \ No newline at end of file + rclone copyto bin/jetkvm_app.sha256 r2://jetkvm-update/app/$(VERSION)/jetkvm_app.sha256 + +# Run both Go and UI linting +lint: lint-go lint-ui + @echo "All linting completed successfully!" + +# Run golangci-lint locally with the same configuration as CI +lint-go: build_audio_deps + @echo "Running golangci-lint..." + @mkdir -p static && touch static/.gitkeep + golangci-lint run --verbose + +# Run both Go and UI linting with auto-fix +lint-fix: lint-go-fix lint-ui-fix + @echo "All linting with auto-fix completed successfully!" + +# Run golangci-lint with auto-fix +lint-go-fix: build_audio_deps + @echo "Running golangci-lint with auto-fix..." + @mkdir -p static && touch static/.gitkeep + golangci-lint run --fix --verbose + +# Run UI linting locally (mirrors GitHub workflow ui-lint.yml) +lint-ui: + @echo "Running UI lint..." + @cd ui && npm ci + @cd ui && npm run lint + +# Run UI linting with auto-fix +lint-ui-fix: + @echo "Running UI lint with auto-fix..." + @cd ui && npm ci + @cd ui && npm run lint:fix + +# Legacy alias for UI linting (for backward compatibility) +ui-lint: lint-ui diff --git a/README.md b/README.md index 541578c3..42cd3374 100644 --- a/README.md +++ b/README.md @@ -11,13 +11,20 @@ -JetKVM is a high-performance, open-source KVM over IP (Keyboard, Video, Mouse) solution designed for efficient remote management of computers, servers, and workstations. Whether you're dealing with boot failures, installing a new operating system, adjusting BIOS settings, or simply taking control of a machine from afar, JetKVM provides the tools to get it done effectively. 
+ + +JetKVM is a high-performance, open-source KVM over IP (Keyboard, Video, Mouse, Audio) solution designed for efficient remote management of computers, servers, and workstations. Whether you're dealing with boot failures, installing a new operating system, adjusting BIOS settings, or simply taking control of a machine from afar, JetKVM provides the tools to get it done effectively. + + + + ## Features -- **Ultra-low Latency** - 1080p@60FPS video with 30-60ms latency using H.264 encoding. Smooth mouse and keyboard interaction for responsive remote control. +- **Ultra-low Latency** - 1080p@60FPS video with 30-60ms latency using H.264 encoding. Smooth mouse, keyboard, and audio for responsive remote control. +- **First-Class Audio Support** - JetKVM supports bidirectional, low-latency audio streaming using a dual-subprocess architecture with ALSA and Opus integration via CGO. Features both audio output (PC→Browser) and audio input (Browser→PC) with dedicated subprocesses for optimal performance and isolation. - **Free & Optional Remote Access** - Remote management via JetKVM Cloud using WebRTC. -- **Open-source software** - Written in Golang on Linux. Easily customizable through SSH access to the JetKVM device. +- **Open-source software** - Written in Golang (with CGO for audio) on Linux. Easily customizable through SSH access to the JetKVM device. ## Contributing @@ -33,18 +40,19 @@ If you've found an issue and want to report it, please check our [Issues](https: # Development -JetKVM is written in Go & TypeScript. with some bits and pieces written in C. An intermediate level of Go & TypeScript knowledge is recommended for comfortable programming. +JetKVM is written in Go & TypeScript, with some C for low-level integration -The project contains two main parts, the backend software that runs on the KVM device and the frontend software that is served by the KVM device, and also the cloud. 
+The project contains two main parts: the backend software (Go, CGO) that runs on the KVM device, and the frontend software (React/TypeScript) that is served by the KVM device and the cloud. For comprehensive development information, including setup, testing, debugging, and contribution guidelines, see **[DEVELOPMENT.md](DEVELOPMENT.md)**. For quick device development, use the `./dev_deploy.sh` script. It will build the frontend and backend and deploy them to the local KVM device. Run `./dev_deploy.sh --help` for more information. + ## Backend -The backend is written in Go and is responsible for the KVM device management, the cloud API and the cloud web. +The backend is written in Go and is responsible for KVM device management, audio/video streaming, the cloud API, and the cloud web. **Audio uses dedicated subprocesses for both output and input streams, with CGO-based ALSA and Opus processing, IPC communication via Unix sockets, and comprehensive process supervision for reliability.** ## Frontend -The frontend is written in React and TypeScript and is served by the KVM device. It has three build targets: `device`, `development` and `production`. Development is used for development of the cloud version on your local machine, device is used for building the frontend for the KVM device and production is used for building the frontend for the cloud. +The frontend is written in React and TypeScript and is served by the KVM device. It has three build targets: `device`, `development`, and `production`. Development is used for the cloud version on your local machine, device is used for building the frontend for the KVM device, and production is used for building the frontend for the cloud. 
diff --git a/audio_handlers.go b/audio_handlers.go new file mode 100644 index 00000000..8b63859f --- /dev/null +++ b/audio_handlers.go @@ -0,0 +1,36 @@ +package kvm + +import ( + "context" + + "github.com/coder/websocket" + "github.com/jetkvm/kvm/internal/audio" + "github.com/rs/zerolog" +) + +var audioControlService *audio.AudioControlService + +func ensureAudioControlService() *audio.AudioControlService { + if audioControlService == nil { + sessionProvider := &KVMSessionProvider{} + audioControlService = audio.NewAudioControlService(sessionProvider, logger) + + // Set up RPC callback function for the audio package + audio.SetRPCCallbacks( + func() *audio.AudioControlService { return audioControlService }, + ) + } + return audioControlService +} + +// handleSubscribeAudioEvents handles WebSocket audio event subscription +func handleSubscribeAudioEvents(connectionID string, wsCon *websocket.Conn, runCtx context.Context, l *zerolog.Logger) { + ensureAudioControlService() + audioControlService.SubscribeToAudioEvents(connectionID, wsCon, runCtx, l) +} + +// handleUnsubscribeAudioEvents handles WebSocket audio event unsubscription +func handleUnsubscribeAudioEvents(connectionID string, l *zerolog.Logger) { + ensureAudioControlService() + audioControlService.UnsubscribeFromAudioEvents(connectionID, l) +} diff --git a/cloud.go b/cloud.go index a851d51f..a9c0722c 100644 --- a/cloud.go +++ b/cloud.go @@ -20,6 +20,7 @@ import ( "github.com/coder/websocket" "github.com/gin-gonic/gin" + "github.com/jetkvm/kvm/internal/audio" "github.com/rs/zerolog" ) @@ -481,6 +482,16 @@ func handleSessionRequest( cancelKeyboardMacro() currentSession = session + + // Set up audio relay callback to get current session's audio track + // This is needed for audio output to work after enable/disable cycles + audio.SetCurrentSessionCallback(func() audio.AudioTrackWriter { + if currentSession != nil { + return currentSession.AudioTrack + } + return nil + }) + _ = wsjson.Write(context.Background(), 
c, gin.H{"type": "answer", "data": sd}) return nil } diff --git a/config.go b/config.go index c83ccfc7..99332aa0 100644 --- a/config.go +++ b/config.go @@ -159,6 +159,7 @@ var defaultConfig = &Config{ RelativeMouse: true, Keyboard: true, MassStorage: true, + Audio: true, }, NetworkConfig: &network.NetworkConfig{}, DefaultLogLevel: "INFO", diff --git a/internal/audio/audio_mute.go b/internal/audio/audio_mute.go new file mode 100644 index 00000000..d1382ee8 --- /dev/null +++ b/internal/audio/audio_mute.go @@ -0,0 +1,38 @@ +package audio + +import ( + "sync" +) + +// AudioState holds all audio-related state with a single mutex +type AudioState struct { + mu sync.RWMutex + audioMuted bool + microphoneMuted bool +} + +var globalAudioState = &AudioState{} + +func SetAudioMuted(muted bool) { + globalAudioState.mu.Lock() + defer globalAudioState.mu.Unlock() + globalAudioState.audioMuted = muted +} + +func IsAudioMuted() bool { + globalAudioState.mu.RLock() + defer globalAudioState.mu.RUnlock() + return globalAudioState.audioMuted +} + +func SetMicrophoneMuted(muted bool) { + globalAudioState.mu.Lock() + defer globalAudioState.mu.Unlock() + globalAudioState.microphoneMuted = muted +} + +func IsMicrophoneMuted() bool { + globalAudioState.mu.RLock() + defer globalAudioState.mu.RUnlock() + return globalAudioState.microphoneMuted +} diff --git a/internal/audio/c/audio.c b/internal/audio/c/audio.c new file mode 100644 index 00000000..a60a4e06 --- /dev/null +++ b/internal/audio/c/audio.c @@ -0,0 +1,825 @@ +/* + * JetKVM Audio Processing Module + * + * Bidirectional audio processing optimized for ARM NEON SIMD: + * - OUTPUT PATH: TC358743 HDMI audio → Client speakers + * Pipeline: ALSA hw:0,0 capture → 2.5x gain → Opus encode (96kbps, FEC enabled) + * + * - INPUT PATH: Client microphone → Device speakers + * Pipeline: Opus decode (with FEC) → ALSA hw:1,0 playback + * + * Key features: + * - ARM NEON SIMD optimization for all audio operations + * - Opus in-band FEC for packet loss 
resilience + * - Ultra-low CPU usage (~0.5% on RV1106) + * - S16_LE @ 48kHz stereo, 20ms frames (960 samples) + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <errno.h> +#include <alsa/asoundlib.h> +#include <opus.h> + +// ARM NEON SIMD support (always available on JetKVM's ARM Cortex-A7) +#include <arm_neon.h> + +#define SIMD_ALIGN __attribute__((aligned(16))) +#define SIMD_PREFETCH(addr, rw, locality) __builtin_prefetch(addr, rw, locality) + +static int trace_logging_enabled = 0; +static int simd_initialized = 0; + +static void simd_init_once(void) { + if (simd_initialized) return; + simd_initialized = 1; +} + +// ============================================================================ +// GLOBAL STATE VARIABLES +// ============================================================================ + +// ALSA device handles +static snd_pcm_t *pcm_capture_handle = NULL; // OUTPUT: TC358743 HDMI audio → client +static snd_pcm_t *pcm_playback_handle = NULL; // INPUT: Client microphone → device speakers + +// Opus codec instances +static OpusEncoder *encoder = NULL; +static OpusDecoder *decoder = NULL; + +// Audio format (S16_LE @ 48kHz stereo) +static int sample_rate = 48000; +static int channels = 2; +static int frame_size = 960; // 20ms frames at 48kHz + +// Opus encoder settings (optimized for minimal CPU ~0.5% on RV1106) +static int opus_bitrate = 96000; // 96 kbps - good quality/bandwidth balance +static int opus_complexity = 1; // Complexity 1 - minimal CPU usage +static int opus_vbr = 1; // Variable bitrate enabled +static int opus_vbr_constraint = 1; // Constrained VBR - predictable bandwidth +static int opus_signal_type = -1000; // OPUS_AUTO - automatic signal type detection +static int opus_bandwidth = 1103; // OPUS_BANDWIDTH_WIDEBAND (50-8000 Hz) +static int opus_dtx = 0; // DTX disabled - no discontinuous transmission +static int opus_lsb_depth = 16; // 16-bit depth - matches S16_LE format + +// Network configuration +static int max_packet_size = 1500; + +// ALSA retry configuration +static int
sleep_microseconds = 1000; +static int max_attempts_global = 5; +static int max_backoff_us_global = 500000; + +// ALSA buffer configuration (not currently used - kept for future optimization) +static const int optimized_buffer_size = 1; + + +// ============================================================================ +// FUNCTION DECLARATIONS +// ============================================================================ + +int jetkvm_audio_capture_init(); +void jetkvm_audio_capture_close(); +int jetkvm_audio_read_encode(void *opus_buf); + +int jetkvm_audio_playback_init(); +void jetkvm_audio_playback_close(); +int jetkvm_audio_decode_write(void *opus_buf, int opus_size); + +void update_audio_constants(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx, int lsb_depth, int sr, int ch, + int fs, int max_pkt, int sleep_us, int max_attempts, int max_backoff); +void set_trace_logging(int enabled); +int update_opus_encoder_params(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx); + +// ============================================================================ +// CONFIGURATION FUNCTIONS +// ============================================================================ + +/** + * Sync configuration from Go to C + */ +void update_audio_constants(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx, int lsb_depth, int sr, int ch, + int fs, int max_pkt, int sleep_us, int max_attempts, int max_backoff) { + opus_bitrate = bitrate; + opus_complexity = complexity; + opus_vbr = vbr; + opus_vbr_constraint = vbr_constraint; + opus_signal_type = signal_type; + opus_bandwidth = bandwidth; + opus_dtx = dtx; + opus_lsb_depth = lsb_depth; + sample_rate = sr; + channels = ch; + frame_size = fs; + max_packet_size = max_pkt; + sleep_microseconds = sleep_us; + max_attempts_global = max_attempts; + max_backoff_us_global = max_backoff; +} + 
+/** + * Enable/disable trace logging (zero overhead when disabled) + */ +void set_trace_logging(int enabled) { + trace_logging_enabled = enabled; +} + +// ============================================================================ +// SIMD-OPTIMIZED BUFFER OPERATIONS (ARM NEON) +// ============================================================================ + +/** + * Clear audio buffer using NEON (8 samples/iteration) + * @param buffer Audio buffer to clear + * @param samples Number of samples to zero out + */ +static inline void simd_clear_samples_s16(short *buffer, int samples) { + simd_init_once(); + + int simd_samples = samples & ~7; + const int16x8_t zero = vdupq_n_s16(0); + + // SIMD path: zero 8 samples per iteration + for (int i = 0; i < simd_samples; i += 8) { + vst1q_s16(&buffer[i], zero); + } + + // Scalar path: handle remaining samples + for (int i = simd_samples; i < samples; i++) { + buffer[i] = 0; + } +} + +/** + * Apply gain using NEON Q15 fixed-point math (8 samples/iteration) + * Uses vqrdmulhq_s16 for single-instruction saturating rounded multiply-high + * @param samples Audio buffer to scale in-place + * @param count Number of samples to process + * @param volume Gain multiplier (e.g., 2.5 for 2.5x gain) + */ +static inline void simd_scale_volume_s16(short *samples, int count, float volume) { + simd_init_once(); + + // Convert float gain to Q14 fixed-point for vqrdmulhq_s16 + // vqrdmulhq_s16 extracts bits [30:15], so multiply by 16384 (2^14) instead of 32768 (2^15) + int16_t vol_fixed = (int16_t)(volume * 16384.0f); + int16x8_t vol_vec = vdupq_n_s16(vol_fixed); + int simd_count = count & ~7; + + // SIMD path: process 8 samples per iteration + for (int i = 0; i < simd_count; i += 8) { + int16x8_t samples_vec = vld1q_s16(&samples[i]); + int16x8_t result = vqrdmulhq_s16(samples_vec, vol_vec); + vst1q_s16(&samples[i], result); + } + + // Scalar path: handle remaining samples + for (int i = simd_count; i < count; i++) { + samples[i] = 
(short)((samples[i] * vol_fixed) >> 14); + } +} + +// ============================================================================ +// INITIALIZATION STATE TRACKING +// ============================================================================ + +static volatile int capture_initializing = 0; +static volatile int capture_initialized = 0; +static volatile int playback_initializing = 0; +static volatile int playback_initialized = 0; + +/** + * Update Opus encoder settings at runtime (does NOT modify FEC settings) + * Note: FEC configuration remains unchanged - set at initialization + * @return 0 on success, -1 if not initialized, >0 if some settings failed + */ +int update_opus_encoder_params(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx) { + if (!encoder || !capture_initialized) { + return -1; + } + + // Update global configuration variables + opus_bitrate = bitrate; + opus_complexity = complexity; + opus_vbr = vbr; + opus_vbr_constraint = vbr_constraint; + opus_signal_type = signal_type; + opus_bandwidth = bandwidth; + opus_dtx = dtx; + + // Apply settings to encoder (FEC settings not modified) + int result = 0; + result |= opus_encoder_ctl(encoder, OPUS_SET_BITRATE(opus_bitrate)); + result |= opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(opus_complexity)); + result |= opus_encoder_ctl(encoder, OPUS_SET_VBR(opus_vbr)); + result |= opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(opus_vbr_constraint)); + result |= opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(opus_signal_type)); + result |= opus_encoder_ctl(encoder, OPUS_SET_BANDWIDTH(opus_bandwidth)); + result |= opus_encoder_ctl(encoder, OPUS_SET_DTX(opus_dtx)); + + return result; +} + +// ============================================================================ +// ALSA UTILITY FUNCTIONS +// ============================================================================ + +/** + * Open ALSA device with exponential backoff retry + * @return 0 on success, negative 
error code on failure + */ +static int safe_alsa_open(snd_pcm_t **handle, const char *device, snd_pcm_stream_t stream) { + int attempt = 0; + int err; + int backoff_us = sleep_microseconds; + + while (attempt < max_attempts_global) { + err = snd_pcm_open(handle, device, stream, SND_PCM_NONBLOCK); + if (err >= 0) { + snd_pcm_nonblock(*handle, 0); + return 0; + } + + attempt++; + + if (err == -EBUSY || err == -EAGAIN) { + usleep(backoff_us); + backoff_us = (backoff_us * 2 < max_backoff_us_global) ? backoff_us * 2 : max_backoff_us_global; + } else if (err == -ENODEV || err == -ENOENT) { + usleep(backoff_us * 2); + backoff_us = (backoff_us * 2 < max_backoff_us_global) ? backoff_us * 2 : max_backoff_us_global; + } else if (err == -EPERM || err == -EACCES) { + usleep(backoff_us / 2); + } else { + usleep(backoff_us); + backoff_us = (backoff_us * 2 < max_backoff_us_global) ? backoff_us * 2 : max_backoff_us_global; + } + } + return err; +} + +/** + * Configure ALSA device (S16_LE @ 48kHz stereo with optimized buffering) + * @param handle ALSA PCM handle + * @param device_name Unused (for debugging only) + * @return 0 on success, negative error code on failure + */ +static int configure_alsa_device(snd_pcm_t *handle, const char *device_name) { + snd_pcm_hw_params_t *params; + snd_pcm_sw_params_t *sw_params; + int err; + + if (!handle) return -1; + + snd_pcm_hw_params_alloca(&params); + snd_pcm_sw_params_alloca(&sw_params); + + err = snd_pcm_hw_params_any(handle, params); + if (err < 0) return err; + + err = snd_pcm_hw_params_set_access(handle, params, SND_PCM_ACCESS_RW_INTERLEAVED); + if (err < 0) return err; + + err = snd_pcm_hw_params_set_format(handle, params, SND_PCM_FORMAT_S16_LE); + if (err < 0) return err; + + err = snd_pcm_hw_params_set_channels(handle, params, channels); + if (err < 0) return err; + + err = snd_pcm_hw_params_set_rate(handle, params, sample_rate, 0); + if (err < 0) { + unsigned int rate = sample_rate; + err = snd_pcm_hw_params_set_rate_near(handle,
params, &rate, 0); + if (err < 0) return err; + } + + snd_pcm_uframes_t period_size = optimized_buffer_size ? frame_size : frame_size / 2; + if (period_size < 64) period_size = 64; + + err = snd_pcm_hw_params_set_period_size_near(handle, params, &period_size, 0); + if (err < 0) return err; + + snd_pcm_uframes_t buffer_size = optimized_buffer_size ? period_size * 2 : period_size * 4; + err = snd_pcm_hw_params_set_buffer_size_near(handle, params, &buffer_size); + if (err < 0) return err; + + err = snd_pcm_hw_params(handle, params); + if (err < 0) return err; + + err = snd_pcm_sw_params_current(handle, sw_params); + if (err < 0) return err; + + err = snd_pcm_sw_params_set_start_threshold(handle, sw_params, period_size); + if (err < 0) return err; + + err = snd_pcm_sw_params_set_avail_min(handle, sw_params, period_size); + if (err < 0) return err; + + err = snd_pcm_sw_params(handle, sw_params); + if (err < 0) return err; + + return snd_pcm_prepare(handle); +} + +// ============================================================================ +// AUDIO OUTPUT PATH FUNCTIONS (TC358743 HDMI Audio → Client Speakers) +// ============================================================================ + +/** + * Initialize OUTPUT path (TC358743 HDMI capture → Opus encoder) + * Opens hw:0,0 (TC358743) and creates Opus encoder with optimized settings + * @return 0 on success, -EBUSY if initializing, -1/-2/-3 on errors + */ +int jetkvm_audio_capture_init() { + int err; + + simd_init_once(); + + if (__sync_bool_compare_and_swap(&capture_initializing, 0, 1) == 0) { + return -EBUSY; + } + + if (capture_initialized) { + capture_initializing = 0; + return 0; + } + + if (encoder) { + opus_encoder_destroy(encoder); + encoder = NULL; + } + if (pcm_capture_handle) { + snd_pcm_close(pcm_capture_handle); + pcm_capture_handle = NULL; + } + + err = safe_alsa_open(&pcm_capture_handle, "hw:0,0", SND_PCM_STREAM_CAPTURE); + if (err < 0) { + capture_initializing = 0; + return -1; + } + + err = 
configure_alsa_device(pcm_capture_handle, "capture"); + if (err < 0) { + snd_pcm_close(pcm_capture_handle); + pcm_capture_handle = NULL; + capture_initializing = 0; + return -2; + } + + int opus_err = 0; + encoder = opus_encoder_create(sample_rate, channels, OPUS_APPLICATION_AUDIO, &opus_err); + if (!encoder || opus_err != OPUS_OK) { + if (pcm_capture_handle) { + snd_pcm_close(pcm_capture_handle); + pcm_capture_handle = NULL; + } + capture_initializing = 0; + return -3; + } + + // Configure encoder with optimized settings + opus_encoder_ctl(encoder, OPUS_SET_BITRATE(opus_bitrate)); + opus_encoder_ctl(encoder, OPUS_SET_COMPLEXITY(opus_complexity)); + opus_encoder_ctl(encoder, OPUS_SET_VBR(opus_vbr)); + opus_encoder_ctl(encoder, OPUS_SET_VBR_CONSTRAINT(opus_vbr_constraint)); + opus_encoder_ctl(encoder, OPUS_SET_SIGNAL(opus_signal_type)); + opus_encoder_ctl(encoder, OPUS_SET_BANDWIDTH(opus_bandwidth)); + opus_encoder_ctl(encoder, OPUS_SET_DTX(opus_dtx)); + opus_encoder_ctl(encoder, OPUS_SET_LSB_DEPTH(opus_lsb_depth)); + + // Enable in-band FEC (Forward Error Correction) for network resilience + // Embeds redundant data in packets to recover from packet loss (adds ~2-5% bitrate overhead) + opus_encoder_ctl(encoder, OPUS_SET_INBAND_FEC(1)); + opus_encoder_ctl(encoder, OPUS_SET_PACKET_LOSS_PERC(10)); // Optimize for 10% expected loss + + capture_initialized = 1; + capture_initializing = 0; + return 0; +} + +/** + * Read HDMI audio, encode to Opus (OUTPUT path hot function) + * Processing pipeline: ALSA capture → 2.5x gain → Opus encode + * @param opus_buf Output buffer for encoded Opus packet + * @return >0 = Opus packet size in bytes, -1 = error + */ +__attribute__((hot)) int jetkvm_audio_read_encode(void * __restrict__ opus_buf) { + // Static buffers persist across calls for better cache locality + static short SIMD_ALIGN pcm_buffer[1920]; // 960 frames × 2 channels + + // Local variables + unsigned char * __restrict__ out = (unsigned char*)opus_buf; + int pcm_rc; + 
int err = 0; + int recovery_attempts = 0; + const int max_recovery_attempts = 3; + int nb_bytes; + + // Prefetch output buffer for write + SIMD_PREFETCH(out, 1, 3); + SIMD_PREFETCH(pcm_buffer, 0, 3); + + if (__builtin_expect(!capture_initialized || !pcm_capture_handle || !encoder || !opus_buf, 0)) { + if (trace_logging_enabled) { + printf("[AUDIO_OUTPUT] jetkvm_audio_read_encode: Failed safety checks - capture_initialized=%d, pcm_capture_handle=%p, encoder=%p, opus_buf=%p\n", + capture_initialized, pcm_capture_handle, encoder, opus_buf); + } + return -1; + } + +retry_read: + // Read 960 frames (20ms) from ALSA capture device + pcm_rc = snd_pcm_readi(pcm_capture_handle, pcm_buffer, frame_size); + + if (__builtin_expect(pcm_rc < 0, 0)) { + if (pcm_rc == -EPIPE) { + recovery_attempts++; + if (recovery_attempts > max_recovery_attempts) { + return -1; + } + err = snd_pcm_prepare(pcm_capture_handle); + if (err < 0) { + snd_pcm_drop(pcm_capture_handle); + err = snd_pcm_prepare(pcm_capture_handle); + if (err < 0) return -1; + } + goto retry_read; + } else if (pcm_rc == -EAGAIN) { + return 0; + } else if (pcm_rc == -ESTRPIPE) { + recovery_attempts++; + if (recovery_attempts > max_recovery_attempts) { + return -1; + } + int resume_attempts = 0; + while ((err = snd_pcm_resume(pcm_capture_handle)) == -EAGAIN && resume_attempts < 10) { + usleep(sleep_microseconds); + resume_attempts++; + } + if (err < 0) { + err = snd_pcm_prepare(pcm_capture_handle); + if (err < 0) return -1; + } + return 0; + } else if (pcm_rc == -ENODEV) { + return -1; + } else if (pcm_rc == -EIO) { + recovery_attempts++; + if (recovery_attempts <= max_recovery_attempts) { + snd_pcm_drop(pcm_capture_handle); + err = snd_pcm_prepare(pcm_capture_handle); + if (err >= 0) { + goto retry_read; + } + } + return -1; + } else { + recovery_attempts++; + if (recovery_attempts <= 1 && pcm_rc == -EINTR) { + goto retry_read; + } else if (recovery_attempts <= 1 && pcm_rc == -EBUSY) { + usleep(sleep_microseconds / 2); + 
goto retry_read; + } + return -1; + } + } + + // Zero-pad if we got a short read + if (__builtin_expect(pcm_rc < frame_size, 0)) { + int remaining_samples = (frame_size - pcm_rc) * channels; + simd_clear_samples_s16(&pcm_buffer[pcm_rc * channels], remaining_samples); + } + + // Apply 2.5x gain boost to prevent quantization noise at low volumes + // HDMI audio typically transmitted at -6 to -12dB; boost prevents Opus noise floor artifacts + simd_scale_volume_s16(pcm_buffer, frame_size * channels, 2.5f); + + // Encode PCM to Opus (20ms frame → ~200 bytes at 96kbps) + nb_bytes = opus_encode(encoder, pcm_buffer, frame_size, out, max_packet_size); + + if (trace_logging_enabled && nb_bytes > 0) { + printf("[AUDIO_OUTPUT] jetkvm_audio_read_encode: Successfully encoded %d PCM frames to %d Opus bytes\n", pcm_rc, nb_bytes); + } + + return nb_bytes; +} + +// ============================================================================ +// AUDIO INPUT PATH FUNCTIONS (Client Microphone → Device Speakers) +// ============================================================================ + +/** + * Initialize INPUT path (Opus decoder → device speakers) + * Opens hw:1,0 (USB gadget) or "default" and creates Opus decoder + * @return 0 on success, -EBUSY if initializing, -1/-2 on errors + */ +int jetkvm_audio_playback_init() { + int err; + + simd_init_once(); + + if (__sync_bool_compare_and_swap(&playback_initializing, 0, 1) == 0) { + return -EBUSY; + } + + if (playback_initialized) { + playback_initializing = 0; + return 0; + } + + if (decoder) { + opus_decoder_destroy(decoder); + decoder = NULL; + } + if (pcm_playback_handle) { + snd_pcm_close(pcm_playback_handle); + pcm_playback_handle = NULL; + } + + err = safe_alsa_open(&pcm_playback_handle, "hw:1,0", SND_PCM_STREAM_PLAYBACK); + if (err < 0) { + err = safe_alsa_open(&pcm_playback_handle, "default", SND_PCM_STREAM_PLAYBACK); + if (err < 0) { + playback_initializing = 0; + return -1; + } + } + + err = 
configure_alsa_device(pcm_playback_handle, "playback"); + if (err < 0) { + snd_pcm_close(pcm_playback_handle); + pcm_playback_handle = NULL; + playback_initializing = 0; + return -1; + } + + int opus_err = 0; + decoder = opus_decoder_create(sample_rate, channels, &opus_err); + if (!decoder || opus_err != OPUS_OK) { + snd_pcm_close(pcm_playback_handle); + pcm_playback_handle = NULL; + playback_initializing = 0; + return -2; + } + + playback_initialized = 1; + playback_initializing = 0; + return 0; +} + +/** + * Decode Opus, write to device speakers (INPUT path hot function) + * Processing pipeline: Opus decode (with FEC) → ALSA playback with error recovery + * @param opus_buf Encoded Opus packet from client + * @param opus_size Size of Opus packet in bytes + * @return >0 = PCM frames written, 0 = frame skipped, -1/-2 = error + */ +__attribute__((hot)) int jetkvm_audio_decode_write(void * __restrict__ opus_buf, int opus_size) { + // Static buffer persists across calls for better cache locality + static short SIMD_ALIGN pcm_buffer[1920]; // 960 frames × 2 channels + + // Local variables + unsigned char * __restrict__ in = (unsigned char*)opus_buf; + int pcm_frames; + int pcm_rc; + int err = 0; + int recovery_attempts = 0; + const int max_recovery_attempts = 3; + + // Prefetch input buffer for read + SIMD_PREFETCH(in, 0, 3); + + if (__builtin_expect(!playback_initialized || !pcm_playback_handle || !decoder || !opus_buf || opus_size <= 0, 0)) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Failed safety checks - playback_initialized=%d, pcm_playback_handle=%p, decoder=%p, opus_buf=%p, opus_size=%d\n", + playback_initialized, pcm_playback_handle, decoder, opus_buf, opus_size); + } + return -1; + } + + if (opus_size > max_packet_size) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Opus packet too large - size=%d, max=%d\n", opus_size, max_packet_size); + } + return -1; + } + + if 
(trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Processing Opus packet - size=%d bytes\n", opus_size); + } + + // Decode Opus packet to PCM (FEC automatically applied if embedded in packet) + // decode_fec=0 means normal decode (FEC data is used automatically when present) + pcm_frames = opus_decode(decoder, in, opus_size, pcm_buffer, frame_size, 0); + + if (__builtin_expect(pcm_frames < 0, 0)) { + // Decode failed - attempt packet loss concealment using FEC from previous packet + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Opus decode failed with error %d, attempting packet loss concealment\n", pcm_frames); + } + + // decode_fec=1 means use FEC data from the NEXT packet to reconstruct THIS lost packet + pcm_frames = opus_decode(decoder, NULL, 0, pcm_buffer, frame_size, 1); + if (pcm_frames < 0) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Packet loss concealment also failed with error %d\n", pcm_frames); + } + return -1; + } + + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Packet loss concealment succeeded, recovered %d frames\n", pcm_frames); + } + } else if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Opus decode successful - decoded %d PCM frames\n", pcm_frames); + } + +retry_write: + // Write decoded PCM to ALSA playback device + pcm_rc = snd_pcm_writei(pcm_playback_handle, pcm_buffer, pcm_frames); + if (__builtin_expect(pcm_rc < 0, 0)) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: ALSA write failed with error %d (%s), attempt %d/%d\n", + pcm_rc, snd_strerror(pcm_rc), recovery_attempts + 1, max_recovery_attempts); + } + + if (pcm_rc == -EPIPE) { + recovery_attempts++; + if (recovery_attempts > max_recovery_attempts) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Buffer underrun recovery failed after %d attempts\n", 
max_recovery_attempts); + } + return -2; + } + + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Buffer underrun detected, attempting recovery (attempt %d)\n", recovery_attempts); + } + err = snd_pcm_prepare(pcm_playback_handle); + if (err < 0) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: snd_pcm_prepare failed (%s), trying drop+prepare\n", snd_strerror(err)); + } + snd_pcm_drop(pcm_playback_handle); + err = snd_pcm_prepare(pcm_playback_handle); + if (err < 0) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: drop+prepare recovery failed (%s)\n", snd_strerror(err)); + } + return -2; + } + } + + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Buffer underrun recovery successful, retrying write\n"); + } + goto retry_write; + } else if (pcm_rc == -ESTRPIPE) { + recovery_attempts++; + if (recovery_attempts > max_recovery_attempts) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device suspend recovery failed after %d attempts\n", max_recovery_attempts); + } + return -2; + } + + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device suspended, attempting resume (attempt %d)\n", recovery_attempts); + } + int resume_attempts = 0; + while ((err = snd_pcm_resume(pcm_playback_handle)) == -EAGAIN && resume_attempts < 10) { + usleep(sleep_microseconds); + resume_attempts++; + } + if (err < 0) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device resume failed (%s), trying prepare fallback\n", snd_strerror(err)); + } + err = snd_pcm_prepare(pcm_playback_handle); + if (err < 0) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Prepare fallback failed (%s)\n", snd_strerror(err)); + } + return -2; + } + } + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device suspend recovery 
successful, skipping frame\n"); + } + return 0; + } else if (pcm_rc == -ENODEV) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device disconnected (ENODEV) - critical error\n"); + } + return -2; + } else if (pcm_rc == -EIO) { + recovery_attempts++; + if (recovery_attempts <= max_recovery_attempts) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: I/O error detected, attempting recovery\n"); + } + snd_pcm_drop(pcm_playback_handle); + err = snd_pcm_prepare(pcm_playback_handle); + if (err >= 0) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: I/O error recovery successful, retrying write\n"); + } + goto retry_write; + } + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: I/O error recovery failed (%s)\n", snd_strerror(err)); + } + } + return -2; + } else if (pcm_rc == -EAGAIN) { + recovery_attempts++; + if (recovery_attempts <= max_recovery_attempts) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device not ready (EAGAIN), waiting and retrying\n"); + } + snd_pcm_wait(pcm_playback_handle, sleep_microseconds / 4000); + goto retry_write; + } + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Device not ready recovery failed after %d attempts\n", max_recovery_attempts); + } + return -2; + } else { + recovery_attempts++; + if (recovery_attempts <= 1 && (pcm_rc == -EINTR || pcm_rc == -EBUSY)) { + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Transient error %d (%s), retrying once\n", pcm_rc, snd_strerror(pcm_rc)); + } + usleep(sleep_microseconds / 2); + goto retry_write; + } + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Unrecoverable error %d (%s)\n", pcm_rc, snd_strerror(pcm_rc)); + } + return -2; + } + } + + if (trace_logging_enabled) { + printf("[AUDIO_INPUT] jetkvm_audio_decode_write: Successfully 
wrote %d PCM frames to device\n", pcm_frames); + } + return pcm_frames; +} + +// ============================================================================ +// CLEANUP FUNCTIONS +// ============================================================================ + +/** + * Close INPUT path (thread-safe with drain) + */ +void jetkvm_audio_playback_close() { + while (playback_initializing) { + usleep(sleep_microseconds); + } + + if (__sync_bool_compare_and_swap(&playback_initialized, 1, 0) == 0) { + return; + } + + if (decoder) { + opus_decoder_destroy(decoder); + decoder = NULL; + } + if (pcm_playback_handle) { + snd_pcm_drain(pcm_playback_handle); + snd_pcm_close(pcm_playback_handle); + pcm_playback_handle = NULL; + } +} + +/** + * Close OUTPUT path (thread-safe with drain) + */ +void jetkvm_audio_capture_close() { + while (capture_initializing) { + usleep(sleep_microseconds); + } + + if (__sync_bool_compare_and_swap(&capture_initialized, 1, 0) == 0) { + return; + } + + if (encoder) { + opus_encoder_destroy(encoder); + encoder = NULL; + } + if (pcm_capture_handle) { + snd_pcm_drain(pcm_capture_handle); + snd_pcm_close(pcm_capture_handle); + pcm_capture_handle = NULL; + } +} diff --git a/internal/audio/c/audio_common.c b/internal/audio/c/audio_common.c new file mode 100644 index 00000000..0b7b14ec --- /dev/null +++ b/internal/audio/c/audio_common.c @@ -0,0 +1,81 @@ +/* + * JetKVM Audio Common Utilities + * + * Shared functions used by both audio input and output servers + */ + +#include "audio_common.h" +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <signal.h> + +// ============================================================================ +// GLOBAL STATE FOR SIGNAL HANDLER +// ============================================================================ + +// Pointer to the running flag that will be set to 0 on shutdown +static volatile sig_atomic_t *g_running_ptr = NULL; + +// ============================================================================ +// SIGNAL HANDLERS +// 
============================================================================ + +static void signal_handler(int signo) { + if (signo == SIGTERM || signo == SIGINT) { + printf("Audio server: Received signal %d, shutting down...\n", signo); + if (g_running_ptr != NULL) { + *g_running_ptr = 0; + } + } +} + +void audio_common_setup_signal_handlers(volatile sig_atomic_t *running) { + g_running_ptr = running; + + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = signal_handler; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + + sigaction(SIGTERM, &sa, NULL); + sigaction(SIGINT, &sa, NULL); + + // Ignore SIGPIPE (write to closed socket should return error, not crash) + signal(SIGPIPE, SIG_IGN); +} + +// ============================================================================ +// CONFIGURATION PARSING +// ============================================================================ + +int audio_common_parse_env_int(const char *name, int default_value) { + const char *str = getenv(name); + if (str == NULL || str[0] == '\0') { + return default_value; + } + return atoi(str); +} + +const char* audio_common_parse_env_string(const char *name, const char *default_value) { + const char *str = getenv(name); + if (str == NULL || str[0] == '\0') { + return default_value; + } + return str; +} + +int audio_common_is_trace_enabled(void) { + const char *pion_trace = getenv("PION_LOG_TRACE"); + if (pion_trace == NULL) { + return 0; + } + + // Check if "audio" is in comma-separated list + if (strstr(pion_trace, "audio") != NULL) { + return 1; + } + + return 0; +} diff --git a/internal/audio/c/audio_common.h b/internal/audio/c/audio_common.h new file mode 100644 index 00000000..ae8b9a10 --- /dev/null +++ b/internal/audio/c/audio_common.h @@ -0,0 +1,57 @@ +/* + * JetKVM Audio Common Utilities + * + * Shared functions used by both audio input and output servers + */ + +#ifndef JETKVM_AUDIO_COMMON_H +#define JETKVM_AUDIO_COMMON_H + +#include <signal.h> + +// 
============================================================================ +// SIGNAL HANDLERS +// ============================================================================ + +/** + * Setup signal handlers for graceful shutdown. + * Handles SIGTERM and SIGINT by setting the running flag to 0. + * Ignores SIGPIPE to prevent crashes on broken pipe writes. + * + * @param running Pointer to the volatile running flag to set on shutdown + */ +void audio_common_setup_signal_handlers(volatile sig_atomic_t *running); + +// ============================================================================ +// CONFIGURATION PARSING +// ============================================================================ + +/** + * Parse integer from environment variable. + * Returns default_value if variable is not set or empty. + * + * @param name Environment variable name + * @param default_value Default value if not set + * @return Parsed integer value or default + */ +int audio_common_parse_env_int(const char *name, int default_value); + +/** + * Parse string from environment variable. + * Returns default_value if variable is not set or empty. + * + * @param name Environment variable name + * @param default_value Default value if not set + * @return Environment variable value or default (not duplicated) + */ +const char* audio_common_parse_env_string(const char *name, const char *default_value); + +/** + * Check if trace logging is enabled for audio subsystem. + * Looks for "audio" in PION_LOG_TRACE comma-separated list. 
+ * + * @return 1 if enabled, 0 otherwise + */ +int audio_common_is_trace_enabled(void); + +#endif // JETKVM_AUDIO_COMMON_H diff --git a/internal/audio/c/ipc_protocol.c b/internal/audio/c/ipc_protocol.c new file mode 100644 index 00000000..372cfcee --- /dev/null +++ b/internal/audio/c/ipc_protocol.c @@ -0,0 +1,309 @@ +/* + * JetKVM Audio IPC Protocol Implementation + * + * Implements Unix domain socket communication with exact byte-level + * compatibility with Go implementation in internal/audio/ipc_*.go + */ + +#include "ipc_protocol.h" + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// ============================================================================ +// HELPER FUNCTIONS +// ============================================================================ + +/** + * Read exactly N bytes from socket (loops until complete or error). + * This is critical because read() may return partial data. + */ +int ipc_read_full(int sock, void *buf, size_t len) { + uint8_t *ptr = (uint8_t *)buf; + size_t remaining = len; + + while (remaining > 0) { + ssize_t n = read(sock, ptr, remaining); + + if (n < 0) { + if (errno == EINTR) { + continue; // Interrupted by signal, retry + } + return -1; // Read error + } + + if (n == 0) { + return -1; // EOF (connection closed) + } + + ptr += n; + remaining -= n; + } + + return 0; // Success +} + +/** + * Get current time in nanoseconds (Unix epoch). + * Compatible with Go time.Now().UnixNano(). + */ +int64_t ipc_get_time_ns(void) { + struct timespec ts; + if (clock_gettime(CLOCK_REALTIME, &ts) != 0) { + return 0; // Fallback on error + } + return (int64_t)ts.tv_sec * 1000000000LL + (int64_t)ts.tv_nsec; +} + +// ============================================================================ +// MESSAGE READ/WRITE +// ============================================================================ + +/** + * Read a complete IPC message from socket. + * Returns 0 on success, -1 on error. 
+ * Caller MUST free msg->data if non-NULL! + */ +int ipc_read_message(int sock, ipc_message_t *msg, uint32_t expected_magic) { + if (msg == NULL) { + return -1; + } + + // Initialize message + memset(msg, 0, sizeof(ipc_message_t)); + + // 1. Read header (17 bytes) + if (ipc_read_full(sock, &msg->header, IPC_HEADER_SIZE) != 0) { + return -1; + } + + // 2. Convert from little-endian (required on big-endian systems) + msg->header.magic = le32toh(msg->header.magic); + msg->header.length = le32toh(msg->header.length); + msg->header.timestamp = le64toh(msg->header.timestamp); + // Note: type is uint8_t, no conversion needed + + // 3. Validate magic number + if (msg->header.magic != expected_magic) { + fprintf(stderr, "IPC: Invalid magic number: got 0x%08X, expected 0x%08X\n", + msg->header.magic, expected_magic); + return -1; + } + + // 4. Validate length + if (msg->header.length > IPC_MAX_FRAME_SIZE) { + fprintf(stderr, "IPC: Message too large: %u bytes (max %d)\n", + msg->header.length, IPC_MAX_FRAME_SIZE); + return -1; + } + + // 5. Read payload if present + if (msg->header.length > 0) { + msg->data = malloc(msg->header.length); + if (msg->data == NULL) { + fprintf(stderr, "IPC: Failed to allocate %u bytes for payload\n", + msg->header.length); + return -1; + } + + if (ipc_read_full(sock, msg->data, msg->header.length) != 0) { + free(msg->data); + msg->data = NULL; + return -1; + } + } + + return 0; // Success +} + +/** + * Write a complete IPC message to socket. + * Uses writev() for atomic header+payload write. + * Returns 0 on success, -1 on error. 
+ */ +int ipc_write_message(int sock, uint32_t magic, uint8_t type, + const uint8_t *data, uint32_t length) { + // Validate length + if (length > IPC_MAX_FRAME_SIZE) { + fprintf(stderr, "IPC: Message too large: %u bytes (max %d)\n", + length, IPC_MAX_FRAME_SIZE); + return -1; + } + + // Prepare header + ipc_header_t header; + header.magic = htole32(magic); + header.type = type; + header.length = htole32(length); + header.timestamp = htole64(ipc_get_time_ns()); + + // Use writev for atomic write (if possible) + struct iovec iov[2]; + iov[0].iov_base = &header; + iov[0].iov_len = IPC_HEADER_SIZE; + iov[1].iov_base = (void *)data; + iov[1].iov_len = length; + + int iovcnt = (length > 0) ? 2 : 1; + size_t total_len = IPC_HEADER_SIZE + length; + + ssize_t written = writev(sock, iov, iovcnt); + + if (written < 0) { + if (errno == EINTR) { + // Retry once on interrupt + written = writev(sock, iov, iovcnt); + } + + if (written < 0) { + perror("IPC: writev failed"); + return -1; + } + } + + if ((size_t)written != total_len) { + fprintf(stderr, "IPC: Partial write: %zd/%zu bytes\n", written, total_len); + return -1; + } + + return 0; // Success +} + +// ============================================================================ +// CONFIGURATION PARSING +// ============================================================================ + +/** + * Parse Opus configuration from message data (36 bytes, little-endian). 
+ */ +int ipc_parse_opus_config(const uint8_t *data, uint32_t length, ipc_opus_config_t *config) { + if (data == NULL || config == NULL) { + return -1; + } + + if (length != 36) { + fprintf(stderr, "IPC: Invalid Opus config size: %u bytes (expected 36)\n", length); + return -1; + } + + // Parse little-endian uint32 fields + const uint32_t *u32_data = (const uint32_t *)data; + config->sample_rate = le32toh(u32_data[0]); + config->channels = le32toh(u32_data[1]); + config->frame_size = le32toh(u32_data[2]); + config->bitrate = le32toh(u32_data[3]); + config->complexity = le32toh(u32_data[4]); + config->vbr = le32toh(u32_data[5]); + config->signal_type = le32toh(u32_data[6]); + config->bandwidth = le32toh(u32_data[7]); + config->dtx = le32toh(u32_data[8]); + + return 0; // Success +} + +/** + * Parse basic audio configuration from message data (12 bytes, little-endian). + */ +int ipc_parse_config(const uint8_t *data, uint32_t length, ipc_config_t *config) { + if (data == NULL || config == NULL) { + return -1; + } + + if (length != 12) { + fprintf(stderr, "IPC: Invalid config size: %u bytes (expected 12)\n", length); + return -1; + } + + // Parse little-endian uint32 fields + const uint32_t *u32_data = (const uint32_t *)data; + config->sample_rate = le32toh(u32_data[0]); + config->channels = le32toh(u32_data[1]); + config->frame_size = le32toh(u32_data[2]); + + return 0; // Success +} + +/** + * Free message resources. + */ +void ipc_free_message(ipc_message_t *msg) { + if (msg != NULL && msg->data != NULL) { + free(msg->data); + msg->data = NULL; + } +} + +// ============================================================================ +// SOCKET MANAGEMENT +// ============================================================================ + +/** + * Create Unix domain socket server. + */ +int ipc_create_server(const char *socket_path) { + if (socket_path == NULL) { + return -1; + } + + // 1. 
Create socket + int sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + perror("IPC: socket() failed"); + return -1; + } + + // 2. Remove existing socket file (ignore errors) + unlink(socket_path); + + // 3. Bind to path + struct sockaddr_un addr; + memset(&addr, 0, sizeof(addr)); + addr.sun_family = AF_UNIX; + + if (strlen(socket_path) >= sizeof(addr.sun_path)) { + fprintf(stderr, "IPC: Socket path too long: %s\n", socket_path); + close(sock); + return -1; + } + + strncpy(addr.sun_path, socket_path, sizeof(addr.sun_path) - 1); + + if (bind(sock, (struct sockaddr *)&addr, sizeof(addr)) < 0) { + perror("IPC: bind() failed"); + close(sock); + return -1; + } + + // 4. Listen with backlog=1 (single client) + if (listen(sock, 1) < 0) { + perror("IPC: listen() failed"); + close(sock); + return -1; + } + + printf("IPC: Server listening on %s\n", socket_path); + return sock; +} + +/** + * Accept client connection. + */ +int ipc_accept_client(int server_sock) { + int client_sock = accept(server_sock, NULL, NULL); + + if (client_sock < 0) { + perror("IPC: accept() failed"); + return -1; + } + + printf("IPC: Client connected (fd=%d)\n", client_sock); + return client_sock; +} diff --git a/internal/audio/c/ipc_protocol.h b/internal/audio/c/ipc_protocol.h new file mode 100644 index 00000000..c5af32d4 --- /dev/null +++ b/internal/audio/c/ipc_protocol.h @@ -0,0 +1,210 @@ +/* + * JetKVM Audio IPC Protocol + * + * Wire protocol for Unix domain socket communication between main process + * and audio subprocesses. This protocol is 100% compatible with the Go + * implementation in internal/audio/ipc_*.go + * + * CRITICAL: All multi-byte integers use LITTLE-ENDIAN byte order. 
+ */ + +#ifndef JETKVM_IPC_PROTOCOL_H +#define JETKVM_IPC_PROTOCOL_H + +#include +#include + +// ============================================================================ +// PROTOCOL CONSTANTS +// ============================================================================ + +// Magic numbers (ASCII representation when read as little-endian) +#define IPC_MAGIC_OUTPUT 0x4A4B4F55 // "JKOU" - JetKVM Output (device → browser) +#define IPC_MAGIC_INPUT 0x4A4B4D49 // "JKMI" - JetKVM Microphone Input (browser → device) + +// Message types (matches Go UnifiedMessageType enum) +#define IPC_MSG_TYPE_OPUS_FRAME 0 // Audio frame data (Opus encoded) +#define IPC_MSG_TYPE_CONFIG 1 // Basic audio config (12 bytes) +#define IPC_MSG_TYPE_OPUS_CONFIG 2 // Complete Opus config (36 bytes) +#define IPC_MSG_TYPE_STOP 3 // Shutdown signal +#define IPC_MSG_TYPE_HEARTBEAT 4 // Keep-alive ping +#define IPC_MSG_TYPE_ACK 5 // Acknowledgment + +// Size constraints +#define IPC_HEADER_SIZE 17 // Fixed header size +#define IPC_MAX_FRAME_SIZE 4096 // Maximum payload size (matches Go Config.MaxFrameSize) + +// Socket paths +#define IPC_SOCKET_OUTPUT "/var/run/audio_output.sock" +#define IPC_SOCKET_INPUT "/var/run/audio_input.sock" + +// ============================================================================ +// WIRE FORMAT STRUCTURES +// ============================================================================ + +/** + * IPC message header (17 bytes, little-endian) + * + * Byte layout: + * [0-3] magic uint32_t LE Magic number (0x4A4B4F55 or 0x4A4B4D49) + * [4] type uint8_t Message type (0-5) + * [5-8] length uint32_t LE Payload size in bytes + * [9-16] timestamp int64_t LE Unix nanoseconds (time.Now().UnixNano()) + * [17+] data uint8_t[] Variable payload + * + * CRITICAL: Must use __attribute__((packed)) to prevent padding. 
+ */ +typedef struct __attribute__((packed)) { + uint32_t magic; // Magic number (LE) + uint8_t type; // Message type + uint32_t length; // Payload length in bytes (LE) + int64_t timestamp; // Unix nanoseconds (LE) +} ipc_header_t; + +/** + * Basic audio configuration (12 bytes) + * Message type: IPC_MSG_TYPE_CONFIG + * + * All fields are uint32_t little-endian. + */ +typedef struct __attribute__((packed)) { + uint32_t sample_rate; // Samples per second (e.g., 48000) + uint32_t channels; // Number of channels (e.g., 2 for stereo) + uint32_t frame_size; // Samples per frame (e.g., 960) +} ipc_config_t; + +/** + * Complete Opus encoder/decoder configuration (36 bytes) + * Message type: IPC_MSG_TYPE_OPUS_CONFIG + * + * All fields are uint32_t little-endian. + * Note: Negative values (like signal_type=-1000) are stored as two's complement uint32. + */ +typedef struct __attribute__((packed)) { + uint32_t sample_rate; // Samples per second (48000) + uint32_t channels; // Number of channels (2) + uint32_t frame_size; // Samples per frame (960) + uint32_t bitrate; // Bits per second (96000) + uint32_t complexity; // Encoder complexity 0-10 (1=fast, 10=best quality) + uint32_t vbr; // Variable bitrate: 0=disabled, 1=enabled + uint32_t signal_type; // Signal type: -1000=auto, 3001=music, 3002=voice + uint32_t bandwidth; // Bandwidth: 1101=narrowband, 1102=mediumband, 1103=wideband + uint32_t dtx; // Discontinuous transmission: 0=disabled, 1=enabled +} ipc_opus_config_t; + +/** + * Complete IPC message (header + payload) + */ +typedef struct { + ipc_header_t header; + uint8_t *data; // Dynamically allocated payload (NULL if length=0) +} ipc_message_t; + +// ============================================================================ +// FUNCTION DECLARATIONS +// ============================================================================ + +/** + * Read a complete IPC message from socket. + * + * This function: + * 1. Reads exactly 17 bytes (header) + * 2. 
Validates magic number + * 3. Validates length <= IPC_MAX_FRAME_SIZE + * 4. Allocates and reads payload if length > 0 + * 5. Stores result in msg->header and msg->data + * + * @param sock Socket file descriptor + * @param msg Output message (data will be malloc'd if length > 0) + * @param expected_magic Expected magic number (IPC_MAGIC_OUTPUT or IPC_MAGIC_INPUT) + * @return 0 on success, -1 on error + * + * CALLER MUST FREE msg->data if non-NULL! + */ +int ipc_read_message(int sock, ipc_message_t *msg, uint32_t expected_magic); + +/** + * Write a complete IPC message to socket. + * + * This function writes header + payload atomically (if possible via writev). + * Sets timestamp to current time. + * + * @param sock Socket file descriptor + * @param magic Magic number (IPC_MAGIC_OUTPUT or IPC_MAGIC_INPUT) + * @param type Message type (IPC_MSG_TYPE_*) + * @param data Payload data (can be NULL if length=0) + * @param length Payload length in bytes + * @return 0 on success, -1 on error + */ +int ipc_write_message(int sock, uint32_t magic, uint8_t type, + const uint8_t *data, uint32_t length); + +/** + * Parse Opus configuration from message data. + * + * @param data Payload data (must be exactly 36 bytes) + * @param length Payload length (must be 36) + * @param config Output Opus configuration + * @return 0 on success, -1 if length != 36 + */ +int ipc_parse_opus_config(const uint8_t *data, uint32_t length, ipc_opus_config_t *config); + +/** + * Parse basic audio configuration from message data. + * + * @param data Payload data (must be exactly 12 bytes) + * @param length Payload length (must be 12) + * @param config Output audio configuration + * @return 0 on success, -1 if length != 12 + */ +int ipc_parse_config(const uint8_t *data, uint32_t length, ipc_config_t *config); + +/** + * Free message resources. 
+ * + * @param msg Message to free (frees msg->data if non-NULL) + */ +void ipc_free_message(ipc_message_t *msg); + +/** + * Get current time in nanoseconds (Unix epoch). + * + * @return Time in nanoseconds (compatible with Go time.Now().UnixNano()) + */ +int64_t ipc_get_time_ns(void); + +/** + * Create Unix domain socket server. + * + * This function: + * 1. Creates socket with AF_UNIX, SOCK_STREAM + * 2. Removes existing socket file + * 3. Binds to specified path + * 4. Listens with backlog=1 (single client) + * + * @param socket_path Path to Unix socket (e.g., "/var/run/audio_output.sock") + * @return Socket fd on success, -1 on error + */ +int ipc_create_server(const char *socket_path); + +/** + * Accept client connection with automatic retry. + * + * Blocks until client connects or error occurs. + * + * @param server_sock Server socket fd from ipc_create_server() + * @return Client socket fd on success, -1 on error + */ +int ipc_accept_client(int server_sock); + +/** + * Helper: Read exactly N bytes from socket (loops until complete or error). + * + * @param sock Socket file descriptor + * @param buf Output buffer + * @param len Number of bytes to read + * @return 0 on success, -1 on error + */ +int ipc_read_full(int sock, void *buf, size_t len); + +#endif // JETKVM_IPC_PROTOCOL_H diff --git a/internal/audio/c/jetkvm_audio_input.c b/internal/audio/c/jetkvm_audio_input.c new file mode 100644 index 00000000..17ba53af --- /dev/null +++ b/internal/audio/c/jetkvm_audio_input.c @@ -0,0 +1,294 @@ +/* + * JetKVM Audio Input Server + * + * Standalone C binary for audio input path: + * Browser → WebRTC → Go Process → IPC Receive → Opus Decode → ALSA Playback (USB Gadget) + * + * This replaces the Go subprocess that was running with --audio-input-server flag. + * + * IMPORTANT: This binary only does OPUS DECODING (not encoding). + * The browser already encodes audio to Opus before sending via WebRTC. 
+ */ + +#include "ipc_protocol.h" +#include "audio_common.h" +#include +#include +#include +#include +#include +#include +#include + +// Forward declarations from audio.c +extern int jetkvm_audio_playback_init(void); +extern void jetkvm_audio_playback_close(void); +extern int jetkvm_audio_decode_write(void *opus_buf, int opus_size); +extern void update_audio_constants(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx, int lsb_depth, + int sr, int ch, int fs, int max_pkt, + int sleep_us, int max_attempts, int max_backoff); +extern void set_trace_logging(int enabled); + +// Note: Input server uses decoder, not encoder, so no update_opus_encoder_params + +// ============================================================================ +// GLOBAL STATE +// ============================================================================ + +static volatile sig_atomic_t g_running = 1; // Shutdown flag + +// Audio configuration (from environment variables) +typedef struct { + const char *alsa_device; // ALSA playback device (default: "hw:1,0") + int opus_bitrate; // Opus bitrate (informational for decoder) + int opus_complexity; // Opus complexity (decoder ignores this) + int sample_rate; // Sample rate (default: 48000) + int channels; // Channels (default: 2) + int frame_size; // Frame size in samples (default: 960) + int trace_logging; // Enable trace logging (default: 0) +} audio_config_t; + +// ============================================================================ +// CONFIGURATION PARSING +// ============================================================================ + +static void load_audio_config(audio_config_t *config) { + // ALSA device configuration + config->alsa_device = audio_common_parse_env_string("ALSA_PLAYBACK_DEVICE", "hw:1,0"); + + // Opus configuration (informational only for decoder) + config->opus_bitrate = audio_common_parse_env_int("OPUS_BITRATE", 96000); + config->opus_complexity = 
audio_common_parse_env_int("OPUS_COMPLEXITY", 1); + + // Audio format + config->sample_rate = audio_common_parse_env_int("AUDIO_SAMPLE_RATE", 48000); + config->channels = audio_common_parse_env_int("AUDIO_CHANNELS", 2); + config->frame_size = audio_common_parse_env_int("AUDIO_FRAME_SIZE", 960); + + // Logging + config->trace_logging = audio_common_is_trace_enabled(); + + // Log configuration + printf("Audio Input Server Configuration:\n"); + printf(" ALSA Device: %s\n", config->alsa_device); + printf(" Sample Rate: %d Hz\n", config->sample_rate); + printf(" Channels: %d\n", config->channels); + printf(" Frame Size: %d samples\n", config->frame_size); + printf(" Trace Logging: %s\n", config->trace_logging ? "enabled" : "disabled"); +} + +// ============================================================================ +// MESSAGE HANDLING +// ============================================================================ + +/** + * Handle OpusConfig message: informational only for decoder. + * Decoder config updates are less critical than encoder. + * Returns 0 on success. + */ +static int handle_opus_config(const uint8_t *data, uint32_t length) { + ipc_opus_config_t config; + + if (ipc_parse_opus_config(data, length, &config) != 0) { + fprintf(stderr, "Failed to parse Opus config\n"); + return -1; + } + + printf("Received Opus config (informational): bitrate=%u, complexity=%u\n", + config.bitrate, config.complexity); + + // Note: Decoder doesn't need most of these parameters. + // Opus decoder automatically adapts to encoder settings embedded in stream. + // FEC (Forward Error Correction) is enabled automatically when present in packets. + + return 0; +} + +/** + * Send ACK response for heartbeat messages. 
+ */ +static int send_ack(int client_sock) { + return ipc_write_message(client_sock, IPC_MAGIC_INPUT, IPC_MSG_TYPE_ACK, NULL, 0); +} + +// ============================================================================ +// MAIN LOOP +// ============================================================================ + +/** + * Main audio decode and playback loop. + * Receives Opus frames via IPC, decodes, writes to ALSA. + */ +static int run_audio_loop(int client_sock) { + int consecutive_errors = 0; + const int max_consecutive_errors = 10; + int frame_count = 0; + + printf("Starting audio input loop...\n"); + + while (g_running) { + ipc_message_t msg; + + // Read message from client (blocking) + if (ipc_read_message(client_sock, &msg, IPC_MAGIC_INPUT) != 0) { + if (g_running) { + fprintf(stderr, "Failed to read message from client\n"); + } + break; // Client disconnected or error + } + + // Process message based on type + switch (msg.header.type) { + case IPC_MSG_TYPE_OPUS_FRAME: { + if (msg.header.length == 0 || msg.data == NULL) { + fprintf(stderr, "Warning: Empty Opus frame received\n"); + ipc_free_message(&msg); + continue; + } + + // Decode Opus and write to ALSA + int frames_written = jetkvm_audio_decode_write(msg.data, msg.header.length); + + if (frames_written < 0) { + consecutive_errors++; + fprintf(stderr, "Audio decode/write failed (error %d/%d)\n", + consecutive_errors, max_consecutive_errors); + + if (consecutive_errors >= max_consecutive_errors) { + fprintf(stderr, "Too many consecutive errors, giving up\n"); + ipc_free_message(&msg); + return -1; + } + } else { + // Success - reset error counter + consecutive_errors = 0; + frame_count++; + + // Trace logging (periodic) + if (frame_count % 1000 == 1) { + printf("Processed frame %d (opus_size=%u, pcm_frames=%d)\n", + frame_count, msg.header.length, frames_written); + } + } + + break; + } + + case IPC_MSG_TYPE_CONFIG: + printf("Received basic audio config\n"); + send_ack(client_sock); + break; + + case 
IPC_MSG_TYPE_OPUS_CONFIG: + handle_opus_config(msg.data, msg.header.length); + send_ack(client_sock); + break; + + case IPC_MSG_TYPE_STOP: + printf("Received stop message\n"); + ipc_free_message(&msg); + g_running = 0; + return 0; + + case IPC_MSG_TYPE_HEARTBEAT: + send_ack(client_sock); + break; + + default: + printf("Warning: Unknown message type: %u\n", msg.header.type); + break; + } + + ipc_free_message(&msg); + } + + printf("Audio input loop ended after %d frames\n", frame_count); + return 0; +} + +// ============================================================================ +// MAIN +// ============================================================================ + +int main(int argc, char **argv) { + printf("JetKVM Audio Input Server Starting...\n"); + + // Setup signal handlers + audio_common_setup_signal_handlers(&g_running); + + // Load configuration from environment + audio_config_t config; + load_audio_config(&config); + + // Set trace logging + set_trace_logging(config.trace_logging); + + // Apply audio constants to audio.c + update_audio_constants( + config.opus_bitrate, + config.opus_complexity, + 1, // vbr + 1, // vbr_constraint + -1000, // signal_type (auto) + 1103, // bandwidth (wideband) + 0, // dtx + 16, // lsb_depth + config.sample_rate, + config.channels, + config.frame_size, + 1500, // max_packet_size + 1000, // sleep_microseconds + 5, // max_attempts + 500000 // max_backoff_us + ); + + // Initialize audio playback (Opus decoder + ALSA playback) + printf("Initializing audio playback on device: %s\n", config.alsa_device); + if (jetkvm_audio_playback_init() != 0) { + fprintf(stderr, "Failed to initialize audio playback\n"); + return 1; + } + + // Create IPC server + int server_sock = ipc_create_server(IPC_SOCKET_INPUT); + if (server_sock < 0) { + fprintf(stderr, "Failed to create IPC server\n"); + jetkvm_audio_playback_close(); + return 1; + } + + // Main connection loop + while (g_running) { + printf("Waiting for client connection...\n"); + + 
int client_sock = ipc_accept_client(server_sock); + if (client_sock < 0) { + if (g_running) { + fprintf(stderr, "Failed to accept client, retrying...\n"); + sleep(1); + continue; + } + break; // Shutting down + } + + // Run audio loop with this client + run_audio_loop(client_sock); + + // Close client connection + close(client_sock); + + if (g_running) { + printf("Client disconnected, waiting for next client...\n"); + } + } + + // Cleanup + printf("Shutting down audio input server...\n"); + close(server_sock); + unlink(IPC_SOCKET_INPUT); + jetkvm_audio_playback_close(); + + printf("Audio input server exited cleanly\n"); + return 0; +} diff --git a/internal/audio/c/jetkvm_audio_output.c b/internal/audio/c/jetkvm_audio_output.c new file mode 100644 index 00000000..cd98fa7a --- /dev/null +++ b/internal/audio/c/jetkvm_audio_output.c @@ -0,0 +1,335 @@ +/* + * JetKVM Audio Output Server + * + * Standalone C binary for audio output path: + * ALSA Capture (TC358743 HDMI) → Opus Encode → IPC Send → Go Process → WebRTC → Browser + * + * This replaces the Go subprocess that was running with --audio-output-server flag. 
+ */ + +#include "ipc_protocol.h" +#include "audio_common.h" +#include +#include +#include +#include +#include +#include +#include + +// Forward declarations from audio.c +extern int jetkvm_audio_capture_init(void); +extern void jetkvm_audio_capture_close(void); +extern int jetkvm_audio_read_encode(void *opus_buf); +extern void update_audio_constants(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx, int lsb_depth, + int sr, int ch, int fs, int max_pkt, + int sleep_us, int max_attempts, int max_backoff); +extern void set_trace_logging(int enabled); +extern int update_opus_encoder_params(int bitrate, int complexity, int vbr, int vbr_constraint, + int signal_type, int bandwidth, int dtx); + +// ============================================================================ +// GLOBAL STATE +// ============================================================================ + +static volatile sig_atomic_t g_running = 1; // Shutdown flag + +// Audio configuration (from environment variables) +typedef struct { + const char *alsa_device; // ALSA capture device (default: "hw:0,0") + int opus_bitrate; // Opus bitrate (default: 96000) + int opus_complexity; // Opus complexity 0-10 (default: 1) + int opus_vbr; // VBR enabled (default: 1) + int opus_vbr_constraint; // VBR constraint (default: 1) + int opus_signal_type; // Signal type (default: -1000 = auto) + int opus_bandwidth; // Bandwidth (default: 1103 = wideband) + int opus_dtx; // DTX enabled (default: 0) + int opus_lsb_depth; // LSB depth (default: 16) + int sample_rate; // Sample rate (default: 48000) + int channels; // Channels (default: 2) + int frame_size; // Frame size in samples (default: 960) + int trace_logging; // Enable trace logging (default: 0) +} audio_config_t; + +// ============================================================================ +// CONFIGURATION PARSING +// ============================================================================ + +static void 
load_audio_config(audio_config_t *config) { + // ALSA device configuration + config->alsa_device = audio_common_parse_env_string("ALSA_CAPTURE_DEVICE", "hw:0,0"); + + // Opus encoder configuration + config->opus_bitrate = audio_common_parse_env_int("OPUS_BITRATE", 96000); + config->opus_complexity = audio_common_parse_env_int("OPUS_COMPLEXITY", 1); + config->opus_vbr = audio_common_parse_env_int("OPUS_VBR", 1); + config->opus_vbr_constraint = audio_common_parse_env_int("OPUS_VBR_CONSTRAINT", 1); + config->opus_signal_type = audio_common_parse_env_int("OPUS_SIGNAL_TYPE", -1000); + config->opus_bandwidth = audio_common_parse_env_int("OPUS_BANDWIDTH", 1103); + config->opus_dtx = audio_common_parse_env_int("OPUS_DTX", 0); + config->opus_lsb_depth = audio_common_parse_env_int("OPUS_LSB_DEPTH", 16); + + // Audio format + config->sample_rate = audio_common_parse_env_int("AUDIO_SAMPLE_RATE", 48000); + config->channels = audio_common_parse_env_int("AUDIO_CHANNELS", 2); + config->frame_size = audio_common_parse_env_int("AUDIO_FRAME_SIZE", 960); + + // Logging + config->trace_logging = audio_common_is_trace_enabled(); + + // Log configuration + printf("Audio Output Server Configuration:\n"); + printf(" ALSA Device: %s\n", config->alsa_device); + printf(" Sample Rate: %d Hz\n", config->sample_rate); + printf(" Channels: %d\n", config->channels); + printf(" Frame Size: %d samples\n", config->frame_size); + printf(" Opus Bitrate: %d bps\n", config->opus_bitrate); + printf(" Opus Complexity: %d\n", config->opus_complexity); + printf(" Trace Logging: %s\n", config->trace_logging ? "enabled" : "disabled"); +} + +// ============================================================================ +// MESSAGE HANDLING +// ============================================================================ + +/** + * Handle OpusConfig message: update encoder parameters dynamically. + * Returns 0 on success, -1 on error. 
+ */ +static int handle_opus_config(const uint8_t *data, uint32_t length) { + ipc_opus_config_t config; + + if (ipc_parse_opus_config(data, length, &config) != 0) { + fprintf(stderr, "Failed to parse Opus config\n"); + return -1; + } + + printf("Received Opus config: bitrate=%u, complexity=%u, vbr=%u\n", + config.bitrate, config.complexity, config.vbr); + + // Apply configuration to encoder + // Note: Signal type needs special handling for negative values + int signal_type = (int)(int32_t)config.signal_type; // Treat as signed + + int result = update_opus_encoder_params( + config.bitrate, + config.complexity, + config.vbr, + config.vbr, // Use VBR value for constraint (simplified) + signal_type, + config.bandwidth, + config.dtx + ); + + if (result != 0) { + fprintf(stderr, "Warning: Failed to apply some Opus encoder parameters\n"); + // Continue anyway - encoder may not be initialized yet + } + + return 0; +} + +/** + * Handle incoming IPC messages from client (non-blocking). + * Returns 0 on success, -1 on error. 
+ */ +static int handle_incoming_messages(int client_sock) { + // Set non-blocking mode for client socket + int flags = fcntl(client_sock, F_GETFL, 0); + fcntl(client_sock, F_SETFL, flags | O_NONBLOCK); + + ipc_message_t msg; + + // Try to read message (non-blocking) + int result = ipc_read_message(client_sock, &msg, IPC_MAGIC_OUTPUT); + + // Restore blocking mode + fcntl(client_sock, F_SETFL, flags); + + if (result != 0) { + if (errno == EAGAIN || errno == EWOULDBLOCK) { + return 0; // No message available, not an error + } + return -1; // Connection error + } + + // Process message based on type + switch (msg.header.type) { + case IPC_MSG_TYPE_OPUS_CONFIG: + handle_opus_config(msg.data, msg.header.length); + break; + + case IPC_MSG_TYPE_STOP: + printf("Received stop message\n"); + g_running = 0; + break; + + case IPC_MSG_TYPE_HEARTBEAT: + // Informational only, no response needed + break; + + default: + printf("Warning: Unknown message type: %u\n", msg.header.type); + break; + } + + ipc_free_message(&msg); + return 0; +} + +// ============================================================================ +// MAIN LOOP +// ============================================================================ + +/** + * Main audio capture and encode loop. + * Continuously reads from ALSA, encodes to Opus, sends via IPC. 
+ */ +static int run_audio_loop(int client_sock) { + uint8_t opus_buffer[IPC_MAX_FRAME_SIZE]; + int consecutive_errors = 0; + const int max_consecutive_errors = 10; + int frame_count = 0; + + printf("Starting audio output loop...\n"); + + while (g_running) { + // Handle any incoming configuration messages (non-blocking) + if (handle_incoming_messages(client_sock) < 0) { + fprintf(stderr, "Client disconnected, waiting for reconnection...\n"); + break; // Client disconnected + } + + // Capture audio and encode to Opus + int opus_size = jetkvm_audio_read_encode(opus_buffer); + + if (opus_size < 0) { + consecutive_errors++; + fprintf(stderr, "Audio read/encode failed (error %d/%d)\n", + consecutive_errors, max_consecutive_errors); + + if (consecutive_errors >= max_consecutive_errors) { + fprintf(stderr, "Too many consecutive errors, giving up\n"); + return -1; + } + + usleep(10000); // 10ms backoff + continue; + } + + if (opus_size == 0) { + // No data available (non-blocking mode or empty frame) + usleep(1000); // 1ms sleep + continue; + } + + // Reset error counter on success + consecutive_errors = 0; + frame_count++; + + // Send Opus frame via IPC + if (ipc_write_message(client_sock, IPC_MAGIC_OUTPUT, IPC_MSG_TYPE_OPUS_FRAME, + opus_buffer, opus_size) != 0) { + fprintf(stderr, "Failed to send frame to client\n"); + break; // Client disconnected + } + + // Trace logging (periodic) + if (frame_count % 1000 == 1) { + printf("Sent frame %d (size=%d bytes)\n", frame_count, opus_size); + } + + // Small delay to prevent busy-waiting (frame rate ~50 FPS @ 48kHz/960) + usleep(1000); // 1ms + } + + printf("Audio output loop ended after %d frames\n", frame_count); + return 0; +} + +// ============================================================================ +// MAIN +// ============================================================================ + +int main(int argc, char **argv) { + printf("JetKVM Audio Output Server Starting...\n"); + + // Setup signal handlers + 
audio_common_setup_signal_handlers(&g_running); + + // Load configuration from environment + audio_config_t config; + load_audio_config(&config); + + // Set trace logging + set_trace_logging(config.trace_logging); + + // Apply audio constants to audio.c + update_audio_constants( + config.opus_bitrate, + config.opus_complexity, + config.opus_vbr, + config.opus_vbr_constraint, + config.opus_signal_type, + config.opus_bandwidth, + config.opus_dtx, + config.opus_lsb_depth, + config.sample_rate, + config.channels, + config.frame_size, + 1500, // max_packet_size + 1000, // sleep_microseconds + 5, // max_attempts + 500000 // max_backoff_us + ); + + // Initialize audio capture + printf("Initializing audio capture on device: %s\n", config.alsa_device); + if (jetkvm_audio_capture_init() != 0) { + fprintf(stderr, "Failed to initialize audio capture\n"); + return 1; + } + + // Create IPC server + int server_sock = ipc_create_server(IPC_SOCKET_OUTPUT); + if (server_sock < 0) { + fprintf(stderr, "Failed to create IPC server\n"); + jetkvm_audio_capture_close(); + return 1; + } + + // Main connection loop + while (g_running) { + printf("Waiting for client connection...\n"); + + int client_sock = ipc_accept_client(server_sock); + if (client_sock < 0) { + if (g_running) { + fprintf(stderr, "Failed to accept client, retrying...\n"); + sleep(1); + continue; + } + break; // Shutting down + } + + // Run audio loop with this client + run_audio_loop(client_sock); + + // Close client connection + close(client_sock); + + if (g_running) { + printf("Client disconnected, waiting for next client...\n"); + } + } + + // Cleanup + printf("Shutting down audio output server...\n"); + close(server_sock); + unlink(IPC_SOCKET_OUTPUT); + jetkvm_audio_capture_close(); + + printf("Audio output server exited cleanly\n"); + return 0; +} diff --git a/internal/audio/core_config_constants.go b/internal/audio/core_config_constants.go new file mode 100644 index 00000000..5eb49ccf --- /dev/null +++ 
// ---- file: internal/audio/core_config_constants.go ----

package audio

import (
	"time"

	"github.com/jetkvm/kvm/internal/logging"
)

// AudioConfigConstants centralizes all hardcoded values used across audio components.
// This configuration system allows runtime tuning of audio performance, quality, and resource usage.
type AudioConfigConstants struct {
	// Audio Quality Presets
	MaxAudioFrameSize int // Maximum audio frame size in bytes (default: 4096)
	MaxPCMBufferSize  int // Maximum PCM buffer size in bytes for separate buffer optimization

	// Opus Encoding Parameters
	OpusBitrate       int // Target bitrate for Opus encoding in bps (default: 128000)
	OpusComplexity    int // Computational complexity 0-10 (default: 10 for best quality)
	OpusVBR           int // Variable Bit Rate: 0=CBR, 1=VBR (default: 1)
	OpusVBRConstraint int // VBR constraint: 0=unconstrained, 1=constrained (default: 0)
	OpusDTX           int // Discontinuous Transmission: 0=disabled, 1=enabled (default: 0)

	// Audio Parameters
	SampleRate    int // Audio sampling frequency in Hz (default: 48000)
	Channels      int // Number of audio channels: 1=mono, 2=stereo (default: 2)
	FrameSize     int // Samples per audio frame (default: 960 for 20ms at 48kHz)
	MaxPacketSize int // Maximum encoded packet size in bytes (default: 4000)

	// Optimal Audio Configuration (S16_LE @ 48kHz stereo from HDMI)
	// Single optimized setting - no quality presets needed
	OptimalOutputBitrate int // Output bitrate in kbps: 96 (optimal for stereo @ 48kHz)
	OptimalInputBitrate  int // Input bitrate in kbps: 48 (optimal for mono mic @ 48kHz)

	// Optimal OPUS Encoder Parameters (minimal CPU usage)
	OptimalOpusComplexity int // Complexity: 1 (minimal CPU ~0.5%)
	OptimalOpusVBR        int // VBR: enabled for efficiency
	OptimalOpusSignalType int // Signal: OPUS_SIGNAL_MUSIC (3002)
	// NOTE(review): OPUS_BANDWIDTH_WIDEBAND (1103) caps audio bandwidth at 8 kHz;
	// FULLBAND (1105) is the value that passes 48kHz content through — confirm intent.
	OptimalOpusBandwidth int // Bandwidth: WIDEBAND (1103)
	OptimalOpusDTX       int // DTX: disabled for continuous audio

	// CGO Audio Constants
	CGOOpusBitrate int // Native Opus encoder bitrate in bps (default: 96000)

	CGOOpusComplexity    int // Computational complexity for native Opus encoder (0-10)
	CGOOpusVBR           int // Variable Bit Rate in native Opus encoder (0=CBR, 1=VBR)
	CGOOpusVBRConstraint int // Constrained VBR in native encoder (0/1)
	CGOOpusSignalType    int // Signal type hint for native Opus encoder
	CGOOpusBandwidth     int // Frequency bandwidth for native Opus encoder
	CGOOpusDTX           int // Discontinuous Transmission in native encoder (0/1)
	CGOSampleRate        int // Sample rate for native audio processing (Hz)
	CGOChannels          int // Channel count for native audio processing
	CGOFrameSize         int // Frame size for native Opus processing (samples)
	CGOMaxPacketSize     int // Maximum packet size for native encoding (bytes)

	// Input IPC Constants
	InputIPCSampleRate int // Sample rate for input IPC audio processing (Hz)
	InputIPCChannels   int // Channel count for input IPC audio processing
	InputIPCFrameSize  int // Frame size for input IPC processing (samples)

	// Output IPC Constants
	OutputMaxFrameSize int // Maximum frame size for output processing (bytes)
	OutputHeaderSize   int // Size of output message headers (bytes)

	OutputMessagePoolSize int // Output message pool size (128)

	// Socket Buffer Constants
	SocketOptimalBuffer int // Optimal socket buffer size (128KB)
	SocketMaxBuffer     int // Maximum socket buffer size (256KB)
	SocketMinBuffer     int // Minimum socket buffer size (32KB)

	// Process Management
	MaxRestartAttempts int           // Maximum restart attempts (5)
	RestartWindow      time.Duration // Restart attempt window (5m)
	RestartDelay       time.Duration // Initial restart delay (default: 1s)
	MaxRestartDelay    time.Duration // Maximum restart delay (30s)

	// Buffer Management

	MaxPoolSize                  int // Maximum object pool size
	MessagePoolSize              int // Message pool size
	OptimalSocketBuffer          int // Optimal socket buffer size (bytes)
	MaxSocketBuffer              int // Maximum socket buffer size (bytes)
	MinSocketBuffer              int // Minimum socket buffer size (bytes)
	ChannelBufferSize            int // Go channel buffer capacity
	AudioFramePoolSize           int // Audio frame object pool size
	PageSize                     int // Memory page size for alignment
	InitialBufferFrames          int // Initial buffer size in frames
	BytesToMBDivisor             int // Byte-to-megabyte conversion divisor
	MinReadEncodeBuffer          int // Minimum CGO read/encode buffer (bytes)
	MaxDecodeWriteBuffer         int // Maximum CGO decode/write buffer (bytes)
	MinBatchSizeForThreadPinning int // Minimum batch size before pinning a thread

	MagicNumber            uint32        // IPC message validation header
	MaxFrameSize           int           // Maximum audio frame size (bytes)
	WriteTimeout           time.Duration // IPC write timeout
	HeaderSize             int           // IPC message header size (bytes)
	MetricsUpdateInterval  time.Duration // Metrics collection frequency
	WarmupSamples          int           // Warmup samples before metrics are trusted
	MetricsChannelBuffer   int           // Metrics data channel buffer size
	LatencyHistorySize     int           // Number of latency measurements kept
	MaxCPUPercent          float64       // Upper clamp for CPU percentage
	MinCPUPercent          float64       // Lower clamp for CPU percentage
	DefaultClockTicks      float64       // Fallback clock ticks per second
	DefaultMemoryGB        int           // Fallback total memory in GB
	MaxWarmupSamples       int           // Maximum warmup samples
	WarmupCPUSamples       int           // CPU warmup samples
	LogThrottleIntervalSec int           // Log throttle interval in seconds
	MinValidClockTicks     int           // Minimum plausible clock ticks
	MaxValidClockTicks     int           // Maximum plausible clock ticks
	CPUFactor              float64       // CPU weight in performance calculations
	MemoryFactor           float64       // Memory weight in performance calculations
	LatencyFactor          float64       // Latency weight in performance calculations

	// Timing Configuration
	RetryDelay              time.Duration // Initial retry delay
	MaxRetryDelay           time.Duration // Maximum retry delay
	BackoffMultiplier       float64       // Exponential backoff multiplier
	MaxConsecutiveErrors    int           // Consecutive error threshold
	DefaultSleepDuration    time.Duration // 100ms
	ShortSleepDuration      time.Duration // 10ms
	LongSleepDuration       time.Duration // 200ms
	DefaultTickerInterval   time.Duration // 100ms
	BufferUpdateInterval    time.Duration // 250ms
	InputSupervisorTimeout  time.Duration // 5s
	OutputSupervisorTimeout time.Duration // 5s
	BatchProcessingDelay    time.Duration // 5ms

	// System threshold configuration for buffer management
	LowCPUThreshold     float64       // CPU usage threshold for performance optimization
	HighCPUThreshold    float64       // CPU usage threshold for performance limits
	LowMemoryThreshold  float64       // Lower memory threshold (fraction)
	HighMemoryThreshold float64       // Upper memory threshold (fraction)
	CooldownPeriod      time.Duration // Cooldown period (default: 15s)
	RollbackThreshold   time.Duration // Rollback threshold (default: 200ms)

	MaxLatencyThreshold         time.Duration // Max latency threshold (default: 150ms)
	JitterThreshold             time.Duration // Jitter threshold (default: 15ms)
	LatencyOptimizationInterval time.Duration // Optimization interval (default: 3s)
	MicContentionTimeout        time.Duration // 200ms contention timeout
	PreallocPercentage          int           // 20% preallocation percentage
	BackoffStart                time.Duration // 50ms initial backoff

	InputMagicNumber uint32 // Magic number for input IPC messages (0x4A4B4D49 "JKMI")

	OutputMagicNumber uint32 // Magic number for output IPC messages (0x4A4B4F55 "JKOU")

	// Calculation Constants
	PercentageMultiplier    float64 // Multiplier for percentage calculations (100.0)
	AveragingWeight         float64 // Weight for weighted averaging (0.7)
	ScalingFactor           float64 // General scaling factor (1.5)
	CPUMemoryWeight         float64 // Weight for CPU factor in calculations (0.5)
	MemoryWeight            float64 // Weight for memory factor (0.3)
	LatencyWeight           float64 // Weight for latency factor (0.2)
	PoolGrowthMultiplier    int     // Multiplier for pool size growth (2)
	LatencyScalingFactor    float64 // Scaling factor for latency calculations (2.0)
	OptimizerAggressiveness float64 // Aggressiveness level for optimization (0.7)

	// CGO Audio Processing Constants
	CGOUsleepMicroseconds int // Sleep duration for CGO usleep calls (1000μs)

	CGOPCMBufferSize        int     // PCM buffer size for CGO audio processing (samples)
	CGONanosecondsPerSecond float64 // Nanoseconds per second conversion

	// Output Streaming Constants
	OutputStreamingFrameIntervalMS int // Output frame interval (20ms for 50 FPS)

	// IPC Constants
	IPCInitialBufferFrames int // Initial IPC buffer size (500 frames)

	EventTimeoutSeconds      int    // Event delivery timeout (seconds)
	EventTimeFormatString    string // Timestamp layout for event payloads
	EventSubscriptionDelayMS int    // Delay before event subscription activates (ms)
	InputProcessingTimeoutMS int    // Input processing timeout threshold (ms)
	InputSocketName          string // Unix socket name for audio input IPC
	OutputSocketName         string // Unix socket name for audio output IPC
	AudioInputComponentName  string // Logging component name: input
	AudioOutputComponentName string // Logging component name: output
	AudioServerComponentName string // Logging component name: server
	AudioRelayComponentName  string // Logging component name: relay
	AudioEventsComponentName string // Logging component name: events

	TestSocketTimeout          time.Duration // Timeout for test socket operations
	TestBufferSize             int           // Buffer size for test operations (bytes)
	TestRetryDelay             time.Duration // Delay between test retry attempts
	LatencyHistogramMaxSamples int           // Max samples kept for latency tracking
	LatencyPercentile50        int           // 50th percentile factor
	LatencyPercentile95        int           // 95th percentile factor
	LatencyPercentile99        int           // 99th percentile factor

	// Buffer Pool Configuration
	BufferPoolDefaultSize     int     // Default buffer pool size when MaxPoolSize is invalid
	BufferPoolControlSize     int     // Control buffer pool size
	ZeroCopyPreallocSizeBytes int     // Zero-copy frame pool preallocation size in bytes
	ZeroCopyMinPreallocFrames int     // Minimum preallocated frames for zero-copy pool
	BufferPoolHitRateBase     float64 // Base for hit rate percentage calculation

	HitRateCalculationBase   float64       // Base for hit rate percentage calculation
	MaxLatency               time.Duration // Maximum allowed latency
	MinMetricsUpdateInterval time.Duration // Minimum metrics update interval
	MaxMetricsUpdateInterval time.Duration // Maximum metrics update interval
	MinSampleRate            int           // Minimum supported sample rate (Hz)
	MaxSampleRate            int           // Maximum supported sample rate (Hz)
	MaxChannels              int           // Maximum audio channels

	// CGO Constants
	CGOMaxBackoffMicroseconds int // Maximum CGO backoff time (500ms)
	CGOMaxAttempts            int // Maximum CGO retry attempts (5)

	// Frame Duration Validation
	MinFrameDuration time.Duration // Minimum frame duration (10ms)
	MaxFrameDuration time.Duration // Maximum frame duration (100ms)

	// Valid Sample Rates
	// Validation Constants
	ValidSampleRates   []int         // Supported sample rates (8kHz to 48kHz)
	MinOpusBitrate     int           // Minimum Opus bitrate (6000 bps)
	MaxOpusBitrate     int           // Maximum Opus bitrate (510000 bps)
	MaxValidationTime  time.Duration // Validation timeout (5s)
	MinFrameSize       int           // Minimum frame size (default: 1 byte, small frames allowed)
	FrameSizeTolerance int           // Frame size tolerance (512 bytes)

	// Latency Histogram Buckets
	LatencyBucket10ms  time.Duration // 10ms latency bucket
	LatencyBucket25ms  time.Duration // 25ms latency bucket
	LatencyBucket50ms  time.Duration // 50ms latency bucket
	LatencyBucket100ms time.Duration // 100ms latency bucket
	LatencyBucket250ms time.Duration // 250ms latency bucket
	LatencyBucket500ms time.Duration // 500ms latency bucket
	LatencyBucket1s    time.Duration // 1s latency bucket
	LatencyBucket2s    time.Duration // 2s latency bucket

	MaxAudioProcessorWorkers int           // Worker goroutines for audio processing tasks
	MaxAudioReaderWorkers    int           // Worker goroutines for audio reading tasks
	AudioProcessorQueueSize  int           // Task queue size for the processor pool
	AudioReaderQueueSize     int           // Task queue size for the reader pool
	WorkerMaxIdleTime        time.Duration // Idle time before a worker terminates

	// Connection Retry Configuration
	MaxConnectionAttempts   int           // Maximum connection retry attempts
	ConnectionRetryDelay    time.Duration // Initial connection retry delay
	MaxConnectionRetryDelay time.Duration // Maximum connection retry delay
	ConnectionBackoffFactor float64       // Connection retry backoff factor
	ConnectionTimeoutDelay  time.Duration // Connection timeout for each attempt
	ReconnectionInterval    time.Duration // Interval for automatic reconnection attempts
	HealthCheckInterval     time.Duration // Health check interval for connections

	// Quality Change Timeout Configuration
	QualityChangeSupervisorTimeout time.Duration // Timeout for supervisor stop during quality changes
	QualityChangeTickerInterval    time.Duration // Ticker interval for supervisor stop polling
	QualityChangeSettleDelay       time.Duration // Delay for quality change to settle
	QualityChangeRecoveryDelay     time.Duration // Delay before attempting recovery
}

// DefaultAudioConfig returns the default configuration constants.
// These values are carefully chosen based on JetKVM's embedded ARM environment,
// real-time audio requirements, and extensive testing for optimal performance.
func DefaultAudioConfig() *AudioConfigConstants {
	return &AudioConfigConstants{
		// Audio Quality Presets
		MaxAudioFrameSize: 4096,
		MaxPCMBufferSize:  8192, // Default PCM buffer size (2x MaxAudioFrameSize for safety)

		// Opus Encoding Parameters
		OpusBitrate:       128000,
		OpusComplexity:    10,
		OpusVBR:           1,
		OpusVBRConstraint: 0,
		OpusDTX:           0,

		// Audio Parameters
		SampleRate:    48000,
		Channels:      2,
		FrameSize:     960,
		MaxPacketSize: 4000,

		// Optimal Audio Configuration (single setting for all use cases)
		OptimalOutputBitrate:  96,   // 96 kbps for stereo @ 48kHz
		OptimalInputBitrate:   48,   // 48 kbps for mono mic @ 48kHz
		OptimalOpusComplexity: 1,    // Complexity 1: minimal CPU (~0.5%)
		OptimalOpusVBR:        1,    // VBR enabled for efficiency
		OptimalOpusSignalType: 3002, // OPUS_SIGNAL_MUSIC
		// NOTE(review): WIDEBAND (1103) limits audio bandwidth to 8 kHz — confirm
		// FULLBAND (1105) was not intended for 48kHz HDMI content.
		OptimalOpusBandwidth: 1103, // OPUS_BANDWIDTH_WIDEBAND
		OptimalOpusDTX:       0,    // DTX disabled for continuous audio

		// CGO Audio Constants - Optimized for S16_LE @ 48kHz with minimal CPU
		CGOOpusBitrate:       96000, // 96 kbps optimal for stereo @ 48kHz
		CGOOpusComplexity:    1,     // Complexity 1: minimal CPU (~0.5% on RV1106)
		CGOOpusVBR:           1,     // VBR enabled for efficiency
		CGOOpusVBRConstraint: 1,     // Constrained VBR for predictable bitrate
		CGOOpusSignalType:    -1000, // OPUS_AUTO (automatic voice/music detection)
		CGOOpusBandwidth:     1103,  // OPUS_BANDWIDTH_WIDEBAND (see NOTE above)
		CGOOpusDTX:           0,     // DTX disabled for continuous audio
		CGOSampleRate:        48000, // 48 kHz native HDMI sample rate
		CGOChannels:          2,     // Stereo
		CGOFrameSize:         960,   // 20ms frames at 48kHz
		CGOMaxPacketSize:     1500,  // Standard Ethernet MTU

		// Input IPC Constants
		InputIPCSampleRate: 48000, // Input IPC sample rate (48kHz)
		InputIPCChannels:   2,     // Input IPC channels (stereo)
		InputIPCFrameSize:  960,   // Input IPC frame size (960 samples)

		// Output IPC Constants
		OutputMaxFrameSize: 4096, // Maximum output frame size
		OutputHeaderSize:   17,   // Output frame header size

		OutputMessagePoolSize: 128, // Output message pool size

		// Socket Buffer Constants
		SocketOptimalBuffer: 131072, // 128KB optimal socket buffer
		SocketMaxBuffer:     262144, // 256KB maximum socket buffer
		SocketMinBuffer:     32768,  // 32KB minimum socket buffer

		// Process Management
		MaxRestartAttempts: 5, // Maximum restart attempts

		RestartWindow:   5 * time.Minute,  // Time window for restart attempt counting
		RestartDelay:    1 * time.Second,  // Initial delay before restart attempts
		MaxRestartDelay: 30 * time.Second, // Maximum delay for exponential backoff

		// Buffer Management

		MaxPoolSize:          100,         // Maximum object pool size
		MessagePoolSize:      1024,        // Significantly increased message pool for quality change bursts
		OptimalSocketBuffer:  262144,      // 256KB optimal socket buffer
		MaxSocketBuffer:      1048576,     // 1MB maximum socket buffer
		MinSocketBuffer:      8192,        // 8KB minimum socket buffer
		ChannelBufferSize:    2048,        // Significantly increased channel buffer for quality change bursts
		AudioFramePoolSize:   1500,        // Audio frame object pool size
		PageSize:             4096,        // Memory page size for alignment
		InitialBufferFrames:  1000,        // Increased initial buffer size during startup
		BytesToMBDivisor:     1024 * 1024, // Byte to megabyte conversion
		MinReadEncodeBuffer:  1276,        // Minimum CGO read/encode buffer
		MaxDecodeWriteBuffer: 4096,        // Maximum CGO decode/write buffer

		// IPC Configuration - Balanced for stability
		MagicNumber:  0xDEADBEEF,              // IPC message validation header
		MaxFrameSize: 4096,                    // Maximum audio frame size (4KB)
		WriteTimeout: 1000 * time.Millisecond, // Further increased timeout to handle quality change bursts
		HeaderSize:   8,                       // IPC message header size

		// Monitoring and Metrics - Balanced for stability
		MetricsUpdateInterval: 1000 * time.Millisecond, // Stable metrics collection frequency
		WarmupSamples:         10,                      // Adequate warmup samples for accuracy
		MetricsChannelBuffer:  100,                     // Adequate metrics data channel buffer
		LatencyHistorySize:    100,                     // Adequate latency measurements to keep

		// Process Monitoring Constants
		MaxCPUPercent:          100.0, // Maximum CPU percentage
		MinCPUPercent:          0.01,  // Minimum CPU percentage
		DefaultClockTicks:      250.0, // Default clock ticks for embedded ARM systems
		DefaultMemoryGB:        8,     // Default memory in GB
		MaxWarmupSamples:       3,     // Maximum warmup samples
		WarmupCPUSamples:       2,     // CPU warmup samples
		LogThrottleIntervalSec: 10,    // Log throttle interval in seconds
		MinValidClockTicks:     50,    // Minimum valid clock ticks
		MaxValidClockTicks:     1000,  // Maximum valid clock ticks

		// Performance Tuning
		CPUFactor:     0.7, // CPU weight in performance calculations
		MemoryFactor:  0.8, // Memory weight in performance calculations
		LatencyFactor: 0.9, // Latency weight in performance calculations

		// Error Handling
		RetryDelay:           100 * time.Millisecond, // Initial retry delay
		MaxRetryDelay:        5 * time.Second,        // Maximum retry delay
		BackoffMultiplier:    2.0,                    // Exponential backoff multiplier
		MaxConsecutiveErrors: 5,                      // Consecutive error threshold

		// Connection Retry Configuration
		MaxConnectionAttempts:   15,                    // Maximum connection retry attempts
		ConnectionRetryDelay:    50 * time.Millisecond, // Initial connection retry delay
		MaxConnectionRetryDelay: 2 * time.Second,       // Maximum connection retry delay
		ConnectionBackoffFactor: 1.5,                   // Connection retry backoff factor
		ConnectionTimeoutDelay:  5 * time.Second,       // Connection timeout for each attempt
		ReconnectionInterval:    30 * time.Second,      // Interval for automatic reconnection attempts
		HealthCheckInterval:     10 * time.Second,      // Health check interval for connections

		// Quality Change Timeout Configuration
		QualityChangeSupervisorTimeout: 5 * time.Second,        // Timeout for supervisor stop during quality changes
		QualityChangeTickerInterval:    100 * time.Millisecond, // Ticker interval for supervisor stop polling
		QualityChangeSettleDelay:       2 * time.Second,        // Delay for quality change to settle
		QualityChangeRecoveryDelay:     1 * time.Second,        // Delay before attempting recovery

		// Timing Constants - Optimized for quality change stability
		DefaultSleepDuration:    100 * time.Millisecond, // Balanced polling interval
		ShortSleepDuration:      10 * time.Millisecond,  // Balanced high-frequency polling
		LongSleepDuration:       200 * time.Millisecond, // Balanced background task delay
		DefaultTickerInterval:   100 * time.Millisecond, // Balanced periodic task interval
		BufferUpdateInterval:    250 * time.Millisecond, // Faster buffer size update frequency
		InputSupervisorTimeout:  5 * time.Second,        // Input monitoring timeout
		OutputSupervisorTimeout: 5 * time.Second,        // Output monitoring timeout
		BatchProcessingDelay:    5 * time.Millisecond,   // Reduced batch processing delay

		// System Load Configuration - Optimized for single-core RV1106G3
		LowCPUThreshold:     0.40, // Adjusted for single-core ARM system
		HighCPUThreshold:    0.75, // Adjusted for single-core RV1106G3 (current load ~64%)
		LowMemoryThreshold:  0.60,
		HighMemoryThreshold: 0.85, // Adjusted for 200MB total memory system

		CooldownPeriod:    15 * time.Second,       // Reduced cooldown period
		RollbackThreshold: 200 * time.Millisecond, // Lower rollback threshold

		MaxLatencyThreshold:         150 * time.Millisecond, // Lower max latency threshold
		JitterThreshold:             15 * time.Millisecond,  // Reduced jitter threshold
		LatencyOptimizationInterval: 3 * time.Second,        // More frequent optimization

		// Microphone Contention Configuration
		MicContentionTimeout: 200 * time.Millisecond,

		// Buffer Pool Configuration
		PreallocPercentage: 20,

		// Sleep and Backoff Configuration
		BackoffStart: 50 * time.Millisecond,

		// Protocol Magic Numbers
		InputMagicNumber:  0x4A4B4D49, // "JKMI" (JetKVM Microphone Input)
		OutputMagicNumber: 0x4A4B4F55, // "JKOU" (JetKVM Output)

		// Calculation Constants
		PercentageMultiplier: 100.0, // Standard percentage conversion (0.5 * 100 = 50%)
		AveragingWeight:      0.7,   // Weight for smoothing values (70% recent, 30% historical)
		ScalingFactor:        1.5,   // General scaling factor for adaptive adjustments

		CPUMemoryWeight:         0.5, // CPU factor weight in combined calculations
		MemoryWeight:            0.3, // Memory factor weight in combined calculations
		LatencyWeight:           0.2, // Latency factor weight in combined calculations
		PoolGrowthMultiplier:    2,   // Pool growth multiplier
		LatencyScalingFactor:    2.0, // Latency ratio scaling factor
		OptimizerAggressiveness: 0.7, // Optimizer aggressiveness factor

		// CGO Audio Processing Constants - Balanced for stability
		CGOUsleepMicroseconds:   1000,         // 1000 microseconds (1ms) for stable CGO usleep calls
		CGOPCMBufferSize:        1920,         // 1920 samples for PCM buffer (max 2ch*960)
		CGONanosecondsPerSecond: 1000000000.0, // 1000000000.0 for nanosecond conversions

		// Output Streaming Constants - Balanced for stability
		OutputStreamingFrameIntervalMS: 20, // 20ms frame interval (50 FPS) for stability

		// IPC Constants
		IPCInitialBufferFrames: 500, // 500 frames for initial buffer

		// Event Constants - Balanced for stability
		EventTimeoutSeconds:      2,                          // 2 seconds for event timeout
		EventTimeFormatString:    "2006-01-02T15:04:05.000Z", // "2006-01-02T15:04:05.000Z" time format
		EventSubscriptionDelayMS: 100,                        // 100ms subscription delay

		// Goroutine Pool Configuration
		MaxAudioProcessorWorkers: 16,               // 16 workers for audio processing tasks
		MaxAudioReaderWorkers:    8,                // 8 workers for audio reading tasks
		AudioProcessorQueueSize:  64,               // 64 tasks queue size for processor pool
		AudioReaderQueueSize:     32,               // 32 tasks queue size for reader pool
		WorkerMaxIdleTime:        60 * time.Second, // 60s maximum idle time before worker termination

		// Input Processing Constants - Balanced for stability
		InputProcessingTimeoutMS: 10, // 10ms processing timeout threshold

		// Socket Names
		InputSocketName:  "audio_input.sock",  // Socket name for audio input IPC
		OutputSocketName: "audio_output.sock", // Socket name for audio output IPC

		// Component Names
		AudioInputComponentName:  "audio-input",  // Component name for input logging
		AudioOutputComponentName: "audio-output", // Component name for output logging
		AudioServerComponentName: "audio-server", // Component name for server logging
		AudioRelayComponentName:  "audio-relay",  // Component name for relay logging
		AudioEventsComponentName: "audio-events", // Component name for events logging

		// Test Configuration
		TestSocketTimeout: 100 * time.Millisecond, // 100ms timeout for test socket operations
		TestBufferSize:    4096,                   // 4096 bytes buffer size for test operations
		TestRetryDelay:    200 * time.Millisecond, // 200ms delay between test retry attempts

		// Latency Histogram Configuration
		LatencyHistogramMaxSamples: 1000, // 1000 samples for latency tracking
		LatencyPercentile50:        50,   // 50th percentile calculation factor
		LatencyPercentile95:        95,   // 95th percentile calculation factor
		LatencyPercentile99:        99,   // 99th percentile calculation factor

		// Buffer Pool Configuration
		BufferPoolDefaultSize:     64,          // Default buffer pool size when MaxPoolSize is invalid
		BufferPoolControlSize:     512,         // Control buffer pool size
		ZeroCopyPreallocSizeBytes: 1024 * 1024, // Zero-copy frame pool preallocation size in bytes (1MB)
		ZeroCopyMinPreallocFrames: 1,           // Minimum preallocated frames for zero-copy pool
		BufferPoolHitRateBase:     100.0,       // Base for hit rate percentage calculation

		// Buffer Pool Efficiency Constants
		HitRateCalculationBase: 100.0, // 100.0 base for hit rate percentage calculation

		// Validation Constants
		MaxLatency:               500 * time.Millisecond, // 500ms maximum allowed latency
		MinMetricsUpdateInterval: 100 * time.Millisecond, // 100ms minimum metrics update interval
		MaxMetricsUpdateInterval: 10 * time.Second,       // 10s maximum metrics update interval
		MinSampleRate:            8000,                   // 8kHz minimum sample rate
		MaxSampleRate:            48000,                  // 48kHz maximum sample rate
		MaxChannels:              8,                      // 8 maximum audio channels

		// CGO Constants
		CGOMaxBackoffMicroseconds: 500000, // 500ms maximum backoff in microseconds
		CGOMaxAttempts:            5,      // 5 maximum retry attempts

		// Validation Frame Size Limits
		MinFrameDuration: 10 * time.Millisecond,  // 10ms minimum frame duration
		MaxFrameDuration: 100 * time.Millisecond, // 100ms maximum frame duration

		// Valid Sample Rates
		ValidSampleRates: []int{8000, 12000, 16000, 22050, 24000, 44100, 48000}, // Supported sample rates

		// Opus Bitrate Validation Constants
		MinOpusBitrate: 6000,   // 6000 bps minimum Opus bitrate
		MaxOpusBitrate: 510000, // 510000 bps maximum Opus bitrate

		// Validation Configuration
		MaxValidationTime:  5 * time.Second, // 5s maximum validation timeout
		MinFrameSize:       1,               // 1 byte minimum frame size (allow small frames)
		FrameSizeTolerance: 512,             // 512 bytes frame size tolerance

		// Latency Histogram Bucket Configuration
		LatencyBucket10ms:  10 * time.Millisecond,  // 10ms latency bucket
		LatencyBucket25ms:  25 * time.Millisecond,  // 25ms latency bucket
		LatencyBucket50ms:  50 * time.Millisecond,  // 50ms latency bucket
		LatencyBucket100ms: 100 * time.Millisecond, // 100ms latency bucket
		LatencyBucket250ms: 250 * time.Millisecond, // 250ms latency bucket
		LatencyBucket500ms: 500 * time.Millisecond, // 500ms latency bucket
		LatencyBucket1s:    1 * time.Second,        // 1s latency bucket
		LatencyBucket2s:    2 * time.Second,        // 2s latency bucket

		// Batch Audio Processing Configuration
		MinBatchSizeForThreadPinning: 5, // Minimum batch size to pin thread

		// Performance Configuration Flags - Production optimizations

	}
}

// Global configuration instance.
// NOTE(review): read/written without synchronization; if UpdateConfig can race
// with readers on other goroutines, this needs an atomic.Pointer or mutex — confirm.
var Config = DefaultAudioConfig()

// UpdateConfig allows runtime configuration updates.
// Invalid configurations are rejected and the current configuration is kept.
func UpdateConfig(newConfig *AudioConfigConstants) {
	// Validate the new configuration before applying it
	if err := ValidateAudioConfigConstants(newConfig); err != nil {
		// Log validation error and keep current configuration
		logger := logging.GetDefaultLogger().With().Str("component", "AudioConfig").Logger()
		logger.Error().Err(err).Msg("Configuration validation failed, keeping current configuration")
		return
	}

	Config = newConfig
	logger := logging.GetDefaultLogger().With().Str("component", "AudioConfig").Logger()
	logger.Info().Msg("Audio configuration updated successfully")
}

// GetConfig returns the current configuration.
func GetConfig() *AudioConfigConstants {
	return Config
}

// ---- file: internal/audio/core_handlers.go ----

package audio

import (
	"context"
	"errors"

	"github.com/coder/websocket"
	"github.com/rs/zerolog"
)

// AudioControlService provides core audio control operations
type AudioControlService struct {
	sessionProvider SessionProvider // supplies session state and the audio input manager
	logger          *zerolog.Logger // structured logger for control operations
}
NewAudioControlService(sessionProvider SessionProvider, logger *zerolog.Logger) *AudioControlService { + return &AudioControlService{ + sessionProvider: sessionProvider, + logger: logger, + } +} + +// MuteAudio sets the audio mute state by controlling the audio output subprocess +func (s *AudioControlService) MuteAudio(muted bool) error { + if muted { + // Mute: Stop audio output subprocess and relay + supervisor := GetAudioOutputSupervisor() + if supervisor != nil { + supervisor.Stop() + } + StopAudioRelay() + SetAudioMuted(true) + } else { + // Unmute: Start audio output subprocess and relay + if !s.sessionProvider.IsSessionActive() { + return errors.New("no active session for audio unmute") + } + + supervisor := GetAudioOutputSupervisor() + if supervisor != nil { + err := supervisor.Start() + if err != nil { + s.logger.Debug().Err(err).Msg("failed to start audio output supervisor") + return err + } + } + + // Start audio relay + err := StartAudioRelay(nil) + if err != nil { + s.logger.Error().Err(err).Msg("failed to start audio relay during unmute") + return err + } + + // Connect the relay to the current WebRTC session's audio track + // This is needed because UpdateAudioRelayTrack is normally only called during session creation + if err := connectRelayToCurrentSession(); err != nil { + s.logger.Warn().Err(err).Msg("failed to connect relay to current session, audio may not work") + } + SetAudioMuted(false) + s.logger.Info().Msg("audio output unmuted (subprocess and relay started)") + } + + // Broadcast audio mute state change via WebSocket + broadcaster := GetAudioEventBroadcaster() + broadcaster.BroadcastAudioMuteChanged(muted) + + return nil +} + +// StartMicrophone starts the microphone input +func (s *AudioControlService) StartMicrophone() error { + if !s.sessionProvider.IsSessionActive() { + return errors.New("no active session") + } + + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager == nil { + return errors.New("audio 
input manager not available") + } + + if audioInputManager.IsRunning() { + s.logger.Info().Msg("microphone already running") + return nil + } + + if err := audioInputManager.Start(); err != nil { + s.logger.Error().Err(err).Msg("failed to start microphone") + return err + } + + s.logger.Info().Msg("microphone started successfully") + + // Broadcast microphone state change via WebSocket + broadcaster := GetAudioEventBroadcaster() + sessionActive := s.sessionProvider.IsSessionActive() + broadcaster.BroadcastMicrophoneStateChanged(true, sessionActive) + + return nil +} + +// StopMicrophone stops the microphone input +func (s *AudioControlService) StopMicrophone() error { + if !s.sessionProvider.IsSessionActive() { + return errors.New("no active session") + } + + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager == nil { + return errors.New("audio input manager not available") + } + + if !audioInputManager.IsRunning() { + s.logger.Info().Msg("microphone already stopped") + return nil + } + + audioInputManager.Stop() + s.logger.Info().Msg("microphone stopped successfully") + + // Broadcast microphone state change via WebSocket + broadcaster := GetAudioEventBroadcaster() + sessionActive := s.sessionProvider.IsSessionActive() + broadcaster.BroadcastMicrophoneStateChanged(false, sessionActive) + + return nil +} + +// MuteMicrophone sets the microphone mute state by controlling data flow (like audio output) +func (s *AudioControlService) MuteMicrophone(muted bool) error { + if muted { + // Mute: Control data flow, don't stop subprocess (like audio output) + SetMicrophoneMuted(true) + s.logger.Info().Msg("microphone muted (data flow disabled)") + } else { + // Unmute: Ensure subprocess is running, then enable data flow + if !s.sessionProvider.IsSessionActive() { + return errors.New("no active session for microphone unmute") + } + + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager == nil { + return 
errors.New("audio input manager not available") + } + + // Start subprocess if not already running (async, non-blocking) + if !audioInputManager.IsRunning() { + go func() { + if err := audioInputManager.Start(); err != nil { + s.logger.Error().Err(err).Msg("failed to start microphone during unmute") + } + }() + } + + // Enable data flow immediately + SetMicrophoneMuted(false) + s.logger.Info().Msg("microphone unmuted (data flow enabled)") + } + + // Broadcast microphone state change via WebSocket + broadcaster := GetAudioEventBroadcaster() + sessionActive := s.sessionProvider.IsSessionActive() + + // Get actual subprocess running status (not mute status) + var subprocessRunning bool + if sessionActive { + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager != nil { + subprocessRunning = audioInputManager.IsRunning() + } + } + + broadcaster.BroadcastMicrophoneStateChanged(subprocessRunning, sessionActive) + + return nil +} + +// ResetMicrophone resets the microphone +func (s *AudioControlService) ResetMicrophone() error { + if !s.sessionProvider.IsSessionActive() { + return errors.New("no active session") + } + + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager == nil { + return errors.New("audio input manager not available") + } + + if audioInputManager.IsRunning() { + audioInputManager.Stop() + s.logger.Info().Msg("stopped microphone for reset") + } + + if err := audioInputManager.Start(); err != nil { + s.logger.Error().Err(err).Msg("failed to restart microphone during reset") + return err + } + + s.logger.Info().Msg("microphone reset successfully") + return nil +} + +// GetAudioStatus returns the current audio output status +func (s *AudioControlService) GetAudioStatus() map[string]interface{} { + return map[string]interface{}{ + "muted": IsAudioMuted(), + } +} + +// GetMicrophoneStatus returns the current microphone status +func (s *AudioControlService) GetMicrophoneStatus() 
map[string]interface{} { + if s.sessionProvider == nil { + return map[string]interface{}{ + "error": "no session provider", + } + } + + if !s.sessionProvider.IsSessionActive() { + return map[string]interface{}{ + "error": "no active session", + } + } + + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager == nil { + return map[string]interface{}{ + "error": "no audio input manager", + } + } + + return map[string]interface{}{ + "running": audioInputManager.IsRunning(), + "ready": audioInputManager.IsReady(), + } +} + +// SubscribeToAudioEvents subscribes to audio events via WebSocket +func (s *AudioControlService) SubscribeToAudioEvents(connectionID string, wsCon *websocket.Conn, runCtx context.Context, logger *zerolog.Logger) { + logger.Info().Msg("client subscribing to audio events") + broadcaster := GetAudioEventBroadcaster() + broadcaster.Subscribe(connectionID, wsCon, runCtx, logger) +} + +// UnsubscribeFromAudioEvents unsubscribes from audio events +func (s *AudioControlService) UnsubscribeFromAudioEvents(connectionID string, logger *zerolog.Logger) { + logger.Info().Str("connection_id", connectionID).Msg("client unsubscribing from audio events") + broadcaster := GetAudioEventBroadcaster() + broadcaster.Unsubscribe(connectionID) +} + +// IsAudioOutputActive returns whether the audio output subprocess is running +func (s *AudioControlService) IsAudioOutputActive() bool { + return !IsAudioMuted() && IsAudioRelayRunning() +} + +// IsMicrophoneActive returns whether the microphone subprocess is running +func (s *AudioControlService) IsMicrophoneActive() bool { + if !s.sessionProvider.IsSessionActive() { + return false + } + + audioInputManager := s.sessionProvider.GetAudioInputManager() + if audioInputManager == nil { + return false + } + + // For Enable/Disable buttons, we check subprocess status + return audioInputManager.IsRunning() +} diff --git a/internal/audio/core_metrics.go b/internal/audio/core_metrics.go new file mode 
100644 index 00000000..f7f7eec5 --- /dev/null +++ b/internal/audio/core_metrics.go @@ -0,0 +1,256 @@ +package audio + +import ( + "runtime" + "sync/atomic" + "time" + + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" +) + +var ( + // Audio output metrics + audioFramesReceivedTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_audio_frames_received_total", + Help: "Total number of audio frames received", + }, + ) + + audioFramesDroppedTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_audio_frames_dropped_total", + Help: "Total number of audio frames dropped", + }, + ) + + audioBytesProcessedTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_audio_bytes_processed_total", + Help: "Total number of audio bytes processed", + }, + ) + + audioConnectionDropsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_audio_connection_drops_total", + Help: "Total number of audio connection drops", + }, + ) + + audioAverageLatencyMilliseconds = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "jetkvm_audio_average_latency_milliseconds", + Help: "Average audio latency in milliseconds", + }, + ) + + audioLastFrameTimestamp = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "jetkvm_audio_last_frame_timestamp_seconds", + Help: "Timestamp of the last audio frame received", + }, + ) + + // Microphone input metrics + microphoneFramesSentTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_microphone_frames_sent_total", + Help: "Total number of microphone frames sent", + }, + ) + + microphoneFramesDroppedTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_microphone_frames_dropped_total", + Help: "Total number of microphone frames dropped", + }, + ) + + microphoneBytesProcessedTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_microphone_bytes_processed_total", + Help: "Total number of 
microphone bytes processed", + }, + ) + + microphoneConnectionDropsTotal = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_microphone_connection_drops_total", + Help: "Total number of microphone connection drops", + }, + ) + + microphoneAverageLatencyMilliseconds = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "jetkvm_microphone_average_latency_milliseconds", + Help: "Average microphone latency in milliseconds", + }, + ) + + microphoneLastFrameTimestamp = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "jetkvm_microphone_last_frame_timestamp_seconds", + Help: "Timestamp of the last microphone frame sent", + }, + ) + + // Memory metrics (basic monitoring) + memoryHeapAllocBytes = promauto.NewGauge( + prometheus.GaugeOpts{ + Name: "jetkvm_audio_memory_heap_alloc_bytes", + Help: "Current heap allocation in bytes", + }, + ) + + memoryGCCount = promauto.NewCounter( + prometheus.CounterOpts{ + Name: "jetkvm_audio_memory_gc_total", + Help: "Total number of garbage collections", + }, + ) + + // Metrics update tracking + lastMetricsUpdate int64 + + // Counter value tracking (since prometheus counters don't have Get() method) + audioFramesReceivedValue uint64 + audioFramesDroppedValue uint64 + audioBytesProcessedValue uint64 + audioConnectionDropsValue uint64 + micFramesSentValue uint64 + micFramesDroppedValue uint64 + micBytesProcessedValue uint64 + micConnectionDropsValue uint64 + + // Atomic counter for memory GC + memoryGCCountValue uint32 +) + +// UnifiedAudioMetrics provides a common structure for both input and output audio streams +type UnifiedAudioMetrics struct { + FramesReceived uint64 `json:"frames_received"` + FramesDropped uint64 `json:"frames_dropped"` + FramesSent uint64 `json:"frames_sent,omitempty"` + BytesProcessed uint64 `json:"bytes_processed"` + ConnectionDrops uint64 `json:"connection_drops"` + LastFrameTime time.Time `json:"last_frame_time"` + AverageLatency time.Duration `json:"average_latency"` +} + +// 
convertAudioInputMetricsToUnified converts AudioInputMetrics to UnifiedAudioMetrics +func convertAudioInputMetricsToUnified(metrics AudioInputMetrics) UnifiedAudioMetrics { + return UnifiedAudioMetrics{ + FramesReceived: 0, // AudioInputMetrics doesn't have FramesReceived + FramesDropped: uint64(metrics.FramesDropped), + FramesSent: uint64(metrics.FramesSent), + BytesProcessed: uint64(metrics.BytesProcessed), + ConnectionDrops: uint64(metrics.ConnectionDrops), + LastFrameTime: metrics.LastFrameTime, + AverageLatency: metrics.AverageLatency, + } +} + +// UpdateAudioMetrics updates Prometheus metrics with current audio data +func UpdateAudioMetrics(metrics UnifiedAudioMetrics) { + oldReceived := atomic.SwapUint64(&audioFramesReceivedValue, metrics.FramesReceived) + if metrics.FramesReceived > oldReceived { + audioFramesReceivedTotal.Add(float64(metrics.FramesReceived - oldReceived)) + } + + oldDropped := atomic.SwapUint64(&audioFramesDroppedValue, metrics.FramesDropped) + if metrics.FramesDropped > oldDropped { + audioFramesDroppedTotal.Add(float64(metrics.FramesDropped - oldDropped)) + } + + oldBytes := atomic.SwapUint64(&audioBytesProcessedValue, metrics.BytesProcessed) + if metrics.BytesProcessed > oldBytes { + audioBytesProcessedTotal.Add(float64(metrics.BytesProcessed - oldBytes)) + } + + oldDrops := atomic.SwapUint64(&audioConnectionDropsValue, metrics.ConnectionDrops) + if metrics.ConnectionDrops > oldDrops { + audioConnectionDropsTotal.Add(float64(metrics.ConnectionDrops - oldDrops)) + } + + // Update gauges + audioAverageLatencyMilliseconds.Set(float64(metrics.AverageLatency.Nanoseconds()) / 1e6) + if !metrics.LastFrameTime.IsZero() { + audioLastFrameTimestamp.Set(float64(metrics.LastFrameTime.Unix())) + } + + atomic.StoreInt64(&lastMetricsUpdate, time.Now().Unix()) +} + +// UpdateMicrophoneMetrics updates Prometheus metrics with current microphone data +func UpdateMicrophoneMetrics(metrics UnifiedAudioMetrics) { + oldSent := 
atomic.SwapUint64(&micFramesSentValue, metrics.FramesSent) + if metrics.FramesSent > oldSent { + microphoneFramesSentTotal.Add(float64(metrics.FramesSent - oldSent)) + } + + oldDropped := atomic.SwapUint64(&micFramesDroppedValue, metrics.FramesDropped) + if metrics.FramesDropped > oldDropped { + microphoneFramesDroppedTotal.Add(float64(metrics.FramesDropped - oldDropped)) + } + + oldBytes := atomic.SwapUint64(&micBytesProcessedValue, metrics.BytesProcessed) + if metrics.BytesProcessed > oldBytes { + microphoneBytesProcessedTotal.Add(float64(metrics.BytesProcessed - oldBytes)) + } + + oldDrops := atomic.SwapUint64(&micConnectionDropsValue, metrics.ConnectionDrops) + if metrics.ConnectionDrops > oldDrops { + microphoneConnectionDropsTotal.Add(float64(metrics.ConnectionDrops - oldDrops)) + } + + // Update gauges + microphoneAverageLatencyMilliseconds.Set(float64(metrics.AverageLatency.Nanoseconds()) / 1e6) + if !metrics.LastFrameTime.IsZero() { + microphoneLastFrameTimestamp.Set(float64(metrics.LastFrameTime.Unix())) + } + + atomic.StoreInt64(&lastMetricsUpdate, time.Now().Unix()) +} + +// UpdateMemoryMetrics updates basic memory metrics +func UpdateMemoryMetrics() { + var m runtime.MemStats + runtime.ReadMemStats(&m) + + memoryHeapAllocBytes.Set(float64(m.HeapAlloc)) + + // Update GC count with delta calculation + currentGCCount := uint32(m.NumGC) + prevGCCount := atomic.SwapUint32(&memoryGCCountValue, currentGCCount) + if prevGCCount > 0 && currentGCCount > prevGCCount { + memoryGCCount.Add(float64(currentGCCount - prevGCCount)) + } + + atomic.StoreInt64(&lastMetricsUpdate, time.Now().Unix()) +} + +// GetLastMetricsUpdate returns the timestamp of the last metrics update +func GetLastMetricsUpdate() time.Time { + timestamp := atomic.LoadInt64(&lastMetricsUpdate) + return time.Unix(timestamp, 0) +} + +// StartMetricsUpdater starts a goroutine that periodically updates Prometheus metrics +func StartMetricsUpdater() { + // Start the centralized metrics collector + 
registry := GetMetricsRegistry() + registry.StartMetricsCollector() + + // Start a separate goroutine for periodic updates + go func() { + ticker := time.NewTicker(5 * time.Second) // Update every 5 seconds + defer ticker.Stop() + + for range ticker.C { + // Update memory metrics (not part of centralized registry) + UpdateMemoryMetrics() + } + }() +} diff --git a/internal/audio/core_metrics_registry.go b/internal/audio/core_metrics_registry.go new file mode 100644 index 00000000..2573d29c --- /dev/null +++ b/internal/audio/core_metrics_registry.go @@ -0,0 +1,79 @@ +//go:build cgo + +package audio + +import ( + "sync" + "sync/atomic" + "time" +) + +// MetricsRegistry provides a centralized source of truth for all audio metrics +// This eliminates duplication between session-specific and global managers +type MetricsRegistry struct { + mu sync.RWMutex + audioInputMetrics AudioInputMetrics + lastUpdate int64 // Unix timestamp +} + +var ( + globalMetricsRegistry *MetricsRegistry + registryOnce sync.Once +) + +// GetMetricsRegistry returns the global metrics registry instance +func GetMetricsRegistry() *MetricsRegistry { + registryOnce.Do(func() { + globalMetricsRegistry = &MetricsRegistry{ + lastUpdate: time.Now().Unix(), + } + }) + return globalMetricsRegistry +} + +// UpdateAudioInputMetrics updates the centralized audio input metrics +func (mr *MetricsRegistry) UpdateAudioInputMetrics(metrics AudioInputMetrics) { + mr.mu.Lock() + mr.audioInputMetrics = metrics + mr.lastUpdate = time.Now().Unix() + mr.mu.Unlock() + + // Update Prometheus metrics directly to avoid circular dependency + UpdateMicrophoneMetrics(convertAudioInputMetricsToUnified(metrics)) +} + +// GetAudioInputMetrics returns the current audio input metrics +func (mr *MetricsRegistry) GetAudioInputMetrics() AudioInputMetrics { + mr.mu.RLock() + defer mr.mu.RUnlock() + return mr.audioInputMetrics +} + +// GetLastUpdate returns the timestamp of the last metrics update +func (mr *MetricsRegistry) 
GetLastUpdate() time.Time { + timestamp := atomic.LoadInt64(&mr.lastUpdate) + return time.Unix(timestamp, 0) +} + +// StartMetricsCollector starts a background goroutine to collect metrics +func (mr *MetricsRegistry) StartMetricsCollector() { + go func() { + ticker := time.NewTicker(1 * time.Second) + defer ticker.Stop() + + for range ticker.C { + // Collect from session-specific manager if available + if sessionProvider := GetSessionProvider(); sessionProvider != nil && sessionProvider.IsSessionActive() { + if inputManager := sessionProvider.GetAudioInputManager(); inputManager != nil { + metrics := inputManager.GetMetrics() + mr.UpdateAudioInputMetrics(metrics) + } + } else { + // Fallback to global manager if no session is active + globalManager := getAudioInputManager() + metrics := globalManager.GetMetrics() + mr.UpdateAudioInputMetrics(metrics) + } + } + }() +} diff --git a/internal/audio/core_validation.go b/internal/audio/core_validation.go new file mode 100644 index 00000000..5f695d2f --- /dev/null +++ b/internal/audio/core_validation.go @@ -0,0 +1,354 @@ +//go:build cgo || arm +// +build cgo arm + +package audio + +import ( + "errors" + "fmt" + "time" +) + +// Validation errors +var ( + ErrInvalidFrameSize = errors.New("invalid frame size") + ErrInvalidFrameData = errors.New("invalid frame data") + ErrFrameDataEmpty = errors.New("invalid frame data: frame data is empty") + ErrFrameDataTooLarge = errors.New("invalid frame data: exceeds maximum") + ErrInvalidBufferSize = errors.New("invalid buffer size") + + ErrInvalidLatency = errors.New("invalid latency value") + ErrInvalidConfiguration = errors.New("invalid configuration") + ErrInvalidSocketConfig = errors.New("invalid socket configuration") + ErrInvalidMetricsInterval = errors.New("invalid metrics interval") + ErrInvalidSampleRate = errors.New("invalid sample rate") + ErrInvalidChannels = errors.New("invalid channels") + ErrInvalidBitrate = errors.New("invalid bitrate") + ErrInvalidFrameDuration = 
errors.New("invalid frame duration")
	ErrInvalidOffset          = errors.New("invalid offset")
	ErrInvalidLength          = errors.New("invalid length")
)

// ValidateAudioQuality is deprecated - quality is now fixed at optimal settings.
// It is kept only for interface compatibility and always returns nil.
func ValidateAudioQuality(quality int) error {
	return nil
}

// ValidateZeroCopyFrame validates a zero-copy audio frame against the cached
// maximum frame size. The cache is lazily populated from Config on first miss.
func ValidateZeroCopyFrame(frame *ZeroCopyAudioFrame) error {
	if frame == nil {
		return ErrInvalidFrameData
	}
	data := frame.Data()
	if len(data) == 0 {
		return ErrInvalidFrameData
	}

	// Fast path: use the package-level cached max frame size.
	maxFrameSize := cachedMaxFrameSize
	if maxFrameSize == 0 {
		// Cache miss: read from config once and memoize for subsequent calls.
		// (The original assigned the same config value twice here - dead code.)
		maxFrameSize = Config.MaxAudioFrameSize
		cachedMaxFrameSize = maxFrameSize
	}

	if len(data) > maxFrameSize {
		return ErrInvalidFrameSize
	}
	return nil
}

// ValidateBufferSize validates buffer size parameters with boundary checks.
// Kept to a single comparison per bound for hotpath use.
func ValidateBufferSize(size int) error {
	if size <= 0 {
		return fmt.Errorf("%w: buffer size %d must be positive", ErrInvalidBufferSize, size)
	}
	if size > Config.SocketMaxBuffer {
		return fmt.Errorf("%w: buffer size %d exceeds maximum %d",
			ErrInvalidBufferSize, size, Config.SocketMaxBuffer)
	}
	return nil
}

// ValidateLatency validates latency duration values with reasonable bounds.
// The original duplicated identical "fast path" and "fallback" branches that
// read the same Config values; the logic is collapsed into one path.
func ValidateLatency(latency time.Duration) error {
	if latency < 0 {
		return fmt.Errorf("%w: latency %v cannot be negative", ErrInvalidLatency, latency)
	}

	minLatency := time.Millisecond // Minimum reasonable latency
	if latency > 0 && latency < minLatency {
		return fmt.Errorf("%w: latency %v below minimum %v",
			ErrInvalidLatency, latency, minLatency)
	}
	if maxLatency := Config.MaxLatency; latency > maxLatency {
		return fmt.Errorf("%w: latency %v exceeds maximum %v",
			ErrInvalidLatency, latency, maxLatency)
	}
	return nil
}

// ValidateMetricsInterval validates a metrics update interval against the
// configured min/max bounds. The original's fast and slow paths compared the
// same Config values, with the slow path returning bare (detail-free) errors;
// a single path with wrapped errors preserves errors.Is compatibility.
func ValidateMetricsInterval(interval time.Duration) error {
	minInterval := Config.MinMetricsUpdateInterval
	maxInterval := Config.MaxMetricsUpdateInterval

	if interval < minInterval {
		return fmt.Errorf("%w: interval %v below minimum %v",
			ErrInvalidMetricsInterval, interval, minInterval)
	}
	if interval > maxInterval {
		return fmt.Errorf("%w: interval %v exceeds maximum %v",
			ErrInvalidMetricsInterval, interval, maxInterval)
	}
	return nil
}

// ValidateInputIPCConfig validates input IPC configuration
func ValidateInputIPCConfig(sampleRate, channels, frameSize int) 
error { + minSampleRate := Config.MinSampleRate + maxSampleRate := Config.MaxSampleRate + maxChannels := Config.MaxChannels + if sampleRate < minSampleRate || sampleRate > maxSampleRate { + return ErrInvalidSampleRate + } + if channels < 1 || channels > maxChannels { + return ErrInvalidChannels + } + if frameSize <= 0 { + return ErrInvalidFrameSize + } + return nil +} + +// ValidateOutputIPCConfig validates output IPC configuration +func ValidateOutputIPCConfig(sampleRate, channels, frameSize int) error { + minSampleRate := Config.MinSampleRate + maxSampleRate := Config.MaxSampleRate + maxChannels := Config.MaxChannels + if sampleRate < minSampleRate || sampleRate > maxSampleRate { + return ErrInvalidSampleRate + } + if channels < 1 || channels > maxChannels { + return ErrInvalidChannels + } + if frameSize <= 0 { + return ErrInvalidFrameSize + } + return nil +} + +// ValidateSampleRate validates audio sample rate values +// Optimized for minimal overhead in hotpath +func ValidateSampleRate(sampleRate int) error { + if sampleRate <= 0 { + return fmt.Errorf("%w: sample rate %d must be positive", ErrInvalidSampleRate, sampleRate) + } + // Direct validation against valid rates + for _, rate := range Config.ValidSampleRates { + if sampleRate == rate { + return nil + } + } + return fmt.Errorf("%w: sample rate %d not in valid rates %v", + ErrInvalidSampleRate, sampleRate, Config.ValidSampleRates) +} + +// ValidateChannelCount validates audio channel count +// Optimized for minimal overhead in hotpath +func ValidateChannelCount(channels int) error { + if channels <= 0 { + return fmt.Errorf("%w: channel count %d must be positive", ErrInvalidChannels, channels) + } + // Direct boundary check + if channels > Config.MaxChannels { + return fmt.Errorf("%w: channel count %d exceeds maximum %d", + ErrInvalidChannels, channels, Config.MaxChannels) + } + return nil +} + +// ValidateBitrate validates audio bitrate values (expects kbps) +// Optimized for minimal overhead in hotpath 
+func ValidateBitrate(bitrate int) error { + if bitrate <= 0 { + return fmt.Errorf("%w: bitrate %d must be positive", ErrInvalidBitrate, bitrate) + } + // Direct boundary check with single conversion + bitrateInBps := bitrate * 1000 + if bitrateInBps < Config.MinOpusBitrate { + return fmt.Errorf("%w: bitrate %d kbps (%d bps) below minimum %d bps", + ErrInvalidBitrate, bitrate, bitrateInBps, Config.MinOpusBitrate) + } + if bitrateInBps > Config.MaxOpusBitrate { + return fmt.Errorf("%w: bitrate %d kbps (%d bps) exceeds maximum %d bps", + ErrInvalidBitrate, bitrate, bitrateInBps, Config.MaxOpusBitrate) + } + return nil +} + +// ValidateFrameDuration validates frame duration values +// Optimized to use AudioConfigCache for frequently accessed values +func ValidateFrameDuration(duration time.Duration) error { + if duration <= 0 { + return fmt.Errorf("%w: frame duration %v must be positive", ErrInvalidFrameDuration, duration) + } + + // Fast path: Check against cached frame size first + cache := Config + + // Convert frameSize (samples) to duration for comparison + cachedFrameSize := cache.FrameSize + cachedSampleRate := cache.SampleRate + + // Only do this calculation if we have valid cached values + if cachedFrameSize > 0 && cachedSampleRate > 0 { + cachedDuration := time.Duration(cachedFrameSize) * time.Second / time.Duration(cachedSampleRate) + + // Most common case: validating against the current frame duration + if duration == cachedDuration { + return nil + } + } + + // Fast path: Check against cached min/max frame duration + cachedMinDuration := time.Duration(cache.MinFrameDuration) + cachedMaxDuration := time.Duration(cache.MaxFrameDuration) + + if cachedMinDuration > 0 && cachedMaxDuration > 0 { + if duration < cachedMinDuration { + return fmt.Errorf("%w: frame duration %v below minimum %v", + ErrInvalidFrameDuration, duration, cachedMinDuration) + } + if duration > cachedMaxDuration { + return fmt.Errorf("%w: frame duration %v exceeds maximum %v", + 
ErrInvalidFrameDuration, duration, cachedMaxDuration) + } + return nil + } + + // Slow path: Use current config values + updatedMinDuration := time.Duration(cache.MinFrameDuration) + updatedMaxDuration := time.Duration(cache.MaxFrameDuration) + + if duration < updatedMinDuration { + return fmt.Errorf("%w: frame duration %v below minimum %v", + ErrInvalidFrameDuration, duration, updatedMinDuration) + } + if duration > updatedMaxDuration { + return fmt.Errorf("%w: frame duration %v exceeds maximum %v", + ErrInvalidFrameDuration, duration, updatedMaxDuration) + } + return nil +} + +// ValidateAudioConfigConstants validates audio configuration constants +func ValidateAudioConfigConstants(config *AudioConfigConstants) error { + // Quality validation removed - using fixed optimal configuration + // Validate configuration values if config is provided + if config != nil { + if Config.MaxFrameSize <= 0 { + return fmt.Errorf("invalid MaxFrameSize: %d", Config.MaxFrameSize) + } + if Config.SampleRate <= 0 { + return fmt.Errorf("invalid SampleRate: %d", Config.SampleRate) + } + } + return nil +} + +// Global variable for backward compatibility +var cachedMaxFrameSize int + +// InitValidationCache initializes cached validation values with actual config +func InitValidationCache() { + // Initialize the global cache variable for backward compatibility + cachedMaxFrameSize = Config.MaxAudioFrameSize + + // Initialize the global audio config cache + cachedMaxFrameSize = Config.MaxAudioFrameSize +} + +// ValidateAudioFrame validates audio frame data with cached max size for performance +// +//go:inline +func ValidateAudioFrame(data []byte) error { + // Fast path: check length against cached max size in single operation + dataLen := len(data) + if dataLen == 0 { + return ErrFrameDataEmpty + } + + // Use global cached value for fastest access - updated during initialization + maxSize := cachedMaxFrameSize + if maxSize == 0 { + // Fallback: get from cache only if global cache not 
initialized + cache := Config + maxSize = cache.MaxAudioFrameSize + if maxSize == 0 { + // Last resort: get fresh value + maxSize = cache.MaxAudioFrameSize + } + // Cache the value globally for next calls + cachedMaxFrameSize = maxSize + } + + // Single comparison for validation + if dataLen > maxSize { + return ErrFrameDataTooLarge + } + return nil +} + +// WrapWithMetadata wraps error with metadata for enhanced validation context +func WrapWithMetadata(err error, component, operation string, metadata map[string]interface{}) error { + if err == nil { + return nil + } + return fmt.Errorf("%s.%s: %w (metadata: %+v)", component, operation, err, metadata) +} diff --git a/internal/audio/embed.go b/internal/audio/embed.go new file mode 100644 index 00000000..f7a4df40 --- /dev/null +++ b/internal/audio/embed.go @@ -0,0 +1,93 @@ +//go:build cgo +// +build cgo + +package audio + +import ( + _ "embed" + "fmt" + "os" +) + +// Embedded C audio binaries (built during compilation) +// +//go:embed bin/jetkvm_audio_output +var audioOutputBinary []byte + +//go:embed bin/jetkvm_audio_input +var audioInputBinary []byte + +const ( + audioBinDir = "/userdata/jetkvm/bin" + audioOutputBinPath = audioBinDir + "/jetkvm_audio_output" + audioInputBinPath = audioBinDir + "/jetkvm_audio_input" + binaryFileMode = 0755 // rwxr-xr-x +) + +// ExtractEmbeddedBinaries extracts the embedded C audio binaries to disk +// This should be called during application startup before audio supervisors are started +func ExtractEmbeddedBinaries() error { + // Create bin directory if it doesn't exist + if err := os.MkdirAll(audioBinDir, 0755); err != nil { + return fmt.Errorf("failed to create audio bin directory: %w", err) + } + + // Extract audio output binary + if err := extractBinary(audioOutputBinary, audioOutputBinPath); err != nil { + return fmt.Errorf("failed to extract audio output binary: %w", err) + } + + // Extract audio input binary + if err := extractBinary(audioInputBinary, audioInputBinPath); err 
!= nil { + return fmt.Errorf("failed to extract audio input binary: %w", err) + } + + return nil +} + +// extractBinary writes embedded binary data to disk with executable permissions +func extractBinary(data []byte, path string) error { + // Check if binary already exists and is valid + if info, err := os.Stat(path); err == nil { + // File exists - check if size matches + if info.Size() == int64(len(data)) { + // Binary already extracted and matches embedded version + return nil + } + // Size mismatch - need to update + } + + // Write to temporary file first for atomic replacement + tmpPath := path + ".tmp" + if err := os.WriteFile(tmpPath, data, binaryFileMode); err != nil { + return fmt.Errorf("failed to write binary to %s: %w", tmpPath, err) + } + + // Atomically rename to final path + if err := os.Rename(tmpPath, path); err != nil { + os.Remove(tmpPath) // Clean up on error + return fmt.Errorf("failed to rename binary to %s: %w", path, err) + } + + return nil +} + +// GetAudioOutputBinaryPath returns the path to the audio output binary +func GetAudioOutputBinaryPath() string { + return audioOutputBinPath +} + +// GetAudioInputBinaryPath returns the path to the audio input binary +func GetAudioInputBinaryPath() string { + return audioInputBinPath +} + +// init ensures binaries are extracted when package is imported +func init() { + // Extract binaries on package initialization + // This ensures binaries are available before supervisors start + if err := ExtractEmbeddedBinaries(); err != nil { + // Log error but don't panic - let caller handle initialization failure + fmt.Fprintf(os.Stderr, "Warning: Failed to extract embedded audio binaries: %v\n", err) + } +} diff --git a/internal/audio/input_api.go b/internal/audio/input_api.go new file mode 100644 index 00000000..66f64d6d --- /dev/null +++ b/internal/audio/input_api.go @@ -0,0 +1,94 @@ +package audio + +import ( + "sync/atomic" + "unsafe" +) + +var ( + // Global audio input manager instance + 
globalInputManager unsafe.Pointer // *AudioInputManager +) + +// AudioInputInterface defines the common interface for audio input managers +type AudioInputInterface interface { + Start() error + Stop() + WriteOpusFrame(frame []byte) error + IsRunning() bool + GetMetrics() AudioInputMetrics +} + +// GetSupervisor returns the audio input supervisor for advanced management +func (m *AudioInputManager) GetSupervisor() *AudioInputSupervisor { + return GetAudioInputSupervisor() +} + +// getAudioInputManager returns the audio input manager +func getAudioInputManager() AudioInputInterface { + ptr := atomic.LoadPointer(&globalInputManager) + if ptr == nil { + // Create new manager + newManager := NewAudioInputManager() + if atomic.CompareAndSwapPointer(&globalInputManager, nil, unsafe.Pointer(newManager)) { + return newManager + } + // Another goroutine created it, use that one + ptr = atomic.LoadPointer(&globalInputManager) + } + return (*AudioInputManager)(ptr) +} + +// StartAudioInput starts the audio input system using the appropriate manager +func StartAudioInput() error { + manager := getAudioInputManager() + return manager.Start() +} + +// StopAudioInput stops the audio input system +func StopAudioInput() { + manager := getAudioInputManager() + manager.Stop() +} + +// WriteAudioInputFrame writes an Opus frame to the audio input system +func WriteAudioInputFrame(frame []byte) error { + manager := getAudioInputManager() + return manager.WriteOpusFrame(frame) +} + +// IsAudioInputRunning returns whether the audio input system is running +func IsAudioInputRunning() bool { + manager := getAudioInputManager() + return manager.IsRunning() +} + +// GetAudioInputMetrics returns current audio input metrics +func GetAudioInputMetrics() AudioInputMetrics { + manager := getAudioInputManager() + return manager.GetMetrics() +} + +// GetAudioInputIPCSupervisor returns the IPC supervisor +func GetAudioInputIPCSupervisor() *AudioInputSupervisor { + ptr := 
atomic.LoadPointer(&globalInputManager) + if ptr == nil { + return nil + } + + manager := (*AudioInputManager)(ptr) + return manager.GetSupervisor() +} + +// Helper functions + +// ResetAudioInputManagers resets the global manager (for testing) +func ResetAudioInputManagers() { + // Stop existing manager first + if ptr := atomic.LoadPointer(&globalInputManager); ptr != nil { + (*AudioInputManager)(ptr).Stop() + } + + // Reset pointer + atomic.StorePointer(&globalInputManager, nil) +} diff --git a/internal/audio/input_microphone_manager.go b/internal/audio/input_microphone_manager.go new file mode 100644 index 00000000..1ec702e4 --- /dev/null +++ b/internal/audio/input_microphone_manager.go @@ -0,0 +1,269 @@ +package audio + +import ( + "fmt" + "sync/atomic" + "time" + + "github.com/jetkvm/kvm/internal/logging" +) + +// Component name constant for logging +const ( + AudioInputManagerComponent = "audio-input-manager" +) + +// AudioInputMetrics holds metrics for microphone input +// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) +type AudioInputMetrics struct { + // Atomic int64 field first for proper ARM32 alignment + FramesSent int64 `json:"frames_sent"` // Total frames sent (input-specific) + + // Embedded struct with atomic fields properly aligned + BaseAudioMetrics +} + +// AudioInputManager manages microphone input stream using IPC mode only +type AudioInputManager struct { + *BaseAudioManager + framesSent int64 // Input-specific metric +} + +// NewAudioInputManager creates a new audio input manager +func NewAudioInputManager() *AudioInputManager { + logger := logging.GetDefaultLogger().With().Str("component", AudioInputManagerComponent).Logger() + return &AudioInputManager{ + BaseAudioManager: NewBaseAudioManager(logger), + } +} + +// getClient returns the audio input client from the global supervisor +func (aim *AudioInputManager) getClient() *AudioInputClient { + supervisor := GetAudioInputSupervisor() + if supervisor == 
nil {
		return nil
	}
	return supervisor.GetClient()
}

// Start begins processing microphone input.
// It marks the manager running and ensures the shared input supervisor (and
// its subprocess) is started; on supervisor failure the running flag and
// metrics are rolled back.
func (aim *AudioInputManager) Start() error {
	if !aim.setRunning(true) {
		return fmt.Errorf("audio input manager is already running")
	}

	aim.logComponentStart(AudioInputManagerComponent)

	// Ensure supervisor and client are available
	supervisor := GetAudioInputSupervisor()
	if supervisor == nil {
		aim.setRunning(false)
		return fmt.Errorf("audio input supervisor not available")
	}

	// Start the supervisor if not already running
	if !supervisor.IsRunning() {
		err := supervisor.Start()
		if err != nil {
			aim.logComponentError(AudioInputManagerComponent, err, "failed to start supervisor")
			aim.setRunning(false)
			aim.resetMetrics()
			return err
		}
	}

	aim.logComponentStarted(AudioInputManagerComponent)
	return nil
}

// Stop stops processing microphone input.
// The supervisor is deliberately left running: it may be shared, and its
// lifecycle is managed by the main process.
func (aim *AudioInputManager) Stop() {
	if !aim.setRunning(false) {
		return // Already stopped
	}

	aim.logComponentStop(AudioInputManagerComponent)
	aim.logComponentStopped(AudioInputManagerComponent)
}

// resetMetrics resets all metrics to zero, including the input-specific
// frames-sent counter.
func (aim *AudioInputManager) resetMetrics() {
	aim.BaseAudioManager.resetMetrics()
	atomic.StoreInt64(&aim.framesSent, 0)
}

// WriteOpusFrame writes an Opus frame to the audio input system with latency tracking.
// CONSISTENCY FIX: the original never updated framesSent on success nor
// FramesDropped on failure, unlike WriteOpusFrameZeroCopy, so metrics
// undercounted on the non-zero-copy path.
func (aim *AudioInputManager) WriteOpusFrame(frame []byte) error {
	if !aim.IsRunning() {
		return nil // Not running, silently drop
	}

	// Check mute state - drop frames if microphone is muted (like audio output)
	if IsMicrophoneMuted() {
		return nil // Muted, silently drop
	}

	// Use ultra-fast validation for critical audio path
	if err := ValidateAudioFrame(frame); err != nil {
		atomic.AddInt64(&aim.metrics.FramesDropped, 1)
		aim.logComponentError(AudioInputManagerComponent, err, "Frame validation failed")
		return fmt.Errorf("input frame validation failed: %w", err)
	}

	// Get client from supervisor
	client := aim.getClient()
	if client == nil {
		atomic.AddInt64(&aim.metrics.FramesDropped, 1)
		return fmt.Errorf("audio input client not available")
	}

	// Track end-to-end latency from WebRTC to IPC
	startTime := time.Now()
	err := client.SendFrame(frame)
	processingTime := time.Since(startTime)

	// Log high latency warnings
	if processingTime > time.Duration(Config.InputProcessingTimeoutMS)*time.Millisecond {
		latencyMs := float64(processingTime.Milliseconds())
		aim.logger.Warn().
			Float64("latency_ms", latencyMs).
			Msg("High audio processing latency detected")
	}

	if err != nil {
		atomic.AddInt64(&aim.metrics.FramesDropped, 1)
		return err
	}

	// Count successfully forwarded frames, mirroring WriteOpusFrameZeroCopy.
	atomic.AddInt64(&aim.framesSent, 1)
	return nil
}

// WriteOpusFrameZeroCopy writes an Opus frame using zero-copy optimization
func (aim *AudioInputManager) WriteOpusFrameZeroCopy(frame *ZeroCopyAudioFrame) error {
	if !aim.IsRunning() {
		return nil // Not running, silently drop
	}

	// Check mute state - drop frames if microphone is muted (like audio output)
	if IsMicrophoneMuted() {
		return nil // Muted, silently drop
	}

	if frame == nil {
		atomic.AddInt64(&aim.metrics.FramesDropped, 1)
		return nil
	}

	// Get client from supervisor
	client := aim.getClient()
	if client == nil {
		atomic.AddInt64(&aim.metrics.FramesDropped, 1)
		return fmt.Errorf("audio input client not available")
	}

	// Track end-to-end latency from WebRTC to IPC
	startTime := time.Now()
	err := client.SendFrameZeroCopy(frame)
	processingTime := time.Since(startTime)

	// Log high latency warnings
	if processingTime > time.Duration(Config.InputProcessingTimeoutMS)*time.Millisecond {
		latencyMs := float64(processingTime.Milliseconds())
		aim.logger.Warn().
			Float64("latency_ms", latencyMs). 
+ Msg("High audio processing latency detected") + } + + if err != nil { + atomic.AddInt64(&aim.metrics.FramesDropped, 1) + return err + } + + // Update metrics + atomic.AddInt64(&aim.framesSent, 1) + + return nil +} + +// GetMetrics returns current metrics +func (aim *AudioInputManager) GetMetrics() AudioInputMetrics { + return AudioInputMetrics{ + FramesSent: atomic.LoadInt64(&aim.framesSent), + BaseAudioMetrics: aim.getBaseMetrics(), + } +} + +// GetComprehensiveMetrics returns detailed performance metrics across all components +func (aim *AudioInputManager) GetComprehensiveMetrics() map[string]interface{} { + // Get base metrics + baseMetrics := aim.GetMetrics() + + // Get client stats if available + var clientStats map[string]interface{} + client := aim.getClient() + if client != nil { + total, dropped := client.GetFrameStats() + clientStats = map[string]interface{}{ + "frames_sent": total, + "frames_dropped": dropped, + } + } else { + clientStats = map[string]interface{}{ + "frames_sent": 0, + "frames_dropped": 0, + } + } + + comprehensiveMetrics := map[string]interface{}{ + "manager": map[string]interface{}{ + "frames_sent": baseMetrics.FramesSent, + "frames_dropped": baseMetrics.FramesDropped, + "bytes_processed": baseMetrics.BytesProcessed, + "average_latency_ms": float64(baseMetrics.AverageLatency.Nanoseconds()) / 1e6, + "last_frame_time": baseMetrics.LastFrameTime, + "running": aim.IsRunning(), + }, + "client": clientStats, + } + + return comprehensiveMetrics +} + +// IsRunning returns whether the audio input manager is running +// This checks both the internal state and existing system processes +func (aim *AudioInputManager) IsRunning() bool { + // First check internal state + if aim.BaseAudioManager.IsRunning() { + return true + } + + // If internal state says not running, check supervisor + supervisor := GetAudioInputSupervisor() + if supervisor != nil { + if existingPID, exists := supervisor.HasExistingProcess(); exists { + 
aim.logger.Info().Int("existing_pid", existingPID).Msg("Found existing audio input server process") + // Update internal state to reflect reality + aim.setRunning(true) + return true + } + } + + return false +} + +// IsReady returns whether the audio input manager is ready to receive frames +// This checks both that it's running and that the IPC connection is established +func (aim *AudioInputManager) IsReady() bool { + if !aim.IsRunning() { + return false + } + + // Check if client is connected + client := aim.getClient() + if client == nil { + return false + } + + return client.IsConnected() +} diff --git a/internal/audio/input_supervisor.go b/internal/audio/input_supervisor.go new file mode 100644 index 00000000..4f356f15 --- /dev/null +++ b/internal/audio/input_supervisor.go @@ -0,0 +1,304 @@ +//go:build cgo +// +build cgo + +package audio + +import ( + "fmt" + "os" + "os/exec" + "strconv" + "strings" + "sync/atomic" + "syscall" + "time" +) + +// AudioInputSupervisor manages the audio input server subprocess +type AudioInputSupervisor struct { + *BaseSupervisor + client *AudioInputClient + + // Environment variables for OPUS configuration + opusEnv []string +} + +// NewAudioInputSupervisor creates a new audio input supervisor +func NewAudioInputSupervisor() *AudioInputSupervisor { + return &AudioInputSupervisor{ + BaseSupervisor: NewBaseSupervisor("audio-input-supervisor"), + client: NewAudioInputClient(), + } +} + +// SetOpusConfig sets OPUS configuration parameters as environment variables +// for the audio input subprocess +func (ais *AudioInputSupervisor) SetOpusConfig(bitrate, complexity, vbr, signalType, bandwidth, dtx int) { + ais.mutex.Lock() + defer ais.mutex.Unlock() + + // Store OPUS parameters as environment variables for C binary + ais.opusEnv = []string{ + "OPUS_BITRATE=" + strconv.Itoa(bitrate), + "OPUS_COMPLEXITY=" + strconv.Itoa(complexity), + "OPUS_VBR=" + strconv.Itoa(vbr), + "OPUS_SIGNAL_TYPE=" + strconv.Itoa(signalType), + "OPUS_BANDWIDTH=" 
+ strconv.Itoa(bandwidth), + "OPUS_DTX=" + strconv.Itoa(dtx), + "ALSA_PLAYBACK_DEVICE=hw:1,0", // USB Gadget audio playback + } +} + +// Start begins supervising the audio input server process +func (ais *AudioInputSupervisor) Start() error { + if !atomic.CompareAndSwapInt32(&ais.running, 0, 1) { + return fmt.Errorf("audio input supervisor is already running") + } + + ais.logSupervisorStart() + ais.createContext() + + // Recreate channels in case they were closed by a previous Stop() call + ais.initializeChannels() + + // Start the supervision loop + go ais.supervisionLoop() + + ais.logger.Info().Str("component", "audio-input-supervisor").Msg("component started successfully") + return nil +} + +// supervisionLoop is the main supervision loop +func (ais *AudioInputSupervisor) supervisionLoop() { + // Configure supervision parameters (no restart for input supervisor) + config := SupervisionConfig{ + ProcessType: "audio input server", + Timeout: Config.InputSupervisorTimeout, + EnableRestart: false, // Input supervisor doesn't restart + MaxRestartAttempts: 0, + RestartWindow: 0, + RestartDelay: 0, + MaxRestartDelay: 0, + } + + // Configure callbacks (input supervisor doesn't have callbacks currently) + callbacks := ProcessCallbacks{ + OnProcessStart: nil, + OnProcessExit: nil, + OnRestart: nil, + } + + // Use the base supervision loop template + ais.SupervisionLoop( + config, + callbacks, + ais.startProcess, + func() bool { return false }, // Never restart + func() time.Duration { return 0 }, // No restart delay needed + ) +} + +// startProcess starts the audio input server process +func (ais *AudioInputSupervisor) startProcess() error { + // Use embedded C binary path + binaryPath := GetAudioInputBinaryPath() + + ais.mutex.Lock() + defer ais.mutex.Unlock() + + // Create new command (no args needed for C binary) + ais.cmd = exec.CommandContext(ais.ctx, binaryPath) + ais.cmd.Stdout = os.Stdout + ais.cmd.Stderr = os.Stderr + + // Set environment variables for OPUS 
configuration + env := append(os.Environ(), ais.opusEnv...) + + // Pass logging environment variables directly to subprocess + // The subprocess will inherit all PION_LOG_* variables from os.Environ() + // This ensures the audio scope gets the correct trace level + + ais.cmd.Env = env + + // Set process group to allow clean termination + ais.cmd.SysProcAttr = &syscall.SysProcAttr{ + Setpgid: true, + } + + // Start the process + if err := ais.cmd.Start(); err != nil { + return fmt.Errorf("failed to start audio input server process: %w", err) + } + + ais.processPID = ais.cmd.Process.Pid + ais.logger.Info().Int("pid", ais.processPID).Str("binary", binaryPath).Strs("opus_env", ais.opusEnv).Msg("audio input server process started") + + // Connect client to the server synchronously to avoid race condition + ais.connectClient() + + return nil +} + +// Stop gracefully stops the audio input server and supervisor +func (ais *AudioInputSupervisor) Stop() { + if !atomic.CompareAndSwapInt32(&ais.running, 1, 0) { + return // Already stopped + } + + ais.logSupervisorStop() + + // Disconnect client first + if ais.client != nil { + ais.client.Disconnect() + } + + // Signal stop and wait for cleanup + ais.closeStopChan() + ais.cancelContext() + + // Wait for process to exit + select { + case <-ais.processDone: + ais.logger.Info().Str("component", "audio-input-supervisor").Msg("component stopped gracefully") + case <-time.After(Config.InputSupervisorTimeout): + ais.logger.Warn().Str("component", "audio-input-supervisor").Msg("component did not stop gracefully, forcing termination") + ais.forceKillProcess("audio input server") + } + + ais.logger.Info().Str("component", "audio-input-supervisor").Msg("component stopped") +} + +// IsConnected returns whether the client is connected to the audio input server +func (ais *AudioInputSupervisor) IsConnected() bool { + ais.mutex.Lock() + defer ais.mutex.Unlock() + if !ais.IsRunning() { + return false + } + return ais.client.IsConnected() +} + 
+// GetClient returns the IPC client for sending audio frames +func (ais *AudioInputSupervisor) GetClient() *AudioInputClient { + return ais.client +} + +// connectClient attempts to connect the client to the server +func (ais *AudioInputSupervisor) connectClient() { + // Wait briefly for the server to start and create socket + time.Sleep(Config.DefaultSleepDuration) + + // Additional small delay to ensure socket is ready after restart + time.Sleep(20 * time.Millisecond) + + err := ais.client.Connect() + if err != nil { + ais.logger.Error().Err(err).Msg("Failed to connect to audio input server") + return + } + + ais.logger.Info().Msg("Connected to audio input server") +} + +// SendFrame sends an audio frame to the subprocess (convenience method) +func (ais *AudioInputSupervisor) SendFrame(frame []byte) error { + if ais.client == nil { + return fmt.Errorf("client not initialized") + } + + if !ais.client.IsConnected() { + return fmt.Errorf("client not connected") + } + + return ais.client.SendFrame(frame) +} + +// SendFrameZeroCopy sends a zero-copy frame to the subprocess +func (ais *AudioInputSupervisor) SendFrameZeroCopy(frame *ZeroCopyAudioFrame) error { + if ais.client == nil { + return fmt.Errorf("client not initialized") + } + + if !ais.client.IsConnected() { + return fmt.Errorf("client not connected") + } + + return ais.client.SendFrameZeroCopy(frame) +} + +// SendConfig sends a configuration update to the subprocess (convenience method) +func (ais *AudioInputSupervisor) SendConfig(config UnifiedIPCConfig) error { + if ais.client == nil { + return fmt.Errorf("client not initialized") + } + + if !ais.client.IsConnected() { + return fmt.Errorf("client not connected") + } + + return ais.client.SendConfig(config) +} + +// SendOpusConfig sends a complete Opus encoder configuration to the audio input server +func (ais *AudioInputSupervisor) SendOpusConfig(config UnifiedIPCOpusConfig) error { + if ais.client == nil { + return fmt.Errorf("client not initialized") + } 
+ + if !ais.client.IsConnected() { + return fmt.Errorf("client not connected") + } + + return ais.client.SendOpusConfig(config) +} + +// findExistingAudioInputProcess checks if there's already an audio input server process running +func (ais *AudioInputSupervisor) findExistingAudioInputProcess() (int, error) { + // Look for the C binary name + binaryName := "jetkvm_audio_input" + + // Use ps to find processes with C binary name + cmd := exec.Command("ps", "aux") + output, err := cmd.Output() + if err != nil { + return 0, fmt.Errorf("failed to run ps command: %w", err) + } + + // Parse ps output to find audio input server processes + lines := strings.Split(string(output), "\n") + for _, line := range lines { + if strings.Contains(line, binaryName) { + // Extract PID from ps output (second column) + fields := strings.Fields(line) + if len(fields) >= 2 { + // PID is the first field + if pid, err := strconv.Atoi(fields[0]); err == nil { + if ais.isProcessRunning(pid) { + return pid, nil + } + } + } + } + } + + return 0, fmt.Errorf("no existing audio input server process found") +} + +// isProcessRunning checks if a process with the given PID is still running +func (ais *AudioInputSupervisor) isProcessRunning(pid int) bool { + // Try to send signal 0 to check if process exists + process, err := os.FindProcess(pid) + if err != nil { + return false + } + + err = process.Signal(syscall.Signal(0)) + return err == nil +} + +// HasExistingProcess checks if there's already an audio input server process running +// This is a public wrapper around findExistingAudioInputProcess for external access +func (ais *AudioInputSupervisor) HasExistingProcess() (int, bool) { + pid, err := ais.findExistingAudioInputProcess() + return pid, err == nil +} diff --git a/internal/audio/ipc_common.go b/internal/audio/ipc_common.go new file mode 100644 index 00000000..d828129c --- /dev/null +++ b/internal/audio/ipc_common.go @@ -0,0 +1,257 @@ +package audio + +import ( + "encoding/binary" + "fmt" + 
"net" + "sync" + "sync/atomic" + "time" +) + +// Common IPC message interface +type IPCMessage interface { + GetMagic() uint32 + GetType() uint8 + GetLength() uint32 + GetTimestamp() int64 + GetData() []byte +} + +// Common optimized message structure +type OptimizedMessage struct { + header [17]byte // Pre-allocated header buffer + data []byte // Reusable data buffer +} + +// Generic message pool for both input and output +type GenericMessagePool struct { + // 64-bit fields must be first for proper alignment on ARM + hitCount int64 // Pool hit counter (atomic) + missCount int64 // Pool miss counter (atomic) + + pool chan *OptimizedMessage + preallocated []*OptimizedMessage // Pre-allocated messages + preallocSize int + maxPoolSize int + mutex sync.RWMutex +} + +// NewGenericMessagePool creates a new generic message pool +func NewGenericMessagePool(size int) *GenericMessagePool { + pool := &GenericMessagePool{ + pool: make(chan *OptimizedMessage, size), + preallocSize: size / 4, // 25% pre-allocated for immediate use + maxPoolSize: size, + } + + // Pre-allocate some messages for immediate use + pool.preallocated = make([]*OptimizedMessage, pool.preallocSize) + for i := 0; i < pool.preallocSize; i++ { + pool.preallocated[i] = &OptimizedMessage{ + data: make([]byte, 0, Config.MaxFrameSize), + } + } + + // Fill the channel pool + for i := 0; i < size-pool.preallocSize; i++ { + select { + case pool.pool <- &OptimizedMessage{ + data: make([]byte, 0, Config.MaxFrameSize), + }: + default: + break + } + } + + return pool +} + +// Get retrieves an optimized message from the pool +func (mp *GenericMessagePool) Get() *OptimizedMessage { + // Try pre-allocated first (fastest path) + mp.mutex.Lock() + if len(mp.preallocated) > 0 { + msg := mp.preallocated[len(mp.preallocated)-1] + mp.preallocated = mp.preallocated[:len(mp.preallocated)-1] + mp.mutex.Unlock() + atomic.AddInt64(&mp.hitCount, 1) + return msg + } + mp.mutex.Unlock() + + // Try channel pool + select { + case msg := 
<-mp.pool: + atomic.AddInt64(&mp.hitCount, 1) + return msg + default: + // Pool empty, create new message + atomic.AddInt64(&mp.missCount, 1) + return &OptimizedMessage{ + data: make([]byte, 0, Config.MaxFrameSize), + } + } +} + +// Put returns an optimized message to the pool +func (mp *GenericMessagePool) Put(msg *OptimizedMessage) { + if msg == nil { + return + } + + // Reset the message for reuse + msg.data = msg.data[:0] + + // Try to return to pre-allocated slice first + mp.mutex.Lock() + if len(mp.preallocated) < mp.preallocSize { + mp.preallocated = append(mp.preallocated, msg) + mp.mutex.Unlock() + return + } + mp.mutex.Unlock() + + // Try to return to channel pool + select { + case mp.pool <- msg: + // Successfully returned to pool + default: + // Pool full, let GC handle it + } +} + +// GetStats returns pool statistics +func (mp *GenericMessagePool) GetStats() (hitCount, missCount int64, hitRate float64) { + hits := atomic.LoadInt64(&mp.hitCount) + misses := atomic.LoadInt64(&mp.missCount) + total := hits + misses + if total > 0 { + hitRate = float64(hits) / float64(total) * 100 + } + return hits, misses, hitRate +} + +// Helper functions + +// EncodeMessageHeader encodes a message header into a provided byte slice +func EncodeMessageHeader(header []byte, magic uint32, msgType uint8, length uint32, timestamp int64) { + binary.LittleEndian.PutUint32(header[0:4], magic) + header[4] = msgType + binary.LittleEndian.PutUint32(header[5:9], length) + binary.LittleEndian.PutUint64(header[9:17], uint64(timestamp)) +} + +// EncodeAudioConfig encodes basic audio configuration to binary format +func EncodeAudioConfig(sampleRate, channels, frameSize int) []byte { + data := make([]byte, 12) // 3 * int32 + binary.LittleEndian.PutUint32(data[0:4], uint32(sampleRate)) + binary.LittleEndian.PutUint32(data[4:8], uint32(channels)) + binary.LittleEndian.PutUint32(data[8:12], uint32(frameSize)) + return data +} + +// EncodeOpusConfig encodes complete Opus configuration to 
binary format +func EncodeOpusConfig(sampleRate, channels, frameSize, bitrate, complexity, vbr, signalType, bandwidth, dtx int) []byte { + data := make([]byte, 36) // 9 * int32 + binary.LittleEndian.PutUint32(data[0:4], uint32(sampleRate)) + binary.LittleEndian.PutUint32(data[4:8], uint32(channels)) + binary.LittleEndian.PutUint32(data[8:12], uint32(frameSize)) + binary.LittleEndian.PutUint32(data[12:16], uint32(bitrate)) + binary.LittleEndian.PutUint32(data[16:20], uint32(complexity)) + binary.LittleEndian.PutUint32(data[20:24], uint32(vbr)) + binary.LittleEndian.PutUint32(data[24:28], uint32(signalType)) + binary.LittleEndian.PutUint32(data[28:32], uint32(bandwidth)) + binary.LittleEndian.PutUint32(data[32:36], uint32(dtx)) + return data +} + +// Common write message function +func WriteIPCMessage(conn net.Conn, msg IPCMessage, pool *GenericMessagePool, droppedFramesCounter *int64) error { + if conn == nil { + return fmt.Errorf("connection is nil") + } + + // Get optimized message from pool for header preparation + optMsg := pool.Get() + defer pool.Put(optMsg) + + // Prepare header in pre-allocated buffer + EncodeMessageHeader(optMsg.header[:], msg.GetMagic(), msg.GetType(), msg.GetLength(), msg.GetTimestamp()) + + // Set write deadline for timeout handling (more efficient than goroutines) + if deadline := time.Now().Add(Config.WriteTimeout); deadline.After(time.Now()) { + if err := conn.SetWriteDeadline(deadline); err != nil { + // If we can't set deadline, proceed without it + _ = err // Explicitly ignore error for linter + } + } + + // Write header using pre-allocated buffer (synchronous for better performance) + _, err := conn.Write(optMsg.header[:]) + if err != nil { + if droppedFramesCounter != nil { + atomic.AddInt64(droppedFramesCounter, 1) + } + return err + } + + // Write data if present + if msg.GetLength() > 0 && msg.GetData() != nil { + _, err = conn.Write(msg.GetData()) + if err != nil { + if droppedFramesCounter != nil { + 
atomic.AddInt64(droppedFramesCounter, 1) + } + return err + } + } + + // Clear write deadline after successful write + _ = conn.SetWriteDeadline(time.Time{}) // Ignore error as this is cleanup + return nil +} + +// Common connection acceptance with retry logic +func AcceptConnectionWithRetry(listener net.Listener, maxRetries int, retryDelay time.Duration) (net.Conn, error) { + var lastErr error + for i := 0; i < maxRetries; i++ { + conn, err := listener.Accept() + if err == nil { + return conn, nil + } + lastErr = err + if i < maxRetries-1 { + time.Sleep(retryDelay) + } + } + return nil, fmt.Errorf("failed to accept connection after %d retries: %w", maxRetries, lastErr) +} + +// Common frame statistics structure +type FrameStats struct { + Total int64 + Dropped int64 +} + +// GetFrameStats safely retrieves frame statistics +func GetFrameStats(totalCounter, droppedCounter *int64) FrameStats { + return FrameStats{ + Total: atomic.LoadInt64(totalCounter), + Dropped: atomic.LoadInt64(droppedCounter), + } +} + +// CalculateDropRate calculates the drop rate percentage +func CalculateDropRate(stats FrameStats) float64 { + if stats.Total == 0 { + return 0.0 + } + return float64(stats.Dropped) / float64(stats.Total) * 100.0 +} + +// ResetFrameStats resets frame counters +func ResetFrameStats(totalCounter, droppedCounter *int64) { + atomic.StoreInt64(totalCounter, 0) + atomic.StoreInt64(droppedCounter, 0) +} diff --git a/internal/audio/ipc_input.go b/internal/audio/ipc_input.go new file mode 100644 index 00000000..0316eb3f --- /dev/null +++ b/internal/audio/ipc_input.go @@ -0,0 +1,285 @@ +package audio + +import ( + "fmt" + "net" + "sync" + "sync/atomic" + "time" + + "github.com/jetkvm/kvm/internal/logging" +) + +// Component name constant for logging +const ( + AudioInputClientComponent = "audio-input-client" +) + +// Constants are now defined in unified_ipc.go +var ( + maxFrameSize = Config.MaxFrameSize // Maximum Opus frame size + messagePoolSize = Config.MessagePoolSize 
// Pre-allocated message pool size +) + + +// AudioInputClient handles IPC communication from the main process +type AudioInputClient struct { + // Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) + droppedFrames int64 // Atomic counter for dropped frames + totalFrames int64 // Atomic counter for total frames + + conn net.Conn + mtx sync.Mutex + running bool +} + +// NewAudioInputClient creates a new audio input client +func NewAudioInputClient() *AudioInputClient { + return &AudioInputClient{} +} + +// Connect connects to the audio input server +func (aic *AudioInputClient) Connect() error { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if aic.running { + return nil // Already connected + } + + // Ensure clean state before connecting + if aic.conn != nil { + aic.conn.Close() + aic.conn = nil + } + + socketPath := getInputSocketPath() + // Try connecting multiple times as the server might not be ready + // Reduced retry count and delay for faster startup + for i := 0; i < 10; i++ { + conn, err := net.Dial("unix", socketPath) + if err == nil { + aic.conn = conn + aic.running = true + // Reset frame counters on successful connection + atomic.StoreInt64(&aic.totalFrames, 0) + atomic.StoreInt64(&aic.droppedFrames, 0) + return nil + } + // Exponential backoff starting from config + backoffStart := Config.BackoffStart + delay := time.Duration(backoffStart.Nanoseconds()*(1< maxDelay { + delay = maxDelay + } + time.Sleep(delay) + } + + // Ensure clean state on connection failure + aic.conn = nil + aic.running = false + return fmt.Errorf("failed to connect to audio input server after 10 attempts") +} + +// Disconnect disconnects from the audio input server +func (aic *AudioInputClient) Disconnect() { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if !aic.running { + return + } + + aic.running = false + + if aic.conn != nil { + // Send stop message + msg := &UnifiedIPCMessage{ + Magic: inputMagicNumber, + Type: MessageTypeStop, + Length: 0, 
+ Timestamp: time.Now().UnixNano(), + } + _ = aic.writeMessage(msg) // Ignore errors during shutdown + + aic.conn.Close() + aic.conn = nil + } +} + +// SendFrame sends an Opus frame to the audio input server +func (aic *AudioInputClient) SendFrame(frame []byte) error { + // Fast path validation + if len(frame) == 0 { + return nil + } + + aic.mtx.Lock() + if !aic.running || aic.conn == nil { + aic.mtx.Unlock() + return fmt.Errorf("not connected") + } + + // Direct message creation without timestamp overhead + msg := &UnifiedIPCMessage{ + Magic: inputMagicNumber, + Type: MessageTypeOpusFrame, + Length: uint32(len(frame)), + Data: frame, + } + + err := aic.writeMessage(msg) + aic.mtx.Unlock() + return err +} + +// SendFrameZeroCopy sends a zero-copy Opus frame to the audio input server +func (aic *AudioInputClient) SendFrameZeroCopy(frame *ZeroCopyAudioFrame) error { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if !aic.running || aic.conn == nil { + return fmt.Errorf("not connected to audio input server") + } + + if frame == nil { + return nil // Nil frame, ignore + } + + frameLen := frame.Length() + if frameLen == 0 { + return nil // Empty frame, ignore + } + + // Inline frame validation to reduce function call overhead + if frameLen > maxFrameSize { + return ErrFrameDataTooLarge + } + + // Use zero-copy data directly + msg := &UnifiedIPCMessage{ + Magic: inputMagicNumber, + Type: MessageTypeOpusFrame, + Length: uint32(frameLen), + Timestamp: time.Now().UnixNano(), + Data: frame.Data(), // Zero-copy data access + } + + return aic.writeMessage(msg) +} + +// SendConfig sends a configuration update to the audio input server +func (aic *AudioInputClient) SendConfig(config UnifiedIPCConfig) error { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if !aic.running || aic.conn == nil { + return fmt.Errorf("not connected to audio input server") + } + + // Validate configuration parameters + if err := ValidateInputIPCConfig(config.SampleRate, config.Channels, config.FrameSize); 
err != nil { + logger := logging.GetDefaultLogger().With().Str("component", AudioInputClientComponent).Logger() + logger.Error().Err(err).Msg("Configuration validation failed") + return fmt.Errorf("input configuration validation failed: %w", err) + } + + // Serialize config using common function + data := EncodeAudioConfig(config.SampleRate, config.Channels, config.FrameSize) + + msg := &UnifiedIPCMessage{ + Magic: inputMagicNumber, + Type: MessageTypeConfig, + Length: uint32(len(data)), + Timestamp: time.Now().UnixNano(), + Data: data, + } + + return aic.writeMessage(msg) +} + +// SendOpusConfig sends a complete Opus encoder configuration update to the audio input server +func (aic *AudioInputClient) SendOpusConfig(config UnifiedIPCOpusConfig) error { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if !aic.running || aic.conn == nil { + return fmt.Errorf("not connected to audio input server") + } + + // Validate configuration parameters + if config.SampleRate <= 0 || config.Channels <= 0 || config.FrameSize <= 0 || config.Bitrate <= 0 { + return fmt.Errorf("invalid Opus configuration: SampleRate=%d, Channels=%d, FrameSize=%d, Bitrate=%d", + config.SampleRate, config.Channels, config.FrameSize, config.Bitrate) + } + + // Serialize Opus configuration using common function + data := EncodeOpusConfig(config.SampleRate, config.Channels, config.FrameSize, config.Bitrate, config.Complexity, config.VBR, config.SignalType, config.Bandwidth, config.DTX) + + msg := &UnifiedIPCMessage{ + Magic: inputMagicNumber, + Type: MessageTypeOpusConfig, + Length: uint32(len(data)), + Timestamp: time.Now().UnixNano(), + Data: data, + } + + return aic.writeMessage(msg) +} + +// SendHeartbeat sends a heartbeat message +func (aic *AudioInputClient) SendHeartbeat() error { + aic.mtx.Lock() + defer aic.mtx.Unlock() + + if !aic.running || aic.conn == nil { + return fmt.Errorf("not connected to audio input server") + } + + msg := &UnifiedIPCMessage{ + Magic: inputMagicNumber, + Type: 
MessageTypeHeartbeat, + Length: 0, + Timestamp: time.Now().UnixNano(), + } + + return aic.writeMessage(msg) +} + +// writeMessage writes a message to the server +// Global shared message pool for input IPC clients +var globalInputMessagePool = NewGenericMessagePool(messagePoolSize) + +func (aic *AudioInputClient) writeMessage(msg *UnifiedIPCMessage) error { + // Increment total frames counter + atomic.AddInt64(&aic.totalFrames, 1) + + // Use shared WriteIPCMessage function with global message pool + return WriteIPCMessage(aic.conn, msg, globalInputMessagePool, &aic.droppedFrames) +} + +// IsConnected returns whether the client is connected +func (aic *AudioInputClient) IsConnected() bool { + aic.mtx.Lock() + defer aic.mtx.Unlock() + return aic.running && aic.conn != nil +} + +// GetFrameStats returns frame statistics +func (aic *AudioInputClient) GetFrameStats() (total, dropped int64) { + stats := GetFrameStats(&aic.totalFrames, &aic.droppedFrames) + return stats.Total, stats.Dropped +} + +// GetDropRate returns the current frame drop rate as a percentage +func (aic *AudioInputClient) GetDropRate() float64 { + stats := GetFrameStats(&aic.totalFrames, &aic.droppedFrames) + return CalculateDropRate(stats) +} + +// ResetStats resets frame statistics +func (aic *AudioInputClient) ResetStats() { + ResetFrameStats(&aic.totalFrames, &aic.droppedFrames) +} + diff --git a/internal/audio/ipc_output.go b/internal/audio/ipc_output.go new file mode 100644 index 00000000..95dd61cb --- /dev/null +++ b/internal/audio/ipc_output.go @@ -0,0 +1,213 @@ +package audio + +import ( + "encoding/binary" + "fmt" + "io" + "net" + "sync" + "sync/atomic" + "time" + + "github.com/jetkvm/kvm/internal/logging" + "github.com/rs/zerolog" +) + +// Global shared message pool for output IPC client header reading +var globalOutputClientMessagePool = NewGenericMessagePool(Config.OutputMessagePoolSize) + +// AudioOutputClient provides audio output IPC client functionality +type AudioOutputClient struct { 
+ droppedFrames int64 + totalFrames int64 + + conn net.Conn + mtx sync.Mutex + running bool + logger zerolog.Logger + socketPath string + magicNumber uint32 + bufferPool *AudioBufferPool + + autoReconnect bool +} + +func NewAudioOutputClient() *AudioOutputClient { + socketPath := getOutputSocketPath() + logger := logging.GetDefaultLogger().With().Str("component", "audio-output-client").Logger() + + return &AudioOutputClient{ + socketPath: socketPath, + magicNumber: Config.OutputMagicNumber, + logger: logger, + bufferPool: NewAudioBufferPool(Config.MaxFrameSize), + autoReconnect: true, + } +} + +// Connect connects to the audio output server +func (c *AudioOutputClient) Connect() error { + c.mtx.Lock() + defer c.mtx.Unlock() + + if c.running { + return fmt.Errorf("audio output client is already connected") + } + + conn, err := net.Dial("unix", c.socketPath) + if err != nil { + return fmt.Errorf("failed to connect to audio output server: %w", err) + } + + c.conn = conn + c.running = true + c.logger.Info().Str("socket_path", c.socketPath).Msg("Connected to audio output server") + return nil +} + +// Disconnect disconnects from the audio output server +func (c *AudioOutputClient) Disconnect() { + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.running { + return + } + + c.running = false + + if c.conn != nil { + c.conn.Close() + c.conn = nil + } + + c.logger.Info().Msg("Disconnected from audio output server") +} + +// IsConnected returns whether the client is connected +func (c *AudioOutputClient) IsConnected() bool { + c.mtx.Lock() + defer c.mtx.Unlock() + return c.running && c.conn != nil +} + +func (c *AudioOutputClient) ReceiveFrame() ([]byte, error) { + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.running || c.conn == nil { + return nil, fmt.Errorf("not connected to audio output server") + } + + // Get optimized message from pool for header reading + optMsg := globalOutputClientMessagePool.Get() + defer globalOutputClientMessagePool.Put(optMsg) + + // Read header + 
if _, err := io.ReadFull(c.conn, optMsg.header[:]); err != nil { + return nil, fmt.Errorf("failed to read IPC message header from audio output server: %w", err) + } + + // Parse header + magic := binary.LittleEndian.Uint32(optMsg.header[0:4]) + if magic != outputMagicNumber { + return nil, fmt.Errorf("invalid magic number in IPC message: got 0x%x, expected 0x%x", magic, outputMagicNumber) + } + + msgType := UnifiedMessageType(optMsg.header[4]) + if msgType != MessageTypeOpusFrame { + return nil, fmt.Errorf("unexpected message type: %d", msgType) + } + + size := binary.LittleEndian.Uint32(optMsg.header[5:9]) + timestamp := int64(binary.LittleEndian.Uint64(optMsg.header[9:17])) + maxFrameSize := Config.OutputMaxFrameSize + if int(size) > maxFrameSize { + return nil, fmt.Errorf("received frame size validation failed: got %d bytes, maximum allowed %d bytes", size, maxFrameSize) + } + + // Read frame data using buffer pool to avoid allocation + frame := c.bufferPool.Get() + frame = frame[:size] // Resize to actual frame size + if size > 0 { + if _, err := io.ReadFull(c.conn, frame); err != nil { + c.bufferPool.Put(frame) // Return buffer on error + return nil, fmt.Errorf("failed to read frame data: %w", err) + } + } + + // Note: Caller is responsible for returning frame to pool via PutAudioFrameBuffer() + + atomic.AddInt64(&c.totalFrames, 1) + + // Zero-cost trace logging for frame reception + if c.logger.GetLevel() <= zerolog.TraceLevel { + totalFrames := atomic.LoadInt64(&c.totalFrames) + if totalFrames <= 5 || totalFrames%1000 == 1 { + c.logger.Trace(). + Int("frame_size", int(size)). + Int64("timestamp", timestamp). + Int64("total_frames_received", totalFrames). 
+ Msg("Received audio frame from output server") + } + } + + return frame, nil +} + +// SendOpusConfig sends Opus configuration to the audio output server +func (c *AudioOutputClient) SendOpusConfig(config UnifiedIPCOpusConfig) error { + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.running || c.conn == nil { + return fmt.Errorf("not connected to audio output server") + } + + // Validate configuration parameters + if config.SampleRate <= 0 || config.Channels <= 0 || config.FrameSize <= 0 || config.Bitrate <= 0 { + return fmt.Errorf("invalid Opus configuration: SampleRate=%d, Channels=%d, FrameSize=%d, Bitrate=%d", + config.SampleRate, config.Channels, config.FrameSize, config.Bitrate) + } + + // Serialize Opus configuration using common function + data := EncodeOpusConfig(config.SampleRate, config.Channels, config.FrameSize, config.Bitrate, config.Complexity, config.VBR, config.SignalType, config.Bandwidth, config.DTX) + + msg := &UnifiedIPCMessage{ + Magic: c.magicNumber, + Type: MessageTypeOpusConfig, + Length: uint32(len(data)), + Timestamp: time.Now().UnixNano(), + Data: data, + } + + return c.writeMessage(msg) +} + +// writeMessage writes a message to the connection +func (c *AudioOutputClient) writeMessage(msg *UnifiedIPCMessage) error { + header := make([]byte, 17) + EncodeMessageHeader(header, msg.Magic, uint8(msg.Type), msg.Length, msg.Timestamp) + + if _, err := c.conn.Write(header); err != nil { + return fmt.Errorf("failed to write header: %w", err) + } + + if msg.Length > 0 && msg.Data != nil { + if _, err := c.conn.Write(msg.Data); err != nil { + return fmt.Errorf("failed to write data: %w", err) + } + } + + atomic.AddInt64(&c.totalFrames, 1) + return nil +} + +// GetClientStats returns client performance statistics +func (c *AudioOutputClient) GetClientStats() (total, dropped int64) { + stats := GetFrameStats(&c.totalFrames, &c.droppedFrames) + return stats.Total, stats.Dropped +} + +// Helper functions +// getOutputSocketPath is defined in 
ipc_unified.go diff --git a/internal/audio/ipc_unified.go b/internal/audio/ipc_unified.go new file mode 100644 index 00000000..5e42d388 --- /dev/null +++ b/internal/audio/ipc_unified.go @@ -0,0 +1,681 @@ +package audio + +import ( + "encoding/binary" + "fmt" + "io" + "math" + "net" + "os" + "path/filepath" + "strings" + "sync" + "sync/atomic" + "time" + + "github.com/jetkvm/kvm/internal/logging" + "github.com/rs/zerolog" +) + +// Unified IPC constants +var ( + outputMagicNumber uint32 = Config.OutputMagicNumber // "JKOU" (JetKVM Output) + inputMagicNumber uint32 = Config.InputMagicNumber // "JKMI" (JetKVM Microphone Input) + outputSocketName = "audio_output.sock" + inputSocketName = "audio_input.sock" + headerSize = 17 // Fixed header size: 4+1+4+8 bytes +) + +// Header buffer pool to reduce allocation overhead +var headerBufferPool = sync.Pool{ + New: func() interface{} { + buf := make([]byte, headerSize) + return &buf + }, +} + +// UnifiedMessageType represents the type of IPC message for both input and output +type UnifiedMessageType uint8 + +const ( + MessageTypeOpusFrame UnifiedMessageType = iota + MessageTypeConfig + MessageTypeOpusConfig + MessageTypeStop + MessageTypeHeartbeat + MessageTypeAck +) + +// UnifiedIPCMessage represents a message sent over IPC for both input and output +type UnifiedIPCMessage struct { + Magic uint32 + Type UnifiedMessageType + Length uint32 + Timestamp int64 + Data []byte +} + +// Implement IPCMessage interface +func (msg *UnifiedIPCMessage) GetMagic() uint32 { + return msg.Magic +} + +func (msg *UnifiedIPCMessage) GetType() uint8 { + return uint8(msg.Type) +} + +func (msg *UnifiedIPCMessage) GetLength() uint32 { + return msg.Length +} + +func (msg *UnifiedIPCMessage) GetTimestamp() int64 { + return msg.Timestamp +} + +func (msg *UnifiedIPCMessage) GetData() []byte { + return msg.Data +} + +// UnifiedIPCConfig represents configuration for audio +type UnifiedIPCConfig struct { + SampleRate int + Channels int + FrameSize int +} + 
+// UnifiedIPCOpusConfig represents Opus-specific configuration +type UnifiedIPCOpusConfig struct { + SampleRate int + Channels int + FrameSize int + Bitrate int + Complexity int + VBR int + SignalType int + Bandwidth int + DTX int +} + +// UnifiedAudioServer provides common functionality for both input and output servers +type UnifiedAudioServer struct { + // Atomic counters for performance monitoring + droppedFrames int64 // Dropped frames counter (atomic) + totalFrames int64 // Total frames counter (atomic) + + listener net.Listener + conn net.Conn + mtx sync.Mutex + running bool + logger zerolog.Logger + + // Message channels + messageChan chan *UnifiedIPCMessage // Buffered channel for incoming messages + processChan chan *UnifiedIPCMessage // Buffered channel for processing queue + wg sync.WaitGroup // Wait group for goroutine coordination + + // Configuration + socketPath string + magicNumber uint32 + sendBufferSize int + recvBufferSize int +} + +// NewUnifiedAudioServer creates a new unified audio server +func NewUnifiedAudioServer(isInput bool) (*UnifiedAudioServer, error) { + var socketPath string + var magicNumber uint32 + var componentName string + + if isInput { + socketPath = getInputSocketPath() + magicNumber = inputMagicNumber + componentName = "audio-input-server" + } else { + socketPath = getOutputSocketPath() + magicNumber = outputMagicNumber + componentName = "audio-output-server" + } + + logger := logging.GetDefaultLogger().With().Str("component", componentName).Logger() + + server := &UnifiedAudioServer{ + logger: logger, + socketPath: socketPath, + magicNumber: magicNumber, + messageChan: make(chan *UnifiedIPCMessage, Config.ChannelBufferSize), + processChan: make(chan *UnifiedIPCMessage, Config.ChannelBufferSize), + sendBufferSize: Config.SocketOptimalBuffer, + recvBufferSize: Config.SocketOptimalBuffer, + } + + return server, nil +} + +// Start starts the unified audio server +func (s *UnifiedAudioServer) Start() error { + s.mtx.Lock() + 
defer s.mtx.Unlock() + + if s.running { + return fmt.Errorf("server already running") + } + + // Remove existing socket file with retry logic + for i := 0; i < 3; i++ { + if err := os.Remove(s.socketPath); err != nil && !os.IsNotExist(err) { + s.logger.Warn().Err(err).Int("attempt", i+1).Msg("failed to remove existing socket file, retrying") + time.Sleep(100 * time.Millisecond) + continue + } + break + } + + // Create listener with retry on address already in use + var listener net.Listener + var err error + for i := 0; i < 3; i++ { + listener, err = net.Listen("unix", s.socketPath) + if err == nil { + break + } + + // If address is still in use, try to remove socket file again + if strings.Contains(err.Error(), "address already in use") { + s.logger.Warn().Err(err).Int("attempt", i+1).Msg("socket address in use, attempting cleanup and retry") + os.Remove(s.socketPath) + time.Sleep(200 * time.Millisecond) + continue + } + + return fmt.Errorf("failed to create unix socket: %w", err) + } + + if err != nil { + return fmt.Errorf("failed to create unix socket after retries: %w", err) + } + + s.listener = listener + s.running = true + + // Start goroutines + s.wg.Add(3) + go s.acceptConnections() + go s.startReaderGoroutine() + go s.startProcessorGoroutine() + + s.logger.Info().Str("socket_path", s.socketPath).Msg("Unified audio server started") + return nil +} + +// Stop stops the unified audio server +func (s *UnifiedAudioServer) Stop() { + s.mtx.Lock() + defer s.mtx.Unlock() + + if !s.running { + return + } + + s.running = false + + if s.listener != nil { + s.listener.Close() + } + + if s.conn != nil { + s.conn.Close() + } + + // Close channels + close(s.messageChan) + close(s.processChan) + + // Wait for goroutines to finish + s.wg.Wait() + + // Remove socket file + os.Remove(s.socketPath) + + s.logger.Info().Msg("Unified audio server stopped") +} + +// acceptConnections handles incoming connections +func (s *UnifiedAudioServer) acceptConnections() { + defer 
s.wg.Done() + + for s.running { + conn, err := AcceptConnectionWithRetry(s.listener, 3, 100*time.Millisecond) + if err != nil { + if s.running { + s.logger.Error().Err(err).Msg("Failed to accept connection") + } + continue + } + + s.mtx.Lock() + if s.conn != nil { + s.conn.Close() + } + s.conn = conn + s.mtx.Unlock() + + s.logger.Info().Msg("Client connected") + } +} + +// startReaderGoroutine handles reading messages from the connection +func (s *UnifiedAudioServer) startReaderGoroutine() { + defer s.wg.Done() + + for s.running { + s.mtx.Lock() + conn := s.conn + s.mtx.Unlock() + + if conn == nil { + time.Sleep(10 * time.Millisecond) + continue + } + + msg, err := s.readMessage(conn) + if err != nil { + if s.running { + s.logger.Error().Err(err).Msg("Failed to read message") + } + continue + } + + select { + case s.messageChan <- msg: + default: + atomic.AddInt64(&s.droppedFrames, 1) + s.logger.Warn().Msg("Message channel full, dropping message") + } + } +} + +// startProcessorGoroutine handles processing messages +func (s *UnifiedAudioServer) startProcessorGoroutine() { + defer s.wg.Done() + + for msg := range s.messageChan { + select { + case s.processChan <- msg: + atomic.AddInt64(&s.totalFrames, 1) + default: + atomic.AddInt64(&s.droppedFrames, 1) + s.logger.Warn().Msg("Process channel full, dropping message") + } + } +} + +// readMessage reads a message from the connection +func (s *UnifiedAudioServer) readMessage(conn net.Conn) (*UnifiedIPCMessage, error) { + // Get header buffer from pool + headerPtr := headerBufferPool.Get().(*[]byte) + header := *headerPtr + defer headerBufferPool.Put(headerPtr) + + if _, err := io.ReadFull(conn, header); err != nil { + return nil, fmt.Errorf("failed to read header: %w", err) + } + + // Parse header + magic := binary.LittleEndian.Uint32(header[0:4]) + if magic != s.magicNumber { + return nil, fmt.Errorf("invalid magic number: expected %d, got %d", s.magicNumber, magic) + } + + msgType := UnifiedMessageType(header[4]) + 
length := binary.LittleEndian.Uint32(header[5:9]) + timestamp := int64(binary.LittleEndian.Uint64(header[9:17])) + + // Validate length + if length > uint32(Config.MaxFrameSize) { + return nil, fmt.Errorf("message too large: %d bytes", length) + } + + // Read data + var data []byte + if length > 0 { + data = make([]byte, length) + if _, err := io.ReadFull(conn, data); err != nil { + return nil, fmt.Errorf("failed to read data: %w", err) + } + } + + return &UnifiedIPCMessage{ + Magic: magic, + Type: msgType, + Length: length, + Timestamp: timestamp, + Data: data, + }, nil +} + +// SendFrame sends a frame to the connected client +func (s *UnifiedAudioServer) SendFrame(frame []byte) error { + s.mtx.Lock() + defer s.mtx.Unlock() + + if !s.running || s.conn == nil { + // Silently drop frames when no client is connected + // This prevents "no client connected" warnings during startup and quality changes + atomic.AddInt64(&s.droppedFrames, 1) + return nil // Return nil to avoid flooding logs with connection warnings + } + + start := time.Now() + + // Create message + msg := &UnifiedIPCMessage{ + Magic: s.magicNumber, + Type: MessageTypeOpusFrame, + Length: uint32(len(frame)), + Timestamp: start.UnixNano(), + Data: frame, + } + + // Write message to connection + err := s.writeMessage(s.conn, msg) + if err != nil { + atomic.AddInt64(&s.droppedFrames, 1) + return err + } + + // Record latency for monitoring + + atomic.AddInt64(&s.totalFrames, 1) + return nil +} + +// writeMessage writes a message to the connection +func (s *UnifiedAudioServer) writeMessage(conn net.Conn, msg *UnifiedIPCMessage) error { + header := make([]byte, 17) + EncodeMessageHeader(header, msg.Magic, uint8(msg.Type), msg.Length, msg.Timestamp) + + // Optimize: Use single write for header+data to reduce system calls + if msg.Length > 0 && msg.Data != nil { + // Pre-allocate combined buffer to avoid copying + combined := make([]byte, len(header)+len(msg.Data)) + copy(combined, header) + 
copy(combined[len(header):], msg.Data) + if _, err := conn.Write(combined); err != nil { + return fmt.Errorf("failed to write message: %w", err) + } + } else { + if _, err := conn.Write(header); err != nil { + return fmt.Errorf("failed to write header: %w", err) + } + } + + return nil +} + +// UnifiedAudioClient provides common functionality for both input and output clients +type UnifiedAudioClient struct { + // Atomic counters for frame statistics + droppedFrames int64 // Atomic counter for dropped frames + totalFrames int64 // Atomic counter for total frames + + conn net.Conn + mtx sync.Mutex + running bool + logger zerolog.Logger + socketPath string + magicNumber uint32 + bufferPool *AudioBufferPool // Buffer pool for memory optimization + + // Connection health monitoring + lastHealthCheck time.Time + connectionErrors int64 // Atomic counter for connection errors + autoReconnect bool // Enable automatic reconnection + healthCheckTicker *time.Ticker + stopHealthCheck chan struct{} +} + +// NewUnifiedAudioClient creates a new unified audio client +func NewUnifiedAudioClient(isInput bool) *UnifiedAudioClient { + var socketPath string + var magicNumber uint32 + var componentName string + + if isInput { + socketPath = getInputSocketPath() + magicNumber = inputMagicNumber + componentName = "audio-input-client" + } else { + socketPath = getOutputSocketPath() + magicNumber = outputMagicNumber + componentName = "audio-output-client" + } + + logger := logging.GetDefaultLogger().With().Str("component", componentName).Logger() + + return &UnifiedAudioClient{ + logger: logger, + socketPath: socketPath, + magicNumber: magicNumber, + bufferPool: NewAudioBufferPool(Config.MaxFrameSize), + autoReconnect: true, // Enable automatic reconnection by default + stopHealthCheck: make(chan struct{}), + } +} + +// Connect connects the client to the server +func (c *UnifiedAudioClient) Connect() error { + c.mtx.Lock() + defer c.mtx.Unlock() + + if c.running { + return nil // Already 
connected + } + + // Ensure clean state before connecting + if c.conn != nil { + c.conn.Close() + c.conn = nil + } + + // Try connecting multiple times as the server might not be ready + // Use configurable retry parameters for better control + maxAttempts := Config.MaxConnectionAttempts + initialDelay := Config.ConnectionRetryDelay + maxDelay := Config.MaxConnectionRetryDelay + backoffFactor := Config.ConnectionBackoffFactor + + for i := 0; i < maxAttempts; i++ { + // Set connection timeout for each attempt + conn, err := net.DialTimeout("unix", c.socketPath, Config.ConnectionTimeoutDelay) + if err == nil { + c.conn = conn + c.running = true + // Reset frame counters on successful connection + atomic.StoreInt64(&c.totalFrames, 0) + atomic.StoreInt64(&c.droppedFrames, 0) + atomic.StoreInt64(&c.connectionErrors, 0) + c.lastHealthCheck = time.Now() + // Start health check monitoring if auto-reconnect is enabled + if c.autoReconnect { + c.startHealthCheck() + } + c.logger.Info().Str("socket_path", c.socketPath).Int("attempt", i+1).Msg("Connected to server") + return nil + } + + // Log connection attempt failure + c.logger.Debug().Err(err).Str("socket_path", c.socketPath).Int("attempt", i+1).Int("max_attempts", maxAttempts).Msg("Connection attempt failed") + + // Don't sleep after the last attempt + if i < maxAttempts-1 { + // Calculate adaptive delay based on connection failure patterns + delay := c.calculateAdaptiveDelay(i, initialDelay, maxDelay, backoffFactor) + time.Sleep(delay) + } + } + + // Ensure clean state on connection failure + c.conn = nil + c.running = false + return fmt.Errorf("failed to connect to audio server after %d attempts", Config.MaxConnectionAttempts) +} + +// Disconnect disconnects the client from the server +func (c *UnifiedAudioClient) Disconnect() { + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.running { + return + } + + c.running = false + + // Stop health check monitoring + c.stopHealthCheckMonitoring() + + if c.conn != nil { + 
c.conn.Close() + c.conn = nil + } + + c.logger.Info().Msg("Disconnected from server") +} + +// IsConnected returns whether the client is connected +func (c *UnifiedAudioClient) IsConnected() bool { + c.mtx.Lock() + defer c.mtx.Unlock() + return c.running && c.conn != nil +} + +// GetFrameStats returns frame statistics +func (c *UnifiedAudioClient) GetFrameStats() (total, dropped int64) { + total = atomic.LoadInt64(&c.totalFrames) + dropped = atomic.LoadInt64(&c.droppedFrames) + return +} + +// startHealthCheck starts the connection health monitoring +func (c *UnifiedAudioClient) startHealthCheck() { + if c.healthCheckTicker != nil { + c.healthCheckTicker.Stop() + } + + c.healthCheckTicker = time.NewTicker(Config.HealthCheckInterval) + go func() { + for { + select { + case <-c.healthCheckTicker.C: + c.performHealthCheck() + case <-c.stopHealthCheck: + return + } + } + }() +} + +// stopHealthCheckMonitoring stops the health check monitoring +func (c *UnifiedAudioClient) stopHealthCheckMonitoring() { + if c.healthCheckTicker != nil { + c.healthCheckTicker.Stop() + c.healthCheckTicker = nil + } + select { + case c.stopHealthCheck <- struct{}{}: + default: + } +} + +// performHealthCheck checks the connection health and attempts reconnection if needed +func (c *UnifiedAudioClient) performHealthCheck() { + c.mtx.Lock() + defer c.mtx.Unlock() + + if !c.running || c.conn == nil { + return + } + + // Simple health check: try to get connection info + if tcpConn, ok := c.conn.(*net.UnixConn); ok { + if _, err := tcpConn.File(); err != nil { + // Connection is broken + atomic.AddInt64(&c.connectionErrors, 1) + c.logger.Warn().Err(err).Msg("Connection health check failed, attempting reconnection") + + // Close the broken connection + c.conn.Close() + c.conn = nil + c.running = false + + // Attempt reconnection + go func() { + time.Sleep(Config.ReconnectionInterval) + if err := c.Connect(); err != nil { + c.logger.Error().Err(err).Msg("Failed to reconnect during health check") + 
} + }() + } + } + + c.lastHealthCheck = time.Now() +} + +// SetAutoReconnect enables or disables automatic reconnection +func (c *UnifiedAudioClient) SetAutoReconnect(enabled bool) { + c.mtx.Lock() + defer c.mtx.Unlock() + + c.autoReconnect = enabled + if !enabled { + c.stopHealthCheckMonitoring() + } else if c.running { + c.startHealthCheck() + } +} + +// GetConnectionErrors returns the number of connection errors +func (c *UnifiedAudioClient) GetConnectionErrors() int64 { + return atomic.LoadInt64(&c.connectionErrors) +} + +// calculateAdaptiveDelay calculates retry delay based on system load and failure patterns +func (c *UnifiedAudioClient) calculateAdaptiveDelay(attempt int, initialDelay, maxDelay time.Duration, backoffFactor float64) time.Duration { + // Base exponential backoff + baseDelay := time.Duration(float64(initialDelay.Nanoseconds()) * math.Pow(backoffFactor, float64(attempt))) + + // Get connection error history for adaptive adjustment + errorCount := atomic.LoadInt64(&c.connectionErrors) + + // Adjust delay based on recent connection errors + // More errors = longer delays to avoid overwhelming the server + adaptiveFactor := 1.0 + if errorCount > 5 { + adaptiveFactor = 1.5 // 50% longer delays after many errors + } else if errorCount > 10 { + adaptiveFactor = 2.0 // Double delays after excessive errors + } + + // Apply adaptive factor + adaptiveDelay := time.Duration(float64(baseDelay.Nanoseconds()) * adaptiveFactor) + + // Ensure we don't exceed maximum delay + if adaptiveDelay > maxDelay { + adaptiveDelay = maxDelay + } + + // Add small random jitter to avoid thundering herd + jitter := time.Duration(float64(adaptiveDelay.Nanoseconds()) * 0.1 * (0.5 + float64(attempt%3)/6.0)) + adaptiveDelay += jitter + + return adaptiveDelay +} + +// Helper functions for socket paths +func getInputSocketPath() string { + return filepath.Join("/var/run", inputSocketName) +} + +func getOutputSocketPath() string { + return filepath.Join("/var/run", 
outputSocketName) +} diff --git a/internal/audio/mgmt_base_manager.go b/internal/audio/mgmt_base_manager.go new file mode 100644 index 00000000..2d52883b --- /dev/null +++ b/internal/audio/mgmt_base_manager.go @@ -0,0 +1,97 @@ +package audio + +import ( + "sync/atomic" + "time" + + "github.com/rs/zerolog" +) + +// BaseAudioMetrics provides common metrics fields for both input and output +// Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) +type BaseAudioMetrics struct { + // Atomic int64 fields first for proper ARM32 alignment + FramesProcessed int64 `json:"frames_processed"` + FramesDropped int64 `json:"frames_dropped"` + BytesProcessed int64 `json:"bytes_processed"` + ConnectionDrops int64 `json:"connection_drops"` + + // Non-atomic fields after atomic fields + LastFrameTime time.Time `json:"last_frame_time"` + AverageLatency time.Duration `json:"average_latency"` +} + +// BaseAudioManager provides common functionality for audio managers +type BaseAudioManager struct { + // Core metrics and state + metrics BaseAudioMetrics + logger zerolog.Logger + running int32 +} + +// NewBaseAudioManager creates a new base audio manager +func NewBaseAudioManager(logger zerolog.Logger) *BaseAudioManager { + return &BaseAudioManager{ + logger: logger, + } +} + +// IsRunning returns whether the manager is running +func (bam *BaseAudioManager) IsRunning() bool { + return atomic.LoadInt32(&bam.running) == 1 +} + +// setRunning atomically sets the running state +func (bam *BaseAudioManager) setRunning(running bool) bool { + if running { + return atomic.CompareAndSwapInt32(&bam.running, 0, 1) + } + return atomic.CompareAndSwapInt32(&bam.running, 1, 0) +} + +// resetMetrics resets all metrics to zero +func (bam *BaseAudioManager) resetMetrics() { + atomic.StoreInt64(&bam.metrics.FramesProcessed, 0) + atomic.StoreInt64(&bam.metrics.FramesDropped, 0) + atomic.StoreInt64(&bam.metrics.BytesProcessed, 0) + atomic.StoreInt64(&bam.metrics.ConnectionDrops, 
0) + bam.metrics.LastFrameTime = time.Time{} + bam.metrics.AverageLatency = 0 +} + +// getBaseMetrics returns a copy of the base metrics +func (bam *BaseAudioManager) getBaseMetrics() BaseAudioMetrics { + return BaseAudioMetrics{ + FramesProcessed: atomic.LoadInt64(&bam.metrics.FramesProcessed), + FramesDropped: atomic.LoadInt64(&bam.metrics.FramesDropped), + BytesProcessed: atomic.LoadInt64(&bam.metrics.BytesProcessed), + ConnectionDrops: atomic.LoadInt64(&bam.metrics.ConnectionDrops), + LastFrameTime: bam.metrics.LastFrameTime, + AverageLatency: bam.metrics.AverageLatency, + } +} + +// logComponentStart logs component start with consistent format +func (bam *BaseAudioManager) logComponentStart(component string) { + bam.logger.Debug().Str("component", component).Msg("starting component") +} + +// logComponentStarted logs component started with consistent format +func (bam *BaseAudioManager) logComponentStarted(component string) { + bam.logger.Debug().Str("component", component).Msg("component started successfully") +} + +// logComponentStop logs component stop with consistent format +func (bam *BaseAudioManager) logComponentStop(component string) { + bam.logger.Debug().Str("component", component).Msg("stopping component") +} + +// logComponentStopped logs component stopped with consistent format +func (bam *BaseAudioManager) logComponentStopped(component string) { + bam.logger.Debug().Str("component", component).Msg("component stopped") +} + +// logComponentError logs component error with consistent format +func (bam *BaseAudioManager) logComponentError(component string, err error, msg string) { + bam.logger.Error().Err(err).Str("component", component).Msg(msg) +} diff --git a/internal/audio/mgmt_base_supervisor.go b/internal/audio/mgmt_base_supervisor.go new file mode 100644 index 00000000..bface756 --- /dev/null +++ b/internal/audio/mgmt_base_supervisor.go @@ -0,0 +1,342 @@ +//go:build cgo +// +build cgo + +package audio + +import ( + "context" + "os/exec" + 
"sync" + "sync/atomic" + "syscall" + "time" + + "github.com/jetkvm/kvm/internal/logging" + "github.com/rs/zerolog" +) + +// BaseSupervisor provides common functionality for audio supervisors +type BaseSupervisor struct { + ctx context.Context + cancel context.CancelFunc + logger *zerolog.Logger + mutex sync.RWMutex + running int32 + + // Process management + cmd *exec.Cmd + processPID int + + // Process monitoring + + // Exit tracking + lastExitCode int + lastExitTime time.Time + + // Channel management + stopChan chan struct{} + processDone chan struct{} + stopChanClosed bool + processDoneClosed bool +} + +// NewBaseSupervisor creates a new base supervisor +func NewBaseSupervisor(componentName string) *BaseSupervisor { + logger := logging.GetDefaultLogger().With().Str("component", componentName).Logger() + return &BaseSupervisor{ + logger: &logger, + + stopChan: make(chan struct{}), + processDone: make(chan struct{}), + } +} + +// IsRunning returns whether the supervisor is currently running +func (bs *BaseSupervisor) IsRunning() bool { + return atomic.LoadInt32(&bs.running) == 1 +} + +// GetProcessPID returns the current process PID +func (bs *BaseSupervisor) GetProcessPID() int { + bs.mutex.RLock() + defer bs.mutex.RUnlock() + return bs.processPID +} + +// GetLastExitInfo returns the last exit code and time +func (bs *BaseSupervisor) GetLastExitInfo() (exitCode int, exitTime time.Time) { + bs.mutex.RLock() + defer bs.mutex.RUnlock() + return bs.lastExitCode, bs.lastExitTime +} + +// logSupervisorStart logs supervisor start event +func (bs *BaseSupervisor) logSupervisorStart() { + bs.logger.Info().Msg("Supervisor starting") +} + +// logSupervisorStop logs supervisor stop event +func (bs *BaseSupervisor) logSupervisorStop() { + bs.logger.Info().Msg("Supervisor stopping") +} + +// createContext creates a new context for the supervisor +func (bs *BaseSupervisor) createContext() { + bs.ctx, bs.cancel = context.WithCancel(context.Background()) +} + +// cancelContext 
cancels the supervisor context +func (bs *BaseSupervisor) cancelContext() { + if bs.cancel != nil { + bs.cancel() + } +} + +// initializeChannels recreates channels for a new supervision cycle +func (bs *BaseSupervisor) initializeChannels() { + bs.mutex.Lock() + defer bs.mutex.Unlock() + + bs.stopChan = make(chan struct{}) + bs.processDone = make(chan struct{}) + bs.stopChanClosed = false + bs.processDoneClosed = false +} + +// closeStopChan safely closes the stop channel +func (bs *BaseSupervisor) closeStopChan() { + bs.mutex.Lock() + defer bs.mutex.Unlock() + + if !bs.stopChanClosed { + close(bs.stopChan) + bs.stopChanClosed = true + } +} + +// closeProcessDone safely closes the process done channel +func (bs *BaseSupervisor) closeProcessDone() { + bs.mutex.Lock() + defer bs.mutex.Unlock() + + if !bs.processDoneClosed { + close(bs.processDone) + bs.processDoneClosed = true + } +} + +// terminateProcess gracefully terminates the current process with configurable timeout +func (bs *BaseSupervisor) terminateProcess(timeout time.Duration, processType string) { + bs.mutex.RLock() + cmd := bs.cmd + pid := bs.processPID + bs.mutex.RUnlock() + + if cmd == nil || cmd.Process == nil { + return + } + + bs.logger.Info().Int("pid", pid).Msgf("terminating %s process", processType) + + // Send SIGTERM first + if err := cmd.Process.Signal(syscall.SIGTERM); err != nil { + bs.logger.Warn().Err(err).Int("pid", pid).Msgf("failed to send SIGTERM to %s process", processType) + } + + // Wait for graceful shutdown + done := make(chan struct{}) + go func() { + _ = cmd.Wait() + close(done) + }() + + select { + case <-done: + bs.logger.Info().Int("pid", pid).Msgf("%s process terminated gracefully", processType) + case <-time.After(timeout): + bs.logger.Warn().Int("pid", pid).Msg("process did not terminate gracefully, sending SIGKILL") + bs.forceKillProcess(processType) + } +} + +// forceKillProcess forcefully kills the current process +func (bs *BaseSupervisor) forceKillProcess(processType 
string) { + bs.mutex.RLock() + cmd := bs.cmd + pid := bs.processPID + bs.mutex.RUnlock() + + if cmd == nil || cmd.Process == nil { + return + } + + bs.logger.Warn().Int("pid", pid).Msgf("force killing %s process", processType) + if err := cmd.Process.Kill(); err != nil { + bs.logger.Error().Err(err).Int("pid", pid).Msg("failed to kill process") + } +} + +// waitForProcessExit waits for the current process to exit and logs the result +func (bs *BaseSupervisor) waitForProcessExit(processType string) { + bs.mutex.RLock() + cmd := bs.cmd + pid := bs.processPID + bs.mutex.RUnlock() + + if cmd == nil { + return + } + + // Wait for process to exit + err := cmd.Wait() + + bs.mutex.Lock() + bs.lastExitTime = time.Now() + bs.processPID = 0 + + var exitCode int + if err != nil { + if exitError, ok := err.(*exec.ExitError); ok { + exitCode = exitError.ExitCode() + } else { + // Process was killed or other error + exitCode = -1 + } + } else { + exitCode = 0 + } + + bs.lastExitCode = exitCode + bs.mutex.Unlock() + + // Remove process from monitoring + + if exitCode != 0 { + bs.logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msgf("%s process exited with error", processType) + } else { + bs.logger.Info().Int("pid", pid).Msgf("%s process exited gracefully", processType) + } +} + +// SupervisionConfig holds configuration for the supervision loop +type SupervisionConfig struct { + ProcessType string + Timeout time.Duration + EnableRestart bool + MaxRestartAttempts int + RestartWindow time.Duration + RestartDelay time.Duration + MaxRestartDelay time.Duration +} + +// ProcessCallbacks holds callback functions for process lifecycle events +type ProcessCallbacks struct { + OnProcessStart func(pid int) + OnProcessExit func(pid int, exitCode int, crashed bool) + OnRestart func(attempt int, delay time.Duration) +} + +// SupervisionLoop provides a template for supervision loops that can be extended by specific supervisors +func (bs *BaseSupervisor) SupervisionLoop( + config 
SupervisionConfig, + callbacks ProcessCallbacks, + startProcessFunc func() error, + shouldRestartFunc func() bool, + calculateDelayFunc func() time.Duration, +) { + defer func() { + bs.closeProcessDone() + bs.logger.Info().Msgf("%s supervision ended", config.ProcessType) + }() + + for atomic.LoadInt32(&bs.running) == 1 { + select { + case <-bs.stopChan: + bs.logger.Info().Msg("received stop signal") + bs.terminateProcess(config.Timeout, config.ProcessType) + return + case <-bs.ctx.Done(): + bs.logger.Info().Msg("context cancelled") + bs.terminateProcess(config.Timeout, config.ProcessType) + return + default: + // Start or restart the process + if err := startProcessFunc(); err != nil { + bs.logger.Error().Err(err).Msgf("failed to start %s process", config.ProcessType) + + // Check if we should attempt restart (only if restart is enabled) + if !config.EnableRestart || !shouldRestartFunc() { + bs.logger.Error().Msgf("maximum restart attempts exceeded or restart disabled, stopping %s supervisor", config.ProcessType) + return + } + + delay := calculateDelayFunc() + bs.logger.Warn().Dur("delay", delay).Msgf("retrying %s process start after delay", config.ProcessType) + + if callbacks.OnRestart != nil { + callbacks.OnRestart(0, delay) // 0 indicates start failure, not exit restart + } + + select { + case <-time.After(delay): + case <-bs.stopChan: + return + case <-bs.ctx.Done(): + return + } + continue + } + + // Wait for process to exit + bs.waitForProcessExitWithCallback(config.ProcessType, callbacks) + + // Check if we should restart (only if restart is enabled) + if !config.EnableRestart { + bs.logger.Info().Msgf("%s process completed, restart disabled", config.ProcessType) + return + } + + if !shouldRestartFunc() { + bs.logger.Error().Msgf("maximum restart attempts exceeded, stopping %s supervisor", config.ProcessType) + return + } + + // Calculate restart delay + delay := calculateDelayFunc() + bs.logger.Info().Dur("delay", delay).Msgf("restarting %s process after 
delay", config.ProcessType) + + if callbacks.OnRestart != nil { + callbacks.OnRestart(1, delay) // 1 indicates restart after exit + } + + // Wait for restart delay + select { + case <-time.After(delay): + case <-bs.stopChan: + return + case <-bs.ctx.Done(): + return + } + } + } +} + +// waitForProcessExitWithCallback extends waitForProcessExit with callback support +func (bs *BaseSupervisor) waitForProcessExitWithCallback(processType string, callbacks ProcessCallbacks) { + bs.mutex.RLock() + pid := bs.processPID + bs.mutex.RUnlock() + + // Use the base waitForProcessExit logic + bs.waitForProcessExit(processType) + + // Handle callbacks if provided + if callbacks.OnProcessExit != nil { + bs.mutex.RLock() + exitCode := bs.lastExitCode + bs.mutex.RUnlock() + + crashed := exitCode != 0 + callbacks.OnProcessExit(pid, exitCode, crashed) + } +} diff --git a/internal/audio/output_supervisor.go b/internal/audio/output_supervisor.go new file mode 100644 index 00000000..310c07fe --- /dev/null +++ b/internal/audio/output_supervisor.go @@ -0,0 +1,316 @@ +//go:build cgo +// +build cgo + +package audio + +import ( + "fmt" + "os" + "os/exec" + "strconv" + "sync/atomic" + "time" +) + +// Component name constants for logging +const ( + AudioOutputSupervisorComponent = "audio-output-supervisor" +) + +// AudioOutputSupervisor manages the audio output server subprocess lifecycle +type AudioOutputSupervisor struct { + *BaseSupervisor + + // Restart management + restartAttempts []time.Time + + // Environment variables for OPUS configuration + opusEnv []string + + // Callbacks + onProcessStart func(pid int) + onProcessExit func(pid int, exitCode int, crashed bool) + onRestart func(attempt int, delay time.Duration) +} + +// NewAudioOutputSupervisor creates a new audio output server supervisor +func NewAudioOutputSupervisor() *AudioOutputSupervisor { + return &AudioOutputSupervisor{ + BaseSupervisor: NewBaseSupervisor("audio-output-supervisor"), + restartAttempts: make([]time.Time, 0), + } 
+} + +// SetCallbacks sets optional callbacks for process lifecycle events +func (s *AudioOutputSupervisor) SetCallbacks( + onStart func(pid int), + onExit func(pid int, exitCode int, crashed bool), + onRestart func(attempt int, delay time.Duration), +) { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.onProcessStart = onStart + + // Wrap the exit callback to include restart tracking + if onExit != nil { + s.onProcessExit = func(pid int, exitCode int, crashed bool) { + if crashed { + s.recordRestartAttempt() + } + onExit(pid, exitCode, crashed) + } + } else { + s.onProcessExit = func(pid int, exitCode int, crashed bool) { + if crashed { + s.recordRestartAttempt() + } + } + } + + s.onRestart = onRestart +} + +// SetOpusConfig sets OPUS configuration parameters as environment variables +// for the audio output subprocess +func (s *AudioOutputSupervisor) SetOpusConfig(bitrate, complexity, vbr, signalType, bandwidth, dtx int) { + s.mutex.Lock() + defer s.mutex.Unlock() + + // Store OPUS parameters as environment variables for C binary + s.opusEnv = []string{ + "OPUS_BITRATE=" + strconv.Itoa(bitrate), + "OPUS_COMPLEXITY=" + strconv.Itoa(complexity), + "OPUS_VBR=" + strconv.Itoa(vbr), + "OPUS_SIGNAL_TYPE=" + strconv.Itoa(signalType), + "OPUS_BANDWIDTH=" + strconv.Itoa(bandwidth), + "OPUS_DTX=" + strconv.Itoa(dtx), + "ALSA_CAPTURE_DEVICE=hw:0,0", // TC358743 HDMI audio capture + } +} + +// Start begins supervising the audio output server process +func (s *AudioOutputSupervisor) Start() error { + if !atomic.CompareAndSwapInt32(&s.running, 0, 1) { + return fmt.Errorf("audio output supervisor is already running") + } + + s.logSupervisorStart() + s.createContext() + + // Recreate channels in case they were closed by a previous Stop() call + s.initializeChannels() + + // Reset restart tracking on start + s.mutex.Lock() + s.restartAttempts = s.restartAttempts[:0] + s.mutex.Unlock() + + // Start the supervision loop + go s.supervisionLoop() + + // Establish IPC connection to 
subprocess after a brief delay + go func() { + time.Sleep(500 * time.Millisecond) // Wait for subprocess to start + s.connectClient() + }() + + s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component started successfully") + return nil +} + +// Stop gracefully stops the audio server and supervisor +func (s *AudioOutputSupervisor) Stop() { + if !atomic.CompareAndSwapInt32(&s.running, 1, 0) { + return // Already stopped + } + + s.logSupervisorStop() + + // Signal stop and wait for cleanup + s.closeStopChan() + s.cancelContext() + + // Wait for process to exit + select { + case <-s.processDone: + s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component stopped gracefully") + case <-time.After(Config.OutputSupervisorTimeout): + s.logger.Warn().Str("component", AudioOutputSupervisorComponent).Msg("component did not stop gracefully, forcing termination") + s.forceKillProcess("audio output server") + } + + // Ensure socket file cleanup even if subprocess didn't clean up properly + // This prevents "address already in use" errors on restart + outputSocketPath := getOutputSocketPath() + if err := os.Remove(outputSocketPath); err != nil && !os.IsNotExist(err) { + s.logger.Warn().Err(err).Str("socket_path", outputSocketPath).Msg("failed to remove output socket file during supervisor stop") + } else if err == nil { + s.logger.Debug().Str("socket_path", outputSocketPath).Msg("cleaned up output socket file") + } + + s.logger.Info().Str("component", AudioOutputSupervisorComponent).Msg("component stopped") +} + +// supervisionLoop is the main loop that manages the audio output process +func (s *AudioOutputSupervisor) supervisionLoop() { + // Configure supervision parameters + config := SupervisionConfig{ + ProcessType: "audio output server", + Timeout: Config.OutputSupervisorTimeout, + EnableRestart: true, + MaxRestartAttempts: Config.MaxRestartAttempts, + RestartWindow: Config.RestartWindow, + RestartDelay: Config.RestartDelay, + 
MaxRestartDelay: Config.MaxRestartDelay, + } + + // Configure callbacks + callbacks := ProcessCallbacks{ + OnProcessStart: s.onProcessStart, + OnProcessExit: s.onProcessExit, + OnRestart: s.onRestart, + } + + // Use the base supervision loop template + s.SupervisionLoop( + config, + callbacks, + s.startProcess, + s.shouldRestart, + s.calculateRestartDelay, + ) +} + +// startProcess starts the audio server process +func (s *AudioOutputSupervisor) startProcess() error { + // Use embedded C binary path + binaryPath := GetAudioOutputBinaryPath() + + s.mutex.Lock() + defer s.mutex.Unlock() + + // Create new command (no args needed for C binary) + s.cmd = exec.CommandContext(s.ctx, binaryPath) + s.cmd.Stdout = os.Stdout + s.cmd.Stderr = os.Stderr + + // Set environment variables for OPUS configuration + env := append(os.Environ(), s.opusEnv...) + + // Pass logging environment variables directly to subprocess + // The subprocess will inherit all PION_LOG_* variables from os.Environ() + // This ensures the audio scope gets the correct trace level + + s.cmd.Env = env + + // Start the process + if err := s.cmd.Start(); err != nil { + return fmt.Errorf("failed to start audio output server process: %w", err) + } + + s.processPID = s.cmd.Process.Pid + s.logger.Info().Int("pid", s.processPID).Str("binary", binaryPath).Strs("opus_env", s.opusEnv).Msg("audio server process started") + + // Add process to monitoring + + if s.onProcessStart != nil { + s.onProcessStart(s.processPID) + } + + return nil +} + +// shouldRestart determines if the process should be restarted +func (s *AudioOutputSupervisor) shouldRestart() bool { + if atomic.LoadInt32(&s.running) == 0 { + return false // Supervisor is stopping + } + + s.mutex.RLock() + defer s.mutex.RUnlock() + + // Clean up old restart attempts outside the window + now := time.Now() + var recentAttempts []time.Time + for _, attempt := range s.restartAttempts { + if now.Sub(attempt) < Config.RestartWindow { + recentAttempts = 
append(recentAttempts, attempt) + } + } + s.restartAttempts = recentAttempts + + return len(s.restartAttempts) < Config.MaxRestartAttempts +} + +// recordRestartAttempt records a restart attempt +func (s *AudioOutputSupervisor) recordRestartAttempt() { + s.mutex.Lock() + defer s.mutex.Unlock() + + s.restartAttempts = append(s.restartAttempts, time.Now()) +} + +// calculateRestartDelay calculates the delay before next restart attempt +func (s *AudioOutputSupervisor) calculateRestartDelay() time.Duration { + s.mutex.RLock() + defer s.mutex.RUnlock() + + // Exponential backoff based on recent restart attempts + attempts := len(s.restartAttempts) + if attempts == 0 { + return Config.RestartDelay + } + + // Calculate exponential backoff: 2^attempts * base delay + delay := Config.RestartDelay + for i := 0; i < attempts && delay < Config.MaxRestartDelay; i++ { + delay *= 2 + } + + if delay > Config.MaxRestartDelay { + delay = Config.MaxRestartDelay + } + + return delay +} + +// client holds the IPC client for communicating with the subprocess +var outputClient *AudioOutputClient + +// IsConnected returns whether the supervisor has an active connection to the subprocess +func (s *AudioOutputSupervisor) IsConnected() bool { + return outputClient != nil && outputClient.IsConnected() +} + +// GetClient returns the IPC client for the subprocess +func (s *AudioOutputSupervisor) GetClient() *AudioOutputClient { + return outputClient +} + +// connectClient establishes connection to the audio output subprocess +func (s *AudioOutputSupervisor) connectClient() { + if outputClient == nil { + outputClient = NewAudioOutputClient() + } + + // Try to connect to the subprocess + if err := outputClient.Connect(); err != nil { + s.logger.Warn().Err(err).Msg("Failed to connect to audio output subprocess") + } else { + s.logger.Info().Msg("Connected to audio output subprocess") + } +} + +// SendOpusConfig sends Opus configuration to the audio output subprocess +func (aos 
*AudioOutputSupervisor) SendOpusConfig(config UnifiedIPCOpusConfig) error { + if outputClient == nil { + return fmt.Errorf("client not initialized") + } + + if !outputClient.IsConnected() { + return fmt.Errorf("client not connected") + } + + return outputClient.SendOpusConfig(config) +} diff --git a/internal/audio/relay_api.go b/internal/audio/relay_api.go new file mode 100644 index 00000000..666cb69e --- /dev/null +++ b/internal/audio/relay_api.go @@ -0,0 +1,219 @@ +package audio + +import ( + "errors" + "fmt" + "sync" + "time" +) + +// Global relay instance for the main process +var ( + globalRelay *AudioRelay + relayMutex sync.RWMutex +) + +// StartAudioRelay starts the audio relay system for the main process +// This replaces the CGO-based audio system when running in main process mode +// audioTrack can be nil initially and updated later via UpdateAudioRelayTrack +func StartAudioRelay(audioTrack AudioTrackWriter) error { + relayMutex.Lock() + defer relayMutex.Unlock() + + if globalRelay != nil { + return nil // Already running + } + + // Create new relay + relay := NewAudioRelay() + + // Retry starting the relay with exponential backoff + // This handles cases where the subprocess hasn't created its socket yet + maxAttempts := 5 + baseDelay := 200 * time.Millisecond + maxDelay := 2 * time.Second + + var lastErr error + for i := 0; i < maxAttempts; i++ { + if err := relay.Start(audioTrack); err != nil { + lastErr = err + if i < maxAttempts-1 { + // Calculate exponential backoff delay + delay := time.Duration(float64(baseDelay) * (1.5 * float64(i+1))) + if delay > maxDelay { + delay = maxDelay + } + time.Sleep(delay) + continue + } + return fmt.Errorf("failed to start audio relay after %d attempts: %w", maxAttempts, lastErr) + } + + // Success + globalRelay = relay + return nil + } + + return fmt.Errorf("failed to start audio relay after %d attempts: %w", maxAttempts, lastErr) +} + +// StopAudioRelay stops the audio relay system +func StopAudioRelay() { + 
relayMutex.Lock() + defer relayMutex.Unlock() + + if globalRelay != nil { + globalRelay.Stop() + globalRelay = nil + } +} + +// SetAudioRelayMuted sets the mute state for the audio relay +func SetAudioRelayMuted(muted bool) { + relayMutex.RLock() + defer relayMutex.RUnlock() + + if globalRelay != nil { + globalRelay.SetMuted(muted) + } +} + +// IsAudioRelayMuted returns the current mute state of the audio relay +func IsAudioRelayMuted() bool { + relayMutex.RLock() + defer relayMutex.RUnlock() + + if globalRelay != nil { + return globalRelay.IsMuted() + } + return false +} + +// GetAudioRelayStats returns statistics from the audio relay +func GetAudioRelayStats() (framesRelayed, framesDropped int64) { + relayMutex.RLock() + defer relayMutex.RUnlock() + + if globalRelay != nil { + return globalRelay.GetStats() + } + return 0, 0 +} + +// IsAudioRelayRunning returns whether the audio relay is currently running +func IsAudioRelayRunning() bool { + relayMutex.RLock() + defer relayMutex.RUnlock() + + return globalRelay != nil +} + +// UpdateAudioRelayTrack updates the WebRTC audio track for the relay +// This function is refactored to prevent mutex deadlocks during quality changes +func UpdateAudioRelayTrack(audioTrack AudioTrackWriter) error { + var needsCallback bool + var callbackFunc TrackReplacementCallback + + // Critical section: minimize time holding the mutex + relayMutex.Lock() + if globalRelay == nil { + // No relay running, start one with the provided track + relay := NewAudioRelay() + if err := relay.Start(audioTrack); err != nil { + relayMutex.Unlock() + return err + } + globalRelay = relay + } else { + // Update the track in the existing relay + globalRelay.UpdateTrack(audioTrack) + } + + // Capture callback state while holding mutex + needsCallback = trackReplacementCallback != nil + if needsCallback { + callbackFunc = trackReplacementCallback + } + relayMutex.Unlock() + + // Execute callback outside of mutex to prevent deadlock + if needsCallback && 
callbackFunc != nil { + // Use goroutine with timeout to prevent blocking + done := make(chan error, 1) + go func() { + done <- callbackFunc(audioTrack) + }() + + // Wait for callback with timeout + select { + case err := <-done: + if err != nil { + // Log error but don't fail the relay operation + // The relay can still work even if WebRTC track replacement fails + _ = err // Suppress linter warning + } + case <-time.After(5 * time.Second): + // Timeout: log warning but continue + // This prevents indefinite blocking during quality changes + _ = fmt.Errorf("track replacement callback timed out") + } + } + + return nil +} + +// CurrentSessionCallback is a function type for getting the current session's audio track +type CurrentSessionCallback func() AudioTrackWriter + +// TrackReplacementCallback is a function type for replacing the WebRTC audio track +type TrackReplacementCallback func(AudioTrackWriter) error + +// currentSessionCallback holds the callback function to get the current session's audio track +var currentSessionCallback CurrentSessionCallback + +// trackReplacementCallback holds the callback function to replace the WebRTC audio track +var trackReplacementCallback TrackReplacementCallback + +// SetCurrentSessionCallback sets the callback function to get the current session's audio track +func SetCurrentSessionCallback(callback CurrentSessionCallback) { + currentSessionCallback = callback +} + +// SetTrackReplacementCallback sets the callback function to replace the WebRTC audio track +func SetTrackReplacementCallback(callback TrackReplacementCallback) { + trackReplacementCallback = callback +} + +// UpdateAudioRelayTrackAsync performs async track update to prevent blocking +// This is used during WebRTC session creation to avoid deadlocks +func UpdateAudioRelayTrackAsync(audioTrack AudioTrackWriter) { + go func() { + if err := UpdateAudioRelayTrack(audioTrack); err != nil { + // Log error but don't block session creation + _ = err // Suppress linter 
warning + } + }() +} + +// connectRelayToCurrentSession connects the audio relay to the current WebRTC session's audio track +// This is used when restarting the relay during unmute operations +func connectRelayToCurrentSession() error { + if currentSessionCallback == nil { + return errors.New("no current session callback set") + } + + track := currentSessionCallback() + if track == nil { + return errors.New("no current session audio track available") + } + + relayMutex.Lock() + defer relayMutex.Unlock() + + if globalRelay != nil { + globalRelay.UpdateTrack(track) + return nil + } + + return errors.New("no global relay running") +} diff --git a/internal/audio/rpc_handlers.go b/internal/audio/rpc_handlers.go new file mode 100644 index 00000000..b19738db --- /dev/null +++ b/internal/audio/rpc_handlers.go @@ -0,0 +1,102 @@ +package audio + +import ( + "fmt" +) + +// RPC wrapper functions for audio control +// These functions bridge the RPC layer to the AudioControlService + +// This variable will be set by the main package to provide access to the global service +var ( + getAudioControlServiceFunc func() *AudioControlService +) + +// SetRPCCallbacks sets the callback function for RPC operations +func SetRPCCallbacks(getService func() *AudioControlService) { + getAudioControlServiceFunc = getService +} + +// RPCAudioMute handles audio mute/unmute RPC requests +func RPCAudioMute(muted bool) error { + if getAudioControlServiceFunc == nil { + return fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return fmt.Errorf("audio control service not initialized") + } + return service.MuteAudio(muted) +} + +// RPCMicrophoneStart handles microphone start RPC requests +func RPCMicrophoneStart() error { + if getAudioControlServiceFunc == nil { + return fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return fmt.Errorf("audio control service not 
initialized") + } + return service.StartMicrophone() +} + +// RPCMicrophoneStop handles microphone stop RPC requests +func RPCMicrophoneStop() error { + if getAudioControlServiceFunc == nil { + return fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return fmt.Errorf("audio control service not initialized") + } + return service.StopMicrophone() +} + +// RPCAudioStatus handles audio status RPC requests (read-only) +func RPCAudioStatus() (map[string]interface{}, error) { + if getAudioControlServiceFunc == nil { + return nil, fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return nil, fmt.Errorf("audio control service not initialized") + } + return service.GetAudioStatus(), nil +} + +// RPCMicrophoneStatus handles microphone status RPC requests (read-only) +func RPCMicrophoneStatus() (map[string]interface{}, error) { + if getAudioControlServiceFunc == nil { + return nil, fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return nil, fmt.Errorf("audio control service not initialized") + } + return service.GetMicrophoneStatus(), nil +} + +// RPCMicrophoneReset handles microphone reset RPC requests +func RPCMicrophoneReset() error { + if getAudioControlServiceFunc == nil { + return fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return fmt.Errorf("audio control service not initialized") + } + return service.ResetMicrophone() +} + +// RPCMicrophoneMute handles microphone mute RPC requests +func RPCMicrophoneMute(muted bool) error { + if getAudioControlServiceFunc == nil { + return fmt.Errorf("audio control service not available") + } + service := getAudioControlServiceFunc() + if service == nil { + return fmt.Errorf("audio control service not initialized") + } + return 
service.MuteMicrophone(muted) +} diff --git a/internal/audio/session_provider.go b/internal/audio/session_provider.go new file mode 100644 index 00000000..73464548 --- /dev/null +++ b/internal/audio/session_provider.go @@ -0,0 +1,30 @@ +package audio + +// SessionProvider interface abstracts session management for audio events +type SessionProvider interface { + IsSessionActive() bool + GetAudioInputManager() *AudioInputManager +} + +// DefaultSessionProvider is a no-op implementation +type DefaultSessionProvider struct{} + +func (d *DefaultSessionProvider) IsSessionActive() bool { + return false +} + +func (d *DefaultSessionProvider) GetAudioInputManager() *AudioInputManager { + return nil +} + +var sessionProvider SessionProvider = &DefaultSessionProvider{} + +// SetSessionProvider allows the main package to inject session management +func SetSessionProvider(provider SessionProvider) { + sessionProvider = provider +} + +// GetSessionProvider returns the current session provider +func GetSessionProvider() SessionProvider { + return sessionProvider +} diff --git a/internal/audio/supervisor_api.go b/internal/audio/supervisor_api.go new file mode 100644 index 00000000..4980a4c0 --- /dev/null +++ b/internal/audio/supervisor_api.go @@ -0,0 +1,39 @@ +package audio + +import ( + "sync/atomic" + "unsafe" +) + +var ( + globalOutputSupervisor unsafe.Pointer // *AudioOutputSupervisor + globalInputSupervisor unsafe.Pointer // *AudioInputSupervisor +) + +// SetAudioOutputSupervisor sets the global audio output supervisor +func SetAudioOutputSupervisor(supervisor *AudioOutputSupervisor) { + atomic.StorePointer(&globalOutputSupervisor, unsafe.Pointer(supervisor)) +} + +// GetAudioOutputSupervisor returns the global audio output supervisor +func GetAudioOutputSupervisor() *AudioOutputSupervisor { + ptr := atomic.LoadPointer(&globalOutputSupervisor) + if ptr == nil { + return nil + } + return (*AudioOutputSupervisor)(ptr) +} + +// SetAudioInputSupervisor sets the global audio 
input supervisor +func SetAudioInputSupervisor(supervisor *AudioInputSupervisor) { + atomic.StorePointer(&globalInputSupervisor, unsafe.Pointer(supervisor)) +} + +// GetAudioInputSupervisor returns the global audio input supervisor +func GetAudioInputSupervisor() *AudioInputSupervisor { + ptr := atomic.LoadPointer(&globalInputSupervisor) + if ptr == nil { + return nil + } + return (*AudioInputSupervisor)(ptr) +} diff --git a/internal/audio/util_buffer_pool.go b/internal/audio/util_buffer_pool.go new file mode 100644 index 00000000..aabcd4d9 --- /dev/null +++ b/internal/audio/util_buffer_pool.go @@ -0,0 +1,141 @@ +//go:build cgo +// +build cgo + +package audio + +import ( + "sync/atomic" +) + +// AudioBufferPool provides a simple buffer pool for audio processing +type AudioBufferPool struct { + // Atomic counters + hitCount int64 // Pool hit counter (atomic) + missCount int64 // Pool miss counter (atomic) + + // Pool configuration + bufferSize int + pool chan []byte + maxSize int +} + +// NewAudioBufferPool creates a new simple audio buffer pool +func NewAudioBufferPool(bufferSize int) *AudioBufferPool { + maxSize := Config.MaxPoolSize + if maxSize <= 0 { + maxSize = Config.BufferPoolDefaultSize + } + + pool := &AudioBufferPool{ + bufferSize: bufferSize, + pool: make(chan []byte, maxSize), + maxSize: maxSize, + } + + // Pre-populate the pool + for i := 0; i < maxSize/2; i++ { + buf := make([]byte, bufferSize) + select { + case pool.pool <- buf: + default: + break + } + } + + return pool +} + +// Get retrieves a buffer from the pool +func (p *AudioBufferPool) Get() []byte { + select { + case buf := <-p.pool: + atomic.AddInt64(&p.hitCount, 1) + return buf[:0] // Reset length but keep capacity + default: + atomic.AddInt64(&p.missCount, 1) + return make([]byte, 0, p.bufferSize) + } +} + +// Put returns a buffer to the pool +func (p *AudioBufferPool) Put(buf []byte) { + if buf == nil || cap(buf) != p.bufferSize { + return // Invalid buffer + } + + // Reset the buffer + 
buf = buf[:0] + + // Try to return to pool + select { + case p.pool <- buf: + // Successfully returned to pool + default: + // Pool is full, discard buffer + } +} + +// GetStats returns pool statistics +func (p *AudioBufferPool) GetStats() AudioBufferPoolStats { + hitCount := atomic.LoadInt64(&p.hitCount) + missCount := atomic.LoadInt64(&p.missCount) + totalRequests := hitCount + missCount + + var hitRate float64 + if totalRequests > 0 { + hitRate = float64(hitCount) / float64(totalRequests) * Config.BufferPoolHitRateBase + } + + return AudioBufferPoolStats{ + BufferSize: p.bufferSize, + MaxPoolSize: p.maxSize, + CurrentSize: int64(len(p.pool)), + HitCount: hitCount, + MissCount: missCount, + HitRate: hitRate, + } +} + +// AudioBufferPoolStats represents pool statistics +type AudioBufferPoolStats struct { + BufferSize int + MaxPoolSize int + CurrentSize int64 + HitCount int64 + MissCount int64 + HitRate float64 +} + +// Global buffer pools +var ( + audioFramePool = NewAudioBufferPool(Config.AudioFramePoolSize) + audioControlPool = NewAudioBufferPool(Config.BufferPoolControlSize) +) + +// GetAudioFrameBuffer gets a buffer for audio frames +func GetAudioFrameBuffer() []byte { + return audioFramePool.Get() +} + +// PutAudioFrameBuffer returns a buffer to the frame pool +func PutAudioFrameBuffer(buf []byte) { + audioFramePool.Put(buf) +} + +// GetAudioControlBuffer gets a buffer for control messages +func GetAudioControlBuffer() []byte { + return audioControlPool.Get() +} + +// PutAudioControlBuffer returns a buffer to the control pool +func PutAudioControlBuffer(buf []byte) { + audioControlPool.Put(buf) +} + +// GetAudioBufferPoolStats returns statistics for all pools +func GetAudioBufferPoolStats() map[string]AudioBufferPoolStats { + return map[string]AudioBufferPoolStats{ + "frame_pool": audioFramePool.GetStats(), + "control_pool": audioControlPool.GetStats(), + } +} diff --git a/internal/audio/webrtc_relay.go b/internal/audio/webrtc_relay.go new file mode 100644 
index 00000000..2784cfc0 --- /dev/null +++ b/internal/audio/webrtc_relay.go @@ -0,0 +1,247 @@ +package audio + +import ( + "context" + "fmt" + "reflect" + "sync" + "sync/atomic" + "time" + + "github.com/jetkvm/kvm/internal/logging" + "github.com/pion/webrtc/v4/pkg/media" + "github.com/rs/zerolog" +) + +// AudioRelay handles forwarding audio frames from the audio server subprocess +// to WebRTC without any CGO audio processing. This runs in the main process. +type AudioRelay struct { + // Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) + framesRelayed int64 + framesDropped int64 + + client *AudioOutputClient + ctx context.Context + cancel context.CancelFunc + wg sync.WaitGroup + logger *zerolog.Logger + running bool + mutex sync.RWMutex + bufferPool *AudioBufferPool // Buffer pool for memory optimization + + // WebRTC integration + audioTrack AudioTrackWriter + muted bool +} + +// AudioTrackWriter interface for WebRTC audio track +type AudioTrackWriter interface { + WriteSample(sample media.Sample) error +} + +// NewAudioRelay creates a new audio relay for the main process +func NewAudioRelay() *AudioRelay { + ctx, cancel := context.WithCancel(context.Background()) + logger := logging.GetDefaultLogger().With().Str("component", "audio-relay").Logger() + + return &AudioRelay{ + ctx: ctx, + cancel: cancel, + logger: &logger, + bufferPool: NewAudioBufferPool(Config.MaxAudioFrameSize), + } +} + +// Start begins the audio relay process +func (r *AudioRelay) Start(audioTrack AudioTrackWriter) error { + r.mutex.Lock() + defer r.mutex.Unlock() + + if r.running { + return nil // Already running + } + + // Create audio client to connect to subprocess + client := NewAudioOutputClient() + r.client = client + r.audioTrack = audioTrack + + // Connect to the audio output server + if err := client.Connect(); err != nil { + return fmt.Errorf("failed to connect to audio output server: %w", err) + } + + // Start relay goroutine + r.wg.Add(1) + go 
r.relayLoop() + + r.running = true + r.logger.Info().Msg("Audio relay connected to output server") + return nil +} + +// Stop stops the audio relay +func (r *AudioRelay) Stop() { + r.mutex.Lock() + defer r.mutex.Unlock() + + if !r.running { + return + } + + r.cancel() + r.wg.Wait() + + if r.client != nil { + r.client.Disconnect() + r.client = nil + } + + r.running = false + r.logger.Info().Msgf("Audio relay stopped after relaying %d frames", r.framesRelayed) +} + +// SetMuted sets the mute state +func (r *AudioRelay) SetMuted(muted bool) { + r.mutex.Lock() + defer r.mutex.Unlock() + r.muted = muted +} + +// IsMuted returns the current mute state (checks both relay and global mute) +func (r *AudioRelay) IsMuted() bool { + r.mutex.RLock() + defer r.mutex.RUnlock() + return r.muted || IsAudioMuted() +} + +// GetStats returns relay statistics +func (r *AudioRelay) GetStats() (framesRelayed, framesDropped int64) { + return atomic.LoadInt64(&r.framesRelayed), atomic.LoadInt64(&r.framesDropped) +} + +// UpdateTrack updates the WebRTC audio track for the relay +func (r *AudioRelay) UpdateTrack(audioTrack AudioTrackWriter) { + r.mutex.Lock() + defer r.mutex.Unlock() + r.audioTrack = audioTrack +} + +func (r *AudioRelay) relayLoop() { + defer r.wg.Done() + + var maxConsecutiveErrors = Config.MaxConsecutiveErrors + consecutiveErrors := 0 + backoffDelay := time.Millisecond * 10 + maxBackoff := time.Second * 5 + + for { + select { + case <-r.ctx.Done(): + return + default: + frame, err := r.client.ReceiveFrame() + if err != nil { + consecutiveErrors++ + r.incrementDropped() + + // Exponential backoff for stability + if consecutiveErrors >= maxConsecutiveErrors { + // Attempt reconnection + if r.attemptReconnection() { + consecutiveErrors = 0 + backoffDelay = time.Millisecond * 10 + continue + } + return + } + + time.Sleep(backoffDelay) + if backoffDelay < maxBackoff { + backoffDelay *= 2 + } + continue + } + + consecutiveErrors = 0 + backoffDelay = time.Millisecond * 10 + if 
err := r.forwardToWebRTC(frame); err != nil { + r.incrementDropped() + } else { + r.incrementRelayed() + } + } + } +} + +// forwardToWebRTC forwards a frame to the WebRTC audio track +func (r *AudioRelay) forwardToWebRTC(frame []byte) error { + // Use ultra-fast validation for critical audio path + if err := ValidateAudioFrame(frame); err != nil { + r.incrementDropped() + r.logger.Debug().Err(err).Msg("invalid frame data in relay") + return err + } + + r.mutex.RLock() + defer r.mutex.RUnlock() + + audioTrack := r.audioTrack + muted := r.muted + + // Comprehensive nil check for audioTrack to prevent panic + if audioTrack == nil { + return nil // No audio track available + } + + // Check if interface contains nil pointer using reflection + if reflect.ValueOf(audioTrack).IsNil() { + return nil // Audio track interface contains nil pointer + } + + // Prepare sample data + var sampleData []byte + if muted { + // Send silence when muted - use buffer pool to avoid allocation + sampleData = r.bufferPool.Get() + sampleData = sampleData[:len(frame)] // Resize to frame length + // Clear the buffer to create silence + for i := range sampleData { + sampleData[i] = 0 + } + defer r.bufferPool.Put(sampleData) // Return to pool after use + } else { + sampleData = frame + } + + // Write sample to WebRTC track while holding the read lock + // Frame size is fixed at 20ms for HDMI audio + return audioTrack.WriteSample(media.Sample{ + Data: sampleData, + Duration: 20 * time.Millisecond, + }) +} + +// incrementRelayed atomically increments the relayed frames counter +func (r *AudioRelay) incrementRelayed() { + atomic.AddInt64(&r.framesRelayed, 1) +} + +// incrementDropped atomically increments the dropped frames counter +func (r *AudioRelay) incrementDropped() { + atomic.AddInt64(&r.framesDropped, 1) +} + +// attemptReconnection tries to reconnect the audio client for stability +func (r *AudioRelay) attemptReconnection() bool { + if r.client == nil { + return false + } + + // Disconnect 
and reconnect + r.client.Disconnect() + time.Sleep(time.Millisecond * 100) + + err := r.client.Connect() + return err == nil +} diff --git a/internal/audio/websocket_events.go b/internal/audio/websocket_events.go new file mode 100644 index 00000000..d2e2146c --- /dev/null +++ b/internal/audio/websocket_events.go @@ -0,0 +1,244 @@ +package audio + +import ( + "context" + "strings" + "sync" + "time" + + "github.com/coder/websocket" + "github.com/coder/websocket/wsjson" + "github.com/jetkvm/kvm/internal/logging" + "github.com/rs/zerolog" +) + +// AudioEventType represents different types of audio events +type AudioEventType string + +const ( + AudioEventMuteChanged AudioEventType = "audio-mute-changed" + AudioEventMicrophoneState AudioEventType = "microphone-state-changed" + AudioEventDeviceChanged AudioEventType = "audio-device-changed" +) + +// AudioEvent represents a WebSocket audio event +type AudioEvent struct { + Type AudioEventType `json:"type"` + Data interface{} `json:"data"` +} + +// AudioMuteData represents audio mute state change data +type AudioMuteData struct { + Muted bool `json:"muted"` +} + +// MicrophoneStateData represents microphone state data +type MicrophoneStateData struct { + Running bool `json:"running"` + SessionActive bool `json:"session_active"` +} + +// AudioDeviceChangedData represents audio device configuration change data +type AudioDeviceChangedData struct { + Enabled bool `json:"enabled"` + Reason string `json:"reason"` +} + +// AudioEventSubscriber represents a WebSocket connection subscribed to audio events +type AudioEventSubscriber struct { + conn *websocket.Conn + ctx context.Context + logger *zerolog.Logger +} + +// AudioEventBroadcaster manages audio event subscriptions and broadcasting +type AudioEventBroadcaster struct { + subscribers map[string]*AudioEventSubscriber + mutex sync.RWMutex + logger *zerolog.Logger +} + +var ( + audioEventBroadcaster *AudioEventBroadcaster + audioEventOnce sync.Once +) + +// 
initializeBroadcaster creates and initializes the audio event broadcaster +func initializeBroadcaster() { + l := logging.GetDefaultLogger().With().Str("component", "audio-events").Logger() + audioEventBroadcaster = &AudioEventBroadcaster{ + subscribers: make(map[string]*AudioEventSubscriber), + logger: &l, + } +} + +// InitializeAudioEventBroadcaster initializes the global audio event broadcaster +func InitializeAudioEventBroadcaster() { + audioEventOnce.Do(initializeBroadcaster) +} + +// GetAudioEventBroadcaster returns the singleton audio event broadcaster +func GetAudioEventBroadcaster() *AudioEventBroadcaster { + audioEventOnce.Do(initializeBroadcaster) + return audioEventBroadcaster +} + +// Subscribe adds a WebSocket connection to receive audio events +func (aeb *AudioEventBroadcaster) Subscribe(connectionID string, conn *websocket.Conn, ctx context.Context, logger *zerolog.Logger) { + aeb.mutex.Lock() + defer aeb.mutex.Unlock() + + // Check if there's already a subscription for this connectionID + if _, exists := aeb.subscribers[connectionID]; exists { + aeb.logger.Debug().Str("connectionID", connectionID).Msg("duplicate audio events subscription detected; replacing existing entry") + // Do NOT close the existing WebSocket connection here because it's shared + // with the signaling channel. Just replace the subscriber map entry. 
+ delete(aeb.subscribers, connectionID) + } + + aeb.subscribers[connectionID] = &AudioEventSubscriber{ + conn: conn, + ctx: ctx, + logger: logger, + } + + aeb.logger.Debug().Str("connectionID", connectionID).Msg("audio events subscription added") + + // Send initial state to new subscriber + go aeb.sendInitialState(connectionID) +} + +// Unsubscribe removes a WebSocket connection from audio events +func (aeb *AudioEventBroadcaster) Unsubscribe(connectionID string) { + aeb.mutex.Lock() + defer aeb.mutex.Unlock() + + delete(aeb.subscribers, connectionID) + aeb.logger.Debug().Str("connectionID", connectionID).Msg("audio events subscription removed") +} + +// BroadcastAudioMuteChanged broadcasts audio mute state changes +func (aeb *AudioEventBroadcaster) BroadcastAudioMuteChanged(muted bool) { + event := createAudioEvent(AudioEventMuteChanged, AudioMuteData{Muted: muted}) + aeb.broadcast(event) +} + +// BroadcastMicrophoneStateChanged broadcasts microphone state changes +func (aeb *AudioEventBroadcaster) BroadcastMicrophoneStateChanged(running, sessionActive bool) { + event := createAudioEvent(AudioEventMicrophoneState, MicrophoneStateData{ + Running: running, + SessionActive: sessionActive, + }) + aeb.broadcast(event) +} + +// BroadcastAudioDeviceChanged broadcasts audio device configuration changes +func (aeb *AudioEventBroadcaster) BroadcastAudioDeviceChanged(enabled bool, reason string) { + event := createAudioEvent(AudioEventDeviceChanged, AudioDeviceChangedData{ + Enabled: enabled, + Reason: reason, + }) + aeb.broadcast(event) +} + +// sendInitialState sends current audio state to a new subscriber +func (aeb *AudioEventBroadcaster) sendInitialState(connectionID string) { + aeb.mutex.RLock() + subscriber, exists := aeb.subscribers[connectionID] + aeb.mutex.RUnlock() + + if !exists { + return + } + + // Send current audio mute state + muteEvent := AudioEvent{ + Type: AudioEventMuteChanged, + Data: AudioMuteData{Muted: IsAudioMuted()}, + } + 
aeb.sendToSubscriber(subscriber, muteEvent) + + // Send current microphone state using session provider + sessionProvider := GetSessionProvider() + sessionActive := sessionProvider.IsSessionActive() + var running bool + if sessionActive { + if inputManager := sessionProvider.GetAudioInputManager(); inputManager != nil { + running = inputManager.IsRunning() + } + } + + micStateEvent := AudioEvent{ + Type: AudioEventMicrophoneState, + Data: MicrophoneStateData{ + Running: running, + SessionActive: sessionActive, + }, + } + aeb.sendToSubscriber(subscriber, micStateEvent) +} + +// createAudioEvent creates an AudioEvent +func createAudioEvent(eventType AudioEventType, data interface{}) AudioEvent { + return AudioEvent{ + Type: eventType, + Data: data, + } +} + +// broadcast sends an event to all subscribers +func (aeb *AudioEventBroadcaster) broadcast(event AudioEvent) { + aeb.mutex.RLock() + // Create a copy of subscribers to avoid holding the lock during sending + subscribersCopy := make(map[string]*AudioEventSubscriber) + for id, sub := range aeb.subscribers { + subscribersCopy[id] = sub + } + aeb.mutex.RUnlock() + + // Track failed subscribers to remove them after sending + var failedSubscribers []string + + // Send to all subscribers without holding the lock + for connectionID, subscriber := range subscribersCopy { + if !aeb.sendToSubscriber(subscriber, event) { + failedSubscribers = append(failedSubscribers, connectionID) + } + } + + // Remove failed subscribers if any + if len(failedSubscribers) > 0 { + aeb.mutex.Lock() + for _, connectionID := range failedSubscribers { + delete(aeb.subscribers, connectionID) + aeb.logger.Warn().Str("connectionID", connectionID).Msg("removed failed audio events subscriber") + } + aeb.mutex.Unlock() + } +} + +// sendToSubscriber sends an event to a specific subscriber +func (aeb *AudioEventBroadcaster) sendToSubscriber(subscriber *AudioEventSubscriber, event AudioEvent) bool { + // Check if subscriber context is already cancelled 
+ if subscriber.ctx.Err() != nil { + return false + } + + ctx, cancel := context.WithTimeout(subscriber.ctx, time.Duration(Config.EventTimeoutSeconds)*time.Second) + defer cancel() + + err := wsjson.Write(ctx, subscriber.conn, event) + if err != nil { + // Don't log network errors for closed connections as warnings, they're expected + if strings.Contains(err.Error(), "use of closed network connection") || + strings.Contains(err.Error(), "connection reset by peer") || + strings.Contains(err.Error(), "context canceled") { + subscriber.logger.Debug().Err(err).Msg("websocket connection closed during audio event send") + } else { + subscriber.logger.Warn().Err(err).Msg("failed to send audio event to subscriber") + } + return false + } + + return true +} diff --git a/internal/audio/zero_copy.go b/internal/audio/zero_copy.go new file mode 100644 index 00000000..9af02302 --- /dev/null +++ b/internal/audio/zero_copy.go @@ -0,0 +1,377 @@ +package audio + +import ( + "sync" + "sync/atomic" + "unsafe" +) + +// ZeroCopyAudioFrame represents a reference-counted audio frame for zero-copy operations. +// +// This structure implements a sophisticated memory management system designed to minimize +// allocations and memory copying in the audio pipeline: +// +// Key Features: +// +// 1. Reference Counting: Multiple components can safely share the same frame data +// without copying. The frame is automatically returned to the pool when the last +// reference is released. +// +// 2. Thread Safety: All operations are protected by RWMutex, allowing concurrent +// reads while ensuring exclusive access for modifications. +// +// 3. Pool Integration: Frames are automatically managed by ZeroCopyFramePool, +// enabling efficient reuse and preventing memory fragmentation. +// +// 4. Unsafe Pointer Access: For performance-critical CGO operations, direct +// memory access is provided while maintaining safety through reference counting. 
+// +// Usage Pattern: +// +// frame := pool.Get() // Acquire frame (refCount = 1) +// frame.AddRef() // Share with another component (refCount = 2) +// data := frame.Data() // Access data safely +// frame.Release() // Release reference (refCount = 1) +// frame.Release() // Final release, returns to pool (refCount = 0) +// +// Memory Safety: +// - Frames cannot be modified while shared (refCount > 1) +// - Data access is bounds-checked to prevent buffer overruns +// - Pool management prevents use-after-free scenarios +type ZeroCopyAudioFrame struct { + data []byte + length int + capacity int + refCount int32 + mutex sync.RWMutex + pooled bool +} + +// ZeroCopyFramePool manages a pool of reusable zero-copy audio frames. +// +// This pool implements a three-tier memory management strategy optimized for +// real-time audio processing with minimal allocation overhead: +// +// Tier 1 - Pre-allocated Frames: +// +// A small number of frames are pre-allocated at startup and kept ready +// for immediate use. This provides the fastest possible allocation for +// the most common case and eliminates allocation latency spikes. +// +// Tier 2 - sync.Pool Cache: +// +// The standard Go sync.Pool provides efficient reuse of frames with +// automatic garbage collection integration. Frames are automatically +// returned here when memory pressure is low. +// +// Tier 3 - Memory Guard: +// +// A configurable limit prevents excessive memory usage by limiting +// the total number of allocated frames. When the limit is reached, +// allocation requests are denied to prevent OOM conditions. +// +// Performance Characteristics: +// - Pre-allocated tier: ~10ns allocation time +// - sync.Pool tier: ~50ns allocation time +// - Memory guard: Prevents unbounded growth +// - Metrics tracking: Hit/miss rates for optimization +// +// The pool is designed for embedded systems with limited memory (256MB) +// where predictable memory usage is more important than absolute performance. 
+type ZeroCopyFramePool struct { + // Atomic fields MUST be first for ARM32 alignment (int64 fields need 8-byte alignment) + counter int64 // Frame counter (atomic) + hitCount int64 // Pool hit counter (atomic) + missCount int64 // Pool miss counter (atomic) + allocationCount int64 // Total allocations counter (atomic) + + // Other fields + pool sync.Pool + maxSize int + mutex sync.RWMutex + // Memory optimization fields + preallocated []*ZeroCopyAudioFrame // Pre-allocated frames for immediate use + preallocSize int // Number of pre-allocated frames + maxPoolSize int // Maximum pool size to prevent memory bloat +} + +// NewZeroCopyFramePool creates a new zero-copy frame pool +func NewZeroCopyFramePool(maxFrameSize int) *ZeroCopyFramePool { + // Pre-allocate frames for immediate availability + preallocSizeBytes := Config.ZeroCopyPreallocSizeBytes + maxPoolSize := Config.MaxPoolSize // Limit total pool size + + // Calculate number of frames based on memory budget, not frame count + preallocFrameCount := preallocSizeBytes / maxFrameSize + if preallocFrameCount > maxPoolSize { + preallocFrameCount = maxPoolSize + } + if preallocFrameCount < Config.ZeroCopyMinPreallocFrames { + preallocFrameCount = Config.ZeroCopyMinPreallocFrames + } + + preallocated := make([]*ZeroCopyAudioFrame, 0, preallocFrameCount) + + // Pre-allocate frames to reduce initial allocation overhead + for i := 0; i < preallocFrameCount; i++ { + frame := &ZeroCopyAudioFrame{ + data: make([]byte, 0, maxFrameSize), + capacity: maxFrameSize, + pooled: true, + } + preallocated = append(preallocated, frame) + } + + return &ZeroCopyFramePool{ + maxSize: maxFrameSize, + preallocated: preallocated, + preallocSize: preallocFrameCount, + maxPoolSize: maxPoolSize, + pool: sync.Pool{ + New: func() interface{} { + return &ZeroCopyAudioFrame{ + data: make([]byte, 0, maxFrameSize), + capacity: maxFrameSize, + pooled: true, + } + }, + }, + } +} + +// Get retrieves a zero-copy frame from the pool +func (p 
*ZeroCopyFramePool) Get() *ZeroCopyAudioFrame { + // Memory guard: Track allocation count to prevent excessive memory usage + allocationCount := atomic.LoadInt64(&p.allocationCount) + if allocationCount > int64(p.maxPoolSize*2) { + // If we've allocated too many frames, force pool reuse + frame := p.pool.Get().(*ZeroCopyAudioFrame) + frame.mutex.Lock() + atomic.StoreInt32(&frame.refCount, 1) + frame.length = 0 + frame.data = frame.data[:0] + frame.mutex.Unlock() + + return frame + } + + // First try pre-allocated frames for fastest access + p.mutex.Lock() + if len(p.preallocated) > 0 { + frame := p.preallocated[len(p.preallocated)-1] + p.preallocated = p.preallocated[:len(p.preallocated)-1] + p.mutex.Unlock() + + frame.mutex.Lock() + atomic.StoreInt32(&frame.refCount, 1) + frame.length = 0 + frame.data = frame.data[:0] + frame.mutex.Unlock() + + atomic.AddInt64(&p.hitCount, 1) + return frame + } + p.mutex.Unlock() + + // Try sync.Pool next and track allocation + frame := p.pool.Get().(*ZeroCopyAudioFrame) + frame.mutex.Lock() + atomic.StoreInt32(&frame.refCount, 1) + frame.length = 0 + frame.data = frame.data[:0] + frame.mutex.Unlock() + + atomic.AddInt64(&p.hitCount, 1) + + return frame +} + +// Put returns a zero-copy frame to the pool +func (p *ZeroCopyFramePool) Put(frame *ZeroCopyAudioFrame) { + if frame == nil || !frame.pooled { + return + } + + // Reset frame state for reuse + frame.mutex.Lock() + atomic.StoreInt32(&frame.refCount, 0) + frame.length = 0 + frame.data = frame.data[:0] + frame.mutex.Unlock() + + // First try to return to pre-allocated pool for fastest reuse + p.mutex.Lock() + if len(p.preallocated) < p.preallocSize { + p.preallocated = append(p.preallocated, frame) + p.mutex.Unlock() + return + } + p.mutex.Unlock() + + // Check pool size limit to prevent excessive memory usage + p.mutex.RLock() + currentCount := atomic.LoadInt64(&p.counter) + p.mutex.RUnlock() + + if currentCount >= int64(p.maxPoolSize) { + return // Pool is full, let GC handle 
this frame + } + + // Return to sync.Pool + p.pool.Put(frame) + atomic.AddInt64(&p.counter, 1) +} + +// Data returns the frame data as a slice (zero-copy view) +func (f *ZeroCopyAudioFrame) Data() []byte { + f.mutex.RLock() + defer f.mutex.RUnlock() + return f.data[:f.length] +} + +// SetData sets the frame data (zero-copy if possible) +func (f *ZeroCopyAudioFrame) SetData(data []byte) error { + f.mutex.Lock() + defer f.mutex.Unlock() + + if len(data) > f.capacity { + // Need to reallocate - not zero-copy but necessary + f.data = make([]byte, len(data)) + f.capacity = len(data) + f.pooled = false // Can't return to pool anymore + } + + // Zero-copy assignment when data fits in existing buffer + if cap(f.data) >= len(data) { + f.data = f.data[:len(data)] + copy(f.data, data) + } else { + f.data = append(f.data[:0], data...) + } + f.length = len(data) + return nil +} + +// SetDataDirect sets frame data using direct buffer assignment (true zero-copy) +// WARNING: The caller must ensure the buffer remains valid for the frame's lifetime +func (f *ZeroCopyAudioFrame) SetDataDirect(data []byte) { + f.mutex.Lock() + defer f.mutex.Unlock() + f.data = data + f.length = len(data) + f.capacity = cap(data) + f.pooled = false // Direct assignment means we can't pool this frame +} + +// AddRef increments the reference count atomically +func (f *ZeroCopyAudioFrame) AddRef() { + atomic.AddInt32(&f.refCount, 1) +} + +// Release decrements the reference count atomically +// Returns true if this was the final reference +func (f *ZeroCopyAudioFrame) Release() bool { + newCount := atomic.AddInt32(&f.refCount, -1) + if newCount == 0 { + // Final reference released, return to pool if pooled + if f.pooled { + globalZeroCopyPool.Put(f) + } + return true + } + return false +} + +// RefCount returns the current reference count atomically +func (f *ZeroCopyAudioFrame) RefCount() int32 { + return atomic.LoadInt32(&f.refCount) +} + +// Length returns the current data length +func (f 
*ZeroCopyAudioFrame) Length() int { + f.mutex.RLock() + defer f.mutex.RUnlock() + return f.length +} + +// Capacity returns the buffer capacity +func (f *ZeroCopyAudioFrame) Capacity() int { + f.mutex.RLock() + defer f.mutex.RUnlock() + return f.capacity +} + +// UnsafePointer returns an unsafe pointer to the data for CGO calls +// WARNING: Only use this for CGO interop, ensure frame lifetime +func (f *ZeroCopyAudioFrame) UnsafePointer() unsafe.Pointer { + f.mutex.RLock() + defer f.mutex.RUnlock() + if len(f.data) == 0 { + return nil + } + return unsafe.Pointer(&f.data[0]) +} + +// Global zero-copy frame pool +// GetZeroCopyPoolStats returns detailed statistics about the zero-copy frame pool +func (p *ZeroCopyFramePool) GetZeroCopyPoolStats() ZeroCopyFramePoolStats { + p.mutex.RLock() + preallocatedCount := len(p.preallocated) + currentCount := atomic.LoadInt64(&p.counter) + p.mutex.RUnlock() + + hitCount := atomic.LoadInt64(&p.hitCount) + missCount := atomic.LoadInt64(&p.missCount) + allocationCount := atomic.LoadInt64(&p.allocationCount) + totalRequests := hitCount + missCount + + var hitRate float64 + if totalRequests > 0 { + hitRate = float64(hitCount) / float64(totalRequests) * Config.PercentageMultiplier + } + + return ZeroCopyFramePoolStats{ + MaxFrameSize: p.maxSize, + MaxPoolSize: p.maxPoolSize, + CurrentPoolSize: currentCount, + PreallocatedCount: int64(preallocatedCount), + PreallocatedMax: int64(p.preallocSize), + HitCount: hitCount, + MissCount: missCount, + AllocationCount: allocationCount, + HitRate: hitRate, + } +} + +// ZeroCopyFramePoolStats provides detailed zero-copy pool statistics +type ZeroCopyFramePoolStats struct { + MaxFrameSize int + MaxPoolSize int + CurrentPoolSize int64 + PreallocatedCount int64 + PreallocatedMax int64 + HitCount int64 + MissCount int64 + AllocationCount int64 + HitRate float64 // Percentage +} + +var ( + globalZeroCopyPool = NewZeroCopyFramePool(Config.MaxAudioFrameSize) +) + +// GetZeroCopyFrame gets a frame from the 
global pool +func GetZeroCopyFrame() *ZeroCopyAudioFrame { + return globalZeroCopyPool.Get() +} + +// GetGlobalZeroCopyPoolStats returns statistics for the global zero-copy pool +func GetGlobalZeroCopyPoolStats() ZeroCopyFramePoolStats { + return globalZeroCopyPool.GetZeroCopyPoolStats() +} + +// PutZeroCopyFrame returns a frame to the global pool +func PutZeroCopyFrame(frame *ZeroCopyAudioFrame) { + globalZeroCopyPool.Put(frame) +} + diff --git a/internal/usbgadget/changeset_resolver.go b/internal/usbgadget/changeset_resolver.go index 67812e0d..c06fac96 100644 --- a/internal/usbgadget/changeset_resolver.go +++ b/internal/usbgadget/changeset_resolver.go @@ -1,7 +1,9 @@ package usbgadget import ( + "context" "fmt" + "time" "github.com/rs/zerolog" "github.com/sourcegraph/tf-dag/dag" @@ -114,7 +116,20 @@ func (c *ChangeSetResolver) resolveChanges(initial bool) error { } func (c *ChangeSetResolver) applyChanges() error { + return c.applyChangesWithTimeout(45 * time.Second) +} + +func (c *ChangeSetResolver) applyChangesWithTimeout(timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + for _, change := range c.resolvedChanges { + select { + case <-ctx.Done(): + return fmt.Errorf("USB gadget reconfiguration timed out after %v: %w", timeout, ctx.Err()) + default: + } + change.ResetActionResolution() action := change.Action() actionStr := FileChangeResolvedActionString[action] @@ -126,7 +141,7 @@ func (c *ChangeSetResolver) applyChanges() error { l.Str("action", actionStr).Str("change", change.String()).Msg("applying change") - err := c.changeset.applyChange(change) + err := c.applyChangeWithTimeout(ctx, change) if err != nil { if change.IgnoreErrors { c.l.Warn().Str("change", change.String()).Err(err).Msg("ignoring error") @@ -139,6 +154,20 @@ func (c *ChangeSetResolver) applyChanges() error { return nil } +func (c *ChangeSetResolver) applyChangeWithTimeout(ctx context.Context, change *FileChange) error { + 
done := make(chan error, 1) + go func() { + done <- c.changeset.applyChange(change) + }() + + select { + case err := <-done: + return err + case <-ctx.Done(): + return fmt.Errorf("change application timed out for %s: %w", change.String(), ctx.Err()) + } +} + func (c *ChangeSetResolver) GetChanges() ([]*FileChange, error) { localChanges := c.changeset.Changes changesMap := make(map[string]*FileChange) diff --git a/internal/usbgadget/config.go b/internal/usbgadget/config.go index 6d1bd391..ff802fc4 100644 --- a/internal/usbgadget/config.go +++ b/internal/usbgadget/config.go @@ -59,6 +59,23 @@ var defaultGadgetConfig = map[string]gadgetConfigItem{ // mass storage "mass_storage_base": massStorageBaseConfig, "mass_storage_lun0": massStorageLun0Config, + // audio + "audio": { + order: 4000, + device: "uac1.usb0", + path: []string{"functions", "uac1.usb0"}, + configPath: []string{"uac1.usb0"}, + attrs: gadgetAttributes{ + "p_chmask": "3", + "p_srate": "48000", + "p_ssize": "2", + "p_volume_present": "0", + "c_chmask": "3", + "c_srate": "48000", + "c_ssize": "2", + "c_volume_present": "0", + }, + }, } func (u *UsbGadget) isGadgetConfigItemEnabled(itemKey string) bool { @@ -73,6 +90,8 @@ func (u *UsbGadget) isGadgetConfigItemEnabled(itemKey string) bool { return u.enabledDevices.MassStorage case "mass_storage_lun0": return u.enabledDevices.MassStorage + case "audio": + return u.enabledDevices.Audio default: return true } @@ -182,6 +201,9 @@ func (u *UsbGadget) Init() error { return u.logError("unable to initialize USB stack", err) } + // Pre-open HID files to reduce input latency + u.PreOpenHidFiles() + return nil } @@ -191,11 +213,17 @@ func (u *UsbGadget) UpdateGadgetConfig() error { u.loadGadgetConfig() + // Close HID files before reconfiguration to prevent "file already closed" errors + u.CloseHidFiles() + err := u.configureUsbGadget(true) if err != nil { return u.logError("unable to update gadget config", err) } + // Reopen HID files after reconfiguration + 
u.PreOpenHidFiles() + return nil } diff --git a/internal/usbgadget/config_tx.go b/internal/usbgadget/config_tx.go index df8a3d1b..6905d0e5 100644 --- a/internal/usbgadget/config_tx.go +++ b/internal/usbgadget/config_tx.go @@ -1,10 +1,12 @@ package usbgadget import ( + "context" "fmt" "path" "path/filepath" "sort" + "time" "github.com/rs/zerolog" ) @@ -52,22 +54,50 @@ func (u *UsbGadget) newUsbGadgetTransaction(lock bool) error { } func (u *UsbGadget) WithTransaction(fn func() error) error { - u.txLock.Lock() - defer u.txLock.Unlock() + return u.WithTransactionTimeout(fn, 60*time.Second) +} - err := u.newUsbGadgetTransaction(false) - if err != nil { - u.log.Error().Err(err).Msg("failed to create transaction") - return err - } - if err := fn(); err != nil { - u.log.Error().Err(err).Msg("transaction failed") - return err - } - result := u.tx.Commit() - u.tx = nil +// WithTransactionTimeout executes a USB gadget transaction with a specified timeout +// to prevent indefinite blocking during USB reconfiguration operations +func (u *UsbGadget) WithTransactionTimeout(fn func() error, timeout time.Duration) error { + // Create a context with timeout for the entire transaction + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() - return result + // Channel to signal when the transaction is complete + done := make(chan error, 1) + + // Execute the transaction in a goroutine + go func() { + u.txLock.Lock() + defer u.txLock.Unlock() + + err := u.newUsbGadgetTransaction(false) + if err != nil { + u.log.Error().Err(err).Msg("failed to create transaction") + done <- err + return + } + + if err := fn(); err != nil { + u.log.Error().Err(err).Msg("transaction failed") + done <- err + return + } + + result := u.tx.Commit() + u.tx = nil + done <- result + }() + + // Wait for either completion or timeout + select { + case err := <-done: + return err + case <-ctx.Done(): + u.log.Error().Dur("timeout", timeout).Msg("USB gadget transaction timed out") + 
return fmt.Errorf("USB gadget transaction timed out after %v: %w", timeout, ctx.Err()) + } } func (tx *UsbGadgetTransaction) addFileChange(component string, change RequestedFileChange) string { diff --git a/internal/usbgadget/udc.go b/internal/usbgadget/udc.go index 4b7fbe36..3d8536dd 100644 --- a/internal/usbgadget/udc.go +++ b/internal/usbgadget/udc.go @@ -1,10 +1,12 @@ package usbgadget import ( + "context" "fmt" "os" "path" "strings" + "time" ) func getUdcs() []string { @@ -26,17 +28,44 @@ func getUdcs() []string { } func rebindUsb(udc string, ignoreUnbindError bool) error { - err := os.WriteFile(path.Join(dwc3Path, "unbind"), []byte(udc), 0644) + return rebindUsbWithTimeout(udc, ignoreUnbindError, 10*time.Second) +} + +func rebindUsbWithTimeout(udc string, ignoreUnbindError bool, timeout time.Duration) error { + ctx, cancel := context.WithTimeout(context.Background(), timeout) + defer cancel() + + // Unbind with timeout + err := writeFileWithTimeout(ctx, path.Join(dwc3Path, "unbind"), []byte(udc), 0644) if err != nil && !ignoreUnbindError { - return err + return fmt.Errorf("failed to unbind UDC: %w", err) } - err = os.WriteFile(path.Join(dwc3Path, "bind"), []byte(udc), 0644) + + // Small delay to allow unbind to complete + time.Sleep(100 * time.Millisecond) + + // Bind with timeout + err = writeFileWithTimeout(ctx, path.Join(dwc3Path, "bind"), []byte(udc), 0644) if err != nil { - return err + return fmt.Errorf("failed to bind UDC: %w", err) } return nil } +func writeFileWithTimeout(ctx context.Context, filename string, data []byte, perm os.FileMode) error { + done := make(chan error, 1) + go func() { + done <- os.WriteFile(filename, data, perm) + }() + + select { + case err := <-done: + return err + case <-ctx.Done(): + return fmt.Errorf("write operation timed out: %w", ctx.Err()) + } +} + func (u *UsbGadget) rebindUsb(ignoreUnbindError bool) error { u.log.Info().Str("udc", u.udc).Msg("rebinding USB gadget to UDC") return rebindUsb(u.udc, ignoreUnbindError) 
diff --git a/internal/usbgadget/usbgadget.go b/internal/usbgadget/usbgadget.go index f01ae09d..04db4699 100644 --- a/internal/usbgadget/usbgadget.go +++ b/internal/usbgadget/usbgadget.go @@ -19,6 +19,7 @@ type Devices struct { RelativeMouse bool `json:"relative_mouse"` Keyboard bool `json:"keyboard"` MassStorage bool `json:"mass_storage"` + Audio bool `json:"audio"` } // Config is a struct that represents the customizations for a USB gadget. @@ -106,6 +107,66 @@ func NewUsbGadget(name string, enabledDevices *Devices, config *Config, logger * return newUsbGadget(name, defaultGadgetConfig, enabledDevices, config, logger) } +// CloseHidFiles closes all open HID files +func (u *UsbGadget) CloseHidFiles() { + u.log.Debug().Msg("closing HID files") + + // Close keyboard HID file + if u.keyboardHidFile != nil { + if err := u.keyboardHidFile.Close(); err != nil { + u.log.Debug().Err(err).Msg("failed to close keyboard HID file") + } + u.keyboardHidFile = nil + } + + // Close absolute mouse HID file + if u.absMouseHidFile != nil { + if err := u.absMouseHidFile.Close(); err != nil { + u.log.Debug().Err(err).Msg("failed to close absolute mouse HID file") + } + u.absMouseHidFile = nil + } + + // Close relative mouse HID file + if u.relMouseHidFile != nil { + if err := u.relMouseHidFile.Close(); err != nil { + u.log.Debug().Err(err).Msg("failed to close relative mouse HID file") + } + u.relMouseHidFile = nil + } +} + +// PreOpenHidFiles opens all HID files to reduce input latency +func (u *UsbGadget) PreOpenHidFiles() { + // Add a small delay to allow USB gadget reconfiguration to complete + // This prevents "no such device or address" errors when trying to open HID files + time.Sleep(100 * time.Millisecond) + + if u.enabledDevices.Keyboard { + if err := u.openKeyboardHidFile(); err != nil { + u.log.Debug().Err(err).Msg("failed to pre-open keyboard HID file") + } + } + if u.enabledDevices.AbsoluteMouse { + if u.absMouseHidFile == nil { + var err error + u.absMouseHidFile, err = 
os.OpenFile("/dev/hidg1", os.O_RDWR, 0666) + if err != nil { + u.log.Debug().Err(err).Msg("failed to pre-open absolute mouse HID file") + } + } + } + if u.enabledDevices.RelativeMouse { + if u.relMouseHidFile == nil { + var err error + u.relMouseHidFile, err = os.OpenFile("/dev/hidg2", os.O_RDWR, 0666) + if err != nil { + u.log.Debug().Err(err).Msg("failed to pre-open relative mouse HID file") + } + } + } +} + func newUsbGadget(name string, configMap map[string]gadgetConfigItem, enabledDevices *Devices, config *Config, logger *zerolog.Logger) *UsbGadget { if logger == nil { logger = defaultLogger diff --git a/jsonrpc.go b/jsonrpc.go index 0ff44a78..c6025865 100644 --- a/jsonrpc.go +++ b/jsonrpc.go @@ -18,6 +18,7 @@ import ( "github.com/rs/zerolog" "go.bug.st/serial" + "github.com/jetkvm/kvm/internal/audio" "github.com/jetkvm/kvm/internal/hidrpc" "github.com/jetkvm/kvm/internal/usbgadget" "github.com/jetkvm/kvm/internal/utils" @@ -922,9 +923,87 @@ func updateUsbRelatedConfig() error { } func rpcSetUsbDevices(usbDevices usbgadget.Devices) error { + // Check if audio state is changing + previousAudioEnabled := config.UsbDevices != nil && config.UsbDevices.Audio + newAudioEnabled := usbDevices.Audio + + // Handle audio process management if state is changing + if previousAudioEnabled != newAudioEnabled { + if !newAudioEnabled { + // Stop audio processes when audio is disabled + logger.Info().Msg("stopping audio processes due to audio device being disabled") + + // Stop audio input manager if active + if currentSession != nil && currentSession.AudioInputManager != nil && currentSession.AudioInputManager.IsRunning() { + logger.Info().Msg("stopping audio input manager") + currentSession.AudioInputManager.Stop() + // Wait for audio input to fully stop + for i := 0; i < 50; i++ { // Wait up to 5 seconds + if !currentSession.AudioInputManager.IsRunning() { + break + } + time.Sleep(100 * time.Millisecond) + } + logger.Info().Msg("audio input manager stopped") + } + + // Stop 
audio output supervisor + if audioSupervisor != nil && audioSupervisor.IsRunning() { + logger.Info().Msg("stopping audio output supervisor") + audioSupervisor.Stop() + // Wait for audio processes to fully stop before proceeding + for i := 0; i < 50; i++ { // Wait up to 5 seconds + if !audioSupervisor.IsRunning() { + break + } + time.Sleep(100 * time.Millisecond) + } + logger.Info().Msg("audio output supervisor stopped") + } + + logger.Info().Msg("audio processes stopped, proceeding with USB gadget reconfiguration") + } else if newAudioEnabled && audioSupervisor != nil && !audioSupervisor.IsRunning() { + // Start audio processes when audio is enabled (after USB reconfiguration) + logger.Info().Msg("audio will be started after USB gadget reconfiguration") + } + } + config.UsbDevices = &usbDevices gadget.SetGadgetDevices(config.UsbDevices) - return updateUsbRelatedConfig() + + // Apply USB gadget configuration changes + err := updateUsbRelatedConfig() + if err != nil { + return err + } + + // Start audio processes after successful USB reconfiguration if needed + if previousAudioEnabled != newAudioEnabled && newAudioEnabled && audioSupervisor != nil { + // Ensure supervisor is fully stopped before starting + for i := 0; i < 50; i++ { // Wait up to 5 seconds + if !audioSupervisor.IsRunning() { + break + } + time.Sleep(100 * time.Millisecond) + } + logger.Info().Msg("starting audio processes after USB gadget reconfiguration") + if err := audioSupervisor.Start(); err != nil { + logger.Error().Err(err).Msg("failed to start audio supervisor") + // Don't return error here as USB reconfiguration was successful + } else { + // Broadcast audio device change event to notify WebRTC session + broadcaster := audio.GetAudioEventBroadcaster() + broadcaster.BroadcastAudioDeviceChanged(true, "usb_reconfiguration") + logger.Info().Msg("broadcasted audio device change event after USB reconfiguration") + } + } else if previousAudioEnabled != newAudioEnabled { + // Broadcast audio device 
change event for disabling audio + broadcaster := audio.GetAudioEventBroadcaster() + broadcaster.BroadcastAudioDeviceChanged(newAudioEnabled, "usb_reconfiguration") + logger.Info().Bool("enabled", newAudioEnabled).Msg("broadcasted audio device change event after USB reconfiguration") + } + + return nil } func rpcSetUsbDeviceState(device string, enabled bool) error { @@ -937,6 +1016,63 @@ func rpcSetUsbDeviceState(device string, enabled bool) error { config.UsbDevices.Keyboard = enabled case "massStorage": config.UsbDevices.MassStorage = enabled + case "audio": + // Handle audio process management + if !enabled { + // Stop audio processes when audio is disabled + logger.Info().Msg("stopping audio processes due to audio device being disabled") + + // Stop audio input manager if active + if currentSession != nil && currentSession.AudioInputManager != nil && currentSession.AudioInputManager.IsRunning() { + logger.Info().Msg("stopping audio input manager") + currentSession.AudioInputManager.Stop() + // Wait for audio input to fully stop + for i := 0; i < 50; i++ { // Wait up to 5 seconds + if !currentSession.AudioInputManager.IsRunning() { + break + } + time.Sleep(100 * time.Millisecond) + } + logger.Info().Msg("audio input manager stopped") + } + + // Stop audio output supervisor + if audioSupervisor != nil && audioSupervisor.IsRunning() { + logger.Info().Msg("stopping audio output supervisor") + audioSupervisor.Stop() + // Wait for audio processes to fully stop + for i := 0; i < 50; i++ { // Wait up to 5 seconds + if !audioSupervisor.IsRunning() { + break + } + time.Sleep(100 * time.Millisecond) + } + logger.Info().Msg("audio output supervisor stopped") + } + } else if enabled && audioSupervisor != nil { + // Ensure supervisor is fully stopped before starting + for i := 0; i < 50; i++ { // Wait up to 5 seconds + if !audioSupervisor.IsRunning() { + break + } + time.Sleep(100 * time.Millisecond) + } + // Start audio processes when audio is enabled + 
logger.Info().Msg("starting audio processes due to audio device being enabled") + if err := audioSupervisor.Start(); err != nil { + logger.Error().Err(err).Msg("failed to start audio supervisor") + } else { + // Broadcast audio device change event to notify WebRTC session + broadcaster := audio.GetAudioEventBroadcaster() + broadcaster.BroadcastAudioDeviceChanged(true, "device_enabled") + logger.Info().Msg("broadcasted audio device change event after enabling audio device") + } + // Always broadcast the audio device change event regardless of enable/disable + broadcaster := audio.GetAudioEventBroadcaster() + broadcaster.BroadcastAudioDeviceChanged(enabled, "device_state_changed") + logger.Info().Bool("enabled", enabled).Msg("broadcasted audio device state change event") + } + config.UsbDevices.Audio = enabled default: return fmt.Errorf("invalid device: %s", device) } @@ -1181,6 +1317,35 @@ func rpcDoExecuteKeyboardMacro(ctx context.Context, macro []hidrpc.KeyboardMacro return nil } +// Audio control RPC handlers - delegated to audio package +func rpcAudioMute(muted bool) error { + return audio.RPCAudioMute(muted) +} + +func rpcMicrophoneStart() error { + return audio.RPCMicrophoneStart() +} + +func rpcMicrophoneStop() error { + return audio.RPCMicrophoneStop() +} + +func rpcAudioStatus() (map[string]interface{}, error) { + return audio.RPCAudioStatus() +} + +func rpcMicrophoneStatus() (map[string]interface{}, error) { + return audio.RPCMicrophoneStatus() +} + +func rpcMicrophoneReset() error { + return audio.RPCMicrophoneReset() +} + +func rpcMicrophoneMute(muted bool) error { + return audio.RPCMicrophoneMute(muted) +} + var rpcHandlers = map[string]RPCHandler{ "ping": {Func: rpcPing}, "reboot": {Func: rpcReboot, Params: []string{"force"}}, @@ -1231,6 +1396,13 @@ var rpcHandlers = map[string]RPCHandler{ "isUpdatePending": {Func: rpcIsUpdatePending}, "getUsbEmulationState": {Func: rpcGetUsbEmulationState}, "setUsbEmulationState": {Func: rpcSetUsbEmulationState, 
Params: []string{"enabled"}}, + "audioMute": {Func: rpcAudioMute, Params: []string{"muted"}}, + "audioStatus": {Func: rpcAudioStatus}, + "microphoneStart": {Func: rpcMicrophoneStart}, + "microphoneStop": {Func: rpcMicrophoneStop}, + "microphoneStatus": {Func: rpcMicrophoneStatus}, + "microphoneReset": {Func: rpcMicrophoneReset}, + "microphoneMute": {Func: rpcMicrophoneMute, Params: []string{"muted"}}, "getUsbConfig": {Func: rpcGetUsbConfig}, "setUsbConfig": {Func: rpcSetUsbConfig, Params: []string{"usbConfig"}}, "checkMountUrl": {Func: rpcCheckMountUrl, Params: []string{"url"}}, diff --git a/main.go b/main.go index e9931d46..c079d5ed 100644 --- a/main.go +++ b/main.go @@ -2,6 +2,7 @@ package kvm import ( "context" + "fmt" "net/http" "os" "os/signal" @@ -9,11 +10,123 @@ import ( "time" "github.com/gwatts/rootcerts" + "github.com/jetkvm/kvm/internal/audio" + "github.com/pion/webrtc/v4" ) -var appCtx context.Context +var ( + appCtx context.Context + audioProcessDone chan struct{} + audioSupervisor *audio.AudioOutputSupervisor +) + +func startAudioSubprocess() error { + // Initialize validation cache for optimal performance + audio.InitValidationCache() + + // Create audio server supervisor + audioSupervisor = audio.NewAudioOutputSupervisor() + + // Set the global supervisor for access from audio package + audio.SetAudioOutputSupervisor(audioSupervisor) + + // Create and register audio input supervisor (but don't start it) + // Audio input will be started on-demand through the UI + audioInputSupervisor := audio.NewAudioInputSupervisor() + audio.SetAudioInputSupervisor(audioInputSupervisor) + + // Set optimal OPUS configuration for audio input supervisor (48 kbps mono mic) + audioConfig := audio.Config + audioInputSupervisor.SetOpusConfig( + audioConfig.OptimalInputBitrate*1000, // Convert kbps to bps (48 kbps) + audioConfig.OptimalOpusComplexity, // Complexity 1 for minimal CPU + audioConfig.OptimalOpusVBR, // VBR enabled + audioConfig.OptimalOpusSignalType, // MUSIC 
signal type + audioConfig.OptimalOpusBandwidth, // WIDEBAND for 48kHz + audioConfig.OptimalOpusDTX, // DTX disabled + ) + + // Note: Audio input supervisor is NOT started here - it will be started on-demand + // when the user activates microphone input through the UI + + // Set up callbacks for process lifecycle events + audioSupervisor.SetCallbacks( + // onProcessStart + func(pid int) { + logger.Info().Int("pid", pid).Msg("audio server process started") + + // Wait for audio output server to be fully ready before starting relay + // This prevents "no client connected" errors during quality changes + go func() { + // Give the audio output server time to initialize and start listening + // Increased delay to reduce frame drops during connection establishment + time.Sleep(1 * time.Second) + + // Start audio relay system for main process + // If there's an active WebRTC session, use its audio track + var audioTrack *webrtc.TrackLocalStaticSample + if currentSession != nil && currentSession.AudioTrack != nil { + audioTrack = currentSession.AudioTrack + logger.Info().Msg("restarting audio relay with existing WebRTC audio track") + } else { + logger.Info().Msg("starting audio relay without WebRTC track (will be updated when session is created)") + } + + if err := audio.StartAudioRelay(audioTrack); err != nil { + logger.Error().Err(err).Msg("failed to start audio relay") + // Retry once after additional delay if initial attempt fails + time.Sleep(1 * time.Second) + if err := audio.StartAudioRelay(audioTrack); err != nil { + logger.Error().Err(err).Msg("failed to start audio relay after retry") + } + } + }() + }, + // onProcessExit + func(pid int, exitCode int, crashed bool) { + if crashed { + logger.Error().Int("pid", pid).Int("exit_code", exitCode).Msg("audio server process crashed") + } else { + logger.Info().Int("pid", pid).Msg("audio server process exited gracefully") + } + + // Stop audio relay when process exits + audio.StopAudioRelay() + }, + // onRestart + 
func(attempt int, delay time.Duration) { + logger.Warn().Int("attempt", attempt).Dur("delay", delay).Msg("restarting audio server process") + }, + ) + + // Check if USB audio device is enabled before starting audio processes + if config.UsbDevices == nil || !config.UsbDevices.Audio { + logger.Info().Msg("USB audio device disabled - skipping audio supervisor startup") + return nil + } + + // Start the supervisor + if err := audioSupervisor.Start(); err != nil { + return fmt.Errorf("failed to start audio supervisor: %w", err) + } + + // Monitor supervisor and handle cleanup + go func() { + defer close(audioProcessDone) + + // Wait for supervisor to stop + for audioSupervisor.IsRunning() { + time.Sleep(100 * time.Millisecond) + } + + logger.Info().Msg("audio supervisor stopped") + }() + + return nil +} func Main() { + audioProcessDone = make(chan struct{}) LoadConfig() var cancel context.CancelFunc @@ -65,6 +178,20 @@ func Main() { // initialize usb gadget initUsbGadget() + + // Start audio subprocess + err = startAudioSubprocess() + if err != nil { + logger.Warn().Err(err).Msg("failed to start audio subprocess") + } + + // Initialize session provider for audio events + initializeAudioSessionProvider() + + // Initialize audio event broadcaster for WebSocket-based real-time updates + audio.InitializeAudioEventBroadcaster() + logger.Info().Msg("audio event broadcaster initialized") + if err := setInitialVirtualMediaState(); err != nil { logger.Warn().Err(err).Msg("failed to set initial virtual media state") } @@ -123,6 +250,13 @@ func Main() { signal.Notify(sigs, syscall.SIGINT, syscall.SIGTERM) <-sigs logger.Info().Msg("JetKVM Shutting Down") + + // Stop audio supervisor and wait for cleanup + if audioSupervisor != nil { + logger.Info().Msg("stopping audio supervisor") + audioSupervisor.Stop() + } + <-audioProcessDone //if fuseServer != nil { // err := setMassStorageImage(" ") // if err != nil { diff --git a/prometheus.go b/prometheus.go index 5d4c5e75..16cbb245 100644 
--- a/prometheus.go +++ b/prometheus.go @@ -1,6 +1,7 @@ package kvm import ( + "github.com/jetkvm/kvm/internal/audio" "github.com/prometheus/client_golang/prometheus" versioncollector "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/version" @@ -10,4 +11,7 @@ func initPrometheus() { // A Prometheus metrics endpoint. version.Version = builtAppVersion prometheus.MustRegister(versioncollector.NewCollector("jetkvm")) + + // Start audio metrics collection + audio.StartMetricsUpdater() } diff --git a/resource/dev_test.sh b/resource/dev_test.sh old mode 100644 new mode 100755 index 04978011..7451b500 --- a/resource/dev_test.sh +++ b/resource/dev_test.sh @@ -1,4 +1,4 @@ -#!/bin/sh +#!/bin/bash JSON_OUTPUT=false GET_COMMANDS=false if [ "$1" = "-json" ]; then diff --git a/scripts/build_cgo.sh b/scripts/build_cgo.sh index 87577e39..057ce42d 100755 --- a/scripts/build_cgo.sh +++ b/scripts/build_cgo.sh @@ -15,27 +15,52 @@ if [ "$CLEAN_ALL" -eq 1 ]; then fi TMP_DIR=$(mktemp -d) +# Ensure temp directory persists and is cleaned up properly +# Also handle SIGINT (CTRL+C) and SIGTERM - kill all child processes +trap 'pkill -P $$; rm -rf "${TMP_DIR}"; exit 1' INT TERM pushd "${CGO_PATH}" > /dev/null msg_info "▶ Generating UI index" ./ui_index.gen.sh msg_info "▶ Building native library" + +# Fix clock skew issues by resetting file timestamps +find "${CGO_PATH}" -type f -exec touch {} + + +# Only clean CMake cache if the build configuration files don't exist +# This prevents re-running expensive compiler detection on every build +if [ ! 
-f "${BUILD_DIR}/CMakeCache.txt" ]; then + msg_info "First build - CMake will configure the project" +fi + VERBOSE=1 cmake -B "${BUILD_DIR}" \ -DCMAKE_SYSTEM_PROCESSOR=armv7l \ -DCMAKE_SYSTEM_NAME=Linux \ -DCMAKE_CROSSCOMPILING=1 \ -DCMAKE_TOOLCHAIN_FILE=$CMAKE_TOOLCHAIN_FILE \ + -DCMAKE_C_COMPILER_WORKS=1 \ + -DCMAKE_CXX_COMPILER_WORKS=1 \ + -DCMAKE_C_ABI_COMPILED=1 \ + -DCMAKE_CXX_ABI_COMPILED=1 \ + -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY \ -DLV_BUILD_USE_KCONFIG=ON \ -DLV_BUILD_DEFCONFIG_PATH=${CGO_PATH}/lvgl_defconfig \ -DCONFIG_LV_BUILD_EXAMPLES=OFF \ -DCONFIG_LV_BUILD_DEMOS=OFF \ - -DSKIP_GLIBC_NAMES=ON \ -DCMAKE_BUILD_TYPE=Release \ -DCMAKE_INSTALL_PREFIX="${TMP_DIR}" msg_info "▶ Copying built library and header files" -cmake --build "${BUILD_DIR}" --target install +# Clock skew can cause make to return 1 even when build succeeds +# We verify success by checking if the output file exists +cmake --build "${BUILD_DIR}" --target install || true + +if [ ! -f "${TMP_DIR}/lib/libjknative.a" ]; then + msg_err "Build failed - libjknative.a not found" + exit 1 +fi + cp -r "${TMP_DIR}/include" "${CGO_PATH}" cp -r "${TMP_DIR}/lib" "${CGO_PATH}" rm -rf "${TMP_DIR}" diff --git a/scripts/dev_deploy.sh b/scripts/dev_deploy.sh index 1ff9296b..2d24fb9e 100755 --- a/scripts/dev_deploy.sh +++ b/scripts/dev_deploy.sh @@ -16,7 +16,8 @@ show_help() { echo " --run-go-tests-only Run go tests and exit" echo " --skip-ui-build Skip frontend/UI build" echo " --skip-native-build Skip native build" - echo " --disable-docker Disable docker build" + echo " --skip-audio-binaries Skip audio binaries build if they exist" + echo " --disable-docker Disable docker build (auto-detected if Docker unavailable)" echo " -i, --install Build for release and install the app" echo " --help Display this help message" echo @@ -32,8 +33,9 @@ REMOTE_PATH="/userdata/jetkvm/bin" SKIP_UI_BUILD=false SKIP_UI_BUILD_RELEASE=0 SKIP_NATIVE_BUILD=0 +SKIP_AUDIO_BINARIES=0 RESET_USB_HID_DEVICE=false 
-LOG_TRACE_SCOPES="${LOG_TRACE_SCOPES:-jetkvm,cloud,websocket,native,jsonrpc}" +LOG_TRACE_SCOPES="${LOG_TRACE_SCOPES:-jetkvm,cloud,websocket,native,jsonrpc,audio}" RUN_GO_TESTS=false RUN_GO_TESTS_ONLY=false INSTALL_APP=false @@ -60,6 +62,10 @@ while [[ $# -gt 0 ]]; do SKIP_NATIVE_BUILD=1 shift ;; + --skip-audio-binaries) + SKIP_AUDIO_BINARIES=1 + shift + ;; --reset-usb-hid) RESET_USB_HID_DEVICE=true shift @@ -106,14 +112,38 @@ if [ -z "$REMOTE_HOST" ]; then exit 1 fi +# Auto-detect architecture requirements # check if the current CPU architecture is x86_64 if [ "$(uname -m)" != "x86_64" ]; then msg_warn "Warning: This script is only supported on x86_64 architecture" BUILD_IN_DOCKER=true fi +# Auto-detect Docker availability and fallback if not available +# This is especially useful in devcontainers where Docker-in-Docker might not be available if [ "$BUILD_IN_DOCKER" = true ]; then - build_docker_image + # Check if Docker is available and accessible + if ! command -v docker &> /dev/null; then + msg_warn "Docker command not found, disabling Docker build" + msg_info "Building on host instead (equivalent to --disable-docker)" + BUILD_IN_DOCKER=false + elif ! docker info &> /dev/null; then + msg_warn "Docker daemon not accessible (possibly in devcontainer without Docker socket), disabling Docker build" + msg_info "Building on host instead (equivalent to --disable-docker)" + BUILD_IN_DOCKER=false + else + msg_info "Docker is available and accessible" + fi +fi + +if [ "$BUILD_IN_DOCKER" = true ]; then + # Double-check Docker availability before building image + if ! docker info &> /dev/null; then + msg_warn "Docker daemon became unavailable, switching to host build" + BUILD_IN_DOCKER=false + else + build_docker_image + fi fi # Build the development version on the host @@ -124,10 +154,13 @@ if [[ "$SKIP_UI_BUILD" = true && ! 
-f "static/index.html" ]]; then SKIP_UI_BUILD=false fi -if [[ "$SKIP_UI_BUILD" = false && "$JETKVM_INSIDE_DOCKER" != 1 ]]; then +if [[ "$SKIP_UI_BUILD" = false && "$JETKVM_INSIDE_DOCKER" != 1 ]]; then msg_info "▶ Building frontend" make frontend SKIP_UI_BUILD=0 SKIP_UI_BUILD_RELEASE=1 +elif [[ "$SKIP_UI_BUILD" = true ]]; then + # User explicitly requested to skip UI build and static files exist + SKIP_UI_BUILD_RELEASE=1 fi if [[ "$SKIP_UI_BUILD_RELEASE" = 0 && "$BUILD_IN_DOCKER" = true ]]; then @@ -180,16 +213,16 @@ fi if [ "$INSTALL_APP" = true ] then msg_info "▶ Building release binary" - do_make build_release SKIP_NATIVE_IF_EXISTS=${SKIP_NATIVE_BUILD} SKIP_UI_BUILD=${SKIP_UI_BUILD_RELEASE} - + do_make build_release SKIP_NATIVE_IF_EXISTS=${SKIP_NATIVE_BUILD} SKIP_UI_BUILD=${SKIP_UI_BUILD_RELEASE} SKIP_AUDIO_BINARIES_IF_EXISTS=${SKIP_AUDIO_BINARIES} + # Copy the binary to the remote host as if we were the OTA updater. ssh "${REMOTE_USER}@${REMOTE_HOST}" "cat > /userdata/jetkvm/jetkvm_app.update" < bin/jetkvm_app - + # Reboot the device, the new app will be deployed by the startup process. 
ssh "${REMOTE_USER}@${REMOTE_HOST}" "reboot" else msg_info "▶ Building development binary" - do_make build_dev SKIP_NATIVE_IF_EXISTS=${SKIP_NATIVE_BUILD} SKIP_UI_BUILD=${SKIP_UI_BUILD_RELEASE} + do_make build_dev SKIP_NATIVE_IF_EXISTS=${SKIP_NATIVE_BUILD} SKIP_UI_BUILD=${SKIP_UI_BUILD_RELEASE} SKIP_AUDIO_BINARIES_IF_EXISTS=${SKIP_AUDIO_BINARIES} # Kill any existing instances of the application ssh "${REMOTE_USER}@${REMOTE_HOST}" "killall jetkvm_app_debug || true" diff --git a/session_provider.go b/session_provider.go new file mode 100644 index 00000000..68823a01 --- /dev/null +++ b/session_provider.go @@ -0,0 +1,24 @@ +package kvm + +import "github.com/jetkvm/kvm/internal/audio" + +// KVMSessionProvider implements the audio.SessionProvider interface +type KVMSessionProvider struct{} + +// IsSessionActive returns whether there's an active session +func (k *KVMSessionProvider) IsSessionActive() bool { + return currentSession != nil +} + +// GetAudioInputManager returns the current session's audio input manager +func (k *KVMSessionProvider) GetAudioInputManager() *audio.AudioInputManager { + if currentSession == nil { + return nil + } + return currentSession.AudioInputManager +} + +// initializeAudioSessionProvider sets up the session provider for the audio package +func initializeAudioSessionProvider() { + audio.SetSessionProvider(&KVMSessionProvider{}) +} diff --git a/ui/src/components/ActionBar.tsx b/ui/src/components/ActionBar.tsx index 4f79d7ed..f205f2a8 100644 --- a/ui/src/components/ActionBar.tsx +++ b/ui/src/components/ActionBar.tsx @@ -1,4 +1,4 @@ -import { MdOutlineContentPasteGo } from "react-icons/md"; +import { MdOutlineContentPasteGo, MdVolumeOff, MdVolumeUp, MdGraphicEq } from "react-icons/md"; import { LuCable, LuHardDrive, LuMaximize, LuSettings, LuSignal } from "react-icons/lu"; import { FaKeyboard } from "react-icons/fa6"; import { Popover, PopoverButton, PopoverPanel } from "@headlessui/react"; @@ -6,24 +6,53 @@ import { Fragment, useCallback, 
useRef } from "react"; import { CommandLineIcon } from "@heroicons/react/20/solid"; import { Button } from "@components/Button"; +import Container from "@components/Container"; import { useHidStore, useMountMediaStore, useSettingsStore, useUiStore, } from "@/hooks/stores"; -import Container from "@components/Container"; import { cx } from "@/cva.config"; import PasteModal from "@/components/popovers/PasteModal"; import WakeOnLanModal from "@/components/popovers/WakeOnLan/Index"; import MountPopopover from "@/components/popovers/MountPopover"; import ExtensionPopover from "@/components/popovers/ExtensionPopover"; +import AudioControlPopover from "@/components/popovers/AudioControlPopover"; import { useDeviceUiNavigation } from "@/hooks/useAppNavigation"; +import { useAudioEvents } from "@/hooks/useAudioEvents"; +import { useUsbDeviceConfig } from "@/hooks/useUsbDeviceConfig"; + + +// Type for microphone error +interface MicrophoneError { + type: 'permission' | 'device' | 'network' | 'unknown'; + message: string; +} + +// Type for microphone hook return value +interface MicrophoneHookReturn { + isMicrophoneActive: boolean; + isMicrophoneMuted: boolean; + microphoneStream: MediaStream | null; + startMicrophone: (deviceId?: string) => Promise<{ success: boolean; error?: MicrophoneError }>; + stopMicrophone: () => Promise<{ success: boolean; error?: MicrophoneError }>; + toggleMicrophoneMute: () => Promise<{ success: boolean; error?: MicrophoneError }>; + syncMicrophoneState: () => Promise; + // Loading states + isStarting: boolean; + isStopping: boolean; + isToggling: boolean; + // HTTP/HTTPS detection + isHttpsRequired: boolean; +} export default function Actionbar({ requestFullscreen, + microphone, }: { requestFullscreen: () => Promise; + microphone: MicrophoneHookReturn; }) { const { navigateTo } = useDeviceUiNavigation(); const { isVirtualKeyboardEnabled, setVirtualKeyboardEnabled } = useHidStore(); @@ -52,6 +81,17 @@ export default function Actionbar({ 
[setDisableVideoFocusTrap], ); + // Use WebSocket-based audio events for real-time updates + const { audioMuted } = useAudioEvents(); + + // Use WebSocket data exclusively - no polling fallback + const isMuted = audioMuted ?? false; // Default to false if WebSocket data not available yet + + // Get USB device configuration to check if audio is enabled + const { usbDeviceConfig, loading: usbConfigLoading } = useUsbDeviceConfig(); + // Default to false while loading to prevent premature access when audio hasn't been enabled yet + const isAudioEnabledInUsb = usbDeviceConfig?.audio ?? false; + return (
- {({ open }) => { + {({ open }: { open: boolean }) => { checkIfStateChanged(open); return (
@@ -131,7 +171,7 @@ export default function Actionbar({ "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", )} > - {({ open }) => { + {({ open }: { open: boolean }) => { checkIfStateChanged(open); return (
@@ -183,7 +223,7 @@ export default function Actionbar({ "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", )} > - {({ open }) => { + {({ open }: { open: boolean }) => { checkIfStateChanged(open); return (
@@ -226,7 +266,7 @@ export default function Actionbar({ "flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0", )} > - {({ open }) => { + {({ open }: { open: boolean }) => { checkIfStateChanged(open); return ; }} @@ -258,6 +298,7 @@ export default function Actionbar({ }} />
+
+ + +
+
diff --git a/ui/src/components/Combobox.tsx b/ui/src/components/Combobox.tsx index 3fce228f..8f115f3b 100644 --- a/ui/src/components/Combobox.tsx +++ b/ui/src/components/Combobox.tsx @@ -11,6 +11,8 @@ import { cva } from "@/cva.config"; import Card from "./Card"; + + export interface ComboboxOption { value: string; label: string; diff --git a/ui/src/components/EmptyCard.tsx b/ui/src/components/EmptyCard.tsx index ad3370e3..ba031205 100644 --- a/ui/src/components/EmptyCard.tsx +++ b/ui/src/components/EmptyCard.tsx @@ -4,6 +4,8 @@ import { GridCard } from "@/components/Card"; import { cx } from "../cva.config"; + + interface Props { IconElm?: React.FC<{ className: string | undefined }>; headline: string; diff --git a/ui/src/components/Header.tsx b/ui/src/components/Header.tsx index a650693f..86d2a6d7 100644 --- a/ui/src/components/Header.tsx +++ b/ui/src/components/Header.tsx @@ -4,20 +4,22 @@ import { ArrowLeftEndOnRectangleIcon, ChevronDownIcon } from "@heroicons/react/1 import { Button, Menu, MenuButton, MenuItem, MenuItems } from "@headlessui/react"; import { LuMonitorSmartphone } from "react-icons/lu"; +import USBStateStatus from "@components/USBStateStatus"; +import PeerConnectionStatusCard from "@components/PeerConnectionStatusCard"; import Container from "@/components/Container"; import Card from "@/components/Card"; import { useHidStore, useRTCStore, useUserStore } from "@/hooks/stores"; import LogoBlueIcon from "@/assets/logo-blue.svg"; import LogoWhiteIcon from "@/assets/logo-white.svg"; -import USBStateStatus from "@components/USBStateStatus"; -import PeerConnectionStatusCard from "@components/PeerConnectionStatusCard"; import { CLOUD_API, DEVICE_API } from "@/ui.config"; -import api from "../api"; import { isOnDevice } from "../main"; +import api from "../api"; import { LinkButton } from "./Button"; + + interface NavbarProps { isLoggedIn: boolean; primaryLinks?: { title: string; to: string }[]; diff --git a/ui/src/components/JigglerSetting.tsx 
b/ui/src/components/JigglerSetting.tsx index fc0f50dd..44094d8d 100644 --- a/ui/src/components/JigglerSetting.tsx +++ b/ui/src/components/JigglerSetting.tsx @@ -7,6 +7,7 @@ import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; import { InputFieldWithLabel } from "./InputField"; import { SelectMenuBasic } from "./SelectMenuBasic"; + export interface JigglerConfig { inactivity_limit_seconds: number; jitter_percentage: number; diff --git a/ui/src/components/SelectMenuBasic.tsx b/ui/src/components/SelectMenuBasic.tsx index b92f837a..2898f8bb 100644 --- a/ui/src/components/SelectMenuBasic.tsx +++ b/ui/src/components/SelectMenuBasic.tsx @@ -1,12 +1,14 @@ import React, { JSX } from "react"; import clsx from "clsx"; + import FieldLabel from "@/components/FieldLabel"; import { cva } from "@/cva.config"; import Card from "./Card"; + type SelectMenuProps = Pick< JSX.IntrinsicElements["select"], "disabled" | "onChange" | "name" | "value" diff --git a/ui/src/components/Terminal.tsx b/ui/src/components/Terminal.tsx index ba3e667c..f5159c78 100644 --- a/ui/src/components/Terminal.tsx +++ b/ui/src/components/Terminal.tsx @@ -8,11 +8,13 @@ import { WebglAddon } from "@xterm/addon-webgl"; import { Unicode11Addon } from "@xterm/addon-unicode11"; import { ClipboardAddon } from "@xterm/addon-clipboard"; + import { cx } from "@/cva.config"; import { AvailableTerminalTypes, useUiStore } from "@/hooks/stores"; import { Button } from "./Button"; + const isWebGl2Supported = !!document.createElement("canvas").getContext("webgl2"); // Terminal theme configuration diff --git a/ui/src/components/USBStateStatus.tsx b/ui/src/components/USBStateStatus.tsx index ffe2fce6..2dbd8d4d 100644 --- a/ui/src/components/USBStateStatus.tsx +++ b/ui/src/components/USBStateStatus.tsx @@ -1,9 +1,9 @@ import React from "react"; -import { cx } from "@/cva.config"; -import KeyboardAndMouseConnectedIcon from "@/assets/keyboard-and-mouse-connected.png"; import LoadingSpinner from 
"@components/LoadingSpinner"; import StatusCard from "@components/StatusCards"; +import { cx } from "@/cva.config"; +import KeyboardAndMouseConnectedIcon from "@/assets/keyboard-and-mouse-connected.png"; import { USBStates } from "@/hooks/stores"; type StatusProps = Record< diff --git a/ui/src/components/UpdateInProgressStatusCard.tsx b/ui/src/components/UpdateInProgressStatusCard.tsx index b61752f2..fa2bc68e 100644 --- a/ui/src/components/UpdateInProgressStatusCard.tsx +++ b/ui/src/components/UpdateInProgressStatusCard.tsx @@ -1,3 +1,4 @@ + import { cx } from "@/cva.config"; import { useDeviceUiNavigation } from "../hooks/useAppNavigation"; @@ -6,6 +7,7 @@ import { Button } from "./Button"; import { GridCard } from "./Card"; import LoadingSpinner from "./LoadingSpinner"; + export default function UpdateInProgressStatusCard() { const { navigateTo } = useDeviceUiNavigation(); diff --git a/ui/src/components/UsbDeviceSetting.tsx b/ui/src/components/UsbDeviceSetting.tsx index 26146da6..598889bb 100644 --- a/ui/src/components/UsbDeviceSetting.tsx +++ b/ui/src/components/UsbDeviceSetting.tsx @@ -23,6 +23,7 @@ export interface UsbDeviceConfig { absolute_mouse: boolean; relative_mouse: boolean; mass_storage: boolean; + audio: boolean; } const defaultUsbDeviceConfig: UsbDeviceConfig = { @@ -30,17 +31,30 @@ const defaultUsbDeviceConfig: UsbDeviceConfig = { absolute_mouse: true, relative_mouse: true, mass_storage: true, + audio: true, }; const usbPresets = [ { - label: "Keyboard, Mouse and Mass Storage", + label: "Keyboard, Mouse, Mass Storage and Audio", value: "default", config: { keyboard: true, absolute_mouse: true, relative_mouse: true, mass_storage: true, + audio: true, + }, + }, + { + label: "Keyboard, Mouse and Mass Storage", + value: "no_audio", + config: { + keyboard: true, + absolute_mouse: true, + relative_mouse: true, + mass_storage: true, + audio: false, }, }, { @@ -51,6 +65,7 @@ const usbPresets = [ absolute_mouse: false, relative_mouse: false, mass_storage: 
false, + audio: false, }, }, { @@ -218,6 +233,17 @@ export function UsbDeviceSetting() { />
+
+ + + +
@@ -518,7 +551,7 @@ export default function WebRTCVideo() { controls={false} onPlaying={onVideoPlaying} onPlay={onVideoPlaying} - muted + muted={false} playsInline disablePictureInPicture controlsList="nofullscreen" diff --git a/ui/src/components/extensions/ATXPowerControl.tsx b/ui/src/components/extensions/ATXPowerControl.tsx index 323e2419..6aa65f09 100644 --- a/ui/src/components/extensions/ATXPowerControl.tsx +++ b/ui/src/components/extensions/ATXPowerControl.tsx @@ -9,6 +9,7 @@ import LoadingSpinner from "@/components/LoadingSpinner"; import { JsonRpcResponse, useJsonRpc } from "../../hooks/useJsonRpc"; + const LONG_PRESS_DURATION = 3000; // 3 seconds for long press interface ATXState { diff --git a/ui/src/components/extensions/DCPowerControl.tsx b/ui/src/components/extensions/DCPowerControl.tsx index 7f950491..722f2b67 100644 --- a/ui/src/components/extensions/DCPowerControl.tsx +++ b/ui/src/components/extensions/DCPowerControl.tsx @@ -4,11 +4,11 @@ import { useCallback, useEffect, useState } from "react"; import { Button } from "@components/Button"; import Card from "@components/Card"; import { SettingsPageHeader } from "@components/SettingsPageheader"; -import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; -import notifications from "@/notifications"; import FieldLabel from "@components/FieldLabel"; import LoadingSpinner from "@components/LoadingSpinner"; import {SelectMenuBasic} from "@components/SelectMenuBasic"; +import notifications from "@/notifications"; +import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; interface DCPowerState { isOn: boolean; diff --git a/ui/src/components/extensions/SerialConsole.tsx b/ui/src/components/extensions/SerialConsole.tsx index e36365ff..b43b820b 100644 --- a/ui/src/components/extensions/SerialConsole.tsx +++ b/ui/src/components/extensions/SerialConsole.tsx @@ -4,10 +4,10 @@ import { useEffect, useState } from "react"; import { Button } from "@components/Button"; import Card from 
"@components/Card"; import { SettingsPageHeader } from "@components/SettingsPageheader"; +import { SelectMenuBasic } from "@components/SelectMenuBasic"; import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; import notifications from "@/notifications"; import { useUiStore } from "@/hooks/stores"; -import { SelectMenuBasic } from "@components/SelectMenuBasic"; interface SerialSettings { baudRate: string; diff --git a/ui/src/components/popovers/AudioControlPopover.tsx b/ui/src/components/popovers/AudioControlPopover.tsx new file mode 100644 index 00000000..70422c9d --- /dev/null +++ b/ui/src/components/popovers/AudioControlPopover.tsx @@ -0,0 +1,450 @@ +import { useEffect, useState } from "react"; +import { MdVolumeOff, MdVolumeUp, MdGraphicEq, MdMic, MdMicOff, MdRefresh } from "react-icons/md"; + +import { Button } from "@components/Button"; +import { cx } from "@/cva.config"; +import { useAudioDevices } from "@/hooks/useAudioDevices"; +import { useAudioEvents } from "@/hooks/useAudioEvents"; +import { useJsonRpc, JsonRpcResponse } from "@/hooks/useJsonRpc"; +import { useRTCStore } from "@/hooks/stores"; +import notifications from "@/notifications"; + +// Type for microphone error +interface MicrophoneError { + type: 'permission' | 'device' | 'network' | 'unknown'; + message: string; +} + +// Type for microphone hook return value +interface MicrophoneHookReturn { + isMicrophoneActive: boolean; + isMicrophoneMuted: boolean; + microphoneStream: MediaStream | null; + startMicrophone: (deviceId?: string) => Promise<{ success: boolean; error?: MicrophoneError }>; + stopMicrophone: () => Promise<{ success: boolean; error?: MicrophoneError }>; + toggleMicrophoneMute: () => Promise<{ success: boolean; error?: MicrophoneError }>; + syncMicrophoneState: () => Promise; + // Loading states + isStarting: boolean; + isStopping: boolean; + isToggling: boolean; + // HTTP/HTTPS detection + isHttpsRequired: boolean; +} + +interface AudioConfig { + Quality: number; + 
Bitrate: number; + SampleRate: number; + Channels: number; + FrameSize: string; +} + +interface AudioControlPopoverProps { + microphone: MicrophoneHookReturn; +} + +export default function AudioControlPopover({ microphone }: AudioControlPopoverProps) { + const [currentConfig, setCurrentConfig] = useState(null); + + const [isLoading, setIsLoading] = useState(false); + + // Add cache flags to prevent unnecessary API calls + const [configsLoaded, setConfigsLoaded] = useState(false); + + // Add cooldown to prevent rapid clicking + const [lastClickTime, setLastClickTime] = useState(0); + const CLICK_COOLDOWN = 500; // 500ms cooldown between clicks + + // Use WebSocket-based audio events for real-time updates + const { + audioMuted, + // microphoneState - now using hook state instead + isConnected: wsConnected + } = useAudioEvents(); + + // RPC for device communication (works both locally and via cloud) + const { rpcDataChannel } = useRTCStore(); + const { send } = useJsonRpc(); + + // Initialize audio quality service with RPC for cloud compatibility + // Audio quality service removed - using fixed optimal configuration + + // WebSocket-only implementation - no fallback polling + + // Microphone state from props (keeping hook for legacy device operations) + const { + isMicrophoneActive: isMicrophoneActiveFromHook, + startMicrophone, + stopMicrophone, + syncMicrophoneState, + // Loading states + isStarting, + isStopping, + isToggling, + // HTTP/HTTPS detection + isHttpsRequired, + } = microphone; + + // Use WebSocket data exclusively - no polling fallback + const isMuted = audioMuted ?? 
false; + const isConnected = wsConnected; + + + + // Audio devices + const { + audioInputDevices, + audioOutputDevices, + selectedInputDevice, + selectedOutputDevice, + setSelectedInputDevice, + setSelectedOutputDevice, + isLoading: devicesLoading, + error: devicesError, + refreshDevices + } = useAudioDevices(); + + + + // Load initial configurations once - cache to prevent repeated calls + useEffect(() => { + if (!configsLoaded) { + loadAudioConfigurations(); + } + }, [configsLoaded]); + + // WebSocket-only implementation - sync microphone state when needed + useEffect(() => { + // Always sync microphone state, but debounce it + const syncTimeout = setTimeout(() => { + syncMicrophoneState(); + }, 500); + + return () => clearTimeout(syncTimeout); + }, [syncMicrophoneState]); + + const loadAudioConfigurations = async () => { + try { + // Load audio configuration directly via RPC + if (!send) return; + + await new Promise((resolve, reject) => { + send("audioStatus", {}, (resp: JsonRpcResponse) => { + if ("error" in resp) { + reject(new Error(resp.error.message)); + } else if ("result" in resp && resp.result) { + const result = resp.result as any; + if (result.config) { + setCurrentConfig(result.config); + } + resolve(); + } else { + resolve(); + } + }); + }); + + setConfigsLoaded(true); + } catch { + // Failed to load audio configurations + } + }; + + const handleToggleMute = async () => { + const now = Date.now(); + + // Prevent rapid clicking + if (isLoading || (now - lastClickTime < CLICK_COOLDOWN)) { + return; + } + + setLastClickTime(now); + setIsLoading(true); + + try { + // Use RPC for device communication - works for both local and cloud + if (rpcDataChannel?.readyState !== "open") { + throw new Error("Device connection not available"); + } + + await new Promise((resolve, reject) => { + send("audioMute", { muted: !isMuted }, (resp: JsonRpcResponse) => { + if ("error" in resp) { + reject(new Error(resp.error.message)); + } else { + resolve(); + } + }); + }); + 
+ // WebSocket will handle the state update automatically + } catch (error) { + const errorMessage = error instanceof Error ? error.message : "Failed to toggle audio mute"; + notifications.error(errorMessage); + } finally { + setIsLoading(false); + } + }; + + // Quality change handler removed - quality is now fixed at optimal settings + + const handleToggleMicrophoneEnable = async () => { + const now = Date.now(); + + // Prevent rapid clicking - if any operation is in progress or within cooldown, ignore the click + if (isStarting || isStopping || isToggling || (now - lastClickTime < CLICK_COOLDOWN)) { + return; + } + + setLastClickTime(now); + setIsLoading(true); + + try { + if (isMicrophoneActiveFromHook) { + // Disable: Use the hook's stopMicrophone which handles both RPC and local cleanup + const result = await stopMicrophone(); + if (!result.success) { + throw new Error(result.error?.message || "Failed to stop microphone"); + } + } else { + // Enable: Use the hook's startMicrophone which handles both RPC and local setup + const result = await startMicrophone(); + if (!result.success) { + throw new Error(result.error?.message || "Failed to start microphone"); + } + } + } catch (error) { + const errorMessage = error instanceof Error ? 
error.message : "Failed to toggle microphone"; + notifications.error(errorMessage); + } finally { + setIsLoading(false); + } + }; + + // Handle microphone device change + const handleMicrophoneDeviceChange = async (deviceId: string) => { + // Don't process device changes for HTTPS-required placeholder + if (deviceId === 'https-required') { + return; + } + + setSelectedInputDevice(deviceId); + + // If microphone is currently active, restart it with the new device + if (isMicrophoneActiveFromHook) { + try { + // Stop current microphone + await stopMicrophone(); + // Start with new device + const result = await startMicrophone(deviceId); + if (!result.success && result.error) { + notifications.error(result.error.message); + } + } catch { + // Failed to change microphone device + notifications.error("Failed to change microphone device"); + } + } + }; + + const handleAudioOutputDeviceChange = async (deviceId: string) => { + setSelectedOutputDevice(deviceId); + + // Find the video element and set the audio output device + const videoElement = document.querySelector('video'); + if (videoElement && 'setSinkId' in videoElement) { + try { + await (videoElement as HTMLVideoElement & { setSinkId: (deviceId: string) => Promise }).setSinkId(deviceId); + } catch { + // Failed to change audio output device + } + } else { + // setSinkId not supported or video element not found + } + }; + + + + return ( +
+
+ {/* Header */} +
+

+ Audio Controls +

+
+
+ + {isConnected ? "Connected" : "Disconnected"} + +
+
+ + {/* Mute Control */} +
+
+ {isMuted ? ( + + ) : ( + + )} + + {isMuted ? "Muted" : "Unmuted"} + +
+
+ + {/* Microphone Control */} +
+
+ + + Microphone Input + +
+ +
+
+ {isMicrophoneActiveFromHook ? ( + + ) : ( + + )} + + {isMicrophoneActiveFromHook ? "Enabled" : "Disabled"} + +
+
+ + {/* HTTPS requirement notice */} + {isHttpsRequired && ( +
+

HTTPS Required for Microphone Input

+

+ Microphone access requires a secure connection due to browser security policies. Audio output works fine on HTTP, but microphone input needs HTTPS. +

+

+ Current: {window.location.protocol + '//' + window.location.host} +
+ Secure: {'https://' + window.location.host} +

+
+ )} + +
+ + {/* Device Selection */} +
+
+ + + Audio Devices + + {devicesLoading && ( +
+ )} +
+ + {devicesError && ( +
+ {devicesError} +
+ )} + + {/* Microphone Selection */} +
+ + + {isHttpsRequired ? ( +

+ HTTPS connection required for microphone device selection +

+ ) : isMicrophoneActiveFromHook ? ( +

+ Changing device will restart the microphone +

+ ) : null} +
+ + {/* Speaker Selection */} +
+ + +
+ + +
+ + {/* Audio Quality Info (fixed optimal configuration) */} + {currentConfig && ( +
+
+ + + Audio Configuration + +
+
+ Optimized for S16_LE @ 48kHz stereo HDMI audio +
+
+ Bitrate: {currentConfig.Bitrate} kbps | Sample Rate: {currentConfig.SampleRate} Hz | Channels: {currentConfig.Channels} +
+
+ )} +
+
+ ); +} \ No newline at end of file diff --git a/ui/src/components/popovers/ExtensionPopover.tsx b/ui/src/components/popovers/ExtensionPopover.tsx index f36c0503..81c4e54f 100644 --- a/ui/src/components/popovers/ExtensionPopover.tsx +++ b/ui/src/components/popovers/ExtensionPopover.tsx @@ -1,13 +1,13 @@ import { useEffect, useState } from "react"; import { LuPower, LuTerminal, LuPlugZap } from "react-icons/lu"; -import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; import Card, { GridCard } from "@components/Card"; import { SettingsPageHeader } from "@components/SettingsPageheader"; import { ATXPowerControl } from "@components/extensions/ATXPowerControl"; import { DCPowerControl } from "@components/extensions/DCPowerControl"; import { SerialConsole } from "@components/extensions/SerialConsole"; import { Button } from "@components/Button"; +import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; import notifications from "@/notifications"; interface Extension { diff --git a/ui/src/components/popovers/MountPopover.tsx b/ui/src/components/popovers/MountPopover.tsx index 8b6a8a55..0ff2d97e 100644 --- a/ui/src/components/popovers/MountPopover.tsx +++ b/ui/src/components/popovers/MountPopover.tsx @@ -10,9 +10,9 @@ import { useLocation } from "react-router"; import { Button } from "@components/Button"; import Card, { GridCard } from "@components/Card"; +import { SettingsPageHeader } from "@components/SettingsPageheader"; import { formatters } from "@/utils"; import { RemoteVirtualMediaState, useMountMediaStore } from "@/hooks/stores"; -import { SettingsPageHeader } from "@components/SettingsPageheader"; import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; import { useDeviceUiNavigation } from "@/hooks/useAppNavigation"; import notifications from "@/notifications"; diff --git a/ui/src/components/popovers/PasteModal.tsx b/ui/src/components/popovers/PasteModal.tsx index ac97e29b..1460e04c 100644 --- 
a/ui/src/components/popovers/PasteModal.tsx +++ b/ui/src/components/popovers/PasteModal.tsx @@ -3,17 +3,17 @@ import { ExclamationCircleIcon } from "@heroicons/react/16/solid"; import { useCallback, useEffect, useMemo, useRef, useState } from "react"; import { LuCornerDownLeft } from "react-icons/lu"; +import { Button } from "@components/Button"; +import { GridCard } from "@components/Card"; +import { InputFieldWithLabel } from "@components/InputField"; +import { SettingsPageHeader } from "@components/SettingsPageheader"; +import { TextAreaWithLabel } from "@components/TextArea"; import { cx } from "@/cva.config"; import { useHidStore, useSettingsStore, useUiStore } from "@/hooks/stores"; import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; import useKeyboard, { type MacroStep } from "@/hooks/useKeyboard"; import useKeyboardLayout from "@/hooks/useKeyboardLayout"; import notifications from "@/notifications"; -import { Button } from "@components/Button"; -import { GridCard } from "@components/Card"; -import { InputFieldWithLabel } from "@components/InputField"; -import { SettingsPageHeader } from "@components/SettingsPageheader"; -import { TextAreaWithLabel } from "@components/TextArea"; // uint32 max value / 4 const pasteMaxLength = 1073741824; diff --git a/ui/src/components/popovers/WakeOnLan/Index.tsx b/ui/src/components/popovers/WakeOnLan/Index.tsx index 6ebf3c79..6de8a4fd 100644 --- a/ui/src/components/popovers/WakeOnLan/Index.tsx +++ b/ui/src/components/popovers/WakeOnLan/Index.tsx @@ -11,6 +11,8 @@ import EmptyStateCard from "./EmptyStateCard"; import DeviceList, { StoredDevice } from "./DeviceList"; import AddDeviceForm from "./AddDeviceForm"; + + export default function WakeOnLanModal() { const [storedDevices, setStoredDevices] = useState([]); const [showAddForm, setShowAddForm] = useState(false); diff --git a/ui/src/components/sidebar/connectionStats.tsx b/ui/src/components/sidebar/connectionStats.tsx index a69cd94e..20e39dab 100644 --- 
a/ui/src/components/sidebar/connectionStats.tsx +++ b/ui/src/components/sidebar/connectionStats.tsx @@ -1,11 +1,13 @@ import { useInterval } from "usehooks-ts"; + import SidebarHeader from "@/components/SidebarHeader"; import { useRTCStore, useUiStore } from "@/hooks/stores"; import { someIterable } from "@/utils"; -import { createChartArray, Metric } from "../Metric"; import { SettingsSectionHeader } from "../SettingsSectionHeader"; +import { createChartArray, Metric } from "../Metric"; + export default function ConnectionStatsSidebar() { const { sidebarView, setSidebarView } = useUiStore(); diff --git a/ui/src/config/constants.ts b/ui/src/config/constants.ts new file mode 100644 index 00000000..d9e3d10c --- /dev/null +++ b/ui/src/config/constants.ts @@ -0,0 +1,113 @@ +// Centralized configuration constants + +// Network and API Configuration +export const NETWORK_CONFIG = { + WEBSOCKET_RECONNECT_INTERVAL: 3000, + LONG_PRESS_DURATION: 3000, + ERROR_MESSAGE_TIMEOUT: 3000, + AUDIO_TEST_DURATION: 5000, + BACKEND_RETRY_DELAY: 500, + RESET_DELAY: 200, + STATE_CHECK_DELAY: 100, + VERIFICATION_DELAY: 1000, +} as const; + +// Default URLs and Endpoints +export const DEFAULT_URLS = { + JETKVM_PROD_API: "https://api.jetkvm.com", + JETKVM_PROD_APP: "https://app.jetkvm.com", + JETKVM_DOCS_TROUBLESHOOTING: "https://jetkvm.com/docs/getting-started/troubleshooting", + JETKVM_DOCS_REMOTE_ACCESS: "https://jetkvm.com/docs/networking/remote-access", + JETKVM_DOCS_LOCAL_ACCESS_RESET: "https://jetkvm.com/docs/networking/local-access#reset-password", + JETKVM_GITHUB: "https://github.com/jetkvm", + CRONTAB_GURU: "https://crontab.guru/examples.html", +} as const; + +// Sample ISO URLs for mounting +export const SAMPLE_ISOS = { + UBUNTU_24_04: { + name: "Ubuntu 24.04.2 Desktop", + url: "https://releases.ubuntu.com/24.04.2/ubuntu-24.04.2-desktop-amd64.iso", + }, + DEBIAN_13: { + name: "Debian 13.0.0 (Testing)", + url: 
"https://cdimage.debian.org/debian-cd/current/amd64/iso-cd/debian-13.0.0-amd64-netinst.iso", + }, + DEBIAN_12: { + name: "Debian 12.11.0 (Stable)", + url: "https://cdimage.debian.org/mirror/cdimage/archive/12.11.0/amd64/iso-cd/debian-12.11.0-amd64-netinst.iso", + }, + FEDORA_41: { + name: "Fedora 41 Workstation", + url: "https://download.fedoraproject.org/pub/fedora/linux/releases/41/Workstation/x86_64/iso/Fedora-Workstation-Live-x86_64-41-1.4.iso", + }, + OPENSUSE_LEAP: { + name: "openSUSE Leap 15.6", + url: "https://download.opensuse.org/distribution/leap/15.6/iso/openSUSE-Leap-15.6-NET-x86_64-Media.iso", + }, + OPENSUSE_TUMBLEWEED: { + name: "openSUSE Tumbleweed", + url: "https://download.opensuse.org/tumbleweed/iso/openSUSE-Tumbleweed-NET-x86_64-Current.iso", + }, + ARCH_LINUX: { + name: "Arch Linux", + url: "https://archlinux.doridian.net/iso/2025.02.01/archlinux-2025.02.01-x86_64.iso", + }, + NETBOOT_XYZ: { + name: "netboot.xyz", + url: "https://boot.netboot.xyz/ipxe/netboot.xyz.iso", + }, +} as const; + +// Security and Access Configuration +export const SECURITY_CONFIG = { + LOCALHOST_ONLY_IP: "127.0.0.1", + LOCALHOST_HOSTNAME: "localhost", + HTTPS_PROTOCOL: "https:", +} as const; + +// Default Hardware Configuration +export const HARDWARE_CONFIG = { + DEFAULT_OFF_AFTER: 50000, + SAMPLE_EDID: "00FFFFFFFFFFFF00047265058A3F6101101E0104A53420783FC125A8554EA0260D5054BFEF80714F8140818081C081008B009500B300283C80A070B023403020360006442100001A000000FD00304C575716010A202020202020000000FC0042323436574C0A202020202020000000FF0054384E4545303033383532320A01F802031CF14F90020304050607011112131415161F2309070783010000011D8018711C1620582C250006442100009E011D007251D01E206E28550006442100001E8C0AD08A20E02D10103E9600064421000018C344806E70B028401720A80406442100001E00000000000000000000000000000000000000000000000000000096", +} as const; + +// Audio Configuration +export const AUDIO_CONFIG = { + // Audio Level Analysis + LEVEL_UPDATE_INTERVAL: 100, // ms - throttle audio level 
updates for performance + FFT_SIZE: 128, // reduced from 256 for better performance + SMOOTHING_TIME_CONSTANT: 0.8, + RELEVANT_FREQUENCY_BINS: 32, // focus on lower frequencies for voice + RMS_SCALING_FACTOR: 180, // for converting RMS to percentage + MAX_LEVEL_PERCENTAGE: 100, + + // Microphone Configuration + SAMPLE_RATE: 48000, // Hz - high quality audio sampling + CHANNEL_COUNT: 1, // mono for microphone input + OPERATION_DEBOUNCE_MS: 1000, // debounce microphone operations + SYNC_DEBOUNCE_MS: 1000, // debounce state synchronization + AUDIO_TEST_TIMEOUT: 100, // ms - timeout for audio testing + + // Audio quality is fixed at optimal settings (96 kbps @ 48kHz stereo) + // No quality presets needed - single optimal configuration for all use cases + + // Audio Analysis + ANALYSIS_FFT_SIZE: 256, // for detailed audio analysis + ANALYSIS_UPDATE_INTERVAL: 100, // ms - 10fps for audio level updates + LEVEL_SCALING_FACTOR: 255, // for RMS to percentage conversion + + // Audio Metrics Thresholds + DROP_RATE_WARNING_THRESHOLD: 1, // percentage - yellow warning + DROP_RATE_CRITICAL_THRESHOLD: 5, // percentage - red critical + PERCENTAGE_MULTIPLIER: 100, // for converting ratios to percentages + PERCENTAGE_DECIMAL_PLACES: 2, // decimal places for percentage display +} as const; + +// Placeholder URLs +export const PLACEHOLDERS = { + ISO_URL: "https://example.com/image.iso", + PROXY_URL: "http://proxy.example.com:8080/", + API_URL: "https://api.example.com", + APP_URL: "https://app.example.com", +} as const; \ No newline at end of file diff --git a/ui/src/hooks/stores.ts b/ui/src/hooks/stores.ts index bfbbb26e..85dca5d3 100644 --- a/ui/src/hooks/stores.ts +++ b/ui/src/hooks/stores.ts @@ -129,6 +129,16 @@ export interface RTCState { mediaStream: MediaStream | null; setMediaStream: (stream: MediaStream) => void; + // Microphone stream management + microphoneStream: MediaStream | null; + setMicrophoneStream: (stream: MediaStream | null) => void; + microphoneSender: 
RTCRtpSender | null; + setMicrophoneSender: (sender: RTCRtpSender | null) => void; + isMicrophoneActive: boolean; + setMicrophoneActive: (active: boolean) => void; + isMicrophoneMuted: boolean; + setMicrophoneMuted: (muted: boolean) => void; + videoStreamStats: RTCInboundRtpStreamStats | null; appendVideoStreamStats: (stats: RTCInboundRtpStreamStats) => void; videoStreamStatsHistory: Map; @@ -190,6 +200,16 @@ export const useRTCStore = create(set => ({ mediaStream: null, setMediaStream: (stream: MediaStream) => set({ mediaStream: stream }), + // Microphone stream management + microphoneStream: null, + setMicrophoneStream: stream => set({ microphoneStream: stream }), + microphoneSender: null, + setMicrophoneSender: sender => set({ microphoneSender: sender }), + isMicrophoneActive: false, + setMicrophoneActive: active => set({ isMicrophoneActive: active }), + isMicrophoneMuted: false, + setMicrophoneMuted: muted => set({ isMicrophoneMuted: muted }), + videoStreamStats: null, appendVideoStreamStats: (stats: RTCInboundRtpStreamStats) => set({ videoStreamStats: stats }), videoStreamStatsHistory: new Map(), @@ -351,6 +371,10 @@ export interface SettingsState { setVideoBrightness: (value: number) => void; videoContrast: number; setVideoContrast: (value: number) => void; + + // Microphone persistence settings + microphoneWasEnabled: boolean; + setMicrophoneWasEnabled: (enabled: boolean) => void; } export const useSettingsStore = create( @@ -396,6 +420,10 @@ export const useSettingsStore = create( setVideoBrightness: (value: number) => set({ videoBrightness: value }), videoContrast: 1.0, setVideoContrast: (value: number) => set({ videoContrast: value }), + + // Microphone persistence settings + microphoneWasEnabled: false, + setMicrophoneWasEnabled: (enabled: boolean) => set({ microphoneWasEnabled: enabled }), }), { name: "settings", diff --git a/ui/src/hooks/useAppNavigation.ts b/ui/src/hooks/useAppNavigation.ts index af9a247d..590d2d7e 100644 --- 
a/ui/src/hooks/useAppNavigation.ts +++ b/ui/src/hooks/useAppNavigation.ts @@ -3,6 +3,7 @@ import type { NavigateOptions } from "react-router"; import { useCallback, useMemo } from "react"; import { isOnDevice } from "../main"; +import { devError } from '../utils/debug'; /** * Generates the correct path based on whether the app is running on device or in cloud mode @@ -22,7 +23,7 @@ export function getDeviceUiPath(path: string, deviceId?: string): string { return normalizedPath; } else { if (!deviceId) { - console.error("No device ID provided when generating path in cloud mode"); + devError("No device ID provided when generating path in cloud mode"); throw new Error("Device ID is required for cloud mode path generation"); } return `/devices/${deviceId}${normalizedPath}`; diff --git a/ui/src/hooks/useAudioDevices.ts b/ui/src/hooks/useAudioDevices.ts new file mode 100644 index 00000000..ad2b7f69 --- /dev/null +++ b/ui/src/hooks/useAudioDevices.ts @@ -0,0 +1,187 @@ +import { useState, useEffect, useCallback } from 'react'; + +import { devError } from '../utils/debug'; + +export interface AudioDevice { + deviceId: string; + label: string; + kind: 'audioinput' | 'audiooutput'; +} + +export interface UseAudioDevicesReturn { + audioInputDevices: AudioDevice[]; + audioOutputDevices: AudioDevice[]; + selectedInputDevice: string; + selectedOutputDevice: string; + isLoading: boolean; + error: string | null; + refreshDevices: () => Promise; + setSelectedInputDevice: (deviceId: string) => void; + setSelectedOutputDevice: (deviceId: string) => void; +} + +export function useAudioDevices(): UseAudioDevicesReturn { + const [audioInputDevices, setAudioInputDevices] = useState([]); + const [audioOutputDevices, setAudioOutputDevices] = useState([]); + const [selectedInputDevice, setSelectedInputDevice] = useState('default'); + const [selectedOutputDevice, setSelectedOutputDevice] = useState('default'); + const [isLoading, setIsLoading] = useState(false); + const [error, setError] = 
useState(null); + + const refreshDevices = useCallback(async () => { + setIsLoading(true); + setError(null); + + try { + // Check if we're on HTTP (microphone requires HTTPS, but speakers can work) + const isHttp = window.location.protocol === 'http:'; + const hasMediaDevices = !!navigator.mediaDevices; + const hasGetUserMedia = !!navigator.mediaDevices?.getUserMedia; + const hasEnumerateDevices = !!navigator.mediaDevices?.enumerateDevices; + + if (isHttp || !hasMediaDevices || !hasGetUserMedia) { + // Set placeholder devices when HTTPS is required for microphone + setAudioInputDevices([ + { deviceId: 'https-required', label: 'HTTPS Required for Microphone Access', kind: 'audioinput' } + ]); + + // Try to enumerate speakers if possible, otherwise provide defaults + if (hasMediaDevices && hasEnumerateDevices) { + try { + const devices = await navigator.mediaDevices.enumerateDevices(); + const outputDevices: AudioDevice[] = [ + { deviceId: 'default', label: 'Default Speaker', kind: 'audiooutput' } + ]; + + devices.forEach(device => { + if (device.kind === 'audiooutput' && device.deviceId !== 'default') { + outputDevices.push({ + deviceId: device.deviceId, + label: device.label || `Speaker ${device.deviceId.slice(0, 8)}`, + kind: 'audiooutput' + }); + } + }); + + setAudioOutputDevices(outputDevices); + } catch { + // Fallback to default speakers if enumeration fails + setAudioOutputDevices([ + { deviceId: 'default', label: 'Default Speaker', kind: 'audiooutput' }, + { deviceId: 'system-default', label: 'System Default Audio Output', kind: 'audiooutput' } + ]); + } + } else { + // No enumeration available, use defaults + setAudioOutputDevices([ + { deviceId: 'default', label: 'Default Speaker', kind: 'audiooutput' }, + { deviceId: 'system-default', label: 'System Default Audio Output', kind: 'audiooutput' } + ]); + } + + setSelectedInputDevice('https-required'); + setSelectedOutputDevice('default'); + return; // Exit gracefully without throwing error on HTTP + } + + // 
Request permissions first to get device labels + await navigator.mediaDevices.getUserMedia({ audio: true }); + + const devices = await navigator.mediaDevices.enumerateDevices(); + + const inputDevices: AudioDevice[] = [ + { deviceId: 'default', label: 'Default Microphone', kind: 'audioinput' } + ]; + + const outputDevices: AudioDevice[] = [ + { deviceId: 'default', label: 'Default Speaker', kind: 'audiooutput' } + ]; + + devices.forEach(device => { + if (device.kind === 'audioinput' && device.deviceId !== 'default') { + inputDevices.push({ + deviceId: device.deviceId, + label: device.label || `Microphone ${device.deviceId.slice(0, 8)}`, + kind: 'audioinput' + }); + } else if (device.kind === 'audiooutput' && device.deviceId !== 'default') { + outputDevices.push({ + deviceId: device.deviceId, + label: device.label || `Speaker ${device.deviceId.slice(0, 8)}`, + kind: 'audiooutput' + }); + } + }); + + setAudioInputDevices(inputDevices); + setAudioOutputDevices(outputDevices); + + // Audio devices enumerated + + } catch (err) { + // Only log errors on HTTPS where we expect full device access + const isHttp = window.location.protocol === 'http:'; + if (!isHttp) { + devError('Failed to enumerate audio devices:', err); + } + + let errorMessage = 'Failed to access audio devices'; + + if (err instanceof Error) { + if (err.message.includes('HTTPS')) { + errorMessage = err.message; + } else if (err.name === 'NotAllowedError' || err.name === 'PermissionDeniedError') { + errorMessage = 'Microphone permission denied. Please allow microphone access.'; + } else if (err.name === 'NotFoundError' || err.name === 'DevicesNotFoundError') { + errorMessage = 'No microphone devices found.'; + } else if (err.name === 'NotSupportedError') { + errorMessage = 'Audio devices are not supported on this connection. 
Please use HTTPS.'; + } else { + errorMessage = err.message || errorMessage; + } + } + + // Only set error state on HTTPS where we expect device access to work + if (!isHttp) { + setError(errorMessage); + } + } finally { + setIsLoading(false); + } + }, []); + + // Listen for device changes + useEffect(() => { + const handleDeviceChange = () => { + // Audio devices changed, refreshing + refreshDevices(); + }; + + // Check if navigator.mediaDevices exists and supports addEventListener + if (navigator.mediaDevices && typeof navigator.mediaDevices.addEventListener === 'function') { + navigator.mediaDevices.addEventListener('devicechange', handleDeviceChange); + } + + // Initial load + refreshDevices(); + + return () => { + // Check if navigator.mediaDevices exists and supports removeEventListener + if (navigator.mediaDevices && typeof navigator.mediaDevices.removeEventListener === 'function') { + navigator.mediaDevices.removeEventListener('devicechange', handleDeviceChange); + } + }; + }, [refreshDevices]); + + return { + audioInputDevices, + audioOutputDevices, + selectedInputDevice, + selectedOutputDevice, + isLoading, + error, + refreshDevices, + setSelectedInputDevice, + setSelectedOutputDevice, + }; +} \ No newline at end of file diff --git a/ui/src/hooks/useAudioEvents.ts b/ui/src/hooks/useAudioEvents.ts new file mode 100644 index 00000000..6d8b76b5 --- /dev/null +++ b/ui/src/hooks/useAudioEvents.ts @@ -0,0 +1,308 @@ +import { useCallback, useEffect, useRef, useState } from 'react'; +import useWebSocket, { ReadyState } from 'react-use-websocket'; + +import { devError, devWarn } from '../utils/debug'; +import { NETWORK_CONFIG } from '../config/constants'; + +import { JsonRpcResponse, useJsonRpc } from './useJsonRpc'; +import { useRTCStore } from './stores'; + +// Audio event types matching the backend +export type AudioEventType = + | 'audio-mute-changed' + | 'microphone-state-changed' + | 'audio-device-changed'; + +// Audio event data interfaces +export interface 
AudioMuteData { + muted: boolean; +} + +export interface MicrophoneStateData { + running: boolean; + session_active: boolean; +} + +export interface AudioDeviceChangedData { + enabled: boolean; + reason: string; +} + +// Audio event structure +export interface AudioEvent { + type: AudioEventType; + data: AudioMuteData | MicrophoneStateData | AudioDeviceChangedData; +} + +// Hook return type +export interface UseAudioEventsReturn { + // Connection state + connectionState: ReadyState; + isConnected: boolean; + + // Audio state + audioMuted: boolean | null; + + // Microphone state + microphoneState: MicrophoneStateData | null; + + // Device change events + onAudioDeviceChanged?: (data: AudioDeviceChangedData) => void; + + // Manual subscription control + subscribe: () => void; + unsubscribe: () => void; +} + +// Global subscription management to prevent multiple subscriptions per WebSocket connection +const globalSubscriptionState = { + isSubscribed: false, + subscriberCount: 0, + connectionId: null as string | null +}; + +export function useAudioEvents(onAudioDeviceChanged?: (data: AudioDeviceChangedData) => void): UseAudioEventsReturn { + // State for audio data + const [audioMuted, setAudioMuted] = useState(null); + const [microphoneState, setMicrophoneState] = useState(null); + + // Get RTC store and JSON RPC functionality + const { rpcDataChannel } = useRTCStore(); + const { send } = useJsonRpc(); + + // Fetch initial audio status using RPC for cloud compatibility + const fetchInitialAudioStatus = useCallback(async () => { + // Early return if RPC data channel is not open + if (rpcDataChannel?.readyState !== "open") { + devWarn('RPC connection not available for initial audio status, skipping'); + return; + } + + try { + await new Promise((resolve) => { + send("audioStatus", {}, (resp: JsonRpcResponse) => { + if ("error" in resp) { + devError('RPC audioStatus failed:', resp.error); + } else if ("result" in resp) { + const data = resp.result as { muted: boolean }; 
+ setAudioMuted(data.muted); + } + resolve(); // Continue regardless of result + }); + }); + } catch (error) { + devError('Failed to fetch initial audio status via RPC:', error); + } + }, [rpcDataChannel?.readyState, send]); + + // Local subscription state + const [isLocallySubscribed, setIsLocallySubscribed] = useState(false); + const subscriptionTimeoutRef = useRef(null); + + // Get WebSocket URL + const getWebSocketUrl = () => { + const protocol = window.location.protocol === 'https:' ? 'wss:' : 'ws:'; + const host = window.location.host; + return `${protocol}//${host}/webrtc/signaling/client`; + }; + + // Shared WebSocket connection using the `share` option for better resource management + const { + sendMessage, + lastMessage, + readyState, + } = useWebSocket(getWebSocketUrl(), { + shouldReconnect: () => true, + reconnectAttempts: 10, + reconnectInterval: NETWORK_CONFIG.WEBSOCKET_RECONNECT_INTERVAL, + share: true, // Share the WebSocket connection across multiple hooks + onOpen: () => { + // WebSocket connected + // Reset global state on new connection + globalSubscriptionState.isSubscribed = false; + globalSubscriptionState.connectionId = Math.random().toString(36); + }, + onClose: () => { + // WebSocket disconnected + // Reset global state on disconnect + globalSubscriptionState.isSubscribed = false; + globalSubscriptionState.subscriberCount = 0; + globalSubscriptionState.connectionId = null; + }, + onError: (event) => { + devError('[AudioEvents] WebSocket error:', event); + }, + }); + + // Subscribe to audio events + const subscribe = useCallback(() => { + if (readyState === ReadyState.OPEN && !globalSubscriptionState.isSubscribed) { + // Clear any pending subscription timeout + if (subscriptionTimeoutRef.current) { + clearTimeout(subscriptionTimeoutRef.current); + subscriptionTimeoutRef.current = null; + } + + // Add a small delay to prevent rapid subscription attempts + subscriptionTimeoutRef.current = setTimeout(() => { + if (readyState === 
ReadyState.OPEN && !globalSubscriptionState.isSubscribed) { + const subscribeMessage = { + type: 'subscribe-audio-events', + data: {} + }; + + sendMessage(JSON.stringify(subscribeMessage)); + globalSubscriptionState.isSubscribed = true; + // Subscribed to audio events + } + }, 100); // 100ms delay to debounce subscription attempts + } + + // Track local subscription regardless of global state + if (!isLocallySubscribed) { + globalSubscriptionState.subscriberCount++; + setIsLocallySubscribed(true); + } + }, [readyState, sendMessage, isLocallySubscribed]); + + // Unsubscribe from audio events + const unsubscribe = useCallback(() => { + // Clear any pending subscription timeout + if (subscriptionTimeoutRef.current) { + clearTimeout(subscriptionTimeoutRef.current); + subscriptionTimeoutRef.current = null; + } + + if (isLocallySubscribed) { + globalSubscriptionState.subscriberCount--; + setIsLocallySubscribed(false); + + // Only send unsubscribe message if this is the last subscriber and connection is still open + if (globalSubscriptionState.subscriberCount <= 0 && + readyState === ReadyState.OPEN && + globalSubscriptionState.isSubscribed) { + + const unsubscribeMessage = { + type: 'unsubscribe-audio-events', + data: {} + }; + + sendMessage(JSON.stringify(unsubscribeMessage)); + globalSubscriptionState.isSubscribed = false; + globalSubscriptionState.subscriberCount = 0; + // Sent unsubscribe message to backend + } + } + + // Component unsubscribed from audio events + }, [readyState, isLocallySubscribed, sendMessage]); + + // Handle incoming messages + useEffect(() => { + if (lastMessage !== null) { + try { + const message = JSON.parse(lastMessage.data); + + // Handle audio events + if (message.type && message.data) { + const audioEvent = message as AudioEvent; + + switch (audioEvent.type) { + case 'audio-mute-changed': { + const muteData = audioEvent.data as AudioMuteData; + setAudioMuted(muteData.muted); + // Audio mute changed + break; + } + + case 
'microphone-state-changed': { + const micStateData = audioEvent.data as MicrophoneStateData; + setMicrophoneState(micStateData); + // Microphone state changed + break; + } + + case 'audio-device-changed': { + const deviceChangedData = audioEvent.data as AudioDeviceChangedData; + // Audio device changed + if (onAudioDeviceChanged) { + onAudioDeviceChanged(deviceChangedData); + } + break; + } + + default: + // Ignore other message types (WebRTC signaling, etc.) + break; + } + } + } catch (error) { + // Ignore parsing errors for non-JSON messages (like "pong") + if (lastMessage.data !== 'pong') { + devWarn('[AudioEvents] Failed to parse WebSocket message:', error); + } + } + } + }, [lastMessage, onAudioDeviceChanged]); + + // Auto-subscribe when connected + useEffect(() => { + if (readyState === ReadyState.OPEN) { + subscribe(); + } + + // Cleanup subscription on component unmount or connection change + return () => { + if (subscriptionTimeoutRef.current) { + clearTimeout(subscriptionTimeoutRef.current); + subscriptionTimeoutRef.current = null; + } + unsubscribe(); + }; + }, [readyState, subscribe, unsubscribe]); + + // Reset local subscription state on disconnect + useEffect(() => { + if (readyState === ReadyState.CLOSED || readyState === ReadyState.CLOSING) { + setIsLocallySubscribed(false); + if (subscriptionTimeoutRef.current) { + clearTimeout(subscriptionTimeoutRef.current); + subscriptionTimeoutRef.current = null; + } + } + }, [readyState]); + + // Fetch initial audio status on component mount - but only when RPC is ready + useEffect(() => { + // Only fetch when RPC data channel is open and ready + if (rpcDataChannel?.readyState === "open") { + fetchInitialAudioStatus(); + } + }, [fetchInitialAudioStatus, rpcDataChannel?.readyState]); + + // Cleanup on component unmount + useEffect(() => { + return () => { + unsubscribe(); + }; + }, [unsubscribe]); + + return { + // Connection state + connectionState: readyState, + isConnected: readyState === ReadyState.OPEN && 
globalSubscriptionState.isSubscribed, + + // Audio state + audioMuted, + + // Microphone state + microphoneState, + + // Device change events + onAudioDeviceChanged, + + // Manual subscription control + subscribe, + unsubscribe, + }; +} \ No newline at end of file diff --git a/ui/src/hooks/useHidRpc.ts b/ui/src/hooks/useHidRpc.ts index aeb1c4fa..b47d105b 100644 --- a/ui/src/hooks/useHidRpc.ts +++ b/ui/src/hooks/useHidRpc.ts @@ -17,6 +17,8 @@ import { unmarshalHidRpcMessage, } from "./hidRpc"; + + const KEEPALIVE_MESSAGE = new KeypressKeepAliveMessage(); interface sendMessageParams { diff --git a/ui/src/hooks/useMicrophone.ts b/ui/src/hooks/useMicrophone.ts new file mode 100644 index 00000000..41293440 --- /dev/null +++ b/ui/src/hooks/useMicrophone.ts @@ -0,0 +1,700 @@ +import { useCallback, useEffect, useRef, useState } from "react"; + +import { useRTCStore, useSettingsStore } from "@/hooks/stores"; +import { JsonRpcResponse, useJsonRpc } from "@/hooks/useJsonRpc"; +import { useUsbDeviceConfig } from "@/hooks/useUsbDeviceConfig"; +import { useAudioEvents, AudioDeviceChangedData } from "@/hooks/useAudioEvents"; +import { devLog, devInfo, devWarn, devError, devOnly } from "@/utils/debug"; +import { AUDIO_CONFIG } from "@/config/constants"; + +export interface MicrophoneError { + type: 'permission' | 'device' | 'network' | 'unknown'; + message: string; +} + +// Helper function to check if HTTPS is required for microphone access +export function isHttpsRequired(): boolean { + // Check if we're on HTTP (not HTTPS) + const isHttp = window.location.protocol === 'http:'; + + // Check if media devices are available + const hasMediaDevices = !!navigator.mediaDevices; + const hasGetUserMedia = !!navigator.mediaDevices?.getUserMedia; + + // HTTPS is required if we're on HTTP OR if media devices aren't available + return isHttp || !hasMediaDevices || !hasGetUserMedia; +} + +export function useMicrophone() { + const { + peerConnection, + microphoneStream, + setMicrophoneStream, 
+ microphoneSender, + setMicrophoneSender, + isMicrophoneActive, + setMicrophoneActive, + isMicrophoneMuted, + setMicrophoneMuted, + rpcDataChannel, + } = useRTCStore(); + + const { microphoneWasEnabled, setMicrophoneWasEnabled } = useSettingsStore(); + const { send } = useJsonRpc(); + + // Check USB audio status and handle microphone restoration when USB audio is re-enabled + const { usbDeviceConfig } = useUsbDeviceConfig(); + const isUsbAudioEnabled = usbDeviceConfig?.audio ?? true; + + // RPC helper functions to replace HTTP API calls + const rpcMicrophoneStart = useCallback((): Promise => { + return new Promise((resolve, reject) => { + if (rpcDataChannel?.readyState !== "open") { + reject(new Error("Device connection not available")); + return; + } + + send("microphoneStart", {}, (resp: JsonRpcResponse) => { + if ("error" in resp) { + reject(new Error(resp.error.message)); + } else { + resolve(); + } + }); + }); + }, [rpcDataChannel?.readyState, send]); + + const microphoneStreamRef = useRef(null); + + // Loading states + const [isStarting, setIsStarting] = useState(false); + const [isStopping, setIsStopping] = useState(false); + const [isToggling, setIsToggling] = useState(false); + + // Add debouncing refs to prevent rapid operations + const lastOperationRef = useRef(0); + const operationTimeoutRef = useRef(null); + + // Debounced operation wrapper + const debouncedOperation = useCallback((operation: () => Promise, operationType: string) => { + const now = Date.now(); + const timeSinceLastOp = now - lastOperationRef.current; + + if (timeSinceLastOp < AUDIO_CONFIG.OPERATION_DEBOUNCE_MS) { + devLog(`Debouncing ${operationType} operation - too soon (${timeSinceLastOp}ms since last)`); + return; + } + + // Clear any pending operation + if (operationTimeoutRef.current) { + clearTimeout(operationTimeoutRef.current); + operationTimeoutRef.current = null; + } + + lastOperationRef.current = now; + operation().catch(error => { + devError(`Debounced ${operationType} 
operation failed:`, error); + }); + }, []); + + // Cleanup function to stop microphone stream + const stopMicrophoneStream = useCallback(async () => { + if (microphoneStreamRef.current) { + microphoneStreamRef.current.getTracks().forEach((track: MediaStreamTrack) => { + track.stop(); + }); + microphoneStreamRef.current = null; + setMicrophoneStream(null); + } + + if (microphoneSender && peerConnection) { + // Instead of removing the track, replace it with null to keep the transceiver + try { + await microphoneSender.replaceTrack(null); + } catch (error) { + devWarn("Failed to replace track with null:", error); + // Fallback to removing the track + peerConnection.removeTrack(microphoneSender); + } + setMicrophoneSender(null); + } + + setMicrophoneActive(false); + setMicrophoneMuted(false); + }, [microphoneSender, peerConnection, setMicrophoneStream, setMicrophoneSender, setMicrophoneActive, setMicrophoneMuted]); + + + + const lastSyncRef = useRef(0); + const isStartingRef = useRef(false); // Track if we're in the middle of starting + + const syncMicrophoneState = useCallback(async () => { + // Debounce sync calls to prevent race conditions + const now = Date.now(); + if (now - lastSyncRef.current < AUDIO_CONFIG.SYNC_DEBOUNCE_MS) { + return; + } + lastSyncRef.current = now; + + // Don't sync if we're in the middle of starting the microphone + if (isStartingRef.current) { + return; + } + + // Early return if RPC data channel is not ready + if (rpcDataChannel?.readyState !== "open") { + devWarn("RPC connection not available for microphone sync, skipping"); + return; + } + + try { + await new Promise((resolve, reject) => { + send("microphoneStatus", {}, (resp: JsonRpcResponse) => { + if ("error" in resp) { + devError("RPC microphone status failed:", resp.error); + reject(new Error(resp.error.message)); + } else if ("result" in resp) { + const data = resp.result as { running: boolean }; + const backendRunning = data.running; + + // Only sync if there's a significant 
state difference and we're not in a transition + if (backendRunning !== isMicrophoneActive) { + devInfo(`Syncing microphone state: backend=${backendRunning}, frontend=${isMicrophoneActive}`); + + // If backend is running but frontend thinks it's not, just update frontend state + if (backendRunning && !isMicrophoneActive) { + devLog("Backend running, updating frontend state to active"); + setMicrophoneActive(true); + } + // If backend is not running but frontend thinks it is, clean up and update state + else if (!backendRunning && isMicrophoneActive) { + devLog("Backend not running, cleaning up frontend state"); + setMicrophoneActive(false); + // Only clean up stream if we actually have one + if (microphoneStreamRef.current) { + stopMicrophoneStream(); + } + setMicrophoneMuted(false); + } + } + resolve(); + } else { + reject(new Error("Invalid response")); + } + }); + }); + } catch (error) { + devError("Error syncing microphone state:", error); + } + }, [isMicrophoneActive, setMicrophoneActive, setMicrophoneMuted, stopMicrophoneStream, rpcDataChannel?.readyState, send]); + + // Start microphone stream + const startMicrophone = useCallback(async (deviceId?: string): Promise<{ success: boolean; error?: MicrophoneError }> => { + // Prevent multiple simultaneous start operations + if (isStarting || isStopping || isToggling) { + devLog("Microphone operation already in progress, skipping start"); + return { success: false, error: { type: 'unknown', message: 'Operation already in progress' } }; + } + + setIsStarting(true); + try { + // Set flag to prevent sync during startup + isStartingRef.current = true; + + // Check if getUserMedia is available (requires HTTPS in most browsers) + if (!navigator.mediaDevices || !navigator.mediaDevices.getUserMedia) { + setIsStarting(false); + isStartingRef.current = false; + return { + success: false, + error: { + type: 'permission', + message: 'Microphone access requires HTTPS connection. Please use HTTPS to use audio input.' 
+ } + }; + } + + // Request microphone permission and get stream + const audioConstraints: MediaTrackConstraints = { + echoCancellation: true, + noiseSuppression: true, + autoGainControl: true, + sampleRate: AUDIO_CONFIG.SAMPLE_RATE, + channelCount: AUDIO_CONFIG.CHANNEL_COUNT, + }; + + // Add device ID if specified + if (deviceId && deviceId !== 'default') { + audioConstraints.deviceId = { exact: deviceId }; + } + + const stream = await navigator.mediaDevices.getUserMedia({ + audio: audioConstraints + }); + + // Store the stream in both ref and store + microphoneStreamRef.current = stream; + setMicrophoneStream(stream); + + // Add audio track to peer connection if available + if (peerConnection && stream.getAudioTracks().length > 0) { + const audioTrack = stream.getAudioTracks()[0]; + + // Find the audio transceiver (should already exist with sendrecv direction) + const transceivers = peerConnection.getTransceivers(); + + // Look for an audio transceiver that can send (has sendrecv or sendonly direction) + const audioTransceiver = transceivers.find((transceiver: RTCRtpTransceiver) => { + // Check if this transceiver is for audio and can send + const canSend = transceiver.direction === 'sendrecv' || transceiver.direction === 'sendonly'; + + // For newly created transceivers, we need to check if they're for audio + // We can do this by checking if the sender doesn't have a track yet and direction allows sending + if (canSend && !transceiver.sender.track) { + return true; + } + + // For existing transceivers, check if they already have an audio track + if (transceiver.sender.track?.kind === 'audio' || transceiver.receiver.track?.kind === 'audio') { + return canSend; + } + + return false; + }); + + let sender: RTCRtpSender; + if (audioTransceiver && audioTransceiver.sender) { + // Use the existing audio transceiver's sender + await audioTransceiver.sender.replaceTrack(audioTrack); + sender = audioTransceiver.sender; + } else { + // Fallback: add new track if no 
transceiver found + sender = peerConnection.addTrack(audioTrack, stream); + } + + setMicrophoneSender(sender); + + // Check sender stats to verify audio is being transmitted + devOnly(() => { + setTimeout(async () => { + try { + const stats = await sender.getStats(); + stats.forEach((report) => { + if (report.type === 'outbound-rtp' && report.kind === 'audio') { + devLog("Audio RTP stats:", { + packetsSent: report.packetsSent, + bytesSent: report.bytesSent + }); + } + }); + } catch (error) { + devError("Failed to get sender stats:", error); + } + }, 2000); + }); + } + + // Notify backend that microphone is started - only if USB audio is enabled + if (!isUsbAudioEnabled) { + devInfo("USB audio is disabled, skipping backend microphone start"); + // Still set frontend state as active since the stream was successfully created + setMicrophoneActive(true); + setMicrophoneMuted(false); + setMicrophoneWasEnabled(true); + isStartingRef.current = false; + setIsStarting(false); + return { success: true }; + } + + // Retry logic for backend failures + let backendSuccess = false; + let lastError: Error | string | null = null; + + for (let attempt = 1; attempt <= 3; attempt++) { + // If this is a retry, first try to reset the backend microphone state + if (attempt > 1) { + try { + // Use RPC for reset (cloud-compatible) + if (rpcDataChannel?.readyState === "open") { + await new Promise((resolve) => { + send("microphoneReset", {}, (resp: JsonRpcResponse) => { + if ("error" in resp) { + devWarn("RPC microphone reset failed:", resp.error); + // Try stop as fallback + send("microphoneStop", {}, (stopResp: JsonRpcResponse) => { + if ("error" in stopResp) { + devWarn("RPC microphone stop also failed:", stopResp.error); + } + resolve(); // Continue even if both fail + }); + } else { + resolve(); + } + }); + }); + // Wait a bit for the backend to reset + await new Promise(resolve => setTimeout(resolve, 200)); + } else { + devWarn("RPC connection not available for reset"); + } + } catch 
(resetError) { + devWarn("Failed to reset backend state:", resetError); + } + } + + try { + await rpcMicrophoneStart(); + backendSuccess = true; + break; // Exit the retry loop on success + } catch (rpcError) { + lastError = `Backend RPC error: ${rpcError instanceof Error ? rpcError.message : 'Unknown error'}`; + devError(`Backend microphone start failed with RPC error: ${lastError} (attempt ${attempt})`); + + // For RPC errors, try again after a short delay + if (attempt < 3) { + await new Promise(resolve => setTimeout(resolve, 500)); + continue; + } + } + } + + // If all backend attempts failed, cleanup and return error + if (!backendSuccess) { + devError("All backend start attempts failed, cleaning up stream"); + await stopMicrophoneStream(); + isStartingRef.current = false; + setIsStarting(false); + return { + success: false, + error: { + type: 'network', + message: `Failed to start microphone on backend after 3 attempts. Last error: ${lastError}` + } + }; + } + + // Only set active state after backend confirms success + setMicrophoneActive(true); + setMicrophoneMuted(false); + + // Save microphone enabled state for auto-restore on page reload + setMicrophoneWasEnabled(true); + + // Clear the starting flag + isStartingRef.current = false; + setIsStarting(false); + return { success: true }; + } catch (error) { + let micError: MicrophoneError; + if (error instanceof Error) { + if (error.name === 'NotAllowedError' || error.name === 'PermissionDeniedError') { + micError = { + type: 'permission', + message: 'Microphone permission denied. Please allow microphone access and try again.' + }; + } else if (error.name === 'NotFoundError' || error.name === 'DevicesNotFoundError') { + micError = { + type: 'device', + message: 'No microphone device found. Please check your microphone connection.' 
+ }; + } else { + micError = { + type: 'unknown', + message: error.message || 'Failed to access microphone' + }; + } + } else { + micError = { + type: 'unknown', + message: 'Unknown error occurred while accessing microphone' + }; + } + + // Clear the starting flag on error + isStartingRef.current = false; + setIsStarting(false); + return { success: false, error: micError }; + } + }, [peerConnection, setMicrophoneStream, setMicrophoneSender, setMicrophoneActive, setMicrophoneMuted, setMicrophoneWasEnabled, stopMicrophoneStream, isStarting, isStopping, isToggling, rpcMicrophoneStart, rpcDataChannel?.readyState, send, isUsbAudioEnabled]); + + + + // Stop microphone + const stopMicrophone = useCallback(async (): Promise<{ success: boolean; error?: MicrophoneError }> => { + // Prevent multiple simultaneous stop operations + if (isStarting || isStopping || isToggling) { + devLog("Microphone operation already in progress, skipping stop"); + return { success: false, error: { type: 'unknown', message: 'Operation already in progress' } }; + } + + setIsStopping(true); + try { + // First stop the stream + await stopMicrophoneStream(); + + // Then notify backend that microphone is stopped using RPC + try { + if (rpcDataChannel?.readyState === "open") { + await new Promise((resolve) => { + send("microphoneStop", {}, (resp: JsonRpcResponse) => { + if ("error" in resp) { + devWarn("RPC microphone stop failed:", resp.error); + } + resolve(); // Continue regardless of result + }); + }); + } else { + devWarn("RPC connection not available for microphone stop"); + } + } catch (error) { + devWarn("Failed to notify backend about microphone stop:", error); + } + + // Update frontend state immediately + setMicrophoneActive(false); + setMicrophoneMuted(false); + + // Save microphone disabled state for persistence + setMicrophoneWasEnabled(false); + + // Sync state after stopping to ensure consistency (with longer delay) + setTimeout(() => syncMicrophoneState(), 500); + + 
setIsStopping(false); + return { success: true }; + } catch (error) { + devError("Failed to stop microphone:", error); + setIsStopping(false); + return { + success: false, + error: { + type: 'unknown', + message: error instanceof Error ? error.message : 'Failed to stop microphone' + } + }; + } + }, [stopMicrophoneStream, syncMicrophoneState, setMicrophoneActive, setMicrophoneMuted, setMicrophoneWasEnabled, isStarting, isStopping, isToggling, rpcDataChannel?.readyState, send]); + + // Toggle microphone mute + const toggleMicrophoneMute = useCallback(async (): Promise<{ success: boolean; error?: MicrophoneError }> => { + // Prevent multiple simultaneous toggle operations + if (isStarting || isStopping || isToggling) { + devLog("Microphone operation already in progress, skipping toggle"); + return { success: false, error: { type: 'unknown', message: 'Operation already in progress' } }; + } + + setIsToggling(true); + try { + // Use the ref instead of store value to avoid race conditions + const currentStream = microphoneStreamRef.current || microphoneStream; + + if (!currentStream || !isMicrophoneActive) { + const errorDetails = { + hasStream: !!currentStream, + isActive: isMicrophoneActive, + streamId: currentStream?.id, + audioTracks: currentStream?.getAudioTracks().length || 0 + }; + devWarn("Microphone mute failed: stream or active state missing", errorDetails); + + // Provide more specific error message + let errorMessage = 'Microphone is not active'; + if (!currentStream) { + errorMessage = 'No microphone stream found. Please restart the microphone.'; + } else if (!isMicrophoneActive) { + errorMessage = 'Microphone is not marked as active. 
Please restart the microphone.'; + } + + setIsToggling(false); + return { + success: false, + error: { + type: 'device', + message: errorMessage + } + }; + } + + const audioTracks = currentStream.getAudioTracks(); + if (audioTracks.length === 0) { + setIsToggling(false); + return { + success: false, + error: { + type: 'device', + message: 'No audio tracks found in microphone stream' + } + }; + } + + const newMutedState = !isMicrophoneMuted; + + // Mute/unmute the audio track + audioTracks.forEach((track: MediaStreamTrack) => { + track.enabled = !newMutedState; + }); + + setMicrophoneMuted(newMutedState); + + // Notify backend about mute state using RPC + try { + if (rpcDataChannel?.readyState === "open") { + await new Promise((resolve) => { + send("microphoneMute", { muted: newMutedState }, (resp: JsonRpcResponse) => { + if ("error" in resp) { + devWarn("RPC microphone mute failed:", resp.error); + } + resolve(); // Continue regardless of result + }); + }); + } else { + devWarn("RPC connection not available for microphone mute"); + } + } catch (error) { + devWarn("Failed to notify backend about microphone mute:", error); + } + + setIsToggling(false); + return { success: true }; + } catch (error) { + devError("Failed to toggle microphone mute:", error); + setIsToggling(false); + return { + success: false, + error: { + type: 'unknown', + message: error instanceof Error ? 
error.message : 'Failed to toggle microphone mute' + } + }; + } + }, [microphoneStream, isMicrophoneActive, isMicrophoneMuted, setMicrophoneMuted, isStarting, isStopping, isToggling, rpcDataChannel?.readyState, send]); + + + + + + const startMicrophoneDebounced = useCallback((deviceId?: string) => { + debouncedOperation(async () => { + await startMicrophone(deviceId).catch(devError); + }, "start"); + }, [startMicrophone, debouncedOperation]); + + const stopMicrophoneDebounced = useCallback(() => { + debouncedOperation(async () => { + await stopMicrophone().catch(devError); + }, "stop"); + }, [stopMicrophone, debouncedOperation]); + + + + // Sync state on mount and auto-restore microphone if it was enabled before page reload + useEffect(() => { + const autoRestoreMicrophone = async () => { + // Wait for RPC connection to be ready before attempting any operations + if (rpcDataChannel?.readyState !== "open") { + return; + } + + // First sync the current state + await syncMicrophoneState(); + + // If microphone was enabled before page reload and is not currently active, restore it + if (microphoneWasEnabled && !isMicrophoneActive && peerConnection) { + try { + const result = await startMicrophone(); + if (result.success) { + devInfo("Microphone auto-restored successfully after page reload"); + } else { + devWarn("Failed to auto-restore microphone:", result.error); + } + } catch (error) { + devWarn("Error during microphone auto-restoration:", error); + } + } + }; + + // Add a delay to ensure RTC connection is fully established + const timer = setTimeout(autoRestoreMicrophone, 1000); + return () => clearTimeout(timer); + }, [syncMicrophoneState, microphoneWasEnabled, isMicrophoneActive, peerConnection, startMicrophone, rpcDataChannel?.readyState]); + + // Handle audio device changes (USB audio enable/disable) via WebSocket events + const handleAudioDeviceChanged = useCallback((data: AudioDeviceChangedData) => { + devInfo("Audio device changed:", data); + devInfo("Current 
microphone state:", { isMicrophoneActive, microphoneWasEnabled }); + + // USB audio was just disabled + if (!data.enabled && data.reason === "usb_reconfiguration") { + devInfo(`USB audio disabled via device change event - microphone was ${isMicrophoneActive ? 'active' : 'inactive'}`); + + // The microphoneWasEnabled flag is already being managed by the microphone start/stop functions + // We don't need to do anything special here - it will be preserved for restoration + devInfo(`Current microphoneWasEnabled flag: ${microphoneWasEnabled}`); + } + + // USB audio was just re-enabled + else if (data.enabled && data.reason === "usb_reconfiguration") { + devInfo("USB audio re-enabled via device change event - checking if microphone should be restored"); + devInfo(`microphoneWasEnabled: ${microphoneWasEnabled}`); + devInfo(`Current microphone active: ${isMicrophoneActive}`); + devInfo(`RPC ready: ${rpcDataChannel?.readyState === "open"}`); + + // If microphone was enabled before (using the same logic as page reload restore), restore it + if (microphoneWasEnabled && !isMicrophoneActive && rpcDataChannel?.readyState === "open") { + devInfo("Restoring microphone after USB audio re-enabled (using microphoneWasEnabled flag)"); + setTimeout(async () => { + try { + const result = await startMicrophone(); + if (result.success) { + devInfo("Microphone successfully restored after USB audio re-enable"); + } else { + devWarn("Failed to restore microphone after USB audio re-enable:", result.error); + } + } catch (error) { + devWarn("Error restoring microphone after USB audio re-enable:", error); + } + }, 500); // Small delay to ensure USB device reconfiguration is complete + } else { + devInfo("Not restoring microphone - conditions not met or microphone was not previously enabled"); + } + } + }, [isMicrophoneActive, microphoneWasEnabled, startMicrophone, rpcDataChannel?.readyState]); + + // Subscribe to audio device change events + useAudioEvents(handleAudioDeviceChanged); + + // 
Cleanup on unmount - use ref to avoid dependency on stopMicrophoneStream + useEffect(() => { + return () => { + // Clean up stream directly without depending on the callback + const stream = microphoneStreamRef.current; + if (stream) { + stream.getAudioTracks().forEach((track: MediaStreamTrack) => { + track.stop(); + }); + microphoneStreamRef.current = null; + } + }; + }, []); // No dependencies to prevent re-running + + return { + isMicrophoneActive, + isMicrophoneMuted, + microphoneStream, + startMicrophone, + stopMicrophone, + toggleMicrophoneMute, + + // Expose debounced variants for UI handlers + startMicrophoneDebounced, + stopMicrophoneDebounced, + // Expose sync and loading flags for consumers that expect them + syncMicrophoneState, + isStarting, + isStopping, + isToggling, + + // HTTP/HTTPS detection + isHttpsRequired: isHttpsRequired(), + }; +} \ No newline at end of file diff --git a/ui/src/hooks/useUsbDeviceConfig.ts b/ui/src/hooks/useUsbDeviceConfig.ts new file mode 100644 index 00000000..41e09ae9 --- /dev/null +++ b/ui/src/hooks/useUsbDeviceConfig.ts @@ -0,0 +1,60 @@ +import { useCallback, useEffect, useState } from "react"; + +import { devError } from '../utils/debug'; + +import { JsonRpcResponse, useJsonRpc } from "./useJsonRpc"; +import { useAudioEvents } from "./useAudioEvents"; + +export interface UsbDeviceConfig { + keyboard: boolean; + absolute_mouse: boolean; + relative_mouse: boolean; + mass_storage: boolean; + audio: boolean; +} + +export function useUsbDeviceConfig() { + const { send } = useJsonRpc(); + const [usbDeviceConfig, setUsbDeviceConfig] = useState(null); + const [loading, setLoading] = useState(true); + const [error, setError] = useState(null); + + const fetchUsbDeviceConfig = useCallback(() => { + setLoading(true); + setError(null); + + send("getUsbDevices", {}, (resp: JsonRpcResponse) => { + setLoading(false); + + if ("error" in resp) { + devError("Failed to load USB devices:", resp.error); + setError(resp.error.data || "Unknown 
error"); + setUsbDeviceConfig(null); + } else { + const config = resp.result as UsbDeviceConfig; + setUsbDeviceConfig(config); + setError(null); + } + }); + }, [send]); + + // Listen for audio device changes to update USB config in real-time + const handleAudioDeviceChanged = useCallback(() => { + // Audio device changed, refetching USB config + fetchUsbDeviceConfig(); + }, [fetchUsbDeviceConfig]); + + // Subscribe to audio events for real-time updates + useAudioEvents(handleAudioDeviceChanged); + + useEffect(() => { + fetchUsbDeviceConfig(); + }, [fetchUsbDeviceConfig]); + + return { + usbDeviceConfig, + loading, + error, + refetch: fetchUsbDeviceConfig, + }; +} \ No newline at end of file diff --git a/ui/src/main.tsx b/ui/src/main.tsx index 79ca6717..7dd0e0a3 100644 --- a/ui/src/main.tsx +++ b/ui/src/main.tsx @@ -10,9 +10,6 @@ import { } from "react-router"; import { ExclamationTriangleIcon } from "@heroicons/react/16/solid"; -import { CLOUD_API, DEVICE_API } from "@/ui.config"; -import api from "@/api"; -import Root from "@/root"; import Card from "@components/Card"; import EmptyCard from "@components/EmptyCard"; import NotFoundPage from "@components/NotFoundPage"; @@ -28,6 +25,9 @@ import DeviceIdRename from "@routes/devices.$id.rename"; import DevicesRoute from "@routes/devices"; import SettingsIndexRoute from "@routes/devices.$id.settings._index"; import SettingsAccessIndexRoute from "@routes/devices.$id.settings.access._index"; +import Root from "@/root"; +import api from "@/api"; +import { CLOUD_API, DEVICE_API } from "@/ui.config"; import Notifications from "@/notifications"; const SignupRoute = lazy(() => import("@routes/signup")); const LoginRoute = lazy(() => import("@routes/login")); diff --git a/ui/src/routes/devices.$id.deregister.tsx b/ui/src/routes/devices.$id.deregister.tsx index e5dd2a35..69c0d434 100644 --- a/ui/src/routes/devices.$id.deregister.tsx +++ b/ui/src/routes/devices.$id.deregister.tsx @@ -6,9 +6,9 @@ import { Button, LinkButton } from 
"@components/Button"; import Card from "@components/Card"; import { CardHeader } from "@components/CardHeader"; import DashboardNavbar from "@components/Header"; +import Fieldset from "@components/Fieldset"; import { User } from "@/hooks/stores"; import { checkAuth } from "@/main"; -import Fieldset from "@components/Fieldset"; import { CLOUD_API } from "@/ui.config"; interface LoaderData { diff --git a/ui/src/routes/devices.$id.mount.tsx b/ui/src/routes/devices.$id.mount.tsx index bc29c455..152ff3c6 100644 --- a/ui/src/routes/devices.$id.mount.tsx +++ b/ui/src/routes/devices.$id.mount.tsx @@ -9,12 +9,12 @@ import { PlusCircleIcon, ExclamationTriangleIcon } from "@heroicons/react/20/sol import { TrashIcon } from "@heroicons/react/16/solid"; import { useNavigate } from "react-router"; -import Card, { GridCard } from "@/components/Card"; import { Button } from "@components/Button"; +import AutoHeight from "@components/AutoHeight"; +import Card, { GridCard } from "@/components/Card"; import LogoBlueIcon from "@/assets/logo-blue.svg"; import LogoWhiteIcon from "@/assets/logo-white.svg"; import { formatters } from "@/utils"; -import AutoHeight from "@components/AutoHeight"; import { InputFieldWithLabel } from "@/components/InputField"; import DebianIcon from "@/assets/debian-icon.png"; import UbuntuIcon from "@/assets/ubuntu-icon.png"; @@ -25,16 +25,17 @@ import NetBootIcon from "@/assets/netboot-icon.svg"; import Fieldset from "@/components/Fieldset"; import { DEVICE_API } from "@/ui.config"; -import { JsonRpcResponse, useJsonRpc } from "../hooks/useJsonRpc"; -import notifications from "../notifications"; -import { isOnDevice } from "../main"; -import { cx } from "../cva.config"; import { MountMediaState, RemoteVirtualMediaState, useMountMediaStore, useRTCStore, } from "../hooks/stores"; +import { cx } from "../cva.config"; +import { isOnDevice } from "../main"; +import notifications from "../notifications"; +import { JsonRpcResponse, useJsonRpc } from 
"../hooks/useJsonRpc"; + export default function MountRoute() { const navigate = useNavigate(); diff --git a/ui/src/routes/devices.$id.other-session.tsx b/ui/src/routes/devices.$id.other-session.tsx index 8a767d51..284d0711 100644 --- a/ui/src/routes/devices.$id.other-session.tsx +++ b/ui/src/routes/devices.$id.other-session.tsx @@ -1,7 +1,7 @@ import { useNavigate, useOutletContext } from "react-router"; -import { GridCard } from "@/components/Card"; import { Button } from "@components/Button"; +import { GridCard } from "@/components/Card"; import LogoBlue from "@/assets/logo-blue.svg"; import LogoWhite from "@/assets/logo-white.svg"; diff --git a/ui/src/routes/devices.$id.rename.tsx b/ui/src/routes/devices.$id.rename.tsx index 39f06bcf..c07601cc 100644 --- a/ui/src/routes/devices.$id.rename.tsx +++ b/ui/src/routes/devices.$id.rename.tsx @@ -7,13 +7,14 @@ import Card from "@components/Card"; import { CardHeader } from "@components/CardHeader"; import { InputFieldWithLabel } from "@components/InputField"; import DashboardNavbar from "@components/Header"; +import Fieldset from "@components/Fieldset"; import { User } from "@/hooks/stores"; import { checkAuth } from "@/main"; -import Fieldset from "@components/Fieldset"; import { CLOUD_API } from "@/ui.config"; import api from "../api"; + interface LoaderData { device: { id: string; name: string; user: { googleId: string } }; user: User; diff --git a/ui/src/routes/devices.$id.settings.general.reboot.tsx b/ui/src/routes/devices.$id.settings.general.reboot.tsx index db0e0530..4cc7d836 100644 --- a/ui/src/routes/devices.$id.settings.general.reboot.tsx +++ b/ui/src/routes/devices.$id.settings.general.reboot.tsx @@ -1,8 +1,8 @@ import { useNavigate } from "react-router"; import { useCallback } from "react"; -import { useJsonRpc } from "@/hooks/useJsonRpc"; import { Button } from "@components/Button"; +import { useJsonRpc } from "@/hooks/useJsonRpc"; export default function SettingsGeneralRebootRoute() { const navigate = 
useNavigate(); diff --git a/ui/src/routes/devices.$id.settings.general.update.tsx b/ui/src/routes/devices.$id.settings.general.update.tsx index 38c15412..72c864dd 100644 --- a/ui/src/routes/devices.$id.settings.general.update.tsx +++ b/ui/src/routes/devices.$id.settings.general.update.tsx @@ -2,9 +2,9 @@ import { useLocation, useNavigate } from "react-router"; import { useCallback, useEffect, useRef, useState } from "react"; import { CheckCircleIcon } from "@heroicons/react/20/solid"; +import { Button } from "@components/Button"; import Card from "@/components/Card"; import { useJsonRpc } from "@/hooks/useJsonRpc"; -import { Button } from "@components/Button"; import { UpdateState, useUpdateStore } from "@/hooks/stores"; import LoadingSpinner from "@/components/LoadingSpinner"; import { useDeviceUiNavigation } from "@/hooks/useAppNavigation"; diff --git a/ui/src/routes/devices.$id.setup.tsx b/ui/src/routes/devices.$id.setup.tsx index 2fd65f50..7814bbb4 100644 --- a/ui/src/routes/devices.$id.setup.tsx +++ b/ui/src/routes/devices.$id.setup.tsx @@ -13,6 +13,7 @@ import { CLOUD_API } from "@/ui.config"; import api from "../api"; + const loader: LoaderFunction = async ({ params }: LoaderFunctionArgs) => { await checkAuth(); const res = await fetch(`${CLOUD_API}/devices/${params.id}`, { diff --git a/ui/src/routes/devices.$id.tsx b/ui/src/routes/devices.$id.tsx index a1ace077..183a4ad5 100644 --- a/ui/src/routes/devices.$id.tsx +++ b/ui/src/routes/devices.$id.tsx @@ -15,6 +15,9 @@ import { FocusTrap } from "focus-trap-react"; import { motion, AnimatePresence } from "framer-motion"; import useWebSocket from "react-use-websocket"; +import WebRTCVideo from "@components/WebRTCVideo"; +import DashboardNavbar from "@components/Header"; +import { DeviceStatus } from "@routes/welcome-local"; import { CLOUD_API, DEVICE_API } from "@/ui.config"; import api from "@/api"; import { checkAuth, isInCloud, isOnDevice } from "@/main"; @@ -34,11 +37,8 @@ import { useVideoStore, VideoState, 
} from "@/hooks/stores"; -import WebRTCVideo from "@components/WebRTCVideo"; -import DashboardNavbar from "@components/Header"; -const ConnectionStatsSidebar = lazy(() => import('@/components/sidebar/connectionStats')); -const Terminal = lazy(() => import('@components/Terminal')); -const UpdateInProgressStatusCard = lazy(() => import("@/components/UpdateInProgressStatusCard")); +import { useMicrophone } from "@/hooks/useMicrophone"; +import { useAudioEvents } from "@/hooks/useAudioEvents"; import Modal from "@/components/Modal"; import { JsonRpcRequest, JsonRpcResponse, RpcMethodNotFound, useJsonRpc } from "@/hooks/useJsonRpc"; import { @@ -48,9 +48,12 @@ import { } from "@/components/VideoOverlay"; import { useDeviceUiNavigation } from "@/hooks/useAppNavigation"; import { FeatureFlagProvider } from "@/providers/FeatureFlagProvider"; -import { DeviceStatus } from "@routes/welcome-local"; import { useVersion } from "@/hooks/useVersion"; +const ConnectionStatsSidebar = lazy(() => import('@/components/sidebar/connectionStats')); +const Terminal = lazy(() => import('@components/Terminal')); +const UpdateInProgressStatusCard = lazy(() => import("@/components/UpdateInProgressStatusCard")); + interface LocalLoaderResp { authMode: "password" | "noPassword" | null; } @@ -139,6 +142,7 @@ export default function KvmIdRoute() { } = useRTCStore(); const location = useLocation(); + const isLegacySignalingEnabled = useRef(false); const [connectionFailed, setConnectionFailed] = useState(false); @@ -471,11 +475,32 @@ export default function KvmIdRoute() { } }; - pc.ontrack = function (event) { - setMediaStream(event.streams[0]); + pc.ontrack = function (event: RTCTrackEvent) { + // Handle separate MediaStreams for audio and video tracks + const track = event.track; + const streams = event.streams; + + if (streams && streams.length > 0) { + // Get existing MediaStream or create a new one + const existingStream = useRTCStore.getState().mediaStream; + let combinedStream: MediaStream; 
+ + if (existingStream) { + combinedStream = existingStream; + // Add the new track to the existing stream + combinedStream.addTrack(track); + } else { + // Create a new MediaStream with the track + combinedStream = new MediaStream([track]); + } + + setMediaStream(combinedStream); + } }; setTransceiver(pc.addTransceiver("video", { direction: "recvonly" })); + // Add audio transceiver to receive audio from the server and send microphone audio + pc.addTransceiver("audio", { direction: "sendrecv" }); const rpcDataChannel = pc.createDataChannel("rpc"); rpcDataChannel.onopen = () => { @@ -670,6 +695,25 @@ export default function KvmIdRoute() { const { send } = useJsonRpc(onJsonRpcRequest); + // Initialize microphone hook + const microphoneHook = useMicrophone(); + const { syncMicrophoneState } = microphoneHook; + + // Handle audio device changes to sync microphone state + const handleAudioDeviceChanged = useCallback((data: { enabled: boolean; reason: string }) => { + console.log('[AudioDeviceChanged] Audio device changed:', data); + // Sync microphone state when audio device configuration changes + // This ensures the microphone state is properly synchronized after USB audio reconfiguration + if (syncMicrophoneState) { + setTimeout(() => { + syncMicrophoneState(); + }, 500); // Small delay to ensure backend state is settled + } + }, [syncMicrophoneState]); + + // Use audio events hook with device change handler + useAudioEvents(handleAudioDeviceChanged); + useEffect(() => { if (rpcDataChannel?.readyState !== "open") return; console.log("Requesting video state"); @@ -841,7 +885,7 @@ export default function KvmIdRoute() { />
- +
)} +
diff --git a/ui/src/routes/devices.already-adopted.tsx b/ui/src/routes/devices.already-adopted.tsx index ee189a8a..81a47f7d 100644 --- a/ui/src/routes/devices.already-adopted.tsx +++ b/ui/src/routes/devices.already-adopted.tsx @@ -1,7 +1,7 @@ +import GridBackground from "@components/GridBackground"; import { LinkButton } from "@/components/Button"; import SimpleNavbar from "@/components/SimpleNavbar"; import Container from "@/components/Container"; -import GridBackground from "@components/GridBackground"; export default function DevicesAlreadyAdopted() { return ( diff --git a/ui/src/routes/login-local.tsx b/ui/src/routes/login-local.tsx index 5fab7e6e..4f4c05b3 100644 --- a/ui/src/routes/login-local.tsx +++ b/ui/src/routes/login-local.tsx @@ -18,6 +18,9 @@ import ExtLink from "../components/ExtLink"; import { DeviceStatus } from "./welcome-local"; + + + const loader: LoaderFunction = async () => { const res = await api .GET(`${DEVICE_API}/device/status`) diff --git a/ui/src/routes/welcome-local.mode.tsx b/ui/src/routes/welcome-local.mode.tsx index 8d1a808b..f2fd9cce 100644 --- a/ui/src/routes/welcome-local.mode.tsx +++ b/ui/src/routes/welcome-local.mode.tsx @@ -5,9 +5,9 @@ import { useState } from "react"; import GridBackground from "@components/GridBackground"; import Container from "@components/Container"; import { Button } from "@components/Button"; -import LogoBlueIcon from "@/assets/logo-blue.png"; -import LogoWhiteIcon from "@/assets/logo-white.svg"; import { DEVICE_API } from "@/ui.config"; +import LogoWhiteIcon from "@/assets/logo-white.svg"; +import LogoBlueIcon from "@/assets/logo-blue.png"; import { GridCard } from "../components/Card"; import { cx } from "../cva.config"; @@ -15,6 +15,7 @@ import api from "../api"; import { DeviceStatus } from "./welcome-local"; + const loader: LoaderFunction = async () => { const res = await api .GET(`${DEVICE_API}/device/status`) diff --git a/ui/src/routes/welcome-local.password.tsx 
b/ui/src/routes/welcome-local.password.tsx index d0b7c7a9..7d80a5e6 100644 --- a/ui/src/routes/welcome-local.password.tsx +++ b/ui/src/routes/welcome-local.password.tsx @@ -16,6 +16,8 @@ import api from "../api"; import { DeviceStatus } from "./welcome-local"; + + const loader: LoaderFunction = async () => { const res = await api .GET(`${DEVICE_API}/device/status`) diff --git a/ui/src/routes/welcome-local.tsx b/ui/src/routes/welcome-local.tsx index d7ff117e..6fd4e78b 100644 --- a/ui/src/routes/welcome-local.tsx +++ b/ui/src/routes/welcome-local.tsx @@ -14,6 +14,7 @@ import { DEVICE_API } from "@/ui.config"; import api from "../api"; + export interface DeviceStatus { isSetup: boolean; } diff --git a/ui/src/utils/debug.ts b/ui/src/utils/debug.ts new file mode 100644 index 00000000..916ae010 --- /dev/null +++ b/ui/src/utils/debug.ts @@ -0,0 +1,64 @@ +/** + * Debug utilities for development mode logging + */ + +// Check if we're in development mode +const isDevelopment = import.meta.env.DEV || import.meta.env.MODE === 'development'; + +/** + * Development-only console.log wrapper + * Only logs in development mode, silent in production + */ +export const devLog = (...args: unknown[]): void => { + if (isDevelopment) { + console.log(...args); + } +}; + +/** + * Development-only console.info wrapper + * Only logs in development mode, silent in production + */ +export const devInfo = (...args: unknown[]): void => { + if (isDevelopment) { + console.info(...args); + } +}; + +/** + * Development-only console.warn wrapper + * Only logs in development mode, silent in production + */ +export const devWarn = (...args: unknown[]): void => { + if (isDevelopment) { + console.warn(...args); + } +}; + +/** + * Development-only console.error wrapper + * Always logs errors, but with dev prefix in development + */ +export const devError = (...args: unknown[]): void => { + if (isDevelopment) { + console.error('[DEV]', ...args); + } else { + console.error(...args); + } +}; + +/** + * 
Development-only debug function wrapper + * Only executes the function in development mode + */ +export const devOnly = (fn: () => T): T | undefined => { + if (isDevelopment) { + return fn(); + } + return undefined; +}; + +/** + * Check if we're in development mode + */ +export const isDevMode = (): boolean => isDevelopment; \ No newline at end of file diff --git a/ui/vite.config.ts b/ui/vite.config.ts index 13b2da02..9aa0fca1 100644 --- a/ui/vite.config.ts +++ b/ui/vite.config.ts @@ -17,11 +17,7 @@ export default defineConfig(({ mode, command }) => { const { JETKVM_PROXY_URL, USE_SSL } = process.env; const useSSL = USE_SSL === "true"; - const plugins = [ - tailwindcss(), - tsconfigPaths(), - react() - ]; + const plugins = [tailwindcss(), tsconfigPaths(), react()]; if (useSSL) { plugins.push(basicSsl()); } @@ -60,6 +56,8 @@ export default defineConfig(({ mode, command }) => { "/storage": JETKVM_PROXY_URL, "/cloud": JETKVM_PROXY_URL, "/developer": JETKVM_PROXY_URL, + "/microphone": JETKVM_PROXY_URL, + "/audio": JETKVM_PROXY_URL, } : undefined, }, diff --git a/web.go b/web.go index 45253579..66e697fa 100644 --- a/web.go +++ b/web.go @@ -20,6 +20,7 @@ import ( gin_logger "github.com/gin-contrib/logger" "github.com/gin-gonic/gin" "github.com/google/uuid" + "github.com/jetkvm/kvm/internal/audio" "github.com/jetkvm/kvm/internal/logging" "github.com/pion/webrtc/v4" "github.com/prometheus/client_golang/prometheus" @@ -233,6 +234,16 @@ func handleWebRTCSession(c *gin.Context) { cancelKeyboardMacro() currentSession = session + + // Set up audio relay callback to get current session's audio track + // This is needed for audio output to work after enable/disable cycles + audio.SetCurrentSessionCallback(func() audio.AudioTrackWriter { + if currentSession != nil { + return currentSession.AudioTrack + } + return nil + }) + c.JSON(http.StatusOK, gin.H{"sd": sd}) } @@ -458,6 +469,10 @@ func handleWebRTCSignalWsMessages( if err = 
currentSession.peerConnection.AddICECandidate(candidate); err != nil { l.Warn().Str("error", err.Error()).Msg("failed to add incoming ICE candidate to our peer connection") } + } else if message.Type == "subscribe-audio-events" { + handleSubscribeAudioEvents(connectionID, wsCon, runCtx, &l) + } else if message.Type == "unsubscribe-audio-events" { + handleUnsubscribeAudioEvents(connectionID, &l) } } } diff --git a/webrtc.go b/webrtc.go index a0a8473b..09677e21 100644 --- a/webrtc.go +++ b/webrtc.go @@ -4,7 +4,9 @@ import ( "context" "encoding/base64" "encoding/json" + "fmt" "net" + "runtime" "strings" "sync" "time" @@ -12,6 +14,7 @@ import ( "github.com/coder/websocket" "github.com/coder/websocket/wsjson" "github.com/gin-gonic/gin" + "github.com/jetkvm/kvm/internal/audio" "github.com/jetkvm/kvm/internal/hidrpc" "github.com/jetkvm/kvm/internal/logging" "github.com/jetkvm/kvm/internal/usbgadget" @@ -22,10 +25,18 @@ import ( type Session struct { peerConnection *webrtc.PeerConnection VideoTrack *webrtc.TrackLocalStaticSample + AudioTrack *webrtc.TrackLocalStaticSample + AudioRtpSender *webrtc.RTPSender ControlChannel *webrtc.DataChannel RPCChannel *webrtc.DataChannel HidChannel *webrtc.DataChannel + DiskChannel *webrtc.DataChannel + AudioInputManager *audio.AudioInputManager shouldUmountVirtualMedia bool + micCooldown time.Duration + audioFrameChan chan []byte + audioStopChan chan struct{} + audioWg sync.WaitGroup rpcQueue chan webrtc.DataChannelMessage @@ -218,7 +229,17 @@ func newSession(config SessionConfig) (*Session, error) { return nil, err } - session := &Session{peerConnection: peerConnection} + session := &Session{ + peerConnection: peerConnection, + AudioInputManager: audio.NewAudioInputManager(), + micCooldown: 100 * time.Millisecond, + audioFrameChan: make(chan []byte, 1000), + audioStopChan: make(chan struct{}), + } + + // Start audio processing goroutine + session.startAudioProcessor(*logger) + session.rpcQueue = make(chan webrtc.DataChannelMessage, 256) 
session.initQueues() session.initKeysDownStateQueue() @@ -272,29 +293,79 @@ func newSession(config SessionConfig) (*Session, error) { } }) - session.VideoTrack, err = webrtc.NewTrackLocalStaticSample(webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeH264}, "video", "kvm") + session.VideoTrack, err = webrtc.NewTrackLocalStaticSample(webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeH264}, "video", "kvm-video") if err != nil { scopedLogger.Warn().Err(err).Msg("Failed to create VideoTrack") return nil, err } - rtpSender, err := peerConnection.AddTrack(session.VideoTrack) + session.AudioTrack, err = webrtc.NewTrackLocalStaticSample(webrtc.RTPCodecCapability{MimeType: webrtc.MimeTypeOpus}, "audio", "kvm-audio") if err != nil { scopedLogger.Warn().Err(err).Msg("Failed to add VideoTrack to PeerConnection") return nil, err } + // Update the audio relay with the new WebRTC audio track asynchronously + // This prevents blocking during session creation and avoids mutex deadlocks + audio.UpdateAudioRelayTrackAsync(session.AudioTrack) + + videoRtpSender, err := peerConnection.AddTrack(session.VideoTrack) + if err != nil { + return nil, err + } + + // Add bidirectional audio transceiver for microphone input + audioTransceiver, err := peerConnection.AddTransceiverFromTrack(session.AudioTrack, webrtc.RTPTransceiverInit{ + Direction: webrtc.RTPTransceiverDirectionSendrecv, + }) + if err != nil { + return nil, err + } + audioRtpSender := audioTransceiver.Sender() + session.AudioRtpSender = audioRtpSender + + // Handle incoming audio track (microphone from browser) + peerConnection.OnTrack(func(track *webrtc.TrackRemote, receiver *webrtc.RTPReceiver) { + scopedLogger.Info().Str("codec", track.Codec().MimeType).Str("id", track.ID()).Msg("Got remote track") + + if track.Kind() == webrtc.RTPCodecTypeAudio && track.Codec().MimeType == webrtc.MimeTypeOpus { + scopedLogger.Info().Msg("Processing incoming audio track for microphone input") + + go func() { + // Lock to OS thread to isolate 
RTP processing + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + for { + rtpPacket, _, err := track.ReadRTP() + if err != nil { + scopedLogger.Debug().Err(err).Msg("Error reading RTP packet from audio track") + return + } + + // Extract Opus payload from RTP packet + opusPayload := rtpPacket.Payload + if len(opusPayload) > 0 { + // Send to buffered channel for processing + select { + case session.audioFrameChan <- opusPayload: + // Frame sent successfully + default: + // Channel is full, drop the frame + scopedLogger.Warn().Msg("Audio frame channel full, dropping frame") + } + } + } + }() + } + }) + // Read incoming RTCP packets // Before these packets are returned they are processed by interceptors. For things // like NACK this needs to be called. - go func() { - rtcpBuf := make([]byte, 1500) - for { - if _, _, rtcpErr := rtpSender.Read(rtcpBuf); rtcpErr != nil { - return - } - } - }() + go drainRtpSender(videoRtpSender) + go drainRtpSender(audioRtpSender) + var isConnected bool peerConnection.OnICECandidate(func(candidate *webrtc.ICECandidate) { @@ -351,6 +422,11 @@ func newSession(config SessionConfig) (*Session, error) { scopedLogger.Warn().Err(err).Msg("unmount image failed on connection close") } } + // Stop audio processing and input manager + session.stopAudioProcessor() + if session.AudioInputManager != nil { + session.AudioInputManager.Stop() + } if isConnected { isConnected = false actionSessions-- @@ -364,6 +440,72 @@ func newSession(config SessionConfig) (*Session, error) { return session, nil } +// startAudioProcessor starts the dedicated audio processing goroutine +func (s *Session) startAudioProcessor(logger zerolog.Logger) { + s.audioWg.Add(1) + go func() { + defer s.audioWg.Done() + logger.Debug().Msg("Audio processor goroutine started") + + for { + select { + case frame := <-s.audioFrameChan: + if s.AudioInputManager != nil { + // Check if audio input manager is ready before processing frames + if s.AudioInputManager.IsReady() { + err 
:= s.AudioInputManager.WriteOpusFrame(frame) + if err != nil { + logger.Warn().Err(err).Msg("Failed to write Opus frame to audio input manager") + } + } else { + // Audio input manager not ready, drop frame silently + // This prevents the "client not connected" errors during startup + logger.Debug().Msg("Audio input manager not ready, dropping frame") + } + } + case <-s.audioStopChan: + logger.Debug().Msg("Audio processor goroutine stopping") + return + } + } + }() +} + +// stopAudioProcessor stops the audio processing goroutine +func (s *Session) stopAudioProcessor() { + close(s.audioStopChan) + s.audioWg.Wait() +} + +// ReplaceAudioTrack replaces the current audio track with a new one +func (s *Session) ReplaceAudioTrack(newTrack *webrtc.TrackLocalStaticSample) error { + if s.AudioRtpSender == nil { + return fmt.Errorf("audio RTP sender not available") + } + + // Replace the track using the RTP sender + if err := s.AudioRtpSender.ReplaceTrack(newTrack); err != nil { + return fmt.Errorf("failed to replace audio track: %w", err) + } + + // Update the session's audio track reference + s.AudioTrack = newTrack + return nil +} + +func drainRtpSender(rtpSender *webrtc.RTPSender) { + // Lock to OS thread to isolate RTCP processing + runtime.LockOSThread() + defer runtime.UnlockOSThread() + + rtcpBuf := make([]byte, 1500) + for { + if _, _, err := rtpSender.Read(rtcpBuf); err != nil { + return + } + } +} + var actionSessions = 0 func onActiveSessionsChanged() {