feat: ocr

This commit is contained in:
Siyuan Miao 2025-09-10 14:10:02 +02:00
parent bcc307b147
commit 598fd966dd
5 changed files with 79 additions and 12 deletions

View File

@ -4,7 +4,7 @@
"version": "2025.09.03.2100", "version": "2025.09.03.2100",
"type": "module", "type": "module",
"engines": { "engines": {
"node": "22.15.0" "node": "^22.15.0"
}, },
"scripts": { "scripts": {
"dev": "./dev_device.sh", "dev": "./dev_device.sh",
@ -78,6 +78,7 @@
"prettier": "^3.6.2", "prettier": "^3.6.2",
"prettier-plugin-tailwindcss": "^0.6.14", "prettier-plugin-tailwindcss": "^0.6.14",
"tailwindcss": "^4.1.12", "tailwindcss": "^4.1.12",
"tesseract.js": "6.0.1",
"typescript": "^5.9.2", "typescript": "^5.9.2",
"vite": "^7.1.4", "vite": "^7.1.4",
"vite-tsconfig-paths": "^5.1.4" "vite-tsconfig-paths": "^5.1.4"

View File

@ -1,4 +1,4 @@
import { MdOutlineContentPasteGo } from "react-icons/md"; import { MdOutlineContentPasteGo, MdOutlineDocumentScanner } from "react-icons/md";
import { LuCable, LuHardDrive, LuMaximize, LuSettings, LuSignal } from "react-icons/lu"; import { LuCable, LuHardDrive, LuMaximize, LuSettings, LuSignal } from "react-icons/lu";
import { FaKeyboard } from "react-icons/fa6"; import { FaKeyboard } from "react-icons/fa6";
import { Popover, PopoverButton, PopoverPanel } from "@headlessui/react"; import { Popover, PopoverButton, PopoverPanel } from "@headlessui/react";
@ -19,11 +19,14 @@ import WakeOnLanModal from "@/components/popovers/WakeOnLan/Index";
import MountPopopover from "@/components/popovers/MountPopover"; import MountPopopover from "@/components/popovers/MountPopover";
import ExtensionPopover from "@/components/popovers/ExtensionPopover"; import ExtensionPopover from "@/components/popovers/ExtensionPopover";
import { useDeviceUiNavigation } from "@/hooks/useAppNavigation"; import { useDeviceUiNavigation } from "@/hooks/useAppNavigation";
import OCRModal from "./popovers/OCRModal";
export default function Actionbar({ export default function Actionbar({
requestFullscreen, requestFullscreen,
videoElmRef,
}: { }: {
requestFullscreen: () => Promise<void>; requestFullscreen: () => Promise<void>;
videoElmRef?: React.RefObject<HTMLVideoElement | null>;
}) { }) {
const { navigateTo } = useDeviceUiNavigation(); const { navigateTo } = useDeviceUiNavigation();
const { isVirtualKeyboardEnabled, setVirtualKeyboardEnabled } = useHidStore(); const { isVirtualKeyboardEnabled, setVirtualKeyboardEnabled } = useHidStore();
@ -99,6 +102,36 @@ export default function Actionbar({
}} }}
</PopoverPanel> </PopoverPanel>
</Popover> </Popover>
<Popover>
<PopoverButton as={Fragment}>
<Button
size="XS"
theme="light"
text="OCR"
LeadingIcon={MdOutlineDocumentScanner}
onClick={() => {
setDisableVideoFocusTrap(true);
}}
/>
</PopoverButton>
<PopoverPanel
anchor="bottom start"
transition
className={cx(
"z-10 flex w-[420px] origin-top flex-col overflow-visible!",
"flex origin-top flex-col transition duration-300 ease-out data-closed:translate-y-8 data-closed:opacity-0",
)}
>
{({ open }) => {
checkIfStateChanged(open);
return (
<div className="mx-auto w-full max-w-xl">
<OCRModal videoElmRef={videoElmRef} />
</div>
);
}}
</PopoverPanel>
</Popover>
<div className="relative"> <div className="relative">
<Popover> <Popover>
<PopoverButton as={Fragment}> <PopoverButton as={Fragment}>

View File

@ -487,7 +487,7 @@ export default function WebRTCVideo() {
disabled={peerConnection?.connectionState !== "connected"} disabled={peerConnection?.connectionState !== "connected"}
className="contents" className="contents"
> >
<Actionbar requestFullscreen={requestFullscreen} /> <Actionbar requestFullscreen={requestFullscreen} videoElmRef={videoElm} />
<MacroBar /> <MacroBar />
</fieldset> </fieldset>
</div> </div>

23
ui/src/hooks/useOCR.ts Normal file
View File

@ -0,0 +1,23 @@
import { type WorkerOptions } from "tesseract.js";
/**
 * Image sources that `ocrImage` accepts and forwards to the tesseract.js
 * worker's `recognize` call.
 *
 * NOTE(review): presumably mirrors the browser-side input union that
 * tesseract.js itself accepts — confirm against the library's typings.
 */
export type ImageLike = string | HTMLImageElement | HTMLCanvasElement | HTMLVideoElement
| CanvasRenderingContext2D | File | Blob | OffscreenCanvas;
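// tesseract.js is loaded lazily through a dynamic import() inside ocrImage,
// so the library is only fetched when OCR is actually requested.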
/**
 * Runs OCR over a single image and resolves with the recognized text.
 *
 * tesseract.js is pulled in with a dynamic `import()` so it is only loaded
 * when this function is called. A fresh worker is created for every call and
 * is always terminated afterwards, including when recognition fails, so a
 * rejected `recognize` does not leave a worker running.
 *
 * @param language - Language code, or list of codes, handed to `createWorker`.
 * @param image - Source to recognize; forwarded unchanged to `worker.recognize`.
 * @param options - Optional worker options handed to `createWorker`.
 * @returns The `text` field of the recognition result.
 * @throws Whatever `createWorker`, `recognize` or `terminate` rejects with.
 */
async function ocrImage(
  language: string | string[],
  image: ImageLike,
  options?: Partial<WorkerOptions>,
): Promise<string> {
  const { createWorker } = await import('tesseract.js')
  const worker = await createWorker(language, undefined, options)
  try {
    const { data: { text } } = await worker.recognize(image)
    return text
  } finally {
    // Release the worker on both the success and the failure path.
    await worker.terminate()
  }
}
/**
 * Hook-style accessor for the OCR helpers of this module.
 *
 * @returns An object whose `ocrImage` property is the module's `ocrImage` function.
 */
export default function useOCR() {
  const api = { ocrImage }
  return api
}

View File

@ -31,20 +31,30 @@ export default defineConfig(({ mode, command }) => {
esbuild: { esbuild: {
pure: ["console.debug"], pure: ["console.debug"],
}, },
build: { outDir: isCloud ? "dist" : "../static" }, build: {
outDir: isCloud ? "dist" : "../static",
rollupOptions: {
external: ["tesseract.js"],
output: {
paths: {
"tesseract.js": "https://cdn.jsdelivr.net/npm/tesseract.js@6.0.1/dist/tesseract.esm.min.js",
},
},
},
},
server: { server: {
host: "0.0.0.0", host: "0.0.0.0",
https: useSSL, https: useSSL,
proxy: JETKVM_PROXY_URL proxy: JETKVM_PROXY_URL
? { ? {
"/me": JETKVM_PROXY_URL, "/me": JETKVM_PROXY_URL,
"/device": JETKVM_PROXY_URL, "/device": JETKVM_PROXY_URL,
"/webrtc": JETKVM_PROXY_URL, "/webrtc": JETKVM_PROXY_URL,
"/auth": JETKVM_PROXY_URL, "/auth": JETKVM_PROXY_URL,
"/storage": JETKVM_PROXY_URL, "/storage": JETKVM_PROXY_URL,
"/cloud": JETKVM_PROXY_URL, "/cloud": JETKVM_PROXY_URL,
"/developer": JETKVM_PROXY_URL, "/developer": JETKVM_PROXY_URL,
} }
: undefined, : undefined,
}, },
base: onDevice && command === "build" ? "/static" : "/", base: onDevice && command === "build" ? "/static" : "/",