diff --git a/package-lock.json b/package-lock.json
index e0fa5a2..f4264ec 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -14,6 +14,7 @@
         "@thepassle/app-tools": "^0.9.12",
         "@xenova/transformers": "^2.17.1",
         "lit": "^3.1.3",
+        "marked": "^12.0.2",
         "onnxruntime-web": "^1.18.0",
         "urlpattern-polyfill": "^10.0.0",
         "workbox-build": "^7.1.0",
@@ -3971,6 +3972,17 @@
         "sourcemap-codec": "^1.4.8"
       }
     },
+    "node_modules/marked": {
+      "version": "12.0.2",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.2.tgz",
+      "integrity": "sha512-qXUm7e/YKFoqFPYPa3Ukg9xlI5cyAtGmyEIzMfW//m6kXwCy2Ps9DYf5ioijFKQ8qyuscrHoY04iJGctu2Kg0Q==",
+      "bin": {
+        "marked": "bin/marked.js"
+      },
+      "engines": {
+        "node": ">= 18"
+      }
+    },
     "node_modules/merge2": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
@@ -8197,6 +8209,11 @@
         "sourcemap-codec": "^1.4.8"
       }
     },
+    "marked": {
+      "version": "12.0.2",
+      "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.2.tgz",
+      "integrity": "sha512-qXUm7e/YKFoqFPYPa3Ukg9xlI5cyAtGmyEIzMfW//m6kXwCy2Ps9DYf5ioijFKQ8qyuscrHoY04iJGctu2Kg0Q=="
+    },
     "merge2": {
       "version": "1.4.1",
       "resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
diff --git a/package.json b/package.json
index 10be8b7..bd35bd3 100644
--- a/package.json
+++ b/package.json
@@ -20,6 +20,7 @@
     "@thepassle/app-tools": "^0.9.12",
     "@xenova/transformers": "^2.17.1",
    "lit": "^3.1.3",
+    "marked": "^12.0.2",
     "onnxruntime-web": "^1.18.0",
     "urlpattern-polyfill": "^10.0.0",
     "workbox-build": "^7.1.0",
diff --git a/src/pages/app-home.ts b/src/pages/app-home.ts
index 78b1969..174ff53 100644
--- a/src/pages/app-home.ts
+++ b/src/pages/app-home.ts
@@ -4,7 +4,6 @@ import { property, state, customElement } from 'lit/decorators.js';
 import '@fluentui/web-components/button.js';
 import '@fluentui/web-components/text-input.js';
 import '@fluentui/web-components/text.js';
-import '@fluentui/web-components/label.js';
 
 import { styles } from '../styles/shared-styles';
 
@@ -15,8 +14,6 @@ import '../components/loading-toast';
 
 @customElement('app-home')
 export class AppHome extends LitElement {
-  // For more information on using properties and state in lit
-  // check out this link https://lit.dev/docs/components/properties/
   @property() message = 'Welcome!';
 
   @state() previousMessages: any[] = [];
@@ -24,6 +21,8 @@
 
   @state() query: string | undefined = undefined;
 
+  phiWorker: Worker | undefined;
+
   static styles = [
     styles,
     css`
@@ -56,6 +55,18 @@
        align-items: center;
      }
 
+      fluent-text p {
+        font-size: 16px;
+      }
+
+      li.assistant fluent-text {
+        background: #292929;
+      }
+
+      li fluent-text {
+        background: var(--colorBrandBackground);
+      }
+
      fluent-text-input {
        flex: 1;
        max-width: unset;
@@ -91,47 +102,6 @@
        margin-right: unset;
      }
 
-      #actions-menu {
-        display: flex;
-        gap: 8px;
-        flex-direction: row;
-        justify-content: space-between;
-
-        margin-bottom: 10px;
-      }
-
-      #main-action-block {
-        display: flex;
-        align-items: center;
-        gap: 8px;
-      }
-
-      #file-data-block {
-        display: flex;
-        gap: 4px;
-      }
-
-      #file-size {
-        color: grey;
-        font-size: 10px;
-      }
-
-      #file-name {
-        color: grey;
-        font-size: 12px;
-        font-weight: bold;
-
-        max-width: 169px;
-        white-space: nowrap;
-        text-overflow: ellipsis;
-        overflow-x: hidden;
-      }
-
-      #file-data-block {
-        display: flex;
-        flex-direction: column;
-      }
-
      #toolbar {
        display: flex;
        align-items: center;
@@ -162,16 +132,20 @@ export class AppHome extends LitElement {
 
     `];
 
   async firstUpdated() {
-    // this method is a lifecycle even in lit
-    // for more info check out the lit docs https://lit.dev/docs/components/lifecycle/
-    console.log('This is your home page');
-
-    const { Init } = await import('../services/phi');
-    await Init(false);
-
-
-    this.loaded = true;
+    this.phiWorker = new Worker(
+      new URL('../services/phi.ts', import.meta.url),
+      { type: 'module' }
+    );
+    console.log("phiWorker", this.phiWorker);
+
+    this.phiWorker.onmessage = (event: any) => {
+      if (event.data.type === "loaded") {
+        this.loaded = true;
+      }
+    }
+
+    this.phiWorker.postMessage({ type: "Init" });
 
     //set up to listen for the enter button
     window.addEventListener("keydown", (e) => {
@@ -184,6 +158,8 @@
   }
 
   async sendMessage() {
+    const marked = await import('marked');
+
     this.previousMessages = [
       ...this.previousMessages,
       {
@@ -209,15 +185,25 @@
       }
     ];
 
-    const { Query } = await import('../services/phi');
-    await Query(false, origQuery, (message: string) => {
-      console.log("Message received: ", message);
-      completeMessage = message;
-
-      // update last previous message.content
-      this.previousMessages[this.previousMessages.length - 1].content = completeMessage;
-      this.requestUpdate();
-    });
+    this.phiWorker!.onmessage = async (event: any) => {
+      if (event.data.type === "response") {
+        const message = event.data.response;
+        completeMessage = message;
+
+        this.previousMessages[this.previousMessages.length - 1].content = await marked.parse(completeMessage);
+
+        this.previousMessages = [...this.previousMessages];
+
+        this.requestUpdate();
+      }
+    }
+
+    this.phiWorker!.postMessage({
+      type: "Query",
+      continuation: false,
+      prompt: origQuery
+    });
   }
 
   handleInputChange(query: string) {
@@ -236,10 +222,10 @@
             ` : html`
              No messages yet
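Note for reviewers: app-home.ts and the new phi worker now talk over a small, untyped postMessage protocol (Init → loaded, Query → response/done). Below is a minimal typed sketch of that contract, inferred from the messages in the diff above; the ToWorker/FromWorker names are illustrative and not part of this codebase.

```ts
// Sketch of the main-thread <-> phi-worker message contract.
// Shapes mirror the postMessage calls above; the union type names are hypothetical.
type ToWorker =
  | { type: 'Init' }
  | { type: 'Query'; continuation: boolean; prompt: string };

type FromWorker =
  | { type: 'loaded' }                      // model and tokenizer are ready
  | { type: 'response'; response: string }  // decoded text so far, streamed per token
  | { type: 'done' };                       // generation finished

const phiWorker = new Worker(
  new URL('../services/phi.ts', import.meta.url),
  { type: 'module' }
);

phiWorker.onmessage = (event: MessageEvent<FromWorker>) => {
  switch (event.data.type) {
    case 'loaded':
      console.log('ready for queries');
      break;
    case 'response':
      console.log('partial completion:', event.data.response);
      break;
    case 'done':
      console.log('generation complete');
      break;
  }
};

const init: ToWorker = { type: 'Init' };
phiWorker.postMessage(init);
```

Because each `response` message carries the full decoded text so far rather than a delta, the component can simply overwrite the last message's content on every event, which is what sendMessage() does above.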
diff --git a/src/services/ai-worker.ts b/src/services/ai-worker.ts
deleted file mode 100644
index bef156d..0000000
--- a/src/services/ai-worker.ts
+++ /dev/null
@@ -1,123 +0,0 @@
-// @ts-ignore
-import { AutomaticSpeechRecognitionPipeline, pipeline } from '@xenova/transformers';
-
-let transcriber: AutomaticSpeechRecognitionPipeline | undefined = undefined;
-
-self.onmessage = async (e) => {
-  if (e.data.type === 'transcribe') {
-    return new Promise((resolve) => {
-      localTranscribe(e.data.blob, e.data.model).then((transcription) => {
-        self.postMessage({
-          type: 'transcribe',
-          transcription: transcription.text,
-        });
-        resolve(transcription);
-      })
-    })
-  }
-  else if (e.data.type === "load") {
-    await loadTranscriber(e.data.model || "tiny");
-    return Promise.resolve();
-  }
-  else {
-    return Promise.reject('Unknown message type');
-  }
-}
-
-export async function loadTranscriber(model: "tiny" | "base"): Promise<void> {
-  return new Promise(async (resolve) => {
-    if (!transcriber) {
-      try {
-        transcriber = await pipeline('automatic-speech-recognition', `Xenova/whisper-${model}`);
-        console.log("Transcriber loaded", transcriber)
-      }
-      catch (err) {
-        console.error("err", err);
-      }
-
-      resolve();
-    }
-    else {
-      resolve();
-    }
-  })
-}
-
-export async function localTranscribe(audio: Blob, model: "tiny" | "base"): Promise<any> {
-  return new Promise(async (resolve, reject) => {
-    await loadTranscriber(model);
-
-    if (transcriber) {
-      // @ts-ignore
-      const output = await transcriber(audio, {
-        top_k: 0,
-        do_sample: false,
-        return_timestamps: true,
-        force_full_sequences: false,
-        chunk_length_s: 30,
-        stride_length_s: 5,
-        callback_function: callback_function, // after each generation step
-        chunk_callback: chunk_callback, // after each chunk is processed
-      });
-
-      resolve(output);
-    }
-    else {
-      reject("No transcriber loaded");
-    }
-  })
-}
-
-// Storage for chunks to be processed. Initialise with an empty chunk.
-const chunks_to_process = [
-  {
-    tokens: [],
-    finalised: false,
-  },
-];
-function chunk_callback(chunk: any) {
-  let last = chunks_to_process[chunks_to_process.length - 1];
-
-  // Overwrite last chunk with new info
-  Object.assign(last, chunk);
-  last.finalised = true;
-
-  // Create an empty chunk after, if it not the last chunk
-  if (!chunk.is_last) {
-    chunks_to_process.push({
-      tokens: [],
-      finalised: false,
-    });
-  }
-}
-
-
-// Inject custom callback function to handle merging of chunks
-function callback_function(item: any) {
-  const time_precision =
-    transcriber?.processor.feature_extractor.config.chunk_length /
-    transcriber?.model.config.max_source_positions;
-
-  const last: any = chunks_to_process[chunks_to_process.length - 1];
-
-  // Update tokens of last chunk
-  last.tokens = [...item[0].output_token_ids];
-
-  if (last.tokens.length > 1) {
-    // Merge text chunks
-    // TODO optimise so we don't have to decode all chunks every time
-
-    // @ts-ignore
-    const data = transcriber?.tokenizer._decode_asr(chunks_to_process, {
-      time_precision: time_precision,
-      return_timestamps: true,
-      force_full_sequences: false,
-    });
-
-    self.postMessage({
-      type: 'transcribe-interim',
-      transcription: data[0],
-    });
-  }
-}
\ No newline at end of file
diff --git a/src/services/llm.ts b/src/services/llm.ts
index 98781d4..d5e33f7 100644
--- a/src/services/llm.ts
+++ b/src/services/llm.ts
@@ -1,10 +1,12 @@
+// @ts-ignore
 import * as ort from 'onnxruntime-web/webgpu';
 
 ort.env.wasm.numThreads = 1;
 ort.env.wasm.simd = true;
-ort.env.wasm.wasmPaths = document.location.pathname.replace('index.html', '') + 'public/';
+// todo: set this with env variable for easier dev / prod builds
+ort.env.wasm.wasmPaths = "/";
 
 //
 // load file from server or cache
 //
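On the TODO above: if this project builds with Vite (the `?worker` import in the deleted whisper.ts below suggests it does), one option is to derive the path from the build's base URL instead of hardcoding "/". A sketch under that assumption:

```ts
// Assumes a Vite build: import.meta.env.BASE_URL is "/" during dev and the
// configured `base` in production, so the onnxruntime .wasm files resolve
// in both environments without a hardcoded path.
import * as ort from 'onnxruntime-web/webgpu';

ort.env.wasm.wasmPaths = import.meta.env.BASE_URL;
```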
diff --git a/src/services/phi.ts b/src/services/phi.ts
index 28730bf..7a42aff 100644
--- a/src/services/phi.ts
+++ b/src/services/phi.ts
@@ -1,13 +1,29 @@
 import { env, AutoTokenizer } from '@xenova/transformers';
 import { LLM } from './llm.js';
 
+onmessage = async (e) => {
+  if (e.data.type === 'Init') {
+    Init(false).then(() => {
+      console.log("loaded from worker");
+      postMessage({ type: 'loaded' });
+    })
+  }
+  else if (e.data.type === 'Query') {
+    Query(e.data.continuation, e.data.prompt).then(() => {
+      postMessage({ type: 'done' });
+    });
+  }
+}
+
+
 const MODELS: any = {
   "phi3": { name: "phi3", path: "microsoft/Phi-3-mini-4k-instruct-onnx-web", externaldata: true },
   "phi3dev": { name: "phi3dev", path: "schmuell/Phi-3-mini-4k-instruct-onnx-web", externaldata: true },
 };
 
+
 function getConfig() {
-  const query = window.location.search.substring(1);
+  const query = '';
   let config: any = {
     model: "phi3",
     provider: "webgpu",
@@ -20,7 +36,7 @@ function getConfig() {
     local: 0,
   }
   let vars = query.split("&");
-  for (var i = 0; i < vars.length; i++) {
+  for (let i = 0; i < vars.length; i++) {
     let pair = vars[i].split("=");
     if (pair[0] in config) {
       const key = pair[0];
@@ -58,52 +74,63 @@ function token_to_text(tokenizer: any, tokens: any, startidx: any) {
   return txt;
 }
 
-export async function Query(continuation: any, query: any, cb: Function) {
-  let prompt = (continuation) ? query : `<|system|>\nYou are a friendly assistant.<|end|>\n<|user|>\n${query}<|end|>\n<|assistant|>\n`;
-
-  const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
-
-  // clear caches
-  // TODO: use kv_cache for continuation
-  llm.initilize_feed();
-
-  const start_timer = performance.now();
-  const output_index = llm.output_tokens.length + input_ids.length;
-  const output_tokens = await llm.generate(input_ids, (output_tokens: any) => {
-    if (output_tokens.length == input_ids.length + 1) {
-      // time to first token
-      const took = (performance.now() - start_timer) / 1000;
-      console.log(`time to first token in ${took.toFixed(1)}sec, ${input_ids.length} tokens`);
-    }
-    cb(token_to_text(tokenizer, output_tokens, output_index));
-  }, { max_tokens: config.max_tokens });
-
-  const took = (performance.now() - start_timer) / 1000;
-  cb(token_to_text(tokenizer, output_tokens, output_index));
-  const seqlen = output_tokens.length - output_index;
-  console.log(`${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`);
+export async function Query(continuation: any, query: any/*, cb: Function*/): Promise<void> {
+  return new Promise(async (resolve) => {
+    let prompt = (continuation) ? query : `<|system|>\nYou are a friendly assistant.<|end|>\n<|user|>\n${query}<|end|>\n<|assistant|>\n`;
+
+    const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
+
+    // clear caches
+    // TODO: use kv_cache for continuation
+    llm.initilize_feed();
+
+    const start_timer = performance.now();
+    const output_index = llm.output_tokens.length + input_ids.length;
+    const output_tokens = await llm.generate(input_ids, (output_tokens: any) => {
+      if (output_tokens.length == input_ids.length + 1) {
+        // time to first token
+        const took = (performance.now() - start_timer) / 1000;
+        console.log(`time to first token in ${took.toFixed(1)}sec, ${input_ids.length} tokens`);
+      }
+      // cb(token_to_text(tokenizer, output_tokens, output_index));
+      postMessage({ type: 'response', response: token_to_text(tokenizer, output_tokens, output_index) });
+    }, { max_tokens: config.max_tokens });
+
+    const took = (performance.now() - start_timer) / 1000;
+    // cb(token_to_text(tokenizer, output_tokens, output_index));
+    postMessage({ type: 'response', response: token_to_text(tokenizer, output_tokens, output_index) });
+    const seqlen = output_tokens.length - output_index;
+    console.log(`${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`);
+
+    resolve();
+  });
 }
 
 //
 // Load the model and tokenizer
 //
-export async function Init(hasFP16: boolean) {
-  try {
-    tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
-
-    console.log("Loading model...");
-    await llm.load(config.model, {
-      provider: config.provider,
-      profiler: config.profiler,
-      verbose: config.verbose,
-      local: config.local,
-      max_tokens: config.max_tokens,
-      hasFP16: hasFP16,
-    });
-    console.log("Ready.");
-  } catch (error) {
-    console.log(error);
-  }
+export async function Init(hasFP16: boolean): Promise<any> {
+  return new Promise(async (resolve, reject) => {
+    try {
+      tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
+
+      console.log("Loading model...");
+      await llm.load(config.model, {
+        provider: config.provider,
+        profiler: config.profiler,
+        verbose: config.verbose,
+        local: config.local,
+        max_tokens: config.max_tokens,
+        hasFP16: hasFP16,
+      });
+      console.log("Ready.");
+
+      resolve("Ready");
+    } catch (error) {
+      console.log(error);
+      reject(error);
+    }
+  })
 }
 
 // e
\ No newline at end of file
diff --git a/src/services/whisper.ts b/src/services/whisper.ts
deleted file mode 100644
index 3e48c34..0000000
--- a/src/services/whisper.ts
+++ /dev/null
@@ -1,48 +0,0 @@
-let whisperWorker: Worker;
-
-// @ts-ignore
-import WhisperWorker from './ai-worker?worker'
-
-export async function loadTranscriber(model: "tiny" | "base"): Promise<void> {
-  whisperWorker = new WhisperWorker();
-  whisperWorker.postMessage({
-    type: "load",
-    model: model || "tiny",
-  });
-}
-
-export function doLocalWhisper(audioFile: Blob, model: "tiny" | "base"): Promise<any> {
-  return new Promise((resolve) => {
-    const fileReader = new FileReader();
-    fileReader.onloadend = async () => {
-      const audioCTX = new AudioContext({
-        sampleRate: 16000,
-      });
-      const arrayBuffer = fileReader.result as ArrayBuffer;
-      const audioData = await audioCTX.decodeAudioData(arrayBuffer);
-
-      let audio = audioData.getChannelData(0);
-
-      whisperWorker.onmessage = async (e) => {
-        if (e.data.type === "transcribe") {
-          resolve(e.data);
-        }
-        else if (e.data.type === "transcribe-interim") {
-          window.dispatchEvent(new CustomEvent('interim-transcription', {
-            detail: {
-              message: e.data.transcription,
-            }
-          }));
-        }
-      }
-
-      whisperWorker.postMessage({
-        type: "transcribe",
-        blob: audio,
-        model: model || "tiny",
-      })
-
-    };
-    fileReader.readAsArrayBuffer(audioFile);
-  })
-}
\ No newline at end of file
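A closing note on the marked integration: sendMessage() now stores HTML strings (the output of marked.parse) in message.content, so the template that renders previousMessages (its markup was largely lost in the render() hunk above) has to inject that content as HTML rather than interpolate it as text. A hedged sketch using Lit's unsafeHTML directive; the renderMessage helper is illustrative, not from the diff:

```ts
// Sketch only: rendering a chat message whose `content` holds the HTML
// produced by marked.parse() in sendMessage() above.
import { html } from 'lit';
import { unsafeHTML } from 'lit/directives/unsafe-html.js';

const renderMessage = (message: { type: string; content: string }) => html`
  <li class=${message.type}>
    <fluent-text>${unsafeHTML(message.content)}</fluent-text>
  </li>
`;
```

Since that HTML is derived from model output, it may also be worth passing it through a sanitizer such as DOMPurify before injection.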