diff --git a/package-lock.json b/package-lock.json
index e0fa5a2..f4264ec 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -14,6 +14,7 @@
"@thepassle/app-tools": "^0.9.12",
"@xenova/transformers": "^2.17.1",
"lit": "^3.1.3",
+ "marked": "^12.0.2",
"onnxruntime-web": "^1.18.0",
"urlpattern-polyfill": "^10.0.0",
"workbox-build": "^7.1.0",
@@ -3971,6 +3972,17 @@
"sourcemap-codec": "^1.4.8"
}
},
+ "node_modules/marked": {
+ "version": "12.0.2",
+ "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.2.tgz",
+ "integrity": "sha512-qXUm7e/YKFoqFPYPa3Ukg9xlI5cyAtGmyEIzMfW//m6kXwCy2Ps9DYf5ioijFKQ8qyuscrHoY04iJGctu2Kg0Q==",
+ "bin": {
+ "marked": "bin/marked.js"
+ },
+ "engines": {
+ "node": ">= 18"
+ }
+ },
"node_modules/merge2": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
@@ -8197,6 +8209,11 @@
"sourcemap-codec": "^1.4.8"
}
},
+ "marked": {
+ "version": "12.0.2",
+ "resolved": "https://registry.npmjs.org/marked/-/marked-12.0.2.tgz",
+ "integrity": "sha512-qXUm7e/YKFoqFPYPa3Ukg9xlI5cyAtGmyEIzMfW//m6kXwCy2Ps9DYf5ioijFKQ8qyuscrHoY04iJGctu2Kg0Q=="
+ },
"merge2": {
"version": "1.4.1",
"resolved": "https://registry.npmjs.org/merge2/-/merge2-1.4.1.tgz",
diff --git a/package.json b/package.json
index 10be8b7..bd35bd3 100644
--- a/package.json
+++ b/package.json
@@ -20,6 +20,7 @@
"@thepassle/app-tools": "^0.9.12",
"@xenova/transformers": "^2.17.1",
"lit": "^3.1.3",
+ "marked": "^12.0.2",
"onnxruntime-web": "^1.18.0",
"urlpattern-polyfill": "^10.0.0",
"workbox-build": "^7.1.0",
diff --git a/src/pages/app-home.ts b/src/pages/app-home.ts
index 78b1969..174ff53 100644
--- a/src/pages/app-home.ts
+++ b/src/pages/app-home.ts
@@ -4,7 +4,6 @@ import { property, state, customElement } from 'lit/decorators.js';
import '@fluentui/web-components/button.js';
import '@fluentui/web-components/text-input.js';
import '@fluentui/web-components/text.js';
-import '@fluentui/web-components/label.js';
import { styles } from '../styles/shared-styles';
@@ -15,8 +14,6 @@ import '../components/loading-toast';
@customElement('app-home')
export class AppHome extends LitElement {
- // For more information on using properties and state in lit
- // check out this link https://lit.dev/docs/components/properties/
@property() message = 'Welcome!';
@state() previousMessages: any[] = [];
@@ -24,6 +21,8 @@ export class AppHome extends LitElement {
@state() query: string | undefined = undefined;
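+  // handle to the Web Worker that hosts the phi model (created in firstUpdated)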
+ phiWorker: Worker | undefined;
+
static styles = [
styles,
css`
@@ -56,6 +55,18 @@ export class AppHome extends LitElement {
align-items: center;
}
+ fluent-text p {
+ font-size: 16px;
+ }
+
+ li.assistant fluent-text {
+ background: #292929;
+ }
+
+ li fluent-text {
+ background: var(--colorBrandBackground);
+ }
+
fluent-text-input {
flex: 1;
max-width: unset;
@@ -91,47 +102,6 @@ export class AppHome extends LitElement {
margin-right: unset;
}
- #actions-menu {
- display: flex;
- gap: 8px;
- flex-direction: row;
- justify-content: space-between;
-
- margin-bottom: 10px;
- }
-
- #main-action-block {
- display: flex;
- align-items: center;
- gap: 8px;
- }
-
- #file-data-block {
- display: flex;
- gap: 4px;
- }
-
- #file-size {
- color: grey;
- font-size: 10px;
- }
-
- #file-name {
- color: grey;
- font-size: 12px;
- font-weight: bold;
-
- max-width: 169px;
- white-space: nowrap;
- text-overflow: ellipsis;
- overflow-x: hidden;
- }
-
- #file-data-block {
- display: flex;
- flex-direction: column;
- }
-
#toolbar {
display: flex;
align-items: center;
@@ -162,16 +132,20 @@ export class AppHome extends LitElement {
`];
async firstUpdated() {
- // this method is a lifecycle even in lit
- // for more info check out the lit docs https://lit.dev/docs/components/lifecycle/
- console.log('This is your home page');
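+    // run the phi model in a Web Worker so model loading and token generation
+    // stay off the UI thread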
+ this.phiWorker = new Worker(
+ new URL('../services/phi.ts', import.meta.url),
+ { type: 'module' }
+ );
+    console.log("phiWorker", this.phiWorker);
+    this.phiWorker.onmessage = (event: any) => {
+      if (event.data.type === "loaded") {
+        this.loaded = true;
+      }
+    };
- const { Init } = await import('../services/phi');
- await Init(false);
-
+ this.phiWorker.postMessage({ type: "Init" });
- this.loaded = true;
//set up to listen for the enter button
window.addEventListener("keydown", (e) => {
@@ -184,6 +158,8 @@ export class AppHome extends LitElement {
}
async sendMessage() {
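+    // marked is imported lazily so the markdown parser stays out of the initial bundle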
+ const marked = await import('marked');
+
this.previousMessages = [
...this.previousMessages,
{
@@ -209,15 +185,25 @@ export class AppHome extends LitElement {
}
];
- const { Query } = await import('../services/phi');
- await Query(false, origQuery, (message: string) => {
- console.log("Message received: ", message);
- completeMessage = message;
+    this.phiWorker!.onmessage = async (event: any) => {
+      if (event.data.type === "response") {
+        completeMessage = event.data.response;
+
+        // render the streamed markdown as HTML on the last (assistant) message
+        this.previousMessages[this.previousMessages.length - 1].content = await marked.parse(completeMessage);
-      // update last previous message.content
-      this.previousMessages[this.previousMessages.length - 1].content = completeMessage;
-      this.requestUpdate();
+        this.requestUpdate();
+      }
+    };
+
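+    // ask the worker to generate; partial output streams back as "response" messages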
+ this.phiWorker!.postMessage({
+ type: "Query",
+ continuation: false,
+ prompt: origQuery
});
+
}
handleInputChange(query: string) {
@@ -236,10 +222,10 @@ export class AppHome extends LitElement {
${this.previousMessages.map((message) => html`
-              <li>
-                ${message.content}
-              </li>
+              <li class="${message.type}">
+                <fluent-text .innerHTML="${message.content}"></fluent-text>
+              </li>
`)
- }
+ }
` : html`
No messages yet
diff --git a/src/services/ai-worker.ts b/src/services/ai-worker.ts
deleted file mode 100644
index bef156d..0000000
--- a/src/services/ai-worker.ts
+++ /dev/null
@@ -1,123 +0,0 @@
-// @ts-ignore
-import { AutomaticSpeechRecognitionPipeline, pipeline } from '@xenova/transformers';
-
-let transcriber: AutomaticSpeechRecognitionPipeline | undefined = undefined;
-
-self.onmessage = async (e) => {
- if (e.data.type === 'transcribe') {
- return new Promise((resolve) => {
- localTranscribe(e.data.blob, e.data.model).then((transcription) => {
- self.postMessage({
- type: 'transcribe',
- transcription: transcription.text,
- });
- resolve(transcription);
- })
- })
- }
- else if (e.data.type === "load") {
- await loadTranscriber(e.data.model || "tiny");
- return Promise.resolve();
- }
- else {
- return Promise.reject('Unknown message type');
- }
-}
-
-export async function loadTranscriber(model: "tiny" | "base"): Promise<void> {
- return new Promise(async (resolve) => {
- if (!transcriber) {
- try {
- transcriber = await pipeline('automatic-speech-recognition', `Xenova/whisper-${model}`);
- console.log("Transcriber loaded", transcriber)
- }
- catch (err) {
- console.error("err", err);
- }
-
- resolve();
- }
- else {
- resolve();
- }
- })
-}
-
-export async function localTranscribe(audio: Blob, model: "tiny" | "base"): Promise<any> {
- return new Promise(async (resolve, reject) => {
- await loadTranscriber(model);
-
- if (transcriber) {
- // @ts-ignore
- const output = await transcriber(audio, {
- top_k: 0,
- do_sample: false,
- return_timestamps: true,
- force_full_sequences: false,
- chunk_length_s: 30,
- stride_length_s: 5,
- callback_function: callback_function, // after each generation step
- chunk_callback: chunk_callback, // after each chunk is processed
- });
-
- resolve(output);
- }
- else {
- reject("No transcriber loaded");
- }
- })
-}
-
-// Storage for chunks to be processed. Initialise with an empty chunk.
-const chunks_to_process = [
- {
- tokens: [],
- finalised: false,
- },
-];
-
-function chunk_callback(chunk: any) {
- let last = chunks_to_process[chunks_to_process.length - 1];
-
- // Overwrite last chunk with new info
- Object.assign(last, chunk);
- last.finalised = true;
-
- // Create an empty chunk after, if it not the last chunk
- if (!chunk.is_last) {
- chunks_to_process.push({
- tokens: [],
- finalised: false,
- });
- }
-}
-
-
-// Inject custom callback function to handle merging of chunks
-function callback_function(item: any) {
- const time_precision =
- transcriber?.processor.feature_extractor.config.chunk_length /
- transcriber?.model.config.max_source_positions;
-
- const last: any = chunks_to_process[chunks_to_process.length - 1];
-
- // Update tokens of last chunk
- last.tokens = [...item[0].output_token_ids];
-
- if (last.tokens.length > 1) {
- // Merge text chunks
- // TODO optimise so we don't have to decode all chunks every time
-
- // @ts-ignore
- const data = transcriber?.tokenizer._decode_asr(chunks_to_process, {
- time_precision: time_precision,
- return_timestamps: true,
- force_full_sequences: false,
- });
-
- self.postMessage({
- type: 'transcribe-interim',
- transcription: data[0],
- });
- }
-}
\ No newline at end of file
diff --git a/src/services/llm.ts b/src/services/llm.ts
index 98781d4..d5e33f7 100644
--- a/src/services/llm.ts
+++ b/src/services/llm.ts
@@ -1,10 +1,12 @@
+// @ts-ignore
import * as ort from 'onnxruntime-web/webgpu';
ort.env.wasm.numThreads = 1;
ort.env.wasm.simd = true;
-ort.env.wasm.wasmPaths = document.location.pathname.replace('index.html', '') + 'public/';
+// TODO: set this via an env variable for easier dev/prod builds
+ort.env.wasm.wasmPaths = "/";
//
// load file from server or cache
diff --git a/src/services/phi.ts b/src/services/phi.ts
index 28730bf..7a42aff 100644
--- a/src/services/phi.ts
+++ b/src/services/phi.ts
@@ -1,13 +1,29 @@
import { env, AutoTokenizer } from '@xenova/transformers';
import { LLM } from './llm.js';
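+
+// This module now runs as a Web Worker: the page posts { type: 'Init' } to load
+// the model (answered with 'loaded') and { type: 'Query', continuation, prompt }
+// to generate; partial output streams back as 'response' messages, then 'done'.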
+onmessage = async (e) => {
+  if (e.data.type === 'Init') {
+    await Init(false);
+    console.log("loaded from worker");
+    postMessage({ type: 'loaded' });
+  }
+  else if (e.data.type === 'Query') {
+    await Query(e.data.continuation, e.data.prompt);
+    postMessage({ type: 'done' });
+  }
+};
+
+
const MODELS: any = {
"phi3": { name: "phi3", path: "microsoft/Phi-3-mini-4k-instruct-onnx-web", externaldata: true },
"phi3dev": { name: "phi3dev", path: "schmuell/Phi-3-mini-4k-instruct-onnx-web", externaldata: true },
};
+
function getConfig() {
- const query = window.location.search.substring(1);
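+    // workers have no window.location, so URL query overrides are disabled and
+    // the defaults below always apply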
+ const query = '';
let config: any = {
model: "phi3",
provider: "webgpu",
@@ -20,7 +36,7 @@ function getConfig() {
local: 0,
}
let vars = query.split("&");
- for (var i = 0; i < vars.length; i++) {
+ for (let i = 0; i < vars.length; i++) {
let pair = vars[i].split("=");
if (pair[0] in config) {
const key = pair[0];
@@ -58,52 +74,63 @@ function token_to_text(tokenizer: any, tokens: any, startidx: any) {
return txt;
}
-export async function Query(continuation: any, query: any, cb: Function) {
- let prompt = (continuation) ? query : `<|system|>\nYou are a friendly assistant.<|end|>\n<|user|>\n${query}<|end|>\n<|assistant|>\n`;
+export async function Query(continuation: any, query: any): Promise<void> {
+ return new Promise(async (resolve) => {
+ let prompt = (continuation) ? query : `<|system|>\nYou are a friendly assistant.<|end|>\n<|user|>\n${query}<|end|>\n<|assistant|>\n`;
- const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
+ const { input_ids } = await tokenizer(prompt, { return_tensor: false, padding: true, truncation: true });
- // clear caches
- // TODO: use kv_cache for continuation
- llm.initilize_feed();
+ // clear caches
+ // TODO: use kv_cache for continuation
+ llm.initilize_feed();
- const start_timer = performance.now();
- const output_index = llm.output_tokens.length + input_ids.length;
- const output_tokens = await llm.generate(input_ids, (output_tokens: any) => {
- if (output_tokens.length == input_ids.length + 1) {
- // time to first token
- const took = (performance.now() - start_timer) / 1000;
- console.log(`time to first token in ${took.toFixed(1)}sec, ${input_ids.length} tokens`);
- }
- cb(token_to_text(tokenizer, output_tokens, output_index));
- }, { max_tokens: config.max_tokens });
-
- const took = (performance.now() - start_timer) / 1000;
- cb(token_to_text(tokenizer, output_tokens, output_index));
- const seqlen = output_tokens.length - output_index;
- console.log(`${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`);
+ const start_timer = performance.now();
+ const output_index = llm.output_tokens.length + input_ids.length;
+ const output_tokens = await llm.generate(input_ids, (output_tokens: any) => {
+ if (output_tokens.length == input_ids.length + 1) {
+ // time to first token
+ const took = (performance.now() - start_timer) / 1000;
+ console.log(`time to first token in ${took.toFixed(1)}sec, ${input_ids.length} tokens`);
+ }
+ postMessage({ type: 'response', response: token_to_text(tokenizer, output_tokens, output_index) });
+ }, { max_tokens: config.max_tokens });
+
+ const took = (performance.now() - start_timer) / 1000;
+ postMessage({ type: 'response', response: token_to_text(tokenizer, output_tokens, output_index) });
+ const seqlen = output_tokens.length - output_index;
+ console.log(`${seqlen} tokens in ${took.toFixed(1)}sec, ${(seqlen / took).toFixed(2)} tokens/sec`);
+
+ resolve();
+ });
}
//
// Load the model and tokenizer
//
-export async function Init(hasFP16: boolean) {
- try {
- tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
-
- console.log("Loading model...");
- await llm.load(config.model, {
- provider: config.provider,
- profiler: config.profiler,
- verbose: config.verbose,
- local: config.local,
- max_tokens: config.max_tokens,
- hasFP16: hasFP16,
- });
- console.log("Ready.");
- } catch (error) {
- console.log(error);
- }
+export async function Init(hasFP16: boolean): Promise<string> {
+ return new Promise(async (resolve, reject) => {
+ try {
+ tokenizer = await AutoTokenizer.from_pretrained(config.model.path);
+
+ console.log("Loading model...");
+ await llm.load(config.model, {
+ provider: config.provider,
+ profiler: config.profiler,
+ verbose: config.verbose,
+ local: config.local,
+ max_tokens: config.max_tokens,
+ hasFP16: hasFP16,
+ });
+ console.log("Ready.");
+
+ resolve("Ready");
+ } catch (error) {
+ console.log(error);
+ reject(error);
+ }
+ })
}
\ No newline at end of file
diff --git a/src/services/whisper.ts b/src/services/whisper.ts
deleted file mode 100644
index 3e48c34..0000000
--- a/src/services/whisper.ts
+++ /dev/null
@@ -1,48 +0,0 @@
-let whisperWorker: Worker;
-
-// @ts-ignore
-import WhisperWorker from './ai-worker?worker'
-
-export async function loadTranscriber(model: "tiny" | "base"): Promise<void> {
- whisperWorker = new WhisperWorker();
- whisperWorker.postMessage({
- type: "load",
- model: model || "tiny",
- });
-}
-
-export function doLocalWhisper(audioFile: Blob, model: "tiny" | "base"): Promise<any> {
- return new Promise((resolve) => {
- const fileReader = new FileReader();
- fileReader.onloadend = async () => {
- const audioCTX = new AudioContext({
- sampleRate: 16000,
- });
- const arrayBuffer = fileReader.result as ArrayBuffer;
- const audioData = await audioCTX.decodeAudioData(arrayBuffer);
-
- let audio = audioData.getChannelData(0);
-
- whisperWorker.onmessage = async (e) => {
- if (e.data.type === "transcribe") {
- resolve(e.data);
- }
- else if (e.data.type === "transcribe-interim") {
- window.dispatchEvent(new CustomEvent('interim-transcription', {
- detail: {
- message: e.data.transcription,
- }
- }));
- }
- }
-
- whisperWorker.postMessage({
- type: "transcribe",
- blob: audio,
- model: model || "tiny",
- })
-
- };
- fileReader.readAsArrayBuffer(audioFile);
- })
-}
\ No newline at end of file