diff --git a/app/__init__.py b/app/__init__.py
deleted file mode 100644
index e69de29..0000000
diff --git a/app/webui/README.md b/app/webui/README.md
index d3955fc..6d8fc45 100644
--- a/app/webui/README.md
+++ b/app/webui/README.md
@@ -1,7 +1,7 @@
 ## Translation Agent WebUI
 
-This repository contains a Gradio web UI for a translation agent that utilizes various language models for translation.
+A Gradio Web UI for the translation agent.
 
 ### Preview
 
@@ -14,35 +14,30 @@ This repository contains a Gradio web UI for a translation agent that utilizes v
 - **Multiple API Support:** Integrates with popular language models like:
     - Groq
     - OpenAI
-    - Cohere
     - Ollama
     - Together AI
-    - Hugging Face Inference API
     ...
-Llama Index supported, easily extendable
 - **Different LLM for reflection**: Now you can enable second Endpoint to use another LLM for reflection.
 
 **Getting Started**
 
-1. **Install Dependencies(Using Python Venv):**
+1. **Install Dependencies:**
 
     **Linux**
     ```bash
     git clone https://github.com/andrewyng/translation-agent.git
     cd translation-agent
-    python -m venv web_ui
-    source web_ui/bin/activate
-    pip install -r app/webui/requirements.txt
-
+    poetry install --with app
+    poetry shell
     ```
 
    **Windows**
    ```bash
    git clone https://github.com/andrewyng/translation-agent.git
    cd translation-agent
-    python -m venv web_ui
-    .\web_ui\Scripts\activate
-    pip install -r app/webui/requirements.txt
+    poetry install --with app
+    poetry shell
+    python .\app\webui\app.py
    ```
 
@@ -52,15 +47,19 @@ Llama Index supported, easily extendable
    ```
    OPENAI_API_KEY="sk-xxxxx" # Keep this field
    GROQ_API_KEY="xxxxx"
-    COHERE_API_KEY="xxxxx"
    TOGETHER_API_KEY="xxxxx"
-    HF_TOKEN="xxxxx"
    ```
    - Then you can also set the API_KEY in webui.
 
 3. **Run the Web UI:**
+
+    **Linux**
+    ```bash
+    python app/webui/app.py
+    ```
+    **Windows**
    ```bash
-    python -m app.webui.app
+    python .\app\webui\app.py
    ```
 
 4. **Access the Web UI:**
@@ -70,10 +69,9 @@ Llama Index supported, easily extendable
 1. Select your desired translation API from the Endpoint dropdown menu.
 2. Input the source language, target language, and country(optional).
-3. If using Hugging Face API, enter your `HF_TOKEN` in the `api_key` textbox, enter `MODEL_ID` or `HF_ENDPOINT_URL` in `Model` textbox.
-4. Input the source text or upload your document file.
-5. Submit and get translation, the UI will display the translated text with tokenization and highlight differences.
-6. Enable Second Endpoint, you can add another endpoint by different LLMs for reflection.
+3. Input the source text or upload your document file.
+4. Submit to get the translation; the UI will display the translated text with tokenization and highlighted differences.
+5. Enable the Second Endpoint option to add another LLM for reflection.
 
 **Customization:**
 
@@ -89,4 +87,4 @@ This project is licensed under the MIT License.
 
 **DEMO:**
 
-[Huggingface Demo](https://huggingface.co/spaces/vilarin/Translation-Agent-WebUI)
+[Huggingface Demo](https://huggingface.co/spaces/vilarin/Translation-Agent-WebUI)
\ No newline at end of file
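Note on step 2 of the README above (this note and the sketch below are editorial additions, not part of the diff): the API keys live in a `.env` file and can also be entered later in the Web UI. A minimal pre-flight check, assuming `python-dotenv` (already declared in `pyproject.toml`) is used to load that file; the `required` tuple is illustrative only.

```python
# Hypothetical pre-flight check before launching the Web UI.
# Assumes python-dotenv and the .env keys named in the README above.
import os

from dotenv import load_dotenv

load_dotenv()  # reads OPENAI_API_KEY, GROQ_API_KEY, TOGETHER_API_KEY from .env

required = ("OPENAI_API_KEY",)  # the other keys can be entered in the Web UI instead
missing = [key for key in required if not os.getenv(key)]
if missing:
    raise SystemExit(f"Missing keys in .env: {', '.join(missing)}")
```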
diff --git a/app/webui/__init__.py b/app/webui/__init__.py
index e69de29..9cfe7b4 100644
--- a/app/webui/__init__.py
+++ b/app/webui/__init__.py
@@ -0,0 +1 @@
+from .app import *
\ No newline at end of file
diff --git a/app/webui/app.py b/app/webui/app.py
index a210b2c..39ac80b 100644
--- a/app/webui/app.py
+++ b/app/webui/app.py
@@ -1,15 +1,8 @@
-import sys
 import os
-
-# Add the project root to the Python path
-project_root = os.path.abspath(os.path.join(os.path.dirname(__file__), '..', '..'))
-sys.path.insert(0, project_root)
-
 import re
 import gradio as gr
 from glob import glob
-from app.webui.process import model_load, diff_texts, translator, translator_sec
-from llama_index.core import SimpleDirectoryReader
+from process import model_load, diff_texts, translator, translator_sec, extract_docx, extract_pdf, extract_text
 
 def huanik(
     endpoint: str,
@@ -24,8 +17,7 @@ def huanik(
     source_text: str,
     country: str,
     max_tokens: int,
-    context_window: int,
-    num_output: int,
+    temperature: int,
     rpm: int,
 ):
 
@@ -33,7 +25,7 @@
         raise gr.Error("Please check that the content or options are entered correctly.")
 
     try:
-        model_load(endpoint, model, api_key, context_window, num_output, rpm)
+        model_load(endpoint, model, api_key, temperature, rpm)
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
 
@@ -44,8 +36,6 @@ def huanik(
             endpoint2=endpoint2,
             model2=model2,
             api_key2=api_key2,
-            context_window=context_window,
-            num_output=num_output,
             source_lang=source_lang,
             target_lang=target_lang,
             source_text=source_text,
@@ -76,20 +66,24 @@ def update_model(endpoint):
     endpoint_model_map = {
         "Groq": "llama3-70b-8192",
         "OpenAI": "gpt-4o",
-        "Cohere": "command-r",
         "TogetherAI": "Qwen/Qwen2-72B-Instruct",
         "Ollama": "llama3",
-        "Huggingface": "mistralai/Mistral-7B-Instruct-v0.3"
     }
     return gr.update(value=endpoint_model_map[endpoint])
 
-def read_doc(file):
-    docs = SimpleDirectoryReader(input_files=[file]).load_data()
-    texts = ""
-    for doc in docs:
-        texts += doc.text
-    texts = re.sub(r'(?m)^\s*$\n?', '', texts)
-    return texts
+def read_doc(path):
+    file_type = path.split(".")[-1]
+    print(file_type)
+    if file_type in ["pdf", "txt", "py", "docx", "json", "cpp", "md"]:
+        if file_type.endswith("pdf"):
+            content = extract_pdf(path)
+        elif file_type.endswith("docx"):
+            content = extract_docx(path)
+        else:
+            content = extract_text(path)
+        return re.sub(r'(?m)^\s*$\n?', '', content)
+    else:
+        raise gr.Error("Oops, unsupported files.")
 
 def enable_sec(choice):
     if choice:
@@ -195,7 +189,7 @@ def closeBtnHide(output_final):
         with gr.Column(scale=1) as menubar:
             endpoint = gr.Dropdown(
                 label="Endpoint",
-                choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
+                choices=["Groq","OpenAI","TogetherAI","Ollama"],
                 value="OpenAI",
             )
             choice = gr.Checkbox(label="Additional Endpoint", info="Additional endpoint for reflection")
@@ -204,7 +198,7 @@ def closeBtnHide(output_final):
             with gr.Column(visible=False) as AddEndpoint:
                 endpoint2 = gr.Dropdown(
                     label="Additional Endpoint",
-                    choices=["Groq","OpenAI","Cohere","TogetherAI","Ollama","Huggingface"],
+                    choices=["Groq","OpenAI","TogetherAI","Ollama"],
                     value="OpenAI",
                 )
                 model2 = gr.Textbox(label="Model", value="gpt-4o", )
@@ -230,19 +224,12 @@ def closeBtnHide(output_final):
                 value=1000,
                 step=8,
             )
-            context_window = gr.Slider(
-                label="Context Window",
-                minimum=512,
-                maximum=8192,
-                value=4096,
-                step=8,
-            )
-            num_output = gr.Slider(
-                label="Output Num",
-                minimum=256,
-                maximum=8192,
-                value=512,
-                step=8,
+            temperature = gr.Slider(
+                label="Temperature",
+                minimum=0,
+                maximum=1.0,
+                value=0.3,
+                step=0.1,
             )
             rpm = gr.Slider(
                 label="Request Per Minute",
@@ -251,6 +238,10 @@ def closeBtnHide(output_final):
                 value=60,
                 step=1,
             )
+            # json_mode = gr.Checkbox(
+            #     False,
+            #     label="Json Mode",
+            # )
         with gr.Column(scale=4):
             source_text = gr.Textbox(
                 label="Source Text",
@@ -275,14 +266,14 @@ def closeBtnHide(output_final):
         close = gr.Button(value="Stop", visible=False)
 
     switchBtn.click(fn=switch, inputs=[source_lang,source_text,target_lang,output_final], outputs=[source_lang,source_text,target_lang,output_final])
-
+
     menuBtn.click(fn=update_menu, inputs=visible, outputs=[visible, menubar], js=JS)
     endpoint.change(fn=update_model, inputs=[endpoint], outputs=[model])
-
+
     choice.select(fn=enable_sec, inputs=[choice], outputs=[AddEndpoint])
     endpoint2.change(fn=update_model, inputs=[endpoint2], outputs=[model2])
-
-    start_ta = submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, context_window, num_output, rpm], outputs=[output_init, output_reflect, output_final, output_diff])
+
+    start_ta = submit.click(fn=huanik, inputs=[endpoint, model, api_key, choice, endpoint2, model2, api_key2, source_lang, target_lang, source_text, country, max_tokens, temperature, rpm], outputs=[output_init, output_reflect, output_final, output_diff])
     upload.upload(fn=read_doc, inputs = upload, outputs = source_text)
     output_final.change(fn=export_txt, inputs=output_final, outputs=[export])
diff --git a/app/webui/patch.py b/app/webui/patch.py
index 50b3fc1..48a3509 100644
--- a/app/webui/patch.py
+++ b/app/webui/patch.py
@@ -1,72 +1,47 @@
-# a monkey patch to use llama-index completion
+# a monkey patch for completion
 import os
 import time
 from functools import wraps
 from threading import Lock
 from typing import Union
 
-import src.translation_agent.utils as utils
-
-from llama_index.llms.groq import Groq
-from llama_index.llms.cohere import Cohere
-from llama_index.llms.openai import OpenAI
-from llama_index.llms.together import TogetherLLM
-from llama_index.llms.ollama import Ollama
-from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI
-
-from llama_index.core import Settings
-from llama_index.core.llms import ChatMessage
+import translation_agent.utils as Utils
+from groq import Groq
+from together import Together
+from ollama import Client
+import gradio as gr
 
 RPM = 60
+MODEL = ""
+TEMPERATURE = 0.3
+JS_MODE = False
+ENDPOINT = ""
+client = Utils.client
 
 # Add your LLMs here
 def model_load(
     endpoint: str,
     model: str,
     api_key: str = None,
-    context_window: int = 4096,
-    num_output: int = 512,
+    temperature: float = TEMPERATURE,
     rpm: int = RPM,
+    js_mode: bool = JS_MODE,
+
     ):
+    global client, RPM, MODEL, TEMPERATURE, JS_MODE, ENDPOINT
+    ENDPOINT = endpoint
+    RPM = rpm
+    MODEL = model
+    TEMPERATURE = temperature
+    JS_MODE = js_mode
+
     if endpoint == "Groq":
-        llm = Groq(
-            model=model,
-            api_key=api_key if api_key else os.getenv("GROQ_API_KEY"),
-        )
-    elif endpoint == "Cohere":
-        llm = Cohere(
-            model=model,
-            api_key=api_key if api_key else os.getenv("COHERE_API_KEY"),
-        )
-    elif endpoint == "OpenAI":
-        llm = OpenAI(
-            model=model,
-            api_key=api_key if api_key else os.getenv("OPENAI_API_KEY"),
-        )
+        client = Groq(api_key=api_key if api_key else os.getenv("GROQ_API_KEY"))
     elif endpoint == "TogetherAI":
-        llm = TogetherLLM(
-            model=model,
-            api_key=api_key if api_key else os.getenv("TOGETHER_API_KEY"),
-        )
+        client = Together(api_key=api_key if api_key else os.getenv("TOGETHER_API_KEY"))
     elif endpoint == "Ollama":
-        llm = Ollama(
-            model=model,
-            request_timeout=120.0)
-    elif endpoint == "Huggingface":
-        llm = HuggingFaceInferenceAPI(
-            model_name=model,
-            token=api_key if api_key else os.getenv("HF_TOKEN"),
-            task="text-generation",
-        )
-
-    global RPM
-    RPM = rpm
-
-    Settings.llm = llm
-    # maximum input size to the LLM
-    Settings.context_window = context_window
-
-    # number of tokens reserved for text generation.
-    Settings.num_output = num_output
+        client = Client(host='http://localhost:11434')
+    else:
+        client = Utils.openai.OpenAI(api_key=api_key if api_key else os.getenv("OPENAI_API_KEY"))
 
 def rate_limit(get_max_per_minute):
     def decorator(func):
@@ -92,77 +67,111 @@ def wrapper(*args, **kwargs):
 @rate_limit(lambda: RPM)
 def get_completion(
-        prompt: str,
-        system_message: str = "You are a helpful assistant.",
-        temperature: float = 0.3,
-        json_mode: bool = False,
-    ) -> Union[str, dict]:
-    """
-    Generate a completion using the OpenAI API.
-
-    Args:
-        prompt (str): The user's prompt or query.
-        system_message (str, optional): The system message to set the context for the assistant.
-            Defaults to "You are a helpful assistant.".
-        temperature (float, optional): The sampling temperature for controlling the randomness of the generated text.
-            Defaults to 0.3.
-        json_mode (bool, optional): Whether to return the response in JSON format.
-            Defaults to False.
-
-    Returns:
-        Union[str, dict]: The generated completion.
-            If json_mode is True, returns the complete API response as a dictionary.
-            If json_mode is False, returns the generated text as a string.
-    """
-    llm = Settings.llm
-    if llm.class_name() == "HuggingFaceInferenceAPI":
-        llm.system_prompt = system_message
-        messages = [
-            ChatMessage(
-                role="user", content=prompt),
-        ]
+    prompt: str,
+    system_message: str = "You are a helpful assistant.",
+    model: str = "gpt-4-turbo",
+    temperature: float = 0.3,
+    json_mode: bool = False,
+) -> Union[str, dict]:
+    """
+    Generate a completion using the OpenAI API.
+
+    Args:
+        prompt (str): The user's prompt or query.
+        system_message (str, optional): The system message to set the context for the assistant.
+            Defaults to "You are a helpful assistant.".
+        model (str, optional): The name of the OpenAI model to use for generating the completion.
+            Defaults to "gpt-4-turbo".
+        temperature (float, optional): The sampling temperature for controlling the randomness of the generated text.
+            Defaults to 0.3.
+        json_mode (bool, optional): Whether to return the response in JSON format.
+            Defaults to False.
+
+    Returns:
+        Union[str, dict]: The generated completion.
+            If json_mode is True, returns the complete API response as a dictionary.
+            If json_mode is False, returns the generated text as a string.
+    """
+
+    model = MODEL
+    temperature = TEMPERATURE
+    json_mode = JS_MODE
+
+    if ENDPOINT == "Ollama":
+        if json_mode:
             try:
-                response = llm.chat(
-                    messages=messages,
-                    temperature=temperature,
+                response = client.chat(
+                    model=model,
+                    format="json",
+                    messages=[
+                        {"role": "system", "content": system_message},
+                        {"role": "user", "content": prompt},
+                    ],
+                    options = {
+                        "temperature": TEMPERATURE,
+                        "top_p": 1.0
+                    },
                 )
-                return response.message.content
+                print(response)
+                return response['message']['content']
             except Exception as e:
                 raise gr.Error(f"An unexpected error occurred: {e}")
         else:
-        messages = [
-            ChatMessage(
-                role="system", content=system_message),
-            ChatMessage(
-                role="user", content=prompt),
-        ]
-
-        if json_mode:
-            response = llm.chat(
+            try:
+                response = client.chat(
+                    model=model,
+                    messages=[
+                        {"role": "system", "content": system_message},
+                        {"role": "user", "content": prompt},
+                    ],
+                    options = {
+                        "temperature": TEMPERATURE,
+                        "top_p": 1.0
+                    },
+                )
+                return response['message']['content']
+            except Exception as e:
+                raise gr.Error(f"An unexpected error occurred: {e}")
+    else:
+        if json_mode:
+            try:
+                response = client.chat.completions.create(
+                    model=model,
                     temperature=temperature,
+                    top_p=1,
                     response_format={"type": "json_object"},
-                messages=messages,
+                    messages=[
+                        {"role": "system", "content": system_message},
+                        {"role": "user", "content": prompt},
+                    ],
                 )
-            return response.message.content
-        else:
-            try:
-                response = llm.chat(
-                    temperature=temperature,
-                    messages=messages,
-                )
-                return response.message.content
-            except Exception as e:
-                raise gr.Error(f"An unexpected error occurred: {e}")
-
-utils.get_completion = get_completion
+                return response.choices[0].message.content
+            except Exception as e:
+                raise gr.Error(f"An unexpected error occurred: {e}")
+        else:
+            try:
+                response = client.chat.completions.create(
+                    model=model,
+                    temperature=temperature,
+                    top_p=1,
+                    messages=[
+                        {"role": "system", "content": system_message},
+                        {"role": "user", "content": prompt},
+                    ],
                )
+                return response.choices[0].message.content
+            except Exception as e:
+                raise gr.Error(f"An unexpected error occurred: {e}")
 
-one_chunk_initial_translation = utils.one_chunk_initial_translation
-one_chunk_reflect_on_translation = utils.one_chunk_reflect_on_translation
-one_chunk_improve_translation = utils.one_chunk_improve_translation
-one_chunk_translate_text = utils.one_chunk_translate_text
-num_tokens_in_string = utils.num_tokens_in_string
-multichunk_initial_translation = utils.multichunk_initial_translation
-multichunk_reflect_on_translation = utils.multichunk_reflect_on_translation
-multichunk_improve_translation = utils.multichunk_improve_translation
-multichunk_translation = utils.multichunk_translation
-calculate_chunk_size =utils.calculate_chunk_size
\ No newline at end of file
+Utils.get_completion = get_completion
+
+one_chunk_initial_translation = Utils.one_chunk_initial_translation
+one_chunk_reflect_on_translation = Utils.one_chunk_reflect_on_translation
+one_chunk_improve_translation = Utils.one_chunk_improve_translation
+one_chunk_translate_text = Utils.one_chunk_translate_text
+num_tokens_in_string = Utils.num_tokens_in_string
+multichunk_initial_translation = Utils.multichunk_initial_translation
+multichunk_reflect_on_translation = Utils.multichunk_reflect_on_translation
+multichunk_improve_translation = Utils.multichunk_improve_translation
+multichunk_translation = Utils.multichunk_translation
+calculate_chunk_size =Utils.calculate_chunk_size
\ No newline at end of file
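Note on `patch.py` above (editorial addition, not part of the diff): `get_completion` is wrapped with `@rate_limit(lambda: RPM)`, but the decorator body is unchanged by this patch and only its opening lines appear as hunk context. Below is a minimal sketch of what such a limiter could look like, reading the per-minute budget lazily so that a later `model_load(..., rpm=...)` call takes effect; this is an assumption, not the repository's exact implementation.

```python
import time
from functools import wraps
from threading import Lock


def rate_limit(get_max_per_minute):
    """Throttle the wrapped function to get_max_per_minute() calls per minute."""
    def decorator(func):
        lock = Lock()
        last_call = [0.0]  # wall-clock time of the previous call

        @wraps(func)
        def wrapper(*args, **kwargs):
            with lock:
                min_interval = 60.0 / get_max_per_minute()  # re-read RPM on every call
                wait = min_interval - (time.time() - last_call[0])
                if wait > 0:
                    time.sleep(wait)
                try:
                    return func(*args, **kwargs)
                finally:
                    last_call[0] = time.time()

        return wrapper

    return decorator
```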
diff --git a/app/webui/process.py b/app/webui/process.py
index e943598..cdde226 100644
--- a/app/webui/process.py
+++ b/app/webui/process.py
@@ -2,13 +2,32 @@
 from simplemma import simple_tokenizer
 from difflib import Differ
 from icecream import ic
-from app.webui.patch import model_load,num_tokens_in_string,one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
-from app.webui.patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
-
-from llama_index.core.node_parser import SentenceSplitter
+from patch import model_load,num_tokens_in_string,one_chunk_initial_translation, one_chunk_reflect_on_translation, one_chunk_improve_translation
+from patch import calculate_chunk_size, multichunk_initial_translation, multichunk_reflect_on_translation, multichunk_improve_translation
+import pymupdf
+import docx
+from langchain_text_splitters import RecursiveCharacterTextSplitter
 
 progress=gr.Progress()
 
+def extract_text(path):
+    return open(path, 'r').read()
+
+def extract_pdf(path):
+    doc = pymupdf.open(path)
+    text = ""
+    for page in doc:
+        text += page.get_text()
+    return text
+
+def extract_docx(path):
+    doc = docx.Document(path)
+    data = []
+    for paragraph in doc.paragraphs:
+        data.append(paragraph.text)
+    content = '\n\n'.join(data)
+    return content
+
 def tokenize(text):
     # Use nltk to tokenize the text
     words = simple_tokenizer(text)
@@ -82,29 +101,28 @@ def translator(
     else:
         ic("Translating text as multiple chunks")
 
-        progress((1,5), desc="Calculate chunk size...")
         token_size = calculate_chunk_size(
             token_count=num_tokens_in_text, token_limit=max_tokens
         )
 
         ic(token_size)
 
-        #using sentence splitter
-        text_parser = SentenceSplitter(
-            chunk_size=token_size,
+        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+            model_name="gpt-4",
+            chunk_size=token_size,
+            chunk_overlap=0,
         )
 
-        progress((2,5), desc="Spilt source text...")
-        source_text_chunks = text_parser.split_text(source_text)
+        source_text_chunks = text_splitter.split_text(source_text)
 
-        progress((3,5), desc="First translation...")
+        progress((1,3), desc="First translation...")
         translation_1_chunks = multichunk_initial_translation(
             source_lang, target_lang, source_text_chunks
         )
 
         init_translation = "".join(translation_1_chunks)
 
-        progress((4,5), desc="Reflection...")
+        progress((2,3), desc="Reflection...")
         reflection_chunks = multichunk_reflect_on_translation(
             source_lang,
             target_lang,
@@ -115,7 +133,7 @@
         reflection = "".join(reflection_chunks)
 
-        progress((5,5), desc="Second translation...")
+        progress((3,3), desc="Second translation...")
         translation_2_chunks = multichunk_improve_translation(
             source_lang,
             target_lang,
@@ -133,8 +151,6 @@ def translator_sec(
     endpoint2: str,
     model2: str,
     api_key2: str,
-    context_window: int,
-    num_output: int,
     source_lang: str,
     target_lang: str,
     source_text: str,
@@ -156,7 +172,7 @@
     )
 
     try:
-        model_load(endpoint2, model2, api_key2, context_window, num_output)
+        model_load(endpoint2, model2, api_key2)
     except Exception as e:
         raise gr.Error(f"An unexpected error occurred: {e}")
 
@@ -175,22 +191,21 @@
     else:
         ic("Translating text as multiple chunks")
 
-        progress((1,5), desc="Calculate chunk size...")
         token_size = calculate_chunk_size(
             token_count=num_tokens_in_text, token_limit=max_tokens
         )
 
         ic(token_size)
 
-        #using sentence splitter
-        text_parser = SentenceSplitter(
-            chunk_size=token_size,
+        text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
+            model_name="gpt-4",
+            chunk_size=token_size,
+            chunk_overlap=0,
         )
 
-        progress((2,5), desc="Spilt source text...")
-        source_text_chunks = text_parser.split_text(source_text)
+        source_text_chunks = text_splitter.split_text(source_text)
 
-        progress((3,5), desc="First translation...")
+        progress((1,3), desc="First translation...")
         translation_1_chunks = multichunk_initial_translation(
             source_lang, target_lang, source_text_chunks
         )
@@ -198,11 +213,11 @@
         init_translation = "".join(translation_1_chunks)
 
         try:
-            model_load(endpoint2, model2, api_key2, context_window, num_output)
+            model_load(endpoint2, model2, api_key2)
         except Exception as e:
             raise gr.Error(f"An unexpected error occurred: {e}")
 
-        progress((4,5), desc="Reflection...")
+        progress((2,3), desc="Reflection...")
         reflection_chunks = multichunk_reflect_on_translation(
             source_lang,
             target_lang,
@@ -213,7 +228,7 @@
         reflection = "".join(reflection_chunks)
 
-        progress((5,5), desc="Second translation...")
+        progress((3,3), desc="Second translation...")
         translation_2_chunks = multichunk_improve_translation(
             source_lang,
             target_lang,
diff --git a/app/webui/requirements.txt b/app/webui/requirements.txt
deleted file mode 100644
index eeac3ce..0000000
--- a/app/webui/requirements.txt
+++ /dev/null
@@ -1,13 +0,0 @@
-llama-index
-llama-index-llms-groq
-llama-index-llms-openai
-llama-index-llms-cohere
-llama-index-llms-together
-llama-index-llms-ollama
-llama-index-llms-huggingface-api
-tiktoken
-icecream
-simplemma
-langchain-text-splitters
-gradio
-docx2txt
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index db621f6..f0947b3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -6,7 +6,10 @@
 authors = ["Andrew Ng "]
 license = "MIT"
 readme = "README.md"
 package-mode = true
-packages = [{ include = "translation_agent", from = "src" }]
+packages = [
+    { include = "translation_agent", from = "src" },
+    { include = "webui", from = "app" },
+    ]
 repository = "https://github.com/andrewyng/translation-agent"
 keywords = ["translation", "agents", "LLM", "machine translation"]
@@ -21,6 +24,15 @@
 icecream = "^2.1.3"
 langchain-text-splitters = "^0.0.1"
 python-dotenv = "^1.0.1"
 
+[tool.poetry.group.app.dependencies]
+simplemma = "^1.0.0"
+gradio = "4.37.2"
+python-docx = "^1.1.2"
+PyMuPDF = "^1.24.7"
+groq = "^0.9.0"
+ollama = "^0.2.1"
+together = "^1.2.1"
+
 [tool.poetry.group.dev]
 optional = true
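Note (editorial addition, not part of the diff): after `poetry install --with app`, the pieces above are driven by the Gradio callbacks in `app/webui/app.py`. Below is a headless sketch of the same flow using only names visible in this diff; the keyword arguments and the assumption that `translator` returns the initial translation, the reflection, and the improved translation (as the three Web UI output boxes suggest) are inferred rather than confirmed by the diff.

```python
# Hypothetical headless run of the translation pipeline wired up by this PR.
# Assumes OPENAI_API_KEY is set and that translator() returns
# (initial_translation, reflection, improved_translation).
from patch import model_load      # configures the module-level client, MODEL, TEMPERATURE, RPM
from process import translator    # chunked translate -> reflect -> improve pipeline

model_load(endpoint="OpenAI", model="gpt-4o", api_key=None, temperature=0.3, rpm=60)

init_translation, reflection, final_translation = translator(
    source_lang="English",
    target_lang="Spanish",
    source_text="Translation agents review their own first draft before rewriting it.",
    country="Mexico",
    max_tokens=1000,
)
print(final_translation)
```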