feat: Context Template from settings for QueryDocs Allowing the text … #1398

Open · wants to merge 6 commits into main
6 changes: 6 additions & 0 deletions private_gpt/server/chat/chat_service.py
Contributor
Where is this new context_template set (at usage)?

Author

I am not sure if I understand.

If "query docs" is being used, it creates a ContextChatEngine (from llama_index) that accepts this parameter. Currently it's never passed, so it defaults to a hardcoded value. With this change, if there is a value defined in the settings file, the value in settings will be used instead. This should work both from the Gradio UI or from the API.

As per the Discord discussion, I didn't expose this configuration to the API.
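
For reference, a minimal sketch of what the resulting wiring amounts to. The `build_chat_engine` helper and the shape of the `settings` object are illustrative assumptions; `ContextChatEngine.from_defaults` and its `context_template` parameter are the llama_index pieces this PR relies on:

```python
from llama_index.chat_engine import ContextChatEngine

def build_chat_engine(settings, retriever, llm):
    # str | None -- stays None when the settings file defines no template.
    context_template = settings.rag.default_context_template
    # Passing None makes ContextChatEngine fall back to its hardcoded
    # default template; a string (containing "{context_str}") overrides it.
    return ContextChatEngine.from_defaults(
        retriever=retriever,
        llm=llm,
        context_template=context_template,
    )
```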

@@ -100,6 +100,7 @@ def __init__(
             embed_model=embedding_component.embedding_model,
             show_progress=True,
         )
+        self.default_context_template = settings.rag.default_context_template
 
     def _chat_engine(
         self,
@@ -109,6 +110,10 @@
     ) -> BaseChatEngine:
         settings = self.settings
         if use_context:
+            if self.default_context_template is not None:
+                context_template = self.default_context_template
+            else:
+                context_template = None
             vector_index_retriever = self.vector_store_component.get_retriever(
                 index=self.index,
                 context_filter=context_filter,
@@ -132,6 +137,7 @@
                 retriever=vector_index_retriever,
                 llm=self.llm_component.llm,  # Takes no effect at the moment
                 node_postprocessors=node_postprocessors,
+                context_template=context_template,
             )
         else:
             return SimpleChatEngine.from_defaults(
7 changes: 7 additions & 0 deletions private_gpt/settings/settings.py
@@ -345,6 +345,13 @@ class RerankSettings(BaseModel):
 
 
 class RagSettings(BaseModel):
+    default_context_template: str | None = Field(
+        None,
+        description=(
+            "The default context template to use for the chat engine when using RAG. "
+            "If none is given, the default system prompt from llama_index is used. "
+        ),
+    )
     similarity_top_k: int = Field(
         2,
         description="This value controls the number of documents returned by the RAG pipeline or considered for reranking if enabled.",
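
As a quick sanity check of how the new field behaves, a standalone sketch mirroring the `RagSettings` change above (illustrative only, not part of the diff):

```python
from pydantic import BaseModel, Field

class RagSettings(BaseModel):
    default_context_template: str | None = Field(
        None,
        description="Default context template for the RAG chat engine.",
    )

# Key absent from the YAML: the field stays None, so the llama_index
# default template is used downstream.
assert RagSettings().default_context_template is None

# Key present: the raw string (with "{context_str}") is passed through as-is.
custom = RagSettings(default_context_template="Context:\n{context_str}")
assert "{context_str}" in custom.default_context_template
```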
16 changes: 11 additions & 5 deletions settings.yaml
@@ -44,6 +44,11 @@ llm:
   temperature: 0.1 # The temperature of the model. Increasing the temperature will make the model answer more creatively. A value of 0.1 would be more factual. (Default: 0.1)
 
 rag:
+  default_context_template: |
+    Context information is below.
+    --------------------
+    {context_str}
+    --------------------
   similarity_top_k: 2
   #This value controls how many "top" documents the RAG returns to use in the context.
   #similarity_value: 0.45
@@ -54,11 +59,11 @@ rag:
     top_n: 1
 
 clickhouse:
-    host: localhost
-    port: 8443
-    username: admin
-    password: clickhouse
-    database: embeddings
+  host: localhost
+  port: 8443
+  username: admin
+  password: clickhouse
+  database: embeddings
 
 llamacpp:
   llm_hf_repo_id: TheBloke/Mistral-7B-Instruct-v0.2-GGUF
@@ -125,3 +130,4 @@ gemini:
   api_key: ${GOOGLE_API_KEY:}
   model: models/gemini-pro
   embedding_model: models/embedding-001
+