Commit 2447937

Add prompt_to_lora_id_mapping adjustment in fix_prompts()
Signed-off-by: Jou-An Chen <[email protected]>
Parent: 2ba81dc

1 file changed: +25 −4 lines changed
QEfficient/generation/text_generation_inference.py

Lines changed: 25 additions & 4 deletions
@@ -192,32 +192,53 @@ def get_input_prompts(prompt: str, prompts_txt_file_path: str) -> List[str]:
     return prompt
 
 
-def fix_prompts(prompt: List[str], batch_size: int, full_batch_size: int = None):
+def fix_prompts(
+    prompt: List[str],
+    batch_size: int,
+    prompt_to_lora_id_mapping: Optional[List[int]] = None,
+    full_batch_size: int = None,
+):
     """
-    Adjusts the list of prompts to match the required batch size.
+    Adjusts the list of prompts and prompt_to_lora_id_mapping to match the required batch size.
 
     ``Mandatory`` Args:
         prompt (List[str]): List of input prompts.
         batch_size (int): The batch size to process at a time.
 
     ``Optional`` Args:
+        prompt_to_lora_id_mapping (Optional[List[int]]): Mapping to associate prompts with their respective LoRA adapter.
         full_batch_size (Optional[int]): The full batch size if different from batch_size.
 
     Returns:
         List[str]: Adjusted list of prompts.
+        List[str]: Adjusted list of prompt_to_lora_id_mapping.
     """
     exec_batch_size = full_batch_size if full_batch_size is not None else batch_size
 
     if len(prompt) < exec_batch_size:
         logger.warning("Number of prompts are less than batch size/full batch size, repeating to required batch size")
         prompt = (prompt * (exec_batch_size // len(prompt) + 1))[:exec_batch_size]
+        if prompt_to_lora_id_mapping is not None:
+            logger.warning(
+                "Prompt_to_lora_id_mapping are less than batch size/full batch size, repeating to required batch size"
+            )
+            prompt_to_lora_id_mapping = (
+                prompt_to_lora_id_mapping * (exec_batch_size // len(prompt_to_lora_id_mapping) + 1)
+            )[:exec_batch_size]
     elif full_batch_size is None and len(prompt) % batch_size != 0:
         logger.warning(
             "Number of prompts are not multiple of batch size, dropping last incomplete batch from given input prompts"
         )
         prompt = prompt[: batch_size * (len(prompt) // batch_size)]
+        if prompt_to_lora_id_mapping is not None:
+            logger.warning(
+                "prompt_to_lora_id_mapping are not multiple of batch size, dropping last incomplete batch from given input prompts"
+            )
+            prompt_to_lora_id_mapping = prompt_to_lora_id_mapping[
+                : batch_size * (len(prompt_to_lora_id_mapping) // batch_size)
+            ]
 
-    return prompt
+    return prompt, prompt_to_lora_id_mapping
 
 
 def read_prompts_txt_file(prompts_txt_file_path: str):

@@ -311,7 +332,7 @@ def cloud_ai_100_exec_kv(
     """
     batch_size, ctx_len, full_batch_size = get_compilation_dims(qpc_path)
     prompt: List[str] = get_input_prompts(prompt, prompts_txt_file_path)
-    prompt = fix_prompts(prompt, batch_size, full_batch_size)
+    prompt, prompt_to_lora_id_mapping = fix_prompts(prompt, batch_size, prompt_to_lora_id_mapping, full_batch_size)
     generate_text = TextGeneration(
         tokenizer=tokenizer,
         qpc_path=qpc_path,
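For context, here is a minimal standalone sketch of the repeat-and-truncate behavior this patch now applies to both the prompt list and the LoRA-id mapping. The helper name pad_or_trim and the sample values are illustrative, not part of the commit; the logic mirrors the fix_prompts() code above applied to a single list.

from typing import Optional

def pad_or_trim(items: list, batch_size: int, full_batch_size: Optional[int] = None) -> list:
    # Hypothetical helper mirroring the fix_prompts() adjustment for one list.
    exec_batch_size = full_batch_size if full_batch_size is not None else batch_size
    if len(items) < exec_batch_size:
        # Tile the list until it is long enough, then cut to exactly exec_batch_size.
        return (items * (exec_batch_size // len(items) + 1))[:exec_batch_size]
    if full_batch_size is None and len(items) % batch_size != 0:
        # Drop the trailing incomplete batch.
        return items[: batch_size * (len(items) // batch_size)]
    return items

prompts = pad_or_trim(["Hello", "Hi"], batch_size=1, full_batch_size=5)
mapping = pad_or_trim([0, 1], batch_size=1, full_batch_size=5)
print(prompts)  # ['Hello', 'Hi', 'Hello', 'Hi', 'Hello']
print(mapping)  # [0, 1, 0, 1, 0]

Because both lists are tiled and truncated with the same arithmetic, prompt i stays paired with adapter id mapping[i] after adjustment, which is what lets the call site in cloud_ai_100_exec_kv() unpack and pass along the tuple returned by fix_prompts().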
