refactor(examples) Update FedRAG example (#5157)

dstripelis · web-flow · commit ad06f8e55b2f · 2025-03-22T15:45:27.000Z
diff --git a/examples/fedrag/fedrag/client_app.py b/examples/fedrag/fedrag/client_app.py
@@ -1,12 +1,11 @@
 """fedrag: A Flower Federated RAG app."""
 
-import os
-
 from flwr.client import ClientApp
 from flwr.common import ConfigRecord, Context, Message, RecordDict
 
 from fedrag.retriever import Retriever
 
+
 # Flower ClientApp
 app = ClientApp()
 
diff --git a/examples/fedrag/fedrag/llm_querier.py b/examples/fedrag/fedrag/llm_querier.py
@@ -5,6 +5,10 @@
 
 from transformers import AutoTokenizer, AutoModelForCausalLM
 
+import os
+
+os.environ["TOKENIZERS_PARALLELISM"] = "false"  # to avoid deadlocks during tokenization
+
 
 class LLMQuerier:
 
@@ -16,34 +20,47 @@ def __init__(self, model_name, use_gpu=False):
         self.model = AutoModelForCausalLM.from_pretrained(model_name).to(self.device)
         self.tokenizer = AutoTokenizer.from_pretrained(model_name)
 
+        # Fall back to the EOS token when the tokenizer defines no pad token
+        if self.tokenizer.pad_token_id is None:
+            self.tokenizer.pad_token = self.tokenizer.eos_token
+            self.tokenizer.pad_token_id = self.tokenizer.convert_tokens_to_ids(
+                self.tokenizer.pad_token
+            )
+
     def answer(self, question, documents, options, dataset_name, max_new_tokens=10):
         # Format options as A) ... B) ... etc.
         formatted_options = "\n".join([f"{k}) {v}" for k, v in options.items()])
 
         prompt = self.__format_prompt(
             question, documents, formatted_options, dataset_name
         )
-        inputs = self.tokenizer(prompt, return_tensors="pt", truncation=True).to(
-            self.device
-        )
 
+        inputs = self.tokenizer(
+            prompt, padding=True, return_tensors="pt", truncation=True
+        ).to(self.device)
+
+        # Build the attention mask: 1 where the token is not the pad token, 0 otherwise
+        attention_mask = (inputs.input_ids != self.tokenizer.pad_token_id).long()
         outputs = self.model.generate(
             inputs.input_ids,
+            attention_mask=attention_mask,
             max_new_tokens=max_new_tokens,
             early_stopping=False,
-            eos_token_id=self.tokenizer.eos_token_id,
+            pad_token_id=self.tokenizer.pad_token_id,  # set explicitly to avoid the open-end generation log message
+            eos_token_id=self.tokenizer.eos_token_id,  # set explicitly to avoid the open-end generation log message
         )
 
-        full_response = self.tokenizer.decode(outputs[0], skip_special_tokens=False)
-        return full_response, self.__parse_response(full_response, prompt)
+        generated_text = self.tokenizer.decode(outputs[0], skip_special_tokens=True)
+        generated_answer = self.__extract_answer(generated_text, prompt)
+        return prompt, generated_answer
 
     @classmethod
     def __format_prompt(cls, question, documents, options, dataset_name):
-        instruction = None
+        instruction = "You are a helpful medical expert, and your task is to answer a medical question using the relevant documents."
         if dataset_name == "pubmedqa":
             instruction = "As an expert doctor in clinical science and medical knowledge, can you tell me if the following statement is correct? Answer yes, no, or maybe."
         elif dataset_name == "bioasq":
-            "You are an advanced biomedical AI assistant trained to understand and process medical and scientific texts. Given a biomedical question, your goal is to provide a concise and accurate answer based on relevant scientific literature."
+            instruction = "You are an advanced biomedical AI assistant trained to understand and process medical and scientific texts. Given a biomedical question, your goal is to provide a concise and accurate answer based on relevant scientific literature."
 
         ctx_documents = "\n".join(
             [f"Document {i + 1}: {doc}" for i, doc in enumerate(documents)]
@@ -59,16 +76,16 @@ def __format_prompt(cls, question, documents, options, dataset_name):
             Options:
             {options}
 
-            Please answer with only the correct option: """
+            Answer only with the correct option: """
         return prompt
 
     @classmethod
-    def __parse_response(cls, full_response, original_prompt):
+    def __extract_answer(cls, generated_text, original_prompt):
         # Extract only the new generated text
-        response = full_response[len(original_prompt) :].strip()
+        response = generated_text[len(original_prompt) :].strip()
 
         # Find first occurrence of A-D (case-insensitive)
-        match = re.search(r"\b([A-Da-d])\b", response)
-        if match:
-            return match.group(1).upper()
+        option = re.search(r"\b([A-Da-d])\b", response)
+        if option:
+            return option.group(1).upper()
         return None
diff --git a/examples/fedrag/fedrag/retriever.py b/examples/fedrag/fedrag/retriever.py
@@ -2,8 +2,8 @@
 
 import warnings
 
-# Suppress FAISS-specific warnings
-warnings.filterwarnings("ignore", category=DeprecationWarning, module="faiss")
+# Suppress deprecation warnings
+warnings.filterwarnings("ignore", category=DeprecationWarning)
 
 import os
 import json
diff --git a/examples/fedrag/fedrag/server_app.py b/examples/fedrag/fedrag/server_app.py
@@ -2,7 +2,6 @@
 
 import hashlib
 import os
-import random
 import time
 from collections import defaultdict
 from itertools import cycle
@@ -168,14 +167,13 @@ def main(grid: Grid, context: Context) -> None:
             options = q["options"]
             answer = q["answer"]
 
-            response, predicted_answer = llm_querier.answer(
+            prompt, predicted_answer = llm_querier.answer(
                 question, merged_docs, options, dataset_name
             )
 
             # If the model did not predict any value,
             # then discard the question
             if predicted_answer is not None:
-                predicted_answer = random.choice(list(options.keys()))
                 expected_answers[dataset_name].append(answer)
                 predicted_answers[dataset_name].append(predicted_answer)
                 q_et = time.time()