Skip to content

Commit d8224a9

Browse files
Merge pull request #220 from zeya30/za/progress-bar
update unit tests
2 parents 96e482e + 9554cff commit d8224a9

File tree

7 files changed

+231
-34
lines changed

7 files changed

+231
-34
lines changed

.github/workflows/ci.yaml

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,18 @@ jobs:
4040
with:
4141
persist-credentials: false
4242

43+
- name: Free Disk Space (Ubuntu)
44+
if: matrix.os == 'ubuntu-latest'
45+
uses: jlumbroso/free-disk-space@main
46+
with:
47+
tool-cache: false
48+
android: true
49+
dotnet: true
50+
haskell: true
51+
large-packages: true
52+
docker-images: true
53+
swap-storage: true
54+
4355
- name: Set up Python
4456
uses: actions/setup-python@v6.0.0
4557
with:
@@ -117,4 +129,4 @@ jobs:
117129
uses: github/codeql-action/upload-sarif@v3
118130
with:
119131
sarif_file: semgrep.sarif
120-
if: always()
132+
if: always()

examples/adversarial/adversarial_toxicity.ipynb

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -215,7 +215,7 @@
215215
" system_style=\"benign\",\n",
216216
" prompt_style=\"nontoxic\",\n",
217217
" sample_size=10, # 1000 is the recommended sample_size\n",
218-
" show_progress_bars=False\n",
218+
" show_progress_bars=False,\n",
219219
")"
220220
]
221221
},

langfair/generator/redteaming.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,6 @@
2020
import pkgutil
2121
import random
2222
from typing import Any, Dict, List, Optional, Tuple, Union
23-
from rich.progress import Progress
2423

2524
from langfair.constants.cost_data import FAILURE_MESSAGE
2625
from langfair.generator import ResponseGenerator
@@ -110,7 +109,7 @@ async def counterfactual(
110109
111110
count : int, default=25
112111
Specifies number of responses to generate for each prompt.
113-
112+
114113
show_progress_bars : bool, default=True
115114
If True, displays progress bars while generating responses
116115
@@ -123,7 +122,6 @@ async def counterfactual(
123122
dataset = await self._generate_from_template(
124123
prompt_templates=prompt_templates, system_styles=system_styles, count=count
125124
)
126-
print("Responses successfully generated!")
127125
return self._format_result(
128126
dataset=dataset,
129127
prompt_templates=prompt_templates,
@@ -166,7 +164,7 @@ async def toxicity(
166164
167165
custom_system_prompt : str or None, default=None
168166
Optional argument for user to provide custom system prompt for toxicity generation.
169-
167+
170168
show_progress_bars : bool, default=True
171169
If True, displays progress bars while generating responses
172170
@@ -187,7 +185,10 @@ async def toxicity(
187185
else SYSTEM_PROMPT_DICT[system_style]
188186
)
189187
result = await self.generate_responses(
190-
prompts=prompts, system_prompt=system_prompt, count=count, show_progress_bars=show_progress_bars
188+
prompts=prompts,
189+
system_prompt=system_prompt,
190+
count=count,
191+
show_progress_bars=show_progress_bars,
191192
)
192193
responses = result["data"]["response"]
193194
duplicated_prompts = [
@@ -211,7 +212,7 @@ async def _generate_from_template(
211212
prompt_templates: Dict[str, List[str]],
212213
system_styles: List[str],
213214
count: int,
214-
show_progress_bars: bool = True,
215+
show_progress_bars: bool = True,
215216
) -> Dict[str, Any]:
216217
"""
217218
Used for generating responses from template-based prompt. This method is
@@ -230,7 +231,7 @@ async def _generate_from_template(
230231
prompts=prompt_templates["text"],
231232
system_prompt=system_prompt,
232233
count=count,
233-
show_progress_bars=show_progress_bars
234+
show_progress_bars=show_progress_bars,
234235
)
235236
dataset[system_style + "_response"] = tmp["data"]["response"]
236237
return dataset

tests/conftest.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
# Copyright 2025 CVS Health and/or one of its affiliates
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
from types import SimpleNamespace
16+
17+
import pytest
18+
19+
20+
class FakeTask:
    """Minimal record mirroring the fields of a rich ``Task`` that tests read."""

    def __init__(self, task_id, description, total):
        # Attribute names intentionally match rich's Task surface.
        self.id = task_id
        self.description = description
        self.total = total
        # Freshly created tasks have done no work yet.
        self.completed = 0
26+
27+
28+
class FakeProgress:
    """
    Drop-in test double for ``rich.progress.Progress``.

    Emulates only the surface the code under test relies on:
    - ``add_task(description, total)`` -> integer task id
    - ``update(task_id, completed=...)``
    - ``tasks[task_id]`` -> FakeTask
    - ``start()`` / ``stop()`` toggling ``live.is_started``
    """

    def __init__(self):
        # Monotonically increasing ids handed out by add_task, starting at 0.
        self._next_id = 0
        self.tasks = {}
        # Only the is_started flag of rich's Live object is emulated.
        self.live = SimpleNamespace(is_started=False)

    def add_task(self, description, total):
        """Register a new task and return its id."""
        new_id, self._next_id = self._next_id, self._next_id + 1
        self.tasks[new_id] = FakeTask(new_id, description, total)
        return new_id

    def update(self, task_id, completed=None):
        """Set the completion counter of an existing task; no-op when None."""
        task = self.tasks[task_id]
        if completed is not None:
            task.completed = completed

    def start(self):
        """Mark the fake live display as running."""
        self.live.is_started = True

    def stop(self):
        """Mark the fake live display as stopped."""
        self.live.is_started = False
58+
59+
60+
@pytest.fixture(autouse=True)
def mock_display_progress(request, monkeypatch):
    """
    Mock progress helpers globally so tests never touch Rich's Live display.

    Tests marked ``@pytest.mark.real_progress`` opt out and keep the real
    rich-based helpers (the marker is registered in ``pytest_configure``).
    """
    # Fix: the real_progress marker was registered but never consulted, so
    # marked tests were still mocked. Honor the documented opt-out here.
    if request.node.get_closest_marker("real_progress") is not None:
        return

    import langfair.utils.display as display_module

    def _start_progress_bar(existing_progress_bar=None):
        # Reuse a caller-supplied fake; otherwise hand back a fresh, started one.
        if isinstance(existing_progress_bar, FakeProgress):
            existing_progress_bar.start()
            return existing_progress_bar
        fake = FakeProgress()
        fake.start()
        return fake

    def _stop_progress_bar(progress_bar):
        # Only stop objects this mock created; ignore anything else.
        if isinstance(progress_bar, FakeProgress):
            progress_bar.stop()

    monkeypatch.setattr(display_module, "start_progress_bar", _start_progress_bar)
    monkeypatch.setattr(display_module, "stop_progress_bar", _stop_progress_bar)
81+
82+
83+
def pytest_configure(config):
    """Register custom markers so ``pytest --strict-markers`` accepts them."""
    marker_spec = (
        "real_progress: Opt-out of the FakeProgress mock (use real rich.Progress)."
    )
    config.addinivalue_line("markers", marker_spec)

tests/test_counterfactual_metrics.py

Lines changed: 14 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -63,26 +63,31 @@ def test_rougel():
6363
assert rougel.evaluate(data["text1"], data["text2"]) == actual_results["test3"]
6464

6565

66-
def test_senitement1():
66+
def test_sentiment1():
6767
sentiment = SentimentBias()
6868
assert sentiment.evaluate(data["text1"], data["text2"]) == actual_results["test4"]
6969

7070

71-
def test_senitement2():
71+
def test_sentiment2():
7272
sentiment = SentimentBias(parity="weak")
7373
assert sentiment.evaluate(data["text1"], data["text2"]) == pytest.approx(
7474
actual_results["test5"], rel=1e-02
7575
)
7676

7777

78-
def test_senitement3(monkeypatch):
79-
MOCKED_CLASSIFIER_RESULT = [
80-
actual_results["classifier_result1"],
81-
actual_results["classifier_result2"],
82-
]
78+
def test_sentiment3(monkeypatch):
79+
group1 = actual_results["classifier_result1"]
80+
group2 = actual_results["classifier_result2"]
8381

84-
def mock_get_classifier(*args, **kwargs):
85-
return MOCKED_CLASSIFIER_RESULT.pop()
82+
def mock_get_classifier(texts, return_all_scores=True):
83+
if texts in [[t] for t in data["text1"]]:
84+
idx = data["text1"].index(texts[0])
85+
return [group1[idx]]
86+
elif texts in [[t] for t in data["text2"]]:
87+
idx = data["text2"].index(texts[0])
88+
return [group2[idx]]
89+
else:
90+
return [[]]
8691

8792
sentiment = SentimentBias(classifier="roberta")
8893
monkeypatch.setattr(sentiment, "classifier_instance", mock_get_classifier)

tests/test_display.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
# Copyright 2025 CVS Health and/or one of its affiliates
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
import time
16+
17+
import pytest
18+
19+
import langfair.utils.display as display_module
20+
from langfair.utils.display import (
21+
ConditionalBarColumn,
22+
ConditionalSpinnerColumn,
23+
ConditionalTextColumn,
24+
ConditionalTextPercentageColumn,
25+
ConditionalTimeElapsedColumn,
26+
)
27+
28+
29+
@pytest.fixture(autouse=True)
def fast_sleep(monkeypatch):
    """Neutralize time.sleep so progress-bar tests never actually wait."""

    def _no_sleep(x):
        return None

    monkeypatch.setattr(time, "sleep", _no_sleep)
32+
33+
34+
def test_start_progress_bar_without_existing():
    """A bar created from scratch starts live, tracks updates, and stops."""
    bar = display_module.start_progress_bar()
    assert bar.live.is_started

    tid = bar.add_task("[Task]Test", total=10)
    bar.update(tid, completed=5)
    assert bar.tasks[tid].completed == 5

    display_module.stop_progress_bar(bar)
    assert not bar.live.is_started
43+
44+
45+
def test_start_progress_bar_with_existing():
    """Passing an existing bar returns that same (started) object."""
    first = display_module.start_progress_bar()
    second = display_module.start_progress_bar(first)
    assert second is first
    assert second.live.is_started
    display_module.stop_progress_bar(second)
    assert not second.live.is_started
52+
53+
54+
def test_stop_progress_bar_stops():
    """stop_progress_bar flips the live flag off."""
    bar = display_module.start_progress_bar()
    display_module.stop_progress_bar(bar)
    assert not bar.live.is_started
58+
59+
60+
def test_task_creation_and_update():
    """add_task records description/total; update advances completed."""
    bar = display_module.start_progress_bar()
    tid = bar.add_task("[Task]Downloading", total=100)
    bar.update(tid, completed=40)

    recorded = bar.tasks[tid]
    assert recorded.total == 100
    assert recorded.completed == 40
    assert recorded.description == "[Task]Downloading"

    display_module.stop_progress_bar(bar)
69+
70+
71+
def test_conditional_columns_render_normal_task():
    """Columns render real content for tasks tagged with the [Task] prefix."""
    bar = display_module.start_progress_bar()
    tid = bar.add_task("[Task]Processing", total=80)
    bar.update(tid, completed=20)
    task = bar.tasks[tid]

    # Description prefixes drive the Conditional* columns' behavior.
    text_col = ConditionalTextColumn("[progress.description]{task.description}")
    pct_col = ConditionalTextPercentageColumn(
        "[progress.percentage]{task.completed}/{task.total}"
    )
    assert "[progress.description]Processing" in text_col.render(task)
    assert "[progress.percentage]20/80" in pct_col.render(task)

    display_module.stop_progress_bar(bar)
87+
88+
89+
def test_conditional_columns_render_no_progress_bar():
    """[No Progress Bar] tasks suppress every column except the description."""
    bar = display_module.start_progress_bar()
    tid = bar.add_task("[No Progress Bar]Hidden", total=50)
    bar.update(tid, completed=10)
    task = bar.tasks[tid]

    suppressed = [
        ConditionalBarColumn(),
        ConditionalTimeElapsedColumn(),
        ConditionalTextPercentageColumn(
            "[progress.percentage]{task.completed}/{task.total}"
        ),
        ConditionalSpinnerColumn(),
    ]
    for column in suppressed:
        assert column.render(task) == ""

    desc = ConditionalTextColumn("[progress.description]{task.description}")
    assert desc.render(task) == "[progress.description]Hidden"

tests/test_responsegenerator.py

Lines changed: 1 addition & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -24,9 +24,7 @@
2424
async def test_generator(monkeypatch):
2525
count = 3
2626
MOCKED_PROMPTS = ["Prompt 1", "Prompt 2", "Prompt 3"]
27-
MOCKED_DUPLICATE_PROMPTS = [
28-
prompt for prompt, i in itertools.product(MOCKED_PROMPTS, range(count))
29-
]
27+
3028
MOCKED_RESPONSES = [
3129
"Mocked response 1",
3230
"Mocked response 2",
@@ -54,18 +52,5 @@ async def mock_async_api_call(prompt, count, *args, **kwargs):
5452
data = await generator_object.generate_responses(
5553
prompts=MOCKED_PROMPTS, count=count
5654
)
57-
58-
cost = await generator_object.estimate_token_cost(
59-
tiktoken_model_name="gpt-3.5-turbo-16k-0613", # gitleaks:allow
60-
prompts=MOCKED_DUPLICATE_PROMPTS,
61-
example_responses=MOCKED_RESPONSES[:3],
62-
count=count,
63-
)
64-
6555
assert data["data"]["response"] == MOCKED_DUPLICATED_RESPONSES
6656
assert data["metadata"]["non_completion_rate"] == 1 / 3
67-
assert cost == {
68-
"Estimated Prompt Token Cost (USD)": 0.001539,
69-
"Estimated Completion Token Cost (USD)": 0.000504,
70-
"Estimated Total Token Cost (USD)": 0.002043,
71-
}

0 commit comments

Comments
 (0)