Merge branch 'nathan-add-doc' of github.com:huggingface/lighteval into nathan-add-doc
NathanHB committed Sep 18, 2024
2 parents 016cea4 + 33c1e7f commit e86912a
Showing 4 changed files with 21 additions and 24 deletions.
8 changes: 4 additions & 4 deletions docs/source/adding_new_metric.md
@@ -19,23 +19,23 @@ from aenum import extend_enum
from lighteval.metrics import Metrics
```

-You need to define sample level metric:
+You need to define a sample level metric:

```python
def custom_metric(predictions: list[str], formatted_doc: Doc, **kwargs) -> bool:
response = predictions[0]
return response == formatted_doc.choices[formatted_doc.gold_index]
```

-Here the sample level metric only return one metric, if you want to return multiple metrics per sample you need to return a dictionary with the metrics as keys and the values as values.
+Here the sample level metric only returns one metric; if you want to return multiple metrics per sample, you need to return a dictionary with the metric names as keys and their values as values.

```python
def custom_metric(predictions: list[str], formatted_doc: Doc, **kwargs) -> dict:
response = predictions[0]
return {"accuracy": response == formatted_doc.choices[formatted_doc.gold_index], "other_metric": 0.5}
```

-Then, you can define an aggreagtion function if needed, a comon aggregation function is `np.mean`.
+Then, you can define an aggregation function if needed; a common aggregation function is `np.mean`.

```python
def agg_function(items):
@@ -73,7 +73,7 @@ custom_metric = SampleLevelMetricGrouping(
)
```
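The collapsed lines hide most of this definition. As a rough sketch only (the field names and import path are assumptions based on lighteval's metric API around this version, and should be checked against the source), the full block could look something like:

```python
import numpy as np

# NOTE: this import path is an assumption; it has moved between lighteval versions.
from lighteval.metrics.utils import (
    MetricCategory,
    MetricUseCase,
    SampleLevelMetricGrouping,
)


def agg_function(items):
    # Aggregate the per-sample scores into a single corpus-level number.
    return np.mean(items)


custom_metric = SampleLevelMetricGrouping(
    metric_name=["accuracy", "other_metric"],  # one name per metric returned
    higher_is_better={"accuracy": True, "other_metric": True},
    category=MetricCategory.GENERATIVE,
    use_case=MetricUseCase.ACCURACY,
    sample_level_fn=custom_metric,  # the per-sample function defined above
    corpus_level_fn={"accuracy": np.mean, "other_metric": agg_function},
)
```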

-And to end with the following, so that it adds your metric to our metrics list
+To finish, add the following, so that it adds your metric to our metrics list
when loaded as a module.

```python
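# The remaining lines are collapsed in this diff view. Given the imports
# shown at the top of the file (extend_enum, Metrics), the registration is
# most likely a call of this shape; the metric name here is illustrative:
extend_enum(Metrics, "my_custom_metric", custom_metric)
```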
8 changes: 4 additions & 4 deletions docs/source/adding_new_task.md
@@ -31,14 +31,14 @@ dataset to a document to be used for evaluation.
# Define as many as you need for your different tasks
def prompt_fn(line, task_name: str = None):
"""Defines how to go from a dataset line to a doc object.
-    Follow examples in src/lighteval/tasks/tasks_prompt_formatting.py, or get more info
+    Follow examples in src/lighteval/tasks/default_prompts.py, or get more info
about what this function should do in the README.
"""
return Doc(
task_name=task_name,
query="",
choices="",
gold_index=0,
query=line["question"],
choices=[f" {c}" for c in line["choices"]],
gold_index=line["gold"],
instruction="",
)
```
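For a quick sanity check, you can call the function on a hand-written row (the row below is hypothetical, shaped to match the keys `prompt_fn` reads):

```python
# Hypothetical dataset row, for illustration only.
line = {"question": "What is 2 + 2?", "choices": ["3", "4", "5"], "gold": 1}

doc = prompt_fn(line, task_name="my_task")
print(doc.query)       # What is 2 + 2?
print(doc.choices)     # [' 3', ' 4', ' 5']
print(doc.gold_index)  # 1
```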
2 changes: 1 addition & 1 deletion docs/source/installation.md
@@ -31,7 +31,7 @@ appropriate extras group.
| adapters | To evaluate adapters models (delta and peft) |
| tensorboardX | To upload your results to tensorboard |
| vllm | To use vllm as backend for inference |

+| s3 | To upload results to s3 |
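Each group in the table is a pip extra; for example, to pick up the `vllm` or `s3` dependencies:

```bash
pip install lighteval[vllm]
# extras can be combined:
pip install "lighteval[adapters,s3]"
```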
## Hugging Face login

If you want to push your results to the Hugging Face Hub or evaluate your own
27 changes: 12 additions & 15 deletions docs/source/saving_results.md
@@ -1,30 +1,30 @@
# Saving results

-## Saving results locally
+## Saving results elsewhere

Lighteval will automatically save results and evaluation details in the directory
set with the `--output_dir` argument. The results will be saved in
-`{output_dir}/results/{model_org}/{model_name}/results_{timestamp}.json`.
-[Here is an example of a result file](#example-of-a-result-file).
+`{output_dir}/results/{model_name}/results_{timestamp}.json`.
+[Here is an example of a result file](#example-of-a-result-file). The output path can be any [fsspec](https://filesystem-spec.readthedocs.io/en/latest/index.html)-compliant path (local, s3, hf hub, gdrive, ftp, etc.).

To save the details of the evaluation, you can use the `--save_details`
argument. The details will be saved in a parquet file
-`{output_dir}/details/{model_org}/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet`.
+`{output_dir}/details/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet`.
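For example, to write results and details to an s3 bucket (`--output_dir` and `--save_details` are the flags documented here; the model and task arguments are placeholders to adapt to your setup, so check `lighteval --help` for the exact syntax of your version):

```bash
lighteval accelerate \
    --model_args "pretrained=HuggingFaceH4/zephyr-7b-beta" \
    --tasks "lighteval|gsm8k|0|0" \
    --output_dir "s3://my-bucket/evals" \
    --save_details
```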

## Pushing results to the HuggingFace hub

You can push the results and evaluation details to the HuggingFace hub. To do
-so, you need to set the `--push_results_to_hub` as well as the `--results_org`
+so, you need to set the `--push_to_hub` as well as the `--results_org`
argument. The results will be saved in a dataset with the name at
`{results_org}/{model_org}/{model_name}`. To push the details, you need to set
-the `--push_details_to_hub` argument.
+the `--save_details` argument.
The dataset created will be private by default; you can make it public by
setting the `--public_run` argument.
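Putting the flags from this section together (same placeholder caveats as in the example above):

```bash
lighteval accelerate \
    --model_args "pretrained=HuggingFaceH4/zephyr-7b-beta" \
    --tasks "lighteval|gsm8k|0|0" \
    --output_dir "./evals_doc" \
    --save_details \
    --push_to_hub \
    --results_org "my-org" \
    --public_run
```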


## Pushing results to Tensorboard

-You can push the results to Tensorboard by setting the `--push_results_to_tensorboard`.
+You can push the results to Tensorboard by setting `--push_to_tensorboard`.


## How to load and investigate details
@@ -36,13 +36,11 @@ from datasets import load_dataset
import os

output_dir = "evals_doc"
model = "HuggingFaceH4/zephyr-7b-beta"
model_org = model.split("/")[0]
model_name = model.split("/")[1]
model_name = "HuggingFaceH4/zephyr-7b-beta"
timestamp = "2024-09-03T15-06-11.234678"
task = "lighteval|gsm8k|0"

details_path = f"{output_dir}/details/{model_org}/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet"
details_path = f"{output_dir}/details/{model_name}/{timestamp}/details_{task}_{timestamp}.parquet"

# Load the details
details = load_dataset("parquet", data_files=details_path, split="train")
@@ -58,14 +56,13 @@ from datasets import load_dataset

output_dir = "evals_doc"
results_org = "SaylorTwift"
model = "HuggingFaceH4/zephyr-7b-beta"
model_org = model.split("/")[0]
model_name = model.split("/")[1]
model_name = "HuggingFaceH4/zephyr-7b-beta"
sanitized_model_name = model_name.replace("/", "__")
timestamp = "2024-09-03T15-06-11.234678"
task = "lighteval|gsm8k|0"
public_run = False

dataset_path = f"{results_org}/details_{model_name}{'_private' if not public_run else ''}"
dataset_path = f"{results_org}/details_{sanitized_model_name}{'_private' if not public_run else ''}"
details = load_dataset(dataset_path, task.replace("|", "_"), split="latest")

for detail in details:
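    # (loop body collapsed in the diff view) e.g. inspect each sample:
    print(detail)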
