Translate task template to Catalan and Galician and fix typos #506

Merged
10 changes: 5 additions & 5 deletions src/lighteval/tasks/default_prompts.py
@@ -120,7 +120,7 @@ def asdiv(line, task_name: str = None):

def babi_qa(line, task_name: str = None): # HELM
def process_path(path: str) -> str:
"""Turn a path string (task 19) from the original format 's,w' to a verbal model-friendly format 'south west'"""
"""Turn a path string (task 19) from the original format 's,w' into a verbal model-friendly format 'south west'"""
steps = path.split(",")
directions = {"s": "south", "n": "north", "e": "east", "w": "west"}
path = " ".join([directions[step] for step in steps])
@@ -281,7 +281,7 @@ def bbh_logical_deduction_three_objects(line, task_name: str = None):
def bbh_movie_recommendation(line, task_name: str = None):
if line["target"] == "Monsters, Inc": # this line is not correctly formatted
logger.warning(
"One sample removed from task bbh:movie_recommentation because its line is incorrectly formatted."
"One sample removed from task bbh:movie_recommendation because its line is incorrectly formatted."
)
return []
instruction = "Recommend movies similar to the given list of movies.\n\n"
@@ -500,7 +500,7 @@ def civil_comments(line, task_name: str = None):
def cnn_dm(line, task_name: str = None):
return Doc(
task_name=task_name,
query=f"###\nArticle:{line['article']}\n\nSummarize the above article in 3 sentence.\n",
query=f"###\nArticle:{line['article']}\n\nSummarize the above article in 3 sentences.\n",
choices=[str(line["summary"])],
gold_index=0,
specific={"text": line["article"]},
@@ -730,7 +730,7 @@ def gpqa(line, task_name: str = None):


def gsm8k(line, task_name: str = None):
- # Has special analysis in metric for number decomposiition
+ # Has special analysis in metric for number decomposition
return Doc(
task_name=task_name,
query=f"Question: {line['question']}\nAnswer:",
@@ -2076,7 +2076,7 @@ def rte(line, task_name: str = None):
return Doc(
task_name=task_name,
query=f"{line['sentence1']}\nQuestion: {line['sentence2']} True or False?\nAnswer:",
choices=[" True", " False"], # 0 = entailement, 1 = not entailment
choices=[" True", " False"], # 0 = entailment, 1 = not entailment
gold_index=int(line["label"]),
# "metric": "choices_loglikelihood",
)
4 changes: 2 additions & 2 deletions src/lighteval/tasks/lighteval_task.py
@@ -621,7 +621,7 @@ def create_requests_from_tasks( # noqa: C901
n_samples = min(max_samples, len(task_docs)) if max_samples else len(task_docs)
evaluation_tracker.task_config_logger.log_num_docs(task_name, len(task_docs), n_samples)

- # logs out the diferent versions of the tasks for every few shot
+ # logs out the different versions of the tasks for every few shot
for num_fewshot, _ in fewshot_dict[task_name]:
cur_task_name = f"{task_name}|{num_fewshot}"
evaluation_tracker.versions_logger.log(cur_task_name, task.version)
@@ -633,7 +633,7 @@ def create_requests_from_tasks( # noqa: C901
prompt_manager = PromptManager(lm=lm, task=task)
seeds = prompt_manager.few_shot_sampler.get_fewshot_seeds(num_fewshot_seeds)

- # We can do several round of fewshots sampling to get some variance informations
+ # We can do several round of fewshots sampling to get some variance information
for seed in seeds:
for doc_id in range(n_samples):
doc_id_seed = f"{doc_id}_{seed}" # if we do several rounds of few shot sampling we have several seeds
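A small sketch of the id scheme used here, with made-up integer seeds: each few-shot sampling round tags its docs with the round's seed, so per-seed results can be separated later for variance estimates.

```python
# Illustrative only: how doc ids and seeds combine across few-shot
# sampling rounds (seeds and sample count are invented for the example).
seeds = [0, 1, 2]  # one seed per few-shot sampling round
n_samples = 4
doc_id_seeds = [f"{doc_id}_{seed}" for seed in seeds for doc_id in range(n_samples)]
# -> ["0_0", "1_0", "2_0", "3_0", "0_1", ..., "3_2"]
```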
2 changes: 1 addition & 1 deletion src/lighteval/tasks/prompt_manager.py
@@ -132,7 +132,7 @@ def _multi_turn_contexts(self, doc: Doc, use_chat_template: bool, system_prompt:
Multi-turn tasks need to use chat templating.

Args:
- doc (Doc): Formated document.
+ doc (Doc): Formatted document.
use_chat_template (bool): whether or not to use chat template. Will fail if false.
system_prompt (Optional[str]): The system prompt to use
tokenizer (PreTrainedTokenizer): The tokenizer used for the chat template
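As a rough illustration of what this method delegates to, here is how a chat template is typically applied with a `transformers` tokenizer; the model name and message list are placeholders, not lighteval's actual structures:

```python
from transformers import AutoTokenizer

# Placeholder model; any chat model that ships a chat template works.
tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "First turn of a multi-turn task."},
]
# Render the conversation through the model's chat template.
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
print(prompt)
```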
2 changes: 1 addition & 1 deletion src/lighteval/tasks/registry.py
@@ -166,7 +166,7 @@ def _task_superset_dict(self):
"lighteval|mmlu" -> ["lighteval|mmlu:abstract_algebra", "lighteval|mmlu:college_biology", ...]
}
"""
- # Note: sorted before groupby is imporant as the python implementation of groupby does not
+ # Note: sorted before groupby is important as the python implementation of groupby does not
# behave like sql groupby. For more info see the docs of itertools.groupby
superset_dict = {k: list(v) for k, v in groupby(sorted(self.task_registry.keys()), lambda x: x.split(":")[0])}
# Only consider supersets with more than one task
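A quick demonstration of the pitfall this comment guards against: `itertools.groupby` only merges consecutive equal keys, so without sorting, a superset can be split into several groups and silently overwritten in the dict comprehension.

```python
from itertools import groupby

def prefix(task: str) -> str:
    return task.split(":")[0]

keys = ["lighteval|mmlu:college_biology", "lighteval|gsm8k", "lighteval|mmlu:abstract_algebra"]

# Unsorted: "lighteval|mmlu" appears as two separate groups, and the
# dict comprehension keeps only the last one.
bad = {k: list(v) for k, v in groupby(keys, prefix)}
assert bad["lighteval|mmlu"] == ["lighteval|mmlu:abstract_algebra"]

# Sorted: each prefix forms exactly one group, as SQL's GROUP BY would.
good = {k: list(v) for k, v in groupby(sorted(keys), prefix)}
assert good["lighteval|mmlu"] == [
    "lighteval|mmlu:abstract_algebra",
    "lighteval|mmlu:college_biology",
]
```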
2 changes: 1 addition & 1 deletion src/lighteval/tasks/templates/continuation.py
@@ -112,7 +112,7 @@ def get_continuation_prompt_function(
C. Continuation 3
Answer: A/B/C

- This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and don't use the anchor labels (Question/Answer)
+ This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and doesn't use the anchor labels (Question/Answer)

Args:
language (Language): The language of the Continuation task.
6 changes: 3 additions & 3 deletions src/lighteval/tasks/templates/copa.py
@@ -86,17 +86,17 @@ def get_copa_prompt_function(

Format:
*CF*
- Context Premise thefore/cause | (Continuation 1, Continuation 2, Continuation 3)
+ Context Premise therefore/cause | (Continuation 1, Continuation 2, Continuation 3)

*Hybrid*
- Context Premise thefore/cause
+ Context Premise therefore/cause
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: | Continuation 1/Continuation 2/Continuation 3

*MCF*
- Context Premise thefore/cause
+ Context Premise therefore/cause
A. Continuation 1
B. Continuation 2
C. Continuation 3
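To make the *CF* format above concrete, here is a hand-built rendering of a COPA-style prompt; the strings are illustrative, not lighteval's actual renderer:

```python
# Hand-built illustration of the CF format: premise + connector + each
# candidate continuation. "therefore" applies to effect questions,
# "because" to cause questions.
premise = "The man broke his toe"
connector = "because"  # an effect question would use "therefore"
continuations = ["he got a hole in his sock.", "he dropped a hammer on his foot."]
candidates = [f"{premise} {connector} {c}" for c in continuations]
# The model scores each candidate string; the higher-likelihood one wins.
```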
4 changes: 2 additions & 2 deletions src/lighteval/tasks/templates/hellaswag.py
@@ -70,7 +70,7 @@ def get_hellaswag_prompt_function(
Create a templated prompt function for a Hellaswag task.

Format:
- Context Premise thefore/cause | (Continuation 1, Continuation 2, Continuation 3)
+ Context Premise therefore/cause | (Continuation 1, Continuation 2, Continuation 3)

Args:
language (Language): The language of the Hellaswag task.
@@ -126,7 +126,7 @@ def hellaswag_prompt(
if ctx_b:
ctx_a = join_ctxs(ctx_a, ctx_b)

- # Removoal of the [header] can happen and we need the first letter to be capital afterwards
+ # Removal of the [header] can happen and we need the first letter to be capital afterwards
full_context = HELLASWAG_QUERY.format(activity_label=activity_label, ctx=ctx_a)
choices = [
hellaswag_preprocess(
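A small sketch of the concern in the comment above, assuming the bracketed header is stripped with a regex; the helper below is illustrative, not lighteval's actual `hellaswag_preprocess`:

```python
import re

def strip_header(text: str) -> str:
    # Drop a leading "[header]" if present, then re-capitalize the first
    # letter so the sentence still starts cleanly.
    text = re.sub(r"^\[.*?\]\s*", "", text).strip()
    return text[:1].upper() + text[1:]

assert strip_header("[header] the man walks into the room.") == "The man walks into the room."
```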
12 changes: 6 additions & 6 deletions src/lighteval/tasks/templates/nli.py
@@ -228,23 +228,23 @@ def prompt_fn(line: dict, task_name: str):
if input_data is None:
return None

- # Template based on dicussion here: https://github.com/EleutherAI/lm-evaluation-harness/issues/450
+ # Template based on discussion here: https://github.com/EleutherAI/lm-evaluation-harness/issues/450
labels = [capitalize(get_relation_label(label, translation_literals)) for label in relations]

premise, hypothesis, gold_idx = input_data["premise"], input_data["hypothesis"], input_data["gold_idx"]
premise = fix_ending_punct(capitalize(input_data["premise"]), translation_literals)
hypothesis = input_data["hypothesis"]
if isinstance(formulation, HybridFormulation):
# If we have the neither option move it to the end to be consistent with standard NLI evaluation
- rearanged_labales = labels
+ rearranged_labels = labels
if "neutral" in relations:
neutral_idx = relations.index("neutral")
- rearanged_labales = labels[:neutral_idx] + labels[neutral_idx + 1 :] + [labels[neutral_idx]]
+ rearranged_labels = labels[:neutral_idx] + labels[neutral_idx + 1 :] + [labels[neutral_idx]]

choices_str = f"{translation_literals.comma}{translation_literals.word_space}".join(rearanged_labales[:-1])
hypothesis = f"{hypothesis.rstrip(PUNCT)}{translation_literals.sentence_space}{choices_str}{translation_literals.word_space}{translation_literals.or_word}{translation_literals.word_space}{rearanged_labales[-1]}{translation_literals.question_mark}"
choices_str = f"{translation_literals.comma}{translation_literals.word_space}".join(rearranged_labels[:-1])
hypothesis = f"{hypothesis.rstrip(PUNCT)}{translation_literals.sentence_space}{choices_str}{translation_literals.word_space}{translation_literals.or_word}{translation_literals.word_space}{rearranged_labels[-1]}{translation_literals.question_mark}"

- # (hynky1999): Ideally we would not compute logprobs of the Yes/No/Also in CF fomulation. However as of right now lighteval doesn't allow to
+ # (hynky1999): Ideally we would not compute logprobs of the Yes/No/Also in CF formulation. However as of right now lighteval doesn't allow to
# use multi-context.
row = {
"instruction": input_data.get("instruction", ""),
48 changes: 46 additions & 2 deletions src/lighteval/tasks/templates/utils/translation_literals.py
@@ -178,7 +178,29 @@ def __getattribute__(self, name: str) -> str:
Language.BRETON: TranslationLiterals(language=Language.BRETON),
Language.BULGARIAN: TranslationLiterals(language=Language.BULGARIAN),
Language.BURMESE: TranslationLiterals(language=Language.BURMESE),
- Language.CATALAN: TranslationLiterals(language=Language.CATALAN),
+ Language.CATALAN: TranslationLiterals(
+     language=Language.CATALAN,
+     question_word="pregunta",
+     answer="resposta",
+     confirmation_word="cert",
+     yes="sí",
+     no="no",
+     also="també",
+     cause_word="perquè",
+     effect_word="per tant",
+     or_word="o",
+     true="veritable",
+     false="fals",
+     neither="cap",
+     full_stop=".",
+     comma=",",
+     question_mark="?",
+     exclamation_mark="!",
+     word_space=" ",
+     sentence_space=" ",
+     colon=":",
+     semicolon=";",
+ ),
Language.CEBUANO: TranslationLiterals(language=Language.CEBUANO),
Language.CHINESE: TranslationLiterals(
language=Language.CHINESE,
@@ -348,7 +370,29 @@ def __getattribute__(self, name: str) -> str:
sentence_space=" ",
colon=":",
),
- Language.GALICIAN: TranslationLiterals(language=Language.GALICIAN),
+ Language.GALICIAN: TranslationLiterals(
+     language=Language.GALICIAN,
+     question_word="pregunta",
+     answer="resposta",
+     confirmation_word="certo",
+     yes="si",
+     no="non",
+     also="tamén",
+     cause_word="porque",
+     effect_word="polo tanto",
+     or_word="ou",
+     true="verdadeiro",
+     false="falso",
+     neither="ningún",
+     full_stop=".",
+     comma=",",
+     question_mark="?",
+     exclamation_mark="!",
+     word_space=" ",
+     sentence_space=" ",
+     colon=":",
+     semicolon=";",
+ ),
Language.GEORGIAN: TranslationLiterals(language=Language.GEORGIAN),
Language.GERMAN: TranslationLiterals(
language=Language.GERMAN,
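For context, here is a hedged sketch of how the new Catalan entry might be consumed downstream. The `TRANSLATION_LITERALS` dict name and the `Language` import path are assumptions inferred from the file being patched, not shown in this diff:

```python
# Assumed import paths; both names are inferred from the patched file
# and may differ in the actual codebase.
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language

lit = TRANSLATION_LITERALS[Language.CATALAN]
# Build a tiny yes/no question using the fields added in this PR.
question = f"{lit.question_word.capitalize()}{lit.colon}{lit.word_space}El cel és blau{lit.question_mark}"
options = f"{lit.yes.capitalize()}{lit.word_space}{lit.or_word}{lit.word_space}{lit.no}{lit.question_mark}"
print(question, options)  # Pregunta: El cel és blau? Sí o no?
```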