Skip to content

Commit 412ccfc

Browse files
Adds Global MMLU (#426)
* add global mmlu + zulu * add global mmlu + zulu * fix translation literals * add unk for global mmlu * Update src/lighteval/tasks/multilingual/tasks.py Co-authored-by: Clémentine Fourrier <[email protected]> --------- Co-authored-by: Clémentine Fourrier <[email protected]>
1 parent f2d0a65 commit 412ccfc

File tree

3 files changed

+89
-0
lines changed

3 files changed

+89
-0
lines changed

src/lighteval/tasks/multilingual/tasks.py

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1707,6 +1707,92 @@
17071707
]
17081708
]
17091709

1710+
# Translated MMLU using both professional and non-professional translators. Contains tags for cultural sensitivity.
1711+
# CA: Culturally Agnostic
1712+
# CS: Culturally Sensitive
1713+
# UNK: Not annotated
1714+
# ALL: All of the above
1715+
# https://huggingface.co/papers/2412.03304
1716+
global_mmlu_tasks = [
1717+
LightevalTaskConfig(
1718+
name=f"global_mmlu_{sensitivity_label.lower()}_{language.value}_{formulation.name.lower()}:{subset}",
1719+
prompt_function=get_mcq_prompt_function(
1720+
language,
1721+
lambda line: {
1722+
"question": line["question"],
1723+
"choices": [line["option_a"], line["option_b"], line["option_c"], line["option_d"]],
1724+
"gold_idx": LETTER_INDICES.index(line["answer"]),
1725+
},
1726+
formulation=formulation,
1727+
),
1728+
suite=("lighteval",),
1729+
hf_repo="CohereForAI/Global-MMLU",
1730+
hf_subset=standardize_tag(language.value),
1731+
evaluation_splits=("test",),
1732+
few_shots_split="dev",
1733+
hf_filter=partial(
1734+
lambda subset, sensitivity_label, x: x["subject"].lower() == subset
1735+
and (
1736+
sensitivity_label == "ALL" or sensitivity_label in x["cultural_sensitivity_label"].replace("-", "UNK")
1737+
),
1738+
subset,
1739+
sensitivity_label,
1740+
),
1741+
metric=get_metrics_for_formulation(
1742+
formulation,
1743+
[
1744+
loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
1745+
loglikelihood_acc_metric(normalization=LogProbCharNorm()),
1746+
loglikelihood_acc_metric(normalization=LogProbPMINorm()),
1747+
],
1748+
),
1749+
)
1750+
for subset in MMLU_SUBSETS
1751+
for language in [
1752+
Language.AMHARIC,
1753+
Language.ARABIC,
1754+
Language.BENGALI,
1755+
Language.CHINESE,
1756+
Language.CZECH,
1757+
Language.GERMAN,
1758+
Language.ENGLISH,
1759+
Language.SPANISH,
1760+
Language.FRENCH,
1761+
Language.HEBREW,
1762+
Language.HINDI,
1763+
Language.INDONESIAN,
1764+
Language.ITALIAN,
1765+
Language.JAPANESE,
1766+
Language.KOREAN,
1767+
Language.MALAY,
1768+
Language.DUTCH,
1769+
Language.NORWEGIAN,
1770+
Language.POLISH,
1771+
Language.PORTUGUESE,
1772+
Language.ROMANIAN,
1773+
Language.RUSSIAN,
1774+
Language.SERBIAN,
1775+
Language.SWEDISH,
1776+
Language.SWAHILI,
1777+
Language.TAMIL,
1778+
Language.TELUGU,
1779+
Language.THAI,
1780+
Language.TURKISH,
1781+
Language.UKRAINIAN,
1782+
Language.URDU,
1783+
Language.VIETNAMESE,
1784+
Language.YORUBA,
1785+
Language.ZULU,
1786+
]
1787+
for formulation in [
1788+
MCFFormulation(),
1789+
CFFormulation(),
1790+
HybridFormulation(),
1791+
]
1792+
for sensitivity_label in ["ALL", "CA", "CS", "UNK"]
1793+
]
1794+
1795+
17101796
# There are only these subsets in the African MMLU
17111797
AFRI_MMLU_SUBSETS = [
17121798
"elementary_mathematics",
@@ -2088,6 +2174,7 @@
20882174
*arabic_mmlu_tasks,
20892175
*turkish_mmlu_tasks,
20902176
*afri_mmlu_tasks,
2177+
*global_mmlu_tasks,
20912178
]
20922179
)
20932180

src/lighteval/tasks/templates/utils/translation_literals.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1007,4 +1007,5 @@ def __getattribute__(self, name: str) -> str:
10071007
Language.WESTERN_FRISIAN: TranslationLiterals(language=Language.WESTERN_FRISIAN),
10081008
Language.YIDDISH: TranslationLiterals(language=Language.YIDDISH),
10091009
Language.YORUBA: TranslationLiterals(language=Language.YORUBA),
1010+
Language.ZULU: TranslationLiterals(language=Language.ZULU),
10101011
}

src/lighteval/utils/language.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,6 +122,7 @@ class Language(Enum):
122122
WAR = "war"
123123
SHAN = "shn"
124124
UDMURT = "udm"
125+
ZULU = "zul"
125126

126127

127128
# This mapping was created for Belebele; it converts iso_639_3 individual codes to iso_639_3 macro codes

0 commit comments

Comments
 (0)