1707 | 1707 | ]
1708 | 1708 | ]
1709 | 1709 |
| 1710 | +# Translated MMLU using both professional and non-professional translators. Contains tags for cultural sensitivity. |
| 1711 | +# CA: Cultural Agnostic |
| 1712 | +# CS: Cultural Specific |
| 1713 | +# UNK: Not annotated |
| 1714 | +# ALL: All of the above |
| 1715 | +# https://huggingface.co/papers/2412.03304 |
| 1716 | +global_mmlu_tasks = [ |
| 1717 | + LightevalTaskConfig( |
| 1718 | + name=f"global_mmlu_{sensitivity_label.lower()}_{language.value}_{formulation.name.lower()}:{subset}", |
| 1719 | + prompt_function=get_mcq_prompt_function( |
| 1720 | + language, |
| 1721 | + lambda line: { |
| 1722 | + "question": line["question"], |
| 1723 | + "choices": [line["option_a"], line["option_b"], line["option_c"], line["option_d"]], |
| 1724 | + "gold_idx": LETTER_INDICES.index(line["answer"]), |
| 1725 | + }, |
| 1726 | + formulation=formulation, |
| 1727 | + ), |
| 1728 | + suite=("lighteval",), |
| 1729 | + hf_repo="CohereForAI/Global-MMLU", |
| 1730 | + hf_subset=standardize_tag(language.value), |
| 1731 | + evaluation_splits=("test",), |
| 1732 | + few_shots_split="dev", |
| 1733 | + hf_filter=partial( |
| 1734 | + lambda subset, sensitivity_label, x: x["subject"].lower() == subset |
| 1735 | + and ( |
| 1736 | + sensitivity_label == "ALL" or sensitivity_label in x["cultural_sensitivity_label"].replace("-", "UNK") |
| 1737 | + ), |
| 1738 | + subset, |
| 1739 | + sensitivity_label, |
| 1740 | + ), |
| 1741 | + metric=get_metrics_for_formulation( |
| 1742 | + formulation, |
| 1743 | + [ |
| 1744 | + loglikelihood_acc_metric(normalization=LogProbTokenNorm()), |
| 1745 | + loglikelihood_acc_metric(normalization=LogProbCharNorm()), |
| 1746 | + loglikelihood_acc_metric(normalization=LogProbPMINorm()), |
| 1747 | + ], |
| 1748 | + ), |
| 1749 | + ) |
| 1750 | + for subset in MMLU_SUBSETS |
| 1751 | + for language in [ |
| 1752 | + Language.AMHARIC, |
| 1753 | + Language.ARABIC, |
| 1754 | + Language.BENGALI, |
| 1755 | + Language.CHINESE, |
| 1756 | + Language.CZECH, |
| 1757 | + Language.GERMAN, |
| 1758 | + Language.ENGLISH, |
| 1759 | + Language.SPANISH, |
| 1760 | + Language.FRENCH, |
| 1761 | + Language.HEBREW, |
| 1762 | + Language.HINDI, |
| 1763 | + Language.INDONESIAN, |
| 1764 | + Language.ITALIAN, |
| 1765 | + Language.JAPANESE, |
| 1766 | + Language.KOREAN, |
| 1767 | + Language.MALAY, |
| 1768 | + Language.DUTCH, |
| 1769 | + Language.NORWEGIAN, |
| 1770 | + Language.POLISH, |
| 1771 | + Language.PORTUGUESE, |
| 1772 | + Language.ROMANIAN, |
| 1773 | + Language.RUSSIAN, |
| 1774 | + Language.SERBIAN, |
| 1775 | + Language.SWEDISH, |
| 1776 | + Language.SWAHILI, |
| 1777 | + Language.TAMIL, |
| 1778 | + Language.TELUGU, |
| 1779 | + Language.THAI, |
| 1780 | + Language.TURKISH, |
| 1781 | + Language.UKRAINIAN, |
| 1782 | + Language.URDU, |
| 1783 | + Language.VIETNAMESE, |
| 1784 | + Language.YORUBA, |
| 1785 | + Language.ZULU, |
| 1786 | + ] |
| 1787 | + for formulation in [ |
| 1788 | + MCFFormulation(), |
| 1789 | + CFFormulation(), |
| 1790 | + HybridFormulation(), |
| 1791 | + ] |
| 1792 | + for sensitivity_label in ["ALL", "CA", "CS", "UNK"] |
| 1793 | +] |
| 1794 | + |
| 1795 | + |
1710 | 1796 | # There are only these subsets in the African MMLU
1711 | 1797 | AFRI_MMLU_SUBSETS = [
1712 | 1798 | "elementary_mathematics",
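A note on the `hf_filter` above (diff lines 1733–1740): a row is kept only if its `subject` matches the requested subset and, unless the label is `ALL`, the requested tag matches its `cultural_sensitivity_label`, with `-` normalized to `UNK` (which, per the header comment, appears to mark unannotated rows). The sketch below is a standalone illustration with made-up rows, not lighteval code:

```python
# Standalone sketch of the cultural-sensitivity filtering done by hf_filter above.
# The rows are invented examples; the column names ("subject",
# "cultural_sensitivity_label") match the ones used in the diff, and "-" is
# normalized to "UNK", which the header comment suggests means "not annotated".


def keep_row(subset: str, sensitivity_label: str, row: dict) -> bool:
    return row["subject"].lower() == subset and (
        sensitivity_label == "ALL"
        or sensitivity_label in row["cultural_sensitivity_label"].replace("-", "UNK")
    )


rows = [
    {"subject": "Anatomy", "cultural_sensitivity_label": "CA"},  # culturally agnostic
    {"subject": "Anatomy", "cultural_sensitivity_label": "CS"},  # culturally specific
    {"subject": "Anatomy", "cultural_sensitivity_label": "-"},   # not annotated
]

print([keep_row("anatomy", "ALL", r) for r in rows])  # [True, True, True]
print([keep_row("anatomy", "CA", r) for r in rows])   # [True, False, False]
print([keep_row("anatomy", "UNK", r) for r in rows])  # [False, False, True]
```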
2088 | 2174 | *arabic_mmlu_tasks,
2089 | 2175 | *turkish_mmlu_tasks,
2090 | 2176 | *afri_mmlu_tasks,
| 2177 | + *global_mmlu_tasks, |
2091 | 2178 | ]
2092 | 2179 | )
2093 | 2180 |
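One detail worth flagging in the comprehension: `hf_filter` is built with `functools.partial` rather than a plain closure, presumably because a bare lambda would capture the loop variables `subset` and `sensitivity_label` by reference, so every generated task would end up filtering on the last values of the loops. A minimal, self-contained illustration of that late-binding behavior (sample data invented for the example):

```python
from functools import partial

subjects = ["anatomy", "astronomy"]
row = {"subject": "Anatomy"}

# Bare lambdas close over the variable `s`, so after the comprehension finishes
# they all see its final value ("astronomy").
late_bound = [lambda x: x["subject"].lower() == s for s in subjects]

# partial() binds the current value of `s` eagerly, which is the pattern used
# for hf_filter in global_mmlu_tasks.
eager_bound = [partial(lambda s, x: x["subject"].lower() == s, s) for s in subjects]

print([f(row) for f in late_bound])   # [False, False] -- both compare against "astronomy"
print([f(row) for f in eager_bound])  # [True, False]  -- each filter keeps its own subject
```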