Skip to content

Commit

Permalink
Multilingual NLI Tasks (#329)
Browse files Browse the repository at this point in the history
* add multilingual dynamic generative metrics

* draft

* finish multichoice config

* update tokenizers + install nltk reqs

* use punkt tab

* Update src/lighteval/utils/imports.py

Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>

* Update src/lighteval/metrics/normalizations.py

Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>

* fix imports

* remove unused import

* finish implementation of templates + move stuff around

* resolve nits

* when in rome do as romans do (handle error messages the same way)

* fix utils

* nicer tests + fix them

* nicer todo

* add nice docstrings 📃

* add even more docstring

* nit

* fix test

* add multilingual to dev group

* merge nli, add languages to literals

* translation literals

* add nli

* add rcb + chinese nli

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* Update src/lighteval/tasks/multilingual/tasks.py

Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>

* add two new tasks + docs

---------

Co-authored-by: Nathan Habib <30601243+NathanHB@users.noreply.github.com>
Co-authored-by: Hynek Kydlicek <kydliceh.hynek@gmail.com>
Co-authored-by: Clémentine Fourrier <22726840+clefourrier@users.noreply.github.com>
  • Loading branch information
4 people authored Sep 30, 2024
1 parent 170ed87 commit 551572a
Showing 1 changed file with 324 additions and 0 deletions.
324 changes: 324 additions & 0 deletions src/lighteval/tasks/multilingual/tasks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,324 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from langcodes import Language as LangCodeLanguage
from langcodes import standardize_tag

from lighteval.metrics.dynamic_metrics import loglikelihood_acc_metric
from lighteval.metrics.normalizations import LogProbTokenNorm
from lighteval.tasks.lighteval_task import LightevalTaskConfig
from lighteval.tasks.templates.nli import get_nli_prompt_function
from lighteval.tasks.templates.utils.formulation import (
CFFormulation,
HybridFormulation,
MCFFormulation,
)
from lighteval.utils.language import Language


# ------------------------------- NLI Tasks ------------------------------- #
# NLI (Natural Language Inference) tasks involve determining the logical relationship
# between two given sentences: a premise and a hypothesis. The goal is to classify
# whether the hypothesis is entailed by, contradicts, or is neutral with respect to
# the premise. After our inspection we found the neutral label to be quite ambiguous
# and decided to exclude it. But you can easily add it by modifying the adapters


# The XNLI dataset is a multilingual variant of MultiNLI
# https://aclanthology.org/D18-1269/
xnli_tasks = [
    LightevalTaskConfig(
        name=f"xnli_{language.value}_{formulation.name.lower()}",
        suite=["lighteval"],
        metric=[loglikelihood_acc_metric(normalization=LogProbTokenNorm())],
        prompt_function=get_nli_prompt_function(
            language=language,
            adapter=lambda line: {
                "premise": line["premise"],
                "hypothesis": line["hypothesis"],
                # Neutral (label 1) is filtered out below, so the two remaining
                # labels are remapped onto contiguous indices: 0 -> 0, 2 -> 1.
                "gold_idx": {0: 0, 2: 1}[line["label"]],
            },
            relations=["entailment", "contradiction"],
            formulation=formulation,
        ),
        # Drop neutral examples (label 1) entirely
        hf_filter=lambda line: line["label"] in [0, 2],
        hf_repo="facebook/xnli",
        hf_subset=standardize_tag(language.value),
        evaluation_splits=["validation"],
        few_shots_split="train",
    )
    # FIX: ENGLISH and FRENCH were each listed twice, which generated duplicate
    # task configs with identical names (one per formulation).
    for language in [
        Language.ARABIC,
        Language.BULGARIAN,
        Language.GERMAN,
        Language.GREEK,
        Language.ENGLISH,
        Language.SPANISH,
        Language.FRENCH,
        Language.HINDI,
        Language.RUSSIAN,
        Language.SWAHILI,
        Language.THAI,
        Language.TURKISH,
        Language.URDU,
        Language.VIETNAMESE,
        Language.CHINESE,
    ]
    for formulation in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]

# Improvement on XNLI with better translation, from our experience models tend to
# perform better on XNLI2.0 than XNLI
# https://arxiv.org/abs/2301.06527
xnli2_tasks = [
    LightevalTaskConfig(
        name=f"xnli2.0_{language.value}_{formulation.name.lower()}",
        suite=["lighteval"],
        metric=[loglikelihood_acc_metric(normalization=LogProbTokenNorm())],
        prompt_function=get_nli_prompt_function(
            language=language,
            adapter=lambda line: {
                "premise": line["premise"],
                "hypothesis": line["hypothesis"],
                # Neutral (label 1) is filtered out below, so the two remaining
                # labels are remapped onto contiguous indices: 0 -> 0, 2 -> 1.
                "gold_idx": {0: 0, 2: 1}[line["label"]],
            },
            relations=["entailment", "contradiction"],
            formulation=formulation,
        ),
        hf_filter=lambda line: line["label"] in [0, 2],
        # Each language lives in its own repo, keyed by the lowercased English
        # name of the language (e.g. "french", "hindi").
        hf_repo=f"Harsit/xnli2.0_train_{LangCodeLanguage(standardize_tag(language.value)).language_name().lower()}",
        hf_subset="default",
        evaluation_splits=["train"],
    )
    # FIX: ENGLISH was listed twice, producing duplicate task configs
    # with identical names (one per formulation).
    for language in [
        Language.ENGLISH,
        Language.FRENCH,
        Language.PUNJABI,
        Language.GUJARATI,
        Language.KANNADA,
        Language.ASSAMESE,
        Language.BENGALI,
        Language.MARATHI,
        Language.SANSKRIT,
        Language.TAMIL,
        Language.GERMAN,
        Language.URDU,
        Language.VIETNAMESE,
        Language.TURKISH,
        Language.THAI,
        Language.SWAHILI,
        Language.SPANISH,
        Language.RUSSIAN,
        Language.HINDI,
        Language.GREEK,
        Language.CHINESE,
        Language.BULGARIAN,
        Language.ARABIC,
        # Theoretically also: Bhojpuri, Gujarati, Odiya
    ]
    for formulation in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]

# Another variant of XNLI, with emphasis on Indic languages
# https://arxiv.org/abs/2204.08776
xnli_indic_tasks = [
    LightevalTaskConfig(
        name=f"indicnxnli_{lang.value}_{form.name.lower()}",
        suite=["lighteval"],
        prompt_function=get_nli_prompt_function(
            language=lang,
            adapter=lambda line: {
                "premise": line["premise"],
                "hypothesis": line["hypothesis"],
                # Neutral rows are filtered out below, so the two surviving
                # labels collapse onto contiguous indices: 0 -> 0, 2 -> 1.
                "gold_idx": {0: 0, 2: 1}[line["label"]],
            },
            relations=["entailment", "contradiction"],
            formulation=form,
        ),
        hf_repo="Divyanshu/indicxnli",
        hf_subset=standardize_tag(lang.value),
        # Keep only labels 0 and 2 (the neutral label 1 is excluded)
        hf_filter=lambda x: int(x["label"]) in [0, 2],
        evaluation_splits=["validation"],
        few_shots_split="train",
        few_shots_select=None,
        generation_size=-1,
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )
    for lang in [
        Language.ASSAMESE,
        Language.BENGALI,
        Language.GUJARATI,
        Language.HINDI,
        Language.KANNADA,
        Language.MALAYALAM,
        Language.MARATHI,
        Language.ORIYA,
        Language.PUNJABI,
        Language.TAMIL,
        Language.TELUGU,
    ]
    for form in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]

# PAWS-X: A Cross-lingual Adversarial Dataset for Paraphrase Identification
# This dataset contains paraphrase identification pairs in multiple languages.
# It's derived from PAWS (Paraphrase Adversaries from Word Scrambling) and
# We treat paraphrase as entailment and non-paraphrase as contradiction
# https://arxiv.org/abs/1908.11828

paws_x_tasks = [
    LightevalTaskConfig(
        name=f"pawsx_{language.value}_{formulation.name.lower()}",
        suite=("lighteval",),
        prompt_function=get_nli_prompt_function(
            language=language,
            adapter=lambda line: {
                # PAWS-X provides raw sentence pairs; they are framed here as an
                # NLI premise/hypothesis pair.
                "premise": line["sentence1"],
                "hypothesis": line["sentence2"],
                # PAWS-X has no neutral label: "label" is already binary (0/1),
                # so no filtering/remapping is needed.
                # NOTE(review): with relations=["entailment", "contradiction"],
                # gold_idx=int(label) maps label 1 to "contradiction". If label 1
                # means "paraphrase" (as in the original PAWS-X), this inverts the
                # stated intent of treating paraphrase as entailment — confirm
                # against the dataset's label semantics.
                "gold_idx": int(line["label"]),
            },
            relations=["entailment", "contradiction"],
            formulation=formulation,
        ),
        hf_repo="google-research-datasets/paws-x",
        hf_subset=standardize_tag(language.value),
        evaluation_splits=("test",),
        few_shots_split="train",
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )
    # The seven languages covered by PAWS-X
    for language in [
        Language.GERMAN,
        Language.ENGLISH,
        Language.SPANISH,
        Language.FRENCH,
        Language.JAPANESE,
        Language.KOREAN,
        Language.CHINESE,
    ]
    for formulation in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]

# Russian Commitment Bank (RCB) is a large-scale NLI dataset with Russian sentences,
# collected from the web and crowdsourcing.
# https://arxiv.org/abs/2401.04531
rcb_tasks = [
    LightevalTaskConfig(
        name=f"rcb_{Language.RUSSIAN.value}_{formulation.name.lower()}",
        prompt_function=get_nli_prompt_function(
            language=Language.RUSSIAN,
            adapter=lambda line: {
                # MERA nests the example fields under "inputs"
                "premise": line["inputs"]["premise"],
                "hypothesis": line["inputs"]["hypothesis"],
                # "outputs" holds the gold label as a 1-based string; after the
                # filter below only "1" and "2" remain, shifted here to 0-based
                # indices into the relations list.
                "gold_idx": int(line["outputs"]) - 1,
            },
            relations=["entailment", "contradiction"],
            formulation=formulation,
        ),
        suite=("lighteval",),
        hf_repo="ai-forever/MERA",
        hf_subset="rcb",
        # Keep labels 1 and 2 only; `or "0"` guards against empty/missing
        # "outputs" (e.g. unlabeled rows), which then fail the membership test.
        hf_filter=lambda x: int(x["outputs"] or "0") in [1, 2],
        # NOTE(review): "train" is included as an evaluation split (and no
        # few_shots_split is set) — confirm this is intended.
        evaluation_splits=("train", "validation"),
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )
    for formulation in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]

# Natively-collected (non-translated) Chinese NLI dataset.
# https://arxiv.org/pdf/2010.05444
# We find this benchmark to have really good signal compared to other Chinese NLI datasets
ocnli_tasks = [
    LightevalTaskConfig(
        name=f"ocnli_{Language.CHINESE.value}_{form.name.lower()}",
        suite=("lighteval",),
        prompt_function=get_nli_prompt_function(
            language=Language.CHINESE,
            adapter=lambda line: {
                "premise": line["sentence1"],
                "hypothesis": line["sentence2"],
                # The filter below keeps labels 1 and 2 only; shift them onto
                # 0-based indices into the relations list.
                "gold_idx": {1: 0, 2: 1}[line["label"]],
            },
            relations=["entailment", "contradiction"],
            formulation=form,
        ),
        hf_repo="clue/clue",
        hf_subset="ocnli",
        # Keep only rows labeled 1 or 2
        hf_filter=lambda x: int(x["label"]) in [1, 2],
        evaluation_splits=("validation",),
        few_shots_split="train",
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )
    for form in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]

# https://arxiv.org/abs/2004.05986
# Chinese NLI dataset following the MNLI approach (machine translated)
cmnli_tasks = [
    LightevalTaskConfig(
        name=f"cmnli_{Language.CHINESE.value}_{form.name.lower()}",
        suite=("lighteval",),
        prompt_function=get_nli_prompt_function(
            language=Language.CHINESE,
            adapter=lambda line: {
                "premise": line["sentence1"],
                "hypothesis": line["sentence2"],
                # Labels are strings here; map them straight onto indices into
                # the relations list.
                "gold_idx": {"entailment": 0, "contradiction": 1}[line["label"]],
            },
            relations=["entailment", "contradiction"],
            formulation=form,
        ),
        hf_repo="fenffef/cmnli",
        hf_subset="default",
        # Keep only entailment/contradiction rows (neutral is dropped)
        hf_filter=lambda x: x["label"] in ["entailment", "contradiction"],
        evaluation_splits=("validation",),
        few_shots_split="train",
        metric=[
            loglikelihood_acc_metric(normalization=LogProbTokenNorm()),
        ],
    )
    for form in [MCFFormulation(), CFFormulation(), HybridFormulation()]
]


# Aggregate every task group defined above into the table lighteval discovers.
TASKS_TABLE = (
    xnli_tasks + xnli2_tasks + xnli_indic_tasks + paws_x_tasks + rcb_tasks + ocnli_tasks + cmnli_tasks
)

0 comments on commit 551572a

Please sign in to comment.