Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[v2] reupload reranking datasets in old format #2097

Open
wants to merge 11 commits into
base: v2.0.0
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
26 changes: 4 additions & 22 deletions mteb/abstasks/AbsTaskReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,24 +10,7 @@

logger = logging.getLogger(__name__)

OLD_FORMAT_RERANKING_TASKS = [
"MindSmallReranking",
"SciDocsRR",
"StackOverflowDupQuestions",
"WebLINXCandidatesReranking",
"AlloprofReranking",
"SyntecReranking",
"VoyageMMarcoReranking",
"ESCIReranking",
"MIRACLReranking",
"WikipediaRerankingMultilingual",
"RuBQReranking",
"T2Reranking",
"MMarcoReranking",
"CMedQAv1-reranking",
"CMedQAv2-reranking",
"NamaaMrTydiReranking",
]
OLD_FORMAT_RERANKING_TASKS = []


class AbsTaskReranking(AbsTaskRetrieval):
Expand Down Expand Up @@ -85,7 +68,6 @@ def transform_old_dataset_format(self, given_dataset=None):

Args:
given_dataset (Dataset, optional): The dataset to transform. Defaults to None. This is helpful for some older datasets which are loaded with custom code, but need to be transformed still.

"""
if self.metadata.name not in OLD_FORMAT_RERANKING_TASKS:
return
Expand All @@ -106,9 +88,9 @@ def transform_old_dataset_format(self, given_dataset=None):
cur_dataset = given_dataset
elif "name" in self.metadata.dataset:
cur_dataset = datasets.load_dataset(**self.metadata.dataset) # type: ignore
assert (
hf_subset == "default"
), f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
assert hf_subset == "default", (
f"Only default subset is supported for {self.metadata.name} since `name` is given in the metadata."
)
else:
cur_dataset = datasets.load_dataset(
**self.metadata.dataset, name=hf_subset
Expand Down
2 changes: 0 additions & 2 deletions mteb/abstasks/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
from .AbsTaskClusteringFast import AbsTaskClusteringFast
from .AbsTaskMultilabelClassification import AbsTaskMultilabelClassification
from .AbsTaskPairClassification import AbsTaskPairClassification
from .AbsTaskReranking import AbsTaskReranking
from .AbsTaskRetrieval import AbsTaskRetrieval
from .AbsTaskSpeedTask import AbsTaskSpeedTask
from .AbsTaskSTS import AbsTaskSTS
Expand All @@ -33,7 +32,6 @@
"AbsTaskClusteringFast",
"AbsTaskMultilabelClassification",
"AbsTaskPairClassification",
"AbsTaskReranking",
"AbsTaskRetrieval",
"AbsTaskSpeedTask",
"AbsTaskSTS",
Expand Down
35 changes: 27 additions & 8 deletions mteb/abstasks/dataloaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,14 @@
import logging
from collections import defaultdict

from datasets import Features, Sequence, Value, get_dataset_config_names, load_dataset
from datasets import (
Features,
Sequence,
Value,
get_dataset_config_names,
get_dataset_split_names,
load_dataset,
)

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -73,7 +80,7 @@ def load(
f"Instructions loaded: {len(self.instructions) if self.instructions else 0}"
)

self._load_qrels(self.split, self.config)
self._load_qrels(self.config)
# filter queries with no qrels
qrels_dict = defaultdict(dict)

Expand Down Expand Up @@ -106,15 +113,26 @@ def load_corpus(self, config: str | None = None) -> dict[str, dict[str, str]]:

return self.corpus

def get_split(self, config: str) -> str:
    """Resolve which dataset split to load for the given config.

    Prefers the configured ``self.split`` when the config exposes it;
    otherwise, if the config has exactly one split, falls back to that
    single split.

    Args:
        config: Name of the dataset config (e.g. ``"corpus"``,
            ``"en-qrels"``) whose splits should be inspected.

    Returns:
        The name of the split to pass to ``load_dataset``.

    Raises:
        ValueError: If ``self.split`` is not available and the config has
            more than one split, so no unambiguous choice can be made.
            (Previously this case fell through and implicitly returned
            ``None`` despite the ``-> str`` annotation, causing confusing
            failures in callers that pass the result as ``split=``.)
    """
    splits = get_dataset_split_names(
        self.hf_repo,
        revision=self.revision,
        config_name=config,
    )
    if self.split in splits:
        return self.split
    if len(splits) == 1:
        return splits[0]
    raise ValueError(
        f"Split '{self.split}' not found for config '{config}' of "
        f"'{self.hf_repo}'; available splits: {splits}"
    )

def _load_corpus(self, config: str | None = None):
config = f"{config}-corpus" if config is not None else "corpus"
corpus_ds = load_dataset(
self.hf_repo,
config,
split=self.get_split(config),
trust_remote_code=self.trust_remote_code,
revision=self.revision,
)
corpus_ds = next(iter(corpus_ds.values())) # get first split
corpus_ds = corpus_ds.cast_column("_id", Value("string"))
corpus_ds = corpus_ds.rename_column("_id", "id")
corpus_ds = corpus_ds.remove_columns(
Expand All @@ -131,26 +149,27 @@ def _load_queries(self, config: str | None = None):
queries_ds = load_dataset(
self.hf_repo,
config,
split=self.get_split(config),
trust_remote_code=self.trust_remote_code,
revision=self.revision,
)
queries_ds = next(iter(queries_ds.values())) # get first split
queries_ds = queries_ds.cast_column("_id", Value("string"))
queries_ds = queries_ds.rename_column("_id", "id")
queries_ds = queries_ds.remove_columns(
[col for col in queries_ds.column_names if col not in ["id", "text"]]
)
self.queries = queries_ds

def _load_qrels(self, split: str, config: str | None = None):
def _load_qrels(self, config: str | None = None):
config = f"{config}-qrels" if config is not None else "default"

qrels_ds = load_dataset(
self.hf_repo,
name=config,
split=self.get_split(config),
trust_remote_code=self.trust_remote_code,
revision=self.revision,
)[split]
)

features = Features(
{
Expand All @@ -167,11 +186,11 @@ def _load_top_ranked(self, config: str | None = None):
top_ranked_ds = load_dataset(
self.hf_repo,
config,
split=self.get_split(config),
trust_remote_code=self.trust_remote_code,
revision=self.revision,
)

top_ranked_ds = next(iter(top_ranked_ds.values())) # get first split
if (
"query-id" in top_ranked_ds.column_names
and "corpus-ids" in top_ranked_ds.column_names
Expand Down Expand Up @@ -205,10 +224,10 @@ def _load_instructions(self, config: str | None = None):
instructions_ds = load_dataset(
self.hf_repo,
config,
split=self.get_split(config),
trust_remote_code=self.trust_remote_code,
revision=self.revision,
)
instructions_ds = next(iter(instructions_ds.values()))
instructions_ds = instructions_ds.cast_column("query-id", Value("string"))
instructions_ds = instructions_ds.cast_column("instruction", Value("string"))
instructions_ds = instructions_ds.remove_columns(
Expand Down
4 changes: 1 addition & 3 deletions mteb/overview.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@

import pandas as pd

from mteb.abstasks import AbsTask, AbsTaskMultilabelClassification, AbsTaskReranking
from mteb.abstasks import AbsTask, AbsTaskMultilabelClassification
from mteb.abstasks.TaskMetadata import TASK_CATEGORY, TASK_DOMAIN, TASK_TYPE
from mteb.languages import (
ISO_TO_LANGUAGE,
Expand All @@ -27,14 +27,12 @@
def create_task_list() -> list[type[AbsTask]]:
# reranking subclasses retrieval to share methods, but is an abstract task
tasks_categories_cls = list(AbsTask.__subclasses__()) + [
AbsTaskReranking,
AbsTaskMultilabelClassification,
]
tasks = []
for cat_cls in tasks_categories_cls:
for cls in cat_cls.__subclasses__():
if cat_cls.__name__.startswith("AbsTask") and cls.__name__ not in (
"AbsTaskReranking",
"AbsTaskMultilabelClassification",
):
tasks.append(cls)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class Core17InstructionRetrieval(AbsTaskReranking):
class Core17InstructionRetrieval(AbsTaskRetrieval):
metadata = TaskMetadata(
name="Core17InstructionRetrieval",
description="Measuring retrieval instruction following ability on Core17 narratives for the FollowIR benchmark.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class News21InstructionRetrieval(AbsTaskReranking):
class News21InstructionRetrieval(AbsTaskRetrieval):
metadata = TaskMetadata(
name="News21InstructionRetrieval",
description="Measuring retrieval instruction following ability on News21 narratives for the FollowIR benchmark.",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class Robust04InstructionRetrieval(AbsTaskReranking):
class Robust04InstructionRetrieval(AbsTaskRetrieval):
metadata = TaskMetadata(
name="Robust04InstructionRetrieval",
description="Measuring retrieval instruction following ability on Robust04 narratives for the FollowIR benchmark.",
Expand Down
6 changes: 3 additions & 3 deletions mteb/tasks/InstructionReranking/multilingual/mFollowIR.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval

logger = getLogger(__name__)

Expand Down Expand Up @@ -128,7 +128,7 @@ def load_data(
return (corpus, queries, instructions, relevant_docs, top_ranked)


class mFollowIRCrossLingual(AbsTaskReranking):
class mFollowIRCrossLingual(AbsTaskRetrieval):
metadata = TaskMetadata(
name="mFollowIRCrossLingual",
description="This tasks measures retrieval instruction following ability on NeuCLIR narratives for the mFollowIR benchmark on the Farsi, Russian, and Chinese languages with English queries/instructions.",
Expand Down Expand Up @@ -179,7 +179,7 @@ def load_data(self, **kwargs):
self.data_loaded = True


class mFollowIR(AbsTaskReranking):
class mFollowIR(AbsTaskRetrieval):
metadata = TaskMetadata(
name="mFollowIR",
description="This tasks measures retrieval instruction following ability on NeuCLIR narratives for the mFollowIR benchmark on the Farsi, Russian, and Chinese languages.",
Expand Down
8 changes: 4 additions & 4 deletions mteb/tasks/Reranking/ara/NamaaMrTydiReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,17 +2,17 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class NamaaMrTydiReranking(AbsTaskReranking):
class NamaaMrTydiReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="NamaaMrTydiReranking",
description="Mr. TyDi is a multi-lingual benchmark dataset built on TyDi, covering eleven typologically diverse languages. It is designed for monolingual retrieval, specifically to evaluate ranking with learned dense representations. This dataset adapts the arabic test split for Reranking evaluation purposes by the addition of multiple (Hard) Negatives to each query and positive",
reference="https://huggingface.co/NAMAA-Space",
dataset={
"path": "NAMAA-Space/mteb-eval-mrtydi",
"revision": "502637220a7ad0ecc5c39ff5518d7508d2624af8",
"path": "mteb/NamaaMrTydiReranking",
"revision": "4d574b8caf8463c741b84a293aea8ace67801cdc",
},
type="Reranking",
category="s2s",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/AskUbuntuDupQuestions.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class AskUbuntuDupQuestions(AbsTaskReranking):
class AskUbuntuDupQuestions(AbsTaskRetrieval):
metadata = TaskMetadata(
name="AskUbuntuDupQuestions",
description="AskUbuntu Question Dataset - Questions from AskUbuntu with manual annotations marking pairs of questions as similar or non-similar",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/BIRCOArguAnaReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class BIRCOArguAnaReranking(AbsTaskReranking):
class BIRCOArguAnaReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="BIRCO-ArguAna",
description=(
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/BIRCOClinicalTrialReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class BIRCOClinicalTrialReranking(AbsTaskReranking):
class BIRCOClinicalTrialReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="BIRCO-ClinicalTrial",
description=(
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/BIRCODorisMaeReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class BIRCODorisMaeReranking(AbsTaskReranking):
class BIRCODorisMaeReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="BIRCO-DorisMae",
description=(
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/BIRCORelicReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class BIRCORelicReranking(AbsTaskReranking):
class BIRCORelicReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="BIRCO-Relic",
description=(
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/BIRCOWhatsThatBookReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class BIRCOWhatsThatBookReranking(AbsTaskReranking):
class BIRCOWhatsThatBookReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="BIRCO-WTB",
description=(
Expand Down
7 changes: 3 additions & 4 deletions mteb/tasks/Reranking/eng/MindSmallReranking.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,21 +7,20 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


class MindSmallReranking(AbsTaskReranking):
class MindSmallReranking(AbsTaskRetrieval):
metadata = TaskMetadata(
name="MindSmallReranking",
description="Microsoft News Dataset: A Large-Scale English Dataset for News Recommendation Research",
reference="https://msnews.github.io/assets/doc/ACL2020_MIND.pdf",
dataset={
"path": "mteb/mind_small",
"revision": "59042f120c80e8afa9cdbb224f67076cec0fc9a7",
"path": "mteb/MindSmallReranking",
"revision": "227478e3235572039f4f7661840e059f31ef6eb1",
},
type="Reranking",
category="s2s",
Expand Down
4 changes: 2 additions & 2 deletions mteb/tasks/Reranking/eng/NevIR.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@

from mteb.abstasks.TaskMetadata import TaskMetadata

from ....abstasks.AbsTaskReranking import AbsTaskReranking
from ....abstasks.AbsTaskRetrieval import AbsTaskRetrieval


class NevIR(AbsTaskReranking):
class NevIR(AbsTaskRetrieval):
metadata = TaskMetadata(
name="NevIR",
description="Paired evaluation of real world negation in retrieval, with questions and passages. Since models generally prefer one passage over the other always, there are two questions that the model must get right to understand the negation (hence the `paired_accuracy` metric).",
Expand Down
Loading
Loading