Implement 3 different score_mode for poll recommendations computed by Mehestan (#869)

* Compute global scores in 3 modes and store them in EntityCriteriaScore

* Implement ?score_mode parameter on poll recommendations (see the request sketch below)

* fix twitterbot after rebase

* Update admin for EntityCriteriaScore
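
The new ?score_mode parameter selects which of the three stored score sets backs the recommendations. A minimal request sketch; the host and exact route below are assumptions based on Tournesol's public API, not taken from this commit:

import requests

response = requests.get(
    "https://api.tournesol.app/polls/videos/recommendations/",  # assumed route
    params={"score_mode": "all_equal"},  # "default" and "trusted_only" are the other modes
)
print(response.status_code)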
amatissart authored May 3, 2022
1 parent b789114 commit 1cd8131
Showing 16 changed files with 298 additions and 125 deletions.
129 changes: 78 additions & 51 deletions backend/ml/mehestan/global_scores.py
@@ -5,6 +5,7 @@
 import pandas as pd
 
 from ml.inputs import MlInput
+from tournesol.models.entity_score import ScoreMode
 
 from .primitives import BrMean, QrDev, QrMed, QrUnc

@@ -221,16 +222,19 @@ def get_scaling_for_supertrusted(ml_input: MlInput, individual_scores: pd.DataFrame)
     return compute_scaling(df, ml_input=ml_input)
 
 
-def get_global_scores(
+def compute_scaled_scores(
     ml_input: MlInput, individual_scores: pd.DataFrame
 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
-    Returns (global_scores, scalings):
-        - global_scores: Dataframe with columns
+    Returns:
+        - scaled individual scores: Dataframe with columns
+            * `user_id`
             * `entity_id`
             * `score`
             * `uncertainty`
-            * `deviation`
+            * `is_public`
+            * `is_trusted`
+            * `is_supertrusted`
         - scalings: DataFrame with index `entity_id` and columns:
             * `s`: scaling factor
             * `tau`: translation value
@@ -239,7 +243,15 @@ def get_global_scores(
     """
     if len(individual_scores) == 0:
         scores = pd.DataFrame(
-            columns=["entity_id", "score", "uncertainty", "deviation"]
+            columns=[
+                "user_id",
+                "entity_id",
+                "score",
+                "uncertainty",
+                "is_public",
+                "is_trusted",
+                "is_supertrusted",
+            ]
         )
         scalings = pd.DataFrame(columns=["s", "tau", "delta_s", "delta_tau"])
         return scores, scalings
@@ -289,49 +301,69 @@ def get_global_scores(
     df["score"] = df["score"] * df["s"] + df["tau"]
     df.drop(["s", "tau", "delta_s", "delta_tau"], axis=1, inplace=True)
 
-    # Voting weight for non trusted users will be computed per entity
-    df["voting_weight"] = 0
-    df["voting_weight"].mask(
-        (df.is_trusted) & (df.is_public),
-        VOTE_WEIGHT_TRUSTED_PUBLIC,
-        inplace=True,
-    )
-    df["voting_weight"].mask(
-        (df.is_trusted) & (~df.is_public),
-        VOTE_WEIGHT_TRUSTED_PRIVATE,
-        inplace=True,
-    )
+    all_scalings = pd.concat([supertrusted_scaling, non_supertrusted_scaling])
+    return df, all_scalings
+
+
+def get_global_scores(scaled_individual_scores: pd.DataFrame, score_mode: ScoreMode):
+    df = scaled_individual_scores.copy(deep=False)
+
+    if score_mode == ScoreMode.TRUSTED_ONLY:
+        df = df[df["is_trusted"]]
+        df["voting_weight"] = 1
+
+    if score_mode == ScoreMode.ALL_EQUAL:
+        df["voting_weight"] = 1
+
+    if score_mode == ScoreMode.DEFAULT:
+        # Voting weight for non trusted users will be computed per entity
+        df["voting_weight"] = 0
+        df["voting_weight"].mask(
+            (df.is_trusted) & (df.is_public),
+            VOTE_WEIGHT_TRUSTED_PUBLIC,
+            inplace=True,
+        )
+        df["voting_weight"].mask(
+            (df.is_trusted) & (~df.is_public),
+            VOTE_WEIGHT_TRUSTED_PRIVATE,
+            inplace=True,
+        )
 
     global_scores = {}
     for (entity_id, scores) in df.groupby("entity_id"):
-        trusted_weight = scores["voting_weight"].sum()
-        non_trusted_weight = (
-            TOTAL_VOTE_WEIGHT_NONTRUSTED_DEFAULT
-            + TOTAL_VOTE_WEIGHT_NONTRUSTED_FRACTION * trusted_weight
-        )
-        nb_non_trusted_public = (scores["is_public"] & (~scores["is_trusted"])).sum()
-        nb_non_trusted_private = (~scores["is_public"] & (~scores["is_trusted"])).sum()
-
-        if (nb_non_trusted_private > 0) or (nb_non_trusted_public > 0):
-            scores["voting_weight"].mask(
-                scores["is_public"] & (scores["voting_weight"] == 0),
-                min(
-                    VOTE_WEIGHT_TRUSTED_PUBLIC,
-                    2
-                    * non_trusted_weight
-                    / (2 * nb_non_trusted_public + nb_non_trusted_private),
-                ),
-                inplace=True,
-            )
-            scores["voting_weight"].mask(
-                ~scores["is_public"] & (scores["voting_weight"] == 0),
-                min(
-                    VOTE_WEIGHT_TRUSTED_PRIVATE,
-                    non_trusted_weight
-                    / (2 * nb_non_trusted_public + nb_non_trusted_private),
-                ),
-                inplace=True,
-            )
+        if score_mode == ScoreMode.DEFAULT:
+            trusted_weight = scores["voting_weight"].sum()
+            non_trusted_weight = (
+                TOTAL_VOTE_WEIGHT_NONTRUSTED_DEFAULT
+                + TOTAL_VOTE_WEIGHT_NONTRUSTED_FRACTION * trusted_weight
+            )
+            nb_non_trusted_public = (
+                scores["is_public"] & (~scores["is_trusted"])
+            ).sum()
+            nb_non_trusted_private = (
+                ~scores["is_public"] & (~scores["is_trusted"])
+            ).sum()
+
+            if (nb_non_trusted_private > 0) or (nb_non_trusted_public > 0):
+                scores["voting_weight"].mask(
+                    scores["is_public"] & (scores["voting_weight"] == 0),
+                    min(
+                        VOTE_WEIGHT_TRUSTED_PUBLIC,
+                        2
+                        * non_trusted_weight
+                        / (2 * nb_non_trusted_public + nb_non_trusted_private),
+                    ),
+                    inplace=True,
+                )
+                scores["voting_weight"].mask(
+                    ~scores["is_public"] & (scores["voting_weight"] == 0),
+                    min(
+                        VOTE_WEIGHT_TRUSTED_PRIVATE,
+                        non_trusted_weight
+                        / (2 * nb_non_trusted_public + nb_non_trusted_private),
+                    ),
+                    inplace=True,
+                )
 
         w = scores.voting_weight
         theta = scores.score
@@ -345,14 +377,9 @@ def get_global_scores(
             "deviation": rho_deviation,
         }
 
-    all_scalings = pd.concat([supertrusted_scaling, non_supertrusted_scaling])
-
     if len(global_scores) == 0:
-        scores = pd.DataFrame(
-            columns=["entity_id", "score", "uncertainty", "deviation"]
-        )
-        return scores, all_scalings
+        return pd.DataFrame(columns=["entity_id", "score", "uncertainty", "deviation"])
 
     result = pd.DataFrame.from_dict(global_scores, orient="index")
     result.index.name = "entity_id"
-    return result.reset_index(), all_scalings
+    return result.reset_index()
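
The heart of the change is the new get_global_scores above: voting weights now depend on the score mode. Below is a self-contained sketch of that dispatch on a toy DataFrame; the string modes stand in for the ScoreMode enum, and the two weight constants are placeholders for the real values defined in global_scores.py:

import pandas as pd

VOTE_WEIGHT_TRUSTED_PUBLIC = 1.0   # placeholder value
VOTE_WEIGHT_TRUSTED_PRIVATE = 0.5  # placeholder value

def assign_voting_weights(scaled: pd.DataFrame, score_mode: str) -> pd.DataFrame:
    df = scaled.copy()
    if score_mode == "trusted_only":
        # Only trusted users vote, all with the same weight
        df = df[df["is_trusted"]].copy()
        df["voting_weight"] = 1.0
    elif score_mode == "all_equal":
        # Every contributor votes with the same weight
        df["voting_weight"] = 1.0
    else:  # "default": fixed weights for trusted users, others weighted per entity later
        df["voting_weight"] = 0.0
        df.loc[df["is_trusted"] & df["is_public"], "voting_weight"] = VOTE_WEIGHT_TRUSTED_PUBLIC
        df.loc[df["is_trusted"] & ~df["is_public"], "voting_weight"] = VOTE_WEIGHT_TRUSTED_PRIVATE
    return df

scaled = pd.DataFrame({
    "user_id": [1, 2, 3],
    "entity_id": ["video_a", "video_a", "video_a"],
    "score": [0.8, -0.2, 0.5],
    "is_public": [True, False, True],
    "is_trusted": [True, True, False],
})
for mode in ("default", "all_equal", "trusted_only"):
    print(mode, assign_voting_weights(scaled, mode)["voting_weight"].tolist())

In DEFAULT mode the non-trusted rows keep weight 0 at this stage because their weights are assigned per entity in the groupby loop above, proportional to and capped by the trusted weight mass.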
44 changes: 22 additions & 22 deletions backend/ml/mehestan/run.py
@@ -16,8 +16,9 @@
     save_tournesol_score_as_sum_of_criteria,
 )
 from tournesol.models import Poll
+from tournesol.models.entity_score import ScoreMode
 
-from .global_scores import get_global_scores
+from .global_scores import compute_scaled_scores, get_global_scores
 from .individual import compute_individual_score
 
 logger = logging.getLogger(__name__)
@@ -43,18 +44,6 @@ def get_individual_scores(
     return result[["user_id", "entity_id", "score", "uncertainty"]]
 
 
-def compute_mehestan_scores(ml_input, criteria):
-    indiv_scores = get_individual_scores(ml_input, criteria=criteria)
-    logger.debug("Individual scores computed for crit '%s'", criteria)
-    global_scores, scalings = get_global_scores(
-        ml_input, individual_scores=indiv_scores
-    )
-    logger.debug("Global scores computed for crit '%s'", criteria)
-    indiv_scores["criteria"] = criteria
-    global_scores["criteria"] = criteria
-    return indiv_scores, global_scores, scalings
-
-
 def update_user_scores(poll: Poll, user: User):
     ml_input = MlInputFromDb(poll_name=poll.name)
     for criteria in poll.criterias_list:
@@ -77,20 +66,31 @@ def _run_mehestan_for_criterion(criteria: str, ml_input: MlInput, poll_pk: int):
         poll.name,
         criteria,
     )
-    indiv_scores, global_scores, scalings = compute_mehestan_scores(
-        ml_input, criteria=criteria
-    )
-    logger.info(
-        "Mehestan for poll '%s': scores computed for crit '%s'",
-        poll.name,
-        criteria,
-    )
+
+    indiv_scores = get_individual_scores(ml_input, criteria=criteria)
+    logger.debug("Individual scores computed for crit '%s'", criteria)
+    scaled_scores, scalings = compute_scaled_scores(
+        ml_input, individual_scores=indiv_scores
+    )
+
+    indiv_scores["criteria"] = criteria
     save_contributor_scalings(poll, criteria, scalings)
     save_contributor_scores(poll, indiv_scores, single_criteria=criteria)
-    save_entity_scores(poll, global_scores, single_criteria=criteria)
+
+    for mode in ScoreMode:
+        global_scores = get_global_scores(scaled_scores, score_mode=mode)
+        global_scores["criteria"] = criteria
+
+        logger.info(
+            "Mehestan for poll '%s': scores computed for crit '%s' and mode '%s'",
+            poll.name,
+            criteria,
+            mode,
+        )
+        save_entity_scores(poll, global_scores, single_criteria=criteria, score_mode=mode)
+
     logger.info(
-        "Mehestan for poll '%s': scores saved for crit '%s'",
+        "Mehestan for poll '%s': done with crit '%s'",
         poll.name,
         criteria,
     )
13 changes: 8 additions & 5 deletions backend/ml/outputs.py
@@ -14,10 +14,12 @@
     EntityCriteriaScore,
     Poll,
 )
+from tournesol.models.entity_score import ScoreMode
 
 
 def save_entity_scores(
-    poll, entity_scores: Union[pd.DataFrame, Iterable[tuple]], single_criteria=None
+    poll, entity_scores: Union[pd.DataFrame, Iterable[tuple]], single_criteria=None,
+    score_mode=ScoreMode.DEFAULT
 ):
     if isinstance(entity_scores, pd.DataFrame):
         scores_iterator = entity_scores[
@@ -30,7 +32,7 @@ def save_entity_scores(
         scores_iterator = (t if len(t) == 5 else t + (None,) for t in scores_iterator)
 
     with transaction.atomic():
-        scores_to_delete = EntityCriteriaScore.objects.filter(poll=poll)
+        scores_to_delete = EntityCriteriaScore.objects.filter(poll=poll, score_mode=score_mode)
         if single_criteria:
             scores_to_delete = scores_to_delete.filter(criteria=single_criteria)
         scores_to_delete.delete()
@@ -44,6 +46,7 @@ def save_entity_scores(
                     score=score,
                     uncertainty=uncertainty,
                     deviation=deviation,
+                    score_mode=score_mode,
                 )
                 for entity_id, criteria, score, uncertainty, deviation in scores_iterator
             ),
@@ -54,12 +57,12 @@
 
 def save_tournesol_score_as_sum_of_criteria(poll):
     entities = []
     for entity in (
-        Entity.objects.filter(criteria_scores__poll=poll)
+        Entity.objects.filter(all_criteria_scores__poll=poll)
         .distinct()
-        .prefetch_related("criteria_scores")
+        .with_prefetched_scores(poll_name=poll.name)
     ):
         entity.tournesol_score = 10 * sum(
-            criterion.score for criterion in entity.criteria_scores.all()
+            criterion.score for criterion in entity.criteria_scores
         )
         entities.append(entity)
     Entity.objects.bulk_update(entities, ["tournesol_score"])
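
The loop above now reads entity.criteria_scores without calling .all(), which suggests with_prefetched_scores materializes a plain list attribute via Prefetch(..., to_attr=...). A hypothetical sketch of such a queryset method, assuming it keeps only DEFAULT-mode scores; the real implementation lives on the Entity queryset, outside this diff:

from django.db.models import Prefetch

from tournesol.models import EntityCriteriaScore
from tournesol.models.entity_score import ScoreMode

def with_prefetched_scores(self, poll_name):
    # Hypothetical: prefetch default-mode scores into `entity.criteria_scores`
    return self.prefetch_related(
        Prefetch(
            "all_criteria_scores",
            queryset=EntityCriteriaScore.objects.filter(
                poll__name=poll_name, score_mode=ScoreMode.DEFAULT
            ),
            to_attr="criteria_scores",
        )
    )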
9 changes: 9 additions & 0 deletions backend/tournesol/admin.py
@@ -108,12 +108,21 @@ def get_language(obj):
 class EntityCriteriaScoreAdmin(admin.ModelAdmin):
     list_display = (
         'entity',
+        'poll',
         'criteria',
+        'score_mode',
         'score'
     )
+    list_filter = (
+        'poll',
+        'score_mode',
+    )
     search_fields = (
         'entity__uid',
     )
+    raw_id_fields = (
+        'entity',
+    )
 
 
 @admin.register(ContributorRating)
32 changes: 32 additions & 0 deletions backend/tournesol/migrations/…
@@ -0,0 +1,32 @@
+# Generated by Django 4.0.3 on 2022-04-29 13:23
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('tournesol', '0037_contributorscaling'),
+    ]
+
+    operations = [
+        migrations.AlterUniqueTogether(
+            name='entitycriteriascore',
+            unique_together=set(),
+        ),
+        migrations.AddField(
+            model_name='entitycriteriascore',
+            name='score_mode',
+            field=models.CharField(choices=[('default', 'Default'), ('all_equal', 'All Equal'), ('trusted_only', 'Trusted Only')], default='default', max_length=30),
+        ),
+        migrations.AlterField(
+            model_name='entitycriteriascore',
+            name='entity',
+            field=models.ForeignKey(help_text='Foreign key to the video', on_delete=django.db.models.deletion.CASCADE, related_name='all_criteria_scores', to='tournesol.entity'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='entitycriteriascore',
+            unique_together={('entity', 'poll', 'criteria', 'score_mode')},
+        ),
+    ]
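
The ScoreMode enum imported throughout this commit is not itself part of the visible diff. Reconstructed from the migration's choices, it is plausibly a Django TextChoices; this is an assumption, the actual definition lives in tournesol/models/entity_score.py:

from django.db import models

class ScoreMode(models.TextChoices):
    # TextChoices derives the labels "Default", "All Equal" and "Trusted Only"
    # from the member names, matching the choices in the migration above.
    DEFAULT = "default"
    ALL_EQUAL = "all_equal"
    TRUSTED_ONLY = "trusted_only"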