Implement 3 different score_mode for poll recommendations computed by Mehestan (#869)

* Compute global scores in 3 modes and store them in EntityCriteriaScore

* Implement ?score_mode parameter on poll recommendations (see the request sketch below)

* fix twitterbot after rebase

* Update admin for EntityCriteriaScore
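
The new ?score_mode parameter selects which of the three stored score sets backs the recommendations. A minimal request sketch; the host and exact route below are assumptions based on Tournesol's public API, not taken from this commit:

import requests

response = requests.get(
    "https://api.tournesol.app/polls/videos/recommendations/",  # assumed route
    params={"score_mode": "all_equal"},  # "default" and "trusted_only" are the other modes
)
print(response.status_code)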
amatissart authored May 3, 2022
1 parent b789114 commit 1cd8131
Showing 16 changed files with 298 additions and 125 deletions.
129 changes: 78 additions & 51 deletions backend/ml/mehestan/global_scores.py
@@ -5,6 +5,7 @@
 import pandas as pd
 
 from ml.inputs import MlInput
+from tournesol.models.entity_score import ScoreMode
 
 from .primitives import BrMean, QrDev, QrMed, QrUnc

@@ -221,16 +222,19 @@ def get_scaling_for_supertrusted(ml_input: MlInput, individual_scores: pd.DataFrame)
     return compute_scaling(df, ml_input=ml_input)
 
 
-def get_global_scores(
+def compute_scaled_scores(
     ml_input: MlInput, individual_scores: pd.DataFrame
 ) -> Tuple[pd.DataFrame, pd.DataFrame]:
     """
-    Returns (global_scores, scalings):
-        - global_scores: Dataframe with columns
+    Returns:
+        - scaled individual scores: Dataframe with columns
+            * `user_id`
             * `entity_id`
             * `score`
             * `uncertainty`
-            * `deviation`
+            * `is_public`
+            * `is_trusted`
+            * `is_supertrusted`
         - scalings: DataFrame with index `entity_id` and columns:
             * `s`: scaling factor
             * `tau`: translation value
@@ -239,7 +243,15 @@ def get_global_scores(
     """
     if len(individual_scores) == 0:
         scores = pd.DataFrame(
-            columns=["entity_id", "score", "uncertainty", "deviation"]
+            columns=[
+                "user_id",
+                "entity_id",
+                "score",
+                "uncertainty",
+                "is_public",
+                "is_trusted",
+                "is_supertrusted",
+            ]
         )
         scalings = pd.DataFrame(columns=["s", "tau", "delta_s", "delta_tau"])
         return scores, scalings
@@ -289,49 +301,69 @@ def get_global_scores(
     df["score"] = df["score"] * df["s"] + df["tau"]
     df.drop(["s", "tau", "delta_s", "delta_tau"], axis=1, inplace=True)
 
-    # Voting weight for non trusted users will be computed per entity
-    df["voting_weight"] = 0
-    df["voting_weight"].mask(
-        (df.is_trusted) & (df.is_public),
-        VOTE_WEIGHT_TRUSTED_PUBLIC,
-        inplace=True,
-    )
-    df["voting_weight"].mask(
-        (df.is_trusted) & (~df.is_public),
-        VOTE_WEIGHT_TRUSTED_PRIVATE,
-        inplace=True,
-    )
+    all_scalings = pd.concat([supertrusted_scaling, non_supertrusted_scaling])
+    return df, all_scalings
+
+
+def get_global_scores(scaled_individual_scores: pd.DataFrame, score_mode: ScoreMode):
+    df = scaled_individual_scores.copy(deep=False)
+
+    if score_mode == ScoreMode.TRUSTED_ONLY:
+        df = df[df["is_trusted"]]
+        df["voting_weight"] = 1
+
+    if score_mode == ScoreMode.ALL_EQUAL:
+        df["voting_weight"] = 1
+
+    if score_mode == ScoreMode.DEFAULT:
+        # Voting weight for non trusted users will be computed per entity
+        df["voting_weight"] = 0
+        df["voting_weight"].mask(
+            (df.is_trusted) & (df.is_public),
+            VOTE_WEIGHT_TRUSTED_PUBLIC,
+            inplace=True,
+        )
+        df["voting_weight"].mask(
+            (df.is_trusted) & (~df.is_public),
+            VOTE_WEIGHT_TRUSTED_PRIVATE,
+            inplace=True,
+        )
 
     global_scores = {}
     for (entity_id, scores) in df.groupby("entity_id"):
-        trusted_weight = scores["voting_weight"].sum()
-        non_trusted_weight = (
-            TOTAL_VOTE_WEIGHT_NONTRUSTED_DEFAULT
-            + TOTAL_VOTE_WEIGHT_NONTRUSTED_FRACTION * trusted_weight
-        )
-        nb_non_trusted_public = (scores["is_public"] & (~scores["is_trusted"])).sum()
-        nb_non_trusted_private = (~scores["is_public"] & (~scores["is_trusted"])).sum()
-
-        if (nb_non_trusted_private > 0) or (nb_non_trusted_public > 0):
-            scores["voting_weight"].mask(
-                scores["is_public"] & (scores["voting_weight"] == 0),
-                min(
-                    VOTE_WEIGHT_TRUSTED_PUBLIC,
-                    2
-                    * non_trusted_weight
-                    / (2 * nb_non_trusted_public + nb_non_trusted_private),
-                ),
-                inplace=True,
-            )
-            scores["voting_weight"].mask(
-                ~scores["is_public"] & (scores["voting_weight"] == 0),
-                min(
-                    VOTE_WEIGHT_TRUSTED_PRIVATE,
-                    non_trusted_weight
-                    / (2 * nb_non_trusted_public + nb_non_trusted_private),
-                ),
-                inplace=True,
-            )
+        if score_mode == ScoreMode.DEFAULT:
+            trusted_weight = scores["voting_weight"].sum()
+            non_trusted_weight = (
+                TOTAL_VOTE_WEIGHT_NONTRUSTED_DEFAULT
+                + TOTAL_VOTE_WEIGHT_NONTRUSTED_FRACTION * trusted_weight
+            )
+            nb_non_trusted_public = (
+                scores["is_public"] & (~scores["is_trusted"])
+            ).sum()
+            nb_non_trusted_private = (
+                ~scores["is_public"] & (~scores["is_trusted"])
+            ).sum()
+
+            if (nb_non_trusted_private > 0) or (nb_non_trusted_public > 0):
+                scores["voting_weight"].mask(
+                    scores["is_public"] & (scores["voting_weight"] == 0),
+                    min(
+                        VOTE_WEIGHT_TRUSTED_PUBLIC,
+                        2
+                        * non_trusted_weight
+                        / (2 * nb_non_trusted_public + nb_non_trusted_private),
+                    ),
+                    inplace=True,
+                )
+                scores["voting_weight"].mask(
+                    ~scores["is_public"] & (scores["voting_weight"] == 0),
+                    min(
+                        VOTE_WEIGHT_TRUSTED_PRIVATE,
+                        non_trusted_weight
+                        / (2 * nb_non_trusted_public + nb_non_trusted_private),
+                    ),
+                    inplace=True,
+                )
 
         w = scores.voting_weight
         theta = scores.score
@@ -345,14 +377,9 @@ def get_global_scores(
             "deviation": rho_deviation,
         }
 
-    all_scalings = pd.concat([supertrusted_scaling, non_supertrusted_scaling])
-
     if len(global_scores) == 0:
-        scores = pd.DataFrame(
-            columns=["entity_id", "score", "uncertainty", "deviation"]
-        )
-        return scores, all_scalings
+        return pd.DataFrame(columns=["entity_id", "score", "uncertainty", "deviation"])
 
     result = pd.DataFrame.from_dict(global_scores, orient="index")
     result.index.name = "entity_id"
-    return result.reset_index(), all_scalings
+    return result.reset_index()
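
The heart of the change is the new get_global_scores above: voting weights now depend on the score mode. Below is a self-contained sketch of that dispatch on a toy DataFrame; the string modes stand in for the ScoreMode enum, and the two weight constants are placeholders for the real values defined in global_scores.py:

import pandas as pd

VOTE_WEIGHT_TRUSTED_PUBLIC = 1.0   # placeholder value
VOTE_WEIGHT_TRUSTED_PRIVATE = 0.5  # placeholder value

def assign_voting_weights(scaled: pd.DataFrame, score_mode: str) -> pd.DataFrame:
    df = scaled.copy()
    if score_mode == "trusted_only":
        # Only trusted users vote, all with the same weight
        df = df[df["is_trusted"]].copy()
        df["voting_weight"] = 1.0
    elif score_mode == "all_equal":
        # Every contributor votes with the same weight
        df["voting_weight"] = 1.0
    else:  # "default": fixed weights for trusted users, others weighted per entity later
        df["voting_weight"] = 0.0
        df.loc[df["is_trusted"] & df["is_public"], "voting_weight"] = VOTE_WEIGHT_TRUSTED_PUBLIC
        df.loc[df["is_trusted"] & ~df["is_public"], "voting_weight"] = VOTE_WEIGHT_TRUSTED_PRIVATE
    return df

scaled = pd.DataFrame({
    "user_id": [1, 2, 3],
    "entity_id": ["video_a", "video_a", "video_a"],
    "score": [0.8, -0.2, 0.5],
    "is_public": [True, False, True],
    "is_trusted": [True, True, False],
})
for mode in ("default", "all_equal", "trusted_only"):
    print(mode, assign_voting_weights(scaled, mode)["voting_weight"].tolist())

In DEFAULT mode the non-trusted rows keep weight 0 at this stage because their weights are assigned per entity in the groupby loop above, proportional to and capped by the trusted weight mass.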
44 changes: 22 additions & 22 deletions backend/ml/mehestan/run.py
@@ -16,8 +16,9 @@
     save_tournesol_score_as_sum_of_criteria,
 )
 from tournesol.models import Poll
+from tournesol.models.entity_score import ScoreMode
 
-from .global_scores import get_global_scores
+from .global_scores import compute_scaled_scores, get_global_scores
 from .individual import compute_individual_score
 
 logger = logging.getLogger(__name__)
@@ -43,18 +44,6 @@ def get_individual_scores(
     return result[["user_id", "entity_id", "score", "uncertainty"]]
 
 
-def compute_mehestan_scores(ml_input, criteria):
-    indiv_scores = get_individual_scores(ml_input, criteria=criteria)
-    logger.debug("Individual scores computed for crit '%s'", criteria)
-    global_scores, scalings = get_global_scores(
-        ml_input, individual_scores=indiv_scores
-    )
-    logger.debug("Global scores computed for crit '%s'", criteria)
-    indiv_scores["criteria"] = criteria
-    global_scores["criteria"] = criteria
-    return indiv_scores, global_scores, scalings
-
-
 def update_user_scores(poll: Poll, user: User):
     ml_input = MlInputFromDb(poll_name=poll.name)
     for criteria in poll.criterias_list:
@@ -77,20 +66,31 @@ def _run_mehestan_for_criterion(criteria: str, ml_input: MlInput, poll_pk: int):
         poll.name,
         criteria,
     )
-    indiv_scores, global_scores, scalings = compute_mehestan_scores(
-        ml_input, criteria=criteria
-    )
-    logger.info(
-        "Mehestan for poll '%s': scores computed for crit '%s'",
-        poll.name,
-        criteria,
-    )
+
+    indiv_scores = get_individual_scores(ml_input, criteria=criteria)
+    logger.debug("Individual scores computed for crit '%s'", criteria)
+    scaled_scores, scalings = compute_scaled_scores(
+        ml_input, individual_scores=indiv_scores
+    )
+
+    indiv_scores["criteria"] = criteria
     save_contributor_scalings(poll, criteria, scalings)
     save_contributor_scores(poll, indiv_scores, single_criteria=criteria)
-    save_entity_scores(poll, global_scores, single_criteria=criteria)
+
+    for mode in ScoreMode:
+        global_scores = get_global_scores(scaled_scores, score_mode=mode)
+        global_scores["criteria"] = criteria
+
+        logger.info(
+            "Mehestan for poll '%s': scores computed for crit '%s' and mode '%s'",
+            poll.name,
+            criteria,
+            mode,
+        )
+        save_entity_scores(poll, global_scores, single_criteria=criteria, score_mode=mode)
+
     logger.info(
-        "Mehestan for poll '%s': scores saved for crit '%s'",
+        "Mehestan for poll '%s': done with crit '%s'",
         poll.name,
         criteria,
     )
13 changes: 8 additions & 5 deletions backend/ml/outputs.py
@@ -14,10 +14,12 @@
     EntityCriteriaScore,
     Poll,
 )
+from tournesol.models.entity_score import ScoreMode
 
 
 def save_entity_scores(
-    poll, entity_scores: Union[pd.DataFrame, Iterable[tuple]], single_criteria=None
+    poll, entity_scores: Union[pd.DataFrame, Iterable[tuple]], single_criteria=None,
+    score_mode=ScoreMode.DEFAULT
 ):
     if isinstance(entity_scores, pd.DataFrame):
         scores_iterator = entity_scores[
@@ -30,7 +32,7 @@ def save_entity_scores(
         scores_iterator = (t if len(t) == 5 else t + (None,) for t in scores_iterator)
 
     with transaction.atomic():
-        scores_to_delete = EntityCriteriaScore.objects.filter(poll=poll)
+        scores_to_delete = EntityCriteriaScore.objects.filter(poll=poll, score_mode=score_mode)
         if single_criteria:
             scores_to_delete = scores_to_delete.filter(criteria=single_criteria)
         scores_to_delete.delete()
@@ -44,6 +46,7 @@ def save_entity_scores(
                     score=score,
                     uncertainty=uncertainty,
                     deviation=deviation,
+                    score_mode=score_mode,
                 )
                 for entity_id, criteria, score, uncertainty, deviation in scores_iterator
             ),
@@ -54,12 +57,12 @@
 
 def save_tournesol_score_as_sum_of_criteria(poll):
     entities = []
     for entity in (
-        Entity.objects.filter(criteria_scores__poll=poll)
+        Entity.objects.filter(all_criteria_scores__poll=poll)
         .distinct()
-        .prefetch_related("criteria_scores")
+        .with_prefetched_scores(poll_name=poll.name)
     ):
         entity.tournesol_score = 10 * sum(
-            criterion.score for criterion in entity.criteria_scores.all()
+            criterion.score for criterion in entity.criteria_scores
         )
         entities.append(entity)
     Entity.objects.bulk_update(entities, ["tournesol_score"])
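
The loop above now reads entity.criteria_scores without calling .all(), which suggests with_prefetched_scores materializes a plain list attribute via Prefetch(..., to_attr=...). A hypothetical sketch of such a queryset method, assuming it keeps only DEFAULT-mode scores; the real implementation lives on the Entity queryset, outside this diff:

from django.db.models import Prefetch

from tournesol.models import EntityCriteriaScore
from tournesol.models.entity_score import ScoreMode

def with_prefetched_scores(self, poll_name):
    # Hypothetical: prefetch default-mode scores into `entity.criteria_scores`
    return self.prefetch_related(
        Prefetch(
            "all_criteria_scores",
            queryset=EntityCriteriaScore.objects.filter(
                poll__name=poll_name, score_mode=ScoreMode.DEFAULT
            ),
            to_attr="criteria_scores",
        )
    )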
9 changes: 9 additions & 0 deletions backend/tournesol/admin.py
@@ -108,12 +108,21 @@ def get_language(obj):
 class EntityCriteriaScoreAdmin(admin.ModelAdmin):
     list_display = (
         'entity',
+        'poll',
         'criteria',
+        'score_mode',
         'score'
     )
+    list_filter = (
+        'poll',
+        'score_mode',
+    )
     search_fields = (
         'entity__uid',
     )
+    raw_id_fields = (
+        'entity',
+    )
 
 
 @admin.register(ContributorRating)
32 changes: 32 additions & 0 deletions backend/tournesol/migrations/…
@@ -0,0 +1,32 @@
+# Generated by Django 4.0.3 on 2022-04-29 13:23
+
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('tournesol', '0037_contributorscaling'),
+    ]
+
+    operations = [
+        migrations.AlterUniqueTogether(
+            name='entitycriteriascore',
+            unique_together=set(),
+        ),
+        migrations.AddField(
+            model_name='entitycriteriascore',
+            name='score_mode',
+            field=models.CharField(choices=[('default', 'Default'), ('all_equal', 'All Equal'), ('trusted_only', 'Trusted Only')], default='default', max_length=30),
+        ),
+        migrations.AlterField(
+            model_name='entitycriteriascore',
+            name='entity',
+            field=models.ForeignKey(help_text='Foreign key to the video', on_delete=django.db.models.deletion.CASCADE, related_name='all_criteria_scores', to='tournesol.entity'),
+        ),
+        migrations.AlterUniqueTogether(
+            name='entitycriteriascore',
+            unique_together={('entity', 'poll', 'criteria', 'score_mode')},
+        ),
+    ]
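
The ScoreMode enum imported throughout this commit is not itself part of the visible diff. Reconstructed from the migration's choices, it is plausibly a Django TextChoices; this is an assumption, the actual definition lives in tournesol/models/entity_score.py:

from django.db import models

class ScoreMode(models.TextChoices):
    # TextChoices derives the labels "Default", "All Equal" and "Trusted Only"
    # from the member names, matching the choices in the migration above.
    DEFAULT = "default"
    ALL_EQUAL = "all_equal"
    TRUSTED_ONLY = "trusted_only"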