Skip to content

Change similarity function so larger is more similar #2

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 0 additions & 21 deletions matching/LICENSE

This file was deleted.

10 changes: 5 additions & 5 deletions matching/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,31 +28,31 @@ mentor_df = pd.DataFrame({
# 2. Define similarity functions
def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series):
'''You can define any similarity function you want, as long as you return a number (you might be
able to return other comparable objects but I haven't tested it). Smaller is more similar.
able to return other comparable objects but I haven't tested it). Larger is more similar.

Args:
mentors: list of pd.Series, each representing a mentor
mentee: pd.Series, representing a single mentee
'''
acc = 0
acc += sum(
acc -= sum( # Penalize any differences (alternatively, you can and should use Word Error Rate for more sensible comparison)
np.abs(mentor['feat1'] - mentee['feat1'])
for mentor in mentors
)
acc += sum(
acc -= sum( # Penalize any differences (alternatively, you can and should use Word Error Rate for more sensible comparison)
np.abs(mentor['feat2'] - mentee['feat2'])
for mentor in mentors
)
return acc

def similarity_mentor_mentor(mentor1: pd.Series, mentor2: pd.Series):
'''Again, you can define any similarity function you want, as long as you return a number. Smaller is more similar.
'''Again, you can define any similarity function you want, as long as you return a number. Larger is more similar.

Args:
mentor1: pd.Series, representing a single mentor
mentor2: pd.Series, representing a single mentor
'''
return np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15
return -np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15

# 3. Run the matching
assignments_by_mentor, assignments_by_mentee = manytomany.match(
Expand Down
10 changes: 5 additions & 5 deletions matching/example.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,18 +19,18 @@
def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series):
'''You can define any similarity function you want, as long as you return a number (you might be able to return other comparable objects but I haven't tested it).

Smaller is more similar.
Larger is more similar.

Args:
mentors: list of pd.Series, each representing a mentor
mentee: pd.Series, representing a single mentee
'''
acc = 0
acc += sum(
acc -= sum( # Penalize every difference
np.abs(mentor['feat1'] - mentee['feat1'])
for mentor in mentors
)
acc += sum(
acc -= sum( # Penalize every difference
np.abs(mentor['feat2'] - mentee['feat2'])
for mentor in mentors
)
Expand All @@ -39,13 +39,13 @@ def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series):
def similarity_mentor_mentor(mentor1: pd.Series, mentor2: pd.Series):
'''Again, you can define any similarity function you want, as long as you return a number.

Smaller is more similar.
Larger is more similar.

Args:
mentor1: pd.Series, representing a single mentor
mentor2: pd.Series, representing a single mentor
'''
return np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15
return -np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15

# 3. Run the matching
assignments_by_mentor, assignments_by_mentee = manytomany.match(
Expand Down
8 changes: 4 additions & 4 deletions matching/manytomany/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ def group_mentors(mentors: pd.DataFrame,
Args:
mentors: pd.DataFrame, representing the mentors
mentors_per_mentee: int, the number of mentors per mentee
similarity_func: callable, a function that takes two pd.Series and returns a number. Smaller is more similar.
similarity_func: callable, a function that takes two pd.Series and returns a number. Larger is more similar.

Returns:
dict, mapping mentor group IDs to lists of mentor IDs
Expand All @@ -23,7 +23,7 @@ def group_mentors(mentors: pd.DataFrame,
if mentor_id1 == mentor_id2:
similarity_matrix.loc[mentor_id1, mentor_id2] = np.inf
continue
similarity_matrix.loc[mentor_id1, mentor_id2] = similarity_func(mentors.loc[mentor_id1], mentors.loc[mentor_id2])
similarity_matrix.loc[mentor_id1, mentor_id2] = -similarity_func(mentors.loc[mentor_id1], mentors.loc[mentor_id2])

# Cluster mentors
n_clusters = len(mentors.index) // mentors_per_mentee
Expand All @@ -46,7 +46,7 @@ def match_mentees_to_mentor_groups(mentors: pd.DataFrame,
mentees: pd.DataFrame, representing the mentees
mentor_groups: dict, mapping mentor group IDs to lists of mentor IDs
mentees_per_mentor: int, the number of mentees per mentor
similarity_func: callable, a function that takes two pd.Series and returns a number. Smaller is more similar.
similarity_func: callable, a function that takes a list of pd.Series (the mentors in a group) and a pd.Series (a single mentee) and returns a number. Larger is more similar.

Returns:
pd.DataFrame, representing the assignments from mentor POV.
Expand All @@ -56,7 +56,7 @@ def match_mentees_to_mentor_groups(mentors: pd.DataFrame,
similarity_matrix = pd.DataFrame(index=mentor_groups.keys(), columns=mentees.index)
for mentor_group_id, mentor_group in mentor_groups.items():
for mentee_id, mentee in mentees.iterrows():
similarity_matrix.loc[mentor_group_id, mentee_id] = similarity_func([mentors.iloc[mentor_id] for mentor_id in mentor_group], mentee)
similarity_matrix.loc[mentor_group_id, mentee_id] = -similarity_func([mentors.iloc[mentor_id] for mentor_id in mentor_group], mentee)

assignments = pd.DataFrame(index=mentor_groups.keys(), columns=[f'assigned_{i}' for i in range(mentees_per_mentor)])

Expand Down