nushackers · shotnothing · Jun 8, 2025 · Jun 8, 2025 · Jun 8, 2025 · Jun 8, 2025
diff --git a/matching/LICENSE b/matching/LICENSE
diff --git a/matching/README.md b/matching/README.md
@@ -28,31 +28,31 @@ mentor_df = pd.DataFrame({
 # 2. Define similarity functions
 def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series):
     '''You can define any similarity function you want, as long as you return a number (you might be
-    able to return other comparable objects but I haven't tested it). Smaller is more similar.
+    able to return other comparable objects but I haven't tested it). Larger is more similar.
 
     Args:
         mentors: list of pd.Series, each representing a mentor
         mentee: pd.Series, representing a single mentee
     '''
     acc = 0
-    acc += sum(
+    acc -= sum( # Penalize any differences (alternatively, you can, and should, use Word Error Rate for a more sensible comparison)
         np.abs(mentor['feat1'] - mentee['feat1'])
         for mentor in mentors
     )
-    acc += sum(
+    acc -= sum( # Penalize any differences (alternatively, you can, and should, use Word Error Rate for a more sensible comparison)
         np.abs(mentor['feat2'] - mentee['feat2'])
         for mentor in mentors
     )
     return acc
 
 def similarity_mentor_mentor(mentor1: pd.Series, mentor2: pd.Series):
-    '''Again, you can define any similarity function you want, as long as you return a number. Smaller is more similar.
+    '''Again, you can define any similarity function you want, as long as you return a number. Larger is more similar.
 
     Args:
         mentor1: pd.Series, representing a single mentor
         mentor2: pd.Series, representing a single mentor
     '''
-    return np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15
+    return -np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15
 
 # 3. Run the matching
 assignments_by_mentor, assignments_by_mentee = manytomany.match(

diff --git a/matching/example.py b/matching/example.py
@@ -19,18 +19,18 @@
 def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series):
     '''You can define any similarity function you want, as long as you return a number (you might be able to return other comparable objects but I haven't tested it).
 
-    Smaller is more similar.
+    Larger is more similar.
 
     Args:
         mentors: list of pd.Series, each representing a mentor
         mentee: pd.Series, representing a single mentee
     '''
     acc = 0
-    acc += sum(
+    acc -= sum( # Penalize every difference
         np.abs(mentor['feat1'] - mentee['feat1'])
         for mentor in mentors
     )
-    acc += sum(
+    acc -= sum( # Penalize every difference
         np.abs(mentor['feat2'] - mentee['feat2'])
         for mentor in mentors
     )
@@ -39,13 +39,13 @@ def similarity_mentee_mentor_group(mentors: list, mentee: pd.Series):
 def similarity_mentor_mentor(mentor1: pd.Series, mentor2: pd.Series):
     '''Again, you can define any similarity function you want, as long as you return a number.
 
-    Smaller is more similar.
+    Larger is more similar.
 
     Args:
         mentor1: pd.Series, representing a single mentor
         mentor2: pd.Series, representing a single mentor
     '''
-    return np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15
+    return -np.abs(mentor1['feat1'] - mentor2['feat1']) * np.abs(mentor1['feat2'] - mentor2['feat2'])**0.15
 
 # 3. Run the matching
 assignments_by_mentor, assignments_by_mentee = manytomany.match(

diff --git a/matching/manytomany/__init__.py b/matching/manytomany/__init__.py
@@ -11,7 +11,7 @@ def group_mentors(mentors: pd.DataFrame,
     Args:
         mentors: pd.DataFrame, representing the mentors
         mentors_per_mentee: int, the number of mentors per mentee
-        similarity_func: callable, a function that takes two pd.Series and returns a number. Smaller is more similar.
+        similarity_func: callable, a function that takes two pd.Series and returns a number. Larger is more similar.
 
     Returns:
         dict, mapping mentor group IDs to lists of mentor IDs
@@ -23,7 +23,7 @@ def group_mentors(mentors: pd.DataFrame,
             if mentor_id1 == mentor_id2:
                 similarity_matrix.loc[mentor_id1, mentor_id2] = np.inf
                 continue
-            similarity_matrix.loc[mentor_id1, mentor_id2] = similarity_func(mentors.loc[mentor_id1], mentors.loc[mentor_id2])
+            similarity_matrix.loc[mentor_id1, mentor_id2] = -similarity_func(mentors.loc[mentor_id1], mentors.loc[mentor_id2])
 
     # Cluster mentors
     n_clusters = len(mentors.index) // mentors_per_mentee
@@ -46,7 +46,7 @@ def match_mentees_to_mentor_groups(mentors: pd.DataFrame,
         mentees: pd.DataFrame, representing the mentees
         mentor_groups: dict, mapping mentor group IDs to lists of mentor IDs
         mentees_per_mentor: int, the number of mentees per mentor
-        similarity_func: callable, a function that takes two pd.Series and returns a number. Smaller is more similar.
+        similarity_func: callable, a function that takes a list of pd.Series (the mentors in a group) and a pd.Series (a single mentee) and returns a number. Larger is more similar.
 
     Returns:
         pd.DataFrame, representing the assignments from mentor POV.
@@ -56,7 +56,7 @@ def match_mentees_to_mentor_groups(mentors: pd.DataFrame,
     similarity_matrix = pd.DataFrame(index=mentor_groups.keys(), columns=mentees.index)
     for mentor_group_id, mentor_group in mentor_groups.items():
         for mentee_id, mentee in mentees.iterrows():
-            similarity_matrix.loc[mentor_group_id, mentee_id] = similarity_func([mentors.iloc[mentor_id] for mentor_id in mentor_group], mentee)
+            similarity_matrix.loc[mentor_group_id, mentee_id] = -similarity_func([mentors.iloc[mentor_id] for mentor_id in mentor_group], mentee)
 
     assignments = pd.DataFrame(index=mentor_groups.keys(), columns=[f'assigned_{i}' for i in range(mentees_per_mentor)])