gender bias

A minimal check for gender bias in a binary classifier: compare per-group positive-prediction rates, per-group confusion matrices, and the demographic-parity gap.
import pandas as pd
from sklearn.metrics import confusion_matrix

# Simulated model predictions and ground-truth labels with gender information
data = pd.DataFrame({
    'gender': ['male', 'female', 'male', 'female', 'male', 'female'],
    'actual_label': [1, 0, 1, 1, 0, 0],     # Ground truth
    'predicted_label': [1, 0, 1, 0, 1, 0],  # Model predictions
})

# Positive-prediction rate for each gender
male_predictions = data.loc[data['gender'] == 'male', 'predicted_label']
female_predictions = data.loc[data['gender'] == 'female', 'predicted_label']
print(f"Male Prediction Mean: {male_predictions.mean()}")
print(f"Female Prediction Mean: {female_predictions.mean()}")

# Confusion matrix per gender; labels=[0, 1] fixes the matrix layout even if
# a group happens to contain only one class
cm_male = confusion_matrix(data.loc[data['gender'] == 'male', 'actual_label'],
                           male_predictions, labels=[0, 1])
cm_female = confusion_matrix(data.loc[data['gender'] == 'female', 'actual_label'],
                             female_predictions, labels=[0, 1])
print(f"Confusion Matrix for males: \n{cm_male}")
print(f"Confusion Matrix for females: \n{cm_female}")

# Demographic parity: absolute gap in positive-prediction rates between groups
parity = abs(male_predictions.mean() - female_predictions.mean())
print(f"Demographic Parity (Difference in Positive Prediction Rates): {parity}")