Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Outputs to slots #4

Merged
merged 10 commits into from
Mar 15, 2018
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -99,3 +99,4 @@ ENV/

# mypy
.mypy_cache/
*.xlsx
12 changes: 4 additions & 8 deletions data/NCAATourneySeeds.csv
Original file line number Diff line number Diff line change
Expand Up @@ -2159,14 +2159,12 @@ Season,Seed,TeamID
2018,W08,1439
2018,W09,1104
2018,W10,1139
2018,W11a,1382
2018,W11b,1417
2018,W11,1382
2018,W12,1293
2018,W13,1267
2018,W14,1372
2018,W15,1168
2018,W16a,1254
2018,W16b,1347
2018,W16,1347
2018,X01,1242
2018,X02,1181
2018,X03,1277
Expand All @@ -2177,8 +2175,7 @@ Season,Seed,TeamID
2018,X08,1371
2018,X09,1301
2018,X10,1328
2018,X11a,1113
2018,X11b,1393
2018,X11,1393
2018,X12,1308
2018,X13,1158
2018,X14,1137
Expand Down Expand Up @@ -2215,5 +2212,4 @@ Season,Seed,TeamID
2018,Z13,1422
2018,Z14,1285
2018,Z15,1252
2018,Z16a,1300
2018,Z16b,1411
2018,Z16,1411
68 changes: 0 additions & 68 deletions data/NCAATourneySlots_Detailed.csv

This file was deleted.

65 changes: 65 additions & 0 deletions data/NCAATourneySlots_Detailed_2018.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
Season,Slot,NextSlot,NextSeed,StrongSeed,WeakSeed,StrongTeamID,WeakTeamID,StrongTeamName,WeakTeamName
2018,R1W1,R2W1,Strong,W01,W16,1437,1347,Villanova,Radford
2018,R1W2,R2W2,Strong,W02,W15,1345,1168,Purdue,CS Fullerton
2018,R1W3,R2W3,Strong,W03,W14,1403,1372,Texas Tech,SF Austin
2018,R1W4,R2W4,Strong,W04,W13,1455,1267,Wichita St,Marshall
2018,R1W5,R2W4,Weak,W05,W12,1452,1293,West Virginia,Murray St
2018,R1W6,R2W3,Weak,W06,W11,1196,1382,Florida,St Bonaventure
2018,R1W7,R2W2,Weak,W07,W10,1116,1139,Arkansas,Butler
2018,R1W8,R2W1,Weak,W08,W09,1439,1104,Virginia Tech,Alabama
2018,R1X1,R2X1,Strong,X01,X16,1242,1335,Kansas,Penn
2018,R1X2,R2X2,Strong,X02,X15,1181,1233,Duke,Iona
2018,R1X3,R2X3,Strong,X03,X14,1277,1137,Michigan St,Bucknell
2018,R1X4,R2X4,Strong,X04,X13,1120,1158,Auburn,Col Charleston
2018,R1X5,R2X4,Weak,X05,X12,1155,1308,Clemson,New Mexico St
2018,R1X6,R2X3,Weak,X06,X11,1395,1393,TCU,Syracuse
2018,R1X7,R2X2,Weak,X07,X10,1348,1328,Rhode Island,Oklahoma
2018,R1X8,R2X1,Weak,X08,X09,1371,1301,Seton Hall,NC State
2018,R1Y1,R2Y1,Strong,Y01,Y16,1438,1420,Virginia,UMBC
2018,R1Y2,R2Y2,Strong,Y02,Y15,1153,1209,Cincinnati,Georgia St
2018,R1Y3,R2Y3,Strong,Y03,Y14,1397,1460,Tennessee,Wright St
2018,R1Y4,R2Y4,Strong,Y04,Y13,1112,1138,Arizona,Buffalo
2018,R1Y5,R2Y4,Weak,Y05,Y12,1246,1172,Kentucky,Davidson
2018,R1Y6,R2Y3,Weak,Y06,Y11,1274,1260,Miami FL,Loyola-Chicago
2018,R1Y7,R2Y2,Weak,Y07,Y10,1305,1400,Nevada,Texas
2018,R1Y8,R2Y1,Weak,Y08,Y09,1166,1243,Creighton,Kansas St
2018,R1Z1,R2Z1,Strong,Z01,Z16,1462,1411,Xavier,TX Southern
2018,R1Z2,R2Z2,Strong,Z02,Z15,1314,1252,North Carolina,Lipscomb
2018,R1Z3,R2Z3,Strong,Z03,Z14,1276,1285,Michigan,Montana
2018,R1Z4,R2Z4,Strong,Z04,Z13,1211,1422,Gonzaga,UNC Greensboro
2018,R1Z5,R2Z4,Weak,Z05,Z12,1326,1355,Ohio St,S Dakota St
2018,R1Z6,R2Z3,Weak,Z06,Z11,1222,1361,Houston,San Diego St
2018,R1Z7,R2Z2,Weak,Z07,Z10,1401,1344,Texas A&M,Providence
2018,R1Z8,R2Z1,Weak,Z08,Z09,1281,1199,Missouri,Florida St
2018,R2W1,R3W1,Strong,R1W1,R1W8,,,,
2018,R2W2,R3W2,Strong,R1W2,R1W7,,,,
2018,R2W3,R3W2,Weak,R1W3,R1W6,,,,
2018,R2W4,R3W1,Weak,R1W4,R1W5,,,,
2018,R2X1,R3X1,Strong,R1X1,R1X8,,,,
2018,R2X2,R3X2,Strong,R1X2,R1X7,,,,
2018,R2X3,R3X2,Weak,R1X3,R1X6,,,,
2018,R2X4,R3X1,Weak,R1X4,R1X5,,,,
2018,R2Y1,R3Y1,Strong,R1Y1,R1Y8,,,,
2018,R2Y2,R3Y2,Strong,R1Y2,R1Y7,,,,
2018,R2Y3,R3Y2,Weak,R1Y3,R1Y6,,,,
2018,R2Y4,R3Y1,Weak,R1Y4,R1Y5,,,,
2018,R2Z1,R3Z1,Strong,R1Z1,R1Z8,,,,
2018,R2Z2,R3Z2,Strong,R1Z2,R1Z7,,,,
2018,R2Z3,R3Z2,Weak,R1Z3,R1Z6,,,,
2018,R2Z4,R3Z1,Weak,R1Z4,R1Z5,,,,
2018,R3W1,R4W1,Strong,R2W1,R2W4,,,,
2018,R3W2,R4W1,Weak,R2W2,R2W3,,,,
2018,R3X1,R4X1,Strong,R2X1,R2X4,,,,
2018,R3X2,R4X1,Weak,R2X2,R2X3,,,,
2018,R3Y1,R4Y1,Strong,R2Y1,R2Y4,,,,
2018,R3Y2,R4Y1,Weak,R2Y2,R2Y3,,,,
2018,R3Z1,R4Z1,Strong,R2Z1,R2Z4,,,,
2018,R3Z2,R4Z1,Weak,R2Z2,R2Z3,,,,
2018,R4W1,R5WX,Strong,R3W1,R3W2,,,,
2018,R4X1,R5WX,Weak,R3X1,R3X2,,,,
2018,R4Y1,R5YZ,Strong,R3Y1,R3Y2,,,,
2018,R4Z1,R5YZ,Weak,R3Z1,R3Z2,,,,
2018,R5WX,R6CH,Strong,R4W1,R4X1,,,,
2018,R5YZ,R6CH,Weak,R4Y1,R4Z1,,,,
2018,R6CH,R7WIN,Strong,R5WX,R5YZ,,,,
2018,R7WIN,R6CH,,,,,,,
200 changes: 200 additions & 0 deletions join_outputs_to_slots.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,200 @@
import pandas as pd
import csv

# Directory the tournament slot CSV is read from.
folder = 'data'
# Directory holding the model's submission.csv; the output CSVs are written here too.
results_folder = 'results'
# Tournament season being predicted.
prediction_year = 2018
# Accumulates the human-readable report: every entry is a one-element list,
# so each message becomes one single-column row of the readable results CSV.
readable_outputs = []

def format_pred_outputs( pred ):
    '''
    Reshape the model's prediction rows so they can be matched against slot data.

    pred must expose an `id` column formatted as YYYY_TeamAID_TeamBID and a
    `pred` column holding the probability that team A beats team B.
    Returns a new dataframe with the columns
    Year, TeamAID, TeamBID, PredIDConcat, ProbAOverB (the id parts stay strings).
    '''
    print("format prediction outputs")
    column_names = [ 'Year', 'TeamAID', 'TeamBID', 'PredIDConcat', 'ProbAOverB' ]

    def _to_record( record ):
        # the id is "<year>_<team A id>_<team B id>"
        pieces = record.id.split( '_' )
        season, first_id, second_id = pieces[0], pieces[1], pieces[2]
        # PredIDConcat is the "<team A id>_<team B id>" key used for the join
        return [season, first_id, second_id, first_id + "_" + second_id, record.pred]

    records = [_to_record( record ) for record in pred.itertuples(index=False)]
    return pd.DataFrame(records, columns=column_names)

def get_readable_slot( slot_id ):
    '''
    Translate a slot id such as R1W1 or R5WX into a human-readable label.

    The first two characters select the round; the characters after them
    select the region (one letter for rounds below 5, two letters otherwise).
    Returns the string "<region name> - <round name>".
    '''
    print("get readable slot name")
    # round code -> display name
    round_names = {
        'R0': 'First Four',
        'R1': 'First Round',
        'R2': 'Second Round',
        'R3': 'Sweet 16',
        'R4': 'Elite Eight',
        'R5': 'Final Four',
        'R6': 'National Championship',
    }
    # region code -> display name
    region_names = {
        'W': 'East',
        'X': 'Midwest',
        'Y': 'South',
        'Z': 'West',
        'WX': 'East/Midwest',
        'YZ': 'South/West',
        'CH': 'Final'
    }

    round_code = slot_id[:2]
    if int(slot_id[1]) >= 5:
        # rounds 5 and up carry a two-letter region code (WX, YZ, CH)
        region_code = slot_id[2:4]
    else:
        # earlier rounds carry a single region letter
        region_code = slot_id[2]
    return ' - '.join([region_names[ region_code ], round_names[ round_code ]])

def get_slot_winner( pred_df, slot_row ):
    '''
    Decide which of the two teams in a slot advances, and log the reasoning.

    pred_df:  formatted predictions with PredIDConcat ("<TeamAID>_<TeamBID>")
              and ProbAOverB columns.
    slot_row: one slot record exposing Slot, StrongTeamID, WeakTeamID,
              StrongTeamName and WeakTeamName (IDs must be strings).

    Appends the matchup odds, the winner line and any "close call" / "upset"
    markers to the module-level readable_outputs list.
    Returns [winner team ID, winner team name], where the winner is the team
    with the higher win probability (team A on an exact 0.5 tie).
    Raises IndexError when pred_df has no prediction for this pairing.
    '''
    print("get slot winner")
    # lookup from team name to team ID for the two teams in this slot
    name_id_dict = {
        slot_row.StrongTeamName: slot_row.StrongTeamID,
        slot_row.WeakTeamName: slot_row.WeakTeamID,
    }

    # team A / team B are the two names in sorted order; this only fixes how
    # the matchup is reported, the lookup below copes with either key order
    teamA, teamB = sorted([slot_row.StrongTeamName, slot_row.WeakTeamName])
    probA = _lookup_prob_a_over_b(pred_df, name_id_dict[ teamA ], name_id_dict[ teamB ])

    readable_outputs.append(
        ['Chance that %s beats %s: %f' % (teamA, teamB, probA)]
    )
    readable_outputs.append(
        ['Chance that %s beats %s: %f' % (teamB, teamA, 1 - probA)]
    )
    if 0.39 <= probA <= 0.61:
        readable_outputs.append(
            ['***Close call!']
        )

    # readable label for this slot; taken from the slot_row argument so the
    # function does not depend on any variable of the calling scope
    slot_name = get_readable_slot( slot_row.Slot )

    # the team with the higher probability advances to the next round
    if probA >= 0.5:
        winner_name, winner_prob = teamA, probA
    else:
        winner_name, winner_prob = teamB, 1 - probA
    readable_outputs.append(
        ['%s winner: %s (%f)' % (slot_name, winner_name, winner_prob)]
    )
    winner_data = [name_id_dict[ winner_name ], winner_name]

    # alert if weak team beats strong team
    if slot_row.WeakTeamID == winner_data[0]:
        readable_outputs.append(
            ['*****Upset alert!']
        )
    return winner_data


def _lookup_prob_a_over_b( pred_df, team_a_id, team_b_id ):
    '''
    Return the predicted probability that team A beats team B.

    The "<A>_<B>" key is tried first. The name-sorted order used by the caller
    is not guaranteed to match the order the prediction ids were written in
    (e.g. 'CS Fullerton' sorts before 'Cincinnati' although its ID is larger),
    so when that key is absent the reversed "<B>_<A>" key is used and its
    probability is complemented.
    Raises IndexError if neither key is present (same exception type the
    previous unguarded .iloc[0] produced, but with an explanatory message).
    '''
    forward_key = team_a_id + "_" + team_b_id
    forward = pred_df.loc[pred_df['PredIDConcat'] == forward_key, 'ProbAOverB']
    if len(forward):
        return forward.iloc[0]

    reverse_key = team_b_id + "_" + team_a_id
    reverse = pred_df.loc[pred_df['PredIDConcat'] == reverse_key, 'ProbAOverB']
    if len(reverse):
        return 1 - reverse.iloc[0]

    raise IndexError(
        'No prediction found for matchup %s (or %s)' % (forward_key, reverse_key)
    )


if __name__ == "__main__":
    # Walk the bracket slot by slot: pick each slot's predicted winner, seed it
    # into the next slot, then write both the updated slot table and a
    # human-readable report to the results folder.

    # slots data; team IDs are read as strings because they are concatenated
    # into "<TeamAID>_<TeamBID>" keys when looking up predictions
    slot_dtypes = {
        'StrongTeamID': str,
        'WeakTeamID': str
    }
    slots = pd.read_csv(
        folder + '/NCAATourneySlots_Detailed_%d.csv' % prediction_year,
        dtype=slot_dtypes
    )
    # model prediction outputs
    predictions = pd.read_csv(results_folder + '/submission.csv')

    # format pred data to join with slots
    pred_formatted = format_pred_outputs( predictions )

    # for each slot, in file order (earlier rounds come first)
    for position in range(len(slots)):
        # Re-read the row from the frame on every pass: winners written into
        # later slots below must be visible when those slots are reached, and
        # an iterator over a frame that is being modified is not guaranteed
        # to observe such writes.
        # NOTE: the record is deliberately bound to the module-level name
        # `row`, so any helper that reads that global keeps working.
        row = next(slots.iloc[[position]].itertuples(index=False))
        print("Joining slots to predictions")
        readable_outputs.append(
            ['------------------------------------------']
        )
        if row.Slot == 'R7WIN':
            # terminal pseudo-slot: its strong team is the champion
            readable_outputs.append(
                [
                    'Overall %d champion: %s' %
                    (prediction_year, row.StrongTeamName)
                ]
            )
            break
        # join slots and pred to get higher prob team
        # returns [winner TeamID, winner TeamName]
        slot_winner = get_slot_winner(pred_formatted, row)

        # save higher team in appropriate slot for next round
        next_slot = row.NextSlot
        seed_type = row.NextSeed

        # assign the updated winner data in the next slot
        slots.loc[
            slots['Slot'] == next_slot,
            [seed_type + 'TeamID', seed_type + 'TeamName']
        ] = slot_winner[0], slot_winner[1]

    # output updated slot data to csv in results folder
    print("Write updated slots data to file")
    slots.to_csv(results_folder + '/less-readable-predictions-by-slots.csv', index=False)
    # create readable results csv
    print("Writing %d readable bracket results." % len(slots))
    # newline='' lets the csv module control line endings itself (otherwise
    # extra blank lines appear between rows on platforms using \r\n)
    with open(results_folder + '/readable-predictions-by-slots.csv', 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerows(readable_outputs)
Loading