|
| 1 | +import urllib2 |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +import requests |
| 4 | +import pandas as pd |
| 5 | +import random |
| 6 | +import math |
# The 32 team names used by this script, in alphabetical order.
wct = [
    'Argentina',
    'Australia',
    'Belgium',
    'Brazil',
    'Colombia',
    'Costa Rica',
    'Croatia',
    'Denmark',
    'Egypt',
    'England',
    'France',
    'Germany',
    'Iceland',
    'Iran',
    'Japan',
    'Mexico',
    'Morocco',
    'Nigeria',
    'Panama',
    'Peru',
    'Poland',
    'Portugal',
    'Russia',
    'Saudi Arabia',
    'Senegal',
    'Serbia',
    'South Korea',
    'Spain',
    'Sweden',
    'Switzerland',
    'Tunisia',
    'Uruguay',
]


# Per-team statistics table, loaded once at import time.  The rest of the
# script reads it as df[team_name][row] (rows 0, 2, 12 and 13 are used).
# NOTE(review): the meaning of each row is defined by the CSV, not by this
# file -- confirm against WorldCup2018.csv.
df = pd.read_csv('WorldCup2018.csv')
| 13 | +#dates = [str(i)+'/'+str(j) if j>9 else str(i)+'/0'+str(j) for i in range(2016,2019) for j in range(1,13)] |
| 14 | + |
| 15 | +#req = urllib2.Request('http://www.betexplorer.com/soccer/world/friendly-international/mutual-matches/?home=I9l9aqLq&away=f9OppQjp&where=0', None, headers) |
def H2H():
    """Print real friendly results next to the model's most likely scores.

    Downloads the BBC "international friendlies" scores page for May 2018,
    extracts the full-time home/away scores and the full team names, and for
    every fixture prints the published score followed by the most likely
    goal counts according to ``Poisson``.

    Fixtures that cannot be evaluated (a team missing from ``df``, a fixture
    without a matching score, unusable numbers) are skipped.

    Returns:
        None.  All output goes to stdout.
    """
    # Pieces of the betexplorer "mutual matches" URL:
    #   link1 + teamID[home] + link2 + teamID[away] + link3
    # Nothing below reads them yet; they are kept so that the head-to-head
    # lookup can be wired in without collecting the team ids again.
    link1 = 'http://www.betexplorer.com/soccer/world/friendly-international/mutual-matches/?home='
    link2 = '&away='
    link3 = '&where=0'
    teamID = {
        'Russia': 'hrgrswHh', 'Saudi Arabia': 'biSY8ox4', 'Egypt': 'bejDn7NN',
        'Uruguay': 'xMk44orG', 'Spain': 'bLyo6mco', 'Portugal': 'WvJrjFVN',
        'Morocco': 'IDKYO3R8', 'Iran': 'xrRx85iA', 'France': 'QkGeVG1n',
        'Australia': 'xSrf6qMM', 'Peru': 'Uend67D3', 'Denmark': '0KUdxQVi',
        'Argentina': 'f9OppQjp', 'Nigeria': 'EBE2Xb3l', 'Croatia': 'K8aznggo',
        'Iceland': '6TsAIrGN', 'Brazil': 'I9l9aqLq', 'Switzerland': 'rHJ2vy1B',
        'Costa Rica': 'C4ePE2kq', 'Serbia': '8Kl6iq0i', 'Germany': 'ptQide1O',
        'Mexico': 'O6iHcNkd', 'Sweden': 'OQyqbHWB', 'South Korea': 'K6Gs7P6G',
        'Belgium': 'GbB957na', 'Panama': 'OWKqbCfi', 'Tunisia': 'QqZVYk95',
        'England': 'j9N9ZNFA', 'Poland': '2HzmcynI', 'Senegal': 'hOIsJLJr',
        'Colombia': 'G02s4PCS', 'Japan': 'ULXPdOUj',
    }

    # A browser-like User-Agent is sent with the request.
    headers = {'User-Agent': 'Mozilla/5.0'}
    req = urllib2.Request('https://www.bbc.co.uk/sport/football/international-friendlies/scores-fixtures/2018-05', None, headers)
    response = urllib2.urlopen(req)
    try:
        page = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()

    soup = BeautifulSoup(page, 'html.parser')
    scoH = soup.findAll('span', {'class': 'sp-c-fixture__number sp-c-fixture__number--home sp-c-fixture__number--ft'})
    scoA = soup.findAll('span', {'class': 'sp-c-fixture__number sp-c-fixture__number--away sp-c-fixture__number--ft'})
    print(len(scoH))
    print(len(scoA))
    Ht = soup.findAll('span', {'class': 'gs-u-display-none gs-u-display-block@m qa-full-team-name sp-c-fixture__team-name-trunc'})
    # Drop the first two team-name entries; slicing (unlike two ``del Ht[0]``)
    # does not raise when the page lists fewer than two names.
    del Ht[:2]
    print(len(Ht))

    # Ht holds two names per fixture (home, away) while each score list holds
    # one entry per fixture, so the score index is the pair index, not ``i``.
    # NOTE(review): assumes the score spans are in the same order as the
    # remaining team-name pairs -- confirm against the live page.
    for i in range(0, len(Ht), 2):
        fixture = i // 2
        try:
            team1, team2 = Poisson(Ht[i].text, Ht[i + 1].text)
            print(scoH[fixture].text, scoA[fixture].text)
            print(int(team1.index(max(team1))), int(team2.index(max(team2))))
        except (LookupError, ValueError, TypeError, ArithmeticError):
            # Best effort: skip fixtures whose teams are not in ``df``, that
            # have no matching score, or whose statistics are unusable.
            continue
| 56 | + |
| 57 | + |
def Cal(groups=None):
    """Simulate the group stage and return the top two teams of each group.

    Every pair of teams in a group plays once.  The score of a match is the
    most likely goal count of each side according to ``Poisson``.  A win is
    worth 3 points; goal difference is accumulated separately and added to
    the points when ranking.

    Args:
        groups: optional mapping of group letter to a comma-separated string
            of team names.  Defaults to the 2018 draw (groups A-H).  Team
            names must be columns of ``df``.

    Returns:
        dict mapping '<letter>1' / '<letter>2' to the first and second
        ranked team of each group, e.g. {'A1': ..., 'A2': ..., 'B1': ...}.
    """
    if groups is None:
        groups = {
            'A': 'Russia,Egypt,Saudi Arabia,Uruguay',
            'B': 'Spain,Portugal,Morocco,Iran',
            'C': 'France,Australia,Peru,Denmark',
            'D': 'Argentina,Iceland,Croatia,Nigeria',
            'E': 'Brazil,Switzerland,Costa Rica,Serbia',
            'F': 'Germany,Mexico,Sweden,South Korea',
            'G': 'Belgium,Panama,Tunisia,England',
            'H': 'Poland,Senegal,Colombia,Japan',
        }

    final = {}
    for letter in sorted(groups):
        names = groups[letter].split(',')
        points = dict.fromkeys(names, 0)
        gd = dict.fromkeys(names, 0)

        # Round robin: each team meets every team listed after it.
        for position, name in enumerate(names):
            for oppo in names[position + 1:]:
                team1, team2 = Poisson(name, oppo)
                # Index of the largest probability == most likely goal count.
                goals1 = team1.index(max(team1))
                goals2 = team2.index(max(team2))
                print(goals1, goals2)
                gd[name] += goals1 - goals2
                gd[oppo] += goals2 - goals1
                if goals1 > goals2:
                    points[name] += 3
                elif goals1 < goals2:
                    points[oppo] += 3
                else:
                    # NOTE(review): a draw subtracts each side's df row-12
                    # value instead of awarding a point -- kept as written;
                    # confirm this is the intended model.
                    points[oppo] -= float(df[oppo][12])
                    points[name] -= float(df[name][12])
        print(points)

        # Rank the team names themselves (stable sort) rather than keying a
        # dict by score: two teams with an equal score used to collapse into
        # one entry, which could put the same team in both qualifying slots.
        ranked = sorted(names, key=lambda team: points[team] + gd[team], reverse=True)
        final[letter + '1'] = ranked[0]
        final[letter + '2'] = ranked[1]

    return final
| 103 | + |
| 104 | + |
| 105 | +#{'F1': 'Mexico', 'F2': 'Germany', 'G2': 'England', 'G1': 'Belgium', 'H2': 'Senegal', |
| 106 | +#'E1': 'Brazil', 'H1': 'Colombia', 'A1': 'Uruguay', 'A2': 'Russia', 'B1': 'Spain', |
| 107 | +#'B2': 'Portugal', 'C2': 'Peru', 'C1': 'France', 'E2': 'Serbia', 'D2': 'Croatia', 'D1': 'Argentina'} |
def R16(teams={}):
    """Play the round of 16 and return the eight quarter-finalists.

    Groups are taken two at a time (A/B, C/D, E/F, G/H).  Within each pair
    the winner of the first group meets the runner-up of the second, and the
    runner-up of the first meets the winner of the second.

    A match is decided by the most likely goal count of each side according
    to ``Poisson``; when those are equal, the side with the strictly larger
    df row-12 value advances, otherwise the first-listed side.

    Args:
        teams: mapping with keys 'A1', 'A2', ..., 'H1', 'H2' as returned by
            ``Cal``.  It is only read, never modified.

    Returns:
        dict mapping 'Q1'..'Q8' to the advancing team names.
    """
    r16 = {}
    print(teams)
    groupList = [A + B for A in ['A', 'B', 'C', 'D', 'E', 'F', 'G', 'H'] for B in ['1', '2']]
    qL = ['Q' + str(b) for b in range(1, 9)]
    for n in range(0, 16, 4):
        print(n)
        # Floor division keeps the slot an int on Python 2 and Python 3.
        slot = n // 2
        fixtures = (
            (qL[slot], groupList[n], groupList[n + 3]),
            (qL[slot + 1], groupList[n + 1], groupList[n + 2]),
        )
        for key, seed1, seed2 in fixtures:
            first = teams[seed1]
            second = teams[seed2]
            team1, team2 = Poisson(first, second)
            goals1 = team1.index(max(team1))
            goals2 = team2.index(max(team2))
            if goals1 > goals2:
                r16[key] = first
            elif goals1 < goals2:
                r16[key] = second
            elif float(df[first][12]) < float(df[second][12]):
                # Level on predicted goals: the larger df row-12 value wins.
                r16[key] = second
            else:
                r16[key] = first
    print(r16)
    return r16
| 138 | + |
| 139 | + |
def R8(team={}):
    """Play the quarter-finals and return the four semi-finalists.

    Pairings are Q1-Q3, Q2-Q4, Q5-Q7 and Q6-Q8, feeding S1..S4 in that
    order.  A match is decided by the most likely goal count of each side
    according to ``Poisson``; when those are equal, the side with the
    strictly larger df row-12 value advances, otherwise the first-listed
    side.

    Args:
        team: mapping with keys 'Q1'..'Q8' as returned by ``R16``.  It is
            only read, never modified.

    Returns:
        dict mapping 'S1'..'S4' to the advancing team names.
    """
    r8 = {}
    qL = ['Q' + str(b) for b in range(1, 9)]
    sL = ['S' + str(b) for b in range(1, 5)]
    for i in range(0, 8, 4):
        # Floor division keeps the slot an int on Python 2 and Python 3.
        slot = i // 2
        fixtures = (
            (sL[slot], qL[i], qL[i + 2]),
            (sL[slot + 1], qL[i + 1], qL[i + 3]),
        )
        for key, seed1, seed2 in fixtures:
            first = team[seed1]
            second = team[seed2]
            team1, team2 = Poisson(first, second)
            goals1 = team1.index(max(team1))
            goals2 = team2.index(max(team2))
            if goals1 > goals2:
                r8[key] = first
            elif goals1 < goals2:
                r8[key] = second
            elif float(df[first][12]) < float(df[second][12]):
                # Level on predicted goals: the larger df row-12 value wins.
                r8[key] = second
            else:
                r8[key] = first
    print(r8)
    return r8
| 167 | + |
def R4(team={}):
    """Play the semi-finals and return the two finalists.

    Pairings are S1-S3 (feeding F1) and S2-S4 (feeding F2).  A match is
    decided by the most likely goal count of each side according to
    ``Poisson``; when those are equal, the side with the strictly larger df
    row-12 value advances, otherwise the first-listed side.

    Args:
        team: mapping with keys 'S1'..'S4' as returned by ``R8``.  It is
            only read, never modified.

    Returns:
        dict mapping 'F1' and 'F2' to the advancing team names.
    """
    r4 = {}
    # Explicit pairings replace the old ``i/2`` index arithmetic, which
    # produced a float (and therefore a TypeError) under true division.
    fixtures = (
        ('F1', 'S1', 'S3'),
        ('F2', 'S2', 'S4'),
    )
    for key, seed1, seed2 in fixtures:
        first = team[seed1]
        second = team[seed2]
        team1, team2 = Poisson(first, second)
        goals1 = team1.index(max(team1))
        goals2 = team2.index(max(team2))
        if goals1 > goals2:
            r4[key] = first
        elif goals1 < goals2:
            r4[key] = second
        elif float(df[first][12]) < float(df[second][12]):
            # Level on predicted goals: the larger df row-12 value wins.
            r4[key] = second
        else:
            r4[key] = first
    print(r4)
    return r4
| 195 | + |
def Finals(team={}):
    """Play the final and print the champion.

    The match between team['F1'] and team['F2'] is decided by the most
    likely goal count of each side according to ``Poisson``.  When those
    are equal, F2 wins only if its df row-12 value is strictly larger than
    F1's; otherwise F1 wins.

    Args:
        team: mapping with keys 'F1' and 'F2' as returned by ``R4``.

    Returns:
        The fixed credit string; the result itself is printed as
        {'WinningTeam': <name>}.
    """
    finalist1 = team['F1']
    finalist2 = team['F2']
    dist1, dist2 = Poisson(finalist1, finalist2)

    # Position of the largest probability == most likely number of goals.
    goals1 = dist1.index(max(dist1))
    goals2 = dist2.index(max(dist2))

    if goals1 != goals2:
        champion = finalist1 if goals1 > goals2 else finalist2
    else:
        second_is_stronger = float(df[finalist1][12]) < float(df[finalist2][12])
        champion = finalist2 if second_is_stronger else finalist1

    final = {'WinningTeam': champion}
    print(final)
    return 'Made by: Yash Soni'
| 210 | + |
| 211 | + |
def Poisson(HT='', AT=''):
    """Return the goal-count probability lists for a home and an away team.

    For each team an expected-goals rate is built from its ``df`` column:
    (row12 + row13) * (row2 / row0), plus a random amount drawn uniformly
    between 0.1 and row13.  The rate is then fed to the Poisson formula
    exp(-rate) * rate**k / k! for k = 0..4.

    Both team names and both rates are printed as a side effect.

    Args:
        HT: home team name (a column of ``df``).
        AT: away team name (a column of ``df``).

    Returns:
        A pair (home_probabilities, away_probabilities); each is a list of
        five floats, index k being the probability of scoring k goals.
    """
    print(HT, AT)

    def expected_goals(side):
        # NOTE(review): the meaning of rows 0, 2, 12 and 13 comes from the
        # CSV layout -- confirm against WorldCup2018.csv.
        stats = df[side]
        base = (float(stats[12]) + float(stats[13])) * (float(stats[2]) / float(stats[0]))
        return base + random.uniform(0.1, float(stats[13]))

    def distribution(rate):
        decay = math.exp(-rate)
        return [(decay * math.pow(rate, k)) / math.factorial(k) for k in range(0, 5)]

    # The away rate is drawn first, then the home rate, so the sequence of
    # random numbers consumed stays the same.
    ATS = expected_goals(AT)
    HTS = expected_goals(HT)

    print(HTS, ATS)

    return distribution(HTS), distribution(ATS)
| 224 | + |
| 225 | + |
# Run the whole tournament stage by stage and print what Finals returns.
group_qualifiers = Cal()
quarter_finalists = R16(group_qualifiers)
semi_finalists = R8(quarter_finalists)
finalists = R4(semi_finalists)
print(Finals(finalists))
0 commit comments