-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathdownloads.py
122 lines (90 loc) · 3.51 KB
/
downloads.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
from dataclasses import dataclass
from os import mkdir
from os.path import dirname, exists, join, realpath
from typing import Iterator, List, Optional, Union
import hashlib
import requests
import lxml.html
from models import Match, Game, Team, Player
# Absolute directory that contains this module (symlinks resolved by realpath).
CURRENT_DIR = dirname(realpath(__file__))
# Directory next to this module where get_html stores downloaded pages.
CACHE_DIR = join(CURRENT_DIR, 'cache')
# Site origin: prepended to request paths in get_html and sliced off match URLs in get_match.
URL_PREFIX = 'https://heroeslounge.gg'
@dataclass
class MatchLink:
    """Reference to a single match page plus the names of its two teams.

    Instances are produced by get_match_links while it walks a division page.
    """

    # Value of the ``href`` attribute of the match ("VS") link.
    url: str
    # Text of the first team link found in the same row as the match link.
    team1_name: str
    # Text of the second team link found in the same row as the match link.
    team2_name: str
def get_html(path: str) -> str:
    """Return the HTML found at ``path`` on the site, backed by an on-disk cache.

    The cache entry for a path is a file inside CACHE_DIR whose name is the
    SHA-1 hex digest of the UTF-8 encoded path. On a cache hit the file is
    returned without touching the network. On a miss the page is fetched from
    ``URL_PREFIX + path``, written to the cache and returned.

    Args:
        path: Site-relative path (it is appended verbatim to URL_PREFIX).

    Returns:
        The page body decoded as UTF-8.

    Raises:
        Exception: If the server answers with any status code other than 200.
    """
    cached_file_path = join(CACHE_DIR, hashlib.sha1(path.encode('utf8')).hexdigest())

    # The payload is decoded as UTF-8 below, so the cache must be read and
    # written with the same explicit encoding instead of the platform default.
    if exists(cached_file_path):
        with open(cached_file_path, encoding='utf8') as f:
            return f.read()

    # A timeout keeps an unresponsive server from blocking the caller forever.
    response = requests.get(URL_PREFIX + path, timeout=30)
    if response.status_code != 200:
        raise Exception(f'status code = {response.status_code}')
    html = response.content.decode('utf8')

    # EAFP: create the cache directory without a check-then-create race.
    try:
        mkdir(CACHE_DIR)
    except FileExistsError:
        pass

    with open(cached_file_path, 'w', encoding='utf8') as f:
        f.write(html)
    return html
def p(el):
    """Debug helper: print the serialized markup of an lxml element."""
    serialized = lxml.html.tostring(el)
    print(serialized)
def get_match_links(*, region: str, season: int, division: Union[int, str]) -> Iterator[MatchLink]:
    """Yield a MatchLink for every "VS" link on a division page.

    The page path depends on the division: the value ``'s'`` uses its own
    path layout, every other division value is formatted into the regular
    season/division path. The page is fetched through get_html.

    Args:
        region: Region fragment inserted into the page path.
        season: Season number inserted into the page path.
        division: Division number, or ``'s'`` for the special division layout.

    Yields:
        One MatchLink per matching anchor, built from the anchor's ``href``
        and the texts of the team links found in the same row.
    """
    is_s_division = division == 's'
    path = (
        f'/{region}-division-s-season-{season}/{region}-division-s-season-{season}'
        if is_s_division
        else f'/{region}season-{season}/division-{division}'
    )
    page = lxml.html.fromstring(get_html(path))

    for anchor in page.cssselect('.card a:contains("VS")'):
        match_url = anchor.attrib['href']
        # The team links are looked up two levels above the "VS" anchor.
        container = anchor.getparent().getparent()
        names = []
        for team_anchor in container.cssselect('a[href*="/team/view"]'):
            names.append(team_anchor.text)
        yield MatchLink(match_url, *names)
def get_matches(*, region: str, season: int, division: Union[int, str], team_name: str) -> Iterator[Match]:
    """Yield the parsed matches of a division in which ``team_name`` takes part.

    Every link returned by get_match_links is checked against ``team_name``;
    links for other teams are skipped without being downloaded. Matches for
    which get_match returns a falsy value are skipped as well.
    """
    for candidate in get_match_links(region=region, season=season, division=division):
        if team_name not in {candidate.team1_name, candidate.team2_name}:
            continue
        parsed = get_match(candidate.url)
        if parsed:
            yield parsed
def parse_players(el) -> Iterator[Player]:
    """Yield a Player for each ``<figure>`` element found below ``el``.

    The player name is the ``title`` attribute of the figure's first
    ``<figcaption>``; the hero is the ``title`` attribute of its first
    ``<img>``.
    """
    for fig in el.cssselect('figure'):
        player_name = fig.cssselect('figcaption')[0].attrib['title']
        hero_name = fig.cssselect('img')[0].attrib['title']
        yield Player(name=player_name, hero=hero_name)
def get_match(url: str) -> Optional[Match]:
    """Download a match page and parse its games.

    Each ``.tab-pane`` element of the page is parsed as one game, using the
    pane's ``.row`` elements by position: row 0 holds the two team names,
    row 2 the two player columns, row 4 the map badge and row 5 the columns
    used to decide the winner.

    Args:
        url: Match URL. Both absolute URLs starting with URL_PREFIX and
            site-relative paths are accepted.

    Returns:
        A Match whose team names are taken from the first parsed game, or
        None when the page has no ``.tab-pane`` elements or when any pane
        contains no ``.row`` elements.
    """
    # Only strip the site origin when it is actually present; blindly slicing
    # len(URL_PREFIX) characters would corrupt a site-relative href.
    path = url[len(URL_PREFIX):] if url.startswith(URL_PREFIX) else url
    html = get_html(path)
    doc = lxml.html.fromstring(html)

    games = []
    for pane in doc.cssselect('.tab-pane'):
        rows = pane.cssselect('.row')
        if not rows:
            return None

        (blue_team_name, red_team_name) = [el.text.strip() for el in rows[0].cssselect('h3 a')]
        (blue_team_players, red_team_players) = [parse_players(el) for el in rows[2].cssselect('.col')]
        blue_team = Team(name=blue_team_name, players=list(blue_team_players))
        red_team = Team(name=red_team_name, players=list(red_team_players))

        # Named map_name so the builtin ``map`` is not shadowed.
        map_name = rows[4].cssselect('.badge-info')[0].text.strip()

        # Of the last two ``.col-6`` columns, the first one is checked: blue
        # is the winner when it holds exactly one ``.badge-success`` element.
        level_winner_columns = rows[5].cssselect('.col-6')[-2:]
        did_blue_team_win = len(level_winner_columns[0].cssselect('.badge-success')) == 1

        games.append(Game(
            blue_team=blue_team,
            red_team=red_team,
            winner=blue_team if did_blue_team_win else red_team,
            map=map_name,
        ))

    if not games:
        return None
    return Match(
        url=url,
        team1_name=games[0].blue_team.name,
        team2_name=games[0].red_team.name,
        games=games,
    )