Skip to content

Commit

Permalink
Work well version
Browse files Browse the repository at this point in the history
  • Loading branch information
trentenwen committed Mar 30, 2023
1 parent 2a20625 commit e10aa2a
Show file tree
Hide file tree
Showing 16 changed files with 79,819 additions and 46 deletions.
6,841 changes: 6,841 additions & 0 deletions csvoutput/Atomic-Heart.csv

Large diffs are not rendered by default.

8,720 changes: 8,720 additions & 0 deletions csvoutput/Cities:-Skylines.csv

Large diffs are not rendered by default.

17,131 changes: 17,131 additions & 0 deletions csvoutput/Cyberpunk-2077.csv

Large diffs are not rendered by default.

6,551 changes: 6,551 additions & 0 deletions csvoutput/Forza-Horizon-5.csv

Large diffs are not rendered by default.

1,521 changes: 1,521 additions & 0 deletions csvoutput/Grand-Theft-Auto-V.csv

Large diffs are not rendered by default.

2,796 changes: 2,793 additions & 3 deletions csvoutput/Hogwarts-Legacy.csv

Large diffs are not rendered by default.

8,121 changes: 8,121 additions & 0 deletions csvoutput/It-Takes-Two.csv

Large diffs are not rendered by default.

7,351 changes: 7,351 additions & 0 deletions csvoutput/LEGO-Star-Wars:-The-Skywalker-Saga.csv

Large diffs are not rendered by default.

5,691 changes: 5,691 additions & 0 deletions csvoutput/Monster-Hunter:-World.csv

Large diffs are not rendered by default.

5,521 changes: 5,521 additions & 0 deletions csvoutput/Overcooked!-2.csv

Large diffs are not rendered by default.

961 changes: 961 additions & 0 deletions csvoutput/Red-Dead-Redemption-2.csv

Large diffs are not rendered by default.

1,081 changes: 1,081 additions & 0 deletions csvoutput/Sid-Meier’s-Civilization-VI.csv

Large diffs are not rendered by default.

7,461 changes: 7,461 additions & 0 deletions csvoutput/Tomb-Raider.csv

Large diffs are not rendered by default.

Binary file modified webparse/__pycache__/steam_review_parse.cpython-38.pyc
Binary file not shown.
104 changes: 61 additions & 43 deletions webtools/rolling.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,52 +10,70 @@
from webparse.steam_review_parse import steam_helper

# NOTE(review): this span looks like a rendered diff hunk of webtools/rolling.py
# rather than a runnable script: statements from the previous and the new
# revision appear together, without +/- markers and without indentation, so the
# repeated statements below are expected. Confirm against the repository before
# reusing any of it.

# User Configuration
# infinity_search: when 0 the crawl stops once page_counter reaches target_page;
# any other value disables that stop condition.
infinity_search = 0
infinity_search = 1
target_page = 10 # 100 pages contain about 1000 reviews
waiting_time = 1 # value passed to both driver.implicitly_wait() and time.sleep() below
tolerated_times = 60*10 # consecutive failed page lookups tolerated before the crawl loop gives up

# Initialize
driver = webdriver.Chrome()
# Crawl Target
driver.implicitly_wait(waiting_time)
driver.get('https://steamcommunity.com/app/990080/reviews/?browsefilter=toprated&snr=1_5_100010_')

# Start Crawling
page_counter = 1
current_tolerated = tolerated_times
while True:

try:
# Look for the element whose id is 'page<N>' for the current page counter.
curr_id = 'page' + str(page_counter)
time.sleep(waiting_time)
driver.find_element(By.ID, curr_id)
print(curr_id)
except NoSuchElementException:
# Element not found
# - scroll to the bottom of the document; scrolling only on a miss avoids
#   scrolling too much, since one window may contain several page blocks
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
# - use up one unit of the remaining tolerance
if current_tolerated > 0:
current_tolerated -= 1
continue
# Game title -> URL of the page to crawl. The title is also used further down
# to build the output CSV file name.
games_dict = {
'Hogwarts Legacy': 'https://steamcommunity.com/app/990080/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Forza Horizon 5': 'https://steamcommunity.com/app/1551360/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Cities: Skylines': 'https://steamcommunity.com/app/255710/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Atomic Heart': 'https://steamcommunity.com/app/668580/reviews/?browsefilter=toprated&snr=1_5_100010_',
'It Takes Two': 'https://steamcommunity.com/app/1426210/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Sid Meier’s Civilization VI': 'https://steamcommunity.com/app/289070/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Red Dead Redemption 2': 'https://steamcommunity.com/app/1174180/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Grand Theft Auto V': 'https://steamcommunity.com/app/271590/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Overcooked! 2': 'https://steamcommunity.com/app/728880/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Monster Hunter: World': 'https://steamcommunity.com/app/582010/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Tomb Raider': 'https://steamcommunity.com/app/203160/reviews/?browsefilter=toprated&snr=1_5_100010_',
'Cyberpunk 2077': 'https://steamcommunity.com/app/1091500/reviews/?browsefilter=toprated&snr=1_5_100010_',
'LEGO Star Wars: The Skywalker Saga': 'https://steamcommunity.com/app/920210/reviews/?browsefilter=toprated&snr=1_5_100010_'
}

for games_title in games_dict:

# Initialize
driver = webdriver.Chrome()
# Crawl Target
driver.implicitly_wait(waiting_time)
driver.get(games_dict[games_title])

# Start Crawling
page_counter = 1
current_tolerated = tolerated_times
while True:

try:
# Look for the element whose id is 'page<N>' for the current page counter.
curr_id = 'page' + str(page_counter)
time.sleep(waiting_time)
driver.find_element(By.ID, curr_id)
print(curr_id)
except NoSuchElementException:
# Element not found
# - scroll to the bottom of the document; scrolling only on a miss avoids
#   scrolling too much, since one window may contain several page blocks
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
# - use up one unit of the remaining tolerance
if current_tolerated > 0:
current_tolerated -= 1
continue
else:
# Tolerance exhausted: reached the end of the list, or the network dropped
break
else:
# Tolerance exhausted: reached the end of the list, or the network dropped
break
else:
# Found a new page: reset the tolerance counter
if current_tolerated != tolerated_times:
current_tolerated = tolerated_times
# Advance to the next page id
page_counter += 1
if (page_counter == target_page) & (infinity_search == 0):
print('Finish searching.')
break

# Start Parsing
selector = Selector(text=driver.page_source)

# NOTE(review): steam_helper comes from webparse.steam_review_parse and is not
# visible here; presumably it writes the parsed reviews to the named CSV - confirm.
steam_helper(selector, 'Hogwarts-Legacy.csv')

driver.quit()
# Found a new page: reset the tolerance counter
if current_tolerated != tolerated_times:
current_tolerated = tolerated_times
# Advance to the next page id
page_counter += 1
if (page_counter == target_page) & (infinity_search == 0):
print('Finish searching.')
break

# Start Parsing
selector = Selector(text=driver.page_source)

# File name passed on: the game title with spaces replaced by '-', plus '.csv'.
# NOTE(review): steam_helper comes from webparse.steam_review_parse and is not
# visible here; presumably it writes the parsed reviews to that CSV - confirm.
steam_helper(selector, '-'.join(games_title.split(' ')) + '.csv')

driver.quit()

14 changes: 14 additions & 0 deletions webtools/viewer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@

import os

import pandas as pd

# Directory containing the CSV files to inspect, relative to the current
# working directory.
csv_path = '../csvoutput'

# Print every CSV table found in csv_path and accumulate the total row count.
total_length = 0
# sorted(): os.listdir returns entries in arbitrary order; sorting keeps the
# printed output reproducible between runs.
for file in sorted(os.listdir(csv_path)):
    # Skip anything that is not a CSV file (sub-directories, OS or editor
    # metadata files) instead of handing it to read_csv, where it would either
    # raise or be counted as review rows.
    if not file.lower().endswith('.csv'):
        continue
    # First column is the index and first row is the header, so len(df) counts
    # data rows only.
    df = pd.read_csv(os.path.join(csv_path, file), index_col=0, header=0)
    total_length += len(df)
    print(df)

print('Total length:', total_length)

0 comments on commit e10aa2a

Please sign in to comment.