-
Notifications
You must be signed in to change notification settings - Fork 0
/
Amazon_Review.py
50 lines (39 loc) · 1.72 KB
/
Amazon_Review.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
import pandas as pd
def scrape_all_reviews(url):
    """Scrape every review page for an Amazon product.

    Walks ``{url}?pageNumber=1,2,...`` until a page contains no review
    elements, collecting each review's text, star rating, and date.

    Parameters
    ----------
    url : str
        Base product-reviews URL
        (e.g. ``https://www.amazon.com/product-reviews/B08KGLMB1L``).

    Returns
    -------
    list[dict]
        One dict per review with keys ``"text"``, ``"rating"`` and
        ``"date"``; any field whose element is missing from the page
        markup is reported as ``"N/A"`` instead of raising.
    """
    options = webdriver.ChromeOptions()
    options.add_argument("--headless")  # run Chrome without a GUI
    driver = webdriver.Chrome(options=options)
    all_reviews = []
    page_number = 1
    try:
        while True:
            page_url = f"{url}?pageNumber={page_number}"
            driver.get(page_url)
            time.sleep(5)  # crude wait for dynamically loaded content; adjust as needed
            soup = BeautifulSoup(driver.page_source, "html.parser")
            review_elements = soup.find_all("div", class_="a-section review aok-relative")
            if not review_elements:
                break  # an empty page means we are past the last review page
            for element in review_elements:
                # Every .find() may return None if Amazon changes its markup;
                # guard each access so one malformed review cannot abort the
                # whole scrape with an AttributeError.
                text_element = element.find("span", class_="a-size-base review-text review-text-content")
                review_text = text_element.text.strip() if text_element else "N/A"
                rating_element = element.find("span", class_="a-icon-alt") or element.find("i", class_="review-rating")
                rating = rating_element.text.strip() if rating_element else "N/A"
                date_element = element.find("span", class_="a-size-base a-color-secondary review-date")
                date = date_element.text.strip() if date_element else "N/A"
                all_reviews.append({
                    "text": review_text,
                    "rating": rating,
                    "date": date,
                })
            page_number += 1
    finally:
        driver.quit()  # always release the browser, even if scraping failed
    return all_reviews
# Entry point: set `url` to the product-reviews page you want to scrape
# (example: https://www.amazon.com/product-reviews/B08KGLMB1L).
url = ""
if url:
    all_reviews = scrape_all_reviews(url)
    df = pd.DataFrame(all_reviews)
    # A bare `df` expression only renders inside a notebook; print() also
    # produces output when this file is run as a plain script.
    print(df)
else:
    # Guard against the shipped empty URL, which would otherwise launch
    # Chrome against an invalid address and crash.
    print("Please set `url` to an Amazon product-reviews URL before running.")