Skip to content

Commit c3f4391

Browse files
author
Aaron
committed
Updated movie_details
Added a list of movie types
1 parent 24d4938 commit c3f4391

File tree

1 file changed

+23
-28
lines changed

1 file changed

+23
-28
lines changed

movie_details

+23-28
Original file line numberDiff line numberDiff line change
@@ -3,51 +3,46 @@ import urllib2
33
from bs4 import BeautifulSoup
44

55
# Create a Browser
6-
b = mechanize.Browser()
6+
browser = mechanize.Browser()
77

88
# Disable loading robots.txt
9-
b.set_handle_robots(False)
9+
browser.set_handle_robots(False)
1010

11-
b.addheaders = [('User-agent',
11+
browser.addheaders = [('User-agent',
1212
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98;)')]
13-
nm=raw_input("enter title ")
13+
14+
movie_title = raw_input("Enter movie title: ")
15+
16+
movie_types = ['feature', 'tv_movie', 'tv_series', 'tv_episode', 'tv_special', 'mini_series', 'documentary', 'game', 'short', 'video']
17+
1418
# Navigate
15-
b.open('http://www.imdb.com/search/title')
19+
browser.open('http://www.imdb.com/search/title')
1620

1721
# Choose a form
18-
b.select_form(nr=1)
22+
browser.select_form(nr=1)
1923

20-
b['title'] = nm
21-
22-
b.find_control(type="checkbox",nr=0).get("feature").selected = True
24+
browser['title'] = movie_title
2325

26+
# Select the checkbox for every entry in movie_types
27+
for type in movie_types:
28+
browser.find_control(type='checkbox',nr=0).get(type).selected = True
2429

2530
# Submit
26-
fd = b.submit()
31+
fd = browser.submit()
2732
soup = BeautifulSoup(fd.read(),'html5lib')
2833

29-
#data= soup.find_all('td',class_="title")
30-
#for div in data:
31-
# links= div.find_all('a')
32-
# for a in links:
33-
# print a['href'];
34-
3534

36-
for div in soup.findAll('td', {'class': 'title'},limit=1):
35+
# Result titles are looked up in <h3 class="lister-item-header"> (previously <td class="title">)
36+
for div in soup.findAll('h3', {'class': 'lister-item-header'}, limit=1):
3737
a = div.findAll('a')[0]
38-
print (a.text.strip(), '=>', a.attrs['href'])
3938
hht = 'http://www.imdb.com'+a.attrs['href']
4039
print(hht)
4140
page = urllib2.urlopen(hht)
4241
soup2 = BeautifulSoup(page.read(),'html.parser')
42+
find = soup2.find
4343

44-
print( "title of the movie: ")
45-
print(soup2.find(itemprop="name").get_text())
46-
print( "timerun: ")
47-
print(soup2.find(itemprop="duration").get_text())
48-
print( "genre: ")
49-
print(soup2.find(itemprop="genre").get_text())
50-
print("current IMDB rating:")
51-
print(soup2.find(itemprop="ratingValue").get_text())
52-
print( "summary:")
53-
print(soup2.find(itemprop="description").get_text())
44+
print("title of the movie: " + find(itemprop='name').get_text().strip())
45+
print("timerun: " + find(itemprop='duration').get_text().strip())
46+
print("genre: " + find(itemprop='genre').get_text().strip())
47+
print("IMDB rating: " + find(itemprop='ratingValue').get_text().strip())
48+
print("summary: " + find(itemprop='description').get_text().strip())

0 commit comments

Comments
 (0)