@@ -3,51 +3,46 @@ import urllib2
3
3
from bs4 import BeautifulSoup
4
4
5
5
# Create a Browser
6
- b = mechanize.Browser()
6
+ browser = mechanize.Browser()
7
7
8
8
# Disable loading robots.txt
9
- b .set_handle_robots(False)
9
+ browser .set_handle_robots(False)
10
10
11
- b .addheaders = [('User-agent',
11
+ browser .addheaders = [('User-agent',
12
12
'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98;)')]
13
- nm=raw_input("enter title ")
13
+
14
+ movie_title = raw_input("Enter movie title: ")
15
+
16
+ movie_types = ['feature', 'tv_movie', 'tv_series', 'tv_episode', 'tv_special', 'mini_series', 'documentary', 'game', 'short', 'video']
17
+
14
18
# Navigate
15
- b .open('http://www.imdb.com/search/title')
19
+ browser .open('http://www.imdb.com/search/title')
16
20
17
21
# Choose a form
18
- b .select_form(nr=1)
22
+ browser .select_form(nr=1)
19
23
20
- b['title'] = nm
21
-
22
- b.find_control(type="checkbox",nr=0).get("feature").selected = True
24
+ browser['title'] = movie_title
23
25
26
+ # Select every title-type checkbox listed in movie_types, so the search is no longer limited to 'feature' titles
27
+ for type in movie_types:
28
+ browser.find_control(type='checkbox',nr=0).get(type).selected = True
24
29
25
30
# Submit
26
- fd = b .submit()
31
+ fd = browser .submit()
27
32
soup = BeautifulSoup(fd.read(),'html5lib')
28
33
29
- #data= soup.find_all('td',class_="title")
30
- #for div in data:
31
- # links= div.find_all('a')
32
- # for a in links:
33
- # print a['href'];
34
-
35
34
36
- for div in soup.findAll('td', {'class': 'title'},limit=1):
35
+ # Each search result's title link is now looked up in <h3 class="lister-item-header"> (previously <td class="title">)
36
+ for div in soup.findAll('h3', {'class': 'lister-item-header'}, limit=1):
37
37
a = div.findAll('a')[0]
38
- print (a.text.strip(), '=>', a.attrs['href'])
39
38
hht = 'http://www.imdb.com'+a.attrs['href']
40
39
print(hht)
41
40
page = urllib2.urlopen(hht)
42
41
soup2 = BeautifulSoup(page.read(),'html.parser')
42
+ find = soup2.find
43
43
44
- print( "title of the movie: ")
45
- print(soup2.find(itemprop="name").get_text())
46
- print( "timerun: ")
47
- print(soup2.find(itemprop="duration").get_text())
48
- print( "genre: ")
49
- print(soup2.find(itemprop="genre").get_text())
50
- print("current IMDB rating:")
51
- print(soup2.find(itemprop="ratingValue").get_text())
52
- print( "summary:")
53
- print(soup2.find(itemprop="description").get_text())
44
+ print("title of the movie: " + find(itemprop='name').get_text().strip())
45
+ print("timerun: " + find(itemprop='duration').get_text().strip())
46
+ print("genre: " + find(itemprop='genre').get_text().strip())
47
+ print("IMDB rating: " + find(itemprop='ratingValue').get_text().strip())
48
+ print("summary: " + find(itemprop='description').get_text().strip())
0 commit comments