1
1
import mechanize
2
- from bs4 import BeautifulSoup
3
2
import urllib2
3
+ from bs4 import BeautifulSoup
4
+
4
5
# Create a Browser
5
6
b = mechanize.Browser()
6
7
@@ -16,38 +17,37 @@ b.open('http://www.imdb.com/search/title')
16
17
# Choose a form
17
18
b.select_form(nr=1)
18
19
19
-
20
20
b['title'] = nm
21
21
22
22
b.find_control(type="checkbox",nr=0).get("feature").selected = True
23
23
24
24
25
25
# Submit
26
- fd = b.submit()
27
-
26
+ fd = b.submit()
28
27
soup = BeautifulSoup(fd.read(),'html5lib')
29
28
30
29
#data= soup.find_all('td',class_="title")
31
30
#for div in data:
32
31
# links= div.find_all('a')
33
- # for a in links:
34
- # print a['href'];
32
+ # for a in links:
33
+ # print a['href'];
35
34
36
35
37
36
for div in soup.findAll('td', {'class': 'title'},limit=1):
38
37
a = div.findAll('a')[0]
39
- print a.text.strip(), '=>', a.attrs['href']
40
- hht= 'http://www.imdb.com'+a.attrs['href']
38
+ print ( a.text.strip(), '=>', a.attrs['href'])
39
+ hht = 'http://www.imdb.com'+a.attrs['href']
41
40
print(hht)
42
- page= urllib2.urlopen(hht)
41
+ page = urllib2.urlopen(hht)
43
42
soup2 = BeautifulSoup(page.read(),'html.parser')
44
- print("title of the movie: ")
43
+
44
+ print( "title of the movie: ")
45
45
print(soup2.find(itemprop="name").get_text())
46
- print("timerun: ")
46
+ print( "timerun: ")
47
47
print(soup2.find(itemprop="duration").get_text())
48
- print("genre: ")
48
+ print( "genre: ")
49
49
print(soup2.find(itemprop="genre").get_text())
50
50
print("current IMDB rating:")
51
51
print(soup2.find(itemprop="ratingValue").get_text())
52
- print("summary:")
52
+ print( "summary:")
53
53
print(soup2.find(itemprop="description").get_text())
0 commit comments