Updated movie_details

Aaron · Aaron · commit c3f43912bdcf · 2016-11-16T20:37:34.000+11:00
Added a list of movie types
diff --git a/movie_details b/movie_details
@@ -3,51 +3,46 @@ import urllib2
 from bs4 import BeautifulSoup 
 
 # Create a Browser
-b = mechanize.Browser()
+browser = mechanize.Browser()
 
 # Disable loading robots.txt
-b.set_handle_robots(False)
+browser.set_handle_robots(False)
 
-b.addheaders = [('User-agent',
+browser.addheaders = [('User-agent',
                  'Mozilla/4.0 (compatible; MSIE 5.0; Windows 98;)')]
-nm=raw_input("enter title ")
+
+movie_title = raw_input("Enter movie title: ")
+
+movie_types = ['feature', 'tv_movie', 'tv_series', 'tv_episode', 'tv_special', 'mini_series', 'documentary', 'game', 'short', 'video']
+
 # Navigate
-b.open('http://www.imdb.com/search/title')
+browser.open('http://www.imdb.com/search/title')
 
 # Choose a form
-b.select_form(nr=1)
+browser.select_form(nr=1)
 
-b['title'] = nm
-
-b.find_control(type="checkbox",nr=0).get("feature").selected = True
+browser['title'] = movie_title
 
+# Select every entry of movie_types in the form's first checkbox control
+for movie_type in movie_types:
+    browser.find_control(type='checkbox',nr=0).get(movie_type).selected = True
 
 # Submit
-fd   = b.submit()
+fd   = browser.submit()
 soup = BeautifulSoup(fd.read(),'html5lib')
 
-#data= soup.find_all('td',class_="title")
-#for div in data:
-#  links= div.find_all('a')
-#   for a in links:
-#        print a['href'];
-
 
-for div in soup.findAll('td', {'class': 'title'},limit=1):
+# Take the first <h3 class="lister-item-header"> element (formerly <td class="title">)
+for div in soup.findAll('h3', {'class': 'lister-item-header'}, limit=1):
     a = div.findAll('a')[0]
-    print (a.text.strip(), '=>', a.attrs['href'])
     hht = 'http://www.imdb.com'+a.attrs['href']
     print(hht)
     page  = urllib2.urlopen(hht)
     soup2 = BeautifulSoup(page.read(),'html.parser')
+    find = soup2.find
     
-    print( "title of the movie: ")
-    print(soup2.find(itemprop="name").get_text())
-    print(            "timerun: ")
-    print(soup2.find(itemprop="duration").get_text())
-    print(              "genre: ")
-    print(soup2.find(itemprop="genre").get_text())
-    print("current IMDB rating:")
-    print(soup2.find(itemprop="ratingValue").get_text())
-    print(            "summary:")
-    print(soup2.find(itemprop="description").get_text())
+    print("title of the movie: " + find(itemprop='name').get_text().strip())
+    print("timerun: " + find(itemprop='duration').get_text().strip())
+    print("genre: " + find(itemprop='genre').get_text().strip())
+    print("IMDB rating: " + find(itemprop='ratingValue').get_text().strip())
+    print("summary: " + find(itemprop='description').get_text().strip())