erikriver · erikriver · Nov 23, 2011 · Nov 22, 2011
diff --git a/opengraph/opengraph.py b/opengraph/opengraph.py
@@ -14,7 +14,16 @@
 class OpenGraph(dict):
     """
     """
-    def __init__(self, url=None, html=None, **kwargs):
+
+    required_attrs = ['title', 'type', 'image', 'url']
+    scrape = False
+
+    def __init__(self, url=None, html=None, scrape=False, **kwargs):
+        # If scrape == True, then will try to fetch missing attribtues
+        # from the page's body
+        self.scrape = scrape
+        self._url = url
+
         for k in kwargs.keys():
             self[k] = kwargs[k]
 
@@ -49,14 +58,18 @@ def parser(self, html):
         ogs = doc.html.head.findAll(property=re.compile(r'^og'))
         for og in ogs:
             self[og[u'property'][3:]]=og[u'content']
+
+        # Couldn't fetch all attrs from og tags, try scraping body
+        if not self.is_valid() and self.scrape:
+            for attr in self.required_attrs:
+                if not hasattr(self, attr):
+                    try:
+                        self[attr] = getattr(self, 'scrape_%s' % attr)(doc)
+                    except AttributeError:
+                        pass
 
     def is_valid(self):
-        if hasattr(self,'title')  and  \
-            hasattr(self,'type')  and  \
-            hasattr(self,'image') and  \
-            hasattr(self,'url'):
-
-            return True
+        return all([hasattr(self, attr) for attr in self.required_attrs])
 
     def to_html(self):
         if not self.is_valid():
@@ -82,3 +95,21 @@ def to_json(self):
 
     def to_xml(self):
         pass
+
+    def scrape_image(self, doc):
+        images = [dict(img.attrs)['src'] 
+            for img in doc.html.body.findAll('img')]
+
+        if images:
+            return images[0]
+
+        return u''
+
+    def scrape_title(self, doc):
+        return doc.html.head.title.text
+
+    def scrape_type(self, doc):
+        return 'other'
+
+    def scrape_url(self, doc):
+        return self._url