Skip to content
This repository was archived by the owner on Nov 2, 2023. It is now read-only.

Commit 751dc9b

Browse files
committed
stores metadata into self.data on information retrieval
1 parent 727b6b5 commit 751dc9b

File tree

1 file changed

+10
-7
lines changed

1 file changed

+10
-7
lines changed

wekeypedia/wikipedia/page.py

Lines changed: 10 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ def url2lang(url):
5858
"""
5959
lang = url.split("/", 3)[2]
6060
lang = lang.split(".")[0]
61-
61+
6262
return lang
6363

6464
class WikipediaPage(object):
@@ -73,6 +73,7 @@ def __init__(self, title=None, lang="en"):
7373
self.query = None
7474
self.page = None
7575
self.problem = None
76+
self.data = {}
7677

7778
self.content = ""
7879

@@ -105,6 +106,8 @@ def fetch_info(self, title, opt_params={ "prop": "info", "inprop": "url" }, lang
105106
self.lang = lang
106107
self.url = pages[ self.page_id ]["fullurl"]
107108

109+
self.data.update(pages[ self.page_id ])
110+
108111
# print r.url
109112
# print r.text
110113

@@ -244,7 +247,7 @@ def extract_diff_text(self, response):
244247
r = response
245248

246249
content = r["query"]["pages"][list(r["query"]["pages"].keys())[0]]
247-
250+
248251
if "diff" in content["revisions"][0]:
249252
content = content["revisions"][0]["diff"]["*"]
250253
else:
@@ -357,7 +360,7 @@ def get_revisions(self, extra_params={}):
357360
page = pages[ list(pages.keys())[0] ]
358361

359362
revisions += page["revisions"]
360-
363+
361364
if "continue" in r:
362365
params.update(r["continue"])
363366
else:
@@ -540,8 +543,8 @@ def extract_plusminus(self, diff_html):
540543

541544
for what in [ ["added", "ins"], ["deleted", "del"] ]:
542545
a = []
543-
544-
# checking block
546+
547+
# checking block
545548
# we also check this is not only context showing for non-substitution edits
546549
a = [ t.find("td", "diff-%sline" % (what[0])) for t in tr if len(t.find_all(what[1])) == 0 and len(t.find_all("td", "diff-empty")) > 0 ]
547550

@@ -608,6 +611,6 @@ def print_plusminus_overview(self, diff):
608611
for plus in diff["added"]:
609612
print "+ %s" % (plus)
610613

611-
614+
612615
def print_plusminus_terms_overview(self, stems):
613-
print "\n%s|%s\n" % ("+"*len(stems["added"].items()), "-"*len(stems["deleted"].items()))
616+
print "\n%s|%s\n" % ("+"*len(stems["added"].items()), "-"*len(stems["deleted"].items()))

0 commit comments

Comments
 (0)