Skip to content

Commit 90fd826

Browse files
committed
implemented thing enrichment from google's Knowledge Graph Search API
1 parent e3c4e83 commit 90fd826

File tree

4 files changed

+184
-2
lines changed

4 files changed

+184
-2
lines changed

APIs.py

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import requests
2+
import json
3+
import logging
4+
import general_config as gconf
5+
import personal_config as pconf
6+
7+
8+
# Project logger; its name is read from the general configuration module.
logger = logging.getLogger(gconf.Logs.LOGGER_NAME)
9+
10+
11+
def enrich_with_apis(data_dict, items=1, app_id_google=None):
    """
    Enrich a dict of scraped data with data from APIs.

    The only enrichment step at the moment is the google knowledge tree
    lookup, which is run on the entries stored under ``data_dict["things"]``.

    :param data_dict: A dict containing project save data (must have a "things" key)
    :param items: max number of items to get from each API that returns lists
    :param app_id_google: google developer key for the API
    :return: None
    """
    logger.info('Starting enrichment with APIs')
    enrich_things_with_google_ktree(data_dict["things"], items=items, app_id=app_id_google)
    logger.info('Done enriching with APIs')
33+
34+
35+
def enrich_things_with_google_ktree(things_dict, items=1, app_id=None):
    """
    Enrich a dict of things with data from google knowledge tree.

    Each thing is looked up by its model name. When the lookup yields at
    least one result, the parsed results are stored on the thing under the
    'ktree_data' key; things without results are left untouched.

    :param things_dict: a dict of things, the keys are ids, and the values are dicts representing the thing.
    :param items: max number of items to get from the knowledge tree
    :param app_id: google developer key for the API
    :return: None
    """
    logger.info("Using google's knowledge tree API")
    name_key = gconf.ThingSettings.Elements.MODEL_NAME
    # Only the thing dicts are needed; the ids (keys) are never used here.
    for thing in things_dict.values():
        ex_data = query_google_ktree(thing[name_key], items, app_id=app_id)
        # A failed query (None) and an empty result list both mean there is nothing to attach.
        if ex_data:
            thing['ktree_data'] = parse_data_from_ktree_list(ex_data)
    logger.info("Done using google's knowledge tree API")
53+
54+
55+
def query_google_ktree(thing, nitems=1, lan='en', app_id=None, timeout=10):
    """
    Look for item in google knowledge tree, and pass results in a list with minimal
    processing

    :param thing: search query
    :param nitems: max amount of results to deliver
    :param lan: language of results
    :param app_id: google developer key for the API. Can also be provided in personal config file (pass as None)
    :param timeout: seconds to wait for the API before giving up
    :return: A list of results from google knowledge tree, or None when the request fails,
             the response status is not 200, or the response holds no result list
    """
    # Imported here so the function does not rely on the module-level import list.
    from urllib.parse import urlencode

    if app_id is None:
        app_id = pconf.google_ktree_API_key
    # urlencode escapes spaces, '&', '#', etc. in the search term, which would
    # otherwise corrupt the query string (thing names are free text).
    query_string = urlencode({
        'query': thing,
        'key': app_id,
        'limit': nitems,
        'indent': True,
        'types': 'Thing',
        'languages': lan,
    })
    try:
        # The configured address is used as a prefix that the query string is appended to.
        response = requests.get(gconf.google_ktree.api_address + query_string, timeout=timeout)
    except requests.RequestException as err:
        # Log only the exception type: the exception text can embed the request URL,
        # and the URL carries the API key.
        logger.error(f'google knowledge tree request failed: {type(err).__name__}')
        return None
    if response.status_code != 200:
        logger.error(f'bad google knowledge tree response: {response.status_code}')
        return None
    data = json.loads(response.text)
    return data.get(gconf.google_ktree.main_list_identifier, None)
81+
82+
83+
def parse_data_from_ktree_list(ktree_data):
    """
    Parse every search result of a google knowledge tree result list.

    :param ktree_data: a list of results from google knowledge tree
    :return: a new list with each result run through parse_item_from_ktree_list, in the same order
    """
    return [parse_item_from_ktree_list(entry) for entry in ktree_data]
94+
95+
96+
def parse_item_from_ktree_list(ktree_item):
    """
    Get one search result from google knowledge tree and parse it.

    An 'EntitySearchResult' wrapper is unwrapped: its nested 'result' dict is
    parsed and returned with the wrapper's 'resultScore' added to it. Any other
    dict is flattened in place: '@id' and '@type' are renamed to 'id' and 'type',
    and the entries of 'detailedDescription' are merged into the top level.

    :param ktree_item: one search result in the form of a dict (modified in place)
    :return: parsed dict
    """
    # .get(): '@type' is optional (the flattening below also treats it as such),
    # so a missing key must not raise.
    if ktree_item.get('@type') == 'EntitySearchResult':
        score = ktree_item['resultScore']
        parsed = parse_item_from_ktree_list(ktree_item['result'])
        parsed['resultScore'] = score
        return parsed

    for raw_key, clean_key in (("@id", "id"), ("@type", "type")):
        if raw_key in ktree_item:
            ktree_item[clean_key] = ktree_item.pop(raw_key)
    if "detailedDescription" in ktree_item:
        # NOTE(review): description entries overwrite same-named top-level keys
        # (dict.update semantics) - confirm that is acceptable for keys such as 'url'.
        ktree_item.update(ktree_item.pop("detailedDescription"))
    return ktree_item

cli.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ def cli_set_arguments():
3131
'20 = normal, '
3232
'30 = debug, '
3333
'40 = verbose')
34+
parser.add_argument('--google-app-name', help='google developer code used to access google APIs',
35+
type=str, default=pconf.google_ktree_API_key)
3436

3537
gr_volume = parser.add_mutually_exclusive_group()
3638
# volume of CLI output
@@ -39,7 +41,7 @@ def cli_set_arguments():
3941

4042
gr_data = parser.add_mutually_exclusive_group()
4143
# where to load data from at the start of the run
42-
gr_data.add_argument('-j', '--load-json', help='Saves as json', action='store_true')
44+
gr_data.add_argument('-j', '--load-json', help='loads a json save file', action='store_true')
4345
gr_data.add_argument('-d', '--load-db', help='(el) Loads json save', action='store_true')
4446

4547
gr_db = parser.add_mutually_exclusive_group()
@@ -85,4 +87,5 @@ def inter_parser(args=None, parser=None):
8587
inp['do_save_json'] = vars(args).get("save_json", False)
8688
inp['Interactive'] = vars(args).get("Interactive", False)
8789
inp['preliminary_count'] = vars(args).get("pre_search", 0) if inp['search_type'] != 'thing' else 0
90+
inp['google_app_id'] = vars(args)['google_app_name']
8891
return inp

main.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -316,6 +316,16 @@ def scrape_remixes_in_db(settings, db):
316316
return db, failed
317317

318318

319+
def enrich_with_apis(inp, data):
    """
    Run the API enrichment step on the data, driven by the CLI settings.

    :param inp: settings dict; its 'num_runs' and 'google_app_id' entries are read
    :param data: data handed to APIs.enrich_with_apis
    :return: None
    """
    # NOTE(review): 'num_runs' is forwarded as the per-API item limit - confirm this is intended.
    max_items = inp['num_runs']
    google_key = inp['google_app_id']
    APIs.enrich_with_apis(data, max_items, google_key)
327+
328+
319329
def follow_cli(inp, data=None):
320330
"""
321331
Follow instructions from CLI
@@ -340,11 +350,14 @@ def follow_cli(inp, data=None):
340350
data, fail = scrape_make_in_db(inp, data)
341351
elif search_type == 'remix':
342352
data, fail = scrape_remixes_in_db(inp, data)
353+
elif search_type == 'apis':
354+
enrich_with_apis(inp, data)
343355
elif search_type == 'all':
344356
data, fail = scrape_main_page(settings=inp, data=data)
345357
data, fail = scrape_remixes_in_db(inp, data)
346358
data, fail = scrape_make_in_db(inp, data)
347359
data, fail = scrape_users_in_db(inp, data)
360+
enrich_with_apis(inp, data)
348361
else:
349362
logger.warning(f"{search_type} scraping not implemented yet")
350363

@@ -364,7 +377,6 @@ def log_file_gen():
364377
os.mkdir(gconf.Logs.LOG_DIR)
365378
# generate saving path for log file
366379
saving_path = os.path.join(gconf.Logs.LOG_DIR, gconf.Logs.NAME_LOG + '.log')
367-
368380
return os.path.abspath(saving_path)
369381

370382

test_APIs.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
import APIs
2+
import logging
3+
import general_config as gconf
4+
5+
6+
# Project logger; its name is read from the general configuration module.
logger = logging.getLogger(gconf.Logs.LOGGER_NAME)
7+
8+
9+
# Empty save-data layout: one dict per kind of scraped entity.
sample_data_template = {"things": {}, "users": {}, "makes": {}}

# Shallow copy of the template whose "things" entry is replaced by a handful
# of sample things (id -> thing dict) used as search terms in the tests.
sample_data = {
    **sample_data_template,
    "things": {
        "4760325": {"model_name": "Uno Box Holder"},
        "4760326": {"model_name": "Pocket hole jig"},
        "4760327": {"model_name": "brick"},
        "4760328": {"model_name": "Bricks"},
        "4760329": {"model_name": "maya"},
        "4760330": {"model_name": "Finally done with this test :)"},
    },
}
21+
22+
23+
def test_enrich_things_with_google_ktree_sample_run():
    """Print the number of sample things; makes no assertion and does not call the API."""
    things = sample_data["things"].copy()
    n_things = len(things)
    print(n_things)
26+
27+
28+
def test_enrich_things_with_google_ktree_len_conservation():
    """Enrichment must neither add nor remove things from the dict it is given."""
    import copy

    # Deep copy: enrichment writes into each thing's dict, and a shallow copy
    # would share those inner dicts with the module-level sample data.
    data = copy.deepcopy(sample_data["things"])
    n_before = len(data)
    APIs.enrich_things_with_google_ktree(data)
    assert len(data) == n_before
34+
35+
36+
def test_query_google_ktree_res_type():
    """A successful query returns its results as a list."""
    data = APIs.query_google_ktree('maya', 5)
    # isinstance is the idiomatic type check and also accepts list subclasses.
    assert isinstance(data, list)
39+
40+
41+
def test_query_google_ktree_res_len():
    """A query never returns more results than the requested maximum."""
    limit = 5
    results = APIs.query_google_ktree('maya', limit)
    assert len(results) <= limit
44+
45+
46+
def test_query_google_ktree_has_dicts():
    """Every result returned by a query is a dict."""
    data = APIs.query_google_ktree('maya', 5)
    for sub in data:
        # isinstance is the idiomatic type check and also accepts dict subclasses.
        assert isinstance(sub, dict)
50+
51+
52+
def test_parse_data_from_ktree_list():
    """Parsing flattens each search result and keeps the order and number of results."""
    # Canned response items, so the test runs without network access.
    raw = [
        {
            "@type": "EntitySearchResult",
            "resultScore": 42.0,
            "result": {
                "@id": "kg:/m/0abc",
                "@type": ["Thing"],
                "name": "brick",
                "detailedDescription": {"articleBody": "A brick is a block."},
            },
        },
        {
            "@type": "EntitySearchResult",
            "resultScore": 7.5,
            "result": {"@id": "kg:/m/0def", "@type": ["Thing"], "name": "maya"},
        },
    ]
    parsed = APIs.parse_data_from_ktree_list(raw)
    assert parsed == [
        {
            "id": "kg:/m/0abc",
            "type": ["Thing"],
            "name": "brick",
            "articleBody": "A brick is a block.",
            "resultScore": 42.0,
        },
        {"id": "kg:/m/0def", "type": ["Thing"], "name": "maya", "resultScore": 7.5},
    ]

0 commit comments

Comments
 (0)