|
| 1 | +import requests |
| 2 | +import json |
| 3 | +import logging |
| 4 | +import general_config as gconf |
| 5 | +import personal_config as pconf |
| 6 | + |
| 7 | + |
| 8 | +logger = logging.getLogger(gconf.Logs.LOGGER_NAME) |
| 9 | + |
| 10 | + |
def enrich_with_apis(data_dict, items=1, app_id_google=None):
    """
    Enrich a dict of scraped data with data from APIs.

    The enrichment is done in place: the dicts stored under data_dict["things"]
    are handed to enrich_things_with_google_ktree, which adds its results to them.
    :param data_dict: A dict containing project save data; must contain the key "things"
    :param items: max number of items to get from the each API that returns lists
    :param app_id_google: google developer key for the API
    :return: None
    """
    logger.info('Starting enrichment with APIs')
    enrich_things_with_google_ktree(data_dict["things"], items=items, app_id=app_id_google)
    logger.info('Done enriching with APIs')
| 33 | + |
| 34 | + |
def enrich_things_with_google_ktree(things_dict, items=1, app_id=None):
    """
    Add google knowledge tree search results to every thing in a dict of things.

    Each thing is looked up by its model name. When the lookup returns a non-empty
    list, the parsed results are stored on the thing under the key 'ktree_data';
    things with no results (None or an empty list) are left untouched.
    :param things_dict: a dict of things, the keys are ids, and the values are dicts representing the thing.
    :param items: max number of items to get from the knowledge tree
    :param app_id: google developer key for the API
    :return: None
    """
    logger.info("Using google's knowledge tree API")
    for entry in things_dict.values():
        raw_results = query_google_ktree(
            entry[gconf.ThingSettings.Elements.MODEL_NAME], items, app_id=app_id
        )
        # Nothing usable came back for this thing: skip it without touching it.
        if raw_results is None or len(raw_results) == 0:
            continue
        entry['ktree_data'] = parse_data_from_ktree_list(raw_results)
    logger.info("Done using google's knowledge tree API")
| 53 | + |
| 54 | + |
def query_google_ktree(thing, nitems=1, lan='en', app_id=None, timeout=10):
    """
    Look for item in google knowledge tree, and pass results in a list with minimal
    processing
    :param thing: search query
    :param nitems: max amount of results to deliver
    :param lan: language of results
    :param app_id: google developer key for the API. Can also be provided in personal config file (pass as None)
    :param timeout: seconds to wait for the API before giving up on the request
    :return: A list of results from google knowledge tree, or None when the request
             failed or the API answered with a status other than 200
    """
    from urllib.parse import urlencode  # local import: only this function needs it

    if app_id is None:
        app_id = pconf.google_ktree_API_key
    # urlencode escapes spaces, '&', '#' and the like, so a query containing them
    # can no longer break or inject extra parameters into the request.
    query_string = urlencode({
        'query': thing,
        'key': app_id,
        'limit': nitems,
        'indent': 'True',
        'types': 'Thing',
        'languages': lan,
    })
    url = gconf.google_ktree.api_address + query_string
    try:
        # Without a timeout requests may block forever on an unresponsive server.
        response = requests.get(url, timeout=timeout)
    except requests.RequestException as err:
        # Log only the exception type: the full message usually embeds the request
        # URL, which carries the API key.
        logger.error(f'google knowledge tree request failed: {type(err).__name__}')
        return None
    if response.status_code != 200:
        logger.error(f'bad google knowledge tree response: {response.status_code}')
        return None
    data = json.loads(response.text)
    return data.get(gconf.google_ktree.main_list_identifier, None)
| 81 | + |
| 82 | + |
def parse_data_from_ktree_list(ktree_data):
    """
    Parse every result in a list of google knowledge tree results.

    Each element is passed through parse_item_from_ktree_list and the parsed
    items are collected, in the same order, into a new list.
    :param ktree_data: a list of results from google knowledge tree
    :return: a new list holding the parsed version of each result
    """
    return [parse_item_from_ktree_list(entry) for entry in ktree_data]
| 94 | + |
| 95 | + |
def parse_item_from_ktree_list(ktree_item):
    """
    get one search result from google knowledge tree and parse it

    An 'EntitySearchResult' wrapper is unwrapped: its 'result' dict is parsed and
    the wrapper's 'resultScore' (when present) is copied onto it. For a plain item
    the keys '@id' and '@type' are renamed to 'id' and 'type', and the contents of
    'detailedDescription' are merged into the item itself.
    The dict is modified in place and the same object is returned.
    :param ktree_item: one search result in the form of a dict
    :return: parsed dict
    :raises KeyError: if an 'EntitySearchResult' wrapper has no 'result' key
    """
    # .get(): an item without '@type' is handled as a plain item instead of
    # raising KeyError (the rename below already treats '@type' as optional).
    if ktree_item.get('@type') == 'EntitySearchResult':
        parsed = parse_item_from_ktree_list(ktree_item['result'])
        if 'resultScore' in ktree_item:
            parsed['resultScore'] = ktree_item['resultScore']
        return parsed

    # Strip the JSON-LD '@' prefix from the identifying keys.
    for raw_key, clean_key in (('@id', 'id'), ('@type', 'type')):
        if raw_key in ktree_item:
            ktree_item[clean_key] = ktree_item.pop(raw_key)

    description = ktree_item.get('detailedDescription')
    if isinstance(description, dict):
        # NOTE(review): dict.update lets keys of the description overwrite
        # same-named keys already on the item - confirm this is intended.
        ktree_item.update(description)
        del ktree_item['detailedDescription']
    return ktree_item
0 commit comments