|
| 1 | +"""Tool for matching system numbers with their uuids in digital library.""" |
| 2 | +from modules import config, pull, reports |
| 3 | +import csv |
| 4 | + |
| 5 | + |
def process_data(configuration, counter):
    """Look up system numbers for indexed documents missing from the CSV.

    Reads the UUIDs already recorded in
    ``./data/kramerius_in_856_latest.csv`` (second column of each row), walks
    the documents returned by ``pull.oai_index`` and, for every document whose
    ``PID`` is not among the recorded UUIDs, asks Fedora for its system
    number.

    Args:
        configuration: Mapping providing ``['TARGETS']['OAI']``,
            ``['TARGETS']['FEDORA']``, ``['AUTH']['FEDORA_USER']`` and
            ``['AUTH']['FEDORA_PASS']``.
        counter: Object exposing ``add(name)`` and ``report()``; it receives
            the ``'resolved'``, ``'unresolved'`` and ``'total'`` tallies.

    Returns:
        A dict with two sub-dicts: ``'ok'`` maps PID to the system number
        that was found, ``'unresolved'`` maps PID to ``None`` for documents
        whose lookup returned ``None``.
    """
    # newline='' is what the csv module asks for so that embedded line breaks
    # are parsed correctly. A set keeps the per-document membership test O(1)
    # instead of scanning a list for every indexed document.
    with open('./data/kramerius_in_856_latest.csv', newline='') as csvfile:
        known_uuids = {
            row[1]
            for row in csv.reader(csvfile, delimiter=',')
            # Blank lines come back as [] and would otherwise raise IndexError.
            if len(row) > 1
        }

    results = {'ok': {}, 'unresolved': {}}

    oai_target = pull.Site(configuration['TARGETS']['OAI'],
                           user=None, passw=None)
    fedora_target = pull.Site(location=configuration['TARGETS']['FEDORA'],
                              user=configuration['AUTH']['FEDORA_USER'],
                              passw=configuration['AUTH']['FEDORA_PASS'])
    index = pull.oai_index(oai_target.location)

    for doc in index['response']["docs"]:
        pid = doc['PID']
        if pid not in known_uuids:
            print('Lookup system number for ' + pid)
            sysno = pull.fedora_record_identif(fedora_target.location,
                                               fedora_target.user,
                                               fedora_target.passw,
                                               uuid=pid)
            if sysno is None:
                counter.add('unresolved')
                results['unresolved'][pid] = None
            else:
                counter.add('resolved')
                results['ok'][pid] = sysno

        # NOTE(review): counted once per indexed document, whether or not a
        # lookup was needed; confirm this matches the intended nesting level.
        counter.add('total')

    counter.report()
    return results
| 44 | + |
| 45 | + |
def write_outfile(results, path='./data/856_kramerius_export.txt'):
    """Append two export lines per document to the output file.

    For every UUID in ``results['ok']`` the lines start with the matching
    system number; for every UUID in ``results['unresolved']`` they start with
    the literal placeholder ``SYSNO`` so the value can be filled in later.

    Args:
        results: Dict with ``'ok'`` (UUID -> system number) and
            ``'unresolved'`` (UUID -> anything; the value is ignored).
        path: File to append to. Defaults to the export file this tool has
            always written.
    """
    # NOTE(review): runs of spaces inside these literals may have been
    # collapsed when the source was rendered; confirm the column layout
    # against the format the importing system expects.
    link_prefix = ' 85640 L $$uhttps://kramerius.techlib.cz/search/handle/'
    link_suffix = '$$yDigitalizovany dokument\n'
    base_field = ' BAS L di\n'

    def record_lines(sysno, uuid):
        """Return the link line and the base line for one document."""
        return [sysno + link_prefix + uuid + link_suffix, sysno + base_field]

    lines = []
    for uuid, sysno in results['ok'].items():
        lines.extend(record_lines(str(sysno), uuid))
    for uuid in results['unresolved']:
        lines.extend(record_lines('SYSNO', uuid))

    # Append mode: earlier exports in the same file are kept.
    with open(path, 'a') as outfile:
        outfile.writelines(lines)
| 60 | + |
| 61 | + |
if __name__ == '__main__':
    # Script entry point: load the settings, run the matching pass with a
    # fresh counter, then append whatever was found to the export file.
    settings = config.load_config("./configuration/config.json")
    tally = reports.Counter()
    write_outfile(process_data(settings, tally))
0 commit comments