|
| 1 | +# Download the historical compressed flat files (with all entities or just companies). |
| 2 | +# They are decompressed and combined into a single CSV file per year. |
| 3 | + |
| 4 | +import os |
| 5 | +import zipfile |
| 6 | + |
| 7 | +import requests |
| 8 | + |
| 9 | +from ravenpackapi import RPApi |
| 10 | +from ravenpackapi.util import parse_csv_line |
| 11 | + |
# Script body: list the available historical flat files, download each zip
# archive that is not already on disk, and concatenate the CSV members of the
# archive into '<archive name>.combined.csv' with a single header row.
#
# Both the archive and the combined file are first written to a '.part' file
# and renamed only once complete, so an interrupted run never leaves a
# truncated file that a later run would mistake for a finished one.

api_key = os.environ['RP_API_KEY']  # set your API KEY here
api = RPApi(api_key)

flat_type = 'companies'  # can be 'companies' or 'full'
full_list_api_url = 'https://app.ravenpack.com/downloads/history-list/%s' % flat_type
response = requests.get(
    full_list_api_url,
    params=dict(token=api_key),
)
response.raise_for_status()

for flat_file in response.json():
    local_filename = flat_file['name']
    output_filename = '%s.combined.csv' % local_filename
    if os.path.isfile(output_filename):
        # Already combined by a previous run: nothing to do for this entry.
        continue

    # Only hit the network when the archive is not already available locally.
    if not os.path.isfile(local_filename):
        print("Downloading", local_filename, flat_file['size'])
        partial_download = local_filename + '.part'
        with requests.get(
            'https://app.ravenpack.com/history/getfile',
            params=dict(token=api_key, id=flat_file['id'], type=flat_type),
            stream=True,
        ) as flatzip:
            flatzip.raise_for_status()
            with open(partial_download, 'wb') as f:
                for chunk in flatzip.iter_content(chunk_size=8192):
                    f.write(chunk)
        # Publish the archive under its final name only after a full download.
        os.replace(partial_download, local_filename)

    partial_output = output_filename + '.part'
    with open(partial_output, 'wb') as output:
        headers_written = False
        with zipfile.ZipFile(local_filename) as zipped:
            for member_name in zipped.namelist():
                print(member_name)
                with zipped.open(member_name) as csv_file:
                    header_line = next(csv_file, None)
                    if header_line is None:
                        # Empty member (no header, no rows): skip it instead
                        # of crashing with StopIteration.
                        continue
                    # NOTE(review): the parsed header/row values are not used
                    # below; presumably kept as a starting point for custom
                    # filtering - confirm before removing the calls.
                    headers = parse_csv_line(header_line)
                    last_written = None
                    if not headers_written:
                        # Keep the header of the first non-empty member only.
                        output.write(header_line)
                        headers_written = True
                        last_written = header_line
                    for line in csv_file:
                        row = parse_csv_line(line)
                        output.write(line)
                        last_written = line
                    # ZipFile.open yields bytes lines; if this member did not
                    # end with a newline, add one so its last row is not
                    # fused with the first row of the next member.
                    if last_written is not None and not last_written.endswith(b'\n'):
                        output.write(b'\n')
    # Publish the combined file only once every member has been appended.
    os.replace(partial_output, output_filename)
0 commit comments