In most circumstances, narrowly-defined selections are the best and quickest way to download data with the API. In some cases, however, it may be necessary to fetch everything at once. Doing this is nearly identical to any other download, apart from two conditions: the selection's Regions and Variables lists are left empty (which selects everything in the databank), and the results are paged through until a page comes back with fewer than page_size records:
#
# Copyright (c) 2022 Oxford Economics Ltd. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the
# project root for full license information.
#
import json
import requests

API_KEY = '<<ENTER API KEY HERE>>'
BASE_URL = 'https://services.oxfordeconomics.com'
sample_selection = {
    'DatabankCode': 'WDMacro',
    'Frequency': 'Annual',
    'GroupingMode': 'false',
    'IndicatorSortOrder': 'AlphabeticalOrder',
    'IsTemporarySelection': 'true',
    'ListingType': 'Private',
    'LocationSortOrder': 'AlphabeticalOrder',
    'Order': 'IndicatorLocation',
    'Sequence': 'EarliestToLatest',
    'StackedQuarters': 'false',
    'StartYear': 1980,
    'EndYear': 2045,
    # note: leaving these two lists empty selects every
    # region and every variable in the databank
    'Regions': [],
    'Variables': []
}
def databank_download(selection_dictionary):
    headers = {'Accept': 'application/json',
               'Api-Key': API_KEY,
               'Content-Type': 'application/json; charset=utf8'}

    def _download_url(base_url, page, page_size):
        # builds a link of the form
        # .../api/download?includemetadata=true&page={page}&pagesize={page_size}
        url = base_url + '/api/download?includemetadata=true'
        return url + '&page={0}&pagesize={1}'.format(page, page_size)

    page = 0
    page_size = 5000
    data = None
    while True:
        response = requests.post(_download_url(BASE_URL, page, page_size),
                                 headers=headers,
                                 data=json.dumps(selection_dictionary))
        new_data = response.json()
        page += 1
        # accumulate pages into a single list
        if data is None:
            data = new_data
        else:
            data.extend(new_data)
        # a short page signals the end of the data
        if len(new_data) < page_size:
            break
    return data
if __name__ == '__main__':
    data = databank_download(sample_selection)
    with open('data_file.tmp', 'w') as data_file:
        json.dump(data, data_file)
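Once the script finishes, data_file.tmp holds a single JSON array of records. As a quick sanity check, the file can be loaded back and its record count inspected; a minimal sketch:

import json

# reload the downloaded selection and report how many records it holds
with open('data_file.tmp', 'r') as data_file:
    records = json.load(data_file)
print('downloaded {0} records'.format(len(records)))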
A more advanced Python implementation, using a saved selection ID:
#
# Copyright (c) 2017 Oxford Economics Ltd. All rights reserved.
# Licensed under the MIT License. See LICENSE file in the
# project root for full license information.
#
import sys
import json
import requests
import time

# WARNING: in production these values should be provided via the
# environment or command-line arguments
API_KEY = '<<ENTER API KEY HERE>>'
SELECTION = {'ID': '<<ENTER SELECTION ID HERE>>', 'NAME': '<<ENTER NAME OF SELECTION HERE>>'}
PAGE_SIZE = 20000   # number of records per page (20k is the max allowed by the API)
MAX_PAGES = None    # set to None to fetch as many pages as possible
CHECKPOINT = 5      # checkpoint (save) after this many pages
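# NOTE: a minimal sketch of reading the key from the environment instead
# (the variable name OE_API_KEY is purely illustrative):
#
#     import os
#     API_KEY = os.environ['OE_API_KEY']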
def databank_download(f, selection_id, show_progress=True):
    BASE_URL = 'https://services.oxfordeconomics.com'
    INCLUDE_METADATA = 'false'  # or set to 'true'
    headers = {'Accept': 'application/json',
               'Api-Key': API_KEY,
               'Content-Type': 'application/json; charset=utf8'}

    def _download_url(base_url, page, page_size):
        return f'{base_url}/api/download/{selection_id}?includemetadata={INCLUDE_METADATA}&page={page}&pagesize={page_size}'

    page = 0
    page_size = PAGE_SIZE
    max_pages = MAX_PAGES
    data = None
    api_request_time_total = 0.0
    disk_write_time_total = 0.0
    while True:
        # download selection_id with escalating-wait retry logic
        max_retries = 5
        retries = 0
        while retries < max_retries:
            try:
                start = time.time()
                response = requests.get(_download_url(BASE_URL, page, page_size), headers=headers)
                end = time.time()
                api_request_time_total += (end - start)
                break
            except requests.exceptions.ConnectionError as msg:
                print(f'ConnectionError Exception!\n{msg}')
                retries += 1
                print(f'Waiting to retry ({retries}/{max_retries}) in {retries * 5} seconds...')
                time.sleep(retries * 5)
            except requests.exceptions.Timeout as msg:
                print(f'Timeout Exception!\n{msg}')
                retries += 1
                print(f'Waiting to retry ({retries}/{max_retries}) in {retries * 5} seconds...')
                time.sleep(retries * 5)
            except Exception as msg:
                print(f'Exception!\n{msg}')
                raise RuntimeError('Something bad happened!')
        if retries == max_retries:
            return page, api_request_time_total, disk_write_time_total
        page_data = response.json()
        page += 1
        if show_progress:
            sys.stdout.write('#')
            sys.stdout.flush()
        # accumulate data
        if data is None:
            data = page_data
        else:
            data.extend(page_data)
        # dump to file every CHECKPOINT pages; note that each checkpoint
        # writes a separate JSON array, so the output file contains a
        # sequence of concatenated JSON documents
        if page % CHECKPOINT == 0 and data is not None:
            sys.stdout.write('+')
            sys.stdout.flush()
            start = time.time()
            json.dump(data, f)
            end = time.time()
            disk_write_time_total += (end - start)
            data = None
        # get all pages, or stop early if max_pages is set
        if max_pages is not None and page == max_pages:
            break
        # have we got to the end? a short page means yes
        if len(page_data) < page_size:
            break
    # dump the last chunk of data, if any
    if data is not None:
        start = time.time()
        json.dump(data, f)
        end = time.time()
        disk_write_time_total += (end - start)
    return page, api_request_time_total, disk_write_time_total
if __name__ == '__main__':
    SELECTION_ID = SELECTION['ID']
    DATA_FILE = SELECTION['NAME'] + '.tmp'
    start = time.asctime(time.localtime(time.time()))
    print(f'Started @ {start} | DATA_FILE={DATA_FILE}, PAGE_SIZE={PAGE_SIZE}, MAX_PAGES={MAX_PAGES}, CHECKPOINT={CHECKPOINT}')
    with open(DATA_FILE, 'w') as f:
        pages, api_request_time_total, disk_write_time_total = databank_download(f, SELECTION_ID)
    end = time.asctime(time.localtime(time.time()))
    print(f'\nCOMPLETE! Downloaded {pages} pages')
    print(f'Started @ {start}')
    print(f'Ended   @ {end}')
    print('Total API Request Time: %.1f secs, %.1f mins' % (api_request_time_total, api_request_time_total / 60))
    print('Total Disk Write Time:  %.1f secs, %.1f mins' % (disk_write_time_total, disk_write_time_total / 60))
'''
Please note:
If you're using VS Code to develop/debug this program and it encounters an
SSL error, create the following links (or physically copy the files over):
# mklink "<YOUR ANACONDA LOCATION>\Anaconda3\DLLs\libcrypto-1_1-x64.dll" "<YOUR ANACONDA LOCATION>\Anaconda3\Library\bin\libcrypto-1_1-x64.dll"
# mklink "<YOUR ANACONDA LOCATION>\Anaconda3\DLLs\libssl-1_1-x64.dll" "<YOUR ANACONDA LOCATION>\Anaconda3\Library\bin\libssl-1_1-x64.dll"
The exception you may see will be something like this:
# HTTPSConnectionPool(host='services.oxfordeconomics.com', port=443):
# Max retries exceeded with url: /api/download/...?includemetadata=false&page=0&pagesize=2000
# (Caused by SSLError("Can't connect to HTTPS URL because the SSL module is not available."))
'''
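Because each checkpoint writes a separate JSON array into the same file, the resulting .tmp file is a sequence of concatenated JSON documents rather than one array, so a plain json.load() will fail on it. A minimal sketch of reading such a file back with json.JSONDecoder.raw_decode (the helper name load_checkpointed_file is purely illustrative):

import json

def load_checkpointed_file(path):
    # parse a file containing one or more concatenated JSON arrays
    decoder = json.JSONDecoder()
    records = []
    with open(path, 'r') as f:
        text = f.read()
    pos = 0
    while pos < len(text):
        chunk, end = decoder.raw_decode(text, pos)
        records.extend(chunk)
        # skip any whitespace between documents
        while end < len(text) and text[end].isspace():
            end += 1
        pos = end
    return records

records = load_checkpointed_file('<<ENTER NAME OF SELECTION HERE>>.tmp')
print(f'loaded {len(records)} records')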