Getting paginated results from REST API with continuation token

brangwein · June 11, 2022, 10:25pm

Posted this here:

Can anyone recommend a strategy in Node for retrieving the full list of results from the REST API with continuation token using recursive fetch?

DevOps3883 · December 1, 2023, 12:04pm

Hello,

Here is a commented example of one strategy for retrieving all the results with the continuation token and writing them to an Excel sheet. It is written in Python rather than Node, and it targets a Nexus repository.
I would advise using Python to process the data, as it is a more efficient language for that.

import yaml
import time
import pandas as pd
import asyncio
import aiohttp
import requests

# read Nexus credentials from a YAML file
# layout required by the lookups below: all -> vars -> user / password
with open("credentials.yaml", 'r') as stream:
    try:
        credentials = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Nothing below can run without credentials, so stop here with a
        # clear message instead of continuing with `credentials` unbound
        # (which would only surface later as an unrelated NameError).
        raise RuntimeError("could not parse credentials.yaml") from exc

# set the base URL and authentication credentials
base_url = 'https://yournexus.fr/service/rest/v1/repositories'
# (user, password) tuple, passed as-is to requests and unpacked for aiohttp
auth = (credentials['all']['vars']['user'], credentials['all']['vars']['password'])


# module-level accumulators: `assets` is filled by get_assets_by_repo(),
# `repositories` holds the entries returned by the repositories endpoint
assets = []
repositories = []
# send a GET request to the repositories URL with authentication
response = requests.get(base_url, auth=auth)
# stop on an HTTP error (bad credentials, wrong URL, ...) rather than
# treating an error payload as if it were the repository list
response.raise_for_status()
# parse the response as JSON
response_data = response.json()

# extract the list of repositories from the response
repositories.extend(response_data)


# define a coroutine function to fetch assets for a given repository
async def get_assets_by_repo(url, continuation_token=None):
    """Fetch every page of assets behind ``url`` into the global ``assets`` list.

    The endpoint is requested repeatedly; each response's
    ``continuationToken`` is sent back as a query parameter on the next
    request until a response carries no token.

    Args:
        url: Asset-listing URL to page through. It may already contain a
            query string; the token is merged into it by aiohttp.
        continuation_token: Optional token to resume from. ``None`` starts
            at the first page.

    Returns:
        None. Results are appended to the module-level ``assets`` list.
    """
    async with aiohttp.ClientSession(auth=aiohttp.BasicAuth(*auth)) as session:
        while True:
            # Passing the token through `params` lets aiohttp add the right
            # separator and escape the value, instead of hard-coding "&".
            params = None
            if continuation_token:
                params = {"continuationToken": continuation_token}

            async with session.get(url, params=params) as response:
                # parse the response as JSON
                response_data = await response.json()

            if isinstance(response_data, list):
                # A bare list carries no continuation token, so this is the
                # only page. Clearing the token ends the loop; previously
                # this branch never broke out and re-requested the same URL
                # forever.
                assets.extend(response_data)
                continuation_token = None
            elif isinstance(response_data, dict):
                # paged shape: the assets live under "items"
                assets.extend(response_data.get("items") or [])
                continuation_token = response_data.get("continuationToken")
            else:
                # unexpected payload (e.g. JSON null): nothing to collect and
                # nothing to continue from
                continuation_token = None

            if not continuation_token:
                print(f"I am done for {url}")
                break

# wall-clock reference point; the end of the script subtracts it from a
# second time.time() reading to report the elapsed time
start_time = time.time()

# coroutines queued by main(), one per repository, awaited together there
list_async_function = []
async def main():
    """Queue one asset-fetch coroutine per distinct repository name and await them all.

    The coroutines are stored in the module-level ``list_async_function``
    and run concurrently through ``asyncio.gather``.
    """
    # de-duplicate the names so each repository is fetched only once
    distinct_names = {repo['name'] for repo in repositories}
    list_async_function.extend(
        get_assets_by_repo(f'https://yournexus.fr/service/rest/v1/assets?repository={name}')
        for name in distinct_names
    )

    await asyncio.gather(*list_async_function)

# run the event loop until every repository has been paged through;
# afterwards the global `assets` list holds everything that was collected
asyncio.run(main())

# keep only the fields that go into the spreadsheet, one dict per asset;
# the key order here is the column order of the sheet
asset_data = [
    {
        'Format': entry['format'],
        'Repository': entry['repository'],
        'Path': entry['path'],
        'File Size': entry['fileSize'],
        'Last Modified': entry['lastModified'],
    }
    for entry in assets
]

# build the table and write it to a single worksheet, without the index column
df = pd.DataFrame(asset_data)
with pd.ExcelWriter('unsorted_assets_async.xlsx') as writer:
    df.to_excel(writer, sheet_name="nexus-output", index=False)

# report how long fetching and exporting took (the rows are written in
# arrival order; nothing here sorts them)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time:.2f} seconds")