Posted this here:
Can anyone recommend a strategy in Node for retrieving the full list of results from the REST API with continuation token using recursive fetch?
Posted this here:
Can anyone recommend a strategy in Node for retrieving the full list of results from the REST API with continuation token using recursive fetch?
Hello,
Here is a commented example of a strategy for retrieving all the results using the continuation token and writing them to an Excel sheet. It is written in Python rather than Node, and it targets a Nexus repository.
I would advise using Python to process the data, as it is better suited to this kind of task.
import yaml
import time
import pandas as pd
import asyncio
import aiohttp
import requests
# read Nexus credentials from a YAML file
with open("credentials.yaml", 'r') as stream:
    try:
        credentials = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        # Nothing below can run without credentials: stop here with a clear
        # message instead of continuing and failing later with a NameError.
        raise SystemExit(f"Could not parse credentials.yaml: {exc}") from exc
# set the base URL and authentication credentials
base_url = 'https://yournexus.fr/service/rest/v1/repositories'
auth = (credentials['all']['vars']['user'], credentials['all']['vars']['password'])
# global accumulator filled by the asset-fetching coroutines
assets = []
repositories = []
# send a GET request to the repositories URL with authentication;
# the timeout keeps the script from hanging forever on an unresponsive server
response = requests.get(base_url, auth=auth, timeout=30)
# fail loudly on 401/403/5xx instead of treating an error payload as the repository list
response.raise_for_status()
# parse the response as JSON
response_data = response.json()
# extract the list of repositories from the response
repositories.extend(response_data)
# define a coroutine function to fetch assets for a given repository
async def get_assets_by_repo(url, continuation_token=None):
    """Fetch every page of assets behind ``url`` into the global ``assets`` list.

    Pages are requested one after another; each response's
    ``continuationToken`` is sent with the next request until a response
    carries no token.

    Args:
        url: Asset listing URL. It may already contain a query string
            (for example ``?repository=<name>``).
        continuation_token: Optional token to resume from; ``None`` starts
            with the first page.

    Returns:
        None. Fetched items are appended to the module-level ``assets`` list.

    Raises:
        aiohttp.ClientResponseError: If a page request returns an HTTP
            error status.
    """
    async with aiohttp.ClientSession(auth=aiohttp.BasicAuth(*auth)) as session:
        while True:
            # Passing the token via ``params`` lets aiohttp URL-encode it and
            # merge it with whatever query string ``url`` already has.
            params = None
            if continuation_token:
                params = {"continuationToken": continuation_token}

            async with session.get(url, params=params) as response:
                # Surface HTTP errors instead of mistaking an error body for
                # "no more results".
                response.raise_for_status()
                # parse the response as JSON
                response_data = await response.json()

            # Forget the token that was just used: only a token found in THIS
            # response may trigger another request. Otherwise a response
            # without one (e.g. a plain list) would repeat the same page forever.
            continuation_token = None

            if isinstance(response_data, list):
                # the response data is directly a list of assets
                assets.extend(response_data)
            elif isinstance(response_data, dict):
                # the response data is a dictionary with an "items" key
                assets.extend(response_data.get("items") or [])
                continuation_token = response_data.get("continuationToken")

            if not continuation_token:
                print(f"I am done for {url}")
                break
# record the start time; the elapsed time is computed from it at the end of the script
start_time = time.time()
# coroutine objects, one per repository name; main() appends to this list
# and then awaits all of them together with asyncio.gather
list_async_function = []
# entry point of the asynchronous part of the script
async def main():
    """Queue one asset-fetching coroutine per unique repository name and await them all."""
    # de-duplicate the names so each repository is fetched only once
    unique_names = {repo['name'] for repo in repositories}
    list_async_function.extend(
        get_assets_by_repo(f'https://yournexus.fr/service/rest/v1/assets?repository={name}')
        for name in unique_names
    )
    # run every queued fetch concurrently and wait for all of them to finish
    await asyncio.gather(*list_async_function)
asyncio.run(main())

# map each spreadsheet column to the asset field it is read from
column_sources = {
    'Format': 'format',
    'Repository': 'repository',
    'Path': 'path',
    'File Size': 'fileSize',
    'Last Modified': 'lastModified',
}
# create a list of dictionaries containing the asset data
asset_data = [
    {column: asset[field] for column, field in column_sources.items()}
    for asset in assets
]
# create a Pandas DataFrame from the asset data; naming the columns explicitly
# keeps the header row in the sheet even when no assets were returned
df = pd.DataFrame(asset_data, columns=list(column_sources))
# write the DataFrame to an Excel file
with pd.ExcelWriter('unsorted_assets_async.xlsx') as writer:
    df.to_excel(writer, sheet_name="nexus-output", index=False)
# calculate and print the time it took to fetch and export the assets
# (the rows are written in fetch order; no sorting is applied)
end_time = time.time()
elapsed_time = end_time - start_time
print(f"Time taken: {elapsed_time:.2f} seconds")