I ran into some issues while trying to sort the dataframe. My code gets data that only allows for 1000 rows at a time and then it send a continuation URL which my script follows with the while loop, but the problem is that on each pass I have it writing and appending to the CSV. It worked fine but now that I need to sort the whole data frame it is an issue.
How can I have this write to the data frame on each pass then write the dataframe to the CSV. Would I append to the data frame on each loop or would I have it make new dataframes on each pass then combine them at then end somehow?
import requests
import json
import pandas as pd
import time
import os
from itertools import product
#what I need to loop through
instrument = ('btc-usd')
exchange = ('cbse')
interval = ('1m','3m')
start_time = '2021-01-14T00:00:00Z'
end_time = '2021-01-16T23:59:59Z'
for (interval) in product(interval):
page_size = '1000'
url = f'https://us.market-api.kaiko.io/v2/data/trades.v1/exchanges/{exchange}/spot/{instrument}/aggregations/count_ohlcv_vwap'
#params = {'interval': interval, 'page_size': page_size, 'start_time': start_time, 'end_time': end_time }
params = {'interval': interval, 'page_size': page_size }
KEY = 'xxx'
headers = {
"X-Api-Key": KEY,
"Accept": "application/json",
"Accept-Encoding": "gzip"
}
csv_file = f"{exchange}-{instrument}-{interval}.csv"
c_token = True
while(c_token):
res = requests.get(url, params=params, headers=headers)
j_data = res.json()
parse_data = j_data['data']
c_token = j_data.get('continuation_token')
today = time.strftime("%Y-%m-%d")
params = {'continuation_token': c_token}
if c_token:
url = f'https://us.market-api.kaiko.io/v2/data/trades.v1/exchanges/cbse/spot/btc-usd/aggregations/count_ohlcv_vwap?continuation_token={c_token}'
# create dataframe
df = pd.DataFrame.from_dict(pd.json_normalize(parse_data), orient='columns')
df.insert(1, 'time', pd.to_datetime(df.timestamp.astype(int),unit='ms'))
df['range'] = df['high'].astype(float) - df['low'].astype(float)
df.range = df.range.astype(float)
#sort
df = df.sort_values(by='range')
#that means file already exists need to append
if(csv_file in os.listdir()):
csv_string = df.to_csv(index=False, encoding='utf-8', header=False)
with open(csv_file, 'a') as f:
f.write(csv_string)
#that means writing file for the first time
else:
csv_string = df.to_csv(index=False, encoding='utf-8')
with open(csv_file, 'w') as f:
f.write(csv_string)