I did the following
- Created DB and table in prepate_table function
- Extracted data from binance in GetHistoricalData function
- Saved the data to DB
My code works fine but I want to optimize my code and remove redundant steps
My solution
import os
import pandas as pd
import time
import clickhouse_driver
import datetime
from binance.client import Client
# Binance test_key https://testnet.binance.vision/key/generate
API_KEY = "--"
API_SECRET = "--"
def GetHistoricalData(
timedelta_days=10,
ticker="BTCUSDT",
kline_interval=Client.KLINE_INTERVAL_1HOUR
):
# Calculate the timestamps for the binance api function
untilThisDate = datetime.datetime.now()
sinceThisDate = untilThisDate - datetime.timedelta(days=timedelta_days)
client = Client(API_KEY, API_SECRET)
client.API_URL = 'https://testnet.binance.vision/api'
candle = client.get_historical_klines(ticker, kline_interval, str(sinceThisDate), str(untilThisDate))
# Create a dataframe to label all the columns returned by binance so we work with them later.
df = pd.DataFrame(candle, columns=['dateTime', 'open', 'high', 'low', 'close', 'volume', 'closeTime', 'quoteAssetVolume', 'numberOfTrades', 'takerBuyBaseVol', 'takerBuyQuoteVol', 'ignore'])
# as timestamp is returned in ms, let us convert this back to proper timestamps.
df.dateTime = pd.to_datetime(df.dateTime, unit='ms').dt.strftime("%Y-%m-%d %X")
df.set_index('dateTime', inplace=True)
# Get rid of columns we do not need
df = df.drop(['quoteAssetVolume', 'numberOfTrades', 'takerBuyBaseVol','takerBuyQuoteVol', 'ignore'], axis=1)
return df
def prepare_table():
client = clickhouse_driver.Client.from_url(f'clickhouse://default:{os.getenv("CLICK_PASSWORD")}@localhost:9000/crypto_exchange')
# field names from binance API
client.execute('''
CREATE TABLE IF NOT EXISTS historical_data_binance
(
dateTime DateTime,
closeTime Int64,
open Float64,
high Float64,
low Float64,
close Float64,
volume Float64,
kline_type String,
ticker String
) ENGINE = Memory
''')
return client
def insert_data(client, insert_data, db_name="crypto_exchange", table_name="historical_data_binance"):
"""
insert_data = {
"dateTime": dateTime,
"closeTime": closeTime,
"open": open,
"high": hign,
"low": low,
"close": close,
"volume": volume,
"kline_type": kline_type,
"ticker": ticker
}
"""
columns = ', '.join(insert_data.keys())
query = 'insert into {}.{} ({}) values'.format(db_name, table_name, columns)
data = []
data.append(insert_data)
client.execute(query, data)
client_db = prepare_table()
hist_data = GetHistoricalData(kline_interval=Client.KLINE_INTERVAL_1HOUR, ticker="BTCUSDT",)
for row in hist_data.iterrows():
data = row[1].to_dict()
data["dateTime"] = datetime.datetime.strptime(row[0], "%Y-%m-%d %X")
data["closeTime"] = int(data["closeTime"])
data["open"] = float(data["open"])
data["high"] = float(data["high"])
data["low"] = float(data["low"])
data["close"] = float(data["close"])
data["volume"] = float(data["volume"])
data["kline_type"] = Client.KLINE_INTERVAL_1HOUR
data["ticker"] = "BTCUSDT"
insert_data(client_db, data)
What can be improved?