# Source code for crypto_env.dataloader.ethloader

import os
import pandas as pd
import numpy as np

from .dataloader import DataLoader

class ETHLoader(DataLoader):
    """Example implementation of the :py:class:`DataLoader` class.

    Loads Ethereum history data from the coinmetrics repo, keeps the
    requested feature columns for a row window, and caches that window as a
    CSV file under ``<base_dir>/eth_data/data.csv``.

    See https://raw.githubusercontent.com/coinmetrics/data for more details.
    """

    def __init__(self, base_dir, start_idx, end_idx, features: list,
                 dropna=False, download=True,
                 url="https://raw.githubusercontent.com/coinmetrics/data/master/csv/eth.csv"):
        """Load the data window and write it to the cache directory.

        Args:
            base_dir (str): Directory under which the ``eth_data`` folder is
                created to save the downloaded data.
            start_idx (int): Where to start in the data source.
            end_idx (int): Where to end in the data source (exclusive, as in
                ``DataFrame.iloc`` slicing).
            features (list): Input variables (column names) for the
                environment.
            dropna (bool, optional): Whether to drop lines including empty
                values. Defaults to False.
            download (bool, optional): Whether to re-download the data from
                ``url``. When False the data is read from the cached
                ``data.csv`` instead. Defaults to True.
            url (str, optional): Link to the data source. Defaults to
                "https://raw.githubusercontent.com/coinmetrics/data/master/csv/eth.csv".
        """
        self._dir = os.path.join(base_dir, 'eth_data')
        self._features = features
        cache_path = os.path.join(self._dir, 'data.csv')

        # NOTE(review): the cache written below holds the already-sliced
        # window, so with download=False the start_idx/end_idx slice is
        # applied relative to that cached window, not to the full history.
        # Confirm this is the intended behaviour.
        addr = url if download else cache_path

        window = pd.read_csv(addr)[list(features)].iloc[start_idx:end_idx]
        if dropna:
            window = window.dropna()
        # drop=True renumbers rows from 0 without materialising the old index
        # as an extra 'index' column that would have to be removed again.
        self._data = window.reset_index(drop=True)

        # makedirs also creates a missing base_dir and tolerates an existing
        # target directory, unlike a bare isdir()/mkdir() pair.
        os.makedirs(self._dir, exist_ok=True)
        self._data.to_csv(cache_path)

        # State for the iterator protocol.
        self._idx = 0
        self._duration = len(self._data)

    def __len__(self):
        """Number of items.

        Returns:
            int: Number of rows in the loaded data window.
        """
        return len(self._data)

    def __next__(self):
        """Return the next row of the data window.

        Returns:
            tuple: ``(idx, payload)`` where ``idx`` is the zero-based position
            of the row and ``payload`` is the row at that position.

        Raises:
            StopIteration: When every row has been consumed.
        """
        # End of the iteration.
        if self._idx >= len(self._data):
            raise StopIteration()
        position = self._idx
        self._idx += 1
        return position, self._data.iloc[position]

    def get_feature(self, feature_name):
        """Return the column(s) of the loaded data selected by ``feature_name``."""
        return self._data[feature_name]

    def get_duration(self):
        """Return the number of rows that were loaded at construction time."""
        return self._duration

    def get_idx(self):
        """Return a ``pandas.Series`` holding ``0 .. len(self) - 1``."""
        return pd.Series(np.arange(len(self)))

    def reset(self):
        """Rewind the iterator to the first row."""
        self._idx = 0