未だ綺麗にする作業が完了していないが、途中経過を置いておく。
#!/usr/bin/env python
# coding: utf-8
#
# Notebook export: loads Nikkei 225 mini tick data, resamples it to OHLCV bars
# and renders candlestick/volume figures with seaborn, plotly and mplfinance.
#
# NOTE(review): the line structure of this script was reconstructed from a
# whitespace-collapsed copy; block nesting that could not be determined from
# syntax alone is flagged with NOTE(review) below.

# In[1]:

filepath = './data/日経225mini 歩み値(ティック) (2022 08).zip'


# In[2]:

get_ipython().run_line_magic('pip', 'install mplfinance plotly joblib')
get_ipython().run_line_magic('pip', 'install -U kaleido')

# %pip install mplfinance
import mplfinance as mpf
import matplotlib.pyplot as plt
import seaborn as sns
get_ipython().run_line_magic('matplotlib', 'inline')

# %pip install plotly
# %pip install -U kaleido
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px

tips = sns.load_dataset('tips')

# https://note.nkmk.me/python-joblib-parallel-usage/
# %pip install joblib
from joblib import Parallel, delayed

import pandas as pd
from pandas import DatetimeIndex
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Union, Annotated, NewType


# In[3]:

# True when the raw CSV was already loaded by an earlier run of this cell; in
# that case the (slow) read is skipped and only the working copy is refreshed.
reset_df = '_df_original' in globals()
dtype = {
    'trade_date': str,
    'make_date': str,
    'index_type': 'uint8',
    'security_code': str,
    'time': str,
    'trade_price': 'float32',
    'price_type': str,
    'trade_volume': 'uint32',
    'no': 'uint32',
    'contract_month': str,
}
if not reset_df:
    _df_original = pd.read_csv(filepath, dtype=dtype)
# parse_date() mutates its argument, so always work on a fresh copy.
df = _df_original.copy()


# In[4]:

def parse_date(df):
    """Convert the date columns to timestamps and index the frame by them.

    ``trade_date`` and ``make_date`` are each concatenated with the ``time``
    column and parsed with the format ``%Y%m%dT%H%M%S%f``.  The frame is
    modified in place; its index becomes the parsed ``make_date`` column.

    Args:
        df: Frame with string columns ``trade_date``, ``make_date``, ``time``.

    Returns:
        The same (mutated) frame, for call chaining.
    """
    timestamp_format = '%Y%m%dT%H%M%S%f'
    df['trade_date'] = pd.to_datetime(
        df['trade_date'] + 'T' + df['time'], format=timestamp_format)
    df['make_date'] = pd.to_datetime(
        df['make_date'] + 'T' + df['time'], format=timestamp_format)
    df.index = df['make_date']
    return df


df = parse_date(df)


# In[5]:

df


# In[6]:

def convert_into_ohlcv(df, frequency: str) -> pd.DataFrame:
    """Resample tick data into OHLCV bars.

    Args:
        df: Frame with ``trade_price`` and ``trade_volume`` columns and an
            index that supports ``resample``.
        frequency: Resampling rule passed to ``resample`` (e.g. ``'S'``).

    Returns:
        Frame with ``open``/``high``/``low``/``close`` taken from
        ``trade_price`` and ``volume`` as the summed ``trade_volume``.
    """
    ohlcv = df.trade_price.resample(frequency).ohlc()
    ohlcv['volume'] = df.trade_volume.resample(frequency).sum()
    return ohlcv


ohlcv = convert_into_ohlcv(df, 'S')
ohlcv


# In[ ]:


# In[7]:

def show_by_seaborn(df, x, y, hue=None, kind='line', **kwargs):
    """Draw a seaborn line plot of ``y`` against ``x`` and display it.

    Args:
        df: Data passed to ``sns.lineplot`` as ``data``.
        x, y, hue: Forwarded to ``sns.lineplot``.
        kind: Accepted for call compatibility; currently unused (a line plot
            is always drawn).
        **kwargs: Extra keyword arguments forwarded to ``sns.lineplot``.
    """
    sns.lineplot(data=df, x=x, y=y, hue=hue, **kwargs)
    plt.show()


# Rows are selected with .loc: selecting rows via ``frame['2022-08-01']`` was
# deprecated in pandas 1.2 and removed in pandas 2.0.
# show_by_seaborn(df, df.index, df.trade_price)
show_by_seaborn(ohlcv.loc['2022-08-01'], 'make_date', 'open')


# In[8]:

show_by_seaborn(ohlcv.loc['2022-08-01'], 'make_date', 'volume')


# In[9]:

ohlcv_H = convert_into_ohlcv(df, 'H')
ohlcv_H


# In[10]:

df.sort_index().loc['2022-08-01T16:30:00':'2022-08-02T06:00:10', :]


# In[11]:

len(df.sort_index().loc['2022-08-07'])


# Shared helpers for the plotly candlestick figures below.

def _hide_range_slider(fig):
    """Turn off the x-axis range slider (done before static image export)."""
    fig.update_layout(xaxis=dict(rangeslider=dict(visible=False)))


def _add_range_controls(fig):
    """Add range-selector buttons and a visible range slider to the x axis.

    https://qiita.com/Ringa_hyj/items/b13e3e721519c2842cc9
    """
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date",
        )
    )


def _show_or_write_html(fig, base_dir, filename, save_html=False):
    """Write the figure as HTML when ``save_html`` is set, otherwise show it.

    https://zenn.dev/ganariya/articles/plotly-high-resolution
    """
    if save_html:
        fig.write_html(f'{base_dir}/figure_{filename}.html')
    else:
        fig.show()


# In[12]:

def plotly_candlestick(df, title=None, save_fig=False, filename=None, base_dir=None):
    """Plot OHLC candles (row 1) and a volume bar chart (row 2).

    https://stackoverflow.com/a/65997291/15983717

    Args:
        df: OHLCV frame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns; its index is used as the x axis.
        title: Accepted for call compatibility; currently unused.
        save_fig: When true, also write ``{base_dir}/figure_{filename}.png``
            (kaleido engine, range slider hidden).
        filename: Stem used in the exported file name.
        base_dir: Directory used for the exported file.
    """
    fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
                        vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'),
                        row_width=[1.0, 1.0])
    fig.add_trace(
        go.Candlestick(
            x=df.index, open=df.open, high=df.high, low=df.low, close=df.close,
            name='OHLC'),
        row=1, col=1)
    # Bar trace for volumes on 2nd row without legend
    fig.add_trace(go.Bar(x=df.index, y=df.volume, showlegend=False), row=2, col=1)

    if save_fig:
        _hide_range_slider(fig)
        fig.write_image(f'{base_dir}/figure_{filename}.png', engine="kaleido", scale=20)

    # NOTE(review): the interactive tail is assumed to run regardless of
    # save_fig (otherwise the default call would display nothing) - confirm.
    _add_range_controls(fig)
    _show_or_write_html(fig, base_dir, filename)


# plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[13]:

def plotly_candlestick_combined(df, title=None, save_fig=False, filename=None, base_dir=None):
    """Plot OHLC candles and volume bars in one subplot with two y axes.

    Candles are drawn on the secondary y axis, volume bars on the primary.

    https://stackoverflow.com/a/65997291/15983717

    Args:
        df: OHLCV frame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns; its index is used as the x axis.
        title: Accepted for call compatibility; currently unused.
        save_fig: When true, also write ``{base_dir}/figure_{filename}.pdf``
            (kaleido engine, range slider hidden).
        filename: Stem used in the exported file name.
        base_dir: Directory used for the exported file.
    """
    fig = make_subplots(shared_xaxes=True,
                        vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'),
                        specs=[[{"secondary_y": True}]])
    fig.add_trace(
        go.Candlestick(
            x=df.index, open=df.open, high=df.high, low=df.low, close=df.close,
            name='OHLC'
        ),
        secondary_y=True
    )
    # Bar trace for volumes without legend
    fig.add_trace(
        go.Bar(x=df.index, y=df.volume, showlegend=False),
        secondary_y=False
    )

    if save_fig:
        _hide_range_slider(fig)
        # NOTE(review): log axes / grid tweak are assumed to belong to the
        # export branch only - confirm against the original notebook.
        fig.update_yaxes(type='log')
        fig.layout.yaxis2.showgrid = False
        fig.write_image(f'{base_dir}/figure_{filename}.pdf', engine="kaleido", scale=10)

    _add_range_controls(fig)
    _show_or_write_html(fig, base_dir, filename)


# plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[14]:

def plotly_candlestick_separated(df, title=None, save_fig=False, filename=None, base_dir=None):
    """Plot OHLC candles (row 1) and a volume line trace (row 2).

    https://stackoverflow.com/a/65997291/15983717

    Args:
        df: OHLCV frame with ``open``, ``high``, ``low``, ``close`` and
            ``volume`` columns; its index is used as the x axis.
        title: Accepted for call compatibility; currently unused.
        save_fig: When true, also write ``{base_dir}/figure_{filename}.pdf``
            (kaleido engine, range slider hidden).
        filename: Stem used in the exported file name.
        base_dir: Directory used for the exported file.
    """
    fig = make_subplots(shared_xaxes=True, rows=2, cols=1,
                        vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'),
                        row_width=[1.0, 1.0])
    fig.add_trace(
        go.Candlestick(
            x=df.index, open=df.open, high=df.high, low=df.low, close=df.close,
            name='OHLC'
        ),
        row=1, col=1
    )
    # Scatter trace for volumes on 2nd row without legend
    fig.add_trace(
        go.Scatter(x=df.index, y=df.volume, showlegend=False),
        row=2, col=1
    )
    fig.update(layout_xaxis_rangeslider_visible=True)

    if save_fig:
        _hide_range_slider(fig)
        # NOTE(review): log axes / grid tweak are assumed to belong to the
        # export branch only - confirm against the original notebook.
        fig.update_yaxes(type='log')
        fig.layout.yaxis2.showgrid = False
        fig.write_image(f'{base_dir}/figure_{filename}.pdf', engine="kaleido", scale=10)

    _add_range_controls(fig)
    _show_or_write_html(fig, base_dir, filename)


# plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[15]:

# Output directory shared by the function-based export loops below.
_FIGURES_BASE_DIR = '/content/drive/MyDrive/documents/d-kit/tick_data/nikkei225mini_202208/figures/images'


def hourly_loop(day):
    """Placeholder; intentionally does nothing."""
    pass


def main_loop_hourly(from_dt, until_dt, graph_method):
    """Export one figure for the window ``from_dt``..``until_dt``."""
    main_loop(from_dt, until_dt, str(from_dt), _FIGURES_BASE_DIR, graph_method)


def main_loop_daily(day, graph_method):
    """Export the day-session and night-session figures for one day.

    Args:
        day: Day of month within 2022-08.
        graph_method: Plot function called by ``main_loop``.
    """
    year, month = 2022, 8
    # (start, end) as (hour, minute, second); the night session ends on the
    # following calendar day.
    session_hours = {
        'day': ((8, 30, 0), (15, 30, 0)),
        'night': ((16, 15, 0), (6, 15, 0)),
    }
    for session in ['day', 'night']:
        start_hms, end_hms = session_hours[session]
        from_dt = datetime(year, month, day, *start_hms)
        if session == 'day':
            until_date = from_dt.date()
        else:
            until_date = date(year, month, day) + timedelta(days=1)
        until_dt = datetime(until_date.year, until_date.month, until_date.day, *end_hms)
        main_loop(from_dt, until_dt, str(from_dt), _FIGURES_BASE_DIR, graph_method)


def main_loop(from_dt, until_dt, filename, base_dir, graph_method):
    """Slice the global ``df``, resample to 2-second bars and export a figure.

    Args:
        from_dt, until_dt: Inclusive ``.loc`` slice bounds on the sorted index.
        filename: Label used in progress messages and as the figure file stem.
        base_dir: Output directory forwarded to ``graph_method``.
        graph_method: Callable invoked as
            ``graph_method(ohlcv, base_dir=..., save_fig=True, title=..., filename=...)``.
    """
    data = df.sort_index().loc[from_dt:until_dt, :]
    # ``==`` instead of ``is``: identity comparison with an int literal is a
    # SyntaxWarning and only works by accident of small-int caching.
    if len(data) == 0:
        print(f'SKIPPED: {filename}')
        return
    print(f'STARTED : {filename}')
    graph_method(
        convert_into_ohlcv(data, '2S'),
        base_dir=base_dir,
        save_fig=True, title=from_dt, filename=filename
    )
    print(f'COMPLETED: {filename}')


def main_loop_hourly_separated(from_dt: datetime):
    """Export a one-hour ``plotly_candlestick_separated`` figure from ``from_dt``."""
    until_dt = from_dt + timedelta(hours=1)
    main_loop_hourly(from_dt, until_dt, plotly_candlestick_separated)


def main_loop_separated(day):
    """Export both sessions of ``day`` with ``plotly_candlestick_separated``."""
    main_loop_daily(day, plotly_candlestick_separated)


def main_loop_combined(day):
    """Export both sessions of ``day`` with ``plotly_candlestick_combined``."""
    main_loop_daily(day, plotly_candlestick_combined)


def daterange(from_dt, until_dt):
    """Return hourly timestamps from ``from_dt`` to ``until_dt``.

    http://ailaby.com/date_range/
    """
    return pd.date_range(start=from_dt, end=until_dt, freq='H')


def main_hourly():
    """Export hourly figures for the hard-coded 2022-08-01 morning window."""
    use_parallel = False
    from_dt = datetime(2022, 8, 1, 8, 30, 0)
    until_dt = datetime(2022, 8, 1, 10, 30, 0)
    #until_dt = datetime(2022, 9, 1, 6, 30, 0)
    from_dts = list(daterange(from_dt, until_dt))
    if use_parallel:
        Parallel(n_jobs=-1)([delayed(main_loop_hourly_separated)(dt) for dt in from_dts])
    else:
        for dt in from_dts:
            main_loop_hourly_separated(dt)


def main_daily():
    """Export day/night session figures for days 1..31."""
    use_parallel = False
    days = range(1, 31 + 1)
    if use_parallel:
        Parallel(n_jobs=-1)([delayed(main_loop_separated)(day) for day in days])
    else:
        for day in days:
            main_loop_separated(day)


# In[16]:

# This cell previously held a commented-out draft of a ``PlotlyCandlestick``
# base class and a ``Separated`` subclass exposing a static
# ``graph_method(df, title=None, save_fig=False, filename=None, output_base_dir=None)``
# that mirrored ``plotly_candlestick_separated``. ``TickData`` below uses the
# ``Separated`` class imported in the next cell.


# In[17]:

from plotly_candle_stick import PlotlyCandleStick
from separated import Separated


# In[ ]:

if 'TickData' in globals():
    del TickData


class TickData:
    """Export one candlestick figure per time window of a tick-data frame.

    The range ``from_dt``..``until_dt`` is split into windows of one ``freq``
    step (``'H'`` or ``'D'``); each non-empty window is resampled to 2-second
    OHLCV bars and handed to ``self.graph_method.graph_method``.
    """

    # Plain type aliases. ``typing.NewType`` requires a proper class as its
    # base, so it cannot wrap ``Union[...]`` or ``list[...]``.
    DatetimeLike = Union[datetime, DatetimeIndex]
    Datetimes = list[DatetimeLike]
    DatetimeSet = list[tuple[datetime, datetime]]
    # One (start, end) window as produced by ``_from_to_dts``.
    DatetimeRange = tuple[datetime, datetime]

    def __init__(self, from_dt: datetime, until_dt: datetime, df: pd.DataFrame,
                 output_base_dir: str, freq: str = 'H', parallel: bool = False):
        """Store the export configuration.

        Args:
            from_dt: Start of the first window.
            until_dt: Last window start considered by ``pd.date_range``.
            df: Tick data with ``trade_price`` / ``trade_volume`` columns.
            output_base_dir: Directory forwarded to the graph method.
            freq: Window size, ``'H'`` (one hour) or ``'D'`` (one day).
            parallel: Run the per-window exports through joblib when true.
        """
        self.from_dt = from_dt
        self.until_dt = until_dt
        self.df = df
        self.output_base_dir = output_base_dir
        self.parallel = parallel
        self.freq = freq
        # Object whose ``graph_method(...)`` attribute renders one figure.
        self.graph_method = Separated

    def _timedelta(self) -> dict:
        """Return ``timedelta`` keyword arguments for one ``freq`` step.

        Raises:
            ValueError: If ``freq`` is neither ``'H'`` nor ``'D'``.
        """
        if self.freq == 'H':
            return {'hours': 1}
        if self.freq == 'D':
            return {'days': 1}
        raise ValueError(f'freq={self.freq} is not supported')

    def _from_to_dts(self) -> list[DatetimeRange]:
        """Return the (start, end) window for every step in the range."""
        step = timedelta(**self._timedelta())
        starts = pd.date_range(start=self.from_dt, end=self.until_dt, freq=self.freq)
        return [(start, start + step) for start in starts]

    def _loop_export_procedure(self, dt_set: DatetimeRange):
        """Export the figure for one (start, end) window; skip empty windows."""
        from_dt, until_dt = dt_set
        filename: str = str(from_dt)
        data = self.df.sort_index().loc[from_dt:until_dt, :]
        # ``==`` instead of ``is``: identity comparison with an int literal is
        # a SyntaxWarning and only works by accident of small-int caching.
        if len(data) == 0:
            print(f'SKIPPED: {filename}')
            return
        print(f'STARTED : {filename}')
        self.graph_method.graph_method(
            convert_into_ohlcv(data, '2S'),
            output_base_dir=self.output_base_dir,
            save_fig=True, title=from_dt, filename=filename
        )
        print(f'COMPLETED: {filename}')

    def export(self):
        """Export every window, through joblib when ``parallel`` is set."""
        windows = self._from_to_dts()
        if self.parallel:
            Parallel(n_jobs=-1)([delayed(self._loop_export_procedure)(dt) for dt in windows])
        else:
            for dt in windows:
                self._loop_export_procedure(dt)


def main_class(df):
    """Run the class-based export for the 2022-08-01 morning window."""
    td = TickData(
        from_dt=datetime(2022, 8, 1, 8, 30, 0),
        until_dt=datetime(2022, 8, 1, 10, 30, 0),
        df=df,
        output_base_dir='./figures'
    )
    td.export()


def main():
    """Run the function-based hourly export."""
    main_hourly()


DEBUG = True
main_class(df)
#


# In[ ]:

def plotly_test(df):
    """Show candles (secondary y axis) over volume bars with log y axes."""
    fig = make_subplots(
        shared_xaxes=True,
        subplot_titles=('OHLC', 'Volume'),
        specs=[[{"secondary_y": True}]],
    )
    fig.add_trace(
        go.Candlestick(
            x=df.index, open=df.open, high=df.high, low=df.low, close=df.close,
            name='OHLC'
        ),
        secondary_y=True
    )
    fig.add_trace(
        go.Bar(x=df.index, y=df.volume, showlegend=False),
        secondary_y=False,
    )
    fig.update_yaxes(type='log')
    fig.show()


# .loc is required for row selection by date string (see In[7]).
plotly_test(convert_into_ohlcv(df.loc['2022-08-01'], '2S'))


# In[ ]:

plot_mpf = False
if plot_mpf:
    mpf.plot(ohlcv_H, figratio=(12, 4), type='candle', style="yahoo", volume=True)


# In[ ]:

get_ipython().system('python --version')