株のシステムトレードをしよう - 1から始める株自動取引システムの作り方

株式をコンピュータに売買させる仕組みを少しずつ作っていきます。できあがってから公開ではなく、書いたら途中でも記事として即掲載して、後から固定ページにして体裁を整える方式で進めていきます。

コードの見通しが悪いので、もう少し分割してリファクタリングしている。

未だ綺麗にする作業が完了していないが、途中経過を置いておく。

#!/usr/bin/env python
# coding: utf-8

# In[1]:



# Path to the zipped tick-data CSV that a later cell loads with pd.read_csv.
filepath = './data/日経225mini 歩み値(ティック) (2022 08).zip'


# In[2]:


get_ipython().run_line_magic('pip', 'install mplfinance plotly joblib')
get_ipython().run_line_magic('pip', 'install -U kaleido')

# %pip install mplfinance
import mplfinance as mpf

import matplotlib.pyplot as plt
import seaborn as sns
get_ipython().run_line_magic('matplotlib', 'inline')

# %pip install plotly
# %pip install -U kaleido
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import plotly.express as px
 
tips = sns.load_dataset('tips')

# https://note.nkmk.me/python-joblib-parallel-usage/
# %pip install joblib
from joblib import Parallel, delayed

import pandas as pd
from pandas import DatetimeIndex
from datetime import datetime, timedelta, date
from typing import List, Tuple, Dict, Union, Annotated, NewType


# In[3]:


# Column dtypes handed to pd.read_csv: date, time and code columns are read as
# strings, numeric columns use compact fixed-width types.
dtype = dict(
  trade_date=str,
  make_date=str,
  index_type='uint8',
  security_code=str,
  time=str,
  trade_price='float32',
  price_type=str,
  trade_volume='uint32',
  no='uint32',
  contract_month=str,
)

# True when an earlier run of this cell already loaded the raw frame; the CSV
# read is skipped in that case and the cached frame is reused.
reset_df = '_df_original' in globals()
if not reset_df:
  _df_original = pd.read_csv(filepath, dtype=dtype)

# Work on a copy so the cached raw frame is never modified.
df = _df_original.copy()


# In[4]:


def parse_date(df):
  '''
  Parse the date columns into timestamps and index the frame by ``make_date``.

  ``trade_date`` and ``make_date`` are each joined with the ``time`` column
  (separated by ``T``) and parsed with the format ``%Y%m%dT%H%M%S%f``.
  The frame is modified in place and is also returned.
  '''
  timestamp_format = '%Y%m%dT%H%M%S%f'
  for column in ('trade_date', 'make_date'):
    df[column] = pd.to_datetime(df[column] + 'T' + df['time'], format=timestamp_format)
  df.index = df['make_date']
  return df
df = parse_date(df)


# In[5]:


df


# In[6]:


def convert_into_ohlcv(df, frequency: str) -> pd.DataFrame:
  '''
  Aggregate tick rows into OHLCV bars.

  ``trade_price`` is resampled at ``frequency`` into ``open``/``high``/
  ``low``/``close`` columns, and the summed ``trade_volume`` of each bin is
  attached as the ``volume`` column.
  '''
  price_bars = df['trade_price'].resample(frequency).ohlc()
  volume_per_bar = df['trade_volume'].resample(frequency).sum()
  price_bars['volume'] = volume_per_bar
  return price_bars
# Build OHLCV bars from the full tick frame at frequency 'S' and display them.
# NOTE(review): recent pandas releases deprecate the uppercase 'S' alias in
# favour of 's' — confirm against the installed pandas version.
ohlcv = convert_into_ohlcv(df, 'S')
ohlcv


# In[ ]:





# In[7]:


def show_by_seaborn(df, x, y, hue=None, kind='line', **kwargs):
  '''
  Plot ``y`` against ``x`` from ``df`` with seaborn and show the figure.

  Args:
    df: Data passed to seaborn as ``data``.
    x: Variable used for the x axis.
    y: Variable used for the y axis.
    hue: Optional grouping variable forwarded to seaborn.
    kind: ``'line'`` (default) draws with ``sns.lineplot``; ``'scatter'``
      draws with ``sns.scatterplot``.
    **kwargs: Extra keyword arguments forwarded to the plotting function.

  Raises:
    ValueError: If ``kind`` is not one of the supported values.
  '''
  # `kind` used to be accepted but ignored (a line plot was always drawn).
  # Dispatch on it so the argument really selects the plot type; the default
  # still produces the same line plot as before.
  plotters = {'line': sns.lineplot, 'scatter': sns.scatterplot}
  if kind not in plotters:
    raise ValueError(f'kind={kind} is not supported')
  plotters[kind](data=df, x=x, y=y, hue=hue, **kwargs)
  plt.show()
# show_by_seaborn(df, df.index, df.trade_price)
# Rows for one day are selected through .loc: pandas 2.x treats a plain
# ohlcv['2022-08-01'] as a column lookup and raises KeyError.
show_by_seaborn(ohlcv.loc['2022-08-01'], 'make_date', 'open')


# In[8]:


# Rows for one day are selected through .loc: pandas 2.x treats a plain
# ohlcv['2022-08-01'] as a column lookup and raises KeyError.
show_by_seaborn(ohlcv.loc['2022-08-01'], 'make_date', 'volume')


# In[9]:


# Build OHLCV bars at frequency 'H' from the same tick frame and display them.
# NOTE(review): recent pandas releases deprecate the uppercase 'H' alias in
# favour of 'h' — confirm against the installed pandas version.
ohlcv_H = convert_into_ohlcv(df, 'H')
ohlcv_H


# In[10]:


# Display the ticks from 2022-08-01 16:30:00 through 2022-08-02 06:00:10, slicing by label after sorting the index.
df.sort_index().loc['2022-08-01T16:30:00':'2022-08-02T06:00:10', :]


# In[11]:


# Number of tick rows whose index falls on 2022-08-07.
len(df.sort_index().loc['2022-08-07'])


# In[12]:


def plotly_candlestick(df, title=None, save_fig=False, filename=None, base_dir=None):
  '''
  Draw OHLC candlesticks with a volume bar chart in a second row beneath.

  Subplot layout based on https://stackoverflow.com/a/65997291/15983717

  Args:
    df: Frame whose index is used as the x axis and which provides the
      ``open``, ``high``, ``low``, ``close`` and ``volume`` columns.
    title: Optional figure title; applied (via ``str``) when not None.
    save_fig: When true, write ``{base_dir}/figure_{filename}.png`` instead
      of displaying the figure.
    filename: Suffix of the output file name (used only when saving).
    base_dir: Output directory (used only when saving).
  '''
  # Flip to True to also export an interactive HTML copy next to the image.
  save_html = False

  fig = make_subplots(rows=2, cols=1, shared_xaxes=True,
            vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'),
            row_width=[1.0, 1.0])
  # Row 1: candlesticks.
  fig.add_trace(go.Candlestick(
    x=df.index, open=df.open, high=df.high, low=df.low, close=df.close, name='OHLC'), row=1, col=1)
  # Row 2: volume bars, kept out of the legend.
  fig.add_trace(go.Bar(x=df.index, y=df.volume, showlegend=False), row=2, col=1)

  # `title` used to be accepted but silently dropped; apply it when provided.
  if title is not None:
    fig.update_layout(title_text=str(title))

  if not save_fig:
    fig.show()
    return

  # The range slider is hidden in the static image.
  fig.update_layout(xaxis=dict(rangeslider=dict(visible=False)))
  # High `scale` for a high-resolution image:
  # https://zenn.dev/ganariya/articles/plotly-high-resolution
  fig.write_image(f'{base_dir}/figure_{filename}.png', engine="kaleido", scale=20)

  if save_html:
    # The range selector and slider only matter for the interactive export, so
    # they are configured on this path only.
    # https://qiita.com/Ringa_hyj/items/b13e3e721519c2842cc9
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
    )
    fig.write_html(f'{base_dir}/figure_{filename}.html')
# plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[13]:


def plotly_candlestick_combined(df, title=None, save_fig=False, filename=None, base_dir=None):
  '''
  Draw OHLC candlesticks and volume bars overlaid in a single subplot.

  The candlesticks use the secondary y axis and the volume bars the primary
  one. Layout based on https://stackoverflow.com/a/65997291/15983717

  Args:
    df: Frame whose index is used as the x axis and which provides the
      ``open``, ``high``, ``low``, ``close`` and ``volume`` columns.
    title: Optional figure title; applied (via ``str``) when not None.
    save_fig: When true, write ``{base_dir}/figure_{filename}.pdf`` (with
      log-scaled y axes) instead of displaying the figure.
    filename: Suffix of the output file name (used only when saving).
    base_dir: Output directory (used only when saving).
  '''
  # Flip to True to also export an interactive HTML copy next to the PDF.
  save_html = False

  fig = make_subplots(shared_xaxes=True,
            vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'), specs=[[{"secondary_y": True}]])
  # Candlesticks on the secondary y axis.
  fig.add_trace(
    go.Candlestick(
        x=df.index, open=df.open, high=df.high, low=df.low, close=df.close, name='OHLC'
    ),
    secondary_y=True
  )
  # Volume bars on the primary y axis, kept out of the legend.
  fig.add_trace(
      go.Bar(x=df.index, y=df.volume, showlegend=False),
      secondary_y=False
  )

  # `title` used to be accepted but silently dropped; apply it when provided.
  if title is not None:
    fig.update_layout(title_text=str(title))

  if not save_fig:
    fig.show()
    return

  # Static export: no range slider, log-scaled y axes, no grid on the
  # secondary axis.
  fig.update_layout(xaxis=dict(rangeslider=dict(visible=False)))
  fig.update_yaxes(type='log')
  fig.layout.yaxis2.showgrid = False
  # High `scale` for a high-resolution export:
  # https://zenn.dev/ganariya/articles/plotly-high-resolution
  fig.write_image(f'{base_dir}/figure_{filename}.pdf', engine="kaleido", scale=10)

  if save_html:
    # The range selector and slider only matter for the interactive export, so
    # they are configured on this path only.
    # https://qiita.com/Ringa_hyj/items/b13e3e721519c2842cc9
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
    )
    fig.write_html(f'{base_dir}/figure_{filename}.html')
# plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[14]:


def plotly_candlestick_separated(df, title=None, save_fig=False, filename=None, base_dir=None):
  '''
  Draw OHLC candlesticks in the first row and volume in a second row.

  Volume is drawn as a ``go.Scatter`` trace. Layout based on
  https://stackoverflow.com/a/65997291/15983717

  Args:
    df: Frame whose index is used as the x axis and which provides the
      ``open``, ``high``, ``low``, ``close`` and ``volume`` columns.
    title: Optional figure title; applied (via ``str``) when not None.
    save_fig: When true, write ``{base_dir}/figure_{filename}.pdf`` (with
      log-scaled y axes) instead of displaying the figure.
    filename: Suffix of the output file name (used only when saving).
    base_dir: Output directory (used only when saving).
  '''
  # Flip to True to also export an interactive HTML copy next to the PDF.
  save_html = False

  fig = make_subplots(shared_xaxes=True, rows=2, cols=1,
            vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'),
            row_width=[1.0, 1.0])
  # Row 1: candlesticks.
  fig.add_trace(
    go.Candlestick(
        x=df.index, open=df.open, high=df.high, low=df.low, close=df.close,
        name='OHLC'
    ),
    row=1, col=1
  )
  # Row 2: volume as a scatter trace, kept out of the legend.
  fig.add_trace(
      go.Scatter(x=df.index, y=df.volume, showlegend=False),
      row=2, col=1
  )
  fig.update(layout_xaxis_rangeslider_visible=True)

  # `title` used to be accepted but silently dropped; apply it when provided.
  if title is not None:
    fig.update_layout(title_text=str(title))

  if not save_fig:
    fig.show()
    return

  # Static export: no range slider, log-scaled y axes, no grid on the second
  # y axis.
  fig.update_layout(xaxis=dict(rangeslider=dict(visible=False)))
  fig.update_yaxes(type='log')
  fig.layout.yaxis2.showgrid = False
  # High `scale` for a high-resolution export:
  # https://zenn.dev/ganariya/articles/plotly-high-resolution
  fig.write_image(f'{base_dir}/figure_{filename}.pdf', engine="kaleido", scale=10)

  if save_html:
    # The range selector and slider only matter for the interactive export, so
    # they are configured on this path only.
    # https://qiita.com/Ringa_hyj/items/b13e3e721519c2842cc9
    fig.update_layout(
        xaxis=dict(
            rangeselector=dict(
                buttons=[
                    dict(count=1, label="1m", step="month", stepmode="backward"),
                    dict(count=6, label="6m", step="month", stepmode="backward"),
                    dict(count=1, label="YTD", step="year", stepmode="todate"),
                    dict(count=1, label="1y", step="year", stepmode="backward"),
                    dict(step="all"),
                ]
            ),
            rangeslider=dict(visible=True),
            type="date"
        )
    )
    fig.write_html(f'{base_dir}/figure_{filename}.html')
# plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[15]:


def hourly_loop(day):
    '''Placeholder with no behaviour yet: accepts ``day`` and returns None.'''

# def main_loop(day, graph_method):
#   year, month, day = [2022, 8, day]
#   time = { 'day': [[8,30,0], [15,30,0]], 'night': [[16,15,0], [6,15,0]]}
#   base_dir = '/content/drive/MyDrive/documents/d-kit/tick_data/nikkei225mini_202208/figures/images'
#   for session in ['day', 'night']:
#     from_dt = datetime(year, month, day, *time[session][0])
#     if session == 'day':
#         until_date = from_dt.date()
#     else:
#         until_date = date(year, month, day) + timedelta(days=1)
#     until_dt = datetime(until_date.year, until_date.month, until_date.day, *time[session][1])
#     data = df.sort_index().loc[from_dt:until_dt, :]
#     if len(data) is 0:
#         return
#     print(f'STARTED  : {day:02} {session}')
#     graph_method(
#       convert_into_ohlcv(data, '2S'),
#       base_dir=base_dir,
#       save_fig=True, title=from_dt, filename=from_dt
#     )
#     print(f'COMPLETED: {day:02} {session}')

def main_loop_hourly(from_dt, until_dt, graph_method):
  '''
  Render the ticks between ``from_dt`` and ``until_dt`` with ``graph_method``.

  Delegates to ``main_loop``, using ``str(from_dt)`` as the file label and a
  fixed image directory as the output location.
  '''
  output_dir = '/content/drive/MyDrive/documents/d-kit/tick_data/nikkei225mini_202208/figures/images'
  label = str(from_dt)
  main_loop(from_dt, until_dt, label, output_dir, graph_method)

def main_loop_daily(day, graph_method, year=2022, month=8):
  '''
  Render the day session and the night session that start on one calendar day.

  Args:
    day: Day of month on which both sessions start.
    graph_method: Plotting callable handed through to ``main_loop``.
    year: Calendar year of ``day``. Defaults to 2022, the value that used to
      be hard-coded.
    month: Calendar month of ``day``. Defaults to 8, the value that used to
      be hard-coded.

  Raises:
    ValueError: If ``year``/``month``/``day`` do not form a valid date.
  '''
  # [start, end] wall-clock times as [hour, minute, second]. The night session
  # ends on the calendar day after it starts.
  session_hours = { 'day': [[8,30,0], [15,30,0]], 'night': [[16,15,0], [6,15,0]]}
  base_dir = '/content/drive/MyDrive/documents/d-kit/tick_data/nikkei225mini_202208/figures/images'
  for session in ['day', 'night']:
    opens_at, closes_at = session_hours[session]
    from_dt = datetime(year, month, day, *opens_at)
    if session == 'day':
        until_date = from_dt.date()
    else:
        # timedelta arithmetic rolls over month and year boundaries.
        until_date = from_dt.date() + timedelta(days=1)
    until_dt = datetime(until_date.year, until_date.month, until_date.day, *closes_at)
    main_loop(from_dt, until_dt, str(from_dt), base_dir, graph_method)

def main_loop(from_dt, until_dt, filename, base_dir, graph_method):
    '''
    Chart the ticks of the module-level ``df`` that fall in one time window.

    The rows between ``from_dt`` and ``until_dt`` (label slice on the sorted
    index) are converted to '2S' OHLCV bars and passed to ``graph_method``
    with ``save_fig=True``. Progress is reported with ``print``.

    Args:
        from_dt: Start of the window; also passed on as the chart title.
        until_dt: End of the window.
        filename: Label used in the progress messages and as the output
            file name handed to ``graph_method``.
        base_dir: Output directory handed to ``graph_method``.
        graph_method: Callable accepting ``(ohlcv, base_dir=, save_fig=,
            title=, filename=)``.
    '''
    data = df.sort_index().loc[from_dt:until_dt, :]
    # The original `len(data) is 0` compared object identity with an int
    # literal, which only works by accident of CPython's small-int cache and
    # triggers a SyntaxWarning; compare by value instead.
    if len(data) == 0:
        print(f'SKIPPED: {filename}')
        return
    print(f'STARTED  : {filename}')
    graph_method(
      convert_into_ohlcv(data, '2S'),
      base_dir=base_dir,
      # Forward the caller's `filename` so the saved file matches the label
      # that is logged (previously `from_dt` was passed and `filename` was
      # used for logging only).
      save_fig=True, title=from_dt, filename=filename
    )
    print(f'COMPLETED: {filename}')

def main_loop_hourly_separated(from_dt: datetime):
    '''Chart the one-hour window starting at ``from_dt`` with the separated layout.'''
    one_hour = timedelta(hours=1)
    main_loop_hourly(from_dt, from_dt + one_hour, plotly_candlestick_separated)

def main_loop_separated(day):
    '''Chart both sessions of ``day`` using ``plotly_candlestick_separated``.'''
    renderer = plotly_candlestick_separated
    main_loop_daily(day, renderer)

def main_loop_combined(day):
    '''Chart both sessions of ``day`` using ``plotly_candlestick_combined``.'''
    renderer = plotly_candlestick_combined
    main_loop_daily(day, renderer)

def daterange(from_dt, until_dt):
    '''
    Return the hourly timestamps from ``from_dt`` up to ``until_dt``.

    Steps are anchored at ``from_dt``; ``until_dt`` itself is included only
    when it falls exactly on a step. The result is a ``pd.DatetimeIndex`` and
    is empty when ``until_dt`` precedes ``from_dt``.

    Reference: http://ailaby.com/date_range/
    '''
    # A timedelta step replaces the uppercase 'H' alias, which recent pandas
    # releases deprecate; the timedelta form means the same on every version.
    return pd.date_range(start=from_dt, end=until_dt, freq=timedelta(hours=1))

def main_hourly():
  '''
  Chart every hourly window starting between 08:30 and 10:30 on 2022-08-01.

  Each start time from ``daterange`` is passed to
  ``main_loop_hourly_separated``, either one after another or through joblib
  when ``use_parallel`` is switched on.
  '''
  use_parallel = False
  window_start = datetime(2022, 8, 1, 8, 30, 0)
  window_end = datetime(2022, 8, 1, 10, 30, 0)
  # Alternative end covering the whole month: datetime(2022, 9, 1, 6, 30, 0)
  hour_starts = list(daterange(window_start, window_end))
  if use_parallel:
    Parallel(n_jobs=-1)([delayed(main_loop_hourly_separated)(dt) for dt in hour_starts])
    return
  for hour_start in hour_starts:
    main_loop_hourly_separated(hour_start)

def main_daily():
  '''
  Chart both sessions for each day number from 1 through 31.

  Every day number is passed to ``main_loop_separated``, either one after
  another or through joblib when ``use_parallel`` is switched on.
  '''
  use_parallel = False
  days = range(1, 31 + 1)
  if use_parallel:
    Parallel(n_jobs=-1)([delayed(main_loop_separated)(day) for day in days])
    return
  for day in days:
    main_loop_separated(day)


# In[16]:


# class PlotlyCandlestick:
#     @staticmethod
#     def graph_method():
#         raise NotImplementedError
# 
# class Separated(PlotlyCandlestick):
#     @staticmethod
#     def graph_method(df, title=None, save_fig=False, filename=None, output_base_dir=None):
#       '''
#       https://stackoverflow.com/a/65997291/15983717
#       '''
#       # plotly = go.Figure(data=go.Candlestick(x=df.index, open=df.open, high=df.high, low=df.low, close=df.close))
#       # Plot OHLC on 1st row
#       plotly = make_subplots(shared_xaxes=True, rows=2, cols=1, 
#                 vertical_spacing=0.03, subplot_titles=('OHLC', 'Volume'),
#                 row_width=[1.0, 1.0])
#       plotly.add_trace(
#         go.Candlestick(
#             x=df.index, open=df.open, high=df.high, low=df.low, close=df.close,
#             name='OHLC'
#         )
#         ,row=1, col=1
#       )
#       # Bar trace for volumes on 2nd row without legend
#       plotly.add_trace(
#           go.Scatter(x=df.index, y=df.volume, showlegend=False),
#           row=2, col=1
#       )
#       plotly.update(layout_xaxis_rangeslider_visible=True)
#       if save_fig:
#         plotly.update_layout(
#             xaxis=dict(
#                 rangeslider=dict(
#                     visible=False
#                 ),
#             )
#         )
#         plotly.update_yaxes(type='log')
#         plotly.layout.yaxis2.showgrid=False
#         plotly.write_image(f'{output_base_dir}/figure_{filename}.pdf', engine="kaleido", scale=10)
#         #plotly.write_image(f'./figure_{filename}.png', engine="kaleido", scale = 20)
#         plotly.update_layout(  # https://qiita.com/Ringa_hyj/items/b13e3e721519c2842cc9
#             xaxis=dict(
#                 rangeselector=dict(
#                     buttons=list([
#                         dict(count=1,
#                              label="1m",
#                              step="month",
#                              stepmode="backward"),
#                         dict(count=6,
#                              label="6m",
#                              step="month",
#                              stepmode="backward"),
#                         dict(count=1,
#                              label="YTD",
#                              step="year",
#                              stepmode="todate"),
#                         dict(count=1,
#                              label="1y",
#                              step="year",
#                              stepmode="backward"),
#                         dict(step="all")
#                     ])
#                 ),
#                 rangeslider=dict(
#                     visible=True
#                 ),
#                 type="date"
#             )
#         )
#         save_html = False
#         if save_html:
#             plotly.write_html(f'{output_base_dir}/figure_{filename}.html')
#         # https://zenn.dev/ganariya/articles/plotly-high-resolution
#       else:
#         plotly.show()
#     # plotly_candlestick(convert_into_ohlcv(df.sort_index().loc['2022-08-01':'2022-08-07', :], '1min'))


# In[17]:


from plotly_candle_stick import PlotlyCandleStick
from separated import Separated


# In[ ]:


# Remove a TickData name left in the module globals (e.g. from an earlier run
# of this cell) before the class is defined again below.
if 'TickData' in globals():
  del TickData

class TickData:
  '''
  Export candlestick charts for consecutive fixed-length windows of tick data.

  Window start times run from ``from_dt`` to ``until_dt`` in steps of one
  ``freq`` unit (``'H'`` = one hour, ``'D'`` = one day). Each window spans one
  step from its start; every non-empty window is converted to '2S' OHLCV bars
  and handed to ``graph_method.graph_method``.
  '''
  # Typing helpers, kept under their original names.
  DatetimeLike = NewType('DatetimeLike', Union[datetime, DatetimeIndex])
  Datetimes = NewType('DateTimes', list[DatetimeLike])
  # List of (window start, window end) pairs.
  DatetimeSet = NewType('DateTimeSet', list[tuple[datetime, datetime]])

  def __init__(self, from_dt: datetime, until_dt: datetime, df: pd.DataFrame, output_base_dir: str,
    freq: str = 'H', parallel: bool = False):
    '''
    Args:
      from_dt: First window start.
      until_dt: Last possible window start (inclusive when it falls on a step).
      df: Tick frame indexed by timestamp; must provide the columns read by
        ``convert_into_ohlcv``.
      output_base_dir: Directory passed to the graph method for its output.
      freq: Window length, ``'H'`` or ``'D'``.
      parallel: When true, windows are exported through joblib.
    '''
    self.from_dt = from_dt
    self.until_dt = until_dt
    self.df = df
    self.output_base_dir = output_base_dir
    self.parallel = parallel
    self.freq = freq
    # Holds a class (not an instance) whose ``graph_method`` is called directly.
    # The annotation previously named `PlotlyCandlestick`, which is not the
    # spelling of the imported `PlotlyCandleStick`.
    # NOTE(review): assumes Separated derives from PlotlyCandleStick — confirm.
    self.graph_method: type[PlotlyCandleStick] = Separated

  def _timedelta(self) -> dict:
    '''
    Return the ``timedelta`` keyword arguments for one ``freq`` step.

    Raises:
      ValueError: If ``freq`` is neither ``'H'`` nor ``'D'``.
    '''
    if self.freq == 'H':
      return { 'hours': 1 }
    elif self.freq == 'D':
      return { 'days': 1 }
    else:
      raise ValueError(f'freq={self.freq} is not supported')

  def _from_to_dts(self) -> DatetimeSet:
    '''Return the (start, end) pair of every window to export.'''
    step = timedelta(**self._timedelta())
    # The step itself drives date_range instead of the 'H'/'D' alias string:
    # window length and spacing cannot drift apart, and the uppercase 'H'
    # alias deprecated by recent pandas is no longer needed.
    return [
      (window_start, window_start + step)
      for window_start in
      pd.date_range(start=self.from_dt, end=self.until_dt, freq=step)
    ]

  def _loop_export_procedure(self, dt_set: tuple[datetime, datetime]):
    '''
    Export the chart for one (start, end) window, skipping empty windows.

    Progress is reported with ``print``; the window start is used as the chart
    title and, as a string, as the output file name.
    '''
    from_dt, until_dt = dt_set
    filename: str = str(from_dt)
    data = self.df.sort_index().loc[from_dt:until_dt, :]
    # The original `len(data) is 0` compared object identity with an int
    # literal; compare by value instead.
    if len(data) == 0:
        print(f'SKIPPED: {filename}')
        return
    print(f'STARTED  : {filename}')
    self.graph_method.graph_method(
      convert_into_ohlcv(data, '2S'),
      output_base_dir=self.output_base_dir,
      save_fig=True, title=from_dt, filename=filename
    )
    print(f'COMPLETED: {filename}')

  def export(self):
    '''Export every window, through joblib when ``parallel`` is set.'''
    windows = self._from_to_dts()
    if self.parallel:
      Parallel(n_jobs=-1)([delayed(self._loop_export_procedure)(dt) for dt in windows])
      return
    # Plain loop: the calls are made for their side effects only.
    for window in windows:
      self._loop_export_procedure(window)


def main_class(df):
  '''
  Run the class-based export on ``df``.

  Builds a ``TickData`` for window starts from 2022-08-01 08:30:00 to
  10:30:00 with ``./figures`` as the output directory and calls ``export``.
  '''
  export_window = dict(
    from_dt=datetime(2022, 8, 1, 8, 30, 0),
    until_dt=datetime(2022, 8, 1, 10, 30, 0),
  )
  exporter = TickData(df=df, output_base_dir='./figures', **export_window)
  exporter.export()

def main():
    '''Entry point for the function-based flow: runs ``main_hourly``.'''
    main_hourly()

# Module-level flag; it is not read anywhere in the visible code.
DEBUG = True
# Run the class-based export on the parsed tick frame.
main_class(df)


# 

# In[ ]:


def plotly_test(df):
    '''
    Show candlesticks and volume bars overlaid on one subplot with log y axes.

    ``df`` supplies the x axis through its index and the ``open``, ``high``,
    ``low``, ``close`` and ``volume`` columns. Candlesticks go on the
    secondary y axis, volume bars on the primary one.
    '''
    candles = go.Candlestick(
        x=df.index, open=df.open, high=df.high, low=df.low, close=df.close, name='OHLC'
    )
    volume_bars = go.Bar(x=df.index, y=df.volume, showlegend=False)

    fig = make_subplots(
        shared_xaxes=True,
        subplot_titles=('OHLC', 'Volume'),
        specs=[[{"secondary_y": True}]],
    )
    fig.add_trace(candles, secondary_y=True)
    fig.add_trace(volume_bars, secondary_y=False)
    fig.update_yaxes(type='log')
    fig.show()
# Rows for one day are selected through .loc: pandas 2.x treats a plain
# df['2022-08-01'] as a column lookup and raises KeyError.
plotly_test(convert_into_ohlcv(df.loc['2022-08-01'], '2S'))


# In[ ]:


# Optional mplfinance candlestick rendering of the hourly bars; switched off
# unless plot_mpf is set to True.
plot_mpf = False
if plot_mpf:
  mpf.plot(ohlcv_H, figratio=(12,4), type='candle', style="yahoo", volume=True)


# In[ ]:


get_ipython().system('python --version')

(C) 2020 dogwood008 禁無断転載 不許複製 Reprinting, reproducing are prohibited.