Chapter 12: Deep Learning for Time Series Forecasting
In [32]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as dt
from sklearn.preprocessing import MinMaxScaler
When to use deep learning for time series forecasting
Exploring the different types of deep learning models
Getting ready to apply deep learning for forecasting
In [9]:
url = 'https://raw.githubusercontent.com/marcopeix/TimeSeriesForecastingInPython/master/data/metro_interstate_traffic_volume_preprocessed.csv'
df = pd.read_csv(url, index_col='date_time', parse_dates=True)
print(df.columns)
Index(['temp', 'rain_1h', 'snow_1h', 'clouds_all', 'traffic_volume'], dtype='object')
Features of the metro interstate traffic volume dataset
| Feature | Description |
|---|---|
| date_time | Date and time of the record, in the CST time zone |
| temp | Average temperature recorded during the hour (in kelvins) |
| rain_1h | Amount of rain that fell during the hour (in millimeters) |
| snow_1h | Amount of snow that fell during the hour (in millimeters) |
| clouds_all | Percentage of cloud cover during the hour |
| traffic_volume | Hourly westbound traffic volume reported on Interstate 94 |
In [21]:
fig, axes = plt.subplots(3, 1)
# traffic volume
df['traffic_volume'].iloc[:400].plot(ax=axes[0])
axes[0].set_ylabel('Traffic Volume')
# temp: yearly
df['temp'].plot(ax=axes[1])
axes[1].set_ylabel('Temperature (K)')
# temp: daily
df['temp'].iloc[:400].plot(ax=axes[2])
axes[2].set_ylabel('Temperature (K)')
plt.tight_layout()
In [28]:
# rain_1h and snow_1h are almost always zero,
# so they carry no useful signal as features and are dropped
display(df.describe().transpose())
df.drop(columns=['rain_1h', 'snow_1h'], inplace=True)
| | count | mean | std | min | 25% | 50% | 75% | max |
|---|---|---|---|---|---|---|---|---|
| temp | 17551.0 | 281.416203 | 12.688262 | 243.39 | 272.22 | 282.41 | 291.89 | 310.07 |
| rain_1h | 17551.0 | 0.025523 | 0.259794 | 0.00 | 0.00 | 0.00 | 0.00 | 10.60 |
| snow_1h | 17551.0 | 0.000000 | 0.000000 | 0.00 | 0.00 | 0.00 | 0.00 | 0.00 |
| clouds_all | 17551.0 | 42.034129 | 39.065960 | 0.00 | 1.00 | 40.00 | 90.00 | 100.00 |
| traffic_volume | 17551.0 | 3321.484588 | 1969.223949 | 113.00 | 1298.00 | 3518.00 | 4943.00 | 7280.00 |
In [35]:
# Convert the datetime index to Unix timestamps and encode the time of day
# as sine/cosine features, preserving the cyclical nature of a day
seconds_per_day = 24 * 60 * 60
df['day_sin'] = df.index.map(dt.datetime.timestamp).map(lambda x: np.sin(x * 2 * np.pi / seconds_per_day)).values
df['day_cos'] = df.index.map(dt.datetime.timestamp).map(lambda x: np.cos(x * 2 * np.pi / seconds_per_day)).values
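The two columns above map each timestamp onto the unit circle: with $t$ the POSIX timestamp in seconds and $P = 24 \times 60 \times 60 = 86{,}400$ seconds per day,

$$\text{day\_sin} = \sin\left(\frac{2\pi t}{P}\right), \qquad \text{day\_cos} = \cos\left(\frac{2\pi t}{P}\right)$$

so 23:00 and 01:00 end up close together rather than 22 "hours" apart, as the scatter plot below confirms.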
In [40]:
# The times of day are now spaced evenly around the unit circle
df.sample(100).plot.scatter('day_cos', 'day_sin').set_aspect('equal')
In [42]:
# Split into train/validation/test sets at a 7:2:1 ratio, keeping chronological order.
# .copy() gives each split its own DataFrame, avoiding SettingWithCopyWarning when scaling later.
n = len(df)
df_train = df[:int(0.7 * n)].copy()
df_val = df[int(0.7 * n):int(0.9 * n)].copy()
df_test = df[int(0.9 * n):].copy()
In [54]:
scaler = MinMaxScaler()
scaler.fit(df_train)  # fit on the training set only, to avoid leaking information from validation/test
df_train[df_train.columns] = scaler.transform(df_train)
df_val[df_val.columns] = scaler.transform(df_val)
df_test[df_test.columns] = scaler.transform(df_test)
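MinMaxScaler rescales each feature using the minimum and maximum observed in the training split,

$$x' = \frac{x - x_{\min}^{\text{train}}}{x_{\max}^{\text{train}} - x_{\min}^{\text{train}}}$$

which is why fitting on df_train alone matters: the validation and test sets are transformed with the training extremes, so no information from them leaks into the scaling.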
In [55]:
# The preprocessed data hosted on GitHub is used as-is, so the scaled splits are not saved here
# df_train.to_csv('train.csv')
# df_val.to_csv('val.csv')
# df_test.to_csv('test.csv')
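With the splits scaled, the remaining preparation is to slice each split into fixed-length input windows with matching labels before feeding a deep learning model. Purely as a hedged sketch (assuming TensorFlow/Keras is available, which this notebook has not imported, and with `make_dataset` and `input_length` as hypothetical names), one-step-ahead windows for traffic_volume could be built like this:

```python
import tensorflow as tf  # assumption: TensorFlow is installed; it is not imported elsewhere in this notebook

def make_dataset(df_split, input_length=24, batch_size=32):
    """Build (window, label) pairs: input_length consecutive hours of all
    features, labeled with the traffic_volume of the hour after the window."""
    data = np.array(df_split, dtype=np.float32)
    target_col = df_split.columns.get_loc('traffic_volume')
    inputs = data[:-input_length]              # drop trailing rows so every window has a label
    targets = data[input_length:, target_col]  # label i is the traffic volume right after window i
    return tf.keras.utils.timeseries_dataset_from_array(
        inputs,
        targets,
        sequence_length=input_length,
        batch_size=batch_size,
        shuffle=False,  # keep chronological order
    )

train_ds = make_dataset(df_train)
val_ds = make_dataset(df_val)
test_ds = make_dataset(df_test)
```

Each batch then carries inputs of shape (batch, 24, n_features) with scalar labels, which is the layout the deep learning models in the rest of this chapter expect.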