第3章ランダムウォーク

In [14]:

Copied!





import yfinance as yf
import pandas_datareader.data as web
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd

yf.pdr_override()
import yfinance as yf
import pandas_datareader.data as web
import numpy as np
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import pandas as pd

yf.pdr_override()

In [6]:

Copied!

ser_google = web.get_data_yahoo(tickers='GOOGL', start='2020-04-27', end='2021-04-27')['Close']
ser_google.plot()
ser_google = web.get_data_yahoo(tickers='GOOGL', start='2020-04-27', end='2021-04-27')['Close']
ser_google.plot()

[*********************100%***********************]  1 of 1 completed

Out[6]:

<Axes: xlabel='Date'>

No description has been provided for this image

In [7]:

Copied!





stationary = np.empty(400)
stationary[0] = 0
non_stationary = np.empty(400)
non_stationary[0] = 10
np.random.seed(42)
steps = np.random.standard_normal(400)

for i in range(1, 400):
    stationary[i] = 0.5 * stationary[i-1] + steps[i]
    non_stationary[i] = non_stationary[i-1] + steps[i]

mean_stationary = [stationary[:i].mean() for i in range(1, 400)]
mean_non_stationary = [non_stationary[:i].mean() for i in range(1, 400)]

var_stationary = [stationary[:i].var() for i in range(1, 400)]
var_non_stationary = [non_stationary[:i].var() for i in range(1, 400)]

fig, axes = plt.subplots(1, 3, figsize=[15, 4])

axes[0].set_xlabel('Timestep')
axes[0].set_ylabel('Value')
axes[0].plot(stationary, label='stationary')
axes[0].plot(non_stationary, label='non-stationary', ls='dashed')
axes[0].legend()

axes[1].set_xlabel('Timestep')
axes[1].set_ylabel('Mean')
axes[1].plot(mean_stationary, label='stationary')
axes[1].plot(mean_non_stationary, label='non-stationary', ls='dashed')
axes[1].legend()

axes[2].set_xlabel('Timestep')
axes[2].set_ylabel('Variance')
axes[2].plot(var_stationary, label='stationary')
axes[2].plot(var_non_stationary, label='non-stationary', ls='dashed')
axes[2].legend()
stationary = np.empty(400)
stationary[0] = 0
non_stationary = np.empty(400)
non_stationary[0] = 10
np.random.seed(42)
steps = np.random.standard_normal(400)

for i in range(1, 400):
    stationary[i] = 0.5 * stationary[i-1] + steps[i]
    non_stationary[i] = non_stationary[i-1] + steps[i]

mean_stationary = [stationary[:i].mean() for i in range(1, 400)]
mean_non_stationary = [non_stationary[:i].mean() for i in range(1, 400)]

var_stationary = [stationary[:i].var() for i in range(1, 400)]
var_non_stationary = [non_stationary[:i].var() for i in range(1, 400)]

fig, axes = plt.subplots(1, 3, figsize=[15, 4])

axes[0].set_xlabel('Timestep')
axes[0].set_ylabel('Value')
axes[0].plot(stationary, label='stationary')
axes[0].plot(non_stationary, label='non-stationary', ls='dashed')
axes[0].legend()

axes[1].set_xlabel('Timestep')
axes[1].set_ylabel('Mean')
axes[1].plot(mean_stationary, label='stationary')
axes[1].plot(mean_non_stationary, label='non-stationary', ls='dashed')
axes[1].legend()

axes[2].set_xlabel('Timestep')
axes[2].set_ylabel('Variance')
axes[2].plot(var_stationary, label='stationary')
axes[2].plot(var_non_stationary, label='non-stationary', ls='dashed')
axes[2].legend()

Out[7]:

<matplotlib.legend.Legend at 0x7fcfcd0d67a0>

In [8]:

Copied!





np.random.seed(42)
steps = np.random.standard_normal(1000)
steps[0] = 0
random_walk = steps.cumsum()

ADF_result = adfuller(random_walk)
print(f'ADF Statics: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

fig, axes = plt.subplots(1, 2, figsize=[12, 4])
axes[0].plot(random_walk)
axes[0].set_xlabel('Timestep')
axes[0].set_ylabel('Value')
plot_acf(random_walk, ax=axes[1], lags=20, auto_ylims=True)
plt.show()
np.random.seed(42)
steps = np.random.standard_normal(1000)
steps[0] = 0
random_walk = steps.cumsum()

ADF_result = adfuller(random_walk)
print(f'ADF Statics: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

fig, axes = plt.subplots(1, 2, figsize=[12, 4])
axes[0].plot(random_walk)
axes[0].set_xlabel('Timestep')
axes[0].set_ylabel('Value')
plot_acf(random_walk, ax=axes[1], lags=20, auto_ylims=True)
plt.show()

ADF Statics: -0.966
p-value: 0.765

In [9]:

Copied!





diff_random_walk = np.diff(random_walk)
ADF_result = adfuller(diff_random_walk)
print(f'ADF Statics: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

fig, axes = plt.subplots(1, 2, figsize=[12, 4])
axes[0].plot(diff_random_walk)
axes[0].set_xlabel('Timestep')
axes[0].set_ylabel('Value')
plot_acf(diff_random_walk, ax=axes[1], lags=20, auto_ylims=True)
plt.show()
diff_random_walk = np.diff(random_walk)
ADF_result = adfuller(diff_random_walk)
print(f'ADF Statics: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

fig, axes = plt.subplots(1, 2, figsize=[12, 4])
axes[0].plot(diff_random_walk)
axes[0].set_xlabel('Timestep')
axes[0].set_ylabel('Value')
plot_acf(diff_random_walk, ax=axes[1], lags=20, auto_ylims=True)
plt.show()

ADF Statics: -31.789
p-value: 0.000

In [10]:

Copied!

ADF_google_result = adfuller(ser_google)
print(f'ADF Statics: {ADF_google_result[0]:.3f}')
print(f'p-value: {ADF_google_result[1]:.3f}')
ADF_google_result = adfuller(ser_google)
print(f'ADF Statics: {ADF_google_result[0]:.3f}')
print(f'p-value: {ADF_google_result[1]:.3f}')

ADF Statics: 0.318
p-value: 0.978

In [11]:

Copied!





ser_google_diff = ser_google.diff().dropna()
ADF_google_result = adfuller(ser_google_diff)
print(f'ADF Statics: {ADF_google_result[0]:.3f}')
print(f'p-value: {ADF_google_result[1]:.3f}')

plot_acf(ser_google_diff, lags=20, auto_ylims=True)
plt.show()
ser_google_diff = ser_google.diff().dropna()
ADF_google_result = adfuller(ser_google_diff)
print(f'ADF Statics: {ADF_google_result[0]:.3f}')
print(f'p-value: {ADF_google_result[1]:.3f}')

plot_acf(ser_google_diff, lags=20, auto_ylims=True)
plt.show()

ADF Statics: -5.263
p-value: 0.000

In [15]:

Copied!

df = pd.DataFrame({'value': random_walk})
train = df[:800]
test = df[800:]
df = pd.DataFrame({'value': random_walk})
train = df[:800]
test = df[800:]

In [16]:

Copied!





test.loc[:, 'pred_mean'] = train.mean().value
test.loc[:, 'pred_last'] = train.iloc[-1].value
drift = (train['value'].iloc[-1] - train['value'].iloc[0]) / (train.index[-1] - train.index[0])
test.loc[:, 'pred_drift'] = np.arange(801, 1001) * drift
test.loc[:, 'pred_shift'] = df['value'].shift()[800:]

plt.xlabel('Timestep')
plt.ylabel('Value')
plt.plot(train.index, train['value'], label='Train')
plt.plot(test.index, test['value'], label='Test')
plt.plot(test.index, test['pred_mean'], ls='dashed', label='Mean')
plt.plot(test.index, test['pred_last'], ls='dashed', label='Last Value')
plt.plot(test.index, test['pred_drift'], ls='dashed', label='Drift')
plt.plot(test.index, test['pred_shift'], ls='dashed', label='Shift')
plt.axvspan(test.index[0], test.index[-1], alpha=0.2, color='gray')

plt.legend()
test.loc[:, 'pred_mean'] = train.mean().value
test.loc[:, 'pred_last'] = train.iloc[-1].value
drift = (train['value'].iloc[-1] - train['value'].iloc[0]) / (train.index[-1] - train.index[0])
test.loc[:, 'pred_drift'] = np.arange(801, 1001) * drift
test.loc[:, 'pred_shift'] = df['value'].shift()[800:]

plt.xlabel('Timestep')
plt.ylabel('Value')
plt.plot(train.index, train['value'], label='Train')
plt.plot(test.index, test['value'], label='Test')
plt.plot(test.index, test['pred_mean'], ls='dashed', label='Mean')
plt.plot(test.index, test['pred_last'], ls='dashed', label='Last Value')
plt.plot(test.index, test['pred_drift'], ls='dashed', label='Drift')
plt.plot(test.index, test['pred_shift'], ls='dashed', label='Shift')
plt.axvspan(test.index[0], test.index[-1], alpha=0.2, color='gray')

plt.legend()

/tmp/ipykernel_13890/1854262779.py:1: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.loc[:, 'pred_mean'] = train.mean().value
/tmp/ipykernel_13890/1854262779.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.loc[:, 'pred_last'] = train.iloc[-1].value
/tmp/ipykernel_13890/1854262779.py:4: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.loc[:, 'pred_drift'] = np.arange(801, 1001) * drift
/tmp/ipykernel_13890/1854262779.py:5: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test.loc[:, 'pred_shift'] = df['value'].shift()[800:]

Out[16]:

<matplotlib.legend.Legend at 0x7fcfcbc89720>

In [17]:

Copied!





xs = ['pred_mean', 'pred_last', 'pred_drift', 'pred_shift']
ys = [mean_squared_error(test['value'], test[x]) for x in xs]

plt.bar(xs, ys)
plt.xlabel('Methods')
plt.ylabel('MSE')
xs = ['pred_mean', 'pred_last', 'pred_drift', 'pred_shift']
ys = [mean_squared_error(test['value'], test[x]) for x in xs]

plt.bar(xs, ys)
plt.xlabel('Methods')
plt.ylabel('MSE')

Out[17]:

Text(0, 0.5, 'MSE')

In [ ]:

第3章 ランダムウォーク

第3章ランダムウォーク