第4章移動平均プロセスのモデル化

移動平均(MA)プロセスを定義する¶

In [115]:

Copied!





import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from statsmodels.tsa.stattools import adfuller
from statsmodels.graphics.tsaplots import plot_acf
from statsmodels.tsa.statespace.sarimax import SARIMAX
from sklearn.metrics import mean_squared_error, mean_absolute_error

In [51]:

Copied!

url = 'https://raw.githubusercontent.com/marcopeix/TimeSeriesForecastingInPython/master/data/widget_sales.csv'
df = pd.read_csv(url)
df.index = pd.date_range('2019-01-01', periods=500, name='Time')
url = 'https://raw.githubusercontent.com/marcopeix/TimeSeriesForecastingInPython/master/data/widget_sales.csv'
df = pd.read_csv(url)
df.index = pd.date_range('2019-01-01', periods=500, name='Time')

In [60]:

Copied!





ADF_result = adfuller(df['widget_sales'])
print('製品売上高ADF検定')
print(f'ADF Statistic: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

df_diff = df.diff().dropna()
ADF_result = adfuller(df_diff['widget_sales'])
print('製品売上高(1次差分) ADF検定')
print(f'ADF Statistic: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

fig, axes = plt.subplots(1, 2, figsize=[12, 4])
df['widget_sales'].plot(ax=axes[0])
df_diff['widget_sales'].plot(ax=axes[1])
axes[0].set_ylabel('Widget sales (k$)')
axes[1].set_ylabel('Widget sales - diff (k$)')
plt.show()
ADF_result = adfuller(df['widget_sales'])
print('製品売上高ADF検定')
print(f'ADF Statistic: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

df_diff = df.diff().dropna()
ADF_result = adfuller(df_diff['widget_sales'])
print('製品売上高(1次差分) ADF検定')
print(f'ADF Statistic: {ADF_result[0]:.3f}')
print(f'p-value: {ADF_result[1]:.3f}')

fig, axes = plt.subplots(1, 2, figsize=[12, 4])
df['widget_sales'].plot(ax=axes[0])
df_diff['widget_sales'].plot(ax=axes[1])
axes[0].set_ylabel('Widget sales (k$)')
axes[1].set_ylabel('Widget sales - diff (k$)')
plt.show()

製品売上高ADF検定
ADF Statistic: -1.512
p-value: 0.527
製品売上高(1次差分) ADF検定
ADF Statistic: -10.577
p-value: 0.000

No description has been provided for this image

In [53]:

Copied!

plot_acf(df_diff, auto_ylims=True, lags=30)
plt.show()
plot_acf(df_diff, auto_ylims=True, lags=30)
plt.show()

MAプロセスを予測する¶

In [79]:

Copied!





train = df_diff[:int(0.9 * len(df_diff))]
test = df_diff[int(0.9 * len(df_diff)):]

fig, axes = plt.subplots(2, 1, figsize=[8, 8], sharex=True)
df['widget_sales'].plot(ax=axes[0])
df_diff['widget_sales'].plot(ax=axes[1])
axes[0].set_ylabel('Widget sales (k$)')
axes[1].set_ylabel('Widget sales - diff (k$)')
for i in range(2):
    axes[i].axvspan(test.index[0], test.index[-1], color='tab:orange', alpha=0.5)
train = df_diff[:int(0.9 * len(df_diff))]
test = df_diff[int(0.9 * len(df_diff)):]

fig, axes = plt.subplots(2, 1, figsize=[8, 8], sharex=True)
df['widget_sales'].plot(ax=axes[0])
df_diff['widget_sales'].plot(ax=axes[1])
axes[0].set_ylabel('Widget sales (k$)')
axes[1].set_ylabel('Widget sales - diff (k$)')
for i in range(2):
    axes[i].axvspan(test.index[0], test.index[-1], color='tab:orange', alpha=0.5)

In [101]:

Copied!





def rolling_forecast(df: pd.DataFrame, train_len: int, horizon: int, window :int, method: str) -> list:
    total_len = train_len + horizon
    preds = []
    if method == 'mean':
        for i in range(train_len, total_len, window):
            mean = np.mean(df[:i].values)
            preds.extend(mean for _ in range(window))
    
    elif method == 'last':
        for i in range(train_len, total_len, window):
            last_val = df.iloc[i-1, 0]
            preds.extend(last_val for _ in range(window))
    
    elif method == 'MA':
        for i in range(train_len, total_len, window):
            model = SARIMAX(df[:i], order=(0, 0, 2))
            res = model.fit(disp=False)
            pred = res.get_prediction(0, i + window - 1).predicted_mean.iloc[-window:]
            preds.extend(pred)
    else:
        raise ValueError(f"Invalid method: {method}, choose from ['mean', 'last', 'MA']")
    return preds
def rolling_forecast(df: pd.DataFrame, train_len: int, horizon: int, window :int, method: str) -> list:
    total_len = train_len + horizon
    preds = []
    if method == 'mean':
        for i in range(train_len, total_len, window):
            mean = np.mean(df[:i].values)
            preds.extend(mean for _ in range(window))
    
    elif method == 'last':
        for i in range(train_len, total_len, window):
            last_val = df.iloc[i-1, 0]
            preds.extend(last_val for _ in range(window))
    
    elif method == 'MA':
        for i in range(train_len, total_len, window):
            model = SARIMAX(df[:i], order=(0, 0, 2))
            res = model.fit(disp=False)
            pred = res.get_prediction(0, i + window - 1).predicted_mean.iloc[-window:]
            preds.extend(pred)
    else:
        raise ValueError(f"Invalid method: {method}, choose from ['mean', 'last', 'MA']")
    return preds

In [102]:

Copied!





TRAIN_LEN = len(train)
HORIZON = len(test)
WINDOW = 2

methods = ['mean', 'last', 'MA']
for method in methods:
    test[method] = rolling_forecast(df_diff, TRAIN_LEN, HORIZON, WINDOW, method)
TRAIN_LEN = len(train)
HORIZON = len(test)
WINDOW = 2

methods = ['mean', 'last', 'MA']
for method in methods:
    test[method] = rolling_forecast(df_diff, TRAIN_LEN, HORIZON, WINDOW, method)

/tmp/ipykernel_61933/3131777319.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test[method] = rolling_forecast(df_diff, TRAIN_LEN, HORIZON, WINDOW, method)
/tmp/ipykernel_61933/3131777319.py:7: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  test[method] = rolling_forecast(df_diff, TRAIN_LEN, HORIZON, WINDOW, method)

In [108]:

Copied!





fig, ax = plt.subplots()
df_diff['widget_sales'].iloc[-70:].plot(ax=ax, label='actual')
ax.axvspan(test.index[0], test.index[-1], color='tab:orange', alpha=0.5)
ax.set_ylabel('Widget sales - diff (k$)')
for method in methods:
    test[method].plot(ax=ax, label=method, ls='dashed')
ax.legend()
plt.show()
fig, ax = plt.subplots()
df_diff['widget_sales'].iloc[-70:].plot(ax=ax, label='actual')
ax.axvspan(test.index[0], test.index[-1], color='tab:orange', alpha=0.5)
ax.set_ylabel('Widget sales - diff (k$)')
for method in methods:
    test[method].plot(ax=ax, label=method, ls='dashed')
ax.legend()
plt.show()

In [107]:

Copied!





plt.xlabel('Methods')
plt.ylabel('MSE')
plt.bar(
    methods,
    [mean_squared_error(test['widget_sales'], test[method]) for method in methods]
)
plt.show()
plt.xlabel('Methods')
plt.ylabel('MSE')
plt.bar(
    methods,
    [mean_squared_error(test['widget_sales'], test[method]) for method in methods]
)
plt.show()

In [119]:

Copied!





df['pred_MA'] = pd.Series()
df['pred_MA'].iloc[450:] = df['widget_sales'].iloc[450] + test['MA'].cumsum()

# plot
fig, ax = plt.subplots()
df['widget_sales'].iloc[-90:].plot(ax=ax, label='actual')
ax.axvspan(test.index[0], test.index[-1], color='tab:orange', alpha=0.5)
ax.set_ylabel('Widget sales (k$)')
df['pred_MA'].iloc[-90:].plot(ax=ax, label=method, ls='dashed')
ax.legend()
plt.show()
df['pred_MA'] = pd.Series()
df['pred_MA'].iloc[450:] = df['widget_sales'].iloc[450] + test['MA'].cumsum()

# plot
fig, ax = plt.subplots()
df['widget_sales'].iloc[-90:].plot(ax=ax, label='actual')
ax.axvspan(test.index[0], test.index[-1], color='tab:orange', alpha=0.5)
ax.set_ylabel('Widget sales (k$)')
df['pred_MA'].iloc[-90:].plot(ax=ax, label=method, ls='dashed')
ax.legend()
plt.show()

/tmp/ipykernel_61933/3235568680.py:2: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['pred_MA'].iloc[450:] = df['widget_sales'].iloc[450] + test['MA'].cumsum()

In [121]:

Copied!

# calculate MAE
print(f"MAE: {mean_absolute_error(df['widget_sales'].iloc[450:], df['pred_MA'].iloc[450:]):.3f}")
# calculate MAE
print(f"MAE: {mean_absolute_error(df['widget_sales'].iloc[450:], df['pred_MA'].iloc[450:]):.3f}")

MAE: 2.324

In [ ]:

第4章 移動平均プロセスのモデル化

移動平均(MA)プロセスを定義する¶

MAプロセスを予測する¶

第4章移動平均プロセスのモデル化