时间序列预测模型
- Holt-Winters季节性预测模型
- SARIMA模型
- Prophet模型
## 自动搜索合适的参数的ARIMA模型
## Load the raw data; only the "X1" column is modelled in this section.
df = pd.read_csv("./timeserise.csv")
x1_series = df["X1"]
## Positional hold-out split: the first 120 observations form the training
## set and everything after them forms the test set (the original notes say
## the test set is the last 24 points, i.e. 144 rows are expected - TODO confirm).
train = x1_series.iloc[:120].to_frame()
test = x1_series.iloc[120:].to_frame()
## Build and fit the Holt-Winters model: additive trend, additive
## seasonality, one seasonal cycle every 12 observations.
hw_spec = ExponentialSmoothing(
    train["X1"].values,
    trend="add",
    seasonal="add",
    seasonal_periods=12,
)
model1 = hw_spec.fit()
## Collect the forecast next to the true test values. The frame is a shallow
## copy of `test`, so the new column is added to the copy only.
y_hat_avg = test.copy(deep=False)
## Forecast exactly as many steps as there are test observations.
y_hat_avg["holt_winter_forecast1"] = model1.forecast(len(test))
## Plot the training data, the held-out data and the Holt-Winters forecast
## on one figure.
plt.figure(figsize=(14, 7))
train["X1"].plot(figsize=(14, 7), label="X1 train")
test["X1"].plot(label="X1 test")
y_hat_avg["holt_winter_forecast1"].plot(style="g--o", lw=2,
                                        label="Holt-Winters")
plt.legend()
plt.grid()
plt.title("Holt-Winters季节性预测模型")
## Save BEFORE plt.show(): once show() returns (or, with the inline notebook
## backend, once the cell output is flushed) the figure is closed, so a later
## savefig() would write a blank canvas.
plt.savefig('Holt-Winters季节性预测模型.png', dpi=300)
plt.show()
## Mean absolute error between the true test values and the forecast.
print("Holt-Winters季节性预测模型,预测绝对值误差:",
      mean_absolute_error(test["X1"], y_hat_avg["holt_winter_forecast1"]))
Holt-Winters季节性预测模型,预测绝对值误差: 31.07380001125715
2. SARIMA模型
## Search space and options handed to pmdarima's automatic (S)ARIMA search.
sarima_search = dict(
    start_p=1, start_q=1,      # starting values for p and q
    max_p=12, max_q=12,        # upper bounds for p and q
    test="kpss",               # use the KPSS test to determine d
    d=None,                    # let the search pick d
    m=12,                      # seasonal period of the series
    seasonal=True,             # fit a seasonal model
    start_P=0, start_Q=0,      # starting values for seasonal P and Q
    max_P=5, max_Q=5,          # upper bounds for seasonal P and Q
    D=None,                    # let the search pick D
    trace=True,                # print the candidate models as they are tried
    error_action='ignore',
    suppress_warnings=True,
    stepwise=True,
)
model = pm.auto_arima(train["X1"].values, **sarima_search)
## Show the summary of the selected model.
print(model.summary())
## Global seaborn style for the figures that follow ("Kaiti" is a CJK font so
## the Chinese titles and labels can be rendered).
sns.set(font= "Kaiti",style="ticks",font_scale=1.4)
## Forecast the test horizon with the model selected by the automatic search
## (the original notes report SARIMA(2, 0, 0)x(0, 1, 0, 12) for their run).
## The horizon is taken from the test set instead of a hard-coded 24 so it
## always matches the rows it is assigned to below, consistent with the
## Holt-Winters forecast. alpha=0.05 requests a 95% interval.
pre, conf = model.predict(n_periods=len(test), alpha=0.05,
                          return_conf_int=True)
## Collect point forecasts and interval bounds next to the true test values.
y_hat = test.copy(deep = False)
y_hat["sarima_pre"] = pre
y_hat["sarima_pre_lower"] = conf[:, 0]   # first column: lower bound
y_hat["sarima_pre_upper"] = conf[:, 1]   # second column: upper bound
## Plot the training data, the held-out data and the SARIMA forecast.
plt.figure(figsize=(14, 7))
train["X1"].plot(figsize=(14, 7), label="X1 train")
test["X1"].plot(label="X1 test")
y_hat["sarima_pre"].plot(style="g--o", lw=2, label="SARIMA")
## Shade the band between the lower and upper interval bounds.
plt.fill_between(y_hat.index, y_hat["sarima_pre_lower"],
                 y_hat["sarima_pre_upper"], color='k', alpha=.15,
                 label="95%置信区间")
plt.legend()
plt.grid()
## NOTE(review): the order in the title is hard-coded from the original run;
## it will be stale if the automatic search selects a different model.
plt.title("SARIMA(2,0,0)x(0,1,0,12)模型")
## Save BEFORE plt.show(): once show() returns (or, with the inline notebook
## backend, once the cell output is flushed) the figure is closed, so a later
## savefig() would write a blank canvas.
plt.savefig('SARIMA模型预测模型.png', dpi=300)
plt.show()
## Mean absolute error between the true test values and the forecast.
print("SARIMA模型预测的绝对值误差:",
      mean_absolute_error(test["X1"], y_hat["sarima_pre"]))
## Author's observation from the original run: the SARIMA forecast follows
## the trend of the series well, although the predicted values are somewhat
## lower than the true values.
SARIMA模型预测的绝对值误差: 43.46489486337553
3. Prophet模型
## Configure and fit the Prophet model.
## NOTE(review): Prophet.fit() requires a frame with columns "ds" (timestamps)
## and "y" (values). The split earlier in this file only produces an "X1"
## column, so `train`/`test` are presumably rebuilt with "ds"/"y" in a step
## that is not shown here - confirm before running.
model = Prophet(
    growth="linear",                     # linear growth trend
    yearly_seasonality=True,             # fit a yearly seasonal component
    weekly_seasonality=False,            # no weekly component
    daily_seasonality=False,             # no daily component
    seasonality_mode="multiplicative",   # seasonality scales with the trend
    # Strength of the prior on the seasonal components (larger = more
    # flexible seasonality). This is NOT the length of the seasonal period.
    seasonality_prior_scale=12,
    # Prophet's default uncertainty interval is 80%; request 95% explicitly
    # so that yhat_lower / yhat_upper match the "95%" band label used when
    # the forecast is plotted.
    interval_width=0.95,
)
model.fit(train)
## Predict for the rows of the test frame.
forecast = model.predict(test)
## Show the first rows of the point forecast and its interval bounds.
preview_cols = ['ds', 'yhat', 'yhat_lower', 'yhat_upper']
print(forecast[preview_cols].head())
## Mean absolute error between the true test values and the point forecast.
prophet_mae = mean_absolute_error(test["y"], forecast["yhat"])
print("在测试集上绝对值预测误差为:", prophet_mae)
## Compare the original data with the Prophet forecast on a single axes.
fig, ax = plt.subplots(figsize=(14, 7))
train.plot(x="ds", y="y", label="训练数据", ax=ax)
test.plot(x="ds", y="y", label="测试数据", ax=ax)
forecast.plot(x="ds", y="yhat", style="g--o", label="预测数据", ax=ax)
## Shade the band between yhat_lower and yhat_upper; its coverage is whatever
## interval width the Prophet model was configured with.
ax.fill_between(
    test["ds"].values,
    forecast["yhat_lower"],
    forecast["yhat_upper"],
    color='k',
    alpha=.2,
    label="95%置信区间",
)
ax.grid()
ax.set_xlabel("时间")
ax.set_ylabel("数值")
ax.set_title("Prophet模型")
ax.legend(loc=2)   # loc=2 places the legend in the upper-left corner
plt.show()
## Author's observation from the original run: the plot suggests that the
## model predicts the test data well.
绝对值预测误差为: 25.156494995278933
,