Univariate TS Models

Note

For TAs:
1. The differencing and ADF test have already been done in the EDA section. Here, I directly check the ACF and PACF plots of the stationary series.
2. I combined the ARIMA and SARIMA sections. By checking the ACF and PACF plots, I can determine whether a seasonal component exists, select appropriate orders of terms, and choose the optimal model through model diagnostics.

Here, we will apply the ARIMA or SARIMA model to our 10 time series datasets in order to make forecasts. Below is a brief introduction to both models, including the parameters p, d, q, P, D, and Q.

ARIMA Model:
ARIMA (AutoRegressive Integrated Moving Average) is used for non-seasonal time series data that exhibit patterns over time. The parameters are:
- p: The order of the autoregressive (AR) term.
- d: The degree of differencing to make the series stationary.
- q: The order of the moving average (MA) term.

SARIMA Model:
SARIMA (Seasonal ARIMA) is an extension of the ARIMA model that deals with seasonality in time series data. The additional parameters for seasonal components are:
- P: The order of the seasonal autoregressive (AR) term.
- D: The degree of seasonal differencing.
- Q: The order of the seasonal moving average (MA) term.
- s: The number of periods in each season.

Data and Model Selection:
In the EDA phase, we performed differencing and seasonal differencing to transform the original series into stationary series. The stationarity of the differenced series was validated using the Augmented Dickey-Fuller (ADF) test. Therefore, we can now decide whether to apply the ARIMA or SARIMA model and select the appropriate values for d or D.

After performing the necessary differencing or seasonal differencing, all series have become stationary. We will now present the ACF and PACF plots of these differenced series to determine the optimal values for p (AR model), q (MA model), P (seasonal AR), and Q (seasonal MA).

ACF and PACF Plots

The following displays the ACF and PACF plots for the stationary series after differencing.

Code
# Load packages
library(ggplot2)
library(tidyverse)
library(plotly)
library(quantmod)
library(forecast)
library(astsa)

# Load data ----

# U.S. Dollar Index: daily OHLC from Yahoo Finance
invisible(getSymbols("DX-Y.NYB", src = "yahoo", from = "2005-01-01", to = "2024-12-31"))
dxy <- data.frame(Date = index(`DX-Y.NYB`), 
                       Open = `DX-Y.NYB`[, "DX-Y.NYB.Open"], 
                       High = `DX-Y.NYB`[, "DX-Y.NYB.High"], 
                       Low = `DX-Y.NYB`[, "DX-Y.NYB.Low"], 
                       Close = `DX-Y.NYB`[, "DX-Y.NYB.Close"])
colnames(dxy) <- c("Date", "Open", "High", "Low", "Close")
dxy <- na.omit(dxy)  # drop days with missing quotes

# Trade balance (quarterly)
bea <- read.csv("./data/bea.csv")
bea$time <- as.Date(bea$time)

# GDP components (quarterly); total GDP = C + I + NX + G
gdp <- read.csv("./data/gdp.csv")
gdp$time <- as.Date(gdp$time)
gdp$total <- gdp$consumption + gdp$investment + gdp$net_export + gdp$government

# Unemployment rate (monthly)
data_unem <- read.csv("./data/unem.csv", header=TRUE)
data_unem$time <- as.Date(data_unem$time)

# CPI (monthly)
data_cpi <- read.csv("./data/cpi.csv", header=TRUE)
data_cpi$time <- as.Date(data_cpi$time)

# S&P 500: daily OHLC from Yahoo Finance
invisible(getSymbols("^GSPC", src = "yahoo", from = "2005-01-01", to = "2024-12-31"))
data <- data.frame(Date = index(GSPC), 
                       Open = GSPC[, "GSPC.Open"], 
                       High = GSPC[, "GSPC.High"], 
                       Low = GSPC[, "GSPC.Low"], 
                       Close = GSPC[, "GSPC.Close"])
colnames(data) <- c("Date", "Open", "High", "Low", "Close")
data <- na.omit(data)  # keep NA handling consistent with the dxy frame above

# Gold price
xau <- read.csv("./data/xau.csv")
xau$Date <- as.Date(xau$Date)

# S&P GSCI commodity index
gsci <- read.csv("./data/gsci.csv")
gsci$Date <- as.Date(gsci$Date)

# House price index (quarterly)
house <- read.csv("./data/house.csv", header=TRUE)
house$time <- as.Date(house$time)

# International visitors (monthly)
visitors <- read.csv("./data/visitors.csv", header=TRUE)
visitors$time <- as.Date(visitors$time)

# Time series objects ----
# Log transforms are applied to the strictly positive price/count series;
# frequency 252 = trading days/year, 52 = weeks, 12 = months, 4 = quarters.
dxy_ts <- ts(log(dxy$Close), start=c(2005,1), frequency=252)
balance_ts <- ts(bea$balance, start=c(2005,1), end=c(2024,3), frequency=4)
gdp_ts <- ts(gdp$total, start=c(2005,1), end=c(2024,3), frequency=4)
unem_ts <- ts(log(data_unem$unem), start=c(2005,1), end=c(2023,12), frequency=12)
cpi_ts <- ts(data_cpi$cpi, start=c(2005,1), end=c(2023,12), frequency=12)
sp5_ts <- ts(GSPC$GSPC.Close, start=c(2005,1), frequency=252)  # NOTE(review): built from raw GSPC, not the cleaned `data` frame — confirm intended
xau_ts <- ts(log(xau$Price), start=c(2005,1), end=c(2024,52), frequency=52)
gsci_ts <- ts(log(gsci$Price), start=c(2014,252), frequency=252)
house_ts <- ts(house$index, start=c(2005,1), end=c(2024,4), frequency=4)
visitors_ts <- ts(log(visitors$count), start=c(2005,1), end=c(2024,12), frequency=12)
Code
library(gridExtra)

# ACF/PACF of the first-differenced log USD index, 50 lags.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(dxy_ts), 50) +
  ggtitle("ACF Plot for USD Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(dxy_ts), 50) +
  ggtitle("PACF Plot for USD Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of the trade balance after seasonal (lag 4) and regular differencing.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(diff(balance_ts, lag=4))) +
  ggtitle("ACF Plot for Trade Balance") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(diff(balance_ts, lag=4))) +
  ggtitle("PACF Plot for Trade Balance") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of first-differenced GDP.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(gdp_ts)) +
  ggtitle("ACF Plot for GDP") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(gdp_ts)) +
  ggtitle("PACF Plot for GDP") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of the first-differenced (log) unemployment rate.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(unem_ts)) +
  ggtitle("ACF Plot for Unemployment Rate") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(unem_ts)) +
  ggtitle("PACF Plot for Unemployment Rate") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of CPI after seasonal (lag 12) and regular differencing.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(diff(cpi_ts, lag=12))) +
  ggtitle("ACF Plot for CPI") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(diff(cpi_ts, lag=12))) +
  ggtitle("PACF Plot for CPI") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of the first-differenced S&P 500 close, 50 lags.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(sp5_ts), 50) +
  ggtitle("ACF Plot for S&P 500 Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(sp5_ts), 50) +
  ggtitle("PACF Plot for S&P 500 Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of the first-differenced log gold price, 50 lags.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(xau_ts), 50) +
  ggtitle("ACF Plot for Gold Price") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(xau_ts), 50) +
  ggtitle("PACF Plot for Gold Price") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of the first-differenced log S&P GSCI index, 50 lags.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(gsci_ts), 50) +
  ggtitle("ACF Plot for S&P GSCI Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(gsci_ts), 50) +
  ggtitle("PACF Plot for S&P GSCI Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of the first-differenced house price index.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(house_ts)) +
  ggtitle("ACF Plot for House Price Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(house_ts)) +
  ggtitle("PACF Plot for House Price Index") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

Code
# ACF/PACF of log visitor counts after seasonal (lag 12) and regular differencing.
# Named acf_plot/pacf_plot so base stats::acf()/stats::pacf() are not masked.
acf_plot <- ggAcf(diff(diff(visitors_ts, lag=12))) +
  ggtitle("ACF Plot for Number of International Visitors") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
pacf_plot <- ggPacf(diff(diff(visitors_ts, lag=12))) +
  ggtitle("PACF Plot for Number of International Visitors") +
  theme_bw() +
  geom_segment(lineend = "butt", color = "#5a3196") +
  geom_hline(yintercept = 0, color = "#5a3196")
grid.arrange(acf_plot, pacf_plot, nrow = 2)

The ACF plot helps determine the q parameter by identifying the number of significant lags in the moving average (MA) component. The PACF plot helps determine the p parameter by identifying the number of significant lags in the autoregressive (AR) component. For seasonal models, ACF and PACF can also be used to determine the seasonal parameters Q and P.

  • U.S. Dollar Index: ARIMA model, p=0, d=1, q=0.
  • Trade Balance: SARIMA model, p=1, d=1, q=1, P=1 or 2, D=1, Q=1, s=4.
  • GDP: ARIMA model, p=0, d=1, q=0.
  • Unemployment Rate: ARIMA model, p=0 or 1, d=1, q=0 or 1.
  • CPI: SARIMA model, p=1 or 2, d=1, q=1 or 2, P=1 or 2, D=1, Q=1 or 2, s=12.
  • S&P 500: ARIMA model, p=0 or 1 or 4, d=1, q=0:4.
  • Gold Price: ARIMA model, p=0:4, d=1, q=0:4.
  • Global Commodity Price: ARIMA model, p=0:4, d=1, q=0:4.
  • House Price: ARIMA model, p=1 or 3, d=1, q=0:4.
  • International Visitors: SARIMA model, p=1 or 2, d=1, q=1, P=1 or 2, D=1, Q=1, s=12.

Model Selection by Auto.arima()

Code
auto.arima(dxy_ts)
Series: dxy_ts 
ARIMA(1,1,0) 

Coefficients:
         ar1
      0.0146
s.e.  0.0141

sigma^2 = 2.27e-05:  log likelihood = 19776.68
AIC=-39549.37   AICc=-39549.37   BIC=-39536.32
Code
auto.arima(balance_ts)
Series: balance_ts 
ARIMA(1,0,1)(2,1,0)[4] 

Coefficients:
         ar1     ma1     sar1     sar2
      0.7910  0.4459  -0.6826  -0.2875
s.e.  0.0816  0.1216   0.1226   0.1259

sigma^2 = 240161663:  log likelihood = -829.66
AIC=1669.32   AICc=1670.19   BIC=1680.91
Code
auto.arima(gdp_ts)
Series: gdp_ts 
ARIMA(0,2,1) 

Coefficients:
          ma1
      -0.9210
s.e.   0.0442

sigma^2 = 115231:  log likelihood = -558.4
AIC=1120.8   AICc=1120.97   BIC=1125.49
Code
auto.arima(unem_ts)
Series: unem_ts 
ARIMA(0,1,1) 

Coefficients:
         ma1
      0.1553
s.e.  0.0722

sigma^2 = 0.007935:  log likelihood = 227.33
AIC=-450.66   AICc=-450.61   BIC=-443.81
Code
auto.arima(cpi_ts)
Series: cpi_ts 
ARIMA(2,2,3)(1,0,0)[12] 

Coefficients:
          ar1     ar2      ma1      ma2      ma3    sar1
      -0.0209  0.0458  -0.3448  -0.4849  -0.1413  0.2485
s.e.   0.7971  0.2766   0.7938   0.5391   0.2840  0.0702

sigma^2 = 0.6156:  log likelihood = -264.23
AIC=542.46   AICc=542.97   BIC=566.4
Code
auto.arima(sp5_ts)
Error in polyroot(c(1, testvec)) : root finding code failed
Series: sp5_ts 
ARIMA(5,2,0) 

Coefficients:
          ar1      ar2      ar3      ar4      ar5
      -0.8897  -0.6626  -0.4855  -0.3295  -0.1257
s.e.   0.0140   0.0183   0.0194   0.0183   0.0140

sigma^2 = 959.1:  log likelihood = -24403.24
AIC=48818.48   AICc=48818.49   BIC=48857.62
Code
auto.arima(xau_ts)
Series: xau_ts 
ARIMA(0,1,0) with drift 

Coefficients:
       drift
      0.0018
s.e.  0.0007

sigma^2 = 0.0005455:  log likelihood = 2429.69
AIC=-4855.39   AICc=-4855.38   BIC=-4845.5
Code
auto.arima(gsci_ts)
Series: gsci_ts 
ARIMA(0,1,0)(1,0,0)[252] with drift 

Coefficients:
        sar1  drift
      0.0073  1e-04
s.e.  0.0205  3e-04

sigma^2 = 0.000205:  log likelihood = 7168.46
AIC=-14330.92   AICc=-14330.91   BIC=-14313.41
Code
auto.arima(house_ts)
Series: house_ts 
ARIMA(2,2,2) 

Coefficients:
          ar1      ar2     ma1     ma2
      -0.1458  -0.8308  0.1323  0.2815
s.e.   0.0917   0.0863  0.1590  0.1457

sigma^2 = 21.7:  log likelihood = -229.36
AIC=468.73   AICc=469.56   BIC=480.51
Code
auto.arima(visitors_ts)
Series: visitors_ts 
ARIMA(2,0,2)(0,0,2)[12] with non-zero mean 

Coefficients:
         ar1     ar2     ma1     ma2    sma1    sma2     mean
      0.0223  0.7591  1.0862  0.1426  0.2468  0.1422  15.3529
s.e.  0.1352  0.1105  0.1549  0.0987  0.0662  0.0583   0.1815

sigma^2 = 0.04477:  log likelihood = 34.24
AIC=-52.48   AICc=-51.86   BIC=-24.63

Best model for each time series:

  • U.S. Dollar Index: ARIMA(1,1,0)
  • Trade Balance: SARIMA(1,0,1)(2,1,0)[4]
  • GDP: ARIMA(0,2,1)
  • Unemployment Rate: ARIMA(0,1,1)
  • CPI: SARIMA(2,2,3)(1,0,0)[12]
  • S&P 500: ARIMA(5,2,0)
  • Gold Price: ARIMA(0,1,0)
  • Global Commodity Price: SARIMA(0,1,0)(1,0,0)[252]
  • House Price: ARIMA(2,2,2)
  • International Visitors: SARIMA(2,0,2)(0,0,2)[12]

Model Diagnostics

ARIMA(0,1,0)

Code
model_output <- capture.output(sarima(dxy_ts, 0,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate    SE t.value p.value
constant    1e-04 1e-04  0.8257   0.409

sigma^2 estimated as 2.26922e-05 on 5034 degrees of freedom 
 
AIC = -7.854818  AICc = -7.854818  BIC = -7.852226 
 

ARIMA(1,1,0)

Code
model_output <- capture.output(sarima(dxy_ts, 1,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ar1        0.0145 0.0141  1.0260  0.3049
constant   0.0001 0.0001  0.8118  0.4169

sigma^2 estimated as 2.268745e-05 on 5033 degrees of freedom 
 
AIC = -7.85463  AICc = -7.854629  BIC = -7.850743 
 

For both ARIMA(0,1,0) and ARIMA(1,1,0), the model diagnostics results are very similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, suggesting that the residuals are nearly stationary with a constant mean and finite variance over time.

The Autocorrelation Function (ACF) of the residuals shows mostly independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

The Ljung-Box Test p-values are below the 0.05 significance level for lags greater than 10, implying that autocorrelations remain in the residuals.

Since the AR(1) term in the ARIMA(1,1,0) model is not significant at the 10% level, and the ARIMA(0,1,0) model has lower AIC, AICc, and BIC values, we decide to use ARIMA(0,1,0) as the optimal model. Moreover, the intercept term of the ARIMA(0,1,0) is insignificant.

SARIMA(0,1,1)x(0,1,1)[4]

Code
model_output <- capture.output(sarima(balance_ts, 0,1,1,0,1,1,4))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
     Estimate     SE t.value p.value
ma1    0.3844 0.1179  3.2618  0.0017
sma1  -1.0000 0.1739 -5.7491  0.0000

sigma^2 estimated as 186230777 on 72 degrees of freedom 
 
AIC = 22.12471  AICc = 22.127  BIC = 22.21812 
 

SARIMA(1,0,1)(2,1,0)[4]

Code
model_output <- capture.output(sarima(balance_ts, 1,0,1,2,1,0,4))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
          Estimate        SE t.value p.value
ar1         0.7876    0.0819  9.6172  0.0000
ma1         0.4463    0.1217  3.6686  0.0005
sar1       -0.6808    0.1225 -5.5597  0.0000
sar2       -0.2876    0.1255 -2.2917  0.0249
constant -738.6922 1479.6176 -0.4992  0.6192

sigma^2 estimated as 226639976 on 70 degrees of freedom 
 
AIC = 22.28099  AICc = 22.29258  BIC = 22.46639 
 

For both SARIMA(0,1,1)x(0,1,1)[4] and SARIMA(1,0,1)(2,1,0)[4], the model diagnostics results are very similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, suggesting that the residuals are nearly stationary with a constant mean and finite variance over time.

The Autocorrelation Function (ACF) of the residuals shows perfect independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

The Ljung-Box Test p-values are all above the 0.05 significance level, implying no autocorrelations are left in the residuals and concluding that the model is well-fitted.

Coefficient Significance: All model coefficients are significant.

Therefore, we decide to use SARIMA(0,1,1)x(0,1,1)[4] as the optimal model, since it has lower AIC, AICc, and BIC.

ARIMA(0,1,0)

Code
model_output <- capture.output(sarima(gdp_ts, 0,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate      SE t.value p.value
constant 212.9192 38.3573  5.5509       0

sigma^2 estimated as 114760 on 77 degrees of freedom 
 
AIC = 14.53976  AICc = 14.54043  BIC = 14.60019 
 

ARIMA(0,2,1)

Code
model_output <- capture.output(sarima(gdp_ts, 0,2,1))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
    Estimate     SE  t.value p.value
ma1   -0.921 0.0442 -20.8594       0

sigma^2 estimated as 113730.5 on 76 degrees of freedom 
 
AIC = 14.5559  AICc = 14.55659  BIC = 14.61677 
 

For both ARIMA(0,1,0) and ARIMA(0,2,1), the model diagnostics results are very similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, but there is a significant oscillation in 2020, indicating the need for a more advanced model to account for special events.

The Autocorrelation Function (ACF) of the residuals shows perfect independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

The Ljung-Box Test p-values are all above the 0.05 significance level, implying no autocorrelations are left in the residuals and concluding that the model is well-fitted.

Since the MA(1) term in the ARIMA(0,2,1) model is significant at the 5% level, we decide to use ARIMA(0,2,1) as the optimal model.

ARIMA(0,1,0)

Code
model_output <- capture.output(sarima(unem_ts, 0,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate    SE t.value p.value
constant  -0.0015 0.006  -0.246  0.8059

sigma^2 estimated as 0.008057314 on 226 degrees of freedom 
 
AIC = -1.965677  AICc = -1.965598  BIC = -1.935501 
 

ARIMA(1,1,1)

Code
model_output <- capture.output(sarima(unem_ts, 1,1,1))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ar1       -0.5485 0.2146 -2.5556  0.0113
ma1        0.6962 0.1832  3.7993  0.0002
constant  -0.0014 0.0064 -0.2196  0.8264

sigma^2 estimated as 0.007811542 on 224 degrees of freedom 
 
AIC = -1.978774  AICc = -1.9783  BIC = -1.918423 
 

ARIMA(0,1,1)

Code
model_output <- capture.output(sarima(unem_ts, 0,1,1))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ma1        0.1550 0.0722  2.1459  0.0329
constant  -0.0014 0.0068 -0.2091  0.8345

sigma^2 estimated as 0.00789841 on 225 degrees of freedom 
 
AIC = -1.976678  AICc = -1.976442  BIC = -1.931414 
 

The model diagnostics results for all three models are similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, but there is a significant oscillation in 2020, indicating the need for a more advanced model to account for special events.

The Autocorrelation Function (ACF) of the residuals shows perfect independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

However, the Ljung-Box test results are different. For the ARIMA(0,1,0) model, the Ljung-Box Test p-values after lag 2 are all above the 0.05 significance level. For the other two models, all p-values are above the threshold. This implies that, for the other two models, no autocorrelations remain in the residuals, concluding that the models are well-fitted.

Coefficient Significance: All model coefficients are significant.

Since both the AR(1) and MA(1) terms in the ARIMA(1,1,1) model are significant, we choose this model as the optimal one.

SARIMA(1,1,2)x(0,1,1)[12]

Code
model_output <- capture.output(sarima(cpi_ts, 1,1,2,0,1,1,12))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
     Estimate     SE t.value p.value
ar1    0.9485 0.0611 15.5239  0.0000
ma1   -0.3778 0.0934 -4.0438  0.0001
ma2   -0.4161 0.0769 -5.4148  0.0000
sma1  -0.9915 1.1241 -0.8820  0.3788

sigma^2 estimated as 0.4663129 on 211 degrees of freedom 
 
AIC = 2.275082  AICc = 2.275968  BIC = 2.353469 
 

SARIMA(2,2,3)(1,0,0)[12]

Code
model_output <- capture.output(sarima(cpi_ts, 2,2,3,1,0,0,12))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
     Estimate     SE t.value p.value
ar1   -0.0209 0.7971 -0.0262  0.9791
ar2    0.0458 0.2766  0.1656  0.8686
ma1   -0.3448 0.7938 -0.4343  0.6645
ma2   -0.4849 0.5391 -0.8995  0.3694
ma3   -0.1413 0.2840 -0.4976  0.6193
sar1   0.2485 0.0702  3.5403  0.0005

sigma^2 estimated as 0.5989571 on 220 degrees of freedom 
 
AIC = 2.400257  AICc = 2.401955  BIC = 2.506203 
 

The model diagnostics results for both models are similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, suggesting that the residuals are nearly stationary with a constant mean and finite variance over time.

The Autocorrelation Function (ACF) of the residuals shows mostly independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

However, the Ljung-Box test results and coefficient significance are different. For the SARIMA(1,1,2)x(0,1,1)[12] model, the Ljung-Box Test p-values are all above the 0.05 significance level, and all coefficients are significant. For the SARIMA(2,2,3)(1,0,0)[12] model, the Ljung-Box Test p-values are above the 0.05 significance level only after lag 20, and the majority of the coefficients are not significant.

Therefore, we choose the SARIMA(1,1,2)x(0,1,1)[12] model as the optimal one.

ARIMA(4,1,4)

Code
model_output <- capture.output(sarima(sp5_ts, 4,1,4))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE  t.value p.value
ar1       -0.2465 0.0268  -9.2149  0.0000
ar2        0.7864 0.0223  35.2752  0.0000
ar3       -0.3998 0.0198 -20.1590  0.0000
ar4       -0.8560 0.0262 -32.6906  0.0000
ma1        0.1942 0.0334   5.8183  0.0000
ma2       -0.7627 0.0273 -27.9744  0.0000
ma3        0.4428 0.0244  18.1189  0.0000
ma4        0.7686 0.0328  23.4248  0.0000
constant   0.9567 0.3778   2.5323  0.0114

sigma^2 estimated as 783.1994 on 5022 degrees of freedom 
 
AIC = 9.505299  AICc = 9.505306  BIC = 9.518265 
 

ARIMA(5,2,0)

Code
model_output <- capture.output(sarima(sp5_ts, 5,2,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
    Estimate     SE  t.value p.value
ar1  -0.8897 0.0140 -63.5255       0
ar2  -0.6626 0.0183 -36.2866       0
ar3  -0.4855 0.0194 -25.0734       0
ar4  -0.3295 0.0183 -18.0316       0
ar5  -0.1257 0.0140  -8.9664       0

sigma^2 estimated as 958.1395 on 5025 degrees of freedom 
 
AIC = 9.705463  AICc = 9.705465  BIC = 9.713244 
 

The model diagnostics results for both models are very similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, but the magnitude of the residuals after 2020 is noticeably larger, indicating the need for a more advanced model to account for special events.

The Autocorrelation Function (ACF) of the residuals shows mostly independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

The Ljung-Box test results for both models are not satisfactory, as they are below the 0.05 significance level. This indicates that autocorrelations remain in the residuals, concluding that the models need improvement.

Coefficient Significance: All model coefficients are significant.

Since the ARIMA(4,1,4) model has lower AIC, AICc and BIC, we choose it as the optimal model.

ARIMA(0,1,0)

Code
model_output <- capture.output(sarima(xau_ts, 0,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate    SE t.value p.value
constant   0.0018 7e-04  2.4406  0.0148

sigma^2 estimated as 0.0005449249 on 1038 degrees of freedom 
 
AIC = -4.673136  AICc = -4.673132  BIC = -4.663615 
 

This model is chosen by both manual search and auto.arima(). The diagnostic results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, suggesting that the residuals are nearly stationary with a constant mean and finite variance over time.

The Autocorrelation Function (ACF) of the residuals shows mostly independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

The Ljung-Box Test p-values are all above the 0.05 significance level, implying no autocorrelations are left in the residuals and concluding that the model is well-fitted.

Therefore, the ARIMA(0,1,0) model is the optimal model.

ARIMA(0,1,0)

Code
model_output <- capture.output(sarima(gsci_ts, 0,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate    SE t.value p.value
constant    1e-04 3e-04  0.4152   0.678

sigma^2 estimated as 0.0002048122 on 2534 degrees of freedom 
 
AIC = -5.653962  AICc = -5.653961  BIC = -5.649356 
 

SARIMA(0,1,0)(1,0,0)[252]

Code
model_output <- capture.output(sarima(gsci_ts, 0,1,0,1,0,0,252))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
sar1       0.0073 0.0205  0.3552  0.7225
constant   0.0001 0.0003  0.4089  0.6826

sigma^2 estimated as 0.000204801 on 2533 degrees of freedom 
 
AIC = -5.653223  AICc = -5.653221  BIC = -5.646314 
 

The model diagnostics results for both models are similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, but the magnitude of the residuals around 2020 is noticeably larger, indicating the need for a more advanced model to account for special events.

The Autocorrelation Function (ACF) of the residuals shows mostly independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

However, the Ljung-Box test results are different. For the ARIMA(0,1,0) model, most of the Ljung-Box test p-values are above the 0.05 significance level. For the SARIMA(0,1,0)(1,0,0)[252] model, only p-values after lag 100 are above the threshold. This implies that autocorrelations remain in the residuals for the latter model. Moreover, the seasonal AR(1) term of the model is not significant.

Therefore, we choose the ARIMA(0,1,0) model as the optimal one.

ARIMA(3,1,0)

Code
model_output <- capture.output(sarima(house_ts, 3,1,0))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ar1        0.8522 0.0873  9.7634  0.0000
ar2       -0.6078 0.1101 -5.5188  0.0000
ar3        0.6153 0.0870  7.0760  0.0000
constant   5.5136 3.2681  1.6871  0.0957

sigma^2 estimated as 20.62213 on 75 degrees of freedom 
 
AIC = 6.017157  AICc = 6.023999  BIC = 6.167122 
 

ARIMA(3,1,2)

Code
model_output <- capture.output(sarima(house_ts, 3,1,2))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
         Estimate     SE t.value p.value
ar1        0.7153 0.1199  5.9658  0.0000
ar2       -0.6927 0.1109 -6.2484  0.0000
ar3        0.7350 0.0864  8.5071  0.0000
ma1        0.1982 0.1740  1.1390  0.2584
ma2        0.3248 0.1441  2.2531  0.0273
constant   5.3341 2.8926  1.8441  0.0692

sigma^2 estimated as 19.33216 on 73 degrees of freedom 
 
AIC = 6.007442  AICc = 6.02221  BIC = 6.217393 
 

ARIMA(2,2,2)

Code
model_output <- capture.output(sarima(house_ts, 2,2,2))

Code
# Print only the coefficient table and fit statistics from the captured output.
start_line <- grep("Coefficients", model_output)[1]  # [1]: guard against multiple matches
end_line <- length(model_output)  # Last line of output
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
    Estimate     SE t.value p.value
ar1  -0.1458 0.0917 -1.5904  0.1160
ar2  -0.8308 0.0863 -9.6271  0.0000
ma1   0.1323 0.1590  0.8321  0.4080
ma2   0.2815 0.1457  1.9323  0.0572

sigma^2 estimated as 20.58715 on 74 degrees of freedom 
 
AIC = 6.009331  AICc = 6.016356  BIC = 6.160402 
 

The model diagnostics results for all three models are similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, but there is a significant oscillation in 2020, indicating the need for a more advanced model to account for special events.

The Autocorrelation Function (ACF) of the residuals shows perfect independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

However, the Ljung-Box test results are different. For the ARIMA(3,1,0) model, the Ljung-Box Test p-values after lag 6 are all above the 0.05 significance level. For the other two models, all p-values are above the threshold. This implies that, for the other two models, no autocorrelations remain in the residuals, concluding that the models are well-fitted.

Coefficient significance: For the ARIMA(3,1,0) model, all three coefficients are significant. For the ARIMA(3,1,2) model, only the coefficient of the MA1 term is insignificant, while all other four are significant. For the ARIMA(2,2,2) model, only the coefficient of the AR2 term is significant, with the other three coefficients insignificant.

Therefore, we choose the ARIMA(3,1,2) model as the optimal one.

SARIMA(0,1,1)x(0,1,1)[12]

Code
model_output <- capture.output(sarima(visitors_ts, 0,1,1,0,1,1,12))

Code
# Locate where the coefficient table begins. grep() returns an integer
# vector: multiple hits would make `start_line:end_line` silently use only
# the first element with a warning, and zero hits (integer(0)) would error,
# so take the first match explicitly and fall back to the full output.
start_line <- grep("Coefficients", model_output)[1]
if (is.na(start_line)) start_line <- 1  # No match: print everything
end_line <- length(model_output)  # Last line of the captured output
# Print only the summary portion (coefficients through information criteria)
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
     Estimate     SE  t.value p.value
ma1    0.2515 0.0667   3.7693   2e-04
sma1  -1.0000 0.0919 -10.8779   0e+00

sigma^2 estimated as 0.03367981 on 225 degrees of freedom 
 
AIC = -0.3681259  AICc = -0.3678899  BIC = -0.3228623 
 

SARIMA(2,0,2)x(0,0,2)[12]

Code
model_output <- capture.output(sarima(visitors_ts, 2,0,2,0,0,2,12))

Code
# Locate where the coefficient table begins. grep() returns an integer
# vector: multiple hits would make `start_line:end_line` silently use only
# the first element with a warning, and zero hits (integer(0)) would error,
# so take the first match explicitly and fall back to the full output.
start_line <- grep("Coefficients", model_output)[1]
if (is.na(start_line)) start_line <- 1  # No match: print everything
end_line <- length(model_output)  # Last line of the captured output
# Print only the summary portion (coefficients through information criteria)
cat(model_output[start_line:end_line], sep = "\n")
Coefficients: 
      Estimate     SE t.value p.value
ar1     0.0223 0.1352  0.1647  0.8693
ar2     0.7591 0.1105  6.8687  0.0000
ma1     1.0862 0.1549  7.0138  0.0000
ma2     0.1426 0.0987  1.4442  0.1500
sma1    0.2468 0.0662  3.7273  0.0002
sma2    0.1422 0.0583  2.4375  0.0155
xmean  15.3529 0.1815 84.5983  0.0000

sigma^2 estimated as 0.0434599 on 233 degrees of freedom 
 
AIC = -0.2186667  AICc = -0.2166552  BIC = -0.1026454 
 

The model diagnostics results for both models are similar. The results are as follows:

The Residual Plot shows nearly consistent fluctuation around zero, but there is a significant oscillation in 2020, indicating the need for a more advanced model to account for special events.

The Autocorrelation Function (ACF) of the residuals shows mostly independence.

The Q-Q Plot indicates that the residuals follow a near-normal distribution, with minor deviations at the tails, which is typical in time series data.

However, the Ljung-Box test results are different. For the SARIMA(0,1,1)x(0,1,1)[12] model, the Ljung-Box test p-values are all above the 0.05 significance level. For the SARIMA(2,0,2)x(0,0,2)[12] model, all p-values are below the threshold, indicating that significant autocorrelation remains in its residuals.

Therefore, we choose the SARIMA(0,1,1)x(0,1,1)[12] model as the optimal model, where all coefficients are significant.

The equation of the best model for each time series:

  • U.S. Dollar Index:

ARIMA(0,1,0)
\[ (1-B) x_t= w_t \]

where \(x_t\) is the original time series and \(w_t\) is the Gaussian white noise process.

  • Trade Balance:

SARIMA(0,1,1)x(0,1,1)[4] \[ \left(1-B^{4}\right)(1-B) x_t=\Theta (B^{4}) \theta (B) w_t \]

where \[ \begin{align} \Theta(B^4) &= 1 + 0.3818B^{4} \\ \theta(B) &= 1 -B \end{align} \]

  • GDP:

ARIMA(0,2,1)
\[ (1-B)^2 x_t=\theta (B) w_t \]

where \[ \theta(B) = 1 -0.921B \]

  • Unemployment Rate:

ARIMA(1,1,1)
\[ \phi (B) (1-B) x_t=\theta (B) w_t \]

where \[ \begin{align} \phi (B) &= 1 + 0.5485 B \\ \theta(B) &= 1 + 0.6962 B \end{align} \]

  • CPI:

SARIMA(1,1,2)x(0,1,1)[12]
\[ \phi(B) \left(1-B^{12}\right)(1-B) x_t=\Theta (B^{12}) \theta (B) w_t \]

where \[ \begin{align} \phi (B) &= 1 -0.9485 B \\ \Theta(B^{12}) &= 1 - 0.9915 B^{12} \\ \theta(B) &= 1 -0.3778 B -0.4161 B^{2} \end{align} \]

  • S&P 500:

ARIMA(4,1,4) \[ \phi (B) (1-B) x_t=\theta (B) w_t + \mu \]

where \[ \begin{align} \mu &= 0.9567 \\ \phi (B) &= 1 + 0.2465 B - 0.7864 B^2 + 0.3998 B^3 + 0.8560 B^4\\ \theta(B) &= 1 + 0.1942 B - 0.7627 B^2 + 0.4428 B^3 + 0.7686 B^4 \end{align} \]

  • Gold Price:

ARIMA(0,1,0)
\[ (1-B) x_t= w_t + \mu \quad \text{where} \quad \mu=0.0018 \]

  • Global Commodity Price:

ARIMA(0,1,0)
\[ (1-B) x_t= w_t \]

  • House Price:

ARIMA(3,1,2) \[ \phi (B) (1-B) x_t=\theta (B) w_t + \mu \]

where \[ \begin{align} \mu &= 5.3341 \\ \phi (B) &= 1 - 0.7153 B + 0.6927 B^2 - 0.7350 B^3 \\ \theta(B) &= 1 + 0.1982 B + 0.3248 B^2 \end{align} \]

  • International Visitors:

SARIMA(0,1,1)x(0,1,1)[12]
\[ \left(1-B^{12}\right)(1-B) x_t=\Theta (B^{12}) \theta (B) w_t \]

where \[ \begin{align} \Theta(B^{12}) &= 1 - 1.0000 B^{12} \\ \theta(B) &= 1 + 0.2515 B \end{align} \]

Forecasting

Code
fit <- Arima(dxy_ts, order = c(0,1,0), include.drift = TRUE)
forecast_result <- forecast(fit, h = 252)
# Random walk with drift; h = 252 presumably covers one year of daily
# trading data — confirm against the series frequency
autoplot(forecast_result) +
  ggtitle("ARIMA(0,1,0) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(balance_ts, order = c(0,1,1), seasonal = list(order = c(0,1,1), period = 4))
forecast_result <- forecast(fit, h = 8)
# Quarterly seasonal model (period 4); h = 8 forecasts two years ahead
autoplot(forecast_result) +
  ggtitle("SARIMA(0,1,1)x(0,1,1)[4] Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(gdp_ts, order = c(0,2,1))
forecast_result <- forecast(fit, h = 12)
# Twice-differenced MA(1) model for GDP; forecast 12 periods ahead
autoplot(forecast_result) +
  ggtitle("ARIMA(0,2,1) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(unem_ts, order = c(1,1,1))
forecast_result <- forecast(fit, h = 12)
# ARMA(1,1) on first differences of the unemployment rate; 12 steps ahead
autoplot(forecast_result) +
  ggtitle("ARIMA(1,1,1) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(cpi_ts, order = c(1,1,2), seasonal = list(order = c(0,1,1), period = 12))
forecast_result <- forecast(fit, h = 36)
# Monthly seasonal model (period 12); h = 36 forecasts three years ahead
autoplot(forecast_result) +
  ggtitle("SARIMA(1,1,2)X(0,1,1)[12] Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(sp5_ts, order = c(4,1,4), include.drift=TRUE)
forecast_result <- forecast(fit, h = 252)
# High-order ARMA with drift; h = 252 presumably one year of trading days
autoplot(forecast_result) +
  ggtitle("ARIMA(4,1,4) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(xau_ts, order = c(0,1,0), include.drift=TRUE)
forecast_result <- forecast(fit, h = 52)
# Random walk with drift; h = 52 presumably one year of weekly data —
# confirm against the series frequency
autoplot(forecast_result) +
  ggtitle("ARIMA(0,1,0) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(gsci_ts, order = c(0,1,0), include.drift=TRUE)
forecast_result <- forecast(fit, h = 252)
# Random walk with drift; h = 252 presumably one year of trading days
autoplot(forecast_result) +
  ggtitle("ARIMA(0,1,0) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(house_ts, order = c(3,1,2))
forecast_result <- forecast(fit, h = 12)
# Optimal house-price model chosen in diagnostics above; 12 steps ahead
autoplot(forecast_result) +
  ggtitle("ARIMA(3,1,2) Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

Code
fit <- Arima(visitors_ts, order = c(0,1,1), seasonal = list(order = c(0,1,1), period = 12))
forecast_result <- forecast(fit, h = 36)
# Monthly seasonal model (period 12); h = 36 forecasts three years ahead
autoplot(forecast_result) +
  ggtitle("SARIMA(0,1,1)X(0,1,1)[12] Forecast") +
  xlab("Time") +
  ylab("Predicted Values") +
  theme_minimal()

The forecast plots for all series show that our optimal models perform well. The prediction intervals, shown in the shaded blue areas, represent the range within which the future values are expected to fall. The widening of these intervals as the forecast period extends reflects increasing uncertainty.

Comparison with Benchmark Methods

Write a function

Code
# Print training-set accuracy metrics and plot the original series together
# with forecasts from four benchmark methods (Mean, Naïve, Seasonal Naïve,
# Drift) and the fitted ARIMA/SARIMA model.
#
# Args:
#   forecast_result: a `forecast` object, precomputed as forecast(fit, h);
#                    plotted as the "Fit" series
#   ts:              the original time series (a `ts` object)
#   h:               forecast horizon used for the benchmark methods
#   fit:             retained for backward compatibility; no longer used —
#                    the precomputed `forecast_result` is plotted directly
#                    instead of recomputing forecast(fit, h = h)
#
# Returns: a ggplot object (printed when the call is at top level).
plot_forecasts <- function(forecast_result, ts, h, fit) {
  print(accuracy(forecast_result))
  # Plot the forecasts using Mean, Naïve, SNaïve, Drift Methods, and the fit
  autoplot(ts) +
    autolayer(meanf(ts, h = h), series = "Mean", PI = FALSE) +
    autolayer(naive(ts, h = h), series = "Naïve", PI = FALSE) +
    autolayer(snaive(ts, h = h), series = "SNaïve", PI = FALSE) +
    autolayer(rwf(ts, drift = TRUE, h = h), series = "Drift", PI = FALSE) +
    autolayer(forecast_result, series = "Fit", PI = FALSE) +
    xlab("Date") +
    ylab("Predicted Values") +
    guides(colour = guide_legend(title = "Forecast Methods")) +
    theme_minimal()
}
Code
# U.S. Dollar Index: ARIMA(0,1,0) with drift vs. benchmark methods
fit <- Arima(dxy_ts, order = c(0,1,0), include.drift = TRUE)
forecast_result <- forecast(fit, h = 252)
plot_forecasts(forecast_result, dxy_ts, 252, fit)
                       ME        RMSE         MAE        MPE       MAPE
Training set 8.733295e-07 0.004763563 0.003531088 -5.079e-05 0.07875213
                   MASE       ACF1
Training set 0.05939571 0.01503592

Code
# Trade Balance: SARIMA(0,1,1)x(0,1,1)[4] vs. benchmark methods
fit <- Arima(balance_ts, order = c(0,1,1), seasonal = list(order = c(0,1,1), period = 4))
forecast_result <- forecast(fit, h = 8)
plot_forecasts(forecast_result, balance_ts, 8, fit)
                    ME     RMSE      MAE       MPE     MAPE      MASE
Training set -759.8004 13207.78 9733.733 0.4284101 6.512908 0.4244237
                    ACF1
Training set -0.02815605

Code
# GDP: ARIMA(0,2,1) vs. benchmark methods
fit <- Arima(gdp_ts, order = c(0,2,1))
forecast_result <- forecast(fit, h = 12)
plot_forecasts(forecast_result, gdp_ts, 12, fit)
                   ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
Training set 32.67852 332.9493 147.9156 0.1147268 0.756791 0.1612916 -0.1421698

Code
# Unemployment Rate: ARIMA(1,1,1) vs. benchmark methods
fit <- Arima(unem_ts, order = c(1,1,1))
forecast_result <- forecast(fit, h = 12)
plot_forecasts(forecast_result, unem_ts, 12, fit)
                       ME       RMSE        MAE        MPE     MAPE      MASE
Training set -0.001293724 0.08819836 0.03253087 -0.1672727 1.841263 0.1628627
                     ACF1
Training set 0.0003963382

Code
# CPI: SARIMA(1,1,2)x(0,1,1)[12] vs. benchmark methods
fit <- Arima(cpi_ts, order = c(1,1,2), seasonal = list(order = c(0,1,1), period = 12))
forecast_result <- forecast(fit, h = 36)
plot_forecasts(forecast_result, cpi_ts, 36, fit)
                     ME      RMSE       MAE        MPE      MAPE       MASE
Training set 0.01658077 0.6646226 0.4823797 0.00423208 0.2025314 0.07696065
                   ACF1
Training set 0.02124335

Code
# S&P 500: ARIMA(4,1,4) with drift vs. benchmark methods
fit <- Arima(sp5_ts, order = c(4,1,4), include.drift=TRUE)
forecast_result <- forecast(fit, h = 252)
plot_forecasts(forecast_result, sp5_ts, 252, fit)
                      ME     RMSE      MAE         MPE      MAPE       MASE
Training set -0.02174175 27.98292 17.39177 -0.02943003 0.7786464 0.05055928
                    ACF1
Training set -0.01040333

Code
# Gold Price: ARIMA(0,1,0) with drift vs. benchmark methods
fit <- Arima(xau_ts, order = c(0,1,0), include.drift=TRUE)
forecast_result <- forecast(fit, h = 52)
plot_forecasts(forecast_result, xau_ts, 52, fit)
                       ME       RMSE        MAE          MPE      MAPE     MASE
Training set 5.803826e-06 0.02333315 0.01745149 0.0001973932 0.2473152 0.122605
                    ACF1
Training set 0.003597321

Code
# Global Commodity Price: ARIMA(0,1,0) with drift vs. benchmark methods
fit <- Arima(gsci_ts, order = c(0,1,0), include.drift=TRUE)
forecast_result <- forecast(fit, h = 252)
plot_forecasts(forecast_result, gsci_ts, 252, fit)
                       ME       RMSE        MAE          MPE      MAPE
Training set 2.379987e-06 0.01430894 0.01014429 -0.000263987 0.1665067
                   MASE         ACF1
Training set 0.05461529 -0.001120247

Code
# House Price: ARIMA(3,1,2) vs. benchmark methods
fit <- Arima(house_ts, order = c(3,1,2))
forecast_result <- forecast(fit, h = 12)
plot_forecasts(forecast_result, house_ts, 12, fit)
                   ME     RMSE      MAE       MPE      MAPE      MASE
Training set 0.520081 4.429123 2.728648 0.1197514 0.6335488 0.1106083
                    ACF1
Training set -0.01834026

Code
# International Visitors: SARIMA(0,1,1)x(0,1,1)[12] vs. benchmark methods
fit <- Arima(visitors_ts, order = c(0,1,1), seasonal = list(order = c(0,1,1), period = 12))
forecast_result <- forecast(fit, h = 36)
plot_forecasts(forecast_result, visitors_ts, 36, fit)
                        ME     RMSE        MAE         MPE      MAPE      MASE
Training set -0.0005064716 0.178516 0.06267839 -0.01008101 0.4338407 0.2188027
                    ACF1
Training set -0.00867727

Our models generally outperform the benchmark methods, although sometimes the Seasonal Naïve method better captures the seasonal pattern. By comparing the forecast results of our model with those benchmark methods, it is clear that our model offers more precise and reliable predictions. The accuracy metrics (ME, RMSE, MAE, MPE, MAPE, MASE, ACF1) further confirm the superiority of our model, making it the optimal choice for forecasting in this case.