# Default knitr chunk option for this document: echo = TRUE, i.e. the source
# code of each chunk is included in the rendered output.
knitr::opts_chunk$set(echo = TRUE)

Load and prepare data

library(mlr3temporal)
library(mlr3)
library(ggplot2)
library(forecast)

# Data source: National Centers for Environmental Information (NCEI),
# National Oceanic and Atmospheric Administration (NOAA).
# One "global summary of the year" CSV is downloaded per station and stored
# as `muc` and `nyc` respectively.
muc_url = "https://www.ncei.noaa.gov/data/global-summary-of-the-year/access/GM000004199.csv"
nyc_url = "https://www.ncei.noaa.gov/data/global-summary-of-the-year/access/USW00094728.csv"

# Year column plus the four measurement columns used in this document.
station_cols = c("DATE", "PRCP", "TAVG", "TMIN", "TMAX")

muc = read.csv(muc_url)[, station_cols]
nyc = read.csv(nyc_url)[, station_cols]

# Print the NYC rows that have no TAVG value ...
nyc[is.na(nyc$TAVG), ]
# ... then drop the first NYC row (the first year in the file).
# NOTE(review): presumably the first year is the row printed above - confirm.
nyc = nyc[-1, ]

# First and last DATE value of each station's data.
muc_start = muc[1, "DATE"]
muc_end = muc[nrow(muc), "DATE"]

nyc_start = nyc[1, "DATE"]
nyc_end = nyc[nrow(nyc), "DATE"]

# Add an all-NA row to `df` for every year in `years`, then sort by DATE.
#
# df:    data.frame with the columns DATE, PRCP, TAVG, TMIN and TMAX.
# years: DATE values that are absent from `df`; may be empty.
# Returns `df` including the added rows, ordered by DATE.
fill_missing_years = function(df, years) {
  # Guard the empty case: data.frame() cannot combine a zero-length DATE
  # column with length-one NA columns and would stop with an error.
  if (length(years) > 0L) {
    filler = data.frame(DATE = years, PRCP = NA, TAVG = NA, TMIN = NA, TMAX = NA)
    df = rbind(df, filler)
  }
  df[order(df$DATE), ]
}

# Years between the first and last NYC record that have no row. Kept under
# its own name so the result is not overwritten by the MUC check below.
nyc_missing_years = setdiff(nyc_start:nyc_end, nyc$DATE)

# Same check for MUC.
all_years = muc_start:muc_end
missing_years = setdiff(all_years, muc$DATE)

# Pad both stations so that each has exactly one row per year between its
# start and end year. This is a no-op (apart from sorting) for a station
# without gaps.
nyc = fill_missing_years(nyc, nyc_missing_years)
muc = fill_missing_years(muc, missing_years)


# Show the first MUC rows.
head(muc)

# Keep only the precipitation and temperature columns; DATE is dropped here
# and the years are supplied through `start` / `end` below instead.
measurement_cols = c("PRCP", "TAVG", "TMIN", "TMAX")
muc = muc[measurement_cols]
nyc = nyc[measurement_cols]

# Yearly (frequency = 1) multivariate time series for each station.
muc_ts = ts(muc, frequency = 1, start = muc_start, end = muc_end)
nyc_ts = ts(nyc, frequency = 1, start = nyc_start, end = nyc_end)

Generate Task

# Forecast task with id "nyc" on the NYC time series, using the three
# temperature columns as targets.
nyc_targets = c("TAVG", "TMIN", "TMAX")
task = TaskRegrForecast$new(id = "nyc", backend = nyc_ts, target = nyc_targets)

# Print a summary of the task.
task$print()

Learner

# Create a VAR forecast learner and train it on rows 1 to 140 of the task.
var_train_rows = 1:140
learner = LearnerRegrForecastVAR$new()
learner$train(task, row_ids = var_train_rows)

# Print the fitted model.
learner$model

Predict

# Predict rows 141 to 150 of the task with the trained learner.
var_predict_rows = 141:150
p = learner$predict(task, row_ids = var_predict_rows)

# Print the `response` and `se` fields of the prediction.
p$response
p$se

Rolling Window CV

# Rolling-window cross-validation of `learner` on `task`.
# FALSE is spelled out: the shorthand `F` is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved constant.
# NOTE(review): fixed_window = FALSE presumably lets the training window
# grow over the iterations instead of sliding - confirm against the
# resampling's documentation.
rr = rsmp("RollingWindowCV", fixed_window = FALSE)
rr$instantiate(task)

# The result gets its own name so it does not mask the resample() function.
rr_result = resample(task, learner, rr, store_models = TRUE)

# Print the first two elements of the list returned by $predictions().
rr_result$predictions()[1:2]

Plotting

# Plot the task currently stored in `task` under the NYC title.
nyc_plot = autoplot(task)
nyc_plot + ggtitle("NYC - Yearly Climate Data")

# Replace `task` with a MUC task: a yearly series of the TAVG and PRCP
# columns, with TAVG as the only target.
muc_tavg_prcp_ts = ts(
  muc[c("TAVG", "PRCP")],
  start = muc_start,
  end = muc_end,
  frequency = 1
)
task = TaskRegrForecast$new(id = "muc", backend = muc_tavg_prcp_ts, target = "TAVG")

muc_plot = autoplot(task)
muc_plot + ggtitle("MUC - Yearly Climate Data")

# Create an AutoArima forecast learner, train it on rows 1 to 85 of the
# current task and print the fitted model.
arima_train_rows = 1:85
learner = LearnerRegrForecastAutoArima$new()
learner$train(task, row_ids = arima_train_rows)
learner$model

# Predict rows 86 to 136 and print the `se` field of the prediction.
arima_predict_rows = 86:136
p = learner$predict(task, row_ids = arima_predict_rows)
p$se


# Residual diagnostics of the fitted model (forecast package).
checkresiduals(learner$model)

# Forecast from the fitted model, passing the PRCP values of rows 131 to 136
# as the regressor matrix, and plot the result.
# NOTE(review): the model was trained on rows 1:85, but the regressor rows
# are 131:136 rather than the rows that follow the training window -
# confirm this is intended.
future_prcp = as.matrix(task$data(cols = "PRCP", rows = 131:136))
arima_forecast = forecast(learner$model, xreg = future_prcp)
autoplot(arima_forecast) + ylab("value")


mlr-org/mlr3forecasting documentation built on June 29, 2023, 11:57 p.m.