# Make `echo = TRUE` the default option for all knitr chunks.
knitr::opts_chunk$set(echo = TRUE)

# Packages used in this vignette (one call per line so the chunk parses).
library(data.table)
library(mlr3temporal)
library(mlr3)
library(ggplot2)
library(forecast)
library(anytime)
In this vignette we are going to use the example of forecasting atmospheric data from Munich (muc) and New York City (nyc).
We are interested in forecasting three quantities: "TAVG", "TMIN" and "TMAX", the average, minimum and maximum temperature in a given year.
We first obtain the data from the NOAA (National Centers for Environmental Information, National Oceanic and Atmospheric Administration) website.
# Download the data (one CSV per station from the NOAA
# global-summary-of-the-year archive; requires network access)
muc = read.csv("https://www.ncei.noaa.gov/data/global-summary-of-the-year/access/GM000004199.csv")
nyc = read.csv("https://www.ncei.noaa.gov/data/global-summary-of-the-year/access/USW00094728.csv")

# And subset columns
columns = c("DATE", "PRCP", "TAVG", "TMIN", "TMAX")
muc = muc[, columns]
nyc = nyc[, columns]
The data contains a few missing values and some missing years. We will have to deal with them before forecasting.
# Add missing years.
#
# Drops rows in which every value is NA, appends an all-NA row for each year
# between the first and the last observed year that has no row yet, converts
# the year column to POSIXct and returns the rows ordered by date.
#
# @param data (data.frame) Data with one column holding the year as a number.
# @param date (character(1)) Name of that year column.
# @return A data.table sorted by the converted date column.
fill_missing = function(data, date) {
  data = as.data.frame(data)

  # Filter all-NA rows
  keep = rowSums(!is.na(data)) > 0
  data = data[keep, , drop = FALSE]

  # Add all missing dates from start to end of time-series
  dates = data[[date]]
  if (any(!is.na(dates))) {
    all_dates = seq(from = min(dates, na.rm = TRUE), to = max(dates, na.rm = TRUE))
    missing = setdiff(all_dates, dates)
    if (length(missing) > 0) {
      # Indexing rows with NA yields rows that are NA in every column,
      # whatever columns `data` has; only the date column is filled in.
      filler = data[rep(NA_integer_, length(missing)), , drop = FALSE]
      filler[[date]] = missing
      data = rbind(data, filler)
    }
  }
  rownames(data) = NULL

  # Anchor every year at 1 January. Parsing with "%Y" alone takes the month
  # and day from the current date, so results would differ between runs.
  data[[date]] = as.POSIXct(paste0(data[[date]], "-01-01"), format = "%Y-%m-%d")

  data.table(data[order(data[[date]]), , drop = FALSE])
}

muc = fill_missing(muc, "DATE")
nyc = fill_missing(nyc, "DATE")
We can now supply this `data.table` to create a new `TaskRegrForecast`.
# Forecast task on the NYC data with three target columns; "DATE" is passed
# as the date column.
task = TaskRegrForecast$new(
  id = "nyc",
  backend = nyc,
  target = c("TAVG", "TMIN", "TMAX"),
  date_col = "DATE"
)
task$print()
# Train the VAR forecast learner on rows 1 to 140 of the task.
learner = LearnerRegrForecastVAR$new()
learner$train(task, row_ids = 1:140)

# Show the fitted model.
learner$model
# Predict rows 141 to 150 and show the `response` field of the prediction.
p = learner$predict(task, row_ids = 141:150)
p$response
# Resample the learner on the task with the "RollingWindowCV" strategy.
# `FALSE` is spelled out because `F` is an ordinary, reassignable variable.
rr = rsmp("RollingWindowCV", fixed_window = FALSE)
rr$instantiate(task)

# The result is stored under the name `resample`; later calls to resample()
# still find the function, because R skips non-function bindings when it
# looks up the function of a call.
resample = resample(task, learner, rr, store_models = TRUE)

# Show the first two elements of the stored predictions.
resample$predictions()[1:2]
# Plot the current task.
autoplot(task) + ggtitle("NYC - Yearly Climate Data")

# New task on the Munich data with a single target column.
task = TaskRegrForecast$new(
  id = "muc",
  backend = ts_dt(muc),
  target = "TAVG",
  date_col = "DATE"
)
autoplot(task) + ggtitle("MUC - Yearly Climate Data")

# Train the auto-ARIMA forecast learner on rows 1 to 85.
learner = LearnerRegrForecastAutoArima$new()
learner$train(task, row_ids = 1:85)
learner$model

# Predict rows 86 to 136 and show the `se` field of the prediction.
p = learner$predict(task, row_ids = 86:136)
p$se

# Residual diagnostics for the fitted model.
checkresiduals(learner$model)

# Forecast from the fitted model, passing the PRCP values of rows 131 to 136
# as the `xreg` matrix, and plot the result.
autoplot(forecast(learner$model, xreg = as.matrix(task$data(cols = "PRCP", rows = 131:136)))) +
  ylab("value")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.