# Set the knitr chunk option `echo = TRUE` as the default for subsequent
# chunks (knitr then includes each chunk's source code in the rendered output).
knitr::opts_chunk$set(echo = TRUE)
library(mlr3temporal)
library(mlr3)
library(ggplot2)
library(forecast)

# Yearly climate summaries for the MUC and NYC stations.
# Source: National Centers for Environmental Information,
# National Oceanic and Atmospheric Administration
muc = read.csv("https://www.ncei.noaa.gov/data/global-summary-of-the-year/access/GM000004199.csv")
nyc = read.csv("https://www.ncei.noaa.gov/data/global-summary-of-the-year/access/USW00094728.csv")

# Keep the year plus the precipitation and temperature columns.
climate_cols = c("PRCP", "TAVG", "TMIN", "TMAX")
muc = muc[, c("DATE", climate_cols)]
nyc = nyc[, c("DATE", climate_cols)]

# Show the NYC rows without an average temperature.
nyc[is.na(nyc$TAVG), ]
# remove first year
nyc = nyc[-1, ]

# First and last year of each series (rows are taken in file order).
muc_start = muc$DATE[1]
muc_end = muc$DATE[nrow(muc)]
nyc_start = nyc$DATE[1]
nyc_end = nyc$DATE[nrow(nyc)]

# Return `df` with one row per year in `start:end`, sorted by DATE.
# Years absent from `df$DATE` are added with NA in every other column.
#
# A left join is used instead of rbind()-ing a hand-built NA data frame:
# data.frame(DATE = <zero-length>, PRCP = NA, ...) errors when no year is
# missing, whereas merge() simply returns the input years unchanged.
fill_missing_years = function(df, start, end) {
  merge(data.frame(DATE = start:end), df, by = "DATE", all.x = TRUE)
}

# Fill gaps in BOTH series. ts() below is given explicit start/end years, so
# a series with a missing year would otherwise be silently recycled and
# shifted against the calendar. For a series without gaps this is a no-op.
muc = fill_missing_years(muc, muc_start, muc_end)
nyc = fill_missing_years(nyc, nyc_start, nyc_end)
head(muc)

# Keep only the precipitation and temperature columns; the year is carried
# by the time-series index from here on.
muc = muc[, climate_cols]
nyc = nyc[, climate_cols]

muc_ts = ts(muc, start = muc_start, end = muc_end, frequency = 1)
nyc_ts = ts(nyc, start = nyc_start, end = nyc_end, frequency = 1)
# Forecasting task on the NYC series with the three temperature columns as
# targets.
nyc_targets = c("TAVG", "TMIN", "TMAX")
task = TaskRegrForecast$new(id = "nyc", backend = nyc_ts, target = nyc_targets)
task$print()
# Fit a VAR forecast learner on the first 140 rows of the task.
train_rows = seq_len(140)
learner = LearnerRegrForecastVAR$new()
learner$train(task, row_ids = train_rows)

# Display the fitted model.
learner$model
# Predict rows 141 to 150 of the task with the trained learner, then show
# the point predictions and their standard errors.
holdout_rows = 141:150
p = learner$predict(task, row_ids = holdout_rows)
p$response
p$se
# Resample the current task/learner pair with the "RollingWindowCV" strategy.
# `FALSE` is spelled out: `F` is an ordinary variable that can be reassigned,
# whereas `FALSE` is a reserved word.
# NOTE(review): fixed_window = FALSE presumably lets the training window grow
# instead of slide - confirm against the mlr3temporal documentation.
rr = rsmp("RollingWindowCV", fixed_window = FALSE)
rr$instantiate(task)

# The result is stored under the name `resample`, which shadows the function
# of the same name for value lookups; calls to resample() still resolve to
# the function because R skips non-function bindings in call position.
resample = resample(task, learner, rr, store_models = TRUE)

# Show the predictions of the first two resampling iterations.
resample$predictions()[1:2]
# Plot the task that is current at this point, titled as the NYC data.
autoplot(task) + ggtitle("NYC - Yearly Climate Data")

# Replace the task with a MUC task: TAVG is the target and PRCP stays in the
# backend as an additional column.
muc_backend = ts(muc[, c("TAVG", "PRCP")], start = muc_start, end = muc_end, frequency = 1)
task = TaskRegrForecast$new(id = "muc", backend = muc_backend, target = "TAVG")
autoplot(task) + ggtitle("MUC - Yearly Climate Data")

# Fit the auto-ARIMA forecast learner on rows 1 to 85 and display the model.
learner = LearnerRegrForecastAutoArima$new()
learner$train(task, row_ids = 1:85)
learner$model

# Predict rows 86 to 136 and show the standard errors.
p = learner$predict(task, row_ids = 86:136)
p$se

# Residual diagnostics of the fitted model (forecast package).
checkresiduals(learner$model)

# Forecast from the fitted model using PRCP of rows 131 to 136 as regressors.
# NOTE(review): the model was trained on rows 1:85, so a forecast presumably
# continues from row 86 while the regressors come from rows 131:136 - confirm
# that this row range is intended.
future_prcp = as.matrix(task$data(cols = "PRCP", rows = 131:136))
autoplot(forecast(learner$model, xreg = future_prcp)) + ylab("value")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.