rm(list = ls()) knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center", fig.asp = 9/16, fig.width = 7, warning = FALSE )
library(tidyverse) library(tidymodels) library(magrittr) library(ggplot2) library(lubridate) library(stringr) library(splines) library(glue) files <- dir( "~/GitHub/tidynamics/vignettes/funcs/pred", full.names = TRUE ) for (i in 1:length(files)) { source(files[i]) }
## The data is stored in list li <- readRDS("~/GitHub/tidynamics/data/soenderborg.RDS") ## Change the column names in `li$Gnwp` like "k1" to "t1" for (i in names(li$Gnwp)) { li$Gnwp[paste0("t", str_sub(i, 2, -1))] = li$Gnwp[i] }
#' Get the first character in the string str_sub_1 <- function(chr){ i <- NA if (str_sub(chr, 1, 1) == "k") { i <- "t_a.p" } else { i <- "g.p" } return(i) } #' Get the value of step ahead get_ahead <- function(chr){ return(strtoi(str_sub(chr, 2, -1))) } ti <- as_tibble( cbind( data.frame( "time" = li$t, "t_a" = li$Ta, "g" = li$G), li$Tanwp, li$Gnwp[, 50:98], "ph1" = li$Ph1, "ph2" = li$Ph2, "ph3" = li$Ph3 ) ) rm(li) ti %>% # Check if `time` is the primary key count(time) %>% {nrow(filter(., n > 1)) == 0} li <- list() li$time <- ti %>% mutate(at = 1:nrow(.)) %>% mutate(fo = 1:nrow(.)) %>% dplyr::select(at, fo, time) li$obs <- ti %>% mutate(fo = 1:nrow(.)) %>% dplyr::select(fo, t_a, g, ph1, ph2, ph3) li$pred <- ti %>% mutate(at = 1:nrow(.)) %>% dplyr::select(at, 4:90) %>% gather(-at, key = "ahead_chr", value = "pred") %>% mutate(whi = map_chr(ahead_chr, str_sub_1)) %>% mutate(ahead = map_int(ahead_chr, get_ahead)) %>% dplyr::select(-ahead_chr) %>% mutate(fo = at + ahead) %>% spread(key = whi, value = pred) %>% arrange(at) # saveRDS(li, "~GitHub/tidynamics/data/soenderborg_tidy.RDS")
li$pred %>% left_join(li$obs, by = "fo") %>% filter(ahead %in% c(1, 24)) %>% ggplot() + geom_point(mapping = aes(x = t_a, y = t_a.p, color = ahead)) + geom_abline(aes(intercept = 0, slope = 1), color = "red") + labs( title = "Obs, 1 and 24 Step Ahead Pred of Ambient Temp", x = "Observation (Celsius Degree)", y = "Prediction (Celsius Degree)" ) li$obs %>% left_join(li$time, by = "fo") %>% ggplot() + geom_line(mapping = aes(x = time, y = t_a)) + labs( title = "Obs of Ambient Temp", x = "Time (Day)", y = "Temp (Celsius Degree)" )
#' Get recipe from split get_li_dm_1 <- function(split, coef_t, coef_g) { rec <- training(split) %>% recipe(ph3 ~ t_a.p + g.p) %>% step_mutate(t_a.p.lp = lp_vector(t_a.p, a1 = coef_t)) %>% step_mutate(g.p.lp = lp_vector(g.p, a1 = coef_g)) %>% # step_corr(all_predictors()) %>% step_center(all_predictors(), -all_outcomes()) %>% step_scale(all_predictors(), -all_outcomes()) %>% prep() dm_train <- juice(rec) dm_test <- bake(rec, testing(split)) return(list("train" = dm_train, "test" = dm_test, "rec" = rec)) } # Get the model from `ti_train` get_mod <- function(ti_train = li_dm$train) { mod_lm <- linear_reg() %>% set_engine("lm") %>% fit(ph3 ~ ., data = ti_train) return(mod_lm) } #' Evaluate the training and testing design matrix get_rmse <- function(mod_lm, li_dm) { rmse <- li_dm$test %>% bind_cols(predict(mod_lm, .)) %>% metrics(truth = ph3, estimate = .pred) %>% `[[`(1, 3) return(rmse) } #' Evaluate the set of parameters for linear regression val_para <- function(split, coef_t, coef_g, func_li_dm = get_li_dm_1) { li_dm <- split %>% func_li_dm(coef_t, coef_g) rmse <- li_dm %>% {get_mod(.$train)} %>% get_rmse(li_dm) return(rmse) } #' To cross validate the parameters cv_para <- function(para, ti_cv, func_li_dm = get_li_dm_1) { rmse_mean <- ti_cv$splits %>% # All the splits for cross validation # unlist(use.names = FALSE) %>% map_dbl( val_para, coef_t = para[1], coef_g = para[2], func_li_dm = func_li_dm ) %>% # Apply the val_para to split one by one mean() rmse_mean %>% {glue(" coef_t = {para[1]} ; coef_g = {para[2]} ; rmse_mean = {.} ; ")} %>% message() return(rmse_mean) }
## Split to training and testing sets ## Select the heating load from house 3 split_a1 <- li$pred %>% filter(ahead == 1) %>% left_join(li$time, by = "fo") %>% left_join(li$obs, by = "fo") %>% mutate(hour = as.numeric(hour(.$time))) %>% dplyr::select(fo, hour, t_a.p, g.p, ph3) %>% initial_split(0.6) ti_cv <- split_a1 %>% training() %>% vfold_cv(v = 10, repeats = 1) ## Test the `get_li_dm_1` function ti_cv %>% {.$splits[[1]]} %>% {get_li_dm_1(., 0.9, 0.9)} %>% print() ## rec <- ti_cv %>% {.$splits[[1]]} %>% {get_li_dm_1(., 0.9, 0.9)} %>% {.$rec} ## Test the `cv_para` function c(0.9, 0.9) %>% cv_para(ti_cv, get_li_dm_1)
## Optimize the choice of low-pass filtering coefficients result <- optim( c(t = 0.98, g = 0.98), cv_para, lower = c(0.3, 0.1), upper = c(0.999, 0.999), method = "L-BFGS-B", ti_cv = ti_cv, func_li_dm = get_li_dm_1 ) %>% print() ## Result: ## coef_t = 0.8688536 ## coef_g = 0.1000000 ## value = 0.9937975
li_dm_1 <- split_a1 %>% get_li_dm_1(coef_t = result$par[[1]], coef_g = result$par[[2]]) mod_lm_1 <- li_dm_1 %>% {get_mod(.$train)} li_dm_1$test %>% bind_cols(predict(mod_lm_1, .)) %>% metrics(truth = ph3, estimate = .pred) %>% `[[`(1, 3) li_dm_1$test %>% bind_cols(predict(mod_lm_1, .), "index" = as.numeric(rownames(.))) %>% gather(ph3, .pred, key = "type", value = "heat_load") %>% dplyr::select(index, heat_load, type) %>% ggplot() + geom_line(aes(x = index, y = heat_load, color = type)) + labs( title = "Obs and Linear Reg Pred of Heat Load in House 3", subtitle = paste0( "with 1-step forecasted ambient temp and ", "1-step forecasted radiation as input" ), x = "index", y = "heat_load (W)" )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.