#=================#
# Model Selection
#=================#
#=================#
# Data Processing
urban[ , , "LiqPrecip"] <- log(urban[ , , "LiqPrecip"] + 0.001)
is_urban <- (rowSums(!is.na(urban[, , "Tu"])) > 0)
urban_list <- urban[is_urban, , c("Ld", "LiqPrecip", "P", "PCO2", "Q", "Sd", "T", "Tu")] %>%
alply(1) %>%
map(~apply(.x, MARGIN = 2, scale))
# Tools for CV
library(cvTools)
get_oos_rmse <- function(x, formula, K, R) {
call <- call("lm", formula = formula)
folds <- cvFolds(nrow(x), K = K, R = R)
return(mean(cvTool(call, data = x, y = x$Tu, folds = folds)))
}
#=================#
# Part One: No seasonality component
# Model 1: full variables
m1 <- map_dbl(urban_list, ~get_oos_rmse(as.data.frame(.x), Tu ~ ., K = 10, R = 2))
# Model 2: Tu ~ t
m2 <- map_dbl(urban_list, ~get_oos_rmse(as.data.frame(.x), Tu ~ `T`, K = 10, R = 2))
# Compare models
df <- data.frame(m1, m2)
ggplot(df, aes(x = m1, y = m2)) +
geom_point(alpha = .3) +
theme_minimal() +
geom_abline(slope = 1, intercept = 0, col = "goldenrod") +
xlab("Full model") +
ylab("Simple model (only T)")
mean(df$m1 > df$m2)
# Where did Model 2 ourperform?
m2_wins <- df$m1 > df$m2
m2_wins_mat <- is_urban
m2_wins_mat[is_urban] <- is_urban[is_urban] + m2_wins
m2_wins_mat <- matrix(m2_wins_mat, nrow = 288)
image(m2_wins_mat)
#=================#
# Part Two: Only January
#=================#
# Data Processing
urban_jans <- urban[is_urban,
seq(1, 1152, 12),
c("Ld", "LiqPrecip", "P", "PCO2", "Q", "Sd", "Tu")] %>%
alply(1) %>%
map(~apply(.x, MARGIN = 2, scale))
# Model 1: full variables
m1_j <- map_dbl(urban_jans, ~get_oos_rmse(as.data.frame(.x), Tu ~ ., K = 10, R = 2))
# Model 2: Tu ~ t
m2_j <- map_dbl(urban_jans, ~get_oos_rmse(as.data.frame(.x), Tu ~ `T`, K = 10, R = 2))
# Compare models
df <- data.frame(m1_j, m2_j)
ggplot(df, aes(x = m1_j, y = m2_j)) +
geom_point(alpha = .3) +
theme_minimal() +
geom_abline(slope = 1, intercept = 0, col = "goldenrod") +
xlab("Full model") +
ylab("Simple model (only T)")
mean(df$m1_j > df$m2_j)
# Where did Model 2 ourperform?
m2_wins <- df$m1_j > df$m2_j
m2_wins_mat <- is_urban
m2_wins_mat[is_urban] <- is_urban[is_urban] + m2_wins
m2_wins_mat <- matrix(m2_wins_mat, nrow = 288)
image(m2_wins_mat)
#=================#
# Part Three: With montly dummy var
#=================#
# NB: this data does not get scaled
d <- t(array(rep(rep(1:12, times = 96), times = dim(urban)[1]),
dim = c(1152, length(urban_list))))
urban_cube <- abind(d, urban[is_urban, , c("Ld", "LiqPrecip", "P", "PCO2", "Q", "Sd", "Tu")])
dimnames(urban_cube)[[3]][1] <- "month"
# urban_cube[, , "month"] <- as.factor(urban_cube[, , "month"])
urban_list <- alply(urban_cube, 1)
# Model 1: full variables
m1_m <- map_dbl(urban_list, ~get_oos_rmse(as.data.frame(.x),
Tu ~ Ld + LiqPrecip + P + PCO2 + Q + Sd + as.factor(month),
K = 10, R = 1))
# Model 2: full variables plus all two-way interactions
m2_m <- map_dbl(urban_list, ~get_oos_rmse(as.data.frame(.x),
Tu ~ Ld + LiqPrecip + P + PCO2 + Q + Sd + as.factor(month) +
as.factor(month):Ld + as.factor(month): LiqPrecip + as.factor(month):P +
as.factor(month):PCO2 + as.factor(month):Q + as.factor(month):Sd,
K = 10, R = 1))
library(stringr)
latlon <- dimnames(urban)[[1]] %>%
str_split(", ") %>%
unlist()
lat <- as.integer(latlon[seq(1, length(latlon), 2)])
lon <- as.integer(latlon[seq(2, length(latlon), 2)])
lat <- lat[is_urban]
lon <- lon[is_urban]
lon[lon > 180] <- lon[lon > 180] - 360
map_df_m1 <- data.frame(lat = lat,
lon = lon,
RMSE = c(m1_m))
map_df_m2 <- data.frame(lat = lat,
lon = lon,
RMSE = c(m2_m))
#=================#
# Part Four: With seasonality dummy vars
#=================#
# Create variable
d <- t(array(rep(rep(c(1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 1), times = 96), times = dim(urban)[1]),
dim = c(1152, length(urban_list))))
urban2 <- abind(urban, d, along = 3)
urban_list <- urban2[is_urban, , c("Ld", "LiqPrecip", "P", "PCO2", "Q", "Sd", "T", "Tu")] %>%
alply(1) %>%
map(~apply(.x, MARGIN = 2, scale))
# Fit Models
model_list_2 <- map(urban_list,
~cv.lm(data = as.data.frame(.x), form.lm = formula(Tu ~ .), m = 5, printit = FALSE))
#=================#
# Old code
# # cv with caret (i think it's in-sample RMSE)
# library(caret)
# m <- train(Tu ~ .,
# data = u,
# method = "lm",
# trControl = trainControl(method = "repeatedcv",
# number = 10,
# repeats = 50))
#
# model_list_1 <- map(urban_list[1:100],
# ~train(Tu ~ .,
# data = .x,
# method = "lm",
# trControl = trainControl(method = "cv", number = 5, savePredictions=TRUE)))
#
#
# # homegrown cv
#
# folds <- createFolds(u$Tu, k = 10)
#
# get_cv <- function() {
# train_df <- u[-folds[[1]], ]
# test_df <- u[folds[[1]], ]
# m1 <- lm(Tu ~ ., data = train_df)
# y_hat <- predict(m1, new_data = test_df)
# test$"Tu" - y_hat
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.