#' @title Notebook generator: BART
#'
#' @description
#' Generates a notebook for a BART model using the bikesharing data.
#'
#' @details
#' Uses default settings in dbarts. Is unstable.
#'
#' @param agc List of atomic prediction generation controllers. The
#' first element of the list gives the starting time (ie what
#' observation is considered as t = 1), the second element is the
#' minimum window length used for estimation, and the third one is a
#' boolean indicating if the estimation window is rolling or not.
#' @param include_intercept Whether to include an intercep in the
#' design matrix or not. Defaults to FALSE since the intercept breaks
#' the function
#' @param nrep Number of MCMC draws (after burn-in)
#' @param nburn Number of burn-in draws
#'
#' @import sn
#' @importFrom stats dnorm density lm
#' @export
bikes_bart <-function(
agc = list(1, 200, TRUE),
include_intercept = FALSE,
nrep = 10000,
nburn = 5000
) {
# Silence NSE NOTEs in R CMD check
bikes_d <- cgm <- chisq <- logcnt <- yr <- sandy1 <- sandy2 <- NULL
data <- matrix(oscbvar::bikes_d_log$logcnt, ncol = 1)
start_t <- agc[[1]]
window_length <- agc[[2]]
df <- gen_atomic_df()
T <- nrow(data)
Y_all <-as.matrix(data.frame(data[start_t:T, 1]))
cgm.level <- 0.95 # alpha
cgm.exp <- 2 # beta
num.trees <- 250 # S
prior.cov <- 0.01
sd.mu <- 2
control <- dbarts::dbartsControl(
verbose = FALSE,
keepTrainingFits = TRUE,
useQuantiles = FALSE,
keepTrees = TRUE,
n.samples = nrep,
n.cuts = 100L,
n.burn = nburn,
n.trees = num.trees,
n.chains = 1,
n.threads = 1,
n.thin = 1L,
printEvery = 1,
printCutoffs = 0L,
rngKind = "default",
rngNormalKind = "default",
updateState = FALSE
)
for (i in (window_length):(T - start_t)) {
print(sprintf("%i of %i", i-window_length+1, T-start_t+1-window_length))
if (agc[[3]]) {
j <- i + 1 - window_length
} else {
j <- 1
}
data <- subset(oscbvar::bikes_d_log, select = -c(logcnt, t))
if (i < 365) {
data <- subset(data, select = -c(yr, sandy1, sandy2))
} else if (i < 667) {
data <- subset(data, select = -c(sandy1, sandy2))
} else if (i == 667) {
data <- subset(data, select = -c(sandy2))
}
Y <- matrix(Y_all[j:i, 1])
Z <- data[j:i, ]
z <- data[i + 1, ]
y <- Y_all[i + 1, 1]
#xall <- cbind(matrix(Y_all), data) # why is this even here
sigmat <- cbind(Y, Z) # For some reason Sebastians formula no work
Sigma.OLS <- sigma(lm(Y ~ ., sigmat))^2
prior.sig <- c(NROW(Y)/2, 0.75)
bart_model <- dbarts::dbarts(
Y ~ .,
data = sigmat,
control = control,
tree.prior = cgm(cgm.exp, cgm.level),
node.prior = normal(sd.mu),
n.samples = nrep,
weights = rep(1, NROW(Y)),
sigma = sqrt(Sigma.OLS[1]),
resid.prior = chisq(prior.sig[[1]], prior.sig[[2]])
)
est_mod <- bart_model$run()
preds <- bart_model$predict(z, NULL)[1,]
kernel_density <- density(preds)
log_score_st <- function(post_pred_draws, y_new) {
obj <- sn::selm(post_pred_draws ~ 1, family = "st")
para <- as.list(coef(obj, param.type = "DP"))
sn::dst(
y_new,
xi = para$xi,
omega = para$omega,
alpha = para$alpha,
nu = para$nu,
log = TRUE
)
}
log_pred_dens_bart <- log_score_st(preds, y)
df[(i + 1 - window_length), "pmean"] <- mean(preds)
df[(i + 1 - window_length), "lpdens"] <- log_pred_dens_bart
df[(i + 1 - window_length), "method"] <- "BART"
df[(i + 1 - window_length), "t"] <- (i + 1)
df[(i + 1 - window_length), "ytrue"] <- Y_all[i + 1, 1]
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.