#' Get residuals and estimated coefficients of simple linear base learners
#' @param y response variable
#' @param X feature matrix
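#' @return a list with \code{blong} (the interleaved intercept and slope estimates of
#' all base learners), \code{res} (the residual sum of squares of each base learner),
#' \code{epsilon} (the mean residual of each base learner) and \code{epsilon_vec}
#' (the full residual matrix)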
#' @keywords internal
#' @export
getRes <- function(y, X) {
  blong <- NULL
  # fit one simple linear model (intercept + slope) per column of X
  for (i in seq_len(ncol(X))) {
    x1 <- cbind(1, X[, i])
    # closed-form OLS estimate via the normal equations
    tmp <- solve(t(x1) %*% x1) %*% (t(x1) %*% y)
    blong <- c(blong, tmp)
  }
  # blong interleaves the estimates: intercept_1, slope_1, intercept_2, slope_2, ...
  pred <- t(blong[seq(1, length(blong), 2)] + blong[seq(2, length(blong), 2)] * t(X))
  # residual sum of squares of each base learner
  res <- colSums((y - pred)^2)
  epsilon <- colMeans(y - pred)
  epsilon_vec <- y - pred
  return(list(blong = blong,
              res = res,
              epsilon = epsilon,
              epsilon_vec = epsilon_vec))
}
#' @title Boosting GaussianLSS Model
#' @description Fit a boosting GaussianLSS model with different kinds of step lengths.
#' @param y response variable
#' @param data feature matrix
#' @param m_stop stopping iteration, default is 1000
#' @param center_x center the feature matrix? default is TRUE
#' @param weights a vector of weights indicating the training (1) and test (0) observations;
#' default is NULL, i.e. all observations are used as both training and test data
#' @param method step length method, one of \code{"FSL"}, \code{"ASL"}, \code{"SAASL"},
#' or \code{"SAASL05"}; default is \code{"SAASL"}
#' @return \code{mu} the estimated coefficients of the mean predictor \cr
#' \code{sigma} the estimated coefficients of the predictor of the standard deviation \cr
#' \code{mu_mat} matrix of the estimated coefficients of mu in each iteration \cr
#' \code{si_mat} matrix of the estimated coefficients of sigma in each iteration \cr
#' \code{v_mu} step length of mu in each iteration \cr
#' \code{v_si} step length of sigma in each iteration \cr
#' \code{muvsi} which distribution parameter is updated in each iteration (1 = mu, 2 = sigma) \cr
#' \code{v_mu_var} which variable is used for updating mu in each iteration \cr
#' \code{v_si_var} which variable is used for updating sigma in each iteration \cr
#' \code{like_train} log-likelihood of the training data in each iteration \cr
#' \code{like_test} log-likelihood of the test data in each iteration
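#' @examples
#' # A minimal illustrative sketch on simulated data (variable names and values
#' # below are made up for demonstration):
#' set.seed(1)
#' n <- 200
#' X <- matrix(rnorm(n * 4), ncol = 4, dimnames = list(NULL, paste0("x", 1:4)))
#' y <- 1 + 2 * X[, 1] + rnorm(n, sd = exp(0.5 * X[, 2]))
#' w <- rbinom(n, 1, 0.7)  # 1 = training observation, 0 = test observation
#' fit <- boost_gaussianLSS(y, X, m_stop = 500, weights = w)
#' fit$mu                      # estimated coefficients of the mean predictor
#' which.max(fit$like_test)    # iteration with the highest test log-likelihood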
#' @importFrom stats sd dnorm optimize
#' @export
boost_gaussianLSS <- function(y, data, m_stop = 1000, center_x = TRUE, weights = NULL,
                              method = "SAASL") {
  do_m <- do_s <- TRUE
  muvsi <- rep(0, m_stop)
  # Split data into train and test sets. If weights = NULL, then data = train_set = test_set
  if (!is.null(weights)) {
    weights <- as.logical(weights)
    y_train <- y[weights]
    data_train <- data[weights, ]
    y_test <- y[!weights]
    data_test <- data[!weights, ]
  }
  else {
    y_train <- y
    data_train <- data
    y_test <- y
    data_test <- data
  }
  if (center_x) {
    data_train <- apply(data_train, FUN = function(x) scale(x, scale = FALSE, center = TRUE), MARGIN = 2)
    data_test <- apply(data_test, FUN = function(x) scale(x, scale = FALSE, center = TRUE), MARGIN = 2)
  }
  else {
    data_train <- as.matrix(data_train)
    data_test <- as.matrix(data_test)
  }
  # add intercept
  X_train <- cbind(1, data_train)
  X_test <- cbind(1, data_test)
  p <- ncol(data_train)
  # initialize beta_mu and beta_si
  beta_mu <- matrix(c(mean(y), rep(0, p)), ncol = 1)
  beta_si <- matrix(c(log(sd(y)), rep(0, p)), ncol = 1)
  # initialize other outputs
  mu_train <- mu_test <- list()
  sigma_train <- sigma_test <- list()
  ngr_mu <- list()
  ngr_si <- list()
  mu_mat <- si_mat <- matrix(nrow = ncol(X_train), ncol = m_stop)
  v_mu <- v_mu_var <- vector(length = m_stop)
  v_si <- v_si_var <- vector(length = m_stop)
  Xbs_train <- X_train %*% beta_si
  Xbs_test <- X_test %*% beta_si
  Xbm_train <- rep(mean(y_train), length(y_train))
  Xbm_test <- rep(mean(y_test), length(y_test))
  step.length.mu <- step.length.si <- 0
  update.si <- update.mu <- 0
  like_train <- like_test <- vector(length = m_stop)
  h_mu <- hs2 <- vector(length = m_stop)
  # boosting start
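  # In each iteration, one base learner is fitted to the negative gradient of each
  # distribution parameter; only the candidate update with the higher log-likelihood
  # on the test data (the full data if weights = NULL) is committed (non-cyclic updating)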
  for (m in 1:m_stop) {
    mu_train[[m]] <- Xbm_train
    mu_test[[m]] <- Xbm_test
    sigma_train[[m]] <- exp(Xbs_train)
    sigma_test[[m]] <- exp(Xbs_test)
    # potential update of mu
    if (do_m) {
      # calculate negative gradient
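      # (for the Gaussian log-density, the derivative w.r.t. mu is (y - mu) / sigma^2)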
      ngr_mu[[m]] <- (1/sigma_train[[m]]^2) * (y_train - mu_train[[m]])
      RS <- getRes(as.vector(ngr_mu[[m]]), data_train)
      # select the best performing variable
      best_mu <- which.min(RS$res)
      v_mu_var[m] <- best_mu
      beta_best_mu <- RS$blong[c(best_mu * 2 - 1, best_mu * 2)]
      update.mu_train <- X_train[, best_mu + 1] * beta_best_mu[2] + beta_best_mu[1]
      update.mu_test <- X_test[, best_mu + 1] * beta_best_mu[2] + beta_best_mu[1]
      # find the optimal step length by line search
      if (method == "ASL") {
        phi_mu <- function(v) {
          ret <- -sum(dnorm(y_train, mu_train[[m]] + v * update.mu_train, sigma_train[[m]], log = TRUE))
          return(ret)
        }
        v <- optimize(phi_mu, interval = c(-1, 10))$minimum
      }
else if (method %in% c("SAASL", "SAASL05")) {
        h_mu[m] <- sum(update.mu_train^2)
        hs2[m] <- sum(update.mu_train^2 / sigma_train[[m]]^2)
        v <- h_mu[m] / hs2[m]
      }
      else if (method == "FSL") {
        v <- 1
      }
      # the actual step is shrunk to 10% of the chosen step length v
      v_mu[m] <- v
      step.length.mu <- .1 * v_mu[m]
      # build the candidate coefficient vector for mu
      beta_mu_st <- beta_mu
      beta_mu_st[1] <- beta_mu[1] + step.length.mu * beta_best_mu[1]
      beta_mu_st[1 + best_mu] <- beta_mu[1 + best_mu] + step.length.mu * beta_best_mu[2]
    }
    # potential update of sigma
    if (do_s) {
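      # negative gradient w.r.t. the predictor of log(sigma): with sigma = exp(eta),
      # the derivative of the Gaussian log-density is -1 + exp(-2 * eta) * (y - mu)^2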
      ngr_si[[m]] <- (-1 + exp(-2 * Xbs_train) * ((y_train - mu_train[[m]])^2))
      RS <- getRes(as.vector(ngr_si[[m]]), data_train)
      # select the best performing variable
      best_si <- which.min(RS$res)
      v_si_var[m] <- best_si
      beta_best_si <- RS$blong[c(best_si * 2 - 1, best_si * 2)]
      update.si_train <- X_train[, best_si + 1] * beta_best_si[2] + beta_best_si[1]
      update.si_test <- X_test[, best_si + 1] * beta_best_si[2] + beta_best_si[1]
if (method %in% c("ASL", "SAASL", "SAASL05")) {
        phi_si <- function(v) {
          ret <- -sum(dnorm(y_train, mu_train[[m]], exp(Xbs_train + v * update.si_train), log = TRUE))
          return(ret)
        }
        v <- optimize(phi_si, interval = c(-1, 1))$minimum
      }
else if (method == "FSL") {
v <- 1
}
v_si[m] <- v
step.length.si <- .1 * v_si[m]
beta_si_st <- beta_si
beta_si_st[1] <- beta_si[1]+step.length.si*beta_best_si[1]
beta_si_st[1+best_si] <- beta_si[1+best_si]+step.length.si*beta_best_si[2]
}
    # select the best performing distribution parameter
    if (do_m & do_s) {
      # compare the test log-likelihood of the two candidate updates
      like_mu <- sum(dnorm(y_test, mu_test[[m]] + step.length.mu * update.mu_test, sigma_test[[m]], log = TRUE))
      like_si <- sum(dnorm(y_test, mu_test[[m]], exp(Xbs_test + step.length.si * update.si_test), log = TRUE))
      # if like_mu > like_si, then update mu
      if (like_mu > like_si) {
        beta_mu <- beta_mu_st
        Xbm_train <- Xbm_train + step.length.mu * update.mu_train
        Xbm_test <- Xbm_test + step.length.mu * update.mu_test
        muvsi[m] <- 1
      }
      # else update sigma
      else {
        beta_si <- beta_si_st
        Xbs_train <- Xbs_train + step.length.si * update.si_train
        Xbs_test <- Xbs_test + step.length.si * update.si_test
        muvsi[m] <- 2
      }
    }
    # # if either parameter stops updating, keep updating the other
    # else {
    #   if (do_m) {
    #     beta_mu <- beta_mu_st
    #     Xbm_train <- Xbm_train + step.length.mu * update.mu_train
    #     Xbm_test <- Xbm_test + step.length.mu * update.mu_test
    #     muvsi[m] <- 1
    #   }
    #   if (do_s) {
    #     beta_si <- beta_si_st
    #     Xbs_train <- Xbs_train + step.length.si * update.si_train
    #     Xbs_test <- Xbs_test + step.length.si * update.si_test
    #     muvsi[m] <- 2
    #   }
    # }
    # record the coefficient paths and the log-likelihoods of this iteration
    mu_mat[, m] <- beta_mu
    si_mat[, m] <- beta_si
    like_train[m] <- sum(dnorm(y_train, mu_train[[m]], sigma_train[[m]], log = TRUE))
    like_test[m] <- sum(dnorm(y_test, mu_test[[m]], sigma_test[[m]], log = TRUE))
  }
  # format the output
  rownames(beta_mu) <- c("(Intercept)", colnames(data))
  rownames(beta_si) <- c("(Intercept)", colnames(data))
  return(list(mu = t(beta_mu),
              sigma = t(beta_si),
              mu_mat = mu_mat,
              si_mat = si_mat,
              v_si = v_si,
              v_mu = v_mu,
              muvsi = muvsi,
              v_mu_var = v_mu_var,
              v_si_var = v_si_var,
              like_train = like_train,
              like_test = like_test,
              call = sys.call()))
}