R/HiGarrote.R
In HiGarrote: Nonnegative Garrote Method Incorporating Hierarchical Relationships

Documented in HiGarrote nnGarrote

#' An Automatic Method for the Analysis of Experiments using Hierarchical Garrote
#' 
#' `HiGarrote()` provides an automatic method for analyzing experimental data. 
#' This function applies the nonnegative garrote method to select important effects while preserving their hierarchical structures.
#' It first estimates regression parameters using generalized ridge regression, where the ridge parameters are derived from a Gaussian process prior placed on the input-output relationship. 
#' Subsequently, the initial estimates will be used in the nonnegative garrote for effects selection.
#' 
#' 
#' @param D An \eqn{n \times p} data frame for the unreplicated design matrix, where \eqn{n} is the run size and \eqn{p} is the number of factors.
#' @param y A vector for the responses corresponding to \code{D}. For replicated experiments, \code{y} should be an \eqn{n \times r} matrix, where \eqn{r} is the number of replicates.
#' @param quali_id A vector indexing qualitative factors.
#' @param quanti_id A vector indexing quantitative factors.
#' @param heredity Choice of heredity principles: weak or strong. The default is weak.
#' @param U Optional. An \eqn{n \times P} model matrix, where \eqn{P} is the number of potential effects. 
#' The inclusion of potential effects supports only main effects and two-factor interactions. 
#' Three-factor and higher order interactions are not supported.
#' The colon symbol ":" must be included in the names of a two-factor interaction for separating its parent main effects.
#' By default, \code{U} will be automatically constructed.
#' The potential effects will then include all the main effects of qualitative factors, the first two main effects (linear and quadratic) of all the quantitative factors, and all the two-factor interactions generated by those main effects.
#' By default, the coding systems of qualitative and quantitative factors are Helmert coding and orthogonal polynomial coding, respectively.
#' @param me_num Optional. A \eqn{p \times 1} vector for the main effects number of each factor.
#' \code{me_num} is required when \code{U} is not \code{NULL} and must be consistent with the main effects number specified in \code{U}.
#' @param quali_contr Optional. A list specifying the contrasts of factors. 
#' \code{quali_contr} is required only when the main effects of a qualitative factor are not generated by the default Helmert coding.
#' 
#' @returns A vector for the nonnegative garrote estimates of the identified effects.
#' 
#' @export
#' @examples
#' # Cast fatigue experiment
#' data(cast_fatigue)
#' X <- cast_fatigue[,1:7]
#' y <- cast_fatigue[,8]
#' HiGarrote::HiGarrote(X, y)
#' 
#' # Blood glucose experiment
#' data(blood_glucose)
#' X <- blood_glucose[,1:8]
#' y <- blood_glucose[,9]
#' HiGarrote::HiGarrote(X, y, quanti_id = 2:8) 
#' 
#' \donttest{
#' # Router bit experiment
#' data(router_bit)
#' X <- router_bit[, 1:9]
#' y <- router_bit[,10]
#' for(i in c(4,5)){
#' my.contrasts <- matrix(c(-1,-1,1,1,1,-1,-1,1,-1,1,-1,1), ncol = 3)
#' X[,i] <- as.factor(X[,i])
#' contrasts(X[,i]) <- my.contrasts
#' colnames(contrasts(X[,i])) <- paste0(".",1:(4-1))
#' }
#' U <- model.matrix(~.^2, X)
#' U <- U[, -1]  # remove the unnecessary intercept terms from the model matrix
#' me_num = c(rep(1,3), rep(3,2), rep(1, 4))
#' quali_contr <- list(NULL, NULL, NULL,
#'                     matrix(c(-1,-1,1,1,1,-1,-1,1,-1,1,-1,1), ncol = 3),
#'                     matrix(c(-1,-1,1,1,1,-1,-1,1,-1,1,-1,1), ncol = 3),
#'                     NULL, NULL, NULL, NULL)
#' HiGarrote::HiGarrote(X, y, quali_id = c(4,5), U = U, 
#' me_num = me_num, quali_contr = quali_contr)
#' 
#' # Experiments with replicates
#' # Generate simulated data
#' data(cast_fatigue)
#' X <- cast_fatigue[,1:7]
#' U <- data.frame(model.matrix(~.^2, X)[,-1])
#' error <- matrix(rnorm(24), ncol = 2) # two replicates for each run
#' y <- 20*U$A + 10*U$A.B + 5*U$A.C + error
#' HiGarrote::HiGarrote(X, y)
#' }
#' 
#' @references
#' Yu, W. Y. and Joseph, V. R. (2024) "Automated Analysis of Experiments using Hierarchical Garrote," arXiv preprint arXiv:2411.01383.


HiGarrote <- function(D, y, quali_id = NULL, quanti_id = NULL,
                      heredity = "weak", U = NULL, me_num = NULL, quali_contr = NULL) {
  # STEP 1: PREPROCESSING
  D <- check_D(D)
  np <- dim(D)
  n <- np[1]
  p <- np[2]
  uni_level <- get_level(D, p)
  mi <- lengths(uni_level)
  two_level_id <- which(mi == 2)
  y <- as.matrix(y)
  replicate <- ncol(y)
  run <- rep(1:n, replicate)
  y_s2 <- y
  y <- sapply(split(y,run),mean)
  y_sd <- sd(y)
  y <- scale(y)
  s2 <- 0.0
  if(replicate != 1) {
    y_s2 <- y_s2/y_sd
    s2 <- mean(sapply(split(y_s2,run),var))
  }
  
  # STEP 2: ERROR HANDLING
  if(n != length(y)) { stop("ERROR: Run size of D must equal to y")}
  if(!is.null(quali_id)) {
    if ( any(quali_id <= 0 | quali_id > p) ) {stop("ERROR: Incorrect range of quali_id")}
  }
  if(!is.null(quanti_id)) {
    if ( any(quanti_id <= 0 | quanti_id > p) ) {stop("ERROR: Incorrect range of quanti_id")}
  }
  
  quanti_eq_id <- NULL
  quanti_ineq_id <- NULL
  if(!is.null(quanti_id)) {
    quanti_eq_id <- quanti_id[ which( evenly_spaced(uni_level[quanti_id]) ) ]
    quanti_ineq_id <- setdiff(quanti_id, quanti_eq_id)
    if(length(quanti_eq_id) == 0) {quanti_eq_id <- NULL}
    if(length(quanti_ineq_id) == 0) {quanti_ineq_id <- NULL}
  }
  
  if(is.null(U)) { # U is defined by default
    me_num <- mi-1
    me_num[quanti_eq_id] <- ifelse(me_num[quanti_eq_id] > 2, 2, me_num[quanti_eq_id])
    me_num[quanti_ineq_id] <- ifelse(me_num[quanti_ineq_id] > 2, 2, me_num[quanti_ineq_id])
  } else { # U is defined by users. Under this circumstances, me_num should also be fined by users.
    if(is.null(me_num)) {
      stop("ERROR: me_num is required and must be consistent with the main effects number provided in U")
    }
  }
  
  # STEP 3: U
  if(is.null(U)) {
    if(!is.null(quali_id)) {
      for(i in quali_id){
        D[,i] <- as.factor(D[,i])
        contrasts(D[,i]) <- contr.helmert(levels(D[,i]))
        contrasts(D[,i]) <- contr_scale(contrasts(D[,i]), mi[i])
        colnames(contrasts(D[,i])) <- paste0(".",1:(mi[i]-1))
      }
    }
    if(!is.null(quanti_eq_id)) {
      for(i in quanti_eq_id){
        D[,i] <- as.factor(D[,i])
        contrasts(D[,i]) <- contr.poly(levels(D[,i]))
        contr_name <- colnames(contrasts(D[,i]))
        contrasts(D[,i]) <- contr_scale(contrasts(D[,i]), mi[i])
        colnames(contrasts(D[,i])) <- contr_name
        contrasts(D[,i], how.many = me_num[i]) <- contrasts(D[,i])[,1:me_num[i]]
      }
    }
    if(!is.null(quanti_ineq_id)) {
      for(i in quanti_ineq_id){
        D[,i] <- poly(D[,i], mi[i]-1)
        D[,i] <- contr_scale(D[,i], mi[i])
        colnames(D[,i]) <- paste0(".",1:(mi[i]-1))
        D[,i] <- D[,i][,1:me_num[i]]
      }
    }
    U <- model.matrix(~.^2, D)
    U <- U[,-1]
  }
  effects_name <- colnames(U)
  
  # STEP 4: h_list
  h_list <- lapply(1:ncol(D), function(i) {
    h_dist(D[,i], quali_contr[[i]], (i%in%two_level_id), (i%in%quali_id))
  })
  h_list_mat <- rlist::list.flatten(h_list)
  
  # STEP 5: U_j_list, h_j_list
  U_j_list <- U_j_R(uni_level, p, mi, quali_id, quanti_eq_id, quanti_ineq_id, quali_contr)
  h_j_list <- h_j_cpp(p, uni_level, U_j_list, two_level_id, quali_id)
  h_j_list <- unlist(h_j_list, recursive = FALSE)
  rho_len <- ifelse(sapply(h_j_list, is.list) == FALSE, 1, lengths(h_j_list))
  
  # STEP 5: Optimize for rho_lambda
  P <- sum(rho_len)
  ini_point <- MaxPro::MaxProLHD(P+1, P+1)
  ini_point0 <- ini_point$Design
  ini_point0 <- scales::rescale(ini_point0, to = c(0.01,0.99))
  rho_lambda_list <- rho_lambda_optim(ini_point0, h_list_mat, n, replicate, y, 0.01, 0.99)
  rho_lambda_list <- unlist(rho_lambda_list, recursive = FALSE)
  rho_lambda_obj_value <- -unlist(purrr::map(rho_lambda_list, "objective"))
  rho_lambda <- rho_lambda_list[[which.max(rho_lambda_obj_value)]]$solution
  rho <- rho_lambda[1:P]
  lambda <- rho_lambda[P+1]
  rho_list <- purrr::map2(cumsum(c(0, rho_len[-length(rho_len)])) + 1, cumsum(rho_len), ~ rho[.x:.y])
  
  # STEP 6: R
  initialize_BETA_instance(h_j_list, p, rho_list, mi)
  r_j <- r_j_cpp_R(U_j_list, me_num)
  r_j <- unlist(r_j)
  me_idx <- which(!stringr::str_detect(effects_name, ":")) # main effects idx
  hoe_idx <- which(stringr::str_detect(effects_name, ":")) # higher-order effects idx
  names(r_j) <- effects_name[me_idx]
  R <- c(rep(1, length(effects_name)))
  names(R) <- effects_name
  # main effects
  R[intersect(names(R), names(r_j))] <- r_j[intersect(names(R), names(r_j))]
  # higher order effects
  if(length(hoe_idx) != 0){
    hoe_names <- stringr::str_split(effects_name[hoe_idx], ":")
    R_hoe <- lapply(hoe_names, function(i){
      prod(r_j[i])
    })
    R_hoe <- unlist(R_hoe)
    R[hoe_idx] <- R[hoe_idx]*R_hoe
  }
  
  # STEP 8: heredity
  if(heredity == "strong") {
    A1 <- gstrong(U)
  } else {
    A1 <- gweak(U)
  }
  
  beta_ele <- beta_ele_cpp_R(U, R, lambda, replicate, n, y)
  beta_ele$Dmat <- (beta_ele$Dmat + t(beta_ele$Dmat))/2
  if(!matrixcalc::is.positive.definite(beta_ele$Dmat)) {
    D_nng <- Matrix::nearPD(beta_ele$Dmat)
    beta_ele$Dmat <- as.matrix(D_nng$mat)
  }
  beta_nng <- beta_nng_cpp_R(beta_ele, replicate, n, y, A1, s2)
  beta_shrink = round(beta_nng, 6)
  names(beta_shrink) <- effects_name
  beta_shrink <- beta_shrink[which(beta_shrink!=0)]
  beta_shrink <- beta_shrink[order(abs(beta_shrink), decreasing  = TRUE)]
  beta_shrink <- beta_shrink*y_sd
  
  return(beta_shrink)
}




#' Nonnegative Garrote Method with Hierarchical Structures
#'
#' `nnGarrote()` implements the nonnegative garrote method, as described in Yuan et al. (2009), for selecting important variables while preserving hierarchical structures.
#' The method begins by obtaining the least squares estimates of the regression parameters under a linear model.
#' These initial estimates are then used in the nonnegative garrote to perform variable selection.
#' This function supports prediction based on the linear model fitted with the selected variables and their nonnegative garrote estimates.
#' Note that this method is suitable only when the number of observations is much larger than the number of variables, ensuring that the least squares estimation remains reliable.
#'
#'
#' @param U An \eqn{n \times P} model matrix, where \eqn{n} is the number of data and \eqn{P} is the number of potential variables.
#' The inclusion of potential variables supports only up to second-order interactions.
#' Three-order and higher order interactions are not supported.
#' The colon symbol ":" must be included in the names of a second-order interaction for separating its parent variables.
#' Please see the example for the naming format.
#' @param y A vector for the responses.
#' @param new_U Optional. A matrix or data frame of the new model matrix for prediction.
#' @param heredity Choice of heredity principles: weak or strong. The default is weak.
#' @returns
#' If \code{new_U} is \code{NULL}, the function returns a vector for the nonnegative garrote estimates of the identified variables.
#'
#' If \code{new_U} is not \code{NULL}, the function returns a list with:
#' \itemize{
#' \item \code{beta_nng}: a vector for the nonnegative garrote estimates of the identified variables.
#' \item \code{pred}: predictions for the output corresponding to \code{new_U}.
#' }
#'
#' @export
#' @examples
#' x1 <- runif(1000)
#' x2 <- runif(1000)
#' x3 <- runif(1000)
#' error <- rnorm(1000)
#' X <- data.frame(x1, x2, x3)
#' U_all <- data.frame(model.matrix(~. + x1:x2 + x1:x3 + x2:x3 + I(x1^2) + I(x2^2) + I(x3^2), X))
#' colnames(U_all) <- c("X.Intercept.", "x1", "x2", "x3", "x1:x1", "x2:x2", "x3:x3",
#'  "x1:x2", "x1:x3", "x2:x3")
#' # ":" is required for detecting the parent variables of a second-order interaction.
#'
#' new_idx <- sample(1:1000, 800)
#' new_U <- U_all[new_idx,]
#' U_idx <- setdiff(1:1000, new_idx)
#' U <- U_all[U_idx,]
#' y_all <- 20*U_all$x1 + 15*U_all$`x1:x1` + 10*U_all$`x1:x2` + error
#' y <- y_all[U_idx]
#' nnGarrote(U, y, new_U)
#'
#'
#' @references
#' Yuan, M., Joseph, V. R., and Zou H. (2009) "Structured Variable Selection and Estimation," The Annals of Applied Statistics, 3(4):1738–1757.


nnGarrote <- function(U, y, new_U = NULL, heredity = "weak") {
  # STEP 1: PREPROCESSING
  U <- as.matrix(U)
  y_sd <- sd(y)
  y <- y/y_sd
  n <- nrow(U)
  P <- ncol(U)
  if(n < P) {stop("ERROR: n should be much larger than P")}
  effects_name <- colnames(U)

  # STEP 2: least squares estimates
  dat <- data.frame(U, y)
  lmod <- lm(y~.-1, data = dat)
  beta <- lmod$coefficients

  # STEP 3: heredity
  if(heredity == "strong") {
    A1 <- gstrong(U)
  } else {
    A1 <- gweak(U)
  }

  B=diag(beta)
  Z=U%*%B
  D.mat=t(Z)%*%Z
  d=t(Z)%*%y
  D.mat <- (D.mat + t(D.mat))/2
  
  if(!matrixcalc::is.positive.definite(D.mat)) {
    D.mat <- Matrix::nearPD(D.mat)
    D.mat <- as.matrix(D.mat$mat)
  }

  M=seq(0.1,length(beta),length=100)
  gcv=numeric(100)
  for(i in 1:100){
    b0=c(-M[i],rep(0,dim(A1)[2]-1))
    coef_nng = quadprog::solve.QP(D.mat, d, A1, b0)$sol
    e=y-Z%*%coef_nng
    gcv[i]=sum(e^2)/(n*(1-M[i]/n)^2)
  }
  M=M[which.min(gcv)]
  b0=c(-M,rep(0,dim(A1)[2]-1))
  coef_nng=round(quadprog::solve.QP(D.mat, d, A1, b0)$sol,10)
  beta_nng=B%*%coef_nng

  beta_shrink = round(beta_nng, 6)
  names(beta_shrink) <- effects_name
  beta_shrink <- beta_shrink[which(beta_shrink!=0)]
  beta_shrink <- beta_shrink[order(abs(beta_shrink), decreasing  = TRUE)]
  beta_shrink <- beta_shrink*y_sd

  if(is.null(new_U)) {
    return(beta_shrink)
  } else {
    beta_nng <- beta_nng*y_sd
    new_U <- as.matrix(new_U)
    pred <- as.numeric(new_U%*%beta_nng)
    result <- list("beta_nng" = beta_shrink, "pred" = pred)
    return(result)
  }

}