R/utils.R
In survkl: Estimate Survival Data with Data Integration

Documented in generate_eta

get_fold <- function(nfolds = 5, delta, stratum) {
  n <- length(delta)
  fold <- integer(n)
  
  stratum <- as.factor(stratum)
  strata_levels <- levels(stratum)
  
  for (s in strata_levels) {
    idx <- which(stratum == s)
    delta_s <- delta[idx]
    
    ind1 <- which(delta_s == 1)
    ind0 <- which(delta_s == 0)
    
    n1 <- length(ind1)
    n0 <- length(ind0)
    
    fold1 <- 1:n1 %% nfolds
    fold0 <- (n1 + 1:n0) %% nfolds
    fold1[fold1 == 0] <- nfolds
    fold0[fold0 == 0] <- nfolds
    
    fold_s <- integer(length(idx))
    fold_s[ind1] <- sample(fold1)
    fold_s[ind0] <- sample(fold0)
    
    fold[idx] <- fold_s
  }
  
  return(fold)
}


#' Generate a Sequence of Tuning Parameters (eta)
#'
#' Produces a numeric vector of `eta` values to be used in Cox–KL model.
#'
#' @param method Character string selecting how to generate \code{eta}:
#'   \dQuote{linear} or \dQuote{exponential}. Default is \dQuote{exponential}.
#'   for an exponentially spaced sequence scaled to `max_eta`. Default is `"exponential"`.
#' @param n Integer, the number of `eta` values to generate. Default is 10.
#' @param max_eta Numeric, the maximum value of `eta` in the sequence. Default is 5.
#' @param min_eta Numeric, the minimum value of `eta` in the sequence. Default is 0.
#'
#' @details
#' \itemize{
#'   \item \emph{Exponential}: values are formed by exponentiating a grid from
#'     \code{log(1)} to \code{log(100)}, then linearly rescaling to the interval
#'     \code{[0, max_eta]}. Thus the smallest value equals \code{0} and the largest
#'     equals \code{max_eta}.
#'   \item \emph{Linear}: the current implementation calls
#'     \code{seq(min_eta, max_eta, length.out = n)} and therefore assumes a
#'     numeric object \code{min_eta} exists in the calling environment. 
#' }
#' Only the exact strings \dQuote{linear} and \dQuote{exponential} are supported;
#' other values for \code{method} will result in an error because \code{eta_values}
#' is never created.
#'
#' @return Numeric vector of length \code{n} containing the generated \code{eta} values.
#'
#' @examples
#' # Generate 10 exponentially spaced eta values up to 5
#' generate_eta(method = "exponential", n = 10, max_eta = 5)
#'
#' # Generate 5 linearly spaced eta values up to 3
#' generate_eta(method = "linear", n = 5, max_eta = 3)
#'
#' @export

generate_eta <- function(method = "exponential", n = 10, max_eta = 5, min_eta = 0) {
  if (method == "linear") {
    eta_values <- seq(min_eta, max_eta, length.out = n)
  } else if (method == "exponential") {
    eta_values <- exp(seq(log(1), log(100), length.out = n))
    eta_values <- (eta_values - min(eta_values)) / (max(eta_values) - min(eta_values)) * max_eta
  }
  return(eta_values)
}



c_stat_stratcox <- function(time, xbeta, stratum, delta) {
  stratum <- factor(stratum)
  
  ord <- order(stratum, time)
  time <- time[ord]
  xbeta <- xbeta[ord]
  stratum <- stratum[ord]
  delta <- delta[ord]
  
  # Only evaluate strata with at least 2 individuals
  stratum_sizes <- table(stratum)
  valid_strata <- names(stratum_sizes[stratum_sizes > 1])
  
  # Compute per-stratum concordance
  count <- sapply(valid_strata, function(s) {
    idx <- stratum == s
    cox_c_index(time[idx], xbeta[idx], delta[idx])
  })
  
  numer <- Reduce(`+`, count["numer", ])
  denom <- Reduce(`+`, count["denom", ])
  c_stat <- numer / denom
  
  return(list(numer = numer, 
              denom = denom, 
              c_statistic = c_stat))
}


c_stat_stratcox_vec <- function(time, LP_mat, stratum, delta) {
  numer <- denom <- c_stat <- rep(NA_real_, ncol(LP_mat))
  for (j in seq_len(ncol(LP_mat))) {
    cs <- c_stat_stratcox(time, LP_mat[, j], stratum, delta)
    numer[j] <- cs$numer
    denom[j] <- cs$denom
    c_stat[j] <- cs$c_statistic
  }
  list(numer = numer, denom = denom, c_stat = c_stat)
}



#' Setup Lambda Sequence for Cox–KL Model (Internal)
#'
#' Generates a sequence of penalty parameters (`lambda`) and initializes coefficients
#' for the Cox–KL model. The maximum lambda (`lambda.max`) is defined as the 
#' smallest value that shrinks all coefficients (`beta`) to zero. Note that different 
#' values of `eta` will produce different `lambda` sequences.
#' @keywords internal
#' @noRd
setupLambdaCoxKL <- function(Z, time, delta, delta_tilde, RS, beta.init, stratum,
                             group, group.multiplier, n.each_stratum, alpha, 
                             eta, nlambda, lambda.min.ratio) {
  n <- nrow(Z)
  K <- table(group)
  K1 <- as.integer(if (min(group)==0) cumsum(K) else c(0, cumsum(K)))
  storage.mode(K1) <- "integer"
  if (K1[1]!=0) { ## some covariates are not penalized
    nullFit <- coxkl(Z[, group == 0, drop = FALSE], delta, time, stratum, RS, beta = NULL, eta)
    LinPred <- nullFit$linear.predictors
    beta.init <- c(nullFit$beta, rep(0, length(beta.init) - length(nullFit$beta)))
    rsk <- c()  
    for (i in seq_along(unique(stratum))){
      rsk <- c(rsk, rev(cumsum(rev(exp(LinPred[stratum == i])))))
    }
    # r <- (delta + eta * delta_tilde)/(1 + eta) - exp(LinPred) * cumsum(delta / rsk)
    
    r <- numeric(length(delta))
    for (i in seq_along(unique(stratum))) {
      idx <- which(stratum == i)
      r[idx] <- (delta[idx] + eta * delta_tilde[idx])/(1 + eta) - exp(LinPred[idx]) * cumsum(delta[idx] / rsk[idx])
    }

  } else { ## all covariates are penalized
    w <- c()
    h <- c()
    for (i in seq_along(unique(stratum))){
      temp.w <- 1 / (n.each_stratum[i] - (1:n.each_stratum[i]) + 1)
      w <- c(w, temp.w)
      h <- c(h, cumsum(delta[stratum == i] * temp.w))
    }
    r <- (delta + eta * delta_tilde)/(1 + eta) - h

    beta.init <- beta.init
  }
  
  ## Determine lambda.max
  zmax <- maxgrad(Z, r, K1, as.double(group.multiplier)) / n
  lambda.max <- zmax/alpha
  
  if (lambda.min.ratio == 0){
    lambda <- c(exp(seq(log(lambda.max), log(.001 * lambda.max), len = nlambda-1)), 0)
  } else {
    lambda <- exp(seq(log(lambda.max), log(lambda.min.ratio * lambda.max), len = nlambda))
  }
  lambda[1] <- lambda[1] + 1e-9
  ls <- list(beta = beta.init, lambda.seq = lambda)
  return(ls)
}



#---------------------------- Functions for high-dim LASSO ----------------------------#
# set up group information
# m: group multiplier, default (NULL) is the square root of group size of remaining features
setupG <- function(group, m){
  group.factor <- factor(group)
  if (any(levels(group.factor) == '0')) {
    g <- as.integer(group.factor) - 1
    lev <- levels(group.factor)[levels(group.factor) != '0']
  } else {
    g <- as.integer(group.factor)
    lev <- levels(group.factor)
  }
  if (is.numeric(group) | is.integer(group)) {
    lev <- paste0("G", lev)
  }
  if (is.null(m)) {
    m <- rep(NA, length(lev))
    names(m) <- lev
  } else {
    TRY <- try(as.integer(group) == g)
    if (inherits(TRY, 'try-error') || any(!TRY)) stop('Attempting to set group.multiplier is ambiguous if group is not a factor', call. = FALSE)
    if (length(m) != length(lev)) stop("Length of group.multiplier must equal number of penalized groups", call. = FALSE)
    if (storage.mode(m) != "double") storage.mode(m) <- "double"
    if (any(m < 0)) stop('group.multiplier cannot be negative', call.=FALSE)
  }
  # "g" contains the group index of each column, but convert "character" group name into integer
  structure(g, levels = lev, m = m)
}

# remove constant columns if necessary
subsetG <- function(g, nz) { # nz: index of non-constant features
  lev <- attr(g, 'levels')
  m <- attr(g, 'm')
  new <- g[nz] # only include non-constant columns
  dropped <- setdiff(g, new)  # If the entire group has been dropped
  if (length(dropped) > 0) {
    lev <- lev[-dropped] # remaining group
    m <- m[-dropped]
    group.factor <- factor(new) #remaining group factor
    new <- as.integer(group.factor) - 1 * any(levels(group.factor) == '0')  #new group index
  }
  structure(new, levels = lev, m = m)
}

# reorder group index of features
#' @importFrom stats relevel
reorderG <- function(g, m) {
  og <- g
  lev <- attr(g, 'levels')
  m <- attr(g, 'm')
  if (any(g == 0)) {
    g <- as.integer(relevel(factor(g), "0")) - 1
  }
  if (any(order(g) != 1:length(g))) {
    reorder <- TRUE
    gf <- factor(g)
    if (any(levels(gf) == "0")) {
      gf <- relevel(gf, "0")
      g <- as.integer(gf) - 1
    } else {
      g <- as.integer(gf)
    }
    ord <- order(g)
    ord.inv <- match(1:length(g), ord)
    g <- g[ord]
  } else {
    reorder <- FALSE
    ord <- ord.inv <- NULL
  }
  structure(g, levels = lev, m = m, ord = ord, ord.inv = ord.inv, reorder = reorder)
}

#Feather-level standardization
standardize.Z <- function(Z){
  mysd <- function(z){
    sqrt(sum((z - mean(z))^2)/length(z))
  }
  new.Z <- scale(as.matrix(Z), scale = apply(as.matrix(Z), 2, mysd))
  center.Z <- attributes(new.Z)$`scaled:center`
  scale.Z <- attributes(new.Z)$`scaled:scale`
  new.Z <- new.Z[, , drop = F]
  res <- list(new.Z = new.Z, center.Z = center.Z, scale.Z = scale.Z)
  return(res)
}

## converting standardized betas back to original variables
unstandardize <- function(beta, gamma, std.Z){
  original.beta <- matrix(0, nrow = length(std.Z$scale), ncol = ncol(beta))
  original.beta[std.Z$nz, ] <- beta / std.Z$scale[std.Z$nz]  # modified beta
  original.gamma <- t(apply(gamma, 1, function(x) x - crossprod(std.Z$center, original.beta))) # modified intercepts (gamma)
  return(list(gamma = original.gamma, beta = original.beta))
}


# Group-level orthogonalization (column in new order, from group_0 to group_max)
orthogonalize <- function(Z, group) {
  z.names <- colnames(Z)
  n <- nrow(Z)
  J <- max(group)
  QL <- vector("list", J)
  orthog.Z <- matrix(0, nrow = nrow(Z), ncol = ncol(Z))
  colnames(orthog.Z) <- z.names
  # unpenalized group will not be orthogonalized
  orthog.Z[, which(group == 0)] <- Z[, which(group == 0)]
  
  # SVD and generate orthogonalized X
  for (j in seq_along(integer(J))) {
    ind <- which(group == j)
    if (length(ind) == 0) { # skip 0-length group
      next
    }
    SVD <- svd(Z[, ind, drop = FALSE], nu = 0)  # Q matrix (orthonormal matrix of eigenvectors)
    r <- which(SVD$d > 1e-10)  #remove extremely small singular values
    QL[[j]] <- sweep(SVD$v[, r, drop = FALSE], 2, sqrt(n)/SVD$d[r], "*") # Q * Lambda^{-1/2}
    orthog.Z[, ind[r]] <- Z[, ind] %*% QL[[j]]  # group orthogonalized X, where (X^T * X)/n = I
  }
  nz <- !apply(orthog.Z == 0, 2, all)  #find all zero
  orthog.Z <- orthog.Z[, nz, drop = FALSE]
  attr(orthog.Z, "QL") <- QL
  attr(orthog.Z, "group") <- group[nz]
  return(orthog.Z)
}

# convert orthogonalized beta back to original scales
unorthogonalize <- function(beta, Z, group) {
  ind <- !sapply(attr(Z, "QL"), is.null)
  QL <- Matrix::bdiag(attr(Z, "QL")[ind]) #block diagonal matrix
  if (sum(group == 0) > 0){ #some groups are unpenalized
    ind0 <- which(group==0)
    original.beta <- as.matrix(rbind(beta[ind0, , drop = FALSE], QL %*% beta[-ind0, , drop = FALSE]))
  } else {  # all groups are penalized
    original.beta <- as.matrix(QL %*% beta)
  }
  return(original.beta)
}



# standardize + orthogonalize covariate matrix
newZG.Std <- function(Z, g, m){
  if (any(is.na(Z))){
    stop("Missing data (NA's) detected in covariate matrix!", call. = FALSE)
  }
  if (length(g) != ncol(Z)) {
    stop ("Dimensions of group is not compatible with Z", call. = FALSE)
  }
  G <- setupG(g, m) # setup group
  std <- standardize.Z(Z)
  std.Z <- std[[1]]
  center <- std[[2]]
  scale <- std[[3]]
  
  small_scales <- which(scale <= 1e-6)
  if (length(small_scales) > 0) {
    stop(
      paste0(
        "The following variables have (near) constant columns: ",
        paste(names(scale)[small_scales], collapse = ", ")
      )
    )
  }
  
  nz <- which(scale > 1e-6)   # non-constant columns
  if (length(nz) != ncol(Z)) {
    std.Z <- std.Z[, nz, drop = F]
    G <- subsetG(G, nz)
  }
  # Reorder groups
  G <- reorderG(G, attr(G, 'm'))
  if (attr(G, 'reorder')){
    std.Z <- std.Z[, attr(G, 'ord')]
  }
  # Group-level orthogonalization
  std.Z <- orthogonalize(std.Z, G)
  g <- attr(std.Z, "group")
  # Set group multiplier if missing
  m <- attr(G, 'm')
  if (all(is.na(m))) {
    m <- sqrt(table(g[g != 0]))
  }
  res <- list(std.Z = std.Z, g = g, m = m, reorder = attr(G, 'reorder'), nz = nz,
              ord.inv = attr(G, 'ord.inv'), center = center, scale = scale)
  return(res)
}

# Only orthogonalize covariate matrix
newZG.Unstd <- function(Z, g, m){
  if (any(is.na(Z))){
    stop("Missing data (NA's) detected in covariate matrix!", call. = FALSE)
  }
  if (length(g) != ncol(Z)) {
    stop ("Dimensions of group is not compatible with Z", call. = FALSE)
  }
  G <- setupG(g, m)
  mysd <- function(x){
    sqrt(sum((x - mean(x))^2)/length(x))
  }
  scale <- apply(as.matrix(Z), 2, mysd)
  
  
  small_scales <- which(scale <= 1e-6)
  if (length(small_scales) > 0) {
    stop(
      paste0(
        "The following variables have (near) constant columns: ",
        paste(names(scale)[small_scales], collapse = ", ")
      )
    )
  }
  
  nz <- which(scale > 1e-6) #remove constant columns
  if (length(nz) != ncol(Z)) {
    std.Z <- Z[, nz, drop = F]
    G <- subsetG(G, nz)
  } else {
    std.Z <- Z
  }
  G <- reorderG(G, attr(G, 'm'))
  if (attr(G, 'reorder')){
    std.Z <- std.Z[, attr(G, 'ord')]
  }
  std.Z <- orthogonalize(std.Z, G)
  g <- attr(std.Z, "group")
  # Set group multiplier if missing
  m <- attr(G, 'm')
  if (all(is.na(m))) {
    m <- sqrt(table(g[g != 0]))
  }
  res <- list(std.Z = std.Z, g = g, m = m, reorder = attr(G, 'reorder'),
              ord.inv = attr(G, 'ord.inv'), nz = nz)
  return(res)
}



loss.coxkl_highdim <- function(delta, y.hat, stratum, total = TRUE){
  y.hat <- as.matrix(y.hat)
  revCumsum.strat <- function(strat){
    temp.y.hat <- y.hat[which(stratum == strat), , drop=FALSE] 
    temp.rsk <- apply(temp.y.hat, 2, function(x) rev(cumsum(rev(exp(x)))))
    return(temp.rsk)
  }
  rsk <- do.call(rbind, lapply(unique(stratum), function(i) revCumsum.strat(i)))
  
  if (total == TRUE) { #when delta = 0, loss contribution is 0
    return(-2 * (crossprod(delta, y.hat) - crossprod(delta, log(rsk)))) #return a vector (1 * nlambda)
  } else { #when delta = 0, loss contribution is 0
    return(-2 * (y.hat[delta == 1, , drop = FALSE] - log(rsk)[delta == 1, , drop = FALSE])) #return a matrix (n * nlambda)
  }
}