R/coxpath.R


#' Fit a Cox regression model with elastic net regularization for a path of
#' lambda values
#'
#' Fit a Cox regression model via penalized maximum likelihood for a path of
#' lambda values. Can deal with (start, stop] data and strata, as well as
#' sparse design matrices.
#'
#' Sometimes the sequence is truncated before \code{nlambda} values of lambda
#' have been used. This happens when \code{multiview.cox.path} detects that
#' the decrease in deviance is marginal (i.e. we are near a saturated fit).
#'
#' @inheritParams multiview.path
#' @inheritParams multiview
#' @return An object of class "coxnet" and "glmnet".
#' \item{a0}{Intercept value, \code{NULL} for "cox" family.}
#' \item{beta}{A \code{nvars x length(lambda)} matrix of coefficients, stored in
#' sparse matrix format.}
#' \item{df}{The number of nonzero coefficients for each value of lambda.}
#' \item{dim}{Dimension of coefficient matrix.}
#' \item{lambda}{The actual sequence of lambda values used. When alpha=0, the
#' largest lambda reported does not quite give the zero coefficients reported
#' (lambda=inf would in principle). Instead, the largest lambda for alpha=0.001
#' is used, and the sequence of lambda values is derived from this.}
#' \item{dev.ratio}{The fraction of (null) deviance explained. The deviance
#' calculations incorporate weights if present in the model. The deviance is
#' defined to be 2*(loglike_sat - loglike), where loglike_sat is the log-likelihood
#' for the saturated model (a model with a free parameter per observation).
#' Hence dev.ratio=1-dev/nulldev.}
#' \item{nulldev}{Null deviance (per observation). This is defined to be
#' 2*(loglike_sat - loglike(Null)). The null model refers to the model with
#' all coefficients zero.}
#' \item{npasses}{Total passes over the data summed over all lambda values.}
#' \item{jerr}{Error flag, for warnings and errors (largely for internal
#' debugging).}
#' \item{offset}{A logical variable indicating whether an offset was included
#' in the model.}
#' \item{call}{The call that produced this object.}
#' \item{nobs}{Number of observations.}
#'
#' @examples
#' set.seed(2)
#' nobs <- 100; nvars <- 15
#' xvec <- rnorm(nobs * nvars)
#' xvec[sample.int(nobs * nvars, size = 0.4 * nobs * nvars)] <- 0
#' x <- matrix(xvec, nrow = nobs)
#' beta <- rnorm(nvars / 3)
#' fx <- x[, seq(nvars / 3)] %*% beta / 3
#' ty <- rexp(nobs, exp(fx))
#' tcens <- rbinom(n = nobs, prob = 0.3, size = 1)
#' jsurv <- survival::Surv(ty, tcens)
#' fit1 <- glmnet:::cox.path(x, jsurv)
#'
#' # works with sparse x matrix
#' x_sparse <- Matrix::Matrix(x, sparse = TRUE)
#' fit2 <- glmnet:::cox.path(x_sparse, jsurv)
#'
#' # example with (start, stop] data
#' set.seed(2)
#' start_time <- runif(100, min = 0, max = 5)
#' stop_time <- start_time + runif(100, min = 0.1, max = 3)
#' status <- rbinom(n = nobs, prob = 0.3, size = 1)
#' jsurv_ss <- survival::Surv(start_time, stop_time, status)
#' fit3 <- glmnet:::cox.path(x, jsurv_ss)
#'
#' # example with strata
#' jsurv_ss2 <- glmnet::stratifySurv(jsurv_ss, rep(1:2, each = 50))
#' fit4 <- glmnet:::cox.path(x, jsurv_ss2)
#' @importFrom glmnet coxnet.deviance stratifySurv
multiview.cox.path <- function(x_list, x, y, rho = 0, weights = NULL, lambda = NULL, offset = NULL,
                               alpha = 1.0, nlambda = 100,
                               lambda.min.ratio = ifelse(nobs<nvars, 1e-2, 1e-4),
                               standardize = TRUE, intercept = TRUE,
                               thresh = 1e-7, exclude = integer(0), penalty.factor = rep(1.0, nvars),
                               lower.limits = -Inf, upper.limits = Inf, maxit = 100000,
                               trace.it = 0,
                               nvars, nobs, xm, xs, control, vp, vnames, is.offset) {
  ## ### Prepare all the generic arguments (mimicking top-level glmnet() call)
  ## if (alpha > 1) {
  ##   warning("alpha > 1; set to 1")
  ##   alpha = 1
  ## } else if (alpha < 0) {
  ##   warning("alpha < 0; set to 0")
  ##   alpha = 0
  ## }
  ## alpha = as.double(alpha)

  ## ## Make lambda the multiview lambda
  ## ## Allows use of mvlambda parameter for idempotence.
  ## lambda <- mvlambda

  this.call <- match.call()

  ## np = dim(x)
  ## # if (is.null(np) || (np[2] <= 1)) stop("x should be a matrix with 2 or more columns")
  ## nobs = as.integer(np[1]); nvars = as.integer(np[2])

  ## # get feature variable names
  ## vnames <- colnames(x)
  ## # if(is.null(vnames)) vnames <- paste("V",seq(nvars),sep="")

  ## # check weights
  ## if(is.null(weights)) weights = rep(1,nobs)
  ## else if (length(weights) != nobs)
  ##   stop(paste("Number of elements in weights (",length(weights),
  ##              ") not equal to the number of rows of x (",nobs,")",sep=""))
  ## weights <- as.double(weights)

  # check that response y is a Surv object of the correct length
  y <- response.coxnet(y)

  if (nrow(y) != nobs) stop(paste0("number of observations in y (", nrow(y),
                                   ") not equal to the number of rows of x (",
                                   nobs, ")"))

  ## # check offset option
  ## is.offset <- !(is.null(offset))
  ## if (is.offset == FALSE) {
  ##   offset <- rep(0, times = nrow(y))
  ## }

  ## # check and standardize penalty factors (to sum to nvars)
  ## if(any(penalty.factor == Inf)) {
  ##   exclude = c(exclude, seq(nvars)[penalty.factor == Inf])
  ##   exclude = sort(unique(exclude))
  ## }

  ## ## Compute weighted mean and variance of columns of x, sensitive to sparse matrix
  ## ## needed to detect constant columns below, and later for standardization
  ## meansd <- weighted_mean_sd(x, weights)
  
  ## ## look for constant variables, and if any, then add to exclude
  ## const_vars <- meansd$sd == 0
  ## nzvar <- setdiff(which(!const_vars), exclude)
  ## # if all the non-excluded variables have zero variance, throw error
  ## if (length(nzvar) == 0) stop("All used predictors have zero variance")
  
  ## ## if any constant vars, add to exclude
  ## if(any(const_vars)) {
  ##   exclude <- sort(unique(c(which(const_vars),exclude)))
  ##   meansd$sd[const_vars] <- 1.0 ## we divide later, and do not want bad numbers
  ## }
  ## if(length(exclude) > 0) {
  ##   jd = match(exclude, seq(nvars), 0)
  ##   if(!all(jd > 0)) stop ("Some excluded variables out of range")
  ##   penalty.factor[jd] = 1 # ow can change lambda sequence
  ## }
  ## # check and standardize penalty factors (to sum to nvars)
  ## vp = pmax(0, penalty.factor)
  ## if (max(vp) <= 0) stop("All penalty factors are <= 0")
  ## vp = as.double(vp * nvars / sum(vp))
  
  ## ### check on limits
  ## control <- multiview.control()
  ## if (thresh >= control$epsnr)
  ##   warning("thresh should be smaller than glmnet.control()$epsnr",
  ##           call. = FALSE)
  
  ## if(any(lower.limits > 0)){ stop("Lower limits should be non-positive") }
  ## if(any(upper.limits < 0)){ stop("Upper limits should be non-negative") }
  ## lower.limits[lower.limits == -Inf] = -control$big
  ## upper.limits[upper.limits == Inf] = control$big
  ## if (length(lower.limits) < nvars) {
  ##   if(length(lower.limits) == 1) lower.limits = rep(lower.limits, nvars) else
  ##                                                                           stop("Require length 1 or nvars lower.limits")
  ## } else lower.limits = lower.limits[seq(nvars)]
  ## if (length(upper.limits) < nvars) {
  ##   if(length(upper.limits) == 1) upper.limits = rep(upper.limits, nvars) else
  ##                                                                           stop("Require length 1 or nvars upper.limits")
  ## } else upper.limits = upper.limits[seq(nvars)]
  
  ## if (any(lower.limits == 0) || any(upper.limits == 0)) {
  ##   ###Bounds of zero can mess with the lambda sequence and fdev;
  ##   ###ie nothing happens and if fdev is not zero, the path can stop
  ##   fdev <- multiview.control()$fdev
  ##   if(fdev!= 0) {
  ##     multiview.control(fdev = 0)
  ##     on.exit(multiview.control(fdev = fdev))
  ##   }
  ## }
  ## ### end check on limits
  ## ### end preparation of generic arguments

  ## # standardize x if necessary
  ## xm <- rep(0.0, times = nvars)
  ## if (standardize) {
  ##   xs <- meansd$sd
  ## } else {
  ##   xs <- rep(1.0, times = nvars)
  ## }
  ## if (!inherits(x, "sparseMatrix")) {
  ##   x <- scale(x,FALSE,xs)
  ## } else {
  ##   attr(x, "xm") <- xm
  ##   attr(x, "xs") <- xs
  ## }
  ## lower.limits <- lower.limits * xs
  ## upper.limits <- upper.limits * xs

  if (!("strata" %in% names(attributes(y))))
    y <- glmnet::stratifySurv(y, rep(1.0, nobs))

  # Pre-compute and cache some important information: ordering by stop time
  # (ascending, deaths before censored), and for (start, stop] data: ordering
  # by start time and some match information.
  # Information is computed at the strata level
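  # e.g. within a stratum, order(time, status, decreasing = c(FALSE, TRUE))
  # sorts by time ascending and, at tied times, places deaths (status == 1)
  # before censored observations (status == 0)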
  if (ncol(y) == 2) {
    stop_o <- numeric(nobs)
    for (i in unique(attr(y, "strata"))) {
      ii <- which(attr(y, "strata") == i)
      stop_o[ii] <- order(y[ii, "time"], y[ii, "status"],
                          decreasing = c(FALSE, TRUE))
    }
    attr(y, "stop_time") <- stop_o
  } else {
    stop_o   <- numeric(nobs)
    start_o  <- numeric(nobs)
    ss_match <- numeric(nobs)
    for (i in unique(attr(y, "strata"))) {
      ii <- which(attr(y, "strata") == i)
      stop_o[ii]  <- order(y[ii, "stop"], y[ii, "status"],
                           decreasing = c(FALSE, TRUE))
      start_o[ii] <- order(y[ii, "start"], decreasing = c(FALSE))
      ss_match[ii] <- match(start_o[ii], stop_o[ii])
    }
    attr(y, "stop_time")  <- stop_o
    attr(y, "start_time") <- start_o
    attr(y, "ss_match")   <- ss_match
  }

  # compute null deviance
  # currently using std.weights = FALSE in order to match glmnet output
  nulldev <- glmnet::coxnet.deviance(y = y, offset = offset, weights = weights,
                             std.weights = FALSE)

  # compute lambda_max and lambda values
  nlam = as.integer(nlambda)
  user_lambda = FALSE   # did user provide their own lambda values?
  if (is.null(lambda)) {
    if (lambda.min.ratio >= 1) stop("lambda.min.ratio should be less than 1")
    lambda_max <- get_cox_lambda_max(x, y, alpha, weights, offset, exclude, vp)
    ulam <- exp(seq(log(lambda_max), log(lambda_max * lambda.min.ratio),
                    length.out = nlam))
  } else {  # user provided lambda values
    user_lambda = TRUE
    if (any(lambda < 0)) stop("lambdas should be non-negative")
    ulam <- as.double(rev(sort(lambda)))
    nlam <- as.integer(length(lambda))
  }

  # start progress bar
  if (trace.it == 1) pb  <- utils::txtProgressBar(min = 0, max = nlam, style = 3)

  glambda <- rep(1.0, nlam) # the actual glmnet lambda sequence, initially the scale factor
  beta <- matrix(0, nrow = nvars, ncol = nlam)
  dev.ratio <- rep(NA, length = nlam)
  fit <- NULL
  mnl <- min(nlam, control$mnlam)

  cur_lambda <- ulam
  cur_lambda[1] <- if(user_lambda) ulam[1] else control$big
  for (k in 1:nlam) {
    # get the correct lambda value to fit
    ## if (k > 1) {
    ##   cur_lambda <- ulam[k]
    ## } else {
    ##   cur_lambda <- ifelse(user_lambda, ulam[k], control$big)
    ## }

    effective_lambda <- cur_lambda[k]
    if (trace.it == 2) cat("Fitting lambda index", k, ":", ulam[k], fill = TRUE)
    fit <- multiview.cox.fit(x_list = x_list,
                             x = x,
                             y = y,
                             rho = rho,
                             # weights / sum(weights),
                             weights = weights,
                             lambda = effective_lambda,
                             alpha = alpha,
                             offset = offset,
                             thresh = thresh,
                             maxit = maxit,
                             penalty.factor = vp,
                             exclude = exclude,
                             lower.limits = lower.limits,
                             upper.limits = upper.limits,
                             warm = fit,
                             from.cox.path = TRUE,
                             save.fit = TRUE,
                             trace.it = trace.it)
    if (trace.it == 1) utils::setTxtProgressBar(pb, k)
    # if error code non-zero, a non-fatal error must have occurred
    # print warning, ignore this lambda value and return result
    # for all previous lambda values
    if (fit$jerr != 0) {
      errmsg <- jerr.multiview(fit$jerr, maxit, k)
      warning(errmsg$msg, call. = FALSE)
      k <- k - 1
      break
    }
    beta[, k] <- as.vector(fit$beta)
    dev.ratio[k] <- fit$dev.ratio
    glambda[k] <- fit$lambda_scale

    # early stopping if dev.ratio almost 1 or no improvement
    if (k >= mnl && user_lambda == FALSE) {
      if (dev.ratio[k] > control$devmax * 0.99 / 0.999) break
      if (k > 1 && dev.ratio[k] - dev.ratio[k - mnl + 1] <
          control$fdev * 100 * dev.ratio[k]) break
    }
  }
  if (trace.it == 1) {
    utils::setTxtProgressBar(pb, nlam)
    cat("", fill = TRUE)
  }

  # truncate beta, dev.ratio, lambda if necessary
  if (k < nlam) {
    indices <- 1:k
    beta <- beta[, indices, drop = FALSE]
    dev.ratio <- dev.ratio[indices]
    ulam <- ulam[indices]
    glambda <- glambda[indices]
  }

  ## So far glambda has merely been the scaling factor. Now fix it
  ## to reflect what it actually should be.
  glambda <- glambda * ulam
  
  # return coefficients to original scale (because of x standardization)
  beta <- beta / xs

  # output
  stepnames <- paste0("s", 0:(length(ulam) - 1))
  out <- list(a0 = NULL)
  out$beta <- Matrix::Matrix(beta, sparse = TRUE,
                             dimnames = list(vnames, stepnames))
  out$df <- as.vector(colSums(abs(beta) > 0))  # as.vector to remove names
  out$dim <- dim(beta)
  ## HERE IS A KEY SECTION OF CODE
  ## We always stick with the glmnet lambdas so as to concur with the case
  ## rho == 0. When rho == 0, we just call glmnet, and the lambdas returned are
  ## the glmnet lambdas. They have the property of idempotence: call glmnet
  ## again with the returned lambda sequence and you get the same results.
  ## For rho > 0, this idempotence does not hold, which can be disconcerting!
  ## This is because multiview.fit scales the lambda before calling
  ## glmnet:::elnet. So the lambda in the object should always be what the
  ## glmnet routines were called with, so that the glmnet prediction methods
  ## etc. still work. But we also store the unmodified lambdas as mvlambda, so
  ## that using mvlambda in the call will always guarantee idempotence.
  out$lambda <- glambda
  out$mvlambda <- ulam
  out$dev.ratio <- dev.ratio
  out$nulldev <- nulldev
  out$npasses <- fit$npasses
  out$jerr <- fit$jerr
  out$offset <- is.offset
  out$call <- this.call
  out$nobs <- nobs
  class(out) <- c("coxnet", "glmnet")
  out
}
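
## The lambda/mvlambda distinction stored above is easiest to see from the
## top level. Below is a minimal sketch (not run). The exact signature of the
## exported multiview() entry point for Cox models (family = "cox" with a
## Surv response) is an assumption for illustration, not something this file
## defines.
if (FALSE) {
  set.seed(1)
  n <- 100
  x1 <- matrix(rnorm(n * 5), n, 5)
  x2 <- matrix(rnorm(n * 4), n, 4)
  y <- survival::Surv(rexp(n), rbinom(n, 1, 0.7))
  # hypothetical top-level call; see the assumption noted above
  fit <- multiview::multiview(list(x1, x2), y, family = "cox", rho = 0.5)
  # fit$lambda holds the scaled values the glmnet routines were called with;
  # fit$mvlambda holds the unscaled multiview sequence. Refitting with
  # lambda = fit$mvlambda should reproduce fit (idempotence); refitting with
  # fit$lambda generally will not when rho > 0.
  refit <- multiview::multiview(list(x1, x2), y, family = "cox", rho = 0.5,
                                lambda = fit$mvlambda)
}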

#' Fit a Cox regression model with elastic net regularization for a single
#' value of lambda
#'
#' Fit a Cox regression model via penalized maximum likelihood for a single
#' value of lambda. Can deal with (start, stop] data and strata, as well as
#' sparse design matrices.
#'
#' WARNING: Users should not call \code{multiview.cox.fit} directly. Higher-level
#' functions in this package call \code{multiview.cox.fit} as a subroutine. If a
#' warm start object is provided, some of the other arguments in the function
#' may be overridden.
#'
#' \code{multiview.cox.fit} solves the elastic net problem for a single, user-specified
#' value of lambda. \code{multiview.cox.fit} works for Cox regression models, including
#' (start, stop] data and strata. It solves the problem using iteratively
#' reweighted least squares (IRLS). For each IRLS iteration, \code{multiview.cox.fit}
#' makes a quadratic (Newton) approximation of the log-likelihood, then calls
#' \code{elnet.fit} to minimize the resulting approximation.
#'
#' In terms of standardization: \code{multiview.cox.fit} does not standardize
#' \code{x} and \code{weights}. \code{penalty.factor} is standardized so that
#' it sums to \code{nvars}.
#'
#' @inheritParams multiview.cox.path
#' @param lambda A single value for the \code{lambda} hyperparameter.
#' @param warm Either a \code{glmnetfit} object or a list (with names \code{beta}
#' and \code{a0} containing coefficients and intercept respectively) which can
#' be used as a warm start. Default is \code{NULL}, indicating no warm start.
#' For internal use only.
#' @param from.cox.path Was \code{multiview.cox.fit()} called from \code{multiview.cox.path()}?
#' Default is FALSE. This has implications for the computation of the penalty factors.
#' @param save.fit Return the warm start object? Default is FALSE.
#' @return An object with class "coxnet", "glmnetfit" and "glmnet". The list
#' returned contains more components than that of a "glmnet" object.
#' \item{a0}{Intercept value, \code{NULL} for "cox" family.}
#' \item{beta}{A \code{nvars x 1} matrix of coefficients, stored in sparse matrix
#' format.}
#' \item{df}{The number of nonzero coefficients.}
#' \item{dim}{Dimension of coefficient matrix.}
#' \item{lambda}{Lambda value used.}
#' \item{dev.ratio}{The fraction of (null) deviance explained. The deviance
#' calculations incorporate weights if present in the model. The deviance is
#' defined to be 2*(loglike_sat - loglike), where loglike_sat is the log-likelihood
#' for the saturated model (a model with a free parameter per observation).
#' Hence dev.ratio=1-dev/nulldev.}
#' \item{nulldev}{Null deviance (per observation). This is defined to be
#' 2*(loglike_sat - loglike(Null)). The null model refers to the model with
#' all coefficients zero.}
#' \item{npasses}{Total passes over the data.}
#' \item{jerr}{Error flag, for warnings and errors (largely for internal
#' debugging).}
#' \item{offset}{A logical variable indicating whether an offset was included
#' in the model.}
#' \item{call}{The call that produced this object.}
#' \item{nobs}{Number of observations.}
#' \item{warm_fit}{If \code{save.fit=TRUE}, output of C++ routine, used for
#' warm starts. For internal use only.}
#' \item{family}{Family used for the model, always "cox".}
#' \item{converged}{A logical variable: was the algorithm judged to have
#' converged?}
#' \item{boundary}{A logical variable: is the fitted value on the boundary of
#' the attainable values?}
#' \item{obj_function}{Objective function value at the solution.}
#' @importFrom glmnet coxgrad
multiview.cox.fit <- function(x_list, x, y, rho, weights, lambda, alpha = 1.0, offset = rep(0, nobs),
                    thresh = 1e-10, maxit = 100000,
                    penalty.factor = rep(1.0, nvars), exclude = c(),
                    lower.limits = -Inf, upper.limits = Inf, warm = NULL,
                    from.cox.path = FALSE, save.fit = FALSE, trace.it = 0) {
  this.call <- match.call()
  control <- multiview.control()

  nviews  <- length(x_list)
  p_x  <- lapply(x_list, ncol)
  ends  <- cumsum(p_x)
  starts  <- c(1, ends[-nviews] + 1)

  beta_indices <- mapply(seq.int, starts, ends, SIMPLIFY = FALSE)
  pairs <- apply(utils::combn(nviews, 2), 2, identity, simplify = FALSE)
  view_components <- lapply(pairs,
                            function(pair) {
                              i  <- pair[1L]; j  <- pair[2L];
                              list(index = list(beta_indices[[i]], beta_indices[[j]]),
                                   x = list(x_list[[i]], x_list[[j]]))
                            })
  ### Prepare all generic arguments
  nobs <- nrow(x)
  nvars <- ncol(x)
  is.offset <- !(missing(offset))
  if (is.offset == FALSE) {
    offset <- as.double(rep(0, nobs))
  }
  # add xm and xs attributes if they are missing for sparse x
  # glmnet.fit assumes that x is already standardized. Any standardization
  # the user wants should be done beforehand.
  if (inherits(x, "sparseMatrix")) {
    if ("xm" %in% names(attributes(x)) == FALSE)
      attr(x, "xm") <- rep(0.0, times = nvars)
    if ("xs" %in% names(attributes(x)) == FALSE)
      attr(x, "xs") <- rep(1.0, times = nvars)
  }

  # if calling from multiview.cox.path(), we do not need to check the exclude
  # and penalty.factor arguments as they have been prepared by multiview.cox.path()
  if (!from.cox.path) {
    # check and standardize penalty factors (to sum to nvars)
    if(any(penalty.factor == Inf)) {
      exclude = c(exclude, seq(nvars)[penalty.factor == Inf])
      exclude = sort(unique(exclude))
    }
    if(length(exclude) > 0) {
      jd = match(exclude, seq(nvars), 0)
      if(!all(jd > 0)) stop ("Some excluded variables out of range")
      penalty.factor[jd] = 1 # ow can change lambda sequence
    }
    vp = pmax(0, penalty.factor)
    vp = as.double(vp * nvars / sum(vp))
  } else {
    vp <- as.double(penalty.factor)
  }

  ### check on limits
  lower.limits[lower.limits == -Inf] = -control$big
  upper.limits[upper.limits == Inf] = control$big
  if (length(lower.limits) < nvars) {
    lower.limits <- rep(lower.limits, nvars)
  } else {
    lower.limits <- lower.limits[seq(nvars)]
  }
  if (length(upper.limits) < nvars) {
    upper.limits <- rep(upper.limits, nvars)
  } else {
    upper.limits <- upper.limits[seq(nvars)]
  }
  ### end check on limits
  ### end preparation of generic arguments

  # compute null deviance
  if (is.null(warm)) {
    nulldev <- glmnet::coxnet.deviance(y = y, offset = offset, weights = weights,
                               # std.weights = FALSE)
                               std.weights = TRUE)
    fit <- NULL
    coefold <- rep(0, nvars)   # initial coefs = 0
    eta <- offset
  } else {
    if ("glmnetfit" %in% class(warm)) {
      if (!inherits(warm$warm_fit, "warmfit")) stop("Invalid warm start object")
      fit <- warm
      nulldev <- fit$nulldev
      coefold <- fit$warm_fit$a  # prev value for coefficients
      eta <- get_eta(x, coefold, 0) + offset
    } else if ("list" %in% class(warm) && "beta" %in% names(warm)) {
      fit <- warm
      nulldev <- glmnet::coxnet.deviance(y = y, offset = offset, weights = weights,
                                 # std.weights = FALSE)
                                 std.weights = TRUE)
      coefold <- fit$beta   # prev value for coefficients
      eta <- get_eta(x, coefold, 0) + offset
      fit$a0 <- 0  # needed for compatibility with elnet.fit()
    } else {
      stop("Invalid warm start object")
    }
  }

  start <- NULL     # current value for coefficients
  obj_val_old <- cox_obj_function(y, eta, weights, lambda, alpha, coefold, vp, view_components, rho)
  if (trace.it == 2) {
    cat("Warm Start Objective:", obj_val_old, fill = TRUE)
  }
  conv <- FALSE      # converged?

  sum_weights <- sum(weights)
  # IRLS loop
  for (iter in 1L:control$mxitnr) {
    xx <- x
    # compute working response and weights
    coxgrad_results <- sum_weights * glmnet::coxgrad(eta, y, weights,
                                                     # std.weights = FALSE,
                                                     std.weights = TRUE,
                                                     diag.hessian = TRUE)
    w <- -attributes(coxgrad_results)$diag_hessian * sum_weights
    zz <- (eta - offset) - ifelse(w != 0, -coxgrad_results / w, 0)

    w_sum <- sum(w)
    w_std <- w / sum(w)

    mzz <- sum(w * zz) / w_sum
    zzc <- zz - mzz

    mx <- apply(xx, 2, function(x) sum(w_std * x))

    if (!inherits(xx, "sparseMatrix")) {
      xx  <- sweep(xx, 2L, mx, check.margin = FALSE)
    } else {
      attr(xx, "xm") <- mx
      attr(xx, "xs") <- rep(1.0, times = nvars)
    }

    nx_list <- lapply(x_list, function(mat) {
      column_means <- apply(mat, 2, function(column) sum(w_std * column))
      sweep(mat, 2L, column_means, check.margin = FALSE)
    })

    features <- xx
    target <- zzc
    rows <- lapply(pairs, make_row, x_list = nx_list, p_x = p_x, rho = rho )
    features <- do.call(rbind, c(list(features), rows))
    target <- c(target, rep(0, length(pairs) * nobs))
    w <- c(w, rep(weights, length(pairs)))  #NOTE!!
    w_sum <- sum(w)
    w_std <- w / w_sum

    if (!is.null(fit)) {
      ## features and w_std below will have the right dimension from previous iteration!
      g_offset  <- c(offset, rep(offset, length(pairs)))
      g_eta <- get_eta(features, fit$warm_fit$a, 0) ## intercept is zero for larger fit!
      fit$warm_fit$r <- w_std * (target - g_eta + g_offset)
    }

    # have to update the weighted residual in our fit object
    # (in theory g and iy should be updated too, but we actually recompute g
    # and iy anyway in wls.f)
    ## if (!is.null(fit)) {
    ##   fit$warm_fit$r <- w * (z - eta + offset)
    ## }

    # cat(sprintf("SW is %f\n", w_sum))
    ## NOTE: dividing by w_sum below takes care of the glmnet parameterization
    ## lambda -> lambda * n
    ## lambda2 <- lambda * sum_weights / w_sum
    lambda2 <- lambda / w_sum

    fit <- elnet.fit(x = features, y = target, weights = w_std,
                     lambda = lambda2, alpha = alpha,
                     exclude = exclude,
                     intercept = FALSE, from.glmnet.fit = TRUE, save.fit = TRUE,
                     thresh = thresh, maxit = maxit,
                     upper.limits = upper.limits, penalty.factor = vp, warm = fit
                     )

    if (fit$jerr != 0) return(list(jerr = fit$jerr))
    # do WLS with warmstart from previous iteration
    ## fit <- elnet.fit(x, z, w, lambda, alpha, intercept = FALSE,
    ##                  thresh = thresh, maxit = maxit, penalty.factor = vp,
    ##                  exclude = exclude, lower.limits = lower.limits,
    ##                  upper.limits = upper.limits, warm = fit,
    ##                  from.glmnet.fit = TRUE, save.fit = TRUE)
    ## if (fit$jerr != 0) return(list(jerr = fit$jerr))

    # update coefficients, eta, mu and obj_val
    start <- fit$warm_fit$a
    eta <- get_eta(x, start, 0) + offset
    obj_val <- cox_obj_function(y, eta, weights, lambda, alpha, start, vp, view_components, rho)
    if (trace.it == 2) cat("Iteration", iter, "Objective:", obj_val, fill = TRUE)

    boundary <- FALSE
    halved <- FALSE  # did we have to halve the step size?
    # if objective function is not finite, keep halving the stepsize until it is finite
    # for the halving step, we probably have to adjust fit$g as well?
    if (!is.finite(obj_val) || obj_val > control$big) {
      warning("Infinite objective function!", call. = FALSE)
      if (is.null(coefold))
        stop("no valid set of coefficients has been found: please supply starting values",
             call. = FALSE)
      warning("step size truncated due to divergence", call. = FALSE)
      ii <- 1
      while (!is.finite(obj_val) || obj_val > control$big) {
        if (ii > control$mxitnr)
          stop("inner loop 1; cannot correct step size", call. = FALSE)
        ii <- ii + 1
        start <- (start + coefold)/2
        eta <- get_eta(x, start, 0) + offset
        obj_val <- cox_obj_function(y, eta, weights, lambda, alpha, start, vp, view_components, rho)
        if (trace.it == 2) cat("Iteration", iter, " Halved step 1, Objective:",
                               obj_val, fill = TRUE)
      }
      boundary <- TRUE
      halved <- TRUE
    }

    # if we did any halving, we have to update the coefficients, intercept
    # and weighted residual in the warm_fit object
    if (halved) {
      fit$warm_fit$a <- start
      g_eta <- get_eta(features, start, 0) ## intercept is zero for larger fit!
      fit$warm_fit$r <- w_std * (target - g_eta + g_offset)
    }

    # test for convergence
    if (abs(obj_val - obj_val_old)/(0.1 + abs(obj_val)) < control$epsnr) {
      conv <- TRUE
      break
    }
    else {
      coefold <- start
      obj_val_old <- obj_val
    }
  }
  # end of IRLS loop

  ## The scale below is used to determine the actual lambda seq for multiview
  fit$lambda_scale <- sum_weights / w_sum

  # checks on convergence and fitted values
  if (!conv)
    warning("multiview.cox.fit: algorithm did not converge", call. = FALSE)

  # prepare output object
  if (save.fit == FALSE) {
    fit$warm_fit <- NULL
  }
  # overwrite values from elnet.fit object
  fit$a0 <- list(NULL)
  fit$call <- this.call
  fit$offset <- is.offset
  fit$nulldev <- nulldev
  fit$dev.ratio <- 1 - glmnet::coxnet.deviance(y = y, pred = eta, weights = weights,
                                       std.weights = FALSE) / nulldev

  # add new key-value pairs to list
  fit$family <- "cox"
  fit$converged <- conv
  fit$boundary <- boundary
  fit$obj_function <- obj_val

  class(fit) <- c("coxnet", "glmnetfit", "glmnet")
  fit
}
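
## make_row() used in the IRLS loop above is defined elsewhere in the package.
## The sketch below is a hypothetical reconstruction, inferred from the
## objective in cox_obj_function(): each view pair (i, j) contributes
## 0.5 * rho * ||X_i b_i - X_j b_j||^2, and a fusion row block of the form
## [ sqrt(rho) * X_i, -sqrt(rho) * X_j ] (zeros elsewhere) with a zero target
## reproduces that penalty in a least squares criterion, ignoring the weight
## normalization handled via lambda_scale above.
if (FALSE) {
  make_row_sketch <- function(pair, x_list, p_x, rho) {
    i <- pair[1L]; j <- pair[2L]
    p <- unlist(p_x)
    ends <- cumsum(p)
    starts <- c(1, ends[-length(ends)] + 1)
    out <- matrix(0, nrow = nrow(x_list[[1L]]), ncol = sum(p))
    out[, starts[i]:ends[i]] <-  sqrt(rho) * x_list[[i]]
    out[, starts[j]:ends[j]] <- -sqrt(rho) * x_list[[j]]
    out
  }
  # check the correspondence on a toy example
  x_list <- list(matrix(rnorm(12), 4, 3), matrix(rnorm(8), 4, 2))
  beta <- rnorm(5)
  rho <- 0.3
  r <- make_row_sketch(c(1L, 2L), x_list, lapply(x_list, ncol), rho)
  lhs <- 0.5 * sum((r %*% beta)^2)
  rhs <- 0.5 * rho * sum((x_list[[1L]] %*% beta[1:3] -
                          x_list[[2L]] %*% beta[4:5])^2)
  stopifnot(all.equal(lhs, rhs))
}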

#' Elastic net objective function value for Cox regression model
#'
#' Returns the elastic net objective function value for Cox regression model.
#'
#' @param y Survival response variable, must be a \code{Surv} or
#' \code{stratifySurv} object.
#' @param pred Model's predictions for \code{y}.
#' @param weights Observation weights.
#' @param lambda A single value for the \code{lambda} hyperparameter.
#' @param alpha The elasticnet mixing parameter, with \eqn{0 \le \alpha \le 1}.
#' @param coefficients The model's coefficients.
#' @param vp Penalty factors for each of the coefficients.
#' @param view_components A list of lists, each containing the coefficient
#' indices and covariate matrices for a pair of views.
#' @param rho The fusion parameter.
cox_obj_function <- function(y, pred, weights, lambda, alpha,
                             coefficients, vp, view_components, rho) {
  coop_terms <- lapply(view_components, function(l) {
    sum((l$x[[1L]] %*% coefficients[ l$index[[1L]] ] -
          l$x[[2L]] %*% coefficients[ l$index[[2L]] ])^2)
  })

  glmnet::coxnet.deviance(y = y, pred = pred, weights = weights, std.weights = TRUE) +
    # lambda * pen_function(coefficients, alpha, vp)
    # inlining the definition to avoid a function call
    lambda * sum(vp * (alpha * abs(coefficients) + (1-alpha)/2 * coefficients^2)) +
    0.5 * rho *  Reduce(f = '+', x = coop_terms)
}
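
## A small self-check of the three terms in cox_obj_function() (a sketch, not
## package code): rebuild the deviance, elastic net penalty, and fusion
## penalty by hand and compare. Assumes glmnet and survival are installed and
## that this file has been sourced.
if (FALSE) {
  set.seed(1)
  n <- 20
  x1 <- matrix(rnorm(n * 3), n, 3)
  x2 <- matrix(rnorm(n * 2), n, 2)
  beta <- rnorm(5)
  y <- survival::Surv(rexp(n), rbinom(n, 1, 0.7))
  w <- rep(1, n)
  vc <- list(list(index = list(1:3, 4:5), x = list(x1, x2)))
  eta <- cbind(x1, x2) %*% beta
  lam <- 0.1; alpha <- 0.5; rho <- 0.3; vp <- rep(1, 5)
  obj <- cox_obj_function(y, eta, w, lam, alpha, beta, vp, vc, rho)
  manual <- glmnet::coxnet.deviance(y = y, pred = eta, weights = w,
                                    std.weights = TRUE) +
    lam * sum(vp * (alpha * abs(beta) + (1 - alpha) / 2 * beta^2)) +
    0.5 * rho * sum((x1 %*% beta[1:3] - x2 %*% beta[4:5])^2)
  stopifnot(all.equal(obj, manual))
}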

#' Get lambda max for Cox regression model
#'
#' Return the lambda max value for Cox regression model, used for computing
#' initial lambda values. For internal use only.
#'
#' This function is called by \code{multiview.cox.path} for the value of lambda max.
#'
#' When \code{x} is not sparse, it is expected to already be centered and scaled.
#' When \code{x} is sparse, the function will get its attributes \code{xm} and
#' \code{xs} for its centering and scaling factors. The value of
#' \code{lambda_max} changes depending on whether \code{x} is centered and
#' scaled or not, so we need \code{xm} and \code{xs} to get the correct value.
#'
#' @param x Input matrix, of dimension \code{nobs x nvars}; each row is an
#' observation vector. If it is a sparse matrix, it is assumed to be
#' unstandardized. It should have attributes \code{xm} and \code{xs}, where
#' \code{xm(j)} and \code{xs(j)} are the centering and scaling factors for
#' variable j respectively. If it is not a sparse matrix, it is assumed to be
#' standardized.
#' @param y Survival response variable, must be a \code{Surv} or
#' \code{stratifySurv} object.
#' @param alpha The elasticnet mixing parameter, with \eqn{0 \le \alpha \le 1}.
#' @param weights Observation weights.
#' @param offset Offset for the model. Default is a zero vector of length
#' \code{nrow(y)}.
#' @param exclude Indices of variables to be excluded from the model.
#' @param vp Separate penalty factors applied to each coefficient.
get_cox_lambda_max <- function(x, y, alpha, weights = rep(1, nrow(x)),
                               offset = rep(0, nrow(x)), exclude = c(),
                               vp = rep(1, ncol(x))) {
  nobs <- nrow(x); nvars <- ncol(x)

  # extract strata (if any)
  if ("strata" %in% names(attributes(y))) {
    strata <- attr(y, "strata")
  } else {
    strata <- rep(1, nobs)
  }
  if (length(strata) != nobs) stop("length of strata != nobs")

  # if some penalty factors are zero, we need to compute eta
  vp_zero <- setdiff(which(vp == 0), exclude)
  if (length(vp_zero) > 0) {
    tempx <- x[, vp_zero, drop = FALSE]
    if (inherits(tempx, "sparseMatrix")) {
      attr(tempx, "xm") <- rep(0.0, length(vp_zero))
      attr(tempx, "xs") <- attr(x, "xs")[vp_zero]
      ## coxph cannot handle sparse x. Strata not needed because y is expected
      ## to be stratified already.
      fit <- multiview.cox.fit(x = tempx, y = y, offset = offset,
                               weights = weights / sum(weights), lambda = 0)
      # put beta back on the original scale so the prediction below is correct
      fit$beta <- fit$beta / attr(tempx, "xs")
      eta <- as.numeric(predict(fit, newx = tempx, newoffset = offset,
                                newstrata = strata))
    } else {
      eps <- glmnet.control()$epsnr
      if (length(unique(strata)) == 1) {
        fit <- survival::coxph(y ~ offset(offset) + tempx, weights = weights,
                               eps = eps)
      } else {
        fit <- survival::coxph(y ~ offset(offset) + tempx + strata(strata),
                               weights = weights, eps = eps)
      }
      eta <- predict(fit, reference = "sample") ## coxph does strata-specific centering
    }
  } else {
    eta <- offset
  }
  eta <- eta - mean(eta) ## keep numbers small; partial likelihood independent of centering
  ju <- rep(1, nvars)
  ju[exclude] <- 0 # we have already included constant variables in exclude

  # get cox gradient at "null" point
  # note that coxgrad already includes weights, so no need to include them
  # in subsequent computations
  null_grad <- glmnet::coxgrad(eta, y, weights)

  if (inherits(x, "sparseMatrix")) {
    xm <- attr(x, "xm")
    xs <- attr(x, "xs")
    g <- abs((drop(t(null_grad) %*% x) - sum(null_grad) * xm) / xs)
  } else {
    g <- abs(drop(t(null_grad) %*% x))
  }
  g <- g / ifelse(vp > 0, vp, 1)
  g[ju == 0] <- 0
  lambda_max <- max(g) / max(alpha, 1e-3)
  return(lambda_max)
}
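
## A qualitative check of the lambda_max property (a sketch, not package
## code): at lambda = lambda_max the absolute score max |x^T grad| just
## reaches the penalty threshold, so a Cox fit at that value should have
## all-zero coefficients. Assumes a dense x, pre-standardized as the
## documentation above requires.
if (FALSE) {
  set.seed(1)
  n <- 60; p <- 8
  x <- scale(matrix(rnorm(n * p), n, p))
  y <- survival::Surv(rexp(n), rbinom(n, 1, 0.6))
  lmax <- get_cox_lambda_max(x, y, alpha = 1)
  fit <- glmnet::glmnet(x, y, family = "cox", alpha = 1, standardize = FALSE,
                        lambda = c(lmax, 0.99 * lmax))
  max(abs(fit$beta[, 1]))  # expected 0 at lambda = lambda_max
  max(abs(fit$beta[, 2]))  # typically nonzero just below lambda_max
}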
