R/zzz.R

Defines functions identify_missing extract_rhs_values C.stat AUC_msi sim_cure weib_negloglik_lat weib_negloglik_lat_beta weib.cure.negloglik weib.cure.gradient weibull.cure weib.cure.update weib_gradient_lat weib_grad_beta soft self_scale scad_penalty mcp_scad_negloglik_lat mcp_scad_negloglik_inc mcp_scad_gradient_lat mcp_scad_gradient_inc mcp_scad_cd_upd_lat mcp_scad_cd_upd_inc mcp_penalty nonparametric_time_generator select_model l1_negloglik_inc l1_negloglik_inc_b l1_gradient_inc l1_grad_b get_cox_lambda_max exp_update exp_negloglik exp_negloglik_lat exp_negloglik_lat_beta exp_gradient exp_gradient_lat exp_grad_beta exp_cure cv.gmifs.nofdr cv.gmifs.inner cv.gmifs.fdr

#' @srrstats {G2.3} *For univariate character input:*
#' @srrstats {G2.3b} *Either: use `tolower()` or equivalent to ensure input of character parameters is not case dependent; or explicitly document that parameters are strictly case-sensitive.*
#' @srrstats {G1.4} *Software should use [`roxygen2`](https://roxygen2.r-lib.org/) to document all functions.*
#' @srrstats {G1.4a} *All internal (non-exported) functions should also be documented in standard [`roxygen2`](https://roxygen2.r-lib.org/) format, along with a final `@noRd` tag to suppress automatic generation of `.Rd` files.*
#' @srrstats {G2.4} *Provide appropriate mechanisms to convert between different data types, potentially including:*
#' @srrstats {G2.4b} *explicit conversion to continuous via `as.numeric()`*
#' Optimization Function to Fit Cox LASSO Model Using the E-M Algorithm.
#'
#' @description
#' The cox_l1 function is the optimization algorithm for fitting a Cox
#' LASSO model using the EM algorithm and is called by cure.em.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param mu_inc initial intercept for the incidence portion of the model.
#' @param mu_lat initial intercept for the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param nIter maximum number of interations, default is 100.
#' @param tol small numeric value specifying convergence tolerance.
#'
#' @return \item{b_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the incidence portion of the model. Row is step and
#' column is variable.}
#' @return \item{beta_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{b_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the incidence portion of the model. Row is step
#' and column is variable.}
#' @return \item{itct_path}{Vector representing the solution path of the
#' intercept for the incidence portion of the model.}
#' @return \item{beta_u_path}{Matrix representing the solution path of the
#' un penalized coefficients in the latency portion of the model. Row is step
#' and column is variable.}
#' @return \item{logLik_inc}{Vector representing the expected penalized
#' complete-data log-likelihood for the incidence portion of the model for
#' each step in the solution path.}
#' @return \item{logLik_lat}{Vector representing the expected penalized
#' complete-data log-likelihood for the latency portion of the model for each
#' step in the solution path.}
#' @noRd
cox_l1 <-
  function (X_u = NULL, X_p = NULL, W_u = NULL, W_p = NULL, time,
            delta, mu_inc, mu_lat, inits = NULL, nIter = 100, tol = 1e-04)
  {
    N <- length(time)
    J <- ncol(X_p)
    M <- ncol(W_p)
    CAP <- 10
    event_time <- time[delta == 1]
    uniq_event_time <- unique(event_time)
    n0 <- length(uniq_event_time)
    I0 <- matrix(time, ncol = 1) %*% matrix(1, 1, n0) >= matrix(1,
                                                               N, 1) %*% matrix(uniq_event_time, nrow = 1)
    d_j <- as.numeric(table(event_time)[rank(unique(event_time))])
    T_n0 <- max(event_time)
    tail_ind <- which(time >= T_n0 & delta == 0)
    step <- 1
    if (!is.null(X_p))
      b_p <- rep(0, J)
    if (!is.null(W_p))
      beta_p <- rep(0, M)
    if (is.null(inits))
      inits <- initialization_cox(X_u, W_u, time, delta)
    itct <- inits$itct
    b_u <- inits$b_u
    beta_u <- inits$beta_u
    survprob <- inits$survprob
    if (is.null(X_u))
      b_x <- rep(itct, N)
    else b_x <- itct + X_u %*% b_u
    if (is.null(X_u)) {
      pen_fac_inc <- rep(1, ncol(X_p))
    }
    else if (is.null(X_p)) {
      pen_fac_inc <- rep(0, ncol(X_u))
    }
    else {
      pen_fac_inc <- c(rep(0, ncol(X_u)), rep(1, ncol(X_p)))
    }
    if (is.null(W_u)) {
      pen_fac_lat <- rep(1, ncol(W_p))
    }
    else if (is.null(W_p)) {
      pen_fac_lat <- rep(0, ncol(W_u))
    }
    else {
      pen_fac_lat <- c(rep(0, ncol(W_u)), rep(1, ncol(W_p)))
    }
    pir <- rep(1, N)
    llp1_0 <- llp2_0 <- 0
    lik_inc <- lik_lat <- c()
    b_p_path <- beta_p_path <- b_u_path <- beta_u_path <- itct_path <- NULL
    conv1 <- conv2 <- FALSE
    repeat {
      pi_x <- 1/(1 + exp(-b_x))
      numerator <- pi_x[delta == 0] * survprob[delta == 0]
      pir[delta == 0] <- numerator/(1 - pi_x[delta == 0] + numerator)
      if (!conv1) {
        fit_inc <- glmnet::glmnet(x = cbind(X_u, X_p), y = cbind(1 -
                                                                  pir, pir), family = "binomial", penalty.factor = pen_fac_inc,
                                 lambda = mu_inc, standardize = FALSE)
        coef_inc <- coef(fit_inc)
        itct <- max(-CAP, min(CAP, coef_inc[1]))
        if (is.null(X_u)) {
          b_p <- pmax(-CAP, pmin(CAP, coef_inc[-1]))
          b_x <- itct + X_p[, b_p != 0, drop = FALSE] %*%
            b_p[b_p != 0]
        }
        else if (is.null(X_p)) {
          b_u <- pmax(-CAP, pmin(CAP, coef_inc[2:(ncol(X_u) +
                                                   1)]))
          b_x <- itct + X_u %*% b_u
        }
        else {
          b_u <- pmax(-CAP, pmin(CAP, coef_inc[2:(ncol(X_u) +
                                                   1)]))
          b_p <- pmax(-CAP, pmin(CAP, coef_inc[-(1:(ncol(X_u) +
                                                     1))]))
          b_x <- itct + X_u %*% b_u + X_p[, b_p != 0, drop = FALSE] %*%
            b_p[b_p != 0]
        }
        llp1_1 <- sum(pir * b_x - log(1 + exp(b_x))) - N *
          mu_inc * sum(abs(b_p))
      }
      lik_inc <- c(lik_inc, llp1_1)
      if (!conv2) {
        nz_pir <- which(pir > 0)
        fit_lat <- glmnet::glmnet(x = cbind(W_u, W_p)[nz_pir,
        ], y = Surv(time = time[nz_pir], event = delta[nz_pir]),
        family = "cox", offset = log(pir[nz_pir]), penalty.factor = pen_fac_lat,
        lambda = mu_lat, standardize = FALSE)
        coef_lat <- coef(fit_lat)
        if (is.null(W_u)) {
          beta_p <- pmax(-CAP, pmin(CAP, as.numeric(coef_lat)))
          beta_w <- W_p[, beta_p != 0, drop = FALSE] %*%
            beta_p[beta_p != 0]
        }
        else if (is.null(W_p)) {
          beta_u <- pmax(-CAP, pmin(CAP, coef_lat[1:ncol(W_u)]))
          beta_w <- W_u %*% beta_u
        }
        else {
          beta_u <- pmax(-CAP, pmin(CAP, coef_lat[1:ncol(W_u)]))
          beta_p <- pmax(-CAP, pmin(CAP, coef_lat[-(1:ncol(W_u))]))
          beta_w <- W_u %*% beta_u + W_p[, beta_p != 0,
                                        drop = FALSE] %*% beta_p[beta_p != 0]
        }
        exp_beta_w_p <- exp(beta_w) * pir
        denom <- matrix(exp_beta_w_p, 1) %*% I0
        hazard <- rep(0, N)
        hazard[delta == 1] <- sapply(event_time, function(x) (d_j/denom)[uniq_event_time ==
                                                                          x])
        accum_hazard <- sapply(time, function(x) sum((d_j/denom)[uniq_event_time <=
                                                                  x]))
        surv_baseline <- exp(-accum_hazard)
        if (length(tail_ind) > 0) {
          wtail_alpha <- uniroot(function(a) sum(-exp_beta_w_p *
                                                  max(accum_hazard) * log(time/T_n0) * (time/T_n0)^a +
                                                  delta/a + delta * log(time/T_n0)), c(0.01,
                                                                                       10), extendInt = "yes")$root
          wtail_lambda <- (max(accum_hazard))^(1/wtail_alpha)/T_n0
          surv_baseline[tail_ind] <- exp(-(wtail_lambda *
                                            time[tail_ind])^wtail_alpha)
        }
        survprob <- surv_baseline^exp(beta_w)
        llp2_1 <- sum(log(hazard[delta == 1]) + beta_w[delta ==
                                                        1]) - sum(exp_beta_w_p * accum_hazard) - N *
          mu_lat * sum(abs(beta_p))
      }
      lik_lat <- c(lik_lat, llp2_1)
      itct_path <- c(itct_path, itct)
      if (!is.null(X_u))
        b_u_path <- rbind(b_u_path, b_u)
      if (!is.null(X_p))
        b_p_path <- rbind(b_p_path, b_p)
      if (!is.null(W_u))
        beta_u_path <- rbind(beta_u_path, beta_u)
      if (!is.null(W_p))
        beta_p_path <- rbind(beta_p_path, beta_p)
      if (!conv1 & abs(llp1_1 - llp1_0) < tol)
        conv1 <- TRUE
      if (!conv2 & abs(llp2_1 - llp2_0) < tol)
        conv2 <- TRUE
      if (step > 1 & (conv1 & conv2) | step >= nIter) {
        break
      }
      llp1_0 <- llp1_1
      llp2_0 <- llp2_1
      step <- 1 + step
    }
    output <- list(b_p_path = b_p_path, beta_p_path = beta_p_path,
                   b_u_path = b_u_path, itct_path = itct_path, beta_u_path = beta_u_path,
                   lik_inc = lik_inc, lik_lat = lik_lat)
    output
  }

#' Optimization Function for Fitting Cox MCP or SCAD Model Using the
#' E-M algorithm.
#'
#' @description
#' The cox_mcp_scad function is the optimization algorithm for fitting a Cox
#' LASSO model using the EM algorithm. This function is called by cureem.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param penalty specifies either the MCP or SCAD penalty.
#' @param mu_inc initial intercept for the incidence portion of the model.
#' @param mu_lat initial intercept for the latency portion of the model.
#' @param gamma_inc penalty parameter for the incidence portion of the model.
#' @param gamma_lat penalty parameter for the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param nIter maximum number of interations, default is 100.
#' @param tol small numeric value specifying convergence tolerance.
#'
#' @return \item{b_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the incidence portion of the model. Row is step and
#' column is variable.}
#' @return \item{b_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the incidence portion of the model. Row is step
#' and column is variable.}
#' @return \item{itct_path}{Vector representing the solution path of the
#' intercept for the incidence portion of the model.}
#' @return \item{beta_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{beta_u_path}{Matrix representing the solution path of the
#' un penalized coefficients in the latency portion of the model. Row is step
#' and column is variable.}
#' @return \item{logLik_inc}{Vector representing the expected penalized
#' complete-data log-likelihood for the incidence portion of the model for
#' each step in the solution path.}
#' @return \item{logLik_lat}{Vector representing the expected penalized
#' complete-data log-likelihood for the latency portion of the model for each
#' step in the solution path.}
#' @noRd
cox_mcp_scad <-
  function (X_u = NULL, X_p = NULL, W_u = NULL, W_p = NULL, time,
            delta, penalty = "MCP", mu_inc, mu_lat, gamma_inc = 3, gamma_lat = 3,
            inits = NULL, nIter = 1000, tol = 1e-04)
  {
    N <- length(time)
    J <- ncol(X_p)
    M <- ncol(W_p)
    CAP <- 20
    event_time <- time[delta == 1]
    uniq_event_time <- unique(event_time)
    n0 <- length(uniq_event_time)
    I0 <- matrix(time, ncol = 1) %*% matrix(1, 1, n0) >= matrix(1,
                                                               N, 1) %*% matrix(uniq_event_time, nrow = 1)
    d_j <- as.numeric(table(event_time)[rank(uniq_event_time)])
    Z <- sapply(uniq_event_time, function(t) colSums(W_p[time ==
                                                          t & delta == 1, , drop = FALSE]))
    T_n0 <- max(event_time)
    tail_ind <- which(time >= T_n0 & delta == 0)
    step <- 1
    if (!is.null(X_p))
      b_p <- rep(0, J)
    if (!is.null(W_p))
      beta_p <- rep(0, M)
    if (is.null(inits))
      inits <- initialization_cox(X_u, W_u, time, delta)
    itct <- inits$itct
    b_u <- inits$b_u
    beta_u <- inits$beta_u
    survprob <- inits$survprob
    if (is.null(X_u))
      b_x <- rep(itct, N)
    else b_x <- itct + X_u %*% b_u
    if (is.null(W_u))
      beta_w <- rep(0, N)
    else beta_w <- W_u %*% beta_u
    pir <- rep(1, N)
    exp_beta_w_p <- exp(beta_w) * pir
    lik_inc <- lik_lat <- c()
    b_p_path <- beta_p_path <- b_u_path <- beta_u_path <- itct_path <- NULL
    llp1_0 <- llp2_0 <- 0
    conv1 <- conv2 <- FALSE
    repeat {
      pi_x <- 1/(1 + exp(pmin(CAP, -b_x)))
      numerator <- pi_x[delta == 0] * survprob[delta == 0]
      pir[delta == 0] = numerator/pmax(1e-50, 1 - pi_x[delta ==
                                                         0] + numerator)
      if (!conv1) {
        for (l in 1:J) {
          bl <- b_p[l]
          b_p[l] <- max(-CAP, min(CAP, mcp_scad_cd_upd_inc(penalty = penalty,
                                                          pir, pi_x, X_p[, l], bl, gamma_inc, mu_inc)))
          b_x <- b_x + X_p[, l, drop = FALSE] %*% (b_p[l] -
                                                    bl)
          pi_x <- 1/(1 + exp(pmin(CAP, -b_x)))
        }
        out_inc <- optim(par = c(itct, b_u), fn = mcp_scad_negloglik_inc,
                        gr = mcp_scad_gradient_inc, b_p = b_p, X_u = X_u,
                        X_p = X_p, pir = pir, CAP = CAP, method = "BFGS")
        itct <- max(-CAP, min(CAP, out_inc$par[1]))
        if (!is.null(X_u))
          b_u <- pmax(-CAP, pmin(CAP, out_inc$par[2:(ncol(X_u) +
                                                      1)]))
        pen <- ifelse(penalty == "MCP", mcp_penalty(b_p, gamma_inc,
                                                   mu_inc), scad_penalty(b_p, gamma_inc, mu_inc))
        llp1_1 <- -out_inc$value - N * pen
        if (is.null(X_u))
          b_x <- itct + X_p[, b_p != 0, drop = FALSE] %*%
          b_p[b_p != 0]
        else b_x <- itct + X_u %*% b_u + X_p[, b_p != 0, drop = FALSE] %*%
          b_p[b_p != 0]
      }
      lik_inc <- c(lik_inc, llp1_1)
      if (!conv2) {
        for (l in 1:M) {
          betal <- beta_p[l]
          beta_p[l] <- max(-CAP, min(CAP, mcp_scad_cd_upd_lat(penalty,
                                                             exp_beta_w_p, W_p[, l], betal, Z[l, ], I0,
                                                             d_j, gamma_lat, mu_lat)))
          change <- W_p[, l, drop = FALSE] %*% (beta_p[l] -
                                                 betal)
          beta_w <- beta_w + change
          exp_beta_w_p <- exp(pmin(CAP, beta_w)) * pir
          l = l + 1
        }
        if (!is.null(W_u)) {
          out_lat <- optim(par = beta_u, fn = mcp_scad_negloglik_lat,
                          gr = mcp_scad_gradient_lat, beta_p = beta_p,
                          W_u = W_u, W_p = W_p, delta = delta, pir = pir,
                          I0 = I0, d_j = d_j, CAP = CAP, method = "BFGS")
          beta_u <- pmax(-CAP, pmin(CAP, out_lat$par))
          beta_w <- W_u %*% beta_u + W_p[, beta_p != 0,
                                        drop = FALSE] %*% beta_p[beta_p != 0]
          exp_beta_w_p <- exp(pmin(CAP, beta_w)) * pir
        }
        denom <- matrix(exp_beta_w_p, 1) %*% I0
        hazard <- rep(0, N)
        hazard[delta == 1] <- sapply(event_time, function(x) (d_j/denom)[uniq_event_time ==
                                                                          x])
        accum_hazard <- sapply(time, function(x) sum((d_j/denom)[uniq_event_time <=
                                                                  x]))
        surv_baseline <- exp(-accum_hazard)
        if (length(tail_ind) > 0) {
          tmp <- try(wtail_alpha <- uniroot(function(a) sum(-exp_beta_w_p *
                                                              max(accum_hazard) * log(time/T_n0) * (time/T_n0)^a +
                                                              delta/a + delta * log(time/T_n0)), c(0.01,
                                                                                                   10), extendInt = "yes")$root, silent = TRUE)
          if (!inherits(tmp, "try-error")) {
            wtail_lambda <- (max(accum_hazard))^(1/wtail_alpha)/T_n0
            surv_baseline[tail_ind] <- exp(-(wtail_lambda *
                                              time[tail_ind])^wtail_alpha)
          }
        }
        survprob <- surv_baseline^exp(pmin(CAP, beta_w))
        pen <- ifelse(penalty == "MCP", mcp_penalty(beta_p,
                                                   gamma_lat, mu_lat), scad_penalty(beta_p, gamma_lat,
                                                                                    mu_lat))
        llp2_1 <- sum(log(hazard[delta == 1]) + beta_w[delta ==
                                                        1])
        -sum(exp_beta_w_p * accum_hazard) - N * pen
      }
      lik_lat <- c(lik_lat, llp2_1)
      itct_path <- c(itct_path, itct)
      if (!is.null(X_u))
        b_u_path <- rbind(b_u_path, b_u)
      b_p_path <- rbind(b_p_path, b_p)
      if (!is.null(W_u))
        beta_u_path <- rbind(beta_u_path, beta_u)
      beta_p_path <- rbind(beta_p_path, beta_p)
      if (!conv1 & abs(llp1_1 - llp1_0) < tol)
        conv1 <- TRUE
      if (!conv2 & abs(llp2_1 - llp2_0) < tol)
        conv2 <- TRUE
      if (step > 1 & (conv1 & conv2) | step >= nIter) {
        break
      }
      llp1_0 <- llp1_1
      llp2_0 <- llp2_1
      step <- 1 + step
    }
    output <- list(b_p_path = b_p_path, b_u_path = b_u_path,
                   itct_path = itct_path, beta_p_path = beta_p_path, beta_u_path = beta_u_path,
                   lik_inc = lik_inc, lik_lat = lik_lat)
    output
  }

#' Wrapper Function for Fitting Parametric or Semi-parametric
#' Penalized MCMs Using the E-M Algorithm.
#'
#' @description
#' The cure.em function structures the data and then fits the selected EM MCM
#' (Cox LASSO invokes cox_l1; Cox MCP and Cox SCAD invoke cox_mcp_scad, Weibull
#' LASSO invokes weib_EM, and exponential LASSO invokes exp_EM) which are  the
#' relevant optimization functions. This function is also called by cv.em.inner
#' and cv.em.nofdr when cv_cureem is used for cross-validation.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param event event (delta = 1) and censoring (delta = 0) indicator.
#' @param penalty specifies either the MCP or SCAD penalty.
#' @param penalty.factor.inc vector with length equal to the number of penalized
#' incidence predictors where 0 indicates no penalty and 1 indicates a penalty
#' should be applied. If not specified, default is to penalize all incidence
#' predictors.
#' @param penalty.factor.lat vector with length equal to the number of penalized
#' latency predictors where 0 indicates no penalty and 1 indicates a penalty
#' should be applied. If not specified, default is to penalize all latency
#' predictors.
#' @param thresh small numeric value specifying convergence tolerance.
#' @param maxit maximum number of EM algorithm iterations.
#' @param lambda.inc LASSO penalty parameter for incidence portion of the model.
#' @param lambda.lat LASSO penalty for latency portion of the model.
#' @param gamma.inc penalty parameter for the incidence portion of the model
#' under SCAD or MCP penalty.
#' @param gamma.lat penalty parameter for the latency portion of the model
#' under SCAD or MCP penalty.
#' @param inits initial values used to optimization.
#'
#' @return \item{output}{List that includes the relevant coefficient paths and
#' additional parameter paths (alpha for Weibull, rate for Weibull and
#' exponential)}
#' @noRd
cure.em <-
  function (X_u = NULL, X_p, W_u = NULL, W_p, time, event, model = "cox",
            penalty = "lasso", penalty.factor.inc = rep(1, ncol(x.inc)),
            penalty.factor.lat = rep(1, ncol(x.lat)), thresh = 0.001,
            maxit = ifelse(penalty == "lasso", 100, 1000), lambda.inc = 0.1,
            lambda.lat = 0.1, gamma.inc = 3, gamma.lat = 3, inits = NULL)
  {
    x.inc <- cbind(X_u, X_p)
    x.lat <- cbind(W_u, W_p)
    if (is.null(dim(x.inc)) | is.null(dim(x.lat)))
      stop("Error: x.inc and x.lat should be matrices with 2 or more columns")
    if ((ncol(x.inc) <= 1) | (ncol(x.lat) <= 1))
      stop("Error: x.inc and x.lat should be matrices with 2 or more columns")
    if (nrow(x.inc) != nrow(x.lat) | nrow(x.lat) != length(time) |
        length(time) != length(event))
      stop("Error: Input dimension mismatch")
    if (is.data.frame(x.inc) | is.data.frame(x.lat)) {
      x.inc <- as.matrix(x.inc)
      x.lat <- as.matrix(x.lat)
    }
    if (!model %in% c("cox", "weibull", "exponential"))
      stop("Error: Only 'cox', 'weibull', 'exponential' available for 'model' parameter")
    if (!penalty %in% c("lasso", "MCP", "SCAD"))
      stop("Error: Only 'lasso', 'MCP', 'SCAD' available for 'penalty' parameter")
    if (any(!c(penalty.factor.inc, penalty.factor.lat) %in% c(0,
                                                              1)))
      stop("Error: Penalty factors can only contain 0 or 1")
    if (any(c(lambda.inc, lambda.lat, gamma.inc, gamma.lat) <=
            0))
      stop("Error: Penalty pamameters lambda and gamma should be positive")
    if (!is.null(inits))
      inits <- inits_check(model, N = length(time), penalty.factor.inc,
                          penalty.factor.lat, inits)
    if (model != "cox" & penalty != "lasso")
      penalty <- "lasso"
    if (model == "cox" & penalty == "lasso")
      fit <- cox_l1(X_u, X_p, W_u, W_p, time, event, lambda.inc,
                   lambda.lat, inits, maxit, thresh)
    else if (model == "cox" & penalty %in% c("MCP", "SCAD"))
      fit <- cox_mcp_scad(X_u, X_p, W_u, W_p, time, event, penalty,
                         lambda.inc, lambda.lat, gamma.inc, gamma.lat, inits,
                         maxit, thresh)
    else if (model == "weibull")
      fit <- weib_EM(X_u, X_p, W_u, W_p, time, event, lambda.inc,
                    lambda.lat, inits, maxit, thresh)
    else if (model == "exponential")
      fit <- exp_EM(X_u, X_p, W_u, W_p, time, event, lambda.inc,
                   lambda.lat, inits, maxit, thresh)
    model_select <- which.max(fit$lik_inc + fit$lik_lat)
    b <- rep(NA, ncol(x.inc))
    beta <- rep(NA, ncol(x.lat))
    b[penalty.factor.inc == 0] <- fit$b_u_path[model_select, ]
    b[penalty.factor.inc == 1] <- fit$b_p_path[model_select, ]
    beta[penalty.factor.lat == 0] <- fit$beta_u_path[model_select,
    ]
    beta[penalty.factor.lat == 1] <- fit$beta_p_path[model_select,
    ]
    output <- list(b0 = fit$itct_path[model_select], b = b, beta = beta)
    if (model %in% c("exponential", "weibull"))
      output$rate = fit$lambda_path[model_select]
    if (model == "weibull")
      output$alpha <- fit$alpha_path[model_select]
    output$logLik.inc <- fit$lik_inc[model_select]
    output$logLik.lat <- fit$lik_lat[model_select]
    return(output)
  }

#' Estimate Metrics (C-statistic and AUC) When Using Parallel Processing with
#' Cross-Validation and the EM Algorithm for Fitting a MCM.
#'
#' @description
#' When fitting the MCM using the EM algorithm, this function is called by
#' cv.em.nofdr and is used for cross-validation when parallel computing is used.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param folds_i selected fold for the current parallel process.
#' @param k number of cross-validation folds.
#' @param model character, indicating the latency distribution, either "cox",
#' "exponential", or "weibull".
#' @param penalty character indicating the type of penalty to impose, either
#' "lasso", "MCP", or "SCAD".
#' @param thresh small numeric value specifying convergence tolerance.
#' @param nIter maximum number of interations.
#' @param grid.tuning logical, indicates whether penalty parameters for the
#' incidence and latency portions of the model are simultaneously examined.
#' @param tuning_sequence sequence of lambda penalty parameter values to examine.
#' @param gamma.inc when the penalty is "MCP" or "SCAD", the values for the
#' gamma penalty in the incidence portion of the model.
#' @param gamma.lat when the penalty is "MCP" or "SCAD", the values for the
#' gamma penalty in the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param measure.inc character, indicating metric used for optimizing the model
#'  fit, either "c" for the MCM C-statistic or "auc" for the MCM AUC.
#' @param cure_cutoff numeric, value indicating at what time point MCM
#' C-statistic or MCM AUC should be calculating when optimizing.
#'
#' @return \item{AUC}{value of the MCM AUC at the indicated cure_cutoff.}
#' @return \item{Cstat}{value of the MCM C-statistic at the indicated cure_cutoff}
#' @noRd
cv.em.inner <-
  function (X_u, X_p, W_u, W_p, time, delta, folds_i, k, model = c("cox",
                                                                   "weibull", "exponential"), penalty = c("lasso", "MCP", "SCAD"),
            thresh = 1e-05, nIter = 10000, grid.tuning = FALSE, tuning_sequence = NULL,
            gamma.inc = 3, gamma.lat = 3, inits = NULL, measure.inc = c("c",
                                                                        "auc"), cure_cutoff = 5)
  {
    test_i <- which(folds_i == k)
    test_delta <- delta[test_i]
    test_time <- time[test_i]
    test_X_p <- X_p[test_i, , drop = FALSE]
    test_W_p <- W_p[test_i, , drop = FALSE]
    if (is.null(X_u)) {
      test_X_u <- NULL
      X_u_train <- NULL
      penalty.factor.inc <- rep(1, ncol(X_p))
    }
    else {
      test_X_u <- X_u[test_i, , drop = FALSE]
      X_u_train <- X_u[-test_i, , drop = FALSE]
      penalty.factor.inc <- rep(0:1, c(ncol(X_u), ncol(X_p)))
    }
    if (is.null(W_u)) {
      test_W_u <- NULL
      W_u_train <- NULL
      penalty.factor.lat <- rep(1, ncol(W_p))
    }
    else {
      test_W_u <- W_u[test_i, , drop = FALSE]
      W_u_train <- W_u[-test_i, , drop = FALSE]
      penalty.factor.lat <- rep(0:1, c(ncol(W_u), ncol(W_p)))
    }
    initial_val <- inits
    if (model == "cox")
      initial_val$survprob <- initial_val$survprob[-test_i]
    cst <- rep(NA, nrow(tuning_sequence))
    if (measure.inc == "auc")
      auc <- rep(NA, nrow(tuning_sequence))
    for (i in 1:nrow(tuning_sequence)) {
      lambda.inc <- tuning_sequence[i, 1]
      if (grid.tuning)
        lambda.lat <- tuning_sequence[i, 2]
      else lambda.lat <- lambda.inc
      train_out <- cure.em(X_u_train, X_p[-test_i, ], W_u_train,
                          W_p[-test_i, ], time[-test_i], delta[-test_i], model,
                          penalty, penalty.factor.inc, penalty.factor.lat,
                          thresh, nIter, lambda.inc, lambda.lat, gamma.inc,
                          gamma.lat, initial_val)
      if (is.null(X_u)) {
        b_u <- NULL
        b_p <- train_out$b
      }
      else {
        b_u <- train_out$b[1:ncol(X_u)]
        b_p <- train_out$b[(ncol(X_u) + 1):(length(train_out$b))]
      }
      if (is.null(W_u)) {
        beta_u <- NULL
        beta_p <- train_out$beta
      }
      else {
        beta_u <- train_out$beta[1:ncol(W_u)]
        beta_p <- train_out$beta[(ncol(W_u) + 1):(length(train_out$beta))]
      }
      cst[i] <- C.stat(cure_cutoff, b_u, train_out$b0, b_p,
                      beta_u, beta_p, test_delta, test_time, test_X_u,
                      test_X_p, test_W_u, test_W_p)
      if (measure.inc == "auc")
        auc[i] <- AUC_msi(cure_cutoff = cure_cutoff, b_u,
                         train_out$b0, b_p, test_delta, test_time, test_X_u,
                         test_X_p)
    }
    if (measure.inc == "auc") {
      res <- rbind(auc, cst)
      rownames(res) <- c("AUC", "Cstat")
    }
    else res <- cst
    return(res)
  }


#' Generate Knockoffs Within Cross-Validated MCM E-M Algorithm For False
#' Discovery Rate Control.
#'
#' @description
#' This function generates knockoffs as part of the cross-validated MCM fitting
#' procedure. Once the knockoffs are appended to the full dataset, this function
#' calls cv.em.fdr function which performs the optimization.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param model character, indicating the latency distribution, either "cox",
#' "exponential", or "weibull".
#' @param penalty character indicating the type of penalty to impose, either
#' "lasso", "MCP", or "SCAD".
#' @param fdr numeric value between 0 and 1 indicating the FDR threshold to use
#' for variable selection.
#' @param thresh small numeric value specifying convergence tolerance.
#' @param nIter maximum number of interations.
#' @param penalty.factor.inc vector with length equal to the number of penalized
#' incidence predictors where 0 indicates no penalty and 1 indicates a penalty
#' should be applied. If not specified, default is to penalize all incidence
#' predictors.
#' @param penalty.factor.lat vector with length equal to the number of penalized
#' latency predictors where 0 indicates no penalty and 1 indicates a penalty
#' should be applied. If not specified, default is to penalize all latency
#' predictors.
#' @param grid.tuning logical, indicates whether penalty parameters for the
#' incidence and latency portions of the model are simultaneously examined.
#' @param lambda.inc.list sequence of lambda penalty parameter values for the
#' incidence portion of the model to examine.
#' @param lambda.lat.list sequence of lambda penalty parameter values for the
#' incidence portion of the model to examine.
#' @param nlambda.inc if grid.tuning is TRUE, the ten different values of the
#' lambda penalty parameter are examined for optimizing the indicence portion of
#' the model. Otherwise, 50 different values of the lambda
#' penalty parameter are examined for optimizing the indicence portion of the
#' model.
#' @param nlambda.lat if grid.tuning is TRUE, the ten different values of the
#' lambda penalty parameter are examined for optimizing the latency portion of
#' the model. Otherwise, 50 different values of the lambda
#' penalty parameter are examined for optimizing the latency portion of the
#' model.
#' @param lambda.min.ratio.inc numeric value in (0, 1) representing the smallest
#' value for the lambda penalty as a fraction of the largest lambda penalty to
#' use in the incidence portion of the model.
#' @param lambda.min.ratio.lat numeric value in (0, 1) representing the smallest
#' value for the lambda penalty as a fraction of the largest lambda penalty to
#' use in the latency portion of the model.
#' @param gamma.inc when the penalty is "MCP" or "SCAD", the values for the
#' gamma penalty in the incidence portion of the model.
#' @param gamma.lat when the penalty is "MCP" or "SCAD", the values for the
#' gamma penalty in the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param n_folds number of folds to use in the cross-validation procedure.
#' @param measure.inc character, indicating metric used for optimizing the model
#'  fit, either "c" for the MCM C-statistic or "auc" for the MCM AUC.
#' @param one.se logical, if TRUE, identifies the optimal model as the model
#' within one SE of the selected and estimated measure.inc ("c" or "auc").
#' Allows selection of a more parsimonious model than that achieved when
#' maximizing "c" or "auc".
#' @param cure_cutoff numeric, value indicating at what time point MCM
#' C-statistic or MCM AUC should be calculating when optimizing.
#' @param parallel logical, if TRUE, parallel processing is used.
#' @param seed random seed to set to enhance reproducibility.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return \item{b0}{numeric, estimated intercept for the incidence portion of
#' the model.}
#' @return \item{b}{vector, estimated coefficients for the incidence portion of
#' the model.}
#' @return \item{beta}{vector, estimated coefficients for the latency portion of
#' the model.}
#' @return \item{rate}{numeric, estimated rate when fitting a Weibull or
#' exponential MCM.}
#' @return \item{alpha}{numeric, estimated alpha parameter when fitting a
#' Weibull MCM.}
#' @return \item{selected_b}{indicates which, if any, of the incidence
#' coefficients have an fdr less than the user-specified fdr threshold.}
#' @return \item{selected_beta}{indicates which, if any, of the latency
#' coefficients have an fdr less than the user-specified fdr threshold.}
#' @noRd
cv.em.fdr <-
  function (X_u, X_p, W_u, W_p, time, delta, model = c("weibull",
                                                       "exponential"), penalty = c("lasso", "MCP", "SCAD"), fdr = 0.2,
            thresh = 0.001, nIter = ifelse(penalty == "lasso", 100, 1000),
            penalty.factor.inc, penalty.factor.lat, grid.tuning = FALSE,
            lambda.inc.list = NULL, lambda.lat.list = NULL, nlambda.inc = ifelse(grid.tuning,
                                                                                 10, 50), nlambda.lat = ifelse(grid.tuning, 10, 50), lambda.min.ratio.inc = 0.1,
            lambda.min.ratio.lat = 0.1, gamma.inc = 3, gamma.lat = 3,
            inits = NULL, n_folds = 5, measure.inc = c("c", "auc"), one.se = FALSE,
            cure_cutoff = 5, parallel = FALSE, seed = NULL, verbose = TRUE)
  {
    if (!is.null(seed))
      withr::local_seed(seed)
    X_k <- knockoff::create.second_order(X_p, method = "asdp",
                                        shrink = T)
    W_k <- knockoff::create.second_order(W_p, method = "asdp",
                                        shrink = T)
    Xaug <- cbind(X_p, X_k)
    Waug <- cbind(W_p, W_k)
    fit <- cv.em.nofdr(X_u, Xaug, W_u, Waug, time, delta, model,
                      penalty, thresh, nIter, grid.tuning, lambda.inc.list,
                      lambda.lat.list, nlambda.inc, nlambda.lat, lambda.min.ratio.inc,
                      lambda.min.ratio.lat, gamma.inc, gamma.lat, inits, n_folds,
                      measure.inc, one.se, cure_cutoff, parallel, seed, verbose)
    if (is.null(X_u))
      b_p <- fit$b
    else {
      b_u <- fit$b[1:ncol(X_u)]
      b_p <- fit$b[(ncol(X_u) + 1):(length(fit$b))]
    }
    if (is.null(W_u))
      beta_p <- fit$beta
    else {
      beta_u <- fit$beta[1:ncol(W_u)]
      beta_p <- fit$beta[(ncol(W_u) + 1):(length(fit$beta))]
    }
    Z_b <- abs(b_p)
    Z_beta <- abs(beta_p)
    J <- ncol(X_p)
    M <- ncol(W_p)
    orig_b <- 1:J
    orig_beta <- 1:M
    W_b <- Z_b[orig_b] - Z_b[orig_b + J]
    W_beta <- Z_beta[orig_beta] - Z_beta[orig_beta + M]
    T_b <- knockoff::knockoff.threshold(W_b, fdr = fdr, offset = 1)
    T_beta <- knockoff::knockoff.threshold(W_beta, fdr = fdr,
                                          offset = 1)
    selected_b <- (1:J)[W_b > T_b]
    selected_beta <- (1:M)[W_beta > T_beta]
    if (identical(unname(selected_b), integer(0))) {
      b_p[1:J] <- 0
    }
    else {
      b_p[-selected_b] <- 0
    }
    if (identical(unname(selected_beta), integer(0))) {
      beta_p[1:M] <- 0
    }
    else {
      beta_p[-selected_beta] <- 0
    }
    if (is.null(X_u)) {
      b <- b_p[1:J]
    }
    else {
      b <- c(b_u, b_p[1:J])
    }
    if (is.null(W_u)) {
      beta <- beta_p[1:M]
    }
    else {
      beta <- c(beta_u, beta_p[1:M])
    }
    return(list(b0 = fit$b0, b = b, beta = beta, rate = fit$rate,
                alpha = fit$alpha, selected_b = selected_b, selected_beta = selected_beta))
  }

#' Cross-Validation Optimization Function for MCM Using the E-M Algorithm.
#'
#' @description
#' This function is the main optimization function for fitting a MCM using the
#' EM algorithm. This function is called by cv_cureem. It is also called by
#' cv.em.fdr as cv.em.fdr first generates knockoffs then uses
#' this function for optimization.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param model character, indicating the latency distribution, either "cox",
#' "exponential", or "weibull".
#' @param penalty character indicating the type of penalty to impose, either
#' "lasso", "MCP", or "SCAD".
#' @param thresh small numeric value specifying convergence tolerance.
#' @param nIter maximum number of iterations.
#' @param grid.tuning logical, indicates whether penalty parameters for the
#' incidence and latency portions of the model are simultaneously examined.
#' @param lambda.inc.list sequence of lambda penalty parameter values for the
#' incidence portion of the model to examine.
#' @param lambda.lat.list sequence of lambda penalty parameter values for the
#' incidence portion of the model to examine.
#' @param nlambda.inc if grid.tuning is TRUE, the ten different values of the
#' lambda penalty parameter are examined for optimizing the indicence portion of
#' the model. Otherwise, 50 different values of the lambda
#' penalty parameter are examined for optimizing the indicence portion of the
#' model.
#' @param nlambda.lat if grid.tuning is TRUE, the ten different values of the
#' lambda penalty parameter are examined for optimizing the latency portion of
#' the model. Otherwise, 50 different values of the lambda
#' penalty parameter are examined for optimizing the latency portion of the
#' model.
#' @param lambda.min.ratio.inc numeric value in (0, 1) representing the smallest
#' value for the lambda penalty as a fraction of the largest lambda penalty to
#' use in the incidence portion of the model.
#' @param lambda.min.ratio.lat numeric value in (0, 1) representing the smallest
#' value for the lambda penalty as a fraction of the largest lambda penalty to
#' use in the latency portion of the model.
#' @param gamma.inc when the penalty is "MCP" or "SCAD", the values for the
#' gamma penalty in the incidence portion of the model.
#' @param gamma.lat when the penalty is "MCP" or "SCAD", the values for the
#' gamma penalty in the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param n_folds number of folds to use in the cross-validation procedure.
#' @param measure.inc character, indicating metric used for optimizing the model
#'  fit, either "c" for the MCM C-statistic or "auc" for the MCM AUC.
#' @param one.se logical, if TRUE, identifies the optimal model as the model
#' within one SE of the selected and estimated measure.inc ("c" or "auc").
#' Allows selection of a more parsimonious model than that achieved when
#' maximizing "c" or "auc".
#' @param cure_cutoff numeric, value indicating at what time point MCM
#' C-statistic or MCM AUC should be calculating when optimizing.
#' @param parallel logical, if TRUE, parallel processing is used.
#' @param seed random seed to set to enhance reproducibility.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return list that includes the relevant coefficient paths and
#' additional parameter paths (alpha for Weibull, rate for Weibull and
#' exponential).
#' @noRd
cv.em.nofdr <-
  function (X_u, X_p, W_u, W_p, time, delta, model = c("cox", "weibull",
                                                       "exponential"), penalty = c("lasso", "MCP", "SCAD"), thresh = 0.001,
            nIter = ifelse(penalty == "lasso", 100, 1000), grid.tuning = FALSE,
            lambda.inc.list = NULL, lambda.lat.list = NULL, nlambda.inc = ifelse(grid.tuning,
                                                                                 10, 50), nlambda.lat = ifelse(grid.tuning, 10, 50), lambda.min.ratio.inc = 0.1,
            lambda.min.ratio.lat = 0.1, gamma.inc = 3, gamma.lat = 3,
            inits = NULL, n_folds = 5, measure.inc = c("c", "auc"), one.se = FALSE,
            cure_cutoff = 5, parallel = FALSE, seed = NULL, verbose)
  {
    if (!is.null(seed))
      withr::local_seed(seed)
    if (!grid.tuning) {
      #if ((!is.null(lambda.inc.list) & !is.null(lambda.lat.list) &
      #     !identical(lambda.inc.list, lambda.lat.list)) | (nlambda.inc !=
      #                                                      nlambda.lat) | (lambda.min.ratio.inc != lambda.min.ratio.lat))
      #  warning("Warning: Grid tuning is off. Same lambda sequence for incidence and latency was used.")
      if (length(lambda.inc.list) > length(lambda.lat.list))
        lambda.lat.list <- lambda.inc.list
      else lambda.inc.list <- lambda.lat.list
      nlambda.inc <- nlambda.lat <- max(nlambda.inc, nlambda.lat)
      lambda.min.ratio.inc <- lambda.min.ratio.lat <- min(lambda.min.ratio.inc,
                                                          lambda.min.ratio.lat)
    }
    N <- length(time)
    if (is.null(inits)) {
      if (model == "cox")
        inits <- initialization_cox(X_u, W_u, time, delta)
      else inits <- initialization_parametric(X_u, W_u, time,
                                             delta, model)
    }
    if (grid.tuning) {
      if (is.null(lambda.inc.list) | is.null(lambda.lat.list)) {
        pir <- rep(1, N)
        if (is.null(X_u))
          pi_x <- rep(mean(delta), N)
        else pi_x <- 1/(1 + exp(-inits$itct - X_u %*% inits$b_u))
        if (is.null(W_u))
          beta_w <- rep(0, N)
        else beta_w <- W_u %*% inits$beta_u
        if (model == "cox")
          survprob <- inits$survprob
        else if (model == "weibull")
          survprob <- exp(-(inits$lambda * time)^(inits$alpha) *
                           exp(beta_w))
        else if (model == "exponential")
          survprob <- exp(-inits$lambda * time * exp(beta_w))
        numerator = pi_x[delta == 0] * survprob[delta ==
                                                  0]
        pir[delta == 0] = numerator/(1 - pi_x[delta == 0] +
                                       numerator)
        if (is.null(lambda.inc.list)) {
          lambda.max.inc <- max(abs(matrix(pir - 0.5, 1,
                                          N) %*% X_p))/N
          lambda.min.inc <- lambda.max.inc * lambda.min.ratio.inc
          k1 = (0:(nlambda.inc - 1))/nlambda.inc
          lambda.inc.list <- lambda.max.inc * lambda.min.ratio.inc^k1
        }
        if (is.null(lambda.lat.list)) {
          nz_pir <- which(pir > 0)
          lambda.max.lat <- get_cox_lambda_max(x = cbind(W_u,
                                                        W_p)[nz_pir, ], time = time[nz_pir], event = delta[nz_pir],
                                              offset = log(pir[nz_pir]))
          lambda.min.lat <- lambda.max.lat * lambda.min.ratio.lat
          k2 <- (0:(nlambda.lat - 1))/nlambda.lat
          lambda.lat.list <- lambda.max.lat * lambda.min.ratio.lat^k2
        }
      }
      lambda.grid <- expand.grid(lambda.inc.list, lambda.lat.list)
    }
    else {
      if (is.null(lambda.inc.list)) {
        lambda_max <- max(colSums(abs(X_p)), colSums(abs(W_p)))/(2 *
                                                                  N)
        lambda_min <- lambda_max * lambda.min.ratio.inc
        k1 <- (0:(nlambda.inc - 1))/nlambda.inc
        lambda.list <- lambda_max * lambda.min.ratio.inc^k1
      }
      else lambda.list <- lambda.inc.list
    }
    if (grid.tuning)
      tuning_sequence <- lambda.grid
    else tuning_sequence <- matrix(lambda.list, ncol = 1)
    folds_i <- sample(rep(1:n_folds, length.out = N))
    Cstat <- matrix(NA, nrow(tuning_sequence), n_folds)
    if (measure.inc == "auc")
      AUC <- matrix(NA, nrow(tuning_sequence), n_folds)
    if (parallel) {
      ncores <- n_folds
      cl <- parallel::makeCluster(detectCores())
      doParallel::registerDoParallel(cl)
      res_list <- foreach(k = 1:n_folds) %dopar% {
        res <- cv.em.inner(X_u = X_u, X_p = X_p, W_u = W_u, W_p = W_p, time = time, delta = delta,
                          folds_i = folds_i, k = k, model = model, penalty = penalty, thresh = thresh, nIter = nIter, grid.tuning = grid.tuning,
                          tuning_sequence = tuning_sequence, gamma.inc = gamma.inc, gamma.lat = gamma.lat, inits = inits,
                          measure.inc = measure.inc, cure_cutoff = cure_cutoff)
        return(res)
      }
      parallel::stopCluster(cl)
      if (measure.inc == "auc") {
        for (k in 1:n_folds) {
          AUC[, k] <- res_list[[k]][1, ]
          Cstat[, k] <- res_list[[k]][2, ]
        }
      }
      else {
        for (k in 1:n_folds) Cstat[, k] <- res_list[[k]]
      }
    }
    else {
      for (k in 1:n_folds) {
        if (verbose)
          message("Fold ", k, " out of ", n_folds, " training...\n")
        res <- cv.em.inner(X_u = X_u, X_p = X_p, W_u = W_u, W_p = W_p, time = time, delta = delta,
                          folds_i = folds_i, k = k, model = model, penalty = penalty, thresh = thresh, nIter = nIter, grid.tuning = grid.tuning,
                          tuning_sequence = tuning_sequence, gamma.inc = gamma.inc, gamma.lat = gamma.lat, inits = inits,
                          measure.inc = measure.inc, cure_cutoff = cure_cutoff)
        if (measure.inc == "auc") {
          AUC[, k] <- res[1, ]
          Cstat[, k] <- res[2, ]
        }
        else Cstat[, k] <- res
      }
    }
    if (measure.inc == "auc") {
      auc <- rowMeans(AUC, na.rm = T)
      c_stat <- rowMeans(Cstat, na.rm = T)
      if (one.se) {
        auc_sd <- sqrt(apply(AUC, 1, var, na.rm = T) * (n_folds -
                                                         1)/n_folds/(nrow(X_p) - 1))
        c_stat_sd <- sqrt(apply(Cstat, 1, var, na.rm = T) *
                           (n_folds - 1)/n_folds/(nrow(X_p) - 1))
        opt_step_b <- which.max(auc)
        opt_step_beta <- which.max(c_stat)
        model_select_b <- which(auc >= (auc[opt_step_b] -
                                         auc_sd[opt_step_b]))[1]
        model_select_beta <- which(c_stat >= (c_stat[opt_step_beta] -
                                               c_stat_sd[opt_step_beta]))[1]
      }
      else {
        model_select_b <- which.max(auc)
        model_select_beta <- which.max(c_stat)
      }
    }
    else {
      c_stat <- rowMeans(Cstat, na.rm = T)
      if (one.se) {
        c_stat_sd <- sqrt(apply(Cstat, 1, var, na.rm = T) *
                           (n_folds - 1)/n_folds/(nrow(X_p) - 1))
        opt_step <- which.max(c_stat)
        model_select <- model_select_b <- model_select_beta <- which(c_stat >=
                                                                       (c_stat[opt_step] - c_stat_sd[opt_step]))[1]
      }
      else model_select <- model_select_b <- model_select_beta <- which.max(c_stat)
    }
    optimal_lambda_b <- tuning_sequence[model_select_b, 1]
    if (grid.tuning)
      optimal_lambda_beta <- tuning_sequence[model_select_beta,
                                            2]
    else optimal_lambda_beta <- tuning_sequence[model_select_beta,
                                               1]
    if (verbose) {
      message("Selected lambda for incidence: ", round(optimal_lambda_b,
                                                       3), "\n")
      message("Selected lambda for latency: ", round(optimal_lambda_beta,
                                                     3), "\n")
      message("Maximum C-statistic: ", max(c_stat), "\n")
      if (measure.inc == "auc")
        message("Maximum AUC: ", max(auc), "\n")
      if (verbose)
        message("Fitting a final model...\n")
    }
    if (is.null(X_u))
      penalty.factor.inc <- rep(1, ncol(X_p))
    else penalty.factor.inc <- rep(0:1, c(ncol(X_u), ncol(X_p)))
    if (is.null(W_u))
      penalty.factor.lat <- rep(1, ncol(W_p))
    else penalty.factor.lat <- rep(0:1, c(ncol(W_u), ncol(W_p)))
    output <- cure.em(X_u, X_p, W_u, W_p, time, delta, model,
                     penalty, penalty.factor.inc, penalty.factor.lat, thresh,
                     nIter, optimal_lambda_b, optimal_lambda_beta, gamma.inc,
                     gamma.lat, inits)
    output$selected_lambda_inc <- optimal_lambda_b
    output$selected_lambda_lat <- optimal_lambda_beta
    output$max_c <- max(c_stat)
    if (measure.inc == "auc")
      output$max.auc <- max(auc)
    return(output)
  }

#' Generate Knockoffs Within Cross-Validated MCM GMIFS Algorithm For False
#' Discovery Rate Control.
#'
#' @description
#' This function generates knockoffs as part of the cross-validated MCM fitting
#' procedure when using the GMIFS algorithm, so it is called by cv_curegmifs.
#' Once the knockoffs are appended to the full dataset, this function
#' calls cv.em.fdr function which performs the optimization.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param model character, indicating the latency distribution, either
#' "exponential" or "weibull".
#' @param fdr numeric value between 0 and 1 indicating the FDR threshold to use
#' for variable selection.
#' @param thresh small numeric value specifying convergence tolerance.
#' @param nIter maximum number of iterations.
#' @param epsilon small numeric value used to update the regression coefficients.
#' @param inits initial values used to optimization.
#' @param n_folds number of folds to use in the cross-validation procedure.
#' @param measure.inc character, indicating metric used for optimizing the model
#'  fit, either "c" for the MCM C-statistic or "auc" for the MCM AUC.
#' @param one.se logical, if TRUE, identifies the optimal model as the model
#' within one SE of the selected and estimated measure.inc ("c" or "auc").
#' Allows selection of a more parsimonious model than that achieved when
#' maximizing "c" or "auc".
#' @param cure_cutoff numeric, value indicating at what time point MCM
#' C-statistic or MCM AUC should be calculating when optimizing.
#' @param parallel logical, if TRUE, parallel processing is used.
#' @param seed random seed to set to enhance reproducibility.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return list that includes the relevant coefficient paths and
#' additional parameter paths (alpha for Weibull, rate for Weibull and
#' exponential).
#'
#' @noRd
cv.gmifs.fdr <-
  function(X_u, X_p, W_u, W_p, time, delta, model = c("weibull", "exponential"),
           fdr = 0.2, thresh = 1e-5, nIter = 1e4, epsilon = 0.001, inits = NULL,
           n_folds = 5, measure.inc = c("c", "auc"), one.se = FALSE,
           cure_cutoff = 5, parallel = FALSE, seed = NULL, verbose = TRUE) {
    if (!is.null(seed)) withr::local_seed(seed)
    X_k <- knockoff::create.second_order(X_p, method = "asdp", shrink = T)
    W_k <- knockoff::create.second_order(W_p, method = "asdp", shrink = T)
    Xaug <- cbind(X_p, X_k)
    Waug <- cbind(W_p, W_k)
    fit <- cv.gmifs.nofdr(
      X_u, Xaug, W_u, Waug, time, delta, model, thresh, nIter, epsilon, inits,
      n_folds, measure.inc, one.se, cure_cutoff,
      parallel, seed, verbose
    )
    b_p <- fit$b_p
    b_u <- fit$b_u
    beta_p <- fit$beta_p
    beta_u <- fit$beta_u
    Z_b <- abs(fit$b_p)
    Z_beta <- abs(fit$beta_p)
    J <- ncol(X_p)
    M <- ncol(W_p)
    orig_b <- 1:J
    orig_beta <- 1:M
    W_b <- Z_b[orig_b] - Z_b[orig_b + J]
    W_beta <- Z_beta[orig_beta] - Z_beta[orig_beta + M]
    T_b <- knockoff::knockoff.threshold(W_b, fdr = fdr, offset = 1)
    T_beta <- knockoff::knockoff.threshold(W_beta, fdr = fdr, offset = 1)
    selected_b <- (1:J)[W_b > T_b]
    selected_beta <- (1:M)[W_beta > T_beta]
    # added b_u code above and this code to output b0, b, beta, rate, and alpha
    b_p <- b_p[1:J]
    beta_p <- beta_p[1:M]
    if (identical(unname(selected_b), integer(0))) {
      b_p[1:J] <- 0
    } else {
      b_p[-selected_b] <- 0
    }
    if (identical(unname(selected_beta), integer(0))) {
      beta_p[1:M] <- 0
    } else {
      beta_p[-selected_beta] <- 0
    }
    return(list(b0 = fit$b0, b_u = b_u, b_p = b_p, beta_u = beta_u,
                beta_p = beta_p, rate = fit$rate, alpha = fit$alpha,
                selected_b = selected_b, selected_beta = selected_beta))
  }

#' Estimate Metrics (C-statistic and AUC) When Using Parallel Processing with
#' Cross-Validation and the GMIFS Algorithm for Fitting a MCM.
#'
#' @description
#' Function called by cv.gmifs.nofdr for estimating AUC and C-statistics within
#' cross-validation procedure when using the GMIFS algorithm. Used with
#' foreach when parallel computing is used.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param folds_i selected fold for the current parallel process.
#' @param k number of cross-validation folds.
#' @param model character, indicating the latency distribution, either
#' "exponential" or "weibull".
#' @param thresh small numeric value specifying convergence tolerance.
#' @param nIter maximum number of iterations.
#' @param epsilon small numeric value used to update the regression coefficients.
#' @param inits initial values used to optimization.
#' @param measure.inc character, indicating metric used for optimizing the model
#'  fit, either "c" for the MCM C-statistic or "auc" for the MCM AUC.
#' @param cure_cutoff numeric, value indicating at what time point MCM
#' C-statistic or MCM AUC should be calculating when optimizing.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return \item{AUC}{value of the MCM AUC at the indicated cure_cutoff.}
#' @return \item{Cstat}{value of the MCM C-statistic at the indicated
#' cure_cutoff.}
#' @noRd
cv.gmifs.inner <-
  function(X_u, X_p, W_u, W_p, time, delta, folds_i, k,
           model = c("weibull", "exponential"), thresh = 1e-5, nIter = 1e4,
           epsilon = 0.001, inits = NULL, measure.inc = c("c", "auc"),
           cure_cutoff = 5, verbose = TRUE) {
    test_i <- which(folds_i == k)
    test_delta <- delta[test_i]
    test_time <- time[test_i]
    test_X_p <- X_p[test_i, , drop = F]
    test_W_p <- W_p[test_i, , drop = F]
    if (is.null(X_u)) test_X_u <- NULL else test_X_u <- X_u[test_i, , drop = F]
    if (is.null(W_u)) test_W_u <- NULL else test_W_u <- W_u[test_i, , drop = F]
    if (is.null(X_u)) X_u_train <- NULL else X_u_train <- X_u[-test_i, , drop = F]
    if (is.null(W_u)) W_u_train <- NULL else W_u_train <- W_u[-test_i, , drop = F]
    if (model == "exponential") {
      train_out <- exp_cure(
        X_u_train, X_p[-test_i, ],
        W_u_train, W_p[-test_i, ],
        time[-test_i], delta[-test_i],
        epsilon, thresh, nIter, inits, verbose
      )
    } else if (model == "weibull") {
      train_out <- weibull.cure(
        X_u_train, X_p[-test_i, ],
        W_u_train, W_p[-test_i, ],
        time[-test_i], delta[-test_i],
        epsilon, thresh, nIter, inits, verbose
      )
    }
    cst <- sapply(
      1:length(train_out$itct_path),
      function(x) {
        if (is.null(X_u)) b_u <- NULL else b_u <- train_out$b_u_path[x, ]
        if (is.null(W_u)) beta_u <- NULL else beta_u <- train_out$beta_u_path[x, ]
        cs <- C.stat(
          cure_cutoff = cure_cutoff,
          b_u, train_out$itct_path[x], train_out$b_p_path[x, ],
          beta_u, train_out$beta_p_path[x, ],
          test_delta, test_time, test_X_u, test_X_p, test_W_u, test_W_p
        )
        return(cs)
      }
    )
    if (measure.inc == "auc") {
      auc <- sapply(
        1:length(train_out$itct_path),
        function(x) {
          if (is.null(X_u)) b_u <- NULL else b_u <- train_out$b_u_path[x, ]
          if (is.null(W_u)) beta_u <- NULL else beta_u <- train_out$beta_u_path[x, ]
          a <- AUC_msi(
            cure_cutoff = cure_cutoff, b_u, train_out$itct_path[x],
            train_out$b_p_path[x, ], test_delta, test_time, test_X_u, test_X_p
          )
          return(a)
        }
      )
      res <- rbind(c(auc, rep(NA, nIter - length(auc))), c(cst, rep(NA, nIter - length(cst))))
      rownames(res) <- c("AUC", "Cstat")
    } else {
      res <- c(cst, rep(NA, nIter - length(cst)))
    }
    return(res)
  }

#' Cross-Validation Optimization Function for MCM Using the GMIFS Algorithm.
#'
#' @description
#' Function for fitting a MCM using the GMIFS algorithm
#' Called by cv_gmifsem and cv.gmifs.fdr (latter first generates knockoffs then uses
#' this function for optimization)
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param model character, indicating the latency distribution, either
#' "exponential" or "weibull".
#' @param thresh small numeric value specifying convergence tolerance.
#' @param nIter maximum number of iterations.
#' @param epsilon small numeric value used to update the regression coefficients.
#' @param inits initial values used to optimization.
#' @param n_folds number of folds to use in the cross-validation procedure.
#' @param measure.inc character, indicating metric used for optimizing the model
#'  fit, either "c" for the MCM C-statistic or "auc" for the MCM AUC.
#' @param one.se logical, if TRUE, identifies the optimal model as the model
#' within one SE of the selected and estimated measure.inc ("c" or "auc").
#' Allows selection of a more parsimonious model than that achieved when
#' maximizing "c" or "auc".
#' @param cure_cutoff numeric, value indicating at what time point MCM
#' C-statistic or MCM AUC should be calculating when optimizing.
#' @param parallel logical, if TRUE, parallel processing is used.
#' @param seed random seed to set to enhance reproducibility.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return \item{model.select.inc}{which model in the incidence path is optimal.}
#' @return \item{model.select.lat}{which model in the latency path is optimal.}
#' @return \item{b0}{estimated intercept for the incidence portion of the model.}
#' @return \item{b_u}{vector of estimated unpenalized coefficients in the
#' incidence portion of the model.}
#' @return \item{b_p}{vector of estimated penalized coefficients in the
#' incidence portion of the model.}
#' @return \item{beta_u}{vector of estimated unpenalized coefficients in the
#' latency portion of the model.}
#' @return \item{beta_p}{vector of estimated penalized coefficients in the
#' latency portion of the model.}
#' @return \item{alpha}{estimated alpha parameter if fitting a Weibull MCM.}
#' @return \item{rate}{estimated rate parameter if fitting a Weibull or
#' exponential MCM.}
#' @return \item{logLik}{log-Likelihood for the selected model.}
#' @return \item{max.c}{C-statistic for the selected model.}
#' @return \item{max.auc}{AUC for the selected model.}
#' @noRd
cv.gmifs.nofdr <-
  function(X_u, X_p, W_u, W_p, time, delta, model = c("weibull", "exponential"),
           thresh = 1e-5, nIter = 1e4, epsilon = 0.001, inits = NULL,
           n_folds = 5, measure.inc = c("c", "auc"), one.se = FALSE,
           cure_cutoff = 5, parallel = FALSE, seed = NULL, verbose) {
    if (!is.null(seed)) withr::local_seed(seed)
    folds_i <- sample(rep(1:n_folds, length.out = length(time)))
    Cstat <- matrix(NA, nIter, n_folds)
    if (measure.inc == "auc") AUC <- matrix(NA, nIter, n_folds)
    if (parallel) { # parallel computing
      ncores <- n_folds
      # registerDoMC(ncores) # for doMC
      cl <- parallel::makeCluster(detectCores())
      doParallel::registerDoParallel(cl)
      res_list <- foreach(k = 1:n_folds) %dopar% {
        res <- cv.gmifs.inner(
          X_u, X_p, W_u, W_p, time, delta, folds_i, k,
          model, thresh, nIter, epsilon, inits, measure.inc, cure_cutoff, verbose
        )
        return(res)
      }
      parallel::stopCluster(cl)
      if (measure.inc == "auc") {
        for (k in 1:n_folds) {
          AUC[, k] <- res_list[[k]][1, ]
          Cstat[, k] <- res_list[[k]][2, ]
        }
      } else {
        for (k in 1:n_folds) Cstat[, k] <- res_list[[k]]
      }
    } else { # no parallel computing
      for (k in 1:n_folds) {
        if (verbose) message("Fold ", k, " out of ", n_folds, " training...\n")
        res <- cv.gmifs.inner(
          X_u, X_p, W_u, W_p, time, delta, folds_i, k,
          model, thresh, nIter, epsilon, inits, measure.inc, cure_cutoff, verbose
        )
        if (measure.inc == "auc") {
          AUC[, k] <- res[1, ]
          Cstat[, k] <- res[2, ]
        } else {
          Cstat[, k] <- res
        }
      }
    }
    if (measure.inc == "auc") { # use AUC to tune incidence and C-statistic to tune latency
      auc <- rowMeans(AUC, na.rm = T)
      c_stat <- rowMeans(Cstat, na.rm = T)
      if (one.se) {
        auc_sd <- sqrt(apply(AUC, 1, var, na.rm = T) * (n_folds - 1) / n_folds / (dim(X_p)[1] - 1))
        c_stat_sd <- sqrt(apply(Cstat, 1, var, na.rm = T) * (n_folds - 1) / n_folds / (dim(X_p)[1] - 1))
        opt_step_b <- which.max(auc)
        opt_step_beta <- which.max(c_stat)
        model_select_b <- which(auc >= (auc[opt_step_b] - auc_sd[opt_step_b]))[1]
        model_select_beta <- which(c_stat >= (c_stat[opt_step_beta] - c_stat_sd[opt_step_beta]))[1]
      } else {
        model_select_b <- which.max(auc)
        model_select_beta <- which.max(c_stat)
      }
      if (verbose) {
        message("Selected step for incidence: ", model_select_b, "\n")
        message("Selected step for latency: ", model_select_beta, "\n")
        message("Maximum AUC: ", max(auc, na.rm = T), "\n")
        message("Maximum C-statistic: ", max(c_stat, na.rm = T), "\n")
        message("Fitting a final model...\n")
      }
      if (model == "exponential") {
        fit <- exp_cure(X_u, X_p, W_u, W_p, time, delta, epsilon, thresh,
          nIter = max(model_select_b, model_select_beta), inits, verbose
        )
      } else if (model == "weibull") {
        fit <- weibull.cure(X_u, X_p, W_u, W_p, time, delta, epsilon, thresh,
          nIter = max(model_select_b, model_select_beta), inits, verbose
        )
      }
    } else { # use C-statistic to tune both components
      c_stat <- rowMeans(Cstat, na.rm = T)
      if (one.se) {
        c_stat_sd <- sqrt(apply(Cstat, 1, var, na.rm = T) * (n_folds - 1) / n_folds / (dim(X_p)[1] - 1))
        opt_step <- which.max(c_stat)
        model_select <- model_select_b <- model_select_beta <-
          which(c_stat >= (c_stat[opt_step] - c_stat_sd[opt_step]))[1]
      } else {
        model_select <- model_select_b <- model_select_beta <- which.max(c_stat)
      }
      if (verbose) {
        message("Selected step: ", model_select, "\n")
        message("Maximum C-statistic: ", max(c_stat, na.rm = T), "\n")
        message("Fitting a final model...\n")
      }
      if (model == "exponential") {
        fit <- exp_cure(X_u, X_p, W_u, W_p, time, delta, epsilon, thresh,
          nIter = model_select, inits,
          verbose
        )
      } else if (model == "weibull") {
        fit <- weibull.cure(X_u, X_p, W_u, W_p, time, delta, epsilon, thresh,
          nIter = model_select,
          inits, verbose
        )
      }
    }
    nstep <- length(fit$logLikelihood)
    b_p <- fit$b_p_path[nstep, ]
    b_u <- fit$b_u_path[nstep, ]
    b0 <- fit$itct_path[nstep]
    beta_p <- fit$beta_p_path[nstep, ]
    beta_u <- fit$beta_u_path[nstep, ]
    rate <- fit$lambda_path[nstep]
    if (model == "weibull") {
      alpha <- fit$alpha_path[nstep]
    } else {
      alpha <- 1
    }
    output <- list(
      model.select.inc = model_select_b, model.select.lat = model_select_beta,
      b0 = b0, b_u = b_u, b_p = b_p, beta_u = beta_u, beta_p = beta_p,
      alpha = alpha, rate = rate,
      logLik = fit$logLikelihood[nstep], max.c = max(c_stat, na.rm = T)
    )
    if (measure.inc == "auc") output$max.auc <- max(auc, na.rm = T)
    return(output)
  }

#' Optimization Function to Fit a Penalized Exponential MCM Using the
#' GMIFS Algorithm.
#'
#' @description
#' The exp_cure function is the optimization algorithm for fitting a
#' penalized exponential MCM using the GMIFS algorithm.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param epsilon small numeric value used to update the regression coefficients.
#' @param tol small numeric value specifying convergence tolerance.
#' @param nIter maximum number of interations.
#' @param inits initial values used to optimization.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return \item{b_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the incidence portion of the model. Row is step and
#' column is variable.}
#' @return \item{beta_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{b_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the incidence portion of the model. Row is step
#' and column is variable.}
#' @return \item{itct_path}{Vector representing the solution path of the
#' intercept for the incidence portion of the model.}
#' @return \item{beta_u_path}{Matrix representing the solution path of the
#' un penalized coefficients in the latency portion of the model. Row is step
#' and column is variable.}
#' @return \item{lambda_path}{Vector representing the solution path for the alpha
#' parameter in the Weibull MCM.}
#' @return \item{logLikelihood}{Vector representing the expected penalized
#' complete-data log-likelihood for each step in the solution path.}
#' @noRd
exp_cure <-
function(X_u = NULL, X_p, W_u = NULL, W_p, time, delta, epsilon = 0.001,
         tol = 1e-05, nIter = 1e4, inits = NULL, verbose) {
  # X_u: N by nonp, non-penalized covariate matrix associated with incidence
  # X_p: N by J, penalized covariate matrix associated with incidence
  # W_u: N by nonp, non-penalized covariate matrix associated with latency
  # W_p:  N by M, penalized covariate matrix associated with incidence
  # time: vector of length N, observed survival time
  # delta: vector of length N, censoring status (not censored = 0)
  # epsilon: incremental size
  # tol: difference between log-likelihood
  N <- length(time)
  J <- ncol(X_p) # number of penalized incidence covariates
  M <- ncol(W_p) # number of penalized latency covariates
  X_p <- cbind(X_p, -X_p) # N by 2J
  W_p <- cbind(W_p, -W_p) # N by 2M

  ######## initialization ########
  step <- 1
  b_p <- rep(0, 2 * J) # penalized incidence
  beta_p <- rep(0, 2 * M) # penalized latency
  if (is.null(inits)) inits <- initialization_parametric(X_u, W_u, time, delta,
                                                         model = "weibull")
  itct <- inits$itct
  b_u <- inits$b_u
  beta_u <- inits$beta_u
  lambda <- inits$lambda
  log_lambda <- log(max(lambda, 1e-15))
  LL0 <- 0

  b_p_path <- matrix(0, nrow = nIter, ncol = 2 * J)
  beta_p_path <- matrix(0, nrow = nIter, ncol = 2 * M)
  lambda_path <- itct_path <- logLikelihood <- rep(0, nIter)
  b_u_path <- matrix(0, nrow = nIter, ncol = length(b_u))
  beta_u_path <- matrix(0, nrow = nIter, ncol = length(beta_u))

  ######## loop ########

  repeat{
    #### update penalized parameters
    upd <- exp_update(lambda, b_p, beta_p, b_u, itct, beta_u, X_u, X_p, W_u,
                      W_p, time, delta, epsilon)
    b_p <- upd$b_p
    beta_p <- upd$beta_p
    b_p_path[step,] <- b_p
    beta_p_path[step,] <- beta_p

    #### update other parameters
    out <- optim(
      par = c(log_lambda, b_u, itct, beta_u), fn = exp_negloglik,
      gr = exp_gradient, b_p = b_p, beta_p = beta_p, X_u = X_u, X_p = X_p,
      W_u = W_u, W_p = W_p, time = time, delta = delta, method = "BFGS"
    )
    log_lambda <- out$par[1]
    lambda <- exp(log_lambda)
    if (!is.null(X_u)) {
      b_u <- out$par[2:(ncol(X_u) + 1)] # unpenalized incidence
      itct <- out$par[ncol(X_u) + 2]
      if (!is.null(W_u)) beta_u <- out$par[(ncol(X_u) + 3):(ncol(X_u) + ncol(W_u) + 2)] # unpenalized latency
    } else {
      itct <- out$par[2]
      if (!is.null(W_u)) beta_u <- out$par[3:(ncol(W_u) + 2)] # unpenalized latency
    }

    lambda_path[step] <- lambda
    if (!is.null(X_u)) b_u_path[step,] <- b_u
    itct_path[step] <- itct
    if (!is.null(W_u)) beta_u_path[step,] <- beta_u

    LL1 <- -out$value
    logLikelihood[step] <- LL1

    if (verbose & step %% 1000 == 0) message("step = ", step, "\n")
    if (step > 1 && (abs(LL1 - LL0) < tol | step >= nIter)) { #
      break
    }
    LL0 <- LL1
    step <- 1 + step
  }
  ######## output ########
  b_p_path <- b_p_path[, 1:J] - b_p_path[, (J + 1):(2 * J)]
  beta_p_path <- beta_p_path[, 1:M] - beta_p_path[, (M + 1):(2 * M)]
  if (step < nIter) {
    b_p_path <- b_p_path[1:step,]
    beta_p_path <- beta_p_path[1:step,]
    lambda_path <- lambda_path[1:step]
    b_u_path <- b_u_path[1:step,]
    itct_path <- itct_path[1:step]
    beta_u_path <- beta_u_path[1:step,]
    logLikelihood <- logLikelihood[1:step]
  }
  output <- list(
    b_p_path = b_p_path, beta_p_path = beta_p_path,
    lambda_path = lambda_path, b_u_path = b_u_path, itct_path = itct_path,
    beta_u_path = beta_u_path, logLikelihood = logLikelihood
  )
  output
}

#' Optimization Function to Fit a Penalized Exponential MCM Using the
#' E-M Algorithm.
#'
#' @description
#' The exp_EM function is the optimization algorithm for fitting a
#' penalized exponential MCM called by cureem (which calls internal cure.em
#' function).
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param mu_inc initial intercept for the incidence portion of the model.
#' @param mu_lat initial intercept for the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param nIter maximum number of iterations.
#' @param tol small numeric value specifying convergence tolerance.
#'
#' @return \item{b_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the incidence portion of the model. Row is step and
#' column is variable.}
#' @return \item{beta_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{b_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the incidence portion of the model. Row is step
#' and column is variable.}
#' @return \item{itct_path}{Vector representing the solution path of the
#' intercept for the incidence portion of the model.}
#' @return \item{beta_u_path}{Matrix representing the solution path of the
#' un penalized coefficients in the latency portion of the model. Row is step
#' and column is variable.}
#' @return \item{lambda_path}{Vector representing the solution path for the
#' lambda parameter in the exponential MCM.}
#' @return \item{lik_inc}{Vector representing the expected penalized
#' complete-data log-likelihood for each step in the solution path for the
#' incidence portion of the model.}
#' @return \item{lik_lat}{Vector representing the expected penalized
#' complete-data log-likelihood for each step in the solution path for the
#' latency portion of the model.}
#' @noRd
exp_EM <-
  function (X_u = NULL, X_p, W_u = NULL, W_p, time, delta, mu_inc,
            mu_lat, inits = NULL, nIter = 100, tol = 1e-04)
  {
    N <- length(time)
    J <- ncol(X_p)
    M <- ncol(W_p)
    step <- 1
    b_p <- rep(0, J)
    beta_p <- rep(0, M)
    b_p_ext <- rep(0, 2 * J)
    beta_p_ext <- rep(0, 2 * M)
    if (is.null(inits))
      inits <- initialization_parametric(X_u, W_u, time, delta,
                                        model = "weibull")
    itct <- inits$itct
    b_u <- inits$b_u
    beta_u <- inits$beta_u
    lambda <- inits$lambda
    log_lambda <- log(max(lambda, 1e-15))
    pir <- rep(1, N)
    llp1_0 <- llp2_0 <- 0
    lik_inc <- lik_lat <- c()
    b_p_path <- beta_p_path <- b_u_path <- beta_u_path <- itct_path <- lambda_path <- NULL
    conv1 <- conv2 <- FALSE
    repeat {
      if (is.null(X_u))
        b_x <- itct + X_p[, b_p != 0, drop = FALSE] %*% b_p[b_p !=
                                                             0]
      else b_x <- itct + X_u %*% b_u + X_p[, b_p != 0, drop = FALSE] %*%
          b_p[b_p != 0]
      if (is.null(W_u))
        beta_w <- W_p[, beta_p != 0, drop = FALSE] %*% beta_p[beta_p !=
                                                               0]
      else beta_w <- W_u %*% beta_u + W_p[, beta_p != 0, drop = FALSE] %*%
          beta_p[beta_p != 0]
      pir[delta == 0] <- 1/(1 + exp(-b_x[delta == 0] + lambda *
                                     time[delta == 0] * exp(beta_w[delta == 0])))
      if (!conv1) {
        b_p_ext <- optim(par = b_p_ext, fn = l1_negloglik_inc_b,
                        gr = l1_grad_b, itct = itct, b_u = b_u, X_u = X_u,
                        X_p = X_p, pir = pir, mu = mu_inc, method = "L-BFGS-B",
                        lower = rep(0, 2 * J))$par
        b_p <- b_p_ext[1:J] - b_p_ext[(J + 1):(2 * J)]
      }
      if (!conv2) {
        beta_p_ext <- optim(par = beta_p_ext, fn = exp_negloglik_lat_beta,
                           gr = exp_grad_beta, lambda = lambda, beta_u = beta_u,
                           W_u = W_u, W_p = W_p, time = time, delta = delta,
                           pir = pir, mu = mu_lat, method = "L-BFGS-B",
                           lower = rep(0, 2 * M))$par
        beta_p <- beta_p_ext[1:M] - beta_p_ext[(M + 1):(2 *
                                                         M)]
      }
      if (!conv1) {
        out_inc <- optim(par = c(itct, b_u), fn = l1_negloglik_inc,
                        gr = l1_gradient_inc, b_p = b_p, X_u = X_u, X_p = X_p,
                        pir = pir, mu = mu_inc, method = "BFGS")
        itct <- out_inc$par[1]
        if (!is.null(X_u))
          b_u <- out_inc$par[2:(ncol(X_u) + 1)]
        llp1_1 <- -out_inc$value
      }
      lik_inc <- c(lik_inc, llp1_1)
      if (!conv2) {
        out_lat <- optim(par = c(log_lambda, beta_u), fn = exp_negloglik_lat,
                        gr = exp_gradient_lat, beta_p = beta_p, W_u = W_u,
                        W_p = W_p, time = time, delta = delta, pir = pir,
                        mu = mu_lat, method = "BFGS")
        log_lambda <- out_lat$par[1]
        lambda <- exp(log_lambda)
        if (!is.null(W_u))
          beta_u <- out_lat$par[2:(ncol(W_u) + 1)]
        llp2_1 <- -out_lat$value
      }
      lik_lat <- c(lik_lat, llp2_1)
      itct_path <- c(itct_path, itct)
      if (!is.null(X_u))
        b_u_path <- rbind(b_u_path, b_u)
      b_p_path <- rbind(b_p_path, b_p)
      if (!is.null(W_u))
        beta_u_path <- rbind(beta_u_path, beta_u)
      beta_p_path <- rbind(beta_p_path, beta_p)
      lambda_path <- c(lambda_path, lambda)
      if (!conv1 & abs(llp1_1 - llp1_0) < tol)
        conv1 <- TRUE
      if (!conv2 & abs(llp2_1 - llp2_0) < tol)
        conv2 <- TRUE
      if (step > 1 & (conv1 & conv2) | step >= nIter) {
        break
      }
      llp1_0 <- llp1_1
      llp2_0 <- llp2_1
      step <- 1 + step
    }
    output <- list(b_p_path = b_p_path, b_u_path = b_u_path,
                   itct_path = itct_path, beta_p_path = beta_p_path, beta_u_path = beta_u_path,
                   lambda_path = lambda_path, lik_inc = lik_inc, lik_lat = lik_lat)
    output
  }


#' Gradient for Penalized Predictors in Latency Portion of Exponential MCM Using
#' the E-M Algorithm.
#'
#' @description
#' Gradient for optim when updating penalized predictors in the latency portion
#' when using E-M algorithm to fit an exponential MCM (called by exp_EM).
#'
#' @param theta numeric vector for the penalized predictors in the
#' exponential latency distribution.
#' @param lambda estimated lambda parameter for exponential latency distribution.
#' @param beta_u vector of estimated unpenalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return value of the gradient.
#' @noRd
exp_grad_beta <-
  function(theta, lambda, beta_u, W_u, W_p, time, delta, pir, mu) {
    N <- dim(W_p)[1]
    M <- ncol(W_p)
    beta_p_ext <- theta
    beta_p <- beta_p_ext[1:M] - beta_p_ext[(M + 1):(2 * M)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    temp1 <- pir * lambda * time * exp(betaw)
    grad <- matrix(delta - temp1, 1) %*% W_p
    return(c(-grad + N * mu, grad + N * mu))
  }

#' Gradient for Unpenalized Predictors in Latency Portion of Exponential MCM
#' Using the E-M Algorithm.
#'
#' @description
#' Gradient for optim when updating unpenalized predictors in the latency
#' portion when using E-M algorithm for an exponential MCM (called by exp_EM).
#'
#' @param theta numeric vector for the unpenalized predictors in the
#' exponential latency distribution.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return value of the gradient.
#' @noRd
exp_gradient_lat <-
  function(theta, beta_p, W_u, W_p, time, delta, pir, mu) { # latency
    log_lambda <- theta[1]
    lambda <- exp(log_lambda)
    if (!is.null(W_u)) beta_u <- theta[2:(ncol(W_u) + 1)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    temp1 <- pir * lambda * time * exp(betaw)
    temp2 <- delta - temp1
    grad2 <- sum(temp2)
    if (!is.null(W_u)) grad3 <- matrix(temp2, 1) %*% W_u else grad3 <- NULL
    return(-c(grad2, grad3))
  }

#' Gradient for Unpenalized Predictors in Latency Portion of Exponential MCM
#' Using the GMIFS Algorithm.
#'
#' @description
#' Gradient for optim when updating unpenalized predictors in an
#' exponential MCM when fit using GMIFS (called by exp_cure).
#'
#' @param theta numeric vector for the unpenalized predictors in the
#' exponential latency distribution.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#'
#' @return value of the gradient.
#' @noRd
exp_gradient <-
  function(theta, b_p, beta_p, X_u, X_p, W_u, W_p, time, delta) {
    N <- length(time)
    lambda <- exp(theta[1])
    if (!is.null(X_u)) {
      b_u <- theta[2:(ncol(X_u) + 1)] # unpenalized incidence
      itct <- theta[ncol(X_u) + 2]
      if (!is.null(W_u)) beta_u <- theta[(ncol(X_u) + 3):(ncol(X_u) + ncol(W_u) + 2)] # unpenalized latency
    } else {
      itct <- theta[2]
      if (!is.null(W_u)) beta_u <- theta[3:(ncol(W_u) + 2)] # unpenalized latency
    }
    b_nonzero <- which(b_p != 0)
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(X_u)) {
      C_b <- exp(itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    } else {
      C_b <- exp(itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    }
    if (!is.null(W_u)) {
      C_beta <- exp(W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    } else {
      C_beta <- exp(W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    }
    C_lbeta <- exp(-lambda * time * C_beta)
    temp1 <- log(pmax(lambda * time, rep(1e-100, N)))
    temp2 <- lambda * time * C_beta
    temp3 <- 1 / (1 + C_b * C_lbeta)
    temp4 <- (1 - delta) * C_b * C_lbeta * temp2 * temp3
    temp5 <- delta * (1 - temp2)
    temp6 <- 1 / (1 + C_b)
    temp7 <- temp6 * (delta - (1 - delta) * C_b * (1 - C_lbeta) * temp3)
    grad2 <- sum(temp5 - temp4)
    if (!is.null(X_u)) grad3 <- matrix(temp7, 1) %*% X_u else grad3 <- NULL
    grad4 <- sum(temp7) # intercept
    if (!is.null(W_u)) grad5 <- matrix(temp5 - temp4, 1) %*% W_u else grad5 <- NULL
    return(-c(grad2, grad3, grad4, grad5))
  }

#' Gradient for Penalized Predictors in Latency Portion of Exponential MCM
#' Using the GMIFS Algorithm.
#'
#' @description
#' Function for optim when updating penalized predictors in the latency portion
#' when using EM algorithm for an exponential MCM (called by exp_EM).
#'
#' @param theta numeric vector for the penalized predictors in the
#' exponential latency distribution.
#' @param lambda estimated lambda parameter for exponential latency distribution.
#' @param beta_u vector of estimated unpenalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return value of the negative log-likelihood.
#' @noRd
exp_negloglik_lat_beta <-
  function(theta, lambda, beta_u, W_u, W_p, time, delta, pir, mu) { # latency
    N <- dim(W_p)[1]
    M <- ncol(W_p)
    beta_p_ext <- theta
    beta_p <- beta_p_ext[1:M] - beta_p_ext[(M + 1):(2 * M)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    ll1 <- delta * (log(lambda) + betaw)
    ll2 <- -pir * lambda * time * exp(betaw)
    return(-sum(ll1 + ll2) + N * mu * sum(beta_p_ext))
  }

#


#' Negative Log-likelihood for Unpenalized Predictors in Latency Portion of
#' Exponential MCM Using the E-M Algorithm.
#'
#' @description
#' Function for calculating negative log-likelihood passed to optim when
#' updating unpenalized predictors in the latency portion of the model
#' using E-M algorithm for an exponential MCM (called by exp_EM).
#'
#' @param theta numeric vector for the unpenalized predictors in the
#' exponential latency distribution.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return value of the negative log-likelihood.
#' @noRd
exp_negloglik_lat <-
  function(theta, beta_p, W_u, W_p, time, delta, pir, mu) { # latency
    N <- dim(W_p)[1]
    log_lambda <- theta[1]
    lambda <- exp(log_lambda)
    if (!is.null(W_u)) beta_u <- theta[2:(ncol(W_u) + 1)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    ll1 <- delta * (log_lambda + betaw)
    ll2 <- -pir * lambda * time * exp(betaw)
    return(-sum(ll1 + ll2) + N * mu * sum(abs(beta_p)))
  }


#' Negative Log-likelihood for Unpenalized Predictors in Latency Portion of
#' Exponential MCM Using the GMIFS Algorithm.
#'
#' @description
#' Function for calculating negative log-likelihood passed to optim when
#' updating unpenalized predictors in the latency portion of the model
#' using GMIFS algorithm for an exponential MCM (called by exp_cure).
#'
#' @param theta numeric vector for the unpenalized predictors in the
#' exponential latency distribution.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#'
#' @return value of the negative log-likelihood.
#' @noRd
exp_negloglik <-
  function(theta, b_p, beta_p, X_u, X_p, W_u, W_p, time, delta) {
    N <- length(time)
    lambda <- exp(theta[1])
    if (!is.null(X_u)) {
      b_u <- theta[2:(ncol(X_u) + 1)] # unpenalized incidence
      itct <- theta[ncol(X_u) + 2]
      if (!is.null(W_u)) beta_u <- theta[(ncol(X_u) + 3):(ncol(X_u) + ncol(W_u) + 2)] # unpenalized latency
    } else {
      itct <- theta[2]
      if (!is.null(W_u)) beta_u <- theta[3:(ncol(W_u) + 2)] # unpenalized latency
    }
    b_nonzero <- which(b_p != 0)
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(X_u)) {
      logC_b <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      logC_b <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    if (!is.null(W_u)) {
      logC_beta <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      logC_beta <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    temp1 <- 1 / (1 + exp(-logC_b)) # exp(logC_b) / (1+exp(logC_b)) #Mar17
    logC_lbeta <- -lambda * time * exp(logC_beta)
    ll1 <- delta * (log(temp1) + theta[1] + logC_beta + logC_lbeta)
    ll2 <- (1 - delta) * log(pmax(1 - temp1 * (1 - exp(logC_lbeta)), rep(1e-100, N)))
    return(-sum(ll1 + ll2))
  }


#' Identify GMIFS Coefficient Update for Exponential MCM.
#'
#' @description
#' Function for identifying which coefficient in the incidence and latency
#' portions of the exponential MCM to update when using GMIFS, called by
#' exp_cure.
#'
#' @param lambda estimated lambda parameter for exponential latency distribution.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param b_u vector of estimated unpenalized incidence coefficients.
#' @param itct estimated intercept for incidence portion of the model.
#' @param beta_u vector of estimated unpenalized latency coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param epsilon small numeric value used to update the regression coefficients.
#'
#' @return \item{b_p}{penalized incidence coefficient to update.}
#' @return \item{beta_p}{penalized latency coefficient to update.}
#' @noRd
exp_update <-
  function(lambda, b_p, beta_p, b_u, itct, beta_u, X_u, X_p, W_u, W_p, time,
           delta, epsilon) {
    b_nonzero <- which(b_p != 0)
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(X_u)) {
      C_b <- exp(itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    } else {
      C_b <- exp(itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    }
    if (!is.null(W_u)) {
      C_beta <- exp(W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    } else {
      C_beta <- exp(W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    }
    C_lbeta <- exp(-lambda * time * C_beta)
    temp2 <- lambda * time * C_beta
    temp3 <- 1 / (1 + C_b * C_lbeta)
    temp4 <- (1 - delta) * C_b * C_lbeta * temp2 * temp3
    temp5 <- delta * (1 - temp2)
    temp6 <- 1 / (1 + C_b)
    ### update b_p
    grad_b <- matrix(temp6 * (delta - (1 - delta) * C_b * (1 - C_lbeta) * temp3), 1) %*% X_p # length J
    j_b <- which.max(grad_b)
    b_p[j_b] <- b_p[j_b] + epsilon
    ### update beta_p
    grad_beta <- matrix(temp5 - temp4, 1) %*% W_p # length M
    j_beta <- which.max(grad_beta)
    beta_p[j_beta] <- beta_p[j_beta] + epsilon

    return(list(b_p = b_p, beta_p = beta_p))
  }

#' Function for Initializing Unpenalized Parameters for Cox MCM when Using E-M
#' Algorithm.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#'
#' @return \item{itct}{estimated intercept for the incidence portion of the
#' model.}
#' @return \item{b_u}{estimated unpenalized coefficients for the incidence
#' portion of the model.}
#' @return \item{beta_u}{estimated unpenalized coefficients for the latency
#' portion of the model.}
#' @return \item{survprob}{estimated probabilities of survival.}
#' @noRd
initialization_cox <-
  function (X_u = NULL, W_u = NULL, time, delta)
  {
    data_init = as.data.frame(cbind(time, delta, X_u, W_u))
    if (is.null(X_u) & is.null(W_u)) {
      fit <- glm(delta ~ 1, family = "binomial")
      surv <- survival::survfit(Surv(time, delta) ~ 1)
      survprob <- sapply(time, function(x) summary(surv, times = x,
                                                  extend = T)$surv)
      inits <- list(itct = coef(fit), b_u = NULL, beta_u = NULL,
                   survprob = survprob)
    }
    else if (is.null(X_u)) {
      colnames(data_init) <- c("time", "delta", paste0("lat",
                                                       1:ncol(W_u)))
      formula_lat <- as.formula(paste("Surv(time, delta) ~",
                                     paste(paste0("lat", 1:ncol(W_u)), collapse = " + ")))
      fit1 <- survival::coxph(formula_lat, data = data_init)
      surv <- survival::survfit(fit1)
      survprob <- sapply(time, function(x) summary(surv, times = x,
                                                  extend = T)$surv)
      fit2 <- glm(delta ~ 1, family = "binomial")
      inits <- list(itct = coef(fit2), b_u = NULL, beta_u = coef(fit1),
                   survprob = survprob)
    }
    else if (is.null(W_u)) {
      colnames(data_init) <- c("time", "delta", paste0("inc",
                                                       1:ncol(X_u)))
      formula_inc <- as.formula(paste("delta ~", paste(paste0("inc",
                                                             1:ncol(X_u)), collapse = " + ")))
      fit <- glm(formula_inc, family = "binomial", data = data_init)
      surv <- survival::survfit(Surv(time, delta) ~ 1)
      survprob <- sapply(time, function(x) summary(surv, times = x,
                                                  extend = T)$surv)
      itct <- coef(fit)[1]
      b_u <- coef(fit)[-1]
      inits <- list(itct = itct, b_u = b_u, beta_u = NULL, survprob = survprob)
    }
    else {
      colnames(data_init) <- c("time", "delta", paste0("inc",
                                                       1:ncol(X_u)), paste0("lat", 1:ncol(W_u)))
      formula_lat <- as.formula(paste("Surv(time, delta) ~",
                                     paste(paste0("lat", 1:ncol(W_u)), collapse = " + ")))
      formula_inc <- as.formula(paste("delta ~", paste(paste0("inc",
                                                             1:ncol(X_u)), collapse = " + ")))
      fit1 <- glm(formula_inc, family = "binomial", data = data_init)
      itct <- coef(fit1)[1]
      b_u <- coef(fit1)[-1]
      fit2 <- survival::coxph(formula_lat, data = data_init)
      beta_u <- coef(fit2)
      surv <- survival::survfit(fit2)
      survprob <- sapply(time, function(x) summary(surv, times = x,
                                                  extend = T)$surv)
      inits <- list(itct = itct, b_u = b_u, beta_u = beta_u,
                   survprob = survprob)
    }
    return(inits)
  }


#' Function for Calulating Maximum Penalty Parameter, lambda, for Cox MCM.
#'
#' @param x matrix of predictors.
#' @param time time-to-event of interest.
#' @param event event (delta = 1) and censoring (delta = 0) indicator.
#' @param offset offset.
#'
#' @return value of maximum lambda.
#' @noRd
get_cox_lambda_max <- function(x, time, event, offset = rep(0, dim(x)[1])) {
  N <- dim(x)[1]
  beta_w <- offset
  beta_w <- beta_w - mean(beta_w)

  event_time <- time[event == 1]
  uniq_event_time <- unique(event_time)
  n0 <- length(uniq_event_time)
  I0 <- matrix(time, ncol = 1) %*% matrix(1, 1, n0) >=
    matrix(1, N, 1) %*% matrix(uniq_event_time, nrow = 1) # N*n0
  d_j <- as.numeric(table(event_time)[rank(uniq_event_time)])

  exp_beta_w <- exp(beta_w)
  sum0 <- pmax(1e-50, matrix(exp_beta_w, 1) %*% I0) # 1*n0
  sum1 <- t(x) %*% diag(as.numeric(exp_beta_w)) %*% I0 # ncol(x) * n0
  null_grad <- colSums(x[event == 1, , drop = FALSE]) - rowSums(sum1 %*% diag(as.numeric(d_j / sum0))) # ncol(x) * 1
  g <- abs(null_grad)
  lambda_max <- max(g) / N
  return(lambda_max)
}


#' Function for Initializing Unpenalized Parameters for Weibull and
#' Exponential MCM when Using E-M Algorithm.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param model character, indicating the latency distribution, either
#' "exponential" or "weibull".
#'
#' @return \item{itct}{estimated intercept for the incidence portion of the
#' model.}
#' @return \item{b_u}{estimated unpenalized coefficients for the incidence
#' portion of the model.}
#' @return \item{beta_u}{estimated unpenalized coefficients for the latency
#' portion of the model.}
#' @return \item{lambda}{estimated lambda parameter for Weibull and exponential
#' latency distribution.}
#' @return \item{alpha}{if model is "weibull", estimated alpha parameter.}
#' @noRd
initialization_parametric <-
  function (X_u = NULL, W_u = NULL, time, delta, model = "weibull")
  {
    if (!is.null(X_u)) {
      fit1 <- glm(as.formula(paste("delta ~", paste(colnames(as.data.frame(X_u)),
                                                   collapse = " + "))), data = as.data.frame(X_u), family = binomial(link = "logit"))
      b_u <- fit1$coefficients[-1]
      itct <- fit1$coefficients[1]
    }
    else {
      itct <- log(mean(delta)/(1 - mean(delta)))
      b_u <- NULL
    }
    if (!is.null(W_u)) {
      fit2 <- survival::coxph(as.formula(paste("Surv(time, delta) ~",
                                              paste(colnames(as.data.frame(W_u)), collapse = " + "))),
                             data = as.data.frame(W_u), ties = "breslow")
      beta_u <- coef(fit2)
    }
    else beta_u <- NULL
    if (model == "weibull") {
      alpha <- uniroot(function(a) log(gamma(1 + 2/a)) - 2 *
                        log(gamma(1 + 1/a)) - log(var(time) + (mean(time))^2) +
                        2 * log(mean(time)), c(0.01, 10))$root
      lambda <- gamma(1 + 1/alpha)/mean(time)
    }
    else if (model == "exponential") {
      lambda <- 1/mean(time)
    }
    inits <- list(itct = itct, b_u = b_u, beta_u = beta_u, lambda = lambda)
    if (model == "weibull")
      inits$alpha <- alpha
    return(inits)
  }


#' Check Initial Values.
#'
#' @param model character, indicating the latency distribution, either "cox",
#' "exponential", or "weibull".
#' @param N number of observations.
#' @param penalty.factor.inc vector with length equal to the number of penalized
#' incidence predictors where 0 indicates no penalty and 1 indicates a penalty
#' should be applied. If not specified, default is to penalize all incidence
#' predictors.
#' @param penalty.factor.lat vector with length equal to the number of penalized
#' latency predictors where 0 indicates no penalty and 1 indicates a penalty
#' should be applied. If not specified, default is to penalize all latency
#' predictors.
#' @param inits estimated initial values used to optimization.
#'
#' @return if no warning, returns inital values.
#' @noRd
inits_check <-
  function (model, N, penalty.factor.inc, penalty.factor.lat, inits) {
    if (is.null(inits$itct)) {
      warning("Warning: Initial value of intercept is missing. Initial values were generated by the program.")
      return(NULL)
    }
    if (model == "cox") {
      if (is.null(inits$survprob)) {
        warning("Warning: Initial value of survprob is missing. Initial values were generated by the program.")
        return(NULL)
      }
      else if (length(inits$survprob) != N) {
        warning("Warning: Initial value of survprob has incorrect dimension. Initial values were generated by the program.")
        return(NULL)
      }
    }
    if (model %in% c("weibull", "exponential")) {
      if (is.null(inits$lambda)) {
        warning("Warning: Initial value of lambda is missing. Initial values were generated by the program.")
        return(NULL)
      }
      else if (inits$lambda <= 0) {
        warning("Warning: Initial value of lambda should be positive. Initial values were generated by the program.")
        return(NULL)
      }
    }
    if (model == "weibull") {
      if (is.null(inits$alpha)) {
        warning("Warning: Initial value of alpha is missing. Initial values were generated by the program.")
        return(NULL)
      }
      else if (inits$alpha <= 0) {
        warning("Warning: Initial value of alpha should be positive. Initial values were generated by the program.")
        return(NULL)
      }
    }
    if (any(penalty.factor.inc == 0))
      if (is.null(inits$b_u)) {
        warning("Warning: Initial value of b_u is missing. Initial values were generated by the program.")
        return(NULL)
      }
    else if (length(inits$b_u) != sum(penalty.factor.inc ==
                                      0)) {
      warning("Warning: Initial value of b_u has incorrect dimension. Initial values were generated by the program.")
      return(NULL)
    }
    if (any(penalty.factor.lat == 0))
      if (is.null(inits$beta_u)) {
        warning("Warning: Initial value of beta_u is missing. Initial values were generated by the program.")
        return(NULL)
      }
    else if (length(inits$beta_u) != sum(penalty.factor.lat ==
                                         0)) {
      warning("Warning: Initial value of beta_u has incorrect dimension. Initial values were generated by the program.")
      return(NULL)
    }
    return(inits)
  }

#' Gradient for Penalized Predictors in the Incidence Portion
#' of Parametric MCM Using the E-M Algorithm.
#'
#' @description
#' This function is passed to optim when updating penalized predictors in the
#' incidence portion of the model when using E-M algorithm for fitting either an
#' exponential or Weibull MCM and estimates the gradient. Called
#' by exp_EM and weib_EM.
#'
#' @param theta vector of penalized incidence coefficients.
#' @param itct estimated intercept for the incidence portion of the model.
#' @param b_u vector of estimated unpenalized incidence coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu lasso penalty parameter for incidence portion of the model.
#'
#' @return value of the gradient.
#' @noRd
l1_grad_b <-
  function(theta, itct, b_u, X_u, X_p, pir, mu) {
    N <- dim(X_p)[1]
    J <- ncol(X_p)
    b_p_ext <- theta
    b_p <- b_p_ext[1:J] - b_p_ext[(J + 1):(2 * J)]
    b_nonzero <- which(b_p != 0)
    if (!is.null(X_u)) {
      bx <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      bx <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    pix <- 1 / (1 + exp(-bx))
    grad <- matrix(pir - pix, 1) %*% X_p
    return(c(-grad + N * mu, grad + N * mu))
  }


#' Gradient for Unpenalized Predictors in the Incidence Portion
#' of Parametric MCM Using the E-M Algorithm.
#'
#' @description
#' This function is passed to optim when updating unpenalized predictors in the
#' incidence portion of the model when using E-M algorithm for fitting either an
#' exponential or Weibull MCM and estimates the gradient. Called
#' by exp_EM and weib_EM.
#'
#' @param theta vector of unpenalized incidence coefficients.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu lasso penalty parameter for incidence portion of the model.
#'
#' @return value of the gradient.
#' @noRd
l1_gradient_inc <-
  function(theta, b_p, X_u, X_p, pir, mu) # incidence
  {
    itct <- theta[1]
    if (!is.null(X_u)) b_u <- theta[2:(ncol(X_u) + 1)]
    b_nonzero <- which(b_p != 0)
    if (!is.null(X_u)) {
      bx <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      bx <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    pix <- 1 / (1 + exp(-bx))
    grad1 <- sum(pir - pix)
    if (!is.null(X_u)) grad2 <- matrix(pir - pix, 1) %*% X_u else grad2 <- NULL
    return(-c(grad1, grad2))
  }


#' Negative Log-likelihood for Penalized Predictors in the Incidence Portion
#' of Parametric MCM Using the E-M Algorithm.
#'
#' @description
#' This function is passed to optim when updating penalized predictors in the
#' incidence portion of the model when using E-M algorithm for fitting either an
#' exponential or Weibull MCM and estimates the negative log-likelihood. Called
#' by exp_EM and weib_EM.
#'
#' @param theta vector of unpenalized incidence coefficients.
#' @param itct estimated intercept for the incidence portion of the model.
#' @param b_u vector of estimated unpenalized incidence coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu lasso penalty parameter for incidence portion of the model.
#'
#' @return value of negative log-likelihood.
#' @noRd
l1_negloglik_inc_b <-
  function(theta, itct, b_u, X_u, X_p, pir, mu) # incidence
  {
    N <- dim(X_p)[1]
    J <- ncol(X_p)
    b_p_ext <- theta
    b_p <- b_p_ext[1:J] - b_p_ext[(J + 1):(2 * J)]
    b_nonzero <- which(b_p != 0)
    if (!is.null(X_u)) {
      bx <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      bx <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    ll <- sum(pir * bx - log(1 + exp(bx))) - N * mu * sum(b_p_ext)
    return(-ll)
  }


#' Negative Log-likelihood for Unpenalized Predictors in the Incidence Portion
#' of Parametric MCM Using the E-M Algorithm.
#'
#' @description
#' This function is passed to optim when updating unpenalized predictors in the
#' incidence portion of the model when using E-M algorithm for fitting either an
#' exponential or Weibull MCM and estimates the negative log-likelihood. Called
#' by exp_EM and weib_EM.
#'
#' @param theta vector of unpenalized incidence coefficients.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu lasso penalty parameter for incidence portion of the model.
#'
#' @return value of negative log-likelihood.
#' @noRd
l1_negloglik_inc <-
  function(theta, b_p, X_u, X_p, pir, mu) # incidence
  {
    itct <- theta[1]
    if (!is.null(X_u)) b_u <- theta[2:(ncol(X_u) + 1)]
    N <- dim(X_p)[1]
    b_nonzero <- which(b_p != 0)
    if (!is.null(X_u)) {
      bx <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      bx <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    ll <- sum(pir * bx - log(1 + exp(bx))) - N * mu * sum(abs(b_p))
    return(-ll)
  }

#' Identify Model From Solution Path Optimizes the Information Criterion.
#'
#' @description
#' This function is called by coef, summary, predict, plot, auc_mcm,
#' concordance_mcm to identify the step at which the user decides is the final
#' model. This function then extracts the optimal step and values for
#' information criteria at that step.
#'
#' @param object an object of class "mixturecure".
#' @param model_select a case-sensitive parameter or any (numeric) step along
#' the solution path can be selected. The default is model_select = "AIC" which
#' returns which model along the solution path attained the lowest AIC. Other
#' options are model_select = "mAIC" for the modified AIC, model_select = "cAIC"
#' for the corrected AIC, model_select = "BIC", model_select = "mBIC" for the
#' modified BIC, model_select = "EBIC" for the extended BIC,
#' model_select = "logLik" for the step that maximizes the log-likelihood, or
#' any numeric value from the solution path.
#' @return \item{select} step at which use-defined model_select attains optimum.
#' @return \item{AIC} AIC path.
#' @return \item{BIC} BIC path.
#' @return \item{mAIC} modified AIC path.
#' @return \item{mBIC} modified BIC path.
#' @return \item{EBIC} extended BIC path.
#' @return \item{cAIC} corrected AIC path.
#' @return \item{logLik} log-likelihood path.
#' @noRd
select_model <- function(object, model_select) {
  if (!object$cv) {
  if (is.character(model_select)) {
    model_select <- c(
      "AIC", "BIC", "logLik", "cAIC", "mAIC", "mBIC",
      "EBIC"
    )[pmatch(
      model_select,
      c(
        "AIC", "BIC", "logLik", "cAIC", "mAIC", "mBIC",
        "EBIC"
      )
    )]
    if (any(!model_select %in%
            c("AIC", "BIC", "logLik", "cAIC", "mAIC", "mBIC", "EBIC"))) {
      stop("Error: model_select must be either 'AIC', 'BIC', 'logLik', 'cAIC',
               'mAIC', 'mBIC', or 'EBIC' ")
    }
    if (!is.null(object$x_incidence)) {
      vars_inc <- apply(object$b_path, 1, function(x) sum(x != 0))
    } else {
      vars_inc <- 0
    }
    if (!is.null(object$x_latency)) {
      vars_lat <- apply(object$beta_path, 1, function(x) sum(x != 0))
    } else {
      vars_lat <- 0
    }
  }
  } else {
    if (!is.null(object$x_latency)) {
      vars_lat <- sum(object$beta != 0)
    } else {
      vars_lat <- 0
    }
    if (!is.null(object$x_incidence)) {
      vars_inc <- sum(object$b != 0)
    } else {
      vars_inc <- 0
    }
  }
  if (object$model == "weibull") {
    df <- vars_inc + vars_lat + 3
  } else if (object$model == "exponential") {
    df <- vars_inc + vars_lat + 2
  } else if (object$model == "cox") {
    df <- vars_inc + vars_lat + 1
  }
  logLik <- object$logLik
  p <- dim(object$x_incidence)[2] + dim(object$x_latency)[2]
  AIC <- 2 * df - 2 * logLik
  cAIC <- AIC + (2 * df * (df + 1)) / (length(object$y) - df - 1)
  mAIC <- (2 + 2 * log(p / .5)) * df - 2 * logLik
  BIC <- df * (log(length(object$y))) - 2 * logLik
  mBIC <- df * (log(length(object$y)) + 2 * log(p / 4)) - 2 * logLik
  EBIC <- log(length(object$y)) * df + 2 * (1 - .5) * log(choose(p, df)) -
    2 * logLik
  if (object$model != "cox") {
    if (object$mode == "weibull") {
      if (!exists("alpha_path", object)) {
        object$alpha_path <- object$alpha
      }
    }
    if (!exists("rate_path", object)) {
      object$rate_path <- object$rate
    }
  }
  if (!object$cv) {
    if (model_select == "AIC") {
      select <- which.min(AIC)
    } else if (model_select == "BIC") {
      select <- which.min(BIC)
    } else if (model_select == "mAIC") {
      select <- which.min(mAIC)
    } else if (model_select == "mBIC") {
      select <- which.min(mBIC)
    } else if (model_select == "EBIC") {
      select <- which.min(EBIC)
    } else if (model_select == "cAIC") {
      select <- which.min(cAIC)
    } else if (model_select == "logLik") {
      select <- which.max(logLik)
    }
  }
  list(select = select, AIC = AIC, BIC = BIC, mAIC = mAIC, mBIC = mBIC,
       EBIC = EBIC, cAIC = cAIC, logLik = logLik)
}


#' Non-Parametric Time-to-Event Generator.
#'
#' @description
#' Function called by generate_cure_data when the model is to generate
#' nonparametric times.
#'
#' @param N number of observations
#' @param beta_w linear predictor for the latency portion of the model.
#' @param maxT maximum observable time-to-event.
#' @param knots number of knots.
#'
#' @return time-to-event.
#' @noRd
nonparametric_time_generator <-
  function(N, beta_w, maxT = 20, knots = 8) {
    time <- 0:maxT
    k <- c(0, sort(sample(time[2:maxT], size = knots, replace = FALSE)), maxT)
    heights <- c(0, sort(1 - pmin(rexp(knots, rate = 10), 1 - 1e-15)), 1)
    # heights <- c(0, sort(runif(knots)), 1)
    tk <- merge(data.frame(time), data.frame(time = k, heights),
      by = "time", all = FALSE
    )
    MonotonicSpline <- stats::splinefun(
      x = tk$time, y = tk$heights,
      method = "hyman"
    )
    t <- rep(NA, N)
    for (i in 1:N) {
      u <- runif(1)
      t[i] <- uniroot(function(a) (1 - MonotonicSpline(a))^exp(beta_w[i]) - u, c(0, maxT))$root
    }
    return(t)
  }


#' MCP Penalty.
#'
#' @description
#' Called by cox_mcp_scad when using EM algorithm with MCP option.
#'
#' @param b_p vector of penalized coefficients.
#' @param gamma MCP penalty is parameterized by gamma > 1 and lambda >= 0.
#' @param lambda MCP penalty is parameterized by gamma > 1and lambda >= 0.
#'
#' @return MCP.
#' @noRd
mcp_penalty <-
  function(b_p, gamma, lambda) {
    idx <- which(b_p != 0 & abs(b_p) <= gamma * lambda)
    s1 <- lambda * sum(abs(b_p[idx])) - sum((b_p[idx])^2) / (2 * gamma)
    s2 <- gamma * lambda^2 / 2 * sum(abs(b_p) > gamma * lambda)
    return(s1 + s2)
  }


#' Identify MCP and SCAD Incidence Coefficient Update for Cox MCM Using E-M
#' Algorithm.
#'
#' @description
#' Function for identifying which coefficient in the incidence
#' portion of the Cox MCM to update when using the E-M algorithm with either
#' the MCP or SCAD penalty.
#'
#' @param penalty character indicating "MCP" or "SCAD" penalty.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param pi_x vector of estimated probabilities of being susceptible to the
#' event of interest.
#' @param x_l vector for lth penalized covariate in the incidence portion of the
#' model.
#' @param bl coefficient estimate for the lth penalized covariate in the
#' incidence portion of the model.
#' @param gamma gamma penalty parameter for the incidence portion of the model.
#' @param lambda lambda penalty parameter for the incidence portion of the model.
#'
#' @return which coefficient to update.
#' @noRd
mcp_scad_cd_upd_inc <-
  function(penalty, pir, pi_x, x_l, bl, gamma, lambda) {
    d1 <- mean((pir - pi_x) * x_l)
    vl <- mean(pi_x * (1 - pi_x) * x_l^2)
    zl <- d1 + vl * bl
    if (penalty == "MCP") {
      if (abs(bl) <= gamma * lambda) {
        return(soft(zl, lambda) / (vl - 1 / gamma))
      } else {
        return(ifelse(vl == 0, 0, zl / vl))
      }
    } else if (penalty == "SCAD") {
      if (abs(bl) <= lambda) {
        return(ifelse(vl == 0, 0, soft(zl, lambda) / vl))
      } else if (abs(bl) <= gamma * lambda) {
        return(soft(zl, gamma * lambda / (gamma - 1)) / (vl - 1 / (gamma - 1)))
      } else {
        return(ifelse(vl == 0, 0, zl / vl))
      }
    }
  }

#' Identify MCP and SCAD Latency Coefficient Update for Cox MCM Using E-M
#' Algorithm.
#'
#' @description
#' Function for identifying which coefficient in the  latency
#' portion of the Cox MCM to update when using the E-M algorithm with either
#' the MCP or SCAD penalty.
#'
#' @param penalty character indicating "MCP" or "SCAD" penalty.
#' @param exp_beta_w_p exponentiated coefficient for latency portion of the model.
#' @param w_l vector for lth penalized covariate in the latency portion of the
#' model.
#' @param betal coefficient estimate for the lth penalized covariate in the
#' latency portion of the model.
#' @param Zl sum of for the lth penalized coefficient at each unique event time.
#' @param I0 logical, matrix with number of rows corresponding to observations
#' and number of columns corresponding to unique event times.
#' @param d_j vector representing number of events at each unique event time.
#' @param gamma gamma penalty parameter for the latency portion of the model.
#' @param lambda lambda penalty parameter for the latency portion of the model.
#'
#' @return which coefficients to update.
#' @noRd
mcp_scad_cd_upd_lat <-
  function(penalty, exp_beta_w_p, w_l, betal, Zl, I0, d_j, gamma, lambda) {
    N <- length(exp_beta_w_p)
    sum0 <- pmax(1e-50, matrix(exp_beta_w_p, 1) %*% I0) # 1*n0
    sum1 <- matrix(exp_beta_w_p * w_l, 1) %*% I0 # 1*n0
    sum2 <- matrix(exp_beta_w_p * w_l^2, 1) %*% I0 # 1*n0
    d1 <- sum(Zl - d_j * sum1 / sum0) / N
    vl <- -sum(d_j * (sum1^2 / sum0^2 - sum2 / sum0)) / N
    zl <- d1 + vl * betal
    if (penalty == "MCP") {
      if (abs(betal) <= gamma * lambda) {
        return(soft(zl, lambda) / (vl - 1 / gamma))
      } else {
        return(ifelse(vl == 0, 0, zl / vl))
      }
    } else if (penalty == "SCAD") {
      if (abs(betal) <= lambda) {
        return(ifelse(vl == 0, 0, soft(zl, lambda) / vl))
      } else if (abs(betal) <= gamma * lambda) {
        return(soft(zl, gamma * lambda / (gamma - 1)) / (vl - 1 / (gamma - 1)))
      } else {
        return(ifelse(vl == 0, 0, zl / vl))
      }
    }
  }


#' MCP and SCAD Gradient for Cox Incidence for the E-M Algorithm.
#'
#' @description
#' When fitting a Cox model using either the MCP or SCAD penalty, this
#' gradient function is used by optim in the M-step of
#' cox_mcp_scad in M-step of the E-M algorithm for the incidence portion of the
#' model.
#'
#' @param theta value for the gradient.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param CAP absolute value of the maximum intercept estimate (to avoid
#' numerical instability), default is 20.
#'
#' @return value of the gradient.
#' @noRd
mcp_scad_gradient_inc <-
  function(theta, b_p, X_u, X_p, pir, CAP) # incidence
  {
    itct <- theta[1]
    if (!is.null(X_u)) b_u <- theta[2:(ncol(X_u) + 1)]
    b_nonzero <- which(b_p != 0)
    if (is.null(X_u)) {
      bx <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      bx <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    pix <- 1 / (1 + exp(pmin(CAP, -bx)))
    grad1 <- sum(pir - pix)
    if (!is.null(X_u)) grad2 <- matrix(pir - pix, 1) %*% X_u else grad2 <- NULL
    return(-c(grad1, grad2))
  }


#' MCP and SCAD Gradient for Cox Latency for the E-M Algorithm.
#'
#' @description
#' When fitting a Cox model using either the MCP or SCAD penalty, this
#' gradient function is used by optim in the M-step of
#' cox_mcp_scad in M-step of the E-M algorithm for the latency portion of the
#' model.
#'
#' @param theta value for the gradient.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param I0 logical, matrix with number of rows corresponding to observations
#' and number of columns corresponding to unique event times.
#' @param d_j vector representing number of events at each unique event time.
#' @param CAP absolute value of the maximum intercept estimate (to avoid
#' numerical instability), default is 20.
#'
#' @return value of the gradient.
#' @noRd
mcp_scad_gradient_lat <-
  function(theta, beta_p, W_u, W_p, delta, pir, I0, d_j, CAP) { # latency
    beta_u <- theta
    N <- dim(W_p)[1]
    beta_nonzero <- which(beta_p != 0)
    beta_w <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    exp_beta_w_p <- exp(pmin(CAP, beta_w)) * pir
    sum0 <- pmax(1e-50, matrix(exp_beta_w_p, 1) %*% I0) # 1*n0
    sum1 <- t(W_u) %*% diag(as.numeric(exp_beta_w_p)) %*% I0 # ncol(W_u) * n0
    du <- colSums(W_u[delta == 1, , drop = FALSE]) - rowSums(sum1 %*% diag(as.numeric(d_j / sum0))) # ncol(W_u) * 1
    return(-du)
  }


#' MCP and SCAD Negative Log-Likelihood Cox Incidence for the E-M Algorithm.
#'
#' @description
#' When fitting a Cox model using either the MCP or SCAD penalty, this
#' negative log-likelihood function is used by optim in the M-step of
#' cox_mcp_scad in M-step of the E-M algorithm for the incidence portion of the
#' model.
#'
#' @param theta negative log-likelihood.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param CAP absolute value of the maximum intercept estimate (to avoid
#' numerical instability), default is 20.
#'
#' @return value of the negative log-likelihood.
#' @noRd
mcp_scad_negloglik_inc <-
  function(theta, b_p, X_u, X_p, pir, CAP) # incidence
  {
    itct <- theta[1]
    if (!is.null(X_u)) b_u <- theta[2:(ncol(X_u) + 1)]
    N <- dim(X_p)[1]
    b_nonzero <- which(b_p != 0)
    if (is.null(X_u)) {
      bx <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      bx <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    ll <- sum(pir * bx - log(1 + exp(pmin(CAP, bx))))
    return(-ll)
  }

#' MCP and SCAD Negative Log-Likelihood Cox Latency for the E-M Algorithm.
#'
#' @description
#' When fitting a Cox model using either the MCP or SCAD penalty, this
#' negative log-likelihood function is used by optim in the M-step of
#' cox_mcp_scad in M-step of the E-M algorithm for the latency portion of the
#' model.
#'
#' @param theta negative log-likelihood.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param I0 logical, matrix with number of rows corresponding to observations
#' and number of columns corresponding to unique event times.
#' @param d_j vector representing number of events at each unique event time.
#' @param CAP absolute value of the maximum intercept estimate (to avoid
#' numerical instability), default is 20.
#'
#' @return value of the negative log-likelihood.
#' @noRd
mcp_scad_negloglik_lat <-
  function(theta, beta_p, W_u, W_p, delta, pir, I0, d_j, CAP) { # latency
    beta_u <- theta
    N <- dim(W_p)[1]
    beta_nonzero <- which(beta_p != 0)
    beta_w <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    exp_beta_w_p <- exp(pmin(CAP, beta_w)) * pir
    sum0 <- pmax(1e-50, matrix(exp_beta_w_p, 1) %*% I0) # 1*n0
    ll <- sum(beta_w[delta == 1]) - sum(d_j * log(sum0))
    return(-ll)
  }

#' SCAD Penalty.
#'
#' @description
#' Called by cox_mcp_scad when using EM algorithm with SCAD option.
#'
#' @param b_p vector of penalized coefficients.
#' @param gamma SCAD penalty is parameterized by gamma > 1 and lambda >= 0.
#' @param lambda SCAD penalty is parameterized by gamma > 1and lambda >= 0.
#'
#' @return SCAD.
#' @noRd
scad_penalty <-
  function(b_p, gamma, lambda) {
    idx1 <- which(b_p != 0 & abs(b_p) <= lambda)
    idx2 <- which(abs(b_p) > lambda & abs(b_p) <= gamma * lambda)
    s1 <- lambda * sum(abs(b_p[idx1]))
    s2 <- (gamma * lambda * sum(abs(b_p[idx2])) - 0.5 * (sum((b_p[idx2])^2 + lambda^2))) / (gamma - 1)
    s3 <- lambda^2 * (gamma^2 - 1) / (2 * (gamma - 1)) * sum(abs(b_p) > gamma * lambda)
    return(s1 + s2 + s3)
  }


#' Center and Scale a Matrix.
#'
#' @description
#' If scale = TRUE, this function centers and scales all covariates in matrix X.
#' It differs from R's scale function in that a vector of zeros is returned
#' if the SD for a vectors is 1.
#'
#' @param X data matrix.
#' @param scale logical, if TRUE, columns are to be centered and scaled.
#'
#' @return data matrix.
#' @noRd
#' @srrstats {G5.5} *Correctness tests should be run with a fixed random seed*
self_scale <-
  function(X, scale) {
    if (is.null(X)) {
      return(NULL)
    }
    if (dim(X)[2] == 0) {
      return(NULL)
    }
    if (scale) {
      n <- dim(X)[1]
      mean.x <- colMeans(X)
      sd.x <- sqrt(colMeans(X^2) - mean.x^2)
      Xs <-  scale(X)
      Xs[,sd.x==0] <- rep(0, n)
    } else {
      Xs <- X
    }
    return(Xs)
  }


#' Softmax.
#'
#' @description
#' Called by mcp_scad_cd_upd_inc and mcp_scad_cd_upd_lat
#'
#' @param z temp.
#' @param gamma temp.
#'
#' @return softmax.
#' @noRd
soft <-
  function(z, gamma) {
    if (gamma >= abs(z)) {
      return(0)
    } else {
      return(ifelse(z > 0, z - gamma, z + gamma))
    }
  }


#' Internal optimization function to fit Weibull LASSO model using the E-M
#' algorithm
#'
#' @description
#' The weib_EM function is the optimization algorithm for fitting a Weibull
#' LASSO model called by cureem for fitting the model using the EM algorithm
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param mu_inc initial intercept for the incidence portion of the model.
#' @param mu_lat initial intercept for the latency portion of the model.
#' @param inits initial values used to optimization.
#' @param nIter maximum number of interations, default is 100.
#' @param tol small numeric value specifying convergence tolerance.
#'
#' @return \item{b_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the incidence portion of the model. Row is step and
#' column is variable.}
#' @return \item{beta_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{b_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the incidence portion of the model. Row is step
#' and column is variable.}
#' @return \item{itct_path}{Vector representing the solution path of the
#' intercept for the incidence portion of the model.}
#' @return \item{beta_u_path}{Matrix representing the solution path of the
#' un penalized coefficients in the latency portion of the model. Row is step
#' and column is variable.}
#' @return \item{alpha_path}{Vector representing the solution path for the alpha
#' parameter in the Weibull MCM.}
#' @return \item{lambda_path}{Vector representing the solution path for the
#' lambda parameter in the Weibull MCM.}
#' @return \item{lik_inc}{Vector representing the expected penalized
#' complete-data log-likelihood for the incidence portion of the model for
#' each step in the solution path.}
#' @return \item{lik_lat}{Vector representing the expected penalized
#' complete-data log-likelihood for the latency portion of the model for each
#' step in the solution path.}
#' @noRd
weib_EM <-
  function (X_u = NULL, X_p, W_u = NULL, W_p, time, delta, mu_inc,
            mu_lat, inits = NULL, nIter = 100, tol = 1e-04)
  {
    N <- length(time)
    J <- ncol(X_p)
    M <- ncol(W_p)
    step <- 1
    b_p <- rep(0, J)
    beta_p <- rep(0, M)
    b_p_ext <- rep(0, 2 * J)
    beta_p_ext <- rep(0, 2 * M)
    if (is.null(inits))
      inits <- initialization_parametric(X_u, W_u, time, delta,
                                        model = "weibull")
    itct <- inits$itct
    b_u <- inits$b_u
    beta_u <- inits$beta_u
    lambda <- inits$lambda
    alpha <- inits$alpha
    log_alpha <- log(max(alpha, 1e-15))
    log_lambda <- log(max(lambda, 1e-15))
    pir <- rep(1, N)
    llp1_0 <- llp2_0 <- 0
    lik_inc <- lik_lat <- c()
    b_p_path <- beta_p_path <- b_u_path <- beta_u_path <- itct_path <- alpha_path <- lambda_path <- NULL
    conv1 <- conv2 <- FALSE
    repeat {
      if (is.null(X_u))
        b_x <- itct + X_p[, b_p != 0, drop = FALSE] %*% b_p[b_p !=
                                                             0]
      else b_x <- itct + X_u %*% b_u + X_p[, b_p != 0, drop = FALSE] %*%
          b_p[b_p != 0]
      if (is.null(W_u))
        beta_w <- W_p[, beta_p != 0, drop = FALSE] %*% beta_p[beta_p !=
                                                               0]
      else beta_w <- W_u %*% beta_u + W_p[, beta_p != 0, drop = FALSE] %*%
          beta_p[beta_p != 0]
      pir[delta == 0] <- 1/(1 + exp(-b_x[delta == 0] + (lambda *
                                                         time[delta == 0])^alpha * exp(beta_w[delta == 0])))
      if (!conv1) {
        b_p_ext <- optim(par = b_p_ext, fn = l1_negloglik_inc_b,
                        gr = l1_grad_b, itct = itct, b_u = b_u, X_u = X_u,
                        X_p = X_p, pir = pir, mu = mu_inc, method = "L-BFGS-B",
                        lower = rep(0, 2 * J))$par
        b_p <- b_p_ext[1:J] - b_p_ext[(J + 1):(2 * J)]
      }
      if (!conv2) {
        beta_p_ext <- optim(par = beta_p_ext, fn = weib_negloglik_lat_beta,
                           gr = weib_grad_beta, alpha = alpha, lambda = lambda,
                           beta_u = beta_u, W_u = W_u, W_p = W_p, time = time,
                           delta = delta, pir = pir, mu = mu_lat, method = "L-BFGS-B",
                           lower = rep(0, 2 * M))$par
        beta_p <- beta_p_ext[1:M] - beta_p_ext[(M + 1):(2 *
                                                         M)]
      }
      if (!conv1) {
        out_inc <- optim(par = c(itct, b_u), fn = l1_negloglik_inc,
                        gr = l1_gradient_inc, b_p = b_p, X_u = X_u, X_p = X_p,
                        pir = pir, mu = mu_inc, method = "BFGS")
        itct <- out_inc$par[1]
        if (!is.null(X_u))
          b_u <- out_inc$par[2:(ncol(X_u) + 1)]
        llp1_1 <- -out_inc$value
      }
      lik_inc <- c(lik_inc, llp1_1)
      if (!conv2) {
        out_lat <- optim(par = c(log_alpha, log_lambda, beta_u),
                        fn = weib_negloglik_lat, gr = weib_gradient_lat,
                        beta_p = beta_p, W_u = W_u, W_p = W_p, time = time,
                        delta = delta, pir = pir, mu = mu_lat, method = "BFGS")
        log_alpha <- out_lat$par[1]
        alpha <- exp(log_alpha)
        log_lambda <- out_lat$par[2]
        lambda <- exp(log_lambda)
        if (!is.null(W_u))
          beta_u <- out_lat$par[3:(ncol(W_u) + 2)]
        llp2_1 <- -out_lat$value
      }
      lik_lat <- c(lik_lat, llp2_1)
      itct_path <- c(itct_path, itct)
      if (!is.null(X_u))
        b_u_path <- rbind(b_u_path, b_u)
      b_p_path <- rbind(b_p_path, b_p)
      if (!is.null(W_u))
        beta_u_path <- rbind(beta_u_path, beta_u)
      beta_p_path <- rbind(beta_p_path, beta_p)
      alpha_path <- c(alpha_path, alpha)
      lambda_path <- c(lambda_path, lambda)
      if (!conv1 & abs(llp1_1 - llp1_0) < tol)
        conv1 <- TRUE
      if (!conv2 & abs(llp2_1 - llp2_0) < tol)
        conv2 <- TRUE
      if (step > 1 & (conv1 & conv2) | step >= nIter) {
        break
      }
      llp1_0 <- llp1_1
      llp2_0 <- llp2_1
      step <- 1 + step
    }
    output <- list(b_p_path = b_p_path, b_u_path = b_u_path,
                   itct_path = itct_path, beta_p_path = beta_p_path, beta_u_path = beta_u_path,
                   alpha_path = alpha_path, lambda_path = lambda_path, lik_inc = lik_inc,
                   lik_lat = lik_lat)
    output
  }

#' Gradient for Penalized Predictors in Weibull MCM Fit Using EM Algorithm.
#'
#' @description
#' Gradient for optim when updating penalized predictors in the latency portion
#' of a Weibull MCM when fit using EM (called by weib_EM).
#'
#' @param theta numeric vector for the penalized predictors in the
#' Weibull latency distribution.
#' @param alpha estimated alpha parameter for Weibull latency distribution.
#' @param lambda estimated lambda parameter for Weibull latency distribution.
#' @param beta_u vector of estimated unpenalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return value of the gradient.
#' @noRd
weib_grad_beta <-
  function(theta, alpha, lambda, beta_u, W_u, W_p, time, delta, pir, mu) {
    N <- dim(W_p)[1]
    M <- ncol(W_p)
    beta_p_ext <- theta
    beta_p <- beta_p_ext[1:M] - beta_p_ext[(M + 1):(2 * M)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    temp1 <- pir * (lambda * time)^alpha * exp(betaw)
    grad <- matrix(delta - temp1, 1) %*% W_p
    return(c(-grad + N * mu, grad + N * mu))
  }

#' Gradient for Unpenalized Predictors in Weibull MCM Fit Using EM Algorithm.
#'
#' @description
#' Gradient for optim when updating unpenalized predictors in the latency portion
#' of a Weibull MCM when fit using EM (called by weib_EM)
#'
#' @param theta numeric vector for the unpenalized predictors in the
#' Weibull latency distribution.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return value of the gradient.
#' @noRd
weib_gradient_lat <-
  function(theta, beta_p, W_u, W_p, time, delta, pir, mu) { # latency
    log_alpha <- theta[1]
    log_lambda <- theta[2]
    alpha <- exp(log_alpha)
    lambda <- exp(log_lambda)
    if (!is.null(W_u)) beta_u <- theta[3:(ncol(W_u) + 2)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    temp1 <- pir * (lambda * time)^alpha * exp(betaw)
    temp2 <- delta - temp1
    grad1 <- sum(delta + log(pmax(lambda * time, 1e-100)) * temp2 * alpha)
    grad2 <- sum(alpha * temp2)
    if (!is.null(W_u)) grad3 <- matrix(temp2, 1) %*% W_u else grad3 <- NULL
    return(-c(grad1, grad2, grad3))
  }


#' Identify GMIFS Coefficient Update for Weibull MCM.
#'
#' @description
#' Function for identifying which coefficient in the incidence and latency
#' portions of the Weibull MCM to update when using GMIFS, called by weibull.cure
#'
#' @param alpha estimated alpha parameter for Weibull latency distribution.
#' @param lambda estimated lambda parameter for Weibull latency distribution.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param b_u vector of estimated unpenalized incidence coefficients.
#' @param itct estimated intercept for incidence portion of the model.
#' @param beta_u vector of estimated unpenalized latency coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param epsilon small numeric value used to update the regression coefficients.
#'
#' @return \item{b_p}{penalized incidence coefficient to update.}
#' @return \item{beta_p}{penalized latency coefficient to update.}
#' @noRd
weib.cure.update <-
  function(alpha, lambda, b_p, beta_p, b_u, itct, beta_u, X_u, X_p, W_u, W_p, time, delta, epsilon) {
    b_nonzero <- which(b_p != 0)
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(X_u)) {
      C_b <- exp(itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    } else {
      C_b <- exp(itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    }
    if (!is.null(W_u)) {
      C_beta <- exp(W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    } else {
      C_beta <- exp(W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    }
    C_lbeta <- exp(-(lambda * time)^alpha * C_beta)
    temp2 <- (lambda * time)^alpha * C_beta
    temp3 <- 1 / (1 + C_b * C_lbeta)
    temp4 <- (1 - delta) * C_b * C_lbeta * temp2 * temp3
    temp5 <- delta * (1 - temp2)
    temp6 <- 1 / (1 + C_b)
    ### update b_p
    grad_b <- matrix(temp6 * (delta - (1 - delta) * C_b * (1 - C_lbeta) * temp3), 1) %*% X_p # length J
    j_b <- which.max(grad_b)
    b_p[j_b] <- b_p[j_b] + epsilon
    ### update beta_p
    grad_beta <- matrix(temp5 - temp4, 1) %*% W_p # length M
    j_beta <- which.max(grad_beta)
    beta_p[j_beta] <- beta_p[j_beta] + epsilon
    return(list(b_p = b_p, beta_p = beta_p))
  }


#' Optimization Function for Weibull MCM Using GMIFS.
#'
#' @description
#' This function is Called by curegmifs, cv.gmifs.inner, and cv.gmifs.nofdr as
#' this function is the optimiziation algorithm for fitting a Weibull mixture
#' cure model using the GMIFS algorithm.
#'
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param epsilon small numeric value used to update the regression coefficients.
#' @param tol small numeric value specifying convergence tolerance.
#' @param nIter maximum number of interations.
#' @param inits initial values used to optimization.
#' @param verbose logical, if TRUE displays step process to the console.
#'
#' @return \item{b_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the incidence portion of the model. Row is step and
#' column is variable.}
#' @return \item{beta_p_path}{Matrix representing the solution path of the
#' penalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{alpha_path}{Vector representing the solution path for the alpha
#' parameter in the Weibull MCM.}
#' @return \item{lambda_path}{Vector representing the solution path for the alpha
#' parameter in the Weibull MCM.}
#' @return \item{b_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the incidence portion of the model. Row is step
#' and column is variable.}
#' @return \item{itct_path}{Vector representing the solution path of the
#' intercept for the incidence portion of the model.}
#' @return \item{beta_u_path}{Matrix representing the solution path of the
#' unpenalized coefficients in the latency portion of the model. Row is step and
#' column is variable.}
#' @return \item{logLikelihood}{Vector representing the expected penalized
#' complete-data log-likelihood for each step in the solution path.}
#'
#' @noRd
weibull.cure <-
  function(X_u = NULL, X_p, W_u = NULL, W_p, time, delta, epsilon = 0.001,
           tol = 1e-05, nIter = 1e4, inits = NULL, verbose) {
    # X_u: N by nonp, non-penalized covariate matrix associated with incidence
    # X_p: N by J, penalized covariate matrix associated with incidence
    # W_u: N by nonp, non-penalized covariate matrix associated with latency
    # W_p:  N by M, penalized covariate matrix associated with incidence
    # time: vector of length N, observed survival time
    # delta: vector of length N, censoring status (not censored = 0)
    # epsilon: incremental size
    # tol: difference between log-likelihood

    N <- length(time)
    J <- ncol(X_p) # number of penalized incidence covariates
    M <- ncol(W_p) # number of penalized latency covariates
    X_p <- cbind(X_p, -X_p) # N by 2J
    W_p <- cbind(W_p, -W_p) # N by 2M
    ######## initialization ########
    step <- 1
    b_p <- rep(0, 2 * J) # penalized incidence
    beta_p <- rep(0, 2 * M) # penalized latency
    if (is.null(inits)) inits <- initialization_parametric(X_u, W_u, time,
                                                     delta, model = "weibull")
    itct <- inits$itct
    b_u <- inits$b_u
    beta_u <- inits$beta_u
    lambda <- inits$lambda
    alpha <- inits$alpha
    log_alpha <- log(max(alpha, 1e-15))
    log_lambda <- log(max(lambda, 1e-15))
    LL0 <- 0

    alpha_path <- lambda_path <- itct_path <- logLikelihood <- rep(0, nIter)
    b_p_path <- matrix(0, nrow = nIter, ncol = 2 * J)
    beta_p_path <- matrix(0, nrow = nIter, ncol = 2 * M)
    b_u_path <- matrix(0, nrow = nIter, ncol = length(b_u))
    beta_u_path <-  matrix(0, nrow = nIter, ncol = length(beta_u))

    ######## loop ########

    repeat{
      #### update penalized parameters
      upd <- weib.cure.update(alpha, lambda, b_p, beta_p, b_u, itct, beta_u, X_u, X_p, W_u, W_p, time, delta, epsilon)
      b_p <- upd$b_p
      beta_p <- upd$beta_p
      b_p_path[step,] <- b_p
      beta_p_path[step,] <- beta_p

      #### update other parameters
      out <- optim(
        par = c(log_alpha, log_lambda, b_u, itct, beta_u), fn = weib.cure.negloglik, gr = weib.cure.gradient,
        b_p = b_p, beta_p = beta_p, X_u = X_u, X_p = X_p, W_u = W_u, W_p = W_p,
        time = time, delta = delta, method = "BFGS"
      )
      log_alpha <- out$par[1]
      alpha <- exp(log_alpha)
      log_lambda <- out$par[2]
      lambda <- exp(log_lambda)
      if (!is.null(X_u)) {
        b_u <- out$par[3:(ncol(X_u) + 2)] # unpenalized incidence
        itct <- out$par[ncol(X_u) + 3]
        if (!is.null(W_u)) beta_u <- out$par[(ncol(X_u) + 4):(ncol(X_u) + ncol(W_u) + 3)] # unpenalized latency
      } else {
        itct <- out$par[3]
        if (!is.null(W_u)) beta_u <- out$par[4:(ncol(W_u) + 3)] # unpenalized latency
      }

      alpha_path[step] <- alpha
      lambda_path[step] <- lambda
      if (!is.null(X_u)) b_u_path[step,] <- b_u
      itct_path[step] <- itct
      if (!is.null(W_u)) beta_u_path[step,] <- beta_u

      LL1 <- -out$value
      logLikelihood[step] <- LL1

      if (verbose & step %% 1000 == 0) message("step = ", step, "\n")
      if (step > 1 && (abs(LL1 - LL0) < tol | step >= nIter)) { #
        break
      }
      LL0 <- LL1
      step <- 1 + step
    }

    ######## output ########
    b_p_path <- b_p_path[, 1:J] - b_p_path[, (J + 1):(2 * J)]
    beta_p_path <- beta_p_path[, 1:M] - beta_p_path[, (M + 1):(2 * M)]
    # Remove rows of matrices and elements of vectors that are 0 because
    # algorithm converged before nIter
    if (step < nIter) {
      b_p_path <- b_p_path[1:step,]
      beta_p_path <- beta_p_path[1:step,]
      alpha_path <- alpha_path[1:step]
      lambda_path <- lambda_path[1:step]
      b_u_path <- b_u_path[1:step,]
      itct_path <- itct_path[1:step]
      beta_u_path <- beta_u_path[1:step,]
      logLikelihood <- logLikelihood[1:step]
    }
    output <- list(
      b_p_path = b_p_path, beta_p_path = beta_p_path, alpha_path = alpha_path,
      lambda_path = lambda_path, b_u_path = b_u_path, itct_path = itct_path,
      beta_u_path = beta_u_path, logLikelihood = logLikelihood
    )
    output
  }

#' Gradient for Updating Unpenalized Predictors in Weibull MCM Using GMIFS.
#'
#' @description
#' Gradient for optim when updating unpenalized predictors in an
#' Weibull MCM when fit using GMIFS (called by weibull.cure)
#'
#' @param theta vector of unpenalized parameters to estimate.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#'
#' @return negative gradient.
#' @noRd
weib.cure.gradient <-
  function(theta, b_p, beta_p, X_u, X_p, W_u, W_p, time, delta) {
    N <- length(time)
    alpha <- exp(theta[1])
    lambda <- exp(theta[2])
    if (!is.null(X_u)) {
      b_u <- theta[3:(ncol(X_u) + 2)] # unpenalized incidence
      itct <- theta[ncol(X_u) + 3]
      if (!is.null(W_u)) beta_u <- theta[(ncol(X_u) + 4):(ncol(X_u) + ncol(W_u) + 3)] # unpenalized latency
    } else {
      itct <- theta[3]
      if (!is.null(W_u)) beta_u <- theta[4:(ncol(W_u) + 3)] # unpenalized latency
    }
    b_nonzero <- which(b_p != 0)
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(X_u)) {
      C_b <- exp(itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    } else {
      C_b <- exp(itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero])
    }
    if (!is.null(W_u)) {
      C_beta <- exp(W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    } else {
      C_beta <- exp(W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero])
    }
    C_lbeta <- exp(-(lambda * time)^alpha * C_beta)
    temp1 <- log(pmax(lambda * time, rep(1e-100, N)))
    temp2 <- (lambda * time)^alpha * C_beta
    temp3 <- 1 / (1 + C_b * C_lbeta)
    temp4 <- (1 - delta) * C_b * C_lbeta * temp2 * temp3
    temp5 <- delta * (1 - temp2)
    temp6 <- 1 / (1 + C_b)
    temp7 <- temp6 * (delta - (1 - delta) * C_b * (1 - C_lbeta) * temp3)
    grad1 <- sum(delta * (1 / alpha + temp1 - temp2 * temp1) - temp4 * temp1) * alpha
    grad2 <- alpha * sum(temp5 - temp4)
    if (!is.null(X_u)) grad3 <- matrix(temp7, 1) %*% X_u else grad3 <- NULL
    grad4 <- sum(temp7) # intercept
    if (!is.null(W_u)) grad5 <- matrix(temp5 - temp4, 1) %*% W_u else grad5 <- NULL
    return(-c(grad1, grad2, grad3, grad4, grad5))
  }


#' Negative Log-Likelihood for the Latency Portion of a Weibull Mixture Cure
#' Model for GMIFS Algorithm to Update Unpenalized Predictors.
#'
#' @description
#' Function for optim when updating unpenalized predictors in an
#' Weibull mixture cure model when fit using GMIFS (called by weibull.cure).
#'
#' @param theta vector of unpenalized parameters to estimate.
#' @param b_p vector of estimated penalized incidence coefficients.
#' @param beta_p vector of estimated penalized latency coefficients.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#'
#' @return returns the negative log-likelihood value.
#' @noRd
weib.cure.negloglik <-
  function(theta, b_p, beta_p, X_u, X_p, W_u, W_p, time, delta) {
    N <- length(time)
    alpha <- exp(theta[1])
    lambda <- exp(theta[2])
    if (!is.null(X_u)) {
      b_u <- theta[3:(ncol(X_u) + 2)] # unpenalized incidence
      itct <- theta[ncol(X_u) + 3]
      if (!is.null(W_u)) beta_u <- theta[(ncol(X_u) + 4):(ncol(X_u) + ncol(W_u) + 3)] # unpenalized latency
    } else {
      itct <- theta[3]
      if (!is.null(W_u)) beta_u <- theta[4:(ncol(W_u) + 3)] # unpenalized latency
    }
    b_nonzero <- which(b_p != 0)
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(X_u)) {
      logC_b <- itct + X_u %*% b_u + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    } else {
      logC_b <- itct + X_p[, b_nonzero, drop = FALSE] %*% b_p[b_nonzero]
    }
    if (!is.null(W_u)) {
      logC_beta <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      logC_beta <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    temp1 <- 1 / (1 + exp(-logC_b)) # exp(logC_b) / (1+exp(logC_b)) #Mar17
    logC_lbeta <- -(lambda * time)^alpha * exp(logC_beta)
    ll1 <- delta * (log(temp1) + theta[2] + theta[1] +
      (alpha - 1) * log(pmax(lambda * time, rep(1e-100, N))) +
      logC_beta + logC_lbeta)
    ll2 <- (1 - delta) * log(pmax(1 - temp1 * (1 - exp(logC_lbeta)), rep(1e-100, N)))
    return(-sum(ll1 + ll2))
  }


#' Negative Log-Likelihood for the Latency Portion of a Weibull Mixture Cure
#' Model for EM Algorithm to Update Penalized Predictors.
#'
#' @description
#' Function for optim when updating penalized predictors in the latency portion
#' of a Weibull MCM when fit using EM (called by weib_EM)
#'
#' @param theta numeric vector for the penalized predictors in the
#' Weibull latency distribution.
#' @param beta_p vector of estimated latency coefficients.
#' @param alpha estimated alpha parameter for Weibull latency distribution.
#' @param lambda estimated lambda parameter for Weibull latency distribution.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return returns the negative log-likelihood value.
#' @noRd
weib_negloglik_lat_beta <-
  function(theta, alpha, lambda, beta_u, W_u, W_p, time, delta, pir, mu) { # latency
    N <- dim(W_p)[1]
    M <- ncol(W_p)
    beta_p_ext <- theta
    beta_p <- beta_p_ext[1:M] - beta_p_ext[(M + 1):(2 * M)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    ll1 <- delta * (log(alpha) + log(lambda) + (alpha - 1) * log(pmax(lambda * time, 1e-100)) + betaw)
    ll2 <- -pir * (lambda * time)^alpha * exp(betaw)
    return(-sum(ll1 + ll2) + N * mu * sum(beta_p_ext))
  }

#' Negative Log-Likelihood for the Latency Portion of a Weibull Mixture Cure
#' Model for EM Algorithm to Update Unpenalized Predictors.
#'
#' @description
#' Function for optim when updating unpenalized predictors in the latency
#' portion of a Weibull MCM when fit using EM (called by weib_EM).
#'
#' @param theta numeric vector for alpha, lambda, and any unpenalized
#' coefficients in the Weibull latency distribution.
#' @param beta_p vector of estimated latency coefficients.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#' @param time time-to-event of interest.
#' @param delta event (delta = 1) and censoring (delta = 0) indicator.
#' @param pir vector of estimates for the conditional distribution of the event
#' at the current parameter estimates and observed datafor the latency
#' distribution E-step.
#' @param mu initial intercept for the latency portion of the model.
#'
#' @return returns the negative log-likelihood value.
#' @noRd
weib_negloglik_lat <-
  function(theta, beta_p, W_u, W_p, time, delta, pir, mu) { # latency
    N <- dim(W_p)[1]
    log_alpha <- theta[1]
    log_lambda <- theta[2]
    alpha <- exp(log_alpha)
    lambda <- exp(log_lambda)
    if (!is.null(W_u)) beta_u <- theta[3:(ncol(W_u) + 2)]
    beta_nonzero <- which(beta_p != 0)
    if (!is.null(W_u)) {
      betaw <- W_u %*% beta_u + W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    } else {
      betaw <- W_p[, beta_nonzero, drop = FALSE] %*% beta_p[beta_nonzero]
    }
    ll1 <- delta * (log_alpha + log_lambda + (alpha - 1) * log(pmax(lambda * time, 1e-100)) + betaw)
    ll2 <- -pir * (lambda * time)^alpha * exp(betaw)
    return(-sum(ll1 + ll2) + N * mu * sum(abs(beta_p)))
  }

#' Simulate Data under a Mixture Cure Model.
#'
#' @description Called by nonzerocure_test to simulate survival data under the
#' null model. If b = NULL, censoring times are generated from an exponential
#' distribution. If b is not null, censoring times are generated from a
#' uniform distribution with 0 as the lower limit and b as the upper limit.
#'
#' @param n integer, number of observations to generate.
#' @param mu rate for generating time-to-event from an exponential is 1/mean.
#' @param censor_mu rate for generating censoring time from an exponential is
#' 1/censor_mu.
#' @param b if non-null, censoring time is generated from a uniform distribution
#' on the interval from 0 to b.
#' @param reps number of times to simulate data.
#' @param seed random seed to set to enhance reproducibility.
#'
#' @return estimated susceptible proportion.
#'
#' @srrstats {G5.5} *Correctness tests should be run with a fixed random seed*
#' @noRd
sim_cure <-
  function(n, mu = 1, censor_mu = 3, b = NULL, reps = 10000, seed = NULL) {
    susceptible_prop <- numeric(length = reps)
    if (!is.null(seed)) {
      withr::local_seed(seed)
    }
    for (i in 1:reps) {
      surv <- rexp(n, 1 / mu)
      if (is.null(b)) {
        u <- rexp(n, 1 / censor_mu) ## rate is 1/mu
      } else {
        u <- runif(n, 0, b)
      }
      time <- ifelse(surv <= u, surv, u)
      censor <- ifelse(surv <= u, 1, 0)
      fit <- survival::survfit(survival::Surv(time, censor) ~ 1)
      susceptible_prop[i] <- 1 - min(fit$surv)
    }
    susceptible_prop
  }

#' Internal Function for Calculating the Area Under the Incidence Curve.
#'
#' @description Called by cv.em.inner and cv.gmifs.inner when the AUC is used
#' to optimized the mixture cure model.
#'
#' @param cure_cutoff time at which the C-statistic should be estimated.
#' @param b_u_hat vector of estimated unpenalized incidence coefficients.
#' @param itct_hat estimated intercept for incidence portion of the model.
#' @param b_p_hat vector of estimated penalized incidence coefficients.
#' @param testing_delta vector of censoring indicator for dataset of interest.
#' @param testing_time vector of time-to-event for dataset of interest.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#'
#' @return AUC under the incidence curve at the specified
#' cure_cutoff timepoint.
#' @noRd
AUC_msi <- function(cure_cutoff = 5, b_u_hat = NULL, itct_hat, b_p_hat, testing_delta, testing_time,
                    X_u = NULL, X_p) {
  testing_n <- length(testing_time)
  v <- rep(0, testing_n)
  y <- rep(999, testing_n)
  y[testing_time > cure_cutoff] <- 0
  y[testing_time <= cure_cutoff & testing_delta == 1] <- 1
  v[y < 2] <- 1
  if (all(b_p_hat == 0)) {
    if (is.null(X_u)) xb <- rep(itct_hat, testing_n) else xb <- itct_hat + X_u %*% b_u_hat
  } else {
    if (is.null(X_u)) {
      xb <- itct_hat + X_p[, b_p_hat != 0, drop = FALSE] %*% b_p_hat[b_p_hat != 0]
    } else {
      xb <- itct_hat + X_u %*% b_u_hat + X_p[, b_p_hat != 0, drop = FALSE] %*% b_p_hat[b_p_hat != 0]
    }
  }
  p_hat <- 1 / (1 + exp(-xb))
  temp <- v * y + (1 - v) * p_hat
  temp1 <- temp[order(p_hat, decreasing = T)]
  temp_f <- v * (1 - y) + (1 - v) * (1 - p_hat)
  temp1f <- temp_f[order(p_hat, decreasing = T)]
  TPR <- c(0, cumsum(temp1) / cumsum(temp1)[testing_n])
  FPR <- c(0, cumsum(temp1f) / cumsum(temp1f)[testing_n])
  height <- (TPR[-1] + TPR[-length(TPR)]) / 2
  width <- diff(FPR)
  auc_msi <- sum(height * width)
  return(auc_msi)
}

#' Calculate C-statistic for Mixture Cure Model.

#' @description Called by cv.em.inner and cv.gmifs.inner when
#' the concordance statistic is used in optimization
#'
#' @param cure_cutoff time at which the C-statistic should be estimated.
#' @param b_u_hat vector of estimated unpenalized incidence coefficients.
#' @param itct_hat estimated intercept for incidence portion of the model.
#' @param b_p_hat vector of estimated penalized incidence coefficients.
#' @param beta_u_hat vector of estimated unpenalized latency coefficients.
#' @param beta_p_hat vector of estimated penalized latency coefficients.
#' @param testing_delta vector of censoring indicator for dataset of interest.
#' @param testing_time vector of time-to-event for dataset of interest.
#' @param X_u unpenalized predictors in the incidence portion of the model.
#' @param X_p penalized predictors in the incidence portion of the model.
#' @param W_u unpenalized predictors in the latency portion of the model.
#' @param W_p penalized predictors in the latency portion of the model.
#'
#' @return mixture cure model c-statistic at the specified
#' cure_cutoff timepoint.
#' @noRd
C.stat <-
  function(cure_cutoff = 5, b_u_hat = NULL, itct_hat, b_p_hat,
           beta_u_hat = NULL, beta_p_hat, testing_delta, testing_time,
           X_u = NULL, X_p, W_u = NULL, W_p) {
    C_csw_num <- 0
    C_csw_denom <- 0
    testing_n <- length(testing_time)
    v <- rep(0, testing_n)
    y <- rep(999, testing_n)
    y[testing_time > cure_cutoff] <- 0
    y[testing_time <= cure_cutoff & testing_delta == 1] <- 1
    v[y < 2] <- 1
    if (all(b_p_hat == 0)) {
      if (is.null(X_u)) {
        p_hat <- 1 / (1 + exp(-itct_hat))
      } else {
        p_hat <- 1 / (1 + exp(-itct_hat - X_u %*% b_u_hat))
      }
    } else {
      if (is.null(X_u)) {
        p_hat <- 1 / (1 + exp(-itct_hat - X_p[, b_p_hat != 0,
          drop = FALSE
        ] %*% b_p_hat[b_p_hat != 0]))
      } else {
        p_hat <- 1 / (1 + exp(-itct_hat - X_u %*% b_u_hat -
          X_p[, b_p_hat != 0, drop = FALSE] %*% b_p_hat[b_p_hat !=
            0]))
      }
    }
    temp <- v * y + (1 - v) * as.vector(p_hat)
    if (all(beta_p_hat == 0)) {
      if (is.null(W_u)) {
        W_beta <- rep(0, testing_n)
      } else {
        W_beta <- W_u %*% beta_u_hat
      }
    } else {
      if (is.null(W_u)) {
        W_beta <- W_p[, beta_p_hat != 0, drop = FALSE] %*%
          beta_p_hat[beta_p_hat != 0]
      } else {
        W_beta <- W_u %*% beta_u_hat + W_p[, beta_p_hat !=
          0, drop = FALSE] %*% beta_p_hat[beta_p_hat != 0]
      }
    }
    for (i in 1:testing_n) {
      for (j in 1:testing_n) {
        if (j == i | !testing_delta[i] | testing_time[i] > testing_time[j]) {
          next
        }
        I_ij <- testing_time[i] < testing_time[j] | (testing_time[i] ==
          testing_time[j] & !testing_delta[j])
        if (!I_ij) {
          next
        }
        if (W_beta[i] > W_beta[j]) {
          C_csw_num <- C_csw_num + temp[j]
        }
        C_csw_denom <- C_csw_denom + temp[j]
      }
    }
    return(C_csw_num / C_csw_denom)
  }


#' Extract Variables from the Right-hand Side of a Model Formula.
#'
#' @param model a model formula.
#'
#' @return names of variables on the right-hand side of a
#' model formula.
#'
#' @noRd
extract_rhs_values <- function(model) {
  # Extract the model formula
  model_formula <- formula(model)
  # Get the right-hand side (RHS) variables from the formula
  rhs_vars <- attr(terms(model_formula), "term.labels")
  # Extract the model call
  model_call <- model$call
  # Check if the model call contains the data argument
  if (!"data" %in% names(model_call)) {
    stop("Error: Model call does not reference a dataset.")
  }
  # Evaluate the data argument from the model call in the model's environment
  data <- eval(model_call$data, environment(model$terms))
  if (is.null(data)) {
    stop("Error: Could not evaluate dataset from model call.")
  }
  # Extract the RHS variable values from the dataset
  rhs_values <- data[rhs_vars]
  return(rhs_values)
}


identify_missing <- function(formula, data) {
  mf <- model.frame(formula = formula, data = data, na.action = na.omit)
  omitted <- attr(mf, "na.action")
  as.numeric(omitted)
}

#' Number of parameters in fitted mixture cure model
#'
#' @description
#' This function returns the number of parameters in a user-specified model
#' criterion or step for a \code{curegmifs}, \code{cureem},
#' \code{cv_curegmifs} or \code{cv_cureem} fitted object.
#'
#' @param object a \code{mixturecure} object resulting from \code{curegmifs},
#' \code{cureem}, \code{cv_curegmifs}, \code{cv_cureem}.
#' @param model_select either a case-sensitive parameter for models fit using
#' \code{curegmifs} or \code{cureem} or any numeric step along the solution path
#' can be selected. The default is \code{model_select = "AIC"} which calculates
#' the predicted values using the coefficients from the model achieving the
#' minimum AIC. The complete list of options are:
#' \itemize{
#'     \item \code{"AIC"} for the minimum AIC (default).
#'     \item \code{"mAIC"} for the minimum modified AIC.
#'     \item \code{"cAIC"} for the minimum corrected AIC.
#'     \item \code{"BIC"}, for the minimum BIC.
#'     \item \code{"mBIC"} for the minimum modified BIC.
#'     \item \code{"EBIC"} for the minimum extended BIC.
#'     \item \code{"logLik"} for the step that maximizes the
#' log-likelihood.
#'     \item \code{n} where n is any numeric value from the
#'     solution path.
#'   }
#' This option has no effect for objects fit using \code{cv_curegmifs} or
#' \code{cv_cureem}.
#'
#' @return number of paramaters of the fitted mixture cure model using the
#' specified criteria.
#'
#'
#' @srrstats {G1.4} *Software should use [`roxygen2`](https://roxygen2.r-lib.org/) to document all functions.*
npar_mixturecure <- function (object, model_select = "AIC")
{
  coef <- coef(object, model_select = model_select)
  n <- length(coef$rate) + length(coef$shape) + length(coef$b0) +
    sum(coef$beta_inc != 0) + sum(coef$beta_lat != 0)
  return(n)
}

Try the hdcuremodels package in your browser

Any scripts or data that you put into this service are public.

hdcuremodels documentation built on Aug. 8, 2025, 7:38 p.m.