R/orthoDr_surv.r

Defines functions orthoDr_surv

Documented in orthoDr_surv

#' @title IR-CP model
#' @name orthoDr_surv
#' @description The counting process based semiparametric dimension reduction (IR-CP) model for right censored survival outcome.
#' @param x A matrix or data.frame for features. The algorithm will not scale the columns to unit variance
#' @param y A vector of observed time
#' @param censor A vector of censoring indicator
#' @param method Which estimating equation to use: should be  \code{forward} (1-d model), \code{dn} (counting process) or \code{dm} (martingale)
#' @param ndr The number of directions
#' @param B.initial Initial \code{B} values. Will use the counting process based SIR model \link[orthoDr]{CP_SIR} as the initial if leaving as \code{NULL}.
#' If specified, must be a matrix with \code{ncol(x)} rows and \code{ndr} columns. Will be processed by Gram-Schmidt if not orthogonal
#' @param bw A Kernel bandwidth, assuming each variables have unit variance
#' @param keep.data Should the original data be kept for prediction. Default is \code{FALSE}
#' @param control A list of tuning variables for optimization. \code{epsilon} is the size for numerically approximating the gradient. For others, see Wen and Yin (2013).
#' @param maxitr Maximum number of iterations
#' @param verbose Should information be displayed
#' @param ncore Number of cores for parallel computing. The default is the maximum number of threads.
#' @return A \code{orthoDr} object; a list consisting of
#' \item{B}{The optimal \code{B} value}
#' \item{fn}{The final functional value}
#' \item{itr}{The number of iterations}
#' \item{converge}{convergence code}
#' @references Sun, Q., Zhu, R., Wang, T. and Zeng, D. "Counting Process Based Dimension Reduction Method for Censored Outcomes." (2017)
#' DOI: \url{https://arxiv.org/abs/1704.05046}.
#' @references Wen, Z. and Yin, W., "A feasible method for optimization with orthogonality constraints." Mathematical Programming 142.1-2 (2013): 397-434.
#' DOI: \url{https://doi.org/10.1007/s10107-012-0584-1}
#' @examples
#' # This is setting 1 in Sun et. al. (2017) with reduced sample size
#' library(MASS)
#' set.seed(1)
#' N = 200; P = 6
#' V=0.5^abs(outer(1:P, 1:P, "-"))
#' dataX = as.matrix(mvrnorm(N, mu=rep(0,P), Sigma=V))
#' failEDR = as.matrix(c(1, 0.5, 0, 0, 0, rep(0, P-5)))
#' censorEDR = as.matrix(c(0, 0, 0, 1, 1, rep(0, P-5)))
#' T = rexp(N, exp(dataX %*% failEDR))
#' C = rexp(N, exp(dataX %*% censorEDR - 1))
#' ndr = 1
#' Y = pmin(T, C)
#' Censor = (T < C)
#'
#' # fit the model
#' forward.fit = orthoDr_surv(dataX, Y, Censor, method = "forward")
#' distance(failEDR, forward.fit$B, "dist")
#'
#' dn.fit = orthoDr_surv(dataX, Y, Censor, method = "dn", ndr = ndr)
#' distance(failEDR, dn.fit$B, "dist")
#'
#' dm.fit = orthoDr_surv(dataX, Y, Censor, method = "dm", ndr = ndr)
#' distance(failEDR, dm.fit$B, "dist")

orthoDr_surv <- function(x, y, censor, method = "dm", ndr = ifelse(method == "forward", 1, 2),
                         B.initial = NULL,
                         bw = NULL,
                         keep.data = FALSE,
                         control = list(),
                         maxitr = 500,
                         verbose = FALSE,
                         ncore = 0)
{
  if (!is.matrix(x)) stop("x must be a matrix")
  if (!is.numeric(x)) stop("x must be numerical")
  if (nrow(x) != length(y) | nrow(x) != length(censor)) stop("Number of observations do not match")

  # check tuning parameters
  control = control.check(control)
  match.arg(method, c("forward", "dn", "dm"))

  ndr = max(1, ndr)
  if (ndr > 4) warning("ndr > 4 is not recommended due to nonparametric kernel estimations")
  ndr = min(ndr, ncol(x))

  if (method == "forward" & ndr > 1)
  {
    warning("forward can only solve for 1 dimension")
    ndr = 1
  }

  N = nrow(x)
  P = ncol(x)

  if (is.null(bw))
    bw = silverman(ndr, N)

  # scale X and Y for stability
  # Y = scale(log(y)) / sqrt(2) / silverman(1, N)

  Y = scale(rank(y, ties.method = "average") / (N + 1)) / sqrt(2) / silverman(1, N)

  xscale = apply(x, 2, sd)
  X = scale(x)

  # get initial value
  if (is.null(B.initial))
  {
    B.initial = CP_SIR(X, Y, censor)$vectors[,1:ndr, drop = FALSE]
  }else{
    if (!is.matrix(B.initial)) stop("B.initial must be a matrix")
    if (ncol(x) != nrow(B.initial) | ndr != ncol(B.initial)) stop("Dimension of B.initial is not correct")
    if (method == "forward" & ncol(B.initial) > 1) stop("forward method can only use 1-d B.initial")
  }

  B.initial = gramSchmidt(B.initial)$Q

  # pre-process

  Yorder = order(Y)
  X = X[Yorder, ]
  Y = Y[Yorder]
  C = censor[Yorder]
  Fail.Ind = which(C==1)

  # calculate some useful stuff

  nFail = length(Fail.Ind)

  # E[X | dN(t) = 1, Y(t) = 1] - E[X | dN(t) = 0, Y(t) = 1] at all failure times

  kernel.y = exp(-(as.matrix(dist(Y, method = "euclidean"))))
  kernel.y[upper.tri(kernel.y)] = 0

  Phit = matrix(0, P, nFail)

  for (j in 1:nFail)
    Phit[,j] = as.matrix(apply(X, 2, weighted.mean, w = C*kernel.y[, Fail.Ind[j]]) - colMeans(X[Fail.Ind[j]:N, ,drop = FALSE]))

  # start to fit the model

  pre = Sys.time()

  if (method == "forward")
    fit = surv_forward_solver(B.initial, X, Fail.Ind, bw,
                         control$rho, control$eta, control$gamma, control$tau, control$epsilon,
                         control$btol, control$ftol, control$gtol, maxitr, verbose, ncore)

  if (method == "dn")
    fit = surv_dn_solver(B.initial, X, Phit, Fail.Ind, bw,
                    control$rho, control$eta, control$gamma, control$tau, control$epsilon,
                    control$btol, control$ftol, control$gtol, maxitr, verbose, ncore)

  if (method == "dm")
    fit = surv_dm_solver(B.initial, X, Phit, Fail.Ind, bw,
                         control$rho, control$eta, control$gamma, control$tau, control$epsilon,
                         control$btol, control$ftol, control$gtol, maxitr, verbose, ncore)

  if (verbose > 0)
    cat(paste("Total time: ", round(as.numeric(Sys.time() - pre, units = "secs"), digits = 2), " secs\n", sep = ""))

  # rescale B back to the original scale

  fit$B = sweep(fit$B, 1, xscale, FUN = "/")
  fit$B = apply(fit$B, 2, function(x) x / sqrt(sum(x^2)))
  fit$method = method
  fit$keep.data = keep.data

  if (keep.data)
  {
    fit[['x']] = x
    fit[['y']] = y
    fit[['censor']] = censor
  }

  class(fit) <- c("orthoDr", "fit", "surv")

  return(fit)
}

Try the orthoDr package in your browser

Any scripts or data that you put into this service are public.

orthoDr documentation built on Sept. 5, 2019, 5:03 p.m.