R/presmooth.R

Defines functions print.survPresmooth presmooth control.presmooth

Documented in control.presmooth presmooth print.survPresmooth

control.presmooth <- function(n.boot = c(5000, 1000),
                              q.weight = c(0.2, 0.8),
                              k = 1,
                              length.grid.bw.plugin = 100,
                              length.grid.ise = 100,
                              pilot.par.ini = NULL,
                              save.data = FALSE,
                              save.mise = FALSE,
                              na.action = na.omit) {
  list(n.boot = n.boot, q.weight = q.weight, k = k, length.grid.bw.plugin = length.grid.bw.plugin, length.grid.ise = length.grid.ise, pilot.par.ini = pilot.par.ini, save.data = save.data, save.mise = save.mise, na.action = na.action)
}

presmooth <- function(times,
                      status,
                      dataset = NULL,
                      estimand = c("S", "H", "f", "h"),
                      bw.selec = c("fixed", "plug-in", "bootstrap"),
                      presmoothing = TRUE,
                      fixed.bw = NULL,
                      grid.bw.pil = NULL,
                      grid.bw = NULL,
                      kernel = c("biweight", "triweight"),
                      bound = c("none", "left", "right", "both"),
                      x.est = NULL,
                      control = control.presmooth()) {
  na.action <- control$na.action
  ## Define the 'internal' data frame
  dfr <-
    if (is.null(dataset)) {
      na.action(data.frame(times, status))
    } else {
      na.action(dataset[, c(deparse(substitute(times)), deparse(substitute(status)))])
    }
  names(dfr) <- c("t", "d")
  dfr$t <- as.numeric(dfr$t)
  dfr$d <- as.integer(dfr$d)
  dfr <- dfr[order(dfr$t, 1 - dfr$d),]
  dup <- as.integer(duplicated(dfr$t))
  if (length(kernel) > 1) {
    kernel <- kernel[1]
  }
  n.kernel <- pmatch(kernel, c("biweight", "triweight"))
  if (is.na(n.kernel)) {
    stop("'kernel' argument must be one of 'biweight', 'triweight'")
  } else {
    kernel <- switch(n.kernel, "biweight", "triweight")
  }
  ## Constants: c1: second moment of the kernel; c2: integral of the squared kernel, c_K; c3: integral of the squared second derivative of the kernel; c4
  const <- switch(n.kernel, list(c1 = 1/7, c2 = 5/7, c3 = 45/2, c4 = 15/7, c5 = 25/231), list(c1 = 1/9, c2 = 350/429, c3 = 35, c4 = 35/11, c5 = 245/2574))
  if (length(estimand) > 1) {
    estimand <- estimand[1]
  }
  n.estimand <- match(estimand, c("S", "H", "f", "h"))
  if (is.na(n.estimand)) {
    stop("'estimand' argument must be one of 'S', 'H', 'f', 'h'")
  }
  if (length(bound) > 1) {
    bound <- bound[1]
  }
  n.bound <- pmatch(bound, c("none", "left", "right", "both"))
  if (is.na(n.bound)) {
    stop("'bound' argument must be one of 'none', 'left', 'right', 'both'")
  } else {
    bound <- switch(n.bound, "none", "left", "right", "both")
  }
  if (length(bw.selec) > 1) {
    bw.selec <- bw.selec[1]
  }
  n.bw.selec <- pmatch(bw.selec, c("fixed", "plug-in", "bootstrap"))
  if (is.na(n.bw.selec)) {
    stop("'bw.selec' argument must be one of 'fixed', 'plug-in', 'bootstrap'")
  } else {
    bw.selec <- switch(n.bw.selec, "fixed", "plug-in", "bootstrap")
  }
  if ((n.bw.selec == 1) && (!presmoothing) && (n.estimand >= 3)) {
    if (is.null(fixed.bw) || length(fixed.bw) != 2) {
      stop("'fixed.bw' must be non-null and of length 2")
    } else {
      if (fixed.bw[1] != 0) {
        warning("'presmoothing' is FALSE, but the presmoothing fixed bandwidth is not 0: fixed.bw[1] set to 0")
        fixed.bw[1] <- 0
      }
    }
  }
  allcens <- FALSE
  if (sum(dfr$d) == 0) {
    warning("All the times are censored: estimation is not possible")
    allcens <- TRUE
    n.bw.selec <- 1
    bw.selec <- NA
    fixed.bw <- if (n.estimand <= 2) NA else c(NA,NA)
  }
  if ((!presmoothing) && (n.estimand %in% 1:2)) {
    fixed.bw <- 0
    if (n.bw.selec != 1) {
      warning("No bandwidth selection performed when 'presmoothing' is 'FALSE' and 'estimand' is 'S' or 'H'. Input processed as if 'bw.selec' were 'fixed' with 'fixed.bw' equal to 0.")
      n.bw.selec <- 1
      bw.selec <- "fixed"
    }
  }
  n <- dim(dfr)[[1]]
  if (n.estimand <= 2) {
    if (!is.null(x.est)) {
      warning("'x.est' values overriden: only estimates at observed times are given")
    }
    x.est <- dfr$t
  } else {
    if (is.null(x.est)) {
      x.est <- seq(min(dfr$t), quantile(dfr$t, 0.9), length.out = 50)
    }
  }
  le.x.est <- length(x.est)
  q.w <- quantile(dfr$t, control$q.weight)
  cond <- (dfr$t > q.w[1]) & (dfr$t < q.w[2])
  range.t <- diff(range(dfr$t))
  forh <- n.estimand >= 3
  ## check the correction of grid.bw.pil
  if ((((n.bw.selec == 2) && (n.estimand <= 2)) || (n.bw.selec == 1)) && !is.null(grid.bw) && !allcens) {
    warning("No grid of pilot bandwidths is needed (the value of 'grid.bw.pil' is ignored)")
  }
  if (((n.bw.selec == 2) && forh) || (n.bw.selec == 3)) {
    ## Default grid where the first pilot bandwidth will be selected
    if (is.null(grid.bw.pil)) {
      grid.bw.pil <-
        if (q.w[1] < (q.w[2]/10)) {
          q.w[1] * (q.w[2]/q.w[1])^seq(0, 1, by = 0.01)
        } else {
          q.w[2]/10 * 10^seq(0, 1, by = 0.01)
        }
    }
    le.bw.pilot <- length(grid.bw.pil)
    ## pilot bandwidth selected by cross-validation
    ls.cv <- .C("lscv", dfr$t, dfr$d, n, grid.bw.pil, le.bw.pilot, n.kernel, lscv = numeric(le.bw.pilot), PACKAGE = "survPresmooth")$lscv
    pilot.bw1 <- grid.bw.pil[which.min(ls.cv)]
  }
  ## check the correction of grid.bw
  if (n.bw.selec !=  3 && !is.null(grid.bw) && !allcens) {
    warning(paste("No grid of bandwidths is needed when 'bw.selec =", paste("\"", bw.selec, "\"'", sep = ""), "(the value of 'grid.bw' is ignored)"))
  }
  if (!(forh && presmoothing) && is.list(grid.bw) && length(grid.bw) > 1) {
    warning("Only one grid of bandwidths is needed (the second one has been discarded)")
  }
  if (forh && presmoothing && ((is.list(grid.bw) && length(grid.bw) == 1) || is.numeric(grid.bw))) {
    warning("Two grid of bandwidthds are needed, not only one (the given grid has been duplicated)")
    if (is.numeric(grid.bw)) {
      grid.bw <- list(grid.bw)
    }
    grid.bw[[2]] <- grid.bw[[1]]
  }
  if (((n.bw.selec == 3) && forh) || (n.bw.selec == 2)) {
    ## Initial parameter estimates of a simple Weibull model for the observed lifetimes, based on the method of moments
    shape0 <- uniroot(function(x) var(dfr$t) - (mean(dfr$t)/ gamma(1+1/x))^2*(gamma(1+2/x) - (gamma(1+1/x))^2), interval = c(0.02, 20))$root
    scale0 <- mean(dfr$t)/gamma(1+1/shape0)
    pilot.par.ini <-
      if (is.null(control$pilot.par.ini)) {
        c(1/shape0, scale0, shape0, scale0, shape0, 1/scale0, 1/3, 1/3)
      } else {
        control$pilot.par.ini
      }
    pilot.bw2.integrand <- function(x, par, modif = as.integer(0)) {
      .C("pilot2forhintegrand", x, length(x), par, length(par), modif, pilot2hint = numeric(length(x)), PACKAGE = "survPresmooth")$pilot2hint
    }
  }
  ## Bandwidth computation
  switch(
    n.bw.selec,
    ## fixed bandwidth
    bw <- fixed.bw,
    {
      ## Plug-in
      le.plugin <- as.integer(ifelse(control$length.grid.bw.plugin %% 2 == 0, control$length.grid.bw.plugin - 1, control$length.grid.bw.plugin))
      grid.plugin <- seq(q.w[1], q.w[2], length.out = le.plugin)
      step.plugin <- (grid.plugin[le.plugin] - grid.plugin[1])/(le.plugin-1)
      esf <- 1 - ecdf(dfr$t)(grid.plugin) + 1/n
      switch(
        estimand,
        S =,
        H = {
          ## Pilot
          ## A logistic model is supposed for the probability of non-censoring
          logistfit <- glm(d ~ t , family = binomial, data = dfr)
          coef.logis <- logistfit$coefficients
          p <- as.numeric(predict(logistfit, newdata = data.frame(t = grid.plugin), type = "response"))
          ## 1st if-else
          if (all(p > 0.999) || all(p < 0.001)) {
            warning("Pilot bandwidth computation problem. Estimated p is constantly equal to 0 or 1: selected bandwidth taken as the largest value")
            pilot.bw <- NULL
            bw <- control$k * range.t
          } else {
            ## 2nd if-else
            if (exp(coef.logis[1]) > .Machine$double.xmax^.1 | exp(coef.logis[1] + grid.plugin[le.plugin]*coef.logis[2]) > .Machine$double.xmax^.1) {
              warning("Computational problems due to overflow: no presmoothing done")
              pilot.bw <- NULL
              bw <- 0
            } else {
              ## Fit a simple Weibull model for observed lifetimes
              llikplugH <- function(p, x) {
                -sum(dweibull(x, p[1], p[2], log = TRUE))
              }
              estparplug <- optim(c(shape0, scale0), llikplugH, method = "L-BFGS-B", lower = 1e-8, x = dfr$t)$par
              ## Computation of C1 and C2 according to formulas (2.154) and (2.155) on page 87 of Lopez de Ullibarri's thesis
              C1fun1 <- function(x, t, n, coef, par) {
                .C("c1integrand1", x, length(x), t, n, coef, par, c1fun1 = numeric(length(x)), PACKAGE = "survPresmooth")$c1fun1
              }
              C1fun2 <- function(x, t, n, coef, par) {
                .C("c1integrand2", x, length(x), t, n, coef, par, c1fun2 = numeric(length(x)), PACKAGE = "survPresmooth")$c1fun2
              }
              n.d0 <- min(which(dfr$d == 0), na.rm = TRUE)
              if (length(n.d0) == 0) {
                n.d0 <- n
              }
              C1integrand1 <- C1integrand2 <- numeric(le.plugin)
              for (i in 1:le.plugin) {
                C1integrand1[i] <- integrate(C1fun1, lower = if (i==1) min(dfr$t[n.d0]*0.99, grid.plugin[1]/2) else grid.plugin[i-1], upper = grid.plugin[i], t = dfr$t, n = n, coef = coef.logis, par = estparplug, subdivisions = 10000, stop.on.error = FALSE)$value
                C1integrand2[i] <- integrate(C1fun2, lower = if (i==1) min(dfr$t[n.d0]*0.99, grid.plugin[1]/2) else grid.plugin[i-1], upper = grid.plugin[i], t = dfr$t, n = n, coef = coef.logis, par = estparplug, subdivisions = 10000, stop.on.error = FALSE)$value
              }
              C1integrand <- cumsum(C1integrand1) * cumsum(C1integrand2)
              C1 <- const$c1 / 2 * step.plugin * .C("simpson", C1integrand, le.plugin, integral = numeric(1), PACKAGE = "survPresmooth")$integral
              C2pre <- p * (1 - p) * dweibull(grid.plugin, estparplug[1], estparplug[2]) / esf^2
              C2 <- const$c4 / 4 * step.plugin * .C("simpson", C2pre, le.plugin, integral = numeric(1), PACKAGE = "survPresmooth")$integral
              ## Formula on page 91 of Lopez de Ullibarri's thesis, modified by considering an empirical upper bound (trying to avoid the sporadical ocurrence of exceedingly large bandwidths)
              pilot.bw1 <- (C2/C1 * ifelse(C1 < 0, -1 , 3/2) / n)^(1/5)
              ## Computation of D1 and D2 according to formulas (2.237) and (2.238) on page 125 of Lopez-de-Ullibarri's thesis
              Dintegrand <- .C("dintegrand", grid.plugin, le.plugin, dfr$t, n, logistfit$coefficients, estparplug, p, d1int = numeric(le.plugin), d2int = numeric(le.plugin), PACKAGE = "survPresmooth")[c("d1int", "d2int")]
              D1 <- const$c1 * step.plugin * .C("simpson", Dintegrand$d1int, le.plugin, integral = numeric(1), PACKAGE = "survPresmooth")$integral
              D2 <- -const$c2 * step.plugin * .C("simpson", Dintegrand$d2int, le.plugin, integral = numeric(1), PACKAGE = "survPresmooth")$integral
              ## Formula on page 127 of Lopez de Ullibarri's thesis, modified by considering an empirical upper bound (trying to avoid the sporadical ocurrence of too large bandwidths)
              pilot.bw2 <- (D2/D1 * ifelse(D1 < 0, 1/2 , -1) / n)^(1/3)
              pilot.bw <- pmin(c(pilot.bw1, pilot.bw2), control$k * range.t)
              ## Bandwidth (computed according to formulas on page 17 of Lopez-de-Ullibarri's thesis)
              p.nw <- .C("nadarayawatson", dfr$t, n, dfr$t, dfr$d, n, pilot.bw[2], n.kernel, phat = numeric(n), PACKAGE = "survPresmooth")$phat
              ## 3rd if-else
              if (all(p.nw[cond] > 0.999) || all(p.nw[cond] < 0.001)) {
                warning("Bandwidth computation problem. Estimated p is constantly equal to 0 or 1: selected bandwidth taken as the largest value")
                pilot.bw <- NULL
                bw <- control$k * range.t
              } else {
                esf2 <- 1 - ecdf(dfr$t)(dfr$t) + 1/n
                Q <- sum(p.nw[cond] * (1 - p.nw[cond]) / esf2[cond]^2) / n
                funalpha1 <- function(x, t, d, n, bw, kern) {
                  .C("alphaintegrand", t, d, n, x, length(x), bw, bw, kern, alphaint = numeric(length(x)), PACKAGE = "survPresmooth")$alphaint
                }
                prealpha <- numeric(le.plugin)
                for (i in 1:le.plugin) {
                  prealpha[i] <- integrate(funalpha1, lower = if (i == 1) 0 else grid.plugin[i-1], upper = grid.plugin[i], t = dfr$t, d = dfr$d, n = n, bw = pilot.bw[1], kern = n.kernel, subdivisions = 10000, stop.on.error = FALSE)$value
                }
                alpha <- cumsum(prealpha)
                A <- step.plugin * .C("simpson", alpha^2, le.plugin, integral = numeric(1), PACKAGE = "survPresmooth")$integral
                ## The same modification than for pilot bandwidths is incorporated
                bw <- if (A < 1e-6 & Q < 1e-6) 0 else min((const$c5 * Q / A / 2 / const$c1^2 / n)^(1/3), control$k * range.t)
                ## end of 3rd if-else
              }
              ## end of 2rd if-else
            }
            ## end of 1st if-else
          }
        },
        f = {
          ## Pilot
          ## suppressWarnings() is necessary to avoid some annoying but irrelevant warnings...
          llikplugf <- function(p, x, d) {
            -sum(suppressWarnings(log(d * (p[7] * dweibull(x, p[1], p[2]) + p[8] * dweibull(x, p[3], p[4]) + (1-p[7]-p[8]) * dweibull(x, p[5], p[6])) + (1-d) * (p[7] * pweibull(x, p[1], p[2], lower.tail = FALSE) + p[8] * pweibull(x, p[3], p[4], lower.tail = FALSE) +(1-p[7]-p[8]) * pweibull(x, p[5], p[6], lower.tail = FALSE)))))
          }
          ## Redefinition of llikplugf for a mixture of only two components
          llikplugf2 <- function(p, x, d) {
            -sum(suppressWarnings(log(d * (p[5] * dweibull(x, p[1], p[2]) + (1 - p[5]) * dweibull(x, p[3], p[4])) + (1 - d) * (p[5] * pweibull(x, p[1], p[2], lower.tail = FALSE) + (1-p[5]) * pweibull(x, p[3], p[4], lower.tail = FALSE)))))
          }
          ## Redefinition of llikplugf according to a simple Weibull model
          llikplugf3 <- function(p, x, d) {
            -sum(suppressWarnings(log(d * dweibull(x, p[1], p[2]) + (1 - d) * pweibull(x, p[1], p[2], lower.tail = FALSE))))
          }
          estparplug <- constrOptim(pilot.par.ini, llikplugf, grad = NULL, x = dfr$t, d= rep(1, n), ui = rbind(c(rep(0, 6), -1, -1), c(1, rep(0,  7)), c(0, 1, rep(0, 6)), c(rep(0, 2), 1, rep(0, 5)), c(rep(0, 3), 1, rep(0, 4)), c(rep(0, 4), 1, rep(0, 3)), c(rep(0, 5), 1, 0, 0), c(rep(0, 6), 1, 0), c(rep(0, 7), 1), c(rep(0, 6), -1, 0), c(rep(0, 7), -1)), ci = c(-1, rep(0, 8), -1, -1))$par
          ## If one component of the mixture is negligible, refit with a mixture of only 2 Weibulls
          if (any(estparplug[7] < 0.1, estparplug[8] < 0.1, 1 - estparplug[7] - estparplug[8] < 0.1)) {
            estparplug <- constrOptim(c(pilot.par.ini[1:4], 0.5), llikplugf2, grad = NULL, x = dfr$t, d = rep(1, n), ui = rbind(c(1, rep(0,  4)), c(0, 1, rep(0, 3)), c(rep(0, 2), 1, rep(0, 2)), c(rep(0, 3), 1, 0), c(rep(0, 4), 1), c(rep(0, 4), -1)), ci = c(rep(0, 5), -1))$par
          }
          ## Again, if one of the 2-component-mixture is negligible, refit with only 1 Weibull
          if (estparplug[5] < 0.1 | estparplug[5] > 0.9) {
            estparplug <- constrOptim(c(pilot.par.ini[3:4]), llikplugf3, grad = NULL, x = dfr$t, d = rep(1, n), ui = rbind(c(1, 0), c(0, 1)), ci = rep(0, 2))$par
          }
          ## Formula (4.76) for b_1 on page 236 of A. Jacome's thesis
          pilot.bw2 <- (const$c3 / const$c1 / n / integrate(pilot.bw2.integrand, q.w[1], q.w[2], par = estparplug)$value)^(1/7)
          estparplug2 <- constrOptim(pilot.par.ini, llikplugf, grad = NULL, x = dfr$t,d = dfr$d, ui = rbind(c(rep(0, 6), -1, -1), c(1, rep(0,  7)), c(0, 1, rep(0, 6)), c(rep(0, 2), 1, rep(0, 5)), c(rep(0, 3), 1, rep(0, 4)), c(rep(0, 4), 1, rep(0, 3)), c(rep(0, 5), 1, 0, 0), c(rep(0, 6), 1, 0), c(rep(0, 7), 1), c(rep(0, 6), -1, 0), c(rep(0, 7), -1)), ci = c(-1, rep(0, 8), -1, -1))$par
          if (any(estparplug2[7] < 0.1, estparplug2[8] < 0.1, 1 - estparplug2[7] - estparplug2[8] < 0.1)) {
            estparplug2 <- constrOptim(c(pilot.par.ini[1:4], 0.5), llikplugf2, grad = NULL, x = dfr$t, d = dfr$d, ui = rbind(c(1, rep(0,  4)), c(0, 1, rep(0, 3)), c(rep(0, 2), 1, rep(0, 2)), c(rep(0, 3), 1, 0), c(rep(0, 4), 1), c(rep(0, 4), -1)), ci = c(rep(0, 5), -1))$par
          }
          if (estparplug2[5] < 0.1 || estparplug2[5] > 0.9) {
            estparplug2 <- constrOptim(c(pilot.par.ini[3:4]), llikplugf3, grad = NULL, x = dfr$t, d = dfr$d, ui = rbind(c(1, 0), c(0, 1)), ci = rep(0, 2))$par
          }
          intf3sq <- integrate(pilot.bw2.integrand, q.w[1], q.w[2], par = estparplug2)$value
          ## If f is replaced by h*p*(1-F)/(1-H), 3rd pilot bandwidth in Jacome's thesis is unnecessary
          km <- .C("presmestim", dfr$t, n, dfr$t, n, as.double(0), as.integer(0), as.integer(0), as.numeric(dfr$d), as.integer(0), as.integer(1), pest = numeric(n), PACKAGE = "survPresmooth")$pest
          fact <- sum((km[cond] / (1 - ecdf(dfr$t)(sort(dfr$t)[cond])))^2 * (dfr$d[order(dfr$t)][cond] == 1))/n
          pilot.bw3 <- (const$c3 * fact / const$c1 / n / intf3sq)^(1/7)
          pilot.bw <- pmin(c(pilot.bw1, pilot.bw2, pilot.bw3), control$k * range.t)
          ## Bandwidth
          if (presmoothing) {
            ## Computation of the 5 integrals in psi(L)
            funalpha2 <- function(x, t, d, n, bw, kern) {
              .C("alphaintegrand", t, d, n, x, length(x), bw[1], bw[2], kern, alphaint = numeric(length(x)), PACKAGE = "survPresmooth")$alphaint
            }
            prealpha <- numeric(le.plugin)
            for (i in 1:le.plugin) {
              prealpha[i] <- integrate(funalpha2, lower = if (i == 1) 0 else grid.plugin[i - 1], upper = grid.plugin[i], t = dfr$t, d = dfr$d, n = n, bw = pilot.bw[1:2], kern = n.kernel, subdivisions = 10000, stop.on.error = FALSE)$value
            }
            alpha <- cumsum(prealpha)
            ## In termsmise.c f replaced by h*p*(1-F)/(1-H)
            ints1to5 <- .C("termsmise", dfr$t, dfr$d, n, esf, grid.plugin, le.plugin, step.plugin, pilot.bw, n.kernel, n.estimand, alpha, int1 = numeric(1), int2 = numeric(1), int3 = numeric(1), int4 = numeric(1), int5 = numeric(1), PACKAGE = "survPresmooth")[paste("int", 1:5, sep = "")]
            ## The objective function is formula (3.180) for AMISE in Lopez-de-Ullibarri's thesis
            ## When minimizing, an empirical upper bound is considered, trying to avoid the sporadical ocurrence of exceedingly large bandwidths
            bw <- pmin(constrOptim(pilot.bw[-3], function(pars, n, kern, c1, c2, n.est, ints) .C("funplugin", pars[1], pars[2], n, kern, c1, c2, n.est, ints$int1, ints$int2, ints$int3, ints$int4, ints$int5, fplug = numeric(1), PACKAGE = "survPresmooth")$fplug, grad = NULL, n = n, kern = n.kernel, c1 = const$c1, c2 = const$c2, n.est = n.estimand, ints = ints1to5, ui = rbind(c(1, 0), c(0, 1)), ci = c(0, 0))$par, control$k *range.t)
          } else {
            ## computation of the 2 integrals of the AMISE (instead of the 5 of the general case)
            ints1to2 <- .C("termsmisenopresmooth", dfr$t, dfr$d, n, esf, grid.plugin, le.plugin, step.plugin, pilot.bw, n.kernel, n.estimand, int1 = numeric(1), int2 = numeric(1), PACKAGE = "survPresmooth")[c("int1","int2")]
            bw <- c(0, min((const$c2 * ints1to2$int2 / n / const$c1^2 / ints1to2$int1)^(1/5), control$k * range.t))
          }
        },
        h = {
          ## Pilot
          ## Log-likelihood function (a mixture of three Weibulls is considered as model for the distribution of lifetimes)
          ## suppressWarnings() is necessary to avoid some annoying but irrelevant warnings...
          llikplugh <- function(p, x) {
            -sum(suppressWarnings(log(p[7] * dweibull(x, p[1], p[2]) + p[8] * dweibull(x, p[3], p[4]) + (1-p[7]-p[8]) * dweibull(x, p[5], p[6]))))
          }
          ## Optimization with constrOptim() allows to take into account the constraint for the mixture weights (p[1] + p[2] <= 1)
          estparplug <- constrOptim(pilot.par.ini, llikplugh, grad = NULL, x = dfr$t, ui = rbind(c(rep(0, 6), -1, -1), c(1, rep(0,  7)), c(0, 1, rep(0, 6)), c(rep(0, 2), 1, rep(0, 5)), c(rep(0, 3), 1, rep(0, 4)), c(rep(0, 4), 1, rep(0, 3)), c(rep(0, 5), 1, 0, 0), c(rep(0, 6), 1, 0), c(rep(0, 7), 1), c(rep(0, 6), -1, 0), c(rep(0, 7), -1)), ci = c(-1, rep(0, 8), -1, -1))$par
          ## If one component of the mixture is negligible, refit with a mixture of only 2 Weibulls
          if (any(estparplug[7] < 0.1, estparplug[8] < 0.1, 1 - estparplug[7] - estparplug[8] < 0.1)) {
            ## Redefinition of llikplugh for a mixture of only two components
            llikplugh <- function(p, x) {
              -sum(suppressWarnings(log(p[5] * dweibull(x, p[1], p[2]) + (1 - p[5]) * dweibull(x, p[3], p[4]))))
            }
            estparplug <- constrOptim(c(pilot.par.ini[1:4], 0.5), llikplugh, grad = NULL, x = dfr$t, ui = rbind(c(1, rep(0,  4)), c(0, 1, rep(0, 3)), c(rep(0, 2), 1, rep(0, 2)), c(rep(0, 3), 1, 0), c(rep(0, 4), 1), c(rep(0, 4), -1)), ci = c(rep(0, 5), -1))$par
          }
          ## Again, if one of the 2-component-mixture is negligible, refit with only 1 Weibull
          if (estparplug[5] < 0.1 || estparplug[5] > 0.9) {
            ## Redefinition of llikplugh with a simple Weibull model
            llikplugh <- function(p, x) {
              -sum(suppressWarnings(log(dweibull(x, p[1], p[2]))))
            }
            estparplug <- constrOptim(c(pilot.par.ini[3:4]), llikplugh, grad = NULL, x = dfr$t, ui = rbind(c(1, 0), c(0, 1)), ci = rep(0, 2))$par
          }
          ## Formula (4.76) for b_1 on page 236 of Jacome's thesis
          pilot.bw2 <- (const$c3 / const$c1 / n / integrate(pilot.bw2.integrand, q.w[1], q.w[2], par = estparplug)$value)^(1/7)
          pilot.bw <- pmin(c(pilot.bw1, pilot.bw2), control$k * range.t)
          ## Bandwidth
          if (presmoothing) {
            ints1to5 <- .C("termsmise", dfr$t, dfr$d, n, esf, grid.plugin, le.plugin, step.plugin, pilot.bw, n.kernel, n.estimand, as.double(0), int1 = numeric(1), int2 = numeric(1), int3 = numeric(1), int4 = numeric(1), int5 = numeric(1), PACKAGE = "survPresmooth")[paste("int", 1:5, sep = "")]
            ## The objective function is formula (3.180) for AMISE in Lopez-de-Ullibarri's thesis
            ## When minimizing, an empirical upper bound is considered, trying to avoid the sporadical ocurrence of exceedingly large bandwidths
            bw <- pmin(constrOptim(pilot.bw, function(pars, n, kern, c1, c2, n.est, ints) .C("funplugin", pars[1], pars[2], n, kern, c1, c2, n.est, ints$int1, ints$int2, ints$int3, ints$int4, ints$int5, fplug = numeric(1), PACKAGE = "survPresmooth")$fplug, grad = NULL, n = n, kern = n.kernel, c1 = const$c1, c2 = const$c2, n.est = n.estimand, ints = ints1to5, ui = rbind(c(1, 0), c(0, 1)), ci = c(0, 0))$par, control$k * range.t)
          } else {
            ## Computation of the 2 integrals of the AMISE (instead of the 5 of the general case)
            ints1to2 <- .C("termsmisenopresmooth", dfr$t, dfr$d, n, esf, grid.plugin, le.plugin, step.plugin, pilot.bw, n.kernel, n.estimand, int1 = numeric(1), int2 = numeric(1), PACKAGE = "survPresmooth")[c("int1","int2")]
            bw <- c(0, min((const$c2 * ints1to2$int2 / n / const$c1^2 / ints1to2$int1)^(1/5), control$k * range.t))
          }
        }
        ## End of switch(estimand)
      )
      ## End of plugin
    },
    {
      ## Bootstrap
      ## Pilot
      ## Smoothing pilot bandwidth (only if estimand is "f" or "h")
      ## 1st if
      pilot.bw2 <-
        if (forh) {
          ## A mixture of three Weibulls is considered as model for the distribution of lifetimes
          llikboot <- function(p, x, d) {
            -sum(suppressWarnings(log(d * (p[7] * dweibull(x, p[1], p[2]) + p[8] * dweibull(x, p[3], p[4]) + (1-p[7]-p[8]) * dweibull(x, p[5], p[6])) + (1-d) * (p[7] * pweibull(x, p[1], p[2], lower.tail = FALSE) + p[8] * pweibull(x, p[3], p[4], lower.tail = FALSE) +(1-p[7]-p[8]) * pweibull(x, p[5], p[6], lower.tail = FALSE)))))
          }
          ## Optimization with constrOptim() allows to take into account the constraint for the mixture weights (p[1] + p[2] <= 1)
          estparboot <- constrOptim(pilot.par.ini, llikboot, grad = NULL, x = dfr$t,d = dfr$d, ui = rbind(c(rep(0, 6), -1, -1), c(1, rep(0,  7)), c(0, 1, rep(0, 6)), c(rep(0, 2), 1, rep(0, 5)), c(rep(0, 3), 1, rep(0, 4)), c(rep(0, 4), 1, rep(0, 3)), c(rep(0, 5), 1, 0, 0), c(rep(0, 6), 1, 0), c(rep(0, 7), 1), c(rep(0, 6), -1, 0), c(rep(0, 7), -1)), ci = c(-1, rep(0, 8), -1, -1))$par
          ## If one component of the mixture is negligible, refit with a mixture of only 2 Weibulls
          if (any(estparboot[7] < 0.1, estparboot[8] < 0.1, 1 - estparboot[7] - estparboot[8] < 0.1)) {
            llikboot <- function(p, x, d) {
              -sum(suppressWarnings(log(d * (p[5] * dweibull(x, p[1], p[2]) + (1 - p[5]) * dweibull(x, p[3], p[4])) + (1 - d) * (p[5] * pweibull(x, p[1], p[2], lower.tail = FALSE) + (1 - p[5]) * pweibull(x, p[3], p[4], lower.tail = FALSE)))))
            }
            estparboot <- constrOptim(c(pilot.par.ini[1:4], 0.5), llikboot, grad = NULL, x = dfr$t, d = dfr$d, ui = rbind(c(1, rep(0,  4)), c(0, 1, rep(0, 3)), c(rep(0, 2), 1, rep(0, 2)), c(rep(0, 3), 1, 0), c(rep(0, 4), 1), c(rep(0, 4), -1)), ci = c(rep(0, 5), -1))$par
          }
          ## Again, if one of the 2-component-mixture is negligible, refit with only 1 Weibull
          if (estparboot[5] < 0.1 | estparboot[5] > 0.9) {
            llikboot <- function(p, x, d) {
              -sum(suppressWarnings(log(d * dweibull(x, p[1], p[2]) + (1 - d) * pweibull(x, p[1], p[2], lower.tail = FALSE))))
            }
            estparboot <- constrOptim(c(pilot.par.ini[3:4]), llikboot, grad = NULL, x = dfr$t, d = dfr$d, ui = rbind(c(1, 0), c(0, 1)), ci = rep(0, 2))$par
          }
          ## The same bandwidth for f and h: optimal bandwidth for estimating f without presmoothing, given by Sellero et al. (1999)
          km <- .C("presmestim", dfr$t, n, dfr$t, n, as.double(0), as.integer(0), as.integer(0), as.numeric(dfr$d), as.integer(0), as.integer(1), pest = numeric(n), PACKAGE = "survPresmooth")$pest
          if (any(dfr$d[order(dfr$t)][cond] == 1)) {
            fact <- sum((km[cond] / (1 - ecdf(dfr$t)(sort(dfr$t)[cond])))^2 * (dfr$d[order(dfr$t)][cond] == 1))/n
            intden <- integrate(pilot.bw2.integrand, q.w[1], q.w[2], par = estparboot, modif = as.integer(1))$value
            (const$c2 * fact / const$c1^2 / n / intden)^0.2
          } else {
            warning("All times between the percentiles specified by 'q.weight' are censored. Muller's pilot bandwidth is used")
            max(dfr$t)/8/sum(dfr$d)^0.2
          }
          ## End of 1st if
        }
      pilot.bw <- pmin(c(pilot.bw1, pilot.bw2), control$k * range.t)
      ## Bandwith
      nboot.temp <- as.integer(control$n.boot)
      n.boot <-
        if (length(nboot.temp) == 1) {
          nboot.temp
        } else {
          if (n.estimand <= 2) {
            nboot.temp[1]
          } else {
            nboot.temp[2]
          }
        }
      le.ise <- as.integer(control$length.grid.ise)
      grid.ise <- seq(q.w[1], q.w[2], length.out = le.ise)
      step.ise <- (grid.ise[le.ise] - grid.ise[1])/(le.ise-1)
      if (is.null(grid.bw)) {
        grid.bw.1 <- control$k * range.t/10 * 10^seq(0, 1, by = 0.05)
        grid.bw.2 <-
          if (forh & presmoothing)
            grid.bw.1
      } else {
        grid.bw.1 <-
          if (is.list(grid.bw)) {
            grid.bw[[1]]
          } else {
            if (is.numeric(grid.bw)) {
              grid.bw
            }
          }
        grid.bw.2 <-
          if (forh & presmoothing) {
            grid.bw[[2]]
          }
      }
      le.bw.1 <- length(grid.bw.1)
      le.bw.2 <- length(grid.bw.2)
      if (presmoothing) {
        ## Estimates (with the pilot bandwidth) of:
        ## a) conditional probability of non-censoring
        p.hat <- .C("nadarayawatson", dfr$t, n, dfr$t, dfr$d, n, pilot.bw[1], n.kernel, phat = numeric(n), PACKAGE = "survPresmooth")$phat
        ## b) survival, density, hazard or cumulative hazard
        ps.dfr <-
          switch(
            estimand,
            S = ,
            H = .C("presmestim", grid.ise, le.ise, dfr$t, n, as.double(0), as.integer(0), as.integer(0), p.hat, dup, n.estimand, pest = numeric(le.ise), PACKAGE = "survPresmooth")$pest,
            f = .C("presmdensfast", grid.ise, le.ise, dfr$t, n, pilot.bw[2], n.kernel, p.hat, dhat = numeric(le.ise), PACKAGE = "survPresmooth")$dhat,
            h = .C("presmtwfast", grid.ise, le.ise, dfr$t, n, pilot.bw[2], n.kernel, dup, p.hat, hhat = numeric(le.ise), PACKAGE = "survPresmooth")$hhat
          )
        ## Compute bootstrapped presmoothed estimates
        mise <- .C("isevect", dfr$t, dfr$d, n, n.boot, grid.ise, le.ise, grid.bw.1, le.bw.1, if (is.null(grid.bw.2)) as.double(0) else grid.bw.2, if (is.null(grid.bw.2)) as.integer(0) else le.bw.2, n.kernel, dup, n.estimand, p.hat, ps.dfr, as.integer(presmoothing), ise = numeric(if (forh) le.bw.1*le.bw.2 else le.bw.1), PACKAGE = "survPresmooth")$ise / n.boot
      } else {
        ## as.numeric(dfr$d) is passed instead of p.hat
        ps.dfr <-
          if (n.estimand == 3) {
            .C("presmdensfast", grid.ise, le.ise, dfr$t, n, pilot.bw[2], n.kernel, as.numeric(dfr$d), dhat = numeric(le.ise), PACKAGE = "survPresmooth")$dhat
          } else {
            if (n.estimand == 4) {
              .C("presmtwfast", grid.ise, le.ise, dfr$t, n, pilot.bw[2], n.kernel, dup, as.numeric(dfr$d), hhat = numeric(le.ise), PACKAGE = "survPresmooth")$hhat
            }
          }
        mise <- .C("isevect", dfr$t, dfr$d, n, n.boot, grid.ise, le.ise, as.double(0), as.integer(0), grid.bw.1, le.bw.1, n.kernel, dup, n.estimand, as.double(0), ps.dfr, as.integer(presmoothing), ise = numeric(le.bw.1), PACKAGE = "survPresmooth")$ise / n.boot
      }
      ## Return the bandwidth minimizing the mise
      pos.min.mise <- max(which(mise == min(mise)))
      bw <- pmin(if (forh) if (presmoothing) c(grid.bw.1[(pos.min.mise - 1)%%le.bw.1 + 1], grid.bw.2[(pos.min.mise - 1)%/%le.bw.1 + 1]) else c(0, grid.bw.1[pos.min.mise]) else grid.bw.1[pos.min.mise], control$k * range.t)
      ## End of bootstrap
    }
    ## End of switch(n.bw.selec)
  )
  ## Estimate
  ## With fixed presmoothing bandwidth = 0, non-presmoothed estimates are obtained
  p <-
    if (any(is.na(bw))) {
      NA
    } else {
      if (bw[1] == 0) {
        as.numeric(dfr$d)
      } else {
        .C("nadarayawatson", dfr$t, n, dfr$t, dfr$d, n, bw[1], n.kernel, phat = numeric(n), PACKAGE = "survPresmooth")$phat
      }
    }
  if (forh && (n.bound == 4) && (max(x.est) > bw[2]) && all((x.est < bw[2]) || (max(dfr$t) - x.est < bw[2]))) {
    warning("Possibly anomalous estimate due to a too large smoothing bandwidth: try correcting only one of the two boundary effects")
  }
  estim <-
    if (any(is.na(bw))) {
      NA
    } else {
      .C("presmestim", x.est, le.x.est, dfr$t, n, if (forh) bw[2] else as.double(0), if (forh) n.kernel else as.integer(0), if (forh) n.bound else as.integer(0), p, dup, n.estimand, pest = numeric(le.x.est), PACKAGE = "survPresmooth")$pest
    }
  ps <- list(call = match.call(), data = if (control$save.data) dfr, q.weight = q.w, bw.selec = bw.selec, presmoothing = presmoothing, bound = bound, mise = if ((n.bw.selec == 3) & control$save.mise) if (forh & presmoothing) matrix(mise*step.ise, nrow = le.bw.1, ncol = le.bw.2) else mise*step.ise, grid.pil =  if (((n.bw.selec == 2) & forh) | (n.bw.selec == 3)) grid.bw.pil, pilot.bw = if (n.bw.selec != 1) pilot.bw, bandwidth = bw, grid.bw = if (n.bw.selec == 3 & (forh | presmoothing)) if (forh & presmoothing) list(grid.bw.1 = grid.bw.1, grid.bw.2 = grid.bw.2) else list(grid.bw = grid.bw.1), p.hat = p, x.est = x.est, estimand = estimand, estimate = estim)
  class(ps) <- "survPresmooth"
  ps
}

print.survPresmooth <- function(x,
                                long = FALSE,
                                more = NULL,
                                ...) {
  dots <- list(...)
  if (!inherits(x, "survPresmooth")) {
    stop("x object must be of class survPresmooth")
  }
  cat("\nCall: ")
  dput(x$call)
  cat("\n")
  cat(paste(if (x$presmoothing) "P" else "Non-p","resmoothed estimation of the", sep = ""),
      switch(
        x$estimand,
        S = "survival function, S(t)",
        H = "cumulative hazard function, H(t)",
        f = "density function, f(t)",
        h = "hazard function, h(t)"
      ), "\n\n")
  df <- data.frame(x$x.est, x$estimate)
  colnames(df) <- c("t", paste(x$estimand, "(t)", sep = ""))
  if (long) {
    print(df, ...)
  } else {
    if (nrow(df) <= 100) {
      print(df, ...)
    } else {
      print(head(df), ...)
      cat(" ...\n")
      cat("(Some output has been omitted for brevity: to get all the output set 'long=TRUE' in a call to 'print()')\n")
    }
  }
  cat("\nBandwidth selection method:", x$bw.selec, "\n")
  cat("\nBandwidth(s):\n")
  cat("\tpresmoothing:\t", ifelse(is.null(dots$digits), x$bandwidth[1], round(x$bandwidth[1], dots$digits)), "\n")
  if (length(x$bandwidth) == 2) {
    cat("\tsmoothing:\t", ifelse(is.null(dots$digits), x$bandwidth[2], round(x$bandwidth[2], dots$digits)), "\n")
  }
  if (x$estimand == "f" || x$estimand == "h") {
    cat("\nBoundary effect corrected:", x$bound, "\n")
  }
  more <- setdiff(more, c("call", "presmoothing", "estimand", "x.est", "estimate", "bandwidth", "bound"))
  if (!is.null(more)) {
    ext.names <- c(data = "Data", q.weight = "Range of the weight function", mise = "Bootstrap MISE", grid.pil = "Grid for the pilot bandwidth", pilot.bw = "Pilot bandwidth(s)", grid.bw = "Grid for bandwidth selection", p.hat = "Estimate of the conditional probability of uncensoring")
    comps <- c("data", "q.weight", "mise", "grid.pil", "pilot.bw", "grid.bw", "p.hat")
    inx <- more %in% comps
    if (any(inx)) {
      invisible(sapply(seq_along(more[inx]), function(s) {
        cat(paste("\n", ext.names[more][inx][s], ":\n", sep = ""))
        print(x[[more[inx][s]]], ...)
      }))
      if (!all(inx)) {
        if (sum(!inx) == 1) {
          warning(paste(more[!inx], "is not a component of x"))
        } else {
          warning(paste(paste(more[!inx], collapse = ", "), "are not components of x"))
        }
      }
    } else {
      if (sum(!inx) == 1) {
        warning(paste(more[!inx], "is not a component of x"))
      } else {
        warning(paste(paste(more[!inx], collapse = ", "), "are not components of x"))
      }
    }
  }
}

Try the survPresmooth package in your browser

Any scripts or data that you put into this service are public.

survPresmooth documentation built on April 3, 2025, 6:26 p.m.