R/subsample.rfsrc.R
In RFpredInterval: Prediction Intervals with Random Forests and Boosted Forests

subsample.rfsrc <- function(obj,                
                            B = 100,
                            block.size = 1,
                            subratio = NULL,
                            stratify = TRUE,
                            joint = FALSE,
                            bootstrap = FALSE,
                            verbose = TRUE) 
{
  ##--------------------------------------------------------------
  ##
  ## coherence checks
  ##
  ##--------------------------------------------------------------
  if (sum(inherits(obj, c("rfsrc", "grow"), TRUE) == c(1, 2)) != 2) {
    stop("This function only works for objects of class `(rfsrc, grow)'")
  }
  if (inherits(obj, "anonymous")) {
    stop("this function does work with anonymous forests")
  }
  ##--------------------------------------------------------------
  ##
  ## is this a multivariate family?
  ##
  ##--------------------------------------------------------------
  fmly <- obj$family
  mv.fmly <- fmly == "regr+" || fmly == "class+" || fmly == "mix+" 
  ##--------------------------------------------------------------
  ##
  ## set the sample size
  ## set subratio (if not supplied)
  ## confirm subratio is appropriately set
  ##
  ##--------------------------------------------------------------
  n <- obj$n
  if (!bootstrap && is.null(subratio)) {
    subratio <- 1 / sqrt(n)
  }
  if (!bootstrap && (subratio < 0 || subratio > 1)) {
    stop("subratio must be between 0 and 1:", subratio)
  }
  ##--------------------------------------------------------------
  ##
  ## extract random forest parmaters
  ##
  ##--------------------------------------------------------------
  ## list of parameters to be obtained from the grow object
  grow.prms <- c("call",
                 "ntree",
                 "mtry",
                 "nodesize",
                 "nodedepth",
                 "splitrule",
                 "nsplit",
                 "xvar.wt",
                 "split.wt",
                 "cause.wt",
                 "block.size")
   
  ## list of parameters to be obtained from the forest object
  forest.prms <- c("forest",
                   "bootstrap",
                   "sampsize",
                   "samptype",
                   "case.wt",
                   "perf.type",
                   "rfq")
  ## pull the parameters (grow first, then forest object)
  rf.prms <- c(obj[grow.prms], obj$forest[forest.prms])
  ## rename parameters to their correct random forest names
  rf.prms$call <- formula(rf.prms$call)
  names(rf.prms)[names(rf.prms) == "call"] <- "formula"
  names(rf.prms)[names(rf.prms) == "cause.wt"] <- "cause"
  ## subsamping cannot be applied when case weights are non-standard
  if (!all(diff(rf.prms$case.wt) == 0)) {
    stop("subsampling is not permitted on forests grown under non-standard case weights")
  }
  ## everthing is OK - nuke the case weights
  rf.prms$case.wt <- NULL
  ## need time.interest if this is a survival object
  if (fmly == "surv" || fmly == "surv-CR") {
    rf.prms$ntime <- obj$time.interest
  }
  ## nuke xvar and split weighting if they are standard
  if (all(diff(rf.prms$xvar.wt) == 0)) {
    rf.prms$xvar.wt <- NULL
  }
  if (all(diff(rf.prms$split.wt) == 0)) {
    rf.prms$split.wt <- NULL
  }
  ## nuke nodedepth if it's standard
  if (rf.prms$nodedepth <= 0) {
    rf.prms$nodedepth <- NULL
  }
  ## subsampling is not permitted if bootstrapping is not by root
  if (!bootstrap && rf.prms$bootstrap != "by.root") {
    stop("subsampling is not permitted on grow objects where sampling is not 'by.root'")
  }
  ## for subsampling we need to determine the subsampling tree sample size
  ## this is now handled by forest$sampsize which is a function
  ## for double bootstrapping we do NOT allow custom sampling for grow object
  if (bootstrap && rf.prms$bootstrap != "by.root") {
    stop("double bootstrapping is only permitted for breiman (bootstrap) forests")
  }
  else {
    ##everything is OK - set bootstrap parameter to NULL for later custom bootstrap
    ## leave $samptype and $sampsize in place to be used for making double bootstrap
    rf.prms$bootstrap <- NULL
  }
  ##--------------------------------------------------------------
  ##
  ## call the bootstrap subroutine if double bootstrap was requested
  ##
  ##--------------------------------------------------------------
  if (bootstrap) {
    bootO <- bootsample(obj, rf.prms, B = B, block.size = block.size, joint = joint, verbose = verbose)
    rO <- list(rf = bootO$rf, vmp = bootO$vmp, vmpB = bootO$vmpB, subratio = NULL)
    class(rO) <- c(class(obj), "bootsample")
    return(rO)
  }
  ##--------------------------------------------------------------
  ##
  ## extract data from the previously grown forest
  ## set the dimension, pull the feature names
  ##
  ##--------------------------------------------------------------
  dta <- data.frame(obj$yvar, obj$xvar)
  colnames(dta)[1:length(obj$yvar.names)] <- obj$yvar.names
  ##--------------------------------------------------------------
  ##
  ## call vimp to extract importance if not available in original object
  ##
  ##--------------------------------------------------------------
  vmp <- get.mv.vimp(obj, FALSE, FALSE)
  if (is.null(vmp)) {
    if (verbose) cat("no importance found: calculating it now ...\n")
    obj <- vimp(obj, perf.type = rf.prms$perf.type, block.size = block.size)
    vmp <- get.mv.vimp(obj, FALSE, FALSE)
    rf.prms$block.size <- block.size
    if (verbose) cat("done\n")
  }
  ##--------------------------------------------------------------
  ##
  ## call joint vimp - reference vimp value for pure noise settings
  ##
  ##--------------------------------------------------------------
  obj.joint <- NULL
  if (joint) {
    vmp.joint <- get.mv.vimp(get.joint.vimp(obj, rf.prms), FALSE, FALSE)
    vmp <- vimp.combine(vmp, vmp.joint)
  }
  ##----------------------------------------------------------
  ##
  ## subsampling loop for calculating VIMP confidence regions
  ##
  ##----------------------------------------------------------
  if (verbose) pb <- txtProgressBar(min = 0, max = B, style = 3)
  vmpS <- lapply(1:B, function(b) {
    ## progress bar
    if (verbose && B > 1) {
      setTxtProgressBar(pb, b)
    }
    if (verbose && b == B) {
      cat("\n")
    }
    ## draw the subsample
    ## use stratified sampling for classification/CR if requested (default)
    ## stratified sampling not allowed for mv-families
    if (!mv.fmly && stratify) {
      if (fmly == "class") {
        pt <- make.strat.sample(obj$yvar, subratio)
      }
      else if (fmly == "surv" || fmly == "surv-CR") {
        pt <- make.strat.sample(obj$yvar[, 2], subratio)
      }
      else {
        pt <- sample(1:n, size = (n * subratio), replace = FALSE)
      }
    }
    else {
      pt <- sample(1:n, size = (n * subratio), replace = FALSE)
    }
    ## calculate VMP on the stratified data
    rf.b <-  do.call("rfsrc",
                     c(list(data = dta[pt,, drop = FALSE], importance = TRUE), rf.prms))
    vmp.b <- get.mv.vimp(rf.b, FALSE, FALSE)
    if (joint) {
      vmp.b.joint <- get.mv.vimp(get.joint.vimp(rf.b, rf.prms), FALSE, FALSE)
      vmp.b <- vimp.combine(vmp.b, vmp.b.joint)
    }
    rO.b <- lapply(1:length(vmp), function(j) {
      vmp.j <- vmp[[j]]
      vmp.b.j <- vmp.b[[j]]
      vmp.mtx <- data.frame(matrix(NA, nrow(vmp.j), ncol(vmp.j)))
      rownames(vmp.mtx) <- rownames(vmp.j)
      colnames(vmp.mtx) <- colnames(vmp.j)
      if (ncol(vmp.b.j) == ncol(vmp.j)) {
        vmp.mtx[rownames(vmp.b.j), ] <- vmp.b.j
        vmp.b.j <- vmp.mtx
      }
      vmp.b.j
    })
    names(rO.b) <- names(vmp)
    rO.b
  })
  ##------------------------------------------------------------------
  ##
  ## return the goodies
  ##
  ##------------------------------------------------------------------
  rO <- list(rf = obj, vmp = vmp, vmpS = vmpS, subratio = subratio)
  class(rO) <- c(class(obj), "subsample")
  rO
}
subsample <- subsample.rfsrc
###################################################################
##
## extract confidence interval + other goodies from subsampled object
##
###################################################################
extract.subsample <- function(obj, alpha = .05, target = 0, m.target = NULL, standardize = TRUE)
{
  ## in case a user applies this to a bootsample object
  if (sum(c(grepl("rfsrc", class(obj))), grepl("subsample", class(obj))) == 2) {
    subsample <- TRUE
  }
  else if (sum(c(grepl("rfsrc", class(obj))), grepl("bootsample", class(obj))) == 2) {
    subsample <- FALSE
  }
  else {
    stop("this must be an object obtained by calling the 'subsample' function")
  }
  if (!subsample) {
    return(extract.bootsample(obj, alpha = alpha,
              target = target, m.target = m.target, standardize = standardize))    
  }
  ## coerce the (potentially) multivariate rf object
  m.target <- get.univariate.target(obj$rf, m.target)
  obj$rf <- coerce.multivariate(obj$rf, m.target)
  ## coerce the (potentially) multivariate original grow vimp
  if (is.null(m.target)) {
    vmp <- obj$vmp[[1]][, 1 + target, drop = FALSE]
  }
  else {
    vmp <- obj$vmp[[m.target]][, 1 + target, drop = FALSE]
  }
  ## coerce the (potentially) multivariate subsampled vimp 
  if (is.null(m.target)) {
    obj$vmpS <- lapply(obj$vmpS, data.frame)
  }
  else {
    obj$vmpS <- lapply(obj$vmpS, function(oo) {
      oo[[m.target]]
    })
  }  
  ## extract necessary objects
  theta.hat <- get.standardize.vimp(obj$rf, vmp, standardize)[, 1]
  theta.star <- get.standardize.vimp(obj$rf, rbind(sapply(obj$vmpS, "[[", 1 + target)), standardize)
  rownames(theta.star) <- xvar.names <- rownames(vmp)
  ## sample sizes
  n <- obj$rf$n
  m <- obj$subratio * n
  ## boxplot subsampling data
  scl <- sqrt(obj$subratio)
  theta.star.scl <- theta.hat - scl * rowMeans(theta.star, na.rm = TRUE)
  boxplot.dta <- data.frame(t(scl * theta.star + theta.star.scl))
  ## subsampled z-statistic 
  z.star <- sqrt(m) * (theta.star - theta.hat) 
  ## asymptotic standard error
  se.Z <- sqrt(m / n) * apply(theta.star, 1, sd, na.rm = TRUE)
  ## asymptotic JK standard error
  se.jk.Z <- sqrt((m / n) * rowMeans((theta.star - theta.hat)^2, na.rm = TRUE))
  ## nonparametric (default) subsampling confidence interval  
  ci <- do.call(cbind, lapply(1:nrow(z.star), function(rowj) {
    c(theta.hat[rowj] - quantile(z.star[rowj, ], 1 - alpha/2, na.rm = TRUE) / sqrt(n),
      theta.hat[rowj] - quantile(z.star[rowj, ], .75, na.rm = TRUE) / sqrt(n),
      theta.hat[rowj] - quantile(z.star[rowj, ], .50, na.rm = TRUE) / sqrt(n),
      theta.hat[rowj] - quantile(z.star[rowj, ], .25, na.rm = TRUE) / sqrt(n),
      theta.hat[rowj] - quantile(z.star[rowj, ], alpha/2, na.rm = TRUE) / sqrt(n))
  }))
  rownames(ci) <- paste(round(100 * c(alpha/2,.25,.50,.75,1-alpha/2), 1), "%", sep = "")
  colnames(ci) <- xvar.names
  ## normal-Z subsampling confidence interval
  ci.Z <- do.call(cbind, lapply(1:nrow(z.star), function(rowj) {
  c(theta.hat[rowj] - qnorm(1 - alpha/2) * se.Z[rowj],
    theta.hat[rowj] - qnorm(.75) * se.Z[rowj],
    theta.hat[rowj],
    theta.hat[rowj] + qnorm(.75) * se.Z[rowj],
    theta.hat[rowj] + qnorm(1 - alpha/2) * se.Z[rowj])
  }))
  rownames(ci.Z) <- paste(round(100 * c(alpha/2,.25,.50,.75,1-alpha/2), 1), "%", sep = "")
  colnames(ci.Z) <- xvar.names
  ## normal-Z subsampling confidence interval with JK
  ci.jk.Z <- do.call(cbind, lapply(1:nrow(z.star), function(rowj) {
  c(theta.hat[rowj] - qnorm(1 - alpha/2) * se.jk.Z[rowj],
    theta.hat[rowj] - qnorm(.75) * se.jk.Z[rowj],
    theta.hat[rowj],
    theta.hat[rowj] + qnorm(.75) * se.jk.Z[rowj],
    theta.hat[rowj] + qnorm(1 - alpha/2) * se.jk.Z[rowj])
  }))
  rownames(ci.jk.Z) <- paste(round(100 * c(alpha/2,.25,.50,.75,1-alpha/2), 1), "%", sep = "")
  colnames(ci.jk.Z) <- xvar.names
  ## nonparametric variable selection object
  var.sel <- data.frame(lower  =  ci[1, ],
                        median =  ci[3, ],
                        upper  =  ci[5, ],
                        pvalue = rowMeans((theta.star - sqrt(n / m) * theta.hat) > 0, na.rm = TRUE),
                        signif =  ci[1, ] > 0)
  rownames(var.sel) <- xvar.names
  ## normal-Z variable selection object
  var.sel.Z <- data.frame(lower  =  ci.Z[1, ],
                          mean   =  ci.Z[3, ],
                          upper  =  ci.Z[5, ],
                          pvalue = pnorm(theta.hat, 0, se.Z, FALSE),
                          signif =  ci.Z[1, ] > 0)
  rownames(var.sel.Z) <- xvar.names
  ## normal-Z variable selection object with JK
  var.jk.sel.Z <- data.frame(lower  =  ci.jk.Z[1, ],
                          mean   =  ci.jk.Z[3, ],
                          upper  =  ci.jk.Z[5, ],
                          pvalue = pnorm(theta.hat, 0, se.jk.Z, FALSE),
                          signif =  ci.jk.Z[1, ] > 0)
  rownames(var.jk.sel.Z) <- xvar.names
  list(boxplot.dta = boxplot.dta,
       ci = ci,
       ci.Z = ci.Z,
       ci.jk.Z = ci.jk.Z,
       vmp = theta.hat,
       vmpS = theta.star,
       se.Z = se.Z,
       se.jk.Z = se.jk.Z,
       var.sel = var.sel,
       var.sel.Z = var.sel.Z,
       var.jk.sel.Z = var.jk.sel.Z)
}
###################################################################
##
## get joint vimp from a forest object
##
###################################################################
get.joint.vimp <- function(o, rf.prms = NULL) {
  class(o)[2] <- "grow"  ##fake the class to trick vimp()
  if (is.null(rf.prms)) {
    return(vimp(o, joint = TRUE))
  }
  else {
    return(vimp(o, joint = TRUE, perf.type = rf.prms$perf.type, block.size = rf.prms$block.size))
  }
}
###################################################################
##
## get standarized vimp from a matrix (p x B) of subsampled vimp
##
###################################################################
get.standardize.vimp <- function(obj, vmp, standardize = FALSE) {
  ## nothing to do - standardization not rquested
  if (!standardize) {
    return(vmp)
  }
  else {
    do.call(cbind, lapply(1:ncol(vmp), function(j) {
      v <- vmp[, j]
      if (obj$family != "regr") {
        100 * v 
      }
      else {
        100 * v / var(obj$yvar, na.rm = TRUE)
      }
    }))
  }
}
###################################################################
##
## double bootstrap sampling
##
###################################################################
make.double.boot.sample <- function(ntree, n, smp, size = function(x) {x}, replace = TRUE)
{
  dbl.smp <- do.call(cbind, mclapply(1:ntree, function(bb) {
    inb <- rep(0, n)
    smp.2 <- sample(smp, size = size(n), replace = replace)
    frq <- tapply(smp.2, smp.2, length)
    idx <- as.numeric(names(frq))
    inb[idx] <- frq
    inb
  }))
  if (length(setdiff(1:n, smp)) > 0) {
    dbl.smp[-setdiff(1:n, smp),, drop = FALSE]
  }
  else {
    dbl.smp
  }
}
###################################################################
##
## stratified sampling without replacement 
##
###################################################################
make.strat.sample <- function(y, subratio) {
  frq <- table(y)
  class.labels <- names(frq)
  as.numeric(unlist(sapply(class.labels, function(cl) {
    pt <- which(y == cl)
    sample(pt, size = length(pt) * subratio, replace = FALSE)    
  })))
}
###################################################################
##
## print function for subsampled ci
##
###################################################################
print.subsample.rfsrc <- function(x, alpha = .05, standardize = TRUE) {
  vmp <- x$vmp
  nullO <- lapply(1:length(vmp), function(j) {
    m.target <- names(vmp)[j]
    p.vmp <- ncol(vmp[[j]])
    vmp.col.names <- colnames(vmp[[j]])
    if (length(vmp) > 1) {
      cat("processing a multivariate family, outcome:", m.target, "\n")
    }
    lapply(1:p.vmp, function(k) {
      oo <- extract.subsample(x, alpha = alpha, target = k - 1,
                  m.target = m.target, standardize = standardize)
      cat("===== VIMP confidence regions for", vmp.col.names[k], " =====\n")
      cat("nonparametric:\n")
      print(round(oo$ci, 3))
      cat("parametric:\n")
      print(round(oo$ci.Z, 3))
      cat("parametric (jackknife):\n")
      print(round(oo$ci.jk.Z, 3))
      NULL
    })
  })
}
print.subsample <- print.subsample.rfsrc
###################################################################
##
## combine two lists of vimp: o1 is master, o2 is appended
##
###################################################################
vimp.combine <- function(o1, o2) {
  rO <- lapply(1:length(o1), function(j) {
    rbind(o1[[j]], o2[[j]])
  })
  names(rO) <- names(o1)
  rO
}
###################################################################
##
## core bootstrap function
##
###################################################################
bootsample <- function(obj, rf.prms, B, block.size, joint, verbose) {
  ##--------------------------------------------------------------
  ##
  ## extract data from the previously grown forest
  ## set the dimensions, pull the feature names
  ##
  ##--------------------------------------------------------------
  dta <- data.frame(obj$yvar, obj$xvar)
  colnames(dta)[1:length(obj$yvar.names)] <- obj$yvar.names
  n <- nrow(obj$xvar)
  ##--------------------------------------------------------------
  ##
  ## call vimp to extract importance if not available in original object
  ##
  ##--------------------------------------------------------------
  vmp <- get.mv.vimp(obj, FALSE, FALSE)
  if (is.null(vmp)) {
    if (verbose) cat("no importance found: calculating it now ...\n")
    obj <- vimp(obj, perf.type = rf.prms$perf.type, block.size = block.size)
    vmp <- get.mv.vimp(obj, FALSE, FALSE)
    rf.prms$block.size <- block.size
    if (verbose) cat("done\n")
  }
  ##--------------------------------------------------------------
  ##
  ## call joint vimp - reference vimp value for pure noise settings
  ##
  ##--------------------------------------------------------------
  obj.joint <- NULL
  if (joint) {
    vmp.joint <- get.mv.vimp(get.joint.vimp(obj, rf.prms), FALSE, FALSE)
    vmp <- vimp.combine(vmp, vmp.joint)
  }
  ##----------------------------------------------------------
  ##
  ## bootstrap loop for calculating VIMP confidence regions
  ##
  ##----------------------------------------------------------
  if (verbose) pb <- txtProgressBar(min = 0, max = B, style = 3)
  vmpB <- lapply(1:B, function(b) {
    ## progress bar
    if (verbose && B > 1) {
      setTxtProgressBar(pb, b)
    }
    if (verbose && b == B) {
      cat("\n")
    }
    ##---------------------------------------------------
    ##
    ## double bootstrap 
    ##
    ##---------------------------------------------------
    ## double bootstrap sample
    smp <- sample(1:n, size = n, replace = TRUE)
    smp.unq <- sort(unique(smp))
    dbl.smp <- make.double.boot.sample(rf.prms$ntree, n, smp,
                             size = rf.prms$sampsize, replace = rf.prms$samptype == "swr")
    ## pull VIMP from the double boostrap forest
    rf.b <- do.call("rfsrc",
     c(list(data = dta[smp.unq,, drop = FALSE], importance = TRUE, bootstrap = "by.user", samp = dbl.smp), rf.prms))
    vmp.b <- get.mv.vimp(rf.b, FALSE, FALSE)
    if (joint) {
      vmp.b.joint <- get.mv.vimp(get.joint.vimp(rf.b, rf.prms), FALSE, FALSE)
      vmp.b <- vimp.combine(vmp.b, vmp.b.joint)
    }
    rO.b <- lapply(1:length(vmp), function(j) {
      vmp.j <- vmp[[j]]
      vmp.b.j <- vmp.b[[j]]
      vmp.mtx <- data.frame(matrix(NA, nrow(vmp.j), ncol(vmp.j)))
      rownames(vmp.mtx) <- rownames(vmp.j)
      colnames(vmp.mtx) <- colnames(vmp.j)
      if (ncol(vmp.b.j) == ncol(vmp.j)) {
        vmp.mtx[rownames(vmp.b.j), ] <- vmp.b.j
        vmp.b.j <- vmp.mtx
      }
      vmp.b.j
    })
    names(rO.b) <- names(vmp)
    rO.b
  })
  ## return the potentially updated forest object
  ## return the vimp 
  list(rf = obj, vmp = vmp, vmpB = vmpB)
}
###################################################################
##
## extract confidence interval + other goodies from bootstrap object
##
###################################################################
extract.bootsample <- function(obj, alpha = .05, target = 0, m.target = NULL, standardize = TRUE)
{
  ## confirm this is the right kind of object
  if (!sum(c(grepl("rfsrc", class(obj))), grepl("bootsample", class(obj))) == 2) {
    stop("this must be a double-bootstrap object obtained by calling the 'subsample' function")
  }
  ## coerce the (potentially) multivariate rf object
  m.target <- get.univariate.target(obj$rf, m.target)
  obj$rf <- coerce.multivariate(obj$rf, m.target)
  ## coerce the (potentially) multivariate vimp object
  if (is.null(m.target)) {
    obj$vmpB <- lapply(obj$vmpB, data.frame)
  }
  else {
    obj$vmpB <- lapply(obj$vmpB, function(oo) {
      oo[[m.target]]
    })
  }  
  ## extract vimp
  theta.boot <- get.standardize.vimp(obj$rf, rbind(sapply(obj$vmpB, "[[", 1 + target)), standardize)
  rownames(theta.boot) <- rownames(obj$vmp[[1]][, 1 + target, drop = FALSE])
  ## bootstrap VIMP
  vmp.boot <- rowMeans(theta.boot, na.rm = TRUE)
  ## bootstrap standard error
  se.boot <- apply(theta.boot, 1, sd, na.rm = TRUE)
  ## nonparametric bootstrap confidence interval
  ci.boot <- do.call(cbind, lapply(1:length(vmp.boot), function(rowj) {
    c(quantile(theta.boot[rowj,], alpha/2, na.rm = TRUE),
      quantile(theta.boot[rowj,], .25, na.rm = TRUE),
      vmp.boot[rowj],
      quantile(theta.boot[rowj,], .75, na.rm = TRUE),
      quantile(theta.boot[rowj,], 1 - alpha/2, na.rm = TRUE))
  }))
  rownames(ci.boot) <- paste(round(100 * c(alpha/2,.25,.50,.75,1-alpha/2), 1), "%", sep = "")
  colnames(ci.boot) <- names(vmp.boot)
  ## parametric bootstrap confidence interval
  ci.boot.Z <- do.call(cbind, lapply(1:length(vmp.boot), function(rowj) {
    c(vmp.boot[rowj] - qnorm(1 - alpha/2) * se.boot[rowj],
      vmp.boot[rowj] - qnorm(.75) * se.boot[rowj],
      vmp.boot[rowj],
      vmp.boot[rowj] + qnorm(.75) * se.boot[rowj],
      vmp.boot[rowj] + qnorm(1 - alpha/2) * se.boot[rowj])
  }))
  rownames(ci.boot.Z) <- paste(round(100 * c(alpha/2,.25,.50,.75,1-alpha/2), 1), "%", sep = "")
  colnames(ci.boot.Z) <- names(vmp.boot)
  ## bootstrap variable selection object
  var.sel.boot <- data.frame(lower  =  ci.boot[1, ],
                             mean   =  ci.boot[3, ],
                             upper  =  ci.boot[5, ],
                             signif =  ci.boot[1, ] > 0)
  rownames(var.sel.boot) <- names(vmp.boot)
  var.sel.boot.Z <- data.frame(lower  =  ci.boot.Z[1, ],
                               mean   =  ci.boot.Z[3, ],
                               upper  =  ci.boot.Z[5, ],
                               pvalue = pnorm(vmp.boot, 0, se.boot, FALSE),
                               signif =  ci.boot.Z[1, ] > 0)
  rownames(var.sel.boot.Z) <- names(vmp.boot)
  list(ci = ci.boot,
       ci.Z = ci.boot.Z,
       vmp <- vmp.boot,
       vmpS = theta.boot,
       se = se.boot,
       var.sel = var.sel.boot,
       var.sel.Z = var.sel.boot.Z)
}
###################################################################
##
## print function from bootstrap ci
##
###################################################################
print.bootsample.rfsrc <- function(x, alpha = .05, standardize = TRUE) {
  vmp <- x$vmp
  nullO <- lapply(1:length(vmp), function(j) {
    m.target <- names(vmp)[j]
    p.vmp <- ncol(vmp[[j]])
    vmp.col.names <- colnames(vmp[[j]])
    if (length(vmp) > 1) {
      cat("processing a multivariate family, outcome:", m.target, "\n")
    }
    lapply(1:p.vmp, function(k) {
      oo <- extract.bootsample(x, alpha = alpha, target = k - 1,
                   m.target = m.target, standardize = standardize)
      cat("===== VIMP confidence regions for", vmp.col.names[k], " =====\n")
      cat("nonparametric:\n")
      print(round(oo$ci, 3))
      cat("parametric:\n")
      print(round(oo$ci.Z, 3))
      NULL
    })
  })
}
print.bootsample <- print.bootsample.rfsrc
Any scripts or data that you put into this service are public.
RFpredInterval documentation built on March 7, 2023, 7:17 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
RFpredInterval
Prediction Intervals with Random Forests and Boosted Forests

R/subsample.rfsrc.R
In RFpredInterval: Prediction Intervals with Random Forests and Boosted Forests

Defines functions print.bootsample.rfsrc extract.bootsample bootsample vimp.combine print.subsample.rfsrc make.strat.sample make.double.boot.sample get.standardize.vimp get.joint.vimp extract.subsample subsample.rfsrc

Try the RFpredInterval package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

RFpredInterval Prediction Intervals with Random Forests and Boosted Forests

R/subsample.rfsrc.R In RFpredInterval: Prediction Intervals with Random Forests and Boosted Forests

Defines functions print.bootsample.rfsrc extract.bootsample bootsample vimp.combine print.subsample.rfsrc make.strat.sample make.double.boot.sample get.standardize.vimp get.joint.vimp extract.subsample subsample.rfsrc

Try the RFpredInterval package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

RFpredInterval
Prediction Intervals with Random Forests and Boosted Forests

R/subsample.rfsrc.R
In RFpredInterval: Prediction Intervals with Random Forests and Boosted Forests