varSelRF: Variable Selection using Random Forests

Documented in plot.varSelRF plot.varSelRFBoot randomVarImpsRF randomVarImpsRFplot selProbPlot summary.varSelRFBoot varSelImpSpecRF varSelRF varSelRFBoot

#### watch out use of "..." inside Boot function

#####   in bootSimplifyRF maybe allow allBootRuns = NULL so that object is small.



varSelRF <- function(xdata, Class,
                     c.sd = 1,
                     mtryFactor = 1,
                     ntree = 5000,
                     ntreeIterat = 2000,                     
                     vars.drop.num = NULL,
                     vars.drop.frac = 0.2,
                     whole.range = TRUE,
                     recompute.var.imp = FALSE,
                     verbose = FALSE,
                     returnFirstForest = TRUE,
                     fitted.rf = NULL,
                     keep.forest = FALSE) {

    if(!is.factor(Class))
        stop("Class should be a factor")
    if( (is.null(vars.drop.num) & is.null(vars.drop.frac)) |
       (!is.null(vars.drop.num) & !is.null(vars.drop.frac)))
        stop("One (and only one) of vars.drop.frac and vars.drop.num must be NULL and the other set")
  
    max.num.steps <- dim(xdata)[2]
    num.subjects <- dim(xdata)[1]

    if(is.null(colnames(xdata)))
        colnames(xdata) <- paste("v", 1:dim(xdata)[2], sep ="")
    
    ##oversize the vectors; will prune later.
    n.vars <- vars <- OOB.rf <- OOB.sd <- rep(NA, max.num.steps)

    oobError <- function(rf) { ## should not be exported in the namespace.
        ## The out of bag error
        ooo <- rf$confusion[, -dim(rf$confusion)[2]]
        s.ooo <- sum(ooo)
        diag(ooo) <- 0
        sum(ooo)/s.ooo
    }

    
    if(!is.null(fitted.rf)) {
        if(ncol(fitted.rf$importance) < 2)
            stop("The fitted rf was not fitted with importance = TRUE")
        n.ntree <- fitted.rf$ntree
        mtry <- fitted.rf$mtry
        n.mtryFactor <- mtry/sqrt(ncol(xdata))
        if((n.ntree != ntree) | (n.mtryFactor != mtryFactor))
            warning("Using as ntree and mtry the parameters obtained from fitted.rf",
                    immediate.= TRUE)
        ntree <- n.ntree
        mtryFactor <- n.mtryFactor
        rm(n.ntree, n.mtryFactor)
        rf <- fitted.rf
    } else {
        mtry <- floor(sqrt(ncol(xdata)) * mtryFactor)
        rf <- randomForest(x = xdata, y = Class,
                           ntree = ntree, mtry = mtry,
                           importance = TRUE,
                           keep.forest = keep.forest)
    }
    
    if(returnFirstForest)
        FirstForest <- rf
    else
        FirstForest <- NULL
    m.iterated.ob.error <- m.initial.ob.error <- oobError(rf)
    sd.iterated.ob.error <- sd.initial.ob.error <-
        sqrt(m.iterated.ob.error * (1 - m.iterated.ob.error) *
             (1/num.subjects))

    if(verbose) {
        print(paste("Initial OOB error: mean = ",
                    round(m.initial.ob.error, 4),
                    "; sd = ", round(sd.initial.ob.error, 4), sep = ""))
    }

    importances <- importance(rf, type = 1, scale = FALSE)
    selected.vars <- order(importances, decreasing = TRUE)
    ordered.importances <- importances[selected.vars]
    
    initialImportances <- importances
    initialOrderedImportances <- ordered.importances
    
    j <- 1
    n.vars[j] <- dim(xdata)[2] 
    vars[j] <- paste(colnames(xdata), collapse = " + ")
    OOB.rf[j] <- m.iterated.ob.error
    OOB.sd[j] <- sd.iterated.ob.error

    var.simplify <- TRUE
    
    while(var.simplify) {
        if (verbose){
          print("gc inside loop of varSelRF")
          print(gc())
        } else {
          gc()
        }
        
        last.rf <- rf 
        last.vars <- selected.vars
        previous.m.error <- m.iterated.ob.error
        previous.sd.error <- sd.iterated.ob.error

        ## If this is left as only
        ## "if(length(selected.vars) <= 2) var.simplify <- FALSE"
        ## under the if((length(selected.vars) < 2) | (any(selected.vars < 1))),
        ## as it used to be, then we fit a 2 model var, which might be
        ## better or as good as others, but as we never re-enter,
        ## we cannot return it, even if we see it in the history.

        ## Alternatively, we cannot just convert
        ## "if((length(selected.vars) < 2) | (any(selected.vars < 1))"
        ## to the <= 2, as we then would re-enter many times because
        ## of the way selected.vars <- selected.vars[1:2] when
        ## num.vars < (vars.drop + 2)

        ## This way, we enter just to set last.rf, last.vars and
        ## we bail out
        
        if(length(selected.vars) <= 2) {
          var.simplify <- FALSE
          break
        }

        
        if(recompute.var.imp & (j > 1)) {
            importances <- importance(rf, type = 1, scale = FALSE)
            tmp.order <- order(importances, decreasing = TRUE)
            selected.vars <- selected.vars[tmp.order]
            ordered.importances <- importances[tmp.order]
        }
        
        num.vars <- length(selected.vars)
  
        if(is.null(vars.drop.num))
            vars.drop <- round(num.vars * vars.drop.frac)
        else vars.drop <- vars.drop.num
        
        if(num.vars >= (vars.drop + 2)) {
            ## prevent infinite looping when num.vars is, say, 3 and
            ## vars.drop.frac < 0.17
            ## We must drop a variable for sure, since > 2 and we
            ## are still simplifying
            ## Alternatively, use ceiling instead of round, above.
            if(vars.drop == 0) {
                vars.drop <- 1
                if( (num.vars - vars.drop) < 1)
                    stop("vars.drop = 0 and num.vars -vars.drop < 1!")
            }
            selected.vars <- selected.vars[1: (num.vars - vars.drop)]
            ordered.importances <- ordered.importances[1: (num.vars - vars.drop)]
        }
        else {
            selected.vars <- selected.vars[1:2]
            ordered.importances <- ordered.importances[1:2]
        }
        
        ## couldn't we eliminate the following?
        if((length(selected.vars) < 2) | (any(selected.vars < 1))) {
            var.simplify <- FALSE
            break
        }
        
        
        
        mtry <- floor(sqrt(length(selected.vars)) * mtryFactor)
        if(mtry > length(selected.vars)) mtry <- length(selected.vars)
        
        if(recompute.var.imp) 
            rf <- randomForest(x = xdata[, selected.vars],
                               y = Class, importance= TRUE,
                               ntree = ntree, mtry = mtry,
                               keep.forest = keep.forest)
        else
            rf <- randomForest(x = xdata[, selected.vars],
                               y = Class, importance= FALSE,
                               ntree = ntreeIterat, mtry = mtry,
                               keep.forest = keep.forest)
        
        m.iterated.ob.error <- oobError(rf)
        sd.iterated.ob.error <-
            sqrt(m.iterated.ob.error * (1 - m.iterated.ob.error) *
                 (1/num.subjects))
        
        if(verbose) {
            print(paste("..... iteration ", j, "; OOB error: mean = ",
                        round(m.iterated.ob.error, 4),
                        "; sd = ", round(sd.iterated.ob.error, 4),
                        "; num. vars = ", length(selected.vars), 
                        sep = ""))
        }
        j <- j + 1
        
        
        n.vars[j] <- length(selected.vars)
        vars[j] <- paste(colnames(xdata)[selected.vars],
                         collapse = " + ")
        OOB.rf[j] <- m.iterated.ob.error
        OOB.sd[j] <- sd.iterated.ob.error


        if(!whole.range &
           (
            (m.iterated.ob.error >
             (m.initial.ob.error + c.sd*sd.initial.ob.error))
            |
            (m.iterated.ob.error >
             (previous.m.error + c.sd*previous.sd.error)))
           )
            var.simplify <- FALSE
    }

    if (!whole.range) {
        if(!is.null(colnames(xdata)))
            selected.vars <- sort(colnames(xdata)[last.vars])
        else
            selected.vars <- last.vars

        out <- list(selec.history = data.frame(
                    Number.Variables = n.vars,
                    Vars.in.Forest = vars,
                    OOB = OOB.rf,
                    sd.OOB = OOB.sd)[1:j,],
                    rf.model = last.rf,
                    selected.vars = selected.vars,
                    selected.model =  paste(selected.vars, collapse = " + "),
                    best.model.nvars = length(selected.vars),
                    initialImportances = initialImportances,
                    initialOrderedImportances = initialOrderedImportances,
                    ntree = ntree,
                    ntreeIterat = ntreeIterat,
                    mtryFactor = mtryFactor,
#                    mtry = mtry,
                    firstForest = FirstForest)
        class(out) <- "varSelRF"
        return(out)
    }
    else {
      ## Prune the too long vectors created at begin.
      ## not needed above, because we select the 1:j rows
      ## of the return matrix selec.history.
        n.vars <- n.vars[1:j]
        vars <- vars[1:j]
        OOB.rf<- OOB.rf[1:j]
        OOB.sd <- OOB.sd[1:j]
        min.oob.ci <- min(OOB.rf) + c.sd * OOB.sd[which.min(OOB.rf)]
        best.pos <-
            which(OOB.rf <= min.oob.ci)[which.min(n.vars[which(OOB.rf <= min.oob.ci)])]

        selected.vars <- sort(unlist(strsplit(vars[best.pos],
                                              " + ", fixed = TRUE)))
        out <- list(selec.history = data.frame(
                    Number.Variables = n.vars,
                    Vars.in.Forest = vars,
                    OOB = OOB.rf,
                    sd.OOB = OOB.sd),
                    rf.model = NA,
                    selected.vars = selected.vars,
                    selected.model = paste(selected.vars, collapse = " + "),
                    best.model.nvars = n.vars[best.pos],
                    initialImportances = initialImportances,
                    initialOrderedImportances = initialOrderedImportances,
                    ntree = ntree,
                    ntreeIterat = ntreeIterat,
                    mtryFactor = mtryFactor,
                    ##mtry = mtry,
                    firstForest = FirstForest)
        class(out) <- "varSelRF"
        return(out)
    }
}


print.varSelRF <- function(x, ...) {
    cat("\nBackwards elimination on random forest; ")
  cat(paste("ntree = ", x$ntree,";  mtryFactor = ",
            x$mtryFactor, "\n"), sep ="")
  cat("\n Selected variables:\n")
  print(x$selected.vars)
  cat("\n Number of selected variables:", x$best.model.nvars, "\n\n")
}

#summary.varSelRF <- print.varSelRF

plot.varSelRF <- function(x, nvar = NULL, which = c(1, 2), ...) {
    if (length(which) == 2 && dev.interactive()) {
        op <- par(ask = TRUE, las = 1)
    } else {
        op <- par(las = 1)
    }
    
    on.exit(par(op))
    
    if(is.null(nvar))
        nvar <- min(30,
                    length(x$initialOrderedImportances))
    
    show <- c(FALSE, FALSE)
    show[which] <- TRUE

    if (show[1]){
      dotchart(rev(x$initialOrderedImportances[1:nvar]),
          main = "Initial importances",
          xlab = "Importances (unscaled)")
    }
    
    if (show[2]){
      ylim <- c(0, max(0.50, x$selec.history$OOB))
      plot(x$selec.history$Number.Variables,
          x$selec.history$OOB, type = "b",
          xlab = "Number of variables used",
          ylab = "OOB error", log = "x",
          ylim = ylim,
          ...)
      
      ##  if(max(x$selec.history$Number.Variables) > 300)
      ##      axis(1, at = c(1, 2, 3, 5, 8, 15, 25, 50, 75, 150, 200, 300),
      ##           labels = c(1, 2, 3, 5, 8, 15, 25, 50, 75, 150, 200, 300))
      
      lines(x$selec.history$Number.Variables,
          x$selec.history$OOB +
              2 * x$selec.history$sd.OOB, lty = 2)
      lines(x$selec.history$Number.Variables,
          x$selec.history$OOB -
              2 * x$selec.history$sd.OOB, lty = 2)
    }
}
  
## We could also write a varSelRFCV zz: move al TODO

varSelRFBoot <- function(xdata, Class,
                         c.sd = 1,
                         mtryFactor = 1,
                         ntree = 5000,
                         ntreeIterat = 2000,
                         vars.drop.frac = 0.2,
                         bootnumber = 200,
                         whole.range = TRUE,
                         recompute.var.imp = FALSE,
                         usingCluster = TRUE,
                         TheCluster = NULL,
                         srf = NULL,
                         verbose = TRUE,
                         ...) {
    
    ## beware there is a lot of data copying... pass by reference, or
    ## minimize copying or something.
    

    if(is.null(colnames(xdata)))
        colnames(xdata) <- paste("v", 1:dim(xdata)[2], sep ="")
    
    if(!is.null(srf)) { ## we are passing a simplified rf object.
        if(class(srf) != "varSelRF")
            stop("srf must be the results of a run of varSelRF")
        n.ntree <- srf$ntree
        n.ntreeIterat <- srf$ntreeIterat
        n.mtryFactor <- srf$mtryFactor
        if((n.ntree != ntree) | (n.mtryFactor != mtryFactor) |
           (n.ntreeIterat != ntreeIterat))
        warning("Using as ntree and mtryFactor the parameters obtained from srf",
                immediate.= TRUE)
        ntree <- n.ntree
        mtryFactor <- n.mtryFactor
        rm(n.ntree, n.mtryFactor)
        all.data.run <- srf
    } else { ## we are simplifying the random forest
        all.data.run <- varSelRF(Class = Class,
                             xdata = xdata,
                             c.sd = c.sd,
                             mtryFactor = mtryFactor,
                             ntree = ntree,
                             ntreeIterat = ntreeIterat,
                             vars.drop.frac = vars.drop.frac,
                             whole.range = whole.range,
                             recompute.var.imp = recompute.var.imp)
###                             ...) 
    }
    columns.data <- which(colnames(xdata) %in%
                          all.data.run$selected.vars)
    
    all.data.rf.mtry <- floor(mtryFactor * sqrt(length(columns.data)))
    if(all.data.rf.mtry > length(columns.data))
        all.data.rf.mtry <- length(columns.data)
    all.data.rf.predict <- randomForest(y = Class,
                                        x = xdata[, columns.data],
                                        ntree = all.data.run$ntree,
                                        mtry = all.data.rf.mtry,
                                        xtest = xdata[, columns.data],
                                        ytest = Class,
                                        keep.forest = FALSE)
    
    full.pred <- all.data.rf.predict$test$predicted
    
    all.data.selected.vars <- all.data.run$selected.vars
    ##    all.data.selected.model <- all.data.run$selected.model
    all.data.best.model.nvars <- all.data.run$best.model.nvars
    
    N <- length(Class)
    solution.sizes <- rep(NA, bootnumber)
    overlap.with.full <- rep(NA, bootnumber)
    vars.in.solutions <- vector()
##    solutions <- rep(NA, bootnumber)
    
    bootTrainTest <- function(dummy,
        xdataTheCluster,
        ClassTheCluster,
        c.sd,
        mtryFactor,
        ntree, ntreeIterat,
        whole.range, recompute.var.imp,
        ...) {
        N <- length(ClassTheCluster)
        sample.again <- TRUE
        while(sample.again) {
            bootsample <- unlist(tapply(1:N,  ClassTheCluster,
                                        function(x)
                                        sample(x, size = length(x),
                                               replace = TRUE)))
            ## sure, this isn't the fastest, but will do for now.
            nobootsample <- setdiff(1:N, bootsample)
            if(!length(nobootsample))
                sample.again <- TRUE
            else sample.again <- FALSE
        }
        ## this is an ugly hack to prevent nobootsamples
        ## of size 0.
        
        train.data <- xdataTheCluster[bootsample, , drop = FALSE]
        test.data <- xdataTheCluster[nobootsample, , drop = FALSE]
        train.class <- ClassTheCluster[bootsample]
        test.class <- ClassTheCluster[nobootsample]

        boot.run <- varSelRF(Class = train.class,
                             xdata = train.data,
                             c.sd = c.sd,
                             mtryFactor = mtryFactor,
                             ntree = ntree,
                             ntreeIterat = ntreeIterat,
                             whole.range = whole.range,
                             recompute.var.imp = recompute.var.imp,
                             vars.drop.frac = vars.drop.frac)
###                             ...)
        output.cl <- list()
        output.cl$best.model.nvars <- boot.run$best.model.nvars
        output.cl$selected.model <- boot.run$selected.model
        output.cl$selected.vars <- boot.run$selected.vars
        output.cl$nobootsample <- nobootsample
        output.cl$bootsample <- bootsample
        output.cl$initialImportances <- boot.run$initialImportances
        output.cl$initialOrderedImportances <- boot.run$initialOrderedImportances
        output.cl$selec.history <- boot.run$selec.history

        boot.col.data <- which(colnames(xdataTheCluster) %in%
                               boot.run$selected.vars)
        run.test.mtry <- floor(mtryFactor * sqrt(length(boot.col.data)))
        if(run.test.mtry > length(boot.col.data))
            run.test.mtry <- length(boot.col.data)

        boot.run.test <- randomForest(y  = train.class,
                                      x = train.data[, boot.col.data,
                                      drop = FALSE],
                                       ntree = boot.run$ntree,
                                      mtry = run.test.mtry,
                                      keep.forest = FALSE,
                                      xtest = test.data[, boot.col.data,
                                      drop = FALSE])$test
##                                      ytest = test.class)$test
        output.cl$class.pred.array <- boot.run.test$predicted
        output.cl$prob.pred.array <- boot.run.test$votes
        rm(boot.run.test)
        gc()
        return(output.cl)
    } ## bootTrainTest; this function is defined to be used in the cluster.
    ## we use it too if not in the cluster. There is a bit too much
    ## copying, because we define a pair of new objects
    ## xdataTheCluster and ClassTheCluster that need not be defined
    ## if we just copied the code. But that is ugly and hard to follow,
    ## and the non-cluster is slow for other reasons.
    
    if(usingCluster) {
        if (verbose){
          print("gc inside varSelRFBoot papply")
          print(gc())
        } else {
          gc()
        }
        
        cat("\n      Running bootstrap iterations using cluster (can take a while)\n")
        boot.runs <- clusterApplyLB(TheCluster,
                                    1:bootnumber,
                                    bootTrainTest,
                                    xdataTheCluster = xdata,
                                    ClassTheCluster = Class,
                                    c.sd = c.sd,
                                    mtryFactor = mtryFactor,
                                    ntree = ntree,
                                    ntreeIterat = ntreeIterat,
                                    whole.range = whole.range,
                                    recompute.var.imp = recompute.var.imp)
        ## clean up before leaving
        clusterEvalQ(TheCluster,
                     rm(list = c("xdataTheCluster", "ClassTheCluster")))

    } else { ## Not using Cluster
        boot.runs <- list()
        xdataTheCluster <- xdata
        ClassTheCluster <- Class
        cat("\n      Running bootstrap iterations")
        for(nboot in 1:bootnumber) {
            cat(".")
            boot.runs[[nboot]] <-
                bootTrainTest(nboot,
                              xdataTheCluster = xdata,
                              ClassTheCluster = Class,
                              c.sd = c.sd,
                              mtryFactor = mtryFactor,
                              ntree = ntree,
                              ntreeIterat = ntreeIterat,
                              whole.range = whole.range,
                              recompute.var.imp = recompute.var.imp)
        }
        cat("\n")
    }
        

    solutions <- unlist(lapply(boot.runs, function(z) {
        paste(sort(z$selected.vars), collapse = " + ")}))
    vars.in.solutions <- unlist(lapply(boot.runs,
                                       function(z) z$selected.vars))
    solution.sizes <- unlist(lapply(boot.runs,
                                    function(z) z$best.model.nvars))
    overlap.with.full <-
        unlist(lapply(boot.runs,
                      function(x) {
                          length(intersect(x$selected.vars,
                                           all.data.selected.vars))/
                                               sqrt(all.data.best.model.nvars *
                                                    x$best.model.nvars)})) 

    prob.pred.array <-
        array(NA, dim = c(N,  nlevels(Class), bootnumber),
              dimnames = list(1:N, levels(Class),
              paste("BootstrapReplication.", 1:bootnumber, sep = "")))

    ## to store class predictions as data frame
    class.pred.array <- data.frame(Class)
    
    for(nb in 1:bootnumber) {
        nobootsample <- boot.runs[[nb]]$nobootsample
        class.pred.array[[nb]] <- factor(NA, levels = levels(Class))
        class.pred.array[nobootsample, nb] <-
            boot.runs[[nb]]$class.pred.array
        prob.pred.array[nobootsample, , nb] <-
            boot.runs[[nb]]$prob.pred.array
    }
    names(class.pred.array) <-
        paste("BootstrapReplication.", 1:bootnumber, sep = "")

            
    ############## The .632+ estimate of prediction error ##################
    ##      one:         \hat{Err}^{(1)} in Efron & Tibshirani, 1997, p. 550,
    ##                   or leave-one-out bootstrap error.  
    ##      resubst:     \bar{err}, the apparent error rate, resubstitution rate,
    ##                   or "in sample" error rate.
    ##      full.pred:   the "in sample" prediction from full model; only used
    ##                   to obtain gamma.
    ##      gamma:       gamma in p. 552
    ##      r:           R in p. 552 (bounding in [0, 1]).
    ##      err632:      the .632 error
    ##      errprime:    the one prime; \hat{Err}^{(1)}' 
    ##      err:         the .632+ error
    
    ## This is how I find one
    
    one <- mean(apply(cbind(class.pred.array, Class), 1,
                      function(x) {mean(x[-(bootnumber + 1)] != x[bootnumber + 1],
                                        na.rm = TRUE)}), na.rm = TRUE)
    ## this is equivalent to what Torsten Hothorn does in ipred
            
    resubst <- mean(full.pred != Class)
    ## The following code I take directly from the function
    ## bootest.factor, by Torsten Hothorn, in package ipred.
    err632 <- 0.368 * resubst + 0.632 * one
    gamma <-
        sum(outer(as.numeric(Class),
                  as.numeric(full.pred),
                  function(x, y) ifelse(x == y, 0, 1)))/(length(Class)^2)
    
    r <- (one - resubst)/(gamma - resubst)
    r <- ifelse(one > resubst & gamma > resubst, r, 0)
    if((r > 1) | (r < 0)) { ## just debugging; remove later?
        print(paste("r outside of 0, 1 bounds: one", one,
                    "resubst", resubst, "gamma", gamma))
        if(r > 1) {
            r <- 1
            print("setting r to 1")
        }
        else if(r < 0) {
            r <- 0
            print("setting r to 0")
        }
    }
    
    errprime <- min(one, gamma)
    err <- err632 + (errprime - resubst) *
        (0.368 * 0.632 * r)/(1 - 0.368 * r)
    cat("\n     .632+ prediction error ", round(err, 4), "\n")

    out <- list(number.of.bootsamples = bootnumber, 
                bootstrap.pred.error = err,
                resubstitution.error = resubst,
                leave.one.out.bootstrap.error = one,
                all.data.randomForest = all.data.rf.predict,
                all.data.vars = all.data.selected.vars,
                all.data.run = all.data.run,
                class.predictions = class.pred.array,
                prob.predictions = prob.pred.array,
                number.of.vars = solution.sizes,
                overlap = overlap.with.full,
                all.vars.in.solutions = vars.in.solutions,
                all.solutions = solutions,
                class = Class,
                allBootRuns = boot.runs)
    class(out) <- "varSelRFBoot"
    return(out)
}


print.varSelRFBoot <- function(x, ...) {
    cat("\n\n Variable selection with random forest \n")
    cat(" ------------------------------\n")
##    cat("\n\n randomForest summary \n")
##    print(object$all.data.randomForest)
    cat("\n Variables used \n")
    print(x$all.data.vars)
    cat("\n \n Number of variables used: ", length(x$all.data.vars),
        "\n")
    
    cat("\n\n Bootstrap results\n")
    cat(" ------------------\n")
    cat("\n Bootstrap (.632+) estimate of prediction error: \n",
        " (using", x$number.of.bootsamples, "bootstrap iterations): \n",
        x$bootstrap.pred.error)
    cat("\n\n Number of vars in bootstrapped forests:        \n")
    print(summary(x$number.of.vars))
}


## zz: pass gene names and subject names??
## look at examples from ~/Proyectos/Signatures/Symposum/boot.pamr.knn.dlda.R
## o similar 
## Or not, because I think I am ussing column names
## but look at that code anyway for improvements


#### in summaryBoot: a plot of error rate vs. 
#### number of variables
#### requires saving el "all.data.run" en el Boot, which should ALWAYS be done!!


summary.varSelRFBoot <- function(object,
                                 return.model.freqs = FALSE,
                                 return.class.probs = TRUE,
                                 return.var.freqs.b.models = TRUE,
                                 ...) {
    cat("\n\n Variable selection using all data \n")
    cat(" ------------------------------\n")
###    cat("\n\n randomForest summary \n")
###    print(object$all.data.randomForest)
    cat("\n \n variables used \n")
    print(object$all.data.vars)
    cat("\n \n Number of variables used: ", length(object$all.data.vars),
        "\n")
    cat("\n\n Bootstrap results\n")
    cat(" ------------------\n")
    cat("\n\n Bootstrap (.632+) estimate of prediction error:",
        object$bootstrap.pred.error, " (using",
        object$number.of.bootsamples, "bootstrap iterations).\n")
    cat("\n\n Resubstitution error:                          ",
        object$resubstitution.error, "\n")
    cat("\n\n Leave-one-out bootstrap error:                 ",
        object$leave.one.out.bootstrap.error, "\n")
    nk <- as.vector(table(object$class))
    cat("\n\n Error rate at random:                          ",
        1 - (max(nk)/sum(nk)), "\n")
    cat("\n\n Number of vars in bootstrapped forests:        \n")
    print(summary(object$number.of.vars))
    cat
    cat("\n")
    cat("\n Overlapp of bootstrapped forests with forest from all data\n")
    print(summary(object$overlap))
    if(return.var.freqs.b.models) {
        cat("\n\n Variable freqs. in bootstrapped models \n")
        print(sort(table(object$all.vars.in.solutions),
                   decreasing = TRUE)/object$number.of.bootsamples)
    }
    in.all.data <-
        which(names(table(object$all.vars.in.solutions)) %in% object$all.data.vars)
    cat("\n\n Variable freqs. of variables in forest from all data, and summary \n")
    print(table(object$all.vars.in.solutions)[in.all.data]/object$number.of.bootsamples)
    cat("\n")
    print(summary(table(object$all.vars.in.solutions)[in.all.data]/object$number.of.bootsamples))


    if(return.model.freqs) {
        tmp.table <- sort(table(object$all.solutions),
                          decreasing = TRUE)/object$number.of.bootsamples
        n.tmp.table <- names(tmp.table)
        dim(tmp.table) <- c(dim(tmp.table), 1)
        rownames(tmp.table) <- n.tmp.table
        colnames(tmp.table) <- "Freq."
        cat("\n\n Solutions frequencies in bootstrapped models \n")
        print(tmp.table)
    }
    
    if(return.class.probs)
        cat("\n\n Mean class membership probabilities from out of bag samples\n")
    if(return.class.probs) {
        mean.class.probs <- apply(object$prob.predictions, c(1, 2),
                                  function(x) mean(x, na.rm = TRUE))
        colnames(mean.class.probs) <- levels(object$class)
    }
    if(return.class.probs) return(mean.class.probs)
}



plot.varSelRFBoot <- function(x,
                              oobProb = TRUE,
                              oobProbBoxPlot = FALSE,
                              ErrorNum = TRUE,
                              subject.names = NULL,
                              class.to.plot = NULL,
                              ...) {
    
    if(oobProb | oobProbBoxPlot) {
        mean.class.probs <- apply(x$prob.predictions, c(1, 2),
                                  function(x) mean(x, na.rm = TRUE))
        colnames(mean.class.probs) <- levels(x$class)
        rainbow.col <- rainbow(nlevels(x$class))
	if(dev.interactive()) {
              op <- par(ask = TRUE, las = 1)
    	} else {
               op <- par(las = 1)
    	}
        on.exit(par(op))
####        for(i in 1:ncol(mean.class.probs)) {
        if(is.null(class.to.plot))
            class.to.plot <- 1:ncol(mean.class.probs)
        for(i in class.to.plot) {
            if(oobProbBoxPlot)  {
                boxplot(data.frame(t(x$prob.predictions[ , i, ])),
                        xlab = "Samples",
                        ylab = "Out of bag probability of membership",
                        main = paste("Class", colnames(mean.class.probs)[i]),
                        type = "p",
                        col = rainbow.col[as.numeric(x$class)],
                        pch = 19, ylim = c(0, 1.3),
                        axes = FALSE)
            } else {
###                dotchart(mean.class.probs[, i],
###                         xlab = "Samples",
###                         ylab = "(Average) Out of bag probability of membership",
###                         main = paste("Class", colnames(mean.class.probs)[i]),
###                         type = "p",
###                         col = rainbow.col[as.numeric(x$class)],
###                         pch = 19, ylim = c(0, 1.2),
###                         axes = FALSE)
                plot(mean.class.probs[, i],
                     xlab = "Samples",
                     ylab = "(Average) Out of bag probability of membership",
                     main = paste("Class", colnames(mean.class.probs)[i]),
                     type = "p",
                     col = rainbow.col[as.numeric(x$class)],
                     pch = 19, ylim = c(0, 1.3),
                     axes = FALSE)
            }
            if(!is.null(subject.names))
                text(mean.class.probs[ , i],
                     labels = subject.names, pos = 2, cex = 0.8)
            
            box()
            ##axis(1)
            par(las = 2)
            axis(1, at = seq(1:dim(mean.class.probs)[1]),
                 labels = seq(1:dim(mean.class.probs)[1]))
            segments(x0 = seq(1:dim(mean.class.probs)[1]),
                     y0 = 0,
                     x1 = seq(1:dim(mean.class.probs)[1]),
                     y1 = 1,
                     lty = 2,
                     col = "grey")
            axis(2, at = seq(from = 0, to = 1, by = 0.2))
            legend(y = c(1.29, 1.15), x = c(1, dim(mean.class.probs)[1]),
                   legend = paste("Class", levels(x$class)),
                   col = rainbow.col,
                   pch = 19, bty = "n" )
            abline( h = 1.05, lty = 1)
            abline(h = seq(from = 0, to = 1,
                   length = 1 + nlevels(x$class))[-c(1, nlevels(x$class) + 1)], 
                   lty = 2)
        }
    }
    if(ErrorNum) {
        par(las = 2)
        all.data.errors <- x$all.data.run$selec.history$OOB
        ngenes <- x$all.data.run$selec.history$Number.Variables
        maxplot <-
            max(
                c(unlist(lapply(x$allBootRuns,
                                function(x) max(x$selec.history$OOB))),
                  all.data.errors))
        minplot <-
            min(
                c(unlist(lapply(x$allBootRuns,
                                function(x) min(x$selec.history$OOB))),
                  all.data.errors))
        minplot <- minplot * (1 - 0.1)
        maxplot <- maxplot * (1 + 0.2)
        plot(ngenes, all.data.errors, 
             type = "l", axes = TRUE, xlab = "Number of variables",
             ylab = "OOB Error rate", ylim = c(minplot, maxplot),
             lty = 1,
             log = "x",
             col = "red", lwd = 2,
             main = "OOB Error rate vs. Number of variables in predictor",
             xlim = c(2, max(ngenes)*1.1))
###             plot(num.points.plot:1,
###                  x$allBootRuns[[1]]$other$trained.pam.cv$error,
###                  type = "l", axes = FALSE, xlab = "Number of genes",
###                  ylab = "CV Error rate", ylim = c(minplot, maxplot), lty = 2,
###                  main = "CV Error rate vs. Number of genes in predictor.")
        legend(x = 10, y = maxplot,
               legend = c("Bootstrap samples", "Original sample"),
               lty = c(2, 1), lwd = c(1, 3), col = c("Black", "Red"),
               bty = "n")
##        box()
##        axis(2)
##        axis(1)
        
        if(max(ngenes) > 300)
            axis(1, at = c(2, 3, 5, 8, 15, 20, 25, 35,
                    50, 75, 150, 200, 300),
                 labels = c(2, 3, 5, 8, 15, 10, 25, 35,
                 50, 75, 150, 200, 300))

        for(nb in 1:x$number.of.bootsamples)
            lines(x$allBootRuns[[nb]]$selec.history$Number.Variables,
                  x$allBootRuns[[nb]]$selec.history$OOB, lty = 2)
        lines(ngenes, all.data.errors, col = "red", lwd = 4)
    }
}



randomVarImpsRF <- function(xdata, Class, forest, numrandom = 100,
                            whichImp = "impsUnscaled",
                            usingCluster = TRUE,
                            TheCluster = NULL,
                            ...) {

  if(!all(whichImp %in% c("impsScaled", "impsUnscaled", "impsGini")))
      stop("whichImp contains a non-valid option; should be one or more \n",
           "of impsScaled, impsUnscaled, impsGini")

  
  ontree <- forest$ntree
  omtry <- forest$mtry
  
  nodesize <- 1
  
  if(usingCluster) {
      iRF2.cluster <- function(dummy, xdataTheCluster, ClassTheCluster,
                               ontree, omtry, nodesize, ...) {
        rf <- randomForest(x = xdataTheCluster,
                           y = sample(ClassTheCluster),
                           ntree = ontree,
                           mtry = omtry,
                           nodesize = nodesize,
                           importance = TRUE, keep.forest = FALSE,
                           ...)
        ## If we specify the importance measure, only that var is evaluated.
        ## if we say "ALL", all three.
        impsScaled <- NULL
        impsUnscaled <- NULL
        impsGini <- NULL
        if("impsUnscaled" %in% whichImp) 
          impsUnscaled <- importance(rf, type = 1, scale = FALSE)
        if("impsScaled" %in% whichImp)
          impsScaled <- importance(rf, type = 1, scale = TRUE)
        if("impsGini" %in% whichImp)
            impsGini <- importance(rf, type = 2, scale = TRUE)
          ## impsGini <- rf$importance[, ncol(rf$importance)]
        
        return(list(impsScaled,
                    impsUnscaled,
                    impsGini))
    }
      
      outCl <- clusterApplyLB(TheCluster,
                              1:numrandom,
                              iRF2.cluster,
                              xdataTheCluster = xdata,
                              ClassTheCluster = Class,
                              ontree = ontree,
                              omtry = omtry,
                              nodesize = nodesize)


      
  } else {
      outCl <- list()
      cat("\n Obtaining random importances ")
      for(nriter in 1:numrandom) {
          cat(".")
        rf <- randomForest(x = xdata,
                           y = sample(Class),
                           ntree = ontree,
                           mtry = omtry,
                           nodesize = nodesize,
                           importance = TRUE,
                           keep.forest = FALSE,
                           ...)
        impsScaled <- NULL
        impsUnscaled <- NULL
        impsGini <- NULL
          if("impsUnscaled" %in% whichImp) 
              impsUnscaled <- importance(rf, type = 1, scale = FALSE)
          if("impsScaled" %in% whichImp)
              impsScaled <- importance(rf, type = 1, scale = TRUE)
          if("impsGini" %in% whichImp)
              impsGini <- importance(rf, type = 2, scale = TRUE)
          ## impsGini <- rf$importance[, ncol(rf$importance)]
        outCl[[nriter]] <- list(impsScaled,
                                impsUnscaled,
                                impsGini)
      }
      cat("\n")
    } ##</else>
    
  randomVarImps <- list()
  
  if("impsScaled" %in% whichImp) {
    randomVarImps$impsScaled <-
        matrix(unlist(lapply(outCl, function(x) x[[1]])),
               ncol = length(outCl))
    colnames(randomVarImps$impsScaled) <- 1:numrandom
    rownames(randomVarImps$impsScaled) <- rownames(forest$importance)
  }
  if("impsUnscaled" %in% whichImp){
    randomVarImps$impsUnscaled <-
        matrix(unlist(lapply(outCl, function(x) x[[2]])),
               ncol = length(outCl))
    colnames(randomVarImps$impsUnscaled) <- 1:numrandom
    rownames(randomVarImps$impsUnscaled) <- rownames(forest$importance)
  }
  if("impsGini" %in% whichImp) {
    randomVarImps$impsGini <-
        matrix(unlist(lapply(outCl, function(x) x[[3]])),
               ncol = length(outCl))
    colnames(randomVarImps$impsGini) <- 1:numrandom
    rownames(randomVarImps$impsGini) <- rownames(forest$importance)
  }
  
  class(randomVarImps) <- c(class(randomVarImps),
                            "randomVarImpsRF")
  return(randomVarImps)
  ## Hopefully, we are returning a list with only the components selected.zz
}

randomVarImpsRFplot <- function(randomImportances,
                                forest,
                                whichImp = "impsUnscaled",
                                nvars = NULL,
                                show.var.names = FALSE,
                                vars.highlight = NULL,
                                main = NULL,
                                screeRandom = TRUE,
                                lwdBlack = 1.5,
                                lwdRed = 2,
                                lwdLightblue = 1,
                                cexPoint = 1,
                                overlayTrue = FALSE,
                                xlab = NULL,
                                ylab = NULL,
                                ...) {
    
    if(ncol(forest$importance) < 2)
        stop("The fitted rf", deparse(substitute(forest)),
             "was not fitted with importance = TRUE")
    
    randomImportances <-
        switch(whichImp,
               "impsUnscaled" = randomImportances$impsUnscaled,
               "impsScaled" = randomImportances$impsScaled,
               "impsGini" = randomImportances$impsGini,
               )

    originalForestImportance <-
        switch(whichImp,
               "impsUnscaled" =
                   importance(forest, type = 1, scale = FALSE),
               "impsScaled" =
                   importance(forest, type = 1, scale = TRUE),
               "impsGini" =
                   importance(forest, type = 2, scale = TRUE)
               ## forest$importance[, ncol(forest$importance)]
               )
    if(is.null(originalForestImportance))
        stop("\n Not valid 'whichImp' \n")
    
    if(is.null(xlab)) xlab <- "(Ordered) Variable"
    if(is.null(ylab)) ylab <-
        switch(whichImp,
               "impsUnscaled" = "Importance (unscaled)",
               "impsScaled"   = "Importance (scaled)",
               "impsGini"     = "Importance (Gini)",
               )
               
    nvars <- min(nvars, dim(randomImportances)[1])
    ylim <- range(originalForestImportance, randomImportances)    
    plottingOrder <- order(originalForestImportance,
                           decreasing = TRUE)[1:nvars]

    ## This is really (something changed in randomForest?) a matrix.
    ## Explictily make it a vector
    ## check first
    if(ncol(originalForestImportance) != 1)
        stop("originalForestImportance ncol != 1")
    orderedOriginalImps <- originalForestImportance[plottingOrder, 1,
                                                    drop = TRUE]
    
    plot(orderedOriginalImps, type = "n", axes = FALSE, 
         xlab = xlab, ylab = ylab,
         main = main, ylim = ylim,
         ...)
    abline(h = 0, lty = 2, col = "blue")
    axis(2)
    box()
    if(show.var.names) {
        axis(1, labels = names(orderedOriginalImps),
             at = 1:length(orderedOriginalImps))
    } else {
        axis(1)
    }
    
    if(!overlayTrue)
         ###points(x = 1:nvars, orderedOriginalImps, lwd = lwdBlack,
           ###    col = "black", type = "b", cex = cexPoint)
        lines(x = 1:nvars, orderedOriginalImps, lwd = lwdBlack,
              col = "black", type = "b", cex = cexPoint)

    
    if(!is.null(vars.highlight)) {
        if(length(vars.highlight) > nvars) {
            warning("Not all vars. to highlight will be shown; increase nvars\n")
            cat("\n Not all vars. to highlight will be shown; increase nvars\n")
        }
        pos.selected <- which(names(orderedOriginalImps) %in% vars.highlight)
        if(!length(pos.selected)){
            warning("No selected vars. among those to show\n")
            cat("\nNo selected vars. among those to show\n")
        }
        else segments(pos.selected, 0,
                      pos.selected, orderedOriginalImps[pos.selected],
                      col = "blue", lwd = 2)
        if(length(pos.selected) < length(vars.highlight)) {
            warning("Not shown ",
                    length(vars.highlight) - length(pos.selected),
                    " of the 'to-highlight' variables\n")
            cat("Not shown ", length(vars.highlight) - length(pos.selected),
                    " of the 'to-highlight' variables\n")

        }
    }

    column.of.mean <- dim(randomImportances)[2] + 1
    if(screeRandom) {
        randomImportances <- apply(randomImportances, 2,
                                   sort, decreasing = TRUE)
        randomImportances <- randomImportances[1:nvars, ]
        randomImportances <- cbind(randomImportances,
                                   apply(randomImportances, 1, mean))
    } else {
        randomImportances <- randomImportances[plottingOrder, ]
        randomImportances <- cbind(randomImportances,
                                   apply(randomImportances, 1, mean))
    }

    matlines(randomImportances[, -column.of.mean],
             col = "lightblue", lwd = lwdLightblue, lty = 1)
    lines(x = 1:nvars,
          y = randomImportances[, column.of.mean],
          lwd = lwdRed, col = "red")

    if(overlayTrue) {
        points(x = 1:nvars, orderedOriginalImps, lwd = lwdBlack,
               col = "black", type = "b", cex = cexPoint)
        lines(x = 1:nvars, orderedOriginalImps, lwd = lwdBlack,
              col = "black", type = "l")
    }

}



varSelImpSpecRF <- function(forest,
                            xdata = NULL,
                            Class = NULL,
                            randomImps = NULL,
                            threshold = 0.10,
                            numrandom = 20,
                            whichImp = "impsUnscaled",
                            usingCluster = TRUE,
                            TheCluster = NULL,
                            ...) {

    if((is.null(xdata) | is.null(Class)) & is.null(randomImps))
        stop("You must specify a randomVarImpsRF object OR",
             "valid covariates and class objects.\n")

    if((!is.null(xdata) & !is.null(Class)) & !is.null(randomImps))
        warning("Using only the randomVarImpsRF object.",
             "Covariates and class objects discarded.\n", immediate. = TRUE)

    if(length(whichImp) > 1) stop("You can only use one importance measure")

    originalImps <- switch(whichImp,
                           "impsUnscaled" =
                               importance(forest, type = 1, scale = FALSE),
                           "impsScaled" =
                               importance(forest, type = 1, scale = TRUE),
                           "impsGini" =
                               importance(forest, type = 2, scale = TRUE)
                   ## forest$importance[, ncol(forest$importance)]
                   )
    if(is.null(originalImps))
        stop("\n Not valid 'whichImp' \n")

    originalImpsOrder <- order(originalImps,
                               decreasing = TRUE)

    if(is.null(randomImps)) {
        randomImps <-
            switch(whichImp,
                   "impsUnscaled" = randomVarImpsRF(xdata, Class, forest,
                   numrandom = numrandom,
                   whichImp = "impsUnscaled",
                   usingCluster = usingCluster,
                   TheCluster = TheCluster,
                   ...)$impsUnscaled, 
                   "impsUnscaled" = randomVarImpsRF(xdata, Class, forest,
                   numrandom = numrandom,
                   whichImp = "impsScaled",
                   usingCluster = usingCluster,
                   TheCluster = TheCluster,
                   ...)$impsScaled, 
                   "impsUnscaled" = randomVarImpsRF(xdata, Class, forest,
                   numrandom = numrandom,
                   whichImp = "impsGini",
                   usingCluster = usingCluster,
                   TheCluster = TheCluster,
                   ...)$impsGini, 
                   )               
    } else {
        elemento <- match(whichImp, names(randomImps))
        if(is.na(elemento))
            stop("The requested importance was not calculated\n",
                 "for the randomImps", deparse(substitute(randomImps)),"\n",
                 "object.\n")
        cat("\n Using the randomVarImpsRF", deparse(substitute(randomImps)),
            "object. xdata, Class, numrandom ignored.\n")
        randomImps <- randomImps[[elemento]]

    }
     
    randomImps <- apply(randomImps, 2,
                        sort, decreasing = TRUE)
    thresholds <- apply(randomImps, 1,
                        function(x) quantile(x, probs = 1 - threshold, type = 8))

    largest.value <- which(originalImps[originalImpsOrder] <= thresholds)[1]
    if(is.na(largest.value)) {
        selected.vars <- originalImpsOrder
        warning("All variables selected; could signal a problem")
    } else if(largest.value == 1) {
        selected.vars <- NA
        warning("No variables selected; could signal a problem")
    } else selected.vars <- originalImpsOrder[1:(largest.value - 1)]

    return(selected.vars)
}


selProbPlot <- function(object,
                        k = c(20, 100),
                        color = TRUE,
                        legend = FALSE,
                        xlegend = 68,
                        ylegend = 0.93,
                        cexlegend = 1.4,
                        main = NULL,
                        xlab = "Rank of gene",
                        ylab = "Selection probability",
                        pch = 19,
                        ...) {
    ## selection probability plots, such as in Pepe
    ## et al. 2003 (ROC paper).
    if(class(object) != "varSelRFBoot")
        stop("This function only works with objects created\n",
             "with the varSelRFBoot function.\n")
    nboot <- object$number.of.bootsamples
    if(nboot < 100)
        warning("You only used ", nboot,
                " bootstrap samples. Might be too few.",
                immediate. = TRUE)
    
    original.imps <- object$all.data.run$initialImportances
    original.ranks <- rank(-original.imps)
    boot.ranks <- lapply(object$allBootRuns,
                         function(x) {rank(-x$initialImportances)})
    boot.ranks <- matrix(unlist(boot.ranks), ncol = nboot)
    k1 <- apply(boot.ranks, 1, function(z) {sum(z <= k[1])/nboot})
    k2 <- apply(boot.ranks, 1, function(z) {sum(z <= k[2])/nboot})

    if(color) {
        plot(original.ranks[original.ranks <= k[2]],
             k2[original.ranks <= k[2]], xlim = c(1, k[2]),
             col = "red", pch = pch,
             xlab = xlab,
             ylab = ylab,
             main = main,
             ylim = c(0, 1),
             ...)
        points(original.ranks[original.ranks <= k[1]],
               k1[original.ranks <= k[1]],
               col = "blue", pch = pch)
        
        if(legend) legend(x = xlegend, y = ylegend,
                          legend = c(paste("Top", k[2]),
                          paste("Top", k[1])),
                          col = c("red", "blue"),
                          pch = pch, cex = cexlegend)
    } else {
        plot(original.ranks[original.ranks <= k[2]],
             k2[original.ranks <= k[2]], xlim = c(1, k[2]),
             pch = pch,
             xlab = "Rank of gene",
             ylab = "Selection probability",
             main = main, ylim = c(0, 1),
             ...)
        points(original.ranks[original.ranks <= k[1]],
               k1[original.ranks <= k[1]],
               pch = 21)
        if(legend) legend(x = xlegend, y = ylegend,
                          legend = c(paste("Top", k[2]),
                          paste("Top", k[1])),
                          pch = c(19, 21), cex = (cexlegend + 0.2))
    }
}





###################################################################
###################################################################
##########                                           ##############
##########       Custom code used in paper           ##############
##########       (unlikely to be of interest         ##############
##########       for anybody else)                   ##############
##########                                           ##############
###################################################################
###################################################################



    

figureSummary.varSelRFBoot <- function(object) {
## to create data for figures of paper with randomdata
    errorrate <- object$bootstrap.pred.error
    nvused <- length(object$all.data.vars)
    return(c(errorrate, nvused))
}


figureSummary2.varSelRFBoot <- function(object) {
## to create data for figures of paper with randomdata
    errorrate <- object$bootstrap.pred.error
    nvused <- length(object$all.data.vars)
    median.nvars <- median (object$number.of.vars)
    iq1.nvars <- quantile(object$number.of.vars, p = 0.25)
    iq3.nvars <- quantile(object$number.of.vars, p = 0.75)
    string1 <- paste(formatC(round(nvused, 0), width = 6)," (",
                     round(iq1.nvars, 0),", ",
                     round(median.nvars, 0), ", ",
                     round(iq3.nvars, 0),"),  ", 
                     formatC(round(errorrate, 3), width = 4)," \\\\ ",
                     sep = "")
    string1
}




tableSummary.varSelRFBoot <- function(object, name){
### to create data ready for the LaTeX tables of the paper

    neatname <- switch(name,
                        "gl.boot" = "Leukemia    ",
                        "vv.boot" = "Breast 2 cl.",
                        "vv3.boot" ="Breast 3 cl.",
                        "nci.boot" ="NCI 60      ",
                        "ra.boot" = "Adenocar.   ",
                     "brain.boot" = "Brain       ",
                     "colon.boot" = "Colon       ",
                  "lymphoma.boot" = "Lymphoma    ",
                  "prostate.boot" = "Prostate    ",
                     "srbct.boot" = "Srbct       ")
                        
    errorrate <- object$bootstrap.pred.error
    nvused <- length(object$all.data.vars)
    median.nvars <- median (object$number.of.vars)
    iq1.nvars <- quantile(object$number.of.vars, p = 0.25)
    iq3.nvars <- quantile(object$number.of.vars, p = 0.75)
    in.all.data <- which(names(table(object$all.vars.in.solutions)) %in% object$all.data.vars)
    tmp1 <- table(object$all.vars.in.solutions)[in.all.data]/object$number.of.bootsamples
    median.freq <- median(tmp1)
    if(nvused == 2) {
        iq1.freq <- min(tmp1)
        iq3.freq <- max(tmp1)
    } else {
        iq1.freq <- quantile(tmp1, p = 0.25)
        iq3.freq <- quantile(tmp1, p = 0.75)
    }

    if(nvused > 2) 
        string1 <-
            paste(neatname, "&     ",
                  formatC(round(errorrate, 3), width = 4)," &       ",
                  formatC(round(nvused, 0), width = 4)," &       ",
                  formatC(round(median.nvars, 0), width = 4)," (",
                  round(iq1.nvars, 0),", ",
                  round(iq3.nvars, 0),")   &   ",
                  formatC(round(median.freq, 2), width = 4)," (",
                  round(iq1.freq, 2),", ",
                  round(iq3.freq, 2),")\\\\",
                  sep = "") else   string1 <-
                      paste(neatname, "&     ",
                            formatC(round(errorrate, 3), width = 4)," &       ",
                            formatC(round(nvused, 0), width = 4)," &       ",
                            formatC(round(median.nvars, 0), width = 4)," (",
                            round(iq1.nvars, 0),", ",
                            round(iq3.nvars, 0),")   &   ",
                            formatC(round(median.freq, 2), width = 4)," (",
                            round(iq1.freq, 2),", ",
                            round(iq3.freq, 2),")\\footnotemark[1]\\\\",
                            sep = "")
    cat(string1, "\n")
}




###################################################################
###################################################################
##########                                           ##############
##########       Miscellaneous code                  ##############
##########                                           ##############
###################################################################
###################################################################



## rVI <- function(xdata, ydata, forest, numrandom = 40,
##                 whichImp = "impsUnscaled",
##                 ...) {
##     ## from randomVarImpsRF, but simplified to use only
##     ## one tytpe of importance
    
##     ontree <- forest$ntree
##     omtry <- forest$mtry

##     if(usingCluster) {
##         iRF.cluster <- function(dummy, ontree, omtry, ...) {
##             rf <- randomForest(x = xdataTheCluster,
##                                y = sample(ClassTheCluster),
##                                ntree = ontree,
##                                mtry = omtry,
##                                importance = TRUE, keep.forest = FALSE,
##                                ...)
##             imps <- switch(whichImp,
##                            "impsUnscaled" =
##                            importance(rf, type = 1, scale = FALSE),
##                            "impsScaled" =
##                            importance(rf, type = 1, scale = TRUE),
##                            "impsGini" =
##                            rf$importance[, ncol(rf$importance)]
##                            )
##             if(is.null(imps))
##                 stop("\n Not valid 'whichImp' \n")
##             return(imps)
##         }
##         clusterEvalQ(TheCluster,
##                      rm(list = c("xdataTheCluster", "ClassTheCluster")))
##         xdataTheCluster <<- xdata
##         ClassTheCluster <<- ydata
##         clusterExport(TheCluster,
##                       c("xdataTheCluster", "ClassTheCluster"))
##         outCl <- clusterApplyLB(TheCluster,
##                                 1:numrandom,
##                                 iRF.cluster,
##                                 ontree = ontree,
##                                 omtry = omtry)

##     } else {
##         outCl <- list()
##         for(nriter in 1:numrandom) {
##             rf <- randomForest(x = xdata,
##                                y = sample(ydata),
##                                ntree = ontree,
##                                mtry = omtry,
##                                importance = TRUE,
##                                keep.forest = FALSE,
##                                ...)
##             imps <- switch(whichImp,
##                            "impsUnscaled" =
##                            importance(rf, type = 1, scale = FALSE),
##                            "impsScaled" =
##                            importance(rf, type = 1, scale = TRUE),
##                            "impsGini" =
##                            rf$importance[, ncol(rf$importance)]
##                            )
##             if(is.null(imps))
##                 stop("\n Not valid 'whichImp' \n")
##             outCl[[nriter]] <- imps
##         }
##     }
    
##     randomVarImps <- matrix(unlist(outCl), ncol = numrandom)
##     rownames(randomVarImps) <- rownames(forest$importance)
##     colnames(randomVarImps) <- 1:numrandom
##     class(randomVarImps) <- c(class(randomVarImps),
##                               "rVI")
##     return(randomVarImps)
## }


##### old stuff

####nr <- 10; nc <- 20; x <- matrix(rnorm(2* nr*nc), ncol = nc)
####colnames(x) <- paste("v", 1:nc, sep ="")
####Class <- factor(c(rep("A", nr), rep("B", nr)))
####xdata <- x

####library(randomForest)
####rf1 <- randomForest(x = x, y = Class, importance = TRUE,
####                    keep.forest = FALSE, ntree = 2000)

####usingCluster <- TRUE
####if(usingCluster) {
####    library(snow)
####    library(Rmpi)
####    clusterNumberNodes <- 4
####    typeCluster <- "MPI"
####    TheCluster <- makeCluster(clusterNumberNodes,
####                              type = typeCluster)
####    clusterSetupSPRNG(TheCluster)
####    clusterEvalQ(TheCluster, library(randomForest))
####}



####screePlotRF <- function(forest, randomImportances,
####                        nvars = 50,
####                        show.var.names = FALSE,
####                        vars.highlight = NULL,
####                        main = NULL, scale = TRUE) {
####    nvars <- min(nvars, dim(randomImportances)[1])

    
####    if(scale) {
####        original.imps <- importance(forest, type = 1, scale = TRUE)
####    } else {
####        original.imps <- importance(forest, type = 1, scale = FALSE)
####    }
####    ordered.imps <- sort(original.imps, decreasing = TRUE)[1:nvars]

####    plot(ordered.imps, type = "b", axes = FALSE, lwd = 1.5,
####         xlab = "Variable", ylab = "Importance",
####         main = main)
####    abline(h = 0, lty = 2, col = "blue")
####    axis(2)
####    box()
####    if(show.var.names) {
####        axis(1, labels = names(ordered.imps),
####             at = 1:length(ordered.imps))
####    } else {
####        axis(1)
####    }

####    if(!is.null(vars.highlight)) {
####        if(length(vars.highlight) > nvars) {
####            warning("Not all vars. to highlight will be shown; increase nvars\n")
####            cat("\n Not all vars. to highlight will be shown; increase nvars\n")
####        }
####        pos.selected <- which(names(ordered.imps) %in% vars.highlight)
####        if(!length(pos.selected)){
####            warning("No selected vars. among those to show\n")
####            cat("\nNo selected vars. among those to show\n")
####        }
####        else segments(pos.selected, 0,
####                      pos.selected, ordered.imps[pos.selected],
####                      col = "blue", lwd = 2)
####        if(length(pos.selected) < length(vars.highlight)) {
####            warning("Not shown ", length(vars.highlight) - length(pos.selected),
####                    " of the 'to-highlight' variables\n")
####            cat("Not shown ", length(vars.highlight) - length(pos.selected),
####                    " of the 'to-highlight' variables\n")

####        }
####    }

####    randomImportances <- randomImportances[1:nvars, ]
####    column.mean <- dim(randomImportances)[2]
####    matlines(randomImportances[, -column.mean],
####             col = "green", lty = 1)
####    lines(x = 1:nvars,
####          y = randomImportances[, column.mean],
####          lwd = 1.3, col = "red")
####}


####screePlotRF(rf1, ii1, nvars = 20, vars.highligh = paste("v", c(8, 4), sep =""), show.var.names = TRUE)

####i1 <- randomVarImpsRF(xdata, Class, rf1, numrandom = 3)
####screePlotRF(rf1, i1)






### This version allows optimizing mtry and ntree; leave code here,
### but don't use.

## FIXME: uncomment, but fix global function uses. And add examples.

## ExperimentalvarSelRF <- function(xdata, Class, vars.drop.num = NULL,
##                      vars.drop.frac = 0.5,
##                      c.sd = 1,
##                      whole.range = TRUE,
##                      recompute.var.imp = FALSE,
##                      verbose = FALSE,
##                      returnFirstForest = TRUE,
##                      ## next are all for tune2RF
##                      ## but this is a mess; should pass them as
##                      ## control.
##                      tuneMtry = FALSE, tuneNtree = FALSE,
##                      startNtree = 1000, startMtryFactor = 1,
##                      stepFactorMtry = 1.25,
##                      stepFactorNtree = 1.75,
##                      minCorNtree = 0.975,
##                      quantNtree = 0.025,
##                      returnForest = TRUE,
##                      ntreeTry = 2000) {

##     if( (is.null(vars.drop.num) & is.null(vars.drop.frac)) |
##        (!is.null(vars.drop.num) & !is.null(vars.drop.frac)))
##         stop("One (and only one) of vars.drop.frac and vars.drop.num must be NULL and the other set")
    

##     max.num.steps <- dim(xdata)[2]
##     num.subjects <- dim(xdata)[1]

##     if(is.null(colnames(xdata)))
##         colnames(xdata) <- paste("v", 1:dim(xdata)[2], sep ="")
    
##     ##oversize the vectors; will prune later.
##     n.vars <- vars <- OOB.rf <- OOB.sd <- rep(NA, max.num.steps)
    
##     ## First get optimal values for mtry and ntree, and get first run:
    
##     rf <- tune2RF(x = xdata, y = Class,
##                   tuneMtry = tuneMtry, tuneNtree = tuneNtree,
##                   startNtree = startNtree, mtryFactor = mtryFactor,
##                   stepFactorMtry = stepFactorMtry,
##                   stepFactorNtree = stepFactorNtree,
##                   minCorNtree = minCorNtree,
##                   quantNtree = quantNtree,
##                   returnForest = TRUE,
##                   ntreeTry = ntreeTry,
##                   verbose = verbose)

##     ## We'll need it for future runs
##     ntree <- rf$ntree
##     mtry <- rf$mtry

##     if(returnFirstForest)
##         FirstForest <- rf
##     else
##         FirstForest <- NULL
    
## #    rf <- randomForest(x = xdata, y = Class, importance= TRUE,
## #                       ntree = ntree, keep.forest = FALSE)
##     m.iterated.ob.error <- m.initial.ob.error <- oobError(rf)
##     sd.iterated.ob.error <- sd.initial.ob.error <-
##         sqrt(m.iterated.ob.error * (1 - m.iterated.ob.error) * (1/num.subjects))

##     if(verbose) {
##         print(paste("Initial OOB error: mean = ", round(m.initial.ob.error, 4),
##                     "; sd = ", round(sd.initial.ob.error, 4), sep = ""))
##     }

## ### previous code (before rF < 4.3.1)
##     #selected.vars <- order(rf$importance[, (ncol(rf$importance) - 1)], decreasing = TRUE)
## #    ordered.importance <- rf$importance[selected.vars,(ncol(rf$importance) -1)]

##     importances <- importance(rf, type = 1, scale = FALSE)
##     selected.vars <- order(importances, decreasing = TRUE)
##     ordered.importances <- importances[selected.vars]
    
##     initialImportances <- importances
##     initialOrderedImportances <- ordered.importances
    
##     j <- 1
##     n.vars[j] <- dim(xdata)[2] 
##     vars[j] <- paste(colnames(xdata), collapse = " + ")
##     OOB.rf[j] <- m.iterated.ob.error
##     OOB.sd[j] <- sd.iterated.ob.error

##     var.simplify <- TRUE
    
##     while(var.simplify) {
##         last.rf <- rf
##         last.vars <- selected.vars
## #        print(paste(".........Number of variables before selection",
## #                    dim(xdata)[2])) ## debug
##         previous.m.error <- m.iterated.ob.error
##         previous.sd.error <- sd.iterated.ob.error

##         if(recompute.var.imp & (j > 1)) {
##             ## need to set indexes as absolute w.r.t. original data
## ####            tmp.order <- order(rf$importance[, (ncol(rf$importance) - 1)],
## ####                                    decreasing = TRUE)
## ####            selected.vars <- selected.vars[tmp.order]
## ####            ordered.importance <- rf$importance[tmp.order, (ncol(rf$importance) -1)]

##             importances <- importance(rf, type = 1, scale = FALSE)
##             tmp.order <- order(importances, decreasing = TRUE)
##             selected.vars <- selected.vars[tmp.order]
##             ordered.importances <- importances[tmp.order]

##         }
        
##         num.vars <- length(selected.vars)
  
## ####        if(any(is.na(ordered.importances))) {
## ####            print("**********  Nas in ordered.importances ******")
## ####            browser()
## ####        }
           
##         if(any(ordered.importances < 0)) {
##             selected.vars <- selected.vars[-which(ordered.importances < 0)]
##             ordered.importances <- ordered.importances[-which(ordered.importances < 0)]
##         } else {
##             if(is.null(vars.drop.num))
##                 vars.drop <- round(num.vars * vars.drop.frac)
##             else vars.drop <- vars.drop.num
                
##             if(num.vars >= (vars.drop + 2)) {
##                 selected.vars <- selected.vars[1: (num.vars - vars.drop)]
##                 ordered.importances <- ordered.importances[1: (num.vars - vars.drop)]
##             }
##             else {
##                 selected.vars <- selected.vars[1:2]
##                 ordered.importances <- ordered.importances[1:2]
##             }
##         }
##         ## couldn't we eliminate the following?
##         if((length(selected.vars) < 2) | (any(selected.vars < 1))) {
##             var.simplify <- FALSE
##             break
##         }

##         if(length(selected.vars) <= 2) var.simplify <- FALSE
      
##         if(recompute.var.imp) 
##             rf <- randomForest(x = xdata[, selected.vars], y = Class, importance= TRUE,
##                                ntree = ntree, mtry = mtry, keep.forest = FALSE)
##         else
##             rf <- randomForest(x = xdata[, selected.vars], y = Class, importance= FALSE,
##                                ntree = ntree, mtry = mtry, keep.forest = FALSE)
        
##         m.iterated.ob.error <- oobError(rf)
##         sd.iterated.ob.error <-
##             sqrt(m.iterated.ob.error * (1 - m.iterated.ob.error) * (1/num.subjects))
        
##         if(verbose) {
##             print(paste("..... iteration ", j, "; OOB error: mean = ",
##                         round(m.iterated.ob.error, 4),
##                         "; sd = ", round(sd.iterated.ob.error, 4), sep = ""))
##         }
##         j <- j + 1

        
##         n.vars[j] <- length(selected.vars)
##         vars[j] <- paste(colnames(xdata)[selected.vars],
##                          collapse = " + ")
##         OOB.rf[j] <- m.iterated.ob.error
##         OOB.sd[j] <- sd.iterated.ob.error


##         if(!whole.range &
##            (
##             (m.iterated.ob.error >
##              (m.initial.ob.error + c.sd*sd.initial.ob.error))
##             |
##             (m.iterated.ob.error >
##              (previous.m.error + c.sd*previous.sd.error)))
##            )
##             var.simplify <- FALSE
##     }

##     if (!whole.range) {
##         if(!is.null(colnames(xdata)))
##             selected.vars <- sort(colnames(xdata)[last.vars])
##         else
##             selected.vars <- last.vars

##         out <- list(selec.history = data.frame(
##                     Number.Variables = n.vars,
##                     Vars.in.Forest = vars,
##                     OOB = OOB.rf,
##                     sd.OOB = OOB.sd)[1:j,],
##                     rf.model = last.rf,
##                     selected.vars = selected.vars,
##                     selected.model =  paste(selected.vars, collapse = " + "),
##                     best.model.nvars = length(selected.vars),
##                     initialImportances = initialImportances,
##                     initialOrderedImportances = initialOrderedImportances,
##                     ntree = ntree,
##                     mtry = mtry,
##                     firstForest = FirstForest)
##         class(out) <- "varSelRF"
##         return(out)
##     }
##     else {
##         n.vars <- n.vars[1:j]
##         vars <- vars[1:j]
##         OOB.rf<- OOB.rf[1:j]
##         OOB.sd <- OOB.sd[1:j]
##         ##browser()
##         min.oob.ci <- min(OOB.rf) + c.sd * OOB.sd[which.min(OOB.rf)]
##         best.pos <-
##             which(OOB.rf <= min.oob.ci)[which.min(n.vars[which(OOB.rf <= min.oob.ci)])]

##         selected.vars <- sort(unlist(strsplit(vars[best.pos],
##                                               " + ", fixed = TRUE)))
##         out <- list(selec.history = data.frame(
##                     Number.Variables = n.vars,
##                     Vars.in.Forest = vars,
##                     OOB = OOB.rf,
##                     sd.OOB = OOB.sd),
##                     rf.model = NA,
##                     selected.vars = selected.vars,
##                     selected.model = paste(selected.vars, collapse = " + "),
##                     best.model.nvars = n.vars[best.pos],
##                     initialImportances = initialImportances,
##                     initialOrderedImportances = initialOrderedImportances,
##                     ntree = ntree,
##                     mtry = mtry,
##                     firstForest = FirstForest)
##         class(out) <- "varSelRF"
##         return(out)
##     }
## }



## FIXME: uncomment, but fix global function uses. And add examples.

## tune2RF <- function(x, y, tuneMtry = TRUE, tuneNtree = TRUE,
##                     startNtree = 1000, startMtryFactor = 1,
##                     stepFactorMtry = 1.25,
##                     stepFactorNtree = 1.75,
##                     minCorNtree = 0.9,
##                     quantNtree = 0.025, ## with 4000, look at first 100
##                     returnForest = TRUE,
##                     ntreeTry = 2000,
##                     verbose = TRUE,
##                     plot = FALSE,
##                     ...) {
##     ## if tuneMtry = TRUE and tuneNtree = TRUE,
##     ##        ntree is used for tuneRF, mtryFactor is ignored
##     ## if tuneMtry = TRUE and tuneNtree = FALSE
##     ##        ntree used for tuneRF and final ntree, and mtry factor is ignored
##     ## if tuneMtry = FALSE and tuneRF = TRUE,
##     ##        ntree is used only as starting value for search of ntree and
##     ##        mtryFactor is used for mtry
##     ## if tuneMtry = FALSE and tuneRF = FALSE,
##     ##        mtryFactor and ntree are the ntree and mtry used.

##     if(plot) {
##         op <- par(mfrow = c(1,2))
##         on.exit(par(op))
##     }
##     if(tuneMtry) {
##         tunedMtry <- tuneRF(x, y, stepFactor = stepFactorMtry,
##                             ntreeTry = ntreeTry,
##                             mtryStart = floor(sqrt(ncol(x)) * startMtryFactor),
##                             doBest = FALSE,
##                             plot = plot,
##                             trace = verbose)
##         tunedMtry <-
##             tunedMtry[which.min(tunedMtry[, 2]), 1]
##     } else {
##         tunedMtry <- floor(sqrt(ncol(x)) * startMtryFactor)
##     }

##     tunedNtree <- startNtree
##     if(tuneNtree) {
##         if(verbose)
##             cat("\n Tunning ntree: initial forest construction \n")
##         f1 <- randomForest(x, y, mtry = tunedMtry,
##                            importance = TRUE,
##                            keep.forest = FALSE,
##                            ntree = tunedNtree)
##         repeat {
##             f2 <- randomForest(x, y, mtry = tunedMtry,
##                                importance = TRUE,
##                                keep.forest = FALSE,
##                                ntree = round(stepFactorNtree * tunedNtree))
##             m1 <- cbind(
##                         importance(f1, type = 1, scale = FALSE),
##                         importance(f2, type = 1, scale = FALSE))
##             m1[m1 < quantile(m1, 1-quantNtree)] <- NA
##             m1 <- na.omit(m1)
##             m2 <- m1
##             m2[m2 <= 0] <- NA
##             m2 <- na.omit(m2)
##             mc1 <- cor(m1)
##             mc.rob <- cov.rob(m2, method = "mcd", cor = TRUE)$cor
##             if(verbose) {
##                 cat("\n ... tunning ntree; cor. importances successive ntrees\n")
##                 colnames(mc1) <- c(tunedNtree, round(stepFactorNtree * tunedNtree))
##                 rownames(mc1) <- c(tunedNtree, round(stepFactorNtree * tunedNtree))
##                 colnames(mc.rob) <- c(tunedNtree, round(stepFactorNtree * tunedNtree))
##                 rownames(mc.rob) <- c(tunedNtree, round(stepFactorNtree * tunedNtree))
##                 cat("\n         correlation matrix\n")
##                 print(round(mc1, 4))
##                 cat("\n         robust correlation matrix\n")
##                 print(round(mc.rob, 4))
##                 cat(" \n using ", dim(m2)[1], "observations\n")
##                 cat("\n")
##             }
##             if((min(mc1) > minCorNtree) &
##                (min(mc.rob) > minCorNtree)) {
##                 plot(m1[, 1], m1[, 2], xlab = paste("Ntree", tunedNtree),
##                      ylab = paste("Ntree", round(stepFactorNtree * tunedNtree)),
##                      main =
##                      paste("Upper ", quantNtree,
##                            "th quantile of importances", sep = ""))
                     
##                 break
##             }
##             tunedNtree <- round(stepFactorNtree * tunedNtree)
##             f1 <- f2
##         }
##     }
##     if(returnForest) {
##         if(tuneNtree)
##             return(f1)
##         else
##             return(randomForest(x, y, mtry = tunedMtry,
##                                 importance = TRUE,
##                                 keep.forest = FALSE,
##                                 ntree = ntree)
##                    )
##     }
##     else {
##         return(c(tunedMtry = tunedMtry, tunedNtree = tunedNtree))
##     }
## }




boot.imp <- function(data, class, ntree = 20000, B = 200) {
## just checking the output from the pg.plots. is correct.
    N <- length(class)
    mat.out <- matrix(NA, nrow = dim(data)[2], ncol = B)
    for(i in 1:B) {
        sample.again <- TRUE
        while(sample.again) {
            bootsample <- unlist(tapply(1:N,  class,
                                        function(x)
                                        sample(x, size = length(x),
                                               replace = TRUE)))
            ## sure, this isn't the fastest, but will do for now.
            nobootsample <- setdiff(1:N, bootsample)
            if(!length(nobootsample))
                sample.again <- TRUE
            else sample.again <- FALSE
        }
        ## this is an ugly hack to prevetn nobootsamples
        ## of size 0.
        train.data <- data[bootsample, , drop = FALSE]
        train.class <- class[bootsample]
        rftmp <- randomForest(x = train.data, y = train.class,
                              ntree = ntree,
                              keep.forest = FALSE,
                              importance = TRUE)
        mat.out[, i] <- importance(rftmp, type = 1, scale = FALSE)
    }
    return(mat.out)
}


####gl.b.i <- boot.imp(gl.data, gl.class, B = 20)
        
####pairs(cbind(gl.b.i, importance(gl.20000.rf, type = 1, scale = FALSE)),
####      pch = ".")

####pairs(cbind(gl.b.i[, 1:10], importance(gl.20000.rf, type = 1, scale = FALSE)),
####      pch = ".")

####summary(cbind(gl.b.i, importance(gl.20000.rf, type = 1, scale = FALSE)))
####boxplot(data.frame(cbind(gl.b.i, importance(gl.20000.rf, type = 1, scale = FALSE))))




#####f.mtr <- function(x, mf = 3) {
#####  x2 <- floor(sqrt(x) * mf)
#####  x2[x2 > x] <- x[x2 > x]
#####  return(x2)
#####}
#####plot(x = sqrt(2:200), f.mtr(2:200, mf = 5), type = "l")
rdiaz02/varSelRF documentation built on May 27, 2019, 3:06 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
rdiaz02/varSelRF
Variable Selection using Random Forests

R/varSelRF.R
In rdiaz02/varSelRF: Variable Selection using Random Forests

Defines functions varSelRF print.varSelRF plot.varSelRF varSelRFBoot print.varSelRFBoot summary.varSelRFBoot plot.varSelRFBoot randomVarImpsRF randomVarImpsRFplot varSelImpSpecRF selProbPlot figureSummary.varSelRFBoot figureSummary2.varSelRFBoot tableSummary.varSelRFBoot boot.imp

Documented in plot.varSelRF plot.varSelRFBoot randomVarImpsRF randomVarImpsRFplot selProbPlot summary.varSelRFBoot varSelImpSpecRF varSelRF varSelRFBoot

R Package Documentation

Browse R Packages

We want your feedback!

rdiaz02/varSelRF Variable Selection using Random Forests

R/varSelRF.R In rdiaz02/varSelRF: Variable Selection using Random Forests

Defines functions varSelRF print.varSelRF plot.varSelRF varSelRFBoot print.varSelRFBoot summary.varSelRFBoot plot.varSelRFBoot randomVarImpsRF randomVarImpsRFplot varSelImpSpecRF selProbPlot figureSummary.varSelRFBoot figureSummary2.varSelRFBoot tableSummary.varSelRFBoot boot.imp

Documented in plot.varSelRF plot.varSelRFBoot randomVarImpsRF randomVarImpsRFplot selProbPlot summary.varSelRFBoot varSelImpSpecRF varSelRF varSelRFBoot

R Package Documentation

Browse R Packages

We want your feedback!

rdiaz02/varSelRF
Variable Selection using Random Forests

R/varSelRF.R
In rdiaz02/varSelRF: Variable Selection using Random Forests