R/iRafNet_permutation.R
In iRafNet: Integrative Random Forest for Gene Regulatory Network Inference

#' Derive importance scores for one permuted data.
#'
#' MAIN FUNCTION -- > iRafNet_permutation
#' 
#' INPUT
#' 
#' X            (n x p) gene expression matrix
#' W            (p x p) matrix of sampling weights
#' ntree        number of trees
#' mtry         number of variables to be sampled at each node
#' genes.name   list of gene names 
#' perm         seed for permutation
#' 
#' OUTPUT: importance score of interactions for permuted data.
#'
#'
#' OTHER FUNCTIONS -- > importance  and  iRafNet_onetarget
#' 
#' importance     compute importance score for an object of class iRafNet 
#' (this function is a modified version of file importance.R contained in package randomForest, A. Liaw and M. Wiener (2002))
#' 
#' iRafNet_onetarget  for each class, model the expression of a target gene as a function of the expression of other genes via random forest. 
#'                class specific tree ensemble are designed to borrow information across them. 
#' (this function is a modified version of file randomForest.R contained in package randomForest, A. Liaw and M. Wiener (2002))
#'   
#'
#' @export 
#"iRafNet_permutation" <-  function(X, ...)UseMethod("iRafNet")



importance <- function(x,  scale=TRUE) {
  
  type=NULL;
  class=NULL;
  if (!inherits(x, "randomForest"))
    stop("x is not of class randomForest")
  classRF <- x$type != "regression"
  hasImp <- !is.null(dim(x$importance)) || ncol(x$importance) == 1
  hasType <- !is.null(type)
  if (hasType && type == 1 && !hasImp)
    stop("That measure has not been computed")
  allImp <- is.null(type) && hasImp
  if (hasType) {
    if (!(type %in% 1:2)) stop("Wrong type specified")
    if (type == 2 && !is.null(class))
      stop("No class-specific measure for that type")
  }
  
  imp <- x$importance
  if (hasType && type == 2) {
    if (hasImp) imp <- imp[, ncol(imp), drop=FALSE]
  } else {
    if (scale) {
      SD <- x$importanceSD
      imp[, -ncol(imp)] <-
        imp[, -ncol(imp), drop=FALSE] /
        ifelse(SD < .Machine$double.eps, 1, SD)
    }
    if (!allImp) {
      if (is.null(class)) {
        ## The average decrease in accuracy measure:
        imp <- imp[, ncol(imp) - 1, drop=FALSE]
      } else {
        whichCol <- if (classRF) match(class, colnames(imp)) else 1
        if (is.na(whichCol)) stop(paste("Class", class, "not found."))
        imp <- imp[, whichCol, drop=FALSE]
      }
    }
  }
  imp<-imp[,2]
  imp
}


"irafnet_onetarget" <-
  function(x, y=NULL,  xtest=NULL, ytest=NULL, ntree,
           mtry=if (!is.null(y) && !is.factor(y))
             max(floor(ncol(x)/3), 1) else floor(sqrt(ncol(x))),
           replace=TRUE, classwt=NULL, cutoff, strata,
           sampsize = if (replace) nrow(x) else ceiling(.632*nrow(x)),
           nodesize = if (!is.null(y) && !is.factor(y)) 5 else 1,
           maxnodes=NULL,
           importance=FALSE, localImp=FALSE, nPerm=1,
           proximity, oob.prox=proximity,
           norm.votes=TRUE, do.trace=FALSE,
           keep.forest=!is.null(y) && is.null(xtest), corr.bias=FALSE,
           keep.inbag=FALSE, sw) {
    addclass <- is.null(y)
    classRF <- addclass || is.factor(y)
    if (!classRF && length(unique(y)) <= 5) {
      warning("The response has five or fewer unique values.  Are you sure you want to do regression?")
    }
    if (classRF && !addclass && length(unique(y)) < 2)
      stop("Need at least two classes to do classification.")
    n <- nrow(x)
    p <- ncol(x)
    if (n == 0) stop("data (x) has 0 rows")
    x.row.names <- rownames(x)
    x.col.names <- if (is.null(colnames(x))) 1:ncol(x) else colnames(x)
    
    ## overcome R's lazy evaluation:
    keep.forest <- keep.forest
    
    testdat <- !is.null(xtest)
    if (testdat) {
      if (ncol(x) != ncol(xtest))
        stop("x and xtest must have same number of columns")
      ntest <- nrow(xtest)
      xts.row.names <- rownames(xtest)
    }
    
    ## Make sure mtry is in reasonable range.
    if (mtry < 1 || mtry > p)
      warning("invalid mtry: reset to within valid range")
    mtry <- max(1, min(p, round(mtry)))
    if (!is.null(y)) {
      if (length(y) != n) stop("length of response must be the same as predictors")
      addclass <- FALSE
    } else {
      if (!addclass) addclass <- TRUE
      y <- factor(c(rep(1, n), rep(2, n)))
      x <- rbind(x, x)
    }
    
    ## Check for NAs.
    if (any(is.na(x))) stop("NA not permitted in predictors")
    if (testdat && any(is.na(xtest))) stop("NA not permitted in xtest")
    if (any(is.na(y))) stop("NA not permitted in response")
    if (!is.null(ytest) && any(is.na(ytest))) stop("NA not permitted in ytest")
    
    ncat <- rep(1, p)
    xlevels <- as.list(rep(0, p))
    maxcat <- max(ncat)
    if (maxcat > 32)
      stop("Can not handle categorical predictors with more than 32 categories.")
    
    if (classRF) {
      nclass <- length(levels(y))
      ## Check for empty classes:
      if (any(table(y) == 0)) stop("Can't have empty classes in y.")
      if (!is.null(ytest)) {
        if (!is.factor(ytest)) stop("ytest must be a factor")
        if (!all(levels(y) == levels(ytest)))
          stop("y and ytest must have the same levels")
      }
      if (missing(cutoff)) {
        cutoff <- rep(1 / nclass, nclass)
      } else {
        if (sum(cutoff) > 1 || sum(cutoff) < 0 || !all(cutoff > 0) ||
              length(cutoff) != nclass) {
          stop("Incorrect cutoff specified.")
        }
        if (!is.null(names(cutoff))) {
          if (!all(names(cutoff) %in% levels(y))) {
            stop("Wrong name(s) for cutoff")
          }
          cutoff <- cutoff[levels(y)]
        }
      }
      if (!is.null(classwt)) {
        if (length(classwt) != nclass)
          stop("length of classwt not equal to number of classes")
        ## If classwt has names, match to class labels.
        if (!is.null(names(classwt))) {
          if (!all(names(classwt) %in% levels(y))) {
            stop("Wrong name(s) for classwt")
          }
          classwt <- classwt[levels(y)]
        }
        if (any(classwt <= 0)) stop("classwt must be positive")
        ipi <- 1
      } else {
        classwt <- rep(1, nclass)
        ipi <- 0
      }
    } else addclass <- FALSE
    
    if (missing(proximity)) proximity <- addclass
    if (proximity) {
      prox <- matrix(0.0, n, n)
      proxts <- if (testdat) matrix(0, ntest, ntest + n) else double(1)
    } else {
      prox <- proxts <- double(1)
    }
    
    if (localImp) {
      importance <- TRUE
      impmat <- matrix(0, p, n)
    } else impmat <- double(1)
    
    if (importance) {
      if (nPerm < 1) nPerm <- as.integer(1) else nPerm <- as.integer(nPerm)
      if (classRF) {
        impout <- matrix(0.0, p, nclass + 2)
        impSD <- matrix(0.0, p, nclass + 1)
      } else {
        impout <- matrix(0.0, p, 2)
        impSD <- double(p)
        names(impSD) <- x.col.names
      }
    } else {
      impout <- double(p)
      impSD <- double(1)
    }
    
    nsample <- if (addclass) 2 * n else n
    Stratify <- length(sampsize) > 1
    if ((!Stratify) && sampsize > nrow(x)) stop("sampsize too large")
    if (Stratify && (!classRF)) stop("sampsize should be of length one")
    if (classRF) {
      if (Stratify) {
        if (missing(strata)) strata <- y
        if (!is.factor(strata)) strata <- as.factor(strata)
        nsum <- sum(sampsize)
        if (length(sampsize) > nlevels(strata))
          stop("sampsize has too many elements.")
        if (any(sampsize <= 0) || nsum == 0)
          stop("Bad sampsize specification")
        ## If sampsize has names, match to class labels.
        if (!is.null(names(sampsize))) {
          sampsize <- sampsize[levels(strata)]
        }
        if (any(sampsize > table(strata)))
          stop("sampsize can not be larger than class frequency")
      } else {
        nsum <- sampsize
      }
      nrnodes <- 2 * trunc(nsum / nodesize) + 1
    } else {
      ## For regression trees, need to do this to get maximal trees.
      nrnodes <- 2 * trunc(sampsize/max(1, nodesize - 4)) + 1
    }
    if (!is.null(maxnodes)) {
      ## convert # of terminal nodes to total # of nodes
      maxnodes <- 2 * maxnodes - 1
      if (maxnodes > nrnodes) warning("maxnodes exceeds its max value.")
      nrnodes <- min(c(nrnodes, max(c(maxnodes, 1))))
    }
    ## Compiled code expects variables in rows and observations in columns.
    x <- t(x)
    storage.mode(x) <- "double"
    if (testdat) {
      xtest <- t(xtest)
      storage.mode(xtest) <- "double"
      if (is.null(ytest)) {
        ytest <- labelts <- 0
      } else {
        labelts <- TRUE
      }
    } else {
      xtest <- double(1)
      ytest <- double(1)
      ntest <- 1
      labelts <- FALSE
    }
    nt <- if (keep.forest) ntree else 1
    
    rfout <- .C("regRF",
                x,
                as.double(y),
                as.integer(c(n, p)),
                as.integer(sampsize),
                as.integer(nodesize),
                as.integer(nrnodes),
                as.integer(ntree),
                as.integer(mtry),
                as.integer(c(importance, localImp, nPerm)),
                as.integer(ncat),
                as.integer(maxcat),
                as.integer(do.trace),
                as.integer(proximity),
                as.integer(oob.prox),
                as.integer(corr.bias),
                ypred = double(n),
                impout = impout,
                impmat = impmat,
                impSD = impSD,
                prox = prox,
                ndbigtree = integer(ntree),
                nodestatus = matrix(integer(nrnodes * nt), ncol=nt),
                leftDaughter = matrix(integer(nrnodes * nt), ncol=nt),
                rightDaughter = matrix(integer(nrnodes * nt), ncol=nt),
                nodepred = matrix(double(nrnodes * nt), ncol=nt),
                bestvar = matrix(integer(nrnodes * nt), ncol=nt),
                xbestsplit = matrix(double(nrnodes * nt), ncol=nt),
                mse = double(ntree),
                keep = as.integer(c(keep.forest, keep.inbag)),
                replace = as.integer(replace),
                testdat = as.integer(testdat),
                xts = xtest,
                ntest = as.integer(ntest),
                yts = as.double(ytest),
                labelts = as.integer(labelts),
                ytestpred = double(ntest),
                proxts = proxts,
                msets = double(if (labelts) ntree else 1),
                coef = double(2),
                oob.times = integer(n),
                inbag = if (keep.inbag)
                  matrix(integer(n * ntree), n) else integer(1), sw = as.double(sw))[c(16:28, 36:41)]
    ## Format the forest component, if present.
    if (keep.forest) {
      max.nodes <- max(rfout$ndbigtree)
      rfout$nodestatus <-
        rfout$nodestatus[1:max.nodes, , drop=FALSE]
      rfout$bestvar <-
        rfout$bestvar[1:max.nodes, , drop=FALSE]
      rfout$nodepred <-
        rfout$nodepred[1:max.nodes, , drop=FALSE]
      rfout$xbestsplit <-
        rfout$xbestsplit[1:max.nodes, , drop=FALSE]
      rfout$leftDaughter <-
        rfout$leftDaughter[1:max.nodes, , drop=FALSE]
      rfout$rightDaughter <-
        rfout$rightDaughter[1:max.nodes, , drop=FALSE]
    }
    cl <- match.call()
    cl[[1]] <- as.name("randomForest")
    ## Make sure those obs. that have not been OOB get NA as prediction.
    ypred <- rfout$ypred
    if (any(rfout$oob.times < 1)) {
      ypred[rfout$oob.times == 0] <- NA
    }
    out <- list(call = cl,
                type = "regression",
                predicted = structure(ypred, names=x.row.names),
                mse = rfout$mse,
                rsq = 1 - rfout$mse / (var(y) * (n-1) / n),
                oob.times = rfout$oob.times,
                importance = if (importance) matrix(rfout$impout, p, 2,
                                                    dimnames=list(x.col.names,
                                                                  c("%IncMSE","IncNodePurity"))) else
                                                                    matrix(rfout$impout, ncol=1,
                                                                           dimnames=list(x.col.names, "IncNodePurity")),
                importanceSD=if (importance) rfout$impSD else NULL,
                localImportance = if (localImp)
                  matrix(rfout$impmat, p, n, dimnames=list(x.col.names,
                                                           x.row.names)) else NULL,
                proximity = if (proximity) matrix(rfout$prox, n, n,
                                                  dimnames = list(x.row.names, x.row.names)) else NULL,
                ntree = ntree,
                mtry = mtry,
                forest = if (keep.forest)
                  c(rfout[c("ndbigtree", "nodestatus", "leftDaughter",
                            "rightDaughter", "nodepred", "bestvar",
                            "xbestsplit")],
                    list(ncat = ncat), list(nrnodes=max.nodes),
                    list(ntree=ntree), list(xlevels=xlevels)) else NULL,
                coefs = if (corr.bias) rfout$coef else NULL,
                y = y,
                test = if(testdat) {
                  list(predicted = structure(rfout$ytestpred,
                                             names=xts.row.names),
                       mse = if(labelts) rfout$msets else NULL,
                       rsq = if(labelts) 1 - rfout$msets /
                         (var(ytest) * (n-1) / n) else NULL,
                       proximity = if (proximity)
                         matrix(rfout$proxts / ntree, nrow = ntest,
                                dimnames = list(xts.row.names,
                                                c(xts.row.names,
                                                  x.row.names))) else NULL)
                } else NULL,
                inbag = if (keep.inbag)
                  matrix(rfout$inbag, nrow(rfout$inbag),
                         dimnames=list(x.row.names, NULL)) else NULL)
    
    class(out) <- "randomForest"
    return(out)
  }



# --- MAIN function
"iRafNet_permutation" <-
  function(X,W, ntree,mtry,genes.name,perm) {
    
    p<-dim(X)[2]
    imp<-matrix(0,p,p)
    n<-dim(X)[1]
    index<-seq(1,p)
    vec1<-matrix(rep(genes.name,p),p,p)
    vec2<-t(vec1); vec1<-c(vec1); vec2<-c(vec2)

    set.seed(perm)
    label<-sample(n)
    
    for (j in 1:p){ 
      y<-X[label,j];  # -- permute samples of target gene
      
      weights.rf<-as.matrix(W[,j]); 
      weights.rf[j]<-0
      weights.rf<-weights.rf/sum(weights.rf);
      
      w.sorted<-sort(weights.rf,decreasing = FALSE,index.return=T)
      index<-w.sorted$ix
      x.sorted<-X[,index]
      w.sorted<-w.sorted$x
      
      rout<-irafnet_onetarget(x=x.sorted,y=as.double(y),importance=TRUE,mtry=round(sqrt(p-1)),ntree=1000,
                              sw=as.double(w.sorted))
      
      imp[index,j]<-c(importance(rout))
    }
    
    
    # --- Return importance score for each regulation
    imp<-c(imp);
    out<-cbind(as.character(vec1),as.character(vec2),as.data.frame(imp),stringsAsFactors=FALSE)
    out<-out[vec1!=vec2,]
    i<-sort(out[,3],decreasing=TRUE,index=TRUE)
    out<-out[i$ix,]
    return(out[,3])
    
  }
Any scripts or data that you put into this service are public.
iRafNet documentation built on May 2, 2019, 6:56 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
iRafNet
Integrative Random Forest for Gene Regulatory Network Inference

R/iRafNet_permutation.R
In iRafNet: Integrative Random Forest for Gene Regulatory Network Inference

Try the iRafNet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

iRafNet Integrative Random Forest for Gene Regulatory Network Inference

R/iRafNet_permutation.R In iRafNet: Integrative Random Forest for Gene Regulatory Network Inference

Try the iRafNet package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

iRafNet
Integrative Random Forest for Gene Regulatory Network Inference

R/iRafNet_permutation.R
In iRafNet: Integrative Random Forest for Gene Regulatory Network Inference