R/AIMS.R

Defines functions applyAIMS .apply.nbc .removeDuplicatedEntrezPerPatients .get.all.pairs.genes .one.vs.all.tsp .comp.sel.pairs .smaller

Documented in applyAIMS

.smaller <- function(x,y){
  x < y
}

## Functions necessary to run AIMS
.comp.sel.pairs <- function(dataset,sel.pairs,func=.smaller){
  to.ret <- matrix(NA,nrow=length(sel.pairs),ncol(dataset$exprs))
  for (spi in 1:length(sel.pairs)){
    ss.cur <- strsplit(sel.pairs[spi],"<")[[1]]
    gene1 = which(dataset$GeneName == ss.cur[1])
    gene2 = which(dataset$GeneName == ss.cur[2])
    #stopifnot(length(gene1) == 1 & length(gene2) == 1)
    if (length(gene1) == 1 & length(gene2) == 1){
      to.ret[spi,] <- func(dataset$exprs[gene1,],dataset$exprs[gene2,])
    }
    else{
      message(paste("You are missing the pair or have more than one",sel.pairs[spi],"in",dataset$name))
    }
  }
  to.ret <- apply(to.ret,2,as.numeric)
  rownames(to.ret) <- sel.pairs
  to.ret
}

.one.vs.all.tsp <- function(D,GeneName,one.vs.all.tsp){
  ## Need to add some QC
  ## First compute
  train.pairs <- .comp.sel.pairs(list(exprs=D,GeneName=GeneName),one.vs.all.tsp$all.pairs)

  classes <- matrix("",ncol=length(one.vs.all.tsp$k),nrow=ncol(D))
  prob <- matrix(0,ncol=length(one.vs.all.tsp$k),nrow=ncol(D))
  colnames(classes) <- colnames(prob) <- as.character(one.vs.all.tsp$k)
  rownames(classes) <- rownames(prob) <-colnames(D)
  nb.d <- data.frame(t(train.pairs))
  all.probs <- list()
  for (ki in one.vs.all.tsp$k){
      message(sprintf("Current k = %d",ki))

      ## need to add this for the new version of e1071 (1.7-1)
      ## there is now a new isnumeric field in the naive bayes object
      ## we need to update our model consequently.
      isnumeric <- list()
      for (ni in names(one.vs.all.tsp$one.vs.all.tsp[[ki]]$tables)){
          isnumeric[[ni]] <- TRUE
      }
      one.vs.all.tsp$one.vs.all.tsp[[ki]]$isnumeric <- isnumeric
      
      prob.train <- predict(one.vs.all.tsp$one.vs.all.tsp[[ki]],nb.d,type="raw")
      cur.cl <- apply(prob.train,1,function(prob.cur){colnames(prob.train)[which.max(prob.cur)]})
      cur.prob <- apply(prob.train,1,function(prob.cur){max(prob.cur)})
      prob[,as.character(ki)] <- cur.prob
      classes[,as.character(ki)] <- cur.cl
      all.probs[[as.character(ki)]] <- prob.train
  }
  invisible(list(cl = classes,prob = prob,all.probs = all.probs,rules.matrix=train.pairs))
}

.get.all.pairs.genes <- function(all.pairs){
  genes <- c()
  for (cp in strsplit(all.pairs,"<")){
    genes <- c(genes,cp)
  }
  unique(genes)
}
## Remove the duplicated Entrez by keeping the most higly expressed
## This is closer to the single sample selection
## D is a raw gene expression matrix rows == genes and columns patients
.removeDuplicatedEntrezPerPatients <- function(D,EntrezID,probes){
  ## Maybe we have nothing to do already
  if (all(!duplicated(EntrezID))){
    return(list(dataset=D,EntrezID=EntrezID))
  }
  else{
    uniqEntrez <- sort(unique(EntrezID))
    newD <- matrix(0,nrow=length(uniqEntrez),ncol=ncol(D))
    if (!missing(probes)){
      sel.probes <- matrix("",nrow=length(uniqEntrez),ncol=ncol(D))
    }
    for (i in 1:ncol(D)){
      curD <- D[,i]
      curEntrez <- EntrezID
      oi <- order(curD,decreasing=TRUE) ## order by raw expression
      curD <- curD[oi]
      curEntrez <- curEntrez[oi]
      cur.sel <- !duplicated(curEntrez) ## remove duplicated
      curD <- curD[cur.sel]
      curEntrez <- curEntrez[cur.sel]
      reorder <- match(uniqEntrez,curEntrez)
      newD[,i] <- curD[reorder]
      if (!missing(probes)){
        sel.probes[,i] <- probes[oi][cur.sel][reorder]
      }
    }
    colnames(newD) <- colnames(D)
    if (!missing(probes)){
      colnames(sel.probes) <- colnames(D)
      return(list(dataset=newD,EntrezID=uniqEntrez,probes=sel.probes))
    }
    else{
      return(list(dataset=newD,EntrezID=uniqEntrez))
    }
  }
}

.apply.nbc <- function(D,EntrezID,sel.nbc){
  ## Verify the number of rows of D and EntrezIDs have the same length 
  if (nrow(D) != length(EntrezID)){
    stop(sprintf("You need the same number of rows and EntrezID. Right now nrow(D) = %d and length(EntrezID) = %d",nrow(D),length(EntrezID)))
  }
  ## AIMS needs to be applied on expression values > 0. Require 95% of the values to be > 0
  if (!all(apply(D,2,function(x){(sum(x < 0,na.rm=TRUE)/length(x)) < 0.05}))){
    stop("AIMS needs to be applied on expressionn values > 0. Did you gene-centered your matrix D? You should not.")
  }
  
  ## Verify if D is a numeric matrix
  if (!all(apply(D,2,is.numeric))){
    stop("Verify D is a numeric matrix. apply(D,2,as.numeric) could do the job or verify the first column doesn't contain probe ids") 
  }
  
  D <- apply(D,2,as.numeric)
  EntrezID <- as.character(EntrezID)
  sel.ids.nb <- .get.all.pairs.genes(sel.nbc$all.pairs)
  sel.gn <- EntrezID %in% sel.ids.nb
  D <- D[sel.gn,,drop=FALSE]
  Entrez <- EntrezID[sel.gn]
  col.D.test <- .removeDuplicatedEntrezPerPatients(D,Entrez)
  pred.test <- .one.vs.all.tsp(D=col.D.test$dataset,
                              GeneName=col.D.test$EntrezID,
                              sel.nbc)

  ## Add more information to the output variable
  pred.test$data.used <- col.D.test$dataset
  pred.test$EntrezID.used <- col.D.test$EntrezID
  
  invisible(pred.test)
}

applyAIMS <- function(eset,EntrezID){  
  D <- NA
  if (!is(eset,"ExpressionSet") & !is(eset,"matrix")){
    stop("eset argument should be either an ExpressionSet (Biobase) or a numerical matrix")
  }
  
  if (is(eset,"ExpressionSet")){
    D <- exprs(eset)
  }
  else if (is(eset,"matrix")){
    D <- eset
  }
  
  data(AIMSmodel)
  .apply.nbc(D,EntrezID,AIMSmodel)
}

Try the AIMS package in your browser

Any scripts or data that you put into this service are public.

AIMS documentation built on Nov. 8, 2020, 8:31 p.m.