Feature ranking and classification of microarray data

Let us import the data from the package datamicroarray

```{r, echo=FALSE}
## Setup chunk: start from a clean workspace, load the packages used below,
## initialise the per-method AUC placeholders and fetch the dataset catalogue.
rm(list=ls())
library(devtools)
library(gbcode) ## install_github("gbonte/gbcode")
library(datamicroarray) ## install_github('ramhiser/datamicroarray')
library(randomForest)
library(e1071)

## One placeholder per feature-selection method compared below.
AUC.rank<-NULL
AUC.mrmr<-NULL
AUC.rf<-NULL
AUC.rf2<-NULL
AUC.mimr<-NULL
AUC.wrap<-NULL

## De: table describing the available datasets (one row per dataset).
## w: indices of the rows of De whose "n" column exceeds 50.
De<-describe_data()
w<-which(De[,"n"]>50)
```

  Let us now perform a cross-validation where, for each of the CV folds, we rank the features using a set of techniques from the gbcode package
  ("rankrho","mrmr","mimr","rfrank","linearFsel"), select the top NMAX features,
  and assess them by classification with a Random Forest.

```{r, echo=TRUE}
  ## Benchmark of feature-ranking methods on the datasets listed in De.
  ## For every dataset and every CV fold, features are ranked on the training
  ## part only, and a Random Forest is trained on the top 2..NMAX features of
  ## each ranking; the predicted probabilities of class "1" on the held-out
  ## part are accumulated and scored by AUC.

  NMAX<-25   ## largest number of selected features evaluated per method
  CV<-10     ## number of cross-validation folds

  ## Accumulators of predicted class-"1" probabilities, one per method.
  ## They are never reset, so the AUC values printed below are cumulative
  ## over all datasets, folds and subset sizes processed so far.
  Yhat.rank<-NULL
  Yhat.mrmr<-NULL
  Yhat.rf<-NULL
  Yhat.rf2<-NULL
  Yhat.mimr<-NULL
  Yhat.wrap<-NULL

  ## True labels, appended in the same order as the predictions above.
  Ytrue<-NULL

  ## NOTE(review): `w` (rows of De with n > 50) is computed earlier but is not
  ## used here; the loop visits every row of De. Confirm this is intended.
  for (f in seq_len(NROW(De))) {
    set.seed(f+1)
    author<-as.character(De[f,"author"])
    print(author)
    data(list=author,package="datamicroarray")
    D<-get(author)
    ## Force a plain numeric matrix with the same dimensions as D$x.
    X<-array(as.numeric(as.matrix(D$x)),dim(D$x)) ##D$x
    Y<-D$y
    N<-NROW(X)
    Y<-factor(Y)
    if (length(levels(Y))>2){
      ## Multi-class target: recode as most frequent class (1) vs the rest (0).
      Ta<-table(Y)
      wta<-which.max(Ta)
      Yn<-numeric(length(Y))
      Yn[which(Y==names(Ta)[wta])]<-1
      Y<-factor(Yn)
    }

    ## Presumably fills in missing values (gbcode helper) -- TODO confirm.
    X<-imputeDataset(X)$impX
    ## Drop (near-)constant columns, then standardise the remaining ones.
    wconst<-which(apply(X,2,sd)<1e-2)
    if (length(wconst)>0) {
      X<-X[,-wconst]
    }
    X<-scale(X)
    n<-NCOL(X)

    ## The two class labels are renamed "0" and "1" (in level order).
    levels(Y)<-c("0","1")

    ## randomization of order
    Is<-sample(N)
    X<-X[Is,]
    Y<-Y[Is]

    ## Fold size; folds are contiguous slices of the shuffled samples.
    Nv<-round(N/CV)
    for (cv in 1:CV){
      set.seed(cv)
      ## Test indices of the current fold, clipped to N for the last fold.
      Its<-((cv-1)*Nv+1):min(N,cv*Nv)
      Itr<-setdiff(seq_len(N),Its)
      Xtr<-X[Itr,]
      Ytr<-Y[Itr]
      Xts<-X[Its,]
      ## A column can become (near-)constant once the test rows are removed:
      ## drop such columns from both the training and the test matrices.
      wconst<-which(apply(Xtr,2,sd)<1e-2)
      if (length(wconst)>0){
        Xtr<-Xtr[,-wconst]
        Xts<-Xts[,-wconst]
      }
      n<-NCOL(Xtr)
      Yts<-Y[Its]

      ## Pre-filter with rankrho (at most 1000 features); the other methods
      ## run on this reduced set and their output indices are mapped back to
      ## the columns of Xtr through subs.rank.
      subs.rank<-rankrho(Xtr,Ytr,nmax=min(n-5,1000))
      subs.mrmr<-subs.rank[mrmr(Xtr[,subs.rank],Ytr,nmax=NMAX)]
      subs.mimr<-subs.rank[mimr(Xtr[,subs.rank],Ytr,nmax=NMAX)]
      subs.wrap<-subs.rank[linearFsel(Xtr[,subs.rank],Ytr,nmax=NMAX)]

      subs.rf<-subs.rank[rfrank(Xtr[,subs.rank],factor(Ytr),nmax=NMAX,type=1)]
      subs.rf2<-subs.rank[rfrank(Xtr[,subs.rank],factor(Ytr),nmax=NMAX,type=2)]

      subs.rank<-subs.rank[1:NMAX]

      ## Assess each ranking with a Random Forest on its top-ns features.
      ## The order of the calls is kept fixed because the learner consumes
      ## random numbers.
      for (ns in (2:NMAX)){
        Ytrue<-c(Ytrue,as(Yts,"character"))
        Yhat.rank<-c(Yhat.rank,
                     pred("rf",Xtr[,subs.rank[1:ns]],factor(Ytr),
                          Xts[,subs.rank[1:ns]],class=TRUE)$prob[,"1"])
        Yhat.mrmr<-c(Yhat.mrmr,
                     pred("rf",Xtr[,subs.mrmr[1:ns]],factor(Ytr),
                          Xts[,subs.mrmr[1:ns]],class=TRUE)$prob[,"1"])
        Yhat.mimr<-c(Yhat.mimr,
                     pred("rf",Xtr[,subs.mimr[1:ns]],factor(Ytr),
                          Xts[,subs.mimr[1:ns]],class=TRUE)$prob[,"1"])
        Yhat.rf<-c(Yhat.rf,
                   pred("rf",Xtr[,subs.rf[1:ns]],factor(Ytr),
                        Xts[,subs.rf[1:ns]],class=TRUE)$prob[,"1"])
        Yhat.rf2<-c(Yhat.rf2,
                    pred("rf",Xtr[,subs.rf2[1:ns]],factor(Ytr),
                         Xts[,subs.rf2[1:ns]],class=TRUE)$prob[,"1"])
        Yhat.wrap<-c(Yhat.wrap,
                     pred("rf",Xtr[,subs.wrap[1:ns]],factor(Ytr),
                          Xts[,subs.wrap[1:ns]],class=TRUE)$prob[,"1"])
      }

      ## Cumulative AUC of each method over everything predicted so far.
      auc.rank<-AUC(factor(Ytrue),Yhat.rank)
      auc.mrmr<-AUC(factor(Ytrue),Yhat.mrmr)
      auc.mimr<-AUC(factor(Ytrue),Yhat.mimr)
      auc.rf<-AUC(factor(Ytrue),Yhat.rf)
      auc.rf2<-AUC(factor(Ytrue),Yhat.rf2)
      auc.wrap<-AUC(factor(Ytrue),Yhat.wrap)

      ## Progress report; the "AUC.rf2=" field reports auc.rf2.
      cat("\n f=",author ,"N=",N,"n=",n,
          "cv=",cv,"AUC.rank=",auc.rank,"AUC.mrmr=",  auc.mrmr,"AUC.mimr=",   auc.mimr, "AUC.rf=",auc.rf,
          "AUC.rf2=",auc.rf2,"AUC.wrap=",auc.wrap,"\n")

    } ## for cv

  } ## for f


```

gbonte/gbcode documentation built on Feb. 27, 2024, 7:38 a.m.