R/PPtreeClass.R

Defines functions PPTreeclass

Documented in PPTreeclass

#' Construct the projection pursuit classification tree 
#' 
#' Find tree structure using various projection pursuit indices of 
#' classification in each split.
#' @title Projection pursuit classification tree 
#' @usage PPTreeclass(formula,data, PPmethod="LDA",weight=TRUE,
#'                      r=1,lambda=0.1,energy=0,maxiter=50000,...) 
#' @param formula an object of class "formula"
#' @param data data frame
#' @param PPmethod method for projection pursuit; "LDA", "PDA", "Lr", 
#'        "GINI", and "ENTROPY"
#' @param weight weight flag in LDA, PDA and Lr index
#' @param r r in Lr index
#' @param lambda lambda in PDA index
#' @param energy parameter for the probability to take new projection
#' @param maxiter maximum iteration number
#' @param ... arguments to be passed to methods
#' @return Tree.Struct tree structure of projection pursuit classification tree
#' @return projbest.node 1 dimensional optimal projections of each node split
#' @return splitCutoff.node cutoff values of each node split 
#' @return origclass original class 
#' @return origdata original data
#' @references Lee, YD, Cook, D., Park JW, and Lee, EK(2013) 
#' PPtree: Projection Pursuit Classification Tree, 
#' Electronic Journal of Statistics, 7:1369-1386.
#' @export
#' @keywords tree
#' @examples
#' data(iris)
#' Tree.result <- PPTreeclass(Species~.,data = iris,"LDA")
#' Tree.result
PPTreeclass<-function(formula,data,PPmethod="LDA",weight=TRUE,r=1,
                        lambda=0.1,energy=0,maxiter=50000,...){
   Call<-match.call()
   indx<-match(c("formula","data"),names(Call),nomatch=0L)
   if(indx[1]==0L) 
     stop("a 'formula' argument is required")
   temp<-Call[c(1L,indx)]
   temp[[1L]]<-quote(stats::model.frame)
   m<-eval.parent(temp)
   Terms<-attr(m,"terms")
   formula<-as.character(formula)
   class.n<-formula[2]
   data.n<-strsplit(formula[3]," \\+ ")[[1]]
   int.flag<-any(strsplit(formula[3]," \\* ")[[1]] == formula[3])
   if(data.n[1]=="."){
     tot.n<-class.n
   } else{
     tot.n<-c(class.n,data.n)
   }     
   if(!int.flag){
     stop("PPTreeclass cannot treat interaction terms")
   }else if(!sum(duplicated(c(colnames(data),tot.n))[-c(1:ncol(data))])==length(tot.n)){
     
   }else{
     origclass<-data[,class.n]
     if(data.n[1]=="."){
       origdata<-data[,colnames(data)!=class.n]
     }else {
       origdata<-data[,data.n,drop=FALSE]
     }
   }
   TOL<-NULL
   origdata<-as.matrix(origdata)
   Find.proj<-function(origclass,origdata,PPmethod,weight,r,lambda,
                       maxiter,...){
      origdata<-as.matrix(origdata)
      n<-nrow(origdata)
      p<-ncol(origdata)
      g<-table(origclass)
      g.name<-as.numeric(names(g))
      G<-length(g)
      origclass<-as.numeric(factor(origclass))
      if(PPmethod=="Lr"){
         indexbest<-Lrindex(origclass,as.matrix(origdata),weight=weight,r=r);
      } else if(PPmethod=="GINI"){
         indexbest<-0;
         for(i in 1:p){
            tempdata<-as.matrix(origdata[,i]);
            tempindex<-GINIindex1D(origclass,tempdata);  
            if(indexbest<tempindex)
               indexbest<-tempindex;
         }  
      } else if(PPmethod=="ENTROPY"){
         indexbest<-0;
         for(i in 1:p){
            tempdata<-as.matrix(origdata[,i]);
            tempindex<-ENTROPYindex1D(origclass,tempdata);  
            if(indexbest<tempindex)
               indexbest<-tempindex;
         }  
      } else{
        indexbest<-0;
      }
      energy<-ifelse(energy==0,1-indexbest,energy)
      energy.temp<-1-indexbest
      TOL<-energy.temp/1000000
  
      if(PPmethod=="LDA"){
         a<-LDAopt(as.numeric(as.factor(origclass)),origdata,weight,q=1,maxiter=maxiter)
      } else if(PPmethod=="PDA"){
         a<-PDAopt(as.numeric(as.factor(origclass)),origdata,weight,q=1,
                   lambda=lambda,maxiter=maxiter)   
      } else {    
         a<-PPopt(as.numeric(as.factor(origclass)),as.matrix(origdata),
                  weight,q=1,PPmethod=PPmethod,r=r,energy=energy,cooling=0.999,
                  TOL=TOL,maxiter=maxiter) 
      } 
      proj.data<-as.matrix(origdata)%*%a$projbest
      sign<-sign(a$projbest[abs(a$projbest)==max(abs(a$projbest))])
      if(p==1){
         index <- 1
      } else{
         index<-(1:p)*(abs(a$projbest)==max(abs(a$projbest)))
         index<-index[index>0]
      }
      if(G==2){
         class<-origclass
      } else{
         m<-tapply(c(proj.data),origclass,mean)
         sd<-tapply(c(proj.data),origclass,sd)
         sd.sort<-sort.list(sd)
         m.list<-sort.list(m)
         m.sort<-sort(m)
         m.name<-as.numeric(names(m.sort))
         G<-length(m)
         dist<-0
         split<-0
         for(i in 1:(G-1)){
            if(m[m.list[i+1]]-m[m.list[i]]>dist){
               split<-i
               dist<-m[m.list[i+1]]-m[m.list[i]]
            }
         }
         class<-rep(0,n)
         for(i in 1:split) 
            class<-class+(origclass==m.name[i])
         class<-2-class
         g<-table(class)
         g.name<-as.numeric(names(g))
         G<-length(g)
         n<-nrow(origdata)
         class<-as.numeric(factor(class))
         if(PPmethod=="Lr"){
            indexbest<-Lrindex(class,as.matrix(origdata),weight=weight,r=r);
         } else if(PPmethod=="GINI"){
            indexbest<-0;
            for(i in 1:p){
               tempdata<-origdata[,i];
               tempindex<-GINIindex1D(class,as.matrix(tempdata));  
               if(indexbest<tempindex)
                  indexbest<-tempindex;
            }  
         } else if(PPmethod=="ENTROPY"){
            indexbest<-0;
            for(i in 1:p){
               tempdata<-origdata[,i];
               tempindex<-ENTROPYindex1D(class,as.matrix(tempdata));  
               if(indexbest<tempindex)
                  indexbest<-tempindex;
            }  
         } else {
           indexbest<-0
         }
         energy<-ifelse(energy==0,1-indexbest,energy)
         energy.temp<-1-indexbest
         TOL<-energy.temp/1000000
         if(PPmethod=="LDA"){
            a<-LDAopt(as.numeric(as.factor(class)),as.matrix(origdata),
                      weight,q=1)
         } else if(PPmethod=="PDA"){
            a<-PDAopt(as.numeric(as.factor(class)),as.matrix(origdata),
                      weight,q=1,lambda=lambda)   
         } else { 
            a<-PPopt(as.numeric(as.factor(class)),as.matrix(origdata),
                     PPmethod=PPmethod,r=r,q=1,energy=energy,cooling=0.999,
                     TOL=TOL)      
         }  
         if(sign!=sign(a$projbest[index])) 
            a$projbest<--a$projbest
         proj.data<-as.matrix(origdata)%*%a$projbest
      }
 
      m.LR<-tapply(proj.data,class,mean)
      temp.list<-sort.list(m.LR)
      m.LR<-m.LR[temp.list]
      sd.LR<-tapply(proj.data,class,function(x) 
                                    ifelse(length(x)>1,stats::sd(x),0))[temp.list]
      IQR.LR<-tapply(proj.data,class,function(x) 
                                    ifelse(length(x)>1,stats::IQR(x),0))[temp.list]
      median.LR<-tapply(proj.data,class,stats::median)[temp.list]
      n.LR<-table(class)[temp.list]
        
      c1<-(m.LR[1]+m.LR[2])/2
      c2<-(m.LR[1]*n.LR[2]+m.LR[2]*n.LR[1])/sum(n.LR)
      c3<-ifelse(sum(sd.LR==0)!=0,c1,
                        (m.LR[1]*sd.LR[2]+m.LR[2]*sd.LR[1])/sum(sd.LR))
      c4<-ifelse(sum(sd.LR==0)!=0,c2,(m.LR[1]*sd.LR[2]/sqrt(n.LR[2])+ 
                        m.LR[2]*sd.LR[1]/sqrt(n.LR[1]))/
                        (sd.LR[1]/sqrt(n.LR[1])+sd.LR[2]/sqrt(n.LR[2])))
      c5<-(median.LR[1]+median.LR[2])/2
      c6<-(median.LR[1]*n.LR[2]+median.LR[2]*n.LR[1])/sum(n.LR)    
      c7<-ifelse(sum(IQR.LR==0)!=0,c5,(median.LR[1]*IQR.LR[2]+
                                         median.LR[2]*IQR.LR[1])/sum(IQR.LR))      
      c8<-ifelse(sum(IQR.LR==0)!=0,c6,(median.LR[1]*(IQR.LR[2]/sqrt(n.LR[2]))+ 
                           median.LR[2]*(IQR.LR[1]/sqrt(n.LR[1])))/
                     ((IQR.LR[1]/sqrt(n.LR[1]))+(IQR.LR[2]/sqrt(n.LR[2]))))
      sel.proj<-sort(proj.data[which(proj.data>stats::quantile(proj.data,prob=0.25)& 
                                     proj.data<stats::quantile(proj.data,prob=0.75))])
      C<-c(c1,c2,c3,c4,c5,c6,c7,c8)
      Index<-a$indexbest
      Alpha<-t(a$projbest)
      IOindexR<-NULL
      IOindexL<-NULL
      sort.LR<-as.numeric(names(sort(m.LR)))
      IOindexL<-class==sort.LR[1]
      IOindexR<-class==sort.LR[2]
      list(Index=Index,Alpha=Alpha,C=C,IOindexL=IOindexL,IOindexR = IOindexR)
   }
   
   Tree.construct<-function(origclass,origdata,Tree.Struct, 
          id,rep,rep1,rep2,projbest.node,splitCutoff.node,PPmethod,r = NULL, 
          lambda=NULL,TOL,maxiter=50000,...) {
      origclass<-as.integer(origclass)
      origdata<-as.matrix(origdata)
      n<-nrow(origdata)
      g<-table(origclass)
      G<-length(g)
      if(length(Tree.Struct)==0){
         Tree.Struct<-matrix(1:(2*G-1),ncol=1)
         Tree.Struct<-cbind(Tree.Struct,0,0,0,0)
      }
      if(G==1){
         Tree.Struct[id,3]<-as.numeric(names(g))
         list(Tree.Struct=Tree.Struct,projbest.node=projbest.node, 
                splitCutoff.node=splitCutoff.node,rep=rep,rep1=rep1,rep2=rep2)
      } else {
         Tree.Struct[id,2]<-rep1
         rep1<-rep1+1
         Tree.Struct[id,3]<-rep1
         rep1<-rep1+1
         Tree.Struct[id,4]<-rep2
         rep2<-rep2+1
         a<-Find.proj(origclass,origdata,PPmethod,weight,r,lambda,
                      maxiter)
         splitCutoff.node<-rbind(splitCutoff.node,a$C)
         Tree.Struct[id,5]<-a$Index
         projbest.node<-rbind(projbest.node,a$Alpha)
         t.class<-origclass
         t.data<-origdata
         t.class<-t.class*a$IOindexL
         t.n<-length(t.class[t.class==0])
         t.index<-sort.list(t.class)
         t.index<-sort(t.index[-(1:t.n)])
         t.class<-t.class[t.index]
         t.data<-origdata[t.index,]
         b<-Tree.construct(t.class,t.data,Tree.Struct, 
                           Tree.Struct[id, 2],rep,rep1,rep2,projbest.node, 
                           splitCutoff.node,PPmethod,r,lambda,TOL,maxiter,...)
         Tree.Struct<-b$Tree.Struct
         projbest.node<-b$projbest.node
         splitCutoff.node<-b$splitCutoff.node
         rep<-b$rep
         rep1<-b$rep1
         rep2<-b$rep2
         t.class<-origclass
         t.data<-origdata
         t.class<-(t.class*a$IOindexR)
         t.n<-length(t.class[t.class==0])
         t.index<-sort.list(t.class)
         t.index<-sort(t.index[-(1:t.n)])
         t.class<-t.class[t.index]
         t.data<-origdata[t.index,]
         n<-nrow(t.data)
         G<-length(table(t.class))
         b<-Tree.construct(t.class,t.data,Tree.Struct, 
                Tree.Struct[id,3],rep,rep1,rep2,projbest.node, 
                splitCutoff.node,PPmethod,r,lambda,TOL,maxiter,...)
         Tree.Struct<-b$Tree.Struct
         projbest.node<-b$projbest.node
         splitCutoff.node<-b$splitCutoff.node
         rep<-b$rep
         rep1<-b$rep1
         rep2<-b$rep2
      }
      list(Tree.Struct=Tree.Struct,projbest.node=projbest.node, 
            splitCutoff.node=splitCutoff.node,rep=rep,rep1=rep1,rep2=rep2)
   }
   splitCutoff.node<-NULL
   projbest.node<-NULL
   Tree.Struct<-NULL
   id<-1
   rep1<-2
   rep2<-1
   rep<-1
   Tree.final<-Tree.construct(origclass,origdata,Tree.Struct,id,rep,rep1, 
                              rep2,projbest.node, splitCutoff.node,
                              PPmethod,r,lambda,TOL,maxiter,...)                            
   Tree.Struct<-Tree.final$Tree.Struct
   colnames(Tree.Struct)<-c("id","L.node.ID","R.F.node.ID","Coef.ID","Index")
   projbest.node<-Tree.final$projbest.node
   splitCutoff.node<-Tree.final$splitCutoff.node
   colnames(splitCutoff.node)<-paste("Rule",1:8,sep="")
   treeobj<-list(Tree.Struct=Tree.Struct,projbest.node=projbest.node, 
                 splitCutoff.node=splitCutoff.node,origclass=origclass,
                 origdata=origdata,terms=Terms)
   class(treeobj)<-append(class(treeobj),"PPtreeclass")
   return(treeobj)
}

Try the PPtreeViz package in your browser

Any scripts or data that you put into this service are public.

PPtreeViz documentation built on Dec. 6, 2019, 9:07 a.m.