VHDClassification-package: Discrimination-Classification in very high dimension with...


Description

This package provides an implementation of linear discriminant analysis and quadratic discriminant analysis that works well in very high dimension (when there are many more variables than observations).

Details

Package: VHDClassification
Type: Package
Version: 0.1
Date: 2010-04-15
License: GPL-2
LazyLoad: yes
Depends: methods, e1071, lattice, stats

This package provides learning procedures for classification in very high dimension. Binary learning is done with learnBinaryRule, while K-class (K>=2) learning is done with the function learnPartitionWithLLR.

learnBinaryRule returns an object of class LinearRule-class or QuadraticRule-class, depending on whether type='linear' or type='quadratic'. learnPartitionWithLLR returns a set of binary rules, represented by the class PartitionWithLLR-class. The learning procedures used are described in the papers cited below.

The method predict (predict-methods) is implemented for the classes LinearRule-class, QuadraticRule-class and PartitionWithLLR-class. It predicts the class of a new observation.
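As a minimal sketch of this learn/predict workflow on simulated data (the mean shift of 0.5 on every coordinate is just an illustrative choice; the Examples section below gives fuller simulations with comparisons to other classifiers):

library(VHDClassification)
set.seed(1)
p=200; n=30
x=rbind(matrix(rnorm(n*p),n,p),           # class 1: standard Gaussian
        matrix(rnorm(n*p,mean=0.5),n,p))  # class 2: means shifted by 0.5
y=rep(1:2,each=n)
rule=learnBinaryRule(x,y)                 # default type='linear'
mean(predict(rule,x)!=y)                  # training error rate
plotClassifRule(rule)                     # display the learned rule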

Author(s)

Maintainer and author: Robin Girard <robin.girard@mines-paristech.fr>

References

R. Girard. Fast rate of convergence in high dimensional linear discriminant analysis. To appear in Journal of Nonparametric Statistics.

R. Girard. Very high dimensional discriminant analysis with thresholding estimation. Submitted.

Examples

############ Tests 2 classes when the true rule should be quadratic
#library(VHDClassification)
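# Two classes with equal means; the variances differ on coordinates 1, 3 and 5,
# so the optimal rule is quadratic.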
p=500; n=50 ; mu=array(0,c(p,2)) ; C=array(c(1,20),c(p,2)); C[c(1,3,5),1]=40
x=NULL; y=NULL;
for (k in 1:2)
{
    M=matrix(rnorm(p*n),nrow=p,ncol=n)
    tmp=array(C[,k]^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))
    x=rbind(x,t(tmp))
    y=c(y,array(k,n))
}
#Learning
LearnedQuadBinaryRule=learnBinaryRule(x,y,type='quadratic')
LearnedLinBinaryRule=learnBinaryRule(x,y) # default is linear type
# for comparison with SVM
# require(e1071)
# svmRule=best.tune('svm',
#              train.x=x,
#              train.y=factor(y),
#              ranges=list(gamma=2^(-4:4),
#                          cost=2^(-2:2)))
# for comparison with randomForest
require(randomForest)
RF <- best.tune('randomForest',x,factor(y),ranges=list(ntree = c(100,500)))
# for comparison with nearest shrunken centroid
#require(pamr)
#myTrainingdata=list(x=t(x),y=y)
#mytrain <- pamr.train(myTrainingdata)
#mycv <- pamr.cv(mytrain,myTrainingdata)
#thresh=try(mycv$threshold[which.min(mycv$error)],silent = TRUE)




#Testing Set
x=NULL; y=NULL;
for (k in 1:2){
    M=matrix(rnorm(p*n),nrow=p,ncol=n)
    x=rbind(x,t(array(C[,k]^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
    y=c(y,array(k,n))    
}
#Testing
myTestingdata=list(x=x,y=y)
QDAScore=mean((y!=predict(LearnedQuadBinaryRule,myTestingdata$x))) ;
LDAScore=mean((y!=predict(LearnedLinBinaryRule,myTestingdata$x))) ;
RFScore=mean((y!=predict(RF,myTestingdata$x))) ;
#SVMScore=mean((y!=predict(svmRule,x))) ;
#comparison with nearest shrunken centroid (pamr expects variables in rows)
#myTestingdata=list(x=t(x),y=y)
#V=as.numeric(pamr.predict(mytrain, myTestingdata$x,threshold=thresh,type="class"))
#SCScore=mean((myTestingdata$y!=V))
cat('\n')
cat('What does it cost to use type=linear  when the rule  is quadratic ? ','\n',
'Score of the linear rule: ',LDAScore,'\n',
'Score of the quadratic rule: ',QDAScore,'\n',
#'Score of the nearest shrunken centroid rule: ',SCScore,'\n',
'Score of the random forest rule: ',RFScore,'\n',
#'Score of the support vector machine rule: ',SVMScore,'\n',
'Note: these scores should be averaged over a large number of experiments, or interpreted carefully \n')
plotClassifRule(LearnedQuadBinaryRule)

############ Tests 2 classes, quadratic vs linear, when the true rule is linear
#library(VHDClassification)
#p=100; n=50 ; mu=array(0,c(p,2)); mu[1:10,1]=1 ;C=array(c(1,20),p)
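# Equal covariances; the means differ on coordinates 1:10, so the optimal rule is linear.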
#x=NULL; y=NULL;
#for (k in 1:2){
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
#	x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
#    y=c(y,array(k,n))}
#Learning
#LearnedQuadBinaryRule=learnBinaryRule(x,y,type='quadratic')
#LearnedLinBinaryRule=learnBinaryRule(x,y) # default is linear type
#comparison with nearest shrunken centroid
#require(pamr)
#myTrainingdata=list(x=t(x),y=y)
#mytrain <- pamr.train(myTrainingdata)
#mycv <- pamr.cv(mytrain,myTrainingdata)
#thresh=try(mycv$threshold[which.min(mycv$error)],silent = TRUE)


#Testing Set
#x=NULL; y=NULL;
#for (k in 1:2){
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
#    x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
#    y=c(y,array(k,n))    
#}
#Testing
#myTestingdata=list(x=x,y=y)
#QDAScore=mean((y!=predict(LearnedQuadBinaryRule,myTestingdata$x))) ;
#LDAScore=mean((y!=predict(LearnedLinBinaryRule,myTestingdata$x))) ;
#comparison with nearest shrunken centroid
#myTestingdata=list(x=t(x),y=y)
#tmp=as.numeric(pamr.predict(mytrain,myTestingdata$x,
# threshold=thresh,type="class"))
#SCScore=mean((myTestingdata$y!=tmp))
#cat('\n',
#'What does it cost to use type=quadratic when the true optimal rule is linear? ','\n',
#'Score of the linear rule: ',LDAScore,'\n',
#'Score of the rule with type=quadratic: ',QDAScore,'\n',
#'Does it detect that the true rule is linear?\n',
#'Score of the nearest shrunken centroid rule: ',SCScore,'\n')

#plotClassifRule(LearnedQuadBinaryRule)

############ Tests 3 classes
#library(VHDClassification)
#p=1000; n=40 ; mu=array(0,c(p,3)); mu[1:10,1]=4; C=array(c(1,20),p)
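# Three classes: class 1 has shifted means on coordinates 1:10, and class 3
# has a larger variance on those coordinates (tildeC below).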

#x=NULL; y=NULL;
#for (k in 1:3){
#    if (k<3){
#        M=matrix(rnorm(p*n),nrow=p,ncol=n)
#        x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))))
#        y=c(y,array(k,n))
#    } else {
#        tildeC=C; tildeC[1:10]=40
#        M=matrix(rnorm(p*n),nrow=p,ncol=n)
#        x=rbind(x,t(array(tildeC^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))))
#        y=c(y,array(k,n))
#    }
#}
#Learning
#LearnedLinearPartitionWithLLR=learnPartitionWithLLR(x,y,type='linear')
#LearnedQuadraticPartitionWithLLR=learnPartitionWithLLR(x,y,type='quadratic')
#plotClassifRule(LearnedQuadraticPartitionWithLLR)
#require(randomForest)
#RF <- best.tune('randomForest',x,factor(y),ranges=list(ntree = c(500)))

#Testing Set
#x=NULL; y=NULL;
#for (k in 1:3){
#    if (k<3){
#        M=matrix(rnorm(p*n),nrow=p,ncol=n)
#        x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))))
#        y=c(y,array(k,n))
#    } else {
#        tildeC=C; tildeC[1:10]=40
#        M=matrix(rnorm(p*n),nrow=p,ncol=n)
#        x=rbind(x,t(array(tildeC^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))))
#        y=c(y,array(k,n))
#    }
#}
#Testing
#myTestingdata=list(x=x,y=y)
#LDAScore=mean((y!=factor(predict(LearnedLinearPartitionWithLLR,myTestingdata$x)))) ;
#QDAScore=mean((y!=factor(predict(LearnedQuadraticPartitionWithLLR,myTestingdata$x)))) ;
#RFScore=mean((y!=predict(RF,myTestingdata$x))) ;

#cat('Score of the quadratic rule: ',QDAScore,'\n',
#'Score of the linear rule: ',LDAScore,'\n',
#'Score of the random Forest Rule: ',RFScore,'\n')
