Description
This package provides an implementation of linear discriminant analysis (LDA) and quadratic discriminant analysis (QDA) that remains effective in very high dimension, i.e. when there are many more variables than observations.
Details

Package:  VHDClassification
Type:     Package
Version:  0.1
Date:     2010-04-15
License:  GPL-2
LazyLoad: yes
Depends:  methods, e1071, lattice, stats
This package provides learning procedures for classification in very high dimension. Binary rules are learned with learnBinaryRule, while K-class rules (K >= 2) are learned with the function learnPartitionWithLLR. learnBinaryRule returns an object of class LinearRule-class or QuadraticRule-class, depending on whether type='linear' or type='quadratic' is chosen. learnPartitionWithLLR essentially returns a set of binary rules, represented by the class PartitionWithLLR-class. The learning procedures used are described in the papers cited below. The predict method (see predict-methods) is implemented for the classes LinearRule-class, QuadraticRule-class and PartitionWithLLR-class; it predicts the class of a new observation.
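A minimal sketch of the call pattern (the simulated data below are illustrative only, not taken from the package; x is an n-by-p matrix with observations in rows and y the vector of class labels):

    library(VHDClassification)
    p <- 200; n <- 30
    # two Gaussian classes differing in mean, for illustration
    x <- rbind(matrix(rnorm(n*p), n, p), matrix(rnorm(n*p, mean=0.3), n, p))
    y <- rep(1:2, each=n)
    rule <- learnBinaryRule(x, y, type='linear')            # or type='quadratic'
    predict(rule, x)                                        # predicted class labels
    partition <- learnPartitionWithLLR(x, y, type='linear') # K-class learning
    plotClassifRule(rule)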
Author(s)

Robin Girard <robin.girard@mines-paristech.fr> (author and maintainer)
References

R. Girard. Fast rate of convergence in high dimensional linear discriminant analysis. To appear in Journal of Nonparametric Statistics.

R. Girard. Very high dimensional discriminant analysis with thresholding estimation. Submitted.
Examples

############ Tests: 2 classes, when the true rule should be quadratic
#library(VHDClassification)
p=500; n=50; mu=array(0,c(p,2)); C=array(c(1,20),c(p,2)); C[c(1,3,5),1]=40
# Simulate a training set: for each class k, draw n Gaussian observations
# with mean mu[,k] and diagonal covariance diag(C[,k])
x=NULL; y=NULL;
for (k in 1:2)
{
    M=matrix(rnorm(p*n),nrow=p,ncol=n)
    tmp=array(C[,k]^(1/2),c(p,n))*M+array(mu[,k],c(p,n))
    x=rbind(x,t(tmp))
    y=c(y,array(k,n))
}
#Learning
LearnedQuadBinaryRule=learnBinaryRule(x,y,type='quadratic')
LearnedLinBinaryRule=learnBinaryRule(x,y) # default is linear type
# for comparison with SVM
# require(e1071)
# svmRule=best.tune('svm',
#            train.x=x,
#            train.y=factor(y),
#            ranges=list(gamma=2^(-4:4),
#                        cost=2^(-2:2)))
# for comparison with randomForest
require(randomForest)
RF <- best.tune('randomForest',x,factor(y),ranges=list(ntree = c(100,500)))
# for comparison with nearest shrunken centroid
#require(pamr)
#myTrainingdata=list(x=t(x),y=y)
#mytrain <- pamr.train(myTrainingdata)
#mycv <- pamr.cv(mytrain,myTrainingdata)
#thresh=try(mycv$threshold[which.min(mycv$error)],silent = TRUE)
#Testing Set
x=NULL; y=NULL;
for (k in 1:2){
M=matrix(rnorm(p*n),nrow=p,ncol=n)
x=rbind(x,t(array(C[,k]^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
y=c(y,array(k,n))
}
#Testing
myTestingdata=list(x=x,y=y)
QDAScore=mean((y!=predict(LearnedQuadBinaryRule,myTestingdata$x))) ;
LDAScore=mean((y!=predict(LearnedLinBinaryRule,myTestingdata$x))) ;
RFScore=mean((y!=predict(RF,myTestingdata$x))) ;
#SVMScore=mean((y!=predict(svmRule,x))) ;
#comparison with nearest shrunken centroid
#myTestingdata=list(x=t(x),y=y)
#V=as.numeric(pamr.predict(mytrain, myTestingdata$x,threshold=thresh,type="class"))
#SCScore=mean((myTestingdata$y!=V))
cat('\n')
cat('What does it cost to use type="linear" when the true rule is quadratic?','\n',
    'Score of the linear rule: ',LDAScore,'\n',
    'Score of the quadratic rule: ',QDAScore,'\n',
    #'Score of the nearest shrunken centroid rule: ',SCScore,'\n',
    'Score of the random forest rule: ',RFScore,'\n',
    #'Score of the support vector machine rule: ',SVMScore,'\n',
    'Note: these scores should be averaged over many repetitions, or interpreted carefully.\n')
plotClassifRule(LearnedQuadBinaryRule)
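## Illustrative sketch (not part of the original example): averaging the
## misclassification scores over several independent training/testing draws,
## reusing the simulation scheme and the p, n, mu, C defined above.
#nrep=10; scores=numeric(nrep)
#for (r in 1:nrep){
#    xtr=NULL; ytr=NULL; xte=NULL; yte=NULL
#    for (k in 1:2){
#        Mtr=matrix(rnorm(p*n),nrow=p,ncol=n); Mte=matrix(rnorm(p*n),nrow=p,ncol=n)
#        xtr=rbind(xtr,t(array(C[,k]^(1/2),c(p,n))*Mtr+array(mu[,k],c(p,n))))
#        xte=rbind(xte,t(array(C[,k]^(1/2),c(p,n))*Mte+array(mu[,k],c(p,n))))
#        ytr=c(ytr,array(k,n)); yte=c(yte,array(k,n))
#    }
#    rule=learnBinaryRule(xtr,ytr,type='quadratic')
#    scores[r]=mean(yte!=predict(rule,xte))
#}
#mean(scores)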
############ Tests: 2 classes, quadratic vs. linear, when the true rule is linear
#library(VHDClassification)
#p=100; n=50 ; mu=array(0,c(p,2)); mu[1:10,1]=1 ;C=array(c(1,20),p)
#x=NULL; y=NULL;
#for (k in 1:2){
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
# x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
# y=c(y,array(k,n))}
#Learning
#LearnedQuadBinaryRule=learnBinaryRule(x,y,type='quadratic')
#LearnedLinBinaryRule=learnBinaryRule(x,y) # default is linear type
#comparison with nearest shrunken centroid
#require(pamr)
#myTrainingdata=list(x=t(x),y=y)
#mytrain <- pamr.train(myTrainingdata)
#mycv <- pamr.cv(mytrain,myTrainingdata)
#thresh=try(mycv$threshold[which.min(mycv$error)],silent = TRUE)
#Testing Set
#x=NULL; y=NULL;
#for (k in 1:2){
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
# x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
# y=c(y,array(k,n))
#}
#Testing
#myTestingdata=list(x=x,y=y)
#QDAScore=mean((y!=predict(LearnedQuadBinaryRule,myTestingdata$x))) ;
#LDAScore=mean((y!=predict(LearnedLinBinaryRule,myTestingdata$x))) ;
#comparison with nearest shrunken centroid
#myTestingdata=list(x=t(x),y=y)
#tmp=as.numeric(pamr.predict(mytrain,myTestingdata$x,
#               threshold=thresh,type="class"))
#SCScore=mean((myTestingdata$y!=tmp))
#cat('\n',
#    'What does it cost to use type="quadratic" when the true optimal rule is linear?','\n',
#    'Score of the linear rule: ',LDAScore,'\n',
#    'Score of the rule with type="quadratic": ',QDAScore,'\n',
#    '  (does it detect that the true rule is linear?)','\n',
#    'Score of the nearest shrunken centroid rule: ',SCScore,'\n')
#plotClassifRule(LearnedQuadBinaryRule)
############ Tests 3 classes
#library(VHDClassification)
#p=1000; n=40 ; mu=array(0,c(p,3)); mu[1:10,1]=4; C=array(c(1,20),p)
#x=NULL; y=NULL;
#for (k in 1:3){
# if (k<3){
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
# x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
# y=c(y,array(k,n))}
# else
# {
# tildeC=C; tildeC[1:10]=40;
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
# x=rbind(x,t(array(tildeC^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
# y=c(y,array(k,n))
# }
# }
#Learning
#LearnedLinearPartitionWithLLR=learnPartitionWithLLR(x,y,type='linear')
#LearnedQuadraticPartitionWithLLR=learnPartitionWithLLR(x,y,type='quadratic')
#plotClassifRule(LearnedQuadraticPartitionWithLLR)
#require(randomForest)
#RF <- best.tune('randomForest',x,factor(y),ranges=list(ntree = c(500)))
#Testing Set
#x=NULL; y=NULL;
#for (k in 1:3){
# if (k<3){
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
# x=rbind(x,t(array(C^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
# y=c(y,array(k,n))}
# else
# {
# tildeC=C; tildeC[1:10]=40;
# M=matrix(rnorm(p*n),nrow=p,ncol=n)
# x=rbind(x,t(array(tildeC^(1/2),c(p,n))*(M)+array(mu[,k],c(p,n))));
# y=c(y,array(k,n))
# }
# }
#Testing
#myTestingdata=list(x=x,y=y)
#LDAScore=mean((y!=factor(predict(LearnedLinearPartitionWithLLR,myTestingdata$x)))) ;
#QDAScore=mean((y!=factor(predict(LearnedQuadraticPartitionWithLLR,myTestingdata$x)))) ;
#RFScore=mean((y!=predict(RF,myTestingdata$x))) ;
#cat('Score of the quadratic rule: ',QDAScore,'\n',
#'Score of the linear rule: ',LDAScore,'\n',
#'Score of the random Forest Rule: ',RFScore,'\n')
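## Illustrative addition (not from the original example): a confusion matrix
## gives a finer view of the 3-class errors than the raw scores above.
#print(table(truth=myTestingdata$y,
#            predicted=predict(LearnedQuadraticPartitionWithLLR,myTestingdata$x)))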