Nothing
# Author : F.Rohart, Australian Institute for Bioengineering and Nanotechnology, The University of Queensland, Brisbane, QLD
# created: 28-05-2014
# last modification: 10-10-2014
# Copyright (C) 2014
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
# random.subsampling(Y)
# prediction.formula(data,H,uloadings,vloadings,CH,means.X,means.Y,sigma.X,sigma.Y)
# pre.screening(X,coeff)
# random subsamplings
component.selection=function(object,alpha, showProgress=TRUE)
{
# distance: "max.dist", "centroids.dist", "mahalanobis.dist"
#object is a bootsPLS object
if(missing(object)) stop("`object' is missing")
if(missing(alpha)) alpha=0.01
if(!any(class(object)=="bootsPLS")) stop("problem class of object")
#summary of the classification accuracy over all the replications
MC=apply(object$ClassifResult,3,function(y){apply(y,3,function(x){sum(x)-sum(diag(x))})})
max=ncol(object$nbr.var) #number max of components included
pval=NULL
opt=1 #initialise the first optimal number of genes
for(j in 2:max)
{
pval[j]=t.test(MC[,opt],MC[,j],alternative="greater")$p.value #t.test of "is adding X genes improves the overall results"
if( (pval[j]< (alpha)))
{
opt=j #if the p-value is lower than 0.05, the optimal number of genes is updated
#cat("opt.temp ", opt,"\n") #print the temporary optimal number of genes for MC
}
}
#if(opt.MC==1)
#cat("opt.temp ", opt,"\n") #print the temporary optimal number of genes for MC
if(showProgress)
message("Number of chosen components: ",opt,"\n")
out=list(pval=pval,opt=opt,object=object,alpha=alpha)
structure(out,class="component.selection")
}
variable.selection=function(object,ncomp,alpha,limit, showProgress=TRUE)
{
#object:bootsPLS object
#ncomp: number of optimal component
#alpha: level of the tests
#limit: vector of maximal number of genes to include on each component
# distance: "max.dist", "centroids.dist", "mahalanobis.dist"
if(missing(object)) stop("`object' is missing")
if(missing(alpha)) alpha=0.01
if(missing(ncomp)) ncomp=ncol(object$nbr.var)
if(missing(limit)) limit=rep(ncol(object$frequency),ncomp)-1
method=object$data$dist
if(!any(class(object)=="bootsPLS")) stop("problem class of object")
#loop on the number of optimal component
subsamplings=list()
pval=signature=list()
opt=NULL
for(compi in 1:ncomp)
{
if(showProgress)
message("Component ",compi," under progress.")
variables=sort(object$frequency[compi,],decreasing=TRUE)
dup=duplicated(variables[limit[compi]:1])[limit[compi]:1]
first=which(!dup)[1] #give the first subset of genes. e.g 4genes have the same stability 1.00, first=4
subsamplings[[compi]]=matrix(NA,nrow=100,ncol=limit[compi])
for(j in (first):limit[compi])
{
if(!dup[j])
{
if(showProgress)
cat("Calculating subsamplings for the top",j,"variables on component", compi,"\n")
out=CI.prediction(X=object$data$X,Y=object$data$Y,signature=c(signature,list(names(variables)[1:j])),ncomp=compi)
# match distance and output in out
ind.method=which(dimnames(out$ClassifResult)[[5]]==method)
subsamplings[[compi]][,j]=apply(out$ClassifResult[,,compi,,ind.method],3,function(x){sum(x)-sum(diag(x))})
}
}
colnames(subsamplings[[compi]])=names(variables)[1:limit[compi]]
pval[[compi]]=numeric()
opt.temp=first #initialise the first optimal number of genes
#cat("comp.",compi," opt.temp ", opt.temp.MC,"\n") #print the temporary optimal number of genes for MC
for(j in (first+1):limit[compi])
{
if(!dup[j])
{
temp = try(t.test(subsamplings[[compi]][,opt.temp],subsamplings[[compi]][,j],alternative="greater")$p.value, silent=T) #t.test of "is adding X genes improves the overall results"
if(any(class(temp) == "try-error") || is.na(temp)) # temp can be NaN when error.keepX is constant
{
pval[[compi]][j] = 1
} else {
pval[[compi]][j] = temp
}
if( pval[[compi]][j]< (alpha) )
{
opt.temp=j #if the p-value is lower than alpha, the optimal number of genes is updated
#cat("comp.",compi," opt.temp.MC ", opt.temp.MC,"\n") #print the temporary optimal number of genes for MC
}
}
}
opt[compi]=opt.temp
signature[[compi]]=names(variables)[1:opt.temp]
if(showProgress)
message("\nNumber of chosen variables on component ", compi,": ",opt.temp,"\n")
}
names(opt)=paste("comp.",1:ncomp,sep="")
names(pval)=paste("comp.",1:ncomp,sep="")
names(subsamplings)=paste("comp.",1:ncomp,sep="")
names(signature)=paste("comp.",1:ncomp,sep="")
out=list(pval=pval,opt=opt,signature=signature,subsamplings=subsamplings,object=object,alpha=alpha)
structure(out,class="variable.selection")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.