# Dependency bootstrap: attach each required package, installing it on the fly
# if it is missing.  The original repeated the same if(require(x)==FALSE)
# block eight times; a loop with character.only=TRUE is equivalent and keeps
# the package list in one place.
# NOTE(review): installing packages at source() time is a heavy side effect;
# if this file becomes a package, move these to Imports instead.
for (pkg in c("MASS", "glmnet", "plyr", "inline", "Rcpp",
              "RcppArmadillo", "devtools", "crayon")) {
  if (!require(pkg, character.only = TRUE)) {
    install.packages(pkg)
    library(pkg, character.only = TRUE)
  }
}
#.sourceCpp_1_DLLInfo` <- dyn.load('../cs.so')
#PALM <- Rcpp:::sourceCppFunction(function(w0_r, X1_r, X2_r, t1_r, lambda1, lambda2 = 0, alpha = 1, te = 0.0001, maxtimes = 0L, palmquiet = TRUE) {}, FALSE, `.sourceCpp_1_DLLInfo`, 'sourceCpp_1_PALM')
#iPALM <- Rcpp:::sourceCppFunction(function(w0_r, X1_r, X2_r, t1_r, lambda1, lambda2 = 0, alpha = 1, te = 0.0001, maxtimes = 500L, alpha_1 = 0.2, beta_1 = 0.8, palmquiet = TRUE) {}, FALSE, `.sourceCpp_1_DLLInfo`, 'sourceCpp_1_iPALM')
# Compile and load the C++ solvers; this exports PALM() (and, per the commented
# signatures above, iPALM()) into the global environment.  testarcpp.cpp must
# be present in the current working directory when this file is sourced.
sourceCpp("testarcpp.cpp")
#' Addvec1
#'
#' Prepend a constant column of 1s to a data matrix so that the model
#' intercept (bias) can be treated uniformly with all other coefficients.
#' @param X data matrix with nrow the number of instances and ncol the number of features.
#' @return the matrix X with an extra leading column of 1s (ncol(X) + 1 columns).
#' @keywords intercept constant feature
#' @export
#' @examples
#' Addvec1(X)
Addvec1<-function(X){
  n_inst <- nrow(X)
  cbind(rep(1, n_inst), X)
}
#' Hentr
#'
#' Entropy of a Bernoulli random variable, computed elementwise.
#' Inputs of exactly 0 or 1 yield 0 (the 0*log(0) NaN is mapped to 0);
#' values outside [0, 1] produce NaNs/warnings from log().
#' @param x a vector of probabilities between 0 and 1.
#' @return a vector of entropy values, same length as x.
#' @keywords entropy
#' @export
#' @examples
#' Hentr(X)
Hentr<-function(x){
  ent <- -(x * log(x) + (1 - x) * log(1 - x))
  ent[is.nan(ent)] <- 0  # 0*log(0) convention: entropy is 0 at the endpoints
  ent
}
#' Signoid
#'
#' Sigmoid (logistic) function, computed elementwise.
#' Input of +Inf returns 1 and -Inf returns 0.
#' @param x numeric vector.
#' @return sigmoid values in [0, 1], same length as x.
#' @keywords sigmoid
#' @export
#' @examples
#' Signoid(x)
Signoid<-function(x){
  # 1/(1+exp(-x)) is kept in this exact form: it is finite for all x,
  # unlike the algebraically equal exp(x)/(1+exp(x)) which NaNs at +Inf.
  denom <- 1 + exp(-x)
  1 / denom
}
#' Lexp
#'
#' Negative log-sigmoid, log(1 + exp(-x)), computed elementwise in a
#' numerically stable way.  Input of +Inf returns 0; -Inf returns Inf.
#'
#' The naive form log(1+exp(-x)) overflows to Inf for large negative x
#' (exp(709) is already Inf in double precision) even though the true value
#' is approximately -x.  The identity
#'   log(1+exp(-x)) = max(-x, 0) + log1p(exp(-|x|))
#' is finite and accurate for all finite x.
#' @param x numeric vector.
#' @return log(1 + exp(-x)) elementwise.
#' @keywords log-sigmoid
#' @export
#' @examples
#' Lexp(x)
Lexp<-function(x){
  pmax(-x, 0) + log1p(exp(-abs(x)))
}
#' F
#'
#' Objective function value of EELR: logistic loss on the labeled data plus
#' the elastic net penalty, plus (when unlabeled data are supplied) the
#' entropy of the predicted class probabilities on the unlabeled data.
#' NOTE: this masks base R's `F` shorthand for FALSE while sourced.
#' @param w coefficient vector
#' @param X1 labeled data matrix(each row representing features for an instance).
#' @param X2 unlabeled data matrix(each row representing features for an instance).NA for supervised learning
#' @param t1 instance labels of labeled data, only taking -1 and 1.
#' @param lambda1 regularization parameter for naive elasticnet penalty.
#' @param lambda2 regularization parameter for unlabled data contribution.
#' @param alpha regularization parameter for tradeoff between l1 and l2 penalty.
#' @keywords Objective function (with penalty) value
#' @export
#' @examples
#' F(w,X1,X2=NA,t1,lambda1=0,lambda2=0,alpha)
F<-function(w,X1,X2=NA,t1,lambda1=0,lambda2=0,alpha){
  # The supervised loss and elastic net penalty are common to both cases; the
  # original computed the supervised branch via plyr::laply(..., Lexp), which
  # is elementwise-identical to the vectorized call and has been unified here.
  loss <- sum(Lexp(t1*(X1%*%w)))
  penalty <- lambda1*( (1-alpha)/2*sum(w^2) + alpha*sum(abs(w)) )
  if(is.na(X2[1])){
    return (loss + penalty)
  }
  # Semi-supervised case: add the entropy regularizer on the unlabeled data.
  loss + penalty + lambda2*sum(Hentr(Signoid(X2%*%w)))
}
#' GetPredict
#'
#' Predicted class labels from the inner product of the coefficient vector
#' with the data matrix.  Output labels are 1 and -1; an exact tie
#' (inner product of 0) is resolved to 1 by the +.5 shift.
#' @param w coefficient vector of the model.
#' @param Xt data matrix (rows = instances).
#' @return column of predicted labels in {1, -1}.
#' @keywords predicted labels from coefficients and data matrix
#' @export
#' @examples
#' GetPredict(w,Xt)
GetPredict<-function(w,Xt){
  raw_sign <- sign(Xt %*% w)
  sign(raw_sign + .5)  # shifts the 0 (tie) case to class 1
}
#' MissErr
#'
#' Classification error (0-1 loss) between predicted and known labels.
#' Labels are expected in {1, -1}; a predicted label of 0 (tie) contributes
#' a loss of 0.5 for that instance.
#' @param tpred prediction label vector (taking 1, -1, or 0 for ties).
#' @param t known data label vector (taking 1 and -1).
#' @return scalar error rate in [0, 1].
#' @keywords misclassification error
#' @export
#' @examples
#' MissErr(tpred,t)
MissErr<-function(tpred,t){
  # Fixed message formatting: the original concatenated the two lengths with
  # no separators ("...first3second5"), making the diagnostic unreadable.
  if(length(tpred)!=length(t))stop(paste0("lengths are different in MissErr with first ",length(tpred)," second ",length(t)))
  # |tpred - t| is 0 on agreement, 2 on disagreement, 1 on a tie (tpred == 0);
  # dividing by 2*n maps these to 0 / 1 / 0.5 per instance.
  return (sum(abs(matrix(tpred,ncol=1)-matrix(t,ncol=1))/(2*length(t))) )
}
#' getlambda1seq
#'
#' Build the lambda1 sequence for cross validation: find a maximum lambda1
#' that drives the PALM solution (numerically) sparse, then generate an
#' exponentially decreasing sequence down to lambda1max*lambda1.min.ratio.
#' The search grid is 10^(4/2), 10^(5/2), 10^(6/2), so lambda1max is capped
#' at 10^3.
#' @param w0. as in PALM
#' @param X1. as in PALM
#' @param X2. as in PALM
#' @param t1. as in PALM
#' @param te. as in PALM; defaults to 0.0001 (the original self-referential
#' default te=te would error whenever te was not supplied).
#' @param lambda2. as in PALM
#' @param sparsete. tolerance for searching maximum lambda1: when the mean
#' absolute coefficient falls below this threshold, or the 10^3 cap is hit,
#' the maximum lambda1 is reached
#' @param lambda1.min.ratio. ratio of min and max lambda1 searched
#' @param nlambda1. number of lambda1 values in the sequence
#' @param wn. TRUE to warn when the search hits the cap without sparsity
#' @keywords sequence of lambda1 in the grid search
#' @export
#' @examples
#' getlambda1seq(w0,X1,X2,t1,lambda2,
#' sparsete,lambda1.min.ratio,nlambda1,te=te,wn=FALSE)
getlambda1seq<-function(w0,X1,X2,t1,lambda2,
sparsete,lambda1.min.ratio,nlambda1,te=0.0001,wn=FALSE){
  hit_cap <- FALSE
  for(i in 4:6){
    tempresult<-PALM(w0,X1,X2,t1,lambda1=10^(i/2),lambda2=lambda2,alpha=1,te=te)
    if(mean(abs(tempresult))<sparsete||i==6){
      lambda1max =10^(i/2)
      # Record whether we stopped because the grid was exhausted rather than
      # because sparsity was actually achieved.  (The original test
      # lambda1max==10^(i/2) was always TRUE, so the warning fired on every
      # call with wn=TRUE.)
      hit_cap <- (i==6 && mean(abs(tempresult))>=sparsete)
      break
    }
  }
  if(hit_cap&&wn==TRUE)warning("lambda1 reaches the maximum!")
  return (exp(seq(from=log(lambda1max),
  to=log(lambda1max*lambda1.min.ratio),length=nlambda1)))
}
#' lambda2tr
#'
#' Map a grid index k to the lambda2 value used in the grid search:
#' 0 when k == 0, otherwise 2^(k/2 - 3.5), an exponentially rising sequence.
#' @param k grid index; transformed to as.numeric(k!=0)*2^(k/2-3.5)
#' @return the lambda2 value(s) for index k.
#' @keywords exponentially increasing lambda2 sequence
#' @export
#' @examples
#' lambda2tr(k)
lambda2tr<-function(k){
  nonzero <- as.numeric(k != 0)  # index 0 is reserved for "no regularizer"
  nonzero * 2^(k/2 - 3.5)
}
#' writesvmdata2
#'
#' Write one data set to disk in the sparse "index:value" text format consumed
#' by the external svmlight and svmlin binaries.  The svmlin file (filename2)
#' contains only the feature lines; the svmlight file (filename1) prefixes
#' each line with the label.
#' @param X data matrix (rows = instances).
#' @param t label vector; length must equal nrow(X).
#' @param filename1 file name written into svm_light_dir (svmlight format).
#' @param filename2 file name written into svmlin_dir (svmlin format).
#' @param svm_light_dir directory for svm_light files.
#' @param svmlin_dir directory for svmlin files.
#' @keywords write txt data for software svmlight and svmlin
#' @export
#' @examples
#' writesvmdata2(X,t,filename1,filename2,svm_light_dir,svmlin_dir)
writesvmdata2<-function(X,t,filename1,filename2,svm_light_dir,svmlin_dir){
  twd=getwd()
  # Restore the caller's working directory even if a write below fails
  # (the original left the process stranded in svm_light_dir/svmlin_dir
  # on error).
  on.exit(setwd(twd), add = TRUE)
  # Scalar validity check: use short-circuiting || rather than vector |.
  if(NA%in%X || nrow(X) == 0 || ncol(X) == 0)stop("invalid input for makesvmdata")
  nr = nrow(X)
  nc = ncol(X)
  # Build each output line as interleaved character cells
  # [index, ':', value, ' '] per feature, then write with sep='' so every row
  # collapses to "1:v1 2:v2 ... ".
  outmatrix = matrix(nrow=nr,ncol=4*nc)
  outmatrix[,seq(2,4*nc,by=4)]=':'
  outmatrix[,seq(4,4*nc,by=4)]=' '
  outmatrix[,seq(1,4*nc,by=4)]=matrix(1:nc,nrow=nr,ncol=nc,byrow=TRUE)
  outmatrix[,seq(3,4*nc,by=4)]=X
  if(length(t)!=nr){
    print(nr)
    print("\n")
    print(length(t))
    print("\n")
    print(X)
    print("\n")
    print(t)
    stop("length of t must be equal to nrow(X)!")
  }
  # svmlin file: features only (labels go in a separate file elsewhere).
  setwd(svmlin_dir)
  write.table(outmatrix,file=filename2,sep='',quote=FALSE,row.names=FALSE,col.names=FALSE )
  setwd(twd)
  # svmlight file: label, space, then the same feature cells.
  setwd(svm_light_dir)
  outmatrix=cbind(t,rep(' ',length(t)),outmatrix)
  write.table(outmatrix,file=filename1,sep='',quote=FALSE,row.names=FALSE,col.names=FALSE )
  setwd(twd)
}
#' writetxt2
#'
#' Generate and write the train / test / unlabeled data files for the external
#' svmlight and svmlin binaries, plus the svmlin label file.  Unlabeled and
#' test instances get a placeholder label of 0.
#' @param jobname. the same as in testsepa; used as a file name prefix.
#' @param X1. labeled data matrix (with the Addvec1 constant in column 1).
#' @param X2. unlabeled data matrix.
#' @param X3. test data matrix.
#' @param t1. labels of X1.
#' @param t3. labels of X3 (used only for a length consistency check here).
#' @param svm_light_dir directory for svm_light files.
#' @param svmlin_dir directory for svmlin files.
#' @keywords write txt data for software svmlight and svmlin
#' @export
#' @examples
#' writetxt2(jobname,X1,X2,X3,t1,t3,
#' svm_light_dir,svmlin_dir)
writetxt2<-function(jobname,X1,X2,X3,t1,t3,
svm_light_dir,svmlin_dir){
  n1=nrow(X1)
  n2=nrow(X2)
  twd=getwd()
  # Restore the working directory even if a write below fails.
  on.exit(setwd(twd), add = TRUE)
  n3=length(t3)
  # Scalar check: || instead of vector |.
  if(length(t1)!=n1||length(t3)!=nrow(X3))stop("inconsistent input with X1,t1 or X3,t3 in writetxt")
  setwd(svm_light_dir)
  # Column 1 (the Addvec1 constant) is dropped for the external tools --
  # NOTE(review): svmtest() runs svm_learn with "-b 1"; confirm the bias
  # handling between the two tools is intentional.
  writesvmdata2( rbind(X1[,-1],X2[,-1]) ,filename1=(paste0(jobname,"train.txt")) ,
  filename2=paste0(jobname,"training_examples") ,t=c(t1, rep(0,n2)),
  svm_light_dir=svm_light_dir,svmlin_dir=svmlin_dir )
  writesvmdata2( X3[,-1], filename1=paste0(jobname,"test.txt"), filename2=paste0(jobname,"test_examples"), t=rep(0,n3),
  svm_light_dir=svm_light_dir,svmlin_dir=svmlin_dir )
  writesvmdata2( X2[,-1], filename1=paste0(jobname,"trainunlabel.txt"),
  filename2=paste0(jobname,"trainingunlabel_examples"), t=rep(0,n2),
  svm_light_dir=svm_light_dir,svmlin_dir=svmlin_dir )
  # svmlin keeps labels in a separate file: real labels for X1, 0 for X2.
  setwd(svmlin_dir)
  write.table(matrix( c(t1,rep(0,n2)),ncol=1 ), file=paste0(jobname,"training_labels"),row.names=FALSE,col.names=FALSE )
  setwd(twd)
}
#' svmtran
#'
#' Transductive evaluation with the svm software: reuses the model/weight
#' files saved by a previous svmtest() run (svmlight's model.txt, and the
#' *_ms.weights / *_da.weights copies made when cpsvmlin=TRUE) to score the
#' unlabeled training examples, so no retraining is performed here.
#' @param jobname. See parameters of svmtest
#' @param t3. true labels of the examples being scored (the caller in
#' testsepa passes t2, the unlabeled set's labels)
#' @param svmlight TRUE to score with svm_light's saved model
#' @param svm_light_dir directory of the svm_light binaries and files
#' @param svmlin TRUE to score with svmlin's saved weights
#' @param svmlin.mode subset of c("l2hatloss","DA") selecting which weights to use
#' @param svmlin_dir directory of the svmlin binary and files
#' @param twd working directory to return to
#' @return c(svm_light_e, svmlin_l2_e, svmlin_DA_e); NA for methods not run
#' @keywords transductive learning
#' @export
#' @examples
#' svmtran(jobname=jobname,t3,svmlight,svm_light_dir,svmlin,
#' svmlin.mode,svmlin_dir,
#' twd)
svmtran<-function(jobname=jobname,t3,svmlight,svm_light_dir,svmlin,
svmlin.mode,svmlin_dir,
twd){
# Error slots default to NA so the caller can tell which methods ran.
svm_light_e<-NA
svmlin_l2_e<-NA
svmlin_DA_e<-NA
if(svmlight==TRUE){
# svm_light_dir is resolved relative to twd, hence the double setwd.
setwd(twd)
setwd(svm_light_dir)
#system(paste0("cd ",svm_light_dir))
#system(paste0( "./svm_learn -b 1 -c ", svmlightC, " ",jobname,"train.txt ",jobname,"model.txt") )
# no biase since it is included in the data
# Classify the unlabeled examples with the model saved by svmtest().
system(paste0("./svm_classify ",jobname,"trainunlabel.txt ",jobname,"model.txt ",jobname,"unlabelprediction.txt"))
svm_light_pred<-read.table(file=paste0(jobname,"unlabelprediction.txt"))
# print(svm_light_pred)
svm_light_e<-MissErr(sign(svm_light_pred[,1]),t3)
print("svmlight miser:")
print(svm_light_e)
}
if(svmlin==TRUE){
setwd(twd)
setwd(svmlin_dir)###note that the training_examples contain labeled data soin transductive learning modethe new training
###_example file is required to be created to contain only the unlabeled data.
if("l2hatloss"%in%svmlin.mode){
#system(paste0("./svmlin -A ",2," -W 1 -U 1 -R 0.5 training_examples training_labels"))
#system(paste0("./svmlin -A ",2," -W ",svmlinWv[1]/n1," -U ",svmlinUv[1]*n2/n1," -R 0.5 ",jobname,"training_examples ",jobname,"training_labels"))
# "-f weights file" mode: score trainingunlabel_examples with the saved
# multi-switch (l2hatloss) weights -- no retraining.
system(paste0("./svmlin -f ",jobname,"training_examples_ms.weights ",jobname,"trainingunlabel_examples "))
svmlin_pred<-read.table(file=paste0(jobname,"trainingunlabel_examples.outputs"))
svmlin_l2_e<-MissErr(sign(svmlin_pred[,1]),t3)
print("svmlin l2hatloss miser:")
print(svmlin_l2_e)
}
if("DA"%in%svmlin.mode){
#system(paste0("./svmlin -A ",3," -W 1 -U 1 -R 0.5 training_examples training_labels"))
#system(paste0("./svmlin -A ",3," -W ",svmlinWv[2]/n1," -U ",svmlinUv[2]*n2/n1," -R 0.5 ",jobname,"training_examples ",jobname,"training_labels"))
# Same scoring path with the deterministic-annealing weights.
system(paste0("./svmlin -f ",jobname,"training_examples_da.weights ",jobname,"trainingunlabel_examples "))
svmlin_pred<-read.table(file=paste0(jobname,"trainingunlabel_examples.outputs"))
svmlin_DA_e<-MissErr(sign(svmlin_pred[,1]),t3)
print("svmlin DA miser:")
print(svmlin_DA_e)
}
}
setwd(twd)
return ( c(svm_light_e,svmlin_l2_e,svmlin_DA_e) )
}
#' svmtest
#'
#' Inductive classification with the external svmlight and svmlin binaries:
#' train on the labeled (+ unlabeled, for svmlin) data and score the test
#' files written by writetxt2().
#' @param jobname. the same as parameter jobname for testsepa
#' @param X1. X1 and the remaining data parameters are the same as in testsepa
#' @param writeout TRUE to (re)write the data files via writetxt2 first
#' @param svmlightC. in svmlight, the parameter controlling tradeoff between l2norm and margin
#' @param svmlinWv. default:c(1,1), svmlin -W weights (l2hatloss, DA)
#' @param svmlinUv. default:c(1,1), svmlin -U weights (l2hatloss, DA)
#' @param svmlin.mode. "l2hatloss" quadratic loss for labeled data with multiple switch for transductive learning,
#' "DA" global optimization method with deterministic annealing
#' @param cpsvmlin. TRUE to copy the learned svmlin weights to *_ms.weights /
#' *_da.weights so svmtran() can reuse them without retraining
#' @return c(svm_light_e, svmlin_l2_e, svmlin_DA_e); NA for methods not run
#' @keywords svmlight svmlin
#' @export
#' @examples
#' svmtest(jobname,X1,X2,X3,t1,t3,writeout,svmlight,svm_light_dir,svmlin,
#' svmlin.mode,svmlin_dir,
#' twd,svmlightC,svmlinWv=c(1,1),svmlinUv=c(1,1),cpsvmlin=FALSE)
#'
#'
#'
svmtest<-function(jobname,X1,X2,X3,t1,t3,writeout,svmlight,svm_light_dir,svmlin,
svmlin.mode,svmlin_dir,
twd,svmlightC,svmlinWv=c(1,1),svmlinUv=c(1,1),cpsvmlin=FALSE){
# Restore the caller's working directory even if an external call fails.
on.exit(setwd(twd), add = TRUE)
svm_light_e<-NA
svmlin_l2_e<-NA
svmlin_DA_e<-NA
if(writeout==TRUE){
writetxt2(jobname=jobname,X1,X2,X3,t1,t3,svm_light_dir=svm_light_dir,svmlin_dir=svmlin_dir)
}
n1=nrow(X1)
# BUG FIX: was ncol(X2).  n2 is the number of unlabeled instances, used below
# to scale svmlin's -U weight by n2/n1 (writetxt2 uses nrow(X2) consistently).
n2=nrow(X2)
if(svmlight==TRUE){
setwd(twd)
setwd(svm_light_dir)
#system(paste0("cd ",svm_light_dir))
system(paste0( "./svm_learn -b 1 -c ", 1/svmlightC, " ",jobname,"train.txt ",jobname,"model.txt") )
# no biase since it is included in the data
system(paste0("./svm_classify ",jobname,"test.txt ",jobname,"model.txt ",jobname,"prediction.txt"))
svm_light_pred<-read.table(file=paste0(jobname,"prediction.txt"))
# print(svm_light_pred)
svm_light_e<-MissErr(sign(svm_light_pred[,1]),t3)
print("svmlight miser:")
print(svm_light_e)
}
if(svmlin==TRUE){
setwd(twd)
setwd(svmlin_dir)
if("l2hatloss"%in%svmlin.mode){
#system(paste0("./svmlin -A ",2," -W 1 -U 1 -R 0.5 training_examples training_labels"))
system(paste0("./svmlin -A ",2," -W ",svmlinWv[1]/n1," -U ",svmlinUv[1]*n2/n1," -R 0.5 ",jobname,"training_examples ",jobname,"training_labels"))
system(paste0("./svmlin -f ",jobname,"training_examples.weights ",jobname,"test_examples "))
svmlin_pred<-read.table(file=paste0(jobname,"test_examples.outputs"))
svmlin_l2_e<-MissErr(sign(svmlin_pred[,1]),t3)
print("svmlin l2hatloss miser:")
print(svmlin_l2_e)
if(cpsvmlin==TRUE){# cp training_example.weights to store coefficients
system(paste0("cp ",jobname,"training_examples.weights ",jobname,"training_examples_ms.weights"))
}
}
if("DA"%in%svmlin.mode){
#system(paste0("./svmlin -A ",3," -W 1 -U 1 -R 0.5 training_examples training_labels"))
system(paste0("./svmlin -A ",3," -W ",svmlinWv[2]/n1," -U ",svmlinUv[2]*n2/n1," -R 0.5 ",jobname,"training_examples ",jobname,"training_labels"))
system(paste0("./svmlin -f ",jobname,"training_examples.weights ",jobname,"test_examples "))
svmlin_pred<-read.table(file=paste0(jobname,"test_examples.outputs"))
svmlin_DA_e<-MissErr(sign(svmlin_pred[,1]),t3)
print("svmlin DA miser:")
print(svmlin_DA_e)
if(cpsvmlin==TRUE){
system(paste0("cp ",jobname,"training_examples.weights ",jobname,"training_examples_da.weights"))
}
}
}
setwd(twd)
return ( c(svm_light_e,svmlin_l2_e,svmlin_DA_e) )
}
#' testsepa
#'
#' This function does inductive and transductive learning performance comparison with supervised learning by
#' glmnet, semi-supervised learning svmlight and svmlin for simulated dataset
#' or assigned dataset or 3 microarray datasets provided. It returns a list error1 containing the classification
#' error rates in the first element and the coefficients learned in the next elements. if transductive
#' =TRUE,error1[[1]] contains CER of RSLR(tr),RSLR(in),glmnet(tr),glmnet(in),svmlight(tr),
#' svmlight(in),svmlin_ms(tr),svmlin_ms(in),svmlin_da(tr),svmlin_da(in) where "tr" is short for
#' transductive learning mode and "in" inductive learning mode. If transductive=FALSE, only inductive
#' mode CER are in error1[[1]]. error[[2]] ~ error[[7]] contains the coefficients learned by the
#' 5 methods in the order mentioned above. the lambda1 parameter is for the elastic net penalty.
#' And the lambda2 parameter is for the entropy regularizer for unlabeled data. Empirically,
#' lambda2 can be fixed to 1 by setting lambda2=1, and crv = FALSE.
#' If svmlight(http://svmlight.joachims.org/) or svmlin(http://vikas.sindhwani.org/svmlin.html)
#' method is used for making comparisons of methods, please install them in the parent directory
#' of current working directory and assign the path by parameter svm_light_dir and svmlin_dir
#'
#' @param jobname. Name of this simulation job. It must be assigned if import is 0. default:1.
#' @param import. 0 in simulation mode, or 1 if using given data set with dataset
#' =1(for leukemia data set),2(for colon data set) or 3(for prostate data set), or 1 with dataset != 1,2 or 3 if using assigned data
#' @param X1. labeled data matrix
#' @param X2. unlabeled data matrix
#' @param X3. test data matrix
#' @param t1. labeled data set labels
#'@param t2. unlabeled data set labels
#'@param t3. test data set labels
#' @param dim. dimensions for simulation(import=0)
#' @param w0. initial value of the w coefficients, default zero vector
#'@param standardize. TRUE to scale the features, except for the later added constant feature of 1 in simulation mode
#'@param n1. labeled data size in simulation mode
#'@param n2. unlabeled data size in simulation mode
#'@param n3. test data size in simulation mode
#'@param pnr. ratio of sizes of the two classes generated in simulation mode
#'@param transductive. TRUE to compute both inductive and transductive learning classification error rates;FALSE only to compute
#'inductive learning classification error rates
#'@param lambda1. assigned value of lambda1 in simulation mode with crv = FALSE
#'@param lambda2. assigned value of lambda2 in simulation mode with crv = FALSE
#'@param alpha. assigned value of alpha in simulation mode with crv = FALSE
#'@param seed. seed for set.seed() to make reproducible results
#'@param te. tolerance for converging criteria, in simulation experiment 0.001 is typically for cross validation and 0.0001 for
#'training on the whole data set
#'@param sa. parameter mu to control distance of the two classes mean in simulation mode
#'@param psparse. number of effective or nonzero coefficients in w in simulation mode
#'@param crv. TRUE to do 5fold cross validation to select the lambda1,lambda2 and alpha; FALSE to use assigned values
#'@param type.measure. "c" currently only 0-1 loss classification error implemented
#'@param lambda1.min.ratio. parameter for function getlambda1seq
#'@param nlambda1. number of lambda1 searched in a sequence
#'@param lambda1seq. default= NA to generate by the program, or to use assigned sequence of lambda1 to search by cross validation
#'@param writeout. parameter for function svmtest
#'@param svmlight. TRUE to turn svmlight on
#'@param svm_light_dir. path of directory of svm_light, parameter for svmtest and svmtran
#'@param svmlin. TRUE to turn svmlin on
#'@param svmlin_dir. path of directory of svmlin-v1.0, parameter for svmtest and svmtran
#'@param svmlin.mode. parameter of function svmtest and function svmtran
#'@param sparsete. parameter for function getlambda1seq,see help file for getlambda1seq for details
#' @keywords the function for simulation(import=0), test on microarray dataset(import=1 and dataset
#' =1 , 2 or 3) or user assigned data(import = 1 and dataset != 1,2 or 3)
#' @export
#' @examples
#' testsepa(
#' jobname=1,import=0,X=NULL,X1=NULL,X2=NULL,X3=NULL,t1=NULL,t2=NULL,t3=NULL,dim=NULL,w0=NULL,standardize=FALSE,
#' n1,n2,n3,pnr=.5,transductive=FALSE,lambda1,lambda2,alpha,
#' seed=0,te,
#' sa,psparse=.1
#' ,crv=FALSE,type.measure="c",lambda1.min.ratio=.0001,nlambda1=20,lambda1seq=NA,
#' writeout=FALSE,svmlight=FALSE,svm_light_dir,svmlin=FALSE,
#' svmlin.mode=NULL,svmlin_dir,
#' sparsete=.001,palmquiet=TRUE,nlambda2=8,rho=0.4,testbratio=0.5,dataset=NULL)
testsepa<-function(jobname=1,import=0,X1=NULL,X2=NULL,X3=NULL,t1=NULL,t2=NULL,t3=NULL,dim=NULL,w0=NULL,standardize=FALSE,
n1,n2,n3,Total,pnr=.5,transductive=FALSE,lambda1,lambda2,alpha,
seed=0,te,
sa,psparse=20
,crv=FALSE,type.measure="c",lambda1.min.ratio=.0001,nlambda1=20,lambda1seq=NA,
writeout=FALSE,svmlight=FALSE,svm_light_dir,svmlin=FALSE,
svmlin.mode=NULL,svmlin_dir,
sparsete=.001,palmquiet=TRUE,nlambda2=8,rho=0.5,testbratio=0.5,dataset=NULL)
{
twd=getwd()
set.seed(seed)
finalresult=vector("list",14)#[[1]]for cer others for storing coefficients of the methods
# ---- data acquisition: simulation (import==0) or preprocessed .rda sets ----
if(import==0){
print("Doing simulation")
Total=n1+n2+n3
# True coefficient vector: psparse nonzero entries (+4 / -4 blocks placed at
# 30% and 70% of the feature range).
Beta<-rep(0,dim)
#Beta[sample(c(2:(dim+1)),psparse*dim,replace=FALSE)]=c(1+runif(psparse*dim/2),-1-runif(psparse*dim/2))
# Beta[c( (1+dim*0.3-psparse/2):(dim*0.3),(dim*0.7+1):(dim*0.7+psparse/2))]=c(4+runif(psparse/2),-4-runif(psparse/2))
Beta[c( (1+dim*0.3-psparse/2):(dim*0.3),(dim*0.7+1):(dim*0.7+psparse/2))]=c(rep(4,psparse/2),rep(-4,psparse/2))
finalresult[[7]]=Beta
# AR(1)-style covariance: Edcrm[i,j] = rho^|i-j|.
Edcrm=rho^( abs( matrix(1:dim,nrow=dim,ncol=dim,byrow=TRUE) - matrix(1:dim,nrow=dim,ncol=dim,byrow=FALSE) ))
# Two gaussian classes with means -sa*Beta / +sa*Beta; unlabeled class
# balance is shifted by testbratio.
X<-mvrnorm(n=n1*pnr,mu=-sa*Beta,Sigma=Edcrm)
X<-rbind(X,mvrnorm(n=n1*(1-pnr),mu=sa*Beta,Sigma=Edcrm))
if(n2>0){
X<-rbind(X,mvrnorm(n=n2*pnr+(0.5-testbratio)*n2,mu=-sa*Beta,Sigma=Edcrm))
X<-rbind(X,mvrnorm(n=n2*(1-pnr)-(0.5-testbratio)*n2,mu=sa*Beta,Sigma=Edcrm))
}
if(n3>0){
X<-rbind(X,mvrnorm(n=n3*pnr,mu=-sa*Beta,Sigma=Edcrm))
X<-rbind(X,mvrnorm(n=n3*(1-pnr),mu=sa*Beta,Sigma=Edcrm))
}
X1<-X[1:n1,]
#t<-2*rbinom(Total,1,1/(1+exp(-X%*%Beta)))-1
#generate t according to gaussian distribution
# Labels follow the block structure of the rows generated above.
t=c(rep(1,n1*pnr),rep(-1,n1*(1-pnr)),rep(1,n2*pnr+(0.5-testbratio)*n2),
rep(-1,n2*(1-pnr)-(0.5-testbratio)*n2),
rep(1,n3*pnr),rep(-1,n3*(1-pnr)))
t1<-t[1:n1]
if(n2>0){
X2<-X[(n1+1):(n1+n2),]
t2<-t[(n1+1):(n1+n2)]
X3=X[(n1+n2+1):Total,]
t3=t[(n1+n2+1):Total]
}
}else{
# Each .rda branch splits a shuffled data set into 26 labeled, a middle
# unlabeled block, and the rest as test; 0/1 labels are mapped to -1/1.
# NOTE(review): the .rda files must be in the current working directory.
if(dataset==1){
load("leukemia.rda")
ylen=length(ly.original)
ss=sample(1:ylen)
tset=ss[61:ylen]
uset=ss[27:60]
lset=ss[1:26]
X1=lx.original[lset,]
t1=ly.original[lset]
X2=lx.original[uset,]
t2=ly.original[uset]
X3=lx.original[tset,]
t3=ly.original[tset]
t1=2*t1-1##turn 0 to -1
t3=2*t3-1
t2=2*t2-1
n1=nrow(X1)
n2=nrow(X2)
dim=ncol(X1)
}
if(dataset==2){
load("colon.rda")
ylen=length(colon.y)
ss=sample(1:ylen)
tset=ss[53:ylen]
uset=ss[27:52]
lset=ss[1:26]
X1=colon.x[lset,]
t1=colon.y[lset]
X2=colon.x[uset,]
t2=colon.y[uset]
X3=colon.x[tset,]
t3=colon.y[tset]
t1=2*t1-1
t3=2*t3-1
t2=2*t2-1
n1=nrow(X1)
n2=nrow(X2)
dim=ncol(X1)
}
if(dataset==3){
load("prostate.rda")
ylen=length(prostate.y)
ss=sample(1:ylen)
tset=ss[79:ylen]
uset=ss[27:78]
lset=ss[1:26]
X1=prostate.x[lset,]
t1=prostate.y[lset]
X2=prostate.x[uset,]
t2=prostate.y[uset]
X3=prostate.x[tset,]
t3=prostate.y[tset]
t1=2*t1-1
t3=2*t3-1
t2=2*t2-1
n1=nrow(X1)
n2=nrow(X2)
dim=ncol(X1)
}
}
if(standardize==TRUE){######this scale is before adding the w0
X1=scale(X1)
X2=scale(X2)
X3=scale(X3)
}
# finalresult[[8]]=X1
# finalresult[[9]]=X2
# finalresult[[10]]=X3
# finalresult[[11]]=t1
# finalresult[[12]]=t2
# finalresult[[13]]=t3
X1=Addvec1(X1)##adding const for w0
X2=Addvec1(X2)
X3=Addvec1(X3)
if( FALSE==(setequal ( unique(t1) ,c(1,-1) ) ) ){
stop("the category indicator must only takes 1 or -1!")
}
#initialize the parameters to be computed
if(is.null(w0)){
w0<-rep(0,dim+1)
if(length(w0)!=ncol(X1)){print("dimension error!")}
}else{
w0=w0
}
setwd(twd)
lbd1<-NULL
#cv or not cv
# ---- hyperparameter selection: fixed values (crv==FALSE) or 5-fold CV ----
if(crv==FALSE){
lbd1=lambda1
lbd2=lambda2
alpha=alpha
svmlightC=1
svmlinUl2=1
svmlinWl2=1
svmlinUDA=1
svmlinWDA=1
}else{
# NOTE(review): is.na() on a user-supplied vector lambda1seq gives a
# length>1 condition, which errors in R >= 4.2; only the scalar NA default
# is safe here.
if(is.na(lambda1seq)){
lambda1seq=getlambda1seq(w0=w0,X1=X1,X2=X2,t1=t1,lambda2=1,sparsete=sparsete,
lambda1.min.ratio=lambda1.min.ratio,nlambda1=nlambda1,te=te)
}
# Random 5-fold partitions of the labeled (l1) and unlabeled (l2) indices.
in1<-sample(1:n1)
in2<-sample(1:n2)
l1<-list(0,0,0,0,0)
l2<-list(0,0,0,0,0)
for(i in 1:5){
l1[[i]]<-in1[floor((i-1)*n1/5+1):floor(i*n1/5)]
l2[[i]]<-in2[floor((i-1)*n2/5+1):floor(i*n2/5)]
}
#writing the txtfiles in preparation for training in cross validation on svms
jobnamecv = paste(jobname,"cv",1:5,sep="")#not paste0 here! note for parameter collapse
for(i in 1:5){
writetxt2(jobname=jobnamecv[i],X1=X1[-l1[[i]],],X2=X2[-l2[[i]],],X3=X1[l1[[i]],],t1=t1[-l1[[i]]],t3=t1[l1[[i]]],
svm_light_dir,svmlin_dir)
}
Minl1<-matrix(nrow=nlambda2,ncol=3) #to store each mincv error for lambda2(col1),col2=best lambda1 for the lambda2 and col3=best al
Svmlinl2mat<-matrix(nrow=nlambda2,ncol=length(lambda1seq))
SvmlinDAmat<-matrix(nrow=nlambda2,ncol=length(lambda1seq))
Svmlightvec<-rep(NA,nlambda2)# svmlight with c in svmlight equal to our lambda2
# Grid search: k indexes lambda2 (via lambda2tr), j indexes lambda1,
# al indexes alpha in 0, 0.1, ..., 1.
for(k in 1:nlambda2 ){
#######svmlight cv start
Svmval=rep(0,3)###store 3 svmtest error in one cross validation
for(i in 1:5){
Svmval=Svmval+svmtest(jobname=jobnamecv[i],X1=X1[-l1[[i]],],X2=X2[-l2[[i]],],X3=X1[l1[[i]],],t1=t1[-l1[[i]]],t3=t1[l1[[i]]],writeout=FALSE,
svmlight=svmlight,svm_light_dir=svm_light_dir,svmlin=FALSE,
svmlin.mode=svmlin.mode,svmlin_dir=svmlin_dir,
twd=twd,svmlightC=lambda2tr(k),svmlinWv=rep(NA,2),svmlinUv=rep(NA,2) )
}
Svmlightvec[k]=Svmval[1]/5 #k for hyperparameter lambda2 j for lambda1
#######svmlight cv end
#Fseq<-matrix(ncol=6,nrow=length(lambda1seq))
Fseq<-matrix(ncol=11,nrow=length(lambda1seq)) #store crossvalidation error for each lambda1 and al in 0:10, with lambda2 fixed
for(j in 1:length(lambda1seq)){
######svmlin cv
Svmval=rep(0,3)###store 3 svmtest error in one cross validation
for(i in 1:5){
Svmval=Svmval+svmtest(jobname=jobnamecv[i],X1=X1[-l1[[i]],],X2=X2[-l2[[i]],],X3=X1[l1[[i]],],t1=t1[-l1[[i]]],t3=t1[l1[[i]]],writeout=FALSE,
svmlight=FALSE,svm_light_dir=svm_light_dir,svmlin=svmlin,
svmlin.mode=svmlin.mode,svmlin_dir=svmlin_dir,
twd=twd,svmlightC=NULL,svmlinWv=rep(lambda1seq[j],2),svmlinUv=rep(lambda2tr(k),2) )
}
Svmlinl2mat[k,j]=Svmval[2]/5 #k for hyperparameter lambda2 j for lambda1
SvmlinDAmat[k,j]=Svmval[3]/5
######svmlin cv
for(al in 0:10){
Fval<-0
for(i in 1:5){
#resu<-PALM(w0,X1[-l1[[i]],],X2[-l2[[i]],],t1[-l1[[i]]],lambda1=lambda1seq[j],lambda2=0.25*k,alpha=al*0.1,te=te)
resu<-PALM(w0,X1[-l1[[i]],],X2[-l2[[i]],],t1[-l1[[i]]],lambda1=lambda1seq[j],lambda2=lambda2tr(k),alpha=al*0.1,te=te,palmquiet=palmquiet)
if(type.measure=="c"){
Fval=Fval+MissErr(GetPredict(resu,X1[l1[[i]],]),t1[l1[[i]]])
}else{
tempProb=F(resu,X1[l1[[i]],],NA,t1[l1[[i]]],lambda1=0,lambda2=0,alpha=1)
Fval=Fval+tempProb
}
}#for i
#print((al+1+(j-1)*6 + length(lambda1seq)*6*(k) )/(6*length(lambda1seq)*(nlambda2+1)))
# Progress fraction of the (k, j, al) grid.
print((al+1+(j-1)*11 + length(lambda1seq)*11*(k) )/(11*length(lambda1seq)*(nlambda2+1)))
Fseq[j,al+1]<-Fval/5#! al+1 not al !
}#for al
}#for j
that=which(Fseq==min(Fseq),arr.ind=TRUE) #Fseq is :Fseq[i,j] cv error when lbd1=lambda1seq[i] and alpha =al[j]
Minl1[k,]=c(min(Fseq),lambda1seq[ that[1,1] ],that[1,2])
#the cv result for fixed lbd2 as k,col2 is the corresponding lamdba1,col3 is the corresponding al
}# for k
# Pick the (lambda2, lambda1, alpha) triple with the smallest CV error.
lbd2=lambda2tr( which(Minl1[,1]==min(Minl1[,1]))[1] )
lbd1=Minl1[which(Minl1[,1]==min(Minl1[,1]))[1],2]
alpha=Minl1[which(Minl1[,1]==min(Minl1[,1]))[1],3]*0.1-0.1#col3 in Minl1 is only the index so subtracted by 0.1
lightloc=which( Svmlightvec==min(Svmlightvec) )[1]
svmlightC=lambda2tr(lightloc)######svmlight hyperparameter
linl2loc=which( Svmlinl2mat==min(Svmlinl2mat) ,arr.ind=TRUE )
linDAloc=which( SvmlinDAmat==min(SvmlinDAmat) ,arr.ind=TRUE )
svmlinUl2=lambda2tr(linl2loc[1,1])######svmlin hyperparameter
svmlinWl2=lambda1seq[linl2loc[1,2]]
svmlinUDA=lambda2tr(linDAloc[1,1])
svmlinWDA=lambda1seq[linDAloc[1,2]]
print("the lbd1,lbd2 and alpha by cv is")
print(c(lbd1,lbd2,alpha))
#print(Minl1)#########
#print(Fseq)#########
}
#eelr predict
# ---- final training and inductive evaluation on X3 ----
resu<-PALM(w0,X1,X2,t1,lambda1=lbd1,lambda2=lbd2,alpha=alpha,te=0.1*te,palmquiet=FALSE)
tmyu<-GetPredict(resu,X3)
myeu<-MissErr(tmyu,t3)
#glmnet predict supervised
# s1=proc.time()
# Supervised baseline: glmnet with alpha selected over 0, 0.1, ..., 1.
cvmin=matrix(ncol=2,nrow=11)
if(crv==TRUE){
foldid=rep(1,n1)
for(i in 2:5){
foldid[l1[[i]]]=i
}
}else{
# When not doing our own CV, build a foldid that guarantees all 5 folds
# are non-empty (first 5 samples cover each fold once).
foldid=c( sample(1:5,size=5,replace=FALSE),sample(1:5,size=n1-5,replace=TRUE))
}
if(type.measure=="c"){
typec="class"
}else{
typec="deviance"
}
for(al in 0:10){
cvgela=cv.glmnet(x=X1[,-1],y=as.factor(t1),family="binomial",
type.measure=typec,foldid=foldid,alpha=al*0.1)
cvmin[al+1,]=c( min(cvgela$cvm), cvgela$lambda.min)
}
that=which(cvmin[,1]==min(cvmin[,1]))[1]
lcvfit=cv.glmnet(x=X1[,-1],y=as.factor(t1),family="binomial",
type.measure=typec,foldid=foldid,alpha=(that-1)*0.1)
tmyl=as.numeric(predict(lcvfit,newx=X3[,-1],s="lambda.min",type="class"))
myel<-MissErr(as.vector(tmyl),t3)
#svms predict
svm_inductive<-NA
svm_transductive<-NA
# cpsvmlin=TRUE so svmtran() below can reuse the learned svmlin weights.
svm_inductive=svmtest(jobname=jobname,X1=X1,X2=X2,X3=X3,t1=t1,t3=t3,writeout=writeout,svmlight=svmlight,svm_light_dir=svm_light_dir,svmlin=svmlin,
svmlin.mode=svmlin.mode,svmlin_dir=svmlin_dir,
twd=twd,svmlightC=svmlightC,
svmlinWv=c(svmlinWl2,svmlinWDA),svmlinUv=c(svmlinUl2,svmlinUDA),cpsvmlin=TRUE)
##to store coefficients of the methods
finalresult[[2]]=resu
finalresult[[3]]=as.vector(coef(lcvfit,s="lambda.min"))
system(paste0("perl ",svm_light_dir,"svm2weight.pl ",svm_light_dir,jobname,"model.txt > ",svm_light_dir,jobname,"svmlightw8s.out"))
finalresult[[4]]=read.table(paste0(svm_light_dir,jobname,"svmlightw8s.out"))
finalresult[[5]]= read.table(paste0(svmlin_dir,jobname,"training_examples_ms.weights"))
#in the final inductive svmtest()the cpsvmlin has been set to be TRUE
finalresult[[6]]= read.table(paste0(svmlin_dir,jobname,"training_examples_da.weights"))
#handling transductive learning
if(transductive==TRUE){
#transductive version of our methods
tmyu_tran<-GetPredict(resu,X2)
myeu_tran<-MissErr(tmyu_tran,t2)
#transductive version of svm
svm_transductive=svmtran(jobname=jobname,t2,svmlight,svm_light_dir,svmlin,
svmlin.mode,svmlin_dir,
twd)
# transductive prediction of glmnet
tmyl_tran=as.numeric(predict(lcvfit,newx=X2[,-1],s="lambda.min",type="class"))
myel_tran<-MissErr(as.vector(tmyl_tran),t2)
}
if(transductive==FALSE){
finalresult[[1]]=c(myeu,myel,svm_inductive)
}else{
finalresult[[1]]=c(myeu,myeu_tran,myel,myel_tran,svm_inductive,svm_transductive)
}
return (finalresult)
}
#'runjob
#'
#'This function conducts the experiment and saves the result in the current
#'working directory under the name paste0("error",jobname,"i.RData"); the
#'result is a list named error1 containing one element per experiment round
#'(one round per seed). It is assumed that svm_light and svmlin
#'directories are in the parent directory of the current working directory
#'@param startseed the seed for the first experiment, by default startseed=endseed and only
#'one round of experiment is conducted
#'@param endseed the seed for the last experiment
#'@param jobname the name in the middle of the file name to save the result. default:1
#'@param l_size the size of labeled dataset in simulation mode. default:20
#'@param dim the dimension of the (labeled) dataset in simulation mode. default:300
#'@param testsize the size of the test dataset in simulation mode.default:1200
#'@param sa the parameter for the distance between two gaussian means in simulation mode. default:0.6
#'@param psparse dimensions of effective features in simulation mode
#'@param testbratio proportion of class 1 in the whole unlabeled data set
#'@param u_size size of unlabeled dataset in simulation mode
#'@param rho parameter for covariance matrix, see the paper
#'@param import 0 simulation mode;1 use real data with "dataset" must be assigned
#'@param dataset if "import" parameter is 1,dataset must take 1,2 or 3 indicating the
#'"leukemia","colon",or "prostate" microarray data set.The data has been preprocessed.
#' @keywords run jobs of simulation experiment or on microarray dataset
#' @export
#' @examples
runjob<-function(startseed=100,
endseed=100,
jobname=1,
l_size=20,
dim=300,
testsize=1200,
sa=0.6,
psparse=20,
testbratio=0.5,
u_size=100,
rho=0.5,
import=0,
dataset = 1){
error1=list()
for(s in startseed:endseed){
# BUG FIX: the original passed the undefined globals l_sa / l_psparse here
# instead of this function's own sa / psparse arguments, and contained a
# stray double comma (",,") that injected a missing positional argument.
cerr=testsepa(jobname = jobname, import=import,dim=dim,n1=l_size,n2=u_size,n3=testsize,Total=440,lambda1=2,lambda2=1.2,alpha=0.8,standardize=TRUE,
seed=s,te=10^(-3),
sa=sa,psparse=psparse ,
crv=TRUE,type.measure="c",lambda1.min.ratio=.0001,nlambda1=9,
sparsete=.02,
writeout=TRUE,svmlight=TRUE,svm_light_dir="../svm_light/",svmlin=TRUE,svmlin_dir="../svmlin-v1.0/",svmlin.mode=c("DA","l2hatloss"),
nlambda2=9,palmquiet=TRUE,rho=rho,transductive=TRUE,testbratio=testbratio,dataset=dataset)
error1[[length(error1) + 1]] = cerr
}
save(error1,file=paste0("error",jobname,"i.RData"))
}
# NOTE: the two lines below were a web-scrape artifact (rdrr.io embed-widget
# text) and are not valid R code; they are kept only as a comment so that the
# file parses.
# "Add the following code to your website."
# "For more information on customizing the embed code, read Embedding Snippets."