For our benchmark example, we have two groups drawn from bivariate normal distributions: the mean of one group is fixed while the mean of the other shifts to produce different amounts of overlap between the groups. This example is meant to illustrate that each MKL implementation selects the correct kernel. For instance, as the hyperparameter gets smaller (using kernlab's parameterization), the resulting decision boundary is almost linear. On the other hand, as the hyperparameter gets larger, the decision boundary becomes more jagged and circular. Further details are given in "Multiple-kernel learning for genomic data mining and prediction" on bioRxiv, doi: https://doi.org/10.1101/415950.
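As a quick illustration of this effect (an aside, not part of the benchmark analysis itself), the sketch below evaluates kernlab's Gaussian kernel, k(x,y) = exp(-sigma*||x-y||^2), at two values of sigma: a small sigma leaves every kernel entry close to 1, giving nearly linear boundaries, while a large sigma makes the kernel decay quickly with distance, giving more local, jagged boundaries.

library(kernlab)
#Three points: two close together and one far away
x=matrix(c(0,0,
           1,1,
           4,4), ncol=2, byrow=TRUE)
#Kernel matrices under kernlab's parameterization k(x,y)=exp(-sigma*||x-y||^2)
kernelMatrix(rbfdot(sigma=0.05), x) #small sigma: all entries remain close to 1
kernelMatrix(rbfdot(sigma=2), x)    #large sigma: off-diagonal entries drop toward 0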

Loading data

library(RMKL)
library(caret)
data(benchmark.data)
# The data sets are organized in a list. Each entry of the list is a 100x3 matrix, with each row consisting of an x-coordinate, a y-coordinate, and a group label (-1,1).
# Below is a summary of the mean of each group for each mean structure.
lapply(1:length(benchmark.data), function(a) aggregate(x = benchmark.data[[a]][,1:2], by=list(benchmark.data[[a]][,3]), mean))

Using RMKL with benchmark data

 data.mkl=benchmark.data[[4]]
 kernels=rep('radial',2)
 sigma=c(2,1/20)
 train.samples=sample(1:nrow(data.mkl),floor(0.7*nrow(data.mkl)),replace=FALSE)
 degree=sapply(1:length(kernels), function(a) ifelse(kernels[a]=='p',2,0))
 #kernels.gen splits the data into a training and test set, and generates the desired kernel matrices.
 #Here we generate two Gaussian kernel matrices with sigma hyperparameters 2 and 0.05
 K=kernels.gen(data=data.mkl[,1:2],train.samples=train.samples,kernels=kernels,sigma=sigma,degree=degree,scale=rep(0,length(kernels)))
 C=0.05 #Cost parameter for DALMKL
 K.train=K$K.train
 K.test=K$K.test

  # parameters set up
  cri_outer = 0.01 # criterion for the outer cycle; 0.01 is the author's default
  cri_inner = cri_outer/10000 # criterion for the inner cycle; this ratio is the author's default
  calpha = 10 # Lagrangian duality constraint parameter, must be positive; 10 is the author's default
  max_iter_outer = 500 # maximum number of iterations in outer cycle
  max_iter_inner = 500 # maximum number of iterations in inner cycle
  ytr=data.mkl[train.samples,3]
  k.train=simplify2array(K.train) #Converts the list of kernel matrices into an array, which is the format expected by the C++ code
  k.test=simplify2array(K.test)

  #Implement DALMKL with the hinge loss function
  spicy_svmb1n=SpicyMKL(k.train, ytr, 'hinge',C, cri_outer, cri_inner, max_iter_outer, max_iter_inner, calpha)
  spicysvmb1n_results=predict_Spicy(spicy_svmb1n$alpha,spicy_svmb1n$b, k = k.test)
  cm.DALMKL.svm=confusionMatrix(factor(sign(spicysvmb1n_results),levels=c(-1,1)), factor(data.mkl[-train.samples,3],levels=c(-1,1)))
  cm.DALMKL.svm

  #Implement DALMKL with a logistic loss function
  spicy_logib1n=SpicyMKL(k.train, ytr,'logistic' ,C, cri_outer, cri_inner, max_iter_outer, max_iter_inner, calpha)
  spicylogib1n_results=predict_Spicy(spicy_logib1n$alpha,spicy_logib1n$b, k = k.test)
  cm.DALMKL.logi=confusionMatrix(factor(sign(spicylogib1n_results),levels=c(-1,1)), factor(data.mkl[-train.samples,3],levels=c(-1,1)))
  cm.DALMKL.logi

  #Convert the C parameter from the DALMKL implementation to the SimpleMKL and SEMKL implementations to make the four implementations comparable.
  C_SEMKL=C.convert(K.train,spicy_logib1n,C)

  #Implement SimpleMKL
  SimpleMKL.model=SimpleMKL.classification(k=K.train,data.mkl[train.samples,3], penalty=C_SEMKL)
  cm.SimpleMKL=confusionMatrix(factor(prediction.Classification(SimpleMKL.model,ktest=K.test,data.mkl[train.samples,3])$predict,levels=c(-1,1)),
                               factor(data.mkl[-train.samples,3],levels=c(-1,1)))
  cm.SimpleMKL

  #Implement SEMKL
  SEMKL.model=SEMKL.classification(k=K.train,data.mkl[train.samples,3], penalty=C_SEMKL)
  cm.SEMKL=confusionMatrix(factor(prediction.Classification(SEMKL.model,ktest=K.test,data.mkl[train.samples,3])$predict,levels=c(-1,1)),
                           factor(data.mkl[-train.samples,3],levels=c(-1,1)))
  cm.SEMKL
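
  #A quick side-by-side check (a minimal sketch using the confusion matrices created above):
  #caret stores the test-set accuracy in the $overall component of each confusion matrix
  sapply(list(DALMKL.svm=cm.DALMKL.svm, DALMKL.logi=cm.DALMKL.logi,
              SimpleMKL=cm.SimpleMKL, SEMKL=cm.SEMKL),
         function(cm) cm$overall['Accuracy'])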

  #Selecting the data set with moderate group overlap to show the benefit of MKL over SVM
plot(benchmark.data[[4]][,-3],col=benchmark.data[[4]][,3]+3,main='Benchmark Data',pch=19,xlab='X1', ylab='X2')

#Using the radial kernel with each hyperparameter individually and then in a combined analysis
C=100
K=kernels.gen(data=benchmark.data[[4]][,1:2],train.samples=train.samples,kernels=kernels,
              sigma=sigma,degree=degree,scale=rep(0,length(kernels)))
K.train=K$K.train
K.test=K$K.test
#MKL with only one candidate kernel is equivalent to SVM
#SVM with radial hyperparameter 2
rbf2=SEMKL.classification(k = list(K.train[[1]]),outcome = benchmark.data[[4]][train.samples,3],penalty = C)

#SVM with radial hyperparameter 1/20
rbf.05=SEMKL.classification(k=list(K.train[[2]]),outcome = benchmark.data[[4]][train.samples,3],penalty = C)
domain=seq(1,8,0.1) #x- and y-values covering the plotting region
grid=cbind(c(replicate(length(domain), domain)),c(t(replicate(length(domain), domain)))) #all (x,y) pairs on the grid
predict.data=rbind(benchmark.data[[4]][train.samples,1:2],grid)
kernels.predict=kernels.gen(data=predict.data,train.samples=1:length(train.samples),kernels=kernels,
            sigma=sigma,degree=degree,scale=rep(0,length(kernels)))

predict2=prediction.Classification(rbf2, ktest = list(kernels.predict$K.test[[1]]),
                          train.outcome = benchmark.data[[4]][train.samples,3])

predict.05=prediction.Classification(rbf.05, ktest = list(kernels.predict$K.test[[2]]),
                                   train.outcome = benchmark.data[[4]][train.samples,3])

#Contour plots of the predicted values from the models where a single kernel was used
filled.contour(domain,domain, matrix(predict2$predict,length(domain),length(domain)),
               col = colorRampPalette(c('indianred1','lightskyblue'))(2),
               main='Classification Rule Hyperparameter=2',
               plot.axes={points(benchmark.data[[4]][,-3],col=benchmark.data[[4]][,3]+3,pch=18,cex=1.5)})


filled.contour(domain,domain, matrix(predict.05$predict,length(domain),length(domain)),
               col = colorRampPalette(c('indianred1','lightskyblue'))(2),
               main='Classification Rule Hyperparameter=0.05',
               plot.axes={points(benchmark.data[[4]][,-3],col=benchmark.data[[4]][,3]+3,pch=18,cex=1.5)})
###################################################################################################
#Use the optimal model with the combination of kernels

predict.combined=prediction.Classification(SEMKL.model, ktest = kernels.predict$K.test,
                                   train.outcome = benchmark.data[[4]][train.samples,3])
filled.contour(domain,domain, matrix(predict.combined$predict,length(domain),length(domain)),
               col = colorRampPalette(c('indianred1','lightskyblue'))(2),
               main='Classification Rule MKL',
               plot.axes={points(benchmark.data[[4]][,-3],col=benchmark.data[[4]][,3]+3,pch=18,cex=1.5)})

Realizations that fall in the light blue region will be classified as 1, while points that fall in the light red region will be classified as -1. The points are the original observations. Notice that the two groups overlap, and that a radial kernel with a large hyperparameter is able to classify correctly in areas with overlap, while a radial kernel with a small hyperparameter cannot. The kernel weights for this example are 0.9997 for the radial kernel with hyperparameter 2, and 0.0002 for the radial kernel with hyperparameter 1/20.
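
The kernel weights above can be read off the fitted object. The sketch below assumes that SEMKL.classification stores the weights in a component named gamma; names(SEMKL.model) shows the components actually returned by your version of RMKL.

names(SEMKL.model) #inspect the components returned by SEMKL.classification
SEMKL.model$gamma  #assumed location of the kernel weights, one per candidate kernel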

TCGA small example

These data are described in man/tcga.small.Rd; the values are log2(miRNA expression value + 1).

rm(list=ls())
library(RMKL)
library(kernlab)
library(caret)

data(tcga.small)
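
As noted above, each row of tcga.small is a sample, the last column is the class label coded -1/1, and the remaining columns are the log2-transformed miRNA expression values. A quick check of that layout:

dim(tcga.small)                       #samples x (miRNA features + outcome column)
table(tcga.small[,ncol(tcga.small)])  #class labels, coded -1 and 1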

normalized=apply(tcga.small,2,function(a) a/sqrt(sum(a^2))) #scale each column to unit L2 norm (the outcome column is dropped below)


kernels=c('linear', rep('radial',8))
sigma=c(0,10^(-5:2))
training.samples=sample(1:nrow(normalized), 200, replace=FALSE)
K=kernels.gen(data=normalized[,-ncol(normalized)], sigma=sigma, degree=rep(0,length(kernels)),
              scale=rep(0,length(kernels)), kernels=kernels, train.samples=training.samples)


K.train=K$K.train
K.test=K$K.test
K.train.dal=simplify2array(K.train)
K.test.dal=simplify2array(K.test)


outcome=tcga.small[,ncol(tcga.small)]
y.train=outcome[training.samples]
cri_out = .01
cri_in = .000001
maxiter_out = maxiter_in = 500
C = 0.5*10^c(-2:0)
calpha = 10

mod.hinge=lapply(C, function(a){
mod.hinge = SpicyMKL(K.train.dal, y.train,'hinge' , a , cri_out, cri_in, 
                     maxiter_out, maxiter_in, calpha)
prediction.hinge = predict_Spicy(mod.hinge$alpha,mod.hinge$b, K.test.dal)
cm=confusionMatrix(factor(sign(prediction.hinge),levels=c(-1,1)),
                   factor(outcome[-training.samples],levels=c(-1,1)))
return(list(model=mod.hinge,cm=cm))
})


mod.logistic=lapply(C, function(a){
  mod.logistic = SpicyMKL(K.train.dal, y.train,'logistic' , a , cri_out, cri_in, 
                       maxiter_out, maxiter_in, calpha)
  prediction.logistic = predict_Spicy(mod.logistic$alpha,mod.logistic$b, K.test.dal)
  cm=confusionMatrix(factor(sign(prediction.logistic),levels=c(-1,1)),
                     factor(outcome[-training.samples],levels=c(-1,1)))
  return(list(model=mod.logistic,cm=cm))
})

C.SEMKL.logistic=sapply(1:length(mod.logistic), function(b) C.convert(K.train, mod.logistic[[b]]$model,C[b]))

C.SEMKL.hinge=sapply(1:length(mod.hinge), function(b) C.convert(K.train, mod.hinge[[b]]$model,C[b]))

SimpleMKL.results=lapply(C.SEMKL.hinge, function(b){
  SimpleMKL=SimpleMKL.classification(k=K.train, outcome=as.numeric(as.character(outcome[training.samples])), penalty=b, tol = 10^(-4), max.iters = 1000)
  cm.SimpleMKL=confusionMatrix(factor(prediction.Classification(SimpleMKL,ktest=K.test,as.numeric(as.character(outcome[training.samples])))$predict,levels=c(-1,1)),
                               factor(outcome[-training.samples],levels=c(-1,1)))
  return(list(cm=cm.SimpleMKL,model=SimpleMKL))})


SEMKL.results=lapply(C.SEMKL.hinge, function(b){
  SEMKL=SEMKL.classification(k=K.train, outcome=as.numeric(as.character(outcome[training.samples])), penalty=b, tol = 10^(-4), max.iters = 1000)
  cm.SEMKL=confusionMatrix(factor(prediction.Classification(SEMKL,ktest=K.test,as.numeric(as.character(outcome[training.samples])))$predict,levels=c(-1,1)),
                               factor(outcome[-training.samples],levels=c(-1,1)))
  return(list(cm=cm.SEMKL,model=SEMKL))})
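
To compare the implementations across the cost grid, the test-set accuracies can again be collected from the stored confusion matrices (a minimal sketch using the result lists built above).

#Test-set accuracy for each implementation at each value of C
data.frame(C=C,
           DALMKL.hinge=sapply(mod.hinge, function(m) m$cm$overall['Accuracy']),
           DALMKL.logistic=sapply(mod.logistic, function(m) m$cm$overall['Accuracy']),
           SimpleMKL=sapply(SimpleMKL.results, function(m) m$cm$overall['Accuracy']),
           SEMKL=sapply(SEMKL.results, function(m) m$cm$overall['Accuracy']))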

