inst/doc/vignette.R

## ----style, echo = FALSE, results = 'asis'------------------------------------
BiocStyle::markdown()

## ----setup, include = FALSE---------------------------------------------------

library(knitr)
# global options
knitr::opts_chunk$set(dpi = 100, echo=TRUE, warning=FALSE, message=FALSE, eval = TRUE,
                      fig.show=TRUE, fig.width= 7,fig.height= 6,fig.align='center', out.width = '80%'#, 
                      #fig.path= 'Figures/'
                      )

# knitr::opts_chunk$set(
#   collapse = TRUE,
#   comment = "#>"
# )

## ----overview, echo=FALSE, message=FALSE--------------------------------------
library(mixOmics)
coul <- color.mixo(1:3)

plot(0, type="n", xlim=c(0,100), ylim=c(0,100), axes=FALSE,
     xlab="",ylab="", main="mixOmics overview")
box()

# PCA
rect(xleft = 20, ybottom = 75, xright = 40, ytop = 95, col=coul[1])
text(5, 85, "PCA")

# PLS-DA
rect(xleft = 20, ybottom = 50, xright = 40, ytop = 70, col=coul[1])
rect(xleft = 43, ybottom = 50, xright = 45, ytop = 70, col=coul[2])
text(5, 60, "PLS-DA")

# PLS
rect(xleft = 20, ybottom = 25, xright = 40, ytop = 45, col=coul[1])
rect(xleft = 43, ybottom = 25, xright = 60, ytop = 45, col=coul[1])
text(5, 35, "PLS")

# DIABLO
rect(xleft = 20, ybottom = 0, xright = 40, ytop = 20, col=coul[1])
rect(xleft = 43, ybottom = 0, xright = 60, ytop = 20, col=coul[1])
points(x=61, y=10, pch=16, col=coul[3], cex=0.5)
points(x=62.5, y=10, pch=16, col=coul[3], cex=0.5)
points(x=64, y=10, pch=16, col=coul[3], cex=0.5)
rect(xleft = 65, ybottom = 0, xright = 80, ytop = 20, col=coul[1])
rect(xleft = 85, ybottom = 0, xright = 88, ytop = 20, col=coul[2])
text(5, 10, "DIABLO")

# legend
rect(xleft = 75, ybottom = 95, xright = 77, ytop = 97, col=coul[1])
text(90, 96, "Quantitative", cex=0.75)
rect(xleft = 75, ybottom = 90, xright = 77, ytop = 92, col=coul[2])
text(90, 91, "Qualitative", cex=0.75)

## ----methods, echo=FALSE, fig.cap="List of methods in mixOmics, sparse indicates methods that perform variable selection", out.width='100%', fig.align='center'----
knitr::include_graphics("XtraFigs/Methods.png")

## ----cheatsheet, echo=FALSE, fig.cap="Main functions and parameters of each method", out.width= '100%', fig.align='center'----
knitr::include_graphics("XtraFigs/cheatsheet.png")

## ---- eval = FALSE------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#   BiocManager::install("mixOmics")

## ---- eval = FALSE------------------------------------------------------------
#  install.packages("devtools")
#  # then load
#  library(devtools)
#  install_github("mixOmicsTeam/mixOmics")

## ---- message=FALSE-----------------------------------------------------------
library(mixOmics)

## ---- eval = FALSE------------------------------------------------------------
#  # from csv file
#  data <- read.csv("your_data.csv", row.names = 1, header = TRUE)
#  
#  # from txt file
#  data <- read.table("your_data.txt", header = TRUE)
#  
#  # then in the argument in the mixOmics functions, just fill with:
#  # X = data

## -----------------------------------------------------------------------------
data(nutrimouse)
X <- nutrimouse$gene

## -----------------------------------------------------------------------------
MyResult.pca <- pca(X)  # 1 Run the method
plotIndiv(MyResult.pca) # 2 Plot the samples
plotVar(MyResult.pca)   # 3 Plot the variables

## -----------------------------------------------------------------------------
MyResult.spca <- spca(X, keepX=c(5,5)) # 1 Run the method
plotIndiv(MyResult.spca)               # 2 Plot the samples
plotVar(MyResult.spca)                 # 3 Plot the variables

## ----overview-PCA, echo=FALSE, message=FALSE----------------------------------
library(mixOmics)
coul <- color.mixo(1:3)

plot(0, type="n", xlim=c(0,100), ylim=c(83,97), axes=FALSE,
     xlab="",ylab="", main="PCA overview")
box()

# PCA
rect(xleft = 20, ybottom = 85, xright = 60, ytop = 95, col=coul[1])
text(5, 90, "PCA")

# legend
rect(xleft = 85, ybottom = 90, xright = 87, ytop = 92, col=coul[1])
text(90, 94, "Quantitative", cex=0.75)
#rect(xleft = 75, ybottom = 90, xright = 77, ytop = 92, col=coul[2])
#text(90, 91, "Qualitative", cex=0.75)


## ----echo=TRUE, message=FALSE-------------------------------------------------
library(mixOmics)
data(liver.toxicity)
X <- liver.toxicity$gene

## ---- fig.show = 'hide'-------------------------------------------------------
MyResult.pca <- pca(X)     # 1 Run the method
plotIndiv(MyResult.pca)    # 2 Plot the samples
plotVar(MyResult.pca)      # 3 Plot the variables

## ----eval=TRUE, fig.show=FALSE------------------------------------------------
plotIndiv(MyResult.pca, group = liver.toxicity$treatment$Dose.Group, 
          legend = TRUE)

## -----------------------------------------------------------------------------
plotIndiv(MyResult.pca, ind.names = FALSE,
          group = liver.toxicity$treatment$Dose.Group,
          pch = as.factor(liver.toxicity$treatment$Time.Group),
          legend = TRUE, title = 'Liver toxicity: genes, PCA comp 1 - 2',
          legend.title = 'Dose', legend.title.pch = 'Exposure')

## ----eval=TRUE, fig.show=TRUE-------------------------------------------------
MyResult.pca2 <- pca(X, ncomp = 3)
plotIndiv(MyResult.pca2, comp = c(1,3), legend = TRUE,
          group = liver.toxicity$treatment$Time.Group,
          title = 'Multidrug transporter, PCA comp 1 - 3')

## -----------------------------------------------------------------------------
plot(MyResult.pca2)
MyResult.pca2

## ----eval=TRUE, fig.show = 'hide'---------------------------------------------
# a minimal example
plotLoadings(MyResult.pca)

## ----eval=TRUE, fig.show = 'hide'---------------------------------------------
# a customized example to only show the top 100 genes 
# and their gene name
plotLoadings(MyResult.pca, ndisplay = 100, 
             name.var = liver.toxicity$gene.ID[, "geneBank"],
             size.name = rel(0.3))

## ----eval= FALSE, fig.show = 'hide'-------------------------------------------
#  plotIndiv(MyResult.pca2,
#            group = liver.toxicity$treatment$Dose.Group, style="3d",
#            legend = TRUE, title = 'Liver toxicity: genes, PCA comp 1 - 2 - 3')

## -----------------------------------------------------------------------------
MyResult.spca <- spca(X, ncomp = 3, keepX = c(15,10,5))                 # 1 Run the method
plotIndiv(MyResult.spca, group = liver.toxicity$treatment$Dose.Group,   # 2 Plot the samples
          pch = as.factor(liver.toxicity$treatment$Time.Group),
          legend = TRUE, title = 'Liver toxicity: genes, sPCA comp 1 - 2',
          legend.title = 'Dose', legend.title.pch = 'Exposure')
plotVar(MyResult.spca, cex = 1)                                        # 3 Plot the variables
# cex is used to reduce the size of the labels on the plot

## -----------------------------------------------------------------------------
selectVar(MyResult.spca, comp = 1)$value

## -----------------------------------------------------------------------------
plotLoadings(MyResult.spca)

## ---- echo=TRUE,results='hide', fig.show=FALSE--------------------------------
selectVar(MyResult.spca, comp=2)$value
plotLoadings(MyResult.spca, comp = 2)

## ----eval=TRUE, echo=FALSE, results='hide'------------------------------------
tune.pca(X)

## ----overview-PLSDA, echo=FALSE, message=FALSE--------------------------------
library(mixOmics)
coul <- color.mixo(1:3)

plot(0, type="n", xlim=c(0,100), ylim=c(83,97), axes=FALSE,
     xlab="",ylab="", main="PLSDA overview")
box()

# PLS-DA
rect(xleft = 20, ybottom = 85, xright = 60, ytop = 95, col=coul[1])
rect(xleft = 63, ybottom = 85, xright = 65, ytop = 95, col=coul[2])
text(5, 90, "PLS-DA")


# legend
rect(xleft = 85, ybottom = 92, xright = 87, ytop = 94, col=coul[1])
text(90, 95, "Quantitative", cex=0.75)
rect(xleft = 85, ybottom = 87, xright = 87, ytop = 89, col=coul[2])
text(90, 90, "Qualitative", cex=0.75)


## ----echo=TRUE, message=FALSE-------------------------------------------------
library(mixOmics)
data(srbct)
X <- srbct$gene
Y <- srbct$class 
summary(Y)
dim(X); length(Y)

## ---- echo=TRUE,results='hide', fig.show = 'hide'-----------------------------
MyResult.splsda <- splsda(X, Y, keepX = c(50,50)) # 1 Run the method
plotIndiv(MyResult.splsda)                          # 2 Plot the samples
plotVar(MyResult.splsda)                            # 3 Plot the variables
selectVar(MyResult.splsda, comp=1)$name             # Selected variables on component 1

## ----fig.show='hide'----------------------------------------------------------
MyResult.plsda <- plsda(X,Y) # 1 Run the method
plotIndiv(MyResult.plsda)    # 2 Plot the samples
plotVar(MyResult.plsda)      # 3 Plot the variables

## -----------------------------------------------------------------------------
plotIndiv(MyResult.splsda, ind.names = FALSE, legend=TRUE,
          ellipse = TRUE, star = TRUE, title = 'sPLS-DA on SRBCT',
          X.label = 'PLS-DA 1', Y.label = 'PLS-DA 2')

## -----------------------------------------------------------------------------
plotVar(MyResult.splsda, var.names=FALSE)

## -----------------------------------------------------------------------------
plotVar(MyResult.plsda, cutoff=0.7)

## -----------------------------------------------------------------------------
background <- background.predict(MyResult.splsda, comp.predicted=2,
                                dist = "max.dist") 
plotIndiv(MyResult.splsda, comp = 1:2, group = srbct$class,
          ind.names = FALSE, title = "Maximum distance",
          legend = TRUE,  background = background)

## ---- echo=TRUE,results='hide', fig.keep='all'--------------------------------
auc.plsda <- auroc(MyResult.splsda)

## -----------------------------------------------------------------------------
MyResult.splsda2 <- splsda(X,Y, ncomp=3, keepX=c(15,10,5))

## ----eval=TRUE----------------------------------------------------------------
selectVar(MyResult.splsda2, comp=1)$value

## -----------------------------------------------------------------------------
plotLoadings(MyResult.splsda2, contrib = 'max', method = 'mean')

## ----eval=FALSE, fig.show='hide'----------------------------------------------
#  plotIndiv(MyResult.splsda2, style="3d")

## ----eval= TRUE---------------------------------------------------------------
MyResult.plsda2 <- plsda(X,Y, ncomp=10)
set.seed(30) # for reproducibility in this vignette, otherwise increase nrepeat
MyPerf.plsda <- perf(MyResult.plsda2, validation = "Mfold", folds = 3, 
                  progressBar = FALSE, nrepeat = 10) # we suggest nrepeat = 50

# type attributes(MyPerf.plsda) to see the different outputs
# slight bug in our output function currently see the quick fix below
#plot(MyPerf.plsda, col = color.mixo(5:7), sd = TRUE, legend.position = "horizontal")

# quick fix
matplot(MyPerf.plsda$error.rate$BER, type = 'l', lty = 1, 
        col = color.mixo(1:3), 
        main = 'Balanced Error rate')
legend('topright', 
       c('max.dist', 'centroids.dist', 'mahalanobis.dist'), 
       lty = 1,
       col = color.mixo(5:7))

## -----------------------------------------------------------------------------
MyPerf.plsda

## ----eval=TRUE----------------------------------------------------------------
list.keepX <- c(5:10,  seq(20, 100, 10))
list.keepX # to output the grid of values tested
set.seed(30) # for reproducbility in this vignette, otherwise increase nrepeat
tune.splsda.srbct <- tune.splsda(X, Y, ncomp = 3, # we suggest to push ncomp a bit more, e.g. 4
                                 validation = 'Mfold',
                                 folds = 3, dist = 'max.dist', progressBar = FALSE,
                                 measure = "BER", test.keepX = list.keepX,
                                 nrepeat = 10)   # we suggest nrepeat = 50

## -----------------------------------------------------------------------------
error <- tune.splsda.srbct$error.rate
ncomp <- tune.splsda.srbct$choice.ncomp$ncomp # optimal number of components based on t-tests on the error rate
ncomp
select.keepX <- tune.splsda.srbct$choice.keepX[1:ncomp]  # optimal number of variables to select
select.keepX
plot(tune.splsda.srbct, col = color.jet(ncomp))

## ---- fig.show = 'hide'-------------------------------------------------------
MyResult.splsda.final <- splsda(X, Y, ncomp = ncomp, keepX = select.keepX)
plotIndiv(MyResult.splsda.final, ind.names = FALSE, legend=TRUE,
          ellipse = TRUE, title="SPLS-DA, Final result")

## ----overview-PLS, echo=FALSE, message=FALSE----------------------------------
library(mixOmics)
coul <- color.mixo(1:3)

plot(0, type="n", xlim=c(0,100), ylim=c(83,97), axes=FALSE,
     xlab="",ylab="", main="PLS overview")
box()

# PLS
rect(xleft = 20, ybottom = 85, xright = 50, ytop = 95, col=coul[1])
rect(xleft = 52, ybottom = 85, xright = 73, ytop = 95, col=coul[1])
text(5, 90, "PLS")


# legend
rect(xleft = 85, ybottom = 92, xright = 87, ytop = 94, col=coul[1])
text(90, 95, "Quantitative", cex=0.75)


## ----echo=TRUE, message=FALSE-------------------------------------------------
library(mixOmics)
data(nutrimouse)
X <- nutrimouse$gene  
Y <- nutrimouse$lipid
dim(X); dim(Y)

## ----fig.show='hide'----------------------------------------------------------
MyResult.spls <- spls(X,Y, keepX = c(25, 25), keepY = c(5,5))  
plotIndiv(MyResult.spls)                                      
plotVar(MyResult.spls)                                        

## -----------------------------------------------------------------------------
plotIndiv(MyResult.spls, group = nutrimouse$genotype,
          rep.space = "XY-variate", legend = TRUE,
          legend.title = 'Genotype',
          ind.names = nutrimouse$diet,
          title = 'Nutrimouse: sPLS')

## -----------------------------------------------------------------------------
plotIndiv(MyResult.spls, group=nutrimouse$diet,
          pch = nutrimouse$genotype,
          rep.space = "XY-variate",  legend = TRUE,
          legend.title = 'Diet', legend.title.pch = 'Genotype',
          ind.names = FALSE, 
          title = 'Nutrimouse: sPLS')

## -----------------------------------------------------------------------------
plotVar(MyResult.spls, cex=c(3,2), legend = TRUE)
coordinates <- plotVar(MyResult.spls, plot = FALSE)

## ----eval= FALSE, fig.show = 'hide'-------------------------------------------
#  X11()
#  cim(MyResult.spls, comp = 1)
#  cim(MyResult.spls, comp = 1, save = 'jpeg', name.save = 'PLScim')

## ----eval=FALSE---------------------------------------------------------------
#  X11()
#  network(MyResult.spls, comp = 1)
#  network(MyResult.spls, comp = 1, cutoff = 0.6, save = 'jpeg', name.save = 'PLSnetwork')
#  # save as graph object for cytoscape
#  myNetwork <- network(MyResult.spls, comp = 1)$gR

## -----------------------------------------------------------------------------
plotArrow(MyResult.spls,group=nutrimouse$diet, legend = TRUE,
          X.label = 'PLS comp 1', Y.label = 'PLS comp 2', legend.title = 'Diet')

## -----------------------------------------------------------------------------
MySelectedVariables <- selectVar(MyResult.spls, comp = 1)
MySelectedVariables$X$name # Selected genes on component 1
MySelectedVariables$Y$name # Selected lipids on component 1

## -----------------------------------------------------------------------------
plotLoadings(MyResult.spls, comp = 1, size.name = rel(0.5))

## ----eval=TRUE----------------------------------------------------------------
MyResult.pls <- pls(X,Y, ncomp = 4)  
set.seed(30) # for reproducibility in this vignette, otherwise increase nrepeat
perf.pls <- perf(MyResult.pls, validation = "Mfold", folds = 5,
                  progressBar = FALSE, nrepeat = 10)
plot(perf.pls, criterion = 'Q2.total')

## ---- eval=TRUE, message='hide'-----------------------------------------------
list.keepX <- c(2:10, 15, 20)
# tuning based on correlations
set.seed(30) # for reproducibility in this vignette, otherwise increase nrepeat
tune.spls.cor <- tune.spls(X, Y, ncomp = 3,
                           test.keepX = list.keepX,
                           validation = "Mfold", folds = 5,
                           nrepeat = 10, progressBar = FALSE,
                           measure = 'cor')
plot(tune.spls.cor, measure = 'cor')

## -----------------------------------------------------------------------------
tune.spls.cor$choice.keepX

## -----------------------------------------------------------------------------
# tuning both X and Y
set.seed(30) # for reproducibility in this vignette, otherwise increase nrepeat
tune.spls.cor.XY <- tune.spls(X, Y, ncomp = 3,
                           test.keepX = c(8, 20, 50),
                           test.keepY = c(4, 8, 16),
                           validation = "Mfold", folds = 5,
                           nrepeat = 10, progressBar = FALSE,
                           measure = 'cor')
## visualise correlations
plot(tune.spls.cor.XY, measure = 'cor')
## visualise RSS
plot(tune.spls.cor.XY, measure = 'RSS')

## ----overview-DIABLO, echo=FALSE, message=FALSE-------------------------------
library(mixOmics)
coul <- color.mixo(1:3)

plot(0, type="n", xlim=c(0,100), ylim=c(83,97), axes=FALSE,
     xlab="",ylab="", main="DIABLO overview")
box()


rect(xleft = 12, ybottom = 85, xright = 30, ytop = 95, col=coul[1])
rect(xleft = 32, ybottom = 85, xright = 45, ytop = 95, col=coul[1])
points(x=47, y=90, pch=16, col='black', cex=0.5)
points(x=48.5, y=90, pch=16, col='black', cex=0.5)
points(x=50, y=90, pch=16, col='black', cex=0.5)
rect(xleft = 52, ybottom = 85, xright = 61, ytop = 95, col=coul[1])
rect(xleft = 63, ybottom = 85, xright = 65, ytop = 95, col=coul[2])
text(5, 90, "DIABLO")


# legend
rect(xleft = 85, ybottom = 92, xright = 87, ytop = 94, col=coul[1])
text(90, 95, "Quantitative", cex=0.75)
rect(xleft = 85, ybottom = 87, xright = 87, ytop = 89, col=coul[2])
text(90, 90, "Qualitative", cex=0.75)


## ----message=FALSE------------------------------------------------------------
library(mixOmics)
data(breast.TCGA)
# extract training data and name each data frame
X <- list(mRNA = breast.TCGA$data.train$mrna, 
          miRNA = breast.TCGA$data.train$mirna, 
          protein = breast.TCGA$data.train$protein)
Y <- breast.TCGA$data.train$subtype
summary(Y)

list.keepX <- list(mRNA = c(16, 17), miRNA = c(18,5), protein = c(5, 5))

## ---- fig.show = 'hide'-------------------------------------------------------
MyResult.diablo <- block.splsda(X, Y, keepX=list.keepX)
plotIndiv(MyResult.diablo)
plotVar(MyResult.diablo)

## ----eval= TRUE---------------------------------------------------------------
MyResult.diablo2 <- block.plsda(X, Y)

## -----------------------------------------------------------------------------
plotIndiv(MyResult.diablo, 
          ind.names = FALSE, 
          legend=TRUE, cex=c(1,2,3),
          title = 'BRCA with DIABLO')

## -----------------------------------------------------------------------------
plotVar(MyResult.diablo, var.names = c(FALSE, FALSE, TRUE),
        legend=TRUE, pch=c(16,16,1))

## -----------------------------------------------------------------------------
plotDiablo(MyResult.diablo, ncomp = 1)

## -----------------------------------------------------------------------------
circosPlot(MyResult.diablo, cutoff=0.7)

## ----eval=FALSE, fig.height=8, fig.width=8------------------------------------
#  # minimal example with margins improved:
#  # cimDiablo(MyResult.diablo, margin=c(8,20))
#  
#  # extended example:
#  cimDiablo(MyResult.diablo, color.blocks = c('darkorchid', 'brown1', 'lightgreen'), comp = 1, margin=c(8,20), legend.position = "right")

## -----------------------------------------------------------------------------
#plotLoadings(MyResult.diablo, contrib = "max")
plotLoadings(MyResult.diablo, comp = 2, contrib = "max")

## ---- eval = FALSE, fig.show = 'hide'-----------------------------------------
#  network(MyResult.diablo, blocks = c(1,2,3),
#          color.node = c('darkorchid', 'brown1', 'lightgreen'),
#          cutoff = 0.6, save = 'jpeg', name.save = 'DIABLOnetwork')

## ---- eval = TRUE-------------------------------------------------------------
set.seed(123) # for reproducibility in this vignette
MyPerf.diablo <- perf(MyResult.diablo, validation = 'Mfold', folds = 5, 
                   nrepeat = 10, 
                   dist = 'centroids.dist')

#MyPerf.diablo  # lists the different outputs

# Performance with Majority vote
#MyPerf.diablo$MajorityVote.error.rate

## ---- echo=TRUE,results='hide',fig.keep='all'---------------------------------
Myauc.diablo <- auroc(MyResult.diablo, roc.block = "miRNA", roc.comp = 2)

## -----------------------------------------------------------------------------
# prepare test set data: here one block (proteins) is missing
X.test <- list(mRNA = breast.TCGA$data.test$mrna, 
                      miRNA = breast.TCGA$data.test$mirna)

Mypredict.diablo <- predict(MyResult.diablo, newdata = X.test)
# the warning message will inform us that one block is missing
#Mypredict.diablo # list the different outputs

## -----------------------------------------------------------------------------
confusion.mat <- get.confusion_matrix(
                truth = breast.TCGA$data.test$subtype, 
                predicted = Mypredict.diablo$MajorityVote$centroids.dist[,2])
kable(confusion.mat)
get.BER(confusion.mat)

## -----------------------------------------------------------------------------
MyResult.diablo$design

## -----------------------------------------------------------------------------
MyDesign <- matrix(c(0, 0.1, 0.3,
                     0.1, 0, 0.9,
                     0.3, 0.9, 0),
                   byrow=TRUE,
                   ncol = length(X), nrow = length(X),
                 dimnames = list(names(X), names(X)))
MyDesign
MyResult.diablo.design <- block.splsda(X, Y, keepX=list.keepX, design=MyDesign)

## -----------------------------------------------------------------------------
sessionInfo()

Try the mixOmics package in your browser

Any scripts or data that you put into this service are public.

mixOmics documentation built on April 15, 2021, 6:01 p.m.