## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ---- echo=FALSE--------------------------------------------------------------
Sys.setenv(LANGUAGE = "en")
## ---- warning=FALSE, message=FALSE, echo=TRUE, eval=FALSE---------------------
# if (!requireNamespace("BiocManager", quietly = TRUE))
# install.packages("BiocManager")
# if (!require("devtools"))
# install.packages("devtools")
# devtools::install_github("xlucpu/MOVICS")
## ---- echo=FALSE, eval=TRUE, warning=FALSE, message=FALSE---------------------
library("dplyr")
library("knitr")
library("kableExtra")
#library("devtools")
#load_all()
## ---- eval=TRUE, warning=FALSE, message=FALSE---------------------------------
library("MOVICS")
## ---- eval=TRUE---------------------------------------------------------------
# load example data of breast cancer
load(system.file("extdata", "brca.tcga.RData", package = "MOVICS", mustWork = TRUE))
load(system.file("extdata", "brca.yau.RData", package = "MOVICS", mustWork = TRUE))
## ---- eval=TRUE---------------------------------------------------------------
# print name of example data
names(brca.tcga)
names(brca.yau)
# extract multi-omics data
mo.data <- brca.tcga[1:4]
# extract raw count data for downstream analyses
count <- brca.tcga$count
# extract fpkm data for downstream analyses
fpkm <- brca.tcga$fpkm
# extract maf for downstream analysis
maf <- brca.tcga$maf
# extract segmented copy number for downstream analyses
segment <- brca.tcga$segment
# extract survival information
surv.info <- brca.tcga$clin.info
## ---- eval=TRUE---------------------------------------------------------------
# scenario 1: considering we are dealing with an expression data that have 2 rows with NA values
tmp <- brca.tcga$mRNA.expr # get expression data
dim(tmp) # check data dimension
tmp[1,1] <- tmp[2,2] <- NA # set 2 rows with NA values
tmp[1:3,1:3] # check data
elite.tmp <- getElites(dat = tmp,
method = "mad",
na.action = "rm", # NA values will be removed
elite.pct = 1) # elite.pct equals to 1 means all (100%) features after NA removal will be selected even using mad method
dim(elite.tmp$elite.dat) # check dimension again and see that we have removed 2 rows with NA data
elite.tmp <- getElites(dat = tmp,
method = "mad",
na.action = "impute", # NA values will be imputed
elite.pct = 1)
dim(elite.tmp$elite.dat) # all data kept
elite.tmp$elite.dat[1:3,1:3] # NA values have been imputed
# scenario 2: considering we are dealing with continuous data and use mad or sd to select elites
tmp <- brca.tcga$mRNA.expr # get expression data with 500 features
elite.tmp <- getElites(dat = tmp,
method = "mad",
elite.pct = 0.1) # this time only top 10% features with high mad values are kept
dim(elite.tmp$elite.dat) # get 50 elite left
elite.tmp <- getElites(dat = tmp,
method = "sd",
elite.num = 100, # this time only top 100 features with high sd values are kept
elite.pct = 0.1) # this time elite.pct argument will be disabled because elite.num has been already indicated.
dim(elite.tmp$elite.dat) # get 100 elites left
# scenario 3:
# considering we are dealing with continuous data and use pca to select elites
tmp <- brca.tcga$mRNA.expr # get expression data with 500 features
elite.tmp <- getElites(dat = tmp,
method = "pca",
pca.ratio = 0.95) # ratio of PCs is selected
dim(elite.tmp$elite.dat) # get 204 elite (PCs) left
# scenario 4: considering we are dealing with data and use cox to select elite
tmp <- brca.tcga$mRNA.expr # get expression data
elite.tmp <- getElites(dat = tmp,
method = "cox",
surv.info = surv.info, # must provide survival information with 'futime' and 'fustat'
p.cutoff = 0.05,
elite.num = 100) # this time elite.num argument will be disabled because cox method refers to p.cutoff to select elites
dim(elite.tmp$elite.dat) # get 125 elites
table(elite.tmp$unicox$pvalue < 0.05) # 125 genes have nominal pvalue < 0.05 in univariate Cox regression
tmp <- brca.tcga$mut.status # get mutation data
elite.tmp <- getElites(dat = tmp,
method = "cox",
surv.info = surv.info, # must provide survival information with 'futime' and 'fustat'
p.cutoff = 0.05,
elite.num = 100) # this time elite.num argument will be disabled because cox method refers to p.cutoff to select elites
dim(elite.tmp$elite.dat) # get 3 elites
table(elite.tmp$unicox$pvalue < 0.05) # 3 mutations have nominal pvalue < 0.05
# scenario 5: considering we are dealing with mutation data using freq to select elites
tmp <- brca.tcga$mut.status # get mutation data
rowSums(tmp) # check mutation frequency
elite.tmp <- getElites(dat = tmp,
method = "freq", # must set as 'freq'
elite.num = 80, # note: in this scenario elite.num refers to frequency of mutation
elite.pct = 0.1) # discard because elite.num has been already indicated
rowSums(elite.tmp$elite.dat) # only genes that are mutated in over than 80 samples are kept as elites
elite.tmp <- getElites(dat = tmp,
method = "freq", # must set as 'freq'
elite.pct = 0.2) # note: in this scenario elite.pct refers to frequency of mutation / sample size
rowSums(elite.tmp$elite.dat) # only genes that are mutated in over than 0.2*643=128.6 samples are kept as elites
# get mo.data list just like below (not run)
# mo.data <- list(omics1 = elite.tmp$elite.dat,
# omics2 = ...)
## ---- fig.align="center", fig.width=6, fig.height=6, fig.cap="Figure 1. Identification of optimal cluster number by calculating CPI (blue line) and Gaps-statistics (red line) in TCGA-BRCA cohort.", eval=TRUE----
# identify optimal clustering number (may take a while)
optk.brca <- getClustNum(data = mo.data,
is.binary = c(F,F,F,T), # note: the 4th data is somatic mutation which is a binary matrix
try.N.clust = 2:8, # try cluster number from 2 to 8
fig.name = "CLUSTER NUMBER OF TCGA-BRCA")
## ---- eval=TRUE---------------------------------------------------------------
# perform iClusterBayes (may take a while)
iClusterBayes.res <- getiClusterBayes(data = mo.data,
N.clust = 5,
type = c("gaussian","gaussian","gaussian","binomial"),
n.burnin = 1800,
n.draw = 1200,
prior.gamma = c(0.5, 0.5, 0.5, 0.5),
sdev = 0.05,
thin = 3)
## ---- eval=FALSE--------------------------------------------------------------
# iClusterBayes.res <- getMOIC(data = mo.data,
# N.clust = 5,
# methodslist = "iClusterBayes", # specify only ONE algorithm here
# type = c("gaussian","gaussian","gaussian","binomial"), # data type corresponding to the list
# n.burnin = 1800,
# n.draw = 1200,
# prior.gamma = c(0.5, 0.5, 0.5, 0.5),
# sdev = 0.05,
# thin = 3)
## ---- fig.show='hide', message=TRUE, eval=TRUE--------------------------------
# perform multi-omics integrative clustering with the rest of 9 algorithms
moic.res.list <- getMOIC(data = mo.data,
methodslist = list("SNF", "PINSPlus", "NEMO", "COCA", "LRAcluster", "ConsensusClustering", "IntNMF", "CIMLR", "MoCluster"),
N.clust = 5,
type = c("gaussian", "gaussian", "gaussian", "binomial"))
# attach iClusterBayes.res as a list using append() to moic.res.list with 9 results already
moic.res.list <- append(moic.res.list,
list("iClusterBayes" = iClusterBayes.res))
# save moic.res.list to local path
save(moic.res.list, file = "moic.res.list.rda")
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# load(file = "iClusterBayes.res.rda")
# load(file = "moic.res.list.rda")
## ---- fig.align="center", fig.width=7, fig.height=6, fig.cap="Figure 2. Consensus heatmap based on results from 10 multi-omics integrative clustering algorithms with cluster number of 5.", eval=TRUE----
cmoic.brca <- getConsensusMOIC(moic.res.list = moic.res.list,
fig.name = "CONSENSUS HEATMAP",
distance = "euclidean",
linkage = "average")
## ---- fig.align="center", fig.width=5, fig.height=5.5, fig.cap="Figure 3. Quantification of sample similarity using silhoutte score based on consensus ensembles result.", eval=TRUE----
getSilhouette(sil = cmoic.brca$sil, # a sil object returned by getConsensusMOIC()
fig.path = getwd(),
fig.name = "SILHOUETTE",
height = 5.5,
width = 5)
## ---- eval=TRUE---------------------------------------------------------------
# convert beta value to M value for stronger signal
indata <- mo.data
indata$meth.beta <- log2(indata$meth.beta / (1 - indata$meth.beta))
# data normalization for heatmap
plotdata <- getStdiz(data = indata,
halfwidth = c(2,2,2,NA), # no truncation for mutation
centerFlag = c(T,T,T,F), # no center for mutation
scaleFlag = c(T,T,T,F)) # no scale for mutation
## ---- eval=TRUE---------------------------------------------------------------
feat <- iClusterBayes.res$feat.res
feat1 <- feat[which(feat$dataset == "mRNA.expr"),][1:10,"feature"]
feat2 <- feat[which(feat$dataset == "lncRNA.expr"),][1:10,"feature"]
feat3 <- feat[which(feat$dataset == "meth.beta"),][1:10,"feature"]
feat4 <- feat[which(feat$dataset == "mut.status"),][1:10,"feature"]
annRow <- list(feat1, feat2, feat3, feat4)
## ---- fig.align="center", fig.width=9, fig.height=8.5, fig.cap="Figure 4. Comprehensive heatmap of multi-omics integrative clustering by iClusterBayes with annotation of potential drivers.", eval=TRUE----
# set color for each omics data
# if no color list specified all subheatmaps will be unified to green and red color pattern
mRNA.col <- c("#00FF00", "#008000", "#000000", "#800000", "#FF0000")
lncRNA.col <- c("#6699CC", "white" , "#FF3C38")
meth.col <- c("#0074FE", "#96EBF9", "#FEE900", "#F00003")
mut.col <- c("grey90" , "black")
col.list <- list(mRNA.col, lncRNA.col, meth.col, mut.col)
# comprehensive heatmap (may take a while)
getMoHeatmap(data = plotdata,
row.title = c("mRNA","lncRNA","Methylation","Mutation"),
is.binary = c(F,F,F,T), # the 4th data is mutation which is binary
legend.name = c("mRNA.FPKM","lncRNA.FPKM","M value","Mutated"),
clust.res = iClusterBayes.res$clust.res, # cluster results
clust.dend = NULL, # no dendrogram
show.rownames = c(F,F,F,F), # specify for each omics data
show.colnames = FALSE, # show no sample names
annRow = annRow, # mark selected features
color = col.list,
annCol = NULL, # no annotation for samples
annColors = NULL, # no annotation color
width = 10, # width of each subheatmap
height = 5, # height of each subheatmap
fig.name = "COMPREHENSIVE HEATMAP OF ICLUSTERBAYES")
## ---- fig.align="center", fig.width=9, fig.height=9, fig.cap="Figure 5. Comprehensive heatmap of multi-omics integrative clustering by COCA with dendrogram for samples.", eval=TRUE----
# comprehensive heatmap (may take a while)
getMoHeatmap(data = plotdata,
row.title = c("mRNA","lncRNA","Methylation","Mutation"),
is.binary = c(F,F,F,T), # the 4th data is mutation which is binary
legend.name = c("mRNA.FPKM","lncRNA.FPKM","M value","Mutated"),
clust.res = moic.res.list$COCA$clust.res, # cluster results
clust.dend = moic.res.list$COCA$clust.dend, # show dendrogram for samples
color = col.list,
width = 10, # width of each subheatmap
height = 5, # height of each subheatmap
fig.name = "COMPREHENSIVE HEATMAP OF COCA")
## ---- fig.align="center", fig.width=9, fig.height=9.5, fig.cap="Figure 6. Comprehensive heatmap based on consensus across 10 algorithms with clinicopathological annotation.", eval=TRUE----
# extract PAM50, pathologic stage and age for sample annotation
annCol <- surv.info[,c("PAM50", "pstage", "age"), drop = FALSE]
# generate corresponding colors for sample annotation
annColors <- list(age = circlize::colorRamp2(breaks = c(min(annCol$age),
median(annCol$age),
max(annCol$age)),
colors = c("#0000AA", "#555555", "#AAAA00")),
PAM50 = c("Basal" = "blue",
"Her2" = "red",
"LumA" = "yellow",
"LumB" = "green",
"Normal" = "black"),
pstage = c("T1" = "green",
"T2" = "blue",
"T3" = "red",
"T4" = "yellow",
"TX" = "black"))
# comprehensive heatmap (may take a while)
getMoHeatmap(data = plotdata,
row.title = c("mRNA","lncRNA","Methylation","Mutation"),
is.binary = c(F,F,F,T), # the 4th data is mutation which is binary
legend.name = c("mRNA.FPKM","lncRNA.FPKM","M value","Mutated"),
clust.res = cmoic.brca$clust.res, # consensusMOIC results
clust.dend = NULL, # show no dendrogram for samples
show.rownames = c(F,F,F,F), # specify for each omics data
show.colnames = FALSE, # show no sample names
show.row.dend = c(F,F,F,F), # show no dendrogram for features
annRow = NULL, # no selected features
color = col.list,
annCol = annCol, # annotation for samples
annColors = annColors, # annotation color
width = 10, # width of each subheatmap
height = 5, # height of each subheatmap
fig.name = "COMPREHENSIVE HEATMAP OF CONSENSUSMOIC")
## ---- fig.align="center", fig.width=6, fig.height=7, fig.cap="Figure 7. Kaplan-Meier survival curve of 5 identified subtypes of breast cancer in TCGA-BRCA cohort.", eval=TRUE----
# survival comparison
surv.brca <- compSurv(moic.res = cmoic.brca,
surv.info = surv.info,
convt.time = "m", # convert day unit to month
surv.median.line = "h", # draw horizontal line at median survival
xyrs.est = c(5,10), # estimate 5 and 10-year survival
fig.name = "KAPLAN-MEIER CURVE OF CONSENSUSMOIC")
print(surv.brca)
## ---- eval=TRUE---------------------------------------------------------------
clin.brca <- compClinvar(moic.res = cmoic.brca,
var2comp = surv.info, # data.frame needs to summarize (must has row names of samples)
strata = "Subtype", # stratifying variable (e.g., Subtype in this example)
factorVars = c("PAM50","pstage","fustat"), # features that are considered categorical variables
nonnormalVars = "futime", # feature(s) that are considered using nonparametric test
exactVars = "pstage", # feature(s) that are considered using exact test
doWord = TRUE, # generate .docx file in local path
tab.name = "SUMMARIZATION OF CLINICAL FEATURES")
## ---- eval=FALSE--------------------------------------------------------------
# print(clin.brca$compTab)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
clin.brca$compTab %>%
kbl(caption = "Table 1. Comparison of clinical features among 5 identified subtype of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=8, fig.height=3, fig.cap="Figure 8. Mutational OncoPrint of 5 identified subtypes of breast cancer in TCGA-BRCA cohort.", eval=TRUE----
# mutational frequency comparison
mut.brca <- compMut(moic.res = cmoic.brca,
mut.matrix = brca.tcga$mut.status, # binary somatic mutation matrix
doWord = TRUE, # generate table in .docx format
doPlot = TRUE, # draw OncoPrint
freq.cutoff = 0.05, # keep those genes that mutated in at least 5% of samples
p.adj.cutoff = 0.05, # keep those genes with adjusted p value < 0.05 to draw OncoPrint
innerclust = TRUE, # perform clustering within each subtype
annCol = annCol, # same annotation for heatmap
annColors = annColors, # same annotation color for heatmap
width = 6,
height = 2,
fig.name = "ONCOPRINT FOR SIGNIFICANT MUTATIONS",
tab.name = "INDEPENDENT TEST BETWEEN SUBTYPE AND MUTATION")
## ---- eval=FALSE--------------------------------------------------------------
# print(mut.brca)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
mut.brca %>%
kbl(caption = "Table 2. Comparison of mutational frequency among 5 identified subtype of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- eval=TRUE---------------------------------------------------------------
head(maf)
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# head(maf) %>%
# kbl(caption = "Table . Demo of MAF data with eligible column names.") %>%
# kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=6, fig.height=6, fig.cap="Figure 9. Comparison of TMB and TiTv among 5 identified subtypes of breast cancer in TCGA-BRCA cohort.", eval=TRUE----
# compare TMB
tmb.brca <- compTMB(moic.res = cmoic.brca,
maf = maf,
rmDup = TRUE, # remove duplicated variants per sample
rmFLAGS = FALSE, # keep FLAGS mutations
exome.size = 38, # estimated exome size
test.method = "nonparametric", # statistical testing method
fig.name = "DISTRIBUTION OF TMB AND TITV")
## ---- eval=FALSE--------------------------------------------------------------
# head(tmb.brca$TMB.dat)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
head(tmb.brca$TMB.dat) %>%
kbl(caption = "Table 3. Demo of comparison of TMB among 5 identified subtype of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## -----------------------------------------------------------------------------
# change column names of segment data
colnames(segment) <- c("sample","chrom","start","end","value")
## ---- eval=TRUE---------------------------------------------------------------
head(segment)
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# head(segment) %>%
# kbl(caption = "Table . Demo of segmented copy number data with eligible column names.") %>%
# kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=10, fig.height=2.5, fig.cap="Figure 10. Barplot of fraction genome altered among 5 identified subtypes of breast cancer in TCGA-BRCA cohort.", eval=TRUE----
# compare FGA, FGG, and FGL
fga.brca <- compFGA(moic.res = cmoic.brca,
segment = segment,
iscopynumber = FALSE, # this is a segmented copy number file
cnathreshold = 0.2, # threshold to determine CNA gain or loss
test.method = "nonparametric", # statistical testing method
fig.name = "BARPLOT OF FGA")
## ---- eval=FALSE--------------------------------------------------------------
# head(fga.brca$summary)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
head(fga.brca$summary) %>%
kbl(caption = "Table 4. Demo of comparison of fraction genome altered among 5 identified subtype of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.show = "hold", out.width = "50%", fig.align = "default", fig.width=8, fig.height=6, fig.cap="Figure 11. Boxviolins for estimated IC50 of Cisplatin and Paclitaxel among 5 identified subtypes of breast cancer in TCGA-BRCA cohort.", eval=TRUE----
# drug sensitivity comparison
drug.brca <- compDrugsen(moic.res = cmoic.brca,
norm.expr = fpkm[,cmoic.brca$clust.res$samID], # double guarantee sample order
drugs = c("Cisplatin", "Paclitaxel"), # a vector of names of drug in GDSC
tissueType = "breast", # choose specific tissue type to construct ridge regression model
test.method = "nonparametric", # statistical testing method
prefix = "BOXVIOLIN OF ESTIMATED IC50")
## ---- eval=FALSE--------------------------------------------------------------
# head(drug.brca$Cisplatin)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
head(drug.brca$Cisplatin) %>%
kbl(caption = "Table 5. Demo of estimated IC50 for Cisplatin among 5 identified subtype of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=8, fig.height=5, fig.cap="Figure 12. Agreement of 5 identified subtypes of breast cancer with PAM50 classification and pathological stage in TCGA-BRCA cohort.", eval=TRUE----
# customize the factor level for pstage
surv.info$pstage <- factor(surv.info$pstage, levels = c("TX","T1","T2","T3","T4"))
# agreement comparison (support up to 6 classifications include current subtype)
agree.brca <- compAgree(moic.res = cmoic.brca,
subt2comp = surv.info[,c("PAM50","pstage")],
doPlot = TRUE,
box.width = 0.2,
fig.name = "AGREEMENT OF CONSENSUSMOIC WITH PAM50 AND PSTAGE")
## ---- eval=FALSE--------------------------------------------------------------
# print(agree.brca)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
agree.brca %>%
kbl(caption = "Table 6. Agreement of 5 identified subtypes with PAM50 classification and pathological stage in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- eval=TRUE---------------------------------------------------------------
# run DEA with edgeR
runDEA(dea.method = "edger",
expr = count, # raw count data
moic.res = cmoic.brca,
prefix = "TCGA-BRCA") # prefix of figure name
# run DEA with DESeq2
runDEA(dea.method = "deseq2",
expr = count,
moic.res = cmoic.brca,
prefix = "TCGA-BRCA")
# run DEA with limma
runDEA(dea.method = "limma",
expr = fpkm, # normalized expression data
moic.res = cmoic.brca,
prefix = "TCGA-BRCA")
## ---- fig.align="center", fig.width=7, fig.height=6, fig.cap="Figure 13. Heatmap of subtype-specific upregulated biomarkers using edgeR for 5 identified subtypes in TCGA-BRCA cohort.", eval=TRUE----
# choose edgeR result to identify subtype-specific up-regulated biomarkers
marker.up <- runMarker(moic.res = cmoic.brca,
dea.method = "edger", # name of DEA method
prefix = "TCGA-BRCA", # MUST be the same of argument in runDEA()
dat.path = getwd(), # path of DEA files
res.path = getwd(), # path to save marker files
p.cutoff = 0.05, # p cutoff to identify significant DEGs
p.adj.cutoff = 0.05, # padj cutoff to identify significant DEGs
dirct = "up", # direction of dysregulation in expression
n.marker = 100, # number of biomarkers for each subtype
doplot = TRUE, # generate diagonal heatmap
norm.expr = fpkm, # use normalized expression as heatmap input
annCol = annCol, # sample annotation in heatmap
annColors = annColors, # colors for sample annotation
show_rownames = FALSE, # show no rownames (biomarker name)
fig.name = "UPREGULATED BIOMARKER HEATMAP")
## ---- eval=FALSE--------------------------------------------------------------
# # check the upregulated biomarkers
# head(marker.up$templates)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
head(marker.up$templates) %>%
kbl(caption = "Table 7. Demo of subtype-specific upregulated biomarkers for 5 identified subtypes of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=7, fig.height=6, fig.cap="Figure 14. Heatmap of subtype-specific downregulated biomarkers using limma for 5 identified subtypes in TCGA-BRCA cohort.", eval=TRUE----
# choose limma result to identify subtype-specific down-regulated biomarkers
marker.dn <- runMarker(moic.res = cmoic.brca,
dea.method = "limma",
prefix = "TCGA-BRCA",
dirct = "down",
n.marker = 50, # switch to 50
doplot = TRUE,
norm.expr = fpkm,
annCol = annCol,
annColors = annColors,
fig.name = "DOWNREGULATED BIOMARKER HEATMAP")
## ---- eval=TRUE---------------------------------------------------------------
# MUST locate ABSOLUTE path of msigdb file
MSIGDB.FILE <- system.file("extdata", "c5.bp.v7.1.symbols.xls", package = "MOVICS", mustWork = TRUE)
## ---- fig.align="center", fig.width=10, fig.height=8, fig.cap="Figure 15. Heatmap of subtype-specific upregulated pathways using edgeR algorithm for 5 identified subtypes in TCGA-BRCA cohort.", eval=TRUE----
# run GSEA to identify up-regulated GO pathways using results from edgeR
gsea.up <- runGSEA(moic.res = cmoic.brca,
dea.method = "edger", # name of DEA method
prefix = "TCGA-BRCA", # MUST be the same of argument in runDEA()
dat.path = getwd(), # path of DEA files
res.path = getwd(), # path to save GSEA files
msigdb.path = MSIGDB.FILE, # MUST be the ABSOLUTE path of msigdb file
norm.expr = fpkm, # use normalized expression to calculate enrichment score
dirct = "up", # direction of dysregulation in pathway
p.cutoff = 0.05, # p cutoff to identify significant pathways
p.adj.cutoff = 0.25, # padj cutoff to identify significant pathways
gsva.method = "gsva", # method to calculate single sample enrichment score
norm.method = "mean", # normalization method to calculate subtype-specific enrichment score
fig.name = "UPREGULATED PATHWAY HEATMAP")
## ---- eval=FALSE--------------------------------------------------------------
# print(gsea.up$gsea.list$CS1[1:6,3:6])
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
gsea.up$gsea.list$CS1[1:6,3:6] %>%
kbl(caption = "Table 8. Demo of GSEA results for the first cancer subtype (CS1) of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- eval=FALSE--------------------------------------------------------------
# head(round(gsea.up$grouped.es,3))
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
head(round(gsea.up$grouped.es,3)) %>%
kbl(caption = "Table 9. Demo of subtype-specific enrichment scores among 5 identified subtypes of breast cancer in TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=10, fig.height=8, fig.cap="Figure 16. Heatmap of subtype-specific downregulated pathways using limma algorithm for 5 identified subtypes in TCGA-BRCA cohort.", eval=TRUE----
# run GSEA to identify down-regulated GO pathways using results from DESeq2
gsea.dn <- runGSEA(moic.res = cmoic.brca,
dea.method = "deseq2",
prefix = "TCGA-BRCA",
msigdb.path = MSIGDB.FILE,
norm.expr = fpkm,
dirct = "down",
p.cutoff = 0.05,
p.adj.cutoff = 0.25,
gsva.method = "ssgsea", # switch to ssgsea
norm.method = "median", # switch to median
fig.name = "DOWNREGULATED PATHWAY HEATMAP")
## ---- eval=TRUE---------------------------------------------------------------
# MUST locate ABSOLUTE path of gene set file
GSET.FILE <-
system.file("extdata", "gene sets of interest.gmt", package = "MOVICS", mustWork = TRUE)
## ---- fig.align="center", fig.width=8, fig.height=5, fig.cap="Figure 17. Heatmap of enrichment score of gene set of interest for 5 identified subtypes in TCGA-BRCA cohort.", eval=TRUE----
# run GSVA to estimate single sample enrichment score based on given gene set of interest
gsva.res <-
runGSVA(moic.res = cmoic.brca,
norm.expr = fpkm,
gset.gmt.path = GSET.FILE, # ABSOLUTE path of gene set file
gsva.method = "gsva", # method to calculate single sample enrichment score
annCol = annCol,
annColors = annColors,
fig.path = getwd(),
fig.name = "GENE SETS OF INTEREST HEATMAP",
height = 5,
width = 8)
# check raw enrichment score
print(gsva.res$raw.es[1:3,1:3])
# check z-scored and truncated enrichment score
print(gsva.res$scaled.es[1:3,1:3])
## ---- fig.align="center", fig.width=6, fig.height=6, fig.cap="Figure 18. Heatmap of NTP in Yau cohort using subtype-specific upregulated biomarkers identified from TCGA-BRCA cohort", eval=TRUE----
# run NTP in Yau cohort by using up-regulated biomarkers
yau.ntp.pred <- runNTP(expr = brca.yau$mRNA.expr,
templates = marker.up$templates, # the template has been already prepared in runMarker()
scaleFlag = TRUE, # scale input data (by default)
centerFlag = TRUE, # center input data (by default)
doPlot = TRUE, # to generate heatmap
fig.name = "NTP HEATMAP FOR YAU")
## ---- eval=FALSE--------------------------------------------------------------
# head(yau.ntp.pred$ntp.res)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
head(yau.ntp.pred$ntp.res) %>%
kbl(caption = "Table 10. Demo of predicted subtypes in Yau cohort by NTP using subtype-specific upregulated biomarkers identified from TCGA-BRCA cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=6, fig.height=7, fig.cap="Figure 19. Kaplan-Meier survival curve of predicted 5 subtypes of breast cancer in Yau cohort.", eval=TRUE----
# compare survival outcome in Yau cohort
surv.yau <- compSurv(moic.res = yau.ntp.pred,
surv.info = brca.yau$clin.info,
convt.time = "m", # switch to month
surv.median.line = "hv", # switch to both
fig.name = "KAPLAN-MEIER CURVE OF NTP FOR YAU")
print(surv.yau)
## ---- fig.align="center", fig.width=8, fig.height=5, fig.cap="Figure 20. Agreement of predicted 5 subtypes of breast cancer with PAM50 classification in Yau cohort.", eval=TRUE----
# compare agreement in Yau cohort
agree.yau <- compAgree(moic.res = yau.ntp.pred,
subt2comp = brca.yau$clin.info[, "PAM50", drop = FALSE],
doPlot = TRUE,
fig.name = "YAU PREDICTEDMOIC WITH PAM50")
## ---- eval=FALSE--------------------------------------------------------------
# print(agree.yau)
## ---- echo=FALSE, eval=TRUE---------------------------------------------------
agree.yau %>%
kbl(caption = "Table 11. Agreement of 5 predicted subtypes of breast cancer with PAM50 classification in Yau cohort.") %>%
kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- eval=TRUE---------------------------------------------------------------
yau.pam.pred <- runPAM(train.expr = fpkm,
moic.res = cmoic.brca,
test.expr = brca.yau$mRNA.expr)
## ---- eval=TRUE---------------------------------------------------------------
print(yau.pam.pred$IGP)
## ---- fig.show = "hold", out.width = "33.3%", fig.align = "default", fig.width=10, fig.height=9, fig.cap="Figure 21. Consistency heatmap using Kappa statistics.", eval=TRUE----
# predict subtype in discovery cohort using NTP
tcga.ntp.pred <- runNTP(expr = fpkm,
templates = marker.up$templates,
doPlot = FALSE)
# predict subtype in discovery cohort using PAM
tcga.pam.pred <- runPAM(train.expr = fpkm,
moic.res = cmoic.brca,
test.expr = fpkm)
# check consistency between current and NTP-predicted subtype in discovery TCGA-BRCA
runKappa(subt1 = cmoic.brca$clust.res$clust,
subt2 = tcga.ntp.pred$clust.res$clust,
subt1.lab = "CMOIC",
subt2.lab = "NTP",
fig.name = "CONSISTENCY HEATMAP FOR TCGA between CMOIC and NTP")
# check consistency between current and PAM-predicted subtype in discovery TCGA-BRCA
runKappa(subt1 = cmoic.brca$clust.res$clust,
subt2 = tcga.pam.pred$clust.res$clust,
subt1.lab = "CMOIC",
subt2.lab = "PAM",
fig.name = "CONSISTENCY HEATMAP FOR TCGA between CMOIC and PAM")
# check consistency between NTP and PAM-predicted subtype in validation Yau-BRCA
runKappa(subt1 = yau.ntp.pred$clust.res$clust,
subt2 = yau.pam.pred$clust.res$clust,
subt1.lab = "NTP",
subt2.lab = "PAM",
fig.name = "CONSISTENCY HEATMAP FOR YAU")
## ---- eval=TRUE---------------------------------------------------------------
# include original clinical information as `clust.res` and a string value for `mo.method` to a list
pseudo.moic.res <- list("clust.res" = surv.info,
"mo.method" = "PAM50")
# make pseudo samID
pseudo.moic.res$clust.res$samID <- rownames(pseudo.moic.res$clust.res)
# make pseudo clust using a mapping relationship
pseudo.moic.res$clust.res$clust <- sapply(pseudo.moic.res$clust.res$PAM50,
switch,
"Basal" = 1, # relabel Basal as 1
"Her2" = 2, # relabel Her2 as 2
"LumA" = 3, # relabel LumA as 3
"LumB" = 4, # relabel LumnB as 4
"Normal" = 5) # relabel Normal as 5
## ---- eval=TRUE---------------------------------------------------------------
head(pseudo.moic.res$clust.res)
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# head(pseudo.moic.res$clust.res) %>%
# kbl(caption = "Table . Demo of pseudo object for downstream analyses in MOVICS.") %>%
# kable_classic(full_width = TRUE, html_font = "Calibri")
## ---- fig.align="center", fig.width=6, fig.height=7, fig.cap="Figure 22. Kaplan-Meier survival curve of PAM50 subtypes of breast cancer with pseudo input in TCGA-BRCA cohort.", eval=TRUE----
# survival comparison
pam50.brca <- compSurv(moic.res = pseudo.moic.res,
surv.info = surv.info,
convt.time = "y", # convert day unit to year
surv.median.line = "h", # draw horizontal line at median survival
fig.name = "KAPLAN-MEIER CURVE OF PAM50 BY PSEUDO")
## ---- echo=TRUE, eval=TRUE----------------------------------------------------
sessionInfo()
## ---- echo=FALSE, eval=FALSE--------------------------------------------------
# save.image("MOVICS.RData")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.