## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
knitr::opts_chunk$set(echo = TRUE)
knitr::opts_chunk$set(eval = TRUE)
#knitr::opts_chunk$set(dev = 'png')
#knitr::opts_chunk$set(dpi=100)
## ----setup2, echo=FALSE,include=FALSE-----------------------------------------
library(plotly)
library(ggplot2)
library(foreach)
library(doParallel)
# library(devtools)
# install_github('koyelucd/betaclust',force = TRUE)
# library(betaclust)
## ----package, include=TRUE, echo=TRUE, message=FALSE,warning=FALSE------------
library(betaclust)
## ----data,include=TRUE, echo=TRUE---------------------------------------------
data(pca.methylation.data)
head(pca.methylation.data)
## ----thresholds,include=TRUE, echo=TRUE---------------------------------------
M <- 3 ## No. of methylation states in a DNA sample
N <- 4 ## No. of patients
R <- 1 ## No. of DNA samples
my.seed <- 190 ## set seed for reproducibility
threshold_out <- betaclust(pca.methylation.data[,2:5], M, N, R,
model_names = c("K..","KN."),
model_selection = "BIC", parallel_process = FALSE,
seed = my.seed)
## ----output0,include=TRUE, echo=TRUE------------------------------------------
summary(threshold_out)
## ----wrapperoutput3,include=TRUE, echo=TRUE,fig.width = 4, fig.height = 4,dev = 'png'----
plot(threshold_out, what = "information criterion", plot_type = "ggplot")
## ----output1,include=TRUE, echo=TRUE------------------------------------------
threshold_points <- threshold_out$optimal_model_results$thresholds
threshold_points$threholds
## ----output2,include=TRUE, echo=TRUE,fig.width = 5, fig.height = 4,dev = 'png'----
plot(threshold_out, what = "fitted density", threshold = TRUE, data = pca.methylation.data[,2:5], patient_number = 1, plot_type = "ggplot")
## ----output3,include=TRUE, echo=TRUE,fig.width = 5, fig.height = 4,dev = 'png',warning=FALSE----
plot(threshold_out, what = "kernel density", threshold = TRUE, data = pca.methylation.data[,2:5], plot_type = "ggplot")
## ----output4,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5, dev = 'png'----
plot(threshold_out, what = "uncertainty")
## ----dmc,include=TRUE, echo=TRUE----------------------------------------------
M <- 3 ## No. of methylation states in a DNA sample
N <- 4 ## No. of patients
R <- 2 ## No. of DNA samples
my.seed <- 190 ## set seed for reproducibility
dmc_output <- betaclust(pca.methylation.data[,2:9], M, N, R,
model_names = "K.R", parallel_process = FALSE,
seed = my.seed)
## ----dmcoutput4b, include=TRUE, echo=TRUE-------------------------------------
print(dmc_output$optimal_model_results$DM$AUC)
print(dmc_output$optimal_model_results$DM$WD)
## ----dmcoutput1,include=TRUE, echo=TRUE---------------------------------------
summary(dmc_output)
## ----dmcoutput2,include=TRUE, echo=TRUE,fig.width = 6.5, fig.height = 5,dev = 'png'----
plot(dmc_output, what = "fitted density", plot_type = "ggplot", data = pca.methylation.data[,2:9], sample_name = c("Benign","Tumour"))
## ----dmcoutput3,include=TRUE, echo=TRUE,fig.width = 6.5, fig.height = 5,dev = 'png'----
plot(dmc_output, what = "kernel density", plot_type = "ggplot", data = pca.methylation.data[,2:9])
## ----dmcoutput4,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5, dev = 'png'----
plot(dmc_output, what = "uncertainty", plot_type = "ggplot")
## ----dmcoutput6,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5, dev = 'png'----
## the dataframe containing methylation data for the CpG sites identified as the most differentially methylated ones
dmc_df <- DMC_identification(dmc_output, data = pca.methylation.data[,2:9],
pca.methylation.data[,1], threshold = 0.06,
metric="WD")
## ----dmcoutput5,include=TRUE, echo=TRUE,fig.width = 6, fig.height = 5,dev = 'png'----
##read the legacy data
data(legacy.data)
head(legacy.data)
## create empty dataframes and matrices to store the DMCs related to the RARB genes
ecdf_rarb <- data.frame()
cpg_rarb <- data.frame(matrix(NA, nrow = 0, ncol = 6))
colnames(cpg_rarb) <- colnames(legacy.data)
## split the UCSC_RefGene_name column to read the gene name related to that CpG site
## select the CpG sites related to the RARB genes
a <- 1
for(i in 1:nrow(legacy.data))
{
str_vec = unlist(strsplit(as.character(legacy.data[i,"UCSC_RefGene_Name"]),"[;]"))
if(length(str_vec) != 0)
{
if(str_vec[[1]] == "RARB")
{
cpg_rarb[a,] <- legacy.data[i,]
a <- a+1
}
}
}
## Read the DMCs related to the RARB genes
ecdf_rarb <- dmc_df[dmc_df$IlmnID %in% cpg_rarb$IlmnID,]
## Plot the ecdf of the selected DMCs
ecdf.betaclust(ecdf_rarb[,2:9], R = 2, sample_name = c("Benign","Tumour"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.