Introduction

This document presents an example of the usage of the MLML2R package for R.

Install the R package using the following commands on the R console:

install.packages("devtools")
devtools::install_github("samarafk/MLML2R")
library(MLML2R)

The function MLML provides maximum likelihood estimates (MLE) for 5-hmC and 5-mC levels using data from any combination of two of the methods: BS-seq, TAB-seq or oxBS-seq, or combination of all the three methods.

The algorithm implemented in the MLML function is based on the Expectation-Maximization (EM) algorithm proposed by Qu et al. (2013). In addition, our implementation is optimized, since we derived the exact constrained MLE for 5-mC or 5-hmC levels, and the iterative EM algorithm is not needed. Our improved formulation can, thus, decrease analytic processing time and computational burden, common bottlenecks when processing single-base profiling data from thousands of samples.

Furthermore, our routine is flexible and can be used with both next generation sequencing and Infinium Methylation microarray data in the R-statistical language.

True proportion

library(MLML2R)

True proportions used in the data simulation (true_parameters_sim2 dataset).

par(mfrow =c(1,3))

plot(density(true_parameters_sim2$p_h),main= "True 5-hmC",xlab=" ",xlim=c(0,1),ylim=c(0,15))

plot(density(true_parameters_sim2$p_m),main= "True 5-mC",xlab=" ",xlim=c(0,1),ylim=c(0,5))

plot(density(true_parameters_sim2$p_u),main= "True 5-C",ylim=c(0,5),xlab=" ",xlim=c(0,1))

Dataset used in this example was simulated from the above true proportions.

BS+oxBS+TAB data

# via EM algortihm
results_em <- MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
                   Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,
                   Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2,tol=0.00001,iterative = TRUE)

# via Lagrange multiplier approximation
results_lag <- MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
                   Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,
                   Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2)
plot(results_em$hmC,results_lag$hmC)
all.equal(results_em$hmC,results_lag$hmC)

all.equal(results_em$C,results_lag$C)

all.equal(results_em$hmC[,1],true_parameters_sim2$p_h)

all.equal(results_em$mC[,1],true_parameters_sim2$p_m)


all.equal(results_lag$hmC[,1],true_parameters_sim2$p_h)

all.equal(results_lag$mC[,1],true_parameters_sim2$p_m)
library(microbenchmark)
mbm = microbenchmark(
   lagrange = MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
                                                             Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,
                                                            Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2),
   EM = MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
                            Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,
                            Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2,tol=0.0001,iterative = TRUE),
   times=10)
mbm



T = MethylatedBS_sim2
U = UnMethylatedBS_sim2
L = UnMethylatedOxBS_sim2
M = MethylatedOxBS_sim2
G = UnMethylatedTAB_sim2
H = MethylatedTAB_sim2

results_oxBS_TAB_BS_exact <- list()

results_oxBS_TAB_BS_exact$mC <- M/(M+L)
results_oxBS_TAB_BS_exact$hmC <- H/(H+G)
results_oxBS_TAB_BS_exact$C <- U/(U+T)


range(results_oxBS_TAB_BS_exact$mC)
range(results_oxBS_TAB_BS_exact$hmC)
range(results_oxBS_TAB_BS_exact$C)
par(mfrow =c(1,3))

plot(density(results_oxBS_TAB_BS_exact$hmC[,1]),main= "5-hmC using exact unconstrained",xlab=" ",ylim=c(0,15),xlim=c(0,1))
lines(density(results_oxBS_TAB_BS_exact$hmC[,2]),col=2)
lines(density(results_oxBS_TAB_BS_exact$hmC[,3]),col=3)
lines(density(results_oxBS_TAB_BS_exact$hmC[,4]),col=4)


plot(density(results_oxBS_TAB_BS_exact$mC[,1]),main= "5-mC using exact unconstrained",xlab=" ",ylim=c(0,5),xlim=c(0,1))
lines(density(results_oxBS_TAB_BS_exact$mC[,2]),col=2)
lines(density(results_oxBS_TAB_BS_exact$mC[,3]),col=3)
lines(density(results_oxBS_TAB_BS_exact$mC[,4]),col=4)


plot(density(results_oxBS_TAB_BS_exact$C[,1]),main= "5-C using exact unconstrained",ylim=c(0,5),xlab=" ",xlim=c(0,1))
lines(density(results_oxBS_TAB_BS_exact$C[,2]),col=2)
lines(density(results_oxBS_TAB_BS_exact$C[,3]),col=3)
lines(density(results_oxBS_TAB_BS_exact$C[,4]),col=4)
par(mfrow =c(1,3))

plot(density(results_lag$hmC[,1]),main= "5-hmC using Lagrange",xlab=" ",xlim=c(0,1),ylim=c(0,15))
lines(density(results_lag$hmC[,2]),col=2)
lines(density(results_lag$hmC[,2]),col=3)
lines(density(results_lag$hmC[,2]),col=4)


plot(density(results_lag$mC[,1]),main= "5-mC using Lagrange",xlab=" ",xlim=c(0,1),ylim=c(0,5))
lines(density(results_lag$mC[,2]),col=2)
lines(density(results_lag$mC[,2]),col=3)
lines(density(results_lag$mC[,2]),col=4)


plot(density(results_lag$C[,1]),main= "5-C using Lagrange",ylim=c(0,5),xlab=" ",xlim=c(0,1))
lines(density(results_lag$C[,2]),col=2)
lines(density(results_lag$C[,2]),col=3)
lines(density(results_lag$C[,2]),col=4)
par(mfrow =c(1,3))

plot(density(results_em$hmC[,1]),main= "5-hmC using EM",xlab=" ",xlim=c(0,1),ylim=c(0,15))
lines(density(results_em$hmC[,2]),col=2)
lines(density(results_em$hmC[,3]),col=3)
lines(density(results_em$hmC[,4]),col=4)


plot(density(results_em$mC[,1]),main= "5-mC using EM",xlab=" ",xlim=c(0,1),ylim=c(0,5))
lines(density(results_em$mC[,2]),col=2)
lines(density(results_em$mC[,3]),col=3)
lines(density(results_em$mC[,4]),col=4)


plot(density(results_em$C[,1]),main= "5-C using EM",ylim=c(0,5),xlab=" ",xlim=c(0,1))
lines(density(results_em$C[,2]),col=2)
lines(density(results_em$C[,3]),col=3)
lines(density(results_em$C[,4]),col=4)

BS+oxBS data

# obtain MLE via EM-algorithm for BS+oxBS:
results_em <- MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,iterative=TRUE,tol=0.0001)

# obtain constrained exact MLE for BS+oxBS:
results_exact <- MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2)

all.equal(results_em$hmC,results_exact$hmC)
plot(results_em$hmC,results_exact$hmC)
par(mfrow =c(1,3))

plot(density(results_em$hmC[,1]),main= "5-hmC using exact",xlab=" ",xlim=c(0,1),ylim=c(0,15))
lines(density(results_em$hmC[,2]),col=2)
lines(density(results_em$hmC[,3]),col=3)
lines(density(results_em$hmC[,4]),col=4)


plot(density(results_em$mC[,1]),main= "5-mC using exact",xlab=" ",xlim=c(0,1),ylim=c(0,5))
lines(density(results_em$mC[,2]),col=2)
lines(density(results_em$mC[,3]),col=3)
lines(density(results_em$mC[,4]),col=4)


plot(density(results_em$C[,1]),main= "5-C using exact",ylim=c(0,5),xlab=" ",xlim=c(0,1))
lines(density(results_em$C[,2]),col=2)
lines(density(results_em$C[,3]),col=3)
lines(density(results_em$C[,4]),col=4)

BS+TAB data

# obtain MLE via EM-algorithm for BS+TAB:
results_em <- MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2,tol=0.0001)

# obtain constrained exact MLE for BS+TAB:
results_exact <- MLML(Tc = MethylatedBS_sim2 , Uc = UnMethylatedBS_sim2,
Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2)

all.equal(results_em$hmC,results_exact$hmC)
plot(results_em$hmC,results_exact$hmC)
par(mfrow =c(1,3))

plot(density(results_em$hmC[,1]),main= "5-hmC using exact",xlab=" ",xlim=c(0,1),ylim=c(0,15))
lines(density(results_em$hmC[,2]),col=2)
lines(density(results_em$hmC[,3]),col=3)
lines(density(results_em$hmC[,4]),col=4)


plot(density(results_em$mC[,1]),main= "5-mC using exact",xlab=" ",xlim=c(0,1),ylim=c(0,5))
lines(density(results_em$mC[,2]),col=2)
lines(density(results_em$mC[,3]),col=3)
lines(density(results_em$mC[,4]),col=4)


plot(density(results_em$C[,1]),main= "5-C using exact",ylim=c(0,5),xlab=" ",xlim=c(0,1))
lines(density(results_em$C[,2]),col=2)
lines(density(results_em$C[,3]),col=3)
lines(density(results_em$C[,4]),col=4)

TAB+oxBS data

# obtain MLE via EM-algorithm for oxBS+TAB:
results_em <- MLML(Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,
Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2,iterative=TRUE,tol=0.0001)

# obtain constrained exact MLE for oxBS+TAB:
results_exact <- MLML(Lc = UnMethylatedOxBS_sim2, Mc = MethylatedOxBS_sim2,
Gc = UnMethylatedTAB_sim2, Hc = MethylatedTAB_sim2)

all.equal(results_em$hmC,results_exact$hmC)
plot(results_em$hmC,results_exact$hmC)
par(mfrow =c(1,3))

plot(density(results_em$hmC[,1]),main= "5-hmC using exact",xlab=" ",xlim=c(0,1),ylim=c(0,15))
lines(density(results_em$hmC[,2]),col=2)
lines(density(results_em$hmC[,3]),col=3)
lines(density(results_em$hmC[,4]),col=4)


plot(density(results_em$mC[,1]),main= "5-mC using exact",xlab=" ",xlim=c(0,1),ylim=c(0,5))
lines(density(results_em$mC[,2]),col=2)
lines(density(results_em$mC[,3]),col=3)
lines(density(results_em$mC[,4]),col=4)


plot(density(results_em$C[,1]),main= "5-C using exact",ylim=c(0,5),xlab=" ",xlim=c(0,1))
lines(density(results_em$C[,2]),col=2)
lines(density(results_em$C[,3]),col=3)
lines(density(results_em$C[,4]),col=4)


samarafk/MLML2R documentation built on Oct. 19, 2019, 6:04 p.m.