inst/doc/HIREewas.R

## ----style-knitr, eval=TRUE, echo=FALSE, results="asis"--------------------
BiocStyle::latex()

## ----data_preparation1-----------------------------------------------------
###############################################################
#Generate the EWAS data 
###############################################################

set.seed(123)
###define a function to draw samples from a Dirichlet distribution
rDirichlet <- function(alpha_vec){
	num <- length(alpha_vec)
	temp <- rgamma(num, shape = alpha_vec, rate = 1)
	return(temp / sum(temp))
}

n <- 180     #number of samples 
n1 <- 60    #number of controls
n2 <- 120    #number of cases 

m <- 2000   #number of CpG sites
K <- 3       #underlying cell type number
	
###simulate methylation baseline profiles
#assume cell type 1 and cell type 2 are from the same lineage
#cell type 1
methy1 <- rbeta(m,3,6)
#cell type 2 
methy2 <- methy1 + rnorm(m, sd=0.01)
ind <- sample(1:m, m/5) 
methy2[ind] <- rbeta(length(ind),3,6)

#cell type 3
methy3 <- rbeta(m,3,6)
mu <- cbind(methy1, methy2, methy3)

#number of covariates
p <- 2

###simulate covariates / phenotype (disease status and age)
X <- rbind(c(rep(0, n1),rep(1, n2)), runif(n, min=20, max=50))

###simulate phenotype effects
beta <- array(0, dim=c(m,K,p))

#control vs case
m_common <- 10
max_signal <- 0.15
min_signal <- 0.07

#we allow different signs and magnitudes
signs <- sample(c(-1,1), m_common*K, replace=TRUE)
beta[1:m_common,1:K,1] <- signs * runif(m_common*K, min=min_signal, max=max_signal)

m_seperate <- 10
signs <- sample(c(-1,1), m_seperate*2, replace=TRUE)
beta[m_common+(1:m_seperate),1:2,1] <- signs * 
					runif(m_seperate*2, min=min_signal, max=max_signal)

signs <- sample(c(-1,1), m_seperate, replace=TRUE)
beta[m_common+m_seperate+(1:m_seperate),K,1] <- signs * 
					runif(m_seperate, min=min_signal, max=max_signal)

#age
base <- 20
m_common <- 10
max_signal <- 0.015
min_signal <- 0.007
signs <- sample(c(-1,1), m_common*K, replace=TRUE)
beta[base+1:m_common,1:K,2] <- signs * 
					runif(m_common*K, min=min_signal, max=max_signal)

m_seperate <- 10
signs <- sample(c(-1,1), m_seperate*2, replace=TRUE)
beta[base+m_common+(1:m_seperate),1:2,2] <- signs * 
					runif(m_seperate*2, min=min_signal, max=max_signal)

signs <- sample(c(-1,1), m_seperate, replace=TRUE)
beta[base+m_common+m_seperate+(1:m_seperate),K,2] <- signs * 
					runif(m_seperate, min=min_signal, max=max_signal)

###generate the cellular compositions 
P <- sapply(1:n, function(i){
				if(X[1,i]==0){ #if control
					rDirichlet(c(4,4, 2+X[2,i]/10))
				}else{
					rDirichlet(c(4,4, 5+X[2,i]/10))
				}	
			})

###generate the observed methylation profiles 
Ometh <- NULL
for(i in 1:n){
	utmp <- t(sapply(1:m, function(j){
					tmp1 <- colSums(X[ ,i] * t(beta[j, , ]))
					rnorm(K,mean=mu[j, ]+tmp1,sd=0.01)
				}))
	tmp2 <- colSums(P[ ,i] * t(utmp))
	Ometh <- cbind(Ometh, tmp2 + rnorm(m, sd = 0.01))
}

#constrain methylation values between 0 and 1
Ometh[Ometh > 1] <- 1

Ometh[Ometh < 0] <- 0

## ----data-preparation2-----------------------------------------------------
#the class of the methylation matrix
class(Ometh)

#the values in the methylation matrix
head(Ometh[,1:6])

#the class of the covariate matrix
class(X)

#the values in the covariate matrix
X[ ,1:6]

## ----model1----------------------------------------------------------------
library(HIREewas)
ret_list <- HIRE(Ometh, X, num_celltype=K, tol=10^(-5), num_iter=1000, alpha=0.01)

## ----model2----------------------------------------------------------------
# the class of ret_list
class(ret_list)

#the estimated cellular compositions 
ret_list$P_t[ ,1:6]

#the estimated cell-type-specific methylation baseline profiles
head(ret_list$mu_t)

#the estimated phenotype effects
head(ret_list$beta_t)

#the penalized BIC value
ret_list$pBIC

#the estimated p-values to claim whether a CpG site is at risk
#in some cell type for a covariate

#p value matrix for case/control
head(ret_list$pvalues[ ,1:3])

#p value matrix for age
head(ret_list$pvalues[ ,4:6]) 

## --------------------------------------------------------------------------
#estimated cell compositions vs the truth
par(mfrow=c(1,3))
plot(ret_list$P_t[2, ], P[1, ], xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, col="red")

plot(ret_list$P_t[1, ], P[2, ], xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, col="red")

plot(ret_list$P_t[3, ], P[3, ], xlim=c(0,1), ylim=c(0,1))
abline(a=0, b=1, col="red")

## --------------------------------------------------------------------------
riskCpGpattern(ret_list$pvalues[1:100, K+c(2,1,3)], 
		main_title="Detected association pattern\n with age", hc_row_ind = FALSE)

Try the HIREewas package in your browser

Any scripts or data that you put into this service are public.

HIREewas documentation built on Nov. 8, 2020, 6:04 p.m.