Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,comment = "#>",
echo = TRUE,cache = TRUE,
dev = "png")
## ----setup,warning = FALSE----------------------------------------------------
# Load libraries
req_packs = c("ggplot2","smarter","CSeQTL")
for(pack in req_packs){
library(package = pack,character.only = TRUE)
}
# List package's exported functions
ls("package:CSeQTL")
# Fix seed
set.seed(2)
## ----fig.dim = c(8,6)---------------------------------------------------------
# sample size
NN = 3e2
# fold-change between q-th and 1st cell type
true_KAPPA = c(1,3,1)
# eQTL effect size per cell type,
# fold change between B and A allele
true_ETA = c(1,1,1)
# cis/trans effect size
true_ALPHA = c(1,1,1)
# count number of cell types
QQ = length(true_KAPPA)
# TReC model overdispersion
true_PHI = 0.1
# ASReC model overdispersion
true_PSI = 0.05
# Simulate cell type proportions
tRHO = gen_true_RHO(wRHO = 1,NN = NN,QQ = QQ)
plot_RHO(RHO = tRHO)
boxplot(tRHO,xlab = "Cell Type",ylab = "Proportion")
# Simulate a data object for a gene and SNP
sim = CSeQTL_dataGen(NN = NN,MAF = 0.3,true_BETA0 = log(1000),
true_KAPPA = true_KAPPA,true_ETA = true_ETA,true_PHI = true_PHI,
true_PSI = true_PSI,prob_phased = 0.05,true_ALPHA = true_ALPHA,
RHO = tRHO,cnfSNP = TRUE,show = FALSE)
names(sim)
# TReC, ASReC, haplotype 2 counts
sim$dat[1:3,]
# Batch covariates including the intercept
sim$XX[1:3,]
sim$dat$SNP = sim$true_SNP
sim$dat$SNP = factor(sim$dat$SNP,
levels = sort(unique(sim$dat$SNP)),
labels = c("AA","AB","BA","BB"))
# TReC vs SNP
ggplot(data = sim$dat,
mapping = aes(x = SNP,y = log10(total + 1))) +
geom_violin(aes(fill = SNP)) + geom_jitter(width = 0.25) +
geom_boxplot(width = 0.1) +
xlab("Phased Genotype") + ylab("log10(TReC + 1)") +
theme(legend.position = "right",
axis.title = element_text(size = 15),
axis.text = element_text(size = 12))
# TReC vs ASReC
ggplot(data = sim$dat,
mapping = aes(x = total,y = total_phased,color = SNP)) +
geom_point() + xlab("Total Read Count (TReC)") +
ylab("Total Allele-specific Read Count (ASReC)") +
theme(legend.position = "right",
axis.title = element_text(size = 15),
axis.text = element_text(size = 12))
## ----R.options = list(width = 200)--------------------------------------------------------------------------------------------------------------------------------------------------------------------
summary(sim$XX)
mRHO = matrix(1,NN,1)
colnames(mRHO) = "Bulk"
cistrans_thres = 0.01
res = c()
for(trec_only in c(TRUE,FALSE)){
for(neg_binom in c(TRUE,FALSE)){
for(beta_binom in c(TRUE,FALSE)){
cat(sprintf("%s: trec_only = %s, neg_binom = %s, beta_binom = %s ...\n",
date(),trec_only,neg_binom,beta_binom))
PHASE = sim$dat$phased * ifelse(trec_only,0,1)
upPHI = ifelse(neg_binom,1,0)
upPSI = ifelse(beta_binom,1,0)
sout = CSeQTL_smart(TREC = sim$dat$total,hap2 = sim$dat$hap2,
ASREC = sim$dat$total_phased,PHASE = PHASE,
SNP = sim$true_SNP,RHO = mRHO,XX = sim$XX,upPHI = upPHI,
upKAPPA = 1,upETA = 1,upPSI = upPSI,upALPHA = 1,
iFullModel = FALSE,trim = FALSE,thres_TRIM = 10,
hypotest = TRUE,swap = FALSE,numAS = 5,numASn = 5,
numAS_het = 5,cistrans_thres = cistrans_thres)
# sout$HYPO
res = rbind(res,smart_df(Model = ifelse(trec_only,"TReC-only","TReCASE"),
TReC_Dist = ifelse(neg_binom,"Negative Binomial","Poisson"),
ASReC_Dist = ifelse(beta_binom,"Beta-Binomial","Binomial"),
sout$res))
rm(sout)
}}}
num_vars = which(unlist(lapply(res,function(xx) class(xx))) == "numeric")
res[,num_vars] = apply(res[,num_vars],2,function(xx) smart_SN(x = xx,digits = 2))
res$Interpret = apply(res[,c("cistrans","PVAL_eQTL")],1,function(xx){
ct = xx[1]
pval = as.numeric(xx[2])
ifelse(pval < 0.05,sprintf("%s eQTL",ct),"no eQTL")
})
res$Correct_Model = apply(res[,c("TReC_Dist","ASReC_Dist")],1,function(xx){
chk_trec = (( true_PHI > 0 & xx[1] == "Negative Binomial" )
| ( true_PHI == 0 & xx[1] == "Poisson" ))
chk_trec
chk_asrec = (( true_PSI > 0 & xx[2] == "Beta-Binomial" )
| ( true_PSI == 0 & xx[2] == "Binomial" ))
chk_trec = ifelse(chk_trec,"TReC-Yes","TReC-No")
chk_asrec = ifelse(chk_asrec,"ASReC-Yes","ASReC-No")
sprintf("%s;%s",chk_trec,chk_asrec)
})
# Simplified output
smart_rmcols(res,c("LRT_trec","LRT_trecase","LRT_cistrans","cistrans"))
## ----R.options = list(width = 200)--------------------------------------------------------------------------------------------------------------------------------------------------------------------
cistrans_thres = 0.01
res = c()
for(trec_only in c(TRUE,FALSE)){
for(neg_binom in c(TRUE,FALSE)){
for(beta_binom in c(TRUE,FALSE)){
cat(sprintf("%s: trec_only = %s, neg_binom = %s, beta_binom = %s ...\n",
date(),trec_only,neg_binom,beta_binom))
PHASE = sim$dat$phased * ifelse(trec_only,0,1)
upPHI = ifelse(neg_binom,1,0)
upPSI = ifelse(beta_binom,1,0)
upKAPPA = rep(1,QQ)
upETA = upKAPPA
upALPHA = upETA
sout = CSeQTL_smart(TREC = sim$dat$total,hap2 = sim$dat$hap2,
ASREC = sim$dat$total_phased,PHASE = PHASE,
SNP = sim$true_SNP,RHO = sim$true_RHO,XX = sim$XX,upPHI = upPHI,
upKAPPA = upKAPPA,upETA = upETA,upPSI = upPSI,upALPHA = upALPHA,
iFullModel = FALSE,trim = FALSE,thres_TRIM = 10,
hypotest = TRUE,swap = FALSE,numAS = 5,numASn = 5,
numAS_het = 5,cistrans_thres = cistrans_thres)
# sout$HYPO
res = rbind(res,smart_df(Model = ifelse(trec_only,"TReC-only","TReCASE"),
TReC_Dist = ifelse(neg_binom,"Negative Binomial","Poisson"),
ASReC_Dist = ifelse(beta_binom,"Beta-Binomial","Binomial"),
sout$res))
rm(sout)
}}}
num_vars = which(unlist(lapply(res,function(xx) class(xx))) == "numeric")
res[,num_vars] = apply(res[,num_vars],2,function(xx) smart_SN(x = xx,digits = 2))
res$Interpret = apply(res[,c("cistrans","PVAL_eQTL")],1,function(xx){
ct = xx[1]
pval = as.numeric(xx[2])
ifelse(pval < 0.05,sprintf("%s eQTL",ct),"no eQTL")
})
res$Correct_Model = apply(res[,c("TReC_Dist","ASReC_Dist")],1,function(xx){
chk_trec = (( true_PHI > 0 & xx[1] == "Negative Binomial" )
| ( true_PHI == 0 & xx[1] == "Poisson" ))
chk_trec
chk_asrec = (( true_PSI > 0 & xx[2] == "Beta-Binomial" )
| ( true_PSI == 0 & xx[2] == "Binomial" ))
chk_trec = ifelse(chk_trec,"TReC-Yes","TReC-No")
chk_asrec = ifelse(chk_asrec,"ASReC-Yes","ASReC-No")
sprintf("%s;%s",chk_trec,chk_asrec)
})
# Simplified output
smart_rmcols(res,c("LRT_trec","LRT_trecase","LRT_cistrans","cistrans"))
## ----ols_bulk-----------------------------------------------------------------
ols_out = CSeQTL_linearTest(input = sim$dat,XX = sim$XX,
RHO = sim$true_RHO,SNP = sim$true_SNP,MARG = TRUE)
names(ols_out)
# Model estimates
summary(ols_out$lm_out)
# Effect sizes and hypothesis test
ols_out$out_df
## ----ols_cts,fig.dim = c(6,7)-------------------------------------------------
ols_out = CSeQTL_linearTest(input = sim$dat,XX = sim$XX,
RHO = sim$true_RHO,SNP = sim$true_SNP,MARG = FALSE)
names(ols_out)
# Model estimates
summary(ols_out$lm_out)
# Effect sizes and hypothesis tests
ols_out$out_df
## -----------------------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.