inst/doc/intro.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
	collapse = TRUE,comment = "#>",
	echo = TRUE,cache = TRUE,
	dev = "png")

## ----setup,warning = FALSE----------------------------------------------------
# Load libraries
req_packs = c("ggplot2","smarter","CSeQTL")
for(pack in req_packs){
  library(package = pack,character.only = TRUE)
}

# List package's exported functions
ls("package:CSeQTL")

# Fix seed
set.seed(2)

## ----fig.dim = c(8,6)---------------------------------------------------------
# sample size
NN = 3e2

# fold-change between q-th and 1st cell type
true_KAPPA  = c(1,3,1)

# eQTL effect size per cell type, 
#   fold change between B and A allele
true_ETA    = c(1,1,1)

# cis/trans effect size
true_ALPHA  = c(1,1,1)

# count number of cell types
QQ = length(true_KAPPA)

# TReC model overdispersion
true_PHI = 0.1

# ASReC model overdispersion
true_PSI = 0.05

# Simulate cell type proportions
tRHO = gen_true_RHO(wRHO = 1,NN = NN,QQ = QQ)
plot_RHO(RHO = tRHO)
boxplot(tRHO,xlab = "Cell Type",ylab = "Proportion")

# Simulate a data object for a gene and SNP
sim = CSeQTL_dataGen(NN = NN,MAF = 0.3,true_BETA0 = log(1000),
  true_KAPPA = true_KAPPA,true_ETA = true_ETA,true_PHI = true_PHI,
  true_PSI = true_PSI,prob_phased = 0.05,true_ALPHA = true_ALPHA,
  RHO = tRHO,cnfSNP = TRUE,show = FALSE)

names(sim)

# TReC, ASReC, haplotype 2 counts
sim$dat[1:3,]

# Batch covariates including the intercept
sim$XX[1:3,]

sim$dat$SNP = sim$true_SNP
sim$dat$SNP = factor(sim$dat$SNP,
  levels = sort(unique(sim$dat$SNP)),
  labels = c("AA","AB","BA","BB"))

# TReC vs SNP
ggplot(data = sim$dat,
  mapping = aes(x = SNP,y = log10(total + 1))) +
  geom_violin(aes(fill = SNP)) + geom_jitter(width = 0.25) +
  geom_boxplot(width = 0.1) +
  xlab("Phased Genotype") + ylab("log10(TReC + 1)") +
  theme(legend.position = "right",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 12))

# TReC vs ASReC
ggplot(data = sim$dat,
  mapping = aes(x = total,y = total_phased,color = SNP)) +
  geom_point() + xlab("Total Read Count (TReC)") + 
  ylab("Total Allele-specific Read Count (ASReC)") +
  theme(legend.position = "right",
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 12))

## ----R.options = list(width = 200)--------------------------------------------------------------------------------------------------------------------------------------------------------------------
summary(sim$XX)

mRHO = matrix(1,NN,1)
colnames(mRHO) = "Bulk"
cistrans_thres = 0.01
res = c()

for(trec_only in c(TRUE,FALSE)){
for(neg_binom in c(TRUE,FALSE)){
for(beta_binom in c(TRUE,FALSE)){
  
  cat(sprintf("%s: trec_only = %s, neg_binom = %s, beta_binom = %s ...\n",
    date(),trec_only,neg_binom,beta_binom))
  
  PHASE = sim$dat$phased * ifelse(trec_only,0,1)
  upPHI = ifelse(neg_binom,1,0)
  upPSI = ifelse(beta_binom,1,0)
  
  sout = CSeQTL_smart(TREC = sim$dat$total,hap2 = sim$dat$hap2,
    ASREC = sim$dat$total_phased,PHASE = PHASE,
    SNP = sim$true_SNP,RHO = mRHO,XX = sim$XX,upPHI = upPHI,
    upKAPPA = 1,upETA = 1,upPSI = upPSI,upALPHA = 1,
    iFullModel = FALSE,trim = FALSE,thres_TRIM = 10,
    hypotest = TRUE,swap = FALSE,numAS = 5,numASn = 5,
    numAS_het = 5,cistrans_thres = cistrans_thres)
  # sout$HYPO
  
  res = rbind(res,smart_df(Model = ifelse(trec_only,"TReC-only","TReCASE"),
    TReC_Dist = ifelse(neg_binom,"Negative Binomial","Poisson"),
    ASReC_Dist = ifelse(beta_binom,"Beta-Binomial","Binomial"),
    sout$res))
  rm(sout)
  
}}}

num_vars = which(unlist(lapply(res,function(xx) class(xx))) == "numeric")
res[,num_vars] = apply(res[,num_vars],2,function(xx) smart_SN(x = xx,digits = 2))
res$Interpret = apply(res[,c("cistrans","PVAL_eQTL")],1,function(xx){
  ct = xx[1]
  pval = as.numeric(xx[2])
  ifelse(pval < 0.05,sprintf("%s eQTL",ct),"no eQTL")
})
res$Correct_Model = apply(res[,c("TReC_Dist","ASReC_Dist")],1,function(xx){
  chk_trec = (( true_PHI > 0 & xx[1] == "Negative Binomial" )
    | ( true_PHI == 0 & xx[1] == "Poisson" ))
  chk_trec
  
  chk_asrec = (( true_PSI > 0 & xx[2] == "Beta-Binomial" )
    | ( true_PSI == 0 & xx[2] == "Binomial" ))
  
  chk_trec = ifelse(chk_trec,"TReC-Yes","TReC-No")
  chk_asrec = ifelse(chk_asrec,"ASReC-Yes","ASReC-No")
  sprintf("%s;%s",chk_trec,chk_asrec)
})

# Simplified output
smart_rmcols(res,c("LRT_trec","LRT_trecase","LRT_cistrans","cistrans"))

## ----R.options = list(width = 200)--------------------------------------------------------------------------------------------------------------------------------------------------------------------
cistrans_thres = 0.01
res = c()

for(trec_only in c(TRUE,FALSE)){
for(neg_binom in c(TRUE,FALSE)){
for(beta_binom in c(TRUE,FALSE)){
  
  cat(sprintf("%s: trec_only = %s, neg_binom = %s, beta_binom = %s ...\n",
    date(),trec_only,neg_binom,beta_binom))
  
  PHASE = sim$dat$phased * ifelse(trec_only,0,1)
  upPHI = ifelse(neg_binom,1,0)
  upPSI = ifelse(beta_binom,1,0)
  upKAPPA = rep(1,QQ)
  upETA = upKAPPA
  upALPHA = upETA
  
  sout = CSeQTL_smart(TREC = sim$dat$total,hap2 = sim$dat$hap2,
    ASREC = sim$dat$total_phased,PHASE = PHASE,
    SNP = sim$true_SNP,RHO = sim$true_RHO,XX = sim$XX,upPHI = upPHI,
    upKAPPA = upKAPPA,upETA = upETA,upPSI = upPSI,upALPHA = upALPHA,
    iFullModel = FALSE,trim = FALSE,thres_TRIM = 10,
    hypotest = TRUE,swap = FALSE,numAS = 5,numASn = 5,
    numAS_het = 5,cistrans_thres = cistrans_thres)
  # sout$HYPO
  
  res = rbind(res,smart_df(Model = ifelse(trec_only,"TReC-only","TReCASE"),
    TReC_Dist = ifelse(neg_binom,"Negative Binomial","Poisson"),
    ASReC_Dist = ifelse(beta_binom,"Beta-Binomial","Binomial"),
    sout$res))
  rm(sout)
  
}}}

num_vars = which(unlist(lapply(res,function(xx) class(xx))) == "numeric")
res[,num_vars] = apply(res[,num_vars],2,function(xx) smart_SN(x = xx,digits = 2))
res$Interpret = apply(res[,c("cistrans","PVAL_eQTL")],1,function(xx){
  ct = xx[1]
  pval = as.numeric(xx[2])
  ifelse(pval < 0.05,sprintf("%s eQTL",ct),"no eQTL")
})
res$Correct_Model = apply(res[,c("TReC_Dist","ASReC_Dist")],1,function(xx){
  chk_trec = (( true_PHI > 0 & xx[1] == "Negative Binomial" )
    | ( true_PHI == 0 & xx[1] == "Poisson" ))
  chk_trec
  
  chk_asrec = (( true_PSI > 0 & xx[2] == "Beta-Binomial" )
    | ( true_PSI == 0 & xx[2] == "Binomial" ))
  
  chk_trec = ifelse(chk_trec,"TReC-Yes","TReC-No")
  chk_asrec = ifelse(chk_asrec,"ASReC-Yes","ASReC-No")
  sprintf("%s;%s",chk_trec,chk_asrec)
})

# Simplified output
smart_rmcols(res,c("LRT_trec","LRT_trecase","LRT_cistrans","cistrans"))


## ----ols_bulk-----------------------------------------------------------------
ols_out = CSeQTL_linearTest(input = sim$dat,XX = sim$XX,
  RHO = sim$true_RHO,SNP = sim$true_SNP,MARG = TRUE)
names(ols_out)

# Model estimates
summary(ols_out$lm_out)

# Effect sizes and hypothesis test
ols_out$out_df

## ----ols_cts,fig.dim = c(6,7)-------------------------------------------------
ols_out = CSeQTL_linearTest(input = sim$dat,XX = sim$XX,
  RHO = sim$true_RHO,SNP = sim$true_SNP,MARG = FALSE)
names(ols_out)

# Model estimates
summary(ols_out$lm_out)

# Effect sizes and hypothesis tests
ols_out$out_df

## -----------------------------------------------------------------------------
sessionInfo()

Try the CSeQTL package in your browser

Any scripts or data that you put into this service are public.

CSeQTL documentation built on April 24, 2026, 9:06 a.m.