R/powerEQTL.scRNAseq.sim.R
In powerEQTL: Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis

Documented in powerEQTL.scRNAseq.sim

# modified on July 21, 2021
#  (1) remove dependency on glmmTMB since it is no longer available in CRAN
#
# modified on June 17, 2021
#  (1) drop glmmADMB since it is not in mainstream repositories

# modified on June 13, 2021
#  (1) added glmmADMB
#
# modified on Feb. 15, 2021
#  (1) rename 'nSubj' back to 'n' and 'nCellPerSubj' back to 'm' to be consistent with other R functions in this package
#  (2) use asymptotic distribution of test statistics to calculate power to get stable results and to improve speed. 
#
# modified on Feb. 14, 2021
#  (1) rename 'n' to 'nSubj' and 'm' to 'nCellPerSubj'
#  (2) calculate cutoff based on data from null distribution
#
# modified on Feb. 10, 2021
#  (1) NBZIMM not in CRAN yet. temporarily remove NBZIMM
#
# created on Feb. 8, 2021
#  (1) calculate power for eQTL via scRNAseq data by using simulation approach
#  (1) consider NBZIMM since it is much faster than GLMMadaptive and glmmTMB
#
########################################################

powerEQTL.scRNAseq.sim=function(
  slope, 
  n, 
  m, 
  power = NULL,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim  =  1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1, # number of computer cores used by 'mclapply'
  n.lower = 2.01,
  n.upper = 1e+4,
  slope.lower =  1e-6,
  slope.upper = log(1.0e+6),
  MAF.lower = 0.05,
  MAF.upper = 0.49
)
{
  if(is.null(MAF)==TRUE && is.null(slope) == FALSE &&
     is.null(n) == FALSE && is.null(power) == FALSE)
  {
    MAF = minMAFEQTL.scRNAseq.sim(
      slope = slope,
      n = n, 
      m = m,
      power = power,
  m.int = m.int, # mean of random intercept
  sigma.int = sigma.int, # SD of the random intercept
  zero.p = zero.p, # probability that an excess zero occurs
  theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  FWER = FWER,
  nTests = nTests,
  nSim = nSim, # number of simulations
  estMethod = estMethod, # parameter estimation method for ZINB
  nCores = nCores, # number of computer cores used by 'mclapply'
  MAF.lower = MAF.lower,
  MAF.upper = MAF.upper
)
    names(MAF) = "MAF"
    return(MAF)
  } else if (is.null(MAF)==FALSE && is.null(slope) == TRUE &&
             is.null(n) == FALSE && is.null(power) == FALSE) {
    slope = minSlopeEQTL.scRNAseq.sim(
      n = n, 
      m = m,
      power = power,
  m.int = m.int, # mean of random intercept
  sigma.int = sigma.int, # SD of the random intercept
  zero.p = zero.p, # probability that an excess zero occurs
  theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = MAF, 
  FWER = FWER,
  nTests = nTests,
  nSim = nSim, # number of simulations
  estMethod = estMethod, # parameter estimation method for ZINB
  nCores = nCores, # number of computer cores used by 'mclapply'
  slope.lower = slope.lower,
  slope.upper = slope.upper
      )

    names(slope) = "slope"
    return(slope)
  } else if (is.null(MAF)==FALSE && is.null(slope) == FALSE &&
             is.null(n) == TRUE && is.null(power) == FALSE) {
    n = ssEQTL.scRNAseq.sim(  
          slope = slope, 
          m = m,
          power = power,
  m.int = m.int, # mean of random intercept
  sigma.int = sigma.int, # SD of the random intercept
  zero.p = zero.p, # probability that an excess zero occurs
  theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = MAF, 
  FWER = FWER,
  nTests = nTests,
  nSim  =  nSim, # number of simulations
  estMethod = estMethod, # parameter estimation method for ZINB
  nCores = nCores, # number of computer cores used by 'mclapply'
  n.lower = n.lower,
  n.upper = n.upper)

    names(n)="n"
    return(n)
  } else if (is.null(MAF)==FALSE && is.null(slope) == FALSE &&
             is.null(n) == FALSE && is.null(power) == TRUE) {
    power = powerEQTL.scRNAseq.sim.default(
      slope = slope, 
      n = n, 
      m = m, 
      m.int = m.int, # mean of random intercept
      sigma.int = sigma.int, # SD of the random intercept
      zero.p = zero.p, # probability that an excess zero occurs
      theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
      # the smaller theta is, the larger variance of NB random variable is
      MAF = MAF, 
      FWER = FWER,
      nTests = nTests,
      nSim = nSim, # number of simulations
      estMethod = estMethod, # parameter estimation method for ZINB
      nCores = nCores # number of computer cores used by 'mclapply'
    )$power  
    names(power) = "power"
    return(power)
  } else {
    stop("One and only one of the 4 parameters (MAF, slope, n, power) can be NULL!\n")
  }
  
}

powerEQTL.scRNAseq.sim.default = function(
  slope, # slope
  n, # number of subjects
  m, # number of cells per subjects 
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim = 1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1 # number of computer cores used by 'mclapply'
)
{
  #nSubj = 102
  #nCellPerSubj = 227868

  nSubj = n
  nCellPerSubj = m
  
  if(estMethod == "GLMMadaptive")  {
    pvalVec = unlist(mclapply(1:nSim, function(i) {
      # simulate data under alternative hypothesis: slope not equal to zero
      simDati = simDat.eQTL.scRNAseq(nSubj=nSubj,
        nCellPerSubj = nCellPerSubj,
        # probability that an excess zero occurs
        zero.p = zero.p,
        m.int = m.int,
        # standard deviation of the random intercept
        sigma.int = sigma.int,
        # slope for genotype in NB distribution: mu = exp(beta0 + beta1*SNP)
        slope = slope,
        # dispersion parameter of NB distribution
        # the smaller theta is, the larger variance of NB random variable is
        theta  = theta,
        MAF = MAF # SNP MAF
      )

      #########################
      # test if slope is significantly different from zero 
      res.try = try(f <- GLMMadaptive::mixed_model(fixed = counts ~ geno, 
                    random = ~ 1 | id, 
      	      data = simDati,
                    zi_fixed = ~ 1, 
	      family = zi.negative.binomial()), silent = TRUE)
      aa = attr(res.try, which="class")
      if(aa == "try-error")
      {
        pval = NA 
      } else {
        pval = summary(f)$coef_table[2, 4]
      }

      return(pval)

    }, mc.cores=nCores))
  } else {
    stop("Currently available choice for 'estMethod' is GLMMadaptive!")
  }

  # power
  power = mean( pvalVec < FWER/nTests, na.rm =TRUE)
  res = list(power = power, pvalVec=pvalVec)

  invisible(res)
}


# cutoff of test statistic is based on null distribution of the test statistic
powerEQTL.scRNAseq.sim.default.exact = function(
  slope, # slope
  n, # number of subjects
  m, # number of cells per subjects 
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim = 1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1 # number of computer cores used by 'mclapply'
)
{
  #nSubj = 102
  #nCellPerSubj = 227868

  nSubj = n
  nCellPerSubj = m
  
  if(estMethod == "GLMMadaptive")  {
    res.sim = mclapply(1:nSim, function(i) {
      # simulate data under alternative hypothesis: slope not equal to zero
      simDati = simDat.eQTL.scRNAseq(nSubj=nSubj,
        nCellPerSubj = nCellPerSubj,
        # probability that an excess zero occurs
        zero.p = zero.p,
        m.int = m.int,
        # standard deviation of the random intercept
        sigma.int = sigma.int,
        # slope for genotype in NB distribution: mu = exp(beta0 + beta1*SNP)
        slope = slope,
        # dispersion parameter of NB distribution
        # the smaller theta is, the larger variance of NB random variable is
        theta  = theta,
        MAF = MAF # SNP MAF
      )

      # simulate data under null hypothesis: slope = 0
      simDat0 = simDat.eQTL.scRNAseq(nSubj=nSubj,
        nCellPerSubj = nCellPerSubj,
        # probability that an excess zero occurs
        zero.p = zero.p,
        m.int = m.int,
        # standard deviation of the random intercept
        sigma.int = sigma.int,
        # slope for genotype in NB distribution: mu = exp(beta0 + beta1*SNP)
        slope = 0,
        # dispersion parameter of NB distribution
        # the smaller theta is, the larger variance of NB random variable is
        theta  = theta,
        MAF = MAF # SNP MAF
      )
 
      
      #########################
      # test if slope is significantly different from zero 
      res.try = try(f <- GLMMadaptive::mixed_model(fixed = counts ~ geno, 
                    random = ~ 1 | id, 
      	      data = simDati,
                    zi_fixed = ~ 1, 
	      family = zi.negative.binomial()), silent = TRUE)
      aa = attr(res.try, which="class")
      if(aa == "try-error")
      {
        stat1 = NA 
      } else {
        stat1 = summary(f)$coef_table[2, 3]
      }

      # under H0
      res.try0 = try(f0 <- GLMMadaptive::mixed_model(fixed = counts ~ geno, 
                    random = ~ 1 | id, 
      	      data = simDat0,
                    zi_fixed = ~ 1, 
	      family = zi.negative.binomial()), silent = TRUE)
      
      aa0 = attr(res.try0, which="class")
      if(aa0 == "try-error")
      {
        stat0 = NA 
      } else {
        stat0 = summary(f0)$coef_table[2, 3]
      }

      res = c(stat1, stat0)
      return(res)

    }, mc.cores=nCores)
  } else {
    stop("Currently available choice for 'estMethod' is GLMMadaptive!")
  }
  
  mat = t(sapply(res.sim, function(x) {x}))
  colnames(mat) = c("Ha", "H0")

  stat1 = mat[,1] # test statistic under Ha
  stat0 = mat[,2] # test statistic under H0
  # cutoff
  alpha2 = FWER/nTests
  cutoff.upp = quantile(stat0, prob= 1 - alpha2/2, na.rm=TRUE)
  cutoff.low = quantile(stat0, prob= alpha2/2, na.rm=TRUE)


  # power
  if(is.na(cutoff.upp)==TRUE || is.na(cutoff.low) == TRUE || any(is.na(stat1) == FALSE) == FALSE)
  {
    power = 0
  } else {
    power = mean(stat1 < cutoff.low | stat1  > cutoff.upp, na.rm=TRUE)
  }

  res = list(power = power, stat0=stat0, stat1=stat1, cutoff.low=cutoff.low, cutoff.upp = cutoff.upp)
  invisible(res)
}

###########################################################

# difference between estimated power and desired power
diffPower4ss.scRNAseq.sim=function( n, 
                                slope, 
                                 m,
                                power,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim  =  1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1 # number of computer cores used by 'mclapply'
)
{
  n = ceiling(n)
  est.power=powerEQTL.scRNAseq.sim.default(
    slope = slope, 
    n = n, 
    m = m, 
  m.int = m.int, # mean of random intercept
      sigma.int = sigma.int, # SD of the random intercept
      zero.p = zero.p, # probability that an excess zero occurs
      theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
      # the smaller theta is, the larger variance of NB random variable is
      MAF = MAF, 
      FWER = FWER,
      nTests = nTests,
      nSim = nSim, # number of simulations
      estMethod = estMethod, # parameter estimation method for ZINB
      nCores = nCores # number of computer cores used by 'mclapply'
   )$power  

  diff=est.power - power
  return(diff)

}


ssEQTL.scRNAseq.sim=function(  slope, 
                           m,
                           power = 0.8,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim  =  1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1, # number of computer cores used by 'mclapply'
  n.lower = 2.01,
  n.upper = 1e+4)
{

  loop = 0
  
  while(loop < 10)
  {
    loop = loop + 1


    res.try = try(res.root <- uniroot(f=diffPower4ss.scRNAseq.sim,
                     interval = c(n.lower, n.upper),
                     slope = slope, 
                     m = m,
                     power = power,
    m.int = m.int, # mean of random intercept
    sigma.int = sigma.int, # SD of the random intercept
    zero.p = zero.p, # probability that an excess zero occurs
    theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
    # the smaller theta is, the larger variance of NB random variable is
    MAF = MAF, 
    FWER = FWER,
    nTests = nTests,
    nSim = nSim, # number of simulations
    estMethod = estMethod, # parameter estimation method for ZINB
    nCores = nCores # number of computer cores used by 'mclapply'
                    ), silent = TRUE)

    aa = attr(res.try, which="class")
    if(is.null(aa) == FALSE)
    {
      res.root = list(root = NA)
    } else {
      break
    }   

  }

  return(ceiling(res.root$root))
}

###########################################
# difference between estimated power and desired power
diffPower4slope.scRNAseq.sim=function(
                                slope,
                                n,
                                m,
                                power,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim = 1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1 # number of computer cores used by 'mclapply'
)
{
  est.power=powerEQTL.scRNAseq.sim.default(
    slope = slope, 
    n = n, 
    m = m, 
  m.int = m.int, # mean of random intercept
      sigma.int = sigma.int, # SD of the random intercept
      zero.p = zero.p, # probability that an excess zero occurs
      theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
      # the smaller theta is, the larger variance of NB random variable is
      MAF = MAF, 
      FWER = FWER,
      nTests = nTests,
      nSim = nSim, # number of simulations
      estMethod = estMethod, # parameter estimation method for ZINB
      nCores = nCores # number of computer cores used by 'mclapply'
   )$power  


  diff=est.power - power
  return(diff)
  
}

minSlopeEQTL.scRNAseq.sim=function(n, 
                           m,
                           power = 0.8,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  MAF = 0.2, 
  FWER = 0.05,
  nTests = 1,
  nSim  =  1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1, # number of computer cores used by 'mclapply'
  slope.lower =  1e-6,
  slope.upper = log(1.0e+6)
)
{
  
  loop = 0
  
  while(loop < 10)
  {
    loop = loop + 1

    res.try = try(res.root <- uniroot(f=diffPower4slope.scRNAseq.sim,
                     interval = c(slope.lower, slope.upper),
                     n = n, 
                     m = m,
                     power = power,
    m.int = m.int, # mean of random intercept
    sigma.int = sigma.int, # SD of the random intercept
    zero.p = zero.p, # probability that an excess zero occurs
    theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
    # the smaller theta is, the larger variance of NB random variable is
    MAF = MAF, 
    FWER = FWER,
    nTests = nTests,
    nSim = nSim, # number of simulations
    estMethod = estMethod, # parameter estimation method for ZINB
    nCores = nCores # number of computer cores used by 'mclapply'
    ), silent = TRUE)

    aa = attr(res.try, which="class")
    if(is.null(aa) == FALSE)
    {
      res.root = list(root = NA)
    } else {
      break
    }   
  }
  
  return(res.root$root)
}

##################################################

# difference between estimated power and desired power
diffPower4MAF.scRNAseq.sim=function(
  MAF,
  slope,
  n,
  m,
  power,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  FWER = 0.05,
  nTests = 1,
  nSim = 1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1 # number of computer cores used by 'mclapply'
)
{
  est.power=powerEQTL.scRNAseq.sim.default(
    slope = slope, 
    n = n, 
    m = m, 
  m.int = m.int, # mean of random intercept
      sigma.int = sigma.int, # SD of the random intercept
      zero.p = zero.p, # probability that an excess zero occurs
      theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
      # the smaller theta is, the larger variance of NB random variable is
      MAF = MAF, 
      FWER = FWER,
      nTests = nTests,
      nSim = nSim, # number of simulations
      estMethod = estMethod, # parameter estimation method for ZINB
      nCores = nCores # number of computer cores used by 'mclapply'
   )$power  

  diff=est.power - power
  return(diff)
  
}

minMAFEQTL.scRNAseq.sim=function(slope, 
                             n, 
                               m,
                               power = 0.8,
  m.int = -1, # mean of random intercept
  sigma.int = 1, # SD of the random intercept
  zero.p = 0.1, # probability that an excess zero occurs
  theta = 1, # dispersion parameter of NB distribution NB(mu, theta)
  # the smaller theta is, the larger variance of NB random variable is
  FWER = 0.05,
  nTests = 1,
  nSim  =  1000, # number of simulations
  estMethod = "GLMMadaptive", # parameter estimation method for ZINB
  nCores = 1, # number of computer cores used by 'mclapply'
  MAF.lower = 0.05,
  MAF.upper = 0.049
)
{

  loop = 0
  
  while(loop < 10)
  {
    loop = loop + 1
    res.try = try(res.root <- uniroot(f=diffPower4MAF.scRNAseq.sim,
                     interval = c(MAF.lower, MAF.upper),
                     slope = slope,
                     n = n, 
                     m = m,
                     power = power,
    m.int = m.int, # mean of random intercept
        sigma.int = sigma.int, # SD of the random intercept
        zero.p = zero.p, # probability that an excess zero occurs
        theta = theta, # dispersion parameter of NB distribution NB(mu, theta)
        # the smaller theta is, the larger variance of NB random variable is
        FWER = FWER,
        nTests = nTests,
        nSim = nSim, # number of simulations
        estMethod = estMethod, # parameter estimation method for ZINB
        nCores = nCores # number of computer cores used by 'mclapply'
    ), silent = TRUE)
    aa = attr(res.try, which="class")
    if(is.null(aa) == FALSE)
    {
      res.root = list(root = NA)
    } else {
      break
    }   
  }
  
  return(res.root$root)
}
Any scripts or data that you put into this service are public.
powerEQTL documentation built on June 8, 2025, 10:37 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
powerEQTL
Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis

R/powerEQTL.scRNAseq.sim.R
In powerEQTL: Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis

Defines functions minMAFEQTL.scRNAseq.sim diffPower4MAF.scRNAseq.sim minSlopeEQTL.scRNAseq.sim diffPower4slope.scRNAseq.sim ssEQTL.scRNAseq.sim diffPower4ss.scRNAseq.sim powerEQTL.scRNAseq.sim.default.exact powerEQTL.scRNAseq.sim.default powerEQTL.scRNAseq.sim

Documented in powerEQTL.scRNAseq.sim

Try the powerEQTL package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

powerEQTL Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis

R/powerEQTL.scRNAseq.sim.R In powerEQTL: Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis

Defines functions minMAFEQTL.scRNAseq.sim diffPower4MAF.scRNAseq.sim minSlopeEQTL.scRNAseq.sim diffPower4slope.scRNAseq.sim ssEQTL.scRNAseq.sim diffPower4ss.scRNAseq.sim powerEQTL.scRNAseq.sim.default.exact powerEQTL.scRNAseq.sim.default powerEQTL.scRNAseq.sim

Documented in powerEQTL.scRNAseq.sim

Try the powerEQTL package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

powerEQTL
Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis

R/powerEQTL.scRNAseq.sim.R
In powerEQTL: Power and Sample Size Calculation for Bulk Tissue and Single-Cell eQTL Analysis