R/marginal_assoc_tests.r
In CCpop: One and two locus GWAS of binary phenotype with case-control-population design

Documented in marginal.assoc.test.hwe.in.controls marginal.assoc.test.kpx.kpy marginal.assoc.test.pop.hwe.kpy marginal.assoc.test.unconstrained.chisq marginal.assoc.test.unconstrained.gsq

# Tests for association (or independence) between a SNP locus and a binary 
# disease phenotype.

marginal.assoc.test.unconstrained.chisq = function(t0, t1) {
  # The marginal Pearson chi-squared test on the 2x3 (or less) contingency table.
  
  # NOTE: the tables must be pruned so that they do not contain columns with all 
  # zeros.
  
  n0 = sum(t0)
  n1 = sum(t1)
  nx = t0 + t1
  n = n0 + n1
  e0 = (n0 / n) * nx
  e1 = (n1 / n) * nx
  
  statistic = sum((t0 - e0) ^ 2 / e0 + (t1 - e1) ^ 2 / e1)
  p.value = pchisq(statistic, df = length(t0), lower.tail = F)
  
  return (list(pen = t1 / (t0 + t1), statistic = statistic, p.value = p.value))
}

marginal.assoc.test.unconstrained.gsq = function(t0, t1) {
  # The marginal G^2 test (GLRT conditioned on the observed marginals of the 2x3
  # contingency table)
  
  pen.full = t1 / (t0 + t1)
  pen.null = sum(t1) / sum(t1 + t0)
  
  statistic = 2 * (sum(t0 * log(1 - pen.full) + t1 * log(pen.full), na.rm = T) - 
                   sum(t0 * log(1 - pen.null) + t1 * log(pen.null), na.rm = T))
  p.value = pchisq(statistic, df = length(t0), lower.tail = F)
  
  return (list(pen = pen.full, statistic = statistic, p.value = p.value))
}

marginal.assoc.test.hwe.in.controls = function(t0, t1) {
  # Assuming HWE among the controls (justifiable if the disease is rare). This is 
  # a Wald test for marginal association assuming HWE among the controls (Chen and
  # Chatterjee 2007)
  
  t1[t1 == 0] = 1 # is this a good solution to the case where I have zero t1 cells?
  n0 = sum(t0)
  
  ef = (t0[2] + 2 * t0[3]) / (2 * n0)
  et0 = n0 * c((1 - ef) ^ 2, 2 * ef * (1 - ef), ef ^ 2)
  eor.hom = (t1[2] * et0[1]) / (et0[2] * t1[1])
  eor.het = (t1[3] * et0[1]) / (et0[3] * t1[1])
  
  beta = log(c(eor.hom, eor.het))
  
  beta.cov = matrix(c(
    1/t1[1] + 1/t1[2] + 1/(2*et0[1] + et0[2]) + 1/(2*et0[3] + et0[2]),
    1/t1[1] + 1/n0 * 1/(ef * (1 - ef)),
    1/t1[1] + 1/n0 * 1/(ef * (1 - ef)),
    1/t1[1] + 1/t1[3] + 4/(2*et0[3] + et0[2]) + 4/(2*et0[1] + et0[2])), nrow = 2)
  
  statistic = t(beta) %*% solve(beta.cov) %*% beta
  p.value = pchisq(statistic, df = 2, lower.tail = F)
  
  return (list(statistic = statistic, p.value = p.value))
}

marginal.assoc.test.pop.hwe.kpy = function(t0, t1, tp, prevalence, pen.initial = NULL, f.initial = NULL) {
  # Assuming HWE in the general population, and that disease prevalence is 
  # known. This test uses an extraneous population sample to improve estimation 
  # of population parameters such as SNP distributions.
  
  tx = t0 + t1 + tp
  
  f.null.est = min(0.5, (0.5 * tx[2] + tx[3]) / sum(tx))
  px.null.est = c((1 - f.null.est) ^ 2, 2 * f.null.est * (1 - f.null.est), f.null.est ^ 2)
  null.loglik = sum(t0 * log(1 - prevalence) + t1 * log(prevalence) + tx * log(px.null.est))
  
  if (is.null(pen.initial)) {
    pars.initial = c(rep(prevalence, 3), f.null.est)
  } else {
    pars.initial = c(pen.initial, f.initial)
  }
  
  eval_f = function(pars) {
    pen = pars[1:3]
    f = pars[4]
    
    px = c((1 - f) ^ 2, 2 * f * (1 - f), f ^ 2)
    dpx.df = c(2 * f - 2, 2 - 4 * f, 2 * f)
    
    objective = null.loglik - sum(t0 * log(1 - pen) + t1 * log(pen)) - sum(tx * log(px))
    gradient = -c(t1 / pen - t0 / (1 - pen), sum(tx / px * dpx.df))
    
    return (list(objective = objective, gradient = gradient))
  }
  
  lb = rep(.Machine$double.eps, 4)
  ub = c(rep(1, 3) - .Machine$double.neg.eps, 0.5)
  
  eval_g_eq = function(pars) {
    pen = pars[1:3]
    f = pars[4]
    
    px = c((1 - f) ^ 2, 2 * f * (1 - f), f ^ 2)
    dpx.df = c(2 * f - 2, 2 - 4 * f, 2 * f)
    
    constraints = sum(pen * px) - prevalence
    jacobian = matrix(c(px, sum(pen * dpx.df)), nrow = 1)
    
    return (list(constraints = constraints, jacobian = jacobian))
  }

  sol = nloptr(x0 = pars.initial, eval_f = eval_f, eval_g_eq = eval_g_eq, lb = lb, ub = ub,
               opts = list(algorithm = 'NLOPT_LD_SLSQP', ftol_abs = 1e-8, xtol_rel = 0, maxeval = 1000))
  
  pen = t0 * 0 + sol$solution[1:3]
  f = sol$solution[4]
  px = c((1 - f) ^ 2, 2 * f * (1 - f), f ^ 2)
    
  statistic = -2 * sol$objective
  p.value = pchisq(statistic, df = 2, lower.tail = F)
  
  return (list(pen = pen, statistic = statistic, p.value = p.value))
}

marginal.assoc.test.kpx.kpy = function(t0, t1, prevalence, px, pen.initial = NULL) {
  # Assuming the SNP distribution is known, and that disease prevalence is known
  
  # The GLRT for testing a marginal association given a known SNP distribution 
  # (specified as genotypic, but can come from a HWE assumption and known MAF)
  
  # NOTE: this is exactly the same algorithm as that for a the pairwise test with 
  # known pxx.
  
  m = length(px)
  df = m - 1
  
  if (is.null(pen.initial)) {
    pars.initial = rep(prevalence, df)
  } else {
    pen.initial = pen.initial[2:m]
  }
  
  fn = function(x) {
    pen = c((prevalence - sum(x * px[2:m])) / px[1], x)
    return (-sum(t0 * log(1 - pen) + t1 * log(pen)))
  }
  
  gr = function(x) {
    x0 = (prevalence - sum(x * px[2:m])) / px[1]
    g = t1[2:m] / x - t0[2:m] / (1 - x) - t1[1] / x0 / px[1] * px[2:m] + t0[1] / (1 - x0) / px[1] * px[2:m]      
    return (-g)
  }
  
  ui = rbind(diag(df), -diag(df), -px[2:m], px[2:m])
  ci = c(rep(.Machine$double.eps, df), rep(-(1 - .Machine$double.neg.eps), df), .Machine$double.eps - prevalence, prevalence - px[1] * (1 - .Machine$double.neg.eps))
  sol = constrOptim(pars.initial, fn, gr, ui, ci)
  
  pen = t0 * 0 + c((prevalence - sum(sol$par * px[2:m])) / px[1], sol$par)
  
  statistic = 2 * sum(t0 * log((1 - pen) / (1 - prevalence)) + t1 * log(pen / prevalence))
  p.value = pchisq(statistic, df = df, lower.tail = F)
  
  return (list(pen = pen, statistic = statistic, p.value = p.value))
}

Any scripts or data that you put into this service are public.

CCpop documentation built on May 2, 2019, 3:25 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

CCpop
One and two locus GWAS of binary phenotype with case-control-population design

R/marginal_assoc_tests.r
In CCpop: One and two locus GWAS of binary phenotype with case-control-population design

Defines functions marginal.assoc.test.unconstrained.chisq marginal.assoc.test.unconstrained.gsq marginal.assoc.test.hwe.in.controls marginal.assoc.test.pop.hwe.kpy marginal.assoc.test.kpx.kpy

Documented in marginal.assoc.test.hwe.in.controls marginal.assoc.test.kpx.kpy marginal.assoc.test.pop.hwe.kpy marginal.assoc.test.unconstrained.chisq marginal.assoc.test.unconstrained.gsq

Try the CCpop package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CCpop One and two locus GWAS of binary phenotype with case-control-population design

R/marginal_assoc_tests.r In CCpop: One and two locus GWAS of binary phenotype with case-control-population design

Defines functions marginal.assoc.test.unconstrained.chisq marginal.assoc.test.unconstrained.gsq marginal.assoc.test.hwe.in.controls marginal.assoc.test.pop.hwe.kpy marginal.assoc.test.kpx.kpy

Documented in marginal.assoc.test.hwe.in.controls marginal.assoc.test.kpx.kpy marginal.assoc.test.pop.hwe.kpy marginal.assoc.test.unconstrained.chisq marginal.assoc.test.unconstrained.gsq

Try the CCpop package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

CCpop
One and two locus GWAS of binary phenotype with case-control-population design

R/marginal_assoc_tests.r
In CCpop: One and two locus GWAS of binary phenotype with case-control-population design