R/haplo.cc.q

#$Author: sinnwell $
#$Date: 2011/11/10 15:29:40 $

## Sinnwell JP and Schaid DJ
## Mayo Clinic, Rochester MN
## 1/2004

haplo.cc <- function(y, geno, x.adj=NA, locus.label=NA, ci.prob=0.95,
                     miss.val=c(0,NA), weights=NULL, eps.svd=1e-5, simulate=FALSE,
                     sim.control=score.sim.control(), control=haplo.glm.control())

{
  ## wrap the execution of haplo.score, haplo.group, and haplo.glm
  ## into this function, specific to case-control (cc) studies.

  ## 1) calculate odds ratios (ORs) for the haplotype effects.
  ## 2) keep in mind each OR is relative to the baseline haplotype,
  ##    which is often highly associated with the trait, as shown by
  ##    its frequencies and score.
  ## 3) You may choose a different, 'neutral' baseline by setting
  ##    haplo.base to the index of haplo.unique from a previous run with the
  ##    same haps kept by the haplo.freq.min or haplo.min.count cutoffs
  ##    (see the example just below).
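  ## For instance (a hypothetical sketch; the index 4 is purely illustrative),
  ## passing control=haplo.glm.control(haplo.base=4) would make the 4th row of
  ## haplo.unique the baseline haplotype for the odds ratios.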
  
  # check for missing values in y and geno
  n.subj <- length(y)
  if(n.subj != nrow(geno)) {
    stop("Different number of rows for y and geno!")
  }
  if(!all(is.na(x.adj)) && n.subj != nrow(x.adj)) {
    stop("Different number of rows for y and x.adj")
  }
  
  exclude <- which(is.na(y))
  tmp <- which(apply(is.na(geno),1,all))
  exclude <- unique(c(exclude,tmp))
  if(length(exclude)) {
    warning(paste("Subjects ", exclude,
                  " removed for missing y or all alleles missing."))
    geno <- geno[-exclude,,drop=FALSE]
    y <- y[-exclude]
    if(!all(is.na(x.adj))) {
      x.adj <- as.matrix(x.adj)
      x.adj <- x.adj[-exclude,,drop=FALSE]
    } 
  }

  if(is.null(control$em.control$iseed)) {
    runif(1) # use a random number, so the next .Random.seed value is used
    control$em.control$iseed <- .Random.seed
  }
 
  # check that y is binary, coded with 0's and 1's
  if(!all(y==0 | y==1)) {
    stop("y must be binary with 0=controls and 1=cases")
  }
  n.subj <- nrow(geno) 
  
  # set skip.haplo and haplo.freq.min for haplo.glm to the same value, meeting a
  # minimum number of expected counts per haplotype
  haplo.min.count <- control$haplo.min.count
  haplo.freq.min <- control$haplo.freq.min
  if(is.na(haplo.min.count)) haplo.min.count <- haplo.freq.min*2*nrow(geno)
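  # e.g., with haplo.freq.min=0.01 and 500 subjects (2*500 = 1000 haplotypes),
  # haplo.min.count works out to 0.01*1000 = 10 expected haplotype copies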
  
  if( !is.null(haplo.min.count) && haplo.min.count < 1)
    warning("haplo.min.count may be too small for reliable results\n")

  set.seed(control$em.control$iseed)
  score.lst <- haplo.score(y=y, geno=geno, x.adj=x.adj, trait.type="binomial",
                        min.count=haplo.min.count, 
                        locus.label=locus.label,
                        miss.val=miss.val, haplo.effect=control$haplo.effect,
                        eps.svd=eps.svd, simulate=simulate,
                        sim.control=sim.control, em.control=control$em.c)
                           
  # get haplotype frequency estimates within the two groups
  set.seed(control$em.control$iseed)
  group.lst <- haplo.group(group=y, geno=geno, locus.label=locus.label,
                           miss.val=miss.val, weight=weights,
                           control=control$em.c)
  
  group.count <- group.lst$group.count
  names(group.count) <- c("control", "case")
  n.loci <- group.lst$n.loci
  
  # merge group and score data
  merge.df <- haplo.score.merge(score.lst, group.lst)

  names(merge.df)[(n.loci+score.lst$simulate+3):(n.loci+score.lst$simulate+5)] <-
                 c("pool.hf", "control.hf", "case.hf")
 
  # prepare data for haplo.glm with binomial family, logit link;
  # haplotype effects are then reported as odds ratios, exp(beta)
  geno.glm <- setupGeno(geno, miss.val=miss.val)
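  # setupGeno recodes the allele matrix into a 'model.matrix'-class object so
  # that haplo.glm treats geno.glm as the haplotype loci; miss.val entries
  # become NA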
  
# weights are not supported for haplo.glm in S-PLUS--take out for now
#  if (is.null(weights)) weights=rep(1,n.subj)

  if(!all(is.na(x.adj))) {
    glm.data <- data.frame(geno.glm, y=y, x.adj)
    hglm.formula <- formulize(y="y",
          x=c("geno.glm", names(glm.data)[names(glm.data) %nin% c("y","geno.glm")]),
                              data=glm.data)
  } else {
    glm.data <- data.frame(geno.glm, y=y)
    hglm.formula <- formulize(y="y", x="geno.glm",data=glm.data)
  }
  # ORs are only obtained for haplotypes with freq > haplo.freq.min, based on min.count;
  # it should be set the same as skip.haplo so the haps merge 1:1 with merge.df
  
  set.seed(control$em.control$iseed)
  fit.lst <- haplo.glm(hglm.formula, family=binomial, data=glm.data, 
                       locus.label=locus.label,
                       miss.val=miss.val, control=control)

#  ncoef <- length(fit.lst$coef)
  hapcoef.idx <- grep("^geno.glm", names(fit.lst$coeff)) 
  # get ORs and variance estimates based on var(beta);
  # treat the baseline haplotype coefficient as zero, so its OR is 1
  hap.coef <- (fit.lst$coefficients)[hapcoef.idx]
  se.bet <- sqrt(fit.lst$var.mat[cbind(hapcoef.idx, hapcoef.idx)])

  # find CIs for the ORs, transformed from CIs on the betas
  if(0 > ci.prob || ci.prob > 1) {
    warning("invalid value for ci.prob, setting to default 0.95")
    ci.prob <- 0.95
  }

  norm.constant <- -1*qnorm((1-ci.prob)/2)
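  # e.g., for the default ci.prob=0.95, norm.constant = qnorm(0.975) ~ 1.96,
  # so the OR limits below are exp(beta +/- 1.96*se)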
  lower.ci <- hap.coef - norm.constant*se.bet
  lower.ci <- c(NA,exp(lower.ci))

  upper.ci <- hap.coef + norm.constant*se.bet
  upper.ci <- c(NA,exp(upper.ci))
  hap.OR <- c(1,exp(hap.coef))
  OR.df <- cbind(lower.ci, hap.OR, upper.ci)
  
  ## when a rare haplotype is below haplo.min.info, & not part of model
  ## need to remove from haplo.rare    --JPS 6/2005
  fit.lst$haplo.rare <- fit.lst$haplo.rare[fit.lst$haplo.freq[fit.lst$haplo.rare] > control$haplo.min.info]

  ## combine haps, OR's and CI's in a data.frame 
  fit.df <- as.data.frame(fit.lst$haplo.unique)[c(fit.lst$haplo.base,
                                          fit.lst$haplo.common, fit.lst$haplo.rare),]

  # if a rare term was fit, replicate the rare coefficient for every rare haplotype
  if(length(fit.lst$haplo.rare) > 0) {
    rarehap.idx <- grep("rare$", rownames(OR.df))
    rare.df <- matrix(rep(OR.df[rarehap.idx,],length(fit.lst$haplo.rare)),ncol=3,byrow=TRUE)
    if(control$keep.rare.haplo) {
      OR.df <- rbind(OR.df[-rarehap.idx,],rare.df)
    } else OR.df <- rbind(OR.df,rare.df)
  }
  
  class.vec <- c("Base", rep("Eff",length(fit.lst$haplo.common)), rep("R", length(fit.lst$haplo.rare)))
  fit.df <- cbind(fit.df,class.vec,OR.df)
  names(fit.df)[(n.loci+1):(n.loci+4)] <- c("glm.eff","OR.lower","OR", "OR.upper")

  # format the hap columns so the df's will merge on the haps
  fit.df[,1:n.loci] <- format(fit.df[,1:n.loci])
  merge.df[,1:n.loci] <- format(merge.df[,1:n.loci])

  cc.df <- merge(merge.df, fit.df,by=1:n.loci,all.x=TRUE,all.y=TRUE)
  cc.lst <- list(cc.df=cc.df, group.count=group.count, score.lst=score.lst,
                 fit.lst=fit.lst, ci.prob=ci.prob, exclude.subj=exclude)
  
  ## the score and glm objects are included in the returned list for
  ## additional applications
  
  class(cc.lst) <- "haplo.cc"
  cc.lst
  
}
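## Example usage (a sketch, assuming the hla.demo data shipped with haplo.stats
## has the DQB, DRB, and B alleles in columns 17,18,21:24; adjust the column
## indices and labels to your own data):
##   library(haplo.stats)
##   data(hla.demo)
##   geno <- hla.demo[, c(17,18,21:24)]
##   y.bin <- 1*(hla.demo$resp.cat == "low")
##   cc.fit <- haplo.cc(y=y.bin, geno=geno, locus.label=c("DQB","DRB","B"),
##                      control=haplo.glm.control(haplo.min.count=8))
##   print(cc.fit)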
