R/haplo.power.cc.ncp.q
In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

Documented in find.intercept.logistic haplo.power.cc.ncp

##$Author: sinnwell $
##$Date: 2008/04/29 14:36:25 $
##$Header: /projects/genetics/cvs/cvsroot/haplo.stats/R/haplo.power.cc.ncp.q,v 1.4 2008/04/29 14:36:25 sinnwell Exp $
##$Locker:  $
##$Log: haplo.power.cc.ncp.q,v $
##Revision 1.4  2008/04/29 14:36:25  sinnwell
##T to TRUE
##
##Revision 1.3  2008/03/10 19:00:34  sinnwell
##call get.hapPair instead of re-used code
##
##Revision 1.2  2008/02/28 21:47:10  sinnwell
##*** empty log message ***
##
##Revision 1.1  2008/02/28 15:59:27  sinnwell
##Initial revision
#

haplo.power.cc.ncp <- function(haplo, haplo.freq, base.index, haplo.beta, case.frac, prevalence) {
  
   cont.frac <- 1 - case.frac

   haplo <- as.matrix(haplo)

   n.loci <- ncol(haplo)
   n.haplo  <- nrow(haplo)

   # Error checks

   if(length(base.index) != 1)
     {
       stop("Length of base.index != 1")
     }

   if(length(haplo.beta) != n.haplo)
     {
       stop("Length of haplo.beta != number of haplotypes")
     }
   
   if(n.loci <2)
     {
       stop("Number of loci < 2")
     }
   
   if(length(haplo.freq) != n.haplo)
     {
       stop("Length haplo.freq != number of haplotypes")
     }

   
  # Create matrix of indices for all possible pairs of haplotypes

#   haplo.indx <- expand.grid(1:n.haplo,1:n.haplo)
#   haplo.indx <- cbind(haplo.indx[,2],haplo.indx[,1])
#   haplo.indx <- haplo.indx[haplo.indx[,1] <= haplo.indx[,2],]


 # Set up regression design matrices and beta coeff vectors

#   haplo.reg <- 1:n.haplo
#   x.haplo <- 1*outer(haplo.indx[,1], haplo.reg,"==") +  1*outer(haplo.indx[,2], haplo.reg,"==")

   # Compute prior genotype probs (geno = pair of haplotypes)

#   p.g <-  haplo.freq[haplo.indx[,1]] * haplo.freq[haplo.indx[,2]]
#   p.g <-  p.g * ifelse(haplo.indx[,1] == haplo.indx[,2], 1, 2)
   
   hapPair.lst <- get.hapPair(haplo, haplo.freq, base.index)
  
   x.haplo <- hapPair.lst$x.haplo
   p.g <- hapPair.lst$p.g
   haplo.indx <- hapPair.lst$haplo.indx
   
  # now move base.index to first col of x, and first element of beta

   x.design <- cbind( rep(1,nrow(x.haplo)), x.haplo) #[,-base.index])
   beta <- haplo.beta
   beta <- c(beta[base.index], beta[-base.index])
   
   # Find intercept
   other <- list(x=x.design, p.x=p.g, beta=beta[-1],  prev = prevalence)
   root <- uniroot(f=find.intercept.logistic, interval=c(-50,50),other=other)

   # Put intercept as first item in beta vector
   beta[1] <- root$root

   # function to compute P(Y|g) according to logistic reg model,
   # needed in subsequent steps

   py.g <- function(x,beta){
      root <- exp(x%*%beta)
      p <- root/(1+root)
      return(p)
    }


  # Determine genotype (haplotype pair) frequencies for cases (pg.case) and
  # controls (pg.cont)

   pg.case <- py.g(x.design, beta) * p.g        # P(y=1|g)P(g)
   pg.case <- as.vector(pg.case/sum(pg.case))

   pg.cont <- (1-py.g(x.design, beta)) * p.g    # P(y=0|g)P(g)
   pg.cont <- as.vector(pg.cont/sum(pg.cont))


   #--------------  Complete Haplotype Data ------------------------------------------

   df.complete <-  ncol(x.haplo)

   # E[X|case]
   ex.case.complete <- apply(x.haplo * pg.case, 2, sum)

   # E[X|control]
   ex.cont.complete <- apply(x.haplo * pg.cont, 2, sum)

   delta.complete <- (ex.case.complete - ex.cont.complete)


   # E[Xbar]
   ex.xbar.complete <- (case.frac * ex.case.complete) + (cont.frac * ex.cont.complete)

   # Vx

   xDiff <- x.haplo - matrix(rep(ex.xbar.complete, nrow(x.haplo)),nrow=nrow(x.haplo), byrow=TRUE)
   vx.case <- t(xDiff * pg.case ) %*% (xDiff )
   vx.cont <- t(xDiff * pg.cont ) %*% (xDiff )
   vx.complete <-  (case.frac*vx.case) + (cont.frac * vx.cont)

   ncp.complete <- case.frac*cont.frac*( t(delta.complete) %*% solve(vx.complete) %*% delta.complete)

   
  #--------------  Incomplete Haplotype Data ----------------------------------------


  # Genotype matrix, ignoring haplotype phase information. 

   geno.mat <- NULL
   for(i in 1:n.loci){
     t1 <- haplo[haplo.indx[,1],i]
     t2 <- haplo[haplo.indx[,2],i]
     a1 <- ifelse(t1 < t2, t1, t2)
     a2 <- ifelse(t2 > t1, t2, t1)
     geno.mat <- cbind(geno.mat, a1, a2)
   }

  # Create haplo.group which gives a code for pairs of
  # haplotypes that fall into the same genotype group when phase is unk

   geno.hash <- haplo.hash(geno.mat)

   haplo.group <- geno.hash$hash

  # Create probabilities for haplotype groups 

   p.haplo.group.case <- as.vector(tapply(pg.case, haplo.group, sum))
   p.haplo.group.cont <- as.vector(tapply(pg.cont, haplo.group, sum))


  # Create exemplary data of genotypes for case and controls,
  # with weights that represent their relative frequencies, 
  # such that the weights sum to 1.

   geno.temp <- rbind(geno.hash$hap.mtx, geno.hash$hap.mtx)
   oldClass(geno.temp) <- "model.matrix"
   
   wt <- c(case.frac*p.haplo.group.case, cont.frac*p.haplo.group.cont)

   save.em <- haplo.em(geno=geno.temp, weight=wt, control=haplo.em.control(min.posterior=0))
   
   n.haplo <- nrow(save.em$haplotype)
   hap.indx <- 1:n.haplo
   basehap <-  hap.indx[save.em$hap.prob == max(save.em$hap.prob)][1]

  # Score haplotypes that are not rare (freq > 10e-4) and not the most frequent, which
  # is considered a baseline haplotype

   uhap <- hap.indx[hap.indx!=basehap &( save.em$hap.prob > 10e-4)]

   x <- 1*outer(save.em$hap1code, uhap, "==") + 1*outer(save.em$hap2code, uhap, "==")

   nx <- ncol(x)

   xstar <- NULL
   for(j in 1:nx){
     xstar <-  cbind(xstar, tapply(x[,j] * save.em$post,save.em$subj.id,sum))
   }
   
   df.incomplete <- ncol(xstar)
   
   ncase <- length(p.haplo.group.case)
   case <- 1:ncase
   cont <- (ncase+1):(2*ncase)

  # note that xstar is same for cases and controls, because posteriors are the same for
  # cases and controls when running EM for haplotypes under Ho

   xstar <- xstar[case,]
   
   # E[X|case]
   ex.case.incomplete <- apply(xstar * p.haplo.group.case, 2, sum)

   # E[X|control]
   ex.cont.incomplete <- apply(xstar * p.haplo.group.cont, 2, sum)
   
   delta.incomplete <- ex.case.incomplete - ex.cont.incomplete

   # E[Xbar]
   ex.xbar.incomplete <- (case.frac * ex.case.incomplete) + 
                         (cont.frac * ex.cont.incomplete)

   #  Vx

   xstar.diff <- (xstar  - matrix(rep(ex.xbar.incomplete, nrow(xstar)),nrow=nrow(xstar), byrow=TRUE))

   vx.case <- t(xstar.diff * p.haplo.group.case) %*% (xstar.diff)
   vx.cont <- t(xstar.diff * p.haplo.group.cont) %*% (xstar.diff)

   vx.incomplete <- (case.frac * vx.case) + (cont.frac * vx.cont)

   ncp.incomplete <- case.frac * cont.frac * t(delta.incomplete) %*% solve(vx.incomplete) %*% delta.incomplete

  # By running haplo.em on the unphased genotype data, the haplotype order is changed. To map haplotypes
  # between incomplete and complete haplotype data, we do the following.

   haplo.df.incomplete <- data.frame(save.em$haplotype, haplo.freq.incomp=save.em$hap.prob, 
                                     hap.indx.incomp=hap.indx)
   
   haplo.df.complete   <- data.frame(haplo, haplo.freq, hap.indx.comp=1:nrow(haplo))
   
   haplo.merge <- merge(haplo.df.incomplete, haplo.df.complete, by.x=1:n.loci,by.y=1:n.loci,all=TRUE)

  # Need to subset to haplotypes that are scored for incomplete data (see above for how uhap was created)
   zed <- (hap.indx!=basehap) & ( save.em$hap.prob > 10e-4)

   haplo.merge <- haplo.merge[zed,]
   haplo.merge$x.score <- 1:nrow(haplo.merge)
   ord <- order(haplo.merge$hap.indx.comp)
   haplo.merge <- haplo.merge[ord,]
   ord <- haplo.merge$x.score

   delta.incomplete <-  delta.incomplete[ord]
   ex.case.incomplete <- ex.case.incomplete[ord]
   ex.cont.incomplete <- ex.cont.incomplete[ord]

   tmp <- vx.incomplete[ord,]
   vx.incomplete <- tmp[,ord]

   return(list(ncp.phased.haplo = ncp.complete, 
               df.phased.haplo  = df.complete,
               ex.case.complete = ex.case.complete, 
               ex.cont.complete = ex.cont.complete,
               delta.complete   = delta.complete, 
               vx.complete      = vx.complete, 
               ncp.unphased.haplo = ncp.incomplete, 
               df.unphased.haplo  = df.incomplete,
               ex.case.incomplete  = ex.case.incomplete, 
               ex.cont.incomplete  = ex.cont.incomplete,
               delta.incomplete    = delta.incomplete, 
               vx.incomplete       = vx.incomplete) )

}


find.intercept.logistic <- function(b0,other) {
  
  # function to find intercept for logistic regression when 
  # design matrix is specified, along with
  # population frequencies of rows, beta for non-intercept
  # effects, and prevalence (prev)
  x <- other$x
  p.x <- other$p.x
  beta <- other$beta
  prev <- other$prev
  z <- x %*% c(b0,beta)
  pd <- exp(z)/(1+exp(z))
  return( sum(pd*p.x) - prev )

}

Any scripts or data that you put into this service are public.

haplo.stats documentation built on May 29, 2024, 9:53 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

haplo.stats
Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

R/haplo.power.cc.ncp.q
In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

Defines functions find.intercept.logistic haplo.power.cc.ncp

Documented in find.intercept.logistic haplo.power.cc.ncp

Try the haplo.stats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

haplo.stats Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

R/haplo.power.cc.ncp.q In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

Defines functions find.intercept.logistic haplo.power.cc.ncp

Documented in find.intercept.logistic haplo.power.cc.ncp

Try the haplo.stats package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

haplo.stats
Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous

R/haplo.power.cc.ncp.q
In haplo.stats: Statistical Analysis of Haplotypes with Traits and Covariates when Linkage Phase is Ambiguous