Nothing
#$Author: sinnwell $
#$Date: 2013/12/02 21:11:40 $
#$Header: /projects/genetics/cvs/cvsroot/haplo.stats/R/haplo.score.q,v 1.32 2013/12/02 21:11:40 sinnwell Exp $
#$Locker: $
#$Log: haplo.score.q,v $
#Revision 1.32 2013/12/02 21:11:40 sinnwell
#change rms::: to rms::
#
#Revision 1.31 2013/01/14 19:33:16 sinnwell
#small changes for 1.5.9
#
#Revision 1.30 2011/12/05 20:56:10 sinnwell
#final manual changes, updated test suite
#
#Revision 1.29 2011/11/23 20:34:02 sinnwell
#release 1.4.81, updates with test scripts
#
#Revision 1.28 2009/04/09 14:31:37 sinnwell
#*** empty log message ***
#
#Revision 1.27 2009/04/08 17:52:40 sinnwell
# use R's pchisq with lower.tail=FALSE for more signif digits
#
#Revision 1.26 2008/04/08 20:26:22 sinnwell
#add eps.svd, undo last changes with haplo.effect and control
#
#Revision 1.25 2008/04/07 21:54:51 sinnwell
#make control parameter have eps.svd, haplo.effect, sim.control, em.control
#
#Revision 1.24 2008/04/04 14:15:17 sinnwell
#change Ginv eps to 1e-5, sometimes gives full rank for v.score, when should be n.score-1
#
#Revision 1.23 2008/04/01 20:53:07 sinnwell
#added epsilon par to Ginv
#
#Revision 1.22 2007/02/26 22:01:12 sinnwell
#remove row.rem code, it was deprecated in haplo.em
#rows.rem is now miss, the index of rows removed in y, x.adj b/c of NA
#
#
#Revision 1.21 2007/01/25 19:41:27 sinnwell
#include haplo.effect for additive, recessive, dominant
#include min.count as parameter, base skip.haplo on it.
#Added a few comments for readability
#
#Revision 1.20 2006/10/25 15:09:54 sinnwell
#rm Matrix library call, only done in Ginv.q
#
#Revision 1.19 2006/05/02 15:09:25 sinnwell
#improve error messages
#
#Revision 1.18 2006/01/27 16:25:47 sinnwell
#enforce dependency of Ginv on Matrix
#
#Revision 1.17 2005/11/01 14:49:22 sinnwell
#*** empty log message ***
#
#Revision 1.16 2005/03/31 15:18:41 sinnwell
#for g.inv of class Matrix, must have t(u.score)%*% g.inv
#
#Revision 1.15 2005/02/16 19:58:13 sinnwell
#change some comments
#
#Revision 1.14 2004/12/29 17:35:18 sinnwell
#default for skip.haplo is now 5/(nrow(geno)*2)
#
#Revision 1.13 2003/12/08 19:42:18 sinnwell
# changed F,T to FALSE,TRUE
#
#Revision 1.12 2003/09/17 22:56:25 schaid
#added check for var(u.score) < 0 when computing simulated haplo.score.sim
#
#Revision 1.11 2003/09/17 22:35:06 schaid
#modified how max-stat and score.haplo simulated p-values account for NA
#results (either NA for observed stat, or NA for simulated stats)
#
#Revision 1.10 2003/09/15 14:53:17 sinnwell
#add in na.rm in score.max.sim calculation.
#may be part of a bigger problem
#
#Revision 1.9 2003/09/11 21:27:54 sinnwell
#change simulations to target both global and max for precision
#
#Revision 1.8 2003/08/27 21:19:06 sinnwell
#*** empty log message ***
#
#Revision 1.7 2003/08/27 20:59:18 sinnwell
#add rows.rem back into return list, it was needed in haplo.group
#
#Revision 1.6 2003/08/22 18:05:24 sinnwell
#update for release of haplo.score 1.2.0 with 'PIN'
#
#Revision 1.4 2003/04/22 20:30:20 sinnwell
#include use of is.R()
#
#Revision 1.3 2003/03/06 21:48:45 sinnwell
#include license statement
#
#Revision 1.2 2003/01/17 16:57:06 sinnwell
#revision for haplo.score version 1.2
#
# License:
#
# Copyright 2003 Mayo Foundation for Medical Education and Research.
#
# This program is free software; you can redistribute it and/or modify it under the terms of
# the GNU General Public License as published by the Free Software Foundation; either
# version 2 of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
# more details.
#
# You should have received a copy of the GNU General Public License along with this
# program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
# Boston, MA 02111-1307 USA
#
# For other licensing arrangements, please contact Daniel J. Schaid.
#
# Daniel J. Schaid, Ph.D.
# Division of Biostatistics
# Harwick Building Room 775
# Mayo Clinic
# 200 First St., SW
# Rochester, MN 55905
#
# phone: 507-284-0639
# fax: 507-284-9542
# email: schaid@mayo.edu
#
haplo.score <- function(y, geno, trait.type="gaussian",
offset = NA, x.adj = NA,
min.count=5, skip.haplo=min.count/(2*nrow(geno)),
locus.label=NA, miss.val=c(0,NA),
haplo.effect="additive", eps.svd=1e-5,
simulate=FALSE, sim.control=score.sim.control(),
em.control = haplo.em.control())
{
call <- match.call()
# Choose trait type:
trait.int <- charmatch(trait.type, c("gaussian", "binomial", "poisson",
"ordinal"))
if(is.na(trait.int)) stop("Invalid trait type")
if(trait.int == 0) stop("Ambiguous trait type")
# Check dims of y and geno
if(length(y)!=nrow(geno)) stop("length of y does not match number of rows in geno")
n.loci <- ncol(geno)/2
if(n.loci != (floor(ncol(geno)/2)) )stop("geno has odd number of columns, need 2 columns per locus")
# Check if Adjusted by Reg
adjusted <- TRUE
if(all(is.na(x.adj)) ) adjusted <- FALSE
if(adjusted){
x.adj <- as.matrix(x.adj)
if(nrow(x.adj)!=length(y)) stop("length of y does not match number of rows in x.adj")
}
# get haplo.effect
effCode <- charmatch(casefold(haplo.effect), c("additive", "dominant", "recessive"))
# General checks for missing data
miss <- which(is.na(y))
if(adjusted) miss <- unique(c(miss, which(apply(is.na(x.adj),1,any))))
# If Poisson, check for errors and missing offset
if(trait.int==3) {
if(all(is.na(offset))) stop("Missing offset")
miss <- unique(c(miss, which(is.na(offset))))
}
# Subset to non-missing values:
if(length(miss)) {
y <- as.numeric(y[-miss])
geno <- geno[-miss,]
if(adjusted) x.adj <- x.adj[-miss,,drop=FALSE]
if(trait.int==3) offset <- offset[-miss]
}
# Create a haplo object (using EMhaps)
haplo <- haplo.em(geno, locus.label, miss.val=miss.val,
control = em.control)
# Check convergence of EM
if(!haplo$converge) stop("EM for haplo failed to converge")
# If binomial, check for errors
if(trait.int==2) {
if(!all(y==1|y==0)) stop("Invalid y values")
if(all(y==1) | all(y==0)) stop("No variation in y values")
}
# If Proportional Odds, recode and check y values
if(trait.int==4){
y <- factor(y)
y.lev <- levels(y)
y <- as.numeric(y)
if(max(y) < 3) stop("Less than 3 levels for y values")
}
n.subj <- length(y)
# Computations for haplotype score statistics
# code haplotypes per subject
hap1 <- haplo$hap1code
hap2 <- haplo$hap2code
indx <- haplo$indx.subj
post <- haplo$post
nreps <- as.vector(haplo$nreps)
uhap <- sort(unique(c(hap1,hap2)))
# Choose to score haplotypes that have probabilities > skip.haplo
which.haplo <- which(haplo$hap.prob >= skip.haplo)
uhap <- uhap[which.haplo]
x <- outer(hap1,uhap,"==") + outer(hap2,uhap,"==")
# code x for additive haplotype effects
x <- (1*outer(hap1,uhap,"==")) + (1*outer(hap2,uhap,"=="))
if(effCode==2) {
# translate x to dominant effect
x <- 1*(x > 0)
}
if(effCode==3){
# translate x to recessive effect
x <- (x > 1) * 1
}
# apply posterior probs of the haplotype pair to the coding
n.x <- ncol(x)
x.post <- matrix(rep(NA, n.subj * n.x), ncol=n.x)
for(j in 1:n.x){
x.post[,j] <- tapply(x[,j]*post, indx, sum)
}
# dominant and recessive models may have insufficient haplotype counts
# if so, don't score them
if(effCode > 1) {
misshaps <- apply(x.post,2,sum) < min.count
##WARN for no haplotypes left to score
if(all(misshaps))
stop(paste("Too few occurrences of haplotypes for model haplo.effect: ", haplo.effect, "\n"))
# subset appropriate objects to reflect the reduction in haplotypes
x.post <- x.post[,!misshaps,drop=FALSE]
x <- x[,!misshaps,drop=FALSE]
which.haplo <- which.haplo[!misshaps]
}
# SCORES FOR GLM's
if(trait.int <= 3){
# If not adjusted, use summaries of y
if(!adjusted){
mu <- switch(trait.int, mean(y), mean(y), sum(y)/sum(offset) )
a <- switch(trait.int, var(y), 1, 1)
# if not adjusted, still need to set up x.adj, for intercept,
# to be used by haplo.score.glm to compute score stat adjusted
# for intercept
x.adj <- matrix(rep(1,n.subj),ncol=1)
}
# If adjusted, use GLM to get fitted and residuals
if(adjusted){
reg.out <- glm(y ~ x.adj, family=trait.type)
# bind col of 1's for intercept:
x.adj <- cbind(rep(1,n.subj),x.adj)
mu <- reg.out$fitted.values
a <- switch(trait.int,
sum(reg.out$residuals^2)/reg.out$df.residual,
1, 1)
}
v <- switch(trait.int, 1/a, mu*(1-mu), mu )
# Now compute score statistics
tmp <- haplo.score.glm(y, mu, a, v, x.adj, nreps, x.post, post, x)
u.score <- tmp$u.score
v.score <- tmp$v.score
}
# Scores for Proportional Odds
if(trait.int==4) {
if(adjusted){
requireNamespace("rms", quietly = TRUE) ## had: require(rms)
reg.out <- rms::lrm(y ~ x.adj)
K <- max(y)
n.xadj <- ncol(x.adj)
alpha <- reg.out$coef[1:(K-1)]
beta <- reg.out$coeff[K:(K-1 + n.xadj)]
tmp <- haplo.score.podds(y, alpha, beta, x.adj, nreps, x.post,
post, x)
}
if(!adjusted){
tbl <- table(y)
s <- 1- (cumsum(tbl)-tbl)/n.subj
alpha <- - log((1-s[-1])/s[-1])
tmp <- haplo.score.podds(y, alpha, beta=NA, x.adj=NA, nreps, x.post,
post, x)
}
u.score <- tmp$u.score
v.score <- tmp$v.score
}
# Compute Score Statistics:
tmp <- Ginv(v.score, eps=eps.svd)
df <- tmp$rank
g.inv <- tmp$Ginv
score.global <- t(u.score)%*% g.inv %*%u.score
score.haplo <- u.score / sqrt(diag(v.score))
score.max <- max(score.haplo^2, na.rm=TRUE)
# Compute empirical p-values if simulate=TRUE
if(!simulate) {
score.global.p.sim <- NA
score.haplo.p.sim <- rep(NA,length(score.haplo))
score.max.p.sim <- NA
n.val.global <- NA
n.val.haplo <- NA
} else {
# initialize rejection counts
score.global.rej <- 0
score.haplo.rej <- rep(0,length(score.haplo))
score.max.rej <- 0
# initialize valid simulation counts. A simulation for the
# global test can be invalid if the global statistic results
# in an NA value. A simulation for the max stat, or the individual
# haplotype scores, is declared invalid if any of the individual
# haplotype score stats results in an NA value. However, there is
# an exception. If the observed score statistic for an individual
# haplotype is NA, this haplotype score is ignored in the simulations,
# allowing the other haplotypes to have simulated p-values, and
# the observed max stat ignores the score relating to this
# haplotype with an NA value.
n.val.global <- 0
n.val.haplo <- 0
score.haplo.sqr <- score.haplo^2
score.haplo.ok <- !is.na(score.haplo.sqr)
if(trait.int<=3){
# Initialize mu.rand and v.rand, in case not adjusted (if adjusted,
# then these will be over-ridden in simulation loop
mu.rand <- mu
v.rand <- v
}
## Compute sequential Monte Carlo p-values as given by
## Besag and Clifford, Biometrika Jun 1991
## Make sure max-stat and global pvals are accurate to
## within desired pval error range.
## Employ an extra rule: sample a min (default=1000) for enough
## samples to get good estimates of haplo.p.sim pvals
done <- FALSE
while(!done) {
# random order
rand.ord <- order(runif(n.subj))
if(trait.int <=3){
if(adjusted){
mu.rand <- mu[rand.ord]
v.rand <- switch(trait.int, v, v[rand.ord], v[rand.ord])
}
tmp <- haplo.score.glm(y[rand.ord], mu.rand, a, v.rand,
x.adj[rand.ord,], nreps, x.post, post, x)
}
if(trait.int ==4){
if(adjusted) {
tmp <- haplo.score.podds(y[rand.ord], alpha, beta,
x.adj[rand.ord,,drop=FALSE],nreps, x.post, post, x)
}
if(!adjusted) {
tmp <- haplo.score.podds(y[rand.ord], alpha, beta=NA,
x.adj=NA,nreps, x.post, post, x)
}
}
u.score <- tmp$u.score
v.score <- tmp$v.score
# Now compute score statistics
tmp <- Ginv(v.score, eps=eps.svd)
g.inv <- tmp$Ginv
score.global.sim <- t(u.score) %*% g.inv %*% u.score
# note that score.haplo.sim is squared, where score.haplo is not
# because we want to keep the sign of score.haplo for returned
# values
score.haplo.sim <- ifelse(diag(v.score) < 0, NA, u.score^2 / diag(v.score) )
if(!is.na(score.global.sim)) {
n.val.global <- n.val.global +1
if(score.global.sim >= score.global) score.global.rej <- score.global.rej +1
}
## for max stat, and individual haplotype scores, we require the
## same number of valid
## simulations, so that the haplotype-specific simuated
## p-values are all based on the
## same number of valid simulations. Without this, it would be impossible to
## know the confidence in the individual p-values.
if(!any(is.na(score.haplo.sim))){
n.val.haplo <- n.val.haplo + 1
score.haplo.rej[score.haplo.ok] <- score.haplo.rej[score.haplo.ok] +
ifelse(score.haplo.sim[score.haplo.ok] >= score.haplo.sqr[score.haplo.ok], 1, 0)
score.max.sim <- max(score.haplo.sim[score.haplo.ok])
if(score.max.sim >= score.max) score.max.rej <- score.max.rej +1
}
# h is target count of rejection obs to stop sampling
# formula from Besag and Clifford
h.global <- 1/(sim.control$p.threshold^2 + 1/max(1,n.val.global))
h.max <- 1/(sim.control$p.threshold^2 + 1/max(1,n.val.haplo))
# print the info out to screen
if(sim.control$verbose) {
cat("h.global:", round(h.global,5), ", global: ", score.global.rej, ", count: ", n.val.global, "\n")
cat("h.max:", round(h.max,5), ", max-score: ", score.max.rej, ", count: ", n.val.haplo, "\n")
}
# done if reach max or enough rej values. If max is reached,
# keep results and calculate p-vals at that point.
if( (max(n.val.global,n.val.haplo) == sim.control$max.sim) |
(h.global<=score.global.rej & h.max<=score.max.rej) )
done <- TRUE
# not done if min.sim not met
# this control is for haplo.sim values
if(min(n.val.global, n.val.haplo) < sim.control$min.sim) done <- FALSE
}
score.global.p.sim <- score.global.rej / n.val.global
score.haplo.p.sim <- rep(NA, length(score.haplo.rej))
score.haplo.p.sim[score.haplo.ok] <- score.haplo.rej[score.haplo.ok] / n.val.haplo
score.max.p.sim <- score.max.rej / n.val.haplo
}
score.global.p <- if(is.R()) pchisq(score.global, df, lower.tail=FALSE) else 1 - pchisq(score.global,df)
score.haplo.p <- if(is.R()) pchisq(score.haplo^2, 1, lower.tail=FALSE) else 1 - pchisq(score.haplo^2,1)
# Create locus label if missing:
if(all(is.na(locus.label))) {
locus.label<- paste("loc-",1:n.loci,sep="")
}
obj <- (list(score.global=score.global, df=df,score.global.p=score.global.p,
score.global.p.sim=score.global.p.sim,
score.haplo=score.haplo,score.haplo.p=score.haplo.p,
score.haplo.p.sim=score.haplo.p.sim,
score.max.p.sim=score.max.p.sim,
haplotype=haplo$haplotype[which.haplo,],
hap.prob=haplo$hap.prob[which.haplo],
locus.label=locus.label, simulate=simulate,
call=call, haplo.effect=haplo.effect,
n.val.global=n.val.global,
n.val.haplo=n.val.haplo, rows.rem=miss))
if(exists("is.R") && is.function(is.R) && is.R()) {
class(obj) <- "haplo.score"
} else {
oldClass(obj) <- "haplo.score"
}
return(obj)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.