##' Calculate marginal model posterior probabilities for one disease
##' incorporating information from others
##'
##' Given a list of model matrices and log ABFs, this function
##' calculates the marginal model posterior probabilities for thr
##' FIRST disease without ever calculating the joint Bayes Factors for
##' all cross-disease model configurations, which would require large
##' amounts of memory.
##'
##' This aims to be faster than marginalpp by ignoring contributions
##' from other diseases to models with individual PP < eps. This is
##' an approximation, and the effects of having too large an eps may
##' induce inaccuracies. However, if you want to pull in information
##' from several diseases, this may be the only way to do it within
##' achievable computer time.
##'
##' @title Marginal PP for models sharing information between diseases
##' @param STR list of models for diseases 1, 2, ..., n, each given in
##' the form of a character vector, with entries
##' \code{"snp1\%snp2\%snp3"}. The null model is given by
##' \code{"1"} OR \code{"0"}. It is assumed that all elements of
##' ABF, PP and pr below follow this same order.
##' @param ABF list of log(ABF) vectors for diseases 1, 2, ...
##' @param PP list of posterior probability vectors for diseases 1, 2,
##' ...
##' @param pr list of prior probabilities for the models in STR
##' @param kappa single value or vector of values to consider for the
##' sharing scale parameter. the value of kappa=1 must be
##' included, and if not will be prepended.
##' @param p0 prior probability of the null model
##' @param fthr models for all but the first disease are retained if
##' their cumsum(PP) < fthr. Ie set \code{PP[j]}=0 if SNP j is not in the smallest set of SNPs that satisfy cumsum(PP) < fthr. This is an
##' APPROXIMATION, eps should be as close to 1 as your computing
##' facilities allow.
#' @param N0 number of shared controls
#' @param ND list of number of cases for a set of diseases
##' @return list of: - single.pp: list of pp for each model in
##' \code{STR[[i]]} for disease i - shared.pp: list of pp for each model
##' in \code{STR[[i]]} for disease i, - STR: not quite as input,
##' reordered so null model is first row - ABF: not quite as
##' input, repordered so null model is first row - kappa: as
##' supplied
##' @export
##' @author Chris Wallace
marginalone <- function(STR, ABF, PP, pr, kappa, p0, fthr=0.99,N0,ND) {
n <- length(STR)
if (n < 2)
stop("Need at least 2 diseases")
if (length(ABF) != n || length(pr) != n | length(PP) != n)
stop("STR, ABF, PP and pr need to have the same lengths")
dis <- names(STR)
N <- sum(unlist(ND))+N0
Mk <- vector("list",n)
PP.adj <- vector("list",n) # adjusted PP for computations
pr.adj <- vector("list",n)
for(j in 1:n) {
Mk[[j]] <- unlist(lapply(strsplit(STR[[j]],"%"),length)) # model sizes
eta <- exp(Mk[[j]]*.5*log((ND[[j]]+N0)/N))
PP.adj[[j]] <- PP[[j]]*eta/sum(PP[[j]]*eta)
pr.adj[[j]] <- pr[[j]]*eta/sum(pr[[j]]*eta)
}
names(PP.adj) <- dis
names(pr.adj) <- dis
print(paste("trimming cumsumPP > ", fthr, " for diseases 2..", n))
print(paste("initial lengths: ", paste(sapply(PP, length), collapse = ", ")))
PP0 <- PP
pr0 <- pr
pr[[1]] <- pr.adj[[1]]
PP[[1]] <- PP.adj[[1]]
for (i in 2:n) {
ind <- order(PP[[i]],decreasing=TRUE)
trun <- min(which(cumsum(sort(PP[[i]],decreasing=TRUE))>fthr))
wh <- ind[1:trun]
if (length(wh)) {
STR[[i]] <- STR[[i]][wh]
ABF[[i]] <- ABF[[i]][wh]
pr[[i]] <- pr.adj[[i]][wh]
PP[[i]] <- PP.adj[[i]][wh]
pr0[[i]] <- pr0[[i]][wh]
PP0[[i]] <- PP0[[i]][wh]
} else {
pr[[i]] <- pr.adj[[i]]
PP[[i]] <- PP.adj[[i]]
}
}
print(paste("trimmed lengths: ", paste(sapply(PP, length), collapse = ", ")))
SS <- lapply(STR, strsplit, "%")
SS <- lapply(SS, setdiff, c("0", "1"))
usnps <- sort(unique(unlist(SS)))
if (!(1 %in% kappa))
kappa <- c(1, kappa)
PP.nonull <- PP
for (i in seq_along(STR)) {
wh <- which(STR[[i]] %in% c("0", "1"))
if (length(wh)) {
STR[[i]] <- STR[[i]][-wh]
ABF[[i]] <- ABF[[i]][-wh]
pr[[i]] <- pr[[i]][-wh]
pr0[[i]] <- pr0[[i]][-wh]
PP.nonull[[i]] <- PP[[i]][-wh]
PP[[i]] <- PP[[i]][-wh]
PP0[[i]] <- PP0[[i]][-wh]
}
PP[[i]] <- addnull(PP[[i]], calcpp(addnull(pr[[i]], p0),
addnull(ABF[[i]], 0))[1])
PP0[[i]] <- addnull(PP0[[i]], calcpp(addnull(pr0[[i]], p0),
addnull(ABF[[i]], 0))[1])
}
STR.i <- lapply(SS, function(ss) {
lapply(ss, function(x) as.integer(factor(x, levels = usnps)))
})
names(STR.i) <- NULL
names(PP.nonull) <- NULL
fun <- switch(n, NULL, "calcQone2", "calcQone3", "calcQone4")
if (is.null(fun))
stop("calcQone not written for ", n, " diseases yet")
Q <- do.call(fun, c(STR.i, PP.nonull))
maxpower <- n * (n - 1)/2
tmp <- lapply(kappa, function(k) {
if (n == 2) {
a <- pr[[1]] * (1 + (k - 1) * Q)
}
else {
s <- k^((1:maxpower)) #/maxpower)
a <- pr[[1]] * (1 + colSums((s - 1) * t(Q)))
}
a
})
tmp <- do.call("cbind", tmp)
alt.prior <- addnull(tmp, p0)
alt.pp <- calcpp(alt.prior, addnull(ABF[[1]], 0))
pr <- lapply(pr, addnull, p0)
STR[[1]] <- addnull(STR[[1]], "1")
alt.pp <- t(alt.pp)
# wh <- which(kappa == 1)
# sumsq <- sum((PP0[[1]] - alt.pp[,wh])^2)
# if ((sumsq > tol)) {
# warning("trait ", 1, " kappa=1 PP does not match input PP, sumsq=",
# sumsq, "which is > tol.\nsuggests you need to include more models in the calculation")
#}
list(single.prior = pr[[1]], single.pp = PP[[1]], shared.prior = alt.prior,
shared.pp = alt.pp, STR = STR[[1]], kappa = kappa)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.