library("parallel")
# kXk matrices yy, ny, t(ny) and nn contain matrix elements [1,1], [2,1], [1,2]
# and [2,2] of the 2X2 contingency tables for all possible combinations of k
# variables. For a variable i return its Fisher exact p-values with
# variables j>=i.
fast_fisher<-function(i,yy,ny,nn){
ftp <- rep(1,nrow(yy))
for(j in i:nrow(yy)) {
ftp[j]<-fisher.test(
matrix(nrow=2,ncol=2,
data=c(yy[i,j],ny[i,j],ny[j,i],nn[i,j])),alternative="greater")$p.value
if(ftp[j]==0)ftp[j]<-2*.Machine$double.xmin
}
return(ftp)
}
# Metropolis randomization of vector x of 0s and 1s, where p is a vector of
# occupancy probabilities, keeping the sum of x fixed. If there are more 0s
# than 1s in x, a subset of vacant positions is chosen at random, and 1s are
# moved to these positions with Metropolis transition probability. If there
# are more 1s than 0s, the roles of 0s and 1s are interchanged
metro<-function(x,p,sweeps){
for(i in 1:sweeps){
occ<-which(x==1&p<1)
vac<-which(x==0&p>0)
locc<-length(occ)
lvac<-length(vac)
if(locc==0|lvac==0)return(x)
if(locc>lvac){
orig<-sample(occ,size=lvac)
dest<-vac
}
else{
orig<-occ
dest<-sample(vac,size=locc)
}
lmove<-length(orig)
moves<-(p[dest]*(1-p[orig])/p[orig]/(1-p[dest])>runif(length(orig)))
x[dest[moves]]<-1
x[orig[moves]]<-0
}
return(x)
}
# Stouffer and Fisher combos come courtesy of Wikipedia:
# http://en.wikipedia.org/wiki/Fisher's_method . I only changed the function
# names.
stouffer_combo <- function(p, w) { # p is a vector of p-values
if (missing(w)) {
w <- rep(1, length(p))/length(p)
} else {
if (length(w) != length(p))
stop("Length of p and w must equal!")
}
Zi <- qnorm(1-p)
Z <- sum(w*Zi)/sqrt(sum(w^2))
p.val <- 1-pnorm(Z)
return(c(Z = Z, p.value = p.val))
}
fisher_combo <- function(p) {
Xsq <- -2*sum(log(p))
p.val <- pchisq(Xsq, df = 2*length(p),lower.tail=FALSE)
return(c(Xsq = Xsq, p.value = p.val))
}
sim_fisher<-function(m, nsim, nsweep, seedme, njobs=1,
combo=c("fisher","stouffer")){
assertthat::assert_that(is.list(m))
assertthat::assert_that(length(m) >= 1)
ncores <- min(njobs, parallel::detectCores())
cl <- parallel::makeCluster(
getOption("cl.cores",ncores), setup_strategy = "sequential")
parallel::clusterSetRNGStream(cl)
RNGkind("L'Ecuyer-CMRG")
set.seed(seedme)
combo<-match.arg(combo)
rf <- lapply(m,rowMeans)
tp <- matrix(ncol=nsim,nrow=ncol(m[[1]])*(ncol(m[[1]])-1)/2)
xmat <- matrix(ncol=length(m),nrow=ncol(m[[1]])*(ncol(m[[1]])-1)/2)
for(i in 1:nsim){
# print("Simulation number",i,"\n")
if(length(m)<=1) {
next
}
for(j in 1:length(m)){
if(nsweep>0 && length(rf[[j]]) > 1){
m[[j]] <- parallel::parApply(
cl=cl, X=m[[j]], MARGIN=2,
FUN=metro, p=rf[[j]], sweeps=nsweep)
}
yy<-t(m[[j]])%*%m[[j]]
ny<-t(1-m[[j]])%*%m[[j]]
nn<-t(1-m[[j]])%*%(1-m[[j]])
lbi<-1:nrow(yy)
lbi[lbi%%2==0]<-nrow(yy)+2-nrow(yy)%%2-lbi[lbi%%2==0]
assertthat::assert_that(nrow(yy) == ncol(yy))
if(ncol(yy) < 2) {
next
}
x <- parallel::parSapply(
cl, X=lbi,
FUN=fast_fisher,yy=yy,ny=ny,nn=nn)[,order(lbi)]
x <- pmin(x,t(x))
xmat[,j] <- x[upper.tri(x)]
}
if(length(m)==1){
tp[,i]<-xmat[,1]
} else if(combo=="fisher"){
Xsq <- -2*rowSums(log(xmat))
tp[,i]<-sapply(Xsq,pchisq,df=2*ncol(xmat),lower.tail=FALSE)
} else if(combo=="stouffer"){
Z<-colSums(apply(1-xmat,1,qnorm))/sqrt(ncol(xmat))
tp[,i]<-1-sapply(Z,pnorm)
}
}
parallel::stopCluster(cl)
tp[tp==0]<-2*.Machine$double.xmin
return(tp)
}
#' Simulate the Fisher's test p-values.
#'
#' Given the incidence table for selected features (i.e. pinmat generated by
#' \code{calc_pins}), computes the Fisher's test p-values for pairwise comparisons.
#' Also perform permutations on the incidence table and compute a set of
#' Fisher's test p-values for each permutation.
#'
#' @param pinmat_df The incidence table generated by \code{findpins}.
#' @param pins_df The pin generated by \code{findpins}. The bin information for
#' the selected feature set.
#' @param nsim the number of permutations/simulations. Default value: 150.
#' @return a list of two numeric vector objects. The Fisher's test p-values
#' for the observation (true) and for the permutations (sim).
#' @export
sim_fisher_wrapper <- function(pinmat_df, pins_df, njobs=NULL,
nsim=150, nsweep=200, seedme=123) {
if(is.null(njobs)) {
njobs <- parallel::detectCores()-4
}
len <- length(unique(pins_df[,"sign"]))
m<-vector(mode="list",length=len)
if(len <= 1) { #AK: I am not sure what this condition means.
return(NULL) #AK: If we have only amps ending with the chromosome, there is
} #AK: only "+".
for(i in 1:len) {
m[[i]]<-as.matrix(pinmat_df[pins_df[,"sign"]==unique(pins_df[,"sign"])[i],,drop=F])
}
vtrue <- sim_fisher(m, nsim=1, nsweep=0,
seedme=seedme, njobs=njobs, combo="fisher")
msim <- sim_fisher(m, nsim=nsim, nsweep=nsweep,
seedme=seedme, njobs=njobs, combo="fisher")
res <- list(vtrue, msim)
names(res) <- c("true", "sim")
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.