Pmisc: A collection of miscellaneous functions.

#' Calculates a Bayes Facotor, given a prior and posterior probabilties. Check2
#' 
#' @param prior.prob prior probability
#' @param post.prob posterior probability
#' @return Bayes Factor
#' @author Paul Newcombe
.BayesFactor <- function(prior.prob, post.prob) {
  priorOdds <- prior.prob/(1-prior.prob)
  postOdds <- post.prob/(1-post.prob)
  bayesFactor <- postOdds/priorOdds
  return(bayesFactor)
}

#' Calculates prior probabability of causality for a particular variable, when a Poisson prior is used for model space
#' @param V total number of variables
#' @param mu model space mean
#' @param n.var number of specific variables (default is 1)
#' @return The prior probability for a single variable
#' @author Paul Newcombe
.ModelSpaceSpecProb <- function(V, mu, n.var=1) {
  ### Normalising constant for truncated Poisson
  poiDenom <- 0
  for (v in 0:V) {	
    poiDenom <- poiDenom + (mu^v)*exp(-mu)/gamma(v+1)
  }
  ## m - 1 places to fill (saince 1st place taken by SNP in question), from M-1 SNPs (SNP in question, that is in 1st place, is not an option)
  priorProb <- 0
  for (v in 1:V) {	# Add across all ways can select specific nvar
    p.v.selected <- (mu^v)*exp(-mu)/(gamma(v+1)*poiDenom) # prob of v variables included under the poisson
    # Union of rule for P(A U B U C etc)
    p.atleast.one.given.v <- 0
    i.max <- min(v,n.var)
    for (i in 1:i.max) {
      p.atleast.one.given.v <- p.atleast.one.given.v + (choose(n.var,i)*choose( (V-i) , (v-i) )/choose( V , v ))*((-1)^(i+1))
    }
    priorProb <- priorProb + p.v.selected*p.atleast.one.given.v
  }
  
  return(priorProb)
}

#' Calculates prior probababilities for x, and >=x causal variables, when a truncated Poisson prior is used for model space
#' @param V total number of variables
#' @param mu model space mean
#' @return prior matrix whereby 1st row is prior probabilities for =x causal variables, and 2nd row for >=x variables (x = 1,...V)
#' @author Paul Newcombe
.ModelSpaceProbs <- function(V, mu) {
  # Normalising constant for truncated Poisson
  poiDenom <- 0
  for (v in 0:V) {	
    poiDenom <- poiDenom + (mu^v)*exp(-mu)/gamma(v+1)
  }
  
  # =x
  numPresPriorProb <- c(1:(V+1))
  for (v in 1:V) {
    # Prior prob of  >= m SNPs for m = 1,..M
    numPresPriorProb[v] <- (mu^v)*exp(-mu)/(gamma(v+1)*poiDenom)
  }
  numPresPriorProb[V+1] <- exp(-mu)/(gamma(1)*poiDenom)
  
  # >=x
  geqPriorProb <- c(1:(V+1))
  for (v in 1:V) {
    geqPriorProb[v] <- sum(numPresPriorProb[v:V]) 
  }	
  geqPriorProb[V+1] <- sum(numPresPriorProb[0:V])
  
  # combining
  return.mat <- rbind(numPresPriorProb,geqPriorProb)
  colnames(return.mat) <- paste(c(c(1:V),0),sep="")
  rownames(return.mat) <- c("=",">=")
  
  return(return.mat)
}

#' Calculates FDR thresholds according to Algorithm 18.3 on P.689 of elemnts of statistical learning by Hastie et al.
#' This is a conservative caclulation - where resolution is not sufficient
#' 
#' @param obs.probs Posterior probabilities from analysis of actual data
#' @param permuted.probs Posterior probabilties from analysis of permuted outcome analyses
#' @param target.fdrs Vector of FDRs to estimate Posterior Probability thresholds for (defaults to 1%, 5% and 10%)
#' @param n.cuts.order Order of magnitude for vector length of possible thresholds to explore
#' @return Matrix of tagret FDRs, their estimated posterior probability thresholds, and the estimated FDR at each
#' threshold
#' @author Paul Newcombe
.GetFdrThresholds <- function(
  obs.probs,
  permuted.probs,
  target.fdrs=c(0.01,0.05,0.1),
  n.cuts.order=5
) {
  if (is.matrix(permuted.probs)) {
    n.permute <- ncol(permuted.probs)
  } else {
    n.permute <- 1
  }
  cuts <- seq(from=min(obs.probs), to=max(obs.probs), length.out=10^n.cuts.order)
  cuts.fdr <- sapply(cuts, function(x) sum(permuted.probs>=x)/(n.permute*sum(obs.probs>=x)) )
  
  fdr.thresholds <- matrix(NA,length(target.fdrs),3,
                           dimnames=list(paste(target.fdrs),c("FDR", "PostProbThreshold", "FDR_hat")))
  for (fdr in target.fdrs) {
    fdr.diff <- fdr - cuts.fdr
    cut.index <- which(fdr.diff==min(fdr.diff[fdr.diff>=0]))
    if (length(cut.index)>1) {
      cut.index <- cut.index[1]
    }
    fdr.thresholds[paste(fdr),"FDR"] <- fdr
    fdr.thresholds[paste(fdr),"PostProbThreshold"] <- cuts[cut.index]
    fdr.thresholds[paste(fdr),"FDR_hat"] <- cuts.fdr[cut.index]
  }
  
  return(fdr.thresholds)
}

#' Richard Samworth's function
#' I don't really understand the input parameters
#' @author Richard Samworth
.r.TailProbs <- function(eta, B, r) {
  # TailProbs returns a vector with the tail probability for each \tau = ceil{B*2\eta}/B + 1/B,...,1
  # We return 1 for all \tau = 0, 1/B, ... , ceil{B*2\eta}/B
  # s is -1/r
  MAXa <- 100000
  MINa <- 0.0001
  s <- -1/r
  etaB <- eta * B
  k_start <- (ceiling(2 * etaB) + 1)
  if(k_start > B) stop("eta is too large")
  
  a_vec <- rep(MAXa,B)
  
  Find.a <- function(prev_a) uniroot(Calc.a, lower = MINa, upper = prev_a, tol = .Machine$double.eps^0.75)$root
  Calc.a <- function(a) {
    denom <- sum((a + 0:k)^(-s))
    num <- sum((0:k) * (a + 0:k)^(-s))
    num / denom - etaB
  }
  
  for(k in k_start:B) a_vec[k] <- Find.a(a_vec[k-1])
  
  OptimInt <- function(a) {
    num <- (k + 1 - etaB) * sum((a + 0:(t-1))^(-s))
    denom <- sum((k + 1 - (0:k)) * (a + 0:k)^(-s))
    1 - num / denom
  }
  
  output <- rep(1, B)
  
  prev_k <- k_start
  for(t in k_start:B) {
    cur_optim <- rep(0, B)
    for (k in prev_k:(B-1)) cur_optim[k] <- optimize(f=OptimInt, lower = a_vec[k+1], upper = a_vec[k], maximum  = TRUE)$objective
    output[t] <- max(cur_optim)
    prev_k <- which.max(cur_optim)
  }
  return(output)
}

#' Richard Samworth's function
#' @param theta Estimate of the proportion of true signal variables. Can estimate as the sum of the CPSS probabilities, divided by
#' the number of variables
#' @param B the number of CPSS folds
#' @author Richard Samworth
.minD <- function(theta, B, r = c(-1/2, -1/4)) {
  pmin(c(rep(1, B), .r.TailProbs(theta^2, B, r[1])), .r.TailProbs(theta, 2*B, r[2]))
}