R/BINDER.R

Defines functions run_sampler_binder run_sampler_non_auxiliary run_sampler_deterministic binder summary.posteriorDraws

Documented in binder run_sampler_binder run_sampler_deterministic run_sampler_non_auxiliary summary.posteriorDraws

#----------------------------------------------------------------------------------------------------

#' Run BINDER model using prepared data.
#'
#' @export
#' @param prepared_data A named list generated by `prepare_data` function comprising model input data.
#' @param hyperparams A named list of user-defined hyperparameters; this list should contain some or all of the following named values: mu_zeta (prior mean for auxiliary stratum intercept (normal distribution)), sigma_zeta (prior variance for auxiliary stratum intercept (normal distribution)), mu_tau_ME (prior mean for auxiliary stratum ME coefficient (normal distribution)), sigma_tau_ME (prior variance for auxiliary stratum ME coefficient (normal distribution)), mu_tau_PE (prior mean for auxiliary stratum PE coefficient (normal distribution)), sigma_tau_PE (prior variance for auxiliary stratum PE coefficient (normal distribution)), alpha_phi (prior shape parameter for phi (inverse gamma distribution)), beta_phi (prior scale parameter for phi (inverse gamma distribution)), alpha_psi_CM (prior shape parameter for psi_CM (inverse gamma distribution)), beta_psi_CM (prior scale parameter for psi_CM (inverse gamma distribution)), alpha_psi_CP (prior shape parameter for psi_CP (inverse gamma distribution)), beta_psi_CP (prior scale parameter for psi_CP (inverse gamma distribution))); for a given hyperparameter, default values are used if no user-defined value is provided to the function call (mu_zeta := 0, sigma_zeta := 1, mu_tau_ME := 0, sigma_tau_ME := 1, mu_tau_PE := 0, sigma_tau_PE := 1, alpha_phi := 1.5, beta_phi := 1.5, alpha_psi_CM := 1.5, beta_psi_CM := 1.5, alpha_psi_CP := 1.5, beta_psi_CP := 1.5).
#' @param inits A named list of user-defined initial values to begin each MCMC chain (each element of this list should comprise a vector of length equal to the number of chains and each element of the vector should correspond to the initial value for that parameter for each chain); this list should contain some or all of the following named values: zeta_init (defaults to randomly generated value from prior normal distribution for zeta), tau_ME_init (defaults to randomly generated value from prior normal distribution for tau_ME), tau_PE_init (defaults to randomly generated value from prior normal distribution for tau_PE), phi_init (defaults to randomly generated value from prior inverse-gamma distribution for phi), psi_CM (defaults to randomly generated value from prior inverse-gamma distribution for psi_CM), psi_CP (defaults to randomly generated value from prior inverse-gamma distribution for psi_CP).
#' @param n_chains Number of MCMC chains to run; defaults to 1.
#' @param n_draws Number of MCMC draws to run; defaults to 1000.
#' @param burn_in Number of MCMC draws to discard from the beginning of each chain; defaults to 0.
#' @param thin Number of MCMC draws to discard between each accepted MCMC draw for each chain; defaults to 0.
#' @param seed Seed for random number generation; defaults to 1.
#' @return A list containing the MCMC draws for each model parameter for each chain.
#' 
run_sampler_binder <- function(prepared_data, hyperparams=NULL, inits=NULL, n_chains=1, n_draws=1000, burn_in=0, thin=0, seed=1){
  
  set.seed(seed)
  
  regulator <- prepared_data$regulator
  target_candidate <- prepared_data$target_candidate
  
  N <- prepared_data$N
  
  Y <- prepared_data$Y; Y <- log(prepared_data$Y/(1-prepared_data$Y))
  X <- cbind(1, prepared_data$X)
  
  all_posterior_draws <- list()
  for(c in 1:n_chains){
    
    mu_zeta <- if(is.numeric(hyperparams$mu_zeta)) hyperparams$mu_tau else 0
    sigma_zeta <- if(is.numeric(hyperparams$sigma_zeta)) hyperparams$sigma_zeta else 1
    mu_tau_ME <- if(is.numeric(hyperparams$mu_tau_ME)) hyperparams$mu_tau_ME else 0
    sigma_tau_ME <- if(is.numeric(hyperparams$sigma_tau_ME)) hyperparams$sigma_tau_ME else 1
    mu_tau_PE <- if(is.numeric(hyperparams$mu_tau_PE)) hyperparams$mu_tau_PE else 0
    sigma_tau_PE <- if(is.numeric(hyperparams$sigma_tau_PE)) hyperparams$sigma_tau_PE else 1
    alpha_phi <- if(is.numeric(hyperparams$alpha_phi)) hyperparams$alpha_phi else 1.5
    beta_phi <- if(is.numeric(hyperparams$beta_phi)) hyperparams$beta_phi else 1.5
    alpha_psi_CM <- if(is.numeric(hyperparams$alpha_psi_CM)) hyperparams$alpha_psi_CM else 1.5
    beta_psi_CM <- if(is.numeric(hyperparams$beta_psi_CM)) hyperparams$beta_psi_CM else 1.5
    alpha_psi_CP <- if(is.numeric(hyperparams$alpha_psi_CP)) hyperparams$alpha_psi_CP else 1.5
    beta_psi_CP <- if(is.numeric(hyperparams$beta_psi_CP)) hyperparams$beta_psi_CP else 1.5
    
    zeta_init <- if(is.numeric(inits$zeta)) inits$zeta[[c]] else rnorm(1, mu_zeta, sqrt(sigma_zeta))
    tau_ME_init <- if(is.numeric(inits$tau_ME)) inits$tau_ME[[c]] else rnorm(1, mu_tau_ME, sqrt(sigma_tau_ME))
    tau_PE_init <- if(is.numeric(inits$tau_PE)) inits$tau_PE[[c]] else rnorm(1, mu_tau_PE, sqrt(sigma_tau_PE))
    phi_init <- if(is.numeric(inits$phi)) inits$phi[[c]] else 1/rgamma(1, alpha_phi, beta_phi)
    psi_CM_init <- if(is.numeric(inits$psi_CM)) inits$psi_CM[[c]] else 1/rgamma(1, alpha_psi_CM, beta_psi_CM)
    psi_CP_init <- if(is.numeric(inits$psi_CP)) inits$psi_CP[[c]] else 1/rgamma(1, alpha_psi_CP, beta_psi_CP)
    
    mu_tau <- c(mu_zeta, mu_tau_ME, mu_tau_PE)
    sigma_tau <- c(sigma_zeta, sigma_tau_ME, sigma_tau_PE) * diag(nrow=3)
    tau_init <- c(zeta_init, tau_ME_init, tau_PE_init)
    
    logit_theta_init <- if(is.numeric(inits$theta)) log(inits$theta/(1-inits$theta)) else rnorm(N, (X%*%tau_init), sqrt(phi_init))
    
    tau_draws <- matrix(rep(NA, (n_draws*3)), ncol=3)
    tau_draws[1, ] <- tau_init
    
    phi_draws <- rep(NA, n_draws)
    phi_draws[1] <- phi_init
    
    logit_theta_draws <- matrix(rep(NA, (n_draws*N)), ncol=N)
    logit_theta_draws[1, ] <- logit_theta_init
    
    psi_CM_draws <- rep(NA, n_draws)
    psi_CM_draws[1] <- psi_CM_init
    
    psi_CP_draws <- rep(NA, n_draws)
    psi_CP_draws[1] <- psi_CP_init
    
    for(d in 2:n_draws){
      
      tau_a <- solve(solve(sigma_tau) + (t(X)%*%X)/phi_draws[d-1])
      tau_b <- tau_a %*% (solve(sigma_tau)%*%mu_tau + (t(X)%*%logit_theta_draws[d-1, ])/phi_draws[d-1])
      tau_draw <- MASS::mvrnorm(1, tau_b, tau_a)
      tau_draws[d, ] <- tau_draw
      
      phi_a <- alpha_phi + (N/2)
      phi_b <- (t(logit_theta_draws[d-1, ]-X%*%tau_draws[d, ])%*%(logit_theta_draws[d-1, ]-X%*%tau_draws[d, ]))/2 + beta_phi
      phi_draw <- 1/rgamma(1, shape=phi_a, rate=phi_b)
      phi_draws[d] <- phi_draw
      
      logit_theta_a <- 1/psi_CM_draws[d-1] + 1/psi_CP_draws[d-1] + 1/phi_draws[d]
      logit_theta_b <- Y[, 1]/psi_CM_draws[d-1] + Y[, 2]/psi_CP_draws[d-1] + (X%*%tau_draws[d, ])/phi_draws[d]
      logit_theta_draw <- rnorm(N, (logit_theta_b/logit_theta_a), sqrt(1/logit_theta_a))
      logit_theta_draws[d, ] <- logit_theta_draw
      
      psi_CM_a <- alpha_psi_CM + (N/2)
      psi_CM_b <- (t(Y[, 1]-logit_theta_draws[d, ])%*%(Y[, 1]-logit_theta_draws[d, ]))/2 + beta_psi_CM
      psi_CM_draw <- 1/rgamma(1, shape=psi_CM_a, rate=psi_CM_b)
      psi_CM_draws[d] <- psi_CM_draw
      
      psi_CP_a <- alpha_psi_CP + (N/2)
      psi_CP_b <- (t(Y[, 2]-logit_theta_draws[d, ])%*%(Y[, 2]-logit_theta_draws[d, ]))/2 + beta_psi_CP
      psi_CP_draw <- 1/rgamma(1, shape=psi_CP_a, rate=psi_CP_b)
      psi_CP_draws[d] <- psi_CP_draw
      
    }
    
    all_posterior_draws[[c]] <- list()
    all_posterior_draws[[c]] <- cbind(exp(logit_theta_draws)/(exp(logit_theta_draws)+1), logit_theta_draws, tau_draws, phi_draws, psi_CM_draws, psi_CP_draws)
    colnames(all_posterior_draws[[c]]) <- c(paste("theta[", target_candidate, "]", sep=""), paste("logit(theta[", target_candidate, "])", sep=""), "zeta", "tau_ME", "tau_PE", "phi", "psi_CM", "psi_CP")
    all_posterior_draws[[c]][seq((burn_in+1), n_draws, (thin+1)), ]
    
  }
  
  return(all_posterior_draws)
  
}

#--------------------------------------------------

#' Run non-auxiliary BINDER model using prepared data.
#'
#' @export
#' @param prepared_data A named list generated by `prepare_data` function comprising model input data.
#' @param hyperparams A named list of user-defined hyperparameters; this list should contain some or all of the following named values: alpha_psi_CM (prior shape parameter for psi_CM (inverse gamma distribution)), beta_psi_CM (prior scale parameter for psi_CM (inverse gamma distribution)), alpha_psi_CP (prior shape parameter for psi_CP (inverse gamma distribution)), beta_psi_CP (prior scale parameter for psi_CP (inverse gamma distribution))); for a given hyperparameter, default values are used if no user-defined value is provided to the function call (alpha_psi_CM := 1.5, beta_psi_CM := 1.5, alpha_psi_CP := 1.5, beta_psi_CP := 1.5).
#' @param inits A named list of user-defined initial values to begin each MCMC chain (each element of this list should comprise a vector of length equal to the number of chains and each element of the vector should correspond to the initial value for that parameter for each chain); this list should contain some or all of the following named values: theta (defaults to randomly generated value from uniform(0.0001, 0.9999) distribution for each target candidate's corresponding theta), psi_CM (defaults to randomly generated value from prior inverse-gamma distribution for psi_CM), psi_CP (defaults to randomly generated value from prior inverse-gamma distribution for psi_CP).
#' @param n_chains Number of MCMC chains to run; defaults to 1.
#' @param n_draws Number of MCMC draws to run; defaults to 1000.
#' @param burn_in Number of MCMC draws to discard from the beginning of each chain; defaults to 0.
#' @param thin Number of MCMC draws to discard between each accepted MCMC draw for each chain; defaults to 0.
#' @param seed Seed for random number generation; defaults to 1.
#' @return A list containing the MCMC draws for each model parameter for each chain.
#' 
run_sampler_non_auxiliary <- function(prepared_data, hyperparams=NULL, inits=NULL, n_chains=1, n_draws=1000, burn_in=0, thin=0, seed=1){
  
  set.seed(seed)
  
  regulator <- prepared_data$regulator
  target_candidate <- prepared_data$target_candidate
  
  N <- prepared_data$N
  
  Y <- prepared_data$Y; Y <- log(prepared_data$Y/(1-prepared_data$Y))
  
  all_posterior_draws <- list()
  for(c in 1:n_chains){
    
    alpha_psi_CM <- if(is.numeric(hyperparams$alpha_psi_CM)) hyperparams$alpha_psi_CM else 1.5
    beta_psi_CM <- if(is.numeric(hyperparams$beta_psi_CM)) hyperparams$beta_psi_CM else 1.5
    alpha_psi_CP <- if(is.numeric(hyperparams$alpha_psi_CP)) hyperparams$alpha_psi_CP else 1.5
    beta_psi_CP <- if(is.numeric(hyperparams$beta_psi_CP)) hyperparams$beta_psi_CP else 1.5
    
    logit_theta_init <- if(is.numeric(inits$theta)) log(inits$theta[[c]]/(1-inits$theta[[c]])) else { u <- runif(N, 0.0001, 0.9999); logit_u <- log(u/(1-u)) }
    psi_CM_init <- if(is.numeric(inits$psi_CM)) inits$psi_CM[[c]] else 1/rgamma(1, alpha_psi_CM, beta_psi_CM)
    psi_CP_init <- if(is.numeric(inits$psi_CP)) inits$psi_CP[[c]] else 1/rgamma(1, alpha_psi_CP, beta_psi_CP)
    
    logit_theta_draws <- matrix(rep(NA, (n_draws*N)), ncol=N)
    logit_theta_draws[1, ] <- logit_theta_init
    
    psi_CM_draws <- rep(NA, n_draws)
    psi_CM_draws[1] <- psi_CM_init
    
    psi_CP_draws <- rep(NA, n_draws)
    psi_CP_draws[1] <- psi_CP_init
    
    for(d in 2:n_draws){
      
      logit_theta_a <- 1/psi_CM_draws[d-1] + 1/psi_CP_draws[d-1]
      logit_theta_b <- Y[, 1]/psi_CM_draws[d-1] + Y[, 2]/psi_CP_draws[d-1]
      logit_theta_draw <- rnorm(N, (logit_theta_b/logit_theta_a), sqrt(1/logit_theta_a))
      logit_theta_draws[d, ] <- logit_theta_draw
      
      psi_CM_a <- alpha_psi_CM + (N/2)
      psi_CM_b <- (t(Y[, 1]-logit_theta_draws[d, ])%*%(Y[, 1]-logit_theta_draws[d, ]))/2 + beta_psi_CM
      psi_CM_draw <- 1/rgamma(1, shape=psi_CM_a, rate=psi_CM_b)
      psi_CM_draws[d] <- psi_CM_draw
      
      psi_CP_a <- alpha_psi_CP + (N/2)
      psi_CP_b <- (t(Y[, 2]-logit_theta_draws[d, ])%*%(Y[, 2]-logit_theta_draws[d, ]))/2 + beta_psi_CP
      psi_CP_draw <- 1/rgamma(1, shape=psi_CP_a, rate=psi_CP_b)
      psi_CP_draws[d] <- psi_CP_draw
      
    }
    
    all_posterior_draws[[c]] <- list()
    all_posterior_draws[[c]] <- cbind(exp(logit_theta_draws)/(exp(logit_theta_draws)+1), logit_theta_draws, psi_CM_draws, psi_CP_draws)
    colnames(all_posterior_draws[[c]]) <- c(paste("theta[", target_candidate, "]", sep=""), paste("logit(theta[", target_candidate, "])", sep=""), "psi_CM", "psi_CP")
    all_posterior_draws[[c]][seq((burn_in+1), n_draws, (thin+1)), ]
    
  }
  
  return(all_posterior_draws)
  
}

#--------------------------------------------------

#' Run deterministic BINDER model using prepared data.
#'
#' @export
#' @param prepared_data A named list generated by `prepare_data` function comprising model input data.
#' @param hyperparams A named list of user-defined hyperparameters; this list should contain some or all of the following named values: mu_zeta (prior mean for auxiliary stratum intercept (normal distribution)), sigma_zeta (prior variance for auxiliary stratum intercept (normal distribution)), mu_tau_ME (prior mean for auxiliary stratum ME coefficient (normal distribution)), sigma_tau_ME (prior variance for auxiliary stratum ME coefficient (normal distribution)), mu_tau_PE (prior mean for auxiliary stratum PE coefficient (normal distribution)), sigma_tau_PE (prior variance for auxiliary stratum PE coefficient (normal distribution)), alpha_psi_CM (prior shape parameter for psi_CM (inverse gamma distribution)), beta_psi_CM (prior scale parameter for psi_CM (inverse gamma distribution)), alpha_psi_CP (prior shape parameter for psi_CP (inverse gamma distribution)), beta_psi_CP (prior scale parameter for psi_CP (inverse gamma distribution))); for a given hyperparameter, default values are used if no user-defined value is provided to the function call (mu_zeta := 0, sigma_zeta := 1, mu_tau_ME := 0, sigma_tau_ME := 1, mu_tau_PE := 0, sigma_tau_PE := 1, alpha_psi_CM := 1.5, beta_psi_CM := 1.5, alpha_psi_CP := 1.5, beta_psi_CP := 1.5).
#' @param inits A named list of user-defined initial values to begin each MCMC chain (each element of this list should comprise a vector of length equal to the number of chains and each element of the vector should correspond to the initial value for that parameter for each chain); this list should contain some or all of the following named values: zeta_init (defaults to randomly generated value from prior normal distribution for zeta), tau_ME_init (defaults to randomly generated value from prior normal distribution for tau_ME), tau_PE_init (defaults to randomly generated value from prior normal distribution for tau_PE), psi_CM (defaults to randomly generated value from prior inverse-gamma distribution for psi_CM), psi_CP (defaults to randomly generated value from prior inverse-gamma distribution for psi_CP).
#' @param n_chains Number of MCMC chains to run; defaults to 1.
#' @param n_draws Number of MCMC draws to run; defaults to 1000.
#' @param burn_in Number of MCMC draws to discard from the beginning of each chain; defaults to 0.
#' @param thin Number of MCMC draws to discard between each accepted MCMC draw for each chain; defaults to 0.
#' @param seed Seed for random number generation; defaults to 1.
#' @return A list containing the MCMC draws for each model parameter for each chain.
#' 
run_sampler_deterministic <- function(prepared_data, hyperparams=NULL, inits=NULL, n_chains=1, n_draws=1000, burn_in=0, thin=0, seed=1){
  
  set.seed(seed)
  
  regulator <- prepared_data$regulator
  target_candidate <- prepared_data$target_candidate
  
  N <- prepared_data$N
  
  Y <- prepared_data$Y; Y <- log(prepared_data$Y/(1-prepared_data$Y))
  X <- cbind(1, prepared_data$X)
  
  all_posterior_draws <- list()
  for(c in 1:n_chains){
    
    mu_zeta <- if(is.numeric(hyperparams$mu_zeta)) hyperparams$mu_tau else 0
    sigma_zeta <- if(is.numeric(hyperparams$sigma_zeta)) hyperparams$sigma_zeta else 1
    mu_tau_ME <- if(is.numeric(hyperparams$mu_tau_ME)) hyperparams$mu_tau_ME else 0
    sigma_tau_ME <- if(is.numeric(hyperparams$sigma_tau_ME)) hyperparams$sigma_tau_ME else 1
    mu_tau_PE <- if(is.numeric(hyperparams$mu_tau_PE)) hyperparams$mu_tau_PE else 0
    sigma_tau_PE <- if(is.numeric(hyperparams$sigma_tau_PE)) hyperparams$sigma_tau_PE else 1
    alpha_psi_CM <- if(is.numeric(hyperparams$alpha_psi_CM)) hyperparams$alpha_psi_CM else 1.5
    beta_psi_CM <- if(is.numeric(hyperparams$beta_psi_CM)) hyperparams$beta_psi_CM else 1.5
    alpha_psi_CP <- if(is.numeric(hyperparams$alpha_psi_CP)) hyperparams$alpha_psi_CP else 1.5
    beta_psi_CP <- if(is.numeric(hyperparams$beta_psi_CP)) hyperparams$beta_psi_CP else 1.5
    
    zeta_init <- if(is.numeric(inits$zeta)) inits$zeta[[c]] else rnorm(1, mu_zeta, sqrt(sigma_zeta))
    tau_ME_init <- if(is.numeric(inits$tau_ME)) inits$tau_ME[[c]] else rnorm(1, mu_tau_ME, sqrt(sigma_tau_ME))
    tau_PE_init <- if(is.numeric(inits$tau_PE)) inits$tau_PE[[c]] else rnorm(1, mu_tau_PE, sqrt(sigma_tau_PE))
    psi_CM_init <- if(is.numeric(inits$psi_CM)) inits$psi_CM [[c]]else 1/rgamma(1, alpha_psi_CM, beta_psi_CM)
    psi_CP_init <- if(is.numeric(inits$psi_CP)) inits$psi_CP[[c]] else 1/rgamma(1, alpha_psi_CP, beta_psi_CP)
    
    mu_tau <- c(mu_zeta, mu_tau_ME, mu_tau_PE)
    sigma_tau <- c(sigma_zeta, sigma_tau_ME, sigma_tau_PE) * diag(nrow=3)
    tau_init <- c(zeta_init, tau_ME_init, tau_PE_init)
    
    logit_theta_init <- X%*%tau_init
    
    tau_draws <- matrix(rep(NA, (n_draws*3)), ncol=3)
    tau_draws[1, ] <- tau_init
    
    logit_theta_draws <- matrix(rep(NA, (n_draws*N)), ncol=N)
    logit_theta_draws[1, ] <- logit_theta_init
    
    psi_CM_draws <- rep(NA, n_draws)
    psi_CM_draws[1] <- psi_CM_init
    
    psi_CP_draws <- rep(NA, n_draws)
    psi_CP_draws[1] <- psi_CP_init
    
    for(d in 2:n_draws){
      
      tau_a <- solve(solve(sigma_tau) + (t(X)%*%X)/psi_CM_draws[d-1] + (t(X)%*%X)/psi_CP_draws[d-1])
      tau_b <- tau_a %*% (solve(sigma_tau)%*%mu_tau + (t(X)%*%Y[, 1])/psi_CM_draws[d-1] + (t(X)%*%Y[, 2])/psi_CP_draws[d-1])
      tau_draw <- MASS::mvrnorm(1, tau_b, tau_a)
      tau_draws[d, ] <- tau_draw
      
      logit_theta_draw <- X%*%tau_draws[d, ]
      logit_theta_draws[d, ] <- logit_theta_draw
      
      psi_CM_a <- alpha_psi_CM + (N/2)
      psi_CM_b <- (t(Y[, 1]-logit_theta_draws[d, ])%*%(Y[, 1]-logit_theta_draws[d, ]))/2 + beta_psi_CM
      psi_CM_draw <- 1/rgamma(1, shape=psi_CM_a, rate=psi_CM_b)
      psi_CM_draws[d] <- psi_CM_draw
      
      psi_CP_a <- alpha_psi_CP + (N/2)
      psi_CP_b <- (t(Y[, 2]-logit_theta_draws[d, ])%*%(Y[, 2]-logit_theta_draws[d, ]))/2 + beta_psi_CP
      psi_CP_draw <- 1/rgamma(1, shape=psi_CP_a, rate=psi_CP_b)
      psi_CP_draws[d] <- psi_CP_draw
      
    }
    
    all_posterior_draws[[c]] <- list()
    all_posterior_draws[[c]] <- cbind(exp(logit_theta_draws)/(exp(logit_theta_draws)+1), logit_theta_draws, tau_draws, psi_CM_draws, psi_CP_draws)
    colnames(all_posterior_draws[[c]]) <- c(paste("theta[", target_candidate, "]", sep=""), paste("logit(theta[", target_candidate, "])", sep=""), "zeta", "tau_ME", "tau_PE", "psi_CM", "psi_CP")
    all_posterior_draws[[c]][seq((burn_in+1), n_draws, (thin+1)), ]
    
  }
  
  return(all_posterior_draws)
  
}

#----------------------------------------------------------------------------------------------------

#' BINDER wrapper function.
#'
#' @export
#' @param proxy_regulon Data frame comprising columns: `regulator`, `target_candidate`, `ortholog_module_status` (1 if orthologous, 0 otherwise), `ME` (1 if high affinity for regulator motif, 0 otherwise) and `PE` (1 if orthologous interaction, 0 otherwise).
#' @param expression Numerical expression matrix; can be expression matrix with genes represented on rows and experimental conditions represented on columns or symmetric gene-gene coexpression matrix.
#' @param hyperparams A named list of user-defined hyperparameters (see `?BINDER::run_sampler_binder`, `?BINDER::run_sampler_non_auxiliary` and `?BINDER::run_sampler_deterministic`).
#' @param inits A named list of user-defined initial values to begin each MCMC chain (see `?BINDER::run_sampler_binder`, `?BINDER::run_sampler_non_auxiliary` and `?BINDER::run_sampler_deterministic`).
#' @param O Named list where each name is a target candidate and each element is a character vector comprising names of genes that should not be included in the computation of `CM` and `CP` for the corresponding target candidate name; by default only autocoexpression is excluded.
#' @param delta_CM Numerical cutoff point for coexpression module for CM: any values above `threshold` are considered to form a coexpression module with `target_candidate`; defaults to "auto" which represents the 95th percentile of coexpression scores involving `target_candidate`.
#' @param delta_CP Numerical cutoff point for coexpression module for CM: any values above `threshold` are considered to form a coexpression module with `target_candidate`; defaults to "auto" which represents the 95th percentile of coexpression scores involving `target_candidate`.
#' @param is_coexpression logical; if TRUE, expression matrix is treated as symmetric coexpression matrix.
#' @param model Indicates which model should be fit to the data: one of "BINDER", "non_auxiliary" or "deterministic".
#' @param n_chains Number of MCMC chains to run; defaults to 1.
#' @param n_draws Number of MCMC draws to run; defaults to 1000.
#' @param burn_in Number of MCMC draws to discard from the beginning of each chain; defaults to 0.
#' @param thin Number of MCMC draws to discard between each accepted MCMC draw for each chain; defaults to 0.
#' @param seed Seed for random number generation; defaults to 1.
#' @return A list containing the MCMC draws for each model parameter for each chain for each putative regulor in `proxy_regulon`.
#'
binder <- function(proxy_regulon, expression, hyperparams=NULL, inits=NULL, O=NULL, delta_CM="auto", delta_CP="auto", is_coexpression=FALSE, model="BINDER", n_chains=1, n_draws=1000, burn_in=0, thin=0, n_cores=1, seed=1){
  proxy_regulon %>% dplyr::mutate_if(is.factor, as.character) -> proxy_regulon
  if(is_coexpression == TRUE){
    coexpression <- expression
  }else{
    coexpression <- compute_coexpression(expression)
  }
  
  regulators <- unique(proxy_regulon$regulator)
  n_regulators <- length(regulators)
  
  print(paste0("Setting up cluster with ", n_cores, " cores..."))
  cl <- parallel::makeCluster(n_cores, type="SOCK", outfile="")
  doSNOW::registerDoSNOW(cl)
  all_posterior_draws <- foreach::foreach(n=1:n_regulators) %dopar% {
      
      current_regulator <- regulators[n]
      print(paste0("Starting ", current_regulator, "..."))
      current_proxy_regulon <- proxy_regulon[proxy_regulon$regulator == current_regulator, ]
      current_proxy_regulon <- current_proxy_regulon[current_proxy_regulon$target_candidate %in% colnames(coexpression), ]
      
      print(paste0("Building proxy structure for ", current_regulator, "..."))
      proxy_structure <- build_proxy_structure(current_proxy_regulon, coexpression, O, delta_CM, delta_CP)
      print(paste0("Preparing data for ", current_regulator, "..."))
      prepared_data <- prepare_data(proxy_structure)
      if(model == "BINDER"){
        print(paste0("Running BINDER model for ", current_regulator, "..."))
        current_results <- run_sampler_binder(prepared_data=prepared_data, hyperparams=hyperparams, inits=inits, n_chains=n_chains, n_draws=n_draws, burn_in=burn_in, thin=thin, seed=seed)
      }else if(model == "non_auxiliary"){
        print(paste0("Running non-auxiliary model for ", current_regulator, "..."))
        current_results <- run_sampler_non_auxiliary(prepared_data=prepared_data, hyperparams=hyperparams, inits=inits, n_chains=n_chains, n_draws=n_draws, burn_in=burn_in, thin=thin, seed=seed)
      }else if(model == "deterministic"){
        print(paste0("Running deterministic model for ", current_regulator, "..."))
        current_results <- run_sampler_deterministic(prepared_data=prepared_data, hyperparams=hyperparams, inits=inits, n_chains=n_chains, n_draws=n_draws, burn_in=burn_in, thin=thin, seed=seed)
      }
      print(paste0("Finishing ", current_regulator, "..."))
      current_results
      
  }
  names(all_posterior_draws) <- regulators
  parallel::stopCluster(cl)
  
  class(all_posterior_draws) <- "posteriorDraws"
  return(all_posterior_draws)
}

#----------------------------------------------------------------------------------------------------

#' `summary` function for class `posteriorDraws`.
#'
#' @export
#' @param x Object of class `posteriorDraws` such as that returned by `BINDER::binder` function call.
#' @param target_candidates Named list where each element comprises a vector subset of target candidate IDs of interest and each list element is named by the regulator ID to which the target candidate subset pertains; if `target_candidates` is not specified, default behaviour returns summaries for all regulator target candidate pairs considered in the model run.
#' @return Summaries (mean, std.dev, 0%, 25%, 50%, 75% and 100% quantiles) for the posterior distribution of theta for each regulator and target candidate pair of interest.
#'
summary.posteriorDraws <- function(x, target_candidates=c("all")){
  stopifnot(inherits(x, "posteriorDraws"))
  results_summary <- c()
  for(r in 1:length(x)){
    regulator <- names(x)[r]
    if(identical(target_candidates, "all") | regulator %in% names(target_candidates)){
      y <- c()
      for(c in 1:length(x[[r]])){
        y <- rbind(y, x[[r]][[c]][, grep("^theta", colnames(x[[r]][[c]]))])
      }
      colnames(y) <- gsub("theta\\[", "", gsub("\\]", "", colnames(y)))
      filter_idx <- if(identical(target_candidates, "all")) 1:ncol(y) else (which(colnames(y) %in% target_candidates[[regulator]]))
      regulon_summary <- t(apply(y[, filter_idx], 2, function(z){ c(mean(z), sd(z), quantile(z))}))
      regulon_summary <- data.frame(regulator=regulator, target_candidate=rownames(regulon_summary), regulon_summary)
      colnames(regulon_summary) <- c("regulator", "target_candidate", "mean", "std.dev.", "0%", "25%", "50%", "75%", "100%")
      rownames(regulon_summary) <- NULL
      results_summary <- rbind(results_summary, regulon_summary)
    }
  }
  results_summary
}

#----------------------------------------------------------------------------------------------------
ptrcksn/BINDER documentation built on Nov. 5, 2019, 1:56 a.m.