R/RcppExports.R

Defines functions z2comat compute_Q_condpr equal_unit_vec log_marginal_Q_identity log_marginal log_full mat_times_vec_by_col logsumexp

Documented in equal_unit_vec log_full log_marginal log_marginal_Q_identity z2comat

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

logsumexp <- function(logv_arma) {
    .Call('_rewind_logsumexp', PACKAGE = 'rewind', logv_arma)
}

mat_times_vec_by_col <- function(m, v) {
    .Call('_rewind_mat_times_vec_by_col', PACKAGE = 'rewind', m, v)
}

#' Function to compute the full cluster-specific likelihood given latent variables
#'
#' This function computes the likelihood WITHOUT integrating over
#' the distribution of component specific parameter (e.g., machine usage profiles).
#' This function conditions upon a few model parameters: the true and false positive
#' rates (theta and psi), the Q matrix and {p}-the prevalence parameter for each machines.
#'
#' @param Y the data for the current cluster (a subset of observations.)
#' @param eta_star A matrix of M columns (of machines). Multivariate binary indicators of presence or absence of
#' protein landmarks (could be a matrix with rows for multiple )
#' @param Q Q-matrix
#' @param p prevalence parameter for each machine; should be a vector of dimension M.
#' @param theta true positive rates
#' @param psi true positive rates
#'
#' @return a vector of likelihood values (one per cluster) given other model parameters.
#' @export
log_full <- function(Y, eta_star, Q, p, theta, psi) {
    .Call('_rewind_log_full', PACKAGE = 'rewind', Y, eta_star, Q, p, theta, psi)
}

#' R Function to compute the cluster-specific marginal likelihood
#'
#' This R function computes the marginal likelihood by integrating over
#' the distribution of component specific parameter (e.g., machine usage profiles).
#' This function conditions upon a few model parameters: the true and false positive
#' rates (theta and psi), the Q matrix and {p}-the prevalence parameter for each machines.
#'
#' @param Y the data for the current cluster (a subset of observations.)
#' @param eta_star_enumerate fixed binary matrix of 2^M rows and M columns. Need to be prespecified.
#' @param Q Q-matrix
#' @param p prevalence parameter for each machine; should be a vector of dimension M.
#' @param theta true positive rates
#' @param psi true positive rates
#'
#' @examples
#' # simulate data:
#' L0 <- 100
#' options_sim0  <- list(N = 200,  # sample size.
#'                       M = 3,   # true number of machines.
#'                       L = L0,   # number of antibody landmarks.
#'                       K = 8,    # number of true components.,
#'                      theta = rep(0.8,L0), # true positive rates
#'                      psi   = rep(0.01,L0), # false positive rates
#'                      alpha1 = 1, # half of the people have the first machine.
#'                      frac = 0.2, # fraction of positive dimensions (L-2M) in Q.
#'                      #pop_frac = rep(1/K0,K0) # population prevalences.
#'                      #pop_frac = (1:K0)/sum(1:K0) # population prevalences.
#'                      pop_frac = c(rep(2,4),rep(1,4)) # population prevalences.
#')
#'
#'  simu     <- simulate_data(options_sim0, SETSEED=TRUE)
#'  simu_dat <- simu$datmat
#'  Y <- simu_dat
#'  Q <- simu$Q
#'  p <- c(0.5,0.25,0.1,0.02,0.05)
#'  theta <- options_sim0$theta
#'  psi   <- options_sim0$psi
#'  H_enumerate <- as.matrix(expand.grid(rep(list(0:1), options_sim0$M)),ncol=options_sim0$M)
#'
#' #log_marginal0(Y, Q, p, theta, psi)
#' log_marginal(Y,H_enumerate, Q, p, theta, psi) # <-- this is the Rcpp implementation.
#'
#' @return log of marginal likelihood given other model parameters.
#' @export
log_marginal <- function(Y, eta_star_enumerate, Q, p, theta, psi) {
    .Call('_rewind_log_marginal', PACKAGE = 'rewind', Y, eta_star_enumerate, Q, p, theta, psi)
}

#' R Function to compute the cluster-specific marginal likelihood (just for Q=I)
#'
#' This R function computes the marginal likelihood by integrating over
#' the distribution of component specific parameter (e.g., machine usage profiles).
#' This function conditions upon a few model parameters: the true and false positive
#' rates (theta and psi), the Q matrix and {p}-the prevalence parameter for each machines.
#'
#' @param Y the data for the current cluster (a subset of observations.)
#' @param p prevalence parameter for each machine; should be a vector of dimension
#' M=L.
#' @param theta true positive rates
#' @param psi true positive rates
#'
#' @examples
#' # simulate data:
#' L0 <- 100
#' options_sim0  <- list(N = 200,  # sample size.
#'                       M = 3,   # true number of machines.
#'                       L = L0,   # number of antibody landmarks.
#'                       K = 8,    # number of true components.,
#'                      theta = rep(0.8,L0), # true positive rates
#'                      psi   = rep(0.01,L0), # false positive rates
#'                      alpha1 = 1, # half of the people have the first machine.
#'                      frac = 0.2, # fraction of positive dimensions (L-2M) in Q.
#'                      #pop_frac = rep(1/K0,K0) # population prevalences.
#'                      #pop_frac = (1:K0)/sum(1:K0) # population prevalences.
#'                      pop_frac = c(rep(2,4),rep(1,4)) # population prevalences.
#')
#'
#'  simu     <- simulate_data(options_sim0, SETSEED=TRUE)
#'  simu_dat <- simu$datmat
#'  Y <- simu_dat
#'  Q <- simu$Q
#'  p <- rep(0.5,L0) #<----- M must equal L.
#'  theta <- options_sim0$theta
#'  psi   <- options_sim0$psi
#'
#' log_marginal_Q_identity(Y, p, theta, psi) # <-- this is the Rcpp implementation.
#'
#' @return log of marginal likelihood given other model parameters.
#' @export
log_marginal_Q_identity <- function(Y, p, theta, psi) {
    .Call('_rewind_log_marginal_Q_identity', PACKAGE = 'rewind', Y, p, theta, psi)
}

#' check whether a vector is equal to a unit vector with the one at a particular
#' position
#'
#' This function is used in updating Q matrix if we constrain the updates within
#' the identifiability assumption
#'
#' @param v the vector (a binary vector)
#' @param k the index that is being checked if \code{v[k]} is the only one in
#' vector \code{v}. \code{k} must be smaller than or equal to the length of k
#' @return TRUE if \eqn{v = \mathbf{e}_k}; FALSE otherwise.
#' @examples
#'  equal_unit_vec(c(1,0,0,0,0,0),1)
#'  equal_unit_vec(c(1,0,0,0,0,0),2)
#'  equal_unit_vec(c(0,0,2,0,0,0),3)
#'  equal_unit_vec(c(0,0,1,0,0,0),3)
#' @export
equal_unit_vec <- function(v, k) {
    .Call('_rewind_equal_unit_vec', PACKAGE = 'rewind', v, k)
}

compute_Q_condpr <- function(Q, H, Yl, k, l, theta, psi) {
    .Call('_rewind_compute_Q_condpr', PACKAGE = 'rewind', Q, H, Yl, k, l, theta, psi)
}

#' Compute the posterior co-clustering probability matrix (probability that i and j 
#' are clustered together).
#'
#' This function is to evaluate the recovered clusters
#' 
#' @param z a matrix of posterior samples, with subjects and MCMC samples in 
#' the rows and columns, respectively.
#'
#' @return a matrix of empirical co-clustering frequencies based on the
#' posterior samples
#' 
#' @examples 
#' z2comat(matrix(c(1,1,2,2,3,4,5,6,5,7),ncol=1))
#' 
#' @export
z2comat <- function(z) {
    .Call('_rewind_z2comat', PACKAGE = 'rewind', z)
}
zhenkewu/rewind documentation built on Sept. 9, 2020, 3:40 p.m.