#' Observed-data log-likelihood
#'
#' This function returns the value of the observed-data log-likelihood (equation (2) in Lotspeich et al. (2021))
#' for a given dataset and parameter values `theta`, `gamma`, and `p`.
#
#'
#' @param N Phase I sample size
#' @param n Phase II sample size
#' @param Y_unval Column names with the unvalidated outcome. If \code{Y_unval} is null, the outcome is assumed to be error-free.
#' @param Y_val Column names with the validated outcome.
#' @param X_unval Column name(s) with the unvalidated predictors. If \code{X_unval} and \code{X_val} are \code{null}, all precictors are assumed to be error-free.
#' @param X_val Column name(s) with the validated predictors. If \code{X_unval} and \code{X_val} are \code{null}, all precictors are assumed to be error-free.
#' @param C (Optional) Column name(s) with additional error-free covariates.
#' @param Validated Column name with the validation indicator. The validation indicator can be defined as \code{Validated = 1} or \code{TRUE} if the subject was validated and \code{Validated = 0} or \code{FALSE} otherwise.
#' @param Bspline Vector of column names containing the B-spline basis functions.
#' @param comp_dat_all Augmented dataset containing rows for each combination of unvalidated subjects' data with values from Phase II (a matrix)
#' @param theta_pred Vector of columns in \code{comp_dat_all} that pertain to the predictors in the analysis model.
#' @param gamma_pred Vector of columns in \code{comp_dat_all} that pertain to the predictors in the outcome error model.
#' @param theta Parameters for the analysis model (a column vector)
#' @param gamma Parameters for the outcome error model (a column vector)
#' @param p B-spline coefficients for the approximated covariate error model (a matrix)
#' @return Scalar value of the function
#' @export
observed_data_loglik <- function(N, n, Y_unval = NULL, Y_val = NULL, X_unval = NULL, X_val = NULL, C = NULL,
Bspline = NULL, comp_dat_all, theta_pred, gamma_pred, theta, gamma, p) {
# Determine error setting -----------------------------------------
## If unvalidated variable was left blank, assume error-free ------
errorsY <- errorsX <- TRUE
if (is.null(Y_unval)) {errorsY <- FALSE}
if (is.null(X_unval) & is.null(X_val)) {errorsX <- FALSE}
## ------ If unvalidated variable was left blank, assume error-free
# ----------------------------------------- Determine error setting
if (errorsX) {
#sn <- ncol(p)
m <- nrow(p)
}
# For validated subjects --------------------------------------------------------
#################################################################################
## Sum over log[P_theta(Yi|Xi)] -------------------------------------------------
pY_X <- 1 / (1 + exp(-as.numeric(cbind(int = 1, comp_dat_all[c(1:n), theta_pred]) %*% theta)))
pY_X <- ifelse(as.vector(comp_dat_all[c(1:n), c(Y_val)]) == 0, 1 - pY_X, pY_X)
return_loglik <- sum(log(pY_X))
## ------------------------------------------------- Sum over log[P_theta(Yi|Xi)]
#################################################################################
## Sum over log[P(Yi*|Xi*,Yi,Xi)] -----------------------------------------------
if (errorsY) {
pYstar <- 1 / (1 + exp(-as.numeric(cbind(int = 1, comp_dat_all[c(1:n), gamma_pred]) %*% gamma)))
pYstar <- ifelse(as.vector(comp_dat_all[c(1:n), Y_unval]) == 0, 1 - pYstar, pYstar)
return_loglik <- return_loglik + sum(log(pYstar))
}
## ----------------------------------------------- Sum over log[P(Yi*|Xi*,Yi,Xi)]
#################################################################################
if (errorsX) {
## Sum over I(Xi=xk)Bj(Xi*)log p_kj ---------------------------------------------
pX <- p[comp_dat_all[c(1:n), "k"], ]
log_pX <- log(pX)
log_pX[log_pX == -Inf] <- 0
return_loglik <- return_loglik + sum(comp_dat_all[c(1:n), Bspline] * log_pX, na.rm = TRUE)
## --------------------------------------------- Sum over I(Xi=xk)Bj(Xi*)log q_kj
}
#################################################################################
# -------------------------------------------------------- For validated subjects
# For unvalidated subjects ------------------------------------------------------
## Calculate P_theta(y|x) for all (y,xk) ----------------------------------------
pY_X <- 1 / (1 + exp(-as.numeric(cbind(int = 1, comp_dat_all[-c(1:n), theta_pred]) %*% theta)))
pY_X[which(comp_dat_all[-c(1:n), Y_val] == 0)] <- 1 - pY_X[which(comp_dat_all[-c(1:n), Y_val] == 0)]
## ---------------------------------------- Calculate P_theta(y|x) for all (y,xk)
################################################################################
if (errorsY) {
## Calculate P(Yi*|Xi*,y,xk) for all (y,xk) ------------------------------------
pYstar <- 1 / (1 + exp(-as.numeric(cbind(int = 1, comp_dat_all[-c(1:n), gamma_pred]) %*% gamma)))
pYstar[which(comp_dat_all[-c(1:n), Y_unval] == 0)] <- 1 - pYstar[which(comp_dat_all[-c(1:n), Y_unval] == 0)]
## ------------------------------------ Calculate P(Yi*|Xi*,y,xk) for all (y,xk)
} else {
pYstar <- rep(1, nrow(comp_dat_all[-c(1:n), ]))
}
################################################################################
if (errorsX) {
## Calculate Bj(Xi*) p_kj for all (k,j) ----------------------------------------
pX <- rowSums(p[comp_dat_all[-c(1:n), "k"], ] * comp_dat_all[-c(1:n), Bspline])
## ---------------------------------------- Calculate Bj(Xi*) p_kj for all (k,j)
} else {
pX <- rep(1, nrow(comp_dat_all[-c(1:n), ]))
}
################################################################################
## Calculate sum of P(y|xk) x P(Y*|X*,y,xk) x Bj(X*) x p_kj --------------------
if (errorsY & errorsX) {
person_sum <- rowsum(c(pY_X * pYstar) * pX, group = rep(seq(1, (N - n)), times = 2 * m))
} else if (errorsY) {
person_sum <- rowsum(pY_X * pYstar, group = rep(seq(1, (N - n)), times = 2))
} else if (errorsX) {
person_sum <- rowsum(pY_X * pX, group = rep(seq(1, (N - n)), times = m))
}
#person_sum <- rowSums(person_sum)
log_person_sum <- log(person_sum)
log_person_sum[log_person_sum == -Inf] <- 0
## And sum over them all -------------------------------------------------------
return_loglik <- return_loglik + sum(log_person_sum)
################################################################################
# ----------------------------------------------------- For unvalidated subjects
return(return_loglik)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.