Nothing
#' Fitting binary regression with missing categorical covariates using new likelihood based method
#'@description This function allows users to fit logistic regression models with incomplete predictors that are categorical. The model is fitted using a new likelihood-based method, which ensures reliable parameter estimation even when dealing with missing data. For more information on the underlying methodology, please refer to Pradhan, Nychka, and Bandyopadhyay (2024).
#'
#' @param formula A formula expression as for regression models, of the form \code{response ~ predictors}. The response should be a numeric binary variable with missing values, and predictors can be any variables. A predictor with categorical values with missing can be used in the model. See the documentation of formula for other details.
#' @param data Input data for fitting the model
#' @param conflev Confidence level, the default is 0.95
#' @param correctn a TRUE or FALSE value, by default it is TRUE.
#' @param verbose a TRUE or FALSE value, default is verbose = TRUE
#'
#' @return return the logistic regression estimates
#' @export
#'
#' @examples
#' \donttest{
#' # -----------------Example 1: Metastatic Melanoma --------------------------
#'
#' est1 <- logRegMAR (failcens ~ size+type+nodal+age+sex+trt,
#' data = metastmelanoma, conflev = 0.95, correctn = FALSE)
#'
#' est1
#' # -----------------Bias reduced estimates due to Firth (1993) --------------
#' est2 <- logRegMAR (failcens ~ size+type+nodal+age+sex+trt,
#' data = metastmelanoma, conflev = 0.95, correctn = TRUE)
#'
#' est2
#' # -----------------Bias reduced estimates due to Firth (1993) --------------
#' est2 <- logRegMAR (CaseCntrl ~ Numnill+Numsleep+Smoke+Set+Reftime,
#' data=meningitis, conflev = 0.95, correctn = TRUE)
#' est2
#' }
#' @references
#' Firth, D. (1993). Bias reduction of maximum likelihood estimates, Biometrika, 80, 27-38. doi:10.2307/2336755.
#'
#' Kosmidis, I., Firth, D. (2021). Jeffreys-prior penalty, finiteness and shrinkage in binomial-response generalized linear models. Biometrika, 108, 71-82. doi:10.1093/biomet/asaa052.
#'
#' Pradhan, V., Nychka, D. and Bandyopadhyay, S. (2025). Bridging Gaps in Logistic Regression: Tackling Missing Categorical Covariates with a New Likelihood Method (to be submitted).
#'
#' Pradhan, V., Nychka, D. and Bandyopadhyay, S. (2025). glmFitMiss: Binary Regression with Missing Data in R (to be submitted)
#'
#' @importFrom stats optim
logRegMAR <- function(formula, data, conflev=0.95, correctn=TRUE, verbose = TRUE){
# Extract the response variable and covariates from the formula
response_var <- all.vars(formula)[1]
covariates <- all.vars(formula)[-1]
# Check for missing values in the covariates
if (!any(sapply(data[covariates], function(x) any(is.na(x))))) {
if (verbose) message("This function is not applicable as there are no missing values in the covariates.\n")
return(NULL)
}
# Check for missing values in the response variable
if (any(is.na(data[[response_var]]))) {
if (verbose) message("This function is not applicable as there are a missing values in the response variable.\n")
return(NULL)
}
# Extract variable names from the formula
term_labels <- attr(terms(formula), "term.labels")
# Add the intercept name
param_names <- c("(Intercept)", term_labels)
zalpha <- abs(qnorm((1-conflev)/2))
original_df <- data
augData <- dataAugmentationDWN(subset(original_df, select = all.vars(formula)))
k <- augData$distptrn
# ----initialize the beta and theta parameters-------------
beta = matrix(rep(1e-4), length(all.vars(formula)), 1)
initial_theta <- matrix(1/k, k, 1)
initial_par <- c(as.vector(beta), initial_theta)
# Optimization call with additional arguments
optim_result <- optim(par = initial_par, fn = llkmiss, data = original_df,
formula = formula, augData = augData, method = "BFGS",
biasCorr=correctn, hessian = TRUE)
# Now, calculate the standard errors
hessian_matrix <- optim_result$hessian
hessian_submatrix_beta <- hessian_matrix[1:length(all.vars(formula)), 1:length(all.vars(formula))]
if(is.null(hessian_submatrix_beta)){
message("Hessian was not returned. Ensure that 'hessian=TRUE' and the method supports it")
} else {
hessian_inv <- solve(hessian_submatrix_beta) # Inverse of the Hessian
std_errors <- sqrt(diag(hessian_inv)) # Standard errors are sqrt of diagonal elements
}
# Extract beta estimates
beta <- optim_result$par[1:length(beta)]
# Set the names of the estimates
names(beta) <- param_names
se_beta_em <- std_errors
# Compute the z-values from the betas and standard errors
z_values <- beta / se_beta_em
# Calculate the p-values using the standard normal distribution
p_values <- 2 * (1 - pnorm(abs(z_values)))
LowerCI <- beta -zalpha*se_beta_em
UpperCI <- beta +zalpha*se_beta_em
lkest <- data.frame(beta, se_beta_em, LowerCI, UpperCI, p_values)
# rownames(lkest) <- c("Intercept", "x1", "x2", "x3")
colnames(lkest) <- c("Estimate", "StdError", "LowerCI", "UpperCI","Pr(>|z|)")
return (lkest)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.