R/em_estimation.R

Defines functions em_estimation

Documented in em_estimation

#' Penalized expectation-maximization algorithm.
#'
#' @param p List of parameters with starting values obtained from preprocess.
#' @param item_data Matrix or data frame of item responses.
#' @param pred_data Matrix or data frame of DIF and/or impact predictors.
#' @param prox_data Vector of observed proxy scores.
#' @param mean_predictors Possibly different matrix of predictors for the mean
#' impact equation.
#' @param var_predictors Possibly different matrix of predictors for the
#' variance impact equation.
#' @param item_type Character value or vector indicating the type of
#' item to be modeled.
#' @param theta Vector of fixed quadrature points.
#' @param pen_type Character value indicating the penalty function to use.
#' @param tau_vec Vector of tau values that either are automatically generated
#' or provided by the user. The first \code{tau_vec} will be equal to \code{Inf}
#' to identify a minimal value of tau in which all DIF is removed from the
#' model.
#' @param id_tau Logical indicating whether to identify the minimum value of tau
#' in which all DIF parameters are removed from the model.
#' @param num_tau Numeric value indicating the number of tau values to run
#' regDIF on.
#' @param alpha Numeric value indicating the alpha parameter in the elastic net
#' penalty function.
#' @param gamma Numeric value indicating the gamma parameter in the MCP
#' function.
#' @param pen Index for the tau vector.
#' @param pen.deriv Logical value indicating whether to use the second
#' derivative of the penalized parameter during regularization. The default is
#' TRUE.
#' @param anchor Optional numeric value or vector indicating which item
#' response(s) are anchors (e.g., \code{anchor = 1}).
#' @param final_control Control parameters.
#' @param samp_size Numeric value indicating the sample size.
#' @param num_items Numeric value indicating the number of items.
#' @param num_responses Vector with number of responses for each item.
#' @param num_predictors Numeric value indicating the number of predictors.
#' @param num_quad Numeric value indicating the number of quadrature points.
#' @param adapt_quad Logical value indicating whether to use adaptive quad.
#' needs to be identified.
#' @param optim_method Character value indicating the type of optimization
#' method to use.
#' @param estimator_history List to save EM iterations for supplemental EM algorithm.
#' @param estimator_limit Logical value indicating whether the EM algorithm reached
#' the maxit limit in the previous estimation round.
#' @param NA_cases Logical vector indicating if observation is missing.
#' @param exit_code Integer indicating if the model has converged properly.
#'
#' @return a \code{"list"} of matrices with unprocessed model estimates
#'
#' @keywords internal
#'
em_estimation <- function(p,
                          item_data,
                          pred_data,
                          prox_data,
                          mean_predictors,
                          var_predictors,
                          item_type,
                          theta,
                          pen_type,
                          tau_vec,
                          id_tau,
                          num_tau,
                          alpha,
                          gamma,
                          pen,
                          pen.deriv,
                          anchor,
                          final_control,
                          samp_size,
                          num_items,
                          num_responses,
                          num_predictors,
                          num_quad,
                          adapt_quad,
                          optim_method,
                          estimator_history,
                          estimator_limit,
                          NA_cases,
                          exit_code) {

  # Maximization and print settings.
  lastp <- p
  eps <- Inf
  iter <- 1
  models_to_fit <- ifelse(id_tau,num_tau,length(tau_vec))

  # Loop until convergence or maximum number of iterations.
  while(eps > final_control$tol && iter < final_control$maxit){


    # E-step: Evaluate Q function with current parameter estimates p.
    eout <- if(is.null(prox_data)) Estep(p,
                                         item_data,
                                         pred_data,
                                         item_type,
                                         mean_predictors,
                                         var_predictors,
                                         theta,
                                         samp_size,
                                         num_items,
                                         num_responses,
                                         adapt_quad,
                                         num_quad,
                                         get_eap = FALSE,
                                         NA_cases = NA_cases)



    mout <- tryCatch(
      {
        Mstep_simple(p,
                     item_data,
                     pred_data,
                     prox_data,
                     mean_predictors,
                     var_predictors,
                     eout,
                     item_type,
                     pen_type,
                     tau_vec[pen],
                     pen,
                     pen.deriv,
                     alpha,
                     gamma,
                     anchor,
                     final_control,
                     samp_size,
                     num_responses,
                     num_items,
                     num_quad,
                     num_predictors,
                     num_tau,
                     max_tau = FALSE,
                     optim_method)
      },
      error = function(e) {e; return(NULL)},
      warning = function(w) {} )



    if(is.null(mout)) {
      exit_code <- 4
      break
    }

    # Obtain parameter estimates.
    p <- mout$p

    # Update and check for convergence: Calculate the difference
    # in parameter estimates from current to previous.
    eps = sqrt(sum((unlist(p)-unlist(lastp))^2))

    # Save parameter estimates and observed log-likelihood for supplemental em.
    eout_obs_ll <- ifelse(is.null(eout), NA, eout$observed_ll)

    if(!is.null(eout)) {
      estimator_history[[pen]][,iter] <- c(unlist(p), eout_obs_ll)
    } else {
      estimator_history[[pen]][,iter] <- NA
    }


    # Add row for next EM step.
    if(eps > final_control$tol) {
      estimator_history[[pen]] <- cbind(estimator_history[[pen]],
                                        matrix(0,ncol=1,nrow=length(unlist(p))+1))
    }

    # Update parameter list.
    lastp <- p


    # Update the iteration number.
    iter = iter + 1
    if(iter == final_control$maxit) {
      warning("Iteration limit reached without convergence", call. = FALSE, immediate. = TRUE)
      estimator_limit <- T
      exit_code <- exit_code + 1
    }

    # if(final_control$optim.method == "CD") {
    #   cat('\r', '                                         ',
    #       sprintf("Models Completed: %d of %d  EM Iteration: %d  EM Change: %f",
    #               pen - 1,
    #               models_to_fit,
    #               iter,
    #               round(eps, nchar(final_control$tol))))
    # } else {
      cat('\r', sprintf("Models Completed: %d of %d  Iteration: %d  Change: %f",
                        pen - 1,
                        models_to_fit,
                        iter,
                        round(eps, nchar(final_control$tol))))
    # }



    utils::flush.console()

    # Stop estimation if model would become under-identified because of tau
    # being too small.
    if(mout$under_identified) break
    # if(!is.null(prox_data)) break



  }

  if(exit_code == 4) return(NULL)

  # eout <- if(!is.null(prox_data)) Estep_proxy(p,
  #                                            item_data,
  #                                            pred_data,
  #                                            item_type,
  #                                            mean_predictors,
  #                                            var_predictors,
  #                                            prox_data,
  #                                            samp_size,
  #                                            num_items,
  #                                            num_responses,
  #                                            get_eap = FALSE,
  #                                            NA_cases = NA_cases)

  # Get information criteria.
  infocrit <- information_criteria(eout,
                                   p,
                                   item_data,
                                   pred_data,
                                   prox_data,
                                   mean_predictors,
                                   var_predictors,
                                   item_type,
                                   gamma,
                                   samp_size,
                                   num_responses,
                                   num_items,
                                   num_quad)

  # Get EAP score.
  eout_eap <- if(is.null(prox_data)) {
    Estep(p,
          item_data,
          pred_data,
          item_type,
          mean_predictors,
          var_predictors,
          theta,
          samp_size,
          num_items,
          num_responses,
          adapt_quad,
          num_quad,
          get_eap = TRUE,
          NA_cases = NA_cases)
  } else {
    NULL
  }

  # Option to identify maximum value of tau which removes all DIF from model.
  if(id_tau) {

    max_tau <- Mstep_simple(p,
                     item_data,
                     pred_data,
                     prox_data,
                     mean_predictors,
                     var_predictors,
                     eout,
                     item_type,
                     pen_type,
                     tau_vec[1],
                     pen,
                     pen.deriv,
                     alpha,
                     gamma,
                     anchor,
                     final_control,
                     samp_size,
                     num_responses,
                     num_items,
                     num_quad,
                     num_predictors,
                     num_tau,
                     max_tau = TRUE,
                     optim_method)


  } else {
    max_tau <- NULL
  }

  # Return model results.
  return(list(p=p,
              # complete_info=mout$inv_hess_diag,
              infocrit=infocrit,
              max_tau=max_tau,
              # max_tau=mout$id_max_z,
              estimator_history=estimator_history,
              under_identified=mout$under_identified,
              estimator_limit=estimator_limit,
              eap=eout_eap,
              exit_code=exit_code))

}

Try the regDIF package in your browser

Any scripts or data that you put into this service are public.

regDIF documentation built on May 29, 2024, 9:31 a.m.