R/itemrest_core.R
In ItemRest: Automated Item Removal Strategies for Exploratory Factor Analysis

Documented in cor_matrix_custom descriptive_stats determine_n_factors efa_custom get_combinations howard identify_problem_items itemrest print.itemrest_result sort_item_ids test_removals

# ===================================================================
# NOTE ON PACKAGE DEPENDENCIES
# ===================================================================
# Required packages (e.g., psych, gtools, utils, stats) should be
# listed under the 'Imports:' field in the package's DESCRIPTION file.
# ===================================================================


# ===================================================================
# HELPER (INTERNAL) FUNCTIONS
# ===================================================================

#' Check for cross-loading using Howard and Costello & Osborne criteria.
#' @keywords internal
howard <- function(primary, secondary) {
  # An item is considered "clean" if:
  # Primary loading >= 0.40, secondary loading < 0.30, and difference >= 0.20.
  # Otherwise, it is identified as a cross-loading item.
  return(!(primary >= 0.40 && secondary < 0.30 && (primary - secondary) >= 0.20))
}

#' Calculate basic descriptive statistics for a dataset.
#' @keywords internal
descriptive_stats <- function(data) {
  list(
    n_items = ncol(data),
    n_obs = nrow(data),
    min_value = min(data, na.rm = TRUE),
    max_value = max(data, na.rm = TRUE)
  )
}

#' Calculate a correlation matrix.
#' @keywords internal
cor_matrix_custom <- function(data, method = "polychoric") {
  if (method == "polychoric") {
    cor_mat <- suppressMessages(qgraph::cor_auto(data))
  } else {
    cor_mat <- stats::cor(data, use = "pairwise.complete.obs", method = method)
  }
  return(cor_mat)
}

#' Determine the number of factors using Parallel Analysis.
#' @keywords internal
determine_n_factors <- function(data, cor_method = "pearson") {
  cor_mat <- cor_matrix_custom(data, cor_method)
  n_obs <- nrow(data)
  fa_parallel <- suppressMessages(psych::fa.parallel(
    cor_mat, fa = "fa", n.iter = 20, n.obs = n_obs,
    show.legend = FALSE, main = NULL
  ))
  return(fa_parallel$nfact)
}

#' Run a custom EFA.
#' @keywords internal
efa_custom <- function(data, n_factors = 1, cor_method = "polychoric", extract = "uls", rotate = "oblimin") {
  cor_mat <- cor_matrix_custom(data, cor_method)
  efa <- NULL
  invisible(utils::capture.output({
    efa <- psych::fa(r = cor_mat, nfactors = n_factors, rotate = rotate, fm = extract)
  }))
  alpha_val <- psych::alpha(data, check.keys = FALSE)$total$raw_alpha
  loading <- efa$loadings[]
  explained_var <- sum(efa$Vaccounted["SS loadings", 1:n_factors]) / ncol(data)
  list(efa = efa, alpha = alpha_val, explained_var = explained_var, loadings = loading)
}

#' Sort item IDs numerically.
#' @keywords internal
sort_item_ids <- function(x) {
  gtools::mixedsort(x)
}

#' Identify problematic items (low-loading or cross-loading).
#' @keywords internal
identify_problem_items <- function(efa_res, min_loading = 0.30, loading_diff = 0.10) {
  loadings <- abs(efa_res$loadings)
  items <- rownames(loadings)
  cross_3 <- c()
  cross_2 <- c()
  low_loading <- c()

  if (ncol(loadings) == 0) return(list(cross_3 = c(), cross_2 = c(), low_loading = c()))

  for (i in 1:nrow(loadings)) {
    item_loads <- loadings[i, ]
    primary <- max(item_loads, na.rm = TRUE)

    # 1. LOW LOADING CHECK:
    # If it is smaller than the user-defined `min_loading` value, mark it as `low_loading`.
    if (all(item_loads < min_loading, na.rm = TRUE)) {
      low_loading <- c(low_loading, items[i])
      next
    }

    if (ncol(loadings) > 1) {
      sorted_loads <- sort(item_loads, decreasing = TRUE)
      secondary <- sorted_loads[2]

      # 2. CROSS-LOADING CHECK:
      # If the user selected `howard`, use the classic criterion (0.40-0.30-0.20).
      # Otherwise, check whether the difference between the two loadings is smaller than `loading_diff`.
      if (loading_diff == "howard") {
        is_cross <- howard(primary, secondary)
      } else {
        is_cross <- (primary - secondary) < as.numeric(loading_diff)
      }

      if (is_cross) {
        # 0.30 threshold is retained for categorization.
        if (sum(item_loads >= 0.30, na.rm = TRUE) >= 3) {
          cross_3 <- c(cross_3, items[i])
        } else if (sum(item_loads >= 0.30, na.rm = TRUE) == 2) {
          cross_2 <- c(cross_2, items[i])
        }
      }
    }
  }
  list(cross_3 = unique(cross_3), cross_2 = unique(cross_2), low_loading = unique(low_loading))
}


#' Generate combinations of items to remove.
#' @keywords internal
get_combinations <- function(items) {
  n <- length(items)
  if (n == 0) return(list(list()))
  all_combs <- unlist(lapply(1:n, function(i) utils::combn(items, i, simplify = FALSE)), recursive = FALSE)
  return(c(list(list()), all_combs)) # Add empty set for the baseline (no removal) case
}

#' Test removal strategies and build a summary table.
#' @keywords internal
test_removals <- function(data, base_items, combs, n_factors, cor_method, extract, rotate, min_loading, loading_diff) {
  summary_list <- list()
  total <- length(combs)
  pb <- utils::txtProgressBar(min = 0, max = total, style = 3)

  for (i in seq_along(combs)) {
    items_to_remove <- combs[[i]]
    remaining_items <- setdiff(base_items, items_to_remove)

    if (length(remaining_items) > n_factors) {
      efa_out <- efa_custom(data[, remaining_items, drop = FALSE], n_factors, cor_method, extract, rotate)

      # Pass the new settings to the identify_problem_items function.
      prob_items_test <- identify_problem_items(efa_out, min_loading, loading_diff)

      has_cross_loading <- length(prob_items_test$cross_2) > 0 || length(prob_items_test$cross_3) > 0

      removed_label <- if (length(items_to_remove) == 0) "None" else paste(sort_item_ids(items_to_remove), collapse = "-")
      load_mat <- as.matrix(efa_out$loadings)
      threshold <- 0.30
      valid_loads <- abs(load_mat)[abs(load_mat) > threshold]
      load_min <- if (length(valid_loads) > 0) formatC(min(valid_loads), digits = 2, format = "f") else NA
      load_max <- if (length(valid_loads) > 0) formatC(max(valid_loads), digits = 2, format = "f") else NA
      loading_range <- if(is.na(load_min)) "N/A" else paste0(load_min, "-", load_max)

      summary_list[[i]] <- data.frame(
        Removed_Items = removed_label, Total_Explained_Var = efa_out$explained_var,
        Factor_Loading_Range = loading_range, Cronbachs_Alpha = efa_out$alpha,
        Cross_Loading = ifelse(has_cross_loading, "Yes", "No"), stringsAsFactors = FALSE
      )
    }
    utils::setTxtProgressBar(pb, i)
  }
  close(pb)
  summary_table <- do.call(rbind, summary_list)
  summary_table <- summary_table[!duplicated(summary_table$Removed_Items), ]

  if (nrow(summary_table) > 0) {
    sorted_indices <- order(summary_table$Cross_Loading, -summary_table$Total_Explained_Var)
    summary_table <- summary_table[sorted_indices, ]
  }

  return(summary_table)
}


# ===================================================================
# MAIN (COMPUTATION) FUNCTION
# ===================================================================

#' Evaluate Item Removal Strategies for Exploratory Factor Analysis (EFA)
#'
#' @description
#' This function automates the process of identifying low-quality items (those
#' with low factor loadings or significant cross-loadings) in an Exploratory
#' Factor Analysis (EFA). It systematically tests various combinations of
#' removing these problematic items and evaluates the impact on model fit,
#' returning a comprehensive summary of all tested strategies.
#'
#' @param data A numeric `data.frame` or `matrix` for the analysis.
#' @param cor_method The correlation method to use, e.g., `"pearson"` or `"polychoric"`.
#' @param n_factors The number of factors. If `NULL`, it's determined automatically by parallel analysis.
#' @param extract The factor extraction (estimation) method. See `psych::fa`. Default is `"uls"`.
#' @param rotate The rotation method. See `psych::fa`. Default is `"oblimin"`.
#' @param min_loading Minimum factor loading threshold. Default is 0.30.
#' @param loading_diff Threshold for cross-loading identification. Default is 0.10. Can be "howard".
#'
#' @return An object of class `itemrest_result`. This is a list containing the
#'   following components:
#'   \item{descriptive_stats}{Basic descriptive statistics of the input data.}
#'   \item{initial_efa}{The results of the initial EFA before any items are removed.}
#'   \item{problem_items}{A list of items identified as low-loading or cross-loading.}
#'   \item{removal_summary}{A data.frame summarizing the results of all tested removal strategies.}
#'   \item{optimal_strategy}{The best-performing strategy that resulted in a clean factor structure (no cross-loadings).}
#'   \item{settings}{A list of the settings used for the analysis.}
#'
#' @export
#'
#' @examples
#' \donttest{
#' # We will use the 'bfi' dataset from the 'psych' package.
#' # This requires the 'psych' package to be installed.
#' if (requireNamespace("psych", quietly = TRUE)) {
#'   data(bfi, package = "psych")
#'
#'   # 1. Prepare the data: Select the personality items (first 25 columns)
#'   #    and remove rows with missing values for this example.
#'   example_data <- bfi[, 1:25]
#'   example_data <- na.omit(example_data)
#'
#'   # 2. Run the item removal analysis.
#'   #    Based on theory, the Big Five model has 5 factors.
#'   results <- itemrest(
#'     data = example_data,
#'     n_factors = 5,
#'     cor_method = "pearson" # Data is not ordinal, so pearson is appropriate
#'   )
#'
#'   # 3. Print the report for optimal strategies (default).
#'   print(results)
#'
#'   # 4. Print the report for all tested strategies.
#'   print(results, report = "all")
#' }
#' }
itemrest <- function(data,
                     cor_method = "pearson",
                     n_factors = NULL,
                     extract = "uls",
                     rotate = "oblimin",
                     min_loading = 0.30,
                     loading_diff = 0.10) {

  # --- Parameter Validation and Setup ---
  colnames(data) <- gsub("\\.", "_", colnames(data))
  cor_method <- tolower(cor_method)
  # ... (other parameter validations would go here) ...

  n_factors_determined <- n_factors
  auto_n_factors_flag <- is.null(n_factors)
  if (auto_n_factors_flag) {
    n_factors_determined <- determine_n_factors(data, cor_method)
  }
  descriptives <- descriptive_stats(data)
  initial_efa <- efa_custom(data, n_factors_determined, cor_method, extract, rotate)

  problem_items <- identify_problem_items(initial_efa, min_loading = min_loading, loading_diff = loading_diff)
  all_problem_items <- sort_item_ids(unique(c(problem_items$cross_3, problem_items$cross_2, problem_items$low_loading)))

  message("--- Settings and Descriptive Statistics ---")
  if (auto_n_factors_flag) {
    message("Number of Factors (Auto): ", n_factors_determined, " (Determined by Parallel Analysis)")
  } else {
    message("Number of Factors (Manual): ", n_factors_determined)
  }
  message("Number of Items: ", descriptives$n_items)
  message("Number of Observations: ", descriptives$n_obs)
  message("Minimum Value: ", descriptives$min_value)
  message("Maximum Value: ", descriptives$max_value)
  message("\n--- Initial EFA Results (No items removed) ---")
  message("Cronbach's Alpha: ", formatC(initial_efa$alpha, digits = 3, format = "f"))
  message("Total Explained Variance: % ", formatC(initial_efa$explained_var * 100, digits = 2, format = "f"))
  message("Low-loading Items: ", ifelse(length(problem_items$low_loading) == 0, "None", paste(problem_items$low_loading, collapse = ", ")))
  message("Minimum Loading Threshold: ", formatC(min_loading, digits = 2, format = "f"))
  message("Loading Difference Criterion: ",
          ifelse(loading_diff == "howard",
                 "Howard (2016)",
                 formatC(as.numeric(loading_diff), digits = 2, format = "f")))
  cross_items_combined <- c(problem_items$cross_2, problem_items$cross_3)
  message("Cross-loading Items: ", ifelse(length(cross_items_combined) == 0, "None", paste(unique(sort_item_ids(cross_items_combined)), collapse = ", ")))

  message("\nAll Identified Low-Quality Items: ", ifelse(length(all_problem_items) == 0, "None", paste(all_problem_items, collapse = ", ")))

  # --- Test Removal Strategies ---
  removal_summary <- NULL
  optimal_strategy <- NULL

  if (length(all_problem_items) > 0) {
    all_combs <- get_combinations(all_problem_items)

    # --- Display the progress bar ---
    message("\n[Info] Testing ", length(all_combs) - 1, " different removal combinations for low-quality items...")
    removal_summary <- test_removals(data, colnames(data), all_combs, n_factors_determined,
                                     cor_method, extract, rotate,
                                     min_loading = min_loading, loading_diff = loading_diff)

    optimal_candidates <- removal_summary[ removal_summary[["Cross_Loading"]] == "No",
                                           , drop = FALSE ]

    if (nrow(optimal_candidates) > 0) {
      optimal_strategy <- optimal_candidates[1, , drop = FALSE]
    } else {
      optimal_strategy <- NULL
    }
  }

  # --- Create the output object ---
  output <- list(
    descriptive_stats = descriptives,
    initial_efa = initial_efa,
    problem_items = problem_items,
    all_problem_items_combined = all_problem_items,
    removal_summary = removal_summary,
    optimal_strategy = if (exists("optimal_strategy")) optimal_strategy else NULL,
    settings = list(
      n_factors = n_factors_determined, cor_method = cor_method,
      extract = extract, rotate = rotate,
      auto_n_factors = auto_n_factors_flag
    )
  )

  # Assign the custom class
  class(output) <- "itemrest_result"

  # Return the object
  return(output)
}


# ===================================================================
# PRESENTATION (PRINT) METHOD
# ===================================================================

#' Print method for `itemrest_result` class
#'
#' @param x An object of class `itemrest_result`.
#' @param report The type of report to generate: `"optimal"` (default) or `"all"`.
#' @param ... Other arguments (not used).
#'
#' @return No return value, called for side effects (prints the report to the console).
#' @export
#'
#' @examples
#' \donttest{
#' if (requireNamespace("psych", quietly = TRUE)) {
#'   data(bfi, package = "psych")
#'   example_data <- na.omit(bfi[, 1:25])
#'   results <- itemrest(example_data, n_factors = 5)
#'
#'   # Print the default optimal report
#'   print(results)
#'
#'   # Print the report of all strategies
#'   print(results, report = "all")
#' }
#' }
print.itemrest_result <- function(x, report = "optimal", ...) {

  # Initial info is printed by itemrest() during computation.
  # This function only prints the final result tables and the final message.

  # Header
  cat("\n==============================\n")
  cat(" Item Removal Strategy Report\n")
  cat("==============================\n")

  if (is.null(x$removal_summary)) {
    cat("\n>> Result: No low-quality items were detected. No additional report table available.\n")
  } else {
    report_table <- NULL
    header <- ""

    # Choose which table to display based on the 'report' argument
    if (report == "all") {
      header <- "\n--- Results of All Removal Strategies ---\n"
      report_table <- x$removal_summary
    } else { # default to optimal
      header <- "\n--- Optimal Removal Strategies (No Cross-Loadings) ---\n"

      # For the optimal report, we only show strategies with No cross-loading
      report_table <- x$removal_summary[x$removal_summary$Cross_Loading == "No", ]
    }

    cat(header)
    if (is.null(report_table) || nrow(report_table) == 0) {
      cat("No strategy matching this criterion was found.\n")
    } else {

      # Make the table more readable before printing
      display_table <- report_table
      display_table$Total_Explained_Var <- paste0("% ", formatC(display_table$Total_Explained_Var * 100, format = "f", digits = 2))
      display_table$Cronbachs_Alpha <- round(display_table$Cronbachs_Alpha, 3)
      row.names(display_table) <- NULL
      print(display_table)
    }
  }

  # Final Message
  cat("\n-----------------------------------------------------\n")
  message("Final Reminder: Let algorithms be your compass, not your captain. Valid item removal also requires theoretical competence.")

  invisible(x)
}

Any scripts or data that you put into this service are public.

ItemRest documentation built on April 13, 2026, 5:07 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

ItemRest
Automated Item Removal Strategies for Exploratory Factor Analysis

R/itemrest_core.R
In ItemRest: Automated Item Removal Strategies for Exploratory Factor Analysis

Defines functions print.itemrest_result itemrest test_removals get_combinations identify_problem_items sort_item_ids efa_custom determine_n_factors cor_matrix_custom descriptive_stats howard

Documented in cor_matrix_custom descriptive_stats determine_n_factors efa_custom get_combinations howard identify_problem_items itemrest print.itemrest_result sort_item_ids test_removals

Try the ItemRest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ItemRest Automated Item Removal Strategies for Exploratory Factor Analysis

R/itemrest_core.R In ItemRest: Automated Item Removal Strategies for Exploratory Factor Analysis

Defines functions print.itemrest_result itemrest test_removals get_combinations identify_problem_items sort_item_ids efa_custom determine_n_factors cor_matrix_custom descriptive_stats howard

Documented in cor_matrix_custom descriptive_stats determine_n_factors efa_custom get_combinations howard identify_problem_items itemrest print.itemrest_result sort_item_ids test_removals

Try the ItemRest package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

ItemRest
Automated Item Removal Strategies for Exploratory Factor Analysis

R/itemrest_core.R
In ItemRest: Automated Item Removal Strategies for Exploratory Factor Analysis