R/mark_sig.R

Defines functions mark_sig

Documented in mark_sig

#'@title Mark Parameter Estimates (Edge Labels) Based on p-Value
#'
#'@description Mark parameter estimates (edge labels) based on
#'p-value.
#'
#'@details Modify a [qgraph::qgraph] object generated by semPaths and
#' add marks (currently asterisk, "*") to the labels based on their
#' p-values. Require either the original object used in the semPaths call,
#' or a data frame with the p-values for each parameter. The latter
#' option is for p-values not computed by lavaan but by
#' other functions.
#'
#' Currently supports only plots based on lavaan output.
#'
#' ## R-squares
#'
#' Normally, `lavaan` does not compute
#' *p*-values for R-squares. If
#' R-squares are detected in the plot
#' (based on `r2_prefix`), they will not
#' be marked, except for the following
#' conditions:
#'
#' - Users set `ests` to a data frame
#' with R-squares (`op` = "r2") as well
#' as their *p*-values (under `pvalue`)
#' computed by some methods.
#'
#' - Users set `ests_r2` to a data frame
#' storing only the *p*-values for
#' R-squares computed by some methods
#' (and let the function find the
#' *p*-values for other parameters from
#' `object`).
#'
#'@return A [qgraph::qgraph] based on the original one, with marks
#' appended to edge labels based on their p-values.
#'
#'@param semPaths_plot A [qgraph::qgraph] object generated by
#' semPaths, or a similar qgraph object modified by other [semptools]
#' functions.
#'
#'@param object The object used by semPaths to generate the plot. Use
#' the same argument name used in semPaths to make the meaning of this
#' argument obvious. Currently only object of class
#' `lavaan` is supported.
#'
#'@param alphas A named numeric vector. Each element is the cutoff
#' (level of significance), and the name of it is the symbol to be
#' used if p-value is less than this cutoff. The default is c("*" =
#' .05, "**" = .01, "***" = .001).
#'
#'@param ests A data.frame from the
#'   \code{\link[lavaan]{parameterEstimates}} function, or
#'   from other function with these columns: `lhs`, `op`,
#'   `rhs`, and `pvalue`. Only used when
#'   \code{object} is not specified.
#'
#'@param std_type If standardized solution is used in the plot,
#'   set this either to the type of standardization (e.g., `"std.all"`)
#'   or to `TRUE`. It will be passed to [lavaan::standardizedSolution()]
#'   to compute the *p*-values for the standardized solution.
#'   Used only if *p*-values are not supplied directly
#'   through `ests`.
#'
#' @param ests_r2 A data.frame with
#' these columns: `lhs` and `pvalue`,
#' storing the *p*-values for R-squares
#' of the variables listed on `lhs`. If
#' provided, the *p*-values in this
#' data.frame will override those in
#' `object` or `ests`, if any. Used only
#' when R-squares are present in the
#' `semPaths_plot`.
#'
#' @param r2_prefix The prefix used to
#' identify R-squares in `semPaths_plot`.
#' Default is `"R2="`.
#'
#'@examples
#'mod_pa <-
#'  'x1 ~~ x2
#'   x3 ~  x1 + x2
#'   x4 ~  x1 + x3
#'  '
#'fit_pa <- lavaan::sem(mod_pa, pa_example)
#'lavaan::parameterEstimates(fit_pa)[, c("lhs", "op", "rhs", "est", "pvalue")]
#'m <- matrix(c("x1",   NA,   NA,
#'                NA, "x3", "x4",
#'              "x2",   NA,   NA), byrow = TRUE, 3, 3)
#'p_pa <- semPlot::semPaths(fit_pa, whatLabels="est",
#'            style = "ram",
#'            nCharNodes = 0, nCharEdges = 0,
#'            layout = m)
#'p_pa2 <- mark_sig(p_pa, fit_pa)
#'plot(p_pa2)
#'
#'mod_cfa <-
#'  'f1 =~ x01 + x02 + x03
#'   f2 =~ x04 + x05 + x06 + x07
#'   f3 =~ x08 + x09 + x10
#'   f4 =~ x11 + x12 + x13 + x14
#'  '
#'fit_cfa <- lavaan::sem(mod_cfa, cfa_example)
#'lavaan::parameterEstimates(fit_cfa)[, c("lhs", "op", "rhs", "est", "pvalue")]
#'p_cfa <- semPlot::semPaths(fit_cfa, whatLabels="est",
#'            style = "ram",
#'            nCharNodes = 0, nCharEdges = 0)
#'p_cfa2 <- mark_sig(p_cfa, fit_cfa)
#'plot(p_cfa2)
#'
#'mod_sem <-
#'  'f1 =~ x01 + x02 + x03
#'   f2 =~ x04 + x05 + x06 + x07
#'   f3 =~ x08 + x09 + x10
#'   f4 =~ x11 + x12 + x13 + x14
#'   f3 ~  f1 + f2
#'   f4 ~  f1 + f3
#'  '
#'fit_sem <- lavaan::sem(mod_sem, sem_example)
#'lavaan::parameterEstimates(fit_sem)[, c("lhs", "op", "rhs", "est", "pvalue")]
#'p_sem <- semPlot::semPaths(fit_sem, whatLabels="est",
#'            style = "ram",
#'            nCharNodes = 0, nCharEdges = 0)
#'p_sem2 <- mark_sig(p_sem, fit_sem)
#'plot(p_sem2)
#'
#' @importFrom rlang .data
#' @export

mark_sig <- function(semPaths_plot, object,
                     alphas = c("*" = .05, "**" = .01, "***" = .001),
                     ests = NULL,
                     std_type = FALSE,
                     ests_r2 = NULL,
                     r2_prefix = "R2=") {
  if ("triangle" %in% semPaths_plot$graphAttributes$Nodes$shape) {
    rlang::inform(paste("The semPaths plot seems to have one or",
                        "more intercepts. Support for models with",
                        "are only experimental. If failed,",
                        "consider setting",
                        "'intercepts = FALSE' in semPaths."))
  }

  alphas_sorted <- sort(alphas, decreasing = FALSE)
  if (is.null(ests)) {
    if (isFALSE(std_type)) {
      ests <- lavaan::parameterEstimates(object, se = TRUE, ci = FALSE,
                                         zstat = TRUE, pvalue = TRUE)
    } else {
      if (isTRUE(std_type)) std_type <- "std.all"
      ests <- lavaan::standardizedSolution(object, type = std_type,
                                           se = TRUE, ci = FALSE,
                                           zstat = TRUE, pvalue = TRUE)
    }
  }

  # ==== Extract r2, if exists ====

  has_rsq <- isTRUE("r2" %in% ests$op) ||
              !is.null(ests_r2)
  has_rsq_pvalue <- FALSE
  if (has_rsq) {
    if (is.null(ests_r2)) {
      i <- ests$op == "r2"
      ests_r2 <- ests[i, , drop = FALSE]
      rownames(ests_r2) <- ests_r2$lhs
      ests <- ests[!i, ]
      has_rsq_pvalue <- all(!is.na(ests_r2$pvalue))
    } else {
      # User ests_r2 overrides ests
      i <- ests$op == "r2"
      if (any(i)) {
        ests <- ests[!i, ]
      }
      rownames(ests_r2) <- ests_r2$lhs
      has_rsq_pvalue <- all(!is.na(ests_r2$pvalue))
    }
  } else {
    ests_r2 <- NULL
    has_rsq_pvalue <- FALSE
  }

  if (inherits(semPaths_plot, "list")) {
    if (length(semPaths_plot) != length(unique(ests$group))) {
      rlang::abort(paste("length of qgraph list does not match",
                         "number of groups in model fit object."))
    }
    ests_list <- split(ests, ests$group)
    if (has_rsq_pvalue) {
      ests_r2_list <- split(ests_r2, ests$group)
    } else {
      ests_r2_list <- lapply(ests$group,
                             \(x) NULL)
    }
    mapply(mark_sig,
           semPaths_plot,
           ests = ests_list,
           ests_r2 = ests_r2_list,
           MoreArgs = list(alphas = alphas,
                           std_type = std_type,
                           r2_prefix = r2_prefix),
           SIMPLIFY = FALSE)
  } else {
    if (!missing(object) && lavaan::lavInspect(object, "ngroups") > 1) {
      rlang::abort(paste("length of qgraph list does not match",
                         "number of groups in model fit object."))
    }
    Nodes_names <- semPaths_plot$graphAttributes$Nodes$names
    if (!is.null(names(Nodes_names))) {
      Nodes_names <- names(Nodes_names)
    }
    ests$rhs <- ifelse(ests$op == "~1", yes = "1", no = ests$rhs)
    if (!all(Nodes_names %in% union(ests$lhs, ests$rhs))) {
      abort_nomatch(Nodes_names, union(ests$lhs, ests$rhs))
    }
    Edgelist <- data.frame(
      from_names = Nodes_names[semPaths_plot$Edgelist$from],
      to_names   = Nodes_names[semPaths_plot$Edgelist$to],
      semPaths_plot$Edgelist, stringsAsFactors = FALSE)
    graphAttributes_Edges <- data.frame(
      from_names = Nodes_names[semPaths_plot$Edgelist$from],
      to_names   = Nodes_names[semPaths_plot$Edgelist$to],
      semPaths_plot$graphAttributes$Edges, stringsAsFactors = FALSE)
    graphAttributes_Edges$id <- as.numeric(rownames(graphAttributes_Edges))
    edge_labels <- graphAttributes_Edges[, c("id",
                                             "from_names",
                                             "to_names",
                                             "labels")]

    # Remove thresholds. Not used
    to_keep <- ests$op != "|"
    # Remove ~*~. Not used.
    to_keep <- to_keep & (ests$op != "~*~")

    ests_pvalues <- ests[to_keep, c("lhs",
                             "op",
                             "rhs",
                             "pvalue")]
    colnames(ests_pvalues) <- gsub("\\<lhs\\>",
                                   "from_names",
                                   colnames(ests_pvalues))
    colnames(ests_pvalues) <- gsub("\\<rhs\\>",
                                   "to_names",
                                   colnames(ests_pvalues))
    ests_pvalues_rev <- ests[to_keep, c("lhs",
                                 "rhs",
                                 "pvalue")]
    colnames(ests_pvalues_rev) <- gsub("\\<pvalue\\>",
                                   "pvalue_rev",
                                   colnames(ests_pvalues_rev))
    colnames(ests_pvalues_rev) <- gsub("\\<rhs\\>",
                                   "from_names",
                                   colnames(ests_pvalues_rev))
    colnames(ests_pvalues_rev) <- gsub("\\<lhs\\>",
                                   "to_names",
                                   colnames(ests_pvalues_rev))
    edge_pvalues <- merge(x = edge_labels,
                          y = ests_pvalues,
                          by = c("from_names",
                                 "to_names"),
                          all.x = TRUE,
                          all.y = FALSE,
                          sort = FALSE)
    edge_pvalues <- merge(x = edge_pvalues,
                          y = ests_pvalues_rev,
                          by = c("from_names",
                                 "to_names"),
                          all.x = TRUE,
                          all.y = FALSE,
                          sort = FALSE)
    all_na <- apply(edge_pvalues[, c("pvalue", "pvalue_rev")],
                    MARGIN = 1,
                    FUN = function(x) all(is.na(x)))
    edge_pvalues$pvalue <- suppressWarnings(
                              apply(edge_pvalues[, c("pvalue", "pvalue_rev")],
                                    MARGIN = 1,
                                    FUN = max,
                                    na.rm = TRUE))
    edge_pvalues$pvalue[all_na] <- NA
    edge_pvalues <- edge_pvalues[order(edge_pvalues$id), ]

    i <- grepl(r2_prefix, edge_pvalues$labels)
    plot_has_rsq <- any(i)
    if (plot_has_rsq) {

      # ==== Import Rsq p-values, if available ====

      if (has_rsq_pvalue) {
        tmp <- edge_pvalues$from_names[i]
        tmp2 <- ests_r2[tmp, "pvalue"]
        edge_pvalues[i, "pvalue"] <- tmp2
      } else {
        edge_pvalues[i, "pvalue"] <- NA
      }
    }

    sig_symbols <- sapply(edge_pvalues$pvalue, function(x) {
                      ind <- which(x < alphas_sorted)[1]
                      ifelse(is.na(ind), "", names(ind[1]))
                    })
    labels_old <- semPaths_plot$graphAttributes$Edges$labels
    labels_new <- paste0(labels_old, sig_symbols)

    # # Identify probable R-squares and do not mark them, for now
    # tmp <- is.na(suppressWarnings(as.numeric(labels_old)))
    # labels_new[tmp] <- labels_old[tmp]

    semPaths_plot$graphAttributes$Edges$labels <- labels_new
    semPaths_plot
  }
}

Try the semptools package in your browser

Any scripts or data that you put into this service are public.

semptools documentation built on Aug. 8, 2025, 6:22 p.m.