R/bruceR-stats_5_advance.R

Defines functions granger_causality vargranger granger_test ccf_plot med_summary process_mod process_med extract_med process_lav lavaan_summary PROCESS boot_ci lav_med_modeler interaction_test interaction_Chi2_test interaction_F_test

Documented in ccf_plot granger_causality granger_test lavaan_summary med_summary PROCESS

#### PROCESS Macro (GLM and HLM) ####


interaction_F_test = function(model, data=NULL, data.name="data") {
  Run("{data.name} = data")
  df2 = df.residual(model)
  interms = attr(terms(model), "term.labels")
  interms = interms[grepl(":", interms)]
  interms.form = as.formula(paste("~", paste(interms, collapse=" + ")))
  interms.drop = as.formula(paste(". ~ . -", paste(interms, collapse=" - ")))
  if(inherits(model, "lm")) {
    dp1 = drop1(model, scope=interms.form, test="F")
    dp1 = dp1[!is.na(dp1$Df), c("Df", "F value", "Pr(>F)")]
    aov = anova(update(model, interms.drop), model)
    if(df2!=aov[2, "Res.Df"]) warning("Error!", call.=TRUE)
    aov.table = data.frame(
      `F` = c(dp1[[2]], aov[2, "F"]),
      df1 = c(dp1[[1]], aov[2, "Df"]),
      df2 = df2,
      pval = c(dp1[[3]], aov[2, "Pr(>F)"]))
    row.names(aov.table) = c(gsub(":", " * ", row.names(dp1)),
                           "(All Interactions)")
  } else {
    # dp1 = drop1(model, scope=interms, test="F")
    dp1 = anova(model)[interms,]
    aov.table = data.frame(
      `F` = dp1[,"F value"],
      df1 = dp1[,"NumDF"],
      df2 = dp1[,"DenDF"],
      pval = dp1[,"Pr(>F)"])
    row.names(aov.table) = gsub(":", " * ", row.names(dp1))
  }
  return(aov.table)
}


interaction_Chi2_test = function(model, data=NULL, data.name="data") {
  Run("{data.name} = data")
  interms = attr(terms(model), "term.labels")
  interms = interms[grepl(":", interms)]
  interms.form = as.formula(paste("~", paste(interms, collapse=" + ")))
  interms.drop = as.formula(paste(". ~ . -", paste(interms, collapse=" - ")))
  if(inherits(model, "glm")) {
    dp1 = drop1(model, scope=interms.form, test="Chisq")
    dp1 = dp1[!is.na(dp1$Df), c("Df", "LRT", "Pr(>Chi)")]
    chi = anova(update(model, interms.drop), model, test="Chisq")
    chi.table = data.frame(
      `Chisq` = c(dp1[[2]], chi[2, "Deviance"]),
      df = c(dp1[[1]], chi[2, "Df"]),
      pval = c(dp1[[3]], chi[2, "Pr(>Chi)"]))
    row.names(chi.table) = c(gsub(":", " * ", row.names(dp1)),
                             "(All Interactions)")
  } else {
    dp1 = drop1(model, scope=interms.form, test="Chisq")
    dp1 = dp1[!is.na(dp1$npar), c("npar", "LRT", "Pr(Chi)")]
    chi.table = data.frame(
      Chisq = dp1[,"LRT"],
      df = dp1[,"npar"],
      pval = dp1[,"Pr(Chi)"])
    row.names(chi.table) = gsub(":", " * ", row.names(dp1))
  }
  return(chi.table)
}


interaction_test = function(model, data=NULL, data.name="data") {
  if(inherits(model, c("glm", "glmerMod"))) {
    table = interaction_Chi2_test(model, data=data, data.name=data.name)
  } else {
    table = interaction_F_test(model, data=data, data.name=data.name)
  }
  return(table)
}


lav_med_modeler = function(y, x,
                           meds=c(),
                           covs=c(),
                           med.type=c("parallel", "serial"),
                           cov.path=c("y", "m", "both")) {
  ids = 1:length(meds)
  if(length(med.type)>1) med.type = "parallel"
  if(length(meds)==1) {
    fm = meds %^% " ~ a*" %^% x
    fy = y %^% " ~ c.*"%^% x %^% " + " %^% "b*" %^% meds
    pars = paste(
      "Indirect := a*b",
      "Direct := c.",
      "Total := c. + a*b",
      sep="\n")
  } else {
    if(grepl("p", med.type)) {
      x.all = "a" %^% ids %^% "*" %^% x
      meds.all = paste("b" %^% ids %^% "*" %^% meds, collapse=" + ")
      ind = "Indirect_X_M" %^% ids %^% "_Y := " %^% "a" %^% ids %^% "*" %^% "b" %^% ids
      fm = meds %^% " ~ " %^% x.all
      fy = y %^% " ~ c.*"%^% x %^% " + " %^% meds.all
      ind.all = paste("a" %^% ids %^% "*" %^% "b" %^% ids, collapse=" + ")
      pars = paste(
        "Indirect_All := " %^% ind.all,
        paste(ind, collapse="\n"),
        "Direct := c.",
        "Total := c. + " %^% ind.all,
        sep="\n")
    }
    if(grepl("s", med.type)) {
      x.all = "a" %^% 1:length(meds) %^% "*" %^% x
      meds.all = paste("b" %^% ids %^% "*" %^% meds, collapse=" + ")
      fm = meds %^% " ~ " %^% x.all
      for(mi in 2:length(meds)) {
        fm[mi] = fm[mi] %^% " + " %^%
          paste("d" %^% ids[1:(mi-1)] %^% ids[mi] %^% "*" %^%
                  meds[1:(mi-1)], collapse=" + ")
      }
      fy = y %^% " ~ c.*"%^% x %^% " + " %^% meds.all
      if(length(meds)==2) {
        ind.all = "a1*b1 + a2*b2 + a1*d12*b2"
        pars = Glue("
        Indirect_All := {ind.all}
        Ind_X_M1_Y := a1*b1
        Ind_X_M2_Y := a2*b2
        Ind_X_M1_M2_Y := a1*d12*b2
        Direct := c.
        Total := c. + {ind.all}
        ")
      }
      if(length(meds)==3) {
        ind.all = paste(
          "a1*b1",
          "a2*b2",
          "a3*b3",
          "a1*d12*b2",
          "a1*d13*b3",
          "a2*d23*b3",
          "a1*d12*d23*b3",
          sep=" + ")
        pars=Glue("
        Indirect_All := {ind.all}
        Ind_X_M1_Y := a1*b1
        Ind_X_M2_Y := a2*b2
        Ind_X_M3_Y := a3*b3
        Ind_X_M1_M2_Y := a1*d12*b2
        Ind_X_M1_M3_Y := a1*d13*b3
        Ind_X_M2_M3_Y := a2*d23*b3
        Ind_X_M1_M2_M3_Y := a1*d12*d23*b3
        Direct := c.
        Total := c. + {ind.all}
        ")
      }
      if(length(meds)==4) {
        ind.all = paste(
          "a1*b1",
          "a2*b2",
          "a3*b3",
          "a4*b4",
          "a1*d12*b2",
          "a1*d13*b3",
          "a1*d14*b4",
          "a2*d23*b3",
          "a2*d24*b4",
          "a3*d34*b4",
          "a1*d12*d23*b3",
          "a1*d12*d24*b4",
          "a1*d13*d34*b4",
          "a2*d23*d34*b4",
          "a1*d12*d23*d34*b4",
          sep=" + ")
        pars=Glue("
        Indirect_All := {ind.all}
        Ind_X_M1_Y := a1*b1
        Ind_X_M2_Y := a2*b2
        Ind_X_M3_Y := a3*b3
        Ind_X_M4_Y := a4*b4
        Ind_X_M1_M2_Y := a1*d12*b2
        Ind_X_M1_M3_Y := a1*d13*b3
        Ind_X_M1_M4_Y := a1*d14*b4
        Ind_X_M2_M3_Y := a2*d23*b3
        Ind_X_M2_M4_Y := a2*d24*b4
        Ind_X_M3_M4_Y := a3*d34*b4
        Ind_X_M1_M2_M3_Y := a1*d12*d23*b3
        Ind_X_M1_M2_M4_Y := a1*d12*d24*b4
        Ind_X_M1_M3_M4_Y := a1*d13*d34*b4
        Ind_X_M2_M3_M4_Y := a2*d23*d34*b4
        Ind_X_M1_M2_M3_M4_Y := a1*d12*d23*d34*b4
        Direct := c.
        Total := c. + {ind.all}
        ")
      }
    }
  }
  if(length(covs)>0)
    covs.all = " " %^% paste(covs, collapse=" + ") %^% " +"
  else
    covs.all = ""
  if("m" %in% cov.path)
    fm = str_replace(fm, "~", "~" %^% covs.all)
  if("y" %in% cov.path)
    fy = str_replace(fy, "~", "~" %^% covs.all)
  model = paste(paste(fm, collapse="\n"),
                fy, pars,
                sep="\n")
  return(model)
}


# edit(mediation::mediate)
# edit(boot::boot.ci)
# edit(boot:::perc.ci)
# edit(boot:::bca.ci)
boot_ci = function(boot,
                   type=c("boot", "bc.boot", "bca.boot", "mcmc"),
                   true=NULL,
                   conf=0.95) {
  low = (1 - conf) / 2
  high = 1 - low
  if(length(type)>1) type = "boot"
  if(type %in% c("boot", "mcmc")) {
    ci = quantile(boot, c(low, high), na.rm=TRUE)  # percentile
  } else {
    if(is.null(true)) boot0 = mean(boot) else boot0 = true
    p0 = length(boot[boot<boot0]) / length(boot)
    z0 = qnorm(p0)
    z.l = qnorm(low)
    z.h = qnorm(high)
    if(type=="bca.boot") {
      U = (length(boot) - 1) * (boot0 - boot)
      a = sum(U^3) / (6 * (sum(U^2))^1.5)  # accelerated bias-corrected
    } else {
      a = 0  # bias-corrected
    }
    lower.bc = pnorm(z0 + (z0+z.l) / (1 - a*(z0+z.l)))
    upper.bc = pnorm(z0 + (z0+z.h) / (1 - a*(z0+z.h)))
    ci = quantile(boot, c(lower.bc, upper.bc), na.rm=TRUE)
  }
  return(ci)
}


#' PROCESS for mediation and/or moderation analyses.
#'
#' @description
#' To perform mediation, moderation, and conditional process (moderated mediation) analyses,
#' people may use software like
#' \href{http://www.statmodel.com/index.shtml}{Mplus},
#' \href{https://www.processmacro.org/index.html}{SPSS "PROCESS" macro},
#' and \href{https://njrockwood.com/mlmed/}{SPSS "MLmed" macro}.
#' Some R packages can also perform such analyses separately and in a complex way, including
#' \link[mediation:mediate]{R package "mediation"},
#' \link[interactions:sim_slopes]{R package "interactions"},
#' and \link[lavaan:lavaan-class]{R package "lavaan"}.
#' Some other R packages or scripts/modules have been further developed to improve the convenience, including
#' \href{https://jamovi-amm.github.io/}{jamovi module "jAMM"} (by \emph{Marcello Gallucci}, based on the \code{lavaan} package),
#' \href{https://CRAN.R-project.org/package=processR}{R package "processR"} (by \emph{Keon-Woong Moon}, not official, also based on the \code{lavaan} package),
#' and \href{https://www.processmacro.org/download.html}{R script file "process.R"}
#' (the official PROCESS R code by \emph{Andrew F. Hayes}, but it is not yet an R package and has some bugs and limitations).
#'
#' Here, the \code{\link[bruceR:PROCESS]{bruceR::PROCESS()}} function provides
#' an alternative to performing mediation/moderation analyses in R.
#' This function supports a total of \strong{24} kinds of SPSS PROCESS models (Hayes, 2018)
#' and also supports multilevel mediation/moderation analyses.
#' Overall, it supports the most frequently used types of mediation, moderation,
#' moderated moderation (3-way interaction), and moderated mediation (conditional indirect effect) analyses
#' for \strong{(generalized) linear or linear mixed models}.
#'
#' Specifically, the \code{\link[bruceR:PROCESS]{bruceR::PROCESS()}} function
#' fits regression models based on the data, variable names, and a few other arguments
#' that users input (with \strong{no need to} specify the PROCESS model number and \strong{no need to} manually mean-center the variables).
#' The function can automatically judge the model number/type and also conduct grand-mean centering before model building
#' (using the \code{\link[bruceR:grand_mean_center]{bruceR::grand_mean_center()}} function).
#'
#' This automatic grand-mean centering can be turned off by setting \code{center=FALSE}.
#'
#' Note that this automatic grand-mean centering
#' (1) makes the results of main effects accurate for interpretation;
#' (2) does not change any results of model fit (it only affects the interpretation of main effects);
#' (3) is only conducted in "PART 1" (for an accurate estimate of main effects) but not in "PART 2" because
#' it is more intuitive and interpretable to use the raw values of variables for the simple-slope tests in "PART 2";
#' (4) is not optional to users because mean-centering should always be done when there is an interaction;
#' (5) is not conflicted with group-mean centering because after group-mean centering the grand mean of a variable will also be 0,
#' such that the automatic grand-mean centering (with mean = 0) will not change any values of the variable.
#'
#' If you need to do group-mean centering, please do this before using PROCESS.
#' \code{\link[bruceR:group_mean_center]{bruceR::group_mean_center()}} is a useful function of group-mean centering.
#' Remember that the automatic grand-mean centering in PROCESS never affects the values of a group-mean centered variable, which already has a grand mean of 0.
#'
#' The \code{\link[bruceR:PROCESS]{bruceR::PROCESS()}} function uses:
#' \enumerate{
#'   \item the \code{\link[interactions:sim_slopes]{interactions::sim_slopes()}} function to
#'   estimate simple slopes (and conditional direct effects) in moderation, moderated moderation, and moderated mediation models
#'   (PROCESS Models 1, 2, 3, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 58, 59, 72, 73, 75, 76).
#'   \item the \code{\link[mediation:mediate]{mediation::mediate()}} function to
#'   estimate (conditional) indirect effects in (moderated) mediation models
#'   (PROCESS Models 4, 5, 7, 8, 9, 10, 11, 12, 14, 15, 16, 17, 18, 19, 58, 59, 72, 73, 75, 76).
#'   \item the \code{\link[lavaan:sem]{lavaan::sem()}} function to perform serial multiple mediation analysis (PROCESS Model 6).
#' }
#' If you use this function in your research and report its results in your paper, please cite not only \code{bruceR} but also
#' the other R packages it uses internally (\code{mediation}, \code{interactions}, and/or \code{lavaan}).
#'
#' Two parts of results are printed:
#'
#' PART 1. Regression model summary (using \code{\link[bruceR:model_summary]{bruceR::model_summary()}} to summarize the models)
#'
#' PART 2. Mediation/moderation effect estimates (using one or a combination of the above packages and functions to estimate the effects)
#'
#' To organize the PART 2 output, the results of \strong{Simple Slopes} are titled in \strong{green},
#' whereas the results of \strong{Indirect Path} are titled in \strong{blue}.
#'
#' \strong{\emph{Disclaimer}:}
#' Although this function is named after \code{PROCESS}, Andrew F. Hayes has no role in its design, and
#' its development is independent from the official SPSS PROCESS macro and "process.R" script.
#' Any error or limitation should be attributed to the three R packages/functions that \code{bruceR::PROCESS()} uses internally.
#' Moreover, as mediation analyses include \emph{random processes} (i.e., bootstrap resampling or Monte Carlo simulation),
#' the results of mediation analyses are \emph{unlikely} to be exactly the same across different software
#' (even if you set the same random seed in different software).
#'
#' @param data Data frame.
#' @param y,x Variable name of outcome (Y) and predictor (X).
#'
#' It supports both continuous (numeric) and dichotomous (factor) variables.
#' @param meds Variable name(s) of mediator(s) (M).
#' Use \code{c()} to combine multiple mediators.
#'
#' It supports both continuous (numeric) and dichotomous (factor) variables.
#'
#' It allows an infinite number of mediators in parallel
#' or 2~4 mediators in serial.
#'
#' * Order matters when \code{med.type="serial"}
#' (PROCESS Model 6: serial mediation).
#' @param mods Variable name(s) of 0~2 moderator(s) (W).
#' Use \code{c()} to combine multiple moderators.
#'
#' It supports all types of variables:
#' continuous (numeric), dichotomous (factor), and multicategorical (factor).
#'
#' * Order matters when \code{mod.type="3-way"}
#' (PROCESS Models 3, 5.3, 11, 12, 18, 19, 72, and 73).
#'
#' ** Do not set this argument when \code{med.type="serial"}
#' (PROCESS Model 6).
#' @param covs Variable name(s) of covariate(s) (i.e., control variables).
#' Use \code{c()} to combine multiple covariates.
#' It supports all types of (and an infinite number of) variables.
#' @param clusters HLM (multilevel) cluster(s):
#' e.g., \code{"School"}, \code{c("Prov", "City")}, \code{c("Sub", "Item")}.
#' @param hlm.re.m,hlm.re.y HLM (multilevel) random effect term of M model and Y model.
#' By default, it converts \code{clusters} to \code{\link[lme4:lme4-package]{lme4}} syntax of random intercepts:
#' e.g., \code{"(1 | School)"} or \code{"(1 | Sub) + (1 | Item)"}.
#'
#' You may specify these arguments to include more complex terms:
#' e.g., random slopes \code{"(X | School)"}, or 3-level random effects \code{"(1 | Prov/City)"}.
#' @param hlm.type HLM (multilevel) mediation type (levels of "X-M-Y"):
#' \code{"1-1-1"} (default),
#' \code{"2-1-1"} (indeed the same as \code{"1-1-1"} in a mixed model),
#' or \code{"2-2-1"} (currently \emph{not fully supported}, as limited by the \code{\link[mediation:mediate]{mediation}} package).
#' In most cases, no need to set this argument.
#' @param med.type Type of mediator:
#' \code{"parallel"} (default) or \code{"serial"}
#' (only relevant to PROCESS Model 6).
#' Partial matches of \code{"p"} or \code{"s"} also work.
#' In most cases, no need to set this argument.
#' @param mod.type Type of moderator:
#' \code{"2-way"} (default) or \code{"3-way"}
#' (relevant to PROCESS Models 3, 5.3, 11, 12, 18, 19, 72, and 73).
#' Partial matches of \code{"2"} or \code{"3"} also work.
#' @param mod.path Which path(s) do the moderator(s) influence?
#' \code{"x-y"}, \code{"x-m"}, \code{"m-y"}, or any combination of them
#' (use \code{c()} to combine), or \code{"all"} (i.e., all of them).
#' No default value.
#' @param cov.path Which path(s) do the control variable(s) influence?
#' \code{"y"}, \code{"m"}, or \code{"both"} (default).
#' @param mod1.val,mod2.val By default (\code{NULL}), it uses
#' \strong{Mean +/- SD} of a continuous moderator (numeric) or
#' \strong{all levels} of a dichotomous/multicategorical moderator (factor) to
#' perform simple slope analyses and/or conditional mediation analyses.
#' You may manually specify a vector of certain values: e.g.,
#' \code{mod1.val=c(1, 3, 5)} or \code{mod1.val=c("A", "B", "C")}.
#' @param ci Method for estimating the standard error (SE) and
#' 95\% confidence interval (CI) of indirect effect(s).
#' Defaults to \code{"boot"} for (generalized) linear models or
#' \code{"mcmc"} for (generalized) linear mixed models (i.e., multilevel models).
#' \describe{
#'   \item{\code{"boot"}}{Percentile Bootstrap}
#'   \item{\code{"bc.boot"}}{Bias-Corrected Percentile Bootstrap}
#'   \item{\code{"bca.boot"}}{Bias-Corrected and Accelerated (BCa) Percentile Bootstrap}
#'   \item{\code{"mcmc"}}{Markov Chain Monte Carlo (Quasi-Bayesian)}
#' }
#' * Note that these methods \emph{never} apply to the estimates of simple slopes.
#' You \emph{should not} report the 95\% CIs of simple slopes as Bootstrap or Monte Carlo CIs,
#' because they are just standard CIs without any resampling method.
#' @param nsim Number of simulation samples (bootstrap resampling or Monte Carlo simulation)
#' for estimating SE and 95\% CI. Defaults to \code{100} for running examples faster.
#' In formal analyses, however, \strong{\code{nsim=1000} (or larger)} is strongly suggested!
#' @param seed Random seed for obtaining reproducible results.
#' Defaults to \code{NULL}.
#' You may set to any number you prefer
#' (e.g., \code{seed=1234}, just an uncountable number).
#'
#' * Note that all mediation models include random processes
#' (i.e., bootstrap resampling or Monte Carlo simulation).
#' To get exactly the same results between runs, you need to set a random seed.
#' However, even if you set the same seed number, it is unlikely to
#' get exactly the same results across different R packages
#' (e.g., \code{\link[lavaan:lavaan-class]{lavaan}} vs. \code{\link[mediation:mediate]{mediation}})
#' and software (e.g., SPSS, Mplus, R, jamovi).
#' @param center Centering numeric (continuous) predictors? Defaults to \code{TRUE} (suggested).
#' @param std Standardizing variables to get standardized coefficients? Defaults to \code{FALSE}.
#' If \code{TRUE}, it will standardize all numeric (continuous) variables
#' before building regression models.
#' However, it is \emph{not suggested} to set \code{std=TRUE} for \emph{generalized} linear (mixed) models.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param file File name of MS Word (\code{.doc}).
#' Currently, only regression model summary can be saved.
#'
#' @return
#' Invisibly return a list of results:
#' \describe{
#'   \item{\code{process.id}}{PROCESS model number.}
#'   \item{\code{process.type}}{PROCESS model type.}
#'   \item{\code{model.m}}{"Mediator" (M) models (a list of multiple models).}
#'   \item{\code{model.y}}{"Outcome" (Y) model.}
#'   \item{\code{results}}{Effect estimates and other results (unnamed list object).}
#' }
#'
#' @details
#' For more details and illustrations, see
#' \href{https://github.com/psychbruce/bruceR/tree/master/note}{PROCESS-bruceR-SPSS} (PDF and Markdown files).
#'
#' @seealso
#' \code{\link{lavaan_summary}}
#'
#' \code{\link{model_summary}}
#'
#' \code{\link{med_summary}}
#'
#' @references
#' Hayes, A. F. (2018). \emph{Introduction to mediation, moderation,
#' and conditional process analysis (second edition):
#' A regression-based approach}. Guilford Press.
#'
#' Yzerbyt, V., Muller, D., Batailler, C., & Judd, C. M. (2018).
#' New recommendations for testing indirect effects in mediational models:
#' The need to report and test component paths.
#' \emph{Journal of Personality and Social Psychology, 115}(6), 929--943.
#'
#' @examples
#' \donttest{#### NOTE ####
#' ## In the following examples, I set nsim=100 to save time.
#' ## In formal analyses, nsim=1000 (or larger) is suggested!
#'
#' #### Demo Data ####
#' # ?mediation::student
#' data = mediation::student %>%
#'   dplyr::select(SCH_ID, free, smorale, pared, income,
#'                 gender, work, attachment, fight, late, score)
#' names(data)[2:3] = c("SCH_free", "SCH_morale")
#' names(data)[4:7] = c("parent_edu", "family_inc", "gender", "partjob")
#' data$gender01 = 1 - data$gender  # 0 = female, 1 = male
#' # dichotomous X: as.factor()
#' data$gender = factor(data$gender01, levels=0:1, labels=c("Female", "Male"))
#' # dichotomous Y: as.factor()
#' data$pass = as.factor(ifelse(data$score>=50, 1, 0))
#'
#' #### Descriptive Statistics and Correlation Analyses ####
#' Freq(data$gender)
#' Freq(data$pass)
#' Describe(data)     # file="xxx.doc"
#' Corr(data[,4:11])  # file="xxx.doc"
#'
#' #### PROCESS Analyses ####
#'
#' ## Model 1 ##
#' PROCESS(data, y="score", x="late", mods="gender")  # continuous Y
#' PROCESS(data, y="pass", x="late", mods="gender")   # dichotomous Y
#'
#' # (multilevel moderation)
#' PROCESS(data, y="score", x="late", mods="gender",  # continuous Y (LMM)
#'         clusters="SCH_ID")
#' PROCESS(data, y="pass", x="late", mods="gender",   # dichotomous Y (GLMM)
#'         clusters="SCH_ID")
#'
#' # (Johnson-Neyman (J-N) interval and plot)
#' PROCESS(data, y="score", x="gender", mods="late") -> P
#' P$results[[1]]$jn[[1]]       # Johnson-Neyman interval
#' P$results[[1]]$jn[[1]]$plot  # Johnson-Neyman plot (ggplot object)
#' GLM_summary(P$model.y)       # detailed results of regression
#'
#' # (allows multicategorical moderator)
#' d = airquality
#' d$Month = as.factor(d$Month)  # moderator: factor with levels "5"~"9"
#' PROCESS(d, y="Temp", x="Solar.R", mods="Month")
#'
#' ## Model 2 ##
#' PROCESS(data, y="score", x="late",
#'         mods=c("gender", "family_inc"),
#'         mod.type="2-way")  # or omit "mod.type", default is "2-way"
#'
#' ## Model 3 ##
#' PROCESS(data, y="score", x="late",
#'         mods=c("gender", "family_inc"),
#'         mod.type="3-way")
#' PROCESS(data, y="pass", x="gender",
#'         mods=c("late", "family_inc"),
#'         mod1.val=c(1, 3, 5),     # moderator 1: late
#'         mod2.val=seq(1, 15, 2),  # moderator 2: family_inc
#'         mod.type="3-way")
#'
#' ## Model 4 ##
#' PROCESS(data, y="score", x="parent_edu",
#'         meds="family_inc", covs="gender",
#'         ci="boot", nsim=100, seed=1)
#'
#' # (allows an infinite number of multiple mediators in parallel)
#' PROCESS(data, y="score", x="parent_edu",
#'         meds=c("family_inc", "late"),
#'         covs=c("gender", "partjob"),
#'         ci="boot", nsim=100, seed=1)
#'
#' # (multilevel mediation)
#' PROCESS(data, y="score", x="SCH_free",
#'         meds="late", clusters="SCH_ID",
#'         ci="mcmc", nsim=100, seed=1)
#'
#' ## Model 6 ##
#' PROCESS(data, y="score", x="parent_edu",
#'         meds=c("family_inc", "late"),
#'         covs=c("gender", "partjob"),
#'         med.type="serial",
#'         ci="boot", nsim=100, seed=1)
#'
#' ## Model 8 ##
#' PROCESS(data, y="score", x="fight",
#'         meds="late",
#'         mods="gender",
#'         mod.path=c("x-m", "x-y"),
#'         ci="boot", nsim=100, seed=1)
#'
#' ## For more examples and details, see the "note" subfolder at:
#' ## https://github.com/psychbruce/bruceR/tree/main/note
#' }
#' @export
PROCESS = function(
    data,
    y="",
    x="",
    meds=c(),
    mods=c(),
    covs=c(),
    clusters=c(),
    hlm.re.m="",
    hlm.re.y="",
    hlm.type=c("1-1-1", "2-1-1", "2-2-1"),
    med.type=c("parallel", "serial"),  # "p"*, "s"
    mod.type=c("2-way", "3-way"),  # "2"*, "3"
    mod.path=c("x-y", "x-m", "m-y", "all"),
    cov.path=c("y", "m", "both"),
    mod1.val=NULL,
    mod2.val=NULL,
    ci=c("boot", "bc.boot", "bca.boot", "mcmc"),
    nsim=100,
    seed=NULL,
    center=TRUE,
    std=FALSE,
    digits=3,
    file=NULL
) {
  ## Default Setting
  warning.y.class = "\"y\" should be a numeric variable or a factor variable with only 2 levels."
  warning.x.class = "\"x\" should be a numeric variable or a factor variable with only 2 levels."
  warning.m.class = "\"meds\" should be numeric variable(s) or factor variable(s) with only 2 levels."
  warning.mod.path = "Please also specify \"mod.path\":\n    \"all\" or any combination of c(\"x-y\", \"x-m\", \"m-y\")"
  if(x=="" | y=="") stop("Please specify both \"x\" and \"y\".", call.=TRUE)
  if(length(meds)>0 & length(mods)>0 & length(mod.path)>3)
    stop(warning.mod.path, call.=TRUE)
  if("all" %in% mod.path)
    mod.path = c("x-y", "x-m", "m-y")
  if("both" %in% cov.path)
    cov.path = c("y", "m")
  if(length(mods)>0) mod1 = mods[1] else mod1 = NULL
  if(length(mods)>1) mod2 = mods[2] else mod2 = NULL
  if(length(mods)>2) stop("The number of moderators (\"mods\") should be no more than 2.", call.=TRUE)
  if(length(med.type)>1) med.type = "parallel"  # default
  if(length(mod.type)>1) mod.type = "2-way"     # default
  if(grepl("p", med.type)) med.type = "parallel"
  if(grepl("s", med.type)) med.type = "serial"
  if(grepl("2", mod.type)) mod.type = "2-way"
  if(grepl("3", mod.type)) mod.type = "3-way"
  if(grepl("p|s", med.type)==FALSE)
    stop("\"med.type\" should be \"parallel\" or \"serial\".", call.=TRUE)
  if(grepl("2|3", mod.type)==FALSE)
    stop("\"mod.type\" should be \"2-way\" or \"3-way\".", call.=TRUE)
  if(length(meds)>0) {
    if(length(mods)>0 & "m-y" %in% mod.path) {
      if(mod.type=="2-way")
        meds.all = " + " %^% paste(rep(meds, each=length(mods)) %^% "*" %^% mods, collapse=" + ")
      if(mod.type=="3-way")
        meds.all = " + " %^% paste(meds %^% "*" %^% paste(mods, collapse="*"), collapse=" + ")
    } else {
      meds.all = " + " %^% paste(meds, collapse=" + ")
    }
  }
  if(length(covs)>0)
    covs.all = " " %^% paste(covs, collapse=" + ") %^% " +"
  else
    covs.all = ""
  if(length(ci)>1) ci = "boot"  # default: percentile bootstrap
  if(grepl("p", ci) | ci=="boot") ci = "boot"
  if(ci %in% c("bc", "bc.boot")) ci = "bc.boot"
  if(grepl("bca", ci)) ci = "bca.boot"
  if(grepl("m", ci)) ci = "mcmc"
  if(ci %notin% c("boot", "bc.boot", "bca.boot", "mcmc"))
    stop("Please choose \"boot\", \"bc.boot\", \"bca.boot\", or \"mcmc\" for ci.", call.=TRUE)
  nsim.type = ifelse(grepl("boot", ci), "Bootstrap", "Monte Carlo")
  if(length(clusters)>0) HLM = TRUE else HLM = FALSE
  if(length(hlm.type)>1) hlm.type = "1-1-1"  # default; same as "2-1-1"
  if(hlm.type %notin% c("1-1-1", "2-1-1", "2-2-1"))
    stop("\"hlm.type\" should be \"1-1-1\", \"2-1-1\", or \"2-2-1\".", call.=TRUE)
  if(HLM) {
    if(hlm.re.m=="")  # default: random intercept
      hlm.re.m = paste("(1 | " %^% clusters %^% ")", collapse=" + ")
    hlm.re.m = " + " %^% hlm.re.m
    if(hlm.re.y=="")  # default: random intercept
      hlm.re.y = paste("(1 | " %^% clusters %^% ")", collapse=" + ")
    hlm.re.y = " + " %^% hlm.re.y
    if(length(meds)>0 & ci!="mcmc")
      message("\nNOTE: \nci has been reset to \"mcmc\" because bootstrap method is not applicable to multilevel models.")
    ci = "mcmc"
  }

  ## Data Centering and Recoding
  data = as.data.frame(data)
  data.v = na.omit(data[c(y, x, meds, mods, covs, clusters)])
  if(inherits(data.v[[y]], c("factor", "character", "logical"))) {
    if(length(unique(data.v[[y]]))==2) {
      data.v[[y]] = as.numeric(as.factor(data.v[[y]])) - 1  # 0, 1
      Y01 = TRUE
    } else {
      stop(warning.y.class, call.=TRUE)
    }
  } else {
    Y01 = FALSE
  }
  M01 = c()
  for(med in meds) {
    if(inherits(data.v[[med]], c("factor", "character", "logical"))) {
      if(length(unique(data.v[[med]]))==2) {
        data.v[[med]] = as.numeric(as.factor(data.v[[med]])) - 1  # 0, 1
        M01 = c(M01, TRUE)
      } else {
        stop(warning.m.class, call.=TRUE)
      }
    } else {
      M01=c(M01, FALSE)
    }
  }
  if(inherits(data.v[[x]], c("factor", "character", "logical"))) {
    if(length(unique(data.v[[x]]))==2) {
      x.levels = levels(as.factor(data.v[[x]]))
      data.v[[x]] = as.numeric(as.factor(data.v[[x]])) - 1  # 0, 1
      x.trans.info = " (recoded: " %^% paste(x.levels, 0:1, sep="=", collapse=", ") %^% ")"
    } else {
      stop(warning.x.class, call.=TRUE)
    }
  } else {
    x.trans.info = ""
  }
  if(HLM & length(meds)>0 & hlm.type=="2-2-1") {
    if(length(clusters)>1) stop("The number of clusters should be 1.", call.=TRUE)
    dt = data.v[c(x, meds, mods, covs, clusters)]
    Run("dt1 = dplyr::summarise(dplyr::group_by(dt, {clusters}), dplyr::across(where(is.numeric), mean))",
        "dt2 = dplyr::summarise(dplyr::group_by(dt, {clusters}), dplyr::across(where(is.factor), mean))",
        "dt = dplyr::left_join(dt1, dt2, by=\"{clusters}\")")
    data.meds.L2 = as.data.frame(dt)[c(clusters, x, meds, mods, covs)]
    rm(dt)
  }
  if(std) {
    # caution !!!
    if(Y01)
      data.v = data.c = data.c.NOmed =
        grand_mean_center(data.v, vars=c(x, meds, mods, covs), std=TRUE)
    else
      data.v = data.c = data.c.NOmed =
        grand_mean_center(data.v, vars=c(y, x, meds, mods, covs), std=TRUE)
  } else if(center) {
    data.c.NOmed = grand_mean_center(data.v, vars=c(x, mods, covs), std=FALSE)
    data.c = grand_mean_center(data.v, vars=c(x, meds, mods, covs), std=FALSE)
  } else {
    data.c = data.c.NOmed = data.v
  }
  nmis = nrow(data) - nrow(data.v)

  ## File Opening
  # if(!is.null(file)) {
  #   file = str_replace(file, "\\.docx$", ".doc")
  #   FILE = file(file, "a", encoding="UTF-8")
  # }

  ## Formula Building
  ft = Glue("{y} ~ {x}")
  if(length(meds)==0) {
    # (moderated) moderation
    fm = c()
    if(length(mods)==0) {
      stop("Please specify \"meds\" (mediators) and/or \"mods\" (moderators).", call.=TRUE)
    } else if(length(mods)==1) {
      pid = 1
      ptype = "Simple Moderation"
      fy = Glue("{y} ~ {x}*{mod1}")
    } else if(length(mods)==2) {
      if(mod.type=="2-way") {
        pid = 2
        ptype = "Parallel Moderation (2 mods; 2-way)"
        fy = Glue("{y} ~ {x}*{mod1} + {x}*{mod2}")
      }
      if(mod.type=="3-way") {
        pid = 3
        ptype = "Moderated Moderation (2 mods; 3-way)"
        fy = Glue("{y} ~ {x}*{mod1}*{mod2}")
      }
    }
  }
  else if(length(meds)==1 | med.type=="parallel") {
    # single/parallel (moderated) mediation
    if(length(mods)==0) {
      pid = 4
      ptype = ifelse(
        length(meds)==1,
        "Simple Mediation",
        Glue("Parallel Multiple Mediation ({length(meds)} meds)"))
      fm = meds %^% Glue(" ~ {x}")
      fy = Glue("{y} ~ {x}") %^% meds.all
    }
    if(length(mods)==1) {
      ptype = ifelse(
        length(meds)==1,
        "Moderated Mediation",
        Glue("Parallel Multiple Moderated Mediation ({length(meds)} meds)"))
      if("x-y" %in% mod.path) {
        fy = Glue("{y} ~ {x}*{mod1}") %^% meds.all
        if("x-m" %in% mod.path) {
          pid = ifelse("m-y" %in% mod.path, 59, 8)
          fm = meds %^% Glue(" ~ {x}*{mod1}")
        } else {
          pid = ifelse("m-y" %in% mod.path, 15, 5)
          if(pid==5) ptype = Glue("Mediation and Moderation ({length(meds)} meds and 1 mods)")
          fm = meds %^% Glue(" ~ {x}")
        }
      } else {
        fy = Glue("{y} ~ {x}") %^% meds.all
        if("x-m" %in% mod.path) {
          pid = ifelse("m-y" %in% mod.path, 58, 7)
          fm = meds %^% Glue(" ~ {x}*{mod1}")
        } else {
          pid = ifelse("m-y" %in% mod.path, 14, -1)
          fm = meds %^% Glue(" ~ {x}")
        }
      }
    }
    if(length(mods)==2 & mod.type=="2-way") {
      ptype = ifelse(
        length(meds)==1,
        "Moderated Mediation (2 mods; 2-way)",
        Glue("Parallel Multiple Moderated Mediation ({length(meds)} meds and 2 mods; 2-way)"))
      if("x-y" %in% mod.path) {
        fy = Glue("{y} ~ {x}*{mod1} + {x}*{mod2}") %^% meds.all
        if("x-m" %in% mod.path) {
          pid = ifelse("m-y" %in% mod.path, 76, 10)
          fm = meds %^% Glue(" ~ {x}*{mod1} + {x}*{mod2}")
        } else {
          pid = ifelse("m-y" %in% mod.path, 17, 5.2)
          if(pid==5.2) ptype = Glue("Mediation and Parallel Moderation ({length(meds)} meds and 2 mods; 2-way)")
          fm = meds %^% Glue(" ~ {x}")
        }
      } else {
        fy = Glue("{y} ~ {x}") %^% meds.all
        if("x-m" %in% mod.path) {
          pid = ifelse("m-y" %in% mod.path, 75, 9)
          fm = meds %^% Glue(" ~ {x}*{mod1} + {x}*{mod2}")
        } else {
          pid = ifelse("m-y" %in% mod.path, 16, -2)
          fm = meds %^% Glue(" ~ {x}")
        }
      }
    }
    if(length(mods)==2 & mod.type=="3-way") {
      ptype = ifelse(
        length(meds)==1,
        "Moderated Mediation (2 mods; 3-way)",
        Glue("Parallel Multiple Moderated Mediation ({length(meds)} meds and 2 mods; 3-way)"))
      if("x-y" %in% mod.path) {
        fy = Glue("{y} ~ {x}*{mod1}*{mod2}") %^% meds.all
        if("x-m" %in% mod.path) {
          pid = ifelse("m-y" %in% mod.path, 73, 12)
          fm = meds %^% Glue(" ~ {x}*{mod1}*{mod2}")
        } else {
          pid = ifelse("m-y" %in% mod.path, 19, 5.3)
          if(pid==5.3) ptype = Glue("Mediation and Moderated Moderation ({length(meds)} meds and 2 mods; 3-way)")
          fm = meds %^% Glue(" ~ {x}")
        }
      } else {
        fy = Glue("{y} ~ {x}") %^% meds.all
        if("x-m" %in% mod.path) {
          pid = ifelse("m-y" %in% mod.path, 72, 11)
          fm = meds %^% Glue(" ~ {x}*{mod1}*{mod2}")
        } else {
          pid = ifelse("m-y" %in% mod.path, 18, -3)
          fm = meds %^% Glue(" ~ {x}")
        }
      }
    }
    if(pid<0)
      stop(warning.mod.path, call.=TRUE)
    if(pid %in% 7)
      fy = Glue("{y} ~ {x} + {mod1}") %^% meds.all
    if(pid %in% c(9, 11))
      fy = Glue("{y} ~ {x} + {mod1} + {mod2}") %^% meds.all
    if(pid %in% 14:15)
      fm = meds %^% Glue(" ~ {x} + {mod1}")
    if(pid %in% 16:19)
      fm = meds %^% Glue(" ~ {x} + {mod1} + {mod2}")
  }
  else if(med.type=="serial") {
    # serial mediation
    if(length(mods)==0) {
      if(length(meds)>4)
        stop("PROCESS() does not support serial mediation with more than 4 mediators.", call.=TRUE)
      if(any(M01))
        stop("PROCESS() does not support serial mediation with dichotomous mediators.", call.=TRUE)
      pid = 6
      ptype = Glue("Serial Multiple Mediation ({length(meds)} meds)")
      fy = Glue("{y} ~ {x}") %^% meds.all
      fm = meds %^% Glue(" ~ {x}")
      for(mi in 2:length(meds)) {
        fm[mi] = fm[mi] %^% " + " %^% paste(meds[1:(mi-1)], collapse=" + ")
      }
    } else {
      stop("PROCESS() does not support serial mediation with moderators.\nPlease remove the \"mods\" argument from your code.", call.=TRUE)
    }
  }
  if("m" %in% cov.path)
    fm = str_replace(fm, "~", "~" %^% covs.all)
  if("y" %in% cov.path) {
    fy = str_replace(fy, "~", "~" %^% covs.all)
    ft = str_replace(ft, "~", "~" %^% covs.all)  # y ~ [covs] + x
  }
  if(HLM) {
    if(hlm.type!="2-2-1")
      fm = fm %^% hlm.re.m
    fy = fy %^% hlm.re.y
    ft = ft %^% hlm.re.y
  }

  ## Regression Model Summary
  varlist = function(vars=c()) {
    vars.text = paste(vars, collapse=', ')
    if(vars.text=="") vars.text = "-"
    return(vars.text)
  }
  meds.text = varlist(meds)
  mods.text = varlist(mods)
  covs.text = varlist(covs)
  clusters.text = varlist(clusters)
  Print("
  \n
  <<bold ****************** PART 1. Regression Model Summary ******************>>

  <<blue PROCESS Model Code : {pid}>> <<white (Hayes, 2018; <<underline www.guilford.com/p/hayes3>>)>>
  <<blue PROCESS Model Type : {ptype}>>
  <<green
  -    Outcome (Y) : {y}
  -  Predictor (X) : {x}{x.trans.info}
  -  Mediators (M) : {meds.text}
  - Moderators (W) : {mods.text}
  - Covariates (C) : {covs.text}
  -   HLM Clusters : {clusters.text}
  >>
  \n
  ")
  if(center | std) {
    Print("<<yellow
    All numeric predictors have been {ifelse(std, 'standardized', 'grand-mean centered')}.
    (For details, please see the help page of PROCESS.)
    >>
    \n
    ")
  }

  if(length(meds)>0) {
    Print("<<italic Formula of Mediator>>:")
    cat("-   ", paste(fm, collapse="\n-    "))
    cat("\n")
  }
  Print("<<italic Formula of Outcome>>:")
  cat("-   ", fy)
  cat("\n
CAUTION:
  Fixed effect (coef.) of a predictor involved in an interaction
  denotes its \"simple effect/slope\" at the other predictor = 0.
  Only when all predictors in an interaction are mean-centered
  can the fixed effect denote the \"main effect\"!
  ")

  ## Regression Model Building
  if(Y01==FALSE) {
    FUN.y = ifelse(HLM, "lmerTest::lmer", "lm")
    FML.y = ""
  } else {
    FUN.y = ifelse(HLM, "lme4::glmer", "glm")
    FML.y = ", family=binomial"
  }
  model.t = model.y = NULL
  Run("model.y0 = {FUN.y}({fy}, data=data.v{FML.y})")
  Run("model.y = {FUN.y}({fy}, data=data.c{FML.y})")
  Run("model.t = {FUN.y}({ft}, data=data.c{FML.y})")
  model.m = list()
  model.m0 = list()
  if(pid>=4) {
    data.v.temp = data.v
    data.c.temp = data.c
    data.c = data.c.NOmed
    if(HLM & hlm.type=="2-2-1") {
      data.v = data.meds.L2
      if(center) {
        data.c = data.c.NOmed =
          grand_mean_center(data.v, vars=c(x, mods, covs))
      } else {
        data.c = data.c.NOmed = data.v
      }
    }
    for(i in 1:length(fm)) {
      if(M01[i]==FALSE) {
        FUN.m = ifelse(HLM & hlm.type!="2-2-1", "lmerTest::lmer", "lm")
        FML.m = ""
      } else {
        FUN.m = ifelse(HLM & hlm.type!="2-2-1", "lme4::glmer", "glm")
        FML.m = ", family=binomial"
      }
      Run("model.m0.{i} = {FUN.m}({fm[i]}, data=data.v{FML.m})")
      Run("model.m.{i} = {FUN.m}({fm[i]}, data=data.c{FML.m})")
      Run("model.m0 = c(model.m0, list(model.m0.{i}=model.m0.{i}))")
      Run("model.m = c(model.m, list(model.m.{i}=model.m.{i}))")
    }
    data.v = data.v.temp
    data.c = data.c.temp
    rm(data.v.temp, data.c.temp)
  }
  model_summary(c(list(model.t), model.m, list(model.y)),
                digits=digits, std=std, file=file)
  file = NULL

  ## PROCESS Model Summary
  if(pid %in% 1:3)
    pkg = Glue("\u2018interactions\u2019 (v{packageVersion('interactions')})")
  else if(pid==4)
    pkg = Glue("\u2018mediation\u2019 (v{packageVersion('mediation')})")
  else if(pid==6)
    pkg = Glue("\u2018lavaan\u2019 (v{packageVersion('lavaan')})")
  else
    pkg = Glue("\u2018mediation\u2019 (v{packageVersion('mediation')}), \u2018interactions\u2019 (v{packageVersion('interactions')})")
  Print("
  <<bold ************ PART 2. Mediation/Moderation Effect Estimate ************>>

  <<magenta
  Package Use : {pkg}
  Effect Type : {ptype} (Model {pid})
  Sample Size : {nrow(data.v)}{ifelse(nmis>0, Glue(' ({nmis} missing observations deleted)'), '')}
  Random Seed : {ifelse(length(meds)>0, 'set.seed('%^%seed%^%')', '-')}
  Simulations : {ifelse(length(meds)>0, nsim %^% ' (' %^% nsim.type %^% ')', '-')}
  >>")
  if(length(meds)>0 & nsim<1000)
    message("\nWarning: nsim=1000 (or larger) is suggested!")
  cat("\n")

  ## PROCESS Model Building
  if(HLM & hlm.type=="2-2-1")
    stop("As limited by the \"mediation\" package, the estimate of \"2-2-1\" multilevel mediation is not supported currently.", call.=TRUE)
  RES = list()
  run.process.mod.xy = function(eff.tag="") {
    text = Glue("
    res = process_mod(model.y0, model.y,
                      data.c, x, y, mod1, mod2,
                      mod1.val, mod2.val,
                      mod.type,
                      x.label=\"X\",
                      y.label=\"Y\",
                      eff.tag=\"{eff.tag}\",
                      digits, file=file)
    RES = c(RES, list(res))")
  }
  run.process.mod.xm = function(i, eff.tag="") {
    text = Glue("
    res = process_mod(model.m0[[i]], model.m[[i]],
                      data.c.NOmed, x, meds[i], mod1, mod2,
                      mod1.val, mod2.val,
                      mod.type,
                      x.label=\"X\",
                      y.label=\"M\",
                      eff.tag=\"{eff.tag}\",
                      digits, file=file)
    RES = c(RES, list(res))")
  }
  run.process.mod.my = function(i, eff.tag="") {
    text = Glue("
    res = process_mod(model.y0, model.y,
                      data.c, meds[i], y, mod1, mod2,
                      mod1.val, mod2.val,
                      mod.type,
                      x.label=\"M\",
                      y.label=\"Y\",
                      eff.tag=\"{eff.tag}\",
                      digits, file=file)
    RES = c(RES, list(res))")
  }
  run.process.med = function(eff.tag="") {
    text = Glue("
    res = process_med(model.m0[[i]], model.y0,
                      x, y, meds[i],
                      conditional, simple.slopes,
                      ci, nsim, seed,
                      direct=ifelse(length(mods)==0, TRUE, FALSE),
                      total=ifelse(length(meds)==1, TRUE, FALSE),
                      eff.tag=\"{eff.tag}\",
                      digits, file=file)
    RES = c(RES, list(res))")
  }
  conditional = NULL
  simple.slopes = NULL
  if(pid %in% 1:3) {
    # moderation
    Run(run.process.mod.xy())
  } else if(pid==4) {
    # mediation
    for(i in 1:length(meds)) Run(run.process.med())
    # res = process_lav(data.v, y, x, meds, covs,
    #                   med.type, cov.path,
    #                   ci, nsim, seed,
    #                   digits=digits,
    #                   file=file)
    # RES = c(RES, list(res))
    # lavaan ERROR: unordered factor(s) detected; make them numeric or ordered: pass gender
  } else if(pid %in% c(5, 5.2, 5.3)) {
    # mediation and moderation
    Run(run.process.mod.xy(eff.tag="(Conditional Direct Effects [c'] of X on Y)"))
    for(i in 1:length(meds)) Run(run.process.med())
  } else if(pid==6) {
    # serial mediation
    res = process_lav(data.v, y, x, meds, covs, clusters,
                      med.type, cov.path,
                      ci, nsim, seed,
                      digits=digits,
                      file=file)
    RES = c(RES, list(res))
  } else {
    # moderated mediation
    if("x-y" %in% mod.path) {
      Run(run.process.mod.xy(eff.tag="(Conditional Direct Effects [c'] of X on Y)"))
    } else {
      Print("<<cyan <<underline Direct Effect:>> \"{x}\" (X) ==> \"{y}\" (Y)>>")
      de = as.data.frame(coef(summary(model.y)))
      de = de[which(row.names(de)==x),]
      de$df = NULL
      conf.int = confint(model.y)
      conf.int = conf.int[which(row.names(conf.int)==row.names(de)),]
      de$CI = paste0("[", paste(formatF(conf.int, digits), collapse=", "), "]")
      names(de)[1] = "Effect"
      names(de)[5] = "[95% CI]"
      row.names(de)[1] = "Direct (c')"
      print_table(de, digits=digits, file=file)
      cat("\n")
    }
    for(i in 1:length(meds)) {
      if("x-m" %in% mod.path)
        Run(run.process.mod.xm(i, eff.tag="(Conditional Effects [a] of X on M)"))
      if("m-y" %in% mod.path)
        Run(run.process.mod.my(i, eff.tag="(Conditional Effects [b] of M on Y)"))
      conditional = res$conditional
      simple.slopes = res$simple.slopes
      Run(run.process.med(eff.tag="(Conditional Indirect Effects [ab] of X through M on Y)"))
    }
  }

  if(length(meds)>0)
    Print("
    <<italic Note>>. The results based on bootstrapping or other random processes
    are <<italic unlikely>> identical to other statistical software (e.g., SPSS).
    To make results reproducible, you need to set a <<bold seed>> (any number).
    Please see the help page for details: <<bold help(PROCESS)>>
    Ignore this note if you have already set a seed. :)
    \n
    ")

  invisible(list(
    process.id=pid,
    process.type=ptype,
    model.m=model.m,
    model.y=model.y,
    results=RES
  ))
}


#' Tidy report of lavaan model.
#'
#' @param lavaan Model object fitted by \code{\link[lavaan:lavaan-class]{lavaan}}.
#' @param ci Method for estimating standard error (SE) and
#' 95\% confidence interval (CI).
#'
#' Defaults to \code{"raw"} (the standard approach of \code{lavaan}).
#' Other options:
#' \describe{
#'   \item{\code{"boot"}}{Percentile Bootstrap}
#'   \item{\code{"bc.boot"}}{Bias-Corrected Percentile Bootstrap}
#'   \item{\code{"bca.boot"}}{Bias-Corrected and Accelerated (BCa) Percentile Bootstrap}
#' }
#' @param nsim Number of simulation samples (bootstrap resampling)
#' for estimating SE and 95\% CI.
#' In formal analyses, \strong{\code{nsim=1000} (or larger)} is strongly suggested.
#' @param seed Random seed for obtaining reproducible results. Defaults to \code{NULL}.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param print Print results. Defaults to \code{TRUE}.
#' @param covariance Print (co)variances. Defaults to \code{FALSE}.
#' @param file File name of MS Word (\code{.doc}).
#'
#' @return
#' Invisibly return a list of results:
#' \describe{
#'   \item{\code{fit}}{Model fit indices.}
#'   \item{\code{measure}}{Latent variable measures.}
#'   \item{\code{regression}}{Regression paths.}
#'   \item{\code{covariance}}{Variances and/or covariances.}
#'   \item{\code{effect}}{Defined effect estimates.}
#' }
#'
#' @seealso
#' \code{\link{PROCESS}}, \code{\link{CFA}}
#'
#' @examples
#' \donttest{## Simple Mediation:
#' ## Solar.R (X) => Ozone (M) => Temp (Y)
#'
#' # PROCESS(airquality, y="Temp", x="Solar.R",
#' #         meds="Ozone", ci="boot", nsim=1000, seed=1)
#'
#' model = "
#' Ozone ~ a*Solar.R
#' Temp ~ c.*Solar.R + b*Ozone
#' Indirect := a*b
#' Direct := c.
#' Total := c. + a*b
#' "
#' lv = lavaan::sem(model=model, data=airquality)
#' lavaan::summary(lv, fit.measure=TRUE, ci=TRUE, nd=3)  # raw output
#' lavaan_summary(lv)
#' # lavaan_summary(lv, ci="boot", nsim=1000, seed=1)
#'
#'
#' ## Serial Multiple Mediation:
#' ## Solar.R (X) => Ozone (M1) => Wind(M2) => Temp (Y)
#'
#' # PROCESS(airquality, y="Temp", x="Solar.R",
#' #         meds=c("Ozone", "Wind"),
#' #         med.type="serial", ci="boot", nsim=1000, seed=1)
#'
#' model0 = "
#' Ozone ~ a1*Solar.R
#' Wind ~ a2*Solar.R + d12*Ozone
#' Temp ~ c.*Solar.R + b1*Ozone + b2*Wind
#' Indirect_All := a1*b1 + a2*b2 + a1*d12*b2
#' Ind_X_M1_Y := a1*b1
#' Ind_X_M2_Y := a2*b2
#' Ind_X_M1_M2_Y := a1*d12*b2
#' Direct := c.
#' Total := c. + a1*b1 + a2*b2 + a1*d12*b2
#' "
#' lv0 = lavaan::sem(model=model0, data=airquality)
#' lavaan::summary(lv0, fit.measure=TRUE, ci=TRUE, nd=3)  # raw output
#' lavaan_summary(lv0)
#' # lavaan_summary(lv0, ci="boot", nsim=1000, seed=1)
#'
#' model1 = "
#' Ozone ~ a1*Solar.R
#' Wind ~ d12*Ozone
#' Temp ~ c.*Solar.R + b1*Ozone + b2*Wind
#' Indirect_All := a1*b1 + a1*d12*b2
#' Ind_X_M1_Y := a1*b1
#' Ind_X_M1_M2_Y := a1*d12*b2
#' Direct := c.
#' Total := c. + a1*b1 + a1*d12*b2
#' "
#' lv1 = lavaan::sem(model=model1, data=airquality)
#' lavaan::summary(lv1, fit.measure=TRUE, ci=TRUE, nd=3)  # raw output
#' lavaan_summary(lv1)
#' # lavaan_summary(lv1, ci="boot", nsim=1000, seed=1)
#' }
#' @export
lavaan_summary = function(
    lavaan,
    ci=c("raw", "boot", "bc.boot", "bca.boot"),
    nsim=100,
    seed=NULL,
    digits=3,
    print=TRUE,
    covariance=FALSE,
    file=NULL
) {
  FIT = lavaan::fitMeasures(lavaan)

  try({
    pe.error = TRUE
    pe = lavaan::parameterEstimates(lavaan, standardized=TRUE)
    pe.error = FALSE
  }, silent=TRUE)
  if(pe.error) {
    pe = lavaan::parameterEstimates(lavaan, standardized=FALSE)
    pe$std.all = NA
  }
  if("label" %notin% names(pe)) pe$label = ""

  if(length(ci)>1) ci = "raw"
  CI = switch(
    ci,
    "raw"="Raw (Standard)",
    "boot"="Percentile Bootstrap",
    "bc.boot"="Bias-Corrected Percentile Bootstrap",
    "bca.boot"="Bias-Corrected and Accelerated (BCa) Percentile Bootstrap")
  if(ci!="raw") {
    set.seed(seed)
    lv.boot = lavaan::bootstrapLavaan(
      lavaan, type="nonparametric",
      FUN=function(...) { lavaan::coef(..., type="user") },
      R=nsim)
    lv.boot = as.data.frame(lv.boot)
    pe$se = apply(lv.boot, 2, sd)
    bootci = apply(lv.boot, 2, boot_ci, type=ci)
    pe$ci.lower = bootci[1,]
    pe$ci.upper = bootci[2,]
  }

  extract_lavaan_table = function(lav.df, op, ci.raw) {
    RES = data.frame(Estimate=lav.df$est,
                     S.E.=lav.df$se)
    RES$z = RES$Estimate / RES$S.E.
    RES$pval = p.z(RES$z)
    if(ci.raw) {
      RES$LLCI = lav.df$ci.lower
      RES$ULCI = lav.df$ci.upper
    } else {
      RES$BootLLCI = lav.df$ci.lower
      RES$BootULCI = lav.df$ci.upper
    }
    RES$Beta = lav.df$std.all
    if(nrow(RES)>0) {
      if(op==":=") {
        row.names(RES) = paste(" ", lav.df$label)
      } else {
        row.names(RES) = paste(
          " ", lav.df$lhs, op, lav.df$rhs,
          str_replace_all("(" %^% lav.df$label %^% ")", "\\(\\)", ""))
      }
    }
    return(RES)
  }

  MES = extract_lavaan_table(pe[pe$op=="=~",], "=~", ci=="raw")
  REG = extract_lavaan_table(pe[pe$op=="~",], "<-", ci=="raw")
  COV = extract_lavaan_table(pe[pe$op=="~~",], "~~", ci=="raw")
  EFF = extract_lavaan_table(pe[pe$op==":=",], ":=", ci=="raw")

  ALL = data.frame()
  if(ci=="raw")
    NUL = data.frame(Estimate=NA, S.E.=NA, z=NA, pval=NA, LLCI=NA, ULCI=NA, Beta=NA)
  else
    NUL = data.frame(Estimate=NA, S.E.=NA, z=NA, pval=NA, BootLLCI=NA, BootULCI=NA, Beta=NA)
  if(nrow(MES)>0) {
    row.names(NUL) = "Latent Variables:"
    ALL = rbind(ALL, NUL, MES)
  }
  if(nrow(REG)>0) {
    row.names(NUL) = "Regression Paths:"
    ALL = rbind(ALL, NUL, REG)
  }
  if(nrow(COV)>0 & covariance) {
    row.names(NUL) = "(Co)variances:"
    ALL = rbind(ALL, NUL, COV)
  }
  if(nrow(EFF)>0) {
    row.names(NUL) = "Defined Effects:"
    ALL = rbind(ALL, NUL, EFF)
  }

  if(print) {
    cat("\n")
    Print("
    <<cyan Fit Measures (lavaan):>>
    {p(chi2=FIT['chisq'], df=FIT['df'], n=FIT['ntotal'], digits=digits)}
    \u03c7\u00b2/<<italic df>> = {FIT['chisq']/FIT['df']:.{digits}}{ifelse(FIT['df']==0, ' <<red (saturated model)>>', '')}
    AIC = {FIT['aic']:.{digits}} <<white (Akaike Information Criterion)>>
    BIC = {FIT['bic']:.{digits}} <<white (Bayesian Information Criterion)>>
    CFI = {FIT['cfi']:.{digits}} <<white (Comparative Fit Index)>>
    TLI = {FIT['tli']:.{digits}} <<white (Tucker-Lewis Index; Non-Normed Fit Index, NNFI)>>
    NFI = {FIT['nfi']:.{digits}} <<white (Normed Fit Index)>>
    IFI = {FIT['ifi']:.{digits}} <<white (Incremental Fit Index)>>
    GFI = {FIT['gfi']:.{digits}} <<white (Goodness-of-Fit Index)>>
    AGFI = {FIT['agfi']:.{digits}} <<white (Adjusted Goodness-of-Fit Index)>>
    RMSEA = {FIT['rmsea']:.{digits}}, 90% CI [{FIT['rmsea.ci.lower']:.{digits}}, {FIT['rmsea.ci.upper']:.{digits}}] <<white (Root Mean Square Error of Approximation)>>
    SRMR = {FIT['srmr']:.{digits}} <<white (Standardized Root Mean Square Residual)>>
    ")
    cat("\n")
    print_table(ALL, row.names=TRUE, digits=digits,
                title="<<cyan Model Estimates (lavaan):>>",
                note=Glue("<<italic Note>>. {CI} Confidence Interval (CI) and SE."))
    cat("\n")
  }

  if(!is.null(file)) {
    print_table(
      ALL, digits=digits, row.names=TRUE,
      title="<b>Table. Model Estimates.</b>",
      note="<i>Note</i>. * <i>p</i> < .05. ** <i>p</i> < .01. *** <i>p</i> < .001.",
      file=file,
      file.align.text=c(
        "left", "right", "right", "right", "right", "left", "right", "right", "right"
      ))
  }

  invisible(list(fit=FIT,
                 measure=MES,
                 regression=REG,
                 covariance=COV,
                 effect=EFF))
}


## Model 4 and 6
process_lav = function(
    data, y, x, meds, covs, clusters,
    med.type, cov.path,
    ci, nsim, seed,
    digits=3,
    file=NULL,
    print=TRUE
) {
  if(length(clusters)>=1)
    stop("Multilevel serial mediation is not supported currently.", call.=TRUE)

  if(length(clusters)==0)
    clusters = NULL
  if(length(clusters)>1)
    stop("As limited by the \"lavaan\" package, only one cluster is allowed.", call.=TRUE)

  CI = switch(
    ci,
    "mcmc"="<<red Reset to:>> Percentile Bootstrap",
    "boot"="Percentile Bootstrap",
    "bc.boot"="Bias-Corrected Percentile Bootstrap",
    "bca.boot"="Bias-Corrected and Accelerated (BCa) Percentile Bootstrap")

  # ?lavaan::`lavaan-class`

  model = lav_med_modeler(y, x, meds, covs, med.type, cov.path)
  # cat(model)

  lv = lavaan::sem(model=model, data=data, cluster=clusters)
  # lavaan::summary(lv, header=FALSE,
  #                 fit.measure=TRUE,
  #                 ci=TRUE,
  #                 standardized=FALSE,
  #                 rsquare=FALSE, nd=3)
  # lvb = lavaan::sem(model=model, data=data, se="bootstrap", bootstrap=100)
  # lavaan::fitMeasures(lv)

  cat(crayon::white("Running", nsim, "simulations (lavaan model)...\n"))

  MED = lavaan_summary(lv, ci=ci, nsim=nsim, seed=seed, print=FALSE)$effect

  # cat("\015")
  # cat(rep_char(" ", 30))
  # cat("\015")

  if(print) {
    MED.print = MED
    MED.print$Beta = NULL
    MED.print$CI = cc_ci(MED[["BootLLCI"]], MED[["BootULCI"]], digits)
    names(MED.print)[length(MED.print)] = "[Boot 95% CI]"
    MED.print$Beta = MED$Beta
    print_table(
      dplyr::select(MED.print, !c("BootLLCI", "BootULCI")),
      row.names=TRUE,
      digits=digits,
      line=is.null(file),
      file=file,
      title=Glue("
      <<blue <<underline LAVAAN Syntax:>>>>\n<<italic {model}>>"),
      note=Glue("
      {CI} Confidence Interval
      <<white (SE and CI are estimated based on {nsim} Bootstrap samples.)>>"))
    cat("\n")
  }

  return(list(lavaan.syntax=model,
              lavaan.mediation=MED))
}


extract_med = function(med, ci, digits=3, direct=TRUE, total=TRUE) {
  boot = ifelse(grepl("boot", ci), TRUE, FALSE)
  # MED = rbind(
  #   c(med$d.avg, sd(med$d.avg.sims), med$d.avg.ci, med$d.avg.p),
  #   c(med$z.avg, sd(med$z.avg.sims), med$z.avg.ci, med$z.avg.p),
  #   c(med$tau.coef, sd(med$tau.sims), med$tau.ci, med$tau.p))
  MED = rbind(
    c(med$d.avg, sd(med$d.avg.sims),
      boot_ci(med$d.avg.sims, ci, med$d.avg),
      med$d.avg.p),
    c(med$z.avg, sd(med$z.avg.sims),
      boot_ci(med$z.avg.sims, ci, med$z.avg),
      med$z.avg.p),
    c(med$tau.coef, sd(med$tau.sims),
      boot_ci(med$tau.sims, ci, med$tau.coef),
      med$tau.p))
  MED = as.data.frame(MED)
  row.names(MED) = c("Indirect (ab)", "Direct (c')", "Total (c)")
  names(MED) = c("Effect", "S.E.", "LLCI", "ULCI", "p")
  MED$z = MED$Effect / MED$S.E.
  MED$pval = p.z(MED$z)
  MED$CI = cc_ci(MED[[3]], MED[[4]], digits)
  names(MED)[8] = ifelse(boot, "[Boot 95% CI]", "[MCMC 95% CI]")
  if(direct==FALSE) total = FALSE
  if(direct==FALSE) MED = MED[which(row.names(MED)!="Direct (c')"),]
  if(total==FALSE) MED = MED[which(row.names(MED)!="Total (c)"),]
  return(MED)
}


process_med = function(
    model.m,
    model.y,
    x, y,
    medi,
    conditional=NULL,  # process_mod => RES0
    simple.slopes=NULL,  # process_mod => RES
    ci,
    nsim=100,
    seed=1,
    direct=TRUE,
    total=TRUE,
    eff.tag="",
    digits=3,
    file=NULL,
    print=TRUE
) {
  if(inherits(model.m, "lmerModLmerTest")) class(model.m) = "lmerMod"
  if(inherits(model.y, "lmerModLmerTest")) class(model.y) = "lmerMod"
  boot = ifelse(grepl("boot", ci), TRUE, FALSE)
  CI = switch(
    ci,
    "mcmc"="Monte Carlo (Quasi-Bayesian)",
    "boot"="Percentile Bootstrap",
    "bc.boot"="Bias-Corrected Percentile Bootstrap",
    "bca.boot"="Bias-Corrected and Accelerated (BCa) Percentile Bootstrap")
  suppressMessages({
    if(is.null(conditional)) {
      # simple mediation
      cat(crayon::white("Running", nsim, "simulations...\n"))
      set.seed(seed)
      med = mediation::mediate(
        model.m=model.m,
        model.y=model.y,
        treat=x,
        mediator=medi,
        boot=boot,
        boot.ci.type="perc",  # "bca", "perc"
        sims=nsim)
      MED = extract_med(med, ci, digits, direct, total)
    } else {
      # moderated mediation
      COV.list = list()
      MED = data.frame()
      # DRC = data.frame()
      simple.slopes = simple.slopes[1:ncol(conditional)]
      for(i in 1:nrow(conditional)) {
        cond.list = list()
        for(j in 1:ncol(conditional)) {
          a = conditional[i, j]
          names(a) = names(conditional)[j]
          cond.list = c(cond.list, a)
        }
        COV.list = c(COV.list, list(cond.list))
      }
      cat(crayon::white("Running", nsim, "*", length(COV.list), "simulations...\n"))
      for(COV in COV.list) {
        set.seed(seed)
        med = mediation::mediate(
          model.m=model.m,
          model.y=model.y,
          treat=x,
          mediator=medi,
          covariates=COV,
          boot=boot,
          boot.ci.type="perc",  # "bca", "perc"
          sims=nsim)
        MEDi = extract_med(med, ci, digits, direct=FALSE, total=FALSE)
        # MEDi = cbind(data.frame(Path=row.names(MEDi)), MEDi)
        # names(MEDi)[1] = " "
        row.names(MEDi) = NULL
        MED = rbind(MED, MEDi)
        # MED = rbind(MED, MEDi[1,])
        # DRC = rbind(DRC, MEDi[2,])
      }
      # MED = cbind(rbind(simple.slopes, simple.slopes),
      #             rbind(MED, DRC))
      MED = cbind(simple.slopes, MED)
    }
  })

  # cat("\015")
  # cat(rep_char(" ", 30))
  # cat("\015")

  if(print) {
    if(eff.tag!="") eff.tag = "\n" %^% eff.tag
    print_table(
      dplyr::select(MED, !c("LLCI", "ULCI", "p")),
      row.names=is.null(conditional),
      digits=digits,
      line=is.null(file),
      title=Glue("
      <<blue <<underline Indirect Path:>> \"{x}\" (X) ==> \"{medi}\" (M) ==> \"{y}\" (Y)>>{eff.tag}"),
      note=Glue("
      {CI} Confidence Interval
      <<white ({ifelse(boot, 'SE', 'Effect, SE,')} and CI are estimated based on {nsim} {ifelse(boot, 'Bootstrap samples', 'Monte Carlo samples')}.)>>"))
    cat("\n")
  }

  invisible(list(mediation=MED))
}


process_mod = function(
    model0,
    model,
    data.c,
    x, y,
    mod1,
    mod2=NULL,
    mod1.val=NULL,
    mod2.val=NULL,
    mod.type=c("2-way", "3-way"),
    x.label="X",
    y.label="Y",
    eff.tag="",
    digits=3,
    file=NULL,
    print=TRUE
) {
  data.c = data.c
  suppressWarnings({
    simple.slopes = interactions::sim_slopes(
      model=model0,
      pred=!!x,
      modx=!!mod1,
      mod2=!!mod2,
      modx.values=mod1.val,
      mod2.values=mod2.val,
      johnson_neyman=TRUE)
  })
  mod1 = attributes(simple.slopes)[["modx"]]
  mod2 = attributes(simple.slopes)[["mod2"]]
  mod1.vals = attributes(simple.slopes)[["modx.values"]]
  mod2.vals = attributes(simple.slopes)[["mod2.values"]]
  res.sl = simple.slopes[["slopes"]]
  res.jn = simple.slopes[["jn"]]
  if(inherits(res.sl, "data.frame"))
    res.sl = list(res.sl)

  # MOD = coefficients(summary(model))
  # MOD = as.data.frame(MOD)[which(grepl(":", row.names(MOD))),]
  # row.names(MOD) = str_replace_all(row.names(MOD), ":", " * ")
  MOD = interaction_test(model, data=data.c, data.name="data.c")
  if(nrow(MOD)==2)
    if(row.names(MOD)[2]=="(All Interactions)")
      MOD = MOD[1,]

  RES = data.frame()
  RES0 = data.frame()
  for(i in 1:length(res.sl)) {
    res = res.sl[i]
    mod2.val = mod2.vals[i]
    names(mod2.val) = NULL
    if(!inherits(res, "data.frame"))
      res = as.data.frame(res) %>%
        mutate(across(c(-1), as.numeric))
    names(res)[2:3] = c("Effect", "S.E.")
    names(res)[4:5] = c("LLCI", "ULCI")
    names(res)[6] = str_sub(names(res)[6], 1, 1)
    names(res)[7] = "pval"
    if(is.null(mod2)) {
      res0 = res = cbind(data.frame(Mod1=mod1.vals), res[-1])
      if(c("- 1 SD", "Mean", "+ 1 SD") %allin% names(mod1.vals))
        res[[1]] = str_trim(formatF(res[[1]], digits)) %^%
          c(" (- SD)", " (Mean)", " (+ SD)")
      names(res0)[1] = mod1
      names(res)[1] = "\"" %^% mod1 %^% "\""
    } else {
      res0 = res = cbind(data.frame(Mod2=mod2.val), res)
      if(c("- 1 SD", "Mean", "+ 1 SD") %allin% names(mod2.vals))
        res[[1]] = str_trim(formatF(res[[1]], digits)) %^%
          c(" (- SD)", " (Mean)", " (+ SD)")[i]
      if(c("- 1 SD", "Mean", "+ 1 SD") %allin% names(mod1.vals))
        res[[2]] = str_trim(formatF(res[[2]], digits)) %^%
          c(" (- SD)", " (Mean)", " (+ SD)")
      names(res0)[1] = mod2
      names(res0)[2] = mod1
      names(res)[1] = "\"" %^% mod2 %^% "\""
      names(res)[2] = "\"" %^% mod1 %^% "\""
    }
    row.names(res) = row.names(res0) = NULL
    RES = rbind(RES, res)
    RES0 = rbind(RES0, res0)
  }
  RES$`[95% CI]` = cc_ci(RES[["LLCI"]], RES[["ULCI"]], digits)
  RES[[1]] = format(str_trim(formatF(RES[[1]], digits)),
                    width=nchar(names(RES)[1]))
  names(RES)[1] = format(names(RES)[1], width=max(nchar(RES[[1]])))
  if(!is.null(mod2)) {
    RES[[2]] = format(str_trim(formatF(RES[[2]], digits)),
                      width=nchar(names(RES)[2]))
    names(RES)[2] = format(names(RES)[2], width=max(nchar(RES[[2]])))
  }

  if(!is.null(mod2)) {
    term = row.names(MOD)[1]
    MOD.MOD = do.call(rbind, lapply(
      simple.slopes[["mods"]],
      function(model) {
        dt = model.frame(model)
        coef = interaction_test(model, data=dt, data.name="dt")
        coef = coef[term,]
        coef = cbind(data.frame(Interaction=row.names(coef)), coef)
        row.names(coef) = NULL
        return(coef)
      }))
    MOD.MOD = cbind(RES[1], MOD.MOD)
  } else {
    MOD.MOD = NULL
  }

  if(is.null(mod2))
    orders = order(RES0[[1]])
  else
    orders = order(RES0[[1]], RES0[[2]])
  RES0 = RES0[orders,]
  RES = RES[orders,]
  if(!is.null(mod2))
    MOD.MOD = MOD.MOD[orders,][!duplicated(MOD.MOD[[1]]),]

  if(print) {
    if(eff.tag!="") eff.tag="\n" %^% eff.tag
    print_table(
      MOD, row.names=TRUE, digits=c(2, 0, 0, 0),
      line=is.null(file),
      title=Glue("Interaction Effect{ifelse(is.null(mod2), '', 's')} on \"{y}\" ({y.label})"))
    cat("\n")
    if(!is.null(mod2) & mod.type=="3-way") {
      print_table(
        MOD.MOD, row.names=FALSE, digits=c(0, 0, 2, 0, 0, 0),
        line=is.null(file),
        title=Glue("Conditional Interaction Effects on \"{y}\" ({y.label})"))
      cat("\n")
    }
    print_table(
      dplyr::select(RES, !c("LLCI", "ULCI")),
      row.names=FALSE, digits=digits,
      line=is.null(file),
      title=Glue("<<green <<underline Simple Slopes:>> \"{x}\" ({x.label}) ==> \"{y}\" ({y.label})>>{eff.tag}"))
    cat("\n")
  }

  invisible(list(mod=MOD,
                 mod.mod=MOD.MOD,
                 conditional=RES0[1:(ncol(RES0)-6)],
                 simple.slopes=RES,
                 jn=res.jn))
}


# interactions:::print.johnson_neyman
# print.johnson_neyman = function(x, raw=FALSE, ...) {
#   if(raw) {
#     interactions:::print.johnson_neyman(x, ...)
#   } else {
#     atts = attributes(x)
#     if(atts$inside==FALSE)
#       inout = crayon::inverse("OUTSIDE")
#     else
#       inout = crayon::inverse("INSIDE")
#     b_format = jtools::num_print(x$bounds, atts$digits)
#     m_range = jtools::num_print(atts$modrange, atts$digits)
#     alpha = gsub("0\\.", "\\.", as.character(atts$alpha))
#     pmsg = paste("p <", alpha)
#     cat("\nJohnson-Neyman Internal:\n\n")
#     if(all(is.finite(x$bounds))) {
#       jtools::cat_wrap(
#         "When ", atts$modx, " is ", inout,
#         " the interval [", b_format[1], ", ",
#         b_format[2], "], the slope of ", atts$pred,
#         " is ", pmsg, ".", brk="\n\n")
#       jtools::cat_wrap(
#         crayon::italic("Note: The range of observed values of",
#                        atts$modx, "is "), "[", m_range[1], ", ",
#         m_range[2], "].", brk="\n\n")
#     } else {
#       jtools::cat_wrap("The Johnson-Neyman interval could not be found.\n        Is the p value for your interaction term below\n        the specified alpha?",
#                        brk="\n\n")
#     }
#     if(atts$control.fdr==TRUE) {
#       cat("Interval calculated using false discovery rate adjusted",
#           "t =", num_print(x$t_value, atts$digits), "\n\n")
#     }
#     if(atts$plot==TRUE) {
#       print(x$plot)
#     }
#   }
# }




#### Indirect Effect: Model-Based (using the "mediation" package) ####


#' Tidy report of mediation analysis.
#'
#' @description
#' Tidy report of mediation analysis,
#' which is performed using the \code{\link[mediation]{mediation}} package.
#'
#' @param model Mediation model built using \code{\link[mediation:mediate]{mediation::mediate()}}.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param file File name of MS Word (\code{.doc}).
## @param print.avg Just set as \code{TRUE} for a concise output.
## For details, see the "Value" section in \code{\link[mediation:mediate]{mediation::mediate()}}.
#'
#' @return Invisibly return a data frame containing the results.
#'
#' @seealso
#' \code{\link{PROCESS}}
#'
#' @examples
#' \dontrun{
#'
#' library(mediation)
#' # ?mediation::mediate
#'
#' ## Example 1: OLS Regression
#' ## Bias-corrected and accelerated (BCa) bootstrap confidence intervals
#'
#' ## Hypothesis: Solar radiation -> Ozone -> Daily temperature
#' lm.m = lm(Ozone ~ Solar.R + Month + Wind, data=airquality)
#' lm.y = lm(Temp ~ Ozone + Solar.R + Month + Wind, data=airquality)
#' set.seed(123)  # set a random seed for reproduction
#' med = mediate(lm.m, lm.y,
#'             treat="Solar.R", mediator="Ozone",
#'             sims=1000, boot=TRUE, boot.ci.type="bca")
#' med_summary(med)
#'
#' ## Example 2: Multilevel Linear Model (Linear Mixed Model)
#' ## (models must be fit using "lme4::lmer" rather than "lmerTest::lmer")
#' ## Monte Carlo simulation (quasi-Bayesian approximation)
#' ## (bootstrap method is not applicable to "lmer" models)
#'
#' ## Hypothesis: Crips -> Sweetness -> Preference (for carrots)
#' data = lmerTest::carrots  # long-format data
#' data = na.omit(data)  # omit missing values
#' lmm.m = lme4::lmer(Sweetness ~ Crisp + Gender + Age + (1 | Consumer), data=data)
#' lmm.y = lme4::lmer(Preference ~ Sweetness + Crisp + Gender + Age + (1 | Consumer), data=data)
#' set.seed(123)  # set a random seed for reproduction
#' med.lmm = mediate(lmm.m, lmm.y,
#'                   treat="Crisp", mediator="Sweetness",
#'                   sims=1000)
#' med_summary(med.lmm)
#' }
#'
#' @export
med_summary = function(model, digits=3, file=NULL) {
  # for raw function, see:
  # edit(mediation::mediate)
  # edit(mediation:::print.summary.mediate)
  # edit(mediation:::pval)
  x = model
  clp = 100 * x$conf.level
  if(x$boot) {
    ci.type = sprintf("%s Bootstrap Confidence Interval",
                      ifelse(x$boot.ci.type=="perc",
                             "Percentile",
                             "Bias-Corrected and Accelerated (BCa)"))
  } else {
    ci.type = sprintf("%s Confidence Interval",
                      ifelse(inherits(x, "mediate.tsls"),
                             "Two-Stage Least Squares",
                             "Monte Carlo (Quasi-Bayesian)"))
  }

  cat("\n")
  Print("Mediation Analysis:")
  X = x[["treat"]]
  M = x[["mediator"]]
  Y = names(model.frame(x[["model.y"]]))[1]
  Print("{X} ==> {M} ==> {Y}")

  if(!is.null(x$covariates)) {
    cat("\n")
    Print("Conditional on ...")
    conditional = data.frame(Value=unlist(x$covariates))
    conditional$Value = paste("=", conditional$Value)
    print(conditional)
    cat("\n")
  }

  smat = rbind(
    c(x$d.avg, sd(x$d.avg.sims), x$d.avg.ci, x$d.avg.p),
    c(x$z.avg, sd(x$z.avg.sims), x$z.avg.ci, x$z.avg.p),
    c(x$tau.coef, sd(x$tau.sims), x$tau.ci, x$tau.p))
  smat = as.data.frame(smat)
  row.names(smat) = c("Indirect", "Direct", "Total")
  smat$CI = cc_ci(smat[[3]], smat[[4]], digits)
  names(smat) = c("Effect",
                  "S.E.",
                  ifelse(x$boot, "Boot LLCI", "LLCI"),
                  ifelse(x$boot, "Boot ULCI", "ULCI"),
                  "pval",
                  ifelse(x$boot, "[Boot 95% CI]", "[MCMC 95% CI]"))
  print_table(smat[c(1, 2, 6, 5)], digits=digits)
  Print(ci.type)
  Print("Sample Size: {x$nobs}")
  Print("Simulations: {x$sims} ({ifelse(x$boot, 'Bootstrap', 'Monte Carlo')})")
  cat("\n")

  if(!is.null(file)) {
    smat.new = smat
    smat.new.names = names(smat.new)
    smat.new.names[5] = "<i>p</i>"
    smat.new[[6]] = str_replace_all(smat.new[[6]], "-", "\u2013")
    print_table(
      smat.new[c(1, 2, 6, 5)], digits=digits,
      col.names=c(smat.new.names[c(1, 2, 6, 5)], " "),
      file=file,
      file.align.head=c("left", "right", "right", "right", "right", "left"),
      file.align.text=c("left", "right", "right", "right", "right", "left"),
      title=paste0(
        "<b>",
        ifelse(is.null(x$covariates),
               "Mediation Analysis",
               "Mediation Analysis (Conditional)"),
        "</b></p>\n<p>",
        "Model Hypothesis:</p>\n<p>",
        X, " \u2192 ", M, " \u2192 ", Y
      ),
      note=paste0(
        ci.type, "</p>\n<p>",
        "Conf. Level: ", clp, "%</p>\n<p>",
        "Sample Size: ", x$nobs, "</p>\n<p>",
        "Simulations: ", x$sims, ifelse(x$boot, " (Bootstrap)", " (Monte Carlo)")
      ))
  }

  invisible(smat)
}




#### Time-Series Analyses ####


#' Cross-correlation analysis.
#'
#' @description
#' Plot the results of cross-correlation analysis using \code{ggplot2}
#' (rather than R base plot) for more flexible modification of the plot.
#'
#' @details
#' Significant correlations with \emph{negative time lags} suggest
#' shifts in a predictor \emph{precede} shifts in an outcome.
#'
#' @param formula Model formula like \code{y ~ x}.
#' @param data Data frame.
#' @param lag.max Maximum time lag. Defaults to \code{30}.
#' @param sig.level Significance level. Defaults to \code{0.05}.
#' @param xbreaks X-axis breaks.
#' @param ybreaks Y-axis breaks.
#' @param ylim Y-axis limits. Defaults to \code{NULL} to automatically estimate.
#' @param alpha.ns Color transparency (opacity: 0~1) for non-significant values.
#' Defaults to \code{1} for no transparency (i.e., opaque color).
#' @param pos.color Color for positive values. Defaults to \code{"black"}.
#' @param neg.color Color for negative values. Defaults to \code{"black"}.
#' @param ci.color Color for upper and lower bounds of significant values.
#' Defaults to \code{"blue"}.
#' @param title Plot title. Defaults to an illustration of the formula.
#' @param subtitle Plot subtitle.
#' @param xlab X-axis title. Defaults to \code{"Lag"}.
#' @param ylab Y-axis title. Defaults to \code{"Cross-Correlation"}.
#'
#' @return
#' A \code{gg} object, which you can further modify using
#' \code{ggplot2} syntax and save using \code{ggsave()}.
#'
#' @examples
#' # resemble the default plot output by `ccf()`
#' p1 = ccf_plot(chicken ~ egg, data=lmtest::ChickEgg)
#' p1
#'
#' # a more colorful plot
#' p2 = ccf_plot(chicken ~ egg, data=lmtest::ChickEgg, alpha.ns=0.3,
#'               pos.color="#CD201F",
#'               neg.color="#21759B",
#'               ci.color="black")
#' p2
#'
#' @seealso \code{\link{granger_test}}
#'
#' @export
ccf_plot = function(formula, data,
                    lag.max=30, sig.level=0.05,
                    xbreaks=seq(-100, 100, 10),
                    ybreaks=seq(-1, 1, 0.2),
                    ylim=NULL, alpha.ns=1,
                    pos.color="black",
                    neg.color="black",
                    ci.color="blue",
                    title=NULL, subtitle=NULL,
                    xlab="Lag", ylab="Cross-Correlation") {
  lag = acf = direc = sig = NULL
  pos.color1 = see::social_colors("red")
  neg.color1 = see::social_colors("blue grey")

  x = as.character(formula)[3]
  y = as.character(formula)[2]
  data = as.data.frame(data)
  if(is.null(title)) title = Glue("{x} \u2192 {y}")

  cc = stats::ccf(x=data[[x]], y=data[[y]], lag.max=lag.max, plot=FALSE, na.action=na.omit)
  ccdata = with(cc, data.frame(lag, acf))
  n = cc$n.used
  rsig = psych::t2r(qt(sig.level/2, n, lower.tail=F), n-2)
  ccdata$sig = as.factor(ifelse(abs(ccdata$acf)<rsig, 0, 1))
  ccdata$direc = as.factor(ifelse(ccdata$acf<0, 0, 1))

  p = ggplot(ccdata, aes(x=lag, y=acf, color=direc, alpha=sig)) +
    geom_segment(aes(xend=lag, yend=0), show.legend=FALSE) +
    geom_hline(aes(yintercept=0), linetype=1, color="black") +
    geom_hline(aes(yintercept=rsig), linetype=2, color=ci.color) +
    geom_hline(aes(yintercept=-rsig), linetype=2, color=ci.color) +
    scale_x_continuous(breaks=xbreaks) +
    scale_y_continuous(limits=ylim, breaks=ybreaks) +
    scale_color_manual(values=c(as.character(neg.color),
                                as.character(pos.color))) +
    scale_alpha_manual(values=c(alpha.ns, 1)) +
    labs(x=xlab, y=ylab, title=title, subtitle=subtitle) +
    theme_bruce()

  return(p)
}


#' Granger causality test (bivariate).
#'
#' @description
#' Granger test of predictive causality (between two time series)
#' using the \code{\link[lmtest:grangertest]{lmtest::grangertest()}} function.
#'
#' @details
#' Granger causality test examines whether
#' the lagged values of a predictor
#' have an incremental role in predicting (i.e., help to predict)
#' an outcome when controlling for the lagged values of the outcome.
#'
#' Granger causality does not necessarily constitute a true causal effect.
#'
#' @inheritParams ccf_plot
#' @param lags Time lags. Defaults to \code{1:5}.
#' @param test.reverse Whether to test reverse causality. Defaults to \code{TRUE}.
#' @param file File name of MS Word (\code{.doc}).
#' @param ... Further arguments passed to \code{\link[lmtest:grangertest]{lmtest::grangertest()}}.
#' For example, you may use \emph{robust} standard errors by specifying
#' the \code{vcov} argument (see \href{https://github.com/psychbruce/bruceR/issues/23}{GitHub Issue #23}).
#'
#' @return A data frame of results.
#'
#' @examples
#' granger_test(chicken ~ egg, data=lmtest::ChickEgg)
#' granger_test(chicken ~ egg, data=lmtest::ChickEgg, lags=1:10, file="Granger.doc")
#' unlink("Granger.doc")  # delete file for code check
#'
#' @seealso
#' \code{\link{ccf_plot}},
#' \code{\link{granger_causality}}
#'
#' @export
granger_test = function(formula, data, lags=1:5,
                        test.reverse=TRUE,
                        file=NULL, ...) {
  installed("lmtest")
  res = data.frame(Lag=lags, D1="", D2="", D12="")
  names(res)[2:4] = c("Hypothesized Direction",
                      "Reverse Direction",
                      "Hypothesized (vs. Reverse)")

  if(test.reverse) {
    formula.rev = as.formula(paste(formula[3], formula[1], formula[2]))
    formulas = list(formula, formula.rev)
  } else {
    formulas = list(formula)
  }

  Print("
  \n
  <<cyan Granger Causality Test (Bivariate)>>

  Hypothesized direction:
  <<blue {formula[2]} ~ {formula[2]}[1:Lags] + <<green {formula[3]}[1:Lags]>>>>
  ")

  for(f in formulas) {
    rev = FALSE
    if(test.reverse & f!=formulas[[1]]) {
      rev = TRUE
      Print("
      \n
      Reverse direction:
      <<blue {formula[3]} ~ {formula[3]}[1:Lags] + <<green {formula[2]}[1:Lags]>>>>
      ")
    }
    for(lag in lags) {
      gt = lmtest::grangertest(formula=f, data=data, order=lag, na.action=na.omit, ...)
      Fval = gt[2, "F"]
      df1 = -gt[2, "Df"]
      df2 = gt[1, "Res.Df"]
      sig = str_trim(sig.trans(p.f(Fval, df1, df2)))
      result = bruceR::p(f=Fval, df1=df1, df2=df2)
      result.simple = formatF(Fval, 2) %^% ifelse(sig=="", "", "<sup>" %^% sig %^% "</sup>")
      Print("Lags = {lag}:\t{result}")
      res[which(res$Lag==lag), ifelse(rev, 3, 2)] = p.plain(f=Fval, df1=df1, df2=df2)
      res[which(res$Lag==lag), 4] = ifelse(
        rev,
        res[[which(res$Lag==lag), 4]] %^% " (vs. " %^% result.simple %^% ")",
        result.simple)
    }
  }

  cat("\n")
  if(!is.null(file)) {
    RES = res
    RES[[2]] = str_replace(str_replace(
      RES[[2]], "p", "<i>p</i>"), "F", "<i>F</i>")
    RES[[3]] = str_replace(str_replace(
      RES[[3]], "p", "<i>p</i>"), "F", "<i>F</i>")
    if(test.reverse==FALSE) RES = RES[1:2]
    print_table(RES, row.names=FALSE, digits=0,
                file.align.head="left",
                file.align.text="left",
                title="<b>Table. Granger Causality Test (Bivariate).</b>",
                note="<i>Note</i>. * <i>p</i> < .05. ** <i>p</i> < .01. *** <i>p</i> < .001.",
                file=file)
  }

  invisible(res[1:3])
}


vargranger = function(varmodel, var.y, var.x) {
  vms = varmodel[["varresult"]]
  vars = names(vms)
  if(length(var.x)==1) {
    if(var.x=="ALL")
      dropped.var = paste(paste0("^", vars[which(vars!=var.y)]), collapse="|")
    else
      dropped.var = var.x
  } else {
    dropped.var = var.x = paste(var.x, collapse="|")
  }
  vm.raw = vms[[var.y]]
  vm = lm(vm.raw[["terms"]], data=vm.raw[["model"]])
  lags = names(vm[["coefficients"]])
  dropped.vars = lags[which(grepl(dropped.var, lags))]
  dropped = paste(dropped.vars, collapse=" - ")
  aov = anova(update(vm, as.formula(paste("~ . -", dropped))), vm)
  df1 = aov[2, "Df"]
  df2 = aov[2, "Res.Df"]
  chi2 = aov[2, "F"] * df1
  p.chisq = p.chi2(chi2, df1)
  data.frame(Equation=var.y,
             Excluded=var.x,
             `F`=aov[2, "F"],
             df1=df1,
             df2=df2,
             p.F=aov[2, "Pr(>F)"],
             sig.F=formatF(sig.trans(aov[2, "Pr(>F)"]), 0),
             Chisq=chi2,  # F = Chisq/k where k is the difference in degrees of freedom
             df=df1,
             p.Chisq=p.chisq,
             sig.Chisq=formatF(sig.trans(p.chisq), 0),
             Dropped=paste(dropped.vars, collapse=", "))
}


#' Granger causality test (multivariate).
#'
#' @description
#' Granger test of predictive causality (between multivariate time series)
#' based on vector autoregression (\code{\link[vars:VAR]{VAR}}) model.
#' Its output resembles the output of the \code{vargranger}
#' command in Stata (but here using an \emph{F} test).
#'
#' @details
#' Granger causality test (based on VAR model) examines whether
#' the lagged values of a predictor (or predictors)
#' help to predict an outcome when controlling for
#' the lagged values of the outcome itself.
#'
#' Granger causality does not necessarily constitute a true causal effect.
#'
#' @param varmodel VAR model fitted using the \code{\link[vars:VAR]{vars::VAR()}} function.
#' @param var.y,var.x [Optional] Defaults to \code{NULL} (all variables).
#' If specified, then perform tests for specific variables.
#' Values can be a single variable (e.g., \code{"X"}),
#' a vector of variables (e.g., \code{c("X1", "X2")}),
#' or a string containing regular expression (e.g., \code{"X1|X2"}).
#' @param test \emph{F} test and/or Wald \eqn{\chi}^2 test. Defaults to both: \code{c("F", "Chisq")}.
#' @param file File name of MS Word (\code{.doc}).
#' @param check.dropped Check dropped variables. Defaults to \code{FALSE}.
#'
#' @return A data frame of results.
#'
#' @seealso
#' \code{\link{ccf_plot}},
#' \code{\link{granger_test}}
#'
#' @examples
#' \donttest{# R package "vars" should be installed
#' library(vars)
#' data(Canada)
#' VARselect(Canada)
#' vm = VAR(Canada, p=3)
#' model_summary(vm)
#' granger_causality(vm)
#' }
#' @export
granger_causality = function(varmodel, var.y=NULL, var.x=NULL,
                             test=c("F", "Chisq"),
                             file=NULL,
                             check.dropped=FALSE) {
  vars = names(varmodel[["varresult"]])
  if(is.null(var.y)) var.y = vars
  if(is.null(var.x)) {
    res = data.frame(Equation=rep(var.y, each=length(vars)+1),
                     Excluded=c(vars, "ALL"))
    res = res[which(res$Equation!=res$Excluded), ]
  } else {
    if(length(var.x)==1)
      res = expand.grid(Equation=var.y,
                        Excluded=var.x)
    else
      res = expand.grid(
        Equation = var.y,
        Excluded = unique(c(
          var.x,
          paste(
            str_subset(var.x, "\\|", negate=TRUE),
            collapse="|")
        )))
    res$Equation = as.character(res$Equation)
    res$Excluded = as.character(res$Excluded)
    res = res[which(res$Excluded!=""), ]
  }

  res = do.call("rbind", lapply(1:nrow(res), function(i) {
    vargranger(varmodel, res[i, "Equation"], res[i, "Excluded"])
  }))

  res$Causality = res$Equation %^% " <= " %^% res$Excluded
  nchars = max(nchar(res$Causality))
  res.check = rbind(data.frame(Dropped="Dropped variables (lags)"),
                    res["Dropped"])
  res.check$Dropped = paste(" ", format(res.check$Dropped))
  names(res.check) = " "
  row.names(res.check) = c(" ", res$Causality)
  if(check.dropped) print(res.check)
  res$Dropped = NULL

  result = data.frame()
  for(var in var.y)
    result = rbind(result,
                   data.frame(
                     Equation=NA, Excluded=NA,
                     `F`=NA, df1=NA, df2=NA, p.F=NA, sig.F="",
                     `Chisq`=NA, df=NA, p.Chisq=NA, sig.Chisq="",
                     Causality=rep_char("-", nchars)),
                   res[which(res$Equation==var),])
  result$`F` = formatF(result$`F`, 2)
  result$`Chisq` = formatF(result$`Chisq`, 2)
  result$p.F = p.trans(result$p.F)
  result$p.Chisq = p.trans(result$p.Chisq)
  result$` ` = "  "
  names(result)[6] = "p"
  names(result)[7] = "  "
  names(result)[10] = " p"
  names(result)[11] = "   "
  names(result)[12] = "    "

  test.which = c()
  test.text = c()
  align.which = c("left")
  col.which = c("")
  if("F" %in% test) {
    test.which = c(test.which, 3:7)
    test.text = c(test.text, "<<italic F>> test")
    align.which = c(align.which, c("right", "right", "right", "right", "left"))
    col.which = c(col.which, c("<i>F</i>", "<i>df</i><sub>1</sub>", "<i>df</i><sub>2</sub>", "<i>p</i>", " "))
  }
  if("Chisq" %in% test) {
    test.which = c(test.which, 8:11)
    test.text = c(test.text, "Wald \u03c7\u00b2 test")
    align.which = c(align.which, c("right", "right", "right", "left"))
    col.which = c(col.which, c("\u03c7<sup>2</sup>", "<i>df</i>", " <i>p</i>", "  "))
  }

  cat("\n")
  Print("
  <<cyan Granger Causality Test (Multivariate)>>

  {paste(test.text, collapse=' and ')} based on VAR({varmodel$p}) model:
  ")
  print_table(result[c(12:13, test.which)],
              digits=0,
              row.names=FALSE)
  cat("\n")

  if(!is.null(file)) {
    result[[12]] = result[[12]] %>%
      str_replace_all("<=", "\u2190") %>%
      str_replace_all("^-+$", "")
    print_table(
      result[c(12, test.which)],
      digits=0,
      row.names=FALSE,
      col.names=col.which,
      title=paste0("<b>Table. Granger Causality Test (Multivariate) Based on VAR(", varmodel$p, ") Model.</b>"),
      note="<i>Note</i>. * <i>p</i> < .05. ** <i>p</i> < .01. *** <i>p</i> < .001.",
      file=file,
      file.align.text=align.which)
  }

  invisible(list(result=res, check.dropped=res.check))
}

Try the bruceR package in your browser

Any scripts or data that you put into this service are public.

bruceR documentation built on Sept. 27, 2023, 5:06 p.m.