R/bruceR-stats_4_regress.R

Defines functions HLM_ICC_rWG HLM_summary HLM_ICC GLM_summary model_summary regress group_mean_center grand_mean_center formula_expand formula_paste

Documented in formula_expand formula_paste GLM_summary grand_mean_center group_mean_center HLM_ICC_rWG HLM_summary model_summary regress

#### Regression ####


#' Paste a formula into a string.
#'
#' @param formula R formula.
#'
#' @return A character string indicating the formula.
#'
#' @examples
#' formula_paste(y ~ x)
#' formula_paste(y ~ x + (1 | g))
#'
#' @export
formula_paste = function(formula) {
  # Non-formula input (e.g., a string that is already a formula text)
  # is simply coerced to character and returned.
  if(!inherits(formula, c("formula", "call")))
    return(as.character(formula))
  # A formula/call indexes as [1]="~" (operator), [2]=LHS, [3]=RHS,
  # so "y ~ x" is rebuilt by pasting LHS, "~", and RHS in order.
  paste(formula[2], formula[1], formula[3], collapse=" ")
}


#' Expand all interaction terms in a formula.
#'
#' @param formula R formula or a character string indicating the formula.
#' @param as.char Return character? Defaults to \code{FALSE}.
#'
#' @return A formula/character object including all expanded terms.
#'
#' @examples
#' formula_expand(y ~ a*b*c)
#' formula_expand("y ~ a*b*c")
#'
#' @export
formula_expand = function(formula, as.char=FALSE) {
  # Helper: expand "a*b" into "a + b + a:b" for a one-sided formula string,
  # letting terms.formula() enumerate all main-effect and interaction terms.
  inter_expand = function(inter) paste(attr(terms.formula(as.formula(paste("~", inter))), "term.labels"), collapse=" + ")
  # as.character() on a two-sided formula yields c("~", LHS, RHS).
  f = as.character(as.formula(formula))
  fx = f[3]  # right-hand side text only
  # Random-effect terms: every parenthesized "(... | group)" chunk on the RHS.
  fx.R = str_extract_all(fx, "\\([^\\)]+\\)", simplify=T)
  # For each random-effect chunk containing "*": expand the part before "|"
  # and re-attach the "| group" tail (closing paren comes from str_extract).
  if(length(fx.R)>0) for(i in 1:length(fx.R)) if(grepl("\\*", fx.R[i])) fx.R[i] = paste0("(", inter_expand(str_remove_all(fx.R[i], "\\(|\\|.*")), " ", str_extract(fx.R[i], "\\|.*"))
  # Fixed-effect part: the RHS with all "(...)" chunks (and joining "+") stripped.
  fx.F = str_remove_all(fx, "[\\+ ]*\\([^\\)]+\\)[\\+ ]*")
  fx.F = ifelse(fx.F=="", "", inter_expand(fx.F))
  # Reassemble: expanded fixed part, then expanded random parts (if any),
  # joined with " + " only when both parts are non-empty.
  fx = paste(fx.F, paste(fx.R, collapse=" + "), sep=ifelse(length(fx.R)==0 | fx.F=="", "", " + "))
  f = as.formula(paste(f[2], f[1], fx))
  if(as.char) f = formula_paste(f)  # optionally return as a plain string
  return(f)
}


#' Grand-mean centering.
#'
#' Compute grand-mean centered variables.
#' Usually used for GLM interaction-term predictors and HLM level-2 predictors.
#'
#' @param data Data object.
#' @param vars Variable(s) to be centered.
#' @param std Standardized or not. Defaults to \code{FALSE}.
#' @param add.suffix The suffix of the centered variable(s).
#' Defaults to \code{""}. You may set it to \code{"_c"}, \code{"_center"}, etc.
#'
#' @return A new data object containing the centered variable(s).
#'
#' @examples
#' d = data.table(a=1:5, b=6:10)
#'
#' d.c = grand_mean_center(d, "a")
#' d.c
#'
#' d.c = grand_mean_center(d, c("a", "b"), add.suffix="_center")
#' d.c
#'
#' @seealso \code{\link{group_mean_center}}
#'
#' @export
grand_mean_center = function(data, vars=names(data),
                             std=FALSE, add.suffix="") {
  # Work on a plain data.frame copy; restore data.table class at the end.
  d = as.data.frame(data)
  numeric.classes = c("numeric", "integer", "double", "logical")
  for(var in vars) {
    # Only numeric-like columns are centered; all others are left untouched.
    if(!inherits(d[[var]], numeric.classes)) next
    # scale() subtracts the grand mean; scale=std additionally divides by SD
    # (i.e., std=TRUE produces standardized scores).
    d[paste0(var, add.suffix)] = as.numeric(scale(d[var], center=TRUE, scale=std))
  }
  # Preserve the input's data.table class, if any.
  if(data.table::is.data.table(data))
    d = data.table::as.data.table(d)
  return(d)
}

#' Group-mean centering.
#'
#' Compute group-mean centered variables.
#' Usually used for HLM level-1 predictors.
#'
#' @inheritParams grand_mean_center
#' @param by Grouping variable.
#' @param add.group.mean The suffix of the variable name(s) of group means.
#' Defaults to \code{"_mean"} (see Examples).
#'
#' @return A new data object containing the centered variable(s).
#'
#' @examples
#' d = data.table(x=1:9, g=rep(1:3, each=3))
#'
#' d.c = group_mean_center(d, "x", by="g")
#' d.c
#'
#' d.c = group_mean_center(d, "x", by="g", add.suffix="_c")
#' d.c
#'
#' @seealso \code{\link{grand_mean_center}}
#'
#' @export
group_mean_center = function(data, vars=setdiff(names(data), by), by,
                             std=FALSE,
                             add.suffix="",
                             add.group.mean="_mean") {
  # Work on a plain data.frame copy; restore data.table class at the end.
  d = as.data.frame(data)
  # All observed group levels, in sorted order.
  groups = sort(unique(d[[by]]))
  numeric.classes = c("numeric", "integer", "double", "logical")
  for(var in vars) {
    # Only numeric-like columns are processed (class is stable across groups,
    # since centering keeps the column numeric).
    if(!inherits(d[[var]], numeric.classes)) next
    for(g in groups) {
      rows = which(d[by]==g)       # row indices belonging to this group
      values = d[rows, var]
      # Store the group mean, then the within-group (optionally standardized)
      # centered values for this group's rows.
      d[rows, paste0(var, add.group.mean)] = mean(values, na.rm=TRUE)
      d[rows, paste0(var, add.suffix)] = as.numeric(scale(values, center=TRUE, scale=std))
    }
  }
  # Preserve the input's data.table class, if any.
  if(data.table::is.data.table(data))
    d = data.table::as.data.table(d)
  return(d)
}


#' Regression analysis.
#'
#' NOTE: \code{\link{model_summary}} is preferred.
#'
#' @inheritParams GLM_summary
#' @inheritParams HLM_summary
#' @param formula Model formula.
#' @param data Data frame.
#' @param family [Optional] The same as in \code{glm} and \code{glmer}
#' (e.g., \code{family=binomial} fits a logistic regression model).
#'
#' @return No return value.
#'
#' @examples
#' \dontrun{
#'
#'   ## lm
#'   regress(Temp ~ Month + Day + Wind + Solar.R, data=airquality, robust=TRUE)
#'
#'   ## glm
#'   regress(case ~ age + parity + education + spontaneous + induced,
#'           data=infert, family=binomial, robust="HC1", cluster="stratum")
#'
#'   ## lmer
#'   library(lmerTest)
#'   regress(Reaction ~ Days + (Days | Subject), data=sleepstudy)
#'   regress(Preference ~ Sweetness + Gender + Age + Frequency +
#'             (1 | Consumer), data=carrots)
#'
#'   ## glmer
#'   library(lmerTest)
#'   data.glmm = MASS::bacteria
#'   regress(y ~ trt + week + (1 | ID), data=data.glmm, family=binomial)
#'   regress(y ~ trt + week + hilo + (1 | ID), data=data.glmm, family=binomial)
#' }
#'
#' @seealso
#' \code{\link{print_table}} (print simple table)
#'
#' \code{\link{model_summary}} (highly suggested)
#'
#' \code{\link{GLM_summary}}
#'
#' \code{\link{HLM_summary}}
#'
#' @export
regress = function(
    formula, data, family=NULL,
    digits=3,
    robust=FALSE, cluster=NULL,
    # level2.predictors="", vartypes=NULL,
    test.rand=FALSE
) {
  # Dispatcher: routes to GLM_summary() (lm/glm) or HLM_summary()
  # (lmer/glmer) based on whether the formula has a random-effect "|" term.
  call = sys.call()[-1]  # get function call (argument list)
  # Recover the *text* of the family argument (e.g., "binomial") so it can
  # be spliced into the re-modeling code built inside GLM_/HLM_summary().
  if(!is.null(family))
    family.text = ifelse(!is.null(call$family),
                         deparse(call$family),
                         deparse(call[[3]]))  # 3rd positional arg when unnamed
  y = as.character(formula)[2]  # outcome (LHS) — currently unused downstream
  x = as.character(formula)[3]  # predictor text (RHS), checked for "|"
  # dots = list(...)
  if(grepl("\\|", x)==FALSE) {
    # lm & glm
    if(is.null(family)) {
      # model = lm(formula=formula, data=data)
      GLM_summary(model=NULL,
                  robust, cluster,
                  digits,
                  formula=formula, data=data)
    } else {
      # model=glm(formula=formula, data=data, family=family)
      GLM_summary(model=NULL,
                  robust, cluster,
                  digits,
                  formula=formula, data=data, family=family.text)
    }
  } else {
    # lmer & glmer
    if(is.null(family)) {
      # model=lmerTest::lmer(formula=formula, data=data)
      HLM_summary(model=NULL,
                  test.rand=test.rand, digits=digits,
                  formula=formula, data=data)
    } else {
      # model=lme4::glmer(formula=formula, data=data, family=family)
      HLM_summary(model=NULL,
                  test.rand=test.rand, digits=digits,
                  formula=formula, data=data, family=family.text)
    }
  }
}




#### Model Summary ####


#' Tidy report of regression models.
#'
#' Tidy report of regression models (most model types are supported).
#' This function uses:
#' \itemize{
#'   \item \code{\link[texreg:screenreg]{texreg::screenreg()}}
#'   \item \code{\link[texreg:htmlreg]{texreg::htmlreg()}}
#'   \item \code{\link[MuMIn:std.coef]{MuMIn::std.coef()}}
#'   \item \code{\link[MuMIn:r.squaredGLMM]{MuMIn::r.squaredGLMM()}}
#'   \item \code{\link[performance:r2_mcfadden]{performance::r2_mcfadden()}}
#'   \item \code{\link[performance:r2_nagelkerke]{performance::r2_nagelkerke()}}
#' }
#'
#' @param model.list A single model or a list of (various types of) models.
#' Most types of regression models are supported!
#' @param std Standardized coefficients? Defaults to \code{FALSE}.
#' Only applicable to linear models and linear mixed models.
#' Not applicable to generalized linear (mixed) models.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param file File name of MS Word (\code{.doc}).
#' @param check If there is only one model in \code{model.list}, it checks for multicollinearity
#' using \code{\link[performance:check_collinearity]{performance::check_collinearity()}}.
#' You may turn it off by setting \code{check=FALSE}.
#' @param zero Display "0" before "."? Defaults to \code{TRUE}.
#' @param modify.se Replace standard errors.
#' Useful if you need to replace raw SEs with robust SEs.
#' New SEs should be provided as a list of numeric vectors.
#' See usage in \code{\link[texreg:screenreg]{texreg::screenreg()}}.
#' @param modify.head Replace model names.
#' @param line Lines look like true line (\code{TRUE}) or \code{=== --- ===} (\code{FALSE}).
#' Only relevant to R Console output.
#' @param bold The \emph{p}-value threshold below which the coefficients will be formatted in bold.
#' @param ... Other arguments passed to
#' \code{\link[texreg:screenreg]{texreg::screenreg()}} or
#' \code{\link[texreg:htmlreg]{texreg::htmlreg()}}.
#'
#' @return Invisibly return the output (character string).
#'
#' @seealso
#' \code{\link{print_table}} (print simple table)
#'
#' \code{\link{GLM_summary}}
#'
#' \code{\link{HLM_summary}}
#'
#' \code{\link{med_summary}}
#'
#' \code{\link{lavaan_summary}}
#'
#' \code{\link{PROCESS}}
#'
#' @examples
#' \donttest{#### Example 1: Linear Model ####
#' lm1 = lm(Temp ~ Month + Day, data=airquality)
#' lm2 = lm(Temp ~ Month + Day + Wind + Solar.R, data=airquality)
#' model_summary(lm1)
#' model_summary(lm2)
#' model_summary(list(lm1, lm2))
#' model_summary(list(lm1, lm2), std=TRUE, digits=2)
#' model_summary(list(lm1, lm2), file="OLS Models.doc")
#' unlink("OLS Models.doc")  # delete file for code check
#'
#' #### Example 2: Generalized Linear Model ####
#' glm1 = glm(case ~ age + parity,
#'            data=infert, family=binomial)
#' glm2 = glm(case ~ age + parity + education + spontaneous + induced,
#'            data=infert, family=binomial)
#' model_summary(list(glm1, glm2))  # "std" is not applicable to glm
#' model_summary(list(glm1, glm2), file="GLM Models.doc")
#' unlink("GLM Models.doc")  # delete file for code check
#'
#' #### Example 3: Linear Mixed Model ####
#' library(lmerTest)
#' hlm1 = lmer(Reaction ~ (1 | Subject), data=sleepstudy)
#' hlm2 = lmer(Reaction ~ Days + (1 | Subject), data=sleepstudy)
#' hlm3 = lmer(Reaction ~ Days + (Days | Subject), data=sleepstudy)
#' model_summary(list(hlm1, hlm2, hlm3))
#' model_summary(list(hlm1, hlm2, hlm3), std=TRUE)
#' model_summary(list(hlm1, hlm2, hlm3), file="HLM Models.doc")
#' unlink("HLM Models.doc")  # delete file for code check
#'
#' #### Example 4: Generalized Linear Mixed Model ####
#' library(lmerTest)
#' data.glmm = MASS::bacteria
#' glmm1 = glmer(y ~ trt + week + (1 | ID), data=data.glmm, family=binomial)
#' glmm2 = glmer(y ~ trt + week + hilo + (1 | ID), data=data.glmm, family=binomial)
#' model_summary(list(glmm1, glmm2))  # "std" is not applicable to glmm
#' model_summary(list(glmm1, glmm2), file="GLMM Models.doc")
#' unlink("GLMM Models.doc")  # delete file for code check
#'
#' #### Example 5: Multinomial Logistic Model ####
#' library(nnet)
#' d = airquality
#' d$Month = as.factor(d$Month)  # Factor levels: 5, 6, 7, 8, 9
#' mn1 = multinom(Month ~ Temp, data=d, Hess=TRUE)
#' mn2 = multinom(Month ~ Temp + Wind + Ozone, data=d, Hess=TRUE)
#' model_summary(mn1)
#' model_summary(mn2)
#' model_summary(mn2, file="Multinomial Logistic Model.doc")
#' unlink("Multinomial Logistic Model.doc")  # delete file for code check
#' }
#' @export
model_summary = function(
    model.list,
    std=FALSE,
    digits=3,
    file=NULL,
    check=TRUE,
    zero=ifelse(std, FALSE, TRUE),
    modify.se=NULL,
    modify.head=NULL,
    line=TRUE,
    bold=0,
    ...
) {
  # A "varest" object (vars::VAR) bundles one lm per equation:
  # unpack it into a plain model list and reuse equation names as headers.
  if(inherits(model.list, "varest")) {
    model.list = model.list$varresult
    modify.head = names(model.list)
  }
  # Normalize a single model into a one-element list.
  if(inherits(model.list, "list")==FALSE)
    model.list = list(model.list)
  # Console output uses texreg::screenreg; Word (.doc) output uses texreg::htmlreg.
  if(is.null(file)) {
    sumreg = texreg::screenreg
  } else {
    sumreg = texreg::htmlreg
  }

  # --- Per-model helper functions ---

  # Name of the outcome (first column of the model frame).
  model_y = function(model) {
    # if(inherits(model, c("lmerMod", "lmerModLmerTest", "glmerMod")))
    #   y = model@call[["formula"]][[2]]
    # else if(inherits(model, "lme"))
    #   y = model$call[["fixed"]][[2]]
    # else
    #   y = model$call[["formula"]][[2]]
    # if(is.null(y)) y = ""
    y = names(model.frame(model))[1]
    return(y)
  }
  # Standardized coefficients and their SEs (MuMIn; columns 1 and 2).
  model_std_coef = function(model) {
    MuMIn::std.coef(model, partial.sd=FALSE)[,1]
  }
  model_std_s.e. = function(model) {
    MuMIn::std.coef(model, partial.sd=FALSE)[,2]
  }
  # Pseudo-R^2 measures for glm; NA for other model classes.
  model_R2mcfadden = function(model) {
    ifelse(
      inherits(model, "glm"),
      1-model$deviance/model$null.deviance,
      NA)
  }
  model_R2nagelkerke = function(model) {
    ifelse(
      inherits(model, "glm"),
      as.numeric(performance::r2_nagelkerke(model)),
      NA)
  }
  # Marginal/conditional R^2 for mixed models; NA for other model classes.
  model_R2m = function(model) {
    ifelse(
      inherits(model, c("lme", "lmerMod", "lmerModLmerTest", "glmerMod")),
      as.numeric(MuMIn::r.squaredGLMM(model)[1, "R2m"]),
      NA)
  }
  model_R2c = function(model) {
    ifelse(
      inherits(model, c("lme", "lmerMod", "lmerModLmerTest", "glmerMod")),
      as.numeric(MuMIn::r.squaredGLMM(model)[1, "R2c"]),
      NA)
  }

  # --- Column headers: "(1) y1", "(2) y2", ... unless user-supplied ---
  # (multinom models are special-cased: outcome + reference level are kept
  # for the table caption instead of per-model names)
  if(is.null(modify.head)) {
    new.model.names = NULL
    try({
      if(any(unlist(lapply(model.list, inherits, "nnet")))) {
        multinom.y = as.character(lapply(model.list, model_y))[1]
        multinom.ref = model.list[[1]][["lab"]][1]
      } else {
        new.model.names = paste(paste0("(", 1:length(model.list), ")"),
                                as.character(lapply(model.list, model_y)))
        new.model.names = str_trim(new.model.names)
      }
    }, silent=TRUE)
  } else {
    new.model.names = modify.head
  }

  # --- Coefficients: override with standardized ones when std=TRUE ---
  # (omit the intercept, which is 0 after standardization)
  if(std) {
    new.coef = lapply(model.list, model_std_coef)
    new.s.e. = lapply(model.list, model_std_s.e.)
    omit = "Intercept"
  } else {
    new.coef = 0
    new.s.e. = 0
    omit = NULL
  }

  # User-supplied SEs (e.g., robust SEs) take precedence.
  if(!is.null(modify.se)) new.s.e. = modify.se

  suppressWarnings({
    # --- Extra goodness-of-fit rows: R^2 variants by model family ---
    new.R2.all = list()
    if(any(unlist(lapply(model.list, inherits, c("lme", "lmerMod", "lmerModLmerTest", "glmerMod"))))) {
      try({
        new.R2 = list(
          R2m = as.numeric(lapply(model.list, model_R2m)),
          R2c = as.numeric(lapply(model.list, model_R2c))
        )
        names(new.R2) = c("Marginal R^2", "Conditional R^2")
        new.R2.all = c(new.R2.all, new.R2)
      }, silent=TRUE)
    }
    if(any(unlist(lapply(model.list, inherits, "glm")))) {
      new.R2 = list(
        R2mcfadden = as.numeric(lapply(model.list, model_R2mcfadden)),
        R2nagelkerke = as.numeric(lapply(model.list, model_R2nagelkerke))
      )
      names(new.R2) = c("McFadden's R^2", "Nagelkerke's R^2")
      new.R2.all = c(new.R2.all, new.R2)
    }
    if(length(new.R2.all)==0)
      new.R2.all = NULL

    # --- Build the table (text or HTML) via texreg ---
    output = sumreg(
      model.list, file=NULL,
      leading.zero=zero, digits=digits, bold=bold,
      custom.model.names=new.model.names,
      override.coef=new.coef,
      override.se=new.s.e.,
      omit.coef=omit,
      custom.gof.rows=new.R2.all,
      margin=1,
      inline.css=FALSE,
      doctype=TRUE,
      html.tag=TRUE,
      head.tag=TRUE,
      body.tag=TRUE,
      caption.above=TRUE,
      caption=NULL,
      custom.note="",
      include.loglik=FALSE,
      include.deviance=FALSE,
      beside=TRUE,  # for 'multinom' models
      ...)
  })

  if(is.null(file)) {
    # --- Console output ---
    # Optionally replace texreg's "===="/"----" rules with solid box-drawing lines.
    if(line) {
      output = output %>%
        str_replace_all(
          "[-=]{3,}",
          rep_char("\u2500", str_count(output, "=")/2))
    }
    cat("\n")
    Print("<<cyan Model Summary>>")
    cat(output)
    Print("<<italic Note>>. * <<italic p>> < .05, ** <<italic p>> < .01, *** <<italic p>> < .001.")
    cat("\n")
    # For a single model, optionally report multicollinearity (VIF) diagnostics.
    if(length(model.list)==1 & check==TRUE) {
      try({
        suppressWarnings({
          check.coll = performance::check_collinearity(model.list[[1]])
        })
        if(!is.null(check.coll)) {
          print(check.coll)
          cat("\n")
        }
      }, silent=TRUE)
    }
  } else {
    # --- Word (.doc) output: post-process texreg's HTML ---
    file = str_replace(file, "\\.docx$", ".doc")
    output = output %>%
      str_replace_all("&nbsp;", "") %>%
      str_replace_all("'", "\u2019") %>%                      # typographic apostrophe
      str_replace_all("<td>-", "<td>\u2013") %>%              # minus -> en dash
      str_replace_all("<td>(?=\\.|\\d\\.)", "<td>&ensp;") %>% # align positive values
      str_replace_all("R\\^2|R<sup>2</sup>", "<i>R</i><sup>2</sup>") %>%
      str_replace(
        "<style>",
        "<style>\nbody {font-size: 10.5pt; font-family: Times New Roman;}\np {margin: 0px;}\nth, td {height: 19px;}") %>%
      str_replace(
        "<body>",
        paste0(
          "<body>\n<p><b>Table X. Regression Models",
          ifelse(any(unlist(lapply(model.list, class)) %in% "nnet"),
                 paste0(" (Reference Group: ", multinom.y, " = \u2018", multinom.ref, "\u2019)"),
                 ""),
          ".</b></p>")) %>%
      str_replace(
        "</body>",
        paste0(
          "<p><i>Note</i>. ",
          ifelse(std, "Standardized ", "Unstandardized "),
          "regression coefficients are displayed, with standard errors in parentheses.</p><p>",
          "* <i>p</i> < .05. ** <i>p</i> < .01. *** <i>p</i> < .001.</p>",
          "</body>"))
    # sink(file)
    # cat(output)
    # sink()
    # Write with an explicit UTF-8 connection so unicode survives on Windows.
    f = file(file, "w", encoding="UTF-8")
    cat(output, file=f)
    close(f)
    Print("<<green \u2714>> Table saved to <<blue '{paste0(getwd(), '/', file)}'>>")
    cat("\n")
  }

  invisible(output)
}




#### GLM Functions ####


#' Tidy report of GLM (\code{lm} and \code{glm} models).
#'
#' NOTE: \code{\link{model_summary}} is preferred.
#'
#' @param model A model fitted with \code{lm} or \code{glm} function.
#' @param robust [Only for \code{lm} and \code{glm}]
#' \code{FALSE} (default), \code{TRUE} (then the default is \code{"HC1"}),
#' \code{"HC0"}, \code{"HC1"}, \code{"HC2"}, \code{"HC3"}, \code{"HC4"}, \code{"HC4m"}, or \code{"HC5"}.
#' It will add a table with heteroskedasticity-robust standard errors (aka. Huber-White standard errors).
#' For details, see \code{?sandwich::vcovHC} and \code{?jtools::summ.lm}.
#'
#' *** \code{"HC1"} is the default of Stata, whereas \code{"HC3"} is the default suggested by the \code{sandwich} package.
#' @param cluster [Only for \code{lm} and \code{glm}]
#' Cluster-robust standard errors are computed if cluster is set to the name of the input data's cluster variable or is a vector of clusters.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param ... Other arguments. You may re-define \code{formula}, \code{data}, or \code{family}.
#'
#' @return No return value.
#'
#' @examples
#' \donttest{## Example 1: OLS regression
#' lm = lm(Temp ~ Month + Day + Wind + Solar.R, data=airquality)
#' GLM_summary(lm)
#' GLM_summary(lm, robust="HC1")
#' # Stata's default is "HC1"
#' # R package <sandwich>'s default is "HC3"
#'
#' ## Example 2: Logistic regression
#' glm = glm(case ~ age + parity + education + spontaneous + induced,
#'           data=infert, family=binomial)
#' GLM_summary(glm)
#' GLM_summary(glm, robust="HC1", cluster="stratum")
#' }
#' @seealso
#' \code{\link{print_table}} (print simple table)
#'
#' \code{\link{model_summary}} (highly suggested)
#'
#' \code{\link{HLM_summary}}
#'
#' \code{\link{regress}}
#'
#' @export
GLM_summary = function(model, robust=FALSE, cluster=NULL,
                       digits=3, ...) {
  dots = list(...)
  if(c("formula", "data") %allin% names(dots)) {
    # re-modeling
    # Re-fit the model from `formula`/`data` (and optional `family`) passed
    # via `...` — used by regress(), which calls this with model=NULL.
    # Run() evaluates the glue-built code in the current frame.
    formula = dots$formula
    data = dots$data
    if("family" %notin% names(dots))
      Run("model = lm({formula_paste(formula)}, data=data)")
    else
      Run("model = glm({formula_paste(formula)}, data=data, family={dots$family})")
  }
  dv = names(model[["model"]])[1]  # outcome variable name
  sumModel = summary(model)
  N = nrow(model$model)            # N of complete cases actually used
  N.missing = ifelse("na.action" %in% names(model), Glue(" ({length(model$na.action)} missing cases deleted)"), "")

  ## lm vs.glm ##
  if(inherits(model, "lm") & !inherits(model, "glm")) {
    ## Print: Model Fit ##
    # Overall F test plus R^2 / adjusted R^2.
    Ftest = sumModel[["fstatistic"]]
    names(Ftest) = c("F", "df1", "df2")
    Print("
    \n
    <<cyan General Linear Model (OLS Regression)>>

    Model Fit:
    {p(f={Ftest['F']}, df1={Ftest['df1']}, df2={Ftest['df2']})}
    <<italic R>>\u00b2 = {sumModel$r.squared:.5} (Adjusted <<italic R>>\u00b2 = {sumModel$adj.r.squared:.5})
    \n
    ")

    ## Print: Fixed Effects ##
    # Unstandardized coefficients with t-based 95% CIs and (when there is
    # more than one predictor) variance inflation factors.
    FE = as.data.frame(sumModel[["coefficients"]])
    df = model[["df.residual"]]
    FE$CI = cc_ci(FE[,1] + qt(0.025, df) * FE[,2],
                  FE[,1] + qt(0.975, df) * FE[,2],
                  digits)
    names(FE) = c("b", "S.E.", "t", "pval", "[95% CI of b]")
    if(length(model[["model"]])>2) {
      FE.vif = jtools::summ(model, vif=TRUE)
      FE = cbind(FE, VIF=FE.vif$coeftable[,"VIF"])
    } else {
      # VIF is undefined with a single predictor.
      FE = cbind(FE, VIF=NA)
    }
    print_table(FE, digits=digits,
                title=Glue("
    Unstandardized Coefficients:
    Outcome Variable: {dv}
    <<italic N>> = {N}{N.missing}"))
    cat("\n")

    ## Print: Robust SE ##
    # Heteroskedasticity- (or cluster-) robust SEs via jtools/sandwich;
    # robust=TRUE defaults to Stata-style "HC1".
    if(robust!=FALSE | !is.null(cluster)) {
      if(robust==TRUE) robust = "HC1"
      summ.rob = jtools::summ(model, robust=robust, cluster=cluster)
      FE.rob = as.data.frame(summ.rob$coeftable)
      FE.rob$CI = cc_ci(FE.rob[,1] + qt(0.025, df) * FE.rob[,2],
                        FE.rob[,1] + qt(0.975, df) * FE.rob[,2],
                        digits)
      names(FE.rob) = c("b", "S.E.", "t", "pval", "[95% CI of b]")
      print_table(FE.rob, digits=digits,
                  title=Glue("{ifelse(is.null(cluster), 'Heteroskedasticity', 'Cluster')}-Robust Standard Errors:"),
                  note=Glue("<<blue Robust S.E.: type = {robust}{ifelse(is.null(cluster), '', glue('; clustering variable = {paste(cluster, collapse=', ')}'))}.>>"))
      cat("\n")
    }

    ## Print: Standardized Coefficients ##
    # Betas (intercept dropped), with t tests recomputed from beta/SE,
    # plus partial and semi-partial (part) correlations from jtools.
    if(nrow(FE)>1) {
      FE.std = as.data.frame(MuMIn::std.coef(model, partial.sd=FALSE))[-1, 1:2]
      FE.rp = jtools::summ(model, part.corr=TRUE)
      t = FE.std[,1] / FE.std[,2]  # FE$t[-1]
      FE.std = cbind(
        FE.std,
        t = t,
        pval = p.t(t, df),
        CI.std = cc_ci(FE.std[,1] + qt(0.025, df) * FE.std[,2],
                       FE.std[,1] + qt(0.975, df) * FE.std[,2],
                       digits),
        r.partial = FE.rp$coeftable[-1, "partial.r"],
        r.part = FE.rp$coeftable[-1, "part.r"])
      names(FE.std) = c("\u03b2", "S.E.", "t", "pval",
                        "[95% CI of \u03b2]",
                        "r(partial)", "r(part)")
      print_table(FE.std, digits=digits,
                  title=Glue("
      Standardized Coefficients (\u03b2):
      Outcome Variable: {dv}
      <<italic N>> = {N}{N.missing}"))
      cat("\n")
    }
  } else if(inherits(model, "glm")) {
    ## Print: Model Fit ##
    # Overall chi-square from the analysis of deviance, plus AIC/BIC and
    # two pseudo-R^2 measures (McFadden; Nagelkerke).
    aov.glm = anova(model, test="Chisq")
    Chi2 = sum(aov.glm$Deviance, na.rm=TRUE)
    Df = sum(aov.glm$Df, na.rm=TRUE)
    R2.glm = performance::r2_nagelkerke(model)
    Print("
    \n
    <<cyan Generalized Linear Model (GLM)>>

    Model Fit:
    AIC = {AIC(model):.{digits}}
    BIC = {BIC(model):.{digits}}
    {p(chi2={Chi2}, df={Df})}
    {rep_char('\u2500', 7)} Pseudo-<<italic R>>\u00b2 {rep_char('\u2500', 7)}
    McFadden\u2019s <<italic R>>\u00b2   = {1 - model$deviance/model$null.deviance:.5} <<blue (= 1 - logLik(model)/logLik(null.model))>>
    Nagelkerke\u2019s <<italic R>>\u00b2 = {R2.glm:.5} <<blue (= Cragg-Uhler\u2019s <<italic R>>\u00b2, adjusts Cox & Snell\u2019s)>>
    \n
    ")

    ## Print: Fixed Effects ##
    # Coefficients on the link scale with z-based (normal) 95% CIs,
    # odds ratios (exp(b)), and VIF when there is more than one predictor.
    FE = as.data.frame(sumModel[["coefficients"]])
    FE$CI = cc_ci(FE[,1] + qnorm(0.025) * FE[,2],
                  FE[,1] + qnorm(0.975) * FE[,2],
                  digits)
    FE$OR = exp(FE[,1])
    if(length(model[["model"]])>2) {
      FE$VIF = jtools::summ(model, vif=TRUE)$coeftable[,"VIF"]
    } else {
      FE$VIF = NA
    }
    names(FE) = c("b", "S.E.", "z", "pval", "[95% CI of b]", "OR", "VIF")
    print_table(FE, digits=digits,
                title=Glue("
    Unstandardized Coefficients:
    Outcome Variable: {dv} (family: {model$family$family}; link function: {model$family$link})
    <<italic N>> = {N}{N.missing}"),
    note=Glue("<<blue OR = odds ratio.>>"))
    cat("\n")

    ## Print: Robust SE ##
    # Same robust-SE table as the lm branch, but with z statistics.
    if(robust!=FALSE | !is.null(cluster)) {
      if(robust==TRUE) robust = "HC1"
      summ.rob = jtools::summ(model, robust=robust, cluster=cluster)
      FE.rob = as.data.frame(summ.rob$coeftable)
      FE.rob$CI = cc_ci(FE.rob[,1] + qnorm(0.025) * FE.rob[,2],
                        FE.rob[,1] + qnorm(0.975) * FE.rob[,2],
                        digits)
      names(FE.rob) = c("b", "S.E.", "z", "pval", "[95% CI of b]")
      print_table(FE.rob, digits=digits,
                  title=Glue("{ifelse(is.null(cluster), 'Heteroskedasticity', 'Cluster')}-Robust Standard Errors:"),
                  note=Glue("<<blue Robust S.E.: type = {robust}{ifelse(is.null(cluster), '', glue('; clustering variable = {paste(cluster, collapse=', ')}'))}.>>"))
      cat("\n")
    }
  } else {
    stop("GLM_summary() can only deal with 'lm' or 'glm' models.", call.=TRUE)
  }
}




#### HLM Functions ####


## Testing random effects and computing intraclass correlation coefficient (ICC) for HLM
## Internal helper: builds a display-ready data.frame of variance components
## and ICCs from a fitted lme4/lmerTest model.
## NOTE: `digits` is currently only used by the commented-out SE/Wald code;
## the active code uses fixed digits (0 for K, 5 for Variance/ICC).
HLM_ICC = function(model, digits=3) {
  ## Extract components from model ##
  sumModel = summary(model)
  data = as.data.frame(model@frame)
  RE = as.data.frame(sumModel[["varcor"]])
  # Keep only variances (rows where var2 is NA), dropping covariance rows.
  RE = RE[is.na(RE[[3]]), -3]

  ## Initialize ICC data.frame ##
  N = nrow(model@frame)               # level-1 observations
  K = sumModel[["ngrps"]][RE$grp]     # groups per clustering variable (NA for "Residual")
  group.id = na.omit(names(K))
  ICC = data.frame(RE[1], K, RE[2], RE[3])
  names(ICC) = c("Group", "K", "Parameter", "Variance")
  var.resid = ICC[ICC$Group=="Residual", "Variance"]
  # Total variance = all intercept variances + residual variance.
  var.total = sum(ICC[ICC$Parameter=="(Intercept)" | ICC$Group=="Residual", "Variance"])

  ## Mean sample size of each group ##
  # n.mean = c()
  # for(group in group.id) n.mean = c(n.mean, (N-sum(data[,.(n=.N^2), by=group]$n)/N) / (nlevels(data[[group]])-1) )  # quite similar to "N / K"
  # n.mean = c(n.mean, NA)

  ## Test for variance (Wen Fuxing, 2009, pp.85-97; Snijders & Bosker, 2012, pp.190-191) ##
  var = ICC$Variance
  # var.se = var.resid * (2/N) * sqrt(1/(n.mean-1) + 2*var/var.resid + n.mean*var^2/var.resid^2)  # error !!!
  # var.se = sqrt(2*(var+var.resid/n.mean)^2/(K-1) + 2*var.resid^2/((N-K)*n.mean^2))
  # var.wald.z = var/var.se
  # var.p = p.z(var.wald.z)

  ## Compute and test for ICC (Snijders & Bosker, 2012, pp.20-21) ##
  icc = var/var.total
  # icc.se = (1-icc) * (1+(n.mean-1)*icc) * sqrt(2/(n.mean*(n.mean-1)*(N-1)))  # N or K ?!
  # icc.wald.z = icc/icc.se

  ## Combine results ##
  # Format numeric columns as strings for display.
  ICC$K = formatF(ICC$K, digits=0)
  ICC$Variance = formatF(ICC$Variance, digits=5)
  # ICC$S.E. = formatF(var.se, digits=digits)
  # ICC$Wald.Z = formatF(var.wald.z, digits=2)
  # ICC$p = p.trans(var.p)
  # ICC$sig = sig.trans(var.p)
  ICC$ICC = formatF(icc, digits=5)
  # Blank out cells that are meaningless for display:
  # the residual row has no K/Parameter/ICC; random-slope rows repeat Group/K
  # and have no ICC (ICC is defined from intercept variance only).
  ICC[ICC$Group=="Residual", c("K", "Parameter", "ICC")] = ""
  ICC[ICC$Parameter!="(Intercept)" & ICC$Group!="Residual", c("Group", "K", "ICC")] = ""
  # Pad header names with spaces so columns align in fixed-width console output.
  names(ICC)[1] = paste0("Cluster", rep_char(" ", max(nchar(ICC$Group))-7))
  names(ICC)[2] = "K  "
  names(ICC)[3] = paste0("Parameter", rep_char(" ", max(nchar(ICC$Parameter))-9))
  # names(ICC)[6] = "Wald Z"
  # names(ICC)[8] = " "

  return(ICC)
}


#' Tidy report of HLM (\code{lmer} and \code{glmer} models).
#'
#' @description
#' NOTE: \code{\link{model_summary}} is preferred.
#'
## @details
## Hierarchical Linear Model (HLM), also known as Multilevel Linear Model (MLM) or Linear Mixed Model (LMM).
## Predictor variables at different levels may have five types:
## \describe{
##   \item{1. Intercept}{The overall intercept (\eqn{\gamma_{00}})}
##   \item{2. L1fixed}{Level-1 predictor with \strong{fixed} slope}
##   \item{3. L1random-GROUP-L1VAR}{Level-1 predictor with \strong{random} slopes nested with a grouping/clustering variable}
##   \item{4. L2-GROUP}{Level-2 predictor (e.g., GDP per capita at city level), always with \strong{fixed} slope unless there is also a level-3 structure.
##
##   *** NOTE: the current version of \code{'HLM_summary'} function does not consider three-levels design, so you may only use this function in two-levels HLM or cross-classified HLM.}
##   \item{5. Cross-GROUP-L1VAR}{Cross-level interaction consisting of level-1 and level-2 predictors}
## }
## The degrees of freedom (\emph{df}) of predictor variables in HLM vary across different levels and also depend on the variable types.
## However, different software use different estimation methods and thus provide somewhat different \emph{df}s, which may be confusing.
## Whereas the \code{lmerTest} package in R provides \emph{df}s that are estimated by the Satterthwaite's (1946) approximation (i.e., a data-driven approach without defining variable types),
## the \code{HLM} software provides \emph{df}s that totally depend on the variable types (i.e., a theory-driven approach).
#'
#' @param model A model fitted with \code{lmer} or \code{glmer} function using the \code{lmerTest} package.
## @param level2.predictors \strong{[Only for \code{lmer}]} [Optional] Defaults to \code{NULL}.
## If you have predictors at level 2,
## you may also specify the level-2 grouping/clustering variables
## and the corresponding level-2 predictor variables.
##
## *** Example: \code{level2.predictors="School: W1 + W2; House: 1"},
## where \code{School} and \code{House} are two grouping variables,
## \code{W1 & W2} are school-level predictors,
## and there is no house-level predictor.
## @param vartypes \strong{[Only for \code{lmer}]} Manually setting variable types. Needless in most situations.
#' @param test.rand [Only for \code{lmer} and \code{glmer}]
#' \code{TRUE} or \code{FALSE} (default).
#' Test random effects (i.e., variance components) by using the likelihood-ratio test (LRT),
#' which is asymptotically chi-square distributed.
#' For large datasets, it is much time-consuming.
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#' @param ... Other arguments. You may re-define \code{formula}, \code{data}, or \code{family}.
#'
#' @return No return value.
#'
#' @examples
#' \donttest{library(lmerTest)
#'
#' ## Example 1: data from lme4::sleepstudy
#' # (1) 'Subject' is a grouping/clustering variable
#' # (2) 'Days' is a level-1 predictor nested within 'Subject'
#' # (3) No level-2 predictors
#' m1 = lmer(Reaction ~ (1 | Subject), data=sleepstudy)
#' m2 = lmer(Reaction ~ Days + (1 | Subject), data=sleepstudy)
#' m3 = lmer(Reaction ~ Days + (Days | Subject), data=sleepstudy)
#' HLM_summary(m1)
#' HLM_summary(m2)
#' HLM_summary(m3)
#'
#' ## Example 2: data from lmerTest::carrots
#' # (1) 'Consumer' is a grouping/clustering variable
#' # (2) 'Sweetness' is a level-1 predictor
#' # (3) 'Age' and 'Frequency' are level-2 predictors
#' hlm.1 = lmer(Preference ~ Sweetness + Age + Frequency +
#'                (1 | Consumer), data=carrots)
#' hlm.2 = lmer(Preference ~ Sweetness + Age + Frequency +
#'                (Sweetness | Consumer) + (1 | Product), data=carrots)
#' HLM_summary(hlm.1)
#' HLM_summary(hlm.2)
#' }
#' @references
#' Hox, J. J. (2010).
#' \emph{Multilevel analysis: Techniques and applications} (2nd ed.).
#' New York, NY: Routledge.
#'
#' Nakagawa, S., & Schielzeth, H. (2013).
#' A general and simple method for obtaining \emph{R}^2 from generalized linear mixed-effects models.
#' \emph{Methods in Ecology and Evolution, 4,} 133--142.
#'
#' Xu, R. (2003).
#' Measuring explained variation in linear mixed effects models.
#' \emph{Statistics in Medicine, 22,} 3527--3541.
#'
#' @seealso
#' \code{\link{print_table}} (print simple table)
#'
#' \code{\link{model_summary}} (highly suggested)
#'
#' \code{\link{GLM_summary}}
#'
#' \code{\link{regress}}
#'
#' @export
HLM_summary = function(
    model=NULL,
    # level2.predictors=NULL,
    # vartypes=NULL,
    test.rand=FALSE,  # time-consuming in big datasets
    digits=3,
    ...
) {
  # If both `formula` and `data` are passed via ..., (re)fit the model here;
  # otherwise take the formula from the already-fitted model object.
  dots = list(...)
  if(c("formula", "data") %allin% names(dots)) {
    # re-modeling
    formula = dots$formula
    data = dots$data
    if("family" %notin% names(dots))
      # Gaussian outcome: fit with lmerTest::lmer() (provides Satterthwaite df)
      Run("model = lmerTest::lmer({formula_paste(formula)}, data=data)")
    else
      # Non-Gaussian outcome: generalized linear mixed model via lme4::glmer()
      Run("model = lme4::glmer({formula_paste(formula)}, data=data, family={dots$family})")
  } else {
    formula = model@call[["formula"]]
  }
  dv = names(model.frame(model))[1]  # outcome variable name (first model-frame column)
  sumModel = summary(model, cor=FALSE)
  ngrps = sumModel[["ngrps"]]  # number of level-2 groups per clustering factor

  ## Print: Model Information ##
  Print("
  \n
  <<cyan
  Hierarchical Linear Model (HLM)
  (also known as) Linear Mixed Model (LMM)
  (also known as) Multilevel Linear Model (MLM)
  >>
  ")

  ## Print: Sample Sizes ##
  # .prt.grps(ngrps=ngrps(model), nobs=nobs(model))
  Print("
  \n
  Model Information:
  Formula: {formula_paste(formula)}
  Level-1 Observations: <<italic N>> = {nobs(model)}
  Level-2 Groups/Clusters: {paste(paste(names(ngrps), ngrps, sep=', '), collapse='; ')}
  \n
  ")

  ## lmer vs. glmer ##
  # Branch on model class: "lmerModLmerTest" = linear mixed model fitted with
  # lmerTest::lmer(); "glmerMod" = generalized linear mixed model from lme4::glmer().
  if(inherits(model, "lmerModLmerTest")) {
    if(!inherits(formula, c("formula", "call")))
      stop("Please specify the arguments `formula` and `data` in HLM_summary().", call.=FALSE)
    # if(is.null(vartypes) & !is.null(level2.predictors)) {
    #   tryCatch({
    #     vartypes = HLM_vartypes(model, formula, level2.predictors)
    #   }, error=function(e) {
    #     stop("Please re-specify 'level2.predictors'.", call.=FALSE)
    #   })
    # } else if(!is.null(vartypes)) {
    #   names(vartypes) = dimnames(summary(model)[["coefficients"]])[[1]]
    # }

    # if(!is.null(vartypes)) {
    #   vt = as.data.frame(vartypes)
    #   names(vt) = "Variable Type"
    #   Print("Level-2 predictors: '{level2.predictors}'")
    #   cat("\n")
    #   print(vt)
    #   cat("\n")
    # }
    # vartypes = c("Intercept",
    #              "L1fixed",
    #              "L1random-GROUP-L1VAR",
    #              "L2-GROUP",
    #              "Cross-GROUP-L1VAR")

    ## Print: Model Fit (Omega^2, Pseudo-R^2, and Information Criteria ##
    # logLik=sumModel[["logLik"]]
    # AIC = -2LL + 2p  [p = number of parameters]
    # BIC = -2LL + p*ln(N)  [N = number of cases]
    # Omega^2 = 1 - residual variance / outcome variance (Xu, 2003).
    # R2.glmm = marginal/conditional R^2 (Nakagawa & Schielzeth, 2013) via MuMIn.
    Omg2 = 1 - var(residuals(model)) / var(model.response(model.frame(model)))
    R2.glmm = suppressWarnings( MuMIn::r.squaredGLMM(model) )  # R2.glmm[1,1]; R2.glmm[1,2]
    Print("
    Model Fit:
    AIC = {AIC(model):.{digits}}
    BIC = {BIC(model):.{digits}}
    <<italic R>>_(m)\u00b2 = {R2.glmm[1,1]:.5}  <<blue (<<italic Marginal R>>\u00b2: fixed effects)>>
    <<italic R>>_(c)\u00b2 = {R2.glmm[1,2]:.5}  <<blue (<<italic Conditional R>>\u00b2: fixed + random effects)>>
    Omega\u00b2 = {Omg2:.5}  <<blue (= 1 - proportion of unexplained variance)>>
    \n
    ")

    ## Print: ANOVA Table ##
    # aov.hlm=car::Anova(model, type=3)
    aov.hlm = stats::anova(model)
    if(nrow(aov.hlm)>0) {
      print_table(aov.hlm, digits=2,
                  title="ANOVA Table:")
      cat("\n")
    }

    ## Print: Fixed Effects ##
    # lmerTest coefficient table columns are: Estimate, SE, df, t, p.
    # The cbind() arguments are evaluated on the ORIGINAL FE, so FE[,3] below
    # is the Satterthwaite df column; c(1,2,4,3,5) reorders to b, SE, t, df, p.
    # 95% CI = b +/- t(0.025/0.975, df) * SE.
    FE = as.data.frame(sumModel[["coefficients"]])
    FE = cbind(
      FE[c(1,2,4,3,5)],
      CI = cc_ci(FE[,1] + qt(0.025, FE[,3]) * FE[,2],
                 FE[,1] + qt(0.975, FE[,3]) * FE[,2],
                 digits))
    names(FE) = c("b/\u03b3", "S.E.", "t", "df", "pval",
                  "[95% CI of b/\u03b3]")
    print_table(FE, digits=c(digits, digits, 2, 1, 0, 0),
                title=Glue("
    Fixed Effects:
    Unstandardized Coefficients (b or \u03b3):
    Outcome Variable: {dv}"),
    note=Glue("<<blue 'df' is estimated by Satterthwaite approximation.>>"))
    cat("\n")
    # Standardized coefficients (only when there is at least one predictor
    # besides the intercept, which is dropped with [-1, ]).
    if(nrow(FE)>1) {
      FE.std = as.data.frame(MuMIn::std.coef(model, partial.sd=FALSE))[-1, 1:2]
      t = FE.std[,1] / FE.std[,2]  # FE$t[-1]
      # if(!is.null(vartypes))
      #   df = HLM_df(sumModel, vartypes)[-1]
      # else
      #   df = FE$df[-1]
      df = FE$df[-1]  # reuse the Satterthwaite df from the unstandardized table
      FE.std = cbind(
        FE.std, t = t, df = df, pval = p.t(t, df),
        CI = cc_ci(FE.std[,1] + qt(0.025, df) * FE.std[,2],
                   FE.std[,1] + qt(0.975, df) * FE.std[,2],
                   digits))
      names(FE.std) = c("\u03b2", "S.E.", "t", "df", "pval",
                        "[95% CI of \u03b2]")
      print_table(FE.std, digits=c(digits, digits, 2, 1, 0, 0),
                  title=Glue("
      Standardized Coefficients (\u03b2):
      Outcome Variable: {dv}"))
      cat("\n")
    }

    ## Print: Random Effects & ICC ##
    # RE = sumModel[["varcor"]]
    # res = sumModel[["sigma"]]^2
    # print(RE, comp="Variance")
    RE = HLM_ICC(model, digits=digits)
    print_table(RE, row.names=FALSE, title="Random Effects:")
    cat("\n")
  } else if(inherits(model, "glmerMod")) {
    summ = jtools::summ(model, digits=digits, re.variance="var")

    ## Print: Model Fit (Omega^2, Pseudo-R^2, and Information Criteria) ##
    R2.glmm = suppressWarnings( MuMIn::r.squaredGLMM(model) ) # R2.glmm[1,1]; R2.glmm[1,2]
    Print("
    Model Fit:
    AIC = {AIC(model):.{digits}}
    BIC = {BIC(model):.{digits}}
    <<italic R>>_(m)\u00b2 = {R2.glmm[1,1]:.5}  <<blue (<<italic Marginal R>>\u00b2: fixed effects)>>
    <<italic R>>_(c)\u00b2 = {R2.glmm[1,2]:.5}  <<blue (<<italic Conditional R>>\u00b2: fixed + random effects)>>
    \n
    ")

    ## Print: Fixed Effects ##
    # GLMM coefficients use Wald z statistics, so the 95% CI is
    # b +/- qnorm(0.025/0.975) * SE; OR = exp(b) (odds ratio).
    FE = as.data.frame(sumModel[["coefficients"]])
    FE$CI = cc_ci(FE[,1] + qnorm(0.025) * FE[,2],
                  FE[,1] + qnorm(0.975) * FE[,2],
                  digits)
    FE$OR = exp(FE[,1])
    names(FE) = c("b/\u03b3", "S.E.", "z", "pval",
                  "[95% CI of b/\u03b3]", "OR")
    print_table(FE, digits=c(digits, digits, 2, 0, 0, digits),
                title=Glue("
    Fixed Effects:
    Unstandardized Coefficients (b or \u03b3):
    Outcome Variable: {dv}"),
    note=Glue("<<blue OR = odds ratio.>>"))
    cat("\n")

    ## Print: Random Effects & ICC ##
    # Merge jtools' random-coefficient table (Group, Parameter, Variance)
    # with its grouping-variable table (Group, K, ICC), then blank out the
    # K/ICC cells on non-intercept rows so each cluster prints them once.
    RE = as.data.frame(summ$rcoeftable)
    ICC = as.data.frame(summ$gvars)
    names(RE) = c("Group", "Parameter", "Variance")
    names(ICC) = c("Group", "K", "ICC")
    RE$Group = as.character(RE$Group)
    RE$Parameter = as.character(RE$Parameter)
    RE$Variance = as.numeric(as.character(RE$Variance))
    ICC$Group = as.character(ICC$Group)
    ICC$K = as.numeric(as.character(ICC$K))
    ICC$ICC = as.numeric(as.character(ICC$ICC))
    RE = left_join(cbind(left_join(RE[1], ICC[1:2], by="Group"),
                         RE[2:3]),
                   ICC[c(1,3)], by="Group")
    RE$K = formatF(RE$K, 0)
    RE$Variance = formatF(RE$Variance, 5)
    RE$ICC = formatF(RE$ICC, 5)
    RE[RE$Parameter!="(Intercept)", c("Group", "K", "ICC")] = ""
    # Pad the Group column and header names so the printed table aligns.
    RE$Group = sprintf(glue("%-{max(max(nchar(RE$Group)), 7)}s"), RE$Group)
    names(RE)[1] = paste0("Cluster", rep_char(" ", max(max(nchar(RE$Group))-7, 0)))
    names(RE)[2] = "K "
    names(RE)[3] = paste0("Parameter", rep_char(" ", max(nchar(RE$Parameter))-9))
    print_table(RE, row.names=FALSE, title="Random Effects:")
    Print("<<blue Residual variance is not reported for generalized linear mixed models,
           but it is assumed to be \u03c0\u00b2/3 (\u2248 {pi^2/3:.2}) in logistic models (binary data)
           and log(1/exp(intercept)+1) in poisson models (count data).>>")
    cat("\n")
  } else {
    stop("Please fit your model with `lmerTest::lmer()` or `lme4::glmer()`.", call.=TRUE)
  }

  ## Print: Likelihood-Ratio Test (LRT) for Random Effects ##
  # ANOVA-like table for random-effects: Single term deletions
  if(test.rand) {
    RE.test = lmerTest::rand(model)  # the same: ranova()
    print(RE.test)
    cat("\n")
  }
}


#' Tidy report of HLM indices: ICC(1), ICC(2), and rWG/rWG(J).
#'
#' @description
#' Compute ICC(1) (non-independence of data),
#' ICC(2) (reliability of group means),
#' and \eqn{r_{WG}}/\eqn{r_{WG(J)}} (within-group agreement for single-item/multi-item measures)
#' in multilevel analysis (HLM).
#'
#' @details
#' \describe{
#'   \item{\strong{ICC(1) (intra-class correlation, or non-independence of data)}}{
#'     ICC(1) = var.u0 / (var.u0 + var.e) = \eqn{\sigma_{u0}^2 / (\sigma_{u0}^2 + \sigma_{e}^2)}
#'
#'     ICC(1) is the ICC we often compute and report in multilevel analysis
#'     (usually in the Null Model, where only the random intercept of group is included).
#'     It can be interpreted as either \strong{"the proportion of variance explained by groups"} (i.e., \emph{heterogeneity} between groups)
#'     or \strong{"the expectation of correlation coefficient between any two observations within any group"} (i.e., \emph{homogeneity} within groups).
#'   }
#'   \item{\strong{ICC(2) (reliability of group means)}}{
#'     ICC(2) = mean(var.u0 / (var.u0 + var.e / n.k)) = \eqn{\Sigma[\sigma_{u0}^2 / (\sigma_{u0}^2 + \sigma_{e}^2 / n_k)] / K}
#'
#'     ICC(2) is a measure of \strong{"the representativeness of group-level aggregated means for within-group individual values"}
#'     or \strong{"the degree to which an individual score can be considered a reliable assessment of a group-level construct"}.
#'   }
#'   \item{\strong{\eqn{r_{WG}}/\eqn{r_{WG(J)}} (within-group agreement for single-item/multi-item measures)}}{
#'     \eqn{r_{WG} = 1 - \sigma^2 / \sigma_{EU}^2}
#'
#'     \eqn{r_{WG(J)} = 1 - (\sigma_{MJ}^2 / \sigma_{EU}^2) / [J * (1 - \sigma_{MJ}^2 / \sigma_{EU}^2) + \sigma_{MJ}^2 / \sigma_{EU}^2]}
#'
#'     \eqn{r_{WG}}/\eqn{r_{WG(J)}} is a measure of within-group agreement or consensus. Each group has an \eqn{r_{WG}}/\eqn{r_{WG(J)}}.
#'   }
#'   \item{* Note for the above formulas}{
#'   \itemize{
#'     \item \eqn{\sigma_{u0}^2}: between-group variance (i.e., tau00)
#'     \item \eqn{\sigma_{e}^2}: within-group variance (i.e., residual variance)
#'     \item \eqn{n_k}: group size of the k-th group
#'     \item \eqn{K}: number of groups
#'     \item \eqn{\sigma^2}: actual group variance of the k-th group
#'     \item \eqn{\sigma_{MJ}^2}: mean value of actual group variance of the k-th group across all J items
#'     \item \eqn{\sigma_{EU}^2}: expected random variance (i.e., the variance of uniform distribution)
#'     \item \eqn{J}: number of items
#'   }
#'   }
#' }
#'
#' @param data Data frame.
#' @param group Grouping variable.
#' @param icc.var Key variable for analysis (usually the dependent variable).
#' @param rwg.vars Defaults to \code{icc.var}. It can be:
#' \itemize{
#'   \item A single variable (\emph{single-item} measure), then computing rWG.
#'   \item Multiple variables (\emph{multi-item} measure), then computing rWG(J), where J = the number of items.
#' }
#' @param rwg.levels As \eqn{r_{WG}}/\eqn{r_{WG(J)}} compares the actual group variance to the expected random variance (i.e., the variance of uniform distribution, \eqn{\sigma_{EU}^2}),
#' it is required to specify which type of uniform distribution is.
#' \itemize{
#'   \item For \emph{continuous} uniform distribution, \eqn{\sigma_{EU}^2 = (max - min)^2 / 12}.
#'   Then \code{rwg.levels} is not useful and will be set to \code{0} (the default).
#'   \item For \emph{discrete} uniform distribution, \eqn{\sigma_{EU}^2 = (A^2 - 1) / 12},
#'   where A is the number of response options (levels).
#'   Then \code{rwg.levels} should be provided (= A in the above formula).
#'   For example, if the measure is a 5-point Likert scale, you should set \code{rwg.levels=5}.
#' }
#' @param digits Number of decimal places of output. Defaults to \code{3}.
#'
#' @return Invisibly return a list of results.
#'
#' @references
#' Bliese, P. D. (2000).
#' Within-group agreement, non-independence, and reliability:
#' Implications for data aggregation and analysis.
#' In K. J. Klein & S. W. Kozlowski (Eds.),
#' \emph{Multilevel theory, research, and methods in organizations} (pp. 349--381).
#' San Francisco, CA: Jossey-Bass, Inc.
#'
#' James, L.R., Demaree, R.G., & Wolf, G. (1984).
#' Estimating within-group interrater reliability with and without response bias.
#' \emph{Journal of Applied Psychology, 69}, 85--98.
#'
#' @seealso
#' \code{\link{cor_multilevel}}
#'
#' \href{https://CRAN.R-project.org/package=multilevel}{R package "multilevel"}
#'
#' @examples
#' \donttest{data = lme4::sleepstudy  # continuous variable
#' HLM_ICC_rWG(data, group="Subject", icc.var="Reaction")
#'
#' data = lmerTest::carrots  # 7-point scale
#' HLM_ICC_rWG(data, group="Consumer", icc.var="Preference",
#'             rwg.vars="Preference",
#'             rwg.levels=7)
#' HLM_ICC_rWG(data, group="Consumer", icc.var="Preference",
#'             rwg.vars=c("Sweetness", "Bitter", "Crisp"),
#'             rwg.levels=7)
#' }
#' @export
HLM_ICC_rWG = function(data, group, icc.var,
                       rwg.vars=icc.var,
                       rwg.levels=0,
                       digits=3) {
  # Compute and print ICC(1), ICC(2), and rWG / rWG(J) for multilevel data.
  data = as.data.frame(data)

  ## ICC(1) and ICC(2): fit the null (random-intercept-only) model
  model = lmerTest::lmer(as.formula(Glue("{icc.var} ~ (1 | {group})")), data=data)
  mf = model@frame  # model frame actually used (listwise-deleted)
  N = nrow(mf)
  # group sizes n_k, taken from the model frame
  n_k = vapply(split(mf[[icc.var]], mf[[group]]), length, integer(1))
  vc = as.data.frame(summary(model)[["varcor"]])[["vcov"]]
  tau00 = vc[1]   # between-group (intercept) variance
  sigma2 = vc[2]  # within-group (residual) variance
  ICC1 = tau00 / (tau00 + sigma2)
  ICC2 = mean(tau00 / (tau00 + sigma2 / n_k))

  ## rWG and rWG(J): expected random variance under a uniform distribution
  if(rwg.levels==0) {
    # continuous uniform: (max - min)^2 / 12
    obs.max = max(data[rwg.vars], na.rm=TRUE)
    obs.min = min(data[rwg.vars], na.rm=TRUE)
    var.ran = (obs.max - obs.min)^2 / 12
  } else {
    # discrete uniform with A = rwg.levels response options: (A^2 - 1) / 12
    var.ran = (rwg.levels^2 - 1) / 12
  }
  dd = na.omit(data.frame(data[rwg.vars], data[group]))
  # drop the grouping column, then split by group
  dd.split = split(dd[, 1:(ncol(dd)-1)], dd[[group]])
  if(length(rwg.vars)==1) {
    # single-item measure: each split element is a vector; rWG = 1 - s^2/sigma_EU^2
    rwg = vapply(dd.split, function(g) {
      if(length(g)>1) {
        # cap the observed variance at the expected random variance (rWG >= 0)
        1 - (min(var(g), var.ran) / var.ran)
      } else {
        NA_real_  # agreement undefined for a single observation
      }
    }, numeric(1))
    n.k = vapply(dd.split, length, integer(1))
    rwg.name = "rWG"
    rwg.type = "single-item measures"
  } else {
    # multi-item measure: rWG(J) with J = number of items
    rwg = vapply(dd.split, function(g) {
      J = ncol(g)
      if(nrow(g)>1) {
        # mean within-group item variance, capped at the expected random variance
        ratio = min(mean(apply(g, 2, var, na.rm=TRUE)), var.ran) / var.ran
        (J*(1-ratio)) / (J*(1-ratio) + ratio)
      } else {
        NA_real_
      }
    }, numeric(1))
    n.k = vapply(dd.split, nrow, integer(1))
    rwg.name = "rWG(J)"
    rwg.type = "multi-item measures"
  }
  # per-group table returned invisibly to the user
  rwg.out = data.frame(group=names(dd.split), size=n.k, rwg=rwg)
  names(rwg.out)[3] = rwg.name

  Print("
  \n
  <<cyan ------ Sample Size Information ------>>

  Level 1: <<italic N>> = {N} observations (\"{icc.var}\")
  Level 2: <<italic K>> = {length(n.k)} groups (\"{group}\")
  \n
  ")
  # five-number-style summary of group sizes (drop 1st and 3rd quartiles)
  size.summ = as.matrix(summary(n_k)[c(-2, -5)])
  colnames(size.summ) = "n (group sizes)"
  print(size.summ)

  Print("
  \n
  <<cyan ------ ICC(1), ICC(2), and {rwg.name} ------>>

  ICC variable: \"{icc.var}\"

  ICC(1) = {formatF(ICC1, digits)} <<blue (non-independence of data)>>
  ICC(2) = {formatF(ICC2, digits)} <<blue (reliability of group means)>>

  {rwg.name} variable{ifelse(length(rwg.vars)==1, '', 's')}: \"{paste(rwg.vars, collapse='\", \"')}\"

  {rwg.name} <<blue (within-group agreement for {rwg.type})>>
  ")
  # distribution of per-group agreement values
  rwg.summ = as.data.frame(t(as.matrix(summary(rwg))))
  rownames(rwg.summ) = rwg.name
  print_table(rwg.summ, digits=digits)
  cat("\n")

  invisible(list(ICC1=ICC1, ICC2=ICC2, rwg=rwg.out))
}

# Try the bruceR package in your browser
#
# Any scripts or data that you put into this service are public.
#
# bruceR documentation built on Sept. 27, 2023, 5:06 p.m.