R/compute_dmod.R In psychmeta: Psychometric Meta-Analysis Toolkit

Documented in compute_dmodcompute_dmod_nparcompute_dmod_par.integrate_dmod

#' Integration function for computing parametric signed or unsigned \mjeqn{d_{Mod}}{d_Mod} effect sizes
#' for a single focal group
#'
#' This internal function exists to support the [compute_dmod_par()] function, but may also be useful as a bare-bones tool for computing signed and unsigned \mjeqn{d_{Mod}}{d_Mod} effect sizes.
#' Please note that this function does not include an option for re-scaling its result to compensate for cumulative densities smaller than 1.
#'
#' The \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed} effect size (i.e., the average of differences in prediction over
#' the range of predictor scores) is computed as
#' \mjdeqn{d_{Mod_{Signed}}=\frac{1}{SD_{Y_{1}}}\intop f_{2}(X)\left[X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right] dX}{d_Mod_Signed = 1/SD_Y_1 * integrate(f_2(X) * [X * (b_1_1 - b_1_2) + b_0_1 - b_0_2])},
#' where
#'   \itemize{
#'     \item {\mjeqn{SD_{Y_{1}}}{SD_Y_1} is the referent group's criterion standard deviation;}
#'     \item {\mjeqn{f_{2}(X)}{f_2(X)} is the normal-density function for the distribution of focal-group predictor scores;}
#'     \item {\mjeqn{b_{1_{1}}}{b_1_1} and \mjeqn{b_{1_{2}}}{b_1_2} are the slopes of the regression of \mjeqn{Y}{Y} on \mjeqn{X}{X} for the referent and focal groups, respectively;}
#'     \item {\mjeqn{b_{0_{1}}}{b_0_1} and \mjeqn{b_{0_{2}}}{b_0_2} are the intercepts of the regression of \mjeqn{Y}{Y} on \mjeqn{X}{X} for the referent and focal groups, respectively; and}
#'     \item {the integral spans all \mjseqn{X} scores within the operational range of predictor scores for the focal group.}
#'   }
#'
#' The \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned} effect size (i.e., the average of absolute differences in prediction over
#' the range of predictor scores) is computed as
#' \mjdeqn{d_{Mod_{Unsigned}}=\frac{1}{SD_{Y_{1}}}\intop f_{2}(X)\left|X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|dX.}{d_Mod_Unsigned = 1/SD_Y_1 * integrate(f_2(X) * |X * (b_1_1 - b_1_2) + b_0_1 - b_0_2|).}
#'
#' @param referent_int Referent group's intercept.
#' @param referent_slope Referent group's slope.
#' @param focal_int Focal group's intercept.
#' @param focal_slope Focal group's slope.
#' @param focal_mean_x Focal group's predictor-score mean.
#' @param focal_sd_x Focal group's predictor-score standard deviation.
#' @param referent_sd_y Referent group's criterion standard deviation.
#' @param focal_min_x Focal group's minimum predictor score.
#' @param focal_max_x Focal group's maximum predictor score.
#' @param signed Logical argument that indicates whether the function should compute \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed} (\code{TRUE}; default) or \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned} (\code{FALSE}).
#'
#' @return A \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed} or \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned} effect size, depending on the \code{signed} argument.
#'
#' @keywords internal
#' @md
#'
#' @references
#' Nye, C. D., & Sackett, P. R. (2017).
#' New effect sizes for tests of categorical moderation and differential prediction.
#' *Organizational Research Methods, 20*(4), 639–664. \doi{10.1177/1094428116644505}
#'
#' @examples
#' \dontrun{
#' # Example for computing \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed}
#' .integrate_dmod(referent_int = -.05, referent_slope = .5,
#'               focal_int = -.05, focal_slope = .3,
#'               focal_mean_x = -.5, focal_sd_x = 1,
#'               referent_sd_y = 1, focal_min_x = -Inf, focal_max_x = Inf,
#'               signed = TRUE)
#'
#' # Example for computing \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned}
#' .integrate_dmod(referent_int = -.05, referent_slope = .5,
#'               focal_int = -.05, focal_slope = .3,
#'               focal_mean_x = -.5, focal_sd_x = 1,
#'               referent_sd_y = 1, focal_min_x = -Inf, focal_max_x = Inf,
#'               signed = FALSE)
#' }
.integrate_dmod <- function(referent_int, referent_slope,
focal_int, focal_slope,
focal_mean_x, focal_sd_x,
referent_sd_y,
focal_min_x, focal_max_x,
signed = TRUE){

if(zapsmall(focal_min_x) == zapsmall(focal_max_x)){
0
}else{
if(signed){signFun <- function(x){x}}else{signFun <- abs}
..internalDmodOut.1234 <- try(integrate(f = function(x){
signFun(x * (referent_slope - focal_slope) + referent_int - focal_int) *
dnorm(x, mean = focal_mean_x, sd = focal_sd_x)},
lower = focal_min_x, upper = focal_max_x)$v / referent_sd_y) if(!is.null(attributes(..internalDmodOut.1234)$class))
if(attributes(..internalDmodOut.1234)$class == "try-error"){ ..internalDmodOut.1234 <- integrate(f = function(x){ signFun(x * (referent_slope - focal_slope) + referent_int - focal_int) * dnorm(x, mean = focal_mean_x, sd = focal_sd_x)}, lower = max(c(focal_mean_x - 3 * focal_sd_x, focal_min_x)), upper = min(c(focal_mean_x + 3 * focal_sd_x, focal_max_x)))$v / referent_sd_y
}
if(!is.null(attributes(..internalDmodOut.1234)$class)){ if(attributes(..internalDmodOut.1234)$class == "try-error"){
NA
}else{
..internalDmodOut.1234
}
}else{
..internalDmodOut.1234
}
}
}

#' Function for computing non-parametric \mjeqn{d_{Mod}}{d_Mod} effect sizes for a single focal group
#'
#' This function computes non-parametric \mjeqn{d_{Mod}}{d_Mod} effect sizes from user-defined descriptive statistics
#' and regression coefficients, using a distribution of observed scores as weights.
#' This non-parametric function is best used when the assumption of normally distributed predictor
#' scores is not reasonable and/or the distribution of scores observed in a sample is likely to represent the
#' distribution of scores in the population of interest.
#' If one has access to the full raw data set, the \code{dMod} function may be used
#' as a wrapper to this function so that the regression equations and descriptive statistics can
#' be computed automatically within the program.
#'
#' The \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed} effect size (i.e., the average of differences in prediction over
#' the range of predictor scores) is computed as
#' \mjdeqn{d_{Mod_{Signed}}=\frac{\sum_{i=1}^{m}n_{i}\left[X_{i}\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right]}{SD_{Y_{1}}\sum_{i=1}^{m}n_{i}},}{d_Mod_Signed = sum(n_i * [X_i * (b_1_1 - b_1_2) + b_0_1 - b_0_2]) / (SD_Y_1 * sum(n_i)),}
#' where
#'   \itemize{
#'     \item {\mjeqn{SD_{Y_{1}}}{SD_Y_1} is the referent group's criterion standard deviation;}
#'     \item {\mjeqn{m}{m} is the number of unique scores in the distribution of focal-group predictor scores;}
#'     \item {\mjeqn{X}{X} is the vector of unique focal-group predictor scores, indexed \mjseqn{i=1} through \mjseqn{m};}
#'     \item {\mjeqn{X_{i}}{X_i} is the \mjeqn{i^{th}}{ith} unique score value;}
#'     \item {\mjeqn{n}{n} is the vector of frequencies associated with the elements of \mjeqn{X}{X};}
#'     \item {\mjeqn{n_{i}}{n_i} is the number of cases with a score equal to \mjeqn{X_{i}}{X_i};}
#'     \item {\mjeqn{b_{1_{1}}}{b_1_1} and \mjeqn{b_{1_{2}}}{b_1_2} are the slopes of the regression of \mjeqn{Y}{Y} on \mjeqn{X}{X} for the referent and focal groups, respectively; and}
#'     \item {\mjeqn{b_{0_{1}}}{b_0_1} and \mjeqn{b_{0_{2}}}{b_0_2} are the intercepts of the regression of \mjeqn{Y}{Y} on \mjeqn{X}{X} for the referent and focal groups, respectively.}
#'   }
#'
#' The \mjeqn{d_{Mod_{Under}}}{d_Mod_Under} and \mjeqn{d_{Mod_{Over}}}{d_Mod_Over} effect sizes are computed
#' using the same equation as \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed}, but \mjeqn{d_{Mod_{Under}}}{d_Mod_Under} is
#' the weighted average of all scores in the area of underprediction (i.e., the differences in prediction with
#' negative signs) and \mjeqn{d_{Mod_{Over}}}{d_Mod_Over} is the weighted average of all scores in the area of
#' overprediction (i.e., the differences in prediction with negative signs).
#'
#' The \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned} effect size (i.e., the average of absolute differences in prediction over
#' the range of predictor scores) is computed as
#' \mjdeqn{d_{Mod_{Unsigned}}=\frac{\sum_{i=1}^{m}n_{i}\left|X_{i}\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|}{SD_{Y_{1}}\sum_{i=1}^{m}n_{i}}.}{d_Mod_Unsigned = sum(n_i * |X_i * (b_1_1 - b_1_2) + b_0_1 - b_0_2]| / (SD_Y_1 * sum(n_i)).}
#'
#' The \mjeqn{d_{Min}}{d_Min} effect size (i.e., the smallest absolute difference in prediction observed over the
#' range of predictor scores) is computed as
#' \mjdeqn{d_{Min}=\frac{1}{SD_{Y_{1}}}Min\left[\left|X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|\right].}{d_Min = 1/SD_Y_1 * Min[X * (b_1_1 - b_1_2) + b_0_1 - b_0_2].}
#'
#' The \mjeqn{d_{Max}}{d_Max} effect size (i.e., the largest absolute difference in prediction observed over the
#' range of predictor scores)is computed as
#' \mjdeqn{d_{Max}=\frac{1}{SD_{Y_{1}}}Max\left[\left|X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|\right].}{d_Max = 1/SD_Y_1 * Max[X * (b_1_1 - b_1_2) + b_0_1 - b_0_2].}
#' \emph{Note}: When \mjeqn{d_{Min}}{d_Min} and \mjeqn{d_{Max}}{d_Max} are computed in this package, the output will display the
#' signs of the differences (rather than the absolute values of the differences) to aid in interpretation.
#'
#' @param referent_int Referent group's intercept.
#' @param referent_slope Referent group's slope.
#' @param focal_int Focal group's intercept.
#' @param focal_slope Focal group's slope.
#' @param focal_x Focal group's vector of predictor scores.
#' @param referent_sd_y Referent group's criterion standard deviation.
#'
#' @return A vector of effect sizes (\mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed},
#'     \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned}, \mjeqn{d_{Mod_{Under}}}{d_Mod_Under},
#'     \mjeqn{d_{Mod_{Over}}}{d_Mod_Over}), proportions of under- and over-predicted criterion scores,
#'     minimum and maximum differences (i.e., \mjeqn{d_{Mod_{Under}}}{d_Mod_Under} and \mjeqn{d_{Mod_{Over}}}{d_Mod_Over}),
#'     and the scores associated with minimum and maximum differences.
#'
#' @export
#'
#' @examples
#' # Generate some hypothetical data for a referent group and three focal groups:
#' set.seed(10)
#' refDat <- MASS::mvrnorm(n = 1000, mu = c(.5, .2),
#'                         Sigma = matrix(c(1, .5, .5, 1), 2, 2), empirical = TRUE)
#' foc1Dat <- MASS::mvrnorm(n = 1000, mu = c(-.5, -.2),
#'                          Sigma = matrix(c(1, .5, .5, 1), 2, 2), empirical = TRUE)
#' foc2Dat <- MASS::mvrnorm(n = 1000, mu = c(0, 0),
#'                          Sigma = matrix(c(1, .3, .3, 1), 2, 2), empirical = TRUE)
#' foc3Dat <- MASS::mvrnorm(n = 1000, mu = c(-.5, -.2),
#'                          Sigma = matrix(c(1, .3, .3, 1), 2, 2), empirical = TRUE)
#' colnames(refDat) <- colnames(foc1Dat) <- colnames(foc2Dat) <- colnames(foc3Dat) <- c("X", "Y")
#'
#' # Compute a regression model for each group:
#' refRegMod <- lm(Y ~ X, data.frame(refDat))$coef #' foc1RegMod <- lm(Y ~ X, data.frame(foc1Dat))$coef
#' foc2RegMod <- lm(Y ~ X, data.frame(foc2Dat))$coef #' foc3RegMod <- lm(Y ~ X, data.frame(foc3Dat))$coef
#'
#' # Use the subgroup regression models to compute d_mod for each referent-focal pairing:
#'
#' # Focal group #1:
#' compute_dmod_npar(referent_int = refRegMod, referent_slope = refRegMod,
#'              focal_int = foc1RegMod, focal_slope = foc1RegMod,
#'              focal_x = foc1Dat[,"X"], referent_sd_y = 1)
#'
#' # Focal group #2:
#' compute_dmod_npar(referent_int = refRegMod, referent_slope = refRegMod,
#'              focal_int = foc2RegMod, focal_slope = foc1RegMod,
#'              focal_x = foc2Dat[,"X"], referent_sd_y = 1)
#'
#' # Focal group #3:
#' compute_dmod_npar(referent_int = refRegMod, referent_slope = refRegMod,
#'              focal_int = foc3RegMod, focal_slope = foc3RegMod,
#'              focal_x = foc3Dat[,"X"], referent_sd_y = 1)
compute_dmod_npar <- function(referent_int, referent_slope,
focal_int, focal_slope,
focal_x, referent_sd_y){
call <- match.call()
inputs <- as.list(environment())

if(1 != length(referent_int)) stop("The length of 'referent_int' must be 1")
if(1 != length(referent_slope)) stop("The length of 'referent_slope' must be 1")
if(1 != length(focal_int)) stop("The length of 'focal_int' must be 1")
if(1 != length(focal_slope)) stop("The length of 'focal_slope' must be 1")
if(1 != length(referent_sd_y)) stop("The length of 'referent_sd_y' must be 1")

if(is.null(focal_x)) stop("'focal_x' cannot be NULL")
if(all(is.na(focal_x))) stop("'focal_x' must contain non-NA values")

if(is.na(referent_int)) stop("'referent_int' cannot be NA")
if(is.na(referent_slope)) stop("'referent_slope' cannot be NA")
if(is.na(focal_int)) stop("'focal_int' cannot be NA")
if(is.na(focal_slope)) stop("'focal_slope' cannot be NA")
if(is.na(referent_sd_y)) stop("'referent_sd_y' cannot be NA")

yhat_ref <- referent_int + referent_slope * focal_x
yhat_foc <- focal_int + focal_slope * focal_x
yhat_dif <- yhat_ref - yhat_foc

d_signed <- mean(yhat_dif, na.rm = TRUE) / referent_sd_y
d_unsigned <- mean(abs(yhat_dif), na.rm = TRUE) / referent_sd_y
prop_under <- mean(yhat_dif < 0, na.rm = TRUE)
d_under  <- mean(yhat_dif[yhat_dif < 0], na.rm = TRUE) / referent_sd_y * prop_under
prop_over  <- mean(yhat_dif > 0, na.rm = TRUE)
d_over <- mean(yhat_dif[yhat_dif > 0], na.rm = TRUE) / referent_sd_y * prop_over

if(prop_under == 0) d_under <- 0
if(prop_over == 0) d_over <- 0

minDiffId <- which(min(abs(zapsmall(yhat_dif))) == abs(zapsmall(yhat_dif)))
maxDiffId <- which(max(abs(zapsmall(yhat_dif))) == abs(zapsmall(yhat_dif)))

d_min <- yhat_dif[minDiffId] / referent_sd_y
d_min_score <- focal_x[minDiffId]

d_max <- yhat_dif[maxDiffId] / referent_sd_y
d_max_score <- focal_x[maxDiffId]

if(length(levels(factor(d_min_score))) > 1 & length(levels(factor(d_max_score))) > 1)
d_min_score <- d_max_score <- NA

out <- as.data.frame(t(setNames(c(d_signed, d_unsigned, d_under, d_over, prop_under, prop_over,
d_min, d_max, d_min_score, d_max_score),
c("d_signed", "d_unsigned", "d_under", "d_over", "prop_under", "prop_over",
"d_min", "d_max", "d_min_score", "d_max_score"))), stringsAsFactors = FALSE)

out <- list(call = call, inputs = inputs,
point_estimate = out)

class(out) <- c("dmod", "npar")
out
}

#' Function for computing parametric \mjeqn{d_{Mod}}{d_Mod} effect sizes for any number of focal groups
#'
#' This function computes \mjeqn{d_{Mod}}{d_Mod} effect sizes from user-defined descriptive statistics
#' and regression coefficients. If one has access to a raw data set, the \code{dMod} function may be used
#' as a wrapper to this function so that the regression equations and descriptive statistics can
#' be computed automatically within the program.
#'
#' The \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed} effect size (i.e., the average of differences in prediction over
#' the range of predictor scores) is computed as
#' \mjdeqn{d_{Mod_{Signed}}=\frac{1}{SD_{Y_{1}}}\intop f_{2}(X)\left[X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right] dX,}{d_Mod_Signed = 1/SD_Y_1 * integrate(f_2(X) * [X * (b_1_1 - b_1_2) + b_0_1 - b_0_2]),}
#' where
#'   \itemize{
#'     \item {\mjeqn{SD_{Y_{1}}}{Y_1} is the referent group's criterion standard deviation;}
#'     \item {\mjeqn{f_{2}(X)}{f_2(X)} is the normal-density function for the distribution of focal-group predictor scores;}
#'     \item {\mjeqn{b_{1_{1}}}{b_1_1} and \mjeqn{b_{1_{0}}}{b_1_0} are the slopes of the regression of \mjseqn{Y} on \mjseqn{X} for the referent and focal groups, respectively;}
#'     \item {\mjeqn{b_{0_{1}}}{b_0_1} and \mjeqn{b_{0_{0}}}{b_0_0} are the intercepts of the regression of \mjseqn{Y} on \mjseqn{X} for the referent and focal groups, respectively; and}
#'     \item {the integral spans all \mjseqn{X} scores within the operational range of predictor scores for the focal group.}
#'   }
#'
#' The \mjeqn{d_{Mod_{Under}}}{d_Mod_Under} and \mjeqn{d_{Mod_{Over}}}{d_Mod_Over} effect sizes are computed
#' using the same equation as \mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed}, but \mjeqn{d_{Mod_{Under}}}{d_Mod_Under} is
#' the weighted average of all scores in the area of underprediction (i.e., the differences in prediction with
#' negative signs) and \mjeqn{d_{Mod_{Over}}}{d_Mod_Over} is the weighted average of all scores in the area of
#' overprediction (i.e., the differences in prediction with negative signs).
#'
#' The \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned} effect size (i.e., the average of absolute differences in prediction over
#' the range of predictor scores) is computed as
#' \mjdeqn{d_{Mod_{Unsigned}}=\frac{1}{SD_{Y_{1}}}\intop f_{2}(X)\left|X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|dX.}{d_Mod_Unsigned = 1/SD_Y_1 * integrate(f_2(X) * |X * (b_1_1 - b_1_2) + b_0_1 - b_0_2|).}
#'
#' The \mjeqn{d_{Min}}{d_Min} effect size (i.e., the smallest absolute difference in prediction observed over the
#' range of predictor scores) is computed as
#' \mjdeqn{d_{Min}=\frac{1}{SD_{Y_{1}}}Min\left[\left|X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|\right].}{d_Min = 1/SD_Y_1 * Min[X * (b_1_1 - b_1_2) + b_0_1 - b_0_2].}
#'
#' The \mjeqn{d_{Max}}{d_Max} effect size (i.e., the largest absolute difference in prediction observed over the
#' range of predictor scores)is computed as
#' \mjdeqn{d_{Max}=\frac{1}{SD_{Y_{1}}}Max\left[\left|X\left(b_{1_{1}}-b_{1_{2}}\right)+b_{0_{1}}-b_{0_{2}}\right|\right].}{d_Max = 1/SD_Y_1 * Max[X * (b_1_1 - b_1_2) + b_0_1 - b_0_2].}
#' \emph{Note}: When \mjeqn{d_{Min}}{d_Min} and \mjeqn{d_{Max}}{d_Max} are computed in this package, the output will display the
#' signs of the differences (rather than the absolute values of the differences) to aid in interpretation.
#'
#' If \mjeqn{d_{Mod}}{d_Mod} effect sizes are to be rescaled to compensate for a cumulative density less than 1 (see the \code{rescale_cdf} argument), the result of each
#' effect size involving integration will be divided by the ratio of the cumulative density of the observed range of scores (i.e., the range bounded by the \code{focal_min_x}
#' and \code{focal_max_x} arguments) to the cumulative density of scores bounded by \code{-Inf} and \code{Inf}.
#'
#' @param referent_int Referent group's intercept.
#' @param referent_slope Referent group's slope.
#' @param focal_int Focal groups' intercepts.
#' @param focal_slope Focal groups' slopes.
#' @param focal_mean_x Focal groups' predictor-score means.
#' @param focal_sd_x Focal groups' predictor-score standard deviations.
#' @param referent_sd_y Referent group's criterion standard deviation.
#' @param focal_min_x Focal groups' minimum predictor scores.
#' @param focal_max_x Focal groups' maximum predictor scores.
#' @param focal_names Focal-group names. If \code{NULL} (the default), the focal groups will be given numeric labels ranging from 1 through the number of groups.
#' @param rescale_cdf Logical argument that indicates whether parametric \mjeqn{d_{Mod}}{d_Mod} results
#' should be rescaled to account for using a cumulative density < 1 in the computations (\code{TRUE}; default) or not (\code{FALSE}).
#'
#' @return
#' A matrix of effect sizes (\mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed},
#'     \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned}, \mjeqn{d_{Mod_{Under}}}{d_Mod_Under},
#'     \mjeqn{d_{Mod_{Over}}}{d_Mod_Over}), proportions of under- and over-predicted criterion scores,
#'     minimum and maximum differences (i.e., \mjeqn{d_{Mod_{Under}}}{d_Mod_Under} and \mjeqn{d_{Mod_{Over}}}{d_Mod_Over}),
#'     and the scores associated with minimum and maximum differences.
#'     Note that if the regression lines are parallel and infinite \code{focal_min_x} and \code{focal_max_x} values were
#'     specified, the extrema will be defined using the scores 3 focal-group SDs above and below the corresponding focal-group means.
#'
#' @export
#'
#' @references
#' Nye, C. D., & Sackett, P. R. (2017).
#' New effect sizes for tests of categorical moderation and differential prediction.
#' \emph{Organizational Research Methods, 20}(4), 639–664. \doi{10.1177/1094428116644505}
#'
#' @examples
#' compute_dmod_par(referent_int = -.05, referent_slope = .5,
#'                  focal_int = c(.05, 0, -.05), focal_slope = c(.5, .3, .3),
#'                  focal_mean_x = c(-.5, 0, -.5), focal_sd_x = rep(1, 3),
#'                  referent_sd_y = 1,
#'                  focal_min_x = rep(-Inf, 3), focal_max_x = rep(Inf, 3),
#'                  focal_names = NULL, rescale_cdf = TRUE)
compute_dmod_par <- function(referent_int, referent_slope,
focal_int, focal_slope,
focal_mean_x, focal_sd_x,
referent_sd_y,
focal_min_x, focal_max_x,
focal_names = NULL, rescale_cdf = TRUE){
call <- match.call()
inputs <- as.list(environment())

referentLengths <- c(length(referent_int), length(referent_slope), length(referent_sd_y))
if(!all(1 == referentLengths))
stop("The lengths of referent-group arguments must be 1")

focalLengths <- c(length(focal_int), length(focal_slope),
length(focal_mean_x), length(focal_sd_x),
length(focal_min_x), length(focal_max_x))
if(!all(focalLengths > 0))
stop("The lengths of all focal-group arguments must be greater than 0")
if(!all(focalLengths == focalLengths[-1]))
stop("The lengths of all focal-group arguments must be equal")
if(!is.null(focal_names)){
if(!all(length(focal_names) == focalLengths))
stop("The lengths of 'focal_names' must match the number of focal groups")
}else{focal_names <- 1:focalLengths}

paramMat <- cbind(referent_int = referent_int, referent_slope = referent_slope,
focal_int = focal_int, focal_slope = focal_slope,
focal_mean_x = focal_mean_x, focal_sd_x = focal_sd_x,
referent_sd_y = referent_sd_y,
focal_min_x = focal_min_x, focal_max_x = focal_max_x)
paramMat[is.finite(paramMat)] <- zapsmall(paramMat[is.finite(paramMat)])

rescaleCdfFull <- apply(paramMat, 1, function(x)
integrate(f = function(i) dnorm(i, mean = x["focal_mean_x"],sd = x["focal_sd_x"]),
lower = x["focal_min_x"], upper = x["focal_max_x"])$v) intMatOut <- t(apply(paramMat, 1, function(x){ coeffMat <- matrix(c(-x["referent_slope"], 1, -x["focal_slope"], 1), 2, 2, TRUE) if(det(coeffMat) == 0){ c(x_int = NA, y_int = NA) }else{ rhsMat <- matrix(c(x["referent_int"], x["focal_int"]), 2, 1, TRUE) int_point <- solve(coeffMat, tol = 0) %*% rhsMat setNames(as.numeric(int_point), c("x_int", "y_int")) } })) intMat <- as.matrix(intMatOut[,1]) colnames(intMat) <- "x_int" useFullCdf <- is.na(intMat) intBelowLimit <- intMat < focal_min_x intAboveLimit <- intMat > focal_max_x intBelowLimit[is.na(intBelowLimit)] <- TRUE intAboveLimit[is.na(intAboveLimit)] <- FALSE intMat[intBelowLimit] <- focal_min_x[intBelowLimit] intMat[intAboveLimit] <- focal_max_x[intAboveLimit] paramMat <- cbind(paramMat, intMat) extremeIntLo <- paramMat[,"x_int"] < paramMat[,"focal_min_x"] extremeIntHi <- paramMat[,"x_int"] > paramMat[,"focal_max_x"] extremeIntLo[is.na(extremeIntLo)] <- FALSE extremeIntHi[is.na(extremeIntHi)] <- FALSE paramMat[,"x_int"][extremeIntLo] <- paramMat[,"focal_min_x"][extremeIntLo] paramMat[,"x_int"][extremeIntHi] <- paramMat[,"focal_max_x"][extremeIntHi] paramMat <- cbind(paramMat, .x_int = paramMat[,"x_int"]) tooSmall <- !is.infinite(paramMat[,"x_int"]) & paramMat[,"x_int"] < paramMat[,"focal_mean_x"] - 5 * paramMat[,"focal_sd_x"] tooBig <- !is.infinite(paramMat[,"x_int"]) & paramMat[,"x_int"] > paramMat[,"focal_mean_x"] + 5 * paramMat[,"focal_sd_x"] paramMat[tooSmall, ".x_int"] <- paramMat[tooSmall, "focal_mean_x"] - 5 * paramMat[tooSmall, "focal_sd_x"] paramMat[tooBig, ".x_int"] <- paramMat[tooBig, "focal_mean_x"] + 5 * paramMat[tooBig, "focal_sd_x"] neginf2Int <- apply(paramMat, 1, function(x) if(!is.na(x["x_int"])){ pnorm(x["x_int"], mean = x["focal_mean_x"], sd = x["focal_sd_x"]) }else{0}) int2Inf <- apply(paramMat, 1, function(x) if(!is.na(x["x_int"])){ pnorm(x["x_int"], mean = x["focal_mean_x"], sd = x["focal_sd_x"], lower.tail = FALSE) }else{0}) neginf2Int[useFullCdf] <- 1 int2Inf[useFullCdf] <- 1 propBelowInt <- apply(paramMat, 1, function(x) if(!is.na(x["x_int"])){ if(zapsmall(x["focal_min_x"]) == zapsmall(x["x_int"])){ 0 }else{ pnorm(x["x_int"], mean = x["focal_mean_x"], sd = x["focal_sd_x"]) - pnorm(x["focal_min_x"], mean = x["focal_mean_x"], sd = x["focal_sd_x"]) } }else{0}) propAboveInt <- apply(paramMat, 1, function(x) if(!is.na(x["x_int"])){ if(zapsmall(x["focal_max_x"]) == zapsmall(x["x_int"])){ 0 }else{ pnorm(x["focal_max_x"], mean = x["focal_mean_x"], sd = x["focal_sd_x"]) - pnorm(x["x_int"], mean = x["focal_mean_x"], sd = x["focal_sd_x"]) } }else{0}) if(rescale_cdf){ rescaleCdfLo <- propBelowInt / neginf2Int rescaleCdfHi <- propAboveInt / int2Inf }else{ rescaleCdfLo <- 1 rescaleCdfHi <- 1 } rescaleCdfLo[is.na(rescaleCdfLo)] <- 1 rescaleCdfHi[is.na(rescaleCdfHi)] <- 1 rescaleCdfLo[rescaleCdfLo == 0] <- 1 rescaleCdfHi[rescaleCdfHi == 0] <- 1 dModLo <- apply(paramMat, 1, function(x){ if(!is.na(x[".x_int"])){ .integrate_dmod(referent_int = x["referent_int"], referent_slope = x["referent_slope"], focal_int = x["focal_int"], focal_slope = x["focal_slope"], focal_mean_x = x["focal_mean_x"], focal_sd_x = x["focal_sd_x"], referent_sd_y = x["referent_sd_y"], focal_min_x = x["focal_min_x"], focal_max_x = x[".x_int"], signed = TRUE) }else{0} }) / rescaleCdfLo dModHi <- apply(paramMat, 1, function(x){ if(!is.na(x[".x_int"])){ .integrate_dmod(referent_int = x["referent_int"], referent_slope = x["referent_slope"], focal_int = x["focal_int"], focal_slope = x["focal_slope"], focal_mean_x = x["focal_mean_x"], focal_sd_x = x["focal_sd_x"], referent_sd_y = x["referent_sd_y"], focal_min_x = x[".x_int"], focal_max_x = x["focal_max_x"], signed = TRUE) }else{0} }) / rescaleCdfHi dModLo[is.na(dModLo)] <- 0 dModHi[is.na(dModHi)] <- 0 minProp <- t(apply(cbind(dModLo, dModHi), 1, function(x){ out <- min(x) == x if(out == out){c(TRUE, FALSE)}else{out} })) propMat <- cbind(propBelowInt / rescaleCdfFull, propAboveInt / rescaleCdfFull) dModDirectional <- cbind(t(apply(cbind(dModLo, dModHi), 1, sort)), t(propMat)[t(minProp)], t(propMat)[t(!minProp)]) dimnames(dModDirectional) <- list(focal_names, c("d_under", "d_over", "prop_under", "prop_over")) scoreMat <- cbind(focal_min_x = paramMat[,"focal_min_x"], intMat, focal_max_x = paramMat[,"focal_max_x"]) scoreMat[is.infinite(scoreMat)] <- cbind(focal_mean_x, focal_mean_x, focal_mean_x)[is.infinite(scoreMat)] + sign(scoreMat[is.infinite(scoreMat)]) * 3 * cbind(focal_sd_x, focal_sd_x, focal_sd_x)[is.infinite(scoreMat)] d_extremes <- 1 / referent_sd_y * (scoreMat * (referent_slope - focal_slope) + referent_int - focal_int) d_extremes <- t(apply(d_extremes, 1, function(x){ if(all.equal(x, rep(0, length(x)), check.attributes = FALSE) == TRUE){rep(0, length(x))}else{x} })) dWhichMin <- t(apply(d_extremes, 1, function(x){abs(x) == min(abs(x))})) dWhichMin <- t(apply(dWhichMin, 1, function(x){ out <- rep(FALSE, length(x)); out[which(x)] <- TRUE; out })) dWhichMax <- t(apply(d_extremes, 1, function(x){abs(x) == max(abs(x))})) dWhichMax <- t(apply(dWhichMax, 1, function(x){ out <- rep(FALSE, length(x)); out[which(x)] <- TRUE; out })) d_extremes <- cbind(d_min = t(d_extremes)[t(dWhichMin)], d_max = t(d_extremes)[t(dWhichMax)], d_min_score = t(scoreMat)[t(dWhichMin)], d_max_score = t(scoreMat)[t(dWhichMax)]) d_extremes[,c("d_min_score", "d_max_score")][zapsmall(d_extremes[,"d_min"]) == zapsmall(d_extremes[,"d_max"])] <- NA if(rescale_cdf){ d_signed <- dModDirectional[,1] + dModDirectional[,2] d_unsigned <- abs(dModDirectional[,1]) + dModDirectional[,2] }else{ d_signed <- apply(paramMat, 1, function(x) .integrate_dmod(referent_int = x["referent_int"], referent_slope = x["referent_slope"], focal_int = x["focal_int"], focal_slope = x["focal_slope"], focal_mean_x = x["focal_mean_x"], focal_sd_x = x["focal_sd_x"], referent_sd_y = x["referent_sd_y"], focal_min_x = x["focal_min_x"], focal_max_x = x["focal_max_x"], signed = TRUE)) d_unsigned <- apply(paramMat, 1, function(x) .integrate_dmod(referent_int = x["referent_int"], referent_slope = x["referent_slope"], focal_int = x["focal_int"], focal_slope = x["focal_slope"], focal_mean_x = x["focal_mean_x"], focal_sd_x = x["focal_sd_x"], referent_sd_y = x["referent_sd_y"], focal_min_x = x["focal_min_x"], focal_max_x = x["focal_max_x"], signed = FALSE)) } outSummary <- cbind(d_signed, d_unsigned, dModDirectional, d_extremes) outSummary[apply(outSummary, 1, function(x) all(0 == zapsmall(x[1:4]))), 5:6] <- 0 outSummary <- cbind(focal_group = focal_names, as.data.frame(outSummary, stringsAsFactors = FALSE)) class(outSummary) <- "data.frame" out <- list(call = call, inputs = inputs, point_estimate = outSummary) class(out) <- c("dmod", "par") out } #' Comprehensive \mjeqn{d_{Mod}}{d_Mod} calculator #' #' \loadmathjax #' This is a general-purpose function to compute \mjeqn{d_{Mod}}{d_Mod} effect sizes from raw data and to perform bootstrapping. #' It subsumes the functionalities of the \code{compute_dmod_par} (for parametric computations) and \code{compute_dmod_npar} (for non-parametric computations) #' functions and automates the generation of regression equations and descriptive statistics for computing \mjeqn{d_{Mod}}{d_Mod} effect sizes. Please see documentation #' for \code{compute_dmod_par} and \code{compute_dmod_npar} for details about how the effect sizes are computed. #' #' @param data Data frame containing the data to be analyzed (if not a data frame, must be an object convertible to a data frame via the as.data.frame function). #' The data set must contain a criterion variable, at least one predictor variable, and a categorical variable that identifies the group to which each case (i.e., row) in the data set belongs. #' @param group Name or column-index number of the variable that identifies group membership in the data set. #' @param predictors Name(s) or column-index number(s) of the predictor variable(s) in the data set. No predictor can be a factor-type variable. #' If multiple predictors are specified, they will be combined into a regression-weighted composite that will be carried forward to compute \mjeqn{d_{Mod}}{d_Mod} effect sizes. #' \itemize{ #' \item {\emph{Note}: If weights other than regression weights should be used to combine the predictors into a composite, the user must manually compute such a composite, #' include the composite in the \code{dat} data set, and identify the composite variable in \code{predictors}.} #' } #' @param criterion Name or column-index number of the criterion variable in the data set. The criterion cannot be a factor-type variable. #' @param referent_id Label used to identify the referent group in the \code{group} variable. #' @param focal_id_vec Label(s) to identify the focal group(s) in the \code{group} variable. If \code{NULL} (the default), the specified referent group will be compared to all other groups. #' @param conf_level Confidence level (between \code{0} and \code{1}) to be used in generating confidence intervals. Default is \code{.95} #' @param parametric Logical argument that indicates whether \mjeqn{d_{Mod}}{d_Mod} should be computed using an assumed normal distribution (\code{TRUE}; default) or observed frequencies (\code{FALSE}). #' @param rescale_cdf Logical argument that indicates whether parametric \mjeqn{d_{Mod}}{d_Mod} results should be rescaled to account for using a cumulative density < 1 in the computations (\code{TRUE}; default) or not (\code{FALSE}). #' @param bootstrap Logical argument that indicates whether \mjeqn{d_{Mod}}{d_Mod} should be bootstrapped (\code{TRUE}; default) or not (\code{FALSE}). #' @param boot_iter Number of bootstrap iterations to compute (default = \code{1000}). #' @param stratify Logical argument that indicates whether the random bootstrap sampling should be stratified (\code{TRUE}) or unstratified (\code{FALSE}; default). #' @param empirical_ci Logical argument that indicates whether the bootstrapped confidence invervals should be computed from the observed empirical distributions (\code{TRUE}) or computed using #' bootstrapped means and standard errors via the normal-theory approach (\code{FALSE}). #' @param cross_validate_wts Only relevant when multiple predictors are specified and bootstrapping is performed. #' Logical argument that indicates whether regression weights derived from the full sample should be used to combine predictors in the bootstrapped samples (\code{TRUE}) #' or if a new set of weights should be derived during each iteration of the bootstrapping procedure (\code{FALSE}; default). #' #' @importFrom stats as.formula #' @importFrom stats dnorm #' @importFrom stats integrate #' @importFrom stats lm #' @importFrom stats qnorm #' @importFrom stats sd #' @importFrom stats setNames #' @importFrom dplyr sample_n #' #' @return If bootstrapping is selected, the list will include: #' \itemize{ #' \item {\code{point_estimate}} {A matrix of effect sizes (\mjeqn{d_{Mod_{Signed}}}{d_Mod_Signed}, #' \mjeqn{d_{Mod_{Unsigned}}}{d_Mod_Unsigned}, \mjeqn{d_{Mod_{Under}}}{d_Mod_Under}, #' \mjeqn{d_{Mod_{Over}}}{d_Mod_Over}), proportions of under- and over-predicted criterion scores, #' minimum and maximum differences, and the scores associated with minimum and maximum differences. #' All of these values are computed using the full data set.} #' \item {\code{bootstrap_mean}} {A matrix of the same statistics as the \code{point_estimate} matrix, #' but the values in this matrix are the means of the results from bootstrapped samples.} #' \item {\code{bootstrap_se}} {A matrix of the same statistics as the \code{point_estimate} matrix, #' but the values in this matrix are bootstrapped standard errors (i.e., the standard deviations of the results from bootstrapped samples).} #' \item {\code{bootstrap_CI_Lo}} {A matrix of the same statistics as the \code{point_estimate} matrix, #' but the values in this matrix are the lower confidence bounds of the results from bootstrapped samples.} #' \item {\code{bootstrap_CI_Hi}} {A matrix of the same statistics as the \code{point_estimate} matrix, #' but the values in this matrix are the upper confidence bounds of the results from bootstrapped samples.} #' } #' If no bootstrapping is performed, the output will be limited to the \code{point_estimate} matrix. #' @references #' Nye, C. D., & Sackett, P. R. (2017). #' New effect sizes for tests of categorical moderation and differential prediction. #' \emph{Organizational Research Methods, 20}(4), 639–664. \doi{10.1177/1094428116644505} #' #' @export #' #' @examples #' # Generate some hypothetical data for a referent group and three focal groups: #' set.seed(10) #' refDat <- MASS::mvrnorm(n = 1000, mu = c(.5, .2), #' Sigma = matrix(c(1, .5, .5, 1), 2, 2), empirical = TRUE) #' foc1Dat <- MASS::mvrnorm(n = 1000, mu = c(-.5, -.2), #' Sigma = matrix(c(1, .5, .5, 1), 2, 2), empirical = TRUE) #' foc2Dat <- MASS::mvrnorm(n = 1000, mu = c(0, 0), #' Sigma = matrix(c(1, .3, .3, 1), 2, 2), empirical = TRUE) #' foc3Dat <- MASS::mvrnorm(n = 1000, mu = c(-.5, -.2), #' Sigma = matrix(c(1, .3, .3, 1), 2, 2), empirical = TRUE) #' colnames(refDat) <- colnames(foc1Dat) <- colnames(foc2Dat) <- colnames(foc3Dat) <- c("X", "Y") #' dat <- rbind(cbind(G = 1, refDat), cbind(G = 2, foc1Dat), #' cbind(G = 3, foc2Dat), cbind(G = 4, foc3Dat)) #' #' # Compute point estimates of parametric d_mod effect sizes: #' compute_dmod(data = dat, group = "G", predictors = "X", criterion = "Y", #' referent_id = 1, focal_id_vec = 2:4, #' conf_level = .95, rescale_cdf = TRUE, parametric = TRUE, #' bootstrap = FALSE) #' #' # Compute point estimates of non-parametric d_mod effect sizes: #' compute_dmod(data = dat, group = "G", predictors = "X", criterion = "Y", #' referent_id = 1, focal_id_vec = 2:4, #' conf_level = .95, rescale_cdf = TRUE, parametric = FALSE, #' bootstrap = FALSE) #' #' # Compute unstratified bootstrapped estimates of parametric d_mod effect sizes: #' compute_dmod(data = dat, group = "G", predictors = "X", criterion = "Y", #' referent_id = 1, focal_id_vec = 2:4, #' conf_level = .95, rescale_cdf = TRUE, parametric = TRUE, #' boot_iter = 10, bootstrap = TRUE, stratify = FALSE, empirical_ci = FALSE) #' #' # Compute unstratified bootstrapped estimates of non-parametric d_mod effect sizes: #' compute_dmod(data = dat, group = "G", predictors = "X", criterion = "Y", #' referent_id = 1, focal_id_vec = 2:4, #' conf_level = .95, rescale_cdf = TRUE, parametric = FALSE, #' boot_iter = 10, bootstrap = TRUE, stratify = FALSE, empirical_ci = FALSE) compute_dmod <- function(data, group, predictors, criterion, referent_id, focal_id_vec = NULL, conf_level = .95, rescale_cdf = TRUE, parametric = TRUE, bootstrap = TRUE, boot_iter = 1000, stratify = FALSE, empirical_ci = FALSE, cross_validate_wts = FALSE){ call <- match.call() inputs <- list(data = data, referent_id = referent_id, focal_id_vec = focal_id_vec, conf_level = conf_level, rescale_cdf = rescale_cdf, parametric = parametric, bootstrap = bootstrap, boot_iter = boot_iter, stratify = stratify, empirical_ci = empirical_ci, cross_validate_wts = cross_validate_wts) if(!is.data.frame(data)) data <- as.data.frame(data, stringsAsFactors = FALSE) group <- match_variables(call = [match("group", names(call))]], arg = group, arg_name = "group", data = data) predictors <- match_variables(call = [match("predictors", names(call))]], arg = predictors, arg_name = "predictors", data = data, allow_multiple = TRUE) criterion <- match_variables(call = [match("criterion", names(call))]], arg = criterion, arg_name = "criterion", data = data) if(!is.null(dim(group))){ if(ncol(group) > 1){ stop("Only 1 group variable may be specified in 'group'", call. = FALSE) } } if(!is.null(dim(criterion))){ if(ncol(criterion) > 1){ stop("Only 1 criterion variable may be specified in 'criterion'", call. = FALSE) } } data <- data.frame(G = unlist(group), Y = unlist(criterion), predictors, stringsAsFactors = FALSE) na_screen <- apply(is.na(data), 1, sum) > 0 if(any(na_screen)){ warning("Entries with NA values were detected: Removed ", sum(na_screen), " cases due to missingness", call. = FALSE) data <- !na_screen,] } xNames <- colnames(data)[-(1:2)] groupNames <- levels(factor(,"G"])) referentId <- which(groupNames == referent_id) if(length(referentId) == 0) stop("The 'referent_id' specified does not match a level of the grouping factor") if(is.null(focal_id_vec)){ focalId <- which(groupNames != referent_id) focal_id_vec<- groupNames[focalId] }else{ focalId <- apply(t(focal_id_vec), 2, function(x) which(x == groupNames)) if(is.null(focalId)) stop("The values in 'focal_id_vec' specified do not match any levels of the grouping factor") if(length(unlist(focalId)) < length(focal_id_vec)) stop("'focal_id_vec' contains invalid group indices") } if(any(referentId == unlist(focalId))) warning("'focal_id_vec' contains the value specified for 'referent_id'") sd_vec <- unlist(by(,-1], INDICES = ,"G"], function(x){ apply(x, 2, sd, na.rm = TRUE)})) if(any(is.na(sd_vec))){ whichInvalidSd <- names(which(is.na(sd_vec))) whichInvalidSd <- strsplit(x = whichInvalidSd, split = "[.]") varVecInvalidSd <- lapply(whichInvalidSd, function(x) x[length(x)]) groupVecInvalidSd <- lapply(whichInvalidSd, function(x) paste(x[-length(x)], sep = ".")) print(data.frame(Group = unlist(groupVecInvalidSd), Variable = unlist(varVecInvalidSd), stringsAsFactors = FALSE)) stop("The variables associated with the groups listed above have no non-NA cases") } if(any(0 == sd_vec)){ whichInvalidSd <- names(which(sd_vec == 0)) whichInvalidSd <- strsplit(x = whichInvalidSd, split = "[.]") varVecInvalidSd <- lapply(whichInvalidSd, function(x) x[length(x)]) groupVecInvalidSd <- lapply(whichInvalidSd, function(x) paste(x[-length(x)], sep = ".")) print(data.frame(Group = unlist(groupVecInvalidSd), Variable = unlist(varVecInvalidSd), stringsAsFactors = FALSE)) stop("The variables associated with the groups listed above have variances of zero") } regModel <- lm(as.formula(paste("Y ~", paste(xNames, collapse = " + "))), data = data)$coeff
xFun <- function(replace = TRUE, parametric = TRUE, showFullResult = FALSE){
if(replace){
if(stratify){
invalidRep <- TRUE
while(invalidRep){
stratDat <- by(data, INDICES = ,"G"], function(x){
x[sample(1:nrow(x), nrow(x), replace = replace),]
})
data_i <- NULL
for(i in 1:length(stratDat)) data_i <- rbind(data_i, stratDat[[i]])
sd_vec <- unlist(by(data_i[,-1], INDICES = data_i[,"G"], function(x){
apply(x, 2, sd, na.rm = TRUE)}))
invalidRep <- any(0 == sd_vec) | any(is.na(sd_vec))
}
}else{
invalidRep <- TRUE
while(invalidRep){
data_i <- data[sample(1:nrow(data), nrow(data), replace = replace),]
sd_vec <- unlist(by(data_i[,-1], INDICES = data_i[,"G"], function(x){
apply(x, 2, sd, na.rm = TRUE)}))
invalidRep <- any(length(by(data_i, INDICES = data_i[,"G"], nrow)) < length(groupNames) |
as.numeric(by(data_i, INDICES = data_i[,"G"], nrow)) < 3) |
any(0 == sd_vec) | any(is.na(sd_vec))
}
}
}else{
data_i <- data
}

if(ncol(data) > 3){
if(!cross_validate_wts)
regModel <- lm(as.formula(paste("Y ~", paste(xNames, collapse = " + "))), data = data_i)$coeff data_i <- data.frame(cbind(data_i[,1:2], X = as.numeric(apply(as.matrix(data_i[,xNames]), 1, function(x) regModel[-1] %*% x))), stringsAsFactors = FALSE) }else{ data_i <- data.frame(cbind(data_i[,1:2], X = unlist(data_i[,3])), stringsAsFactors = FALSE) } regList <- by(data_i, INDICES = data_i$G, function(x) lm(Y ~ X, data = x)$coeff) meanList <- by(data_i, INDICES = data_i$G, function(x) apply(x[,c("Y", "X")], 2, mean))
sdList <- by(data_i, INDICES = data_i$G, function(x) apply(x[,c("Y", "X")], 2, sd)) intVec <- as.numeric(lapply(regList, function(x) x)) slopeVec <- as.numeric(lapply(regList, function(x) x)) xMeanVec <- as.numeric(lapply(meanList, function(x) x["X"])) yMeanVec <- as.numeric(lapply(meanList, function(x) x["Y"])) xSdVec <- as.numeric(lapply(sdList, function(x) x["X"])) ySdVec <- as.numeric(lapply(sdList, function(x) x["Y"])) minVec <- c(by(data_i[,"X"], INDICES = data_i$G, min))
maxVec <- c(by(data_i[,"X"], INDICES = data_i$G, max)) if(parametric){ out <- compute_dmod_par(referent_int = intVec[referentId], referent_slope = slopeVec[referentId], focal_int = intVec[focalId], focal_slope = slopeVec[focalId], focal_mean_x = xMeanVec[focalId], focal_sd_x = xSdVec[focalId], referent_sd_y = ySdVec[referentId], focal_min_x = minVec[focalId], focal_max_x = maxVec[focalId], focal_names = focal_id_vec, rescale_cdf = rescale_cdf)$point_estimate
}else{
out <- data.frame(t(apply(t(focalId), 2, function(x){
x_out <-  compute_dmod_npar(referent_int = intVec[referentId],
referent_slope = slopeVec[referentId],
focal_int = intVec[x],
focal_slope = slopeVec[x],
focal_x = data_i[data_i$G == groupNames[x],"X"], referent_sd_y = ySdVec[referentId])$point_estimate
class(x_out) <- NULL
unlist(x_out)
})), stringsAsFactors = FALSE)
out <- cbind(focal_group = focal_id_vec, out)
}
class(out) <- NULL
as.data.frame(out, stringsAsFactors = FALSE)
}

if(bootstrap){
obsOut <- xFun(replace = FALSE, parametric = parametric)

repOut <- t(replicate(boot_iter, c(as.matrix(xFun(parametric = parametric)[,-1]))))
bootstrap_mean <- matrix(apply(repOut, 2, mean), nrow(obsOut), ncol(obsOut) - 1, FALSE)
bootstrap_se <- matrix(apply(repOut, 2, sd), nrow(obsOut), ncol(obsOut) - 1, FALSE)

if(empirical_ci){
bootstrap_CI_LL <- matrix(apply(repOut, 2, function(x)
sort(x)[nrow(repOut) * (1 - conf_level) / 2]), nrow(obsOut), ncol(obsOut) - 1, FALSE)
bootstrap_CI_UL <- matrix(apply(repOut, 2, function(x)
sort(x)[nrow(repOut) * (1 - (1 - conf_level) / 2)]), nrow(obsOut), ncol(obsOut) - 1, FALSE)
}else{
bootstrap_CI_LL <- bootstrap_mean - qnorm((1 - (1 - conf_level) / 2)) * bootstrap_se
bootstrap_CI_UL <- bootstrap_mean + qnorm((1 - (1 - conf_level) / 2)) * bootstrap_se
}

colnames(bootstrap_mean) <- colnames(bootstrap_se) <- colnames(bootstrap_CI_LL) <- colnames(bootstrap_CI_UL) <- colnames(obsOut)[-1]
bootstrap_mean <- cbind(focal_group = obsOut[,1], as.data.frame(bootstrap_mean, stringsAsFactors = FALSE))
bootstrap_se <- cbind(focal_group = obsOut[,1], as.data.frame(bootstrap_se, stringsAsFactors = FALSE))
bootstrap_CI_LL <- cbind(focal_group = obsOut[,1], as.data.frame(bootstrap_CI_LL, stringsAsFactors = FALSE))
bootstrap_CI_UL <- cbind(focal_group = obsOut[,1], as.data.frame(bootstrap_CI_UL, stringsAsFactors = FALSE))

out <- list(point_estimate = obsOut, bootstrap_mean = bootstrap_mean, bootstrap_se = bootstrap_se,
bootstrap_CI_LL = bootstrap_CI_LL, bootstrap_CI_UL = bootstrap_CI_UL)
names(out)[-(1:3)] <- paste(names(out)[-(1:3)], "_", conf_level * 100, sep = "")
}else{
out <- list(point_estimate = xFun(replace = FALSE, parametric = parametric, showFullResult = !bootstrap))
}

out <- append(list(call = call, inputs = inputs), out)

if(parametric){
class(out) <- c("dmod", "par")
}else{
class(out) <- c("dmod", "npar")
}
out
}

Try the psychmeta package in your browser

Any scripts or data that you put into this service are public.

psychmeta documentation built on Jan. 3, 2022, 5:07 p.m.