# This file is used to manually create data objects that will be used in the
# actual vignette. This is done to avoid excessively long build times. To update
# the vignette, run this code first, and then knit the actual vignette in the
# `vignettes` directory.
knitr::opts_chunk$set(collapse=TRUE, comment='#>')

Simulating Power with the paramtest Package

# load all packages used in this vignette
library('paramtest')
library('pwr')
library('ggplot2')
library('knitr')
library('nlme')
library('lavaan')
library('dplyr')

Example: Simulating a t-test with paramtest

# create user-defined function to generate and analyze data
t_func <- function(simNum, N, d) {
    x1 <- rnorm(N, 0, 1)
    x2 <- rnorm(N, d, 1)

    t <- t.test(x1, x2, var.equal=TRUE)  # run t-test on generated data
    stat <- t$statistic
    p <- t$p.value

    return(c(t=stat, p=p, sig=(p < .05)))
        # return a named vector with the results we want to keep
}
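# quick sanity check: run the function once by hand to confirm it returns the
# named vector we expect before committing to thousands of iterations
t_func(1, N=50, d=.5)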

power_ttest <- run_test(t_func, n.iter=5000, output='data.frame', N=50, d=.5)  # simulate data
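# rough check on the simulation: the proportion of significant results should
# land close to the analytic power from the 'pwr' package (this sketch assumes
# paramtest's results() accessor, which returns the per-iteration results as a
# data frame when output='data.frame')
mean(results(power_ttest)$sig)
pwr::pwr.t.test(n=50, d=.5, sig.level=.05, type='two.sample')$power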

Varying parameters

# give 'params' a list of parameters we want to vary;
# testing at N=25, N=50, and N=100
power_ttest_vary <- grid_search(t_func, params=list(N=c(25, 50, 100)),
    n.iter=5000, output='data.frame', d=.5)
# varying N and Cohen's d
power_ttest_vary2 <- grid_search(t_func, params=list(N=c(25, 50, 100), d=c(.2, .5)),
    n.iter=5000, output='data.frame')
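# summarize power across the full N x d grid; grid_search keeps the varied
# parameter values alongside each iteration's results (the N.test and d.test
# column names below are an assumption -- check names(results(power_ttest_vary2))
# if they differ)
results(power_ttest_vary2) %>%
    group_by(N.test, d.test) %>%
    summarise(power=mean(sig))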

Parallel processing

# time with no parallel processing (your mileage may vary greatly)
power_ttest_time1 <- system.time(power_ttest_vary3 <- grid_search(t_func,
    params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame'))

# using parallel processing; I am using a Windows system, so I use parallel='snow';
# see the documentation for the 'parallel' package for more details
power_ttest_time2 <- system.time(power_ttest_vary4 <- grid_search(t_func,
    params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame', parallel='snow', ncpus=4))
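# compare the two timings; system.time() returns a named vector, and the
# 'elapsed' component is the wall-clock time in seconds
power_ttest_time1['elapsed']
power_ttest_time2['elapsed']

# rather than hard-coding ncpus=4, you can query the machine first; a minimal
# sketch using parallel::detectCores(), leaving one core free, and passing the
# result as ncpus=ncores
ncores <- max(parallel::detectCores() - 1, 1)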

Bootstrapping

# user function must take data and indices as first two arguments; see 'boot'
# package documentation for more details
t_func_boot <- function(data, indices) {
    sample_data <- data[indices, ]
    treatGroup <- sample_data[sample_data$group == 'trt2', 'weight']
    ctrlGroup <- sample_data[sample_data$group == 'ctrl', 'weight']

    t <- t.test(treatGroup, ctrlGroup, var.equal=TRUE)
    stat <- t$statistic
    p <- t$p.value

    return(c(t=stat, p=p, sig=(p < .05)))
}

# example using built-in dataset PlantGrowth
power_ttest_boot <- run_test(t_func_boot, n.iter=5000, output='data.frame', boot=TRUE,
    bootParams=list(data=PlantGrowth))
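# the same user function can also be called directly on the full dataset
# (indices = all rows) to see the observed, non-resampled test for comparison
t_func_boot(PlantGrowth, seq_len(nrow(PlantGrowth)))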

Sample code for various statistical models

Linear models

lm_test <- function(simNum, N, b1, b0=0, xm=0, xsd=1) {
    x <- rnorm(N, xm, xsd)
    y <- rnorm(N, b0 + b1*x, sqrt(1 - b1^2))  # var. approx. 1 after accounting
                                              # for explained variance by x
    model <- lm(y ~ x)

    # pull output from model
    est <- coef(summary(model))['x', 'Estimate']
    se <- coef(summary(model))['x', 'Std. Error']
    p <- coef(summary(model))['x', 'Pr(>|t|)']

    return(c(xm=mean(x), xsd=sd(x), ym=mean(y), ysd=sd(y), est=est, se=se, p=p,
        sig=(p < .05)))
}

# we vary N at 200 and 300; we also set the coefficient of x predicting
# y to approx. .15 across all simulations
power_lm <- grid_search(lm_test, params=list(N=c(200, 300)), n.iter=5000, output='data.frame', b1=.15,
    parallel='snow', ncpus=4)
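# a quick look at the regression power estimates; the N.test column name is an
# assumption about how grid_search labels the varied parameter -- check
# names(results(power_lm)) if it differs
power_lm_summary <- results(power_lm) %>%
    group_by(N.test) %>%
    summarise(power=mean(sig))
ggplot(power_lm_summary, aes(x=N.test, y=power)) +
    geom_point() +
    geom_line()
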
lm_test_interaction <- function(simNum, N, b1, b2, b3, b0=0, x1m=0, x1sd=1,
    x2m=0, x2sd=1) {

    x1 <- rnorm(N, x1m, x1sd)
    x2 <- rnorm(N, x2m, x2sd)
    yvar <- sqrt(1 - b1^2 - b2^2 - b3^2)  # residual standard deviation, so y has variance approx. 1
    y <- rnorm(N, b0 + b1*x1 + b2*x2 + b3*x1*x2, yvar)
    model <- lm(y ~ x1 * x2)

    # pull output from model (two main effects and interaction)
    est_x1 <- coef(summary(model))['x1', 'Estimate']
    p_x1 <- coef(summary(model))['x1', 'Pr(>|t|)']
    sig_x1 <- p_x1 < .05
    est_x2 <- coef(summary(model))['x2', 'Estimate']
    p_x2 <- coef(summary(model))['x2', 'Pr(>|t|)']
    sig_x2 <- p_x2 < .05
    est_int <- coef(summary(model))['x1:x2', 'Estimate']
    p_int <- coef(summary(model))['x1:x2', 'Pr(>|t|)']
    sig_int <- p_int < .05

    return(c(est_x1=est_x1, p_x1=p_x1, sig_x1=sig_x1, est_x2=est_x2, p_x2=p_x2,
        sig_x2=sig_x2, est_int=est_int, p_int=p_int, sig_int=sig_int))
}

# varying N at 200 and 300; setting the coefficient of x1 = .15, the coefficient
# of x2 = 0, and the coefficient of the interaction = .3
power_lm_int <- grid_search(lm_test_interaction, params=list(N=c(200, 300)),
    n.iter=5000, output='data.frame', b1=.15, b2=0, b3=.3, parallel='snow', ncpus=4)
lm_test_simple <- function(simNum, N, b1, b2, b3, b0=0, x1m=0, x1sd=1, x2m=0, x2sd=1) {
    x1 <- rnorm(N, x1m, x1sd)
    x2 <- rnorm(N, x2m, x2sd)
    yvar <- sqrt(1 - b1^2 - b2^2 - b3^2)
    y <- rnorm(N, b0 + b1*x1 + b2*x2 + b3*x1*x2, yvar)
    model <- lm(y ~ x1 * x2)  # here is the original model

    est_int <- coef(summary(model))['x1:x2', 'Estimate']
    p_int <- coef(summary(model))['x1:x2', 'Pr(>|t|)']
    sig_int <- p_int < .05

    # re-center x1 at -1 SD and +1 SD; in the follow-up models below, the x2
    # coefficient then gives the simple effect of x2 at that level of x1
    x1minus1sd <- x1 - mean(x1) + sd(x1)
    x1plus1sd <- x1 - mean(x1) - sd(x1)

    # new models to examine simple effects
    model2 <- lm(y ~ x1minus1sd * x2)
    model3 <- lm(y ~ x1plus1sd * x2)

    # test effect of x2 when x1 is at +/- 1 SD
    est_x2_minus1 <- coef(summary(model2))['x2', 'Estimate']
    p_x2_minus1 <- coef(summary(model2))['x2', 'Pr(>|t|)']
    sig_x2_minus1 <- p_x2_minus1 < .05

    est_x2_plus1 <- coef(summary(model3))['x2', 'Estimate']
    p_x2_plus1 <- coef(summary(model3))['x2', 'Pr(>|t|)']
    sig_x2_plus1 <- p_x2_plus1 < .05

    return(c(est_int=est_int, p_int=p_int, sig_int=sig_int,
        est_x2_minus1=est_x2_minus1, p_x2_minus1=p_x2_minus1,
        sig_x2_minus1=sig_x2_minus1, est_x2_plus1=est_x2_plus1,
        p_x2_plus1=p_x2_plus1, sig_x2_plus1=sig_x2_plus1))
}

power_lm_simple <- grid_search(lm_test_simple, params=list(N=c(200, 300)),
    n.iter=5000, output='data.frame', b1=.15, b2=0, b3=.3, parallel='snow', ncpus=4)

Multilevel models

mlm_test <- function(simNum, N, b1, b0=0, xm=0, xsd=1, varInt=1, varSlope=1, varResid=1) {
    timePoints <- 4
    subject <- rep(1:N, each=timePoints)
    sub_int <- rep(rnorm(N, 0, sqrt(varInt)), each=timePoints)  # random intercept
    sub_slope <- rep(rnorm(N, 0, sqrt(varSlope)), each=timePoints)  # random slope
    time <- rep(0:(timePoints-1), N)
    y <- (b0 + sub_int) + (b1 + sub_slope)*time + rnorm(N*timePoints, 0, sqrt(varResid))
        # y-intercept as a function of b0 plus random intercept;
        # slope as a function of b1 plus random slope
    data <- data.frame(subject, sub_int, sub_slope, time, y)

    # for more complex models that might not converge, tryCatch() is probably
    # a good idea
    result <- tryCatch({
        model <- nlme::lme(y ~ time, random=~time|subject, data=data)
            # when using parallel processing, we must refer to functions from
            # packages directly, e.g., package::function()

        est <- summary(model)$tTable['time', 'Value']
        se <- summary(model)$tTable['time', 'Std.Error']
        p <- summary(model)$tTable['time', 'p-value']
        c(est=est, se=se, p=p, sig=(p < .05))
    },
    error=function(e) {
        #message(e)  # print error message
        c(est=NA, se=NA, p=NA, sig=NA)
    })

    return(result)
}

# I am cutting this down to 500 iterations so that the document compiles faster; I would, however,
# recommend more iterations for a stable estimate
power_mlm <- grid_search(mlm_test, params=list(N=c(200, 300)), n.iter=500, output='data.frame', b1=.15,
    varInt=.05, varSlope=.15, varResid=.4, parallel='snow', ncpus=4)
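# because non-converged models come back as NA (via the tryCatch above), check
# the convergence rate alongside the power estimate; as before, N.test is an
# assumed column name
results(power_mlm) %>%
    group_by(N.test) %>%
    summarise(converged=mean(!is.na(sig)), power=mean(sig, na.rm=TRUE))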

Structural equation modelling

med_test <- function(simNum, N, aa, bb, cc) {
    x <- rnorm(N, 0, 1)
    m <- rnorm(N, aa*x, sqrt(1 - aa^2))
    y <- rnorm(N, cc*x + bb*m, sqrt(1 - cc^2 - bb^2))
    data <- data.frame(x, m, y)

    # set up lavaan model to calculate indirect effect (ab) and total effect
    model <- '
        m ~ a*x
        y ~ c*x
        y ~ b*m
        ab := a*b
        total := c + (a*b)'

    fit <- lavaan::sem(model, data=data)
    ests <- lavaan::parameterEstimates(fit)
        # when using parallel processing, we must refer to functions from
        # packages directly, e.g., package::function()

    # pull output from model
    a_est <- ests[ests$label == 'a', 'est']
    a_p <- ests[ests$label == 'a', 'pvalue']
    b_est <- ests[ests$label == 'b', 'est']
    b_p <- ests[ests$label == 'b', 'pvalue']
    c_est <- ests[ests$label == 'c', 'est']
    c_p <- ests[ests$label == 'c', 'pvalue']
    ab_est <- ests[ests$label == 'ab', 'est']
    ab_p <- ests[ests$label == 'ab', 'pvalue']

    return(c(a_est=a_est, a_p=a_p, b_est=b_est, b_p=b_p, c_est=c_est, c_p=c_p,
        ab_est=ab_est, ab_p=ab_p, sig=(ab_p < .05)))
}

# set up mediation model where x -> m = .15, m -> y = .2, and x -> y = .05
power_med <- grid_search(med_test, params=list(N=c(200, 300)), n.iter=1000, output='data.frame', aa=.15,
    bb=.2, cc=.05, parallel='snow', ncpus=4)
latent_test <- function(simNum, N, b1, ind1, ind2, ind3) {
    # matrix of factor structure; we have x as observed predictor, and y is a
    # latent variable with three indicators
    fmodel <- matrix(
        c(1, 0,      # x
          0, ind1,   # y1
          0, ind2,   # y2
          0, ind3),  # y3
        nrow=4, ncol=2, byrow=TRUE, dimnames=list(
            c('x', 'y1', 'y2', 'y3'),  # rows (observed)
            c('x', 'y')))              # cols (latent)

    # matrix of effects structure (variance-covariance); we are using x to
    # predict y (with coefficient specified as b1)
    y_resid_var <- sqrt(1 - b1^2)
    effects <- matrix(
        c(1, b1,            # x
          0, y_resid_var),  # y
        nrow=2, ncol=2, byrow=TRUE, dimnames=list(
            c('x', 'y'), c('x', 'y')))

    data <- paramtest::gen_data(fmodel, effects)
        # generates the data using factor and effects matrices

    model <- '
        y =~ y1 + y2 + y3
        y ~ b1*x'

    fit <- lavaan::sem(model, data=data)
    ests <- lavaan::parameterEstimates(fit)

    est <- ests[ests$label == 'b1', 'est']
    p <- ests[ests$label == 'b1', 'pvalue']

    return(c(est=est, p=p, sig=(p < .05)))
}

power_sem <- grid_search(latent_test, params=list(N=c(200, 300)), n.iter=1000, output='data.frame',
    b1=.15, ind1=.4, ind2=.4, ind3=.4, parallel='snow', ncpus=4)
save(power_ttest, power_ttest_vary, power_ttest_vary2, power_ttest_vary3,
    power_ttest_vary4, power_ttest_time1, power_ttest_time2, power_ttest_boot,
    power_lm, power_lm_int, power_lm_simple, power_mlm, power_med, power_sem,
    file='../vignettes/power_data.Rdata')

