# This file is used to manually create data objects that will be used in the # actual vignette. This is done to avoid excessively long build times. To update # the vignette, run this code first, and then knit the actual vignette in the # `vignettes` directory. knitr::opts_chunk$set(collapse=TRUE, comment='#>')
# load all packages used in this vignette library('paramtest') library('pwr') library('ggplot2') library('knitr') library('nlme') library('lavaan') library('dplyr')
# create user-defined function to generate and analyze data t_func <- function(simNum, N, d) { x1 <- rnorm(N, 0, 1) x2 <- rnorm(N, d, 1) t <- t.test(x1, x2, var.equal=TRUE) # run t-test on generated data stat <- t$statistic p <- t$p.value return(c(t=stat, p=p, sig=(p < .05))) # return a named vector with the results we want to keep } power_ttest <- run_test(t_func, n.iter=5000, output='data.frame', N=50, d=.5) # simulate data
# give 'params' a list of parameters we want to vary; # testing at N=25, N=50, and N=100 power_ttest_vary <- grid_search(t_func, params=list(N=c(25, 50, 100)), n.iter=5000, output='data.frame', d=.5)
# varying N and Cohen's d power_ttest_vary2 <- grid_search(t_func, params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame')
# time with no parallel processing (your mileage may very greatly) power_ttest_time1 <- system.time(power_ttest_vary3 <- grid_search(t_func, params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame')) # using parallel processing; I am using a Windows system, so I use parallel='snow'; # see the documentation for the 'parallel' package for more details power_ttest_time2 <- system.time(power_ttest_vary4 <- grid_search(t_func, params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame', parallel='snow', ncpus=4))
# user function must take data and indices as first two arguments; see 'boot' # package documentation for more details t_func_boot <- function(data, indices) { sample_data <- data[indices, ] treatGroup <- sample_data[sample_data$group == 'trt2', 'weight'] ctrlGroup <- sample_data[sample_data$group == 'ctrl', 'weight'] t <- t.test(treatGroup, ctrlGroup, var.equal=TRUE) stat <- t$statistic p <- t$p.value return(c(t=stat, p=p, sig=(p < .05))) } # example using built-in dataset PlantGrowth power_ttest_boot <- run_test(t_func_boot, n.iter=5000, output='data.frame', boot=TRUE, bootParams=list(data=PlantGrowth))
lm_test <- function(simNum, N, b1, b0=0, xm=0, xsd=1) { x <- rnorm(N, xm, xsd) y <- rnorm(N, b0 + b1*x, sqrt(1 - b1^2)) # var. approx. 1 after accounting # for explained variance by x model <- lm(y ~ x) # pull output from model est <- coef(summary(model))['x', 'Estimate'] se <- coef(summary(model))['x', 'Std. Error'] p <- coef(summary(model))['x', 'Pr(>|t|)'] return(c(xm=mean(x), xsd=sd(x), ym=mean(y), ysd=sd(y), est=est, se=se, p=p, sig=(p < .05))) } # we vary N at 200 and 300; we are also setting coefficient of x predicting # y to be approx. .15 across all simulations power_lm <- grid_search(lm_test, params=list(N=c(200, 300)), n.iter=5000, output='data.frame', b1=.15, parallel='snow', ncpus=4)
lm_test_interaction <- function(simNum, N, b1, b2, b3, b0=0, x1m=0, x1sd=1, x2m=0, x2sd=1) { x1 <- rnorm(N, x1m, x1sd) x2 <- rnorm(N, x2m, x2sd) yvar <- sqrt(1 - b1^2 - b2^2 - b3^2) # residual variance y <- rnorm(N, b0 + b1*x1 + b2*x2 + b3*x1*x2, yvar) model <- lm(y ~ x1 * x2) # pull output from model (two main effects and interaction) est_x1 <- coef(summary(model))['x1', 'Estimate'] p_x1 <- coef(summary(model))['x1', 'Pr(>|t|)'] sig_x1 <- p_x1 < .05 est_x2 <- coef(summary(model))['x2', 'Estimate'] p_x2 <- coef(summary(model))['x2', 'Pr(>|t|)'] sig_x2 <- p_x2 < .05 est_int <- coef(summary(model))['x1:x2', 'Estimate'] p_int <- coef(summary(model))['x1:x2', 'Pr(>|t|)'] sig_int <- p_int < .05 return(c(est_x1=est_x1, p_x1=p_x1, sig_x1=sig_x1, est_x2=est_x2, p_x2=p_x2, sig_x2=sig_x2, est_int=est_int, p_int=p_int, sig_int=sig_int)) } # varying N at 200 and 300; setting coefficient of x1 = .15, coefficient of # x2 = 0, and coefficien of interaction = .3 power_lm_int <- grid_search(lm_test_interaction, params=list(N=c(200, 300)), n.iter=5000, output='data.frame', b1=.15, b2=0, b3=.3, parallel='snow', ncpus=4)
lm_test_simple <- function(simNum, N, b1, b2, b3, b0=0, x1m=0, x1sd=1, x2m=0, x2sd=1) { x1 <- rnorm(N, x1m, x1sd) x2 <- rnorm(N, x2m, x2sd) yvar <- sqrt(1 - b1^2 - b2^2 - b3^2) y <- rnorm(N, b0 + b1*x1 + b2*x2 + b3*x1*x2, yvar) model <- lm(y ~ x1 * x2) # here is the original model est_int <- coef(summary(model))['x1:x2', 'Estimate'] p_int <- coef(summary(model))['x1:x2', 'Pr(>|t|)'] sig_int <- p_int < .05 # calculate x1 at +/- 1 SD, to look at simple effects x1minus1sd <- x1 - mean(x1) + sd(x1) x1plus1sd <- x1 - mean(x1) - sd(x1) # new models to examine simple effects model2 <- lm(y ~ x1minus1sd * x2) model3 <- lm(y ~ x1plus1sd * x2) # test effect of x2 when x1 is at +/- 1 SD est_x2_minus1 <- coef(summary(model2))['x2', 'Estimate'] p_x2_minus1 <- coef(summary(model2))['x2', 'Pr(>|t|)'] sig_x2_minus1 <- p_x2_minus1 < .05 est_x2_plus1 <- coef(summary(model3))['x2', 'Estimate'] p_x2_plus1 <- coef(summary(model3))['x2', 'Pr(>|t|)'] sig_x2_plus1 <- p_x2_plus1 < .05 return(c(est_int=est_int, p_int=p_int, sig_int=sig_int, est_x2_minus1=est_x2_minus1, p_x2_minus1=p_x2_minus1, sig_x2_minus1=sig_x2_minus1, est_x2_plus1=est_x2_plus1, p_x2_plus1=p_x2_plus1, sig_x2_plus1=sig_x2_plus1)) } power_lm_simple <- grid_search(lm_test_simple, params=list(N=c(200, 300)), n.iter=5000, output='data.frame', b1=.15, b2=0, b3=.3, parallel='snow', ncpus=4)
mlm_test <- function(simNum, N, b1, b0=0, xm=0, xsd=1, varInt=1, varSlope=1, varResid=1) { timePoints <- 4 subject <- rep(1:N, each=timePoints) sub_int <- rep(rnorm(N, 0, sqrt(varInt)), each=timePoints) # random intercept sub_slope <- rep(rnorm(N, 0, sqrt(varSlope)), each=timePoints) # random slope time <- rep(0:(timePoints-1), N) y <- (b0 + sub_int) + (b1 + sub_slope)*time + rnorm(N*timePoints, 0, sqrt(varResid)) # y-intercept as a function of b0 plus random intercept; # slope as a function of b1 plus random slope data <- data.frame(subject, sub_int, sub_slope, time, y) # for more complex models that might not converge, tryCatch() is probably # a good idea return <- tryCatch({ model <- nlme::lme(y ~ time, random=~time|subject, data=data) # when using parallel processing, we must refer to functions from # packages directly, e.g., package::function() est <- summary(model)$tTable['time', 'Value'] se <- summary(model)$tTable['time', 'Std.Error'] p <- summary(model)$tTable['time', 'p-value'] return(c(est=est, se=se, p=p, sig=(p < .05))) }, error=function(e) { #message(e) # print error message return(c(est=NA, se=NA, p=NA, sig=NA)) }) return(return) } # I am cutting this down to 500 iterations so that the document compiles faster; I would, however, # recommend more iterations for a stable estimate power_mlm <- grid_search(mlm_test, params=list(N=c(200, 300)), n.iter=500, output='data.frame', b1=.15, varInt=.05, varSlope=.15, varResid=.4, parallel='snow', ncpus=4)
med_test <- function(simNum, N, aa, bb, cc) { x <- rnorm(N, 0, 1) m <- rnorm(N, aa*x, sqrt(1 - aa^2)) y <- rnorm(N, cc*x + bb*m, sqrt(1 - cc^2 - bb^2)) data <- data.frame(x, m, y) # set up lavaan model to calculate indirect effect (ab) and total effect model <- ' m ~ a*x y ~ c*x y ~ b*m ab := a*b total := c + (a*b)' fit <- lavaan::sem(model, data=data) ests <- lavaan::parameterEstimates(fit) # when using parallel processing, we must refer to functions from # packages directly, e.g., package::function() # pull output from model a_est <- ests[ests$label == 'a', 'est'] a_p <- ests[ests$label == 'a', 'pvalue'] b_est <- ests[ests$label == 'b', 'est'] b_p <- ests[ests$label == 'b', 'pvalue'] c_est <- ests[ests$label == 'c', 'est'] c_p <- ests[ests$label == 'c', 'pvalue'] ab_est <- ests[ests$label == 'ab', 'est'] ab_p <- ests[ests$label == 'ab', 'pvalue'] return(c(a_est=a_est, a_p=a_p, b_est=b_est, b_p=b_p, c_est=c_est, c_p=c_p, ab_est=ab_est, ab_p=ab_p, sig=(ab_p < .05))) } # set up mediation model where x -> m = .15, m -> y = .2, and x -> y = .05 power_med <- grid_search(med_test, params=list(N=c(200, 300)), n.iter=1000, output='data.frame', aa=.15, bb=.2, cc=.05, parallel='snow', ncpus=4)
latent_test <- function(simNum, N, b1, ind1, ind2, ind3) { # matrix of factor structure; we have x as observed predictor, and y is a # latent variable with three indicators fmodel <- matrix( c(1, 0, # x 0, ind1, # y1 0, ind2, # y2 0, ind3), # y3 nrow=4, ncol=2, byrow=TRUE, dimnames=list( c('x', 'y1', 'y2', 'y3'), # rows (observed) c('x', 'y'))) # cols (latent) # matrix of effects structure (variance-covariance); we are using x to # predict y (with coefficient specified as b1) y_resid_var <- sqrt(1 - b1^2) effects <- matrix( c(1, b1, # x 0, y_resid_var), # y nrow=2, ncol=2, byrow=TRUE, dimnames=list( c('x', 'y'), c('x', 'y'))) data <- paramtest::gen_data(fmodel, effects) # generates the data using factor and effects matrices model <- ' y =~ y1 + y2 + y3 y ~ b1*x' fit <- lavaan::sem(model, data=data) ests <- lavaan::parameterEstimates(fit) est <- ests[ests$label == 'b1', 'est'] p <- ests[ests$label == 'b1', 'pvalue'] return(c(est=est, p=p, sig=(p < .05))) } power_sem <- grid_search(latent_test, params=list(N=c(200, 300)), n.iter=1000, output='data.frame', b1=.15, ind1=.4, ind2=.4, ind3=.4, parallel='snow', ncpus=4)
save(power_ttest, power_ttest_vary, power_ttest_vary2, power_ttest_vary3, power_ttest_vary4, power_ttest_time1, power_ttest_time2, power_ttest_boot, power_lm, power_lm_int, power_lm_simple, power_mlm, power_med, power_sem, file='../vignettes/power_data.Rdata')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.