Nothing
## ----set_options, echo=FALSE---------------------------------------------
# This file is the actual vignette file, which uses prepopulated data objects
# from the file in the `vignettes-build` directory. To update the vignette, run
# the code in the `vignettes-build` file first, and then knit this file.
knitr::opts_chunk$set(collapse=TRUE, comment='#>')
## ----load_functions, message=FALSE---------------------------------------
# load all packages used in this vignette
library('paramtest')
library('pwr')
library('ggplot2')
library('knitr')
library('nlme')
library('lavaan')
library('dplyr')
## ----load_data, echo=FALSE-----------------------------------------------
load('power_data.Rdata')
## ----pwr_ttest-----------------------------------------------------------
pwr.t.test(n=50, d=.5, type='two.sample')
## ----sim_ttest, eval=FALSE-----------------------------------------------
# # create user-defined function to generate and analyze data
# t_func <- function(simNum, N, d) {
# x1 <- rnorm(N, 0, 1)
# x2 <- rnorm(N, d, 1)
#
# t <- t.test(x1, x2, var.equal=TRUE) # run t-test on generated data
# stat <- t$statistic
# p <- t$p.value
#
# return(c(t=stat, p=p, sig=(p < .05)))
# # return a named vector with the results we want to keep
# }
#
# power_ttest <- run_test(t_func, n.iter=5000, output='data.frame', N=50, d=.5) # simulate data
# results(power_ttest) %>%
# summarise(power=mean(sig))
## ----sim_ttest_out, echo=FALSE-------------------------------------------
results(power_ttest) %>%
summarise(power=mean(sig))
## ----vary_params, eval=FALSE---------------------------------------------
# # give 'params' a list of parameters we want to vary;
# # testing at N=25, N=50, and N=100
# power_ttest_vary <- grid_search(t_func, params=list(N=c(25, 50, 100)),
# n.iter=5000, output='data.frame', d=.5)
# results(power_ttest_vary) %>%
# group_by(N.test) %>%
# summarise(power=mean(sig))
## ----vary_params_out, echo=FALSE-----------------------------------------
results(power_ttest_vary) %>%
group_by(N.test) %>%
summarise(power=mean(sig)) %>%
kable(digits=3)
## ----vary_params2, eval=FALSE--------------------------------------------
# # varying N and Cohen's d
# power_ttest_vary2 <- grid_search(t_func, params=list(N=c(25, 50, 100), d=c(.2, .5)),
# n.iter=5000, output='data.frame')
# power <- results(power_ttest_vary2) %>%
# group_by(N.test, d.test) %>%
# summarise(power=mean(sig))
# print(power)
# ggplot(power, aes(x=N.test, y=power, group=factor(d.test), colour=factor(d.test))) +
# geom_point() +
# geom_line() +
# ylim(c(0, 1)) +
# labs(x='Sample Size', y='Power', colour="Cohen's d") +
# theme_minimal()
## ----vary_params2_out, echo=FALSE, fig.width=6, fig.height=4-------------
power <- results(power_ttest_vary2) %>%
group_by(N.test, d.test) %>%
summarise(power=mean(sig))
kable(power, digits=3)
ggplot(power, aes(x=N.test, y=power, group=factor(d.test), colour=factor(d.test))) +
geom_point() +
geom_line() +
ylim(c(0, 1)) +
labs(x='Sample Size', y='Power', colour="Cohen's d") +
theme_minimal()
## ----parallel1, eval=FALSE-----------------------------------------------
# # time with no parallel processing (your mileage may very greatly)
# system.time(power_ttest_vary3 <- grid_search(t_func,
# params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame'))
## ----parallel1_out, echo=FALSE-------------------------------------------
print(power_ttest_time1)
## ----parallel2, eval=FALSE-----------------------------------------------
# # using parallel processing; I am using a Windows system, so I use parallel='snow';
# # see the documentation for the 'parallel' package for more details
# system.time(power_ttest_vary4 <- grid_search(t_func,
# params=list(N=c(25, 50, 100), d=c(.2, .5)), n.iter=5000, output='data.frame', parallel='snow', ncpus=4))
## ----parallel2_out, echo=FALSE-------------------------------------------
print(power_ttest_time2)
## ----bootstrap, eval=FALSE-----------------------------------------------
# # user function must take data and indices as first two arguments; see 'boot'
# # package documentation for more details
# t_func_boot <- function(data, indices) {
# sample_data <- data[indices, ]
# treatGroup <- sample_data[sample_data$group == 'trt2', 'weight']
# ctrlGroup <- sample_data[sample_data$group == 'ctrl', 'weight']
#
# t <- t.test(treatGroup, ctrlGroup, var.equal=TRUE)
# stat <- t$statistic
# p <- t$p.value
#
# return(c(t=stat, p=p, sig=(p < .05)))
# }
#
# # example using built-in dataset PlantGrowth
# power_ttest_boot <- run_test(t_func_boot, n.iter=5000, output='data.frame', boot=TRUE,
# bootParams=list(data=PlantGrowth))
# results(power_ttest_boot) %>%
# summarise(power=mean(sig))
## ----bootstrap_out, echo=FALSE-------------------------------------------
results(power_ttest_boot) %>%
summarise(power=mean(sig))
## ----sim_linear, eval=FALSE----------------------------------------------
# lm_test <- function(simNum, N, b1, b0=0, xm=0, xsd=1) {
# x <- rnorm(N, xm, xsd)
# y <- rnorm(N, b0 + b1*x, sqrt(1 - b1^2)) # var. approx. 1 after accounting
# # for explained variance by x
# model <- lm(y ~ x)
#
# # pull output from model
# est <- coef(summary(model))['x', 'Estimate']
# se <- coef(summary(model))['x', 'Std. Error']
# p <- coef(summary(model))['x', 'Pr(>|t|)']
#
# return(c(xm=mean(x), xsd=sd(x), ym=mean(y), ysd=sd(y), est=est, se=se, p=p,
# sig=(p < .05)))
# }
#
# # we vary N at 200 and 300; we are also setting coefficient of x predicting
# # y to be approx. .15 across all simulations
# power_lm <- grid_search(lm_test, params=list(N=c(200, 300)), n.iter=5000, output='data.frame', b1=.15,
# parallel='snow', ncpus=4)
# results(power_lm) %>%
# group_by(N.test) %>%
# summarise(power=mean(sig))
## ----sim_linear_out, echo=FALSE------------------------------------------
results(power_lm) %>%
group_by(N.test) %>%
summarise(power=mean(sig)) %>%
kable(digits=3)
## ----pwr_linear----------------------------------------------------------
# f2 = R^2 / (1 - R^2)
pwr.f2.test(u=1, v=c(200-2, 300-2), f2=(.15^2) / (1 - .15^2))
## ----sim_linear2, eval=FALSE---------------------------------------------
# lm_test_interaction <- function(simNum, N, b1, b2, b3, b0=0, x1m=0, x1sd=1,
# x2m=0, x2sd=1) {
#
# x1 <- rnorm(N, x1m, x1sd)
# x2 <- rnorm(N, x2m, x2sd)
# yvar <- sqrt(1 - b1^2 - b2^2 - b3^2) # residual variance
# y <- rnorm(N, b0 + b1*x1 + b2*x2 + b3*x1*x2, yvar)
# model <- lm(y ~ x1 * x2)
#
# # pull output from model (two main effects and interaction)
# est_x1 <- coef(summary(model))['x1', 'Estimate']
# p_x1 <- coef(summary(model))['x1', 'Pr(>|t|)']
# sig_x1 <- p_x1 < .05
# est_x2 <- coef(summary(model))['x2', 'Estimate']
# p_x2 <- coef(summary(model))['x2', 'Pr(>|t|)']
# sig_x2 <- p_x2 < .05
# est_int <- coef(summary(model))['x1:x2', 'Estimate']
# p_int <- coef(summary(model))['x1:x2', 'Pr(>|t|)']
# sig_int <- p_int < .05
#
# return(c(est_x1=est_x1, p_x1=p_x1, sig_x1=sig_x1, est_x2=est_x2, p_x2=p_x2,
# sig_x2=sig_x2, est_int=est_int, p_int=p_int, sig_int=sig_int))
# }
#
# # varying N at 200 and 300; setting coefficient of x1 = .15, coefficient of
# # x2 = 0, and coefficien of interaction = .3
# power_lm_int <- grid_search(lm_test_interaction, params=list(N=c(200, 300)),
# n.iter=5000, output='data.frame', b1=.15, b2=0, b3=.3, parallel='snow', ncpus=4)
# results(power_lm_int) %>%
# group_by(N.test) %>%
# summarise(
# power_x1=mean(sig_x1),
# power_x2=mean(sig_x2),
# power_int=mean(sig_int))
## ----sim_linear2_out, echo=FALSE-----------------------------------------
results(power_lm_int) %>%
group_by(N.test) %>%
summarise(
power_x1=mean(sig_x1),
power_x2=mean(sig_x2),
power_int=mean(sig_int)) %>%
kable(digits=3)
## ----sim_simple_eff, eval=FALSE------------------------------------------
# lm_test_simple <- function(simNum, N, b1, b2, b3, b0=0, x1m=0, x1sd=1, x2m=0, x2sd=1) {
# x1 <- rnorm(N, x1m, x1sd)
# x2 <- rnorm(N, x2m, x2sd)
# yvar <- sqrt(1 - b1^2 - b2^2 - b3^2)
# y <- rnorm(N, b0 + b1*x1 + b2*x2 + b3*x1*x2, yvar)
# model <- lm(y ~ x1 * x2) # here is the original model
#
# est_int <- coef(summary(model))['x1:x2', 'Estimate']
# p_int <- coef(summary(model))['x1:x2', 'Pr(>|t|)']
# sig_int <- p_int < .05
#
# # calculate x1 at +/- 1 SD, to look at simple effects
# x1minus1sd <- x1 - mean(x1) + sd(x1)
# x1plus1sd <- x1 - mean(x1) - sd(x1)
#
# # new models to examine simple effects
# model2 <- lm(y ~ x1minus1sd * x2)
# model3 <- lm(y ~ x1plus1sd * x2)
#
# # test effect of x2 when x1 is at +/- 1 SD
# est_x2_minus1 <- coef(summary(model2))['x2', 'Estimate']
# p_x2_minus1 <- coef(summary(model2))['x2', 'Pr(>|t|)']
# sig_x2_minus1 <- p_x2_minus1 < .05
#
# est_x2_plus1 <- coef(summary(model3))['x2', 'Estimate']
# p_x2_plus1 <- coef(summary(model3))['x2', 'Pr(>|t|)']
# sig_x2_plus1 <- p_x2_plus1 < .05
#
# return(c(est_int=est_int, p_int=p_int, sig_int=sig_int,
# est_x2_minus1=est_x2_minus1, p_x2_minus1=p_x2_minus1,
# sig_x2_minus1=sig_x2_minus1, est_x2_plus1=est_x2_plus1,
# p_x2_plus1=p_x2_plus1, sig_x2_plus1=sig_x2_plus1))
# }
#
# power_lm_simple <- grid_search(lm_test_simple, params=list(N=c(200, 300)),
# n.iter=5000, output='data.frame', b1=.15, b2=0, b3=.3, parallel='snow', ncpus=4)
# results(power_lm_simple) %>%
# group_by(N.test) %>%
# summarise(
# power_x2_minus1=mean(sig_x2_minus1),
# power_x2_plus1=mean(sig_x2_plus1))
## ----sim_simple_eff_out, echo=FALSE--------------------------------------
results(power_lm_simple) %>%
group_by(N.test) %>%
summarise(
power_x2_minus1=mean(sig_x2_minus1),
power_x2_plus1=mean(sig_x2_plus1)) %>%
kable(digits=3)
## ----sim_mlm, eval=FALSE-------------------------------------------------
# mlm_test <- function(simNum, N, b1, b0=0, xm=0, xsd=1, varInt=1, varSlope=1, varResid=1) {
# timePoints <- 4
# subject <- rep(1:N, each=timePoints)
# sub_int <- rep(rnorm(N, 0, sqrt(varInt)), each=timePoints) # random intercept
# sub_slope <- rep(rnorm(N, 0, sqrt(varSlope)), each=timePoints) # random slope
# time <- rep(0:(timePoints-1), N)
# y <- (b0 + sub_int) + (b1 + sub_slope)*time + rnorm(N*timePoints, 0, sqrt(varResid))
# # y-intercept as a function of b0 plus random intercept;
# # slope as a function of b1 plus random slope
# data <- data.frame(subject, sub_int, sub_slope, time, y)
#
# # for more complex models that might not converge, tryCatch() is probably
# # a good idea
# return <- tryCatch({
# model <- nlme::lme(y ~ time, random=~time|subject, data=data)
# # when using parallel processing, we must refer to functions from
# # packages directly, e.g., package::function()
#
# est <- summary(model)$tTable['time', 'Value']
# se <- summary(model)$tTable['time', 'Std.Error']
# p <- summary(model)$tTable['time', 'p-value']
# return(c(est=est, se=se, p=p, sig=(p < .05)))
# },
# error=function(e) {
# #message(e) # print error message
# return(c(est=NA, se=NA, p=NA, sig=NA))
# })
#
# return(return)
# }
#
# # I am cutting this down to 500 iterations so that the document compiles faster; I would, however,
# # recommend more iterations for a stable estimate
# power_mlm <- grid_search(mlm_test, params=list(N=c(200, 300)), n.iter=500, output='data.frame', b1=.15,
# varInt=.05, varSlope=.15, varResid=.4, parallel='snow', ncpus=4)
# results(power_mlm) %>%
# group_by(N.test) %>%
# summarise(
# power=mean(sig, na.rm=TRUE),
# na=sum(is.na(sig))) # we use this to count up how many cases did not properly converge
## ----sim_mlm_out, echo=FALSE---------------------------------------------
results(power_mlm) %>%
group_by(N.test) %>%
summarise(
power=mean(sig, na.rm=TRUE),
na=sum(is.na(sig))) %>% # we use this to count up how many cases did not properly converge
kable(digits=3)
## ----sim_mediation, eval=FALSE-------------------------------------------
# med_test <- function(simNum, N, aa, bb, cc) {
# x <- rnorm(N, 0, 1)
# m <- rnorm(N, aa*x, sqrt(1 - aa^2))
# y <- rnorm(N, cc*x + bb*m, sqrt(1 - cc^2 - bb^2))
# data <- data.frame(x, m, y)
#
# # set up lavaan model to calculate indirect effect (ab) and total effect
# model <- '
# m ~ a*x
# y ~ c*x
# y ~ b*m
# ab := a*b
# total := c + (a*b)'
#
# fit <- lavaan::sem(model, data=data)
# ests <- lavaan::parameterEstimates(fit)
# # when using parallel processing, we must refer to functions from
# # packages directly, e.g., package::function()
#
# # pull output from model
# a_est <- ests[ests$label == 'a', 'est']
# a_p <- ests[ests$label == 'a', 'pvalue']
# b_est <- ests[ests$label == 'b', 'est']
# b_p <- ests[ests$label == 'b', 'pvalue']
# c_est <- ests[ests$label == 'c', 'est']
# c_p <- ests[ests$label == 'c', 'pvalue']
# ab_est <- ests[ests$label == 'ab', 'est']
# ab_p <- ests[ests$label == 'ab', 'pvalue']
#
# return(c(a_est=a_est, a_p=a_p, b_est=b_est, b_p=b_p, c_est=c_est, c_p=c_p,
# ab_est=ab_est, ab_p=ab_p, sig=(ab_p < .05)))
# }
#
# # set up mediation model where x -> m = .15, m -> y = .2, and x -> y = .05
# power_med <- grid_search(med_test, params=list(N=c(200, 300)), n.iter=1000, output='data.frame', aa=.15,
# bb=.2, cc=.05, parallel='snow', ncpus=4)
# results(power_med) %>%
# group_by(N.test) %>%
# summarise(
# power=mean(sig, na.rm=TRUE))
## ----sim_mediation_out, echo=FALSE---------------------------------------
results(power_med) %>%
group_by(N.test) %>%
summarise(
power=mean(sig, na.rm=TRUE)) %>%
kable(digits=3)
## ----sim_latent, eval=FALSE----------------------------------------------
# latent_test <- function(simNum, N, b1, ind1, ind2, ind3) {
# # matrix of factor structure; we have x as observed predictor, and y is a
# # latent variable with three indicators
# fmodel <- matrix(
# c(1, 0, # x
# 0, ind1, # y1
# 0, ind2, # y2
# 0, ind3), # y3
# nrow=4, ncol=2, byrow=TRUE, dimnames=list(
# c('x', 'y1', 'y2', 'y3'), # rows (observed)
# c('x', 'y'))) # cols (latent)
#
# # matrix of effects structure (variance-covariance); we are using x to
# # predict y (with coefficient specified as b1)
# y_resid_var <- sqrt(1 - b1^2)
# effects <- matrix(
# c(1, b1, # x
# 0, y_resid_var), # y
# nrow=2, ncol=2, byrow=TRUE, dimnames=list(
# c('x', 'y'), c('x', 'y')))
#
# data <- paramtest::gen_data(fmodel, effects)
# # generates the data using factor and effects matrices
#
# model <- '
# y =~ y1 + y2 + y3
# y ~ b1*x'
#
# fit <- lavaan::sem(model, data=data)
# ests <- lavaan::parameterEstimates(fit)
#
# est <- ests[ests$label == 'b1', 'est']
# p <- ests[ests$label == 'b1', 'pvalue']
#
# return(c(est=est, p=p, sig=(p < .05)))
# }
#
# power_sem <- grid_search(latent_test, params=list(N=c(200, 300)), n.iter=1000, output='data.frame',
# b1=.15, ind1=.4, ind2=.4, ind3=.4, parallel='snow', ncpus=4)
# results(power_sem) %>%
# group_by(N.test) %>%
# summarise(
# power=mean(sig, na.rm=TRUE))
## ----sim_latent_out, echo=FALSE------------------------------------------
results(power_sem) %>%
group_by(N.test) %>%
summarise(
power=mean(sig, na.rm=TRUE)) %>%
kable(digits=3)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.