Nothing
library(testthat)
library(devtools)
library(doParallel)
library(StepwiseTest)
library(matrixcalc)
library(foreach)
library(mvtnorm)
# # THIS TEST TAKES A WHILE BECAUSE IT RUNS 500 RESAMPLES
# # COMMENT IT OUT IF YOU WANT TO AVOID IT
# # test that increasing the correlation between outcomes increases width of null interval
# test_that("corr_tests #2", {
#
# ######## Low Correlation Between Outcomes ########
# N = 250
#
# cor = make_corr_mat( nX = 3,
# nY = 100,
# rho.XX = 0,
# rho.YY = 0.05,
# rho.XY = 0,
# prop.corr = 1 )
#
# d = sim_data( n = N, cor = cor )
# all.covars = names(d)[ grep( "X", names(d) ) ]
# C = all.covars[ !all.covars == "X1" ]
# Y = names(d)[ grep( "Y", names(d) ) ]
#
# res1 = corr_tests( d,
# X = "X1",
# C = C,
# Ys = Y,
# B = 500,
# alpha = 0.1,
# alpha.fam=0.1,
# method = c( "nreject", "bonferroni", "holm", "minP", "Wstep", "romano" ) )
#
# ######## Check Results of First Sample ########
# # check inference: excess hits
# expect_equal( as.numeric(res1$samp.res$rej) - as.numeric(res1$null.int[2]),
# res1$excess.hits )
#
# # check inference: critical value from global test
# expect_equal( as.numeric( quantile( res1$nrej.bt, 1-0.1 ) ),
# as.numeric( res1$global.test$crit[ res1$global.test$method == "nreject"] ) )
#
# # check p-value of global test
# expect_equal( sum( res1$nrej.bt >= res1$samp.res$rej ) / length( res1$nrej.bt ),
# res1$global.test$pval[ res1$global.test$method == "nreject"] )
#
#
# # check results from original sample
# # do analysis manually
# alpha = 0.1
#
# rej.man = 0
# tvals.man = c()
# bhats.man = c()
# pvals.man = c()
# resid.man = matrix( NA, nrow = nrow(d), ncol = length(Y) )
#
# for ( i in 1:length(Y) ) {
# m = lm( d[[ Y[i] ]] ~ X1 + X2 + X3, data = d )
# bhats.man[i] = coef(m)[["X1"]]
# tvals.man[i] = summary(m)$coefficients["X1","t value"]
# pvals.man[i] = summary(m)$coefficients["X1", "Pr(>|t|)"]
# resid.man[,i] = residuals(m)
#
# # did we reject it?
# if ( summary(m)$coefficients["X1", "Pr(>|t|)"] < alpha ) rej.man = rej.man + 1
# }
#
# # check bhats
# expect_equal( bhats.man, res1$samp.res$bhats )
# expect_equal( tvals.man, res1$samp.res$tvals )
# expect_equal( pvals.man, res1$samp.res$pvals )
# expect_equal( as.numeric(as.matrix(resid.man)),
# as.numeric(as.matrix(res1$samp.res$resid)) )
#
# expect_equal( sum( pvals.man < alpha ),
# sum( res1$samp.res$rej ) )
#
# # check other global tests
# expect_equal( res1$global.test$pval[ res1$global.test$method == "Wstep" ],
# min( adj_Wstep( p = res1$samp.res$pvals, p.bt = res1$pvals.bt ) ) )
#
# expect_equal( res1$global.test$pval[ res1$global.test$method == "minP" ],
# min( adj_minP( p = res1$samp.res$pvals, p.bt = res1$pvals.bt ) ) )
#
# expect_equal( res1$global.test$pval[ res1$global.test$method == "bonferroni" ],
# min( p.adjust( res1$samp.res$pvals, method="bonferroni" ) ) )
#
# expect_equal( res1$global.test$pval[ res1$global.test$method == "holm" ],
# min( p.adjust( res1$samp.res$pvals, method="holm" ) ) )
#
# expect_equal( res1$global.test$reject[ res1$global.test$method == "romano" ],
# any( FWERkControl( res1$samp.res$tvals, as.matrix( res1$tvals.bt ), k = 1, alpha = .1 )$Reject == 1 ) )
#
# ######## Higher Correlation Between Outcomes ########
# cor = make_corr_mat( nX = 3,
# nY = 100,
# rho.XX = 0,
# rho.YY = 0.25,
# rho.XY = 0,
# prop.corr = 1 )
#
# d = sim_data( n = N, cor = cor )
# all.covars = names(d)[ grep( "X", names(d) ) ]
# C = all.covars[ !all.covars == "X1" ]
# Y = names(d)[ grep( "Y", names(d) ) ]
#
# res2 = corr_tests( d,
# X = "X1",
# C = C,
# Ys = Y,
# B = 500,
# alpha = 0.1,
# alpha.fam = 0.1,
# method = c( "nreject", "bonferroni", "holm", "minP", "Wstep", "romano" ) )
#
#
# ######## Tests ########
# # null interval should be wider for the second one
# expect_equal( as.logical( res2$null.int[2] >= res1$null.int[2] ), TRUE )
#
# # p-value should be larger for the second one
# expect_equal( as.logical( res2$global.test$pval[ res2$global.test$method == "nreject" ] >=
# res1$global.test$pval[ res1$global.test$method == "nreject" ] ), TRUE )
#
# } )
# only checks a few things:
# two of the global tests
# and the average number of rejections in resamples
test_that( "corr_tests #1", {
library(carData)
data(Soils)
X = "pH"
C = c("Na", "Conduc")
Y = c("N", "Dens", "P", "Ca", "Mg", "K")
res = corr_tests( Soils,
X = X,
Ys = Y,
B = 200,
alpha = 0.1,
method = c( "nreject", "bonferroni", "holm", "minP", "Wstep", "romano" ) )
# should be about equal
expect_equal( mean(res$nrej.bt),
.10*length(Y),
tolerance = 0.1 )
# Bonferroni: should be exactly equal
expect_equal( min( res$samp.res$pvals * length(Y) ),
res$global.test$pval[2] )
# Holm: should be exactly equal
expect_equal( min( p.adjust( res$samp.res$pvals, method = "holm" ) ),
res$global.test$pval[3] )
} )
###################### TEST FNS FOR APPLYING OUR METRICS ######################
# fix_input with extra covariates
# X1 is extra and should be removed
test_that("fix_input #2", {
cor = make_corr_mat( nX = 1,
nY = 4,
rho.XX = 0,
rho.YY = 0.25,
rho.XY = 0,
prop.corr = 1 )
d = sim_data( n = 20, cor = cor )
all.covars = names(d)[ grep( "X", names(d) ) ]
C = all.covars[ !all.covars == "X1" ]
##### Add Bad Input ######
# insert missing data
d[1,4] = NA
# insert a decoy variable that should be removed in analysis
d$X20 = rnorm( n = nrow(d) )
d$X21 = rnorm( n = nrow(d) )
# make one of the covariates not mean-centered
d$X1 = d$X1 + 2
d = fix_input( X="X1",
C=NA,
Ys=names(d)[ grep( "Y", names(d) ) ],
d = d )
# check that it caught bad input
expect_equal( c( "X20", "X21" ) %in% names(d),
c(FALSE, FALSE) )
expect_equal( any( is.na(d) ),
FALSE )
} )
# fix_input with extra covariates
test_that("fix_input #1", {
cor = make_corr_mat( nX = 5,
nY = 10,
rho.XX = -0.06,
rho.YY = 0.1,
rho.XY = -0.1,
prop.corr = 8/40 )
d = sim_data( n = 20, cor = cor )
all.covars = names(d)[ grep( "X", names(d) ) ]
C = all.covars[ !all.covars == "X1" ]
##### Add Bad Input ######
# insert missing data
d[1,4] = NA
# insert a decoy variable that should be removed in analysis
d$X20 = rnorm( n = nrow(d) )
d$X21 = rnorm( n = nrow(d) )
# make one of the covariates not mean-centered
d$X5 = d$X5 + 2
d = fix_input( X="X1",
C=C,
Ys=names(d)[ grep( "Y", names(d) ) ],
d = d )
# check that it caught bad input
expect_equal( c( "X20", "X21" ) %in% names(d),
c(FALSE, FALSE) )
expect_equal( any( is.na(d) ),
FALSE )
} )
# fit_model doesn't need a test because we test it through the dataset_result tests
# without centering test stats
test_that("dataset_result #1", {
cor = make_corr_mat( nX = 5,
nY = 2,
rho.XX = -0.06,
rho.YY = 0.1,
rho.XY = -0.1,
prop.corr = 8/40 )
d = sim_data( n = 50, cor = cor )
# try to confuse fn by choosing a different X as covariate of interest
Ys = names(d)[ grep( "Y", names(d) ) ]
X = "X2"
all.covars = names(d)[ grep( "X", names(d) ) ]
C = all.covars[ !all.covars == X ]
# do analysis manually
alpha = 0.05
rej.man = 0
tvals.man = c()
bhats.man = c()
pvals.man = c()
resid.man = matrix( NA, nrow = 50, ncol = 2 )
for ( i in 1:length(Ys) ) {
m = lm( d[[ Ys[i] ]] ~ X1 + X2 + X3 + X4 + X5, data = d )
bhats.man[i] = coef(m)[[X]]
tvals.man[i] = summary(m)$coefficients[X,"t value"]
pvals.man[i] = summary(m)$coefficients[X, "Pr(>|t|)"]
resid.man[,i] = residuals(m)
# did we reject it?
if ( summary(m)$coefficients[X, "Pr(>|t|)"] < alpha ) rej.man = rej.man + 1
}
# with function
samp.res = dataset_result( d = d,
X = X,
C = C,
Ys = Ys, # all outcome names
alpha = alpha,
center.stats = FALSE,
bhat.orig = NA )
resid.man = as.data.frame(resid.man)
names(resid.man) = Ys
expect_equal( rej.man, samp.res$rej )
expect_equal( bhats.man, samp.res$bhat )
expect_equal( tvals.man, samp.res$tvals )
expect_equal( pvals.man, samp.res$pvals )
expect_equal( as.matrix(resid.man), as.matrix(samp.res$resid) )
} )
# with centered test stats
test_that("dataset_result #2", {
cor = make_corr_mat( nX = 5,
nY = 20,
rho.XX = 0.16,
rho.YY = 0.1,
rho.XY = 0.1,
prop.corr = 1 )
d = sim_data( n = 50, cor = cor )
# try to confuse fn by choosing a different X as covariate of interest
Ys = names(d)[ grep( "Y", names(d) ) ]
X = "X2"
all.covars = names(d)[ grep( "X", names(d) ) ]
C = all.covars[ !all.covars == X ]
# do analysis manually
# choose an unusual alpha level to make sure it's working
alpha = 0.4
rej.man = 0
tvals.man = c()
bhats.man = c()
pvals.man = c()
resid.man = matrix( NA, nrow = 50, ncol = length(Ys) )
# fake original coefficients
bhat.orig = rnorm( n=length(Ys), mean = 0.8, sd = 2 )
for ( i in 1:length(Ys) ) {
m = lm( d[[ Ys[i] ]] ~ X1 + X2 + X3 + X4 + X5, data = d )
bhats.man[i] = coef(m)[[X]] - bhat.orig[i]
df = 50 - 5 - 1
se = summary(m)$coefficients[X, "Std. Error"]
tvals.man[i] = bhats.man[i] / se
pvals.man[i] = 2 * ( 1 - pt( abs( tvals.man[i] ), df = df ) )
resid.man[,i] = residuals(m)
# did we reject it?
if ( pvals.man[i] < alpha ) rej.man = rej.man + 1
}
# with function
samp.res = dataset_result( d = d,
X = X,
C = C,
Ys = Ys, # all outcome names
alpha = alpha,
center.stats = TRUE,
bhat.orig = bhat.orig )
resid.man = as.data.frame(resid.man)
names(resid.man) = Ys
expect_equal( rej.man, samp.res$rej )
expect_equal( bhats.man, samp.res$bhat )
expect_equal( tvals.man, samp.res$tvals )
expect_equal( pvals.man, samp.res$pvals )
expect_equal( as.matrix(resid.man), as.matrix(samp.res$resid) )
} )
###################### TEST FNS FOR SIMULATING DATA ######################
test_that("cell_corr #1", {
expect_equal( -0.1,
cell_corr( vname.1 = "X1",
vname.2 = "Y3",
rho.XX = 0,
rho.YY = 0.25,
rho.XY = -0.1,
nY = 6,
prop.corr = 1 ) )
expect_equal( 0.25,
cell_corr( vname.1 = "Y1",
vname.2 = "Y3",
rho.XX = 0,
rho.YY = 0.25,
rho.XY = -0.1,
nY = 6,
prop.corr = 1 ) )
expect_equal( 0,
cell_corr( vname.1 = "X2",
vname.2 = "Y3",
rho.XX = 0,
rho.YY = 0.25,
rho.XY = -0.1,
nY = 6,
prop.corr = 1 ) )
expect_equal( -0.1,
cell_corr( vname.1 = "X1",
vname.2 = "Y2",
rho.XX = 0,
rho.YY = 0.25,
rho.XY = -0.1,
nY = 10,
prop.corr = .2 ) )
expect_equal( 0,
cell_corr( vname.1 = "X1",
vname.2 = "Y3",
rho.XX = 0,
rho.YY = 0.25,
rho.XY = -0.1,
nY = 10,
prop.corr = .2 ) )
} )
test_that("make_corr_mat #1", {
# sanity checks
cor = make_corr_mat( nX = 1,
nY = 40,
rho.XX = 0,
rho.YY = 0.25,
rho.XY = 0.1,
prop.corr = 8/40 )
# do we have the right number of each type of correlation?
# only look at first row (correlations of X1 with everything else)
expect_equal( c( 1, rep(0.10, 8), rep(0, 40-8) ),
as.numeric( cor[1,] ) )
} )
test_that("make_corr_mat #1", {
cor = make_corr_mat( nX = 2,
nY = 40,
rho.XX = 0.35,
rho.YY = 0.25,
rho.XY = 0.1,
prop.corr = 8/40 )
d = sim_data( n = 10000, cor = cor )
# rho.XX correlations
expect_equal( cor(d$X1, d$X2), 0.35, tolerance = 0.05 )
# rho.XY correlations for non-null ones
names = paste( "Y", seq(1,8,1), sep="" )
expect_equal( as.numeric( cor( d$X1, d[, names] ) ),
rep( 0.1, 8 ),
tolerance = 0.05 )
# rho.XY correlations for null ones
names = paste( "Y", seq(9,40,1), sep="" )
expect_equal( as.numeric( cor( d$X1, d[, names] ) ),
rep( 0, 40-8 ),
tolerance = 0.05 )
# plot empirical vs. real correlations
#plot( as.numeric(cor(d)), as.numeric(as.matrix(cor)) ); abline( a = 0, b = 1, col="red")
} )
###################### TEST WESTFALL FNS ######################
test_that("adj_minP #1", {
# sanity check
B = 200
n.tests = 10
# generate fake p-values under strong null
p.bt = matrix( runif(B*n.tests, 0, 1), nrow = n.tests)
# generate fake p-values from real dataset
p = runif( n.tests, 0, .1)
p.adj = adj_minP( p, p.bt )
#plot(p, p.adj)
# manually adjust second p-value
mins = apply( p.bt, MARGIN = 2, FUN = min )
expect_equal( prop.table( table( mins <= p[2] ) )[["TRUE"]],
p.adj[2] )
})
test_that("adjust_Wstep #1", {
# # Sanity Check
# nX = 1
# nY = 3
# B = 5
#
# library(matrixcalc)
# library(mvtnorm)
#
# cor = make_corr_mat( nX = nX,
# nY = nY,
# rho.XX = 0,
# rho.YY = 0.25,
# rho.XY = 0.05,
# prop.corr = 1 )
#
# d = sim_data( n = 1000, cor = cor )
#
# samp.res = dataset_result( X = "X1",
# C = NA,
# Ys = c("Y1", "Y2", "Y3"),
# d = d,
# alpha = 0.05,
# center.stats = FALSE )
#
#
# # do 5 bootstraps
# resamps = resample_resid( X = "X1",
# C = NA,
# Ys = c("Y1", "Y2", "Y3"),
# d = d,
# alpha = 0.05,
# resid = samp.res$resid,
# bhat.orig = samp.res$bhats,
# B=5,
# cores = 8 )
# p.bt = t( resamps$p.bt )
# pvals = samp.res$pvals
pvals = c(0.00233103655078803, 0.470366742594242, 0.00290278216035089
)
p.bt = structure(c(0.308528665936264, 0.517319402377912, 0.686518314693482,
0.637306248855186, 0.106805510862352, 0.116705315041494, 0.0732076817175753,
0.770308936364482, 0.384405349738909, 0.0434358213611965, 0.41497067850141,
0.513471489744384, 0.571213377144122, 0.628054979652722, 0.490196884985226
), .Dim = c(5L, 3L))
# indicators of which hypothesis the sorted p-vals go with
sort(pvals)
r = c(1,3,2)
qstar = matrix( NA, nrow = nrow(p.bt), ncol = ncol(p.bt) )
for (i in 1:nrow(p.bt)) {
qstar[i,3] = p.bt[ i, r[3] ]
qstar[i,2] = min( qstar[i,3], p.bt[ i, r[2] ] )
qstar[i,1] = min( qstar[i,2], p.bt[ i, r[1] ] )
}
less = t( apply( qstar, MARGIN = 1,
function(row) row <= sort(pvals) ) )
p.tilde = colMeans(less)
# enforce monotonicity
p.tilde.sort = sort(p.tilde)
p.tilde.sort[2] = max( p.tilde.sort[1], p.tilde.sort[2] )
p.tilde.sort[3] = max( p.tilde.sort[2], p.tilde.sort[3] )
# put back in original order
p.adj = p.tilde.sort[r]
expect_equal( p.adj, adj_Wstep( p = pvals, p.bt = t(p.bt) ) )
})
###################### TEST RESAMPLE_RESID ######################
# generate data NOT under null and
# check that mean p-value is .5 in resamples
# and that we have the expected number of rejections
test_that("resample_resid #1", {
# Sanity Check
nX = 1
nY = 3
B = 5
library(matrixcalc)
library(mvtnorm)
cor = make_corr_mat( nX = nX,
nY = nY,
rho.XX = 0,
rho.YY = 0.25,
rho.XY = 0.05,
prop.corr = 1 )
d = sim_data( n = 1000, cor = cor )
# mean-center them
d = as.data.frame( apply( d, 2, function(col) col - mean(col) ) )
# bookmark
samp.res = dataset_result( X = "X1",
C = NA,
Ys = c("Y1", "Y2", "Y3"),
d = d,
alpha = 0.05,
center.stats = FALSE )
# do 5 bootstraps
resamps = resample_resid( X = "X1",
C = NA,
Ys = c("Y1", "Y2", "Y3"),
d = d,
alpha = 0.05,
resid = samp.res$resid,
bhat.orig = samp.res$bhats,
B=500,
cores = 8 )
expect_equal( mean(resamps$p.bt), .5, tolerance = 0.03 )
expect_equal( mean(resamps$rej.bt), .05*nY, tolerance = 0.03 )
} )
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.