testthat.R
In NRejections: Metrics for Multiple Testing with Correlated Outcomes

library(testthat)
library(devtools)
library(doParallel)
library(StepwiseTest)
library(matrixcalc)
library(foreach)
library(mvtnorm)

# # THIS TEST TAKES A WHILE BECAUSE IT RUNS 500 RESAMPLES
# # COMMENT IT OUT IF YOU WANT TO AVOID IT
# # test that increasing the correlation between outcomes increases width of null interval
# test_that("corr_tests #2", {
#   
#   ######## Low Correlation Between Outcomes ######## 
#   N = 250
#   
#   cor = make_corr_mat( nX = 3,
#                        nY = 100,
#                        rho.XX = 0,
#                        rho.YY = 0.05,
#                        rho.XY = 0,
#                        prop.corr = 1 )
#   
#   d = sim_data( n = N, cor = cor )
#   all.covars = names(d)[ grep( "X", names(d) ) ]
#   C = all.covars[ !all.covars == "X1" ]
#   Y = names(d)[ grep( "Y", names(d) ) ]
#   
#   res1 = corr_tests( d,
#                     X = "X1",
#                     C = C,
#                     Ys = Y,
#                     B = 500,
#                     alpha = 0.1,
#                     alpha.fam=0.1,
#                     method = c( "nreject", "bonferroni", "holm", "minP", "Wstep", "romano" ) )
#   
#   ######## Check Results of First Sample ########
#   # check inference: excess hits
#   expect_equal( as.numeric(res1$samp.res$rej) - as.numeric(res1$null.int[2]),
#                 res1$excess.hits )
#   
#   # check inference: critical value from global test
#   expect_equal( as.numeric( quantile( res1$nrej.bt, 1-0.1 ) ),
#                 as.numeric( res1$global.test$crit[ res1$global.test$method == "nreject"] ) )
#   
#   # check p-value of global test
#   expect_equal( sum( res1$nrej.bt >= res1$samp.res$rej ) / length( res1$nrej.bt ),
#                 res1$global.test$pval[ res1$global.test$method == "nreject"] )
#   
#   
#   # check results from original sample
#   # do analysis manually
#   alpha = 0.1
#   
#   rej.man = 0
#   tvals.man = c()
#   bhats.man = c()
#   pvals.man = c()
#   resid.man = matrix( NA, nrow = nrow(d), ncol = length(Y) )
#   
#   for ( i in 1:length(Y) ) {
#     m = lm( d[[ Y[i] ]] ~ X1 + X2 + X3, data = d )
#     bhats.man[i] = coef(m)[["X1"]]
#     tvals.man[i] = summary(m)$coefficients["X1","t value"]
#     pvals.man[i] = summary(m)$coefficients["X1", "Pr(>|t|)"]
#     resid.man[,i] = residuals(m)
#     
#     # did we reject it?
#     if ( summary(m)$coefficients["X1", "Pr(>|t|)"] < alpha ) rej.man = rej.man + 1
#   }  
#   
#   # check bhats
#   expect_equal( bhats.man, res1$samp.res$bhats )
#   expect_equal( tvals.man, res1$samp.res$tvals )
#   expect_equal( pvals.man, res1$samp.res$pvals )
#   expect_equal( as.numeric(as.matrix(resid.man)),
#                 as.numeric(as.matrix(res1$samp.res$resid)) )
#   
#   expect_equal( sum( pvals.man < alpha ),
#                 sum( res1$samp.res$rej ) )
#   
#   # check other global tests
#   expect_equal( res1$global.test$pval[ res1$global.test$method == "Wstep" ], 
#                 min( adj_Wstep( p = res1$samp.res$pvals, p.bt = res1$pvals.bt ) ) )
#   
#   expect_equal( res1$global.test$pval[ res1$global.test$method == "minP" ], 
#                 min( adj_minP( p = res1$samp.res$pvals, p.bt = res1$pvals.bt ) ) )
#   
#   expect_equal( res1$global.test$pval[ res1$global.test$method == "bonferroni" ], 
#                 min( p.adjust( res1$samp.res$pvals, method="bonferroni" ) ) )
#   
#   expect_equal( res1$global.test$pval[ res1$global.test$method == "holm" ], 
#                 min( p.adjust( res1$samp.res$pvals, method="holm" ) ) )
#   
#   expect_equal( res1$global.test$reject[ res1$global.test$method == "romano" ], 
#                 any( FWERkControl( res1$samp.res$tvals, as.matrix( res1$tvals.bt ), k = 1, alpha = .1 )$Reject == 1 ) )
#   
#   ######## Higher Correlation Between Outcomes ######## 
#   cor = make_corr_mat( nX = 3,
#                        nY = 100,
#                        rho.XX = 0,
#                        rho.YY = 0.25,
#                        rho.XY = 0,
#                        prop.corr = 1 )
#   
#   d = sim_data( n = N, cor = cor )
#   all.covars = names(d)[ grep( "X", names(d) ) ]
#   C = all.covars[ !all.covars == "X1" ]
#   Y = names(d)[ grep( "Y", names(d) ) ]
#   
#   res2 = corr_tests( d,
#                      X = "X1",
#                      C = C,
#                      Ys = Y,
#                      B = 500,
#                      alpha = 0.1,
#                      alpha.fam = 0.1,
#                      method = c( "nreject", "bonferroni", "holm", "minP", "Wstep", "romano" ) )
#   
#   
#   ######## Tests ######## 
#   # null interval should be wider for the second one
#   expect_equal( as.logical( res2$null.int[2] >= res1$null.int[2] ), TRUE )
#   
#   # p-value should be larger for the second one
#   expect_equal( as.logical( res2$global.test$pval[ res2$global.test$method == "nreject" ] >=
#                               res1$global.test$pval[ res1$global.test$method == "nreject" ] ), TRUE )
#   
# } )



# only checks a few things: 
# two of the global tests
# and the average number of rejections in resamples
test_that( "corr_tests #1", {
  library(carData)
  data(Soils)
  
  X = "pH"
  C = c("Na", "Conduc")
  Y = c("N", "Dens", "P", "Ca", "Mg", "K")
  
  res = corr_tests( Soils,
                    X = X,
                    Ys = Y,
                    B = 200,
                    alpha = 0.1,
                    method = c( "nreject", "bonferroni", "holm", "minP", "Wstep", "romano" ) )
  
  # should be about equal
  expect_equal( mean(res$nrej.bt),
                .10*length(Y),
                tolerance = 0.1 )
  
  # Bonferroni: should be exactly equal
  expect_equal( min( res$samp.res$pvals * length(Y) ),
                res$global.test$pval[2] )
  
  # Holm: should be exactly equal
  expect_equal( min( p.adjust( res$samp.res$pvals, method = "holm" ) ),
                res$global.test$pval[3] )
} )



###################### TEST FNS FOR APPLYING OUR METRICS ###################### 


# fix_input with extra covariates
# X1 is extra and should be removed
test_that("fix_input #2", {
  cor = make_corr_mat( nX = 1,
                       nY = 4,
                       rho.XX = 0,
                       rho.YY = 0.25,
                       rho.XY = 0,
                       prop.corr = 1 )
  
  d = sim_data( n = 20, cor = cor )
  all.covars = names(d)[ grep( "X", names(d) ) ]
  C = all.covars[ !all.covars == "X1" ]
  
  ##### Add Bad Input ######
  # insert missing data
  d[1,4] = NA
  
  # insert a decoy variable that should be removed in analysis
  d$X20 = rnorm( n = nrow(d) )
  d$X21 = rnorm( n = nrow(d) )
  
  # make one of the covariates not mean-centered
  d$X1 = d$X1 + 2
  
  d = fix_input( X="X1",
                 C=NA,
                 Ys=names(d)[ grep( "Y", names(d) ) ],
                 d = d )
  
  # check that it caught bad input
  expect_equal( c( "X20", "X21" ) %in% names(d), 
                c(FALSE, FALSE) )
  
  expect_equal( any( is.na(d) ), 
                FALSE )
} )



# fix_input with extra covariates
test_that("fix_input #1", {
  cor = make_corr_mat( nX = 5,
                       nY = 10,
                       rho.XX = -0.06,
                       rho.YY = 0.1,
                       rho.XY = -0.1,
                       prop.corr = 8/40 )
  
  d = sim_data( n = 20, cor = cor )
  all.covars = names(d)[ grep( "X", names(d) ) ]
  C = all.covars[ !all.covars == "X1" ]
  
  ##### Add Bad Input ######
  # insert missing data
  d[1,4] = NA
  
  # insert a decoy variable that should be removed in analysis
  d$X20 = rnorm( n = nrow(d) )
  d$X21 = rnorm( n = nrow(d) )
  
  # make one of the covariates not mean-centered
  d$X5 = d$X5 + 2
  
  d = fix_input( X="X1",
             C=C,
             Ys=names(d)[ grep( "Y", names(d) ) ],
             d = d )
  
  # check that it caught bad input
  expect_equal( c( "X20", "X21" ) %in% names(d), 
                c(FALSE, FALSE) )
  
  expect_equal( any( is.na(d) ), 
                FALSE )

} )




# fit_model doesn't need a test because we test it through the dataset_result tests

# without centering test stats
test_that("dataset_result #1", {

  cor = make_corr_mat( nX = 5,
                       nY = 2,
                       rho.XX = -0.06,
                       rho.YY = 0.1,
                       rho.XY = -0.1,
                       prop.corr = 8/40 )
  
  d = sim_data( n = 50, cor = cor )
  
  # try to confuse fn by choosing a different X as covariate of interest
  Ys = names(d)[ grep( "Y", names(d) ) ]
  X = "X2"
  all.covars = names(d)[ grep( "X", names(d) ) ]
  C = all.covars[ !all.covars == X ]
  

  # do analysis manually
  alpha = 0.05
  
  rej.man = 0
  tvals.man = c()
  bhats.man = c()
  pvals.man = c()
  resid.man = matrix( NA, nrow = 50, ncol = 2 )
  
  for ( i in 1:length(Ys) ) {
    m = lm( d[[ Ys[i] ]] ~ X1 + X2 + X3 + X4 + X5, data = d )
    bhats.man[i] = coef(m)[[X]]
    tvals.man[i] = summary(m)$coefficients[X,"t value"]
    pvals.man[i] = summary(m)$coefficients[X, "Pr(>|t|)"]
    resid.man[,i] = residuals(m)
    
    # did we reject it?
    if ( summary(m)$coefficients[X, "Pr(>|t|)"] < alpha ) rej.man = rej.man + 1
  
  }  
  
  # with function
  samp.res = dataset_result( d = d,
                             X = X,
                             C = C,
                             Ys = Ys,  # all outcome names
                             alpha = alpha,
                             center.stats = FALSE,
                             bhat.orig = NA )
  
  resid.man = as.data.frame(resid.man)
  names(resid.man) = Ys
  
  expect_equal( rej.man, samp.res$rej )
  expect_equal( bhats.man, samp.res$bhat )
  expect_equal( tvals.man, samp.res$tvals )
  expect_equal( pvals.man, samp.res$pvals )
  
  expect_equal( as.matrix(resid.man), as.matrix(samp.res$resid) )
} )



# with centered test stats
test_that("dataset_result #2", {
  
  cor = make_corr_mat( nX = 5,
                       nY = 20,
                       rho.XX = 0.16,
                       rho.YY = 0.1,
                       rho.XY = 0.1,
                       prop.corr = 1 )
  
  d = sim_data( n = 50, cor = cor )
  
  # try to confuse fn by choosing a different X as covariate of interest
  Ys = names(d)[ grep( "Y", names(d) ) ]
  X = "X2"
  all.covars = names(d)[ grep( "X", names(d) ) ]
  C = all.covars[ !all.covars == X ]
  
  
  # do analysis manually
  # choose an unusual alpha level to make sure it's working
  alpha = 0.4
  
  rej.man = 0
  tvals.man = c()
  bhats.man = c()
  pvals.man = c()
  resid.man = matrix( NA, nrow = 50, ncol = length(Ys) )
  
  # fake original coefficients
  bhat.orig = rnorm( n=length(Ys), mean = 0.8, sd = 2 )

  for ( i in 1:length(Ys) ) {
    m = lm( d[[ Ys[i] ]] ~ X1 + X2 + X3 + X4 + X5, data = d )
    bhats.man[i] = coef(m)[[X]] - bhat.orig[i]
    
    df = 50 - 5 - 1
    se = summary(m)$coefficients[X, "Std. Error"]
    tvals.man[i] = bhats.man[i] / se
    
    pvals.man[i] = 2 * ( 1 - pt( abs( tvals.man[i] ), df = df ) )
    
    resid.man[,i] = residuals(m)
    
    # did we reject it?
    if ( pvals.man[i] < alpha ) rej.man = rej.man + 1
  }  
  
  # with function
  samp.res = dataset_result( d = d,
                             X = X,
                             C = C,
                             Ys = Ys,  # all outcome names
                             alpha = alpha,
                             center.stats = TRUE,
                             bhat.orig = bhat.orig )
  
  resid.man = as.data.frame(resid.man)
  names(resid.man) = Ys
  
  expect_equal( rej.man, samp.res$rej )
  expect_equal( bhats.man, samp.res$bhat )
  expect_equal( tvals.man, samp.res$tvals )
  expect_equal( pvals.man, samp.res$pvals )
  expect_equal( as.matrix(resid.man), as.matrix(samp.res$resid) )
} )




###################### TEST FNS FOR SIMULATING DATA ###################### 

test_that("cell_corr #1", {

  expect_equal( -0.1,
    cell_corr( vname.1 = "X1",
               vname.2 = "Y3",
               rho.XX = 0,
               rho.YY = 0.25,
               rho.XY = -0.1,
               nY = 6,
               prop.corr = 1 ) )
  
  expect_equal( 0.25,
    cell_corr( vname.1 = "Y1",
               vname.2 = "Y3",
               rho.XX = 0,
               rho.YY = 0.25,
               rho.XY = -0.1,
               nY = 6,
               prop.corr = 1 ) )

  expect_equal( 0,
    cell_corr( vname.1 = "X2",
               vname.2 = "Y3",
               rho.XX = 0,
               rho.YY = 0.25,
               rho.XY = -0.1,
               nY = 6,
               prop.corr = 1 ) )
  
  expect_equal( -0.1,
    cell_corr( vname.1 = "X1",
               vname.2 = "Y2",
               rho.XX = 0,
               rho.YY = 0.25,
               rho.XY = -0.1,
               nY = 10,
              prop.corr = .2 ) )
    
  expect_equal( 0,
    cell_corr( vname.1 = "X1",
               vname.2 = "Y3",
               rho.XX = 0,
               rho.YY = 0.25,
               rho.XY = -0.1,
               nY = 10,
               prop.corr = .2 ) )
} )


test_that("make_corr_mat #1", {
  # sanity checks
  cor = make_corr_mat( nX = 1,
                  nY = 40,
                  rho.XX = 0,
                  rho.YY = 0.25,
                  rho.XY = 0.1,
                  prop.corr = 8/40 )

  # do we have the right number of each type of correlation?
  # only look at first row (correlations of X1 with everything else)
  expect_equal( c( 1, rep(0.10, 8), rep(0, 40-8) ),
                   as.numeric( cor[1,] ) )
} )


test_that("make_corr_mat #1", {
  cor = make_corr_mat( nX = 2,
                       nY = 40,
                       rho.XX = 0.35,
                       rho.YY = 0.25,
                       rho.XY = 0.1,
                       prop.corr = 8/40 )
  
  d = sim_data( n = 10000, cor = cor )
  
  # rho.XX correlations
  expect_equal( cor(d$X1, d$X2), 0.35, tolerance = 0.05 )
  
  # rho.XY correlations for non-null ones
  names = paste( "Y", seq(1,8,1), sep="" )
  expect_equal( as.numeric( cor( d$X1, d[, names] ) ),
                rep( 0.1, 8 ),
                tolerance = 0.05 )
  
  # rho.XY correlations for null ones
  names = paste( "Y", seq(9,40,1), sep="" )
  expect_equal( as.numeric( cor( d$X1, d[, names] ) ),
                rep( 0, 40-8 ),
                tolerance = 0.05 )

  # plot empirical vs. real correlations
  #plot( as.numeric(cor(d)), as.numeric(as.matrix(cor)) ); abline( a = 0, b = 1, col="red")
} )


###################### TEST WESTFALL FNS ###################### 

test_that("adj_minP #1", {
  # sanity check
  B = 200
  n.tests = 10

  # generate fake p-values under strong null
  p.bt = matrix( runif(B*n.tests, 0, 1), nrow = n.tests)

  # generate fake p-values from real dataset
  p = runif( n.tests, 0, .1)

  p.adj = adj_minP( p, p.bt )
  #plot(p, p.adj)

  # manually adjust second p-value
  mins = apply( p.bt, MARGIN = 2, FUN = min )

  expect_equal( prop.table( table( mins <= p[2] ) )[["TRUE"]],
                p.adj[2] )
})



test_that("adjust_Wstep #1", {
  # # Sanity Check
  # nX = 1
  # nY = 3
  # B = 5
  # 
  # library(matrixcalc)
  # library(mvtnorm)
  # 
  # cor = make_corr_mat( nX = nX,
  #                      nY = nY,
  #                      rho.XX = 0,
  #                      rho.YY = 0.25,
  #                      rho.XY = 0.05,
  #                      prop.corr = 1 )
  # 
  # d = sim_data( n = 1000, cor = cor )
  # 
  # samp.res = dataset_result( X = "X1",
  #                 C = NA,
  #                 Ys = c("Y1", "Y2", "Y3"), 
  #                 d = d,
  #                 alpha = 0.05,
  #                 center.stats = FALSE )
  #               
  # 
  # # do 5 bootstraps
  # resamps = resample_resid( X = "X1",
  #                           C = NA,
  #                           Ys = c("Y1", "Y2", "Y3"),
  #                           d = d,
  #                           alpha = 0.05,
  #                           resid = samp.res$resid,
  #                           bhat.orig = samp.res$bhats,
  #                           B=5,
  #                           cores = 8 )
  # p.bt = t( resamps$p.bt )
  # pvals = samp.res$pvals
  
  pvals = c(0.00233103655078803, 0.470366742594242, 0.00290278216035089
  )

  p.bt = structure(c(0.308528665936264, 0.517319402377912, 0.686518314693482,
                     0.637306248855186, 0.106805510862352, 0.116705315041494, 0.0732076817175753,
                     0.770308936364482, 0.384405349738909, 0.0434358213611965, 0.41497067850141,
                     0.513471489744384, 0.571213377144122, 0.628054979652722, 0.490196884985226
  ), .Dim = c(5L, 3L))
  

  # indicators of which hypothesis the sorted p-vals go with
  sort(pvals)
  r = c(1,3,2)
  
  qstar = matrix( NA, nrow = nrow(p.bt), ncol = ncol(p.bt) )
  
  for (i in 1:nrow(p.bt)) {
    qstar[i,3] = p.bt[ i, r[3] ]
    qstar[i,2] = min( qstar[i,3], p.bt[ i, r[2] ] )
    qstar[i,1] = min( qstar[i,2], p.bt[ i, r[1] ] )
  }
  
  less = t( apply( qstar, MARGIN = 1,
                function(row) row <= sort(pvals) ) )
  
  p.tilde = colMeans(less)
  
  # enforce monotonicity
  p.tilde.sort = sort(p.tilde)
  p.tilde.sort[2] = max( p.tilde.sort[1], p.tilde.sort[2] )
  p.tilde.sort[3] = max( p.tilde.sort[2], p.tilde.sort[3] )
  
  # put back in original order
  p.adj = p.tilde.sort[r]
  
  expect_equal( p.adj, adj_Wstep( p = pvals, p.bt = t(p.bt) ) )
})




###################### TEST RESAMPLE_RESID ###################### 

# generate data NOT under null and
# check that mean p-value is .5 in resamples
# and that we have the expected number of rejections

test_that("resample_resid #1", {
  # Sanity Check
  nX = 1
  nY = 3
  B = 5
  
  library(matrixcalc)
  library(mvtnorm)
  
  cor = make_corr_mat( nX = nX,
                       nY = nY,
                       rho.XX = 0,
                       rho.YY = 0.25,
                       rho.XY = 0.05,
                       prop.corr = 1 )
  
  d = sim_data( n = 1000, cor = cor )
  # mean-center them
  d = as.data.frame( apply( d, 2, function(col) col - mean(col) ) )
  
  # bookmark
  samp.res = dataset_result( X = "X1",
                             C = NA,
                             Ys = c("Y1", "Y2", "Y3"), 
                             d = d,
                             alpha = 0.05,
                             center.stats = FALSE )
  
  # do 5 bootstraps
  resamps = resample_resid( X = "X1",
                            C = NA,
                            Ys = c("Y1", "Y2", "Y3"),
                            d = d,
                            alpha = 0.05,
                            resid = samp.res$resid,
                            bhat.orig = samp.res$bhats,
                            B=500,
                            cores = 8 )
  
  expect_equal( mean(resamps$p.bt), .5, tolerance = 0.03 )
  expect_equal( mean(resamps$rej.bt), .05*nY, tolerance = 0.03 )
  
} )
Any scripts or data that you put into this service are public.
NRejections documentation built on July 9, 2020, 5:08 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
NRejections
Metrics for Multiple Testing with Correlated Outcomes

tests/testthat/testthat.R
In NRejections: Metrics for Multiple Testing with Correlated Outcomes

Try the NRejections package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

NRejections Metrics for Multiple Testing with Correlated Outcomes

tests/testthat/testthat.R In NRejections: Metrics for Multiple Testing with Correlated Outcomes

Try the NRejections package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

NRejections
Metrics for Multiple Testing with Correlated Outcomes

tests/testthat/testthat.R
In NRejections: Metrics for Multiple Testing with Correlated Outcomes