# Internal functions
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2018-11-14
# Table of contents
# 1) is_binary
# 2) std_difference
# 3) bootstrap_std_diff
# 4) compare_groups
# 1)
is_binary = function( x ) {
# Purpose:
# Determines whether a variable consists
# of only two values.
# Arguments:
# x - A vector of numeric values
# Returns:
# A logical value, TRUE if 'x' consists
# only of two values.
u = unique( x )
if ( length(u[!is.na(u)]) == 2 ) {
out = TRUE
} else {
out = FALSE
}
return( out )
}
# 2)
std_difference = function( x, test ) {
# Purpose:
# Computes the difference between
# means for test vs. control after
# standardizing the variable of interest.
# Formula adjusts based on whether the
# variable of interest is continuous
# versus binary.
# Arguments:
# x - A vector of numeric values
# test - A binary vector indicating group
# membership for the test (1) versus
# control (0) groups
# Returns:
# The difference in means between groups
# after standardizing the variable.
# Check if variable is dichotomous
ib = is_binary( x )
if ( !ib ) {
# If continuous
# Standardize values
m = mean( x )
s = sd( x )
z = (x-m)/s
# Compute standardized difference
# Means
m_test = mean( z[ test == 1 ] )
m_control = mean( z[ test == 0 ] )
# Variances
s2_test = var( z[ test == 1 ] )
s2_control = var( z[ test == 0 ] )
# Difference
num = ( m_test - m_control )
denom = sqrt( (s2_test + s2_control)/2 )
out = num/denom
} else {
# If binary
# Determine values
u = unique( x )
p_test = mean( x[ test == 1 ] == u[1] )
p_control = mean( x[ test == 0 ] == u[1] )
num = p_test - p_control
denom =
p_test*(1-p_test) +
p_control*(1-p_control)
denom = sqrt( denom/2 )
out = num/denom
}
return( out )
}
# 3)
bootstrap_std_diff = function( x, test, nRep = 1000 ) {
# Purpose:
# ...
# Arguments:
# x -
# test -
# nRep -
# Returns:
# ...
# Sample size
n = length( x )
# Index
i = 1:n
# Compute observed standardized difference
std_diff = std_difference( x, test )
# Function to shuffle values (with replacement)
# breaking ties with group assignment
f = function( r ) {
# New index, sampled with replacement
ni = sample( i, replace = T )
# New sample
nx = x[ ni ]
# Standardized difference
out = std_difference( nx, test = test )
return( out )
}
# Approximate distribution for std. diff. = 0
iterations = sapply( 1:nRep, f )
# Compute p-value
p_val = sum( iterations > std_diff )/nRep
if ( p_val > .5 ) p_val = 1 - p_val
# Return output
out = list(
observed = std_diff,
p.value = p_val,
bootstraps = iterations
)
return( out )
}
# 4)
compare_groups = function( x, group, sel ) {
# Purpose:
# Reports the comparison between two groups
# via t-tests (continuous) or a chi-square
# test (categorical).
# Arguments:
# x - A vector of values
# group - A binary vector denoting group membership,
# where the test group = 1, and the control
# group = 0
# sel - A logical vector indicating the subset of
# the vectors to examine
# Returns:
# A vector giving the test statistic, degrees of
# freedom, p-value, Bayes factor for the null,
# posterior p-value for the difference, and the
# type of test (1 = chi-square test, 2 = t-test).
# Isolate subset of data to examine
x = x[sel]
group = group[sel]
# Check if data is binary or continuous
if ( is_binary( x ) ) {
# Extract the two categories
u = unique( x )
# Create contingency table
ct = matrix( NA, 2, 2 )
colnames( ct ) = c( 'group_1', 'group_0' )
rownames( ct ) = c( 'cat_1', 'cat_2' )
ct[1,1] = sum( x[ group == 1 ] == u[1] )
ct[2,1] = sum( x[ group == 1 ] == u[2] )
ct[1,2] = sum( x[ group == 0 ] == u[1] )
ct[2,2] = sum( x[ group == 0 ] == u[2] )
# A) Test for proportions (Frequentist)
pt = prop.test( t(ct) )
# B) Bayes factor for test of proportions
bf = BayesFactor::contingencyTableBF( ct,
sampleType = "indepMulti",
fixedMargin = "cols" )
# Sample from posterior
chains = BayesFactor::posterior( bf, iterations = 10000,
progress = F )
# Compute difference in proportions
cat_1_given_group_1 = chains[,"pi[1,1]"] / chains[,"pi[*,1]"]
cat_1_given_group_0 = chains[,"pi[1,2]"] / chains[,"pi[*,2]"]
diff_prop = coda::mcmc(
cat_1_given_group_1 - cat_1_given_group_0
)
post_p_val = sum( as.vector( diff_prop ) < 0 )/10000
if ( post_p_val > .5 ) post_p_val = 1 - post_p_val
# C) Standardized difference
val = bootstrap_std_diff( x, group )
# D) Save results
out = numeric(8)
names( out ) = c(
'statistic',
'df',
'p_val',
'bf_for_null',
'post_p_val',
'test',
'std_diff',
'std_diff_p_val' )
out[1] = pt$statistic
out[2] = pt$parameter
out[3] = pt$p.value
out[4] = as.vector( 1/bf )
out[5] = post_p_val
out[6] = 1
out[7] = val$observed
out[8] = val$p.value
} else {
# Create data frame
dtbf = data.frame(
x = x,
group = group
)
# A) Independent samples t-test (Frequentist)
tt = t.test( x ~ group, data = dtbf )
# B) Bayes factor for independent samples t-test
bf = BayesFactor::ttestBF( formula = x ~ group, data = dtbf )
# Sample from posterior
chains = BayesFactor::posterior( bf, iterations = 10000,
progress = F )
# Compute posterior p-value
post_p_val = sum( as.vector( chains[,2] ) < 0 )/10000
if ( post_p_val > .5 ) post_p_val = 1 - post_p_val
# C) Standardized difference
val = bootstrap_std_diff( x, group )
# D) Save results
out = numeric(8)
names( out ) = c(
'statistic',
'df',
'p_val',
'bf_for_null',
'post_p_val',
'test',
'std_diff',
'std_diff_p_val' )
out[1] = tt$statistic
out[2] = tt$parameter
out[3] = tt$p.value
out[4] = as.vector( 1/bf )
out[5] = post_p_val
out[6] = 2
out[7] = val$observed
out[8] = val$p.value
}
return( out )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.