R/Internal_functions.R

# Internal functions
# Written by Kevin Potter
# email: kevin.w.potter@gmail.com
# Please email me directly if you
# have any questions or comments
# Last updated 2018-11-14

# Table of contents
# 1) is_binary
# 2) std_difference
# 3) bootstrap_std_diff
# 4) compare_groups

# 1)
is_binary = function( x ) {
  # Purpose:
  # Determines whether a variable consists
  # of only two values.
  # Arguments:
  # x    - A vector of numeric values
  # Returns:
  # A logical value, TRUE if 'x' consists
  # only of two values.

  u = unique( x )
  if ( length(u[!is.na(u)]) == 2 ) {
    out = TRUE
  } else {
    out = FALSE
  }

  return( out )
}

# 2)
std_difference = function( x, test ) {
  # Purpose:
  # Computes the difference between
  # means for test vs. control after
  # standardizing the variable of interest.
  # Formula adjusts based on whether the
  # variable of interest is continuous
  # versus binary.
  # Arguments:
  # x    - A vector of numeric values
  # test - A binary vector indicating group
  #        membership for the test (1) versus
  #        control (0) groups
  # Returns:
  # The difference in means between groups
  # after standardizing the variable.

  # Check if variable is dichotomous
  ib = is_binary( x )

  if ( !ib ) {
    # If continuous

    # Standardize values
    m = mean( x )
    s = sd( x )
    z = (x-m)/s

    # Compute standardized difference

    # Means
    m_test = mean( z[ test == 1 ] )
    m_control = mean( z[ test == 0 ] )

    # Variances
    s2_test = var( z[ test == 1 ] )
    s2_control = var( z[ test == 0 ] )

    # Difference
    num = ( m_test - m_control )
    denom = sqrt( (s2_test + s2_control)/2 )
    out = num/denom

  } else {
    # If binary

    # Determine values
    u = unique( x )

    p_test = mean( x[ test == 1 ] == u[1] )
    p_control = mean( x[ test == 0 ] == u[1] )

    num = p_test - p_control
    denom =
      p_test*(1-p_test) +
      p_control*(1-p_control)
    denom = sqrt( denom/2 )
    out = num/denom

  }

  return( out )
}


# 3)
bootstrap_std_diff = function( x, test, nRep = 1000 ) {
  # Purpose:
  # ...
  # Arguments:
  # x    -
  # test -
  # nRep -
  # Returns:
  # ...

  # Sample size
  n = length( x )
  # Index
  i = 1:n

  # Compute observed standardized difference
  std_diff = std_difference( x, test )

  # Function to shuffle values (with replacement)
  # breaking ties with group assignment
  f = function( r ) {

    # New index, sampled with replacement
    ni = sample( i, replace = T )
    # New sample
    nx = x[ ni ]
    # Standardized difference
    out = std_difference( nx, test = test )

    return( out )
  }

  # Approximate distribution for std. diff. = 0
  iterations = sapply( 1:nRep, f )

  # Compute p-value
  p_val = sum( iterations > std_diff )/nRep
  if ( p_val > .5 ) p_val = 1 - p_val

  # Return output
  out = list(
    observed = std_diff,
    p.value = p_val,
    bootstraps = iterations
  )

  return( out )
}

# 4)
compare_groups = function( x, group, sel ) {
  # Purpose:
  # Reports the comparison between two groups
  # via t-tests (continuous) or a chi-square
  # test (categorical).
  # Arguments:
  # x     - A vector of values
  # group - A binary vector denoting group membership,
  #         where the test group = 1, and the control
  #         group = 0
  # sel   - A logical vector indicating the subset of
  #         the vectors to examine
  # Returns:
  # A vector giving the test statistic, degrees of
  # freedom, p-value, Bayes factor for the null,
  # posterior p-value for the difference, and the
  # type of test (1 = chi-square test, 2 = t-test).

  # Isolate subset of data to examine
  x = x[sel]
  group = group[sel]

  # Check if data is binary or continuous
  if ( is_binary( x ) ) {

    # Extract the two categories
    u = unique( x )

    # Create contingency table
    ct = matrix( NA, 2, 2 )
    colnames( ct ) = c( 'group_1', 'group_0' )
    rownames( ct ) = c( 'cat_1', 'cat_2' )
    ct[1,1] = sum( x[ group == 1 ] == u[1] )
    ct[2,1] = sum( x[ group == 1 ] == u[2] )
    ct[1,2] = sum( x[ group == 0 ] == u[1] )
    ct[2,2] = sum( x[ group == 0 ] == u[2] )

    # A) Test for proportions (Frequentist)
    pt = prop.test( t(ct) )

    # B) Bayes factor for test of proportions
    bf = BayesFactor::contingencyTableBF( ct,
                                          sampleType = "indepMulti",
                                          fixedMargin = "cols" )

    # Sample from posterior
    chains = BayesFactor::posterior( bf, iterations = 10000,
                                     progress = F )
    # Compute difference in proportions
    cat_1_given_group_1 = chains[,"pi[1,1]"] / chains[,"pi[*,1]"]
    cat_1_given_group_0 = chains[,"pi[1,2]"] / chains[,"pi[*,2]"]
    diff_prop = coda::mcmc(
      cat_1_given_group_1 - cat_1_given_group_0
    )
    post_p_val = sum( as.vector( diff_prop ) < 0 )/10000
    if ( post_p_val > .5 ) post_p_val = 1 - post_p_val

    # C) Standardized difference
    val = bootstrap_std_diff( x, group )

    # D) Save results
    out = numeric(8)
    names( out ) = c(
      'statistic',
      'df',
      'p_val',
      'bf_for_null',
      'post_p_val',
      'test',
      'std_diff',
      'std_diff_p_val' )
    out[1] = pt$statistic
    out[2] = pt$parameter
    out[3] = pt$p.value
    out[4] = as.vector( 1/bf )
    out[5] = post_p_val
    out[6] = 1
    out[7] = val$observed
    out[8] = val$p.value

  } else {

    # Create data frame
    dtbf = data.frame(
      x = x,
      group = group
    )

    # A) Independent samples t-test (Frequentist)
    tt = t.test( x ~ group, data = dtbf )

    # B) Bayes factor for independent samples t-test
    bf = BayesFactor::ttestBF( formula = x ~ group, data = dtbf )

    # Sample from posterior
    chains = BayesFactor::posterior( bf, iterations = 10000,
                                     progress = F )

    # Compute posterior p-value
    post_p_val = sum( as.vector( chains[,2] ) < 0 )/10000
    if ( post_p_val > .5 ) post_p_val = 1 - post_p_val

    # C) Standardized difference
    val = bootstrap_std_diff( x, group )

    # D) Save results
    out = numeric(8)
    names( out ) = c(
      'statistic',
      'df',
      'p_val',
      'bf_for_null',
      'post_p_val',
      'test',
      'std_diff',
      'std_diff_p_val' )
    out[1] = tt$statistic
    out[2] = tt$parameter
    out[3] = tt$p.value
    out[4] = as.vector( 1/bf )
    out[5] = post_p_val
    out[6] = 2
    out[7] = val$observed
    out[8] = val$p.value

  }

  return( out )
}
rettopnivek/guiMatchIt documentation built on May 17, 2019, 10:39 a.m.