R/psy.phd.R

globalVariables(c('..scaled..'))

#' Set effect size for a study
#'
#' Set effect size for a study.
#'
#' Calculates the standardised mean difference (Cohen's d) and its 95% confidence interval for one study.
#'
#' @param id identifier for result
#' @param data data frame with publication and group columns
#' @param m1 mean for group 1
#' @param m2 mean for group 2
#' @param sd standard deviation (typically pooled)
#' @param n1 n in group 1
#' @param n2 n in group 2
#' @importFrom dplyr mutate
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom tibble tibble
#' @export
#' @return tibble
set_effect <- function(id, data, m1, m2, sd, n1, n2) {
  df <- tibble(
    id     = id,
    mean1  = m1,
    mean2  = m2,
    study  = data$publication,
    d      = d(m1, m2, sd), ci = 0, l = 0, u = 0
  )
  df <- df %>%
    mutate(ci = d_ci95(d, n1, n2)) %>%
    mutate(
      l     = d - .data$ci,
      u     = d + .data$ci,
      group = data$group
    )
}

#' Convert 95% confidence interval to standard error
#'
#' Convert 95% confidence interval to standard error
#'
#' https://training.cochrane.org/handbook/current/chapter-06#section-6-3-1
#'
#' @param u upper interval
#' @param l lower interval
#' @export
#' @return numeric
ci95_to_se <- function(u, l) { (u - l) / 3.92 }
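
# Worked example (illustrative numbers): a reported 95% CI of [0.20, 1.00] has
# width 0.80, so the implied standard error is 0.80 / 3.92:
#   ci95_to_se(u = 1.00, l = 0.20)  # ~0.204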

#' 95% confidence interval for Cohen's d (Rosnow & Rosenthal, 2009)
#'
#' 95% confidence interval for Cohen's d (Rosnow & Rosenthal, 2009).
#'
#' @param d Cohen's d
#' @param n1 n in group1
#' @param n2 n in group2
#' @export
#' @return numeric
d_ci95 <- function(d, n1, n2) {
  df <- n1 + n2 - 2
  sqrt((((n1 + n2) / (n1 * n2)) + (d^2 / (2 * df))) * ((n1 + n2) / df))
}
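
# Worked example (illustrative numbers): with d = 0.5 and n1 = n2 = 20, df = 38
# and sqrt(((40 / 400) + (0.25 / 76)) * (40 / 38)) is roughly 0.33:
#   d_ci95(d = 0.5, n1 = 20, n2 = 20)  # ~0.33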

#' Mean difference
#'
#' Mean difference
#'
#' Returns (m1 - m2) / sd. If sd is the pooled standard deviation, then this is Hedges' g.
#'
#' @param m1 mean 1
#' @param m2 mean 2
#' @param sd standard deviation
#' @export
#' @return numeric
d <- function(m1, m2, sd) {
  # https://www.statisticshowto.com/hedges-g/
  (m1 - m2) / sd
}

#' Pooled standard deviation
#'
#' Pooled standard deviation
#'
#' Returns the pooled standard deviation for two groups with same or different n.
#'
#' @param n1 n for group 1
#' @param n2 n for group 2
#' @param sd1 standard deviation for group 1
#' @param sd2 standard deviation for group 2
#' @export
#' @return numeric
sd_pooled <- function(n1, n2, sd1, sd2) {
  # https://www.statisticshowto.com/pooled-standard-deviation/
  sqrt(((n1 - 1) * sd1^2 + (n2 - 1) * sd2^2) / (n1 + n2 - 2))
}
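
# Worked example (illustrative numbers): two groups with means 510 and 480,
# SDs 40 and 50, and n = 25 each:
#   s <- sd_pooled(n1 = 25, n2 = 25, sd1 = 40, sd2 = 50)  # sqrt(2050) ~= 45.28
#   d(m1 = 510, m2 = 480, sd = s)                         # 30 / 45.28 ~= 0.66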

#' Exclude RT outliers
#'
#' Exclude RT outliers.
#'
#' Returns a list of data frames with keys 'ok', 'too_fast', and 'too_slow',
#' which contain the rows of df where the RT met the inclusion criteria, was too
#' fast, or was too slow, respectively.
#'
#' @param df data frame Must contain columns p, rt
#' @param fast numeric RTs below this value (ms) are excluded as too fast
#' @param slow numeric RTs more than this many SDs above the mean are excluded as too slow
#' @importFrom dplyr filter group_by ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd
#' @export
#' @return List of data frames
exclude_rt <- function(df, fast=50, slow=3) {
  # Ben:
  # 1.  If you are aggregating data before modelling it then you need to exclude
  # them. I would keep obs > 50ms and < 3 SD (calculated within each __
  # participant__). 200ms is too long a window to be excluding observations for
  # this experiment. There are multiple components to an RT. Although an RT of <
  # 100ms might be implausible for this experiment, I don’t think you can
  # immediately conclude that there is no signal in all responses < 100ms.
  # Multiple errors will summate but may also be correlated - I don’t think you
  # gain enough by dropping the data to warrant it.
  #
  # 2. If you are running a mixed-model within-participant then you can just
  # include them. Mixed models perform "shrinkage" so that extreme values are
  # down-weighted, and especially so if they are extreme within-participants.
  # Provided you have enough good data they won’t have much effect.

  too_fast <- df %>% filter(.data$rt < fast)
  too_slow <- df %>% filter(.data$rt > mean(.data$rt) + slow * sd(.data$rt))
  ok       <- df %>% filter(.data$rt > fast & .data$rt < mean(.data$rt) + slow * sd(.data$rt))

  list(ok = ok, too_fast = too_fast, too_slow = too_slow)
}
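
# Minimal usage sketch (hypothetical data frame, not from the package's data):
#   rts <- data.frame(p = rep(1:2, each = 3),
#                     rt = c(30, 450, 2000, 480, 60, 510))
#   split_rts <- exclude_rt(rts, fast = 50, slow = 3)
#   nrow(split_rts$too_fast)  # trials faster than 50 ms
#   nrow(split_rts$ok)        # trials retained for analysis
# Note the slow cut-off is computed over the data frame as passed in; group it
# by participant first if per-participant cut-offs are wanted.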

#' Process demographics survey
#'
#' Process demographics survey.
#'
#' @param survey Data frame containing demographics survey results pre-processed by \code{expfactory::process_expfactory_survey}
#' @param participants Data frame with columns p, condition, token
#' @param by String, column to join survey to participants (default='token')
#' @param extra Boolean, process extra demographics columns (default=FALSE)
#' @importFrom dplyr filter left_join mutate rename select
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom tidyr pivot_wider
#' @export
#' @return Data frame
process_demographics_survey <- function(survey, participants, by = 'token', extra = FALSE) {
  demographics <- survey %>%
    # DEBT: Some data has multiple values for hand which needs, err, handling
    filter(.data$question != 'Which is your dominant hand?') %>%
    pivot_wider(names_from = .data$question, values_from = .data$value) %>%
    rename(
      age = .data$`How old are you (in years)?`,
      sex = .data$`What was your assigned sex at birth?`
#      , hand = .data$`Which is your dominant hand?`
    ) %>%
    mutate(age = as.numeric(.data$age)) %>%
    mutate(sex = replace(.data$sex, .data$sex == 0, 'M')) %>%
    mutate(sex = replace(.data$sex, .data$sex == 1, 'F')) %>%
    mutate(sex = as.factor(.data$sex))

  # always return these columns
  columns <- c('p', 'condition', 'age', 'sex')

  # extra demographics survey questions
  if (extra) {
    demographics <- demographics %>%
      rename(
        meditation_times = .data$`Approximately how many times in your life have you meditated?`,
        meditation_hours = .data$`Approximately how many hours in your life have you spent meditating?`
      ) %>%
      mutate(meditation_times = as.numeric(.data$meditation_times),
             meditation_hours = as.numeric(.data$meditation_hours))
    columns <- c(columns, 'meditation_times', 'meditation_hours')
  }

  demographics <- left_join(participants, demographics, by = by) %>%
    select(columns)
}

### util

#' Get participants
#'
#' Get participants.
#'
#' @param df Data frame
#' @param c Condition
#' @importFrom dplyr count filter select ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return List
#'
get_participants <- function(df, c) {
  p <-
    df %>%
    ungroup() %>%
    filter(.data$condition == c) %>%
    select(p) %>%
    unique()
  list('participants' = p, 'n' = count(p))
}

### Posner

#' Process Posner task data files
#'
#' Process Posner task data files.
#'
#' @param task_file String
#' @param p Numeric Participant number
#' @importFrom dplyr filter mutate select
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom utils read.csv
#' @export
#' @return Data frame
process_posner <- function(task_file, p) {
  if(!file.exists(task_file)){
    return(data.frame(p=p, file=task_file))
  }
  df <- read.csv(task_file, header = TRUE) %>%
    filter(.data$exp_stage == 'test') %>%
    filter(.data$trial_type == 'poldrack-single-stim') %>%
    filter(! is.na(.data$correct_response)) %>%
    select(.data$rt,.data$key_press,.data$stim_duration,.data$correct,
           .data$correct_response,.data$location,.data$cue,.data$trial_num) %>%
    mutate(p = as.numeric(p)) %>%
    mutate(file=task_file)
}

### Attentional Blink

## demographics

#' Sex totals
#'
#' Sex totals.
#'
#' @param df Data frame
#' @importFrom dplyr group_by summarise
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return Data frame
#'
sex_totals <- function(df) {
  df %>%
    group_by(.data$sex) %>%
    summarise(sex_count = length(.data$sex))
}

#' Age summary
#'
#' Age summary.
#'
#' @param df Data frame
#' @importFrom dplyr summarise
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd
#' @export
#' @return Data frame
#'
age_summary <- function(df) {
  df %>%
    summarise(mean_age = round(mean(.data$age), 2), sd_age = round(sd(.data$age), 2))
}

#' Age summary by group
#'
#' Age summary by group.
#'
#' @param df Data frame
#' @importFrom dplyr group_by summarise
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd
#' @export
#' @return Data frame
#'
age_summary_by_group <- function(df) {
  df %>%
    group_by(.data$condition) %>%
    summarise(mean_age = round(mean(.data$age), 2), sd_age = round(sd(.data$age), 2))
}

## rsvp

#' Accuracy
#'
#' Accuracy.
#'
#' @param x Data frame for one group, as supplied by \code{dplyr::group_modify}
#' @param y Data frame of grouping keys supplied by \code{dplyr::group_modify} (unused)
#' @importFrom dplyr summarise_all
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @return Data frame
#'
accuracy <- function(x, y) {
  x %>% summarise_all(mean) # on a logical vector calculates % TRUE (correct)
}

#' RSVP Accuracy
#'
#' RSVP Accuracy.
#'
#' @param rsvp Data frame
#' @param participants Data frame
#' @param combined Defaults to FALSE
#' @importFrom dplyr do group_by group_modify left_join mutate select ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return Data frame
#'
rsvp_accuracy <- function(rsvp, participants, combined=FALSE) {
  if (! combined) {
    df <- rsvp %>%
      select(.data$p, .data$lag, .data$t1_correct, .data$t2.t1) %>%
      group_by(.data$p, .data$lag)
  } else {
    df <- rsvp %>%
      select(.data$study, .data$p, .data$lag, .data$t1_correct, .data$t2.t1) %>%
      group_by(.data$study, .data$p, .data$lag)
  }

  # common to uncombined/combined
  df <- df %>%
    group_modify(accuracy) %>%
    ungroup() %>%
    mutate(lag = as.factor(.data$lag), t1_accuracy = .data$t1_correct * 100,
           t2_accuracy = .data$t2.t1 * 100)

  if (! combined) {
    df <- df %>%
      left_join(participants, by=c('p')) %>%
      mutate(p = as.factor(.data$p)) %>%
      select(.data$p, .data$condition, .data$lag, .data$t1_accuracy, .data$t2_accuracy)
  } else {
    df <- df %>%
      left_join(participants, by=c('study','p')) %>%
      mutate(study = as.factor(.data$study), p = as.factor(.data$p)) %>%
      select(.data$study, .data$p, .data$condition, .data$lag, .data$t1_accuracy,
             .data$t2_accuracy)
  }

  as.data.frame(df) # mute later BayesFactor conversion from tibble to data frame
}

#
#' T1 accuracy by participant
#'
#' T1 accuracy by participant.
#'
#' @param rsvp_accuracy Data frame
#' @importFrom dplyr group_by select summarise ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return Data frame
#'
t1_accuracy <- function(rsvp_accuracy) {
  rsvp_accuracy %>%
    select(.data$p, .data$condition, .data$t1_accuracy) %>%
    group_by(.data$p, .data$condition) %>%
    summarise(mean_t1_accuracy = mean(.data$t1_accuracy)) %>%
    ungroup()
}

#' Attentional blink, descriptive stats
#'
#' Attentional blink, descriptive stats.
#'
#' @param x Data frame
#' @param y Data frame of grouping keys (for use with \code{dplyr::group_modify}; unused)
#' @importFrom dplyr mutate_if rename summarise
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd
#' @export
#' @return Data frame
#'
ab_descriptives <- function(x, y) {
  df <- x %>%
    rename(ab = .data$`blink`) %>%
    summarise(m_t1 = mean(.data$mean_t1_accuracy), sd_t1 = sd(.data$mean_t1_accuracy),
              m_lag3 = mean(.data$`3`), sd_lag3 = sd(.data$`3`),
              m_lag8 = mean(.data$`8`), sd_lag8 = sd(.data$`8`),
              m_ab = mean(.data$ab), sd_ab = sd(.data$ab)) %>%
    mutate_if(is.numeric, round, 2)
}

#' Attentional blink, t-tests
#'
#' Attentional blink, t-tests.
#'
#' @param x Data frame
#' @param y Data frame of grouping keys (for use with \code{dplyr::group_modify}; unused)
#' @param lag1 Numeric
#' @param lag2 Numeric
#' @importFrom BayesFactor ttestBF
#' @importFrom dplyr filter
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd t.test
#' @importFrom tibble tibble
#' @export
#' @return Data frame
#'
ab_t_test <- function(x, y, lag1=3, lag2=8) {
  df <- x %>%
    filter(.data$lag %in% c(lag1,lag2))
  # TECH_DEBT: Best I could come up with for returning objects in a data frame i.e. wrap in list()s
  tibble(t = list(t = t.test(t2_accuracy ~ lag, data = df, paired=TRUE)),
         bayes_t = list(bf = ttestBF(x = df$t2_accuracy[df$lag == lag1],
                                     y = df$t2_accuracy[df$lag == lag2],
                                     paired = TRUE))
  )
}
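
# Usage sketch (assumes a data frame with columns lag and t2_accuracy; the
# second argument is ignored):
#   res <- ab_t_test(rsvp_accuracy_df, NULL, lag1 = 3, lag2 = 8)
#   res$t[[1]]        # htest object from t.test()
#   res$bayes_t[[1]]  # BayesFactor object from ttestBF()
# rsvp_accuracy_df is a placeholder name for the output of rsvp_accuracy().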

#' Attentional blink contrast t-tests.
#'
#' Attentional blink contrast t-tests.
#'
#' Does _not_ correct for multiple comparisons.
#' t-tests are for _paired_ samples.
#'
#' @param df Data frame
#' @importFrom BayesFactor ttestBF
#' @importFrom dplyr bind_rows filter
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd t.test
#' @importFrom tibble tibble
#' @export
#' @return Data frame
#'
blink_contrasts <- function(df) {
  d <- df %>% filter(.data$condition %in% c('fam','omm'))
  # TECH_DEBT: Best I could come up with for returning objects in a data frame i.e. wrap in list()s
  fo <- tibble(
    condition = 'fam-omm',
    t = list(t = t.test(blink ~ condition, data = d, paired=TRUE)),
    bayes_t = list(bf = ttestBF(x = d$blink[d$condition == 'fam'],
                                y = d$blink[d$condition == 'omm'],
                                paired = TRUE))
  )
  d <- df %>% filter(.data$condition %in% c('fam','control'))
  fc <- tibble(
    condition = 'fam-control',
    t = list(t = t.test(blink ~ condition, data = d, paired=TRUE)),
    bayes_t = list(bf = ttestBF(x = d$blink[d$condition == 'fam'],
                                y = d$blink[d$condition == 'control'],
                                paired = TRUE))
  )
  d <- df %>% filter(.data$condition %in% c('omm','control'))
  oc <- tibble(
    condition = 'omm-control',
    t = list(t = t.test(blink ~ condition, data = d, paired=TRUE)),
    bayes_t = list(bf = ttestBF(x = d$blink[d$condition == 'omm'],
                                y = d$blink[d$condition == 'control'],
                                paired = TRUE))
  )
  bind_rows(fo,fc,oc)
}

#' Calculate accuracy difference between lag 3 and lag 8
#'
#' Calculate accuracy difference between lag 3 and lag 8.
#'
#' @param rsvp_accuracy_3_8 Data frame
#' @importFrom dplyr group_by mutate select ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom tidyr spread
#' @export
#' @return Data frame
#'
blink <- function(rsvp_accuracy_3_8) {
  rsvp_accuracy_3_8 %>%
   select(-t1_accuracy) %>%
   group_by(.data$p) %>%
   spread(key = .data$lag, value = .data$t2_accuracy) %>%
   mutate(blink = .data$`8` - .data$`3`) %>%
   ungroup()
}
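
# Worked sketch: for a participant with t2_accuracy 55 at lag 3 and 80 at lag
# 8, blink() spreads these into columns `3` and `8` and returns
# blink = 80 - 55 = 25 percentage points; larger values mean a larger
# attentional blink.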

#' RSVP 3-8 plot
#'
#' RSVP 3-8 plot.
#'
#' @param rsvp_accuracy Data frame
#' @importFrom dplyr filter
#' @importFrom ggplot2 aes element_text geom_hline ggplot mean_cl_boot position_dodge stat_summary theme xlab ylab
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return ggplot
#'
rsvp_3_8_plot <- function(rsvp_accuracy) {
  rsvp_accuracy_3_8 <- rsvp_accuracy %>%
    filter(.data$lag %in% c(3,8))

  rsvp_accuracy_3_8 %>%
    ggplot(aes(.data$lag, .data$t2_accuracy, group=.data$condition, color=.data$condition)) +
    stat_summary(geom="pointrange", fun.data=mean_cl_boot, position=position_dodge(width=.1)) +
    stat_summary(geom="line", fun.data=mean_cl_boot, position=position_dodge(width=.1), size=1.2) +
    geom_hline(yintercept = 0) +
    ylab("T2|T1 accuracy (% with 95% bootstrap CI)") + xlab("Lag") +
    theme(text = element_text(size=22))
}

# DEPRECATED: written against tidystats 0.3 API which changed radically in 0.4
# report_collapsed_t <- function(freq, bayes) {
#   bayes <- as.data.frame(bayes)
#   results <- list()
#   results <- add_stats(results, freq, identifier = 'freq')
#   bf    <- bayes[1,1]
#   if (bf > 100) { bf <- round(bf,0) } else { bf <- round(bf,3) }
#   s     <- report('freq', results = results) # tidy frequentist string
#   s     <- gsub(", 95.*$",'',s)              # remove CI
#   paste0(s,', *BF* = ', bf)
# }

# DEPRECATED: written against tidystats 0.3 API which changed radically in 0.4
# report_t <- function(df, results, c) {
#   bayes <- as.data.frame(df[df[, 'condition'] == c,'bayes_t'][[1]])
#   bf    <- bayes[1,1]
#   if (bf > 100) { bf <- round(bf,0) } else { bf <- round(bf,3) }
#   s     <- report(c, results = results) # tidy frequentist string
#   s     <- gsub(", 95.*$",'',s)         # remove CI
#   paste0(s,', *BF* = ', bf)
# }

# DEPRECATED: written against tidystats 0.3 API which changed radically in 0.4
# report_f <- function(df_n, df_d, f, p) {
#   p <- if(p < .001) '< .001' else paste0('= ',round(p,2))
#   paste0('*F*(', as.integer(df_n), ',', round(df_d,2), ') = ', round(f, 2), ', *p* ', p)
# }

# t = t.test() object, m = mean difference
sem <- function(t, m) {
  m / t$statistic # SEM = mean difference / t statistic
}

#' Returns values for Aladins Bayesian t-tests BF_t()
#'
#' Returns values for Aladins Bayesian t-tests BF_t().
#'
#' @param conditions Character vector of condition names
#' @param t_df Data frame with a 'condition' column and a 't' list-column of t.test() objects
#' @param mean_diff_df Data frame with 'condition' and 'meandiff' columns
#' @importFrom dplyr case_when filter mutate select
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return Data frame
#'
aladins <- function(conditions, t_df, mean_diff_df) {
  df <- data.frame('condition' = conditions, meandiff = 0, sem = 0, df = 0)
  for (c in conditions) {
    t <- t_df[t_df[, 'condition'] == c,'t'][[1]][[1]]
    ## get values for BF_t() test
    md <- as.numeric(mean_diff_df %>%
                       filter(.data$condition == c) %>%
                       select(.data$meandiff))
    # SEM for condition
    df <- df %>%
      mutate(
        sem      = case_when(.data$condition == c ~ sem(t, md),  TRUE ~ .$sem),
        meandiff = case_when(.data$condition == c ~ md,          TRUE ~ .$meandiff),
        df       = case_when(.data$condition == c ~ t$parameter, TRUE ~ .$df)
      )
  }
  return(df)
}

#' Report Bayesian t-test in APA format.
#'
#' Report Bayesian t-test in APA format.
#'
#' @param bayes Bayesian t-test object
#' @export
#' @return String
#'
apa_bayes_t <- function(bayes) {
  bayes <- as.data.frame(bayes)
  bf    <- bayes[1,1]
  if (bf > 100) { bf <- round(bf,0) } else { bf <- sprintf("%.2f", round(bf,2)) }
  paste0('$BF$ = ', prettyNum(bf, big.mark = ',', trim = TRUE))
}

#' Format Bayes Factor.
#'
#' Format Bayes Factor.
#'
#' @param bf Bayes Factor object
#' @export
#' @return Character
#'
format_bf <- function(bf) {
  bf     <- as.data.frame(bf)
  result <- bf[1,1]
  if (result > 100) { result <- round(result,0) } else { result <- sprintf("%.2f", round(result,2)) }
  prettyNum(result, big.mark = ',', trim = TRUE)
}
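
# Usage sketch (illustrative; assumes the BayesFactor package is available):
#   bf <- BayesFactor::ttestBF(x = rnorm(20, 0.5), y = rnorm(20), paired = FALSE)
#   format_bf(bf)    # e.g. "1.23", or "1,234" when BF > 100
#   apa_bayes_t(bf)  # e.g. "$BF$ = 1.23"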

#' Extract numbers from aov object.
#'
#' Extract numbers from aov object.
#'
#' @param aov aov object
#' @param design string, 'mixed' or 'between'
#' @param effect string, '1way', 'A', 'B' or 'A*B'
#' @importFrom tibble tibble
#' @export
#' @return tibble
#'
extract_aov <- function(aov, design='mixed', effect='1way') {
  aov <- summary(aov)

  # 1-way ANOVA
  if (effect == '1way') {
    return(
      tibble(
        df_n = aov[[1]]$Df[1],
        df_d = aov[[1]]$Df[2],
        f    = sprintf("%.2f", aov[[1]]$'F value'[[1]]),
        p    = sub("^(-?)0.", "\\1.", sprintf("%.3f", aov[[1]]$'Pr(>F)'[[1]]))
      )
    )
  }

  # 2 x 2 ANOVA
  if (design == 'mixed') {
    if (effect == 'A') {        # A (first main effect)
      tibble(
        df_n = aov[[1]][[1]]$Df[1],
        df_d = aov[[1]][[1]]$Df[2],
        f    = sprintf("%.2f", aov[[1]][[1]]$'F value'[[1]]),
        p    = sub("^(-?)0.", "\\1.", sprintf("%.3f", aov[[1]][[1]]$'Pr(>F)'[[1]]))
      )
    } else if (effect == 'B') { # B (second main effect)
      tibble(
        df_n = aov[[2]][[1]]$Df[1],
        df_d = aov[[2]][[1]]$Df[3],
        f    = sprintf("%.2f", aov[[2]][[1]]$'F value'[[1]]),
        p    = sub("^(-?)0.", "\\1.", sprintf("%.3f", aov[[2]][[1]]$'Pr(>F)'[[1]]))
      )
    } else if (effect == 'A*B') { # A*B
      tibble(
        df_n = aov[[2]][[1]]$Df[1],
        df_d = aov[[2]][[1]]$Df[3],
        f    = sprintf("%.2f", aov[[2]][[1]]$'F value'[[2]]),
        p    = sub("^(-?)0.", "\\1.", sprintf("%.3f", aov[[2]][[1]]$'Pr(>F)'[[2]]))
      )
    }
  } else { # between
    if (effect == 'A*B') { # A*B
      tibble(
        df_n = aov[[1]][[1]]$Df[1],
        df_d = aov[[1]][[1]]$Df[3],
        f    = sprintf("%.2f", aov[[1]][[1]]$'F value'[[3]]),
        p    = sub("^(-?)0.", "\\1.", sprintf("%.3f", aov[[1]][[1]]$'Pr(>F)'[[3]]))
      )
    }
  }
}
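
# Usage sketch (hypothetical model): for a one-way between-subjects ANOVA,
#   m <- aov(rt ~ condition, data = some_df)
#   extract_aov(m, effect = '1way')
# returns a tibble of df_n, df_d, f and p as formatted strings, which
# report_aov() below turns into an APA-style string.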

#' Report aov.
#'
#' Report aov.
#'
#' @param aov aov object
#' @param design string
#' @param effect string
#' @export
#' @return Character
#'
report_aov <- function(aov, design='mixed', effect='1way') {
  t <- extract_aov(aov, design, effect)
  if (as.numeric(t$p) < .001) {
    paste0('*F*(', t$df_n, ', ', t$df_d, ') = ', t$f, ', *p* < .001')
  } else {
    paste0('*F*(', t$df_n, ', ', t$df_d, ') = ', t$f, ', *p* = ', t$p)
  }
}

#' Extract numbers from afex_aov object.
#'
#' Extract numbers from afex_aov object.
#'
#' @param afex_aov afex_aov object
#' @param effect string, row name of the effect in the afex_aov ANOVA table
#' @importFrom tibble tibble
#' @export
#' @return tibble
#'
extract_rm_aov <- function(afex_aov, effect = 'condition') {
  # For RM ANOVA, values are (probably Greenhouse-Geisser) corrected if sphericity violated
  tibble(
    df_n = sprintf("%.2f", afex_aov$anova_table[effect, 'num Df']),
    df_d = sprintf("%.2f", afex_aov$anova_table[effect, 'den Df']),
    f    = sprintf("%.2f", afex_aov$anova_table[effect, 'F']),
    p    = sub("^(-?)0.", "\\1.", sprintf("%.3f", afex_aov$anova_table[effect, 'Pr(>F)']))
  )
}

#' Report repeated measures aov.
#'
#' Report repeated measures aov.
#'
#' @param t tibble
#' @export
#' @return Character
#'
report_rm_aov <- function(t) {
  if (as.numeric(t$p) < .001) {
    paste0('*F*(', t$df_n, ', ', t$df_d, ') = ', t$f, ', *p* < .001')
  } else {
    paste0('*F*(', t$df_n, ', ', t$df_d, ') = ', t$f, ', *p* = ', t$p)
  }
}

#' Report chisq.
#'
#' Report chisq.
#'
#' @param chisq chisq object
#' @importFrom dplyr select
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return Character
#'
report_chisq <- function(chisq) {
  n <- chisq$observed %>% as.data.frame() %>% select(.data$Freq) %>% sum()
  if (as.numeric(chisq$p.value) < .001) {
    paste0('\\emph{$\\chi^2$}(', chisq$parameter, ', \\emph{N} = ', n, ') = ', sprintf("%.2f", chisq$statistic), ', \\emph{p} < .001')
  } else {
    paste0('\\emph{$\\chi^2$}(', chisq$parameter, ', \\emph{N} = ', n, ') = ', sprintf("%.2f", chisq$statistic), ', \\emph{p} = ', sprintf("%.3f", chisq$p.value))
  }
}

#' Report t test.
#'
#' Report t test.
#'
#' @param t htest object
#' @export
#' @return Character
#'
report_t <- function(t) {
  paste0('*t*(', sprintf("%.2f", t$parameter), ') = ', sprintf("%.2f", t$statistic), ', *p* = ', sprintf("%.3f", t$p.value))
}
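
# Usage sketch (hypothetical vectors x and y): report_t(t.test(x, y)) gives a
# Markdown string such as "*t*(37.42) = 2.10, *p* = 0.043". Unlike report_aov(),
# p-values below .001 are printed as "0.000" rather than "< .001".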

#' Save frequentist and Bayesian t-tests and Cohen's d for T1 and T2|T1
#'
#' Save frequentist and Bayesian t-tests and Cohen's d for T1 and T2|T1.
#'
#' @param df Data frame
#' @param group1 String
#' @param group2 String
#' @param dir String Directory
#' @param prefix String
#' @importFrom BayesFactor ttestBF
#' @importFrom dplyr group_by mutate summarise ungroup
#' @importFrom effsize cohen.d
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats t.test
#' @export
#' @return Data frame
#'
rsvp_by_group <- function(df, group1, group2, dir, prefix) {
  df <- df %>%
    group_by(.data$p, .data$condition) %>%
    summarise(mean_t2_accuracy = mean(.data$t2_accuracy),
              mean_t1_accuracy = mean(.data$t1_accuracy)) %>%
    ungroup()

  # T2|T1
  freq_t <- t.test(mean_t2_accuracy ~ condition, data = df, paired=FALSE)
  bayes_t <- ttestBF(x = df$mean_t2_accuracy[df$condition == group1],
                     y = df$mean_t2_accuracy[df$condition == group2],
                    paired = FALSE)
  save_rds(freq_t, dir, paste0(prefix,'_t2_med_control_freq_t'))
  save_rds(bayes_t, dir, paste0(prefix,'_t2_med_control_bayes_t'))
  t2_d <- cohen.d(df$mean_t2_accuracy ~ df$condition)
  save_rds(t2_d, dir, paste0(prefix,'_t2_med_control_d'))

  # T1
  freq_t <- t.test(mean_t1_accuracy ~ condition, data = df, paired=FALSE)
  bayes_t <- ttestBF(x = df$mean_t1_accuracy[df$condition == group1],
                     y = df$mean_t1_accuracy[df$condition == group2],
                    paired = FALSE)
  save_rds(freq_t, dir, paste0(prefix,'_t1_med_control_freq_t'))
  save_rds(bayes_t, dir, paste0(prefix,'_t1_med_control_bayes_t'))
  t1_d <- cohen.d(df$mean_t1_accuracy ~ df$condition)
  save_rds(t1_d, dir, paste0(prefix,'_t1_med_control_d'))

  return(df)
}

#' Save data as RDS object.
#'
#' Save data as RDS object.
#'
#' @param object R object
#' @param dir String Directory
#' @param filename String Filename
#' @export
#'
save_rds <- function(object, dir, filename) {
  saveRDS(object, paste(dir,paste0(filename,'.Rd'),sep='/'))
}

## bias testing

#' ANT plots
#'
#' ANT plots.
#'
#' @param df Data frame
#' @param condition Character vector length 2
#' @param xlab String
#' @importFrom dplyr group_by mutate summarise
#' @importFrom ggplot2 aes geom_boxplot geom_density geom_vline ggplot xlab ylab
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stringr str_to_upper
#' @export
#' @return List of ggplot objects (boxplot, qq_fam, qq_control, density)
#'
ant_plots <- function(df, condition, xlab) {
  means <- df %>%
    group_by(.data$condition) %>%
    summarise(mean = mean(.data$rt)) %>%
    mutate(means = paste("Mean", str_to_upper(.data$condition), sep = "."))

  den <- df %>%
    ggplot(aes(.data$rt, colour=.data$condition)) + geom_density(aes(y=..scaled..)) +
    geom_vline(aes(xintercept = mean, color = condition, linetype = means), data = means) +
    xlab(paste0('RT (ms) for ', xlab)) +
    ylab("Scaled density")
  qq_fam     <- qq_plot(df, condition[1], xlab)
  qq_control <- qq_plot(df, condition[2], xlab)

  box_plot <- df %>% ggplot(aes(x = .data$condition, y = .data$rt)) +
    geom_boxplot(aes(colour=.data$condition))

  return(list(boxplot = box_plot, qq_fam = qq_fam, qq_control = qq_control, density = den))
}

#' QQ plot
#'
#' QQ plot.
#'
#' @param df Data frame
#' @param c String Condition
#' @param xlab String
#' @importFrom dplyr filter
#' @importFrom ggplot2 aes ggplot labs stat_qq stat_qq_line
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @export
#' @return ggplot
#'
qq_plot <- function(df, c, xlab) {
  df %>%
    filter(.data$condition == c) %>%
    ggplot(aes(sample = .data$rt)) + stat_qq() + stat_qq_line() +
    labs(title = paste0('q-q plot for ', c, ' ', xlab))
}

#' Various tests for normality
#'
#' Various tests for normality.
#'
#' @param x Data frame
#' @importFrom dplyr bind_cols mutate
#' @importFrom kableExtra cell_spec kable kable_styling
#' @importFrom magrittr %>%
#' @importFrom psych describe
#' @importFrom purrr pmap_dfr
#' @importFrom rlang .data
#' @importFrom stats ks.test shapiro.test var
#' @importFrom tibble tibble
#' @importFrom tidyr pivot_wider
#' @export
#' @return kable object (HTML table of descriptives with skew and kurtosis z-scores)
#'
normal <- function(x) {
  print(shapiro.test(x)) # requires 3-5000 non-NA values
  # ties should not be present for the Kolmogorov-Smirnov test
  # https://stats.stackexchange.com/questions/232011/ties-should-not-be-present-in-one-sample-kolmgorov-smirnov-test-in-r/232067
  print(ks.test(x, 'pnorm', mean=mean(x), sd=sd(x)))

  stats <- bind_cols(describe(x), var = var(x))

  # calculate z-score for skew and kurtosis
  sk <- spssSkewKurtosis(x)
  # https://blog.az.sg/posts/map-and-walk/
  extra <- sk %>%
    pmap_dfr(function(...) {
      current <- tibble(...)
      current %>%
        mutate(z = current$estimate / current$se)
    }) %>%
    pivot_wider(names_from = .data$stat, values_from = c(.data$estimate, .data$se, .data$z))

  bind_cols(stats, extra) %>%
    round(2) %>%
    mutate(
      z_skew = cell_spec(.data$z_skew, 'html',
                         color = ifelse(abs(.data$z_skew) > 1.96, 'red', 'blue'), bold = TRUE),
      z_kurtosis = cell_spec(.data$z_kurtosis, 'html',
                             color = ifelse(abs(.data$z_kurtosis) > 1.96, 'red', 'blue'), bold = TRUE)
    ) %>%
    kable(escape = FALSE) %>%
    kable_styling(font_size = 9) # pander(split.table = Inf)
}

#' Skewness and kurtosis and their standard errors as implemented by SPSS
#'
#' Skewness and kurtosis and their standard errors as implemented by SPSS.
#'
#' Reference: pp 451-452 of
#' http://support.spss.com/ProductsExt/SPSS/Documentation/Manuals/16.0/SPSS 16.0 Algorithms.pdf
#'
#' See also: Suggestion for Using Powerful and Informative Tests of Normality,
#' Ralph B. D'Agostino, Albert Belanger, Ralph B. D'Agostino, Jr.,
#' The American Statistician, Vol. 44, No. 4 (Nov., 1990), pp. 316-321
#'
#' This gives slightly different values for skew and kurtosis to
#' psych::describe(), which doesn't give the SE of skew and kurtosis, which
#' you need to test for normality using a z-score.
#'
#' @param x Numeric vector
#' @importFrom tibble add_row
#' @export
#' @return Data frame
spssSkewKurtosis <- function(x) {
  w          <- length(x)
  m1         <- mean(x)
  m2         <- sum((x-m1)^2)
  m3         <- sum((x-m1)^3)
  m4         <- sum((x-m1)^4)
  s1         <- sd(x)
  skew       <- w*m3/(w-1)/(w-2)/s1^3
  sdskew     <- sqrt( 6*w*(w-1) / ((w-2)*(w+1)*(w+3)) )
  kurtosis   <- (w*(w+1)*m4 - 3*m2^2*(w-1)) / ((w-1)*(w-2)*(w-3)*s1^4)
  sdkurtosis <- sqrt( 4*(w^2-1) * sdskew^2 / ((w-3)*(w+5)) )
  df         <- data.frame(stat = 'skew', estimate = skew, se = sdskew)
  df         <- add_row(df, stat = 'kurtosis', estimate = kurtosis, se = sdkurtosis)
  return(df)
}
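
# Usage sketch (hypothetical vector): normal() above divides each estimate by
# its SE to get a z-score, and |z| > 1.96 flags significant skew or kurtosis at
# alpha = .05:
#   sk <- spssSkewKurtosis(rnorm(100))
#   sk$estimate / sk$se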

### ANT

#' ANT descriptives
#'
#' ANT descriptives by score.
#'
#' @param scores Data frame
#' @importFrom dplyr arrange desc group_by mutate recode_factor summarise ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd
#' @importFrom tidyr pivot_longer pivot_wider unite
#' @export
#' @return Data frame
#'
ant_descriptives <- function(scores) {
  df <- scores %>%
    group_by(.data$t, .data$group, .data$var) %>%
    summarise(mean_rt=sprintf("%.2f", round(mean(.data$rt), 2)),
              sd_rt=sprintf("%.2f", round(sd(.data$rt), 2))) %>%
    ungroup() %>%
    pivot_wider(names_from = var, values_from = c(.data$mean_rt, .data$sd_rt)) %>%
    unite('alerting', .data$mean_rt_alerting, .data$sd_rt_alerting, sep=' (') %>%
    unite('orienting', .data$mean_rt_orienting, .data$sd_rt_orienting, sep=' (') %>%
    unite('conflict', .data$mean_rt_conflict, .data$sd_rt_conflict, sep=' (') %>%
    mutate(alerting = paste0(.data$alerting, ')'), orienting = paste0(.data$orienting, ')'),
           conflict = paste0(.data$conflict, ')'))

  df$t <- recode_factor(df$t, `1` = 'Pre', `2` = 'Post')
  df %>%  pivot_longer(cols = c(.data$alerting, .data$orienting, .data$conflict),
                       names_to = 'score', values_to = 'value') %>%
    pivot_wider(names_from = c(t), values_from = .data$value) %>%
    arrange(desc(.data$group)) %>%
    mutate(score = stringr::str_to_title(.data$score))
}

#' ANT descriptives by cue type
#'
#' ANT descriptives by cue type.
#'
#' @param df Data frame
#' @importFrom dplyr arrange desc group_by mutate summarise ungroup
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stats sd
#' @importFrom tidyr pivot_longer pivot_wider unite
#' @export
#' @return Data frame
#'
ant_descriptives_by_cue_type <- function(df) {
  df %>%
    group_by(.data$t, .data$group, .data$cue, .data$flanker_type) %>%
    summarise(mean_rt=round(mean(.data$rt)), sd_rt=round(sd(.data$rt))) %>%
    ungroup() %>%
    pivot_wider(names_from = .data$flanker_type, values_from = c(.data$mean_rt, .data$sd_rt)) %>%
    unite('congruent', .data$mean_rt_congruent, .data$sd_rt_congruent, sep=' (') %>%
    unite('incongruent', .data$mean_rt_incongruent, .data$sd_rt_incongruent, sep=' (') %>%
    unite('neutral', .data$mean_rt_neutral, .data$sd_rt_neutral, sep=' (') %>%
    mutate(congruent = paste0(.data$congruent, ')'), incongruent = paste0(.data$incongruent, ')'),
           neutral = paste0(.data$neutral, ')')) %>%
    pivot_longer(cols = c(.data$congruent, .data$incongruent, .data$neutral), names_to = 'target', values_to = 'value') %>%
    pivot_wider(names_from = c(.data$t, .data$cue), values_from = .data$value) %>%
    arrange(desc(.data$group)) %>%
    mutate(target = sub("(.)", "\\U\\1", .data$target, perl=TRUE))
}

#' Compute ANT scores for alerting, orienting and conflict
#'
#' Compute ANT scores for alerting, orienting and conflict.
#'
#' @param df Data frame
#' @importFrom dplyr group_by left_join mutate select
#' @importFrom forcats fct_relevel
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom tidyr pivot_longer pivot_wider unite
#' @export
#' @return Data frame
#'
ant_scores <- function(df) {
  alerting_orienting <- df %>%
    pivot_wider(id_cols = c(.data$p,.data$group,.data$t), names_from = .data$cue,
                values_from = .data$rt, values_fn = list(rt = mean)) %>%
    mutate(alerting = .data$nocue - .data$double, orienting = .data$center - .data$spatial) %>%
    select(.data$p, .data$group, .data$t, .data$alerting, .data$orienting)
  conflict <- df %>%
    pivot_wider(id_cols = c(.data$p,.data$group,.data$t),
                names_from = .data$flanker_type, values_from = .data$rt,
                values_fn = list(rt = mean)) %>%
    mutate(conflict = .data$incongruent - .data$congruent) %>%
    select(.data$p, .data$group, .data$t, .data$conflict)
  result <- left_join(alerting_orienting, conflict, by=c('p', 'group', 't')) %>%
    pivot_longer(cols = c(.data$alerting, .data$orienting, .data$conflict),
                  names_to = 'var', values_to = 'rt') %>%
    mutate(var = factor(var))
  # arrange for plot facets to be LtR: Alerting, Orienting, Conflict
  result$var <- fct_relevel(result$var, 'conflict', after = Inf)
  return(result)
}
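
# The network scores computed above follow the standard ANT definitions:
# alerting = RT(no cue) - RT(double cue), orienting = RT(center cue) -
# RT(spatial cue), conflict = RT(incongruent) - RT(congruent), each calculated
# from per-participant cell-mean RTs at each time point.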

#' Generate a funnel plot
#'
#' Generate a funnel plot.
#'
#' @param data data frame with columns Study, group, fitted_smd, .lower, .upper, se and se_max, including a 'pooled' row
#' @importFrom dplyr bind_rows filter mutate rename select
#' @importFrom ggplot2 aes coord_flip geom_function geom_hline geom_point geom_rect ggplot guide_legend scale_fill_manual scale_linetype_discrete scale_x_reverse scale_y_continuous theme_bw xlab ylab
#' @importFrom rlang .data
#' @importFrom utils head
#' @export
#' @return ggplot
#'
funnel_plot <- function(data) {
  estimate <- with(data, fitted_smd[Study == 'pooled'])
  meanll95 <- with(data, .lower[Study == 'pooled'])
  meanul95 <- with(data, .upper[Study == 'pooled'])
  se_max   <- with(data, se_max[Study == 'pooled'])
  se_seq   <- data.frame(se = seq(0, se_max, .001))

  data <- head(data, -1) %>% # remove pooled row
    rename(Group = group)

  p <- ggplot(aes(x = se, y = fitted_smd), data = data) +
    geom_point(shape = 19, aes(colour = Group)) +
    geom_rect(aes(ymin = meanll95, ymax = meanul95, xmin = 0, xmax = Inf, fill='95% credible interval'), colour=NA, alpha = .05) +
    scale_fill_manual('Pooled effect', values = 'gray', guide = guide_legend(override.aes = list(alpha = 1))) +
    geom_function(aes(linetype = '95%'), fun = function(x) { x * 1.96 + estimate }, data = se_seq, inherit.aes = FALSE) +
    geom_function(aes(linetype = '95%'), fun=function(x) { x * -1.96 + estimate }, data = se_seq, inherit.aes = FALSE) +
    geom_function(aes(linetype = '99%'), fun = function(x) { x * 3.29 + estimate }, data = se_seq, inherit.aes = FALSE) +
    geom_function(aes(linetype = '99%'), fun=function(x) { x * -3.29 + estimate }, data = se_seq, inherit.aes = FALSE) +
    xlab('Study Standard Error') + ylab('SMD') +
    scale_linetype_discrete(name = "Absence of bias and heterogeneity") +
    scale_x_reverse(limits = c(se_max, 0)) +
    scale_y_continuous(breaks=seq(-1.25, 2, 0.25)) +
    coord_flip() +
    geom_hline(yintercept = estimate, colour = 'blue') +
    theme_bw()
  return(p)
}

#' Generate a forest plot from a brms model
#'
#' Generate a forest plot from a brms model.
#'
#' @param rem brms model
#' @param xlab String
#' @importFrom dplyr bind_rows filter group_by mutate mutate_if select ungroup
#' @importFrom forcats fct_relevel
#' @importFrom ggplot2 aes geom_text geom_vline ggplot theme_bw xlab
#' @importFrom ggridges geom_density_ridges
#' @importFrom magrittr %>%
#' @importFrom rlang .data
#' @importFrom stringr str_replace_all
#' @importFrom tidybayes geom_pointintervalh mean_qi spread_draws
#' @export
#' @return ggplot
#'
forest_bayes <- function(rem, xlab = 'Standardised Mean Difference') {
# Based on https://vuorre.netlify.com/post/2017/01/19/better-forest-plots-from-meta-analytic-models-estimated-with-brms/
# Don't use brmstools::forest() as brmstools is deprecated
# This is from https://github.com/mvuorre/brmstools

  # For an explanation of tidybayes::spread_draws(), refer to http://mjskay.github.io/tidybayes/articles/tidy-brms.html
  # Study-specific effects are deviations + average
  out_r <- spread_draws(rem, r_study[study, term], b_Intercept) %>%
    mutate(b_Intercept = r_study + b_Intercept) %>%
    mutate(study = str_replace_all(study, "\\.\\.", "."))

  # add study type for faceting
  # type <- studies %>%
  #   select(publication, type) %>%
  #   mutate(study = str_replace_all(publication, "[, ]", ".")) %>%
  #   mutate(study = str_replace_all(study, "\\.\\.", ".")) %>%
  #   select(-publication)

  #out_r <- left_join(out_r, type, by='study')

  # Average effect
  out_f <- spread_draws(rem, b_Intercept) %>%
    mutate(study = "Average")
  # Combine average and study-specific effects' data frames
  out_all <- bind_rows(out_r, out_f) %>%
    ungroup() %>%
    mutate(study = fct_relevel(study, "Average")) # put Average effect at bottom of the forest plot
  # Data frame of summary numbers
  out_all_sum <- group_by(out_all, study) %>% mean_qi(b_Intercept)
  #  out_all_sum <- left_join(out_all_sum, type, by='study')

  # tidy study names
  #out_r <- out_r %>% mutate(study = recode(study, !!!study.recode))
  #out_all_sum <- out_all_sum %>% mutate(study = recode(study, !!!study.recode))
  #  out_all <- out_all %>% mutate(study = recode(study, !!!study.recode))

  # FIXME: average by group for RCT and CT
  #  out_all_sum <- out_all_sum %>% filter(study != 'Average')
  #  out_all <- out_all %>% filter(study != 'Average')

  #> Warning: unnest() has a new interface. See ?unnest for details.
  #> Try `cols = c(.lower, .upper)`, with `mutate()` needed
  # Draw plot
  forest <- out_all %>%
    ggplot(aes(b_Intercept, study)) +
    geom_density_ridges(
      rel_min_height = 0.01,
      col = NA,
      scale = 1
    ) +
    geom_pointintervalh(
      data = out_all_sum, size = 1
    ) +
    geom_text(
      data = mutate_if(out_all_sum, is.numeric, round, 2),
      # Use glue package to combine strings
      aes(label = glue::glue("{b_Intercept} [{.lower}, {.upper}]"), x = Inf),
      hjust = "inward"
    ) +
    geom_vline(xintercept = 0) +
    #    facet_grid(type ~ ., scales = 'free') +
    xlab(xlab) +
    theme_bw()

  return(forest)
}