#' Summarise a measure per between-subject cell
#'
#' Collapses repeated observations to one mean per participant (and
#' between-subject cell), then computes N, mean, SD and SE of those
#' participant means per between-subject cell. The mean column is renamed
#' to `measurevar`.
#'
#' @param data data frame in long format
#' @param measurevar string, name of the measure column
#' @param idvar string, name of the participant identifier column
#' @param betweenvars character vector of between-subject grouping columns
#' @return an ungrouped data frame with N, <measurevar>, SD and SE per cell
summary_se <- function(data, measurevar, idvar, betweenvars) {
  summarized <- data %>%
    ## collapse multiple observations within participant x between-cell
    dplyr::group_by(dplyr::across(dplyr::all_of(c(idvar, betweenvars)))) %>%
    dplyr::summarise(
      MeasureVar = mean(.data[[measurevar]], na.rm = TRUE),
      .groups = "drop"
    ) %>%
    ## aggregate the participant means per between-subject cell
    dplyr::group_by(dplyr::across(dplyr::all_of(betweenvars))) %>%
    dplyr::summarise(
      N = dplyr::n(),
      Mean = mean(MeasureVar, na.rm = TRUE),
      SD = sd(MeasureVar, na.rm = TRUE),
      ## standard error of the mean across participants
      SE = SD / sqrt(N),
      .groups = "drop"
    )
  ## callers expect the mean column to carry the original measure name
  colnames(summarized)[colnames(summarized) == "Mean"] <- measurevar
  summarized
}
#' Summarise a measure with within-subject error terms
#'
#' Computes per-cell N, mean, SD, SE and 95% CI where the spread statistics
#' are based on participant-mean-centered ("normed") scores, with the
#' Morey (2008) correction factor applied.
#'
#' @param data data frame in long format
#' @param measurevar string, name of the measure column
#' @param idvar string, name of the participant identifier column
#' @param withinvars character vector of within-subject factors (NULL for a
#'   purely between-subjects design)
#' @param betweenvars character vector of between-subject factors
#' @param ... ignored; absorbs legacy arguments such as `na.rm` so older
#'   call sites do not error (missing values are always removed)
#' @return an ungrouped data frame with N, <measurevar>, SD, SE and CI
summary_se_within <- function(data, measurevar, idvar, withinvars = NULL, betweenvars = NULL, ...) {
  # Really just between subjects? Fall back to the plain summary.
  if (is.null(withinvars)) {
    return(summary_se(data, measurevar, idvar, betweenvars))
  }
  # Warn about impossibility of errorbars in mixed designs:
  if (!is.null(betweenvars)) {
    warning('Error bars cannot be accurately represented in mixed designs. ',
            'Treating each level of any between-subjects factors as separate experiment.')
  }
  # Collapse multiple observations per participant x design cell and add the
  # participant-mean-centered ("normed") measure.
  normed <- collapse_and_norm(data, measurevar, idvar, withinvars, betweenvars)
  # Morey (2008) correction: sqrt(m / (m - 1)) for m within-subject cells.
  num_within_groups <- prod(vapply(
    data[, withinvars, drop = FALSE],
    function(x) length(unique(x)),
    numeric(1)
  ))
  correction_factor <- sqrt(num_within_groups / (num_within_groups - 1))
  # Means come from the raw scores; SD/SE/CI from the normed scores.
  summarized <- normed %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(c(betweenvars, withinvars)))) %>%
    dplyr::summarise(
      N = dplyr::n(),
      Mean = mean(MeasureVar, na.rm = TRUE),
      SD = sd(MeasureVarNormed, na.rm = TRUE),
      SE = SD / sqrt(N),
      CI = SE * qt(.975, df = N - 1),
      .groups = "drop"
    ) %>%
    dplyr::mutate(
      SD = SD * correction_factor,
      SE = SE * correction_factor,
      CI = CI * correction_factor
    )
  colnames(summarized)[colnames(summarized) == "Mean"] <- measurevar
  summarized
}
#' Collapse repeated observations and compute normed scores
#'
#' One mean per participant x design cell, plus the participant's own mean
#' (`SubMean`), the deviation from it (`Diff`), and the normed score
#' (`MeasureVarNormed`) in which between-participant variability is removed
#' and the grand mean is added back.
#'
#' @param data data frame in long format
#' @param measurevar string, name of the measure column
#' @param idvar string, name of the participant identifier column
#' @param withinvars character vector of within-subject factors
#' @param betweenvars character vector of between-subject factors
#' @return an ungrouped data frame with MeasureVar, SubMean, Diff and
#'   MeasureVarNormed columns
collapse_and_norm <- function(data, measurevar, idvar, withinvars, betweenvars = NULL) {
  # Collapse multiple observations in the same participant x design cell.
  collapsed <- data %>%
    dplyr::group_by(dplyr::across(dplyr::all_of(c(idvar, betweenvars, withinvars)))) %>%
    dplyr::summarise(
      MeasureVar = mean(.data[[measurevar]], na.rm = TRUE),
      .groups = "drop"
    ) %>%
    # Per-participant mean across the within-subject cells.
    dplyr::group_by(dplyr::across(dplyr::all_of(c(idvar, betweenvars)))) %>%
    dplyr::mutate(
      SubMean = mean(MeasureVar, na.rm = TRUE),
      Diff = MeasureVar - SubMean
    ) %>%
    dplyr::ungroup()
  # Normed score: remove the participant mean, re-add the grand mean.
  # (Returned visibly; the original ended on an assignment, which returned
  # the result invisibly.)
  collapsed %>%
    dplyr::mutate(MeasureVarNormed = (MeasureVar - SubMean) + mean(MeasureVar, na.rm = TRUE))
}
#' Get Within Standard Errors
#'
#' @param x data
#' @param variable specify which variable you want to calculate
#' @param WaveType which wave you want to compare (should be "Post" or "FollowUp")
withinSE <- function(x, variable, WaveType) {
  # Within-subject summary for a single variable code, reshaped to one row
  # with the Pre and Post/FollowUp means as separate columns.
  # variable <- "C1"
  # Keep only the rows for the requested variable code.
  SEdat <- x %>%
    dplyr::filter(variable_code == variable)
  # NOTE(review): `var` is computed but never used below (a commented-out
  # guard for culture variables once relied on it).
  var <- SEdat %>% distinct(variable_code) %>% pull(variable_code)
  # if (WaveType == "Post" & var %in% c("C1", "C2", "C3")) {
  #
  # # SEdat <- SEdat %>% dplyr::filter(variable_code %nin% c("C1", "C2", "C3"))
  #
  # return(tibble(variable_code = variable))
  #
  # }
  # N/SD/SE/CI per time point, then spread so each time point becomes its
  # own column; re-attach the variable code.
  SEdat <- summary_se_within(data = SEdat,
                             measurevar = "Response",
                             withinvars = "Time",
                             idvar = "OMID") %>%
    spread(Time, Response) %>%
    mutate(variable_code = variable)
  # After spreading, the two time points sit on two rows with NAs in each
  # other's columns; fill both directions and keep a single merged row.
  if (nrow(SEdat) == 2) {
    if ("Post" %in% colnames(SEdat)) {
      SEdat <- SEdat %>%
        fill(Pre) %>%
        fill(Post, .direction = "up")%>%
        fill(Pre, .direction = "up") %>%
        fill(Post) %>%
        slice(1)
    }
    if ("FollowUp" %in% colnames(SEdat)) {
      SEdat <- SEdat %>%
        fill(Pre) %>%
        fill(FollowUp, .direction = "up")%>%
        fill(Pre, .direction = "up") %>%
        fill(FollowUp) %>%
        slice(1)
    }
  }
  # Put the variable code first for readability.
  SEdat <- SEdat %>%
    select(variable_code, everything())
  return(SEdat)
}
#' Conducts t-tests and calculates Cohen's d
#'
#' This is a lower-level function that belongs to om_summarize_comparisons. It is not meant to be used outside of om_summarize_comparisons.
#'
#' @param x data
#' @param waves either PrePost or PreFollow
#' @param q14_q17 logical, Q14 and Q17 are coded separately
#' @export
summarize_comparison <- function(x, waves, q14_q17 = FALSE) {
  ## This function calculates paired t-tests and Cohen's d per group
  ## (callers group by variable_code).
  ## Map the comparison label onto the post-treatment wave; fail loudly on
  ## anything else (previously `WaveType` was silently left undefined and the
  ## function errored much later).
  WaveType <- switch(waves,
                     PrePost = "Post",
                     PreFollow = "FollowUp",
                     stop("`waves` must be either 'PrePost' or 'PreFollow'.", call. = FALSE))
  vars <- x %>% dplyr::select(variable_code) %>% dplyr::distinct() %>% dplyr::pull()
  ## Culture variables (C1-C3) are only compared Pre vs FollowUp.
  if (any(c("C1", "C2", "C3") %in% vars) && WaveType == "Post") {
    x <- x %>%
      dplyr::filter(variable_code %nin% c("C1", "C2", "C3"))
    vars <- x %>% dplyr::select(variable_code) %>% dplyr::distinct() %>% dplyr::pull()
  }
  ## Within-subject standard errors per variable code.
  within_stats <- vars %>% map_dfr(~withinSE(x, variable = .x, WaveType = WaveType))
  ## Q14/Q17 are coded in the opposite direction: "improved" means the Pre
  ## score is *higher* than the later score for them, *lower* for the rest.
  ## (This replaces two near-identical branches that differed only in the
  ## comparison operator; debug print() calls were removed.)
  improved <- if (q14_q17) `>` else `<`
  final_dat <- x %>% dplyr::summarize(
    cohend = abs(effsize::cohen.d(Response~Time, paired = TRUE, conf.level = 0.95)$estimate),
    cohendCIlow = abs(effsize::cohen.d(Response~Time, paired = TRUE, conf.level = 0.95)$conf.int[1]),
    cohendCIhi = abs(effsize::cohen.d(Response~Time, paired = TRUE, conf.level = 0.95)$conf.int[2]),
    tstat = abs(t.test(Response~Time, paired = TRUE)$statistic),
    pvalue = t.test(Response~Time, paired = TRUE)$p.value,
    df = t.test(Response~Time, paired = TRUE)$parameter,
    ## share of participants moving in the improving direction;
    ## df + 1 equals the number of pairs
    percentimproved = sum(improved(Response[Time == "Pre"], Response[Time == WaveType]) == TRUE)/(df+1)
  ) %>%
    left_join(within_stats)
  final_dat
}
#' Conducts t-tests and calculates Cohen's d for Q14 and Q17 separately
#'
#' This is a lower-level function that belongs to om_summarize_comparisons. It is not meant to be used outside of om_summarize_comparisons.
#'
#' @param .data dataset
#' @param waves which wave (passed to summarize_comparison)
#' @export
bind_questions <- function(.data, waves) {
  ## IMPORTANT: Q14 and Q17 are coded in the opposite direction from the
  ## other variables, so they are summarised with `q14_q17 = T` and
  ## everything else with `q14_q17 = F`, then the two results are stacked.
  ## All variables that are not Q14 or Q17:
  regular <- .data %>%
    dplyr::filter(variable_code %nin% c("Q14", "Q17")) %>%
    dplyr::group_by(variable_code) %>%
    summarize_comparison(waves = waves, q14_q17 = F)
  ## Just Q14 and Q17:
  reversed <- .data %>%
    dplyr::filter(variable_code %in% c("Q14", "Q17")) %>%
    dplyr::group_by(variable_code) %>%
    summarize_comparison(waves = waves, q14_q17 = T)
  dplyr::bind_rows(regular, reversed)
}
#' Run linear regression model (with interactions)
#'
#' This function performs linear regression and gives back some neat info
#' @param .data data
#' @param lin_mod a fitted model
#' @param type what kind of model (currently only accepts \code{"int"} for interactions)
#' @param switch logical. Switch variables in interaction plot. Default is \code{FALSE}
#' @param nudge_y Decide the absolute value by which text label should be nudged (defaults to -0.5)
#' @export
om_lm <- function(.data,
                  lin_mod,
                  type = "int",
                  switch = F,
                  nudge_y = -0.5) {
  # The "report" package is optional; if absent, skip the text report.
  skip_report <- F
  if(!check_for_pkg("report")){
    message('You are missing the "report" package which returns neat interprations of the models. If you want to see the report output please install this package from GitHub: devtools::install_github("easystats/report")')
    skip_report <- T
  }
  if(type == "int"){
    # Pull the interaction term(s) out of the fitted model.
    interactions <- lin_mod %>%
      insight::find_interactions()
    if(is.null(interactions)){
      stop("Model does not include an interaction.")
    }
    # "a:b" -> c("a", "b")
    int_vars <- interactions$conditional %>%
      stringr::str_split(":") %>%
      unlist()
    if(length(int_vars) > 2) {
      stop(("Threeway interactions not supported yet."))
    }
    ## setting default values for int plot
    V1 <- 1
    V2 <- 2
    # `switch` flips which interaction variable goes on the x-axis vs the
    # facets/legend.
    if(switch == T){
      int_vars <- rev(int_vars)
      V1 <- 2
      V2 <- 1
    }
  }
  # NOTE(review): if `type != "int"`, `int_vars`/`V1`/`V2` are undefined here
  # and the next statement errors; only type = "int" is currently supported.
  # One row per interaction variable with its class, to pick a plotting
  # strategy below.
  var_types <- .data %>%
    dplyr::select(int_vars[V1], int_vars[V2]) %>%
    purrr::map_dfr(class) %>%
    tidyr::gather(var, class_type)
  any_numeric <- any(var_types %>% dplyr::pull(class_type) %in% c("numeric"))
  all_numeric <- all(var_types %>% dplyr::pull(class_type) %in% c("numeric"))
  ## if there is only one numeric variable
  if(any_numeric & !all_numeric){
    ## which is the numeric variable?
    num_var <- var_types %>%
      dplyr::filter(class_type == "numeric") %>%
      dplyr::pull(var)
    num_position <- which(var_types[,2]=="numeric")
    ## estimate response at three levels of the numeric moderator
    means_dat <- lin_mod %>%
      modelbased::estimate_link(length=3,
                                numerics = "combination",
                                standardize = TRUE) %>%
      dplyr::rename(col = tidyselect::all_of(num_position)) %>%
      dplyr::rename(Mean = Predicted)
    ## get factors (group index 1..3 identifies the moderator level)
    groupings <- means_dat %>%
      dplyr::group_indices(col)
    ## rename variable labels to -1 SD / Mean / +1 SD
    means_dat <- means_dat %>%
      dplyr::mutate(lab = groupings) %>%
      dplyr::mutate(lab = dplyr::case_when(
        lab == 1 ~ "-1 SD",
        lab == 2 ~ "Mean",
        lab == 3 ~ "+1 SD"
      )) %>%
      dplyr::mutate_at(dplyr::vars(col), ~facet_labs({{num_var}}, lab, .x))
    factor_order <- means_dat %>%
      dplyr::pull(col) %>%
      unique()
    ## sort dataset so facet labels appear in -1 SD / Mean / +1 SD order
    means_dat <- means_dat %>%
      dplyr::mutate(col = forcats::fct_relevel(col, factor_order)) %>%
      dplyr::select(col1 = tidyselect::all_of(V1),
                    col2 = tidyselect::all_of(V2),
                    dplyr::everything())
  }
  if(all_numeric){
    # stop("Data all numeric. Not supported yet")
    # Both moderators numeric: label both at -1 SD / Mean / +1 SD.
    means_dat <- modelbased::estimate_link(lin_mod,
                                           length=3,
                                           numerics = "combination",
                                           standardize = TRUE) %>%
      dplyr::select(col1 = tidyselect::all_of(V1),
                    col2 = tidyselect::all_of(V2),
                    dplyr::everything()) %>%
      dplyr::rename(Mean = Predicted)
    groupings1 <- means_dat %>%
      dplyr::group_indices(col1)
    groupings2 <- means_dat %>%
      dplyr::group_indices(col2)
    means_dat <- means_dat %>%
      dplyr::mutate(lab1 = groupings1) %>%
      dplyr::mutate(lab1 = dplyr::case_when(
        lab1 == 1 ~ "-1 SD",
        lab1 == 2 ~ "Mean",
        lab1 == 3 ~ "+1 SD"
      )) %>%
      dplyr::mutate_at(dplyr::vars(col1), ~facet_labs(int_vars[1], lab1, .x)) %>%
      dplyr::mutate(lab2 = groupings2) %>%
      dplyr::mutate(lab2 = dplyr::case_when(
        lab2 == 1 ~ "-1 SD",
        lab2 == 2 ~ "Mean",
        lab2 == 3 ~ "+1 SD"
      )) %>%
      dplyr::mutate_at(dplyr::vars(col2), ~facet_labs(int_vars[2], lab2, .x))
    factor_order1 <- means_dat %>%
      dplyr::pull(col1) %>%
      unique()
    factor_order2 <- means_dat %>%
      dplyr::pull(col2) %>%
      unique()
    means_dat <- means_dat %>%
      dplyr::mutate(col1 = forcats::fct_relevel(col1, factor_order1)) %>%
      dplyr::mutate(col2 = forcats::fct_relevel(col2, factor_order2))
  }
  if(!any_numeric){
    # Both moderators categorical: plain estimated means per cell.
    means_dat <- lin_mod %>%
      modelbased::estimate_means(data = .data) %>%
      tibble::as_tibble() %>%
      dplyr::select(col1 = tidyselect::all_of(V1),
                    col2 = tidyselect::all_of(V2),
                    dplyr::everything())
  }
  # Bar chart: first interaction variable on x, second as facets; error bars
  # from the CI columns, value labels nudged by `nudge_y`.
  int_plot <- means_dat %>%
    dplyr::select(Var1 = tidyselect::all_of(1),
                  Var2 = tidyselect::all_of(2),
                  tidyselect::everything()) %>%
    dplyr::mutate(mean_lab = specify_decimal(Mean, 2)) %>%
    ggplot2::ggplot(ggplot2::aes(Var1, Mean, fill = Var1)) +
    ggplot2::geom_col() +
    ggplot2::geom_errorbar(ggplot2::aes(ymin = CI_low,
                                        ymax = CI_high),
                           width = 0.3) +
    ggplot2::facet_wrap(~Var2) +
    ggplot2::labs(x = int_vars[1], y = "Average Response") +
    ggplot2::geom_text(ggplot2::aes(label = mean_lab),
                       nudge_y = nudge_y,
                       color = "white") +
    scale_fill_om(name = int_vars[2]) +
    ggplot2::theme(legend.position = "none")
  # Restore the original variable names on the returned means table.
  means_dat_names <- colnames(means_dat)
  means_dat_names[1] <- int_vars[1]
  means_dat_names[2] <- int_vars[2]
  results <- list(
    model = lin_mod,
    table = texreg::screenreg(lin_mod),
    estimated_means = means_dat %>% purrr::set_names(means_dat_names),
    plot = int_plot
  )
  if(!skip_report){
    ## report (optional text interpretation of the model)
    report_text <- lin_mod %>%
      report::report() %>%
      report::text_long()
    results <- c(results, report = report_text)
  }
  return(results)
}
#' Build a facet label of the form "<variable> <level label> (<value>)"
#'
#' Used by \code{om_lm} to label moderator levels, e.g. "age -1 SD (23.40)".
#'
#' @param varname name of the (numeric) variable
#' @param lab level label such as "-1 SD", "Mean" or "+1 SD"
#' @param varvalue numeric value, rendered with two decimals via
#'   \code{specify_decimal}
#' @return a glue string
#' @export
facet_labs <- function(varname, lab, varvalue) {
  glue::glue("{varname} {lab} ({specify_decimal(varvalue, 2)})")
}
#' A pipable if statement
#'
#' This function allows to create an if statement that can be used within a pipable workflow
#'
#' @importFrom magrittr %>%
#' @importFrom rlang parse_expr
#' @param .data tibble
#' @param condition logical test
#' @param call a formula describing a pipe to be evaluated if condition is \code{TRUE}
#' @examples
#' any_condition <- TRUE
#'
#' mtcars %>%
#' do_if(any_condition, ~{
#' .x %>%
#' dplyr::filter(cyl == 6) %>%
#' dplyr::mutate(x = disp > 170)
#' })
#' @export
do_if <- function(.data, condition, call){
  # When `condition` is TRUE, evaluate the right-hand side of the one-sided
  # formula `call` with `.x` bound to `.data`; otherwise pass `.data`
  # through unchanged.
  if(condition){
    .x <- .data
    # Extract the RHS expression directly (`call[[2]]`) instead of
    # round-tripping through as.character()/parse_expr(): deparsing a long
    # expression can split it into several strings, in which case `[2]`
    # silently dropped part of the pipeline. Evaluation happens in this
    # function's frame, where `.x` is in scope -- the same lookup
    # semantics as before.
    out <- eval(call[[2]])
    return(out)
  } else {
    return(.data)
  }
}
#' Run mixed models (with interactions)
#'
#' This function performs a mixed model and gives back some neat info
#' @param mod a fitted model
#' @param type what kind of model (currently only accepts \code{"int"} for interactions)
#' @param mod_transform Indicates which values of the moderator variable should be
#' used when plotting interaction terms (i.e. \code{type = "int"}). \describe{
#' \item{\code{"minmax"}}{(default) minimum and maximum values (lower and
#' upper bounds) of the moderator are used to plot the interaction between
#' independent variable and moderator(s).} \item{\code{"meansd"}}{uses the
#' mean value of the moderator as well as one standard deviation below and
#' above mean value to plot the effect of the moderator on the independent
#' variable (following the convention suggested by Cohen and Cohen and
#' popularized by Aiken and West (1991), i.e. using the mean, the value one
#' standard deviation above, and the value one standard deviation below the
#' mean as values of the moderator, see
#' \href{http://www.theanalysisfactor.com/3-tips-interpreting-moderation/}{Grace-Martin
#' K: 3 Tips to Make Interpreting Moderation Effects Easier}).}
#' \item{\code{"zeromax"}}{is similar to the \code{"minmax"} option, however,
#' \code{0} is always used as minimum value for the moderator. This may be
#' useful for predictors that don't have an empirical zero-value, but absence
#' of moderation should be simulated by using 0 as minimum.}
#' \item{\code{"quart"}}{calculates and uses the quartiles (lower, median and
#' upper) of the moderator value.} \item{\code{"all"}}{uses all values of the
#' moderator variable.} }
#' @export
om_lmer <- function(mod, type = "int", mod_transform = "minmax"){
  ## The "report" package is optional; remember whether it is available.
  skip_report <- !check_for_pkg("report")
  if (skip_report) {
    message('You are missing the "report" package which returns neat interprations of the models. If you want to see the report output please install this package from GitHub: devtools::install_github("easystats/report")')
  }
  if (type == "int") {
    ## Identify the interaction term and split "a:b" into its variables.
    interactions <- insight::find_interactions(mod)
    if (is.null(interactions)) {
      stop("Model does not include an interaction.")
    }
    int_vars <- unlist(stringr::str_split(interactions$conditional, ":"))
    if (length(int_vars) > 2) {
      stop(("Threeway interactions not supported yet."))
    }
  }
  ## Estimated marginal means at the chosen moderator values.
  means_dat <- sjPlot::get_model_data(mod,
                                      type = "int",
                                      mdrt.values = mod_transform) %>%
    tibble::as_tibble() %>%
    dplyr::rename(Time = x)
  ## Interaction plot, titled with the interacting variables.
  int_plot <- sjPlot::plot_model(mod,
                                 type = "int", mdrt.values = mod_transform,
                                 title = paste0("IVs: ", knitr::combine_words(int_vars)),
                                 colors = pal_om(7))
  results <- list(
    model = mod,
    table = texreg::screenreg(mod),
    estimated_means = means_dat,
    plot = int_plot
  )
  if (!skip_report) {
    ## Optional text interpretation of the model.
    report_text <- report::text_long(report::report(mod))
    results <- c(results, report = report_text)
  }
  return(results)
}
#' Conduct t-tests and calculate Cohen's d
#'
#' This is a lower-level function that belongs to om_summarize_comparisons. It is not meant to be used outside of om_summarize_comparisons.
#' @param gathered_dat Assessment data as long format
#' @param compare With the `compare` argument you can specify either \code{"PrePost"}, \code{"PreFollow"} or both \code{c("PrePost", "PreFollow")} comparisons (the latter is the default).
#' @export
om_compare <- function(gathered_dat, compare = c("PrePost", "PreFollow", "PrePostFollow")) {
  # Builds, per requested comparison, the "with moderates" and "without
  # moderates" summaries via bind_questions()/summarize_comparison(), then
  # stacks them and normalises the Cohen's d confidence interval columns.
  # gathered_dat <- n3v4long
  # compare = c("PrePost", "PreFollow", "PrePostFollow")
  if ("PrePostFollow" %in% compare) {
    ## PrePost Data: participants with complete Pre + Post + FollowUp
    compare_dat_prepostfollow <- gathered_dat %>%
      dplyr::filter(Time %in% c("Pre", "Post", "FollowUp")) %>%
      tidyr::drop_na(Response) %>%
      # dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post", "FollowUp"))) %>%
      ## count OMIDs and PrePost Time
      dplyr::add_count(OMID, variable_code) %>%
      ## only keep cases where Pre, Post and FollowUp exist
      dplyr::filter(n == 3)
    # debugonce(withinSE)
    ## Calculate Scores for all data (T1 vs T2 within the three-wave sample)
    moderate_dat_prePostfollow <- compare_dat_prepostfollow %>%
      ## remove vars that depend on moderates being excluded (Q20 only for AV4)
      dplyr::filter(variable_code %nin% c("Q15", "Q16", "Q17", "Q20", "C1", "C2", "C3") | (variable_code == "Q20" & AssessmentVersion == 4)) %>%
      dplyr::filter(Time %in% c("Pre", "Post")) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post"))) %>%
      ## PrePost
      bind_questions(waves = "PrePost") %>%
      dplyr::mutate(Comparison = "PrePostFollowT1T2") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithModerates") %>%
      ## Deal with Culture Vars
      dplyr::filter(!is.nan(cohend)) %>%
      dplyr::mutate(moderates = ifelse(variable_code %in% c("C1", "C2", "C3"), "CultureVars", moderates))
    ## Calculate scores where Moderates need to be excluded
    no_moderate_dat_prePostfollow <- compare_dat_prepostfollow %>%
      ## only include vars that depend on moderates being excluded (Q20 only for AV5+)
      dplyr::filter(variable_code %in% c("Q15", "Q16", "Q17") | (variable_code == "Q20" & AssessmentVersion >= 5)) %>%
      dplyr::filter(Time %in% c("Pre", "Post")) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post"))) %>%
      tidyr::drop_na(ppol_cat) %>%
      ## PrePost
      bind_questions(waves = "PrePost") %>%
      dplyr::mutate(Comparison = "PrePostFollowT1T2") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithoutModerates")
    ## Calculate Scores for all data (T1 vs T3 within the three-wave sample)
    moderate_dat_prepostFollow <- compare_dat_prepostfollow %>%
      ## remove vars that depend on moderates being excluded (Q20 only for AV4)
      dplyr::filter(variable_code %nin% c("Q15", "Q16", "Q17", "Q20", "C1", "C2", "C3") | (variable_code == "Q20" & AssessmentVersion == 4)) %>%
      dplyr::filter(Time %in% c("Pre", "FollowUp")) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "FollowUp"))) %>%
      ## PreFollow
      bind_questions(waves = "PreFollow") %>%
      dplyr::mutate(Comparison = "PrePostFollowT1T3") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithModerates") %>%
      ## Deal with Culture Vars
      dplyr::filter(!is.nan(cohend)) %>%
      dplyr::mutate(moderates = ifelse(variable_code %in% c("C1", "C2", "C3"), "CultureVars", moderates))
    ## Calculate scores where Moderates need to be excluded
    no_moderate_dat_prepostFollow <- compare_dat_prepostfollow %>%
      dplyr::filter(Time %in% c("Pre", "FollowUp")) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "FollowUp"))) %>%
      ## only include vars that depend on moderates being excluded (Q20 only for AV5+)
      dplyr::filter(variable_code %in% c("Q15", "Q16", "Q17") | (variable_code == "Q20" & AssessmentVersion >= 5)) %>%
      tidyr::drop_na(ppol_cat) %>%
      ## PreFollow
      bind_questions(waves = "PreFollow") %>%
      dplyr::mutate(Comparison = "PrePostFollowT1T3") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithoutModerates")
    ## Calculate scores for C1, C2 and C3 (culture variables use the full
    ## dataset, Pre vs FollowUp only)
    culture_vars <- gathered_dat %>%
      dplyr::filter(Time %in% c("Pre", "FollowUp")) %>%
      dplyr::filter(variable_code %in% c("C1", "C2", "C3")) %>%
      tidyr::drop_na(Response) %>%
      # dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post", "FollowUp"))) %>%
      ## count OMIDs and PrePost Time
      dplyr::add_count(OMID, variable_code) %>%
      ## only keep cases where Pre and Post exist
      dplyr::filter(n == 2) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "FollowUp"))) %>%
      dplyr::group_by(variable_code) %>%
      ## PreFollow
      summarize_comparison(waves = "PreFollow",
                           q14_q17 = F) %>%
      dplyr::mutate(Comparison = "PrePostFollowT1T3") %>%
      ## add indicator
      dplyr::mutate(moderates = "CultureVars") %>%
      ## Deal with Culture Vars
      dplyr::filter(!is.nan(cohend))
    final_compared_prePostfollow <- moderate_dat_prePostfollow %>% dplyr::bind_rows(no_moderate_dat_prePostfollow)
    final_compared_prepostFollow <- moderate_dat_prepostFollow %>% dplyr::bind_rows(no_moderate_dat_prepostFollow) %>% dplyr::bind_rows(culture_vars)
    final_compared_prepostfollow <- dplyr::bind_rows(final_compared_prePostfollow, final_compared_prepostFollow)
  }
  ## this is the higher level function which
  ## brings together bind_questions and summarize_comparison
  if ("PrePost" %in% compare) {
    ## PrePost Data: participants with complete Pre + Post
    compare_dat_prepost <- gathered_dat %>%
      dplyr::filter(Time %in% c("Pre", "Post")) %>%
      tidyr::drop_na(Response) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post"))) %>%
      ## count OMIDs and PrePost Time
      dplyr::add_count(OMID, variable_code) %>%
      ## only keep cases where Pre and Post exist
      dplyr::filter(n == 2)
    ## Calculate Scores for all data
    moderate_dat_prepost <- compare_dat_prepost %>%
      ## remove vars that depend on moderates being excluded (Q20 only for AV4)
      dplyr::filter(variable_code %nin% c("Q15", "Q16", "Q17", "Q20", "C1", "C2", "C3") | (variable_code == "Q20" & AssessmentVersion == 4)) %>%
      ## PrePost
      bind_questions(waves = "PrePost") %>%
      dplyr::mutate(Comparison = "PrePost") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithModerates") %>%
      ## Deal with Culture Vars
      dplyr::filter(!is.nan(cohend)) %>%
      dplyr::mutate(moderates = ifelse(variable_code %in% c("C1", "C2", "C3"), "CultureVars", moderates))
    ## Calculate scores where Moderates need to be excluded
    no_moderate_dat_prepost <- compare_dat_prepost %>%
      ## only include vars that depend on moderates being excluded (Q20 only for AV5+)
      dplyr::filter(variable_code %in% c("Q15", "Q16", "Q17") | (variable_code == "Q20" & AssessmentVersion >= 5)) %>%
      tidyr::drop_na(ppol_cat) %>%
      ## PrePost
      bind_questions(waves = "PrePost") %>%
      dplyr::mutate(Comparison = "PrePost") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithoutModerates")
    final_compared_prepost <- moderate_dat_prepost %>% dplyr::bind_rows(no_moderate_dat_prepost)
  }
  if ("PreFollow" %in% compare) {
    ## PreFollow Data: participants with complete Pre + FollowUp
    compare_dat_prefollow <- gathered_dat %>%
      dplyr::filter(Time %in% c("Pre", "FollowUp")) %>%
      tidyr::drop_na(Response) %>%
      dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "FollowUp"))) %>%
      ## count OMIDs and PreFollow Time
      dplyr::add_count(OMID, variable_code) %>%
      ## only keep cases where Pre and Post exist
      dplyr::filter(n == 2)
    ## Calculate Scores for all data
    moderate_dat_prefollow <- compare_dat_prefollow %>%
      ## remove vars that depend on moderates being excluded (Q20 only for AV4)
      dplyr::filter(variable_code %nin% c("Q15", "Q16", "Q17", "Q20", "C1", "C2", "C3") | (variable_code == "Q20" & AssessmentVersion == 4)) %>%
      ## PreFollow
      bind_questions(waves = "PreFollow") %>%
      dplyr::mutate(Comparison = "PreFollow") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithModerates") %>%
      ## Deal with Culture Vars
      dplyr::filter(!is.nan(cohend)) %>%
      dplyr::mutate(moderates = ifelse(variable_code %in% c("C1", "C2", "C3"), "CultureVars", moderates))
    ## Calculate scores where Moderates need to be excluded
    no_moderate_dat_prefollow <- compare_dat_prefollow %>%
      ## only include vars that depend on moderates being excluded (Q20 only for AV5+)
      dplyr::filter(variable_code %in% c("Q15", "Q16", "Q17") | (variable_code == "Q20" & AssessmentVersion >= 5)) %>%
      tidyr::drop_na(ppol_cat) %>%
      ## PreFollow
      bind_questions(waves = "PreFollow") %>%
      dplyr::mutate(Comparison = "PreFollow") %>%
      ## add indicator
      dplyr::mutate(moderates = "WithoutModerates")
    final_compared_prefollow <- moderate_dat_prefollow %>% dplyr::bind_rows(no_moderate_dat_prefollow)
  }
  ## if both PrePost and PreFollow are given
  # NOTE(review): only "all three" or "exactly one" comparison is handled
  # below; passing exactly two (e.g. c("PrePost", "PreFollow")) leaves
  # `final_compared` undefined and errors -- confirm intended.
  if (all(c("PrePost" ,"PreFollow", "PrePostFollow") %in% compare)) {
    final_compared <- final_compared_prepost %>%
      dplyr::bind_rows(final_compared_prefollow) %>%
      dplyr::bind_rows(final_compared_prepostfollow)
  }
  if (all("PrePost" == compare)) final_compared <- final_compared_prepost
  if (all("PreFollow" == compare)) final_compared <- final_compared_prefollow
  if (all("PrePostFollow" == compare)) final_compared <- final_compared_prepostfollow
  ## Ensure cohendCIlow <= cohendCIhi (abs() during summarising can swap the
  ## bounds of the original interval).
  final_compared <- final_compared %>%
    dplyr::mutate(cohendCIlow2 = ifelse(cohendCIhi < cohendCIlow, cohendCIhi, cohendCIlow)) %>%
    dplyr::mutate(cohendCIhi2 = ifelse(cohendCIhi < cohendCIlow, cohendCIlow, cohendCIhi)) %>%
    dplyr::select(-cohendCIlow, -cohendCIhi) %>%
    dplyr::rename(cohendCIlow = cohendCIlow2,
                  cohendCIhi = cohendCIhi2)
  return(final_compared)
}
#' Conduct t-tests and calculate Cohen's d
#'
#' This is a higher-level function that uses "om_compare", "bind_questions" and "summarize_comparison" to calculate t-tests and Cohen's d on Assessment data.
#' @param gathered_dat Assessment data as long format
#' @param compare With the `compare` argument you can specify either \code{"PrePost"}, \code{"PreFollow"} or both \code{c("PrePost", "PreFollow")} comparisons (the latter is the default).
#' @param aversion AssessmentVersion should be one of \code{"V4"}, \code{"V5/V5.1"} or \code{"All"}
#' @export
om_summarize_comparisons <- function(gathered_dat, aversion = "All", compare = c("PrePost", "PreFollow", "PrePostFollow")) {
  # Filters the data to the requested assessment version, runs om_compare(),
  # joins the matching codebook, and attaches readable outcome labels.
  # Variant <- "V5"
  # aversion <- "V4"
  # Optionally restrict the input to one assessment version.
  gathered_dat <- gathered_dat %>%
    do_if(.data = .,
          condition = aversion == "V4",
          call = ~{
            .x %>%
              dplyr::filter(AssessmentVersion == 4)
          }
    ) %>%
    do_if(.data = .,
          condition = aversion == "V5/V5.1",
          call = ~{
            .x %>%
              dplyr::filter(AssessmentVersion >= 5)
          }
    )
  # Run the comparisons, then join the codebook that matches `aversion`
  # (the v5 codebook is also used for "All").
  basicsummarystats <- gathered_dat %>%
    om_compare(compare) %>%
    do_if(.data = .,
          condition = aversion == "V5/V5.1",
          call = ~{
            .x %>%
              dplyr::left_join(assessmentv5_codebook %>%
                                 dplyr::rename(variable_code = Mapping)) %>%
              dplyr::mutate(Variant = "V5/V5.1")
          }
    ) %>%
    do_if(.data = .,
          condition = aversion == "V4",
          call = ~{
            .x %>%
              dplyr::left_join(assessmentv4_codebook %>%
                                 dplyr::rename(variable_code = Mapping)) %>%
              dplyr::mutate(Variant = "V4")
          }
    ) %>%
    do_if(.data = .,
          condition = aversion == "All",
          call = ~{
            .x %>%
              dplyr::left_join(assessmentv5_codebook %>%
                                 dplyr::rename(variable_code = Mapping)) %>%
              dplyr::mutate(Variant = "All")
          }
    ) %>%
    dplyr::rename(Question_txt = Content) %>%
    # Override the codebook text with fixed labels for the core questions.
    dplyr::mutate(Question_txt = dplyr::case_when(
      variable_code == "Q14" ~ "Affective Polarization",
      variable_code == "Q15" ~ "Liking for Ingroup",
      variable_code == "Q16" ~ "Liking for Outgroup",
      variable_code == "Q17" ~ "Ingroup-Outgroup Polarization",
      variable_code == "Q18" ~ "Intellectual Humility",
      variable_code == "Q19" ~ "Perspective-Taking",
      variable_code == "Q20" ~ "Social Closeness",
      T ~ Question_txt
    )) %>%
    dplyr::mutate(Outcome = dplyr::case_when(
      variable_code == "Q14" ~ 'Affective Polarization Measure',
      variable_code == "Q15" ~ 'Liking of Ingroup',
      variable_code == "Q16" ~ 'Liking of Outgroup',
      variable_code == "Q17" ~ 'Ingroup-Outgroup Polarization Measure',
      variable_code == "Q18" ~ 'Intellectual Humility Measure',
      variable_code == "Q19" ~ "Perspective-Taking",
      variable_code == "Q20" ~ "Social Closeness",
      T ~ Construct
    )) %>%
    # NOTE(review): this select() requires Pre, Post and FollowUp columns to
    # exist; they come from withinSE()'s spread step, so some `compare`
    # subsets may not produce all of them -- verify with partial runs.
    dplyr::select(Outcome, Question_txt, cohend:percentimproved, variable_code,
                  N, sd = SD, se = SE, ci = CI, Pre, Post, FollowUp, Comparison, moderates, Variant) %>%
    tidyr::drop_na(Outcome)
  return(basicsummarystats)
}
#' Prepare paired data for plot with within subject error term
#'
#' This function calculates several measures for plotting
#' @param gathered_dat Assessment data as long format
#' @param aversion AssessmentVersion should be one of \code{"V4"}, \code{"V5/V5.1"} or \code{"All"}
#' @export
om_label_stats <- function(gathered_dat, aversion = "All") {
  ## Optionally restrict the input to a single assessment version.
  gathered_dat <- gathered_dat %>%
    openmindR::do_if(.data = .,
                     condition = aversion == "V4",
                     call = ~{
                       .x %>%
                         dplyr::filter(AssessmentVersion == 4)
                     }
    ) %>%
    openmindR::do_if(.data = .,
                     condition = aversion == "V5/V5.1",
                     call = ~{
                       .x %>%
                         dplyr::filter(AssessmentVersion >= 5)
                     }
    )
  ## Within-subject summary per variable code, tagged with the code and
  ## variant. Note: the invalid `na.rm = T` argument previously passed to
  ## summary_se_within() (which has no such parameter and always removes
  ## missing values) has been dropped -- it triggered an unused-argument
  ## error.
  summarise_codes <- function(dat, codes) {
    codes %>%
      purrr::map_dfr(~summary_se_within(subset(dat, variable_code == .x),
                                        measurevar = "Response",
                                        withinvars = "Time",
                                        idvar = "OMID") %>%
                       dplyr::mutate(variable_code = .x)) %>%
      dplyr::mutate(Variant = aversion)
  }
  plot_dat <- dplyr::bind_rows(
    summarise_codes(gathered_dat, openmindR::q_strings_seps),
    summarise_codes(gathered_dat, openmindR::c_strings_seps),
    ## Q15-Q17 require a non-missing political category
    summarise_codes(tidyr::drop_na(gathered_dat, ppol_cat), c("Q15", "Q16", "Q17"))
  )
  final_dat <- plot_dat %>% dplyr::as_tibble()
  return(final_dat)
}
#' Run mixed effects model
#'
#' This function performs mixed models (Currently only works on Ann Miller experimental data)
#' @param gathered_dat Assessment data as long format
#' @param question Specify question that you want to perform analysis for (ex: \code{"Q18"})
#' @param plot_model logical. Show a coefficient plot of the model. Default is \code{FALSE}
#' @param get_effects logical. Get marginal effects. Default is \code{FALSE}
#' @param get_tidy logical. Get a tidy dataframe with estimates. Also calculates pseudo Cohen's d effect sizes. Default is \code{FALSE}
#' @export
om_mix_models <- function(gathered_dat, question, plot_model = F, get_effects = F, get_tidy = F) {
  ##todo:this function needs examples in the documentation
  ## where do we specify what the between subjects variable being tested in the model is?
  ## for ann miller, it's "condition"... does this function simply look for that variable?
  ## need to make this more general/robust to other experiments#
  # question <- "Q18"
  # is.data.frame(mods_dat2)
  ## some data wrangling
  # Keep complete Pre/Post pairs for the requested question; Condition, Time
  # and OMID become factors, with "Pre" as the Time reference level.
  mods_dat <- gathered_dat %>%
    dplyr::filter(variable_code == question) %>%
    dplyr::filter(Time %in% c("Pre", "Post")) %>%
    dplyr::mutate(Condition = as.factor(Condition)) %>%
    dplyr::mutate(Time = as.factor(Time)) %>%
    dplyr::mutate(OMID = as.factor(OMID)) %>%
    tidyr::drop_na(Response) %>%
    dplyr::add_count(OMID) %>%
    dplyr::filter(n == 2) %>%
    dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post"))) %>%
    base::as.data.frame()
  ## Assign to a symbol that's unlikely to be in use in .GlobalEnv
  ## (and make sure this function cleans up after itself!)
  # NOTE(review): writing to the global environment is a workaround,
  # presumably so downstream calls that re-evaluate the model's `data`
  # argument can find it; the on.exit() removal may break such re-evaluation
  # after this function returns -- verify before refactoring.
  assign(".TeMpVaR", mods_dat, envir = globalenv())
  on.exit(rm(.TeMpVaR, envir = globalenv()))
  ## getting unique IDs we will need this for calculating pseudo cohens d
  individs <- mods_dat %>% dplyr::distinct(OMID) %>% nrow
  ## Run model
  # Condition x Time interaction with per-participant random slopes for both
  # Condition and Time; ML fit (REML = F) and the observations-vs-random-
  # effects sanity check disabled.
  lme_dat <- lme4::lmer(Response ~ Condition*Time +(1+Condition|OMID) + (1+Time|OMID),
                        data = .TeMpVaR, REML = F,
                        control = lme4::lmerControl(check.nobs.vs.nRE = "ignore"))
  final <- list(lme_dat = lme_dat)
  if (plot_model) {
    ## get coefficient plot for model
    ggmod <- lme_dat %>%
      sjPlot::plot_model(Time = "std", show.p = T, show.values = T)# +
    # ggplot2::theme_minimal()
    final <- rlist::list.append(final, ggmod = ggmod)
  }
  if (get_effects) {
    ### This tells R to give the AP estimate at levels of Time and Ideology that we
    ### specify. In this example we want an estimate of AP for each level of time
    ### (Pre, Post, Follow-up) plotted at 1 sd below (liberal) and above
    ### (conservative) the midpoint (4) for ideology. It uses the info from mod1
    ### above to adjust for the within subjects variance we're throwing out.
    effects_dat <- ggeffects::ggeffect(lme_dat, c("Condition", "Time" ),
                                       x.as.factor = T,
                                       ci.lvl = .95,
                                       typical = "mean") %>%
      ### Now we turn both of these into factors to make plots easier (i.e. add labels
      ### and make sure time is in the right order, not graphed alphabetically)
      openmindR::do_if(.data = .,
                       condition = "Article" %in% unique(mods_dat$Condition),
                       call = ~{.x %>% dplyr::mutate(Condition = factor(x, levels = c("Article","OpenMind")))}
      ) %>%
      openmindR::do_if(.data = .,
                       condition = "Delayed Treatment" %in% unique(mods_dat$Condition),
                       call = ~{.x %>% dplyr::mutate(Condition = factor(x, levels = c("Experimental Treatment", "Delayed Treatment")))}
      ) %>%
      dplyr::mutate(Time = factor(group, levels = c("Pre", "Post")))
    final <- rlist::list.append(final, effects_dat = effects_dat)
  }
  if (get_tidy) {
    ## get tidy dataframe
    # NOTE(review): tidying merMod objects has moved to broom.mixed in newer
    # broom versions -- verify broom::tidy() still works here.
    coefs <- broom::tidy(lme_dat) %>%
      dplyr::filter(group == "fixed") %>%
      dplyr::mutate(n_coef = nrow(.)) %>%
      dplyr::mutate(n_dat = individs) %>%
      ## calculate pseudo cohens d
      # todo: add note in output that it is a pseudo-d
      dplyr::mutate(d = abs(estimate/(sqrt(n_dat - n_coef)*std.error)))
    final <- rlist::list.append(final, tidy_dat = coefs)
  }
  return(final)
}
#' Plot mixed effects model
#'
#' This function plots the results of mixed models (Currently only works on Ann Miller experimental data)
#' @param effects_dat \code{effects_dat} is a dataset produced by \code{\link{om_mix_models}} and supplies the marginal effects of the model
#' @param tidy_dat \code{tidy_dat} is a dataset produced by \code{\link{om_mix_models}} and supplies the pseudo cohen's d for plotting
#' @param var_label supply a character that is plotted as title and y-axis
#' @param show_stats Show statistics on the bottom right. Only possible if you supply \code{tidy_dat}
#' @export
om_mix_plot <- function(effects_dat, tidy_dat = NULL, var_label, show_stats = T) {
  ### Here's a pretty graph of effects by time period with the proper within subjects error bars/estimates
  ## Fail early with a clear message instead of an obscure dplyr error later.
  if (show_stats && is.null(tidy_dat)) {
    stop("`show_stats = TRUE` requires `tidy_dat` (produced by om_mix_models with `get_tidy = TRUE`).", call. = FALSE)
  }
  ggmod <-
    effects_dat %>%
    ggplot2::ggplot(ggplot2::aes(x = Time, y = predicted, fill = Condition))+
    ggplot2::geom_bar(position = ggplot2::position_dodge(),
                      stat = "identity",
                      colour = "black", # Use black outlines,
                      size = .3,show.legend=TRUE) + # Thinner lines
    ggplot2::geom_errorbar(ggplot2::aes(ymin = predicted - std.error, ymax = predicted + std.error),
                           size=.7, # Thinner lines
                           width=.2,
                           position=ggplot2::position_dodge(.9)) +
    ggplot2::scale_fill_manual("Condition", values = c("#3d4fa1", "#65c6c3")) +
    ggplot2::ylab(var_label)+
    ggplot2::xlab("Time Point") +
    ggplot2::coord_cartesian(ylim=c(0, 1))+
    ggplot2::ggtitle(var_label) +
    ggplot2::scale_y_continuous(expand = c(0, 0),limits = c(0,1)) +
    ggplot2::theme(panel.background = ggplot2::element_rect(fill="white"),
                   text = ggplot2::element_text(family="Poppins",size=20),
                   legend.text = ggplot2::element_text(size=12),
                   legend.position = c(.08, .81),
                   #legend.title=element_blank(),
                   plot.title = ggplot2::element_text(hjust = 0.5,size=28),
                   axis.line.x = ggplot2::element_line(),
                   axis.text.x = ggplot2::element_text(hjust=0.5,size=24),
                   axis.title.x = ggplot2::element_text(hjust=0.5, size=24),
                   axis.line.y = ggplot2::element_line(),
                   axis.text.y = ggplot2::element_text(hjust=0.5,size=24),
                   axis.title.y = ggplot2::element_text(hjust=0.5, size=24),
                   legend.margin = ggplot2::margin(0, 0, 0, 0))
  if (show_stats) {
    ## Build the caption from the fixed-effect rows of `tidy_dat`, matching
    ## the term labels of whichever experiment design is present.
    label_dat <- tidy_dat %>%
      dplyr::select(-group) %>%
      dplyr::mutate_at(dplyr::vars(estimate, std.error, d), ~openmindR::specify_decimal(.x, 3)) %>%
      #todo: add subscript p to d because it is a pseudo-d, not actual d
      dplyr::mutate(cite_stats = stringr::str_glue("B = {estimate}, SE = {std.error}, d = {d}")) %>%
      openmindR::do_if(.data = .,
                       condition = "ConditionExperimental Treatment" %in% unique(tidy_dat$term),
                       call = ~{.x %>%
                           dplyr::mutate(label = dplyr::case_when(
                             term == "(Intercept)" ~ stringr::str_glue("N = {n_dat}\n\n"),
                             term == "ConditionExperimental Treatment" ~ stringr::str_glue("Experimental v. Delayed: {cite_stats}\n\n"),
                             term == "TimePost" ~ stringr::str_glue("Pre v. Post: {cite_stats}\n\n"),
                             term == "ConditionExperimental Treatment:TimePost" ~ stringr::str_glue("Condition X Time: {cite_stats}")
                           )
                           )}
      ) %>%
      openmindR::do_if(.data = .,
                       condition = "ConditionOpenMind" %in% unique(tidy_dat$term),
                       call = ~{.x %>%
                           dplyr::mutate(label = dplyr::case_when(
                             term == "(Intercept)" ~ stringr::str_glue("N = {n_dat}\n\n"),
                             term == "ConditionOpenMind" ~ stringr::str_glue("Article v. OpenMind: {cite_stats}\n\n"),
                             term == "TimePost" ~ stringr::str_glue("Pre v. Post: {cite_stats}\n\n"),
                             ## BUG FIX: this term label used to carry a
                             ## trailing space ("...TimePost ") and therefore
                             ## never matched, leaving the interaction row's
                             ## caption NA.
                             term == "ConditionOpenMind:TimePost" ~ stringr::str_glue("Condition X Time: {cite_stats}")
                           )
                           )}
      ) %>%
      dplyr::pull(label) %>% glue::glue_collapse() %>% as.character()
    ggmod <- ggmod +
      ggplot2::labs(caption = label_dat) +
      ggplot2::theme(plot.caption = ggplot2::element_text(family = "Poppins", size = 14, colour = "#756f71"))
  }
  return(ggmod)
}
#' Run and plot mixed effects model
#'
#' This function allows to run and plot a mixed model. It makes use of both \code{\link{om_mix_models}} and \code{\link{om_mix_plot}} (currently only works on Ann Miller experimental data)
#' @param experiment Specify a dataset with experimental data
#' @param title specify a title and y-label for the plot; must be one of
#'   "Growth Mindset", "Intellectual Humility", "Affective Polarization",
#'   "Social Closeness" or "Perspective-Taking"
#' @export
om_mix_complete <- function(experiment, title) {
  #todo: would be nice to be able to specify ALL, some, or just one of the outcome variables
  ## Map the human-readable title onto its outcome variable.
  question <- dplyr::case_when(
    title == "Growth Mindset" ~ "Q11",
    title == "Intellectual Humility" ~ "Q18",
    title == "Affective Polarization" ~ "Q14",
    title == "Social Closeness" ~ "Q10",
    title == "Perspective-Taking" ~ "Q4"
  )
  ## case_when() yields NA for an unknown title, which previously failed
  ## further down with a cryptic error; fail early with a clear message.
  if (is.na(question)) {
    stop("Unknown title '", title, "'. Must be one of: Growth Mindset, ",
         "Intellectual Humility, Affective Polarization, Social Closeness, ",
         "Perspective-Taking.", call. = FALSE)
  }
  gg_dat <- openmindR::om_mix_models(experiment,
                                     question = question,
                                     plot_model = FALSE,
                                     get_effects = TRUE,
                                     get_tidy = TRUE)
  openmindR::om_mix_plot(gg_dat$effects_dat, gg_dat$tidy_dat, title)
}
#' Run T-Tests on Long Format data
#'
#' This function performs t-tests on v6 and v7 data
#'
#' @param gathered_dat Long format data
#' @param comparison Three possible comparisons "PrePost", "PreFollowUpT1T2" or "PreFollowUpT1T3". "PreFollowUpT1T2" performs Pre-Post comparison only for people who completed the FollowUp. "PreFollowUpT1T3" is the same sample of people (only those who completed the FollowUp) but compares them Pre-FollowUp.
#' @examples
#' ## Get Key (may differ in your code)
#' key <- readr::read_lines("../../Research/Projects/Keys/airtabler.txt")
#'
#' ## get v7 data
#' assessmentv7 <- om_download_at(key,
#'                                tables = "AssessmentV7",
#'                                clean = TRUE)
#'
#' ## Perform t-test on a single variable
#' assessmentv7 %>%
#'   om_gather("AffPol1") %>%
#'   om_ttest(comparison = "PrePost")
#'
#' ## get results for all variables
#' assessmentv7 %>%
#'   ## select only relevant variables and composite scores
#'   select(OMID, AffPol1Pre:IHCultureSub3FollowUp) %>%
#'   ## turn data into long format
#'   om_gather(which_strings = v7_var_strings) %>%
#'   ## perform t-test on each variable (for Pre and Post)
#'   om_ttest("PrePost") %>%
#'   ## arrange by cohens D
#'   arrange(desc(cohend))
#'
#' ## get v6 data
#' assessmentv6 <- om_download_at(key,
#'                                tables = "AssessmentV6",
#'                                clean = TRUE)
#'
#' ## get results for all variables
#' assessmentv6 %>%
#'   ## select only relevant variables and composite scores
#'   select(OMID, AffPol1Pre:IntellectualHumilityFollowUp,
#'          GrowthMindsetPre, GrowthMindsetPost, GrowthMindsetFollowUp,
#'          C1Pre, C5Pre, C6Pre,
#'          C1Post, C5Post, C6Post,
#'          C1FollowUp, C5FollowUp, C6FollowUp,
#'          -contains("Preparedness3")) %>%
#'   ## turn data into long format
#'   om_gather(which_strings = v6_var_strings) %>%
#'   ## perform t-test on each variable (for Pre and Post)
#'   om_ttest("PrePost") %>%
#'   ## arrange by cohens D
#'   arrange(desc(cohend))
#'
#' @export
om_ttest <- function(gathered_dat, comparison) {
  ## Validate comparison up front: an unsupported value would otherwise
  ## surface much later as a confusing "object 'internal' not found" error.
  if (!comparison %in% c("PrePost", "PreFollowUpT1T2", "PreFollowUpT1T3")) {
    stop('`comparison` must be one of "PrePost", "PreFollowUpT1T2" or "PreFollowUpT1T3".',
         call. = FALSE)
  }
  ## one data frame per outcome variable; tests run variable-by-variable
  gathered_dat <- gathered_dat %>%
    group_split(variable_code)
  final <- gathered_dat %>%
    map_dfr(~{
      if (comparison == "PrePost"){
        internal <- .x %>%
          dplyr::filter(Time %in% c("Pre", "Post")) %>%
          dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post"))) %>%
          ## C1/C5/C6 are excluded from Pre-Post comparisons
          dplyr::filter(variable_code %nin% c("C1", "C5", "C6"))
        if(nrow(internal)==0){
          return(NULL)
        }
        T2 <- "Post"
      }
      if (comparison == "PreFollowUpT1T2"){
        ## Pre v. Post, restricted to people who completed all three waves
        internal <- .x %>%
          dplyr::mutate(Time = as.factor(Time)) %>%
          dplyr::mutate(OMID = as.factor(OMID)) %>%
          tidyr::drop_na(Response) %>%
          dplyr::add_count(OMID) %>%
          dplyr::filter(n == 3) %>%
          dplyr::filter(Time %in% c("Pre", "Post")) %>%
          dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "Post"))) %>%
          dplyr::filter(variable_code %nin% c("C1", "C5", "C6"))
        if(nrow(internal)==0){
          return(NULL)
        }
        T2 <- "Post"
      }
      if (comparison == "PreFollowUpT1T3"){
        var <- .x %>%
          dplyr::slice(1) %>%
          dplyr::pull(variable_code)
        ## C1/C5/C6 only exist in two waves, so completers have 2 rows,
        ## all other variables have 3
        OMID_n <- 3
        if(var %in% c("C1", "C5", "C6")){
          OMID_n <- 2
        }
        internal <- .x %>%
          dplyr::mutate(Time = as.factor(Time)) %>%
          dplyr::mutate(OMID = as.factor(OMID)) %>%
          tidyr::drop_na(Response) %>%
          dplyr::add_count(OMID) %>%
          dplyr::filter(n == OMID_n) %>%
          dplyr::filter(Time %in% c("Pre", "FollowUp")) %>%
          dplyr::mutate(Time = forcats::fct_relevel(Time, c("Pre", "FollowUp")))
        T2 <- "FollowUp"
      }
      ## keep only complete pairs (both time points present per person),
      ## then compute paired effect sizes and t statistics
      ttest_dat <- internal %>%
        dplyr::mutate(Time = as.factor(Time)) %>%
        dplyr::mutate(OMID = as.factor(OMID)) %>%
        tidyr::drop_na(Response) %>%
        dplyr::add_count(OMID) %>%
        dplyr::filter(n == 2) %>%
        dplyr::mutate(var_code = variable_code) %>%
        dplyr::group_by(variable_code) %>%
        dplyr::summarize(
          cohend = abs(
            effsize::cohen.d(Response~Time, paired = TRUE, conf.level = 0.95)$estimate
          )
          ,
          ## CI bounds kept signed (not abs) so the interval stays meaningful
          cohendCIlow = #abs(
            effsize::cohen.d(Response~Time, paired = TRUE, conf.level = 0.95)$conf.int[1]
          #)
          ,
          cohendCIhi = #abs(
            effsize::cohen.d(Response~Time, paired = TRUE, conf.level = 0.95)$conf.int[2]
          #)
          ,
          tstat = abs(t.test(Response~Time, paired=TRUE)$statistic),
          pvalue = t.test(Response~Time, paired=TRUE)$p.value,
          df = t.test(Response~Time, paired=TRUE)$parameter,
          percentimproved = perc_improved(Response[Time == "Pre"],
                                          Response[Time == T2],
                                          (df+1),
                                          .data$var_code[1]),
          percentworse = perc_improved(Response[Time == "Pre"],
                                       Response[Time == T2],
                                       (df+1),
                                       .data$var_code[1], reverse = T)
        )
      ## attach within-subject standard errors per variable
      vars <- internal %>% select(variable_code) %>% dplyr::distinct() %>% dplyr::pull()
      within_stats <- vars %>% map_dfr(~openmindR:::withinSE(internal, variable = .x, WaveType = NULL))
      final <- ttest_dat %>%
        dplyr::left_join(within_stats, by = "variable_code") %>%
        mutate(N = df+1)
      return(final)
    })
  return(final)
}
#' Helper function to calculate percent of people who improved
#'
#' This function calculates percent of people who improved from pre to post/followup
#' @param all_pre Pre Data
#' @param all_post Post data
#' @param total N
#' @param variable_code variable code
#' @param reverse if \code{TRUE}, return the percent who moved in the *worse*
#'   direction instead (used for \code{percentworse})
#' @return Proportion of pairs that moved in the relevant direction, or
#'   \code{NA} (with a message) for variables with no known direction.
#'   \code{NA} values in the inputs propagate to the result.
#' @export
perc_improved <- function(all_pre, all_post, total, variable_code, reverse = FALSE){
  ## Scales where a HIGHER score counts as improvement
  ups <- c("GrowthMindset", "CIHS_LIO",
           "OutgroupLiking", "OutgroupMotivation",
           "Preparedness", "C1", "C6",
           "IntellectualHumility", "GM", "IHSub1", "IHSub2", "IHSub3",
           "IHCultureSub1", "IHCultureSub2", "IHCultureSub3", "SE",
           "Belong", "Dissent", "Tolerance", "IngroupLiking", "IngroupMotivation",
           "OutgroupLiking", "OutgroupMotivation", "MotivationCon", "MotivationProg")
  ## Scales where a LOWER score counts as improvement
  downs <- c("AffPol1", "AffPol2",
             "GBSS", "MAA", "C5", "Anxiety", "Attribution", "IntAnx", "SocialDistance", "Avoidance")
  if(!(variable_code %in% ups) && !(variable_code %in% downs)){
    message(paste0("Variable '", variable_code, "' has no percent improvement direction. Variable 'percentimproved' set to NA.\n"))
    return(NA)
  }
  ## A pair counts when the score moved in the scale's good direction;
  ## reverse = TRUE flips that to count worsening. Ties count as neither.
  ## xor() collapses the original four-way nested-if into two cases.
  if (xor(variable_code %in% ups, reverse)) {
    p_improv <- sum(all_pre < all_post) / total
  } else {
    p_improv <- sum(all_pre > all_post) / total
  }
  return(p_improv)
}
## Score a single string with the VADER sentiment model and return a one-row
## tibble of scores with a numeric `compound` column. Internal helper
## (not exported); requires the "vader" package.
get_vader <- function(x) {
## NOTE(review): vader::getVader() appears to return a named vector; the
## enframe -> t() -> row_to_names() dance reshapes it into a one-row frame
## whose names become column names. Fragile against changes in getVader()'s
## output format -- TODO confirm against the installed vader version.
tibble::enframe(vader::getVader(x)) %>%
## drop entries without a name (cannot become column names)
dplyr::filter(name != "") %>%
t() %>%
as.data.frame() %>%
janitor::clean_names() %>%
## promote the first row (the names) to column headers
janitor::row_to_names(1) %>%
tibble::as_tibble() %>%
## after the transpose every column is text; coerce compound to numeric
## (as.character first guards against factor columns)
dplyr::mutate(compound = as.character(compound)) %>%
dplyr::mutate(compound = as.numeric(compound)) %>%
tibble::as_tibble()
}
## Score a string with Python's TextBlob (via the textblob R wrapper) and
## return a one-row tibble with numeric `polarity` and `subjectivity`.
get_blob <- function(string) {
blob <- textblob::text_blob(string)
## NOTE(review): blob$sentiment is a Python object, so [0]/[1] presumably
## follow Python's 0-based indexing (0 = polarity, 1 = subjectivity).
## In plain R, x[0] would select nothing -- confirm reticulate semantics
## before touching these indices.
tibble::tibble(polarity = as.character(blob$sentiment[0]),
subjectivity = as.character(blob$sentiment[1])) %>%
dplyr::mutate_all(as.numeric)
}
## Per-author positive-sentiment share from NRC-matched words, plus a
## version rescaled from [0, 1] to [-1, 1]. Returns one row per author
## with columns OMID, nrc, nrc_rescaled.
get_nrc_sentiment <- function(nrc_dat) {
  ## keep only the sentiment rows; emotions are handled elsewhere
  sentiment_rows <- nrc_dat %>%
    dplyr::filter(sentiment %in% c("positive", "negative"))

  ## sentiment frequency per author plus each author's total word count
  per_author_counts <- sentiment_rows %>%
    dplyr::count(OMID, sentiment, .drop = FALSE) %>%
    dplyr::group_by(OMID, .drop = FALSE) %>%
    dplyr::mutate(total = sum(n)) %>%
    dplyr::ungroup()

  ## positive share suffices -- negative is simply its complement
  nrc_sentiment <- per_author_counts %>%
    dplyr::filter(sentiment == "positive") %>%
    dplyr::mutate(nrc = n / total) %>%
    dplyr::mutate(nrc_rescaled = scales::rescale(nrc, from = c(0, 1), to = c(-1, 1))) %>%
    dplyr::select(-n, -total, -sentiment)

  return(nrc_sentiment)
}
## Per-author shares of NRC emotion words (anger, trust, ...), one column
## per emotion, plus `_rescaled` versions mapped from [0, 1] to [-1, 1].
get_nrc_emotions <- function(nrc_dat){
  ## Denominator: number of DISTINCT emotion words each author used.
  total_emotions <- nrc_dat %>%
    ## we only want emotions here
    dplyr::filter(!(sentiment %in% c("positive", "negative"))) %>%
    ## group by author
    dplyr::group_by(OMID, .drop = FALSE) %>%
    ## only keep distinct words
    dplyr::distinct(word, .keep_all = TRUE) %>%
    ## count the number of distinct words per author
    dplyr::tally(name = "total") %>%
    dplyr::ungroup()
  nrc_emotions <- nrc_dat %>%
    ## we only want emotions here
    dplyr::filter(!(sentiment %in% c("positive", "negative"))) %>%
    ## count how often emotions appear per author
    dplyr::count(OMID, sentiment, .drop = FALSE) %>%
    ## add the total unique word count
    dplyr::left_join(total_emotions, by = "OMID") %>%
    ## percentage of emotion words
    dplyr::mutate(perc = n / total) %>%
    ## remove variables that we will not need
    dplyr::select(-total, -n) %>%
    ## widen: one column per emotion (original comment said "long format",
    ## but spread() produces WIDE data; todo: migrate to tidyr::pivot_wider)
    tidyr::spread(sentiment, perc, fill = 0) %>%
    ## add rescaled variables (todo: migrate mutate_at to across())
    dplyr::mutate_at(dplyr::vars(anger:trust), list(rescaled = ~scales::rescale(.x,
                                                                                from = c(0, 1),
                                                                                to = c(-1, 1))))
  ## Return explicitly: the function previously relied on the invisible
  ## value of the final assignment.
  return(nrc_emotions)
}
## Fail soft: any error yields a minimal placeholder row instead of aborting.
get_nrc_emotions <- purrr::possibly(get_nrc_emotions, otherwise = tibble::tibble(OMID = "0", anger = NA, trust = NA))
## Match author words against the NRC lexicon, compute both sentiment and
## emotion scores, and merge them onto sentiment_dat (keyed by OMID).
get_nrc <- function(author_words, sentiment_dat){
  ## nrc: keep only words present in the NRC sentiment/emotion lexicon
  nrc_dat <- author_words %>%
    dplyr::inner_join(tidytext::get_sentiments("nrc"), by = "word")
  nrc_sentiment <- get_nrc_sentiment(nrc_dat)
  nrc_emotions <- get_nrc_emotions(nrc_dat)
  sentiment_dat <- sentiment_dat %>%
    dplyr::left_join(nrc_sentiment, by = "OMID") %>%
    dplyr::left_join(nrc_emotions, by = "OMID")
  ## Return explicitly: the function previously relied on the invisible
  ## value of the final assignment.
  return(sentiment_dat)
}
#' Use various text analysis tools
#'
#' This function performs sentiment analysis and other text tools on any custom strings that you supply.
#' @param .data dataset including text and a personal identifier (must be named `OMID`)
#' @param method which method(s) should be applied? Currently supports: "nrc", "bing", "afinn", "textblob", "vader", "sentiment_stanford", "sentimentr_polarity", "affect3d", "receptiveness", and "toxicity"
#' @param text specify the text variable which should be scored
#' @param verbose print progress output (currently only forwarded to the "toxicity" method)
#' @export
om_textscore <- function(.data, method, text, verbose = TRUE) {
  ## words by author
  author_words <- .data %>%
    ## get tokens from text (here: individual words)
    tidytext::unnest_tokens(word, {{ text }})
  ## results accumulate onto this frame via per-method left joins below
  sentiment_dat <- .data %>%
    dplyr::select(OMID, {{ text }}) %>%
    tidyr::drop_na({{ text }})
  .data <- .data %>%
    dplyr::mutate(body = {{ text }}) %>%
    tidyr::drop_na(body)
  ## Check optional dependencies up front so the user gets an actionable
  ## install hint. `&&` (instead of the original elementwise `&`) is the
  ## correct scalar operator here and short-circuits, so check_for_pkg()
  ## only runs when the method was actually requested.
  if("vader" %in% method && !check_for_pkg("vader")){
    stop('You are missing the "vader" package. Please install it like this: install.packages("vader")')
  }
  if("textblob" %in% method && !check_for_pkg("textblob")){
    stop('You are missing the "textblob" package. Please install it like this: devtools::install_github("news-r/textblob")\n\nCheck out instructions for setting up textblob here: https://github.com/news-r/textblob')
  }
  if("toxicity" %in% method && !check_for_pkg("peRspective")){
    stop('You are missing the "peRspective" package. Please install it like this: devtools::install_github("favstats/openmindR")')
  }
  if("sentiment_stanford" %in% method && !check_for_pkg("stansent")){
    stop('You are missing the "stansent" package. Please install it like this: devtools::install_github("trinker/stansent").\n\nAlso make sure that you have the "coreNLPsetup" package installed. Install it like this: devtools::install_github("trinker/coreNLPsetup")\n\nCheck out instructions for setting up stansent here. https://github.com/trinker/stansent')
  }
  if("sentimentr_polarity" %in% method && !check_for_pkg("sentimentr")){
    stop('You are missing the "sentimentr" package. Please install it like this: devtools::install_github("trinker/sentimentr").\n\nAlso make sure that you have the "lexicon" package installed. Install it like this: devtools::install_github("trinker/lexicon")')
  }
  if("affect3d" %in% method && !check_for_pkg("affectr")){
    stop('You are missing the "affectr" package. Please install it like this: devtools::install_github("markallenthornton/affectr").')
  }
  if("receptiveness" %in% method && !check_for_pkg("politeness")){
    stop('You are missing the "politeness" package. Please install it like this: install.packages("politeness").')
  }
  if ("nrc" %in% method){
    message("\nCalculating NRC scores")
    t1 <- Sys.time()
    sentiment_dat <- get_nrc(author_words, sentiment_dat)
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("bing" %in% method){
    message("\nCalculating BING scores")
    t1 <- Sys.time()
    bing_dat <- author_words %>%
      dplyr::inner_join(tidytext::get_sentiments("bing"), by = "word") %>%
      ## count how often negative and positive sentiment appears per author
      dplyr::count(OMID, sentiment, .drop = FALSE) %>%
      ## calculate word count per author
      dplyr::group_by(OMID, .drop = FALSE) %>%
      dplyr::mutate(total = sum(n)) %>%
      dplyr::ungroup() %>%
      ## we only need positive sentiment because negative is just the inverse
      dplyr::filter(sentiment == "positive") %>%
      ## percentage of positive words
      dplyr::mutate(bing = n / total) %>%
      ## rescale sentiment -1 to +1
      dplyr::mutate(bing_rescaled = scales::rescale(bing, from = c(0, 1), to = c(-1, 1))) %>%
      ## remove variables that we will not need
      dplyr::select(-n, -total, -sentiment)
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(bing_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("afinn" %in% method){
    message("\nCalculating AFINN scores")
    t1 <- Sys.time()
    afinn_dat <- author_words %>%
      dplyr::inner_join(tidytext::get_sentiments("afinn"), by = "word") %>%
      dplyr::group_by(OMID) %>%
      dplyr::summarise(afinn = mean(value, na.rm = TRUE),
                       afinn_sd = sd(value, na.rm = TRUE)) %>%
      dplyr::ungroup() %>%
      ## rescale sentiment -1 to +1 (AFINN values range from -5 to +5)
      dplyr::mutate(afinn_rescaled = scales::rescale(afinn, from = c(-5, 5), to = c(-1, 1)))
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(afinn_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("sentimentr_polarity" %in% method){
    message("\nCalculating sentimentr_polarity scores")
    t1 <- Sys.time()
    sentimentr_polarity_dat <- .data %>%
      dplyr::mutate(dialogue_split = sentimentr::get_sentences(body)) %$%
      sentimentr::sentiment_by(text.var = dialogue_split,
                               by = list(OMID),
                               polarity_dt = lexicon::hash_sentiment_jockers_rinker) %>%
      dplyr::rename(sentimentr_polarity_sd = sd,
                    sentimentr_polarity = ave_sentiment) %>%
      dplyr::select(-word_count) %>%
      ## rescale sentiment -1 to +1
      dplyr::mutate(sentimentr_polarity_rescaled = scales::rescale(sentimentr_polarity, to = c(-1, 1)))
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(sentimentr_polarity_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("sentiment_stanford" %in% method){
    message("\nCalculating sentiment_stanford scores")
    t1 <- Sys.time()
    sentiment_stanford_dat <- .data %$%
      stansent::sentiment_stanford_by(text.var = body,
                                      by = list(OMID)) %>%
      dplyr::rename(sentiment_stanford_sd = sd,
                    sentiment = ave_sentiment) %>%
      dplyr::select(-word_count) %>%
      ## rescale sentiment -1 to +1
      dplyr::mutate(sentiment_stanford_rescaled = scales::rescale(sentiment, to = c(-1, 1))) %>%
      dplyr::rename(sentiment_stanford = sentiment)
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(sentiment_stanford_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("vader" %in% method){
    message("\nCalculating vader scores")
    t1 <- Sys.time()
    vaderscoring_dat <- .data %>%
      ## apply vader row by row; warnings from the scorer are suppressed
      dplyr::mutate(vader = body %>% purrr::map(~suppressWarnings(get_vader(.x))))
    vader_dat <- vaderscoring_dat %>%
      tidyr::unnest(vader) %>%
      dplyr::mutate(compound = as.numeric(compound)) %>%
      dplyr::select(OMID, compound) %>%
      ## get mean vader scores per author
      dplyr::group_by(OMID) %>%
      dplyr::summarize(vader = mean(compound, na.rm = TRUE)) %>%
      dplyr::ungroup()
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(vader_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("textblob" %in% method){
    message("\nCalculating textblob scores")
    t1 <- Sys.time()
    textblob_dat <- .data %>%
      ## apply textblob
      dplyr::mutate(textblob = body %>% purrr::map(get_blob)) %>%
      tidyr::unnest(textblob) %>%
      ## get mean textblob scores per author
      dplyr::group_by(OMID) %>%
      dplyr::summarize_at(dplyr::vars(polarity, subjectivity), ~mean(.x, na.rm = TRUE)) %>%
      dplyr::rename(textblob = polarity) %>%
      dplyr::rename(textblob_subjectivity = subjectivity)
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(textblob_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("affect3d" %in% method){
    message("\nCalculating affect3d scores")
    t1 <- Sys.time()
    affect3d_dat <- .data %>%
      dplyr::pull(body) %>%
      affectr::affect3d() %>%
      dplyr::as_tibble() %>%
      janitor::clean_names() %>%
      ## reattach OMIDs; affect3d() output is assumed to be row-aligned
      ## with .data
      dplyr::bind_cols(.data, .) %>%
      dplyr::select(OMID,
                    rationality_affect3d = rationality,
                    social_impact_affect3d = social_impact,
                    valence_affect3d = valence) %>%
      ## get mean affect3d scores per author
      dplyr::group_by(OMID) %>%
      dplyr::summarize_at(dplyr::vars(rationality_affect3d, social_impact_affect3d, valence_affect3d),
                          ~mean(.x, na.rm = TRUE)) %>%
      dplyr::ungroup()
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(affect3d_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("receptiveness" %in% method){
    message("\nCalculating receptiveness scores")
    t1 <- Sys.time()
    receptiveness_dat <- .data %>%
      dplyr::mutate(receptiveness = politeness::receptiveness(body)) %>%
      dplyr::group_by(OMID) %>%
      dplyr::summarize(receptiveness = mean(receptiveness)) %>%
      dplyr::ungroup() %>%
      ## add rescaled variables (receptiveness is on a 1-7 scale)
      dplyr::mutate(receptiveness_rescaled = scales::rescale(receptiveness, from = c(1, 7),
                                                             to = c(-1, 1)))
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(receptiveness_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  if("toxicity" %in% method){
    message("\nCalculating toxicity scores")
    t1 <- Sys.time()
    ## apply toxicity via the Perspective API (network call)
    toxic_transcripts <- .data %>%
      dplyr::mutate(OMID = as.character(OMID)) %>%
      peRspective::prsp_stream(body,
                               text_id = OMID,
                               verbose = verbose,
                               safe_output = TRUE,
                               score_model = prsp_models)
    toxicity_dat <- toxic_transcripts %>%
      ## key back on OMID (as character, matching the mutate above)
      dplyr::mutate(OMID = as.character(text_id)) %>%
      ## add transcripts data
      dplyr::left_join(.data, by = "OMID") %>%
      ## get mean toxicity scores per author
      dplyr::group_by(OMID) %>%
      dplyr::summarize_at(dplyr::vars(TOXICITY:UNSUBSTANTIAL), ~mean(.x, na.rm = TRUE)) %>%
      dplyr::select(OMID, TOXICITY:UNSUBSTANTIAL) %>%
      ## add rescaled variables
      dplyr::mutate_at(dplyr::vars(TOXICITY:UNSUBSTANTIAL), list(rescaled = ~scales::rescale(.x, from = c(0, 1), to = c(-1, 1))))
    sentiment_dat <- sentiment_dat %>%
      dplyr::left_join(toxicity_dat, by = "OMID")
    t2 <- Sys.time()
    print(round(t2 - t1))
  }
  return(sentiment_dat)
}
## NOTE(review): the two lines below are web-scraping residue (an "embed this
## snippet" page footer), not R code; commented out so the file parses.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.