#' Create a data set with one row for each sudden gain/loss
#'
#' @description This function returns a wide data set with one row for each sudden gain/loss and assigns a unique identifier to each.
#' The data set includes variables indicating values around the period of each gain/loss, and calculates descriptives of each gain/loss.
#' @param data A data set in wide format including an ID variable and variables for each measurement point.
#' @param sg_crit1_cutoff Numeric, specifying the cut-off value to be used for the first sudden gains criterion.
#' The function \code{\link{define_crit1_cutoff}} can be used to calculate a cutoff value based on the Reliable Change Index (RCI; Jacobson & Truax, 1991).
#' If set to \code{NULL} the first criterion wont be applied.
#' @param sg_crit2_pct Numeric, specifying the percentage change to be used for the second sudden gains/losses criterion.
#' If set to \code{NULL} the second criterion wont be applied.
#' @param sg_crit3 If set to \code{TRUE} the third criterion will be applied automatically adjusting the critical value for missingness.
#' If set to \code{FALSE} the third criterion wont be applied.
#' @param sg_crit3_alpha Numeric, alpha for the student t-test (two-tailed) to determine the critical value to be used for the third criterion.
#' Degrees of freedom are based on the number of available data in the three sessions preceding the gain and the three sessions following the gain.
#' @param sg_crit3_adjust Logical, specify whether critical value gets adjusted for missingness, see Lutz et al. (2013) and the documentation of this R package for further details.
#' This argument is set to \code{TRUE} by default adjusting the critical value for missingness as described in the package documentation and Lutz et al. (2013):
#' A critical value of 2.776 is used when all three data points before and after a potential gain are available,
#' where one datapoint is missing either before or after a potential gain a critical value of 3.182 is used,
#' and where one datapoint is missing both before and after the gain a critical value of 4.303 is used (for sg_crit3_alpha = 0.05).
#' If set to \code{FALSE} the critical value set in \code{sg_crit3_critical_value} will instead be used for all comparisons, regardless of missingnes in the sequence of data points that are investigated for potential sudden gains.
#' @param sg_crit3_critical_value Numeric, specifying the critical value to instead be used for all comparisons, regardless of missingnes in the sequence of data points that are investigated for potential sudden gains.#'
#' @param id_var_name String, specifying the name of the ID variable.
#' @param sg_var_list Vector, specifying the variable names of each measurement point sequentially.
#' @param tx_start_var_name String, specifying the variable name of the first measurement point of the intervention.
#' @param tx_end_var_name String, specifying the variable name of the last measurement point of the intervention.
#' @param sg_measure_name String, specifying the name of the measure used to identify sudden gains/losses.
#' @param identify String, specifying whether to identify sudden gains (\code{"sg"}) using \code{\link{identify_sg}} or sudden losses (\code{"sl"}) using \code{\link{identify_sl}}.
#' The default is to identify sudden gains (\code{"sg"}).
#' @param identify_sg_1to2 Logical, indicating whether to identify sudden losses from measurement point 1 to 2.
#' If set to TRUE, this implies that the first variable specified in \code{sg_var_list} represents a baseline measurement point, e.g. pre-intervention assessment.
#' @references Tang, T. Z., & DeRubeis, R. J. (1999). Sudden gains and critical sessions in cognitive-behavioral therapy for depression. Journal of Consulting and Clinical Psychology, 67(6), 894–904. \doi{10.1037/0022-006X.67.6.894}.
#' @return A wide data set with one row per sudden gain/loss.
#' @export
#' @examples # Create bypsg data set
#' create_bysg(data = sgdata,
#' sg_crit1_cutoff = 7,
#' id_var_name = "id",
#' tx_start_var_name = "bdi_s1",
#' tx_end_var_name = "bdi_s12",
#' sg_var_list = c("bdi_s1", "bdi_s2", "bdi_s3",
#' "bdi_s4", "bdi_s5", "bdi_s6",
#' "bdi_s7", "bdi_s8", "bdi_s9",
#' "bdi_s10", "bdi_s11", "bdi_s12"),
#' sg_measure_name = "bdi")
#'
create_bysg <- function(data, sg_crit1_cutoff, id_var_name, sg_var_list, tx_start_var_name, tx_end_var_name, sg_measure_name, sg_crit2_pct = .25, sg_crit3 = TRUE, sg_crit3_alpha = .05, sg_crit3_adjust = TRUE, sg_crit3_critical_value = 2.776, identify = c("sg", "sl"), identify_sg_1to2 = FALSE) {
# Check arguments
identify <- base::match.arg(identify)
# Before doing anything, save the raw data that was put in function as data argument
data_in <- data
# Run identify_sg function first to find positions of gain
if (identify == "sg") {
# Suppress warnings because I dont want to carry them over from the identify function
# Insted return an error for the same case, no gains/losses for all create functions
data_crit123 <- base::suppressWarnings(suddengains::identify_sg(data = data,
id_var_name = dplyr::all_of(id_var_name),
sg_var_list = dplyr::all_of(sg_var_list),
sg_crit1_cutoff = sg_crit1_cutoff,
sg_crit2_pct = sg_crit2_pct,
sg_crit3 = sg_crit3,
sg_crit3_alpha = sg_crit3_alpha,
sg_crit3_adjust = sg_crit3_adjust,
sg_crit3_critical_value = sg_crit3_critical_value,
identify_sg_1to2 = identify_sg_1to2))
# Calculate number of sudden gains
sg_sum <- data_crit123 %>%
dplyr::select(dplyr::matches("sg_\\d+to\\d+")) %>%
base::sum(., na.rm = T)
# Stop if no sudden gains were identified and return error
if (sg_sum == 0) {
stop("No sudden gains were identified.", call. = FALSE)
}
} else if (identify == "sl") {
# Suppress warnings because I dont want to carry them over from the identify function
# Insted return an error for the same case, no gains/losses for all create functions
data_crit123 <- base::suppressWarnings(suddengains::identify_sl(data = data,
id_var_name = dplyr::all_of(id_var_name),
sg_var_list = dplyr::all_of(sg_var_list),
sg_crit1_cutoff = sg_crit1_cutoff,
sg_crit2_pct = sg_crit2_pct,
sg_crit3 = sg_crit3,
sg_crit3_alpha = sg_crit3_alpha,
sg_crit3_adjust = sg_crit3_adjust,
sg_crit3_critical_value = sg_crit3_critical_value,
identify_sg_1to2 = identify_sg_1to2))
# Calculate number of sudden gains
sg_sum <- data_crit123 %>%
dplyr::select(dplyr::matches("sl_\\d+to\\d+")) %>%
base::sum(., na.rm = T)
# Stop if no sudden losses were identified and return error
if (sg_sum == 0) {
stop("No sudden losses were identified.", call. = FALSE)
}
}
# Set missings to zero to calculate in next step
data_crit123[base::is.na(data_crit123)] <- 0
# Start creating bysg dataset
data_bysg <- data_crit123 %>%
dplyr::select(!! rlang::sym(id_var_name), dplyr::matches("sg_|sl_\\d+to\\d+")) %>%
tidyr::gather(key = "session", value = "sg_crit123", -!! rlang::sym(id_var_name)) %>%
dplyr::mutate(session = as.numeric(stringr::str_extract(session, "(\\d+)")),
sg_session_n = session) %>%
dplyr::arrange(!! rlang::sym(id_var_name)) %>%
dplyr::group_by(!! rlang::sym(id_var_name)) %>%
dplyr::mutate(sg_freq_byperson = sum(sg_crit123, na.rm = TRUE)) %>%
dplyr::arrange(dplyr::desc(sg_freq_byperson)) %>%
dplyr::filter(sg_crit123 != 0) %>%
dplyr::select(-session) %>%
dplyr::mutate(id_sg = paste0(!! rlang::sym(id_var_name), "_sg_", sg_session_n)) %>%
dplyr::select(!! rlang::sym(id_var_name), id_sg, dplyr::everything()) %>%
dplyr::ungroup()
# Select variables for further calculations from data
# First check if tx_start_var_name and tx_end_var_name are in sg_var_list, if not add them
if (tx_start_var_name %in% sg_var_list == TRUE) {
sg_var_select <- base::c(id_var_name, sg_var_list)
} else if (tx_start_var_name %in% sg_var_list == FALSE) {
sg_var_select <- base::c(id_var_name, tx_start_var_name, sg_var_list)
}
if (tx_end_var_name %in% sg_var_list == TRUE) {
sg_var_select <- c(sg_var_select)
} else if (tx_end_var_name %in% sg_var_list == FALSE) {
sg_var_select <- c(sg_var_select, tx_end_var_name)
}
data_bysg <- data_bysg %>%
dplyr::left_join(dplyr::select(data_in, dplyr::all_of(sg_var_select)), by = id_var_name)
# Set start value for numbering to extract the correct values around the gain
if (identify_sg_1to2 == TRUE) {
start_numbering <- 0
} else if (identify_sg_1to2 == FALSE) {
start_numbering <- 1
}
# Extract scores of the sudden gains measure around the sudden gain ----
data_extract <- base::suppressMessages(
suddengains::extract_values(data_bysg,
id_var_name = "id_sg",
extract_var_list = sg_var_list,
extract_measure_name = sg_measure_name,
start_numbering = start_numbering,
add_to_data = FALSE)
)
# Combine the extracted scores with the bysg dataset
data_bysg <- data_bysg %>%
dplyr::left_join(data_extract, by = "id_sg")
# Calculate descriptive sudden gains variables ----
# Create varialbe names for mutate command here
tx_change <- base::paste("sg", sg_measure_name, "tx", "change", sep = "_")
var_x_n <- base::paste("sg", sg_measure_name, "n", sep = "_")
var_x_n_post1 <- base::paste("sg", sg_measure_name, "n1", sep = "_")
# Calculate sg reversal differently for gains and losses
if (identify == "sg") {
data_bysg <- data_bysg %>%
dplyr::mutate(sg_magnitude = !! rlang::sym(var_x_n) - !! rlang::sym(var_x_n_post1),
!! tx_change := !! rlang::sym(tx_start_var_name) - !! rlang::sym(tx_end_var_name),
sg_change_proportion = sg_magnitude / !! rlang::sym(tx_change),
# Replace "-Inf" in sg_change_proportion with NA for cases where tx_chage is 0
sg_change_proportion = dplyr::na_if(sg_change_proportion, -Inf),
sg_reversal_value = !! rlang::sym(var_x_n_post1) + (sg_magnitude / 2))
# Create lsit with all ids for each sudden gain
id_sg_list <- data_bysg$id_sg
sg_reversal <- data_bysg %>%
dplyr::select(id_sg, sg_session_n, sg_reversal_value, dplyr::all_of(sg_var_list)) %>%
rename_sg_vars(sg_var_list, start_numbering = 0) %>%
tidyr::gather(key = "time_str", value = "value", -id_sg, -sg_session_n, -sg_reversal_value) %>%
dplyr::mutate(time_num = as.numeric(stringr::str_extract(time_str, "\\d+"))) %>%
dplyr::select(-time_str) %>%
dplyr::mutate(index = time_num - sg_session_n) %>%
dplyr::filter(index >= 2) %>%
dplyr::arrange(id_sg, time_num) %>%
dplyr::group_by(id_sg) %>%
dplyr::mutate(sg_reversal = dplyr::if_else(value > sg_reversal_value, 1, 0),
sg_reversal = dplyr::if_else(sg_reversal == 1, 1, 0)) %>%
dplyr::select(id_sg, sg_reversal) %>%
dplyr::filter(sg_reversal == 1) %>%
base::unique() %>%
dplyr::ungroup() %>%
tidyr::complete(id_sg = id_sg_list) %>%
tidyr::replace_na(base::list(sg_reversal = 0))
} else if (identify == "sl") {
data_bysg <- data_bysg %>%
dplyr::mutate(sg_magnitude = !! rlang::sym(var_x_n) - !! rlang::sym(var_x_n_post1),
!! tx_change := !! rlang::sym(tx_start_var_name) - !! rlang::sym(tx_end_var_name),
sg_change_proportion = base::abs(sg_magnitude) / !! rlang::sym(tx_change),
# Replace "-Inf" in sg_change_proportion with NA for cases where tx_chage is 0
sg_change_proportion = dplyr::na_if(sg_change_proportion, -Inf),
# The calculation of the sg_reversal_value is still correct as sg_magnitude is negative for losses
sg_reversal_value = !! rlang::sym(var_x_n_post1) + (sg_magnitude / 2))
# Create lsit with all ids for each sudden gain
id_sg_list <- data_bysg$id_sg
sg_reversal <- data_bysg %>%
dplyr::select(id_sg, sg_session_n, sg_reversal_value, dplyr::all_of(sg_var_list)) %>%
rename_sg_vars(sg_var_list, start_numbering = 0) %>%
tidyr::gather(key = "time_str", value = "value", -id_sg, -sg_session_n, -sg_reversal_value) %>%
dplyr::mutate(time_num = as.numeric(stringr::str_extract(time_str, "\\d+"))) %>%
dplyr::select(-time_str) %>%
dplyr::mutate(index = time_num - sg_session_n) %>%
dplyr::filter(index >= 2) %>%
dplyr::arrange(id_sg, time_num) %>%
dplyr::group_by(id_sg) %>%
dplyr::mutate(sg_reversal = dplyr::if_else(value < sg_reversal_value, 1, 0),
sg_reversal = dplyr::if_else(sg_reversal == 1, 1, 0)) %>%
dplyr::select(id_sg, sg_reversal) %>%
dplyr::filter(sg_reversal == 1) %>%
base::unique() %>%
dplyr::ungroup() %>%
tidyr::complete(id_sg = id_sg_list) %>%
tidyr::replace_na(base::list(sg_reversal = 0))
}
data_bysg <- data_bysg %>%
dplyr::left_join(sg_reversal, by = "id_sg")
# Return tibble
data_bysg %>%
tibble::as_tibble() %>%
dplyr::arrange(!! rlang::sym(id_var_name))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.