R/import_score_neuromap_sidp.R

Defines functions import_score_neuromap_sidp

Documented in import_score_neuromap_sidp

#' Import and Score NeuroMAP SIDP Survey Data
#'
#' This function imports survey data from the NeuroMAP S1 - SIDP, processes it,
#' and computes scores for different personality disorders based on survey responses.
#' It performs data cleaning, reformatting, column sorting, and calculation of both
#' categorical and dimensional scores for various personality disorders.
#' Additionally, it handles specific diagnostic criteria for certain disorders.
#'
#' @importFrom qualtRics all_surveys fetch_survey
#' @importFrom dplyr rename select filter mutate arrange rowwise ungroup
#' @importFrom janitor clean_names
#' @importFrom tidyr pivot_longer
#' @importFrom stringr str_replace str_replace_all str_extract
#' @importFrom magrittr %>%
#'
#' @param ... Additional arguments passed to `qualtRics::fetch_survey`.
#'
#' @return A list containing two elements:
#'         - `summary`: a dataframe with the summary scores for each personality disorder.
#'         - `items`: a list of dataframes, each containing item-level data for a specific disorder.
#'
#' @examples
#' # Example usage:
#' result <- import_score_neuromap_sidp()
#' summary_data <- result$summary
#' item_data <- result$items
#'
#' @author Nate Hall
#' @export

import_score_neuromap_sidp <- function(...){
  # peek at surveys available
  my_surveys <- qualtRics::all_surveys() #%>% print(n = 100)

  sidp_id <- my_surveys %>% filter(name == "NeuroMAP S1 - SIDP (outside of the pipe - use this one)") %>% pull(id)

  sidp_items <- qualtRics::fetch_survey(sidp_id)
  colnames(sidp_items)

  ##----------------------
  ##  Clean-up dataframe
  ##----------------------

  sidp_df <- sidp_items %>%
    dplyr::rename(id = session_info_4,
                  interviewer = session_info_6) %>%
    select(id, interviewer, StartDate, EndDate, ends_with("_rating")) %>%
    janitor::clean_names() %>%
    rename_with(~ str_replace_all(., "_rating", "")) %>%
    rename_with(~ str_replace(., "^[xX]", "")) %>%
    rename_with(~ str_replace(., "^(\\d+)_([a-z]+)$", "\\2_\\1")) %>%
    rename(antso_c = c_antso) %>%
    # for some reason, this is the only item that needs to be manually formatted
    mutate(avoid_7 = case_when(avoid_7 == 0 ~ "1 - Not Present",
                               avoid_7 == 1 ~ "2 - Subthreshold",
                               avoid_7 == 2 ~ "3 - Present",
                               avoid_7 == 3 ~ "4 - Strongly Present",
                               TRUE ~ NA
    ))

  # names(sidp_df)
  # sidp_df %>% select(contains("avoid"))

  ##----------------------------------------------------------------------
  ##  Rearrange columns to be grouped by disorder and sorted numerically
  ##----------------------------------------------------------------------


  sort_columns <- function(df, prefix_order) {
    cols <- names(df)
    # Extracting prefix and numeric parts
    prefix <- str_extract(cols, "^[a-z]+")
    num <- str_extract(cols, "\\d+")

    # Creating a data frame for sorting
    sorting_df <- data.frame(column = cols, prefix = prefix, num = as.numeric(num))
    sorting_df$num[is.na(sorting_df$num)] <- Inf # Treat NA as the highest number for sorting

    # Sorting columns based on prefix and then number
    sorted_cols <- sorting_df %>%
      filter(prefix %in% prefix_order) %>%
      arrange(match(prefix, prefix_order), num) %>%
      pull(column)

    # Include columns that did not match any prefix at the end
    non_matching_cols <- setdiff(cols, sorted_cols)
    c(sorted_cols, non_matching_cols)
  }

  prefix_order <- c("bordl", "avoid", "narci", "antso", "obcmp", "histr", "parnd", "depen", "szoid", "stypl")

  sorted_col_names <- sort_columns(sidp_df[,-c(1:4)], prefix_order)

  # perform re-ordering
  sidp_item_df <- sidp_df[, c("id", "interviewer", "start_date", "end_date", sorted_col_names)] %>%
    # remove fake entries if they exist (removed the 6 or so hanging out in Qualtrics on 12/6/23)
    dplyr::filter(!is.na(id), !id %in% c(999, 9999)) %>% arrange(id) %>%
    mutate(across(!c(id, start_date, end_date), ~as.factor(.)))

  ##------------------------------------------------------------------
  ##  Count Symptoms of Each Diagnosis and Compute Dimensional Score
  ##------------------------------------------------------------------


  count_responses <- function(data, prefix) {
    pd_columns <- grep(prefix, names(data), value = TRUE)

    # Count each response type
    data[paste0(prefix, "_sum_not_present")] <- apply(data[pd_columns], 1, function(x) sum(grepl("^1 - Not Present", x), na.rm = TRUE))
    data[paste0(prefix, "_sum_subthreshold")] <- apply(data[pd_columns], 1, function(x) sum(grepl("^2 - Subthreshold", x), na.rm = TRUE))
    data[paste0(prefix, "_sum_present")] <- apply(data[pd_columns], 1, function(x) sum(grepl("^3 - Present", x), na.rm = TRUE))
    data[paste0(prefix, "_sum_strongly_present")] <- apply(data[pd_columns], 1, function(x) sum(grepl("^4 - Strongly Present", x), na.rm = TRUE))
    data[paste0(prefix, "_sum_NA")] <- rowSums(is.na(data[pd_columns]), na.rm = TRUE)

    # Calculate total score
    data[paste0(prefix, "_sum_total")] <- rowSums(data[c(paste0(prefix, "_sum_not_present"),
                                                         paste0(prefix, "_sum_subthreshold"),
                                                         paste0(prefix, "_sum_present"),
                                                         paste0(prefix, "_sum_strongly_present"))],
                                                  na.rm = TRUE)

    # Calculate dimensional score
    data <- data %>% mutate("{prefix}_dimensional_score" :=
                              .data[[paste0(prefix, "_sum_subthreshold")]] * 1 +
                              .data[[paste0(prefix, "_sum_present")]] * 2 +
                              .data[[paste0(prefix, "_sum_strongly_present")]] * 3)

    data <- data %>% rowwise()

    if (prefix == "bordl") {
      data <- data %>% mutate(bordl_dx_dsm = if_else(sum(bordl_sum_present, bordl_sum_strongly_present) >= 5, 1, 0),
                              bordl_dx_neuromap = if_else(sum(bordl_sum_present, bordl_sum_strongly_present) >= 3 & (bordl_6 == "3 - Present" | bordl_6 == "4 - Strongly Present"), 1, 0))
    } else if (prefix == "avoid") {
      data <- data %>% mutate(avoid_dx_dsm = if_else(sum(avoid_sum_present, avoid_sum_strongly_present) >= 4, 1, 0))
    } else if (prefix == "narci") {
      data <- data %>% mutate(narci_dx_dsm = if_else(sum(narci_sum_present, narci_sum_strongly_present) >= 5, 1, 0))
    } else if (prefix == "antso") {
      data <- data %>% mutate(antso_dx_dsm = if_else(sum(antso_sum_present, antso_sum_strongly_present) >= 3 & (grepl("3", antso_c) | grepl("4", antso_c)), 1, 0))
    } else if (prefix == "obcmp") {
      data <- data %>% mutate(obcmp_dx_dsm = if_else(sum(obcmp_sum_present, obcmp_sum_strongly_present) >= 4, 1, 0))
    } else if (prefix == "histr") {
      data <- data %>% mutate(histr_dx_dsm = if_else(sum(histr_sum_present, histr_sum_strongly_present) >= 5, 1, 0))
    } else if (prefix == "parnd") {
      data <- data %>% mutate(parnd_dx_dsm = if_else(sum(parnd_sum_present, parnd_sum_strongly_present) >= 4, 1, 0))
    } else if (prefix == "depen") {
      data <- data %>% mutate(depen_dx_dsm = if_else(sum(depen_sum_present, depen_sum_strongly_present) >= 5, 1, 0))
    } else if (prefix == "szoid") {
      data <- data %>% mutate(szoid_dx_dsm = if_else(sum(szoid_sum_present, szoid_sum_strongly_present) >= 4, 1, 0))
    } else if (prefix == "stypl") {
      data <- data %>% mutate(stypl_dx_dsm = if_else(sum(stypl_sum_present, stypl_sum_strongly_present) >= 5, 1, 0))
    }

    data <- data %>% ungroup()


    # data %>% select(contains(prefix))


    # Remove item columns leaving only summary scores
    data <- data %>% dplyr::select(!all_of(pd_columns))

    return(data)
  }


  # create a copy of item-level data, which we append summaries and then remove
  data <- sidp_summary_df <- sidp_item_df

  item_list <- list()

  for(prefix in prefix_order) {
    item_list[[prefix]] <- sidp_item_df %>% select(id, interviewer, start_date, end_date, contains(prefix))
    sidp_summary_df <- count_responses(sidp_summary_df, prefix)
  }

  return_list <- list(summary = sidp_summary_df,
                      items = item_list)

  return(return_list)
}
PennStateDEPENdLab/dependlab documentation built on April 10, 2024, 5:15 p.m.