R/runAnalysis.R

Defines functions runAnalysis

Documented in runAnalysis

## ----setup, message=FALSE, warning=FALSE, results='hide'--------------------------------------------------------------------------------
# my_packages <- c('tidyverse', 'readxl', 'DT', 'rcartocolor', 'gghighlight',
#                  'cowplot', 'broom', 'devtools', 'tableone')
#
# existing_pkgs <- installed.packages()[,"Package"]
# to_install <- setdiff(my_packages, existing_pkgs)
# if (length(to_install)) install.packages(to_install)
# if (!'icd' %in% existing_pkgs) devtools::install_github('jackwasey/icd')

# lapply(my_packages, library, character.only = TRUE)
# theme_set(theme_bw() +
#             theme(legend.title = element_blank(),
#                   panel.grid.minor = element_blank()))
# for (r_file in list.files('R', full.names = TRUE)) source(r_file)

#' Run analysis
#'
#' @param mysite Your site acronym
#' @param mask_thres Obfuscation small count mask threshold (e.g. 10)
#' @param blur_abs Absolute max of obfuscation blurring range
#' @param include_race Boolean. Whether race should be included
#' in the regression model.
#' @param out_dir Optional. Directory where resulting outputs are located.
#' Defaults to 'output'.
#' @param data_dir Optional. Directory where datasets are located.
#' Defaults to 'Input'.
#'
#' @return NULL. Result files are written out to the `out_dir` directory.
#'
#' @export
#' @import dplyr
#' @importFrom stats cor glm lm median sd
#' @importFrom forcats fct_recode fct_reorder
#' @importFrom tidyr pivot_longer pivot_wider replace_na
#'
runAnalysis <-
  function(mysite,
           mask_thres,
           blur_abs,
           include_race = TRUE,
           out_dir = 'output',
           data_dir = 'Input') {
    # count mask threshold
    # absolute max of blurring range
    clin_raw <-
      readr::read_csv(
        file.path(data_dir, 'LocalPatientClinicalCourse.csv'),
        col_types = list(patient_num = readr::col_character())
      )
    demo_raw <-
      readr::read_csv(
        file.path(data_dir, 'LocalPatientSummary.csv'),
        col_types = list(patient_num = readr::col_character()),
        na = '1900-01-01'
      )
    obs_raw <-
      readr::read_csv(
        file.path(data_dir, 'LocalPatientObservations.csv'),
        col_types = list(patient_num = readr::col_character())
      )

    ## -------------------------------------------------------------------------
    neuro_patients <- obs_raw %>%
      filter(days_since_admission >= 0,
             concept_code %in% neuro_icds_10$icd) %>%
      distinct(patient_num, concept_code)

    neuro_pt_post <- unique(neuro_patients$patient_num)

    non_neuro_patients <-
      data.frame(patient_num = setdiff(demo_raw$patient_num, neuro_pt_post)) %>%
      mutate(concept_code = 'NN')

    readmissions <- clin_raw %>%
      group_by(patient_num) %>%
      mutate(delta_hospitalized = diff(c(in_hospital[1], in_hospital))) %>%
      ungroup() %>%
      filter(delta_hospitalized != 0,
             in_hospital == 1) %>%
      add_count(patient_num, name = 'n_readmissions') %>%
      arrange(desc(n_readmissions)) %>%
      select(patient_num, n_readmissions) %>%
      distinct()

    ## -------------------------------------------------------------------------
    days_count_min_max <- obs_raw %>%
      group_by(patient_num) %>%
      summarise(
        distinct_days = n_distinct(days_since_admission),
        min_hos = min(days_since_admission),
        .groups = 'drop'
      )

    demo_df <- demo_raw %>%
      mutate(
        time_to_severe = severe_date - admission_date,
        time_to_severe = ifelse(time_to_severe < 0, NA, time_to_severe),
        time_to_death = death_date - admission_date,
        time_to_death = ifelse(time_to_death < 0, NA, time_to_death),
        readmitted = patient_num %in% readmissions$patient_num,
        neuro_post = patient_num %in% neuro_pt_post %>%
          as.factor() %>%
          fct_recode(neuro_cond = "TRUE",
                     no_neuro_cond = "FALSE"),
        Survival = as.factor(deceased) %>%
          fct_recode(Alive = "0", Deceased = "1"),
        sex = as.factor(sex),
        race = as.factor(race),
        age_group = as.factor(age_group),
        Severity = as.factor(severe) %>%
          fct_recode(Severe = "1", `Non-severe` = "0"),
        n_stay = as.numeric(last_discharge_date - admission_date,
                            units = "days")
      ) %>%
      left_join(days_count_min_max, by = 'patient_num')

    ## -------------------------------------------------------------------------
    vars_to_obfs <- c("sex",
                      'age_group',
                      "race",
                      "Severity",
                      "Survival",
                      "readmitted")
    get_stats <-
      function(x)
        demo_stats(demo_df, x, blur_abs, mask_thres)
    demo_obfus_table <- lapply(vars_to_obfs, get_stats) %>%
      do.call(rbind, .)

    ## -------------------------------------------------------------------------
    nstay_obfus_table <- nstay_stats(demo_df, blur_abs, mask_thres)
    severity_obfus_table <-
      severity_stats(demo_df, blur_abs, mask_thres)
    survival_obfus_table <-
      survival_stats(demo_df, blur_abs, mask_thres)
    readmission_obfus_table <- demo_df %>%
      left_join(readmissions, by = 'patient_num') %>%
      replace_na(list(n_readmissions = 0)) %>%
      readmission_stats(blur_abs, mask_thres)

    ## -------------------------------------------------------------------------
    n_patients <- nrow(demo_raw)
    obs_raw <- obs_raw %>%
      filter(concept_type %in% c("DIAG-ICD10", "DIAG-ICD9"))

    ## -------------------------------------------------------------------------
    # for elixhauser
    comorb_names_elix <- get_quan_elix_names()
    comorbs_elix <- as.vector(comorb_names_elix$Abbreviation)

    # t1: earliest time point to consider comorbidities
    # t2: latest time point to consider comorbidities
    # example <- t1 = -365, and t2 = -1 will map all all codes up to a year prior but before admission (admission = day 0)

    comorb_elix <- map_char_elix_codes(
      df = obs_raw,
      comorb_names = comorb_names_elix,
      t1 = -365,
      t2 = -15,
      map_type = 'elixhauser'
    )

    ## -------------------------------------------------------------------------
    # elixhauser
    index_scores_elix <- comorb_elix$index_scores %>%
      rename('elixhauser_score' = van_walraven_score)
    # van Walraven is a modification of Elixhauser comorbidity measure
    # doi.org/10.1097/MLR.0b013e31819432e5
    mapped_codes_table_elix <- comorb_elix$mapped_codes_table
    comorb_names_elix$Abbreviation <-
      as.character(comorb_names_elix$Abbreviation)

    ## -------------------------------------------------------------------------
    n_comorbs <- colSums(index_scores_elix[, comorbs_elix])
    pos_comorbs <- names(n_comorbs[n_comorbs > 0])
    elix_mat <- cor(index_scores_elix[, pos_comorbs])
    comorb_unique <- index_scores_elix %>%
      select(patient_num, elixhauser_score) %>%
      left_join(demo_df, by = 'patient_num')

    elix_obfus_table <-
      elix_stats(comorb_unique, blur_abs, mask_thres)

    other_obfus_table <-
      bind_rows(
        nstay_obfus_table,
        readmission_obfus_table,
        severity_obfus_table,
        survival_obfus_table,
        elix_obfus_table
      )

    ## -------------------------------------------------------------------------
    scores_unique <-
      left_join(index_scores_elix, demo_df, by = 'patient_num')

    scores_neuro <- obs_raw %>%
      # 1 patient can have different code but each only counted once
      distinct(patient_num, concept_code) %>%
      left_join(neuro_icds_10, by = c('concept_code' = 'icd')) %>%
      left_join(scores_unique, by = 'patient_num') %>%
      filter(!is.na(elixhauser_score)) %>%
      mutate(
        concept_code = case_when(
          is.na(`Neurological Disease Category`) ~ 'NN',
          TRUE ~ concept_code
        ) %>%
          as.factor() %>%
          fct_reorder(-elixhauser_score),
        `Neurological Disease Category` =
          as.factor(`Neurological Disease Category`) %>%
          fct_reorder(elixhauser_score)
      ) %>%
      {
        .
      }

    elix_obfus_table1 <-
      Reduce(
        function(...)
          left_join(..., by = c("Comorbidity", "Abbreviation")),
        lapply(
          c('no_neuro_cond', 'neuro_cond'),
          list_table1,
          df = scores_unique,
          num_pats = nrow(demo_df),
          comorb_names = comorb_names_elix,
          blur_abs = blur_abs,
          mask_thres = mask_thres
        )
      ) %>%
      mutate(n_Total = n_no_neuro_cond + n_neuro_cond,
             prop_Total = n_Total / nrow(demo_raw)) %>%
      arrange(desc(n_Total))


    ## -------------------------------------------------------------------------
    severe_reg_elix <-
      glm(
        severe ~ neuro_post + elixhauser_score + sex + age_group + race,
        data = scores_unique,
        family = 'binomial'
      )

    deceased_reg_elix <-
      glm(
        deceased ~ neuro_post + elixhauser_score + sex + age_group + race,
        data = scores_unique,
        family = 'binomial'
      )

    n_stay_reg_elix <-
      lm(n_stay ~ neuro_post + elixhauser_score + race + sex + age_group,
         data = scores_unique)


    ## -------------------------------------------------------------------------
    nstay_df <- neuro_patients %>%
      bind_rows(non_neuro_patients) %>%
      left_join(demo_df, by = 'patient_num') %>%
      left_join(index_scores_elix, by = 'patient_num') %>%
      # mutate(concept_code = fct_reorder(concept_code, n_stay)) %>%
      left_join(neuro_icds_10, by = c('concept_code' = 'icd')) %>%
      mutate(
        full_icd = case_when(
          concept_code == 'NN' ~ 'No neurological condition',
          TRUE ~ paste0(`ICD-10 Description`, ' (', concept_code, ')')
        ) %>%
          as.factor() %>% fct_reorder(n_stay)
      )

    summarised_obfus_icd <- nstay_df %>%
      group_by(concept_code) %>%
      summarise(
        mean_stay = mean(n_stay),
        median_stay = median(n_stay),
        sd_stay = sd(n_stay),
        mean_elix = mean(elixhauser_score, na.rm = TRUE),
        median_elix = median(elixhauser_score, na.rm = TRUE),
        sd_elix = sd(elixhauser_score, na.rm = TRUE),
        n_patients = n(),
        prop_deceased = mean(deceased),
        prop_severe = mean(severe),
        .groups = 'drop'
      ) %>%
      blur_it('n_patients', blur_abs, mask_thres)


    ## ----save-results-----------------------------------------------------------------------------------------------------------------------
    list_results = list(
      demo_table = demo_obfus_table,
      other_obfus_table = other_obfus_table,
      elix_obfus_table1 = elix_obfus_table1,
      summarised_obfus_icd = summarised_obfus_icd
    )

    list_results <-
      lapply(list_results, function(x)
        mutate(x, site = mysite))
    list_results <- c(
      list_results,
      list(
        site = mysite,
        elix_mat = elix_mat,
        n_stay_reg_elix = n_stay_reg_elix,
        severe_reg_elix = severe_reg_elix,
        deceased_reg_elix = deceased_reg_elix
      )
    )


    ## -------------------------------------------------------------------------
    ### Part 2: PNS vs CNS

    ## -------------------------------------------------------------------------
    neuro_types <- c('None', 'Peripheral', 'Central', 'Both')

    neuro_patients <- obs_raw %>%
      filter(days_since_admission >= 0, ) %>%
      right_join(neuro_icds_10, by = c('concept_code' = 'icd')) %>%
      filter(!is.na(patient_num)) %>%
      distinct(patient_num, concept_code, pns_cns) %>%
      group_by(patient_num) %>%
      mutate(nerv_sys_count = length(unique(pns_cns))) %>%
      ungroup() %>%
      mutate(neuro_type = case_when(nerv_sys_count == 2 ~ 'Both',
                                    TRUE ~ as.character(pns_cns)))

    patients_pns_cns <- neuro_patients %>%
      distinct(patient_num, neuro_type)

    neuro_pt_post <- unique(neuro_patients$patient_num)

    non_neuro_patients <-
      data.frame(patient_num = setdiff(demo_raw$patient_num, neuro_pt_post)) %>%
      mutate(concept_code = 'NN')

    readmissions <- clin_raw %>%
      group_by(patient_num) %>%
      mutate(delta_hospitalized = diff(c(in_hospital[1], in_hospital))) %>%
      ungroup() %>%
      filter(delta_hospitalized != 0,
             in_hospital == 1) %>%
      add_count(patient_num, name = 'n_readmissions') %>%
      arrange(desc(n_readmissions)) %>%
      select(patient_num, n_readmissions) %>%
      distinct()


    ## -------------------------------------------------------------------------
    days_count_min_max <- obs_raw %>%
      group_by(patient_num) %>%
      summarise(
        distinct_days = n_distinct(days_since_admission),
        min_hos = min(days_since_admission),
        .groups = 'drop'
      )

    demo_df <- demo_raw %>%
      mutate(
        time_to_severe = severe_date - admission_date,
        time_to_severe = ifelse(time_to_severe < 0, NA, time_to_severe),
        time_to_death = death_date - admission_date,
        time_to_death = ifelse(time_to_death < 0, NA, time_to_death),
        readmitted = patient_num %in% readmissions$patient_num,
        Survival = as.factor(deceased) %>%
          fct_recode(Alive = "0", Deceased = "1"),
        sex = as.factor(sex),
        race = as.factor(race),
        age_group = as.factor(age_group),
        Severity = as.factor(severe) %>%
          fct_recode(Severe = "1", `Non-severe` = "0"),
        n_stay = as.numeric(last_discharge_date - admission_date,
                            units = "days")
      ) %>%
      left_join(select(neuro_patients, patient_num, neuro_type),
                by = 'patient_num') %>%
      replace_na(list(neuro_type = 'None')) %>%
      mutate(neuro_post = forcats::fct_relevel(neuro_type, neuro_types)) %>%
      left_join(days_count_min_max, by = 'patient_num')


    ## -------------------------------------------------------------------------
    get_stats <-
      function(x)
        demo_stats(demo_df, x, blur_abs, mask_thres)

    demo_obfus_table <- lapply(vars_to_obfs, get_stats) %>%
      do.call(rbind, .)


    ## -------------------------------------------------------------------------
    nstay_obfus_table <- nstay_stats(demo_df, blur_abs, mask_thres)
    severity_obfus_table <-
      severity_stats(demo_df, blur_abs, mask_thres)
    survival_obfus_table <-
      survival_stats(demo_df, blur_abs, mask_thres)
    readmission_obfus_table <- demo_df %>%
      left_join(readmissions, by = 'patient_num') %>%
      replace_na(list(n_readmissions = 0)) %>%
      readmission_stats(blur_abs, mask_thres)

    other_obfus_table <-
      bind_rows(
        nstay_obfus_table,
        readmission_obfus_table,
        severity_obfus_table,
        survival_obfus_table
      )

    ## -------------------------------------------------------------------------
    scores_unique <-
      left_join(index_scores_elix, demo_df, by = 'patient_num')

    scores_neuro <- obs_raw %>%
      # 1 patient can have different code but each only counted once
      distinct(patient_num, concept_code) %>%
      left_join(neuro_icds_10, by = c('concept_code' = 'icd')) %>%
      left_join(scores_unique, by = 'patient_num') %>%
      filter(!is.na(elixhauser_score)) %>%
      mutate(
        concept_code = case_when(
          is.na(`Neurological Disease Category`) ~ 'NN',
          TRUE ~ concept_code
        ) %>%
          as.factor() %>%
          fct_reorder(-elixhauser_score),
        `Neurological Disease Category` =
          as.factor(`Neurological Disease Category`) %>%
          fct_reorder(elixhauser_score)
      ) %>%
      {
        .
      }


    elix_obfus_table1 <-
      Reduce(
        function(...)
          left_join(..., by = c("Comorbidity", "Abbreviation")),
        lapply(
          neuro_types,
          list_table1,
          df = scores_unique,
          num_pats = nrow(demo_df),
          comorb_names = comorb_names_elix,
          blur_abs = blur_abs,
          mask_thres = mask_thres
        )
      ) %>%
      mutate(
        n_Total = n_None + n_Central + n_Peripheral + n_Both,
        prop_Total = n_Total / nrow(demo_raw)
      ) %>%
      arrange(desc(n_Total))

    ## -------------------------------------------------------------------------
    if (include_race){
      severe_reg_elix <-
        glm(
          severe ~ neuro_post + elixhauser_score + sex + age_group + race,
          data = scores_unique,
          family = 'binomial'
        )
      deceased_reg_elix <-
        glm(
          deceased ~ neuro_post + elixhauser_score + sex + age_group + race,
          data = scores_unique,
          family = 'binomial'
        )

      n_stay_reg_elix <-
        lm(n_stay ~ neuro_post + elixhauser_score + race + sex + age_group,
           data = scores_unique)
    } else {
      severe_reg_elix <-
        glm(
          severe ~ neuro_post + elixhauser_score + sex + age_group,
          data = scores_unique,
          family = 'binomial'
        )
      deceased_reg_elix <-
        glm(
          deceased ~ neuro_post + elixhauser_score + sex + age_group,
          data = scores_unique,
          family = 'binomial'
        )

      n_stay_reg_elix <-
        lm(n_stay ~ neuro_post + elixhauser_score + sex + age_group,
           data = scores_unique)
    }


    ## ----save-results---------------------------------------------------------
    list_results_cpns = list(
      demo_table = demo_obfus_table,
      other_obfus_table = other_obfus_table,
      elix_obfus_table1 = elix_obfus_table1,
      summarised_obfus_icd = summarised_obfus_icd
    )

    list_results_cpns <-
      lapply(list_results_cpns, function(x)
        mutate(x, site = mysite))
    list_results_cpns <- c(
      list_results_cpns,
      list(
        site = mysite,
        elix_mat = elix_mat,
        n_stay_reg_elix = n_stay_reg_elix,
        severe_reg_elix = severe_reg_elix,
        deceased_reg_elix = deceased_reg_elix
      )
    )

    results <- list(list_results = list_results,
                    list_results_cpns = list_results_cpns)
    site_results <- paste0(mysite, '_results')
    assign(site_results, results)
    save(list = site_results,
         file = file.path(out_dir, paste0(mysite, '-results.rda')))


  }
trang1618/Phase2.1NeuroRPackage documentation built on Jan. 27, 2021, 8:03 p.m.