R/case_crossover.R

Defines functions case_crossover

Documented in case_crossover

#' Case-crossover analysis
#'
#' @description Performs case-crossover analysis
#'
#' @param case_stay The data.table generated with age_study_period_restrictions() function.
#' @param data A data.table of electronic medical records (EMRs): the same is used in first_case().
#' @param exposure_diagnoses A character vector containing codes to identify 
#' exposure diagnoses in stays.
#' @param exposure_procedures A character vector containing codes to identify 
#' exposure procedures in stays.
#' @param screening_index_stay A Boolean. If TRUE, the case stay is also screened for exposure. 
#' If FALSE, the case stay is ignored.
#' @param unique_exposure A Boolean. If TRUE, patients with multiple stays with exposures are excluded.
#' @param interval_length A scalar giving the duration of each interval.
#' @param number_of_interval A scalar giving the number of intervals to compute.
#' @param wash_out A scalar giving the wash-out period (defaut = 365 days).
#' @param los_max A scalar giving the  maximum possible length for an episode with exposure.
#'
#' @return A list
#' 
#' @export 
#'
#' @examples 
#' data(ep)
#' obj0 <- ep
#' 
#' obj1 <- first_case(
#'   data = obj0,
#'   diagnoses_case =  'PE',
#'   diagnoses_exclusion = 'EXCLUSION',
#'   exclude_in_case_stay = FALSE,
#'   n_of_stays_max = 20
#' )
#' 
#' obj1 
#' 
#' obj2 <- age_study_period_restrictions(
#'   data = obj0,
#'   cases = obj1,
#'   starting_year = 2007,
#'   final_year = 2013,
#'   age_min = 0,
#'   age_max = 120
#' )
#' 
#' obj2
#' 
#' obj3 <- case_crossover(
#'   case_stay = obj2,
#'   data = obj0,
#'   exposure_diagnoses = '',
#'   exposure_procedures = 'THR',
#'   screening_index_stay = FALSE,
#'   unique_exposure = TRUE,
#'   interval_length = 42,
#'   number_of_interval = 8,
#'   wash_out = 365,
#'   los_max = 42
#' )
#' obj3 
#' obj3$graph
#' @export
#' 
#' @details 
#' The exposure is defined as a combination of diagnoses AND medical procedures which 
#' is sought on the one hand during the case period and on the other hand during the 
#' control period (one year earlier). The case period and hence the control period 
#' were split into several intervals. A paired-matched interval approach was used, as 
#' described by Mittleman. Conditional logistic regression was used to compare the likelihood 
#' of exposure during each interval of the case period with that during the control period. 
#' An OR and its 95% confidence interval (CI) was computed for each interval. This ORs reflects 
#' the risk of onset of the primary outcome compared to the basal risk. 
#' He/she also defined the maximum possible length for an episode with exposure. 
#' The case stay was excluded from the exposure screening period, if required. 
#' A patient presenting the exposure in multiple episodes was excluded, if required.


case_crossover <- function(
  
  case_stay,
  data,
  exposure_diagnoses,
  exposure_procedures,
  screening_index_stay,
  unique_exposure,
  interval_length,
  number_of_interval,
  wash_out,
  los_max
  
){
  
  # Control
  for (parameter in c("case_stay","data","screening_index_stay","unique_exposure",
                      "interval_length","number_of_interval","wash_out","los_max")){
    
    if (!exists(x = parameter, inherits = FALSE)){stop(paste(parameter, "missing with no default"))}
    
  }
  
  if (!exists(x = "exposure_diagnoses", inherits = FALSE)){
    stop("exposure_diagnoses missing with no default")
  }
  
  if (!exists(x = "exposure_procedures", inherits = FALSE)){
    stop("exposure_procedures missing with no default")
  }
  
  if(length(exposure_diagnoses) == 1 && exposure_diagnoses == "" && 
     length(exposure_procedures) == 1 && exposure_procedures == ""){
    stop("at least one diagnosis or procedure necessary")
  }
  
  
  # Creation des vecteurs de codes
  if(length(exposure_diagnoses) != 0){
    exposure_diagnoses <- paste(exposure_diagnoses,collapse='|')
  } else {
    exposure_diagnoses <- ''
  }
  
  if(length(exposure_procedures) != 0){
    exposure_procedures <- paste(exposure_procedures,collapse='|')
  } else {
    exposure_procedures <- ''
  }
  
  # Selection des sejours exposes
  stays_exposed <- data %>%
    select(id_stay, 
           id_patient,
           diagnoses,
           procedures,
           admission,
           length_of_stay) %>%
    filter(id_patient %in% case_stay$id_patient) %>%
    filter(diagnoses %like% exposure_diagnoses & procedures %like% exposure_procedures) %>%
    select(id_stay, id_patient, admission, length_of_stay) %>%
    # group_by(id_patient) %>%
    # arrange(id_patient) %>%
    rename(date_of_exposure = admission) 
  
  # Screening index stay for exposure
  if (screening_index_stay){
    
    stays_exposed <- stays_exposed %>%
      select(-id_stay)
    
  }else{
    
    stays_exposed <- stays_exposed %>%
      anti_join(.,case_stay %>% 
                  # group_by() %>% 
                  select(id_stay)
      ) %>%
      select(-id_stay)
    
  }
  
  # Suppression des individus ayant des expositions mutliples
  if (unique_exposure){
    
    stays_exposed <- stays_exposed %>% group_by(id_patient) %>%
      summarise(count = n()) %>%
      filter(count  == 1) %>%
      select(- count) %>%
      left_join(stays_exposed)
    
  }
  
  table(stays_exposed$length_of_stay > los_max)
  
  obs_sequence <- seq(from = 0, to = interval_length * number_of_interval, by = interval_length)
  periode_observation <- c(0 : max(obs_sequence), wash_out : (wash_out + max(obs_sequence)))
  
  
  exposed_cases_table <- left_join(case_stay, stays_exposed, by = "id_patient") %>%
    mutate(time_to_case = case_date - date_of_exposure) %>%
    filter(time_to_case %in% periode_observation) %>%
    select(-case_date, -date_of_exposure) %>%
    filter(length_of_stay <= los_max) %>%
    select(-length_of_stay)
  #   %>%
  #     group_by(id_patient) %>%
  #     arrange(time_to_case) %>%
  #     slice(1L)
  
  clogit_table <- bind_rows(
    x = exposed_cases_table %>%
      mutate(latency = cut(time_to_case, obs_sequence, right = FALSE)) %>%
      select(-time_to_case) %>%
      mutate(latency = ifelse(is.na(latency),'unexposed',latency)) %>%
      group_by(id_patient) %>% arrange(id_patient, latency) %>% slice(1L) %>%
      mutate(cas = TRUE) %>% 
      mutate(latency = as.character(latency)) %>% ungroup(),
    y = exposed_cases_table %>%
      mutate(latency = cut(time_to_case, obs_sequence + wash_out, right = FALSE)) %>%
      select(-time_to_case) %>%
      mutate(latency = ifelse(is.na(latency),'unexposed',latency)) %>%
      group_by(id_patient) %>% arrange(id_patient, latency) %>% slice(1L) %>%
      mutate(cas = FALSE)%>% 
      mutate(latency = as.character(latency)) %>% ungroup()
  )
  
  
  clogit_table <- clogit_table %>%
    mutate(latency = as.factor(latency)) %>%
    mutate(latency = relevel(latency, ref = "unexposed"))
  
  test <- clogit_table %>%
    group_by(cas, latency) %>%
    summarize(count = n()) %>%
    group_by(cas) %>% 
    summarize(count = n()) %>% 
    transmute(test = count == length(obs_sequence))
  
  if(FALSE %in% test[[1]]){
    
    clogit_table <- clogit_table %>%
      group_by(cas, latency) %>%
      summarize(count = n()) %>%
      arrange(latency) %>%
      ungroup()
    
  }else{
    
    levels(clogit_table$latency)[-1] <- 
      paste(obs_sequence-interval_length+1, obs_sequence, sep = ' - ')[-1]
    
    require(survival)
    cond_log_reg <- clogit(
      formula = cas ~ latency + strata(id_patient),
      data = clogit_table
    )
    
    
    clogit_table <- clogit_table %>%
      group_by(cas, latency) %>%
      summarize(count = n()) %>%
      arrange(latency) %>%
      ungroup()
    
    
    clogit_table <- bind_cols(
      clogit_table %>%
        filter(cas == TRUE) %>%
        select(Period = latency, Case = count),
      clogit_table %>%
        filter(cas == FALSE) %>%
        select(Control = count) %>%
        mutate(Odds_ratio = 
                 c('reference',
                   round(exp(cond_log_reg$coefficients),2))) %>%
        mutate(CI95_inf = rbind(c('',''),round(exp(confint(cond_log_reg)),2))[,1]) %>%
        mutate(CI95_sup = rbind(c('',''),round(exp(confint(cond_log_reg)),2))[,2])
    )
    
    
    
    return(
      list(
        table = clogit_table,
        graph = plot_odds(cond_log_reg),
        exposed = exposed_cases_table
      )
    )
    
    
  }
  
  
}
jomuller/ITCARES documentation built on May 19, 2019, 7:26 p.m.