# R/preprocess_bio_concepts.R

#' Aggregate biology results per patient and concept
#'
#' Collapses the rows of `bio` to one row per patient and biology concept,
#' computes how often the concept was flagged `INF` or `SUP`, keeps the
#' concepts flagged at least `nb_occurrences` times and optionally recodes
#' them with the `anabio_loinc` mapping table.
#'
#' @param bio A data frame with the columns `PATIENT_NUM`, `CODE`,
#'   `CODE_LABEL`, `PARENT_LABEL`, `SEX`, `BIRTH_YEAR`, `group`, `INF` and
#'   `SUP` (plus `ENCOUNTER_NUM` when `type = "encounter"`). `INF` and `SUP`
#'   are summed, so they must be numeric; presumably they are 0/1 flags for a
#'   result below / above the normal range (TODO confirm against the caller).
#' @param type A single string. `"all"` counts every input row;
#'   `"encounter"` first reduces each encounter to a 0/1 indicator per
#'   concept, so that `N` counts encounters instead of rows. Any other value
#'   skips the aggregation and leaves `bio` as it is.
#' @param nb_occurrences Minimum value of `INF + SUP` required for an
#'   aggregated row to be kept.
#' @param map_loinc If `TRUE`, join `anabio_loinc` on `CODE == IndexAP` and,
#'   wherever a mapping exists, replace `CODE` by `codeLoinc`, `CODE_LABEL`
#'   by `STR` and `PARENT_LABEL` by `Discipline_eng`. The columns brought in
#'   by the mapping table are dropped afterwards.
#' @return For `type = "all"` or `"encounter"`, a tibble with the identifying
#'   columns, `INF`, `SUP`, `N`, `freq_inf`, `freq_sup`, `not_norm` and
#'   `freq_not_norm`. It stays grouped by the variables `summarise()` leaves
#'   in place (every identifying column except `group`).
#' @export
preprocess_bio_concepts <- function(bio, type = "all", nb_occurrences = 1, map_loinc = TRUE) {

  # `if ()` needs a single non-missing value; fail here with a clear message
  # rather than with the generic error raised by the comparison below.
  if (length(type) != 1L || is.na(type)) {
    stop("`type` must be a single non-missing string", call. = FALSE)
  }

  if (type == "all") {
    bio <- .summarise_bio_per_patient(bio, nb_occurrences)
  } else if (type == "encounter") {
    # Collapse each encounter to "flagged at least once" so that a concept
    # measured several times in one encounter counts only once.
    bio <- bio %>%
      dplyr::group_by(PATIENT_NUM, ENCOUNTER_NUM, CODE, CODE_LABEL, PARENT_LABEL,
                      SEX, BIRTH_YEAR, group) %>%
      dplyr::summarise(INF = as.numeric(sum(INF) > 0),
                       SUP = as.numeric(sum(SUP) > 0)) %>%
      dplyr::ungroup()

    bio <- .summarise_bio_per_patient(bio, nb_occurrences)
  }

  if (map_loinc) {
    # CODE, CODE_LABEL and PARENT_LABEL are grouping columns of the
    # aggregated data. Overwriting them on a grouped tibble can leave the
    # group metadata out of date, so drop the grouping while remapping and
    # restore the same grouping variables afterwards.
    was_grouped <- dplyr::is_grouped_df(bio)
    grouping_cols <- character(0)
    if (was_grouped) {
      grouping_cols <- dplyr::group_vars(bio)
      bio <- dplyr::ungroup(bio)
    }

    bio <- dplyr::left_join(bio, anabio_loinc, by = c("CODE" = "IndexAP"))

    # Keep the original value wherever the mapping table has no entry.
    bio$CODE <- ifelse(!is.na(bio$codeLoinc), bio$codeLoinc, bio$CODE)
    bio$CODE_LABEL <- ifelse(!is.na(bio$STR), bio$STR, bio$CODE_LABEL)
    bio$PARENT_LABEL <- ifelse(!is.na(bio$Discipline_eng), bio$Discipline_eng, bio$PARENT_LABEL)

    bio <- dplyr::select(bio, -`Libelle AP`, -Discipline, -codeLoinc,
                         -`Libelle LOINC`, -CUI, -STR, -Discipline_eng)

    if (was_grouped && length(grouping_cols) > 0) {
      bio <- dplyr::group_by_at(bio, grouping_cols)
    }
  }

  bio
}

# Private helper: one row per patient and concept with the INF / SUP totals,
# the number of aggregated rows `N`, the matching frequencies, and only the
# rows whose INF + SUP total reaches `nb_occurrences`.
.summarise_bio_per_patient <- function(bio, nb_occurrences) {
  bio %>%
    dplyr::group_by(PATIENT_NUM, CODE, CODE_LABEL, PARENT_LABEL,
                    SEX, BIRTH_YEAR, group) %>%
    dplyr::summarise(INF = sum(INF), SUP = sum(SUP), N = dplyr::n()) %>%
    dplyr::mutate(freq_inf = INF / N,
                  freq_sup = SUP / N,
                  not_norm = INF + SUP,
                  freq_not_norm = (INF + SUP) / N) %>%
    dplyr::filter(not_norm >= nb_occurrences)
}
# aneuraz/multimodalPhewas documentation built on May 29, 2019, 4:50 p.m.