# R/preprocess_umls_concepts.R

#' Preprocess UMLS concept occurrences
#'
#' Counts the rows of `concepts` for each combination of `PATIENT_NUM`,
#' `CODE`, `group`, `SEX`, `CODE_LABEL` and `PARENT_LABEL`, keeps the
#' combinations that occur at least `nb_occurrences` times, and returns one
#' row per retained combination.
#'
#' @param concepts A data frame containing the columns `PATIENT_NUM`, `CODE`,
#'   `group`, `SEX`, `CODE_LABEL` and `PARENT_LABEL`.
#' @param nb_occurrences Minimum number of rows a combination must have in
#'   `concepts` to be kept.
#' @return An ungrouped data frame with the six grouping columns, a `count`
#'   column that is always 1, and a `CAT` column holding the title-cased
#'   `PARENT_LABEL`. `CODE_LABEL` is title-cased as well.
#' @importFrom dplyr %>%
#' @export
preprocess_umls_concepts <- function(concepts, nb_occurrences) {
  # `n()` is namespace-qualified like the other dplyr verbs so the function
  # does not depend on dplyr being attached by the caller.
  concepts <- concepts %>%
    dplyr::group_by(PATIENT_NUM, CODE, group, SEX, CODE_LABEL, PARENT_LABEL) %>%
    dplyr::summarise(count = dplyr::n()) %>%
    dplyr::filter(count >= nb_occurrences) %>%
    dplyr::ungroup()

  # The occurrence count is only used for the threshold above; every retained
  # combination is then flagged with a constant 1.
  concepts$count <- 1

  concepts$CODE_LABEL <- stringr::str_to_title(concepts$CODE_LABEL)
  concepts$CAT <- stringr::str_to_title(concepts$PARENT_LABEL)
  concepts
}
# aneuraz/multiWAS documentation built on May 14, 2019, 2:37 p.m.