R/ImportFunctionsModular.R

Defines functions process.outcome.data process.laboratory.data process.vital.sign.data process.treatment.icu.data process.common.treatment.data process.treatment.data process.ICU.data process.pregnancy.data process.symptom.data process.comorbidity.data import.symptom.and.comorbidity.data import.microb.data import.demographic.data

Documented in import.demographic.data import.microb.data import.symptom.and.comorbidity.data process.common.treatment.data process.comorbidity.data process.ICU.data process.laboratory.data process.outcome.data process.pregnancy.data process.symptom.data process.treatment.data process.treatment.icu.data process.vital.sign.data

#' Import demographic data
#'
#' Cleans the demographics table: removes subjects whose \code{usubjid} occurs
#' more than once, converts age to years, normalises ethnicity labels, derives
#' a site identifier, recodes sex, blanks admission dates later than the data
#' pull date, and attaches a country name, income group and region per site.
#'
#' @details
#' The body does not read \code{file.name}. It takes the demographics table
#' from an object named \code{dm} and the cut-off date from an object named
#' \code{date_pull}; both must be visible from the function's enclosing
#' environment. Income group and region come from \code{WDI()}, country names
#' from \code{ISOcodes::ISO_3166_1}.
#'
#' The \code{country} column of the result is assigned per site
#' (\code{siteid_final}), not per subject: each site receives the country name
#' that sorts last among the subjects recorded at that site.
#'
#' @param file.name Path of the demographics data file (CDISC format).
#'   Currently unused (see Details).
#' @param dtplyr.step If \code{TRUE} the result is returned without conversion;
#'   otherwise it is passed through \code{as_tibble()}.
#' @import dplyr tibble 
#' @return Formatted demographic data with the columns \code{usubjid},
#'   \code{studyid}, \code{siteid_final}, \code{date_admit}, \code{age},
#'   \code{sex}, \code{ethnic}, \code{country}, \code{income} and \code{region}.
#' @export import.demographic.data



import.demographic.data <- function(file.name, dtplyr.step = FALSE){

  # Income group and region per ISO alpha-3 code (aggregate rows removed).
  wdi_dat <- WDI(indicator = c("NY.GDP.PCAP.KD", "SP.DYN.LE00.IN", "SP.DYN.IMRT.IN"),
                 start = 2020, end = 2020, extra = TRUE) %>%
    filter(region != "Aggregates") %>%
    select("Alpha_3" = iso3c, income, region)

  # Alpha-3 code -> display name (common name preferred) + income + region.
  country.lookup <- ISOcodes::ISO_3166_1 %>%
    as_tibble() %>%
    mutate(Name = case_when(!is.na(Common_name) ~ Common_name,
                            Name == "Lao People's Democratic Republic" ~ "Lao PDR",
                            TRUE ~ Name)) %>%
    select(Alpha_3, Name) %>%
    left_join(wdi_dat, by = "Alpha_3")

  out <- dm %>%
    # Drop every subject whose usubjid appears more than once.
    group_by(usubjid) %>%
    mutate(count = 1) %>%
    mutate(n = sum(count)) %>%
    filter(n == 1) %>%
    ungroup() %>%
    rename(date_admit = rfstdtc) %>%
    as.data.frame() %>%
    # age_d is the divisor that turns the reported age unit into years.
    mutate(age_d = case_when(ageu == "MONTHS" ~ 12,
                             ageu == "YEARS" ~ 1,
                             ageu == "DAYS" ~ 365.25,
                             TRUE ~ NA_real_)) %>%
    mutate(age2 = age / age_d) %>%
    select(-(age)) %>%
    rename(age = age2) %>%
    mutate(age = replace(age, age < 0, NA)) %>%
    # Ethnicity: ASCII, lower case, no parenthesised text, "_" as separator.
    mutate(ethnic = iconv(ethnic, to = "ASCII//TRANSLIT") %>% tolower()) %>%
    mutate(ethnic = str_remove_all(ethnic, "\\s*\\([^)]*\\)")) %>%
    mutate(ethnic = str_replace_all(ethnic, " - ", "_")) %>%
    mutate(ethnic = str_replace_all(ethnic, "-", "_")) %>%
    mutate(ethnic = str_replace_all(ethnic, "/| / ", "_")) %>%
    mutate(ethnic = str_replace_all(ethnic, " ", "_")) %>%
    mutate(ethnic = str_replace_all(ethnic, ",", "_")) %>%
    mutate(ethnic = replace(ethnic, ethnic == "n_a" | ethnic == "na" | ethnic == "", NA)) %>%
    # Site identifier: invid by default, with per-network overrides that take
    # it from subjid or studyid instead.
    mutate(siteid_final = case_when(invid == "00741cca_network" ~ substr(subjid, 1, 12),
                                    invid == "227inserm" ~ sub("\\-.*", "", subjid),
                                    invid == "00689us_nhlbi_peta" ~ sub("\\-.*", "", subjid),
                                    invid == "" ~ studyid,
                                    studyid == "CVPRQTA" ~ "CVPRQTA",
                                    TRUE ~ invid)) %>%
    mutate(sex = case_when(sex == "M" ~ "Male",
                           sex == "F" ~ "Female",
                           TRUE ~ NA_character_)) %>%
    # Keep the date part only; dates after the data pull are set to missing.
    mutate(date_admit = substr(date_admit, 1, 10)) %>%
    mutate(date_admit = as_date(date_admit)) %>%
    mutate(date_admit = replace(date_admit, date_admit > date_pull, NA)) %>%
    select(usubjid, studyid, siteid_final, date_admit, age, sex, ethnic, country)

  # One row per site with its country name, income group and region.
  site_id_country <- out %>%
    # Hard-coded country overrides for specific sites.
    mutate(country = ifelse(siteid_final == "321cub_erasme__bru", "BEL", country)) %>%
    mutate(country = ifelse(siteid_final == "435civil_hospital", "BEL", country)) %>%
    mutate(country = ifelse(siteid_final == "00657hospital_de_c", "PRT", country)) %>%
    # The original literal carried a leading space (" 00727clinica_unive");
    # trimming the column makes the override match with or without it.
    mutate(country = ifelse(trimws(siteid_final) == "00727clinica_unive", "COL", country)) %>%
    mutate(country = ifelse(siteid_final == "00580netcare_unita", "ITA", country)) %>%
    mutate(country = ifelse(siteid_final == "00835consortium_im", "POL", country)) %>%
    mutate(country = ifelse(siteid_final == "00831nicvd_dhaka", "BGD", country)) %>%
    mutate(country = replace(country, country == "", NA)) %>%
    left_join(country.lookup, by = c("country" = "Alpha_3")) %>%
    select(-country) %>%
    rename(country = Name) %>%
    filter(!is.na(country)) %>%
    # desc() accepts exactly one vector. The previous call passed three; only
    # the first (country) was ever used for ordering, so that is kept here.
    arrange(desc(country)) %>%
    distinct(siteid_final, .keep_all = TRUE) %>%
    select(siteid_final, 'country_2' = country, income, region)

  # Replace the subject-level country code by the site-level country name.
  out <- out %>%
    left_join(site_id_country, by = "siteid_final") %>%
    mutate(country = country_2) %>%
    select(-c(country_2))

  if(dtplyr.step){
    return(out)
  } else {
    return(out %>% as_tibble())
  }
}



#' Import microbiology data
#'
#' Derives per-subject coronavirus test results: one column per detection test
#' (\code{cov_det_cronavir}, \code{cov_det_sarscov2}), one per identified
#' organism (\code{cov_id_cronavir}, \code{cov_id_sarscov2}) and a combined
#' flag \code{cov_det_id}.
#'
#' @details
#' The body does not read \code{file.name}; the microbiology table is taken
#' from an object named \code{mb} that must be visible from the function's
#' enclosing environment.
#'
#' \code{cov_det_id} is \code{"POSITIVE"} when any of the four result columns
#' is \code{"POSITIVE"}, \code{NA} when all four are missing and
#' \code{"NEGATIVE"} otherwise. Result columns for which the data contain no
#' record are created as all-\code{NA}.
#'
#' @param file.name Path of the microbio data file (CDISC format). Currently
#'   unused (see Details).
#' @param dtplyr.step If \code{TRUE} the result is returned without conversion;
#'   otherwise it is passed through \code{as_tibble()}.
#' @import dplyr tibble 
#' @return Formatted microbiology data, one row per \code{usubjid}.
#' @export import.microb.data


import.microb.data <- function(file.name, dtplyr.step = FALSE){

  #mb<-shared.data.import(file.name, dtplyr.step = TRUE)

  # Detection tests: one column per test code, value POSITIVE / NEGATIVE / NA.
  detection <- mb %>%
    #select(usubjid,mbtestcd,mbtest,mbtstdtl,mbcat,mbstresc,mbspec,mbloc,mbmethod)%>%
    filter(mbtstdtl == "DETECTION") %>%
    filter(mbtestcd == "CRONAVIR" | mbtestcd == "SARSCOV2") %>%
    mutate(mbstresc = case_when(mbstresc == "NO" ~ "NEGATIVE",
                                mbstresc == "NEGATIVE" ~ "NEGATIVE",
                                mbstresc == "POSITIVE" ~ "POSITIVE",
                                TRUE ~ NA_character_)) %>%
    mutate(mbtestcd = paste0("cov_det_", mbtestcd) %>% tolower %>% str_replace_all(" ", "_")) %>%
    # Descending order puts POSITIVE first, so distinct() keeps a positive
    # result whenever a subject has one for that test.
    arrange(desc(mbstresc)) %>%
    distinct(usubjid, mbtestcd, .keep_all = TRUE) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = mbtestcd, values_from = mbstresc) %>%
    as.data.frame()


  # Organism identification: every retained row counts as a positive result.
  identification <- mb %>%
    filter(mbtstdtl == "IDENTIFICATION") %>%
    distinct(usubjid, mbstresc, .keep_all = TRUE) %>%
    filter(mbstresc == "SEVERE ACUTE RESPIRATORY SYNDROME CORONAVIRUS 2" |
             mbstresc == "CORONAVIRIDAE") %>%
    mutate(mbstresc = replace(mbstresc, mbstresc == "SEVERE ACUTE RESPIRATORY SYNDROME CORONAVIRUS 2", "SARSCOV2")) %>%
    # NOTE(review): the filter above already excludes this organism name, so
    # the next replacement never fires. Confirm whether it should be admitted.
    mutate(mbstresc = replace(mbstresc, mbstresc == "SEVERE ACUTE RESPIRATORY SYNDROME-RELATED CORONAVIRUS", "SARSCOV2")) %>%

    mutate(mbstresc = replace(mbstresc, mbstresc == "CORONAVIRIDAE", "CRONAVIR")) %>%
    mutate(result = "POSITIVE") %>%
    mutate(mbstresc = paste0("cov_id_", mbstresc) %>%
             tolower %>%
             str_replace_all(" ", "_")) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = mbstresc, values_from = result) %>%
    as.data.frame()

  out <- full_join(detection, identification, by = "usubjid")

  # A pivoted column only exists if the matching test/organism occurs in the
  # data. Add the absent ones as all-NA so the combination below cannot fail
  # with "object not found".
  result.cols <- c("cov_det_cronavir", "cov_det_sarscov2",
                   "cov_id_cronavir", "cov_id_sarscov2")
  for (col in setdiff(result.cols, names(out))) {
    out[[col]] <- rep(NA_character_, nrow(out))
  }

  out <- out %>%
    mutate(cov_det_id = case_when(cov_det_cronavir == "POSITIVE" |
                                    cov_det_sarscov2 == "POSITIVE" |
                                    cov_id_cronavir == "POSITIVE" |
                                    cov_id_sarscov2 == "POSITIVE" ~
                                    "POSITIVE",
                                  is.na(cov_det_cronavir) &
                                    is.na(cov_det_sarscov2) &
                                    is.na(cov_id_cronavir) &
                                    is.na(cov_id_sarscov2) ~
                                    NA_character_,
                                  TRUE ~ "NEGATIVE"))


  if(dtplyr.step){
    return(out)
  } else {
    return(out %>% as_tibble())
  }
}

#' Import data on symptoms and comorbidities
#'
#' Filters the records to medical history, admission signs/symptoms and
#' clinically diagnosed COVID-19, recodes \code{saoccur} to logical,
#' harmonises the reported terms into a common vocabulary and keeps one row
#' per subject and term.
#'
#' @details
#' The body does not read \code{file.name}; the table is taken from an object
#' named \code{sa} that must be visible from the function's enclosing
#' environment.
#'
#' The term used is \code{samodify} when it is non-missing and non-empty,
#' otherwise the upper-cased \code{saterm}. Terms are then mapped through a
#' sequence of \code{case_when()} tables (the order matters: within one table
#' the first matching rule wins, and later tables see the output of earlier
#' ones), and finally converted to lower-case ASCII with underscores.
#' When a subject has several rows for the same term, a row with
#' \code{saoccur == TRUE} is preferred.
#'
#' @param file.name Path of the symptoms data file (CDISC format). Currently
#'   unused (see Details).
#' @param dtplyr.step If \code{TRUE} the result of the pipeline is returned
#'   without conversion; otherwise it is passed through \code{as_tibble()}.
#' @import dplyr tibble stringr
#' @return Formatted comorbidity and symptom data with the columns
#'   \code{usubjid}, \code{saterm}, \code{sacat}, \code{samodify},
#'   \code{sapresp}, \code{saoccur} and \code{sastdtc}.
#' @export import.symptom.and.comorbidity.data
import.symptom.and.comorbidity.data <- function(file.name, dtplyr.step = TRUE){

  #out <- shared.data.import(file.name, 
  #                         dtplyr.step = TRUE, 
  #                        immutable = TRUE) %>% # this will often by used twice, so should be immutable

  out <- sa %>%

    #imp_sa<-sa%>%
    select(usubjid, saterm, sacat, samodify, sapresp, saoccur, sastdtc) %>%
    # Clinically diagnosed COVID-19 gets its own category.
    mutate(sacat = case_when(
      saterm == "CLINICALLY-DIAGNOSED COVID-19" ~ "CLINICALLY-DIAGNOSED COVID-19",
      TRUE ~ sacat)) %>%
    filter(
      sacat == "MEDICAL HISTORY" |
        sacat == "SIGNS AND SYMPTOMS AT HOSPITAL ADMISSION" |
        sacat == "CLINICALLY-DIAGNOSED COVID-19"
    ) %>%
    #mutate(sacat=replace(sacat,saterm=="MALNUTRITION","MEDICAL HISTORY"))%>%#temporary correction
    #filter( sapresp=="Y") %>%
    mutate(saoccur = case_when(saoccur == "Y" ~ TRUE,
                               saoccur == "N" ~ FALSE,
                               TRUE ~ NA)) %>%
    filter(!is.na(saoccur)) %>%
    mutate(saterm = toupper(saterm)) %>%
    # Prefer the modified term only when one is actually present. The former
    # condition (samodify != "" | is.na(samodify)) was TRUE for a missing
    # samodify and therefore overwrote saterm with NA.
    mutate(saterm = case_when(!is.na(samodify) & samodify != "" ~ samodify,
                              TRUE ~ saterm)) %>% #to add
    mutate(saterm = case_when(saterm %like% 'CARDIAC ARRHYTHMIA' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm %like% 'CARDIAC DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm %like% 'CHORNIC CARDIAC DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm %like% 'CHRONIC HEART DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm %like% 'CONGENITAL CA' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm %like% 'CONGENTIAL CARDIOPATHY' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'CORONARY DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'HEART FAILURE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'OROVALVA DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'RHEUMATIC HEART DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'VALVULAR HEART DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'CONGESTIVE HEART FAILURE' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm == 'CORONARY ARTERY DISEASE' ~ 'CHRONIC CARDIAC DISEASE',
                              TRUE ~ saterm)) %>%
    mutate(saterm = case_when(saterm == 'CHRONIC DIALYSIS' ~ 'CHRONIC KIDNEY DISEASE',
                              saterm %like% 'DEPRESSION' ~ 'PSYCHIATRIC CONDITION',
                              saterm %like% 'PSYCHOSIS' ~ 'PSYCHIATRIC CONDITION',
                              saterm %like% 'DYSLIPIDEMIA' ~ 'CHRONIC METABOLIC DISORDER',
                              saterm %like% 'HYPOTHYROIDISM' ~ 'CHRONIC ENDOCRINE DISORDER NON DIABITES',
                              saterm %like% 'HEPATITIS' ~ 'LIVER DISEASE',
                              saterm %like% 'MARASUMAS' ~ 'MALNUTRITION',
                              saterm == 'SAM UNDEFINED' ~ 'MALNUTRITION',
                              saterm == 'MIXED MARASMIC-KWASH' ~ 'MALNUTRITION',
                              saterm == 'OSA/ HOME CPAP/BI-PAP USE' ~ 'OBESITY',
                              saterm == 'PAOD' ~ 'OTHER COMORBIDITIES',
                              saterm == 'PEPTIC ULCER DISEASE EXCLUDING BLEEDING' ~ 'OTHER COMORBIDITIES',
                              saterm == 'PARALYSIS' ~ 'CHRONIC NEUROLOGICAL DISORDER',
                              saterm == 'STROKE OR OTHER NEUROLOGICAL DISORDERS' ~ 'CHRONIC NEUROLOGICAL DISORDER',
                              saterm == 'PULMONARY CIRCULATION DISORDER' ~ 'CHRONIC CARDIAC DISEASE',
                              saterm %like% 'ARRHYTHMIA' ~ 'CHRONIC CARDIAC DISEASE',

                              saterm == 'SUBSTANCE USE DISORDER' ~ 'SUBSTANCE MISUSE',
                              saterm == 'VENOUS THROMBOEMBOLISM- DVT/PE' ~ 'THROMBOLIC DISORDERS',

                              saterm == 'CHILLS/RIGORS' ~ 'RIGOR OR SWEATING',
                              saterm == 'NIGHT SWEAT' ~ 'RIGOR OR SWEATING',

                              saterm == 'CONGESTION/RHINORRHEA' ~ 'RUNNY NOSE',
                              saterm == 'CONJUNCTIVAL CONGESTION' ~ 'UPPER RESPIRATORY TRACT SYMPTOMS',
                              saterm == 'SNEEZING' ~ 'UPPER RESPIRATORY TRACT SYMPTOMS',

                              saterm == 'DELIRIUM / ENCEPHALOPATHY' ~ 'ALTERED CONSCIOUSNESS CONFUSION',
                              saterm == 'DIZZINESS/LIGHTHEADEDNESS' ~ 'OTHER SIGNS AND SYMPTOMS',
                              saterm == 'GASTROGASTROINTESTINAL HEMORRHAGE' ~ 'OTHER SIGNS AND SYMPTOMS',
                              TRUE ~ saterm)) %>%
    mutate(saterm = case_when(saterm %like% 'TUBERCULOSIS' ~ 'TUBERCULOSIS',
                              saterm %like% 'MALIGNANCY' ~ 'MALIGNANT NEOPLASM',
                              saterm %like% 'SPECIFIC CANCERS' ~ 'MALIGNANT NEOPLASM',
                              saterm %like% 'SOLID TUMOR' ~ 'MALIGNANT NEOPLASM',
                              saterm %like% 'METASTATIC CANCER' ~ 'MALIGNANT NEOPLASM',


                              saterm == 'SORE THROAT/THROAT PAIN' ~ 'SORE THROAT',

                              saterm == 'COAGULOPATHY' ~ 'CHRONIC HEMATOLOGIC DISEASE',
                              saterm == 'DYSLIPIDEMIA/HYPERLIPIDEMIA' ~ 'CHRONIC HEMATOLOGIC DISEASE',
                              saterm == 'IRON DEFICIENCY ANEMIA' ~ 'CHRONIC HEMATOLOGIC DISEASE',
                              saterm == 'BLOOD LOSS ANEMIA' ~ 'CHRONIC HEMATOLOGIC DISEASE',

                              saterm == 'CHRONIC HEMATOLOGICAL DISEASE' ~ 'CHRONIC HEMATOLOGIC DISEASE',
                              saterm == 'CHRONIC LIVER DISEASE' ~ 'LIVER DISEASE',
                              saterm %like% 'ACUTE LIVER' ~ 'LIVER DISEASE',

                              saterm %like% 'CHRONIC RENAL FAILURE' ~ 'CHRONIC KIDNEY DISEASE',

                              saterm %like% 'CHRONIC LUNG DISEASE' ~ 'CHRONIC PULMONARY DISEASE',
                              saterm %like% 'CHROMIC PULMONARY DISEASE' ~ 'CHRONIC PULMONARY DISEASE',
                              TRUE ~ saterm)) %>%
    mutate(saterm = case_when(saterm %like% 'RHEMATOLOGICAL DISORDER' ~ 'rheumatologic disorder',
                              saterm %like% 'CHRONIC NEUROLOGICAL' ~ 'CHRONIC NEUROLOGICAL DISORDER',
                              saterm %like% 'CURRENT SMOK' ~ 'SMOKING',
                              saterm %like% 'DIABETES' ~ 'DIABETES',
                              saterm == 'HISTORY OF PERIPHERAL OR CARDIAC REVASCULARIZATION' ~ 'HISTORY OF PERIPHERAL OR CARDIAC REVASCULARIZATION',
                              saterm == 'HISTORY OF SMOKING' ~ 'SMOKING',
                              saterm %like% 'SMOKING' ~ 'SMOKING',
                              saterm %like% 'HIV' ~ 'AIDS/HIV',
                              saterm %like% 'LIVER DISEASE' ~ 'LIVER DISEASE',
                              saterm %like% 'OTHER RELEVANT RISK' ~ 'OTHER COMORBIDITIES',
                              saterm == 'OTHER RISK FACTOR' ~ 'OTHER COMORBIDITIES',
                              saterm %like% 'RHEUMATOLOGICAL DISORD' ~ 'RHEUMATOLOGIC DISORDER',
                              saterm == 'SMOKER' ~ 'SMOKING',
                              saterm == 'SMOKER - CURRENT' ~ 'SMOKING',
                              saterm == 'SMOKER - FORMER' ~ 'SMOKING - FORMER',
                              saterm == 'FEEDING INTOLERANCE (PAEDIATRICS)' ~ 'ANOREXIA',
                              saterm == 'REFUSING TO EAT OR DRINK/HISTORY OF POOR ORAL INTAKE' ~ 'ANOREXIA',
                              saterm %like% 'ANOREXIA' ~ 'ANOREXIA',
                              saterm == 'ANOREXIA - LOSS OF APPETITE' ~ 'ANOREXIA',
                              saterm == 'CHEST PAIN/TIGHTNESS' ~ 'CHEST PAIN',


                              saterm == 'SWOLLEN NECK GLANDS/LYMPHADENOPATHY' ~ 'LYMPHADENOPATHY',
                              TRUE ~ saterm)) %>%
    mutate(saterm = case_when(saterm %like% 'COUGH' ~ 'COUGH',
                              saterm %like% 'COUTH' ~ 'COUGH',
                              saterm == 'HEMOPTYSIS' ~ 'COUGH',
                              saterm == 'DIARRHEA' ~ 'DIARRHOEA',
                              saterm == 'CONJUNCTIVAL CONGESTION ' ~ 'CONJUNCTIVITIS',

                              saterm %like% 'FEVER' ~ 'HISTORY OF FEVER',
                              saterm == 'SEIZURE' ~ 'SEIZURES',
                              saterm %like% 'TRANSPLANT' ~ 'TRANSPLANTATION',
                              saterm %like% 'ANOSMIA' ~ 'LOSS OF SMELL',
                              saterm %like% 'AGEUSIA' ~ 'LOSS OF TASTE',
                              saterm == "LOSS OF TASTE OR LOSS OF SMELL" ~ 'LOSS OF SMELL/TASTE',
                              saterm == "NAUSEA/VOMITING" ~ 'VOMITING/NAUSEA',

                              saterm %like% 'MYALGIA OR FATIGUE' ~ 'MUSCLE ACHES/JOINT PAIN',
                              saterm %like% 'JOINT PAIN' ~ 'MUSCLE ACHES/JOINT PAIN',
                              saterm %like% 'MUSCLE ACHES' ~ 'MUSCLE ACHES/JOINT PAIN',
                              saterm == 'OTHER SIGN OR SYMPTOM' ~ 'OTHER SIGNS AND SYMPTOMS',
                              saterm == 'LOWER CHEST WALL INDRAWING' ~ 'SHORTNESS OF BREATH',
                              saterm %like% 'DEHYDRATION' ~ 'SEVERE DEHYDRATION',
                              saterm %like% 'RASH' ~ 'SKIN RASH',
                              saterm == 'EARPAIN' ~ 'EAR PAIN',
                              TRUE ~ saterm)) %>%
    # Final form of the term: lower-case ASCII, parenthesised text removed,
    # separators and blanks replaced by "_".
    mutate(saterm = iconv(saterm, to = "ASCII//TRANSLIT") %>% tolower()) %>%
    mutate(saterm = str_remove_all(saterm, "\\s*\\([^)]*\\)")) %>%
    mutate(saterm = str_replace_all(saterm, " - ", "_")) %>%
    mutate(saterm = str_replace_all(saterm, "/| / ", "_")) %>%
    mutate(saterm = str_replace_all(saterm, " ", "_")) %>%
    # TRUE sorts first, so distinct() keeps an occurrence over a non-occurrence.
    arrange(desc(saoccur)) %>%
    distinct(usubjid, saterm, .keep_all = TRUE)



  if(dtplyr.step){
    return(out)
  } else {
    return(out %>% as_tibble())
  }
}


#' Process data on comorbidities
#'
#' Produces a wide table with one row per subject and one
#' \code{comorbid_<term>} column per sufficiently frequent medical-history
#' term, holding the \code{saoccur} value recorded for that subject.
#'
#' @details
#' The body does not read \code{input}; the records are taken from an object
#' named \code{imp_sa_com} that must be visible from the function's enclosing
#' environment.
#'
#' @param input Intended to be either the path of the symptoms/comorbidities
#'   data file (CDISC format) or the output of
#'   \code{import.symptom.and.comorbidity.data}. Currently unused (see Details).
#' @param minimum A term is kept only when it has at least this many rows with
#'   a non-missing \code{saoccur}.
#' @param dtplyr.step If \code{TRUE} the result is wrapped with
#'   \code{lazy_dt(immutable = FALSE)}; otherwise it is converted with
#'   \code{as_tibble()}.
#' @import dplyr tibble stringr tidyfast
#' @importFrom data.table as.data.table
#' @importFrom glue glue
#' @return Formatted comorbidity data as a tibble or \code{dtplyr_step}
#' @export process.comorbidity.data
process.comorbidity.data <- function(input,  minimum=100, dtplyr.step = FALSE){

  # Medical-history rows with a usable term, occurrences ordered first.
  history <- imp_sa_com %>%
    filter(sacat == "MEDICAL HISTORY", !is.na(saterm)) %>%
    arrange(desc(saoccur))

  # Keep only terms recorded (non-missing saoccur) at least `minimum` times.
  frequent <- history %>%
    group_by(saterm) %>%
    mutate(n = sum(!is.na(saoccur))) %>%
    filter(n >= !!minimum) %>%
    ungroup()

  # One column per term, prefixed so it can be told apart after later joins.
  wide <- frequent %>%
    mutate(saterm = paste0("comorbid_", saterm)) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = saterm, values_from = saoccur)

  if (!dtplyr.step) {
    return(as_tibble(wide))
  }
  lazy_dt(wide, immutable = FALSE)
}


#' Process data on symptoms
#'
#' Combines four per-subject summaries of the admission signs and symptoms:
#' the earliest symptom onset date (\code{date_onset}), the clinical COVID-19
#' diagnosis flag (\code{clin_diag_covid_19}), an overall \code{symptomatic}
#' flag, and one \code{symptoms_<term>} column per sufficiently frequent term.
#'
#' @details
#' The body does not read \code{input}; the records are taken from an object
#' named \code{imp_sa} and the cut-off date from an object named
#' \code{date_pull}, both of which must be visible from the function's
#' enclosing environment. Subjects whose \code{usubjid} starts with
#' \code{"CVZXZMV"} are left out of the per-term columns and of the
#' \code{symptomatic} flag.
#'
#' @param input Intended to be either the path of the symptoms/comorbidities
#'   data file (CDISC format) or the output of
#'   \code{import.symptom.and.comorbidity.data}. Currently unused (see Details).
#' @param minimum A term is kept only when it has at least this many rows with
#'   a non-missing \code{saoccur}.
#' @param dtplyr.step If \code{TRUE} the result is wrapped with
#'   \code{lazy_dt(immutable = FALSE)}; otherwise it is converted with
#'   \code{as_tibble()}.
#' @import dplyr tibble tidyfast dtplyr
#' @importFrom data.table as.data.table
#' @importFrom glue glue
#' @return Formatted symptom data as a tibble or \code{dtplyr_step}
#' @export process.symptom.data
process.symptom.data <- function(input,  minimum=100, dtplyr.step = FALSE){

  admission.cat <- "SIGNS AND SYMPTOMS AT HOSPITAL ADMISSION"

  # Wide table: one symptoms_<term> column per frequent admission symptom.
  per.term <- imp_sa %>%
    mutate(studyid = substr(usubjid, 1, 7)) %>%
    filter(studyid != "CVZXZMV",
           sacat == admission.cat,
           saterm != "covid-19_symptoms") %>%
    arrange(desc(saoccur)) %>%
    group_by(saterm) %>%
    mutate(n = sum(!is.na(saoccur))) %>%
    filter(n >= !!minimum) %>%
    ungroup() %>%
    mutate(saterm = paste0("symptoms_", saterm)) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = saterm, values_from = saoccur) %>%
    as.data.frame()

  # Earliest start date among symptoms that occurred, limited to dates from
  # 2020-01-01 up to (but excluding) the data pull date.
  onset <- imp_sa %>%
    ungroup() %>%
    filter(sacat == admission.cat & saoccur == TRUE) %>%
    mutate(sastdtc = as.character(sastdtc)) %>%
    mutate(sastdtc = as_date(substr(replace(sastdtc, sastdtc == "", NA), 1, 10))) %>%
    filter(sastdtc >= "2020-01-01", sastdtc < date_pull) %>%
    arrange(sastdtc) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, "date_onset" = sastdtc)

  # Clinical diagnosis: rows with a missing sapresp count as TRUE; a TRUE row
  # is preferred when a subject has several.
  clinical <- imp_sa %>%
    filter(sacat == "CLINICALLY-DIAGNOSED COVID-19") %>%
    mutate(saoccur = case_when(is.na(sapresp) ~ TRUE,
                               TRUE ~ saoccur)) %>%
    arrange(desc(saoccur)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, "clin_diag_covid_19" = saoccur)

  # Symptomatic flag: the "asymptomatic" term is inverted, every other term
  # contributes its own saoccur; a TRUE row is preferred per subject.
  any.symptom <- imp_sa %>%
    mutate(studyid = substr(usubjid, 1, 7)) %>%
    filter(studyid != "CVZXZMV") %>%
    ungroup() %>%
    filter(sacat == admission.cat) %>%
    mutate(symptomatic = case_when(
      saterm == "asymptomatic" & saoccur == TRUE ~ FALSE,
      saterm == "asymptomatic" & saoccur == FALSE ~ TRUE,
      TRUE ~ saoccur
    )) %>%
    arrange(desc(symptomatic)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, symptomatic)

  symptom <- onset %>%
    full_join(clinical, by = "usubjid") %>%
    full_join(any.symptom, by = "usubjid") %>%
    full_join(per.term, by = "usubjid")

  if (!dtplyr.step) {
    return(as_tibble(symptom))
  }
  lazy_dt(symptom, immutable = FALSE)
}
  




#' Process data on pregnancy (as comorbidity)
#'
#' Extracts the pregnancy indicator (\code{rptestcd == "PREGIND"}) and recodes
#' its result to logical: \code{"Y"} becomes \code{TRUE}, \code{"N"} becomes
#' \code{FALSE}, anything else becomes \code{NA}.
#'
#' @details
#' The body does not read \code{file.name}; the records are taken from an
#' object named \code{rp} that must be visible from the function's enclosing
#' environment.
#'
#' @param file.name Path of the data file (CDISC format). Currently unused
#'   (see Details).
#' @param dtplyr.step If \code{TRUE} the result is wrapped with
#'   \code{lazy_dt(immutable = FALSE)}; otherwise it is converted with
#'   \code{as_tibble()}.
#' @import dplyr tibble stringr
#' @return Formatted pregnancy data (columns \code{usubjid} and
#'   \code{comorbid_pregnancy}) as a tibble or \code{dtplyr_step}
#' @export process.pregnancy.data



process.pregnancy.data <- function(file.name, dtplyr.step = FALSE){
  #comorbid_pregnancy <- shared.data.import(pregnancy.file.name, dtplyr.step = FALSE)%>%
  #comorbid_pregnancy <- rp_open%>%
  pregnancy <- rp %>%
    filter(rptestcd == "PREGIND") %>%
    mutate(comorbid_pregnancy = case_when(rpstresc == "Y" ~ TRUE,
                                          rpstresc == "N" ~ FALSE,
                                          TRUE ~ NA)) %>%
    select(usubjid, comorbid_pregnancy)

  if (!dtplyr.step) {
    return(as_tibble(pregnancy))
  }
  lazy_dt(pregnancy, immutable = FALSE)
}



#' Process data on ICU admission
#'
#' Summarises the healthcare-encounter records per subject: whether the
#' subject was ever in intensive care (\code{ever_icu}), the ICU start and end
#' dates (\code{icu_in}, \code{icu_out}) and the latest encounter date found
#' across all encounters that occurred (\code{date_ho_last}).
#'
#' @details
#' The body does not read \code{file.name}; the records are taken from an
#' object named \code{ho} and the cut-off date from an object named
#' \code{date_pull}, both of which must be visible from the function's
#' enclosing environment.
#'
#' Rows whose \code{hooccur} is neither \code{"Y"} nor \code{"N"} are dropped.
#' ICU dates before 2020-01-01 or after \code{date_pull} are set to missing.
#' For \code{date_ho_last} only dates from 2020-01-01 up to (but excluding)
#' \code{date_pull} are considered.
#'
#' @param file.name Path of the healthcare encounters data file (CDISC
#'   format). Currently unused (see Details).
#' @param dtplyr.step If \code{TRUE} the result is returned without conversion;
#'   otherwise it is passed through \code{as_tibble()}.
#' @import dplyr tibble tidyfast dtplyr
#' @importFrom data.table as.data.table
#' @return Formatted ICU data, one row per \code{usubjid}
#' @export process.ICU.data
process.ICU.data <- function(file.name, dtplyr.step = FALSE){
  icu <- ho %>%
    mutate(hooccur = case_when(hooccur == "Y" ~ TRUE,
                               hooccur == "N" ~ FALSE,
                               TRUE ~ NA)) %>%
    filter(!is.na(hooccur)) %>%
    select(usubjid, hodecod, hostdtc, hoendtc, hooccur, hostdy, hoendy) %>%
    # Keep the date part of the timestamps only.
    mutate(hostdtc = substr(hostdtc, 1, 10)) %>%
    mutate(hostdtc = as_date(hostdtc)) %>%
    mutate(hoendtc = substr(hoendtc, 1, 10)) %>%
    mutate(hoendtc = as_date(hoendtc))

  # Latest plausible start date per subject. The range test needs `&`: with
  # `|` it held for every non-missing date and filtered nothing.
  last_ho_datea <- icu %>%
    filter(hooccur == TRUE) %>%
    filter(hostdtc >= "2020-01-01" & hostdtc < date_pull) %>%
    arrange(desc(hostdtc)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, hostdtc)

  # Latest plausible end date per subject, then the later of start and end.
  # NOTE(review): because this starts from rows with a valid hoendtc and uses
  # left_join(), subjects that only have a start date are absent here and the
  # is.na(hoendtc) branch cannot fire - confirm whether full_join() was meant.
  last_ho_dates <- icu %>%
    filter(hooccur == TRUE) %>%
    filter(hoendtc >= "2020-01-01" & hoendtc < date_pull) %>%
    arrange(desc(hoendtc)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, hoendtc) %>%
    left_join(last_ho_datea, by = c("usubjid")) %>%
    mutate(date_ho_last = case_when(is.na(hoendtc) ~ hostdtc,
                                    is.na(hostdtc) ~ hoendtc,
                                    hostdtc > hoendtc ~ hostdtc,
                                    hostdtc <= hoendtc ~ hoendtc)) %>%
    select(usubjid, date_ho_last)

  # ICU encounters: keep the row with the latest start date per subject.
  icu <- icu %>%
    filter(hodecod == "INTENSIVE CARE UNIT") %>%
    arrange(desc(hostdtc)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    rename(ever_icu = hooccur) %>%
    rename(icu_in = hostdtc) %>%
    mutate(icu_in = as_date(icu_in)) %>%
    mutate(icu_in = replace(icu_in, icu_in < "2020-01-01" | icu_in > date_pull, NA)) %>%
    rename(icu_out = hoendtc) %>%
    mutate(icu_out = as_date(icu_out)) %>%
    mutate(icu_out = replace(icu_out, icu_out < "2020-01-01" | icu_out > date_pull, NA)) %>%
    select(-c(hodecod)) %>%
    full_join(last_ho_dates, by = c("usubjid"))


  if(dtplyr.step){
    return(icu)
  } else {
    return(icu %>% as_tibble())
  }
}



#' Process data on treatments
#' @param file.name Path of the intervention data file (CDISC format)
#' @param dtplyr.step Return the output as \code{dtplyr_step} to avoid unnecessary future calls to \code{as_tibble} or \code{as.data.table}
#' @import dplyr tibble stringr
#' @return Formatted treatment data (long format) as a tibble or \code{dtplyr_step}
#' @export process.treatment.data
process.treatment.data <- function(file.name,  dtplyr.step = FALSE){
  
  
   
  #int<-int%>%filter(studyid!="CVZXZMV")
  treatment<-int%>%
    filter(inpresp =="Y") %>%
    filter(inevintx!="BEFORE HOSPITAL ADMISSION")%>%
    mutate(inoccur = case_when(inoccur == "Y" ~ TRUE,
                               inoccur == "N" ~ FALSE,
                               TRUE ~ NA))%>%
    filter(!is.na(inoccur))%>%
    filter(incat!="MEDICAL HISTORY" | is.na (incat))%>%
    mutate(intrt_original=intrt)%>%
    mutate(intrt=toupper(intrt))%>%
    mutate(intrt=as.character(intrt))%>%
    mutate(inmodify=as.character(inmodify))%>%
    mutate(incat=as.character(incat))%>%
    mutate(intrt=case_when(inmodify!=""~inmodify,
                           TRUE ~ intrt))%>%
    mutate(intrt=case_when(incat=="EXTRACORPOREAL"~'EXTRACORPOREAL',
                           incat=="INVASIVE VENTILATION"~'INVASIVE VENTILATION',
                           incat=="FACE MASK"~'NASAL OR MASK OXYGEN THERAPY',
                           incat=="NASAL / MASK OXYGEN THERAPY"~'NASAL OR MASK OXYGEN THERAPY',
                           incat=="INVASIVE VENTILATION"~'INVASIVE VENTILATION',
                           
                           incat=="NON-INVASIVE VENTILATION "~'NON-INVASIVE VENTILATION ',
                           incat=="OTHER INTEVENTIONS"~'OTHER INTERVENTIONS',
                           incat=="PRONE POSITIONING"~'PRONE POSITIONING',
                           incat=="PRONE VENTILATION"~'PRONE VENTILATION',
                           incat=="ANTIBIOTIC AGENTS"~ "ANTIBIOTIC AGENTS",
                           incat=="ANTIFUNGAL AGENTS"~ "ANTIFUNGAL AGENTS",
                           incat=="ANTIVIRAL AGENTS"~ "ANTIVIRAL AGENTS",
                           incat=="CORTICOSTEROIDS"~ "CORTICOSTEROIDS",
                           incat=="ANTIMALARIAL AGENTS"~ "ANTIMALARIAL AGENTS",
                           incat=="NSAIDS"~"NON-STEROIDAL ANTI-INFLAMMATORY (NSAIDS)",
                           TRUE~intrt)) %>%
    mutate(intrt=case_when(intrt%like%'ECMO'~'EXTRACORPOREAL',
                           intrt=='EXTRA CORPOREAL LIFE SUPPORT'~'EXTRACORPOREAL',
                           intrt=='EXTRACORPOREAL SUPPORT'~'EXTRACORPOREAL',
                           intrt=='PRONE POSITIONING WITH UNKNOWN VENTILATION'~'PRONE POSITION VENTILATION',
                           intrt=='PRONE VENTILATION'~'PRONE POSITION VENTILATION',
                           
                           
                           intrt=='CONTINUOUS RENAL REPLACEMENT THERAPIES (CRRT)'~'RENAL REPLACEMENT THERAPIES',
                           intrt%like%'RENAL REPLACEMENT THERAPY' |
                             intrt%like% 'DIALYSIS'~ 'RENAL REPLACEMENT THERAPIES',
                           intrt%like% 'HEMOFILTRATION'~ 'RENAL REPLACEMENT THERAPIES',
                           intrt=='ERP CVVH'~ 'RENAL REPLACEMENT THERAPIES',
                           ###IMV
                           intrt=='INVASIVE MECHANICAL LUNG VENTILATION'~'INVASIVE VENTILATION',
                           intrt=='INVASIVE MECHANICAL VENTILATION'~'INVASIVE VENTILATION',
                           intrt=='MECHANICAL VENTILATION'~'INVASIVE VENTILATION',
                           intrt=='RE-INTUBATION'~'INVASIVE VENTILATION',
                           intrt=='INVASIVE VENTILATION'~'INVASIVE VENTILATION',
                           intrt%like%'APRV'~'INVASIVE VENTILATION',
                           intrt=='INTUBATION AND MECHANICAL VENTILATION'~'INVASIVE VENTILATION',
                           intrt=='MECHANICAL SUPPORT'~'INVASIVE VENTILATION',
                           intrt%like%'EXTUBATION'~'INVASIVE VENTILATION',
                           intrt=="VENTILATED"~'INVASIVE VENTILATION',
                           
                           ###NIV
                           intrt%like%'CPAP'~'NON-INVASIVE VENTILATION',
                           intrt%like%'BIPAP'~'NON-INVASIVE VENTILATION',
                           intrt%like%'NON-INVASIVE MECHANICAL VENTILATION (BIPAP, CPAP, OCNAF (OPTIFLOW) ...)'~'NON-INVASIVE VENTILATION',
                           intrt%like%'NON-INVASIVE VENTILATION'~'NON-INVASIVE VENTILATION',
                           intrt=='NON-INVASIVE MECHANICAL VENTILATION'~'NON-INVASIVE VENTILATION',
                           intrt=='NON-INVASIVE POSITIVE PRESSURE VENTILATION'~'NON-INVASIVE VENTILATION',
                           intrt=='NON-INVASIVE RESPIRATORY SUPPORT'~'NON-INVASIVE VENTILATION',
                           TRUE ~ intrt))%>%
    mutate(intrt=case_when(intrt%like%'OTHER INTERVENTION'~'OTHER INTERVENTIONS',
                           intrt%like%'CHEMOTHERAPY'| intrt%like%'ANTI-DIABETIC MEDICATIONS'|intrt%like%'BRONCHOSCOPY'|
                             intrt%like%'PROTON PUMP INHIBITORS'|intrt%like%'STATINS'|intrt%like%'MORPHINE'|
                             intrt%like%'HALOPERIDOL'|intrt%like%'OLANZAPINE'~'OTHER INTERVENTIONS',
                           
                           intrt=='OTHER TARGETED COVID-19 MEDICATIONS'~'OTHER INTERVENTIONS',
                           intrt=='OTHER TREATMENTS FOR COVID19'~'OTHER INTERVENTIONS',
                           intrt%like%"NON-STEROIDAL"~"NON-STEROIDAL ANTI-INFLAMMATORY",
                           intrt%like%"NON STEROIDAL"~"NON-STEROIDAL ANTI-INFLAMMATORY",                           
                           TRUE ~ intrt))%>%
    
    mutate(intrt=case_when(intrt=='NASAL CANULA'|intrt=='NASAL CANNULA'~'NASAL OXYGEN THERAPY',
                           intrt%like%'SURGICAL FEEDING TUBE'~'TOTAL PARENTERAL NUTRITION',
                           
                           intrt=='FACE MASK'~'MASK OXYGEN THERAPY',
                           
                           ####HFNC
                           intrt=='OXYGEN THERAPY WITH HIGH FLOW NASAL CANULA'~'HIGH-FLOW NASAL CANULA OXYGEN THERAPY',
                           intrt=='HIGH-FLOW NASAL CANNULA OXYGEN THERAPY'~'HIGH-FLOW NASAL CANULA OXYGEN THERAPY',
                           
                           ###Prone positioning
                           intrt%like%'PRONACI'~'PRONE POSITIONING',
                           intrt=='PRONE POSITIONING'~'PRONE POSITIONING',
                           
                           
                           intrt%like%'TRACHEOSTOMY'~'TRACHEOSTOMY',
                           intrt%like%'NITRIC OXIDE'~'INHALED NITRIC OXIDE',
                           
                           ###Corticosteroids
                           intrt=="CORTICOSTEROID"~ "CORTICOSTEROIDS",
                           intrt=="DEXAMETHASONE"~ "CORTICOSTEROIDS",
                           intrt=="BETAMETHASONE"~ "CORTICOSTEROIDS",
                           intrt%like%"PREDNISOLONE"~ "CORTICOSTEROIDS",
                           intrt=="ORAL STEROIDS"~ "CORTICOSTEROIDS",
                           intrt=="STEROIDS"~ "CORTICOSTEROIDS",
                           intrt=="STEROIDS"~ "convalescent_plasma",
                           intrt%like%"HYDROCORTISONE"~ "CORTICOSTEROIDS",
                           
                           intrt%like%"BLOOD TRANSFUSION OR BLOOD PRODUCT"~ "BLOOD TRANSFUSION OR BLOOD PRODUCT",
                           TRUE ~ intrt))%>%
    mutate(intrt=case_when(intrt%like%"ANTIVIRAL" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"ARV" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"ANTIRETROVIRAL" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"RIBAVIRIN" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"LOPINAVIR AND RITONAVIR" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"LOPINAVIR" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"OSELTAMIVIR" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"REMDESIVIR" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"REMDESIVIR" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"NEURAMINIDASE INHIBITORS" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"ZANAMIVIR" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"RIBAVARIN" ~ "ANTIVIRAL AGENTS",
                           intrt%like%"FLUCLOXACILLIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"ANTIBIOTIC"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"AMIKACIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"AMOX"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"AUGUMENTIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"AZITHROMYCIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"AZITHRYOMYCIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"BENZY"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"AUGUMENTIN"~ "ANTIBIOTIC AGENTS",
                           TRUE ~ intrt))%>%
    mutate(intrt=case_when(intrt%like%"AZITHRYOMYCIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"CEFTR"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"CEFR"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"DOXYCYCLINE"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"CHLORAMPHENICOL"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"CIPROFLOXACIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"GENTAMICIN"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"MEROPENEM"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"METRONIDAZOLE"~ "ANTIBIOTIC AGENTS",
                           intrt%like%"ANTIMALARIAL" | intrt%like%"CHLOROQUINE" ~ "ANTIMALARIAL AGENTS",
                           intrt%like%"ANTIFUNGAL" ~ "ANTIFUNGAL AGENTS",
                           intrt %like% "OROGASTRIC"~"NASO/ NASOGASTRIC ORAL/OROGASTRIC FLUIDS",
                           intrt %like% "NGT OR OGT REQUIRED FOR NUTRITION"~"NASO/ NASOGASTRIC ORAL/OROGASTRIC FLUIDS",
                           intrt%like%'DOBUTAMINE' |  intrt%like%'DOPAMINE' |  intrt%like%'MILRINONE' 
                           |  intrt%like%'LEVOSIMENDAN' |  intrt%like%'EPINEPHRINE' |  intrt%like%'NOREPINEPRINE'
                           |  intrt%like%'INOTROPES' |intrt%like%'VASOPRESS' |intrt%like%'NORADRENALINE' |
                             intrt%like%'ADRENALINE' |intrt%like%'BETA BLOCKER' ~'INOTROPES / VASOPRESSORS',
                           TRUE ~ intrt))%>%
    mutate(intrt=case_when(intrt%like%'IMMUNOGLOBULI' ~ "convalescent_plasma",
                           intrt%like%'IMMUNOSUPPRES' ~ "IMMUNOSUPPRESSANTS",
                           intrt%like%'IMMUNOSTIMULANTS' ~ "IMMUNOSUPPRESSANTS",
                           intrt%like%'IMMUNOTHERAPY' ~ "IMMUNOSUPPRESSANTS",
                           intrt=="IL6 INHIBITOR" ~ "IMMUNOSUPPRESSANTS",
                           intrt=="TOCILIZUMAB" ~ "IMMUNOSUPPRESSANTS",
                           intrt%like%"INTERFERON" ~ "IMMUNOSTIMULANTS",
                           
                           intrt%like%"HEPARIN" ~ "THERAPEUTIC ANTICOAGULANT",
                           intrt%like%"NOXAPARIN" ~ "THERAPEUTIC ANTICOAGULANT",
                           intrt=="ENOXAPARIN" ~ "THERAPEUTIC ANTICOAGULANT",
                           
                           intrt%like%"SPIRONOLACTONE" ~ "DIURETICS",
                           intrt%like%"DIURETIC" ~ "DIURETICS",
                           
                           intrt%like%"NITROUS OXIDE" ~ "inhaled_nitric_oxide",
                           intrt=="CPR" ~ "Cardiopulmonary resuscitation",
                           
                           intrt%like%"EXPERIMENTAL AGENT" ~ "EXPERIMENTAL AGENTS",
                           intrt%like%"SARILUMAB" ~ "EXPERIMENTAL AGENTS",
                           intrt%like%"IV FLUID" ~ "INTRAVENOUS FLUIDS",
                           intrt%like%"I.V. SOLUTIONS" ~ "INTRAVENOUS FLUIDS",
                           intrt %like% "ANGIOTENSIN" | intrt %like% "ACE"~ "AGENTS ACTING ON THE RENIN-ANGIOTENSIN SYSTEM",
                           intrt%like%"ANTIINFLAMMATORY" ~ "ANTIINFLAMMATORY",
                           TRUE ~ intrt))%>%    as.data.frame()%>%
    select(studyid,usubjid,'treatment'=intrt,inoccur,intrt_original,inmodify,incat, inevintx, 
           indur,indtc,instdtc,inendtc,indy)%>%
    mutate(treatment = iconv(treatment, to ="ASCII//TRANSLIT") %>% tolower()) %>%
    mutate(treatment = str_remove_all(treatment, "\\s*\\([^)]*\\)")) %>%
    mutate(treatment = str_replace_all(treatment, " - ", "_")) %>%
    mutate(treatment = str_replace_all(treatment, "-", "_")) %>%
    mutate(treatment = str_replace_all(treatment, "/| / ", "_")) %>%
    mutate(treatment = str_replace_all(treatment, " ", "_"))
  
  
 
 
  if(dtplyr.step){
    return(treatment)
  } else {
    return(treatment %>% as_tibble())
  }
}

#' Process data on the most common treatments
#' @param file.name Currently unused; retained for backward compatibility. The function reads the interventions table from an object named \code{imp_int} (presumably the output of \code{process.treatment.data}) and the extraction date from \code{date_pull}; both must be visible from the function's enclosing environment.
#' @param minimum The minimum number of non-missing \code{inoccur} records a treatment needs in order to be considered "common"; default 10.
#' @param dtplyr.step Return the output as \code{dtplyr_step} to avoid unnecessary future calls to \code{as_tibble} or \code{as.data.table}
#' @import dplyr tibble dtplyr tidyfast
#' @importFrom data.table as.data.table
#' @importFrom glue glue
#' @return Formatted common treatment data (wide format, one row per \code{usubjid}) as a tibble or \code{dtplyr_step}. Besides one \code{treat_<treatment>} column per common treatment it holds \code{date_in_last}, \code{treat_oxygen_therapy}, \code{d1_oxygen_therapy}, \code{dur_imv}/\code{dur_niv} and \code{date_imv_st}/\code{date_niv_st} (the latter four only when the data contain ventilation records).
#' @export process.common.treatment.data

process.common.treatment.data <- function(file.name, minimum=10, dtplyr.step = FALSE){
  
  # Interventions that are counted as (or imply) oxygen therapy.
  oxygen_like <- c("extracorporeal", "inhaled_nitric_oxide",
                   "prone_position_ventilation", "respiratory_support",
                   "tracheostomy", "high_flow_nasal_cannula",
                   "invasive_ventilation", "mask_oxygen_therapy",
                   "nasal_oxygen_therapy", "oxygen_therapy",
                   "non_invasive_ventilation")
  # Interventions that never get their own "treat_*" column.
  not_reported <- c("extracorporeal", "inhaled_nitric_oxide", "oxygen_therapy",
                    "prone_position_ventilation", "prone_ventilation",
                    "respiratory_support", "tracheostomy", "prone_positioning",
                    "covid_19_vaccination", "supplemental_oxygen_fio2")
  ventilation <- c("invasive_ventilation", "non_invasive_ventilation")
  
  # Oxygen therapy recorded at admission or on study day 1. Sorting by
  # desc(inoccur) puts TRUE first, so a patient is TRUE if any record is TRUE.
  oxy_within_d1 <- imp_int %>%
    filter(treatment %in% oxygen_like) %>%
    filter(inevintx == "AT HOSPITAL ADMISSION" | indy == 1) %>%
    arrange(desc(inoccur)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, "d1_oxygen_therapy" = inoccur)
  
  # Most recent date on which any intervention was recorded as given.
  # Fixes: the plausibility window needs `&` (with `|` every non-missing date
  # passed), and the rows must be ordered by date for "last" to be the latest.
  date_in_last <- imp_int %>%
    filter(inoccur == TRUE) %>%
    mutate(date_in_last = as_date(substr(indtc, 1, 10))) %>%
    filter(date_in_last >= "2020-01-01" & date_in_last < date_pull) %>%
    arrange(desc(date_in_last)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, date_in_last)
  
  # One "treat_<name>" column per treatment with at least `minimum`
  # non-missing records.
  common <- imp_int %>%
    group_by(treatment) %>%
    mutate(n = sum(!is.na(inoccur))) %>%
    ungroup() %>%
    filter(n >= !!minimum) %>%
    # rows with a missing treatment name were dropped by the original `!=`
    # comparisons as well
    filter(!is.na(treatment), !treatment %in% not_reported) %>%
    arrange(desc(inoccur)) %>%
    distinct(usubjid, treatment, .keep_all = TRUE) %>%
    mutate(treatment = glue("treat_{treatment}", treatment = treatment)) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = treatment, values_from = inoccur) %>%
    as.data.frame() %>%
    full_join(date_in_last, by = "usubjid")
  
  # Oxygen therapy at any time.
  treat_oxy <- imp_int %>%
    filter(treatment %in% oxygen_like) %>%
    arrange(desc(inoccur)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, "treat_oxygen_therapy" = inoccur)
  
  # Duration of invasive / non-invasive ventilation; `indur` is stripped to
  # digits and dots before conversion, unparseable values are dropped.
  vent_duration <- imp_int %>%
    select(usubjid, treatment, inoccur, indur, indtc, instdtc, inendtc, indy) %>%
    filter(treatment %in% ventilation) %>%
    mutate(treatment = case_when(treatment == "non_invasive_ventilation" ~ "dur_niv",
                                 treatment == "invasive_ventilation" ~ "dur_imv",
                                 TRUE ~ treatment)) %>%
    mutate(indur_clean = as.numeric(gsub("[^0-9.]", "", indur))) %>%
    filter(!is.na(indur_clean)) %>%
    distinct(usubjid, treatment, .keep_all = TRUE) %>%
    dt_pivot_wider(id_cols = usubjid, names_from = treatment, values_from = indur_clean) %>%
    as_tibble()
  
  # Earliest explicit start date (instdtc) of each ventilation type.
  vent_start <- imp_int %>%
    select(usubjid, treatment, inoccur, indur, indtc, instdtc) %>%
    filter(treatment %in% ventilation) %>%
    mutate(treatment = case_when(treatment == "non_invasive_ventilation" ~ "date_niv_st",
                                 treatment == "invasive_ventilation" ~ "date_imv_st",
                                 TRUE ~ treatment)) %>%
    filter(inoccur == TRUE) %>%
    mutate(instdtc = as_date(substr(instdtc, 1, 10))) %>%
    filter(!is.na(instdtc)) %>%
    arrange(instdtc) %>%
    distinct(usubjid, treatment, .keep_all = TRUE) %>%
    dt_pivot_wider(id_cols = usubjid, names_from = treatment, values_from = instdtc) %>%
    as_tibble()
  
  # Earliest daily-assessment date (indtc) with ventilation, used as fallback
  # start date. Fix: order by the parsed `indtc`, not by the raw `instdtc`.
  vent_first_assessed <- imp_int %>%
    filter(inevintx == "00:00-24:00 ON DAY OF ASSESSMENT") %>%
    filter(treatment %in% ventilation) %>%
    mutate(treatment = case_when(treatment == "non_invasive_ventilation" ~ "date_niv_indtc_st",
                                 treatment == "invasive_ventilation" ~ "date_imv_indtc_st",
                                 TRUE ~ treatment)) %>%
    filter(inoccur == TRUE) %>%
    mutate(indtc = as_date(substr(indtc, 1, 10))) %>%
    filter(!is.na(indtc)) %>%
    arrange(indtc) %>%
    distinct(usubjid, treatment, .keep_all = TRUE) %>%
    dt_pivot_wider(id_cols = usubjid, names_from = treatment, values_from = indtc) %>%
    as_tibble()
  
  # All pieces share only `usubjid`, so the key is spelled out.
  common <- common %>%
    full_join(treat_oxy, by = "usubjid") %>%
    full_join(oxy_within_d1, by = "usubjid") %>%
    full_join(vent_duration, by = "usubjid") %>%
    full_join(vent_start, by = "usubjid") %>%
    full_join(vent_first_assessed, by = "usubjid")
  
  # Prefer the explicit start date and fall back to the first assessment date.
  # Fix: the original renamed date_niv_indtc_st to date_niv_st, which fails
  # whenever date_niv_st already exists; both ventilation types are now
  # handled the same way and a missing column is tolerated.
  for (vent in c("imv", "niv")) {
    start_col <- paste0("date_", vent, "_st")
    fallback_col <- paste0("date_", vent, "_indtc_st")
    if (fallback_col %in% names(common)) {
      if (start_col %in% names(common)) {
        common[[start_col]] <- coalesce(common[[start_col]], common[[fallback_col]])
        common[[fallback_col]] <- NULL
      } else {
        names(common)[names(common) == fallback_col] <- start_col
      }
    }
  }
  
  if(dtplyr.step){
    # Fix: `return(x) %>% lazy_dt()` returned before lazy_dt() was applied.
    return(lazy_dt(common, immutable = FALSE))
  } else {
    return(common %>% as_tibble())
  }
  
}

#' Process data on the most common icu treatments
#' @param file.name Currently unused; retained for backward compatibility. The function reads the interventions table from an object named \code{imp_int} (presumably the output of \code{process.treatment.data}), which must be visible from the function's enclosing environment.
#' @param imp_icu ICU data; \code{usubjid}, \code{ever_icu} and \code{icu_in} are read from it.
#' @param imp_dm Demographic data; supplies \code{usubjid} and \code{date_admit}.
#' @param imp_ds Outcome data, joined on the column names it shares with the ICU data. Together with \code{imp_icu} it must supply \code{icu_out}, \code{date_ho_last}, \code{date_outcome}, \code{hostdy} and \code{hoendy}.
#' @param minimum The minimum number of non-missing \code{inoccur} records a treatment needs in order to be considered "common"; default 10.
#' @param dtplyr.step Return the output as \code{dtplyr_step} to avoid unnecessary future calls to \code{as_tibble} or \code{as.data.table}
#' @import dplyr tibble dtplyr tidyfast
#' @importFrom data.table as.data.table
#' @importFrom glue glue
#' @return Formatted common ICU treatment data (wide format): one \code{icu_treat_<treatment>} column per common treatment plus \code{icu_treat_oxygen_therapy}, as a tibble or \code{dtplyr_step}
#' @export process.treatment.icu.data

process.treatment.icu.data <- function(file.name,imp_icu,imp_dm,imp_ds, minimum=10, dtplyr.step = FALSE){
  
  # Interventions that are counted as (or imply) oxygen therapy.
  oxygen_like <- c("extracorporeal", "inhaled_nitric_oxide",
                   "prone_position_ventilation", "respiratory_support",
                   "tracheostomy", "high_flow_nasal_cannula",
                   "invasive_ventilation", "mask_oxygen_therapy",
                   "nasal_oxygen_therapy", "oxygen_therapy",
                   "non_invasive_ventilation")
  # Interventions that never get their own "icu_treat_*" column.
  not_reported <- c("covid_19_vaccination", "supplemental_oxygen_fio2",
                    "extracorporeal", "inhaled_nitric_oxide", "oxygen_therapy",
                    "prone_position_ventilation", "prone_ventilation",
                    "respiratory_support", "tracheostomy", "prone_positioning")
  
  adm_date <- imp_dm %>%
    select(usubjid, date_admit)
  
  # ICU study-day window for patients with a known ICU admission date.
  # The joins deliberately stay natural joins: the shared columns of the
  # inputs are not visible from this function.
  icu_ever <- imp_icu %>%
    filter(ever_icu == TRUE) %>%
    filter(!is.na(icu_in)) %>%
    left_join(adm_date) %>%
    left_join(imp_ds) %>%
    mutate(icu_dy_in = as.double(icu_in - date_admit + 1)) %>%
    # an ICU admission before hospital admission is treated as unknown
    mutate(icu_dy_in = case_when(icu_dy_in < 0 ~ NA_real_,
                                 TRUE ~ icu_dy_in)) %>%
    # ICU exit day: ICU discharge, else last hospital-occupancy date, else
    # outcome date
    mutate(icu_dy_out = case_when(!is.na(icu_out) ~ icu_dy_in + (icu_out - icu_in),
                                  is.na(icu_out) & !is.na(date_ho_last) ~ icu_dy_in + (date_ho_last - icu_in),
                                  is.na(icu_out) & is.na(date_ho_last) ~ icu_dy_in + (date_outcome - icu_in),
                                  TRUE ~ NA_real_)) %>%
    mutate(icu_dy_out = as.integer(icu_dy_out)) %>%
    mutate(hoendy = case_when(is.na(hoendy) ~ icu_dy_out,
                              TRUE ~ hoendy)) %>%
    select(usubjid, hostdy, hoendy)
  
  # Oxygen therapy recorded on or after the ICU start day. Only the lower
  # bound (hostdy) is applied; rows with a missing day are dropped.
  treat_oxy_icu <- imp_int %>%
    filter(treatment %in% oxygen_like) %>%
    left_join(icu_ever, by = c("usubjid")) %>%
    filter(as.numeric(indy) >= as.numeric(hostdy)) %>%
    arrange(desc(inoccur)) %>%
    distinct(usubjid, .keep_all = TRUE) %>%
    select(usubjid, "icu_treat_oxygen_therapy" = inoccur)
  
  # One "icu_treat_<name>" column per common treatment given on or after
  # the ICU start day.
  imp_treat_icu <- imp_int %>%
    filter(!is.na(indy)) %>%
    group_by(treatment) %>%
    mutate(n = sum(!is.na(inoccur))) %>%
    ungroup() %>%
    filter(n >= !!minimum) %>%
    mutate(treatment = replace(treatment, treatment == "cpr", "cardiopulmonary_resuscitation")) %>%
    # rows with a missing treatment name were dropped by the original `!=`
    # comparisons as well
    filter(!is.na(treatment), !treatment %in% not_reported) %>%
    left_join(icu_ever, by = c("usubjid")) %>%
    filter(as.numeric(indy) >= as.numeric(hostdy)) %>%
    arrange(desc(inoccur)) %>%
    distinct(usubjid, treatment, .keep_all = TRUE) %>%
    mutate(treatment = glue("icu_treat_{treatment}", treatment = treatment)) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = treatment, values_from = inoccur) %>%
    # Fix: the original joined `treat_oxy`, which is not defined in this
    # function; the ICU oxygen indicator built above is the intended table.
    full_join(treat_oxy_icu, by = "usubjid")
  
  if(dtplyr.step){
    # Fix: `return(x) %>% lazy_dt()` returned before lazy_dt() was applied.
    return(lazy_dt(imp_treat_icu, immutable = FALSE))
  } else {
    return(imp_treat_icu %>% as_tibble())
  }
  
}



#' Process data on vital sign
#' @param file.name Currently unused; retained for backward compatibility. The function reads the vital signs table from an object named \code{vs}, which must be visible from the function's enclosing environment.
#' @param dtplyr.step If \code{TRUE} the result is returned without the final \code{as_tibble} conversion
#' @import dplyr tibble dtplyr tidyfast
#' @importFrom data.table as.data.table
#' @importFrom glue glue
#' @return Formatted vital signs at admission (wide format, one \code{vs_<test code>} column per measurement plus a combined \code{vs_oxysat}) as a tibble, or as a data frame when \code{dtplyr.step} is \code{TRUE}
#' @export process.vital.sign.data
process.vital.sign.data <- function(file.name, dtplyr.step = FALSE){
  vital_sign <- vs %>%
    select(usubjid, vstestcd, vscat,vsstresn,vsstresu, vsdtc, vso2src) %>%
    filter(vscat=="SIGNS AND SYMPTOMS AT HOSPITAL ADMISSION" | vscat=="SIGNS AND SYMPTOMS AT ADMISSION")%>%
    mutate(vsstresn=as.numeric(vsstresn))%>%
    # Values outside the plausible range of their test code are set to missing;
    # test codes without a rule are kept unchanged.
    mutate(vsstresn=case_when(vstestcd=="OXYSAT" & (vsstresn< 1 | vsstresn> 100) ~ NA_real_,
                              vstestcd %in% c("BMI", "MUARMCIR") & (vsstresn< 0 | vsstresn> 100) ~ NA_real_,
                              vstestcd %in% c("DIABP", "WEIGHT") & (vsstresn< 0 | vsstresn> 300) ~ NA_real_,
                              vstestcd %in% c("HEIGHT", "HR", "MAP", "PULSE", "SYSBP") &
                                (vsstresn< 0 | vsstresn> 250) ~ NA_real_,
                              vstestcd=="RESP" & (vsstresn< 0 | vsstresn> 60) ~ NA_real_,
                              vstestcd=="TEMP" & (vsstresn< 30 | vsstresn> 44) ~ NA_real_,
                              TRUE~vsstresn))%>%
    filter(!is.na(vsstresn))%>%
    # keep the record with the latest date per patient and test code
    arrange(desc(vsdtc))%>%
    distinct(usubjid,vstestcd, .keep_all =TRUE)%>%
    # Oxygen saturation is split by oxygen source; an empty source becomes
    # UNKNOWN. NOTE(review): a missing (NA) source is not mapped and ends up
    # in a separate "vs_oxysat_na" column - confirm whether that is intended.
    mutate(vso2src=case_when(vso2src==""&vstestcd=="OXYSAT"~'UNKNOWN',
                             TRUE~vso2src))%>%
    mutate(vso2src= str_replace_all(vso2src, " ", "_"))%>%
    mutate(vstestcd=case_when(vstestcd=="OXYSAT"~paste0(vstestcd,"_",vso2src),
                              TRUE~vstestcd))%>%
    mutate(vstestcd = paste0("vs_",vstestcd)) %>%
    mutate(vstestcd = iconv(vstestcd, to ="ASCII//TRANSLIT") %>% tolower()) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = vstestcd,  values_from = vsstresn)%>%
    as.data.frame()
  
  # Fix: the combined saturation used to fail when one of the three source
  # columns was absent from the data; absent ones are now created as missing.
  oxysat_cols <- c("vs_oxysat_oxygen_therapy", "vs_oxysat_room_air", "vs_oxysat_unknown")
  for (col in setdiff(oxysat_cols, names(vital_sign))) {
    vital_sign[[col]] <- rep(NA_real_, nrow(vital_sign))
  }
  
  # Priority: on oxygen therapy, then room air, then unknown source.
  vital_sign <- vital_sign %>%
    mutate(vs_oxysat=coalesce(vs_oxysat_oxygen_therapy,
                              vs_oxysat_room_air,
                              vs_oxysat_unknown))
  
  if(dtplyr.step){
    return(vital_sign)
  } else {
    return(vital_sign %>% as_tibble())
  }
  
}


#' Process data on laboratory
#' @param file.name Not used by the current implementation, which reads the laboratory table from an object named \code{lb} in the function's enclosing environment.
#' @param dtplyr.step If \code{TRUE} the pivoted table is returned as is, without the final \code{as_tibble} conversion
#' @import dplyr tibble dtplyr tidyfast
#' @importFrom data.table as.data.table
#' @importFrom glue glue
#' @return Formatted admission laboratory results (wide format, one \code{lab_<test code>} column per test) as a tibble, or the unconverted pivoted table when \code{dtplyr.step} is \code{TRUE}
#' @export process.laboratory.data
process.laboratory.data <- function(file.name, dtplyr.step = FALSE){
  
  # Studies whose day-1 results are treated as admission results.
  # NOTE(review): studyid is the first 7 characters of usubjid, so "CORE"
  # only matches when usubjid itself is "CORE" - confirm the intended prefix.
  day1_studies <- c("CVCCPUK", "CVMEWUS", "CORE", "CVTDWXD",
                    "CVTTYLU", "CVZXZMV", "CVKBQEI")
  lab_tests <- c("ALT", "APTT", "CRP", "LYM", "NEUT",
                 "PT", "WBC", "BILI", "AST", "UREAN")
  cell_counts <- c("NEUT", "LYM", "WBC")
  
  laboratory <- lb %>%
    select(usubjid, lbdy, lbtestcd, lbcat, lborres, lbdtc) %>%
    mutate(lborres = replace(lborres, lborres == "", NA)) %>%
    mutate(studyid = substr(usubjid, 1, 7)) %>%
    mutate(lbcat = case_when(lbdy == 1 & studyid %in% day1_studies ~ "LABORATORY RESULTS ON ADMISSION",
                             TRUE ~ as.character(lbcat))) %>%
    filter(lbcat == "LABORATORY RESULTS ON ADMISSION") %>%
    filter(lbtestcd %in% lab_tests) %>%
    mutate(lborres = as.numeric(lborres)) %>%
    filter(!is.na(lborres)) %>%
    # latest dated result per patient and test
    arrange(desc(lbdtc)) %>%
    distinct(usubjid, lbtestcd, .keep_all = TRUE) %>%
    # First pass: cell counts above 100 are divided by 1000; the other
    # tests are set to missing outside their accepted range.
    mutate(lborres = case_when(lbtestcd %in% cell_counts & lborres > 100 ~ lborres / 1000,
                               lbtestcd %in% c("ALT", "AST", "BILI", "APTT") &
                                 (lborres > 2000 | lborres < 0) ~ NA_real_,
                               lbtestcd == "CRP" & (lborres > 500 | lborres < 0) ~ NA_real_,
                               lbtestcd == "PT" & (lborres > 105 | lborres < 0) ~ NA_real_,
                               lbtestcd == "UREAN" & (lborres > 100 | lborres < 0) ~ NA_real_,
                               TRUE ~ lborres)) %>%
    # Second pass: cell counts still outside 0-100 after rescaling are dropped.
    mutate(lborres = case_when(lbtestcd %in% cell_counts &
                                 (lborres > 100 | lborres < 0) ~ NA_real_,
                               TRUE ~ lborres)) %>%
    mutate(lbtestcd = paste0("lab_", lbtestcd)) %>%
    mutate(lbtestcd = iconv(lbtestcd, to = "ASCII//TRANSLIT") %>% tolower()) %>%
    as.data.table() %>%
    dt_pivot_wider(id_cols = usubjid, names_from = lbtestcd, values_from = lborres)
  
  if (dtplyr.step) {
    return(laboratory)
  }
  return(as_tibble(laboratory))
  
}





#' Process data on outcomes
#' @param file.name Currently unused; retained for backward compatibility. The function reads the dispositions table from an object named \code{ds} and the extraction date from \code{date_pull}; both must be visible from the function's enclosing environment.
#' @param dtplyr.step If \code{TRUE} the result is returned without the final \code{as_tibble} conversion
#' @import dplyr tibble stringr
#' @return Formatted outcome data (\code{usubjid}, \code{date_outcome}, \code{outcome}) for patients with exactly one disposition record, as a tibble (or unconverted when \code{dtplyr.step} is \code{TRUE})
#' @export process.outcome.data
process.outcome.data <- function(file.name, dtplyr.step = FALSE){
  outcome <- ds%>%
    select(usubjid, dsterm, "date_outcome" = dsstdtc, dsmodify) %>%
    mutate(date_outcome=as_date(substr(date_outcome,1, 10)))%>%
    # dates before 2020 or after the extraction date are treated as missing
    mutate(date_outcome=replace(date_outcome,date_outcome< "2020-01-01",NA))%>%
    mutate(date_outcome=replace(date_outcome,date_outcome>date_pull,NA))%>%
    mutate(outcome=tolower(dsterm))%>%
    # Exact terms that the pattern rules below would otherwise misclassify.
    # Fix: `outcome` is lower case here, so the literal must be lower case
    # too; the mixed-case original never matched and the term was later
    # caught by the "ongoing" pattern instead of being a discharge.
    mutate(outcome=case_when(outcome=="palliative"~"transferred",
                             outcome=="transferred to another unit"~"ongoing care",
                             outcome=="ongoing health care needs not related to covid episode"~"discharge",
                             outcome==""~NA_character_,
                             TRUE~outcome))%>%
    # Harmonise free-text terms; the first matching rule wins, so the order
    # of the rules matters.
    mutate(outcome=case_when(outcome%like%"hospitalis" | outcome%like%"hospitaliz" |
                               outcome%like%"ongoing" | outcome=="in hospital"~"ongoing care",
                             
                             outcome%like%"death" |
                               outcome %in% c("died", "deceased", "died (non-covid)")~"death",
                             
                             outcome=="alive" | outcome%like%"discharge"~"discharge",
                             
                             outcome%like%"transfer" |
                               outcome %in% c("long term care facility", "quarantine center")~"transferred",
                             
                             outcome %in% c("missing in database", "unknown", "not recorded")~"unknown outcome",
                             TRUE ~ outcome))%>%
    # patients with more than one disposition record are dropped entirely
    group_by(usubjid) %>%
    filter(n() == 1)%>%
    # Fix: ungroup so the result is not left grouped by usubjid
    ungroup()%>%
    select(-c(dsterm,dsmodify))
  
  
  if(dtplyr.step){
    return(outcome)
  } else {
    return(outcome %>% as_tibble())
  }
  
}
# ISARICDataPlatform/CovidClinicalDataProcessor documentation built on March 22, 2022, 7:51 p.m.