R/read_sivep_gripe.R

Defines functions read_sivep_gripe

Documented in read_sivep_gripe

#' Read Sivep Gripe files from Brazilian Health Ministry
#'
#' Reads every file found in `files_folder` in chunks with
#' `readr::read_csv2_chunked()` and appends each chunk to a table named
#' `sivep_gripe` in a SQLite database created in a temporary file.
#'
#' The returned lazy table keeps the database connection open. The caller is
#' responsible for closing it when done, e.g.
#' `DBI::dbDisconnect(dbplyr::remote_con(x))`. If the import fails, the
#' connection is closed and the temporary database file is removed before the
#' error is propagated.
#'
#' @param files_folder Path to folder containing CSV files from Brazilian
#'   Health Ministry. Subdirectories inside the folder are ignored.
#' @param chunk_size Integer. Chunk size to read CSV files. Default 100,000
#'   lines.
#' @return A lazy connection to Sivep Gripe table in a SQLite database stored
#'   in a temporary folder.
#' @importFrom rlang .data
read_sivep_gripe <- function(files_folder, chunk_size = 100000) {
  # Validate arguments before any resource is allocated
  if (!is.character(files_folder) || length(files_folder) == 0L ||
        anyNA(files_folder)) {
    stop(
      "`files_folder` must be a character vector of existing folder paths.",
      call. = FALSE
    )
  }
  missing_folders <- files_folder[!dir.exists(files_folder)]
  if (length(missing_folders) > 0L) {
    stop(
      "Folder does not exist: ", paste(missing_folders, collapse = ", "),
      call. = FALSE
    )
  }
  if (!is.numeric(chunk_size) || length(chunk_size) != 1L ||
        is.na(chunk_size) || chunk_size < 1) {
    stop("`chunk_size` must be a single positive number.", call. = FALSE)
  }

  # List of Sivep Gripe files. Non-recursive list.files() also returns
  # subdirectory names, which cannot be parsed as CSV, so drop them.
  files_list <- list.files(path = files_folder, full.names = TRUE)
  files_list <- files_list[!dir.exists(files_list)]
  if (length(files_list) == 0L) {
    stop(
      "No files found in: ", paste(files_folder, collapse = ", "),
      call. = FALSE
    )
  }

  # Create temporary file for the database
  db_file_path <- tempfile()

  # Create temporary database and connection
  conn_sivep_gripe <- DBI::dbConnect(
    RSQLite::SQLite(),
    db_file_path,
    extended_types = TRUE
  )

  # On failure, release the connection and the temporary file. On success the
  # connection must stay open because the returned lazy table depends on it.
  import_ok <- FALSE
  on.exit(
    if (!import_ok) {
      DBI::dbDisconnect(conn_sivep_gripe)
      unlink(db_file_path)
    },
    add = TRUE
  )

  # Sivep Gripe variables definitions
  cols_definitions <- readr::cols(
    DT_NOTIFIC = readr::col_date(format = "%d/%m/%Y"),
    SEM_NOT = readr::col_double(),
    DT_SIN_PRI = readr::col_date(format = "%d/%m/%Y"),
    SEM_PRI = readr::col_double(),
    SG_UF_NOT = readr::col_character(),
    ID_REGIONA = readr::col_character(),
    CO_REGIONA = readr::col_double(),
    ID_MUNICIP = readr::col_character(),
    CO_MUN_NOT = readr::col_double(),
    ID_UNIDADE = readr::col_character(),
    CO_UNI_NOT = readr::col_double(),
    CS_SEXO = readr::col_character(),
    DT_NASC = readr::col_date(format = "%d/%m/%Y"),
    NU_IDADE_N = readr::col_double(),
    TP_IDADE = readr::col_double(),
    COD_IDADE = readr::col_double(),
    CS_GESTANT = readr::col_double(),
    CS_RACA = readr::col_double(),
    CS_ESCOL_N = readr::col_double(),
    ID_PAIS = readr::col_character(),
    CO_PAIS = readr::col_double(),
    SG_UF = readr::col_character(),
    ID_RG_RESI = readr::col_character(),
    CO_RG_RESI = readr::col_double(),
    ID_MN_RESI = readr::col_character(),
    CO_MUN_RES = readr::col_double(),
    CS_ZONA = readr::col_double(),
    SURTO_SG = readr::col_character(),
    NOSOCOMIAL = readr::col_double(),
    AVE_SUINO = readr::col_double(),
    FEBRE = readr::col_double(),
    TOSSE = readr::col_double(),
    GARGANTA = readr::col_double(),
    DISPNEIA = readr::col_double(),
    DESC_RESP = readr::col_double(),
    SATURACAO = readr::col_double(),
    DIARREIA = readr::col_double(),
    VOMITO = readr::col_double(),
    OUTRO_SIN = readr::col_double(),
    OUTRO_DES = readr::col_character(),
    PUERPERA = readr::col_double(),
    FATOR_RISC = readr::col_double(),
    CARDIOPATI = readr::col_double(),
    HEMATOLOGI = readr::col_double(),
    SIND_DOWN = readr::col_double(),
    HEPATICA = readr::col_double(),
    ASMA = readr::col_double(),
    DIABETES = readr::col_double(),
    NEUROLOGIC = readr::col_double(),
    PNEUMOPATI = readr::col_double(),
    IMUNODEPRE = readr::col_double(),
    RENAL = readr::col_double(),
    OBESIDADE = readr::col_double(),
    OBES_IMC = readr::col_double(),
    OUT_MORBI = readr::col_double(),
    MORB_DESC = readr::col_character(),
    VACINA = readr::col_double(),
    DT_UT_DOSE = readr::col_date(format = "%d/%m/%Y"),
    MAE_VAC = readr::col_double(),
    DT_VAC_MAE = readr::col_date(format = "%d/%m/%Y"),
    M_AMAMENTA = readr::col_double(),
    DT_DOSEUNI = readr::col_date(format = "%d/%m/%Y"),
    DT_1_DOSE = readr::col_date(format = "%d/%m/%Y"),
    DT_2_DOSE = readr::col_date(format = "%d/%m/%Y"),
    ANTIVIRAL = readr::col_double(),
    TP_ANTIVIR = readr::col_double(),
    OUT_ANTIV = readr::col_character(),
    DT_ANTIVIR = readr::col_date(format = "%d/%m/%Y"),
    HOSPITAL = readr::col_double(),
    DT_INTERNA = readr::col_date(format = "%d/%m/%Y"),
    SG_UF_INTE = readr::col_character(),
    ID_RG_INTE = readr::col_character(),
    CO_RG_INTE = readr::col_double(),
    ID_MN_INTE = readr::col_character(),
    CO_MU_INTE = readr::col_double(),
    UTI = readr::col_double(),
    DT_ENTUTI = readr::col_date(format = "%d/%m/%Y"),
    DT_SAIDUTI = readr::col_date(format = "%d/%m/%Y"),
    SUPORT_VEN = readr::col_double(),
    RAIOX_RES = readr::col_double(),
    RAIOX_OUT = readr::col_character(),
    DT_RAIOX = readr::col_date(format = "%d/%m/%Y"),
    AMOSTRA = readr::col_double(),
    DT_COLETA = readr::col_date(format = "%d/%m/%Y"),
    TP_AMOSTRA = readr::col_double(),
    OUT_AMOST = readr::col_character(),
    PCR_RESUL = readr::col_double(),
    DT_PCR = readr::col_date(format = "%d/%m/%Y"),
    POS_PCRFLU = readr::col_double(),
    TP_FLU_PCR = readr::col_double(),
    PCR_FLUASU = readr::col_double(),
    FLUASU_OUT = readr::col_character(),
    PCR_FLUBLI = readr::col_double(),
    FLUBLI_OUT = readr::col_character(),
    POS_PCROUT = readr::col_double(),
    PCR_VSR = readr::col_double(),
    PCR_PARA1 = readr::col_character(),
    PCR_PARA2 = readr::col_character(),
    PCR_PARA3 = readr::col_character(),
    PCR_PARA4 = readr::col_character(),
    PCR_ADENO = readr::col_double(),
    PCR_METAP = readr::col_double(),
    PCR_BOCA = readr::col_character(),
    PCR_RINO = readr::col_double(),
    PCR_OUTRO = readr::col_double(),
    DS_PCR_OUT = readr::col_character(),
    CLASSI_FIN = readr::col_double(),
    CLASSI_OUT = readr::col_character(),
    CRITERIO = readr::col_double(),
    EVOLUCAO = readr::col_double(),
    DT_EVOLUCA = readr::col_date(format = "%d/%m/%Y"),
    DT_ENCERRA = readr::col_date(format = "%d/%m/%Y"),
    DT_DIGITA = readr::col_date(format = "%d/%m/%Y"),
    HISTO_VGM = readr::col_double(),
    PAIS_VGM = readr::col_character(),
    CO_PS_VGM = readr::col_character(),
    LO_PS_VGM = readr::col_character(),
    DT_VGM = readr::col_date(format = "%d/%m/%Y"),
    DT_RT_VGM = readr::col_date(format = "%d/%m/%Y"),
    PCR_SARS2 = readr::col_double(),
    PAC_COCBO = readr::col_character(),
    PAC_DSCBO = readr::col_character(),
    OUT_ANIM = readr::col_character(),
    DOR_ABD = readr::col_double(),
    FADIGA = readr::col_double(),
    PERD_OLFT = readr::col_double(),
    PERD_PALA = readr::col_double(),
    TOMO_RES = readr::col_double(),
    TOMO_OUT = readr::col_character(),
    DT_TOMO = readr::col_date(format = "%d/%m/%Y"),
    TP_TES_AN = readr::col_double(),
    DT_RES_AN = readr::col_date(format = "%d/%m/%Y"),
    RES_AN = readr::col_double(),
    POS_AN_FLU = readr::col_double(),
    TP_FLU_AN = readr::col_double(),
    POS_AN_OUT = readr::col_double(),
    AN_SARS2 = readr::col_double(),
    AN_VSR = readr::col_double(),
    AN_PARA1 = readr::col_character(),
    AN_PARA2 = readr::col_character(),
    AN_PARA3 = readr::col_character(),
    AN_ADENO = readr::col_character(),
    AN_OUTRO = readr::col_character(),
    DS_AN_OUT = readr::col_character(),
    TP_AM_SOR = readr::col_double(),
    SOR_OUT = readr::col_character(),
    DT_CO_SOR = readr::col_date(format = "%d/%m/%Y"),
    TP_SOR = readr::col_double(),
    OUT_SOR = readr::col_character(),
    DT_RES = readr::col_date(format = "%d/%m/%Y"),
    RES_IGG = readr::col_double(),
    RES_IGM = readr::col_double(),
    RES_IGA = readr::col_double(),
    ESTRANG = readr::col_double(),
    VACINA_COV = readr::col_double(),
    DOSE_1_COV = readr::col_character(),
    DOSE_2_COV = readr::col_character(),
    DOSE_REF = readr::col_character(),
    FAB_COV_1 = readr::col_character(),
    FAB_COV_2 = readr::col_character(),
    FAB_COVREF = readr::col_character(),
    LOTE_REF = readr::col_character(),
    LAB_PR_COV = readr::col_character(),
    LOTE_1_COV = readr::col_character(),
    LOTE_2_COV = readr::col_character(),
    FNT_IN_COV = readr::col_double()
  )

  # Function to write chunk of lines to database.
  # Each chunk is bound onto dadosPainelPI::sivep_gripe_header first
  # (presumably a template carrying the full column set, so every chunk is
  # written with the same columns -- TODO confirm against the package data).
  chunk_to_bank <- function(x, pos) {
    tmp <- dplyr::bind_rows(dadosPainelPI::sivep_gripe_header, x)
    DBI::dbWriteTable(
      conn = conn_sivep_gripe,
      name = "sivep_gripe",
      value = tmp,
      append = TRUE
    )
    # A side-effect callback stops reading only when it returns FALSE, so
    # return TRUE explicitly to always continue with the next chunk.
    invisible(TRUE)
  }

  # No need to drop a pre-existing "sivep_gripe" table: the database lives in
  # a fresh tempfile() path, so it is always empty at this point.

  # Read files from the list in chunks and store in temporary database.
  # The callback is used only for its side effect (writing to the database),
  # so no per-chunk results are accumulated in memory.
  for (f in files_list) {
    message(f)
    readr::read_csv2_chunked(
      file = f,
      callback = readr::SideEffectChunkCallback$new(chunk_to_bank),
      col_types = cols_definitions,
      chunk_size = chunk_size
    )
  }

  # Return the lazy table backed by the (still open) temporary database
  sivep_tbl <- dplyr::tbl(conn_sivep_gripe, "sivep_gripe")
  import_ok <- TRUE
  sivep_tbl
}
rfsaldanha/dadosPainelPI documentation built on March 24, 2022, 9:12 p.m.