#' Read Sivep Gripe files from Brazilian Health Ministry
#'
#' Reads every file found in `files_folder` in chunks, parses it with the
#' Sivep Gripe column specification (semicolon-delimited, dates as
#' `dd/mm/yyyy`) and appends the rows to a `sivep_gripe` table in a temporary
#' SQLite database.
#'
#' The database connection is intentionally left open on success, because the
#' returned lazy table depends on it. If reading fails, the connection is
#' closed and the temporary database file is removed before the error is
#' propagated.
#'
#' @param files_folder Path to folder containing CSV files from Brazilian Health Ministry.
#'   Sub-directories inside the folder are ignored.
#' @param chunk_size Integer. Chunk size to read CSV files. Default 100,000 lines.
#' @return A lazy connection to Sivep Gripe table in a SQLite database stored in a temporary folder.
#' @importFrom rlang .data
read_sivep_gripe <- function(files_folder, chunk_size = 100000) {
  # Validate arguments before any resource is created
  if (!is.character(files_folder) || length(files_folder) == 0L || anyNA(files_folder)) {
    stop("`files_folder` must be a non-missing character path.", call. = FALSE)
  }
  if (!is.numeric(chunk_size) || length(chunk_size) != 1L || is.na(chunk_size) || chunk_size < 1) {
    stop("`chunk_size` must be a single number greater than or equal to 1.", call. = FALSE)
  }
  missing_folders <- files_folder[!dir.exists(files_folder)]
  if (length(missing_folders) > 0L) {
    stop(
      "`files_folder` does not exist: ",
      paste(missing_folders, collapse = ", "),
      call. = FALSE
    )
  }

  # List of Sivep Gripe files; list.files() also returns sub-directory
  # entries, which cannot be read as CSV, so drop them
  files_list <- list.files(path = files_folder, full.names = TRUE)
  files_list <- files_list[!dir.exists(files_list)]
  if (length(files_list) == 0L) {
    stop(
      "No files found in `files_folder`: ",
      paste(files_folder, collapse = ", "),
      call. = FALSE
    )
  }

  # Create temporary file for the database
  db_file_path <- tempfile()

  # Create temporary database and connection
  # (extended_types = TRUE lets RSQLite store and return Date columns)
  conn_sivep_gripe <- DBI::dbConnect(
    RSQLite::SQLite(),
    db_file_path,
    extended_types = TRUE
  )

  # On failure, release the connection and the temporary file. On success the
  # connection must stay open because the returned lazy table uses it.
  succeeded <- FALSE
  on.exit(
    if (!succeeded) {
      DBI::dbDisconnect(conn_sivep_gripe)
      unlink(db_file_path)
    },
    add = TRUE
  )

  # Sivep Gripe variables definitions
  cols_definitions <- readr::cols(
    DT_NOTIFIC = readr::col_date(format = "%d/%m/%Y"),
    SEM_NOT = readr::col_double(),
    DT_SIN_PRI = readr::col_date(format = "%d/%m/%Y"),
    SEM_PRI = readr::col_double(),
    SG_UF_NOT = readr::col_character(),
    ID_REGIONA = readr::col_character(),
    CO_REGIONA = readr::col_double(),
    ID_MUNICIP = readr::col_character(),
    CO_MUN_NOT = readr::col_double(),
    ID_UNIDADE = readr::col_character(),
    CO_UNI_NOT = readr::col_double(),
    CS_SEXO = readr::col_character(),
    DT_NASC = readr::col_date(format = "%d/%m/%Y"),
    NU_IDADE_N = readr::col_double(),
    TP_IDADE = readr::col_double(),
    COD_IDADE = readr::col_double(),
    CS_GESTANT = readr::col_double(),
    CS_RACA = readr::col_double(),
    CS_ESCOL_N = readr::col_double(),
    ID_PAIS = readr::col_character(),
    CO_PAIS = readr::col_double(),
    SG_UF = readr::col_character(),
    ID_RG_RESI = readr::col_character(),
    CO_RG_RESI = readr::col_double(),
    ID_MN_RESI = readr::col_character(),
    CO_MUN_RES = readr::col_double(),
    CS_ZONA = readr::col_double(),
    SURTO_SG = readr::col_character(),
    NOSOCOMIAL = readr::col_double(),
    AVE_SUINO = readr::col_double(),
    FEBRE = readr::col_double(),
    TOSSE = readr::col_double(),
    GARGANTA = readr::col_double(),
    DISPNEIA = readr::col_double(),
    DESC_RESP = readr::col_double(),
    SATURACAO = readr::col_double(),
    DIARREIA = readr::col_double(),
    VOMITO = readr::col_double(),
    OUTRO_SIN = readr::col_double(),
    OUTRO_DES = readr::col_character(),
    PUERPERA = readr::col_double(),
    FATOR_RISC = readr::col_double(),
    CARDIOPATI = readr::col_double(),
    HEMATOLOGI = readr::col_double(),
    SIND_DOWN = readr::col_double(),
    HEPATICA = readr::col_double(),
    ASMA = readr::col_double(),
    DIABETES = readr::col_double(),
    NEUROLOGIC = readr::col_double(),
    PNEUMOPATI = readr::col_double(),
    IMUNODEPRE = readr::col_double(),
    RENAL = readr::col_double(),
    OBESIDADE = readr::col_double(),
    OBES_IMC = readr::col_double(),
    OUT_MORBI = readr::col_double(),
    MORB_DESC = readr::col_character(),
    VACINA = readr::col_double(),
    DT_UT_DOSE = readr::col_date(format = "%d/%m/%Y"),
    MAE_VAC = readr::col_double(),
    DT_VAC_MAE = readr::col_date(format = "%d/%m/%Y"),
    M_AMAMENTA = readr::col_double(),
    DT_DOSEUNI = readr::col_date(format = "%d/%m/%Y"),
    DT_1_DOSE = readr::col_date(format = "%d/%m/%Y"),
    DT_2_DOSE = readr::col_date(format = "%d/%m/%Y"),
    ANTIVIRAL = readr::col_double(),
    TP_ANTIVIR = readr::col_double(),
    OUT_ANTIV = readr::col_character(),
    DT_ANTIVIR = readr::col_date(format = "%d/%m/%Y"),
    HOSPITAL = readr::col_double(),
    DT_INTERNA = readr::col_date(format = "%d/%m/%Y"),
    SG_UF_INTE = readr::col_character(),
    ID_RG_INTE = readr::col_character(),
    CO_RG_INTE = readr::col_double(),
    ID_MN_INTE = readr::col_character(),
    CO_MU_INTE = readr::col_double(),
    UTI = readr::col_double(),
    DT_ENTUTI = readr::col_date(format = "%d/%m/%Y"),
    DT_SAIDUTI = readr::col_date(format = "%d/%m/%Y"),
    SUPORT_VEN = readr::col_double(),
    RAIOX_RES = readr::col_double(),
    RAIOX_OUT = readr::col_character(),
    DT_RAIOX = readr::col_date(format = "%d/%m/%Y"),
    AMOSTRA = readr::col_double(),
    DT_COLETA = readr::col_date(format = "%d/%m/%Y"),
    TP_AMOSTRA = readr::col_double(),
    OUT_AMOST = readr::col_character(),
    PCR_RESUL = readr::col_double(),
    DT_PCR = readr::col_date(format = "%d/%m/%Y"),
    POS_PCRFLU = readr::col_double(),
    TP_FLU_PCR = readr::col_double(),
    PCR_FLUASU = readr::col_double(),
    FLUASU_OUT = readr::col_character(),
    PCR_FLUBLI = readr::col_double(),
    FLUBLI_OUT = readr::col_character(),
    POS_PCROUT = readr::col_double(),
    PCR_VSR = readr::col_double(),
    PCR_PARA1 = readr::col_character(),
    PCR_PARA2 = readr::col_character(),
    PCR_PARA3 = readr::col_character(),
    PCR_PARA4 = readr::col_character(),
    PCR_ADENO = readr::col_double(),
    PCR_METAP = readr::col_double(),
    PCR_BOCA = readr::col_character(),
    PCR_RINO = readr::col_double(),
    PCR_OUTRO = readr::col_double(),
    DS_PCR_OUT = readr::col_character(),
    CLASSI_FIN = readr::col_double(),
    CLASSI_OUT = readr::col_character(),
    CRITERIO = readr::col_double(),
    EVOLUCAO = readr::col_double(),
    DT_EVOLUCA = readr::col_date(format = "%d/%m/%Y"),
    DT_ENCERRA = readr::col_date(format = "%d/%m/%Y"),
    DT_DIGITA = readr::col_date(format = "%d/%m/%Y"),
    HISTO_VGM = readr::col_double(),
    PAIS_VGM = readr::col_character(),
    CO_PS_VGM = readr::col_character(),
    LO_PS_VGM = readr::col_character(),
    DT_VGM = readr::col_date(format = "%d/%m/%Y"),
    DT_RT_VGM = readr::col_date(format = "%d/%m/%Y"),
    PCR_SARS2 = readr::col_double(),
    PAC_COCBO = readr::col_character(),
    PAC_DSCBO = readr::col_character(),
    OUT_ANIM = readr::col_character(),
    DOR_ABD = readr::col_double(),
    FADIGA = readr::col_double(),
    PERD_OLFT = readr::col_double(),
    PERD_PALA = readr::col_double(),
    TOMO_RES = readr::col_double(),
    TOMO_OUT = readr::col_character(),
    DT_TOMO = readr::col_date(format = "%d/%m/%Y"),
    TP_TES_AN = readr::col_double(),
    DT_RES_AN = readr::col_date(format = "%d/%m/%Y"),
    RES_AN = readr::col_double(),
    POS_AN_FLU = readr::col_double(),
    TP_FLU_AN = readr::col_double(),
    POS_AN_OUT = readr::col_double(),
    AN_SARS2 = readr::col_double(),
    AN_VSR = readr::col_double(),
    AN_PARA1 = readr::col_character(),
    AN_PARA2 = readr::col_character(),
    AN_PARA3 = readr::col_character(),
    AN_ADENO = readr::col_character(),
    AN_OUTRO = readr::col_character(),
    DS_AN_OUT = readr::col_character(),
    TP_AM_SOR = readr::col_double(),
    SOR_OUT = readr::col_character(),
    DT_CO_SOR = readr::col_date(format = "%d/%m/%Y"),
    TP_SOR = readr::col_double(),
    OUT_SOR = readr::col_character(),
    DT_RES = readr::col_date(format = "%d/%m/%Y"),
    RES_IGG = readr::col_double(),
    RES_IGM = readr::col_double(),
    RES_IGA = readr::col_double(),
    ESTRANG = readr::col_double(),
    VACINA_COV = readr::col_double(),
    DOSE_1_COV = readr::col_character(),
    DOSE_2_COV = readr::col_character(),
    DOSE_REF = readr::col_character(),
    FAB_COV_1 = readr::col_character(),
    FAB_COV_2 = readr::col_character(),
    FAB_COVREF = readr::col_character(),
    LOTE_REF = readr::col_character(),
    LAB_PR_COV = readr::col_character(),
    LOTE_1_COV = readr::col_character(),
    LOTE_2_COV = readr::col_character(),
    FNT_IN_COV = readr::col_double()
  )

  # Function to write chunk of lines to database.
  # Each chunk is row-bound after the package's header object before being
  # appended (presumably an empty template that keeps the column set
  # consistent across files -- confirm against dadosPainelPI).
  chunk_to_bank <- function(x, pos) {
    tmp <- dplyr::bind_rows(dadosPainelPI::sivep_gripe_header, x)
    DBI::dbWriteTable(
      conn = conn_sivep_gripe,
      name = "sivep_gripe",
      value = tmp,
      append = TRUE
    )
  }

  # The database file was just created, so the "sivep_gripe" table cannot
  # exist yet; the first appended chunk creates it.

  # Read files from the list in chunks and store in temporary database
  for (f in files_list) {
    message(f)
    readr::read_csv2_chunked(
      file = f,
      callback = readr::DataFrameCallback$new(chunk_to_bank),
      col_types = cols_definitions,
      chunk_size = chunk_size
    )
  }

  # Return the lazy table backed by the temporary database
  sivep_tbl <- dplyr::tbl(conn_sivep_gripe, "sivep_gripe")
  succeeded <- TRUE
  sivep_tbl
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.