#' Process SINAN Malaria variables from DataSUS
#'
#' \code{process_sinan_malaria} processes SINAN Malaria variables retrieved by \code{fetch_datasus()}.
#'
#' This function processes SINAN Malaria variables retrieved by \code{fetch_datasus()}, replacing the codes of categorical variables with descriptive labels. Codes without a known label are kept unchanged. Only the variables that are present in \code{data} are processed. When \code{NU_IDADE_N} is present, the variables \code{IDADEminutos}, \code{IDADEhoras}, \code{IDADEdias}, \code{IDADEmeses} and \code{IDADEanos} are created from it.
#'
#' @param data \code{data.frame} created by \code{fetch_datasus()}.
#' @param municipality_data optional logical. \code{TRUE} by default. Intended to create new variables in the dataset informing the full name and other details about the municipality of residence. Note: this argument is currently not used by the function body.
#'
#' @return A \code{tibble} with the processed variables.
#'
#' @examples \dontrun{
#' df <- fetch_datasus(year_start = 2016, year_end = 2016,
#' uf = "RJ", information_system = "SINAN-MALARIA-FINAL")
#' df_a <- process_sinan_malaria(df)
#' df_b <- process_sinan_malaria(df, municipality_data = FALSE)
#' }
#' @export
process_sinan_malaria <- function(data, municipality_data = TRUE){
  # Variables names
  variables_names <- names(data)

  # Use dtplyr: from here on `data` is a lazy data.table pipeline, so every
  # transformation must be expressed with dplyr verbs (not `data$col <- ...`).
  data <- dtplyr::lazy_dt(data)

  # TP_NOT
  if ("TP_NOT" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(TP_NOT = dplyr::case_match(
        .data$TP_NOT,
        "1" ~ "Negativa",
        "2" ~ "Individual",
        "3" ~ "Surto",
        "4" ~ "Agregado",
        .default = .data$TP_NOT
      )) %>%
      dplyr::mutate(TP_NOT = as.factor(.data$TP_NOT))
  }

  # DT_NOTIFIC
  if ("DT_NOTIFIC" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(DT_NOTIFIC = as.Date(.data$DT_NOTIFIC))
  }

  # SG_UF_NOT
  if ("SG_UF_NOT" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(SG_UF_NOT = dplyr::case_match(
        .data$SG_UF_NOT,
        "0" ~ "Ignorado",
        "99" ~ "Ignorado",
        "11" ~ "Rond\u00f4nia",
        "12" ~ "Acre",
        "13" ~ "Amazonas",
        "14" ~ "Roraima",
        "15" ~ "Par\u00e1",
        "16" ~ "Amap\u00e1",
        "17" ~ "Tocantins",
        "21" ~ "Maranh\u00e3o",
        "22" ~ "Piau\u00ed",
        "23" ~ "Cear\u00e1",
        "24" ~ "Rio Grande do Norte",
        "25" ~ "Para\u00edba",
        "26" ~ "Pernambuco",
        "27" ~ "Alagoas",
        "28" ~ "Sergipe",
        "29" ~ "Bahia",
        "31" ~ "Minas Gerais",
        "32" ~ "Esp\u00edrito Santo",
        "33" ~ "Rio de Janeiro",
        "35" ~ "S\u00e3o Paulo",
        "41" ~ "Paran\u00e1",
        "42" ~ "Santa Catarina",
        "43" ~ "Rio Grande do Sul",
        "50" ~ "Mato Grosso do Sul",
        "51" ~ "Mato Grosso",
        "52" ~ "Goi\u00e1s",
        "53" ~ "Distrito Federal",
        .default = .data$SG_UF_NOT
      )) %>%
      dplyr::mutate(SG_UF_NOT = as.factor(.data$SG_UF_NOT))
  }

  # IDADE
  # The first character of NU_IDADE_N is taken as the unit code and the
  # following characters as the value; one column per unit is then filled.
  # NOTE(review): the 999 literal is numeric while the substr() calls below
  # treat NU_IDADE_N as text -- confirm the column type delivered by
  # fetch_datasus().
  if ("NU_IDADE_N" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(NU_IDADE_N = dplyr::case_match(.data$NU_IDADE_N,
                                                   999 ~ NA,
                                                   .default = .data$NU_IDADE_N)) %>%
      # Code and value
      dplyr::mutate(idade_cod = substr(.data$NU_IDADE_N, 1, 1),
                    idade_value = as.numeric(substr(.data$NU_IDADE_N, 2, 3))) %>%
      dplyr::mutate(IDADEminutos = dplyr::case_match(.data$idade_cod,
                                                     "0" ~ idade_value,
                                                     .default = NA)) %>%
      dplyr::mutate(IDADEhoras = dplyr::case_match(.data$idade_cod,
                                                   "1" ~ idade_value,
                                                   .default = NA)) %>%
      dplyr::mutate(IDADEdias = dplyr::case_match(.data$idade_cod,
                                                  "2" ~ idade_value,
                                                  .default = NA)) %>%
      dplyr::mutate(IDADEmeses = dplyr::case_match(.data$idade_cod,
                                                   "3" ~ idade_value,
                                                   .default = NA)) %>%
      dplyr::mutate(
        IDADEanos = dplyr::case_match(
          .data$idade_cod,
          "4" ~ idade_value,
          "5" ~ idade_value + 100,
          .default = NA
        )
      ) %>%
      # Drop the helper columns
      dplyr::select(-"idade_cod", -"idade_value")
  }

  # CS_SEXO
  if ("CS_SEXO" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(CS_SEXO = dplyr::case_match(
        .data$CS_SEXO,
        "M" ~ "Masculino",
        "F" ~ "Feminino",
        "I" ~ "Ignorado",
        .default = .data$CS_SEXO
      )) %>%
      dplyr::mutate(CS_SEXO = as.factor(.data$CS_SEXO))
  }

  # CS_GESTANT
  if ("CS_GESTANT" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(CS_GESTANT = dplyr::case_match(
        .data$CS_GESTANT,
        "1" ~ "1o trimestre",
        "2" ~ "2o trimestre",
        "3" ~ "3o trimestre",
        "4" ~ "Idade gestacional ignorada",
        "5" ~ "N\u00e3o",
        "6" ~ "N\u00e3o se aplica",
        "9" ~ "Ignorado",
        .default = .data$CS_GESTANT
      )) %>%
      dplyr::mutate(CS_GESTANT = as.factor(.data$CS_GESTANT))
  }

  # CS_RACA
  if ("CS_RACA" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(CS_RACA = dplyr::case_match(
        .data$CS_RACA,
        "1" ~ "Branca",
        "2" ~ "Preta",
        "3" ~ "Amarela",
        "4" ~ "Parda",
        "5" ~ "Ind\u00edgena",
        "9" ~ "Ignorado",
        .default = .data$CS_RACA
      )) %>%
      dplyr::mutate(CS_RACA = as.factor(.data$CS_RACA))
  }

  # CS_ESCOL_N
  if ("CS_ESCOL_N" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(CS_ESCOL_N = dplyr::case_match(
        .data$CS_ESCOL_N,
        "1" ~ "1a a 4a s\u00e9rie incompleta do EF",
        "2" ~ "4a s\u00e9rie completa do EF (antigo 1o grau)",
        "3" ~ "5a \u00e0 8a s\u00e9rie incompleta do EF (antigo gin\u00e1sio ou 1o grau)",
        "4" ~ "Ensino fundamental completo (antigo gin\u00e1sio ou 1o grau)",
        "5" ~ "Ensino m\u00e9dio incompleto (antigo colegial ou 2o grau)",
        "6" ~ "Ensino m\u00e9dio completo (antigo colegial ou 2o grau)",
        "7" ~ "Educa\u00e7\u00e3o superior incompleta",
        "8" ~ "Educa\u00e7\u00e3o superior completa",
        "9" ~ "Ignorado",
        "10" ~ "N\u00e3o se aplica",
        .default = .data$CS_ESCOL_N
      )) %>%
      # `=` (not `<-`) so the existing column is replaced instead of a new,
      # oddly named column being created.
      dplyr::mutate(CS_ESCOL_N = as.factor(.data$CS_ESCOL_N))
  }

  # SG_UF
  if ("SG_UF" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(SG_UF = dplyr::case_match(
        .data$SG_UF,
        "0" ~ "Ignorado",
        "99" ~ "Ignorado",
        "11" ~ "Rond\u00f4nia",
        "12" ~ "Acre",
        "13" ~ "Amazonas",
        "14" ~ "Roraima",
        "15" ~ "Par\u00e1",
        "16" ~ "Amap\u00e1",
        "17" ~ "Tocantins",
        "21" ~ "Maranh\u00e3o",
        "22" ~ "Piau\u00ed",
        "23" ~ "Cear\u00e1",
        "24" ~ "Rio Grande do Norte",
        "25" ~ "Para\u00edba",
        "26" ~ "Pernambuco",
        "27" ~ "Alagoas",
        "28" ~ "Sergipe",
        "29" ~ "Bahia",
        "31" ~ "Minas Gerais",
        "32" ~ "Esp\u00edrito Santo",
        "33" ~ "Rio de Janeiro",
        "35" ~ "S\u00e3o Paulo",
        "41" ~ "Paran\u00e1",
        "42" ~ "Santa Catarina",
        "43" ~ "Rio Grande do Sul",
        "50" ~ "Mato Grosso do Sul",
        "51" ~ "Mato Grosso",
        "52" ~ "Goi\u00e1s",
        "53" ~ "Distrito Federal",
        .default = .data$SG_UF
      )) %>%
      dplyr::mutate(SG_UF = as.factor(.data$SG_UF))
  }

  # ID_PAIS
  # `data` is a lazy dtplyr object, so the country name is looked up with a
  # join inside the pipeline. The lookup is trimmed to key + label and the
  # label is given a temporary name to avoid clashing with existing columns.
  # NOTE(review): assumes ID_PAIS is unique in microdatasus::paisnet;
  # duplicated keys would duplicate rows -- confirm.
  if ("ID_PAIS" %in% variables_names) {
    pais_lookup <- dplyr::select(
      microdatasus::paisnet,
      "ID_PAIS",
      pais_nome_tmp = "NM_PAIS"
    )
    data <- data %>%
      dplyr::left_join(pais_lookup, by = "ID_PAIS") %>%
      dplyr::mutate(ID_PAIS = .data$pais_nome_tmp) %>%
      dplyr::select(-"pais_nome_tmp")
  }

  # ID_OCUPA_N
  # Same join-based lookup, against the CBO occupation table.
  # NOTE(review): assumes `cod` is unique in microdatasus::tabCBO -- confirm.
  if ("ID_OCUPA_N" %in% variables_names) {
    ocupa_lookup <- dplyr::select(
      microdatasus::tabCBO,
      ID_OCUPA_N = "cod",
      ocupa_nome_tmp = "nome"
    )
    data <- data %>%
      dplyr::left_join(ocupa_lookup, by = "ID_OCUPA_N") %>%
      dplyr::mutate(ID_OCUPA_N = factor(.data$ocupa_nome_tmp)) %>%
      dplyr::select(-"ocupa_nome_tmp")
  }

  # CLASSI_FIN
  if ("CLASSI_FIN" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(CLASSI_FIN = dplyr::case_match(
        .data$CLASSI_FIN,
        "1" ~ "Confirmado",
        "2" ~ "Descartado",
        .default = .data$CLASSI_FIN
      )) %>%
      dplyr::mutate(CLASSI_FIN = as.factor(.data$CLASSI_FIN))
  }

  # AT_ATIVIDA
  if ("AT_ATIVIDA" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(AT_ATIVIDA = dplyr::case_match(
        .data$AT_ATIVIDA,
        "1" ~ "Agricultura",
        "2" ~ "Pecu\u00e1ria",
        "3" ~ "Dom\u00e9stica",
        "4" ~ "Turismo",
        "5" ~ "Garimpagem",
        "6" ~ "Explora\u00e7\u00e3o vegetal",
        "7" ~ "Ca\u00e7a/Pesca",
        "8" ~ "Construtor de estradas/barragens",
        "9" ~ "Minera\u00e7\u00e3o",
        "10" ~ "Viajante",
        "11" ~ "Outros",
        "99" ~ "Ignorado",
        .default = .data$AT_ATIVIDA
      )) %>%
      dplyr::mutate(AT_ATIVIDA = as.factor(.data$AT_ATIVIDA))
  }

  # AT_LAMINA
  # The recoded values are written back to AT_LAMINA itself, so AT_ATIVIDA
  # is left untouched by this step.
  if ("AT_LAMINA" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(AT_LAMINA = dplyr::case_match(
        .data$AT_LAMINA,
        "1" ~ "BP",
        "2" ~ "BA",
        "3" ~ "LVC",
        .default = .data$AT_LAMINA
      )) %>%
      dplyr::mutate(AT_LAMINA = as.factor(.data$AT_LAMINA))
  }

  # AT_SINTOMA
  if ("AT_SINTOMA" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(AT_SINTOMA = dplyr::case_match(
        .data$AT_SINTOMA,
        "1" ~ "Com sintomas",
        "2" ~ "Sem sintomas",
        .default = .data$AT_SINTOMA
      )) %>%
      dplyr::mutate(AT_SINTOMA = as.factor(.data$AT_SINTOMA))
  }

  # TPAUTOCTO
  if ("TPAUTOCTO" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(TPAUTOCTO = dplyr::case_match(
        .data$TPAUTOCTO,
        "1" ~ "Sim",
        "2" ~ "N\u00e3o",
        "3" ~ "Indeterminado",
        .default = .data$TPAUTOCTO
      )) %>%
      dplyr::mutate(TPAUTOCTO = as.factor(.data$TPAUTOCTO))
  }

  # COUFINF
  if ("COUFINF" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(COUFINF = dplyr::case_match(
        .data$COUFINF,
        "0" ~ "Ignorado",
        "99" ~ "Ignorado",
        "11" ~ "Rond\u00f4nia",
        "12" ~ "Acre",
        "13" ~ "Amazonas",
        "14" ~ "Roraima",
        "15" ~ "Par\u00e1",
        "16" ~ "Amap\u00e1",
        "17" ~ "Tocantins",
        "21" ~ "Maranh\u00e3o",
        "22" ~ "Piau\u00ed",
        "23" ~ "Cear\u00e1",
        "24" ~ "Rio Grande do Norte",
        "25" ~ "Para\u00edba",
        "26" ~ "Pernambuco",
        "27" ~ "Alagoas",
        "28" ~ "Sergipe",
        "29" ~ "Bahia",
        "31" ~ "Minas Gerais",
        "32" ~ "Esp\u00edrito Santo",
        "33" ~ "Rio de Janeiro",
        "35" ~ "S\u00e3o Paulo",
        "41" ~ "Paran\u00e1",
        "42" ~ "Santa Catarina",
        "43" ~ "Rio Grande do Sul",
        "50" ~ "Mato Grosso do Sul",
        "51" ~ "Mato Grosso",
        "52" ~ "Goi\u00e1s",
        "53" ~ "Distrito Federal",
        .default = .data$COUFINF
      )) %>%
      dplyr::mutate(COUFINF = as.factor(.data$COUFINF))
  }

  # COPAISINF
  # Country of infection: matched against the ID_PAIS key of the country
  # table (renamed to COPAISINF in the trimmed lookup so the join key lines
  # up with the data column).
  # NOTE(review): assumes ID_PAIS is unique in microdatasus::paisnet -- confirm.
  if ("COPAISINF" %in% variables_names) {
    pais_inf_lookup <- dplyr::select(
      microdatasus::paisnet,
      COPAISINF = "ID_PAIS",
      pais_inf_nome_tmp = "NM_PAIS"
    )
    data <- data %>%
      dplyr::left_join(pais_inf_lookup, by = "COPAISINF") %>%
      dplyr::mutate(COPAISINF = .data$pais_inf_nome_tmp) %>%
      dplyr::select(-"pais_inf_nome_tmp")
  }

  # RESULT
  if ("RESULT" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(RESULT = dplyr::case_match(
        .data$RESULT,
        "1" ~ "Negativo",
        "2" ~ "F",
        "3" ~ "F + FG",
        "4" ~ "V",
        "5" ~ "F + V",
        "6" ~ "V + FG",
        "7" ~ "FG",
        "8" ~ "M",
        "9" ~ "F + M",
        "10" ~ "O",
        .default = .data$RESULT
      )) %>%
      dplyr::mutate(RESULT = as.factor(.data$RESULT))
  }

  # PCRUZ
  if ("PCRUZ" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(PCRUZ = dplyr::case_match(
        .data$PCRUZ,
        "1" ~ "Menor que meia cruz",
        "2" ~ "Meia cruz",
        "3" ~ "Uma cruz",
        "4" ~ "Duas cruzes",
        "5" ~ "Tr\u00eas cruzes",
        "6" ~ "Quatro cruzes",
        .default = .data$PCRUZ
      )) %>%
      dplyr::mutate(PCRUZ = as.factor(.data$PCRUZ))
  }

  # TRA_ESQUEM
  if ("TRA_ESQUEM" %in% variables_names) {
    data <- data %>%
      dplyr::mutate(TRA_ESQUEM = dplyr::case_match(
        .data$TRA_ESQUEM,
        "1" ~ "Infec\u00e7\u00f5es por Pv com Cloroquina em 3 dias e Primaquina em 7 dias",
        "2" ~ "Infec\u00e7\u00f5es por Pf com Quinina em 3 dias + Doxiciclina em 5 dias + primaquina no 6o dia",
        "3" ~ "Infec\u00e7\u00f5es mistas por Pv + Pf com Mefloquina em dose \u00fanica e primaquina em 7 dias",
        "4" ~ "Infec\u00e7\u00f5es por Pm com cloroquina em 3 dias",
        "5" ~ "Infec\u00e7\u00f5es por Pv em crian\u00e7as apresentando v\u00f4mitos, com c\u00e1psulas retais de artesunato em 4 dias e Primaquina em 7 dias",
        "6" ~ "Infec\u00e7\u00f5es por Pf com Mefloquina em dose \u00fanica e primaquina no segundo dia",
        "7" ~ "Infec\u00e7\u00f5es por Pf com Quinina em 7 dias",
        "8" ~ "Infec\u00e7\u00f5es por Pf de crian\u00e7as com c\u00e1psulas retais de artesunato em 4 dias e dose \u00fanica de Mefloquina no 3o dia e Primaquina no 5o dia",
        "9" ~ "Infec\u00e7\u00f5es mistas por Pv + Pf com Quinina em 3 dias, doxiciclina em 5 dias e Primaquina em 7 dias",
        "10" ~ "Preven\u00e7\u00e3o de reca\u00edda da mal\u00e1ria por Pv com Cloroquina em dose \u00fanica semanal durante 3 meses",
        "11" ~ "Mal\u00e1ria grave e complicada",
        "99" ~ "Outro esquema utilizado (por m\u00e9dico)",
        .default = .data$TRA_ESQUEM
      )) %>%
      dplyr::mutate(TRA_ESQUEM = as.factor(.data$TRA_ESQUEM))
  }

  # From data.table to tibble (this is where the lazy pipeline is evaluated)
  data <- tibble::as_tibble(data)

  # Purge levels
  data <- droplevels(data)

  # Unescape unicode characters
  # NOTE(review): stri_unescape_unicode is applied to every column and
  # returns character vectors, so factor/Date/numeric columns appear to be
  # coerced to character here -- confirm this is intended.
  data <- suppressWarnings(tibble::as_tibble(lapply(X = data, FUN = stringi::stri_unescape_unicode)))

  # Return
  return(data)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.