# 0- function to download the 2019 census ------------------------------------
#' Download school census microdata
#'
#' @param year numeric, at the moment works with 2018 and 2019 data
#' @param method character, download method handed to
#'   \code{download.file()}. Default is "wget". Other options: "internal",
#'   "libcurl", "curl"
#'
#' @return Called for its side effect: a .zip file with all microdata, named
#'   \code{microdados_educacao_basica_<year>.zip}, is written to the current
#'   working directory and must be unzipped afterwards. The value returned by
#'   \code{download.file()} is passed through.
#' @export
#'
#' @details This function works only with 2019 and 2018 data.
#'   Other years can be downloaded in: http://inep.gov.br/microdados
#'
#'   This package works fine with 2019 data, and at least partially with 2015
#'   data. Other years were not tested till now.
#'
#' @examples
#' \donttest{
#' download_microdata()
#' download_microdata(year=2018)}
#'
#' # for a more complete view, see ?insert_labels
download_microdata <- function(year = 2019, method = "wget") {
  warning('You can download the microdata in: http://inep.gov.br/microdados')

  # The remote archive and the local copy share the same file name.
  zip_name <- paste0('microdados_educacao_basica_', year, '.zip')
  zip_url <- paste0('http://download.inep.gov.br/microdados/', zip_name)

  # destfile must be one assembled string: passing the pieces as separate
  # arguments would bind them to other download.file() parameters.
  # mode = "wb" writes the archive as binary so it is not altered on Windows.
  download.file(url = zip_url, destfile = zip_name, method = method, mode = "wb")
}
# 1- function to import the data into R ---------------------------------------
#' Import csv microdata to R
#'
#' @description Reads a census .csv file with \code{data.table::fread()}
#'   (suited to the large student- and teacher-level files), prints the first
#'   rows as a quick check and stores the table in an .rda file.
#'
#' @param file_path_origin character, path to downloaded .csv file
#' @param file_path_destiny character, path to .rda file to be created
#'
#' @return Called for its side effect: the imported table is saved in the
#'   .rda file under the object name \code{censo}.
#' @export
#'
#' @examples
#' \dontrun{
#' file.downloaded='~/YOUR_PATH/downloaded_data/ESCOLA.CSV'
#' file.imported='~/YOUR_PATH/temp_data.rda'
#'
#' import_csv2rda(file_path_origin=file.downloaded,
#' file_path_destiny=file.imported)}
#'
#' # for a more complete view, see ?insert_labels
import_csv2rda <- function(file_path_origin, file_path_destiny) {
  # Reader settings: "|" separates fields, "," is the decimal mark and the
  # text is declared as Latin-1. Supposedly valid for every school census
  # file since 2013.
  censo <- data.table::fread(
    input = file_path_origin,
    sep = "|",
    dec = ",",
    encoding = "Latin-1"
  )

  # Show the first rows so the user can eyeball the import.
  first_rows <- head(censo)
  print("SEE THE DATA IMPORTED:")
  print(first_rows)

  # The object is stored under the name `censo`, which is what
  # insert_labels() expects to find when it loads this file.
  save(list = "censo", file = file_path_destiny)
}
# 2- function to add the dictionary labels to the data frame ------------------
# what used to be the destination file is now used as the origin
#' Insert labels in factor variables (in portuguese).
#'
#' @description The dictionary used to label is from 2019 data
#'
#' @param file_path_origin character, path to .rda file generated by function import_csv2rda()
#' @param file_path_destiny character, path to .rda file to be created, with labels in factors
#' @param data_level character, defines census data level according to data file imported
#'   ('Escola','Docente','Gestor','Turma','Matricula'). Exactly one level is
#'   used; when the argument is left at its default the first one ('Escola')
#'   is taken.
#' @param add_variables logical, if TRUE add some useful variables to the data.frame
#'
#' @details Works fine with 2019 data, and at least partially with 2015 data. Other years not tested yet.
#'
#'   Every column of the imported data is looked up by name in the recode
#'   dictionary shipped with the package for the chosen \code{data_level}:
#'   \itemize{
#'     \item columns absent from the dictionary are left untouched and a
#'       warning is raised;
#'     \item columns the dictionary does not treat as factors have the codes
#'       999, 8887, 8888 and 88888 replaced by \code{NA};
#'     \item the remaining columns are recoded with \code{car::Recode()}.
#'   }
#'   When \code{add_variables} is TRUE and the column \code{TP_ETAPA_ENSINO}
#'   is recoded, the columns \code{ciclo} and \code{serieEM} are derived from
#'   its original numeric codes before the recoding takes place.
#'
#' @return data.frame saved in .rda file (object name \code{censo}); its first
#'   rows are printed.
#' @export
#'
#' @examples
#' # you must first download the .zip file. See ?download_microdata
#' # then you must unzip it and choose a data file (eg. ESCOLA.CSV)
#' # then you can run the code below, changing the first 3 lines as you wish
#' # note that 'data_level' must also be defined, in function insert_labels()
#'
#' \dontrun{
#' file.downloaded='~/YOUR_PATH/downloaded_data/ESCOLA.CSV'
#' file.imported='~/YOUR_PATH/temp_data.rda'
#' file.labelled='~/YOUR_PATH/censusData_ESCOLA.rda'
#'
#' import_csv2rda(file_path_origin=file.downloaded,
#' file_path_destiny=file.imported)
#'
#' insert_labels (file_path_origin=file.imported,
#' file_path_destiny=file.labelled,
#' data_level='Escola',
#' add_variables=TRUE)}
insert_labels <- function(file_path_origin, file_path_destiny,
                          data_level = c('Escola', 'Docente', 'Gestor', 'Turma', 'Matricula'),
                          add_variables = TRUE) {
  # Reduce data_level to a single valid choice; without this the default
  # vector would produce five dictionary paths at once.
  data_level <- match.arg(data_level)
  create_extras <- isTRUE(as.logical(add_variables))

  # Load the recode dictionary for this level. Loading into a private
  # environment keeps the file's objects from overwriting local variables.
  dict_file <- system.file("recodes", paste0('dados-recode_', data_level, '.rda'),
                           package = 'BRschoolData')
  if (!nzchar(dict_file)) {
    stop('Recode dictionary for data_level "', data_level,
         '" was not found in package BRschoolData.')
  }
  dict_env <- new.env()
  load(dict_file, envir = dict_env)
  if (!exists("dd", envir = dict_env, inherits = FALSE)) {
    stop('Object "dd" was not found in the recode dictionary file.')
  }
  dd <- as.data.frame(get("dd", envir = dict_env, inherits = FALSE))

  # Load the imported data (object `censo`, as written by import_csv2rda()).
  data_env <- new.env()
  load(file_path_origin, envir = data_env)
  if (!exists("censo", envir = data_env, inherits = FALSE)) {
    stop('Object "censo" was not found in file_path_origin.')
  }
  censo <- as.data.frame(get("censo", envir = data_env, inherits = FALSE))

  # Codes replaced by NA in variables that are not factors.
  missing_codes <- c(999, 8887, 8888, 88888)

  # TP_ETAPA_ENSINO codes behind each value of the derived column `ciclo`.
  ciclo_codes <- list(
    EI = 1:3,                     # early childhood classes
    EIeEFmix = 56,                # mixed early childhood + elementary
    EF1 = c(4:7, 14:18),          # elementary, first cycle
    EF2 = c(8:11, 19:21, 41),     # elementary, second cycle
    EFmix = c(12:13, 22:24),      # mixed elementary
    EM = c(25:38),                # high school: all kinds except EJA and vocational
    EJA = c(65, 67, 69:74),
    Prof = c(39, 40, 64, 68)      # vocational
  )
  # TP_ETAPA_ENSINO codes behind each grade of regular high school.
  serie_codes <- list(
    "1a serie" = c(25, 30, 35),
    "2a serie" = c(26, 31, 36),
    "3a serie" = c(27, 32, 37)
  )

  # Recode loop. The names are captured up front, so the columns appended
  # inside the loop (ciclo, serieEM) are not visited.
  variaveis <- names(censo)
  for (lop.var in seq_along(variaveis)) {
    nome_var <- variaveis[lop.var]

    if (!nome_var %in% dd$nome) {
      warning(paste('INCOMPLETE LABELING: variable', nome_var, 'is not in the dicionary.'))
      next
    }
    indice.dicionario <- which(dd$nome %in% nome_var)
    if (length(indice.dicionario) > 1) stop('Tem duplicacao de variaveis')
    recodes <- dd$trad.fatores[indice.dicionario]

    if (is.na(recodes)) { # not a factor: only blank out the missing codes
      coluna <- censo[[lop.var]]
      for (codigo in missing_codes) {
        # NA entries in the logical index select nothing in an assignment,
        # so values that are already NA are simply left alone.
        coluna[coluna == codigo] <- NA
      }
      censo[[lop.var]] <- coluna
      next
    }

    ## recode the variable ----------------
    # The derived columns must be built BEFORE recoding, while
    # TP_ETAPA_ENSINO still holds its numeric codes.
    if (create_extras && nome_var == 'TP_ETAPA_ENSINO') {
      etapa <- censo[["TP_ETAPA_ENSINO"]]

      # column 'ciclo'
      censo$ciclo <- NA
      for (rotulo in names(ciclo_codes)) {
        censo$ciclo[which(etapa %in% ciclo_codes[[rotulo]])] <- rotulo
      }

      # column with the grade, for regular high school only
      censo$serieEM <- NA
      for (rotulo in names(serie_codes)) {
        censo$serieEM[which(etapa %in% serie_codes[[rotulo]])] <- rotulo
      }
    }

    censo[[lop.var]] <- car::Recode(var = censo[[lop.var]], recodes = recodes)
  }

  save(censo, file = file_path_destiny)
  print('SEE THE DATA LABELED:')
  print(head(censo))
}
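# NOTE: the two lines below are leftover web-page text, not R code; they are
# kept as comments so that the file can be parsed.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.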