#' @title Estimate proportions, ratios, aggregations and the respective margins
#' of error (MOEs).
#' @description The \code{sumacs} function uses outputs from the
#' \code{\link{acs}} package to compute proportions, ratios and aggregations
#' based on text formulas, or simply extract variables. The function
#' downloads the data and then estimate the formulas. If the function is used
#' without specifying any \code{data}, remember to define a key using the
#' \code{\link{acs}} command \code{api.key.install(key="*")}.
#' @param formula A character or vector of characters containing formulas using
#' ACS/Census variables or just variables. + - operators can be included. / defines a
#' division. When the formula contains "* 100", the final statistic and MOE is
#' multiply by 100.
#' @param varname A character or vector of characters containing the new
#' variables to be created. This vector must have same length as
#' \code{formula} and \code{method}, and it is not needed when getting only
#' variables.
#' @param method A character or vector of characters defining the type of
#' estimate expected: "proportion", "ratio", "aggregation", "variable". This
#' vector must have same length as \code{formula} and \code{varname}. It is
#' not needed when getting only variables. Default value "variable".
#' @param level A character or vector of characters specifying the geographic
#' level of the data. It may be necessary to specificy values to the
#' corresponding levels. For instance, when \code{level = "county"}, you have
#' to specify a state (e.g., \code{state = "WI"}, the default state in this
#' package). You can also use a wildcard method (\code{state = "*"}) to
#' include all the states. Below, you can see the required combinations of
#' different summary levels.
#'
#' \cr 010 us \cr 020 region \cr 030 division \cr 040 state \cr 050 state,
#' county \cr 060 state, county, county.subdivision \cr 140 state, county,
#' tract \cr 150 state, county, tract, block.group \cr 160 state, place \cr
#' 250 american.indian.area \cr 320 state, msa \cr 340 state, csa \cr 350
#' necta \cr 400 urban.area \cr 500 state, congressional.district \cr 610
#' state, state.legislative.district.upper \cr 620 state,
#' state.legislative.district.lower \cr 795 state, puma \cr 860 zip.code \cr
#' 950 state, school.district.elementary \cr 960 state,
#' school.district.secondary \cr 970 state, school.district.unified \cr
#'
#' When \code{combine == TRUE}, the geographic information should be in a list.
#' @param combine Whether the geographies are to be combined. If \code{combine = TRUE},
#' lists should be used when specifying geographic levels (the corresponding level
#' for the level specified). If the rest geographic levels has one element,
#' the function will assume that level is equal for all the sub-levels.
#' For example, if \code{state = "WI"}, and several counties were specified,
#' the function assumes that all the counties are from WI.
#' @param combine.name Label for the aggregate geography when combining levels.
#' The default value is \code{aggregate}.
#' @param dataset A string or vector of strings specifying the data set to be used: acs, sf1 or sf1.
#' The default value is "acs".
#' @param endyear An integer or vector of integers (default is 2014) indicating the latest year of
#' the data in the survey or Census year.
#' @param span An integer indicating the span (in years) of the desired ACS data
#' (should be 1, 3, or 5), defaults to 5.
#' @param conf.level Confidence level to estimate MOEs. The default value is
#' 0.90.
#' @param one.zero Whether to include standard errors for only one zero-value
#' (max value) of columns or all. The default is TRUE.
#' @param trace Shows progress of the variable creation. The default is TRUE.
#' @param data Input data generated by the \code{\link{acsdata}} function.
#' Variables and levels must be the same as those specified in the
#' \code{sumacs} function.
#' @param format.out Format of the output: "wide" or "long". The default is
#' "wide".
#' @param file The resulting output is exported to a CSV file rather than to the R prompt. The file name must be specified as a character string.
#' @param print.levels Boolean that print levels generated by the \code{geo.make} function.
#' @return Returns a \code{data.table/data.frame} object with the estimates and
#' MOEs.
#' @details When the standard error of a proportion cannot be estimated, the
#' "ratio" option is used. This adjustment is done row by row.
#' @note Depending on the quality of the internet connection, number of
#' variables and levels, getting the ACS/Census data can be slow, especially for the
#' levels "county.subdivision", "block.group", and "tract" (it might take more than 30 minutes). It is recommended to get the data using the function
#' \code{\link{acsdata}} first, and then to use \code{sumacs}.
#' @examples
#' # api.key.install(key="*")
#'
#' # without combining
#' sumacs(formula = "(b16004_004 + b16004_026 + b16004_048 / b16004_001)",
#' varname = "langspan0913", method = "prop")
#'
#' # combining
#' sumacs("(b16004_004 + b16004_026 + b16004_048 / b16004_001)",
#' varname = "test",
#' method = "prop",
#' level = c("block.group"),
#' state = list("WI"),
#' county = list(1, 141),
#' tract = list(950100, 11700),
#' block.group = list(1:2, 1:2),
#' combine = TRUE)
sumacs <- function(formula, varname = NULL, method = "variable",
level = "state",
dataset = "acs", endyear = 2014, span = 5, conf.level = 0.90,
one.zero = TRUE, trace = TRUE, data = NULL,
format.out = "wide", file = NULL, print.levels = TRUE,
us = "*",
region = "*",
division = "*",
state = "WI",
county = "*",
county.subdivision ="*",
place ="*",
tract = "*",
block.group = "*",
msa = "*",
csa = "*",
necta = "*",
urban.area = "*",
congressional.district = "*",
state.legislative.district.upper = "*",
state.legislative.district.lower = "*",
puma = "*",
zip.code = "*",
american.indian.area = "*",
school.district.elementary = "*",
school.district.secondary = "*",
school.district.unified = "*",
combine = FALSE,
combine.name = "aggregate") {
# define new name file to export csv
newfile <- NULL
if (!is.null(file)) {
newfile <- file
file <- NULL
}
if (length(varname) > 1 & any(duplicated(varname))) {
stop("Duplicated varnames, please define unique varnames!")
}
# initial tests
if (length(formula) > 1 & length(method) == 1) {
method <- rep(method, length(formula))
}
if (length(varname)==0 & all(method=='variable')) {
varname <- getvars(formula)
}
if (length(varname)==0 & all(method!='variable')) {
varname <- paste0("var", 1:length(formula))
print("Specify varname to get custom variable names!")
}
# create dataset combining features
if (length(dataset) == 1) { dataset <- rep(dataset, length(formula)) }
if (length(endyear) == 1) { endyear <- rep(endyear, length(formula)) }
if (!all.equal(length(dataset), length(endyear), length(formula))) {
stop("Formula, dataset, endyear and varname should have the same length!")
}
else {
vdata <- data.table(formula, dataset, endyear, varname, method)
# create combinations
comb <- vdata[, .(dataset, endyear)]
comb <- comb[!duplicated(comb)]
# define list to save results
output <- list()
# loop through object comb
for (i in 1:nrow(comb)) {
temp <- vdata[dataset == comb[i, dataset] & endyear == comb[i, endyear]]
print(paste0("Extracting data from: ", comb[i, dataset], " ",
comb[i, endyear]))
output[[i]] <- compute.acs(formula = temp$formula,
level = level,
dataset = unique(temp$dataset),
endyear = unique(temp$endyear),
method = temp$method,
varname = temp$varname,
span = span,
conf.level = conf.level,
one.zero = one.zero,
trace = trace,
data = data,
format.out = format.out,
file = file,
us = us,
region = region,
division = division,
state = state,
county = county,
county.subdivision = county.subdivision,
place = place,
tract = tract,
block.group = block.group,
msa = msa,
csa = csa,
necta = necta,
urban.area = urban.area,
congressional.district = congressional.district,
state.legislative.district.upper = state.legislative.district.upper,
state.legislative.district.lower = state.legislative.district.lower,
puma = puma,
zip.code = zip.code,
american.indian.area = american.indian.area,
school.district.elementary = school.district.elementary,
school.district.secondary = school.district.secondary,
school.district.unified = school.district.unified,
combine = combine,
combine.name = combine.name,
print.levels = print.levels
)
}
# merge files
if (format.out == "wide") {
vars <- grep("sumlevel|geoid|fips", names(output[[1]]), value = TRUE)
mydata <- Reduce(function(x, y) merge(x, y, all = TRUE, by=c(vars)), output)
}
else if (format.out == "long") {
mydata <- rbindlist(output, fill = TRUE)
}
if (is.null(newfile)) {
return(mydata)
}
else {
fwrite(mydata, file = newfile, na='NA')
print("Data exported to a CSV file! ")
}
}
} # end function
# replace missing values function
missing_thres <- -99999999
rmiss <- function(mat, miss_value = missing_thres) {
mat[mat < miss_value] <- NA
return(mat)
}
# individual extraction function
compute.acs <- function(formula, varname = NULL, method = NULL, level = "state",
dataset = "acs", endyear = 2014, span = 5, conf.level = 0.90,
one.zero = TRUE, trace = TRUE, data = NULL,
format.out = "wide", file = NULL, print.levels = TRUE,
us = "*",
region = "*",
division = "*",
state = "WI",
county = "*",
county.subdivision ="*",
place ="*",
tract = "*",
block.group = "*",
msa = "*",
csa = "*",
necta = "*",
urban.area = "*",
congressional.district = "*",
state.legislative.district.upper = "*",
state.legislative.district.lower = "*",
puma = "*",
zip.code = "*",
american.indian.area = "*",
school.district.elementary = "*",
school.district.secondary = "*",
school.district.unified = "*",
combine = FALSE,
combine.name = "aggregate") {
##################
# initial checks
###################
# if (!all(grepl("\\/|\\+|\\-", formula))) {
# # stop("Some formulas do not have any operator (+, - or /)")
# method <- rep("variables", length(varname))
# }
if (any(!grepl("\\/", formula[tolower(method) %in% c("proportion", "prop", "ratio")])) )
{
stop("Some proportion or ratio formulas do not have the / operator!")
}
if (any(grepl("\\/", formula[tolower(method) %in% c("aggregation", "agg")]))) {
stop("Some aggregation formulas do have the / operator!")
}
###############################
# definition of some variables
###############################
if (length(varname) > 1 & any(duplicated(varname))) {
stop("Duplicated varnames, please define unique varnames!")
}
if (length(formula) > 1 & length(method) == 1) {
method <- rep(method, length(formula))
}
if (length(varname)==0 & all(method=='variable')) {
varname <- getvars(formula)
}
if (length(varname)==0 & all(method!='variable')) {
varname <- paste0("var", 1:length(formula))
print("Specify varname to get custom variable names!")
}
if ((identical(length(formula), length(method)) == 0)) {
stop("Vector of formulas and methods must have the same length!")
}
conf.level <- round(qnorm( (1 + conf.level) / 2), digits = 3)
variables <- acsr::getvars(formula)
nvars <- length(variables)
nlevels <- length(level)
newvars <- length(varname)
print (paste0(". . . . . . ACS/Census variables : ", nvars))
print (paste0(". . . . . . Levels : ", nlevels))
print (paste0(". . . . . . New variables : ", newvars))
# where I will save results
output <- list()
#########################################
# get data for all data (time consuming)
#########################################
if ( is.null(data) ) {
ldata <- acsr::acsdata(formula = formula, level = level, dataset = dataset,
endyear = endyear, span = span,
us = us,
region = region,
division = division,
state = state,
county = county,
county.subdivision = county.subdivision,
place = place,
tract = tract,
block.group = block.group,
msa = msa,
csa = csa,
necta = necta,
urban.area = urban.area,
congressional.district = congressional.district,
state.legislative.district.upper = state.legislative.district.upper,
state.legislative.district.lower = state.legislative.district.lower,
puma = puma,
zip.code = zip.code,
american.indian.area = american.indian.area,
school.district.elementary = school.district.elementary,
school.district.secondary = school.district.secondary,
school.district.unified = school.district.unified,
combine = combine,
combine.name = combine.name,
print.levels = print.levels
)
}
else if ( !is.null(data) ) {
if ( !is.list(data) ) {
stop("The data must be a list!")
}
else if ( is.list(data) & !( all(base::sapply(data, class) == "acs") ) )
{
stop("The data must contain acs objects!")
}
else if ( !(all(level %in% names(data)))) {
stop("Not all levels coincide with the data")
}
else if ( !(all(variables %in% data[[1]]@acs.colnames)) ) {
stop("Not all the ACS/Census variables were found, check variable names in your formulas and the dataset you are using!")
}
ldata <- data
} # else if for data
################################
# loop variable and level
################################
print(". . . . . . Creating variables")
vdata <- list()
# lower case methods
method <- tolower(method)
# start loop by variable
for (v in seq_along(varname) ) {
# create formulas from text
constr <- gsub("\\(|\\)", "", formula[v] ) # remove parentheses
constr <- gsub("\\* 100", "", constr) # remove * 100
multiply <- grepl("\\* 100", formula[v]) # index multiplication by 100
# todo: to check in the future version
division <- grepl("\\/", constr)
#####################
# condition formulas
#####################
if (method[v] %in% c("proportion", "prop", "ratio") & division == TRUE) {
# division (proportion or ratio)
form <- strsplit(constr, "/") # split to get numerator and denominator
if ( length(form[[1]]) == 1 | length(form[[1]]) > 2 ) {
stop(paste0("There is no or multiple division operators: ", varname[v]))
}
# numerator
# extract operators
noper <- strsplit(form[[1]][1], "[aA-zZ]*[0-9]*")
noper <- grep("\\+|\\-", noper[[1]], value = TRUE)
# extract variables
nume <- strsplit(form[[1]][1], "[\\+]|[\\-]")
nume <- gsub("[[:space:]]", "", nume[[1]])
nume <- toupper(nume)
# denominator
# extract operators
doper <- strsplit(form[[1]][2], "[aA-zZ]*[0-9]*")
doper <- grep("\\+|\\-", doper[[1]], value=TRUE)
# extract variables
deno <- strsplit(form[[1]][2], "[\\+]|[\\-]")
deno <- gsub("[[:space:]]", "", deno[[1]])
deno <- toupper(deno)
}
if (method[v] %in% c("aggregation", "agg") & division == FALSE) {
# EXTRACT OPERATORS
oper <- strsplit(constr, "[aA-zZ]*[0-9]*")
oper <- grep("\\+|\\-", oper[[1]], value=TRUE)
# EXTRACT VARIABLES
variable <- strsplit(constr, "[\\+]|[\\-]")
variable <- gsub("^\\s+|\\s+$", "", variable[[1]])
variable <- toupper(variable)
}
###################################
# create and compute formulas
###################################
# nested level loop
if ( combine == FALSE ) { ss <- seq_along(level) } else { ss <- 1 } # only one level
for (l in ss ) {
if (combine == FALSE ) {
dat <- ldata[[level[l]]]
geo <- dat@geography
}
if ( combine == TRUE ) {
dat <- ldata
}
############################
# only variables
############################
if (method[v] == "variable")
{
p <- as.vector(dat@estimate[, v])
new_error <- as.vector(dat@standard.error[, v])
p <- ifelse(p < missing_thres, NA, p)
new_error <- ifelse(new_error < missing_thres, NA, new_error)
}
######################
# proportion or ratios
######################
if (method[v] %in% c("proportion", "prop", "ratio") & division == TRUE) {
wn <- which(dat@acs.colnames %in% nume)
wd <- which(dat@acs.colnames %in% deno)
nt <- vector()
for (i in 1:length(nume)) {
if ( i == max(length(nume))) {
x <- paste0("dat[,", wn[i], "]")
}
else {
x <- paste0("dat[,", wn[i], "]", noper[i])
}
nt <- paste0(nt, x)
}
dt <- vector()
for (i in 1:length(deno)) {
if ( i == max(length(deno))) {
x <- paste0("dat[,", wd[i], "]")
}
else {
x <- paste0("dat[,", wd[i], "]", doper[i])
}
dt <- paste0(dt, x)
}
# estimates and errors
est <- rmiss(acs::estimate(dat))
err <- rmiss(acs::standard.error(dat))
num <- rmiss(acs::estimate(eval(parse(text=nt))))
den <- rmiss(acs::estimate(eval(parse(text=dt))))
p <- num / den
if ( method[v] %in% c("prop", "proportion") & any( na.omit(as.vector(p)) > 1)) {
stop("The proportion is higher than 1, to use method ratio?")
}
# p[den == 0] <- NA
# definition of error
if ( length(p) == 1 ) {
if ( one.zero == TRUE ) {
if (length(wn) > 1) {
err_zero_num <- max(err[, wn] ^ 2 * (est[, wn] == 0))
err_num <- sum(err[, wn] ^ 2 * (est[, wn] != 0))
}
else {
err_zero_num <- err[, wn] ^ 2 * (est[, wn] == 0)
err_num <- err[, wn] ^ 2 * (est[, wn] != 0)
}
if (length(wd) > 1 ) {
err_zero_den <- max(err[, wd] ^ 2 * (est[, wd] == 0))
err_den <- sum(err[, wd] ^ 2 * (est[, wd] != 0))
}
else {
err_zero_den <- err[, wd] ^ 2 * (est[, wd] == 0)
err_den <- err[, wd] ^ 2 * (est[, wd] != 0)
}
err_num <- err_zero_num + err_num
err_den <- err_zero_den + err_den
}
else if ( one.zero == FALSE ) {
if (length(wn) > 1) {
err_num <- sum(err[, wn] ^ 2)
}
else {
err_num <- err[, wn] ^ 2
}
if (length(wd) > 1 ) {
err_den <- sum(err[, wd] ^ 2)
}
else {
err_den <- err[, wd] ^ 2
}
}
}
else if ( length(p) > 1 ) {
if ( one.zero == TRUE ) {
if (length(wn) > 1) {
err_zero_num <- base::apply(err[, wn] ^ 2 * (est[, wn] == 0), 1, max)
err_num <- base::apply(err[, wn] ^ 2 * (est[, wn] != 0), 1, sum)
}
else {
err_zero_num <- err[, wn] ^ 2 * (est[, wn] == 0)
err_num <- err[, wn] ^ 2 * (est[, wn] != 0)
}
if (length(wd) > 1 ) {
err_zero_den <- base::apply(err[, wd] ^ 2 * (est[, wd] == 0), 1, max)
err_den <- base::apply(err[, wd] ^ 2 * (est[, wd] != 0), 1, sum)
}
else {
err_zero_den <- err[, wd] ^ 2 * (est[, wd] == 0)
err_den <- err[, wd] ^ 2 * (est[, wd] != 0)
}
err_num <- err_zero_num + err_num
err_den <- err_zero_den + err_den
}
else if ( one.zero == FALSE ) {
if (length(wn) > 1) {
err_num <- base::apply(err[, wn] ^ 2 , 1, sum)
}
else {
err_num <- err[, wn] ^ 2
}
if (length(wd) > 1 ) {
err_den <- base::apply(err[, wd] ^ 2, 1, sum)
}
else {
err_den <- err[, wd] ^ 2
}
}
}
# computing standard errors for proportions and ratios (checked!)
if ( method[v] %in% c("proportion", "prop") ) {
suppressWarnings(
new_error <- ifelse((err_num - ( p ^ 2 * err_den) ) < 0 | is.na( err_num - (p ^ 2 * err_den)),
sqrt(err_num + (p ^ 2 * err_den)) / den,
sqrt(err_num - (p ^ 2 * err_den)) / den)
)
}
if (method[v] %in% c("ratio")) {
suppressWarnings(
new_error <- sqrt(err_num + (p ^ 2 * err_den)) / den
)
}
} # end proportion
#################
# aggregation
##################
if (method[v] %in% c("aggregation", "agg") & division == FALSE) {
wa <- which(dat@acs.colnames %in% variable)
ft <- vector()
for (i in 1:length(variable)) {
if ( i == max(length(variable))) {
x <- paste0("dat[,", wa[i], "]")
}
else {
x <- paste0("dat[,", wa[i], "]", oper[i])
}
ft <- paste0(ft, x)
}
# estimates and errors
est <- rmiss(acs::estimate(dat))
err <- rmiss(acs::standard.error(dat))
p <- rmiss(acs::estimate(eval(parse(text = ft))))
# one zero computation
if ( length(p) == 1 ) {
if ( one.zero == TRUE ) {
if (length(wa) > 1) {
err_zero_agg <- max(err[, wa] ^ 2 * (est[, wa] == 0))
err_agg <- sum(err[, wa] ^ 2 * (est[, wa] != 0))
}
else {
err_zero_agg <- err[, wa] ^ 2 * (est[, wa] == 0)
err_agg <- err[, wa] ^ 2 * (est[, wa] != 0)
}
err_agg <- err_zero_agg + err_agg
}
else if ( one.zero == FALSE ) {
if (length(wa) > 1) {
err_agg <- sum(err[, wa] ^ 2)
}
else {
err_agg <- err[, wa] ^ 2
}
}
}
else if ( length(p) > 1 ) {
if ( one.zero == TRUE ) {
if (length(wa) > 1) {
err_zero_agg <- base::apply(err[, wa] ^ 2 * (est[, wa] == 0), 1, max)
err_agg <- base::apply(err[, wa] ^ 2 * (est[, wa] != 0), 1, sum)
}
else {
err_zero_agg <- err[, wa] ^ 2 * (est[, wa] == 0)
err_agg <- err[, wa] ^ 2 * (est[, wa] != 0)
}
err_agg <- err_zero_agg + err_agg
}
else if ( one.zero == FALSE ) {
if (length(wa) > 1) {
err_agg <- base::apply(err[, wa] ^ 2 , 1, sum)
}
else {
err_agg <- err[, wa] ^ 2
}
}
}
# computing standard errors for aggregation
new_error <- sqrt(err_agg)
} # end aggregation
#######################
# create datasets (one by one!)
#######################
if ( combine == FALSE ) {
if (level[l] == "us") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "010",
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "region") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "020",
region = geo$region,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "division") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "030",
division = geo$division,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "state") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = sprintf("%02d", as.numeric(geo$state)),
sumlevel = "040",
st_fips = geo$state,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "county") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%03d", as.numeric(geo$county))),
sumlevel = "050",
st_fips = geo$state,
cnty_fips = geo$county,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "county.subdivision") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%03d", as.numeric(geo$county)), sprintf("%05d", as.numeric(geo$countysubdivision))),
sumlevel = "060",
st_fips = geo$state,
cnty_fips = geo$county,
cnty_sub_fips = geo$countysubdivision,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "tract") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%03d", as.numeric(geo$county)), sprintf("%06d", as.numeric(geo$tract))),
sumlevel = "140",
st_fips = geo$state,
cnty_fips = geo$county,
tract_fips = geo$tract,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "block.group") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(geo$state, sprintf("%03d", as.numeric(geo$county)), sprintf("%06d", as.numeric(geo$tract)), as.numeric(geo$blockgroup)),
sumlevel = "150",
st_fips = geo$state,
cnty_fips = geo$county,
tract_fips = geo$tract,
block_group = geo$blockgroup,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "place") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%05d", as.numeric(geo$place))),
sumlevel = "160",
st_fips = geo$state,
place = geo$place,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "american.indian.area") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "250",
indian_area = geo$americanindianarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "puma") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state))),
sumlevel = "795",
st_fips = geo$state,
puma = geo$publicusemicrodataarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "msa") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state))),
sumlevel = "320",
st_fips = geo$stat,
msa = geo$metropolitanstatisticalareamicropolitanstatisticalarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "csa") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state))),
sumlevel = "340",
st_fips = geo$state,
csa = geo$combinedstatisticalarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "necta") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "350",
necta = geo$newenglandcityandtownarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "urban.area") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "400",
urban_area = geo$urbanarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "congressional.district") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%02d", as.numeric(geo$congressionaldistrict))),
sumlevel = "500",
st_fips = geo$state,
cong_dist = geo$congressionaldistrict,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "state.legislative.district.upper") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%03d", as.numeric(geo$statelegislativedistrictupper))),
sumlevel = "610",
st_fips = geo$state,
leg_dist_upper = geo$statelegislativedistrictupper,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "state.legislative.district.lower") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%03d", as.numeric(geo$statelegislativedistrictlower))),
sumlevel = "620",
st_fips = geo$state,
leg_dist_lower = geo$statelegislativedistrictlower,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "zip.code") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = as.character(NA),
sumlevel = "860",
zip = geo$zipcodetabulationarea,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "school.district.elementary") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%05d", as.numeric(geo$schooldistrictelementary))),
sumlevel = "950",
st_fips = geo$state,
sch_dist_ele = geo$schooldistrictelementary,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "school.district.secondary") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%05d", as.numeric(geo$schooldistrictsecondary))),
sumlevel = "960",
st_fips = geo$state,
sch_dist_sec = geo$schooldistrictsecondary,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if (level[l] == "school.district.unified") {
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata[[ paste0(v,l) ]] <- data.table(
geoid = paste0(sprintf("%02d", as.numeric(geo$state)), sprintf("%05d", as.numeric(geo$schooldistrictunified))),
sumlevel = "970",
st_fips = geo$state,
sch_dist_uni = geo$schooldistrictunified,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
} # end level loop
} # end combine FALSE
if (combine == TRUE ) {
vdata <- data.table()
if (multiply == TRUE)
{ cmoe <- as.vector(new_error * conf.level * 100)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 100] <- 100 }
}
else { cmoe <- as.vector(new_error * conf.level)
if (method[v] %in% c("prop", "proportion"))
{ cmoe[cmoe > 1] <- 1 }
}
vdata <- data.table(
geoid = as.character(NA),
combined_group = combine.name,
var_name = varname[v],
est = if (multiply == TRUE) { as.vector(p * 100)} else { as.vector(p)},
moe = cmoe
)
}
if ( trace ) {
print(paste0(". . . . . . ", round( v / newvars * 100, 1), "%"))
}
} # end variable loop
print(". . . . . . Formatting output")
if ( combine == FALSE ) {
mdata <- rbindlist(vdata, fill = TRUE)
}
if ( combine == TRUE ) {
mdata <- copy(vdata)
}
if (format.out == "long") {
fdata <- copy(mdata)
}
else if (format.out == "wide") {
# dcast variables
namesformula <- grep("var_name|est|moe", names(mdata), value = TRUE, invert = TRUE)
ff <- paste(paste(namesformula, collapse = " + "), " ~ var_name")
wdata <- data.table::dcast(mdata, formula(ff), value.var = c("est", "moe"))
# awful way to deal with names (to improve!)
vnames <- sort(unique(mdata$var_name))
ids <- c( "sumlevel", "geoid", "region","division","st_fips","cnty_fips","cnty_sub_fips","tract_fips","block_group","place","indian_area","msa","csa","necta","urban_area","cong_dist","leg_dist_upper","leg_dist_lower","puma","zip","sch_dist_ele","sch_dist_sec","sch_dist_uni", "combined_group")
ids <- ids[ids %in% names(wdata)]
a <- wdata[, grep("_moe|moe_", names(wdata), invert = TRUE), with = FALSE]
b <- wdata[, grep("_est|est_", names(wdata), invert = TRUE), with = FALSE]
setnames(a, names(a), gsub("_est|est_", "", names(a)))
setnames(b, names(b), gsub("_moe|moe_", "", names(b)))
a <- a[, sort(names(a[,!ids, with = FALSE])), with = FALSE]
b <- b[, sort(names(b[,!ids, with = FALSE])), with = FALSE]
setnames(a, names(a), paste0(names(a), "_est"))
setnames(b, names(b), paste0(names(b), "_moe"))
wdata <- cbind(wdata[, ids, with = FALSE], a, b)
vars <- sort(names(wdata[, !ids, with = FALSE]))
# # old code
# vars <- names(wdata)
# setnames(wdata, names(wdata), gsub("_est", "", vars))
# ids <- c("stfid","sumlevel","st_fips","cnty_fips","cnty_sub_fips","tract_fips","block.group","cong_dist","sch_dist_sec","sch_dist_ele")
# vars <- names(wdata[,!ids, with = FALSE])
# vars <- sort(vars)
# final dataset
fdata <- wdata[, c(ids, vars), with = FALSE]
if (combine == FALSE ) {
data.table::setkey(fdata, sumlevel)
}
}
# clean names using census data
if (dataset == "sf1" | dataset == "sf3") {
dnames <- grep("moe", names(fdata), value = TRUE)
fdata <- fdata[, -dnames, with = FALSE]
enames <- gsub("_est", "", names(fdata))
setnames(fdata, names(fdata), enames)
}
# lower names of variables
dnames <- names(fdata)
setnames(fdata, dnames, tolower(dnames))
# write csv
if (!is.null(file)) {
fwrite(fdata, file = file, na = 'NA')
print(". . . . . . Data exported to a CSV file! ")
}
return(fdata)
} # end function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.