Nothing
## ----echo = FALSE-------------------------------------------------------------
if (!require(RSQLite)) {
message("Could not find RSQLite so could not run vignette.")
knitr::opts_chunk$set(eval = FALSE)
}
if (!file.exists("acs_m.Rdata")) {
message("Could not find db file so could not run vignette.")
knitr::opts_chunk$set(eval = FALSE)
}
## -----------------------------------------------------------------------------
suppressMessages({
library(survey)
library(srvyr)
library(dplyr)
library(dbplyr)
library(RSQLite)
})
# Load data (Currently only Alaska and Hawaii to keep file size small and with
# limited variables, butcode that downloaded the files is available here
# https://github.com/gergness/srvyr/blob/main/vignettes/save_acs_data.R
# and could easily be adapted to download all states.)
load("acs_m.Rdata") # acs_m data
# Set up database and table
db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
acs_m_db <- copy_to(db, acs_m, "acs_m", temporary = FALSE)
# Or, if the data was already stored in the database, you could do this
# acs_m_data <- tbl(db, sql("SELECT * FROM acs_m"))
## -----------------------------------------------------------------------------
# Same results
acs_m %>%
group_by(sex) %>%
summarize(hicov = mean(hicov))
acs_m_db %>%
group_by(sex) %>%
summarize(hicov = mean(hicov))
# But smaller object size
object.size(acs_m)
object.size(acs_m_db)
## -----------------------------------------------------------------------------
acs_m %>%
group_by(sex) %>%
summarize(hicov = mean(hicov == 1))
# Works in RSQlite, but didn't in the now defunct MonetDB
# acs_m_db %>%
# group_by(sex) %>%
# summarize(hicov = mean(hicov == 1))
#
# > Error in .local(conn, statement, ...) :
# > Unable to execute statement....
# > ....
# Creating the variable separately works as an integer works though
acs_m_db %>%
group_by(sex) %>%
mutate(hicov = ifelse(hicov == 1, 1L, 0L)) %>%
summarize(hicov = mean(hicov))
## -----------------------------------------------------------------------------
acs_m %>%
group_by(agecat = cut(agep, c(0, 19, 35, 50, 65, 200))) %>%
summarize(hicov = mean(hicov == 1))
# acs_m_db %>%
# group_by(agecat = cut(agep, c(0, 19, 35, 50, 65, 200))) %>%
# summarize(hicov = mean(hicov == 1))
#
# > Error in .local(conn, statement, ...) :
# > Unable to execute statement....
# > ...
acs_m_db %>%
mutate(agecat = ifelse(agep < 19, "0-18",
ifelse(agep >= 19 & agep < 35, "19-34",
ifelse(agep >= 35 & agep < 50, "35-49",
ifelse(agep >= 50 & agep < 65, "50-64",
ifelse(agep >= 65, "65+", NA)))))) %>%
group_by(agecat) %>%
summarize(hicov = mean(hicov))
## -----------------------------------------------------------------------------
acs_m_db_svy <- acs_m_db %>%
as_survey_rep(
weight = pwgtp,
repweights = matches("pwgtp[0-9]+") ,
scale = 4 / 80,
rscales = rep(1 , 80),
mse = TRUE,
type = "JK1",
variables = -c(matches("^pwgtp"))
)
acs_m_db_svy
## -----------------------------------------------------------------------------
object.size(acs_m_db_svy)
## -----------------------------------------------------------------------------
# You can calculate the population of the united states #
# by state
acs_m_db_svy %>%
mutate(one = 1L) %>% # Note that because of weird behavior of MonetDB, need to use 1L not just 1
group_by(st) %>%
summarize(count = survey_total(one))
# Or the average age of downloaded states
acs_m_db_svy %>%
summarize(agep = survey_mean(agep, na.rm = TRUE))
# Average age by state
acs_m_db_svy %>%
group_by(st) %>%
summarize(agep = survey_mean(agep, na.rm = TRUE))
# percent uninsured - nationwide (of downloaded states)
acs_m_db_svy %>%
mutate(hicov = as.character(hicov)) %>%
group_by(hicov) %>%
summarize(pct = survey_mean(na.rm = TRUE))
# by state
acs_m_db_svy %>%
mutate(hicov = as.character(hicov)) %>%
group_by(st, hicov) %>%
summarize(pct = survey_mean(na.rm = TRUE))
# 25th, median, and 75th percentile of age of residents of the united states (downloaded states)
acs_m_db_svy %>%
summarize(agep = survey_quantile(agep, c(0.25, 0.5, 0.75), na.rm = TRUE))
# Filter works, so we can restrict the acs_m object to females only
acs_m_db_svy_female <- acs_m_db_svy %>%
filter(sex == 2)
# Now any of the above commands can be re-run using the acs_m_female object
# instead of the acs_m object in order to analyze females only
# This is equivalent to using acs_m, and applying the filter every time.
# average age - nationwide (of downloaded states), restricted to females
acs_m_db_svy_female %>%
summarize(agep = survey_mean(agep, na.rm = TRUE))
# median age - nationwide (of downloaded states), restricted to females
acs_m_db_svy_female %>%
summarize(agep = survey_median(agep, na.rm = TRUE))
# Note that though some R functions are translated by dplyr into SQL, not
# all of them are. For example, when constructing a new age category
# variable in the dataset neither findIntervals nor cut work on databases,
# so we have to spell out groups with ifelse()
acs_m_db_svy %>%
mutate(agecat = ifelse(agep < 19, "0-18",
ifelse(agep >= 19 & agep < 35, "19-34",
ifelse(agep >= 35 & agep < 50, "35-49",
ifelse(agep >= 50 & agep < 65, "50-64",
ifelse(agep >= 65, "65+", NA)))))) %>%
group_by(agecat) %>%
summarize(pct = survey_mean(na.rm = TRUE))
## -----------------------------------------------------------------------------
acs_m_db_svy %>%
select(agep, hicov, sex) %>%
collect() %>%
{survey::svyglm(hicov ~ sex + agep, .)} %>%
summary()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.