Nothing
## ---- message = FALSE, warning = FALSE----------------------------------------
library(dplyr)
library(ggplot2)
library(carData)
## ---- message = FALSE, warning = FALSE----------------------------------------
set.seed(213141)
data("GSSvocab")
gss <-
as_tibble(GSSvocab) %>%
filter(year == '1978') %>%
mutate(weight = sample(1:3, size = nrow(.), replace = TRUE, prob = c(0.1, 0.5, 0.4))) %>%
select(ageGroup, vocab, weight)
gss
## ---- eval = FALSE------------------------------------------------------------
# devtools::install_github("cimentadaj/perccalc")
# library(perccalc)
## ---- echo = F, message = F, warning = F--------------------------------------
library(perccalc)
## ---- error = TRUE------------------------------------------------------------
perc_diff(gss, ageGroup, vocab, percentiles = c(90, 10))
## -----------------------------------------------------------------------------
gss <-
gss %>%
mutate(ageGroup = factor(ageGroup, ordered = TRUE))
## -----------------------------------------------------------------------------
perc_diff(gss, ageGroup, vocab, percentiles = c(90, 10))
## -----------------------------------------------------------------------------
perc_diff(gss, ageGroup, vocab, percentiles = c(50, 10))
## -----------------------------------------------------------------------------
perc_diff(gss, ageGroup, vocab, weight)
## ---- eval = FALSE------------------------------------------------------------
# # Saving the dataset to a path
# gss %>%
# write_dta(path = "C:\\Users\\cimentadaj\\Downloads\\gss_data.dta", version = 13)
## ---- eval = FALSE------------------------------------------------------------
# *--------
# use "/Users/cimentadaj/Downloads/gss_data.dta", clear
#
# drop if missing(ageGroup)
# drop if missing(vocab)
#
# tab ageGroup, gen(inc)
# *--------
#
# /*-----------------------
# Making a data set that has
# one observation per age group category
# and has mean and se(mean) in each category
# and percent of population in the category
# ------------------------*/
#
# tempname memhold
# tempfile results
# postfile `memhold' agegroup mean se_mean per using `results'
#
# forv i = 1/5 {
# sum inc`i' [aw=weight]
# loc per`i' = r(mean)
#
# qui sum vocab if inc`i'==1
#
# if `r(N)'>0 {
# qui regress vocab if inc`i'==1 [aw=weight]
# post `memhold' (`i') (_b[_cons]) (_se[_cons]) (`per`i'')
#
# }
# }
# postclose `memhold'
#
# /*-----------------------
# Making age group categories
# into percentiles
# ------------------------*/
#
#
# use `results', clear
#
# sort agegroup
# gen cathi = sum(per)
# gen catlo = cathi[_n-1]
# replace catlo = 0 if agegroup==1
# gen catmid = (catlo+cathi)/2
#
# /*-----------------------
# Calculate age group
# vocabulary gaps
# ------------------------*/
#
# sort agegroup
#
# g x1 = catmid
# g x2 = catmid^2 + ((cathi-catlo)^2)/12
# g x3 = catmid^3 + ((cathi-catlo)^2)/4
#
# g cimnhi = mean + 1.96*se_mean
# g cimnlo = mean - 1.96*se_mean
#
# reg mean x1 x2 x3 [aw=1/se_mean^2]
#
# twoway (rcap cimnhi cimnlo catmid) (scatter mean catmid) ///
# (function y = _b[_cons] + _b[x1]*x + _b[x2]*x^2 + _b[x3]*x^3, ran(0 1))
#
# loc hi_p = 90
# loc lo_p = 10
#
# loc d1 = [`hi_p' - `lo_p']/100
# loc d2 = [(`hi_p')^2 - (`lo_p')^2]/(100^2)
# loc d3 = [(`hi_p')^3 - (`lo_p')^3]/(100^3)
#
# lincom `d1'*x1 + `d2'*x2 + `d3'*x3
# loc diff`hi_p'`lo_p' = r(estimate)
# loc se`hi_p'`lo_p' = r(se)
#
# di "`hi_p'-`lo_p' gap: `diff`hi_p'`lo_p''"
# di "se(`hi_p'-`lo_p' gap): `se`hi_p'`lo_p''"
## -----------------------------------------------------------------------------
perc_diff(gss, ageGroup, vocab, weight)
## -----------------------------------------------------------------------------
perc_dist(gss, ageGroup, vocab) %>%
head()
## ---- fig.align = 'center', fig.width = 6, fig.height = 5---------------------
gss %>%
perc_dist(ageGroup, vocab, weight) %>%
mutate(ci_low = estimate - 1.96 * std.error,
ci_hi = estimate + 1.96 * std.error) %>%
ggplot(aes(percentile, estimate)) +
geom_point() +
geom_errorbar(aes(ymin = ci_low, ymax = ci_hi))
## -----------------------------------------------------------------------------
perc_dist(gss, ageGroup, vocab, weight) %>%
filter(percentile %in% c(90, 10)) %>%
summarize(diff = diff(estimate),
se_diff = diff(std.error))
## -----------------------------------------------------------------------------
perc_diff(gss, ageGroup, vocab, weight, percentiles = c(90, 10))
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.