# Nothing
## -----------------------------------------------------------------------------
# Session-wide display settings used for the rest of the script.
# Print numbers with 4 significant digits.
options(digits = 4)
# Default figure dimensions for every knitr chunk.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.height = 4.5
)
## -----------------------------------------------------------------------------
library(dplyr)
library(infer)
## -----------------------------------------------------------------------------
# Load the `gss` dataset into the session.
data(gss)
# Inspect its structure.
gss |>
  dplyr::glimpse()
## -----------------------------------------------------------------------------
# Declare `age` as the response variable.
specify(gss, response = age)
## -----------------------------------------------------------------------------
# Show the class of the object that specify() returns.
class(specify(gss, response = age))
## -----------------------------------------------------------------------------
# Formula interface: response on the left-hand side, explanatory on the right.
specify(gss, formula = age ~ partyid)
# The same two variables, supplied through the named arguments instead.
specify(gss, response = age, explanatory = partyid)
## -----------------------------------------------------------------------------
# Specify a response for inference on proportions;
# `success = "degree"` picks the level of `college` treated as a success.
specify(gss, response = college, success = "degree")
## -----------------------------------------------------------------------------
# Declare a null hypothesis of independence between `college` and `partyid`.
gss |>
  specify(formula = college ~ partyid, success = "degree") |>
  hypothesize(null = "independence")
## -----------------------------------------------------------------------------
# Declare a point null hypothesis for `hours` with mu = 40.
hypothesize(
  specify(gss, response = hours),
  null = "point",
  mu = 40
)
## -----------------------------------------------------------------------------
# Fix the RNG seed before resampling.
set.seed(1)
# Generate 1000 bootstrap replicates under the point null (mu = 40).
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap")
## -----------------------------------------------------------------------------
# Generate 1000 permutation replicates under the independence null.
gss |>
  specify(formula = partyid ~ age) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute")
## -----------------------------------------------------------------------------
# Compute the "mean" statistic on the bootstrap replicates
# generated under the point null (mu = 40).
gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")
## -----------------------------------------------------------------------------
# Compute the difference in mean `age` between the `college` groups on the
# permutation replicates; `order` fixes the subtraction as
# "degree" minus "no degree".
gss |>
  specify(formula = age ~ college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  calculate(
    stat = "diff in means",
    order = c("degree", "no degree")
  )
## -----------------------------------------------------------------------------
# Point estimate: the observed mean of `hours`.
obs_mean <- calculate(
  specify(gss, response = hours),
  stat = "mean"
)
# Null distribution: means of 1000 bootstrap replicates under mu = 40.
null_dist <- gss |>
  specify(response = hours) |>
  hypothesize(null = "point", mu = 40) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")
## -----------------------------------------------------------------------------
# Plot the null distribution.
visualize(null_dist)
## -----------------------------------------------------------------------------
# Plot the null distribution and shade the two-sided p-value region
# relative to the observed mean.
visualize(null_dist) +
  shade_p_value(obs_stat = obs_mean, direction = "two-sided")
## -----------------------------------------------------------------------------
# Two-tailed p-value of the observed mean against the null distribution.
p_value <- get_p_value(
  null_dist,
  obs_stat = obs_mean,
  direction = "two-sided"
)
p_value
## -----------------------------------------------------------------------------
# Bootstrap distribution of the mean of `hours`: built like the null
# distribution, but with no hypothesize() step in the pipeline.
boot_dist <- gss |>
  specify(response = hours) |>
  generate(reps = 1000, type = "bootstrap") |>
  calculate(stat = "mean")
# 95% confidence interval around the point estimate, computed from the
# bootstrap distribution using the standard error (type = "se").
ci <- get_confidence_interval(
  boot_dist,
  level = 0.95,
  type = "se",
  point_estimate = obs_mean
)
ci
## -----------------------------------------------------------------------------
# Plot the bootstrap distribution with the confidence interval shaded.
visualize(boot_dist) +
  shade_confidence_interval(endpoints = ci)
## -----------------------------------------------------------------------------
# Observed t statistic for `hours` against the point null mu = 40.
obs_t <- calculate(
  hypothesize(
    specify(gss, response = hours),
    null = "point",
    mu = 40
  ),
  stat = "t"
)
## -----------------------------------------------------------------------------
# Define a theoretical t distribution by ending the pipeline with
# assume() in place of calculate().
t_dist <- assume(
  specify(gss, response = hours),
  distribution = "t"
)
## -----------------------------------------------------------------------------
# Plot the theoretical null distribution and shade the region greater than
# the observed t statistic.
t_dist |>
  visualize() +
  shade_p_value(obs_stat = obs_t, direction = "greater")
# Compute the matching one-sided ("greater") p-value exactly.
t_dist |>
  get_p_value(obs_stat = obs_t, direction = "greater")
## -----------------------------------------------------------------------------
# Theory-based 95% confidence interval from the t distribution,
# using the observed mean as the point estimate.
theor_ci <- t_dist |>
  get_confidence_interval(
    point_estimate = obs_mean,
    level = 0.95
  )
theor_ci
## -----------------------------------------------------------------------------
# Plot the theoretical sampling distribution with the theory-based
# confidence interval shaded.
t_dist |>
  visualize() +
  shade_confidence_interval(endpoints = theor_ci)
## -----------------------------------------------------------------------------
# Fit `hours` on `age` and `college` using the observed data.
observed_fit <- fit(
  specify(gss, formula = hours ~ age + college)
)
## -----------------------------------------------------------------------------
# Refit the same model on each of 1000 permutation replicates generated
# under the independence null.
null_fits <- gss |>
  specify(formula = hours ~ age + college) |>
  hypothesize(null = "independence") |>
  generate(reps = 1000, type = "permute") |>
  fit()
null_fits
## -----------------------------------------------------------------------------
# 95% confidence interval for the fitted terms, using the observed fit as
# the point estimate.
# NOTE(review): the interval is computed from the permutation (null) fits
# rather than from bootstrap fits -- confirm this is intended.
null_fits |>
  get_confidence_interval(
    level = 0.95,
    point_estimate = observed_fit
  )
## -----------------------------------------------------------------------------
# Plot the null fits and shade the p-value region in both directions
# relative to the observed fit.
null_fits |>
  visualize() +
  shade_p_value(obs_stat = observed_fit, direction = "both")
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.