# inst/doc/chi_squared.R

## ----settings, include=FALSE--------------------------------------------------
# Vignette-wide defaults: every chunk's figure uses width 6 and height 4.5,
# and printed numbers are limited to 4 significant digits.
knitr::opts_chunk$set(
  fig.width = 6,
  fig.height = 4.5
)
options(digits = 4)

## ----load-packages, echo = FALSE, message = FALSE, warning = FALSE------------
library(ggplot2)
library(dplyr)
library(infer)

## ----glimpse-gss-actual, warning = FALSE, message = FALSE---------------------
# Print a compact, column-wise preview of the `gss` data used throughout.
gss %>%
  dplyr::glimpse()

## ----plot-indep, echo = FALSE-------------------------------------------------
# Proportion bars: one bar per `finrela` level, filled by `college`, with each
# bar scaled to full height (position = "fill").
ggplot2::ggplot(
  data = gss,
  mapping = ggplot2::aes(x = finrela, fill = college)
) +
  ggplot2::geom_bar(position = "fill") +
  ggplot2::scale_fill_brewer(type = "qual") +
  ggplot2::labs(
    x = "finrela: Self-Identification of Income Class",
    y = "Proportion"
  ) +
  # tilt the x-axis tick labels by 45 degrees
  ggplot2::theme(
    axis.text.x = ggplot2::element_text(angle = 45, vjust = 0.5)
  )

## ----calc-obs-stat-indep, warning = FALSE, message = FALSE--------------------
# Chi-squared statistic for `college` vs. `finrela`, computed directly on the
# observed data (no generate() step), under the independence null.
observed_indep_statistic <- gss %>%
  specify(formula = college ~ finrela) %>%
  hypothesize(null = "independence") %>%
  calculate(stat = "Chisq")

## ----generate-null-indep, warning = FALSE, message = FALSE--------------------
# Simulation-based null distribution: 1000 permutation replicates of the data,
# each reduced to its chi-squared statistic.
null_dist_sim <- gss %>%
  specify(formula = college ~ finrela) %>%
  hypothesize(null = "independence") %>%
  generate(reps = 1000, type = "permute") %>%
  calculate(stat = "Chisq")

## ----generate-null-indep-t, warning = FALSE, message = FALSE------------------
# Theory-based counterpart of the null distribution: a chi-squared
# distribution assumed for the same `college ~ finrela` specification,
# with no resampling involved.
null_dist_theory <- assume(
  specify(gss, formula = college ~ finrela),
  distribution = "Chisq"
)

## ----visualize-indep, warning = FALSE, message = FALSE------------------------
# Plot the simulated null distribution and mark the observed statistic,
# shading the p-value region in the "greater" direction.
visualize(null_dist_sim) +
  shade_p_value(
    obs_stat = observed_indep_statistic,
    direction = "greater"
  )

## ----visualize-indep-theor, warning = FALSE, message = FALSE------------------
# Plot the theoretical (chi-squared) null distribution and mark the observed
# statistic, shading the p-value region in the "greater" direction.
# The distribution plotted is the one already stored in `null_dist_theory`,
# so it is reused here instead of being rebuilt from `gss` a second time.
null_dist_theory %>%
  visualize() +
  shade_p_value(observed_indep_statistic,
                direction = "greater")

## ----visualize-indep-both, warning = FALSE, message = FALSE-------------------
# Plot the simulated null distribution with method = "both" (simulation and
# theory-based on one plot), and mark the observed statistic.
visualize(null_dist_sim, method = "both") +
  shade_p_value(
    obs_stat = observed_indep_statistic,
    direction = "greater"
  )

## ----p-value-indep, warning = FALSE, message = FALSE--------------------------
# p-value for the independence test: compares the observed statistic with the
# simulated null distribution in the "greater" direction.
p_value_independence <- get_p_value(
  x = null_dist_sim,
  obs_stat = observed_indep_statistic,
  direction = "greater"
)

# print the result
p_value_independence

## -----------------------------------------------------------------------------
# Upper-tail probability of the observed statistic under a chi-squared
# distribution with 5 degrees of freedom.
# NOTE(review): df = 5 presumably comes from (2 - 1) * (6 - 1) for a
# two-level `college` and six-level `finrela` -- confirm against the data.
pchisq(
  q = observed_indep_statistic$stat,
  df = 5,
  lower.tail = FALSE
)

## ----chisq-indep-wrapper, message = FALSE, warning = FALSE--------------------
# Same independence test via the chisq_test() wrapper.
chisq_test(
  x = gss,
  formula = college ~ finrela
)

## ----gof-plot, echo = FALSE---------------------------------------------------
# Bar chart of the observed `finrela` counts. The red reference line marks the
# count each of the six categories would have if responses were spread evenly
# (the 1/6-per-category null used in the goodness-of-fit test). It is derived
# from the number of rows in `gss` rather than hard-coded, so it stays correct
# whatever the size of the data set.
gss %>%
  ggplot2::ggplot() +
  ggplot2::aes(x = finrela) +
  ggplot2::geom_bar() +
  ggplot2::geom_hline(yintercept = nrow(gss) / 6, col = "red") +
  ggplot2::labs(x = "finrela: Self-Identification of Income Class",
                y = "Number of Responses")

## ----observed-gof-statistic, warning = FALSE, message = FALSE-----------------
# calculate the observed goodness-of-fit statistic: chi-squared for `finrela`
# against a point null that gives each of the six categories probability 1/6
observed_gof_statistic <- gss %>%
  specify(response = finrela) %>%
  hypothesize(
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    )
  ) %>%
  calculate(stat = "Chisq")

## ----null-distribution-gof, warning = FALSE, message = FALSE------------------
# Simulated null distribution for the goodness-of-fit test: 1000 replicates
# of type "draw" under the equal-probability (1/6 each) point null, each
# reduced to its chi-squared statistic.
null_dist_gof <- gss %>%
  specify(response = finrela) %>%
  hypothesize(
    null = "point",
    p = c(
      "far below average" = 1 / 6,
      "below average" = 1 / 6,
      "average" = 1 / 6,
      "above average" = 1 / 6,
      "far above average" = 1 / 6,
      "DK" = 1 / 6
    )
  ) %>%
  generate(reps = 1000, type = "draw") %>%
  calculate(stat = "Chisq")

## ----visualize-indep-gof, warning = FALSE, message = FALSE--------------------
# Plot the simulated goodness-of-fit null distribution and mark the observed
# statistic, shading the p-value region in the "greater" direction.
visualize(null_dist_gof) +
  shade_p_value(
    obs_stat = observed_gof_statistic,
    direction = "greater"
  )

## ----get-p-value-gof, warning = FALSE, message = FALSE------------------------
# p-value for the goodness-of-fit test: compares the observed statistic with
# the simulated null distribution in the "greater" direction.
p_value_gof <- get_p_value(
  x = null_dist_gof,
  obs_stat = observed_gof_statistic,
  direction = "greater"
)

# print the result
p_value_gof

## -----------------------------------------------------------------------------
# Upper-tail probability of the observed goodness-of-fit statistic under a
# chi-squared distribution with 5 degrees of freedom (six hypothesized
# categories minus one).
pchisq(
  q = observed_gof_statistic$stat,
  df = 5,
  lower.tail = FALSE
)

## ----chisq-gof-wrapper, message = FALSE, warning = FALSE----------------------
# Same goodness-of-fit test via the chisq_test() wrapper, with the
# equal-probability (1/6 each) null passed through `p`.
chisq_test(
  x = gss,
  response = finrela,
  p = c(
    "far below average" = 1 / 6,
    "below average" = 1 / 6,
    "average" = 1 / 6,
    "above average" = 1 / 6,
    "far above average" = 1 / 6,
    "DK" = 1 / 6
  )
)

# Try the infer package in your browser
#
# Any scripts or data that you put into this service are public.
#
# infer documentation built on May 29, 2024, 11:54 a.m.