#' Spurious Data Collated by Tyler Vigen
#'
#' A dataset containing information collated by Tyler Vigen from various
#' sources to showcase spurious relationships.
#'
#' @docType data
#'
#' @format A data frame with 15 rows and 31 variables:
#' \describe{
#' \item{year}{year of given observation}
#'
#' \item{science_spending}{US spending on science, space, and technology in billions of dollars
#' (\href{https://www.census.gov/compendia/statab/cats/science_technology/expenditures_research_development.html}{U.S. Office of Management and Budget})}
#'
#' \item{hanging_suicides}{suicides by hanging, strangulation, and suffocation
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{pool_fall_drownings}{number of people who drowned by falling into a pool
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{cage_films}{films Nicholas Cage appeared in
#' (\href{https://www.imdb.com/name/nm0000115/}{Internet Movie Database})}
#'
#' \item{cheese_percap}{per capita cheese consumption
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0217.xls}{Department of Agriculture})}
#'
#' \item{bed_deaths}{number of people who died by becoming tangled in their bedsheets
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{maine_divorce_rate}{divorce rate in Maine
#' (\href{https://www.census.gov/compendia/statab/cats/births_deaths_marriages_divorces/marriages_and_divorces.html}{National Vital Statistics Reports})}
#'
#' \item{margarine_percap}{per capita consumption of margarine
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0217.xls}{U.S. Department of Agriculture})}
#'
#' \item{miss_usa_age}{age of miss america
#' (\href{https://en.wikipedia.org/wiki/Miss_America}{Wikipedia})}
#'
#' \item{steam_murders}{number of murders by steam, hot vapours, or hot objects
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control and Prevention})}
#'
#' \item{arcade_revenue}{total revenue generated by arcades in billions of dollars
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s1228.xls}{U.S. Census Bureau})}
#'
#' \item{computer_science_doctorates}{computer science doctorates awarded in the US
#' (\href{https://www.nsf.gov/statistics/infbrief/nsf11305/}{National Science Foundation})}
#'
#' \item{noncom_space_launches}{worldwide non-commercial space launches
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0822.xls}{Federal Aviation Administration})}
#'
#' \item{sociology_doctorates}{sociology doctorates awarded (US)
#' (\href{https://www.nsf.gov/statistics/infbrief/nsf11305/}{National Science Foundation})}
#'
#' \item{mozzarella_percap}{per capita consumtion of mozzarella cheese
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0217.xls}{U.S. Department of Agriculture})}
#'
#' \item{civil_engineering_doctorates}{number of civil engineering doctorates awarded
#' (\href{https://www.nsf.gov/statistics/infbrief/nsf11305/}{National Science Foundation})}
#'
#' \item{fishing_drownings}{people who drowned after falling out of a fishing boat
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{kentucky_marriage_rate}{marriage rate in Kentucky
#' (\href{https://www.census.gov/compendia/statab/cats/births_deaths_marriages_divorces/marriages_and_divorces.html}{National Vital Statistics Reports})}
#'
#' \item{train_collision_deaths}{drivers killed in collisions with railway train
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{oil_imports_norway}{US crude oil imports from Norway
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0937.pdf}{Department of Energy})}
#'
#' \item{chicken_percap}{per capita consumption of chicken
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0217.xls}{U.S. Department of Agriculture})}
#'
#' \item{oil_imports_total}{total US crude oil imports
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0937.pdf}{Department of Energy})}
#'
#' \item{pool_drownings}{number of people who drowned while in a swimming-pool
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{nuclear_power}{power generated by US nuclear power plants
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0937.pdf}{Department of Energy})}
#'
#' \item{japanese_cars_sold}{Japanese passenger cars sold in the US
#' (\href{https://www.census.gov/compendia/statab/2008/tables/08s1026.xls}{U.S. Bureau of Transportation Statistics})}
#'
#' \item{motor_vehicle_suicides}{suicides by crashing of motor vehicle
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{spelling_bee_word_length}{letters in winning Word of Scripps National Spelling Bee
#' (\href{https://www.spellingbee.com/champions-and-their-winning-words}{National Spelling Bee})}
#'
#' \item{spider_deaths}{number of people killed by venomous spiders
#' (\href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention})}
#'
#' \item{math_doctorates}{math doctorates awarded
#' (\href{https://www.nsf.gov/statistics/infbrief/nsf11305/}{National Science Foundation})}
#'
#' \item{uranium}{uranium stored at US nuclear power plants
#' (\href{https://www.census.gov/compendia/statab/2012/tables/12s0937.pdf}{Department of Engergy})}
#' }
#' @source \url{https://www.tylervigen.com/spurious-correlations}
"tylervigen"
#' Explicitly draw plot
#'
#' @description The ggplot2 code necessary to make plots in Tyler Vigen's style
#' require aggressive parameters for the splines which produce warning messages.
#' This custom print function suppresses these messages.
#'
#' @param x object of class ggplot_suppressed
#' @param ... other arguments not used by this method
#'
#' @return Invisibly returns the result of ggplot_build(), which is a list with components that contain the plot itself, the data, information about the scales, panels etc.
#' @export
print.ggplot_suppressed <- function(x, ...) {
# current class vector
xc <- class(x)
# remove ggplot_suppressed
attr(x, 'class') <- xc[-which(xc == "ggplot_suppressed")]
suppressWarnings(print(x, ...))
}
#' Internal Color Palette
#'
#' @description Internal color palette for the package.
#'
#' @return list of named hex color codes
colors <- list("joker_green" = "#274000",
"joker_purple" = "#54003E",
"joker_red" = "#CC1005")
#' Tyler Vigen Plot 2
#'
#' @description Number of people who drowned by falling into a pool correlates with Films Nicolas Cage appeared in (r = 0.67).
#' @references
#' \href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention}
#'
#' \href{https://www.imdb.com/name/nm0000115/}{Internet Movie Database}
#' @keywords drowning, pool, cage
#' @export
tylervigen_plot2 <- function() {
suppressWarnings(
whysospurious::tylervigen %>%
dplyr::select(year, cage_films, pool_fall_drownings) %>%
dplyr::filter(complete.cases(.)) %>%
dplyr::transmute(
year = year,
series_a = (cage_films - 0) / (6 - 0),
series_b = (pool_fall_drownings - 80) / (140 - 80)
) %>%
tidyr::pivot_longer(tidyr::starts_with("series")) %>%
ggplot2::ggplot(ggplot2::aes(
x = year, y = value, color = name
)) +
ggplot2::geom_point() +
ggplot2::geom_smooth(
method = "loess",
formula = y ~ x,
span = 0.4,
se = FALSE
) +
ggplot2::scale_x_continuous(
ggplot2::element_blank(),
breaks = seq(1999, 2009, 2),
sec.axis = ggplot2::sec_axis(
~ .,
name = ggplot2::element_blank(),
breaks = seq(1999, 2009, 2)
)
) +
ggplot2::scale_y_continuous(
"Swimming pool drownings",
breaks = seq(0, 1, 1 / 3),
labels = seq(80, 140, 20),
limits = c(0, 1),
sec.axis = ggplot2::sec_axis(
~ .,
name = "Nicolas Cage Films",
breaks = seq(0, 1, 1 / 3),
labels = seq(0, 6, 2)
)
) +
ggplot2::scale_color_manual(
ggplot2::element_blank(),
labels = c("Nicholas Cage Films",
"Swimming pool drownings"),
values = c("black",
colors$joker_red)
) +
ggplot2::labs(
title = "Number of people who drowned by falling into a pool",
subtitle = expression(paste(
"correlates with ",
bold("Films Nicolas Cage appeared in"),
" (r = 0.67)"
)),
caption = "Data Sources: Centers for Disease Control & Prevention and Internet Movie Database"
) +
ggplot2::theme(
plot.title = ggplot2::element_text(color = colors$joker_red),
plot.caption = ggplot2::element_text(hjust = 0, color = "grey"),
legend.position = "bottom",
axis.text.x.top = ggplot2::element_text(color = colors$joker_red),
axis.title.y.left = ggplot2::element_text(color = colors$joker_red),
axis.text.y.left = ggplot2::element_text(color = colors$joker_red)
)
) -> tvp
attr(tvp, 'class') <- c("ggplot_suppressed", class(tvp))
return(tvp)
}
#' Tyler Vigen Plot 3
#'
#' @description Per capita cheese consumption correlates with Number of people who died by becoming tangled in their bedsheets (r = 0.95).
#' @references
#' \href{https://www.census.gov/compendia/statab/2012/tables/12s0217.xls}{Department of Agriculture}
#'
#' \href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention}
#' @keywords cheese, death, bed, sheet
#' @export
tylervigen_plot3 <- function() {
suppressWarnings(
whysospurious::tylervigen %>%
dplyr::select(year, bed_deaths, cheese_percap) %>%
dplyr::filter(complete.cases(.)) %>%
dplyr::transmute(
year = year,
series_a = (bed_deaths - 200) / (800 - 200),
series_b = (cheese_percap - 28.5) / (33.5 - 28.5)
) %>%
tidyr::pivot_longer(tidyr::starts_with("series")) %>%
ggplot2::ggplot(ggplot2::aes(
x = year, y = value, color = name
)) +
ggplot2::geom_point() +
ggplot2::geom_smooth(
method = "loess",
formula = y ~ x,
span = 0.4,
se = FALSE
) +
ggplot2::scale_x_continuous(
ggplot2::element_blank(),
breaks = seq(2000, 2009, 3),
sec.axis = ggplot2::sec_axis(
~ .,
name = ggplot2::element_blank(),
breaks = seq(2000, 2009, 3)
)
) +
ggplot2::scale_y_continuous(
"Cheese consumed",
breaks = seq(0, 1, 1 / 3),
labels = seq(28.5, 33, 1.5),
limits = c(0, 1),
sec.axis = ggplot2::sec_axis(
~ .,
name = "Bedsheet tanglings",
breaks = seq(0, 1, 1 / 3),
labels = seq(200, 800, 200)
)
) +
ggplot2::scale_color_manual(
ggplot2::element_blank(),
labels = c("Bedsheet tanglings",
"Cheese consumed"),
values = c("black",
colors$joker_red)
) +
ggplot2::labs(
title = "Per capita cheese consumption",
subtitle = expression(paste(
"correlates with ",
bold("Death by bedsheet entanglement"),
" (r = 0.95)"
)),
caption = "Data Sources: U.S. Department of Agriculture and Centers for Disease Control & Prevention"
) +
ggplot2::theme(
plot.title = ggplot2::element_text(color = colors$joker_red),
plot.caption = ggplot2::element_text(hjust = 0, color = "grey"),
legend.position = "bottom",
axis.text.x.top = ggplot2::element_text(color = colors$joker_red),
axis.title.y.left = ggplot2::element_text(color = colors$joker_red),
axis.text.y.left = ggplot2::element_text(color = colors$joker_red)
)
) -> tvp
attr(tvp, 'class') <- c("ggplot_suppressed", class(tvp))
return(tvp)
}
#' Tyler Vigen Plot 9
#'
#' @description People who drowned after falling out of a fishing boat correlates with marriage rate in Kentucky (r = 0.95).
#' @references
#' \href{https://wonder.cdc.gov/}{Centers for Disease Control & Prevention}
#'
#' \href{https://www.census.gov/compendia/statab/cats/births_deaths_marriages_divorces/marriages_and_divorces.html}{National Vital Statistics Reports}
#' @keywords drowning, fishing, boat, marriage, Kentucky
#' @export
tylervigen_plot9 <- function() {
whysospurious::tylervigen %>%
dplyr::select(year, kentucky_marriage_rate, fishing_drownings) %>%
dplyr::transmute(
year = year,
series_a = (kentucky_marriage_rate - 7) / (11 - 7),
series_b = (fishing_drownings - 0) / (20 - 0)
) %>%
tidyr::pivot_longer(tidyr::starts_with("series")) %>%
ggplot2::ggplot(ggplot2::aes(x = year, y = value, color = name)) +
ggplot2::geom_point() +
ggplot2::geom_smooth(
method = "loess",
formula = y ~ x,
span = 0.4,
se = FALSE
) +
ggplot2::scale_x_continuous(
ggplot2::element_blank(),
breaks = seq(1999, 2010, 1),
limits = c(1999, 2010),
sec.axis = ggplot2::sec_axis(
~ .,
name = ggplot2::element_blank(),
breaks = seq(1999, 2010, 1)
)
) +
ggplot2::scale_y_continuous(
"Fishing boat deaths",
breaks = seq(0, 1, 1 / 2),
labels = seq(0, 20, 10),
limits = c(0, 1),
sec.axis = ggplot2::sec_axis(
~ .,
name = "Kentucky marriages",
breaks = seq(0, 1, 1 / 4),
labels = paste(seq(7, 11, 1), "per 1,000")
)
) +
ggplot2::scale_color_manual(
ggplot2::element_blank(),
labels = c("Kentucky marriages",
"Fishing boat deaths"),
values = c("black",
colors$joker_red)
) +
ggplot2::labs(title = "People who drowned after falling out of a fishing boat",
subtitle = expression(paste(
"correlates with ",
bold("Marriage rate in Kentucky"),
" (r = 0.95)"
)),
caption = "Data Sources: Centers for Disease Control & Prevention and National Vital Statistics Reports") +
ggplot2::theme(
plot.title = ggplot2::element_text(color = colors$joker_red),
plot.caption = ggplot2::element_text(hjust = 0, color = "grey"),
legend.position = "bottom",
axis.text.x.top = ggplot2::element_text(color = colors$joker_red),
axis.title.y.left = ggplot2::element_text(color = colors$joker_red),
axis.text.y.left = ggplot2::element_text(color = colors$joker_red)
) -> tvp
attr(tvp, 'class') <- c("ggplot_suppressed", class(tvp))
return(tvp)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.