# Source file: inst/doc/tidylo.R

## ---- include = FALSE---------------------------------------------------------
# Chunk defaults for the rendered document: suppress warnings and messages,
# collapse source and output together, and prefix printed output with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  message = FALSE,
  warning = FALSE
)

# Attach ggplot2 without its startup messages, then make theme_light() the
# default theme for every plot drawn later in the script.
suppressPackageStartupMessages(library(ggplot2))
theme_set(theme_light())

## ----bigram_counts------------------------------------------------------------
library(dplyr)
library(janeaustenr)
library(tidytext)
library(stringr)

# Tokenize the Austen novels into two-word sequences. Case is preserved
# (to_lower = FALSE) so that any bigram containing an uppercase letter can be
# filtered out afterwards.
# NOTE(review): presumably this is meant to drop proper nouns and
# sentence-initial words -- confirm against the vignette text.
tidy_bigrams <- austen_books() %>%
  unnest_tokens(
    bigram,
    text,
    token = "ngrams",
    n = 2,
    to_lower = FALSE
  ) %>%
  filter(str_detect(bigram, "[A-Z]", negate = TRUE))

# Number of occurrences of each bigram within each book, largest counts first.
bigram_counts <- tidy_bigrams %>%
  count(book, bigram, sort = TRUE)

bigram_counts

## ----bigram_log_odds, dependson="bigram_counts"-------------------------------
library(tidylo)

# Add log-odds columns to the counts, treating each book as a set and each
# bigram as a feature; `n` is the count column produced by count().
bigram_log_odds <- bind_log_odds(
  bigram_counts,
  set = book,
  feature = bigram,
  n = n
)

# Print the result with the largest weighted log odds first.
arrange(bigram_log_odds, desc(log_odds_weighted))

## ----bigram_plot, dependson="bigram_log_odds", fig.width=10, fig.height=7-----
library(ggplot2)

# For every book keep the rows with the 10 largest weighted log odds
# (slice_max keeps ties by default), order the bigrams by that value, and draw
# one bar-chart panel per book with independent axis scales.
bigram_log_odds %>%
  group_by(book) %>%
  slice_max(order_by = log_odds_weighted, n = 10) %>%
  ungroup() %>%
  mutate(bigram = reorder(bigram, log_odds_weighted)) %>%
  ggplot(mapping = aes(x = log_odds_weighted, y = bigram, fill = book)) +
  geom_col(show.legend = FALSE) +
  facet_wrap(~book, scales = "free") +
  ylab(NULL)

## ----gear_counts--------------------------------------------------------------
# Tabulate how many cars in mtcars fall into each combination of `vs` and
# `gear`; the count lands in column `n`.
gear_counts <- count(mtcars, vs, gear)

gear_counts

## ----dependson="gear_counts"--------------------------------------------------
# Log odds of each `gear` value within each `vs` group, using the default
# settings of bind_log_odds().
regularized <- bind_log_odds(
  gear_counts,
  set = vs,
  feature = gear,
  n = n
)

regularized

## ----dependson="gear_counts"--------------------------------------------------
# Same computation as the previous chunk, but with both optional switches
# turned on (see ?bind_log_odds for what `uninformative` and `unweighted`
# change in the output).
unregularized <- bind_log_odds(
  gear_counts,
  set = vs,
  feature = gear,
  n = n,
  uninformative = TRUE,
  unweighted = TRUE
)

unregularized

# Try the tidylo package in your browser
#
# Any scripts or data that you put into this service are public.
#
# tidylo documentation built on March 22, 2022, 5:08 p.m.