# data-raw/reddit_finance/reddit_finance-dataprep.R

library(readr)
library(dplyr)
library(tidyr)
library(stringr)

# load data --------------------------------------------------------------------

# Read the raw 2020 Reddit finance CSV. The path is built with here::here()
# rather than relying on the current working directory.
raw_data <- read_csv(
  here::here("data-raw/reddit_finance/2020_reddit_finance.csv")
)

# clean data:
# Note: some preprocessing was already done in Excel before this script runs.
#
# - country: missing values are filled with "United States", then every value
#   is converted to title case.
# - retired: missing values are treated as "No"; only the "No" rows are kept,
#   after which the column is dropped.
reddit_finance <- raw_data |>
  mutate(country = str_to_title(replace_na(country, "United States"))) |>
  filter(replace_na(retired, "No") == "No") |>
  select(-retired)

# save -------------------------------------------------------------------------

# Store the cleaned data set via usethis, replacing any previously saved copy.
usethis::use_data(
  reddit_finance,
  overwrite = TRUE
)
# OpenIntroStat/openintro documentation built on June 4, 2024, 4:19 a.m.