# Data source: https://www.kaggle.com/zynicide/wine-reviews/
# The description there is a little off. The '150k' file doesn't have any
# twitter/title info, and both files still have plenty of dupes (about 10k in
# the 130k file, and about 1/3 of the 150k file), but for demo purposes this
# should be fine. It's not clear what the actual differences are between the
# 130k and 150k files, so I just combined them to provide a maximum number of
# reviews.
# Build the `wine_reviews` dataset: read both Kaggle exports, drop the unnamed
# row-index column, remove exact duplicate rows, combine the two files, keep
# one row per description, and store the result as package data.

# readr names the unnamed first (row index) column `X1` before version 2.0 and
# `...1` from 2.0 onwards; any_of() drops whichever one is present.
wine_reviews0 <- read_csv("data-raw/wine/winemag-data-130k-v2.csv")
# wine_reviews0
wine_reviews0 <- wine_reviews0 %>%
  select(-any_of(c("X1", "...1"))) %>%
  distinct()

# not seeing too much to clean yet.
wine_150 <- read_csv("data-raw/wine/winemag-data_first150k.csv") %>%
  select(-any_of(c("X1", "...1"))) %>%
  distinct()

# Join on every column the two files share (what full_join() would pick by
# default); spelling it out documents the keys and avoids the join message.
wine_reviews <- full_join(
  wine_reviews0,
  wine_150,
  by = intersect(names(wine_reviews0), names(wine_150))
)

# do a distinct on description, as that is most likely informative of true dupe
wine_reviews <- wine_reviews %>%
  distinct(description, .keep_all = TRUE)
# wine_reviews %>%
#   arrange(description)

usethis::use_data(wine_reviews, overwrite = TRUE)
# ml example --------------------------------------------------------------
# https://archive.ics.uci.edu/ml/datasets/Wine+Quality
# P. Cortez, A. Cerdeira, F. Almeida, T. Matos and J. Reis. Modeling wine preferences by data mining from physicochemical properties.
# In Decision Support Systems, Elsevier, 47(4):547-553, 2009.
# Available at: [http://dx.doi.org/10.1016/j.dss.2009.05.016]
# Build the `wine_quality` dataset: read the raw CSV, normalise the column
# names with janitor::clean_names(), and store the result as package data.
wine_quality <- read_csv("data-raw/wine/wine.csv")

wine_quality <- wine_quality %>%
  janitor::clean_names()

# TRUE rather than T: `T` is an ordinary variable that can be reassigned.
usethis::use_data(wine_quality, overwrite = TRUE)
# NOTE: the two lines below are web-page boilerplate, not part of the script.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.