Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
library(knitr)
## ----campfin------------------------------------------------------------------
library(campfin)
packageVersion("campfin")
## ----setup, warning=FALSE, message=FALSE, error=FALSE-------------------------
library(dplyr)
library(readr)
library(stringr)
## ----view_messy---------------------------------------------------------------
ex_file <- system.file("extdata", "vt_contribs.csv", package = "campfin")
## ----echo=FALSE---------------------------------------------------------------
kable(read_csv(ex_file, col_types = cols(.default = col_character())))
## ----read_messy---------------------------------------------------------------
vt <- read_csv(
file = ex_file,
trim_ws = FALSE,
na = c("", "NA", "N/A"),
col_types = cols(
amount = col_number(),
date = col_date_mdy()
)
)
## ----date_math----------------------------------------------------------------
min(vt$date)
## ----prop_valid_before--------------------------------------------------------
prop_in(vt$city, str_to_lower(valid_city))
prop_in(vt$state, valid_state)
prop_in(vt$zip, valid_zip)
## ----glimpse_fun--------------------------------------------------------------
col_stats(vt, n_distinct)
col_stats(vt, count_na)
## ----normal_address-----------------------------------------------------------
vt <- vt %>%
mutate(
address = normal_address(
address = address,
abbs = usps_street,
na = invalid_city,
na_rep = TRUE
),
city = normal_city(
city = city,
abbs = usps_city,
states = "VT",
na = invalid_city
),
state = normal_state(
state = state,
abbreviate = TRUE,
na_rep = TRUE,
valid = valid_state
),
zip = normal_zip(
zip = zip,
na_rep = TRUE
)
)
## ----showal, echo=FALSE-------------------------------------------------------
vt %>% select(address, city, state, zip)
## ----length_city--------------------------------------------------------------
length(valid_city)
sample(valid_city, 6)
sample(extra_city, 6)
# combine both vectors
many_city <- c(valid_city, extra_city)
## ----bad_city, echo=FALSE-----------------------------------------------------
(bad <- vt %>%
select(1, 7:9) %>%
filter(!is.na(city)) %>%
mutate(valid = city %in% many_city) %>%
filter(!valid))
## ----bad_join-----------------------------------------------------------------
bad <- left_join(
x = bad,
y = zipcodes,
by = c("zip", "state"),
suffix = c("_raw", "_match")
)
## ----echo=FALSE---------------------------------------------------------------
kable(select(bad, -valid))
## ----str_dist-----------------------------------------------------------------
str_dist("example", "xampel")
## ----is_abbrev----------------------------------------------------------------
is_abbrev(abb = "NYC", full = "New York City")
is_abbrev(abb = "DC", full = "Washington")
## ----check_city---------------------------------------------------------------
bad <- bad %>%
mutate(
match_dist = str_dist(city_raw, city_match),
match_abb = is_abbrev(city_raw, city_match)
)
## ----echo=FALSE---------------------------------------------------------------
kable(select(bad, -valid))
## ----swap_city----------------------------------------------------------------
vt <- vt %>%
rename(city_raw = city) %>%
# match city by ZIP
left_join(zipcodes) %>%
rename(city_match = city) %>%
mutate(
# check against match
match_dist = str_dist(city_raw, city_match),
match_abb = is_abbrev(city_raw, city_match),
city = ifelse(match_abb | match_dist == 1, city_match, city_raw)
) %>%
# remove intermediary columns
select(-city_raw, -city_match, -match_dist, -match_abb)
## ----show_swap, echo=FALSE----------------------------------------------------
vt %>%
select(1, 7:9) %>%
filter(!is.na(city)) %>%
mutate(
all_valid = all(
city %in% valid_city,
state %in% valid_state,
zip %in% valid_zip
)
) %>%
kable()
## ----flag_na------------------------------------------------------------------
(vt <- flag_na(vt, name))
## ----flag_dupes---------------------------------------------------------------
(vt <- flag_dupes(vt, -id, .both = TRUE))
## ----echo=FALSE---------------------------------------------------------------
kable(vt)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.