.dictionary_ppp_names <-
function() {
tibble(nameSBA = c("LoanRange", "BusinessName", "Address", "City", "State", "Zip",
"NAICSCode", "BusinessType", "RaceEthnicity", "Gender", "Veteran",
"NonProfit", "JobsRetained", "DateApproved", "Lender", "CD",
"LoanAmount", "nameFile"),
nameActual = c("rangeLoan", "nameEntitySBA", "addressStreetEntity", "cityEntity", "stateEntity", "zipcodeEntity",
"idNAICS", "typeBusiness", "typeEthnicity", "typeGenderPrincipal", "isVeteranOwned",
"isNonProfit", "countJobsRetained", "dateApprovalPPP", "nameLenderSBA", "codeCongressionalDistrict",
"amountLoan", "nameFile")
)
}
.munge_ppp_names <-
function(data) {
dict_names <- .dictionary_ppp_names()
ppp_names <-
names(data)
actual_names <-
ppp_names %>%
map_chr(function(x) {
df_row <-
dict_names %>%
filter(nameSBA == x) %>%
distinct() %>%
slice(1)
if (nrow(df_row) == 0) {
glue::glue("Missing {x}") %>% message()
return(x)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
.parse_ppp_folders <-
function(folder) {
options(scipen = 9999999)
folders <-
list.files() %>%
discard(function(x) {
x %>% str_detect("pdf")
})
data <-
folders %>%
map_dfr(function(x) {
file <- dir(x) %>%
keep(function(x) {
x %>% str_detect("csv")
})
glue("{x}/{file}") %>%
read_csv(
col_types = cols(
LoanRange = col_character(),
BusinessName = col_character(),
Address = col_character(),
City = col_character(),
State = col_character(),
Zip = col_character(),
NAICSCode = col_double(),
BusinessType = col_character(),
RaceEthnicity = col_character(),
Gender = col_character(),
Veteran = col_character(),
NonProfit = col_character(),
JobsRetained = col_double(),
DateApproved = col_character(),
Lender = col_character(),
CD = col_character()
)
) %>%
mutate(nameFile = x) %>%
select(nameFile, everything())
}) %>%
as_tibble()
data <-
data %>%
.munge_ppp_names()
data <- data %>%
mutate(isLoanEstimate = !is.na(rangeLoan)) %>%
select(nameFile,
isLoanEstimate,
nameEntitySBA,
rangeLoan,
amountLoan,
everything())
data <-
data %>%
mutate(rangeLoan = rangeLoan %>% str_replace("\\ ", "\\|")) %>%
separate(rangeLoan,
into = c("codeRangeLoan", "rangeLoan"),
sep = "\\|") %>%
separate(
codeCongressionalDistrict,
into = c("stateCongressionalDistrict", "numberCongressionalDistrict"),
sep = "\\ - ",
convert = F,
remove = F
)
data <-
data %>%
mutate(
rangeLoan = case_when(
rangeLoan == "$1-2 million" ~ "1.00M-2.00M",
rangeLoan == "$150,000-350,000" ~ ".150M-.350M",
rangeLoan == "$2-5 million" ~ "2.000M-5.000M",
rangeLoan == "$350,000-1 million" ~ ".350M-1.000M",
rangeLoan == "$5-10 million" ~ "5.000M-10.000M",
TRUE ~ "0.001M-0.150M"
) %>% forcats::as_factor()
)
data <- data %>%
mutate(
typeBusiness = case_when(
typeBusiness == "Limited Liability Company(LLC)" ~ "Limited Liability Company",
typeBusiness == "Corporation" ~ "C Corporation",
typeBusiness == "Subchapter S Corporation" ~ "S Corporation",
typeBusiness %>% str_detect("Employee Stock Ownership Plan") ~ "Employee Stock Ownership Plan",
typeBusiness %>% str_detect("Rollover as Business") ~ "Rollover as Business Startup",
typeBusiness == "Non-Profit Organization" ~ "Non Profit Organization",
typeBusiness == "Self-Employed Individuals" ~ "Self Employed Individuals",
typeBusiness == "Non-Profit Childcare Center" ~ "Non Profit Childcare Center",
typeBusiness %>% str_detect("Independent Contractors") ~ "Independent Contractor",
is.na(typeBusiness) ~ "Unknown",
TRUE ~ typeBusiness
)
)
data <- data %>%
mutate(
hasEntityName = !is.na(nameEntitySBA),
codeRangeLoan = codeRangeLoan %>% coalesce("F") %>% str_to_upper()
) %>%
select(nameFile, nameEntitySBA, everything())
data <- data %>%
mutate(
isSBAIndividual = typeBusiness %>% str_detect("Individual|Contractor"),
isSBACorporation = typeBusiness %>% str_detect("Corporation"),
isSBAPartnership = typeBusiness %>% str_detect("Partnership|Proprietorship"),
isSBANonProfit = typeBusiness %>% str_detect("Non Profit"),
isSBAEntity = !typeBusiness %>% str_detect("Unknown|Self Employ")
)
tbl_ranges <- data %>%
distinct(codeRangeLoan, rangeLoan) %>%
filter(codeRangeLoan != "F") %>%
mutate(
amountLoanMinimum = case_when(
codeRangeLoan == "A" ~ 5000000,
codeRangeLoan == "B" ~ 2000000,
codeRangeLoan == "C" ~ 1000000,
codeRangeLoan == "D" ~ 350000,
codeRangeLoan == "E" ~ 150000
),
amountLoanMaximum = case_when(
codeRangeLoan == "A" ~ 10000000,
codeRangeLoan == "B" ~ 5000000 -
1,
codeRangeLoan == "C" ~ 2000000 -
1,
codeRangeLoan == "D" ~ 1000000 -
1,
codeRangeLoan == "E" ~ 3500000 - 1
),
amountLoanMean = (amountLoanMinimum + (amountLoanMaximum + 1)) / 2
) %>%
select(-rangeLoan)
data <- data %>%
left_join(tbl_ranges, by = "codeRangeLoan") %>%
mutate(
amountLoanMean = case_when(is.na(amountLoanMean) ~ amountLoan,
TRUE ~ amountLoanMean),
amountLoanMinimum = case_when(is.na(amountLoanMinimum) ~ amountLoan,
TRUE ~ amountLoanMinimum),
amountLoanMaximum = case_when(is.na(amountLoanMaximum) ~ amountLoan,
TRUE ~ amountLoanMaximum)
)
data <- data %>%
mutate(
dateApprovalPPP = mdy(dateApprovalPPP),
isNonProfit = isNonProfit == "Y",
isVeteranOwned = case_when(
isVeteranOwned == "Non-Veteran" ~ FALSE,
isVeteranOwned == "Veteran" ~ TRUE,
TRUE ~ NA
),
isJobsCountError = countJobsRetained < 0,
countJobsRetainedCleaned = case_when(
!is.na(countJobsRetained) ~ abs(countJobsRetained),
TRUE ~ countJobsRetained
)
)
data <- data %>%
mutate(
typeGenderPrincipal = case_when(
typeGenderPrincipal == "Female Owned" ~ "Female",
typeGenderPrincipal == "Male Owned" ~ "Male",
TRUE ~ "Unstated"
),
typeEthnicity = case_when(
typeEthnicity %>% str_detect("American Indian") ~ "American Indian",
typeEthnicity %>% str_detect("Eskimo") ~ "Eskimo",
typeEthnicity %>% str_detect("Black") ~ "Black",
typeEthnicity == "Unanswered" ~ "Unstated",
typeEthnicity == "Multi Group" ~ "Multiracial",
TRUE ~ typeEthnicity
)
)
data <- data %>%
mutate_if(is.character,
list(function(x) {
case_when(x == "" ~ NA_character_,
TRUE ~ x)
}))
data <-
data %>%
mutate_if(is.character,
list(function(x) {
x %>% str_to_upper() %>% stri_enc_toutf8() %>% str_squish()
}))
data <- data %>%
mutate(
amountLoanPerEmployee = case_when(
is.na(amountLoan) ~ NA_real_,
amountLoan == 0 ~ NA_real_,
countJobsRetainedCleaned == 0 ~ NA_real_,
TRUE ~ amountLoan / countJobsRetainedCleaned
),
amountLoanMeanPerEmployee = case_when(
is.na(amountLoanMean) ~ NA_real_,
amountLoanMean == 0 ~ NA_real_,
countJobsRetainedCleaned == 0 ~ NA_real_,
TRUE ~ amountLoanMean / countJobsRetainedCleaned
),
amountLoanMinPerEmployee = case_when(
is.na(amountLoanMinimum) ~ NA_real_,
amountLoanMinimum == 0 ~ NA_real_,
countJobsRetainedCleaned == 0 ~ NA_real_,
TRUE ~ amountLoanMinimum / countJobsRetainedCleaned
),
amountLoanMaxPerEmployee = case_when(
is.na(amountLoanMaximum) ~ NA_real_,
amountLoanMaximum == 0 ~ NA_real_,
countJobsRetainedCleaned == 0 ~ NA_real_,
TRUE ~ amountLoanMaximum / countJobsRetainedCleaned
)
)
data <-
data %>%
mutate(nameEntity = nameEntitySBA,
nameLender = nameLenderSBA)
data <- data %>%
mutate_at(vars(nameEntity, nameLender), list(function(x) {
x %>% str_replace_all(
"^_|^, |^:|^\\.|^\\,|\\'|\\,$|\\{|\\#|\\*|\\.|\\,|\\(|\\)|\\^-|\\@|\\[|\\]|^&|^+|^%|^%-|^-",
""
) %>% str_squish()
})) %>%
mutate(nameEntity = case_when(nameEntity == "-" ~ NA_character_,
TRUE ~ nameEntity))
data <- data %>%
mutate(
nameLender = nameLender %>% str_remove_all("NATIONAL ASSOCIATION") %>% str_replace_all("D/B/A", "\\ | ")
) %>%
separate(
nameLender,
into = c("nameLender", "nameLenderDBA"),
sep = "\\|",
extra = "merge",
fill = "right"
) %>%
mutate_if(is.character, str_squish)
data <- data %>%
mutate(hasLenderDBA = !is.na(nameLenderDBA))
dict_naics <-
dictionary_naics_codes() %>%
select(idNAICS:nameIndustryNAICS) %>%
group_by(idNAICS) %>%
slice(1) %>%
ungroup()
data <- data %>%
mutate(id = 1:n()) %>%
left_join(dict_naics, by = "idNAICS") %>%
distinct() %>%
group_by(id) %>%
slice(1) %>%
ungroup() %>%
select(-id)
data <- data %>%
entities::refine_columns(entity_columns = c("nameLender", "nameEntity"))
data <- data %>%
mutate(
cityEntity = cityEntity %>% str_replace_all("^_|^, |^-|^:|^\\.|^\\,|^\\'|\\,$|\\{|\\#|\\*|`", "")
) %>%
entities::refine_columns(entity_columns = c("cityEntity")) %>%
select(-c(cityEntity, slugSoundexcityEntity)) %>%
rename(cityEntity = cityEntityClean) %>%
select(one_of(names(data)), everything())
data <- data %>%
mutate(stateEntity = case_when(stateEntity == "FI" ~ "FL",
TRUE ~ stateEntity))
data <-
data %>%
build_address()
data <- data %>%
mutate(isLLC = nameEntity %>% str_detect("LLC"),
isINC = nameEntitySBA %>% str_detect("INC$"))
data
}
#' PPP Data
#'
#' FOIA PPP data
#'
#' @param unformat
#' @param snake_names
#' @param use_local
#'
#' @return
#' @export
#'
#' @examples
ppp_foia <-
function(unformat = F,
snake_names = T, use_local = F) {
file_nos <- 1:10
if (use_local) {
oldwd <- getwd()
setwd("~")
urls <- glue(
"Desktop/abresler.github.io/r_packages/govtrackR/data/ppp/ppp/ppp_{file_nos}.rda"
)
} else {
urls <- glue("https://asbcllc.com/r_packages/govtrackR/data/ppp/ppp/ppp_{file_nos}.rda")
}
data <-
urls %>%
map_dfr(function(url){
read_rda(file = url)
})
if (!unformat) {
amt_names <- data %>% select(matches("amount")) %>% names()
data <-
data %>%
mutate_at(amt_names,
list(function(x){
currency(x, digits = 0)
}))
}
data <- data %>%
select(matches("nameFile|nameEntity|nameLender"), everything())
if (snake_names) {
data <- data %>% janitor::clean_names()
}
if (use_local) {
if (getwd() != oldwd) {
setwd(oldwd)
}
}
data
}
.ppp_foia_url <-
function() {
file_nos <- 1:10
urls <- glue("https://asbcllc.com/r_packages/govtrackR/data/ppp/ppp/ppp_{file_nos}.rda")
data <-
urls %>%
map_dfr(function(url){
read_rda(file = url)
})
amt_names <- data %>% select(matches("amount")) %>% names()
data <-
data %>%
mutate_at(amt_names,
list(function(x){
currency(x, digits = 0)
})) %>%
mutate(nameAgencyParent = "NATIONAL SCIENCE FOUNDATION")
name_cols <- data %>% select(matches("name")) %>% names()
data <- data %>%
mutate_at(name_cols, str_to_upper)
data <- data %>%
separate(
nameDivisionFunding,
extra = "merge",
into = c("nameDivisionFunding", "slugDivisionFunding"),
sep = "\\("
) %>%
mutate(slugDivisionFunding = slugDivisionFunding %>% str_remove_all("\\)")) %>%
mutate_if(is.character, str_squish)
data <- data %>%
mutate(nameAgencyParent = "NATIONAL SCIENCES FOUNDATION")
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.