.munge_irs <-
function(data) {
if (data %>% hasName("code_ein")) {
data <- data %>%
mutate(id_ein = as.numeric(code_ein), .before = "code_ein")
data <- data %>%
select(-code_ein)
}
if (data %>% hasName("code_subsection")) {
data <- data %>%
mutate(code_subsection = as.character(code_subsection))
}
code_names <- data %>% select(matches("code_")) %>% names()
if (length(code_names) > 0) {
data <- data %>%
mutate_at(code_names, as.character)
}
ym_cols <-
data %>%
select(matches("year_month")) %>%
names()
if (length(ym_cols) > 0) {
data <-
data %>%
mutate_at(ym_cols, list(function(x) {
case_when(x == 0 ~ NA_real_,
TRUE ~ x)
}))
data <-
data %>%
mutate(
date_tax_period = glue("{year_month_tax_period}01") %>% ymd() %m+% months(1) - 1,
.before = "year_month_tax_period"
)
}
logical_cols <-
data %>% select_if(is.character) %>% select(matches("^is_|^has_")) %>% names()
if (length(logical_cols) > 0) {
data <-
data %>% mutate_at(logical_cols,
list(function(x) {
case_when(
str_to_upper(x) %in% c("Y", "YES") ~ TRUE,
str_to_upper(x) %in% c("N", "NO", "F") ~ F,
str_to_upper(x) == "TRUE" ~ TRUE,
TRUE ~ as.logical(x)
)
}))
}
data
}
.block_text_to_dict <-
function(x,
column_names = c("codesNationalTaxonomyExemptEntities",
"namesNationalTaxonomyExemptEntities")) {
text <-
x %>% str_split("\n") %>% flatten_chr() %>% str_squish() %>%
discard(function(x) {
x == ""
})
tibble(text) %>%
mutate(text = text %>% str_replace("\\ ", "|")) %>%
separate(
text,
into = column_names,
sep = "\\|",
extra = 'merge',
fill = 'right'
) %>%
mutate_if(is.character, list(function(x) {
x %>% str_to_upper() %>% str_squish()
}))
}
.munge_irs_names <-
function(data) {
dict_names <- dictionary_irs_names()
irs_names <-
names(data)
actual_names <-
irs_names %>%
map_chr(function(x) {
df_row <-
dict_names %>%
filter(nameIRS == str_to_lower(x)) %>%
distinct() %>%
slice(1)
if (nrow(df_row) == 0) {
glue::glue("Missing {x}") %>% message()
return(x)
}
df_row$nameActual
})
data %>%
set_names(actual_names)
}
#' IRS Affiliation Codes
#'
#' @return
#' @export
#' @family tax-exempt, IRS
#'
#' @examples
dictionary_irs_affiliations <-
function() {
tibble(
idAffiliation = c(1:3, 6:9),
nameAffiliation = c(
"CENTRAL",
"Intermediate",
"Independent",
"Central",
"Intermediate",
"Central",
"Subordinate"
),
descriptionAffiliation = c(
"This code is used if the organization is a central type organization (no group exemption) of a National,
Regional or Geographic grouping of organizations.",
"This code is used if the organization is an intermediate organization (no group exemption) of a
National, Regional or Geographic grouping of organizations (such as a state headquarters of a national
organization)",
"This code is used if the organization is an independent organization or an independent auxiliary (i.e., not affiliated with a National, Regional, or Geographic grouping of organizations)",
"This code is used if the organization is a parent (group ruling) and is not a church or 501(c)(1)
organization." ,
" This code is used if the organization is a group exemption intermediate organization of a National, Regional or Geographic grouping of organizations",
"This code is used if the organization is a parent (group ruling) and is a church or 501(c)(1) organization.",
" This code is used if the organization is a subordinate in a group ruling."
)
) %>% munge_data()
}
#' IRS Tax Deductiblity Dictionary
#'
#' @return
#' @family tax-deductibility, IRS
#' @export
#'
#' @examples
dictionary_irs_deductibility <-
function() {
tibble(
idDeductibility = c(0, 1:2, 4),
typeDeductiblity = c(
"UNKNOWN",
"DEDUCTIBLE",
"NON-DEDUCTIBLE",
"DEDUCTIBLE BY TREATY"
)
)
}
#' IRS Name Dictioanry
#'
#' Dictionary of IRS names against govtrackR
#' schema
#'
#' @return
#' @export
#'
#' @examples
dictionary_irs_names <-
function() {
tibble(
nameIRS = c(
"ein",
"name",
"ico",
"street",
"city",
"state",
"zip",
"group",
"subsection",
"affiliation",
"classification",
"ruling",
"deductibility",
"foundation",
"activity",
"organization",
"status",
"tax_period",
"asset_cd",
"income_cd",
"filing_req_cd",
"pf_filing_req_cd",
"acct_pd",
"asset_amt",
"income_amt",
"revenue_amt",
"ntee_cd",
"sort_name"
),
nameActual = c(
"codeEIN",
"nameOrganization",
"nameInCareOf",
"addressStreetOrganization",
"cityOrganization",
"stateOrganization",
"zipcodeOrganization",
"codeGroupExemption",
"codeSubsection",
"idAffiliation",
"idClassification",
"yearMonthRuling",
"idDeductibility",
"codeFoundation",
"codesActivity",
"idOrganization",
"codeExemptionStatus",
"yearMonthTaxPeriod",
"idAssetAmount",
"idIncomeAmount",
"codeFilingRequirement",
"has990PF",
"codeAccountingPeriod",
"amountAssets",
"amountIncome",
"amountRevenue",
"codeNationalTaxonomyExemptEntitiesClassification",
"nameSort"
)
)
}
dictionary_irs_data_sets <-
function() {
download_excel_file("https://www.irs.gov/pub/irs-soi/soiprogramdetails.xlsx")
}
# tax_exempt_entities -----------------------------------------------------
# https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf
#' IRS data dictionary for tax exempt entities
#'
#' \url{https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf}
#'
#' @return \code{tibble()}
#' @export
#' @family tax-exempt, IRS
#'
#' @examples
#' dictionary_irs_exempt_urls()
dictionary_irs_exempt_urls <-
function() {
page <-
read_html(
"https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf"
)
slugs <- page %>% html_nodes(".field--type-text-with-summary a")
file_names <- html_text(slugs)
urls <- slugs %>% html_attr('href')
urls <-
case_when(!urls %>% str_detect("http") ~ str_c("https://www.irs.gov", urls),
TRUE ~ urls)
data <- tibble(nameFile = file_names, urlIRS = urls)
data <-
data %>%
mutate(
typeFile = case_when(
urlIRS %>% str_detect("/eo") ~ "Exempt Entity",
urlIRS %>% str_detect("SIT|sit") ~ "Split Income",
urlIRS %>% str_detect("soi-tax-stats") ~ "SOI",
TRUE ~ "Other"
)
) %>%
mutate(
regionIRS = case_when(
urlIRS %>% str_detect("_xx") ~ "international",
urlIRS %>% str_detect("_pr") ~ "puerto rico",
urlIRS %>% str_detect("eo1") ~ "northeast",
urlIRS %>% str_detect("eo2") ~ "midatlantic",
urlIRS %>% str_detect("eo3") ~ "coasts",
urlIRS %>% str_detect("eo4") ~ "other"
),
isCSV = urlIRS %>% str_detect(".csv"),
isPDF = urlIRS %>% str_detect(".pdf")
) %>%
select(typeFile, regionIRS, everything())
data
}
#' IRS Tax Exempt entitiy masterfile
#'
#' Returns basic information for IRS tax exempt entities
#'
#' @param clean_entities if \code{TRUE} cleans organization column
#' @param snake_names if \code{TRUE} returns snake case names
#' @family tax-exempt, IRS
#'
#' @return \code{tibble()}
#' @export
#'
#' @examples
#' irs_master_exempt_entities()
irs_master_exempt_entities <-
memoise::memoise(function(clean_entities = T,
snake_names = F) {
url <-
"https://apps.irs.gov/pub/epostcard/data-download-pub78.zip"
outfile <- tempfile("download", fileext = ".zip")
file <- download.file(url, outfile)
unz_files <- outfile %>% unzip(exdir = "zip")
data <-
unz_files %>% fread(showProgress = FALSE) %>% as_tibble()
data <-
data %>%
setNames(
c(
"idEIN",
"nameOrganization",
"cityOrganization",
"stateOrganization",
"countryOrganization",
"codeDeductibility"
)
)
data <- data %>%
mutate(urlIRS = url)
unz_files %>% unlink()
file %>% unlink()
data <-
data %>%
mutate(codeDeductibility = codeDeductibility %>% str_count("\\,") + 1) %>%
munge_data(parse_dates = F, snake_names = F)
if (clean_entities) {
data <-
data %>%
entities::refine_columns(entity_columns = "nameOrganization")
}
if (snake_names) {
data <- data %>% clean_names()
}
data
})
irs_revoked_entities <-
function(clean_entities = T,
snake_names = F) {
url <- "https://apps.irs.gov/pub/epostcard/data-download-revocation.zip"
outfile <- tempfile("download", fileext = ".zip")
file <- download.file(url, outfile)
unz_files <- outfile %>% unzip(exdir = "zip")
data <-
unz_files %>% fread(showProgress = FALSE) %>% as_tibble()
}
.parse_irs_charity_detail <-
function(url = "https://www.irs.gov/pub/irs-soi/eo_xx.csv",
return_message = T) {
if (return_message) {
glue("Parsing {url}") %>% message()
}
data <-
vroom(url)
names(data) <- names(data) %>% str_to_lower()
data <-
data %>%
.munge_irs_names()
data <-
data %>%
mutate(urlIRS = url)
data <- data %>%
mutate(
classificationNTEE = case_when(
nchar(codeNationalTaxonomyExemptEntitiesClassification) == 4 ~ substr(codeNationalTaxonomyExemptEntitiesClassification, 4, 4)
),
codeNTEE = case_when(
!is.na(codeNationalTaxonomyExemptEntitiesClassification) ~ substr(codeNationalTaxonomyExemptEntitiesClassification, 1, 3),
TRUE ~ NA_character_
)
) %>%
select(-codeNationalTaxonomyExemptEntitiesClassification)
ym_cols <-
data %>%
select_if(is.character) %>%
select(matches("yearMonth")) %>%
names()
if (length(ym_cols) > 0) {
data <- data %>%
mutate_at(ym_cols, readr::parse_number)
}
ym_cols <-
data %>%
select(matches("yearMonth")) %>%
names()
if (length(ym_cols) > 0) {
data <-
data %>%
mutate_at(ym_cols, list(function(x) {
case_when(x == 0 ~ NA_real_,
TRUE ~ x)
}))
data <- data %>%
mutate(
dateRuling = glue("{yearMonthRuling}01") %>% ymd() %m+% months(1) - 1,
dateTaxPeriod = glue("{yearMonthTaxPeriod}01") %>% ymd() %m+% months(1) - 1
)
}
data <-
data %>%
mutate(has990PF = as.logical(has990PF))
data <-
data %>%
mutate(zipcodeOrganization = case_when(
zipcodeOrganization == "00000-0000" ~ NA_character_,
TRUE ~ zipcodeOrganization
))
data
}
#' Detailed IRS Exempt Entity Data
#'
#' Data on all regsitered non taxable
#' corporate entities
#'
#'
#' @param regions if `NULL` returns data on all regions otherwise \itemize{
#' \item international
#' \item puerto rico
#' \item northeast
#' \item coasts
#' \item other
#' }
#' @param clean_entities if `TRUE` cleans entitiy columns
#' @param snake_names if `TRUE` returns snake case names
#' @param join_locations if `TRUE` returns full location address
#' @param return_message if `TRUE` returns a message
#'
#' @return \code{tibble}
#' \url{https://www.irs.gov/charities-non-profits/exempt-organizations-business-master-file-extract-eo-bmf}
#' @family tax-exempt
#' @export
#'
#' @examples
irs_detailed_exempt_entities <-
function(regions = NULL,
clean_entities = F,
snake_names = T,
join_locations = T,
return_message = T) {
tbl_regions <-
dictionary_irs_exempt_urls() %>%
filter(typeFile %>% str_detect("Exempt Ent")) %>%
filter(isCSV)
if (length(regions) != 0) {
slugs <- regions %>% str_to_lower() %>% str_c(collapse = "|")
tbl_regions <-
tbl_regions %>%
filter(regionIRS %>% str_detect(slugs))
}
.parse_irs_charity_detail_safe <-
possibly(.parse_irs_charity_detail, tibble())
data <-
tbl_regions$urlIRS %>%
map_dfr(function(x) {
data <-
.parse_irs_charity_detail_safe(url = x, return_message = return_message)
if (data %>% hasName("idClassification")) {
data <- data %>%
mutate(idClassification = as.numeric(idClassification))
}
data
})
data <-
data %>%
left_join(tbl_regions %>% select(regionIRS, urlIRS),
by = "urlIRS")
data <-
data %>%
mutate(
idEIN = as.numeric(codeEIN),
hasCareOf = !is.na(nameInCareOf),
nameInCareOf = nameInCareOf %>% str_remove_all("^% ") %>%
str_remove_all("C/O") %>% str_squish()
) %>%
separate(
zipcodeOrganization,
into = c("zipcodeOrganization", "zip4Organization"),
sep = "\\-",
extra = "merge",
fill = "right"
) %>%
select(idEIN, everything())
zip_cols <- data %>% select(matches("^zip")) %>% names()
data <-
data %>% mutate_at(zip_cols,
list(function(x) {
case_when(x == "00000" ~ NA_character_,
x == "0000" ~ NA_character_,
TRUE ~ x)
}))
data <-
data %>%
left_join(dictionary_irs_ntee_codes, by = "codeNTEE") %>%
left_join(dictionary_irs_status_exemptions, by = "codeExemptionStatus") %>%
left_join(dictionary_irs_filing_requirements, by = "codeFilingRequirement") %>%
left_join(dictionary_irs_asset_codes, by = "idAssetAmount")
data <-
data %>%
left_join(dictionary_irs_affiliations(), by = "idAffiliation")
data <-
data %>%
mutate(id = 1:n()) %>%
left_join(dictionary_irs_foundation_types, by = "codeFoundation") %>%
distinct() %>%
group_by(id) %>%
slice(1) %>%
ungroup()
if (data %>% hasName("codesActivity")) {
df_codes <-
data %>%
select(id, codesActivity) %>%
gather(typeActivity, codeActivity, -id) %>%
mutate(
codeActivity01 = codeActivity %>% substr(1, 3),
codeActivity02 = codeActivity %>% substr(4, 6),
codeActivity03 = codeActivity %>% substr(7, 9)
) %>%
select(-c(codeActivity, typeActivity)) %>%
gather(variable, codeActivity, -id) %>%
filter(codeActivity != "000") %>%
mutate(variable = "codeActivity") %>%
group_by(id) %>%
mutate(number = 1:n()) %>%
ungroup() %>%
mutate(number = number - 1) %>%
select(-variable) %>%
left_join(dictionary_irs_activity_codes, by = "codeActivity")
df_primary_codes <-
df_codes %>% group_by(id) %>% filter(number == min(number)) %>% ungroup() %>%
rename(
codeActivityPrimary = codeActivity,
nameActivityParentPrimary = nameActivityParent,
nameActivityPrimary = nameActivity
) %>%
select(-number)
df_codes <-
df_codes %>%
group_by(id) %>%
nest() %>%
rename(dataActivities = data) %>%
ungroup() %>%
mutate(countActivities = dataActivities %>% map_dbl(nrow)) %>%
left_join(df_primary_codes, by = "id") %>%
select(-dataActivities, everything())
data <-
data %>%
select(-codesActivity) %>%
left_join(df_codes, by = "id")
rm(df_codes)
}
if (join_locations) {
data <-
data %>%
build_address()
}
tbl_regions <-
tbl_regions %>%
mutate(regionIRS = str_to_upper(regionIRS)) %>%
select(regionIRS, urlIRS)
data <-
data %>%
select(-one_of("regionIRS")) %>%
left_join(tbl_regions, by = "urlIRS") %>%
select(regionIRS, everything())
data <- data %>%
mutate(
hasDBA = nameSort %>% str_detect("DBA "),
nameSort = nameSort %>% str_remove_all("^DBA ")
)
data <-
data %>%
select(-codeEIN) %>%
group_by(idEIN) %>%
slice(1) %>%
ungroup() %>%
select(-id)
data <- data %>%
arrange(desc(amountAssets))
if (snake_names) {
data <- data %>% clean_names()
}
data
}
# SOI ---------------------------------------------------------------------
## https://www.irs.gov/statistics/soi-tax-stats-integrated-business-data
# https://www.irs.gov/statistics/soi-tax-stats-upcoming-data-releases
# soi 501c3 ---------------------------------------------------------------
# https://www.irs.gov/statistics/soi-tax-stats-annual-extract-of-tax-exempt-organization-financial-data
.parse_irs_url <-
function(url, url_name = "url_irs_file") {
parts <- url %>% str_split("/") %>% flatten_chr()
slug_file <- parts[[length(parts)]]
year_slug <- slug_file %>% substr(1,2)
file_parts <- slug_file %>% str_split("\\.") %>% flatten_chr()
format_file <- file_parts[[2]]
name_file <- file_parts[[1]]
slug_file <- name_file %>% substr(3, nchar(name_file))
year_file <-
glue("20{year_slug}") %>% as.numeric()
tibble(UQ(url_name) := url, name_file, year_file, slug_file, format_file)
}
.parse_990_extract_dictionary <-
function(url = "https://www.irs.gov/pub/irs-soi/19eofinextractdoc.xlsx") {
data <- .download_excel_file(url = url, has_col_names = T)
data <-
data %>%
slice(3:nrow(data)) %>%
select(1:3) %>%
setNames(c("name_irs", "description", "location"))
data <-
data %>%
mutate(is_logical = description %>% str_detect("\\?"))
if (data %>% hasName("location")) {
data <- data %>%
mutate(is_logical = case_when(location %>% str_detect(" IV") ~ T,
TRUE ~ is_logical))
}
data <-
data %>%
mutate(
name_irs = name_irs %>% str_to_lower(locale = "en"),
description = str_to_lower(description, locale = "en"),
name_actual = case_when(
name_irs == "ein" ~ "code_ein",
name_irs == "elf" ~ "type_filer",
name_irs == "miscrev11acd" ~ "code_miscrev11a",
name_irs == "miscrev11bcd" ~ "code_miscrev11bcd",
name_irs == "nonpfrea" ~ "code_non_profit_type",
name_irs == "tax_pd" ~ "year_month_tax_period",
name_irs == "subseccd" ~ "code_subsection",
name_irs %>% str_detect("cnt$") ~ glue("count_{name_irs}") %>% as.character(),
is_logical ~ glue("has_{name_irs}") %>% as.character(),
description %>% str_detect("service revenue code|qualified health plan in multiple|other revenue code 11c") ~ glue("code_{name_irs}") %>% as.character(),
description %>% str_detect("filed form|qualified health plan in multiple states
|payments for indoor tanning") ~ glue("has_{name_irs}") %>% as.character(),
description %>% str_detect("service revenue amount") ~ glue("amount_{name_irs}") %>% as.character(),
TRUE ~ glue("amount_{name_irs}") %>% as.character()
)
)
data <- data %>%
mutate(url_irs_file = url)
data
}
#' IRS 990 name dictionary
#'
#' @return tibble
#' @export
#'
#' @examples
dictionary_irs_990_extract_names <-
memoise::memoise(function(filter_years = NULL) {
dict <- dictionary_irs_990_extract_urls()
years <- dict %>% distinct(year_file) %>% pull()
df <- dict %>% filter(type_file == "dictionary")
data <-
df$url_irs_file %>%
map_dfr(function(url){
url %>% message()
.parse_990_extract_dictionary(url = url)
})
data <-
data %>%
left_join(df %>%
select(url_irs_file, year_file), by = "url_irs_file") %>%
select(year_file, everything())
if (length(filter_years) > 0) {
data <- data %>%
filter(year_file %in% filter_years)
}
data
})
#' IRS 990 extract data
#'
#' Returns links for IRS 990 and
#' related form summary of selected extracts
#'
#' @return tibble
#' \url{https://www.irs.gov/statistics/soi-tax-stats-annual-extract-of-tax-exempt-organization-financial-data}
#' @export
#' @family tax-exempt, IRS, dictionary
#'
#' @examples
#' dictionary_irs_990_extracts()
dictionary_irs_990_extract_urls <-
memoise::memoise(function() {
page <- read_html("https://www.irs.gov/statistics/soi-tax-stats-annual-extract-of-tax-exempt-organization-financial-data")
nodes <- page %>% html_nodes(".text-align-center a")
files <- nodes %>% html_text()
urls <- nodes %>% html_attr("href")
urls <- urls %>% str_replace_all("\\.dat.dat", "\\.dat")
data <-
urls %>%
map_dfr(function(url){
.parse_irs_url(url = url)
}) %>%
mutate(name_file = files) %>%
mutate(is_excel = format_file %>% str_detect("excel")) %>%
select(name_file, everything())
data <-
data %>%
mutate(type_file = case_when(
slug_file %>% str_detect("eofinextractdoc") ~ "dictionary",
slug_file %>% str_detect("ez") ~ "990EZ",
name_file %>% str_detect("-EZ") ~ "990EZ",
slug_file %>% str_detect("990pf") ~ "990PF",
TRUE ~ "990"
)) %>%
select(year_file, type_file, everything())
data
})
.munge_990_names <-
function(data, file_year = 2019) {
irs_names <- names(data) %>% str_to_lower()
tbl_names <- tibble(name_irs = irs_names)
dict_names <- dictionary_irs_990_extract_names()
df_names <- dict_names %>% filter(year_file == file_year)
actual_names <-
irs_names %>%
map_chr(function(x){
x <- str_to_lower(x)
df_n <-
df_names %>% filter(name_irs == x)
if (nrow(df_n) == 0) {
is_amt <- data %>% select(x) %>% select_if(is.numeric) %>% ncol() == 1
if (is_amt) {
x <- glue("{x}") %>% as.character()
}
return(as.character(x))
}
as.character(df_n$name_actual)
})
data %>%
setNames(actual_names)
}
.dl_990_data_excel <-
function(url = "https://www.irs.gov/pub/irs-soi/19eoextract990.xlsx", use_col_names = T) {
df_metadata <-
.parse_irs_url(url = url)
data <-
.download_excel_file(url = url, has_col_names = use_col_names)
names(data) <-
str_to_lower(names(data))
data <- data %>%
.munge_990_names(file_year = df_metadata$year_file)
data <-
data %>%
.munge_irs() %>%
mutate(url_irs_file = url)
data <-
data %>%
left_join(df_metadata, by = "url_irs_file") %>%
select(year_file, everything())
data
}
.dl_990_dat <-
function(url = "https://www.irs.gov/pub/irs-soi/15eofinextractEZ.dat") {
df_metadata <-
.parse_irs_url(url = url)
data <- vroom(url)
names(data) <-
str_to_lower(names(data))
data <-
data %>%
.munge_990_names(file_year = df_metadata$year_file)
data <-
data %>%
.munge_irs() %>%
mutate(url_irs_file = url)
data <-
data %>%
left_join(df_metadata, by = "url_irs_file") %>%
select(year_file, everything())
data
}
.dl_990_zip <-
function(url) {
df_metadata <-
.parse_irs_url(url = url)
outfile <- tempfile("download", fileext = ".zip")
file <- download.file(url, outfile)
unz_files <- outfile %>% unzip(exdir = "zip")
data <-
unz_files %>%
vroom()
names(data) <-
str_to_lower(names(data))
data <-
data %>%
.munge_990_names(file_year = df_metadata$year_file)
data <-
data %>%
.munge_irs() %>%
mutate(url_irs_file = url)
data <-
data %>%
left_join(df_metadata, by = "url_irs_file") %>%
select(year_file, everything())
unz_files %>% unlink()
file %>% unlink()
unlink("zip", recursive = T, force = T)
data
}
.irs_990_file <-
function(year = 2018, type = "990") {
slug_type <- str_to_upper(type)
if (!slug_type %in% c("990", "990PF", "990EZ")) {
stop("Type can only be 990, 990PF or 990EZ")
}
dict_urls <-
dictionary_irs_990_extract_urls()
df_url <-
dict_urls %>%
filter(year_file == year) %>%
filter(type_file == slug_type)
format_file <- df_url$format_file %>% unique()
url <- df_url$url_irs_file
if (format_file %>% str_detect("xls")) {
data <- .dl_990_data_excel(url = url )
}
if (format_file %>% str_detect("zip")) {
data <- .dl_990_zip(url = url)
}
if (format_file %>% str_detect("dat")) {
data <- .dl_990_dat(url = url)
}
data <-
data %>%
mutate(type_file = slug_type) %>%
select(type_file, everything())
data
}
#' IRS Tax-Exempt data
#'
#' Acquires and parses information about
#' non-taxable entities
#'
#' @param years vector of years
#' @param types Type of data dump \itemize{
#' \item `990` - Form 990
#' \item `990EZ` - 990 E-Z filing data
#' \item `990PF` - 990 PF data
#' }
#' @param join_ein_data if \code{TRUE} returns EIN data
#'
#' @return `tibble`
#' @export
#'
#' @examples
irs_990_data <-
function(years = 2019,
types = "990",
join_ein_data = F) {
df_inputs <-
expand.grid(year = years,
type = types,
stringsAsFactors = F) %>% as_tibble()
dict_names <- dictionary_irs_990_extract_names()
.irs_990_file_safe <- possibly(.irs_990_file, tibble())
all_data <-
1:nrow(df_inputs) %>%
map_dfr(function(x) {
df_row <- df_inputs[x, ]
data <-
.irs_990_file_safe(year = df_row$year, type = df_row$type)
data
})
if (join_ein_data) {
eins <- all_data %>% distinct(id_ein) %>% pull()
tbl_entities <-
irs_detailed_exempt_entities(
snake_names = T,
regions = NULL,
join_locations = T,
clean_entities = T
)
tbl_entities <-
tbl_entities %>%
filter(id_ein %in% eins)
all_data <- all_data %>%
left_join(tbl_entities %>% select(-matches("data")) %>%
select(-one_of(
c(
"date_tax_period",
"year_month_tax_period",
"code_subsection"
)
)), by = "id_ein")
}
all_data
}
# xml ---------------------------------------------------------------------
# https://www.irs.gov/statistics/soi-tax-stats-domestic-private-foundation-and-charitable-trust-statistics#4
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.