.extract_sam_attachment_opportunity <-
function(url = "https://beta.sam.gov/api/prod/opps/v3/opportunities/5148f266294f4bb09d8f50fc4c7c8fa7/resources/download/zip",
return_message = F) {
if (return_message) {
glue("Extracting attachment data from {url}") %>% message()
}
outfile <- tempfile("download", fileext = ".zip")
# file <- curl::curl_download(url, outfile)
file <- download.file(url, outfile)
unz_files <- outfile %>% unzip(exdir = "attachments")
data <-
unz_files %>%
map_dfr(function(x) {
is_pdf <- x %>% str_detect(".pdf")
if (is_pdf) {
df_metadata <- pdftools::pdf_info(pdf = x) %>% flatten_df()
df_metadata <- df_metadata %>%
mutate_if(is.character,
list(function(x){
case_when(x == "" ~ NA_character_,
TRUE ~ x)
})) %>%
.remove_na()
df_metadata <-
.munge_data_sam_names(df_metadata)
date_cols <- df_metadata %>% select(matches("date")) %>% names()
if (length(date_cols) >0) {
df_metadata <-
df_metadata %>%
mutate_at(date_cols,
list(function(x) {
as.POSIXct(x, origin = "1970-01-01", tz = "UTC") %>% as.Date()
}))
}
text <- pdftools::pdf_text(x)
pdfText <- text %>% str_split("\n") %>%
flatten_chr() %>%
str_squish() %>%
discard(function(x) {
x == ""
}) %>%
discard(function(x) {
x %>% str_detect("^Page")
}) %>%
str_to_upper() %>%
str_c(collapse = " ")
df_metadata <-
df_metadata %>%
mutate(pdfText)
df_metadata %>%
.munge_data()
}
}) %>%
mutate_if(is.numeric, as.numeric)
unz_files %>% unlink()
file %>% unlink()
unlink("attachments", recursive = T, force = T)
gc()
data <-
data %>%
mutate(urlOpportunityAttachmentZIP = url)
data
}
#' OCR SAM Opportunities
#'
#' @param urls vector of URLS
#' @param return_message if \code{TRUE} returns a message
#'
#' @return
#' @export
#'
#' @examples
extract_sam_attachment_opportunity_urls <-
function(urls = NULL, return_message = F, snake_names = T) {
if (length(urls) == 0) {
stop("Need SAM opportunity URLS")
}
.extract_sam_attachment_opportunity_safe <-
possibly(.extract_sam_attachment_opportunity, tibble())
data <-
unique(urls) %>%
future_map_dfr(function(url) {
.extract_sam_attachment_opportunity_safe(url = url, return_message = return_message)
})
if (snake_names) {
data <- data %>% janitor::clean_names()
}
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.