#' Nay 20.4 QA
#'
#' @return
#' @export
#'
#' @examples
navy_20.4_qa <-
function() {
page <- read_html("https://navysbir.com/n20_4/n204-topic-qa.htm")
tables <- page %>% html_table(fill = T)
data <- tables[[1]] %>% as_tibble() %>% select(1:4)
data <- data %>% slice(3:nrow(data)) %>%
setNames(c("id_topic", "date", "type", "text_question_answer"))
data <- data %>%
separate(
"id_topic",
into = c("id_topic", "focus"),
sep = "Focus",
convert = T,
extra = "merge",
fill = "right"
)
data <- data %>%
filter(!is.na(id_topic), !id_topic %>% str_detect("Topic")) %>%
mutate(type = case_when(type == "Q." ~ "question",
TRUE ~ "answer"),
date = mdy(date)) %>%
separate_rows(text_question_answer, sep = "[1-9].") %>%
filter(text_question_answer != "") %>%
mutate(text_question_answer = str_squish(text_question_answer))
data <- data %>%
filter(!id_topic %>% str_detect("How to submit a questionThe"))
data <- data %>%
group_by(id_topic, focus, date, type) %>%
summarise(text_question_answer = str_c(text_question_answer, collapse = " ")) %>%
ungroup() %>%
munge_data() %>%
mutate(text_question_answer = text_question_answer %>% str_remove_all("\\.|\u001a|\\(|\\)") %>%
str_replace_all("\\ , ", "\\ ")) %>%
filter(id_topic != "TOP")
data
}
#' Navy Selections
#'
#' @return
#' @export
#'
#' @examples
navy_20.4_selections <-
function(join_fpds_data = F) {
page <- read_html("https://navysbir.info/20_4-selections.html")
id_topic <- page %>% html_nodes(".normtable td:nth-child(2)") %>% html_text()
name_awardee <-
page %>% html_nodes(".normtable td:nth-child(3)") %>% html_text() %>% str_squish()
location_awardee <-
page %>% html_nodes(".normtable td:nth-child(4)") %>% html_text() %>% str_squish()
data <-
tibble(id_topic, name_awardee, location_awardee) %>%
filter(id_topic !="") %>%
munge_data() %>%
entities::refine_columns(entity_columns = "name_awardee") %>%
clean_names()
data <- data %>%
filter(name_awardee != "TBA")
if (join_fpds_data) {
fpds_csv_safe <- possibly(fpds_csv, tibble())
df_vendor_names <-
data %>%
distinct(name_awardee_clean) %>%
mutate(name_awardee_search = name_awardee_clean %>% str_remove_all("LLC$|INC$|CORPORATION$") %>% str_squish())
df_vendors <-
unique(df_vendor_names$name_awardee_search) %>%
map_dfr(function(x){
d <- fpds_csv_safe(global_vendor_name = x, snake_names = T)
if (length(d) == 0) {
return(tibble())
}
d %>%
mutate(name_awardee_search = x)
})
df_vendors <- df_vendors %>%
group_by(name_awardee_search) %>%
summarise(
count_contracts = n_distinct(id_contract_analysis),
count_actions = n(),
amount_contracts = sum(amount_obligation, na.rm = T),
date_first_contract = min(date_obligation),
date_most_recent_contract = max(date_obligation),
count_agencies_awards = n_distinct(id_cgac_agency),
count_offices_awards = n_distinct(id_office_award),
count_psc_awards = n_distinct(code_product_service)
) %>%
left_join(
df_vendors %>% count(name_awardee_search, id_duns, wt = amount_obligation, name = "amount") %>%
group_by(name_awardee_search) %>% filter(amount == max(amount)) %>% slice(1) %>%
ungroup() %>% select(id_duns, name_awardee_search), by = "name_awardee_search"
)
df_vendor_names <- df_vendor_names %>%
left_join(df_vendors %>% mutate_if(is.numeric,as.numeric), by = "name_awardee_search") %>%
munge_data()
data <-
data %>% left_join(df_vendor_names, by = "name_awardee_clean") %>%
select(id_topic, name_awardee, name_awardee_clean, id_duns, everything())
}
data
}
# https://navysbir.com/topics20_4.htm
#' Navy Open Topic SBIRS
#'
#' @return
#' @export
#'
#' @examples
navy_open_topic_sbir_20.4 <- function(include_qa = T,unnest_data = F) {
if (include_qa) {
unnest_data <- F
}
page <- read_html("https://navysbir.com/topics20_4.htm")
nodes <- page %>% html_nodes("ul .tnlnk")
topics <- nodes %>% html_text()
slugs <- nodes %>% html_attr("href")
urls <- str_c("https://navysbir.com/",slugs)
data <-
tibble(topics, url_topic = urls) %>%
separate(topics,
into = c("id_topic", "name_topic"),
sep = " ") %>%
munge_data()
data <- 1:nrow(data) %>%
map_dfr(function(x){
df_row <- data[x,]
df_text <- .parse_navy_url(url = df_row$url_topic)
df_row %>%
mutate(data_text = list(df_text %>% select(-url_topic)))
})
if (include_qa) {
df_qa <- navy_20.4_qa()
df_qa <- df_qa %>%
mutate(type = str_c("text_", type, sep = "") %>% str_to_lower()) %>%
spread(type, text_question_answer) %>%
group_by(id_topic) %>%
nest() %>%
rename(data_question_answer = data)
data <- data %>%
left_join(df_qa, by = "id_topic")
}
if (unnest_data) {
data <- data %>%
unnest()
}
data
}
.parse_navy_url <-
function(url = "https://navysbir.com/n20_4/n204-a01.htm") {
page <- read_html(url)
text <- page %>% html_text() %>% str_split("\n") %>% flatten_chr() %>% str_squish() %>%
discard(function(x) {
x == ""
})
data <- tibble(text) %>%
mutate(row = 1:n())
data <- data %>%
mutate(is_parent = text %>% str_detect("^[A-Z][A-Z]"))
df_sections <-
data %>% filter(is_parent) %>%
separate(text, into = c("section", "text"), sep = "\\:") %>%
select(section, text_new = text, row)
remove <- df_sections$section %>% str_c(collapse = "|")
data <-
data %>%
left_join(df_sections, by = "row") %>%
mutate(text = case_when(is.na(text_new) ~ text,
TRUE ~ text_new)) %>%
select(section, text) %>%
fill(section) %>%
filter(!is.na(section)) %>%
mutate_all(str_squish) %>%
munge_data() %>%
group_by(section) %>%
summarise(text = str_c(text, collapse = " ")) %>%
filter(!is.na(text))
links <- page %>% html_nodes("a") %>% html_attr("href")
links <- links[links %>% str_detect("http")] %>%
discard(function(x){
x %>% is.na()
}) %>% str_c(collapse = " | ")
if (length(links) >0) {
data <- data %>%
bind_rows(tibble(section = "LINKS", text = links))
}
data <-
data %>%
mutate(url_topic = url)
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.