R/scrape_cdph.R

Defines functions scrape_cdph

Documented in scrape_cdph

#' Scrape metadata attributes for a dataset page
#'
#' Pulls label text and value text from the page at `url` using two sets of
#' CSS selectors, combines them into one data frame (labels become the column
#' names), adds `Social` and `Name` columns, and keeps a fixed set of
#' metadata columns.
#'
#' NOTE(review): earlier documentation described the source as the CDC
#' metadata repository, while the function name refers to CDPH -- confirm
#' which portal this function targets.
#'
#' @import rvest
#' @import stringr
#' @import magrittr
#' @import dplyr
#' @param url the website url
#' @return dataframe with metadata attributes, restricted to the columns
#'   Name, Tags, Public Access Level, Rights, Program Contact Name,
#'   Program Contact Email, Frequency, Last Updated, Created, Followers,
#'   Organization and Social (in that order)
#' @export
scrape_cdph <- function(url) {
  # Text matched by the first selector set supplies the column names; text
  # matched by the second set supplies the corresponding values.
  headers <- scrape_rvest(
    url,
    ".module-heading , dt , .dataset-label , .tags h3"
  )
  values <- scrape_rvest(
    url,
    ".license span , .nav-item a , .module-shallow .heading , dd , .dataset-details , .well"
  )

  # Start from an empty frame with one column per header and bind the scraped
  # values onto it (presumably as a single row -- depends on what
  # scrape_rvest returns; verify against that helper).
  meta <- rbind(
    data.frame(matrix(ncol = length(headers), nrow = 0)),
    values
  )
  names(meta) <- headers

  # The navigation links are collapsed into one comma-separated string.
  nav_links <- scrape_rvest(url, ".nav-item a")
  meta$Social <- paste(nav_links, collapse = ", ")

  # The page heading is passed through checkNull() before being stored.
  meta$Name <- checkNull(scrape_rvest(url, ".main-responsive-panel h1"))

  # Keep only the columns of interest, in a fixed order. Selecting a column
  # that is absent from `meta` raises an error.
  wanted <- c(
    "Name",
    "Tags",
    "Public Access Level",
    "Rights",
    "Program Contact Name",
    "Program Contact Email",
    "Frequency",
    "Last Updated",
    "Created",
    "Followers",
    "Organization",
    "Social"
  )
  meta[, wanted]
}
kingsuching/Frost2021Package documentation built on March 19, 2022, 11:51 p.m.