R/utils.R

Defines functions crawl_filter_urls_time

#' Combine candidate urls with the urls of old crawls
#'
#' Computes a cutoff date (today minus `time`), selects the rows of `crawl`
#' whose `date_crawl` is on or before that cutoff, removes the matching urls
#' from `urls`, appends the urls of the selected crawl rows and returns the
#' result sorted by url.
#'
#' @param urls Vector of urls.
#' @param crawl Data frame with at least the columns `url` and `date_crawl`.
#'   `date_crawl` is compared against a `Date`, so it is presumably a `Date`
#'   column (confirm with callers).
#' @param time List with the elements `unit` (`"month"` or `"day"`) and
#'   `value` (number of units to go back from `Sys.Date()`). For months,
#'   `value` must be a whole number.
#'
#' @return Vector of urls, ordered by `dplyr::arrange()`. Duplicates are not
#'   removed.
crawl_filter_urls_time <- function(urls, crawl, time = list("unit" = "month", "value" = 1)) {

  unit <- time$unit
  value <- time$value

  # Fail early with a clear message instead of letting an unsupported unit
  # turn into a NULL cutoff that breaks the filter further down.
  if (!is.character(unit) || length(unit) != 1L ||
      !unit %in% c("month", "day")) {
    stop("`time$unit` must be \"month\" or \"day\".", call. = FALSE)
  }
  if (!is.numeric(value) || length(value) != 1L || is.na(value)) {
    stop("`time$value` must be a single non-missing number.", call. = FALSE)
  }

  today <- Sys.Date()

  if (unit == "month") {
    if (!is.finite(value) || value != trunc(value)) {
      stop("`time$value` must be a whole number of months.", call. = FALSE)
    }

    # Calendar-month arithmetic in base R. The day of month is clamped to the
    # last day of the target month, so e.g. 31 March minus one month gives the
    # last day of February rather than a missing date.
    lt <- as.POSIXlt(today)
    day_of_month <- lt$mday
    lt$mday <- 1L
    lt$mon <- lt$mon - as.integer(value)
    target_month_start <- as.Date(lt)
    lt$mon <- lt$mon + 1L
    target_month_end <- as.Date(lt) - 1
    date_start_keep <- min(
      target_month_start + (day_of_month - 1L),
      target_month_end
    )
  } else {
    date_start_keep <- today - value
  }

  # Crawl rows dated on or before the cutoff; computed once, used twice.
  crawl_old <- dplyr::filter(crawl, .data$date_crawl <= date_start_keep)

  # NOTE(review): urls crawled after the cutoff are not removed from `urls`
  # here; only urls found in `crawl_old` are dropped before `crawl_old` is
  # appended again. Confirm that this is the intended selection.
  dplyr::tibble(url = urls) %>%
    dplyr::anti_join(crawl_old, by = "url") %>%
    dplyr::bind_rows(crawl_old) %>%
    dplyr::arrange(.data$url) %>%
    dplyr::pull("url")
}
stephLH/webr documentation built on Dec. 25, 2019, 2:54 p.m.