# R/lastCached.R

# Defines functions: lastCached

# Documented in: lastCached

#' Get the last Google Cache date for a URL
#'
#' Requests the Google web cache copy of `url` and extracts the snapshot
#' timestamp from the cache header banner (`#google-cache-hdr`).
#'
#' @param url The URL whose cache date should be looked up.
#' @return A character string holding the date and time found in the cache
#'   header, or `"Page not cached"` when the cache page could not be
#'   retrieved, the header is missing, or no timestamp was recognised in it.
#' @examples
#' \dontrun{
#' lastCached("https://www.r-project.org/")
#' }
lastCached <- function(url) {
  not_cached <- "Page not cached"

  cache_url <- paste0(
    "http://webcache.googleusercontent.com/search?q=cache:",
    url
  )

  # Any failure while fetching or parsing (HTTP error, no network, malformed
  # input) is treated as "not cached". Catching it here keeps the error
  # message from being fed to the timestamp regex below.
  header <- tryCatch(
    {
      page <- xml2::read_html(as.character(cache_url))
      rvest::html_nodes(page, "#google-cache-hdr > div:nth-child(1)")
    },
    error = function(e) NULL
  )
  if (length(header) == 0) {
    return(not_cached)
  }

  # Serialise the matched node(s) explicitly instead of relying on implicit
  # coercion of the node set inside str_extract().
  stamps <- stringr::str_extract(
    as.character(header),
    "([0-9]{1,2}..[a-zA-Z]{1,3}..[0-9]{4}|[a-zA-Z]{1,3}.[0-9]{1,2}\\,.[0-9]{4}).[0-9]{2}\\:[0-9]{2}\\:[0-9]{2}"
  )

  # Drop non-matches and keep the first hit so the result is always a single
  # string, even if the selector matched more than one node.
  stamps <- stamps[!is.na(stamps)]
  if (length(stamps) == 0) {
    return(not_cached)
  }
  stamps[[1]]
}
# dschmeh/seoR documentation built on Jan. 7, 2023, 12:19 a.m.