R/parse_robotstxt.R

Defines functions parse_robotstxt

Documented in parse_robotstxt

#' function parsing robots.txt
#' @param  txt content of the robots.txt file
#' @return a named list with useragents, comments, permissions, sitemap
#' @export
parse_robotstxt <- function(txt){
  # return
  res <-
    list(
      useragents  = rt_get_useragent(txt),
      comments    = rt_get_comments(txt),
      permissions = rt_get_fields(txt, "allow"),
      crawl_delay = rt_get_fields(txt, "crawl-delay"),
      sitemap     = rt_get_fields(txt, "sitemap"),
      host        = rt_get_fields(txt, "host"),
      other       =
        rt_get_fields(
          txt,
          regex="sitemap|allow|user-agent|host|crawl-delay",
          invert=TRUE
        )
    )
  return(res)
}
ropensci/robotstxt documentation built on Jan. 7, 2023, 12:37 p.m.