R/rss_txt_pull.R

Defines functions rss_txt_pull

Documented in rss_txt_pull

#' @title Download an RSS feed to a timestamped text file
#'
#' @description
#' Downloads the resource at \code{url} with \code{download.file()} and saves
#' it as a \code{.txt} file whose name contains the current Unix timestamp.
#' Without a section the file is written to
#' \code{<paper>/<paper>-<timestamp>.txt}; with a section it is written to
#' \code{<paper>/<section>/<paper>-<section>-<timestamp>.txt}. Paths are
#' relative to the current working directory, and the destination directory
#' is created when it does not exist yet.
#'
#' Used primarily for the AMAR scraping project.
#'
#' @param url URL of the RSS page to download (a single string).
#' @param paper Name of the newspaper being scraped; used as the top-level
#'   folder and as the file-name prefix.
#' @param section Paper section being scraped. The default \code{""} (as well
#'   as \code{NULL} or \code{NA}) means no section, in which case the file is
#'   saved directly in the paper folder.
#'
#' @return The status code returned by \code{download.file()} (\code{0} on
#'   success), invisibly.
#'
#' @examples
#' \dontrun{
#' rss_txt_pull("https://example.com/rss.xml", "examplepaper")
#' rss_txt_pull("https://example.com/sports.xml", "examplepaper", "sports")
#' }
#'
#' @export

rss_txt_pull <- function(url, paper, section = "") {
  # Validate early so failures name the offending argument instead of
  # surfacing as an obscure error from paste()/download.file().
  if (length(url) != 1L || is.na(url)) {
    stop("`url` must be a single, non-missing value.", call. = FALSE)
  }
  if (length(paper) != 1L || is.na(paper) || !nzchar(paper)) {
    stop("`paper` must be a single, non-empty value.", call. = FALSE)
  }
  if (length(section) > 1L) {
    stop("`section` must be a single value, \"\", NULL or NA.", call. = FALSE)
  }

  paper <- as.character(paper)

  # NULL, NA and "" all mean "no section"; a bare `section == ""` would
  # error on NULL and NA.
  has_section <- length(section) == 1L && !is.na(section) && nzchar(section)

  # Seconds since the epoch. Sys.time() is already POSIXct, and the number is
  # converted to text exactly as before, so file names keep their format.
  stamp <- as.numeric(Sys.time())

  if (has_section) {
    section <- as.character(section)
    out_dir <- file.path(paper, section)
    base_name <- paste0(paper, "-", section, "-", stamp, ".txt")
  } else {
    out_dir <- paper
    base_name <- paste0(paper, "-", stamp, ".txt")
  }

  # download.file() does not create missing directories, so make sure the
  # destination folder exists before writing into it.
  if (!dir.exists(out_dir) &&
      !dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)) {
    stop("Could not create output directory '", out_dir, "'.", call. = FALSE)
  }

  status <- download.file(url, file.path(out_dir, base_name),
                          quiet = FALSE, mode = "w",
                          cacheOK = TRUE,
                          extra = getOption("download.file.extra"))

  invisible(status)
}
overos93/hdmisc documentation built on Feb. 22, 2022, 10:45 p.m.