R/getMainCategory.R

Defines functions getMainCategory

Documented in getMainCategory

#' Get News Main Categories
#'
#' Returns the names and ids of the main Daum news categories, either from
#' the cached `category_main` object or by scraping the breaking-news page.
#'
#' @param fresh A single, non-missing `TRUE` or `FALSE`. If `TRUE`, the
#'   categories are downloaded from <https://news.daum.net/breakingnews>.
#'   The default, `FALSE`, returns the cached `category_main` object
#'   without touching the network.
#' @return When `fresh = TRUE`, a data.frame with two character columns:
#'   `cate_name` (the whitespace-trimmed tab label) and `url` (the tab's
#'   `href` with the `/breakingnews/` prefix removed). The first tab found
#'   on the page is not included. When `fresh = FALSE`, the cached
#'   `category_main` object (presumably of the same shape).
#' @export
#' @importFrom rvest read_html html_nodes html_text html_attr
#' @examples
#'   getMainCategory()
#'
getMainCategory <- function(fresh = FALSE) {
  # Fail early with a message that names the argument; otherwise NA/NULL
  # would only surface as an opaque error from `if (!fresh)`.
  # Numeric flags (e.g. 1/0) are still accepted, as they were before.
  is_flag <- (is.logical(fresh) || is.numeric(fresh)) &&
    length(fresh) == 1L &&
    !is.na(fresh)
  if (!is_flag) {
    stop("`fresh` must be a single non-missing TRUE or FALSE.", call. = FALSE)
  }

  if (!fresh) {
    return(category_main)
  }

  root <- "https://news.daum.net/breakingnews"
  hobj <- rvest::read_html(root)
  hobj_nodes <-
    rvest::html_nodes(hobj, "div#mArticle ul.tab_nav li a")

  # An empty match most likely means the page layout changed; say so rather
  # than silently handing back an empty data.frame.
  if (length(hobj_nodes) == 0L) {
    warning(
      "No category links found at ", root,
      "; the page layout may have changed.",
      call. = FALSE
    )
  }

  titles <- trimws(rvest::html_text(hobj_nodes))
  links <- rvest::html_attr(hobj_nodes, "href")
  # The pattern is a literal path fragment, not a regular expression.
  links <- gsub("/breakingnews/", "", links, fixed = TRUE)

  urls <- data.frame(
    cate_name = titles,
    url = links,
    stringsAsFactors = FALSE
  )

  # Drop the first tab (presumably an "all news" entry rather than a real
  # category -- TODO confirm against the live page).
  urls[-1, , drop = FALSE]
}
forkonlp/DNH4 documentation built on July 5, 2023, 6:39 p.m.