# Nothing
#' Get Content
#'
#' Get daum news content from links.
#'
#' Fetches a single Daum news page and extracts its title, timestamps,
#' press name and body text.
#'
#' @param turl A daum news link (character). When it is empty
#'   (`character(0)`, `NULL`) or resolves to a function (which is what the
#'   default does when no `url` variable shadows `base::url`), no request is
#'   made and the "no news links" placeholder row is returned.
#' @return A `data.frame` with the columns `url`, `datetime`, `edittime`,
#'   `press`, `title` and `content`. On success `datetime` and `edittime`
#'   are `POSIXct` (`edittime` equals `datetime` when the page shows only one
#'   timestamp) and the other columns are character. When the link is
#'   missing or the final URL is not a Daum news article every column holds
#'   `"no news links"`; when the response status is not 200, `url` holds
#'   `turl` and every other column holds `"page is moved."`.
#' @importFrom rvest read_html html_nodes html_text html_attr
#' @importFrom httr GET content user_agent
#' @export
getContent <- function(turl = url) {
  # One-row placeholder with the same column layout as a successful result.
  placeholder <- function(link, msg) {
    data.frame(
      url = link,
      datetime = msg,
      edittime = msg,
      press = msg,
      title = msg,
      content = msg,
      stringsAsFactors = FALSE
    )
  }
  # A selector that matches nothing yields character(0), which would make
  # data.frame() fail on differing row counts; use NA instead.
  or_na <- function(x) {
    if (length(x) == 0) NA_character_ else x
  }

  # Guard on the argument itself (the original tested the unrelated name
  # `url`, so this branch could never be taken).
  if (is.function(turl) || length(turl) == 0) {
    print("no news links")
    return(placeholder("no news links", "no news links"))
  }

  tem <-
    httr::GET(
      turl,
      httr::user_agent("DNH4 by chanyub.park <mrchypark@gmail.com>")
    )

  if (tem$status_code != 200) {
    return(placeholder(turl, "page is moved."))
  }

  # Check the final URL of the response; literal prefix match so the dots in
  # the host name are not treated as regex wildcards.
  if (!startsWith(tem$url, "https://news.v.daum.net/v")) {
    return(placeholder("no news links", "no news links"))
  }

  hobj <- rvest::read_html(tem)

  title <- rvest::html_text(
    rvest::html_nodes(hobj, "div.head_view h3.tit_view")
  )
  Encoding(title) <- "UTF-8"
  title <- or_na(title)

  stamps <- rvest::html_text(
    rvest::html_nodes(hobj, "span.info_view span.txt_info span.num_date")
  )
  Encoding(stamps) <- "UTF-8"
  # Keep only digits, dots and colons, then drop entries that end up empty.
  stamps <- gsub("[^0-9.:]", "", stamps)
  stamps <- trimws(stamps)
  stamps <- stamps[nchar(stamps) > 0]
  # "YYYY.MM.DD." -> "YYYY-MM-DD "; the trailing space restores the
  # date/time separator removed by the clean-up above.
  stamps <-
    gsub("([0-9]{4})\\.([0-9]{2})\\.([0-9]{2})\\.",
         "\\1-\\2-\\3 ",
         stamps)
  stamps <- as.POSIXct(trimws(stamps))

  # First timestamp is the publication time, second (if any) the edit time.
  # Always assign both so neither is left undefined for 0 or >2 matches.
  n_stamps <- length(stamps)
  if (n_stamps == 0) {
    datetime <- as.POSIXct(NA)
    edittime <- as.POSIXct(NA)
  } else {
    datetime <- stamps[1]
    edittime <- stamps[min(n_stamps, 2L)]
  }

  press <- rvest::html_attr(
    rvest::html_nodes(hobj, "div.head_view em.info_cp a.link_cp img"),
    "alt"
  )
  Encoding(press) <- "UTF-8"
  press <- or_na(press)

  content <- rvest::html_text(
    rvest::html_nodes(
      hobj,
      'div.article_view section p,div[dmcf-ptype="general"]'
    )
  )
  Encoding(content) <- "UTF-8"
  content <- trimws(content)
  content <- gsub("\r?\n|\r", " ", content)
  # Wrap each paragraph in markers, join them, collapse the marker pair left
  # by empty paragraphs, then strip the markers: one space-joined string.
  content <- paste0("<p>", content, "<p>")
  content <- paste0(content, collapse = " ")
  content <- gsub("<p> <p>", " ", content)
  content <- gsub("<p>", "", content)

  data.frame(
    url = turl,
    datetime = datetime,
    edittime = edittime,
    press = press,
    title = title,
    content = content,
    stringsAsFactors = FALSE
  )
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.