inst/doc/extract_meta_data.R

## ----setup, eval=FALSE---------------------------------------------------
#  # load required packages
#  library(mediacloudr)
#  library(httr)
#  library(xml2)
#  library(rvest)

## ----get_article, eval=FALSE---------------------------------------------
#  # define media id as integer
#  story_id <- 1126843780L
#  # download article
#  article <- get_story(story_id = story_id)

## ----download_website, eval=FALSE----------------------------------------
#  # download article
#  response <- GET(article$url[1])
#  # extract article html
#  html_document <- content(response, type = "text", encoding = "UTF-8")
#  # parse website
#  parsed_html <- read_html(html_document)

## ----body_content, eval=FALSE--------------------------------------------
#  # extract article body
#  article_body_nodes <- html_nodes(x = parsed_html, css = ".content-well div p")
#  article_body <- html_text(x = article_body_nodes)
#  # paste character vector to one text
#  article_body <- paste(article_body, collapse = " ")

## ----get_meta_data, eval=FALSE-------------------------------------------
#  # extract meta data from html document
#  meta_data <- extract_meta_data(html_doc = html_document)

Try the mediacloudr package in your browser

Any scripts or data that you put into this service are public.

mediacloudr documentation built on July 24, 2019, 9:03 a.m.