R/get-wp-posts-after.R

Defines functions get_wp_posts_after

Documented in get_wp_posts_after

#' @title Retrieve WordPress Posts After a Certain Date
#'
#' @description Retrieve the posts published on a WordPress site after a given
#' date. Posts are requested from the WordPress REST API
#' (/wp-json/wp/v2/posts) 100 at a time, ordered by descending ID, until the
#' site answers with HTTP 400 or with an empty page.
#'
#' @param root_url The WordPress site for which posts are sought to be
#' retrieved, e.g. "https://domain.com". Trailing slashes are ignored.
#' @param after_date The date after which posts should be returned. Should be
#' in "YYYY-MM-DD" format (a single character string or Date); "T00:00:00" is
#' appended before the value is sent to the API.
#' @return A tibble with one row per post and the columns id, date, url,
#' title, content, author, tags and categories. tags and categories hold the
#' tag IDs and category IDs as comma-separated strings ("" when a post has
#' none). An empty tibble is returned when no posts are found.
#'
#' @examples
#' \dontrun{
#' get_wp_posts_after(root_url = "https://domain.com", after_date = "2021-01-01")
#' }
#'
#' @export get_wp_posts_after
#' @import tibble
#' @import httr
#' @import dplyr
#' @importFrom glue glue
#' @importFrom glue glue_collapse

get_wp_posts_after <- function(root_url, after_date) {
  # Fail early with a clear message instead of sending a malformed request
  # (e.g. `after_date = NULL` used to become the bare string "T00:00:00").
  if (length(root_url) != 1 || is.na(root_url)) {
    stop("`root_url` must be a single, non-missing URL.", call. = FALSE)
  }
  if (length(after_date) != 1 || is.na(after_date)) {
    stop(
      "`after_date` must be a single, non-missing date in \"YYYY-MM-DD\" format.",
      call. = FALSE
    )
  }

  # Avoid a double slash in the request path when the root ends with "/".
  base_url <- sub("/+$", "", as.character(root_url))
  after_stamp <- paste0(as.character(after_date), "T00:00:00")

  # Collapse a list of IDs into "1,2,3"; an empty or missing list gives "".
  collapse_ids <- function(ids) {
    paste(unlist(ids), collapse = ",")
  }

  # Turn one parsed post into a one-row tibble.
  post_to_row <- function(post) {
    tibble(
      id = post$id,
      date = post$date,
      url = post$guid$rendered,
      title = post$title$rendered,
      content = post$content$rendered,
      author = post$author,
      tags = collapse_ids(post$tags),
      categories = collapse_ids(post$categories)
    )
  }

  pages <- list()
  n <- 1

  repeat {
    request_url <- paste0(
      base_url, "/wp-json/wp/v2/posts?per_page=100&page=", n,
      "&after=", after_stamp, "&orderby=id&order=desc"
    )
    reply <- GET(request_url, accept_json())
    status <- status_code(reply)

    # HTTP 400 is what the loop has always treated as "no more pages". Any
    # other HTTP error is a real failure and must not be parsed as posts.
    if (status >= 400 && status != 400) {
      stop(
        "Request to ", request_url, " failed with HTTP status ", status, ".",
        call. = FALSE
      )
    }

    posts <- if (status == 400) list() else content(reply)

    # Checking the length alone is safe for empty pages and for pages holding
    # fewer than three posts, which previously raised "subscript out of bounds".
    if (length(posts) == 0) {
      print(paste0('out of content after ', n, ' pages'))
      break
    }

    # Bind once per page rather than once per post to avoid quadratic copying.
    pages[[length(pages) + 1]] <- bind_rows(lapply(posts, post_to_row))
    n <- n + 1
  }

  if (length(pages) == 0) {
    return(tibble())
  }
  bind_rows(pages)
}

Try the wordpressr package in your browser

Any scripts or data that you put into this service are public.

wordpressr documentation built on June 2, 2021, 1:06 a.m.