# R/getAllChapters.R

# Defines functions getAllChapters

# Documented in getAllChapters

#' Scrape Every Chapter of a Fanfiction Story
#'
#' @description
#' \code{getAllChapters} looks up how many chapters a story has (from the
#' options of the page's \code{chap_select} element), calls
#' \code{getChapter} once per chapter and stacks the results.
#'
#' @param returns A list generated by getTitles. It is read by position: the
#'   first element is the site url (e.g. \code{"https://www.fanfiction.net"})
#'   and the second is the href of the story being queried.
#'
#' @import stringr
#' @import dplyr
#' @import qdapRegex
#'
#' @return A data frame with one block of rows per chapter and two columns:
#'   \code{text} (whatever \code{getChapter} returned for that chapter) and
#'   \code{chapter} (the integer chapter number, starting at 1).
#' @examples
#' \dontrun{
#' # Needs network access.
#' getAllChapters(list(url="https://www.fanfiction.net",href="/s/13084808/1/Modern-Marauders"))
#' }
getAllChapters <- function(returns) {
  site_url <- returns[[1]]
  href <- returns[[2]]
  story_url <- paste0(site_url, href)

  # NOTE(review): read_html(), html_nodes() and html_attr() are not provided
  # by stringr, dplyr or qdapRegex; presumably rvest/xml2 is imported
  # elsewhere in the package -- confirm.
  chapter_values <- story_url %>%
    read_html() %>%
    html_nodes(xpath = '//*[@id="chap_select"]/option') %>%
    html_attr("value")

  # No chapter selector on the page means there is a single chapter.
  # unique() collapses option values that appear more than once.
  n_chapters <- max(1L, length(unique(chapter_values)))

  # Matches "/<story id>/<chapter number>" followed by "/" or end of string.
  # Unlike matching on the story slug, this also works when the slug starts
  # with a digit or is missing altogether.
  chapter_pattern <- "(/[0-9]+/)[0-9]+(/|$)"
  if (n_chapters > 1L && !grepl(chapter_pattern, story_url)) {
    stop("Cannot locate the chapter number in URL: ", story_url,
         call. = FALSE)
  }

  pieces <- lapply(seq_len(n_chapters), function(i) {
    # Replacement back-references in sub() are single-digit (\1 .. \9), so
    # "\\1" followed by a multi-digit chapter number is unambiguous.
    chapter_url <- sub(chapter_pattern, paste0("\\1", i, "\\2"), story_url)

    # Same list layout as before: `url` holds the full chapter URL and
    # `href` the untouched story href.
    piece <- as.data.frame(getChapter(list(url = chapter_url, href = href)))
    piece[2] <- i
    names(piece) <- c("text", "chapter")
    piece
  })

  # Bind once instead of growing a data frame inside the loop. No typed
  # empty template is used, so `chapter` keeps its integer type instead of
  # being coerced to character.
  do.call(rbind, pieces)
}
# ekmaus19/absentfan documentation built on Nov. 20, 2019, 3:20 a.m.