R/essay_vector.R

Defines functions essay_vector

Documented in essay_vector

#' Tokenise a student essay into a vector of terms.
#'
#' Reads the essay text from a Word or XML document, replaces line breaks and
#' punctuation with spaces, splits the text on whitespace, and drops English
#' stop words (the "smart" list from the stopwords package).
#'
#' For Word documents the leading boilerplate is stripped first: everything up
#' to and including the "Revised and Edited for Student Use" banner line when
#' it is present, otherwise everything up to and including "Grade <digit>".
#'
#' @param x path to student essay in the form of a/an .xml, .doc, or .docx
#'   document. An xml document must have the writing stored in a node named
#'   "essay"
#' @return a character vector of terms to be sent to other functions (viz.
#'   lexical_uniqueness, mean_use_frequency, term_frequencies). Stop words are
#'   matched case-insensitively; the remaining terms keep their original case.
#'   Empty strings are never returned.
#' @export

essay_vector <- function(x) {
  stopifnot(is.character(x), length(x) == 1L)

  stop_terms <- stopwords::stopwords(language = "en", source = "smart")

  # Both Word formats go through readtext; anything else is parsed as XML.
  extension <- tolower(tools::file_ext(x))

  if (extension %in% c("doc", "docx")) {
    raw_text <- readtext::readtext(x)$text
    has_banner <- grepl("Revised and Edited for Student Use", raw_text)
    # ifelse() keeps this vectorised in case readtext returns several documents.
    raw_text <- ifelse(
      has_banner,
      gsub("^.*Use[\n]", " ", raw_text),
      gsub("^.*Grade\\s[[:digit:]]", " ", raw_text)
    )
  } else {
    essay_nodes <- xml2::xml_find_all(xml2::read_xml(x), ".//essay")
    raw_text <- xml2::xml_text(essay_nodes)
  }

  no_breaks <- gsub("[\r\n]", " ", raw_text)
  no_punct <- gsub("[[:punct:]]", " ", no_breaks)

  # Splitting on whitespace runs collapses repeated spaces in one step;
  # as.character() turns the NULL from an empty input into character(0).
  tokens <- as.character(unlist(strsplit(no_punct, "\\s+"), use.names = FALSE))
  tokens <- tokens[nzchar(tokens)]

  # Stop words must be removed per token: a "^word$" pattern applied to the
  # whole essay string can never match inside running text.
  tokens[!(tolower(tokens) %in% stop_terms)]
}
cownr10r/teachr documentation built on Nov. 4, 2019, 9:14 a.m.