#' Extract Tokens from a Phrase
#'
#' Extract the tokens from a phrase.
#'
#' @param x A list/vetor of phrases
#' @param regex A regular expression to extract tokens. Default extracts tokens:
#' \code{"(?<=\\s)[A-Za-z'-]+(?=\\))"}. Use \code{"(?<=\\s)[A-Za-z'-]+(?=\\))"}
#' to extract words. Use short hand \code{regex = "@@words"} to extract words or
#' \code{regex = "@@tokens"} to extract tokens.
#' @return Returns a list of vectors of extracted tokens.
#' @keywords leaves words tokens
#' @export
#' @examples
#' \dontrun{
#' txt <- c(
#' "Really, I like chocolate because it is good. It smells great.",
#' "Robots are rather evil and most are devoid of decency.",
#' "He is my friend.",
#' "Clifford the big red dog ate my lunch.",
#' "Professor Johns can not teach",
#' "",
#' NA
#' )
#'
#' if(!exists('parse_ann')) {
#' parse_ann <- parse_annotator()
#' }
#' (x <- parser(txt, parse_ann))
#'
#' get_leaves(get_phrase_type_regex(x, "NP"))
#'
#' ## As a dplyr chain
#' library(dplyr)
#' x %>%
#' get_phrase_type_regex("NP") %>%
#' get_leaves()
#'
#' ## Just words (in this case no difference)
#' x %>%
#' get_phrase_type_regex("NP") %>%
#' get_leaves("@@words")
#' }
get_leaves <- function(x, regex = "@tokens"){
if (grepl("^@", regex)) {
regex <- switch(regex,
`@tokens` = "(?<=\\s)[A-Za-z'.?!;:-]+(?=\\))",
`@words` = "(?<=\\s)[A-Za-z'-]+(?=\\))",
stop("Use a valid regex")
)
}
lapply(x, function(y){
unlist(qdapRegex::rm_default(y, pattern=regex, extract=TRUE))
})
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.