#' Tag Dropper
#' @description Drop redundant parses. Every element of the input is treated
#'   as a tagged token whose tag is a trailing underscore followed by
#'   upper-case letters (e.g. "run_VB"). A tagged form keeps its tag only when
#'   it occurs more than length(pos_lists) * (1 - sparse) times in total AND at
#'   least one other tagged form passing that same threshold shares its bare
#'   token. Every other tagged form is replaced by its bare token.
#' @param pos_lists a list of character vectors of tagged tokens
#' @param sparse numeric scalar controlling the frequency threshold: a tagged
#'   form must occur more than length(pos_lists) * (1 - sparse) times to be
#'   eligible to keep its tag (default 0.99)
#' @return a list of character vectors, one per element of pos_lists, with
#'   redundant tags removed
#' @importFrom magrittr %>%
#' @import dplyr
dropRedundantTags <- function(pos_lists, sparse = 0.99) {
  all_tags <- unlist(pos_lists, use.names = FALSE)

  # With no tokens at all there is nothing to count; bail out before building
  # a column-less data frame, where `raw` would resolve to base::raw and the
  # pipeline would fail.
  if (length(all_tags) == 0L) {
    return(lapply(pos_lists, unlist))
  }

  # One row per distinct tagged form, with its total number of occurrences and
  # the bare token obtained by stripping the trailing "_TAG" suffix.
  tag_counts <- data.frame(raw = all_tags, stringsAsFactors = FALSE) %>%
    group_by(raw) %>%
    summarize(count = n()) %>%
    mutate(token = sub("_[A-Z]+$", "", raw))

  # Tagged forms that keep their tag: frequent enough, and ambiguous (the same
  # bare token shows up under more than one frequent tagged form).
  min_count <- length(pos_lists) * (1 - sparse)
  keepers <- tag_counts %>%
    filter(count > min_count) %>%
    filter(token %in% token[duplicated(token)])

  # Everything else has its tag stripped.
  switchers <- tag_counts[!tag_counts$raw %in% keepers$raw, ]

  lapply(pos_lists, function(x) {
    unlist(plyr::mapvalues(
      x,
      from = switchers$raw,
      to = switchers$token,
      warn_missing = FALSE
    ))
  })
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.