# Nothing
#' Separate all matching text into multiple rows
#'
#' Keeps the rows of \code{txt} whose \code{column} matches \code{pattern},
#' extracts every unique match from each of those rows and returns one row
#' per match, with the matched string in a leading \code{match} column.
#'
#' @param txt a tibble, usually results from \code{pmc_text}
#' @param pattern either a regular expression or a vector of words to find in
#'   text. A vector with more than one element is collapsed into a single
#'   alternation in which every word is wrapped in \code{\\b} word boundaries.
#' @param column column name, default "text"
#'
#' @return a tibble with a \code{match} column followed by the columns of
#'   \code{txt}, or \code{NULL} (after a message) if no row matches
#'
#' @note \code{pattern} is passed to \code{grepl} to select rows and to
#'   \code{str_extract_all} to extract the matches
#'
#' @author Chris Stubben
#'
#' @examples
#' # doc <- pmc_xml("PMC2231364")
#' doc <- xml2::read_xml(system.file("extdata/PMC2231364.xml",
#'   package = "tidypmc"))
#' txt <- pmc_text(doc)
#' separate_text(txt, "[ATCGN]{5,}")
#' separate_text(txt, "\\([A-Z]{3,6}s?\\)")
#' # pattern can be a vector of words
#' separate_text(txt, c("hmu", "ybt", "yfe", "yfu"))
#' # wrappers for separate_text with extra step to expand matched ranges
#' separate_refs(txt)
#' separate_tags(txt, "YPO")
#'
#' @export
separate_text <- function(txt, pattern, column = "text") {
  if (!is.data.frame(txt)) stop("txt should be a tibble")
  if (!column %in% names(txt)) stop("column ", column, " is not found")
  ## paste words into | delimited string with word boundaries
  if (length(pattern) > 1) {
    pattern <- paste0("\\b", paste(pattern, collapse = "\\b|\\b"), "\\b")
  }
  ## Detect matching rows *outside* of dplyr::filter: inside filter the names
  ## `pattern`, `txt` and `column` are looked up in the data first, so a
  ## column with one of those names would silently replace the argument.
  hits <- grepl(pattern, txt[[column]])
  if (!any(hits)) {
    message("No match to ", pattern)
    return(NULL)
  }
  ## `!!` injects the precomputed logical vector, bypassing data masking
  x <- dplyr::filter(txt, !!hits)
  ## unique matches per row, so a string repeated in one row is listed once
  y <- lapply(stringr::str_extract_all(x[[column]], pattern), unique)
  n <- lengths(y)
  ## repeat each matching row once per unique match and prepend the matches
  dplyr::bind_cols(match = unlist(y), x[rep(seq_len(nrow(x)), n), ])
}
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.