#' Highlight Text
#'
#' \code{hilight_term} - Highlight regex matching substrings.
#'
#' @param text.var The text string variable.
#' @param map A named list or two column dataframe. For lists the names are colors
#' and the vectors are the conditional matches. For dataframes the first column
#' is a color and the secon is the match condition.
#' @param grouping.var The grouping variable(s). Default \code{NULL} generates
#' one plot for all text. Also takes a single grouping variable or a list
#' of 1 or more grouping variables.
#' @param first.appearance logical. If \code{TRUE} the first regex that matches
#' a sentence will be applied, otherwise the last matching regex will be applied.
#' @param ignore.case logical. If \code{FALSE}, the pattern initial matching is
#' case sensitive and if \code{TRUE}, case is ignored during initial matching.
#' @param keep.row.order logical. If \code{TRUE} the \code{grouping.var}
#' argument is used for headings but the row text is not collapsed within the
#' \code{grouping.var}. This produces transcript like formatting.
#' @param \ldots ignored.
#' @rdname hilight
#' @importFrom data.table := .N
#' @export
#' @examples
#' \dontrun{
#'
#' ## highlight regex expressions
#' map1 <- list(
#' `#FF69B4` = c('\\bwe(\'[a-z]+)?\\b'),
#' `#7CFC00` = c('\\bhe is', "he's"),
#' yellow = 'you(\'(ll|[vr]e))?\\b',
#' gray70 = '\\btalk'
#' )
#'
#' term_regex <- with(presidential_debates_2012,
#' hilight_term(dialogue, map1, list(person, time)))
#'
#' plot(term_regex)
#'
#' ## Keep row order
#' term_regex <- with(presidential_debates_2012,
#' hilight_term(dialogue, map1, list(person, time), keep.row.order = TRUE))
#'
#' plot(term_regex)
#'
#' ## tidier
#' library(tidyverse)
#'
#' map1B <- list(
#' `orange` = c('\\bwe(\'[a-z]+)?\\b'),
#' `pink` = c('that[\'a-z]*\\b'),
#' yellow = 'you(\'(ll|[vr]e))?\\b',
#' gray = '\\bI\\b'
#' )
#'
#' presidential_debates_2012 %>%
#' dplyr::filter(person %in% c('ROMNEY', 'OBAMA')) %$%
#' hilight_term(
#' text.var = dialogue,
#' map = map1B,
#' grouping.var = person
#' ) %>%
#' plot()
#'
#' ## highlight tokens
#' map2 <- list(
#' `#FF69B4` = c('talk', 'you'),
#' `#7CFC00` = c('he', "he's", 'we\'re', 'we'),
#' yellow = 'right',
#' gray70 = c('.', '?', '!')
#' )
#'
#' token_match <- with(presidential_debates_2012,
#' hilight_token(dialogue, map2, list(person, time)))
#'
#' plot(token_match)
#'
#'
#' ## highlight sentences regex
#' map3 <- list(
#' `#FF69B4` = 'think',
#' `#7CFC00` = c('he is', "he's", 'you(\'[vr]e)?\\b')
#' )
#'
#'
#' sent_regex <- with(presidential_debates_2012,
#' hilight_sentence(dialogue, map3, list(person, time)))
#'
#' plot(sent_regex)
#'
#' ## highlight sentences index
#' set.seed(10)
#' map_index <- list(
#' yellow = sample(1:2912, 200),
#' orange = sample(1:2912, 200),
#' `#ff69b4` = sample(1:2912, 200)
#' )
#'
#' map_index[[2]] <- map_index[[2]][!map_index[[2]] %in% map_index[[1]]]
#' map_index[[3]] <- map_index[[3]][!map_index[[3]] %in% unlist(map_index[1:2])]
#'
#'
#' sent_index <- with(presidential_debates_2012,
#' hilight_sentence(dialogue, map_index, list(person, time)))
#'
#' plot(sent_index)
#'
#' }
hilight_term <- function(text.var, map, grouping.var = NULL, ignore.case = TRUE,
keep.row.order = FALSE, ...){
stopifnot(is.atomic(text.var))
if(is.null(grouping.var)){
grouping <- rep("all", length(text.var))
} else {
if (is.list(grouping.var) & length(grouping.var)>1) {
grouping <- grouping.var
} else {
grouping <- unlist(grouping.var)
}
}
if(is.null(grouping.var)) {
G <- "all"
} else {
if (is.list(grouping.var)) {
m <- unlist(as.character(substitute(grouping.var))[-1])
G <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
x[length(x)]
}
)
} else {
G <- as.character(substitute(grouping.var))
G <- G[length(G)]
}
}
if (isTRUE(keep.row.order)){
grouping <- data.frame(roworderid = seq_along(text.var), grouping, stringsAsFactors = FALSE)
G <- c('roworderid', G)
}
## to assign classes to text and then link class to color in script
map <- collapse_map(check_map(map))
reps <- paste0(mark_start(map[['class']]), "\\1", mark_end)
text.var <- p(.mgsub(paste0("(", map[['mark']], ")"), reps,
text.var = text.var, fixed=FALSE, ignore.case = ignore.case))
dat <- data.table::data.table(stats::setNames(data.frame(
text.var = text.var,
grouping, stringsAsFactors = FALSE
), c('text.var', G)))[]
## add grouping variables as headers
expr <- parse(text=paste0('paste(', paste(G, collapse = ", "), ", sep = '.')"))[[1]]
dat <- dat[, `grouping` := h3(eval(expr))][,
list(`text.var` = paste(text.var, collapse = " ")), by = 'grouping'][,
`text.var` := paste(grouping, text.var, sep = '\n')][]
if (isTRUE(keep.row.order)){
dat <- dat[, `text.var` := gsub('^<h3>\\d+\\.', '<h3>', text.var)][]
}
body <- paste(dat[['text.var']], collapse = "\n")
out <- list(body = body, style = map_to_style(unique(map[, c('color', 'class')])), html = hilight_html)
class(out) <- 'hilight'
out
}
#' Highlight Text
#'
#' \code{hilight_token} - Highlight matching tokens.
#'
#' @rdname hilight
#' @export
hilight_token <- function(text.var, map, grouping.var = NULL, ignore.case = TRUE,
keep.row.order = FALSE, ...){
mark <- replacement <- element_id <- token_id <- NULL
stopifnot(is.atomic(text.var))
if(is.null(grouping.var)){
grouping <- rep("all", length(text.var))
} else {
if (is.list(grouping.var) & length(grouping.var)>1) {
grouping <- grouping.var
} else {
grouping <- unlist(grouping.var)
}
}
if(is.null(grouping.var)) {
G <- "all"
} else {
if (is.list(grouping.var)) {
m <- unlist(as.character(substitute(grouping.var))[-1])
G <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
x[length(x)]
}
)
} else {
G <- as.character(substitute(grouping.var))
G <- G[length(G)]
}
}
if (isTRUE(keep.row.order)){
grouping <- data.frame(roworderid = seq_along(text.var), grouping, stringsAsFactors = FALSE)
G <- c('roworderid', G)
}
## to assign classes to text and then link class to color in script
map <- data.table::data.table(check_map(map))[,
'replacement' := paste0(mark_start(class))][]
data.table::setkey(map, 'mark')
dat <- data.table::data.table(stats::setNames(data.frame(
text.var = text.var,
grouping, stringsAsFactors = FALSE
), c('text.var', G)))
token_dat <- textshape::split_token(dat[, c('text.var')], 'text.var', lower = FALSE)[,
'mark' := low(text.var, ignore.case)][]
data.table::setkey(token_dat, 'mark')
token_dat <- map[token_dat][,
mark := ifelse(!is.na(replacement), paste0(replacement, text.var, mark_end), text.var)][
order(element_id, token_id)][,
list(text.var = p(paste(mark, collapse = " "))), by = 'element_id'][]
dat <- data.table::data.table(dat[, G, with = FALSE], token_dat[, 'text.var', with = FALSE])
## add grouping variables as headers
expr <- parse(text=paste0('paste(', paste(G, collapse = ", "), ", sep = '.')"))[[1]]
dat <- dat[, `grouping` := h3(eval(expr))][,
list(`text.var` = paste(text.var, collapse = " ")), by = 'grouping'][,
`text.var` := paste(grouping, text.var, sep = '\n')][]
if (isTRUE(keep.row.order)){
dat <- dat[, `text.var` := gsub('^<h3>\\d+\\.', '<h3>', text.var)][]
}
body <- gsub('(\\s+)(<mark class="[a-z]+">[;:,.?!])', '\\2',
paste(dat[['text.var']], collapse = "\n"), perl=TRUE)
out <- list(body = body, style = map_to_style(unique(map[, c('color', 'class')])), html = hilight_html)
class(out) <- 'hilight'
out
}
low <- function(x, lower) {if (lower) tolower(x) else x}
#' Highlight Text
#'
#' \code{hilight_sentence} - Highlight matching sentences.
#'
#' @rdname hilight
#' @export
hilight_sentence <- function(text.var, map, grouping.var = NULL,
first.appearance = TRUE, ignore.case = TRUE, keep.row.order = FALSE, ...){
mark <- element_id <- NULL
stopifnot(is.atomic(text.var))
if(is.null(grouping.var)){
grouping <- rep("all", length(text.var))
} else {
if (is.list(grouping.var) & length(grouping.var)>1) {
grouping <- grouping.var
} else {
grouping <- unlist(grouping.var)
}
}
if(is.null(grouping.var)) {
G <- "all"
} else {
if (is.list(grouping.var)) {
m <- unlist(as.character(substitute(grouping.var))[-1])
G <- sapply(strsplit(m, "$", fixed=TRUE), function(x) {
x[length(x)]
}
)
} else {
G <- as.character(substitute(grouping.var))
G <- G[length(G)]
}
}
if (isTRUE(keep.row.order)){
grouping <- data.frame(roworderid = seq_along(text.var), grouping, stringsAsFactors = FALSE)
G <- c('roworderid', G)
}
map <- check_map(map)
if (all(is.numeric(map[['mark']]))) {
## to assign classes to text and then link class to color in script
map <- data.table::data.table(map)
data.table::setkey(map, 'mark')
class_col <- stats::setNames(map[['mark']], map[['class']])
## split text into sentences by grouping vars
dat <- data.table::data.table(stats::setNames(data.frame(
text.var = text.var,
grouping, stringsAsFactors = FALSE
), c('text.var', G)))[, `mark` := 1:.N][]
data.table::setkey(dat, 'mark')
## left join map
dat <- map[dat]
## pasting and collapsing by turn of talk
dat <- dat[, `text.var` := ifelse(
is.na(class),
text.var,
paste0(mark_start(class), text.var, mark_end))
]
dat <- dat[, `text.var` := p(text.var)][order(mark),][,
list(`text.var` = paste(text.var, collapse = "")), by = c(G)][]
} else {
## to assign classes to text and then link class to color in script
map <- collapse_map(map)
data.table::setkey(map, 'class')
class_col <- stats::setNames(map[['mark']], map[['class']])
## split text into sentences by grouping vars
dat <- stats::setNames(data.frame(
text.var = text.var,
grouping, stringsAsFactors = FALSE
), c('text.var', G))
## determine the matching class
dat <- textshape::split_sentence(dat, 'text.var')[,
`class` := mclass(text.var, pattern = class_col,
ignore.case = ignore.case, first.appearance = first.appearance)][]
data.table::setkey(dat, 'class')
## left join map
dat <- map[dat]
## pasting and collapsing by turn of talk
dat <- dat[, `text.var` := ifelse(
is.na(class),
text.var,
paste0(mark_start(class), text.var, mark_end))
][, list(`text.var` = paste(text.var, collapse = " ")), by = c(G, 'element_id', 'sentence_id')][]
dat <- dat[, list(`text.var` = paste(text.var, collapse = "")), by = c(G, 'element_id')][,
`text.var` := p(text.var)][order(element_id),][,
list(`text.var` = paste(text.var, collapse = "")), by = c(G)][]
}
## add grouping variables as headers
expr <- parse(text=paste0('paste(', paste(G, collapse = ", "), ", sep = '.')"))[[1]]
dat <- dat[, `grouping` := h3(eval(expr))][, `text.var` := paste(grouping, text.var, sep = '\n')][]
if (isTRUE(keep.row.order)){
dat <- dat[, `text.var` := gsub('^<h3>\\d+\\.', '<h3>', text.var)][]
}
body <- paste(dat[['text.var']], collapse = "\n")
out <- list(body = body, style = map_to_style(unique(map[, c('color', 'class')])), html = hilight_html)
class(out) <- 'hilight'
out
}
#' Plots a hilight Object
#'
#' Plots a hilight object
#'
#' @param x A \code{hilight} object.
#' @param path A path ending in .html to output the plot to.
#' @param open logical If \code{TRUE} and interactive, the plot will be opened
#' in the browser or RStudio.
#' @param \ldots ignored.
#' @method plot hilight
#' @export
plot.hilight <- function(x, path = file.path(tempdir(), 'hilight.html'), open = TRUE, ...){
if(isTRUE(getOption('knitr.in.progress'))){
knitout <- paste(x[['style']], x[['body']], sep = "\n")
cat(knitout)
} else {
x[['html']][grepl('^BODYHERE$', x[['html']])] <- x[['body']]
x[['html']][grepl('^STYLEHERE$', x[['html']])] <- x[['style']]
## make the file
cat(x[['html']], file = path)
if (open) {
isRStudio <- (Sys.getenv("RSTUDIO") == "1")
if(isRStudio){
rstudioapi::viewer(path)
}else {
utils::browseURL(path)
}
}
return(invisible(path))
}
}
map_to_style <- function(map){
marks <- sprintf(paste(mark_temp, collapse="\n"), map[['class']], map[['color']])
out <- sprintf(paste(hilight_style, collapse="\n"), paste(marks, collapse="\n"), "%s")
class(out) <- c("highlight_style", class(out))
out
}
hilight_style <- c("<style>", "%s", "h1 { ",
" display: block;", " font-size: 1.2em;", " margin-top: 0.0em;",
" margin-bottom: 0.0em;", " margin-left: 0;", " margin-right: 0;",
" font-weight: bold;", "}", ".indented {", " margin-left: 5%%%%;",
" margin-right: 5%%%%;", "}", "</style>")
mark_temp <- c(
"mark.%s {",
" background-color: %s;",
" color: black;",
"}"
)
hilight_html <- c("<!DOCTYPE html>", "<html lang=\"en\">", "", "<head>", "<meta charset=\"utf-8\">",
"<title>Highlighting</title>", "</head>", "", "STYLEHERE", "", "<body>", "",
"BODYHERE", "", "</body>", "", "</html>")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.