#' Extract Discourse Connectors in Context
#'
#' Extract discourse connectors in context. This is the flexible default template
#' for modular use in specific discourse connector functions.
#'
#' @param text.var The text variable.
#' @param grouping.var The grouping variables. Also takes a single
#' grouping variable or a list of 1 or more grouping variables.
#' @param n.before The number of rows before the indexed occurrence.
#' @param tot logical. If \code{TRUE} condenses sub-units (e.g., sentences)
#' into turns of talk for that \code{grouping.var}.
#' @param n.after The number of rows after the indexed occurrence.
#' @param ord.inds logical. If \code{TRUE} inds is ordered least to greatest.
#' @param markup A character vector of length two indicating the left (element
#' 1) and right (element 2) boundary markers to use to highlight the
#' discourse connectors. Use \code{c("", "")} to not mark the discourse
#' connectors.
#' @param name A string indicating the name to search for within the internal
#' data sets, typically the function's name. Generally, for internal use.
#' @param \ldots Other arguments passed to \code{\link[qdap]{termco}}.
#' @section Arguments2: \code{discourse_connector} & \code{discourse_connector_logical}
#' require 3 arguments (passed to ellipsis or internally through the \code{name}
#' argument) that are responsible for checking for terms and naming them
#' in output. Typically \code{regex} and \code{terms} are searching for the
#' same thing but expressed as a regular expression or a simplified
#' \code{\link[qdap]{termco}} approach to terms searching. Generally, these
#' arguments are used internally but are documented here:
#' \itemize{
#' \item \code{regex} - A list of strings or a single string of regular expression(s) used to search for expressions in the transcript excerpts and mark them up.
#' \item \code{terms} - A list of terms to search for in \code{\link[qdap]{termco}} and \code{\link[qdap]{dispersion_plot}}.
#' \item \code{names} - A vector of names that corresponds to the number of regular expressions searched for.
#' }
#' @return Returns a list of 2 or more elements:
#' \item{counts}{A \code{\link[qdap]{termco}} object of discourse connector counts.}
#' \item{Context 1}{A \code{\link[qdap]{trans_context}} object of the discourse connectors in context. Note the name of this object is supplied by \code{names} element one.}
#' \item{Context 2...n}{An optional (not returned if \code{regex} is of length one) \code{\link[qdap]{trans_context}} object of the discourse connectors in context. Note the name of this (these) object(s) is supplied by \code{names} element 2...n.}
#' @references Kalajahi, S. A. R., Abdullah, A. N., Mukundan, J., & Tannacito, D. J. (2012) \href{http://goo.gl/eS0OwV}{Discourse connectors: An overview of the history, definition and classification of the term}. \emph{World Applied Sciences Journal, 19}(11), 1659-1673.
#'
#' @keywords discourse connector
#' @export
#' @importFrom qdap trans_context termco
#' @seealso \code{\link[qdap]{termco}},
#' \code{\link[qdap]{trans_context}}
#' @rdname discourse_connector
#' @include utils.R is.isolate.R is.first_in_set.R is.within_n_preceding_words.R internal_data_bases_list.R internal_data_regex_list.R internal_data_term_list.R internal_functions_list.R
#' @examples
#' ## Marker with one type (just: "I")
#' out1 <- with(pres_debates2012[1:200, ], discourse_connector(dialogue, person,
#' names = c("I"),
#' regex = "\\bI('[a-z]+)*\\b",
#' terms = list(I = c(" I ", " I'"))
#' ))
#'
#' out1[[1]]
#' out1[[2]]
#' plot(out1)
#'
#' ## Marker with two types (both: "I" & "you")
#' out2 <- with(pres_debates2012[1:200, ], discourse_connector(dialogue, person,
#' names = c("I", "you"),
#' regex = list(
#' I = "I('[a-z]+)*\\b",
#' you = "(\\b[Yy]ou('[a-z]+)*\\b)"
#' ),
#' terms = list(
#' I = c(" I ", " I'"),
#' you = c(" you ", " you'")
#' )
#' ))
#' out2[[1]]
#' out2[[2]]
#' out2[[3]]
#'
#' ## Save externally use .doc or .txt
#' ## print(out2[[2]], file="you_I.doc")
#'
#' ## Key Words in Context
#' ## Determine top 15 words
#' topterms <- qdap::freq_terms(
#' qdap::pres_debates2012[["dialogue"]],
#' top = 20,
#' at.least = 5,
#' stopwords = c(qdapDictionaries::contractions[[1]], qdapDictionaries::Top200Words)
#' )
#'
#' ## Marker with top 15 words
#' out3 <- with(pres_debates2012, discourse_connector(dialogue, person,
#' names = c("top15"),
#' regex = list(
#' top15 = qdapRegex::pastex(qdapRegex::group(qdapRegex::bind(topterms[[1]])))
#' ),
#' terms = list(
#' top15 = qdap::spaste(topterms[[1]])
#' )
#' ))
#' out3[[1]]
#' out3[[2]]
#' plot(out3)
discourse_connector <- function(text.var, grouping.var, n.before = 1, tot = FALSE,
    n.after = n.before, ord.inds = TRUE, markup = c("<<", ">>"),
    name = NULL, ...){

    ## Derive a display name for the grouping variable(s) from the unevaluated
    ## argument expression, e.g. `list(df$a, b)` -> "a&b" and `df$a` -> "a".
    group.expr <- as.character(substitute(grouping.var))
    if (is.list(grouping.var)) {
        ## Drop the function name (e.g. "list") and keep what follows the
        ## last `$` of each piece.
        pieces <- strsplit(group.expr[-1], "$", fixed = TRUE)
        ## vapply keeps the result a character vector; the guard covers
        ## empty pieces (e.g. the blank row index in `df[, cols]`).
        last.piece <- vapply(pieces, function(x) {
            if (length(x) == 0) "" else x[length(x)]
        }, character(1))
        group.nms <- paste(last.piece, collapse = "&")
    } else {
        group.nms <- group.expr[length(group.expr)]
    }

    ## Either grab the regex, names, and terms from the internal data sets
    ## (looked up by `name`) or grab them from the ellipsis.
    myargs <- list(...)
    if (!is.null(name)) {
        terms <- term_list[[name]]
        regex <- regex_list[[name]]
        ## A failed lookup would otherwise surface as a request to supply
        ## `regex`/`terms`, which hides the real cause.
        if (is.null(terms) || is.null(regex)) {
            stop(sprintf(
                "`name = \"%s\"` was not found in the internal term/regex data sets",
                paste(name, collapse = ", ")
            ), call. = FALSE)
        }
        ## Several regexes: label each context by the regex names; a single
        ## regex is labelled by `name` itself.
        connector.names <- if (length(regex) > 1) names(regex) else name
    } else {
        regex <- myargs[["regex"]]
        connector.names <- myargs[["names"]]
        terms <- myargs[["terms"]]
        ## Remove the function-control arguments so only the remaining
        ## arguments are forwarded to the counting step.
        myargs[c("regex", "names", "terms", "markup.regex")] <- NULL
    }

    stopifnot(length(markup) == 2)

    ## All three of regex/terms/names are required; report the missing ones.
    missing.args <- vapply(list(regex, terms, connector.names), is.null,
        logical(1))
    if (any(missing.args)) {
        arg.hints <- c("`regex = ??`", "`terms = ??`", "`names = ??`")
        stop(sprintf("please supply the following arguments: %s\n\n%s",
            paste(arg.hints[missing.args], collapse = ", "),
            "See section Arguments2 in `?discourse_connector` for details."
        ))
    }

    discmark_helper(text.var = text.var, grouping.var = grouping.var,
        n.before = n.before, tot = tot, n.after = n.after, ord.inds = ord.inds,
        markup = markup, names = connector.names, terms = terms, regex = regex,
        group.nms = group.nms, myargs = myargs)
}
discmark_helper <- function(text.var, grouping.var, n.before = 1, tot,
    n.after = n.before, ord.inds, markup, names, terms, regex, markup.regex,
    group.nms, myargs) {

    ## Internal workhorse: counts the terms per group, builds one marked-up
    ## context excerpt per regex, and bundles everything into a
    ## "discourse_connector" object.

    ## Counts (termco): call directly when `myargs` is an empty named list,
    ## otherwise forward the extra arguments through do.call.
    no.extra.args <- identical(myargs, structure(list(), .Names = character(0)))
    if (no.extra.args) {
        counts <- qdap::termco(text.var, grouping.var, terms)
    } else {
        myargs[["text.var"]] <- text.var
        myargs[["grouping.var"]] <- grouping.var
        myargs[["match.list"]] <- terms
        counts <- do.call(qdap::termco, myargs)
    }

    ## Correct the grouping variable name(s) in the counts object
    counts <- termco_group_name_replace(x = counts, nms = group.nms)

    ## Build the marked transcript excerpt for a single regex; yields NULL
    ## (with a message) when the regex matches no element of `text.var`.
    mark_matches <- function(pattern) {
        hits <- grep(pattern, text.var)
        if (length(hits) == 0L) {
            message(sprintf("The following regex did not return any indices:\n\n%s", pattern))
            return(NULL)
        }

        context <- qdap::trans_context(text.var = text.var,
            grouping.var = grouping.var, inds = hits, n.before = n.before,
            tot = tot, n.after = n.after, ord.inds = ord.inds)

        ## Wrap every match in the event rows with the markup boundaries
        is.event <- context[["event"]]
        context[is.event, "text"] <- gsub(
            sprintf("(%s)", pattern),
            paste0(markup[1], "\\1", markup[2]),
            context[is.event, "text"]
        )
        context
    }
    marked <- lapply(regex, mark_matches)

    ## Flatten: counts first, then one element per regex
    result <- c(list(counts), marked)
    names(result) <- c("counts", names)

    ## Keep the raw inputs in an environment so the plot method can reuse them
    meta <- list2env(list(text.var = text.var, grouping.var = grouping.var,
        terms = terms, group.nms = group.nms))
    structure(result, class = "discourse_connector", meta = meta)
}
#' Prints a discourse_connector Object
#'
#' Prints a discourse_connector object.
#'
#' @param x The discourse_connector object.
#' @param \ldots ignored.
#' @method print discourse_connector
#' @export
print.discourse_connector <- function(x, ...) {
    ## Print a plain-list view of the object: the "meta" environment and the
    ## class attribute are dropped from a copy so they do not clutter the
    ## console output.
    printable <- x
    attr(printable, "meta") <- NULL
    class(printable) <- "list"
    print(printable)

    ## Return the original object (class and meta intact) invisibly so the
    ## result can still be piped on or plotted after printing.
    invisible(x)
}
#' Plots a discourse_connector Object
#'
#' Plots a discourse_connector object.
#'
#' @param x The discourse_connector object.
#' @param grouping.var The grouping variables. Taken from \code{x} unless
#' supplied by the user.
#' @param plot logical. If \code{TRUE} the plot will automatically plot. The
#' user may wish to set to \code{FALSE} for use in \pkg{knitr}, \pkg{sweave},
#' etc. to add additional plot layers.
#' @param unlist.terms logical. If \code{TRUE} the terms will be separated rather than grouped as a single (sub)marker.
#' @param \ldots Other arguments passed to \code{\link[qdap]{dispersion_plot}}.
#' @method plot discourse_connector
#' @export
plot.discourse_connector <- function(x, grouping.var = NULL,
    unlist.terms = TRUE, plot = TRUE, ...){

    ## Recover the inputs stored on the object (text.var, grouping.var,
    ## terms, group.nms).
    dat <- as.list.environment(attributes(x)[["meta"]])

    ## Lower-case each term, turn whitespace into "~~", then turn leading and
    ## trailing runs of "~" back into a single space.
    recode_term <- function(term) {
        tilded <- gsub("\\s", "~~", tolower(term))
        gsub("^~+|~+$", " ", tilded)
    }
    dat[["terms"]] <- lapply(dat[["terms"]], recode_term)

    ## Optionally plot every term on its own rather than grouped
    if (isTRUE(unlist.terms)) {
        dat[["terms"]] <- unlist(dat[["terms"]], use.names = FALSE)
    }

    ## A user-supplied grouping variable overrides the stored one, and its
    ## display name is re-derived from the unevaluated argument.
    if (!is.null(grouping.var)) {
        dat[["grouping.var"]] <- grouping.var
        if (is.list(grouping.var)) {
            parts <- unlist(as.character(substitute(grouping.var))[-1])
            parts <- sapply(strsplit(parts, "$", fixed = TRUE), function(p) {
                p[length(p)]
            })
            dat[["group.nms"]] <- paste(parts, collapse = "&")
        } else {
            group.expr <- as.character(substitute(grouping.var))
            dat[["group.nms"]] <- group.expr[length(group.expr)]
        }
    }

    ## Build the dispersion plot without drawing it
    base.plot <- with(dat, qdap::dispersion_plot(match.terms = terms,
        grouping.var = grouping.var, text.var = text.var, plot = FALSE, ...))

    ## Axis label: each group name passed through Caps, joined by " & "
    group.parts <- unlist(strsplit(dat[["group.nms"]], "\\&"))
    axis.label <- paste(sapply(group.parts, Caps), collapse = " & ")
    out <- base.plot + ggplot2::ylab(axis.label)

    if (isTRUE(plot)) {
        print(out)
    }
    invisible(out)
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.