#' Sentence match retriever
#'
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("experimental")}
#'
#' @description The function extract full sentence-match for a (set of significant) collocate(s) for a given nodeword.
#' @param collout List output of \code{\link{colloc_leipzig}}.
#' @param colloc Character vector of the collocate(s) whose sentence match(es) to be retrieved.
#' @param wspan Character vector of the window span in which the collocates occur. Default to \code{NULL}, which will retrieve the collocate's occurrence in all span.
#' @param nodeword Character vector specifying one of the nodewords if search parameter in \code{\link{colloc_leipzig}} includes more than one nodeword.
#' @param sampled Integer vector indicating the number of random sample of the sentence match to be retrieve. Default to \code{NULL}, which will retrieve all sentence-matches.
#'
#' @return Character vector of sentence-match(es).
#'
#' @seealso \code{\link{colloc_sentmatch_tagged}} for tagged and data frame version of the output.
#'
#' @export
#'
#' @importFrom assertthat assert_that
#'
#' @examples
#' collout <- colloc_leipzig(leipzig_corpus_list = demo_corpus_leipzig,
#' pattern = "mengatakan",
#' window = "r",
#' span = 3,
#' save_interim = FALSE)
#'
#' colloc_sentmatch(collout,
#' colloc = "bahwa",
#' sampled = 10)
#'
#' # This will produce message indicating that
#' # the queried sample number is higher than
#' # the sentence match for "akan"
#' colloc_sentmatch(collout,
#' colloc = "akan",
#' sampled = 10)
colloc_sentmatch <- function(collout, colloc = NULL, wspan = NULL, nodeword = NULL, sampled = NULL) {
assertthat::assert_that(!is.null(colloc),
msg = "`colloc` argument cannot be NULL. Specify with one or more collocates!")
w <- dplyr::quo(w)
span <- dplyr::quo(span)
node <- dplyr::quo(node)
if(is.null(nodeword)) {
if(!is.null(wspan)) {
sent <- subset(collout[[1]], !!w %in% colloc & !!span %in% wspan)$sent_match
} else {
sent <- subset(collout[[1]], !!w %in% colloc)$sent_match
}
} else if (!is.null(nodeword)) {
if(!is.null(wspan)) {
sent <- subset(collout[[1]], !!w %in% colloc & !!node %in% nodeword & !!span %in% wspan)$sent_match
} else {
sent <- subset(collout[[1]], !!w %in% colloc & !!node %in% nodeword)$sent_match
}
}
if(!is.null(sampled)) {
if(length(sent) > sampled) {
return(sample(sent, sampled, TRUE))
} else {
warning(paste("Returning all ", length(sent),"matches!\n Length of matches (",
length(sent),
") is lower than the number of the queried sample (",
sampled,
").\n",
sep = ""))
return(sent)
}
} else if(is.null(sampled)) {
return(sent)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.