R/synonyms.R

Defines functions rcst qdap_recoder seqs synonyms_frame synonyms

Documented in synonyms synonyms_frame

#' Search For Synonyms
#' 
#' \code{synonyms} - Search for synonyms that match term(s).
#' 
#' @param terms The terms to find synonyms for.  
#' @param return.list logical.  If \code{TRUE} returns the output for multiple 
#' synonyms as a list by search term rather than a vector.
#' @param multiwords logical.  IF \code{TRUE} retains vector elements that 
#' contain phrases (defined as having one or more spaces) rather than a single 
#' word.
#' @param report.null logical.  If \code{TRUE} reports the words that no match 
#' was found at the head of the output.
#' @return Returns a list of vectors or vector of possible words that match 
#' term(s).
#' @param synonym.frame A dataframe or hash key of positive/negative words and weights.
#' @rdname synonyms
#' @references The synonyms dictionary (see \code{\link[qdapDictionaries]{key.syn}}) was 
#' generated by web scraping the Reverso (https://dictionary.reverso.net/english-synonyms/) Online Dictionary.
#' The word list fed to Reverso
#' is the unique words from the combination of \code{\link[qdapDictionaries]{DICTIONARY}} 
#' and \code{\link[qdapDictionaries]{labMT}}.
#' @export
#' @importFrom qdapTools hash hash_look %hl%
#' @examples
#' \dontrun{
#' synonyms(c("the", "cat", "job", "environment", "read", "teach"))
#' head(syn(c("the", "cat", "job", "environment", "read", "teach"), 
#'     return.list = FALSE), 30)
#' syn(c("the", "cat", "job", "environment", "read", "teach"), multiwords = FALSE)
#'
#' ## User defined synonym lookup
#' syn_dat <- list(
#'     like = list(c("want", "desire"), c("love", "care")),
#'     show = list(c("reveal"), c("movie", "opera")),
#'     R = c("old friend", "statistics language")
#' )
#' 
#' synonyms_frame(syn_dat)
#' syn(c("R", "show"), synonym.frame = syn_frame(syn_dat))
#' 
#' syns.hash <- syn_frame(syn_dat, prior.frame = qdapDictionaries::key.syn)
#' syn(c("R", "show", "like", "robot"), synonym.frame = syns.hash)
#' }
synonyms <- function(terms, return.list = TRUE, 
    multiwords = TRUE, report.null = TRUE, 
    synonym.frame = qdapDictionaries::key.syn){

    `%hlqdap%` <- qdapTools::`%hl%`        
    
    z <- terms %hlqdap% synonym.frame
    out <- lapply(z, rcst)

    if (!multiwords){
        out <- lapply(out, function(a) {
            lapply(a, function(y) {
                mults <- grepl("\\s", y)
                if (any(mults)){
                    y <- y[!mults]
                }
                return(y)
            })})
    }

    names(out) <- terms
    nulls <- sapply(lapply(out, is.na), sum) > 0

    if (report.null & any(nulls)) {
        message("no match for the following:\n")
        message(paste(names(nulls)[nulls], collapse = ", "))
        message("========================\n")
    }
    if (return.list) {
        unlist(out[!nulls], recursive = FALSE)
    } else {
        outs2 <- unlist(out[!nulls])
        names(outs2) <- NULL
        unique(outs2)
    }
}




#' @rdname synonyms
#' @export
syn <- synonyms

#' Search For Synonyms
#' 
#' \code{synonyms_frame} - Generate a synonym lookup hash key 
#' for use with the \code{synonym.frame} argument in the \code{synonym} 
#' function.
#' 
#' @param synonym.list A named list of lists (or vectors) of synonyms.
#' @param prior.frame A prior synonyms data.frame in the format produced by 
#' \code{synonyms_frame}.
#' @export
#' @importFrom qdapTools list2df
#' @rdname synonyms
synonyms_frame <- function(synonym.list, prior.frame) {
   
    synonym.list <- lapply(synonym.list, function(x) {
        if(is.list(x)) {
            x
        } else {
            list(x)
        }
    })
    phase1 <- lapply(synonym.list, lapply, paste, collapse = ", ")
    phase2 <- mapply(seqs, phase1, lapply(phase1, function(x) 1:length(x))) 
    phase3 <- list2df(lapply(phase2, paste, collapse = " @@@@ "), 
        col2 = "word", col1 = "match.string")[2:1]
    phase3[] <- lapply(phase3, as.character)

    if (!missing(prior.frame)) {

        class(prior.frame) <- "data.frame"
        suppressWarnings(colnames(prior.frame) <- colnames(phase3))

        phase3 <- data.frame(rbind(phase3, 
            prior.frame[!prior.frame[, "word"] %in%  phase3[, "word"], ]
        ), stringsAsFactors = FALSE)
    }
    hash(phase3)
}

#' @rdname synonyms
#' @export
syn_frame <- synonyms_frame

seqs <- function(x, y) sprintf("[%s]%s", y, x)

qdap_recoder <- function(x, envr, missing){                               
    x <- as.character(x)                                                         
    sapply(x, hash_look, USE.NAMES = FALSE, envir=envr, missing = missing)                       
}   


rcst <- function(x) {  
    if (is.na(x)) return(NA)            
    y <- c(sapply(strsplit(x, "@@@@"), Trim))
    nms <- bracketXtract(y, "square")
    y <- bracketX(y, "square")
    names(y) <- paste0("def_", nms)
    lapply(lapply(y, strsplit, "\\,"), function(x){
        Trim(unlist(x))
    })
}
trinker/qdap documentation built on Sept. 30, 2020, 6:28 p.m.