
#' Search metadata for search terms using regex (more powerful than searching
#'    online without regex).
#' @import RCurl XML stringr
#' @param input Dryad metadata data.frame, e.g., from getalldryad_metadata, or
#'    the path to a CSV file of that metadata (it is loaded with read.csv)
#' @param terms search terms e.g., 'plants', 'Whickam'
#' @param fuzzy (logical) do fuzzy search, TRUE (uses agrep) or FALSE (uses grep)
#' @param ignorecase (logical) if FALSE, pattern matching is case sensitive, and if
#'    TRUE, case is ignored during matching
#' @param value (logical) if FALSE, a vector containing the integer (row) indices
#'    of the matches is returned, and if TRUE, a vector containing the matching
#'    elements themselves is returned
#' @param maxdistance maximum distance allowed for a match. As integer, OR fraction
#'    of the pattern length, OR a list with possible entries:
#'    all (max. overall distance), insertions (max. number/fraction of
#'    insertions), deletions (max. number/fraction of deletions),
#'    and substitutions (max. number/fraction of substitutions)
#' @param loc where you want to search, any of title, creator, description, date, type,
#'    identifier, relation, OR 'all' for search over all metadata fields
#' @details Input is a Dryad metadata data frame from function getalldryad_metadata,
#'    or from directory (if latter, give path with arg 'input').
#' @return A numeric vector of OAI identifiers for datasets that match search.
#' @export
#' @examples \dontrun{
#' # Search data.frame in R
#' mymetdata <- getalldryad_metadata(T, progress = 'text', T, '/Mac/R_stuff/Blog_etc/Dryad/')
#' search_dryad(mymetdata, 'map', fuzzy=F, loc='type', maxdistance='all')
#' search_dryad(mymetdata, 'asddddf', fuzzy=T, loc='all')
#' search_dryad(mymetdata, 'clustal', fuzzy=F, ignorecase=T, value=F, loc='all')
#' # Or search from a saved data.frame on file
#' search_dryad('/Mac/R_stuff/Blog_etc/Dryad/dryadmetadata.csv', 'me', fuzzy=T)
#' }
search_dryad <-
function(input, terms, fuzzy = "FALSE", ignorecase = "TRUE",
    value = "FALSE", maxdistance = 0.1, loc = "all") {
    # Searches Dryad metadata for `terms` with grep()/agrep() and derives
    # numeric values from the `identifier.1` column of the matching rows.
    # fuzzy, ignorecase and value default to the character strings
    # "TRUE"/"FALSE"; fuzzy is compared against those strings below.
    #
    # NOTE(review): this block appears to have been damaged by text
    # extraction and does not parse as-is. Several calls have lost tokens
    # (`= ignorecase` without an argument name, `dat <-,`, `try(, rowslist)`,
    # `rows <-,`, `oais_[!]`), the anonymous function in the `oais`
    # assignment is cut off, and closing braces are missing. Restore it from
    # the package source before relying on it.

    # Inner helper: searches one data.frame `x` for `terms`. It reads loc,
    # fuzzy, ignorecase, value and maxdistance from the enclosing function.
    searchdf <- function(x, terms) {
        identifier.1 <- "NA"
        # The dummy binding above only exists so that check() does not raise the
        # global variable warning for `identifier.1`, which is used unquoted in
        # the subset() call further down.
        if (loc == "all") {
            # Search every column of x (apply over MARGIN 2).
            if (fuzzy == "TRUE") {
                # Approximate matching via agrep.
                # NOTE(review): the argument name before `= ignorecase` is
                # missing; presumably `ignore.case` -- confirm.
                rowslist <- apply(x, 2, agrep, pattern = terms, = ignorecase,
                  value = value, max.distance = maxdistance)
            } else if (fuzzy == "FALSE") {
                # Regular-expression matching via grep.
                rowslist <- apply(x, 2, grep, pattern = terms, = ignorecase,
                  value = value)
        } else if (!loc == "all") {
            # Restrict the search to the columns selected by `loc`.
            # NOTE(review): the right-hand side of this assignment is
            # truncated; presumably it builds a data.frame from the alphabetic
            # part ("[a-z]+") of the column names of x -- confirm.
            dat <-, "[a-z]+"))
            dat[, 1] <- as.character(dat[, 1])
            dat$rows <- rownames(dat)
            # Second column of dat, converted to numeric, for the entries
            # whose first column is in `loc`; used as column positions in x.
            cols_ <- as.numeric(dat[dat[, 1] %in% loc, 2])
            if (fuzzy == "TRUE") {
                # The selected columns are passed to data.frame() twice;
                # presumably so apply() still receives a two-dimensional
                # object when only one column is selected -- TODO confirm.
                rowslist <- as.list(apply(data.frame(x[, cols_], x[,
                  cols_]), 2, agrep, pattern = terms, = ignorecase,
                  value = value, max.distance = maxdistance))
            } else if (fuzzy == "FALSE") {
                rowslist <- as.list(apply(data.frame(x[, cols_], x[,
                  cols_]), 2, grep, pattern = terms, = ignorecase,
                  value = value))

        # Combine the per-column matches; stop with an error if that fails.
        # NOTE(review): the call wrapped by try() has lost its first
        # part (likewise in the `rows` assignment below) -- restore it.
        if (class(try(, rowslist), silent = T)) %in% "try-error") {
            stop("Awwwww snap. No datasets contain your search results")
        } else {
            rows <-, rowslist)
        # De-duplicate and sort the matches, then keep the identifier.1 column
        # of the rows of x whose row names are among them.
        rowsus <- sort(unique(rows))
        ids <- subset(x, rownames(x) %in% rowsus, identifier.1)
        # Convert each identifier to a number by splitting its string form.
        # NOTE(review): the rest of this expression (split pattern and which
        # piece is kept) is missing from this copy.
        oais <- as.numeric(apply(ids, 1, function(x) str_split(as.character(x),
        oais_ <- oais

    # Dispatch on the class of `input`: a data.frame is searched directly, a
    # character value is treated as a file path and loaded with read.csv
    # first; anything else is an error.
    if (class(input) == "data.frame") {
        oais_ <- searchdf(input, terms)
        # NOTE(review): the subsetting condition is truncated; presumably it
        # drops NA values -- confirm.
        oais_ <- oais_[!]
    } else if (class(input) == "character") {
        xdf <- read.csv(input)
        oais_ <- searchdf(xdf, terms)
        oais_ <- oais_[!]
    } else {
        stop("Error: input must be one of class data.frame or directory-file\nlocation\n or file name if in directory already")

Try the rdryad package in your browser

Any scripts or data that you put into this service are public.

rdryad documentation built on July 2, 2020, 3:13 a.m.