#' @title Find variable by name or label
#' @name find_var
#'
#' @description This function finds variables in a data frame whose variable
#' names or variable (and value) label attributes match a specific
#' pattern. Regular expressions are supported for the pattern.
#'
#' @param data A data frame.
#' @param pattern Character string to be matched in \code{data}. May also be a
#' character vector of length > 1 (see 'Examples'). \code{pattern} is
#' searched for in column names and variable label attributes of
#' \code{data} (see \code{\link[sjlabelled]{get_label}}). \code{pattern}
#' might also be a regular-expression object, as returned by \code{stringr::regex()}.
#' Alternatively, use \code{regex = TRUE} to treat \code{pattern} as a regular
#' expression rather than a fixed string.
#' @param ignore.case Logical, if \code{TRUE} (default), matching is
#' case-insensitive. \code{ignore.case} only has an effect when \code{pattern}
#' is a regular expression (a regex object or \code{regex = TRUE}); matching
#' of fixed strings is always case-sensitive.
#' @param search Character string, indicating where \code{pattern} is sought.
#' Use one of following options:
#' \describe{
#' \item{\code{"name_label"}}{The default, searches for \code{pattern} in
#' variable names and variable labels.}
#' \item{\code{"name_value"}}{Searches for \code{pattern} in
#' variable names and value labels.}
#' \item{\code{"label_value"}}{Searches for \code{pattern} in
#' variable and value labels.}
#' \item{\code{"name"}}{Searches for \code{pattern} in
#' variable names.}
#' \item{\code{"label"}}{Searches for \code{pattern} in
#' variable labels}
#' \item{\code{"value"}}{Searches for \code{pattern} in
#' value labels.}
#' \item{\code{"all"}}{Searches for \code{pattern} in
#' variable names, variable and value labels.}
#' }
#' @param out Output (return) format of the search results. May be abbreviated
#' and must be one of:
#' \describe{
#' \item{\code{"table"}}{A tabular overview (as data frame) with
#' column indices, variable names and labels of matching variables.
#' }
#' \item{\code{"df"}}{A data frame with all matching variables.}
#' \item{\code{"index"}}{
#' A named vector with column indices of all matching variables.
#' }
#' }
#' @param fuzzy Logical, if \code{TRUE}, "fuzzy matching" (partial and
#' close distance matching) will be used to find \code{pattern}
#' in \code{data} if no exact match was found.
#' @param regex Logical, if \code{TRUE}, \code{pattern} is treated as a regular
#' expression rather than a fixed string.
#'
#' @return By default (i.e. \code{out = "table"}), returns a data frame with three
#' columns: column number, variable name and variable label. If
#' \code{out = "index"}, returns a named vector with column indices
#' of matching variables (variable names are used as names-attribute);
#' if \code{out = "df"}, returns the matching variables as data frame.
#'
#' @details This function searches for \code{pattern} in \code{data}'s column names
#' and - for labelled data - in all variable and value labels of \code{data}'s
#' variables (see \code{\link[sjlabelled]{get_label}} for details on variable labels and
#' labelled data). Regular expressions are supported as well, by simply using
#' \code{pattern = stringr::regex(...)} or \code{regex = TRUE}.
#'
#' @examples
#' data(efc)
#'
#' # find variables with "cop" in variable name
#' find_var(efc, "cop")
#'
#' # return data frame with matching variables
#' find_var(efc, "cop", out = "df")
#'
#' # or return column numbers
#' find_var(efc, "cop", out = "index")
#'
#' # find variables with "dependency" in names and variable labels
#' library(sjlabelled)
#' find_var(efc, "dependency")
#' get_label(efc$e42dep)
#'
#' # find variables with "level" in names and value labels
#' res <- find_var(efc, "level", search = "name_value", out = "df")
#' res
#' get_labels(res, attr.only = FALSE)
#'
#' # use sjPlot::view_df() to view results
#' \dontrun{
#' library(sjPlot)
#' view_df(res)}
#' @export
find_var <- function(data,
                     pattern,
                     ignore.case = TRUE,
                     search = c(
                       "name_label", "name_value", "label_value",
                       "name", "label", "value", "all"
                     ),
                     out = c("table", "df", "index"),
                     fuzzy = FALSE,
                     regex = FALSE) {
  # Locate the columns of `data` whose names, variable labels and/or value
  # labels (depending on `search`) match `pattern`, and return them as an
  # overview table, a data frame or a named index vector (depending on `out`).

  # check valid args
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame.", call. = FALSE)
  }
  if (length(pattern) == 0) {
    stop("`pattern` must contain at least one search string.", call. = FALSE)
  }

  # match args
  search <- match.arg(search)
  out <- match.arg(out)

  # `pattern` is treated as a regular expression when requested through
  # `regex = TRUE` or when it is a regex object. Objects created by
  # stringr::regex() carry the class "regex" in older and "stringr_regex"
  # in newer stringr releases, so both are accepted.
  is_regex <- inherits(pattern, c("regex", "stringr_regex"))
  if (regex) is_regex <- TRUE
  fixed <- !is_regex

  # avoid warning. For fixed=TRUE, ignore.case is ignored.
  if (fixed) ignore.case <- FALSE

  # fuzzy matching is only a fallback for fixed strings
  use_fuzzy <- isTRUE(fuzzy) && fixed

  # grepl() only uses the first element of a pattern vector, so every
  # pattern is matched on its own and the hits are combined.
  patterns <- as.character(pattern)

  # TRUE for each element of `x` that matches at least one pattern
  match_any <- function(x) {
    hits <- logical(length(x))
    for (p in patterns) {
      hits <- hits | grepl(p, x = x, ignore.case = ignore.case, fixed = fixed)
    }
    hits
  }

  # combined result of fuzzy_grep() (defined elsewhere) over all patterns
  fuzzy_any <- function(x) {
    unlist(lapply(patterns, function(p) fuzzy_grep(x = x, pattern = p)))
  }

  pos1 <- pos2 <- pos3 <- integer(0)

  # search for pattern in variable names
  if (search %in% c("name", "name_label", "name_value", "all")) {
    pos1 <- which(match_any(colnames(data)))
    # if nothing found, find in near distance
    if (use_fuzzy && length(pos1) == 0) {
      pos1 <- fuzzy_any(colnames(data))
    }
  }

  # search for pattern in variable labels
  if (search %in% c("label", "name_label", "label_value", "all")) {
    var_labels <- sjlabelled::get_label(data)
    pos2 <- which(match_any(var_labels))
    # if nothing found, find in near distance
    if (use_fuzzy && length(pos2) == 0) {
      pos2 <- fuzzy_any(var_labels)
    }
  }

  # search for pattern in value labels
  if (search %in% c("value", "name_value", "label_value", "all")) {
    val_labels <- sjlabelled::get_labels(data, attr.only = FALSE)
    # vapply() keeps the result logical even when there are no labels at
    # all, so which() never receives an empty list.
    pos3 <- which(vapply(
      val_labels,
      function(.x) any(match_any(.x)),
      logical(1)
    ))
    # if nothing found, find in near distance
    if (use_fuzzy && length(pos3) == 0) {
      pos3 <- which(vapply(
        val_labels,
        function(.x) {
          hits <- fuzzy_any(.x)
          # -1 is filtered out as "no match" further below, so it must not
          # count as a hit here either.
          # NOTE(review): confirm the no-match value of fuzzy_grep().
          hits <- hits[!is.na(hits) & hits != -1]
          length(hits) > 0
        },
        logical(1)
      ))
    }
  }

  # get unique variable indices
  pos <- unique(c(pos1, pos2, pos3))
  # remove -1
  pos <- pos[which(pos != -1)]

  # return data frame?
  if (out == "df") {
    return(data[, pos, drop = FALSE])
  }

  # return variable labels?
  if (out == "table") {
    return(data_frame(
      col.nr = pos,
      var.name = colnames(data)[pos],
      var.label = sjlabelled::get_label(
        data[, pos, drop = FALSE],
        def.value = colnames(data)[pos]
      )
    ))
  }

  # use column names
  names(pos) <- colnames(data)[pos]
  pos
}
# Alias: `find_in_data` is bound to the same function object as `find_var`
# and is documented on the `find_var` help page.
#' @rdname find_var
#' @export
find_in_data <- find_var
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.