#' Find duplicate keys in a dataframe.
#'
#' Return a vector of the duplicate keys within a dataframe. Filters out NA
#' values before determining duplicates.
#'
#' @param df The dataframe to check for duplicate keys.
#' @param ... The column(s) to check for duplicates.
#'
#' @return A tibble containing the duplicate keys and a count of the number of
#' duplicates.
#'
#' @examples
#' ## dataframe to convert
#' key <- c(1000L, 2000L, 3000L, 4000L, 1000L)
#' amount <- c("46.41", "118.11", "84.68", "493.59", "51.10")
#'
#' test_df <- data.frame(key, amount)
#'
#' ## which keys are duplicated
#' duplicate_keys(df = test_df, "key")
#'
#' @export
duplicate_keys <- function(df, ...) {
df %>%
dplyr::group_by_(...) %>%
dplyr::tally() %>%
dplyr::filter_(~n > 1)
}
#' Match column name.
#'
#' \code{match_cols} returns matching column names from a table. The matching
#' is performed using regular expressions and \link[base]{grepl}.
#'
#' Ignores case by default. Can be overridden by passing \code{ignore.case} via
#' \code{...}.
#'
#' @param tbl A \link[tibble]{tibble} or dataframe.
#' @param patt Regular expression pattern to pass to \link[base]{grepl}.
#' @param ... Parameters to pass along to \link[base]{grepl}.
#'
#' @return A character vector of column names matching the regex.
#'
#' @examples
#' ## dataframe to convert
#' key <- c(1000L, 2000L, 3000L, 4000L, 1000L)
#' amount <- c("46.41", "118.11", "84.68", "493.59", "51.10")
#'
#' test_df <- data.frame(key, amount)
#'
#' # column names
#' my_col <- "^am"
#'
#' # use
#' test_df %>%
#' match_cols(my_col)
#'
#' @export
match_cols <- function(tbl, patt, ...) {
cols <- dplyr::tbl_vars(tbl)
purrr::keep(.x = cols,
.p = grepl(patt, cols, ignore.case = TRUE, ...))
}
#' List exact names of columns.
#'
#' \code{exact_cols} returns the exact column names, including the case.
#'
#' This particularly useful when working with Microsoft SQL Server tables as
#' the column names may be case sensitive. This is also useful when working
#' with table metadata and the exact column names are required for matching
#' purposes.
#'
#' \link[dplyr]{tbl_vars} does not return case sensitive columns, thus the
#' the need for this function.
#'
#' @param tbl A \link[tibble]{tibble} or dataframe.
#' @param cols A character vector of column names.
#'
#' @return A character vector of column names with the appropriate case.
#'
#' @examples
#' ## create tibble
#' my_tibble <- tibble::tribble(
#' ~MixedCase, ~lowercase, ~UPPERCASE, ~aLtCaSe,
#' 1, 11, 111, 1111,
#' 2, 22, 222, 2222,
#' 3, 33, 333, 3333
#' )
#'
#' ## column names to search for, ignoring case
#' my_cols <- c("mixedcase", "altcase")
#'
#' my_cols_case <- my_tibble %>%
#' exact_cols(my_cols)
#'
#' ## particularly useful when combined with dplyr::one_of
#' my_tibble %>%
#' dplyr::select(dplyr::one_of(my_cols_case))
#'
#' @export
exact_cols <- function(tbl, cols) {
purrr::map_chr(
cols,
function(col) dplyr::tbl_vars(tbl)[match(tolower(col),
tolower(dplyr::tbl_vars(tbl)))]
)
}
#' Column names and types in a dataframe.
#'
#' Given a dataframe, \code{types_df} returns a \link[tibble]{tibble} with the
#' column names and types.
#'
#' @param df A dataframe.
#'
#' @return A \link[tibble]{tibble} with the column names and types.
#'
#' @examples
#' (mtcars %>%
#' types_df())
#'
#' @export
types_df <- function(df) {
df %>%
purrr::dmap(class) %>%
tidyr::gather(key = "col_name", value = "col_type") %>%
dplyr::distinct_("col_name", .keep_all = TRUE) %>%
dplyr::mutate(col_type = dplyr::case_when(
.$col_type == "POSIXct" ~ "datetime",
.$col_type == "POSIXt" ~ "time",
.$col_type == "Date" ~ "date",
TRUE ~ .$col_type
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.