#' Test whether a data.frame is a proper subset of any defined groups within a
#' target data.frame.
#'
#' @param x Focal data.frame, reflecting a single 'group' of interest
#' @param target Target data.frame, containing one or more defined groups
#' @param group Name of the column within \code{target} defining groups
#' @param by Character vector giving the column(s) by which \code{x} and
#' \code{target} should be matched and compared.
#'
#' @return
#' Logical indicating whether \code{x} is a proper subset of any groups within
#' \code{target}
#'
#' @examples
#' target <- data.frame(
#' g = c("1", "1", "1", "2", "2", "3", "3", "4", "4", "5", "5"),
#' x1 = c("a", "a", "a", "b", "b", "a", "a", "b", "c", "a", "b"),
#' x2 = c("a", "a", "a", "b", "b", "a", "a", "b", "c", "a", "b"),
#' stringsAsFactors = FALSE
#' )
#'
#' x1 <- data.frame(
#' g = c("6", "6"),
#' x1 = c("a", "a"),
#' x2 = c("a", "a"),
#' stringsAsFactors = FALSE
#' )
#'
#' is_group_subset(x1, target, group = "g", by = c("x1", "x2"))
#'
#' x2 <- data.frame(
#' g = c("7", "7"),
#' x1 = c("a", "d"),
#' x2 = c("a", "d"),
#' stringsAsFactors = FALSE
#' )
#'
#' is_group_subset(x2, target, group = "g", by = c("x1", "x2"))
#'
#' @importFrom dplyr semi_join left_join
#' @export is_group_subset
is_group_subset <- function(x, target, by, group) {
if (!(group %in% names(target))) {
stop("Argument 'group' must be a name of a column within 'target'")
}
if (!all(by %in% names(x)) | !all(by %in% names(target))) {
stop("Both 'x' and 'target' must contain all columns specified in 'by'")
}
# check whether relevant rows of x[,by] and target[,by] are duplicated
# 'relevant' means has matching row in x/target
has_dups_x <- any(duplicated(semi_join(x[,by], target, by = by)))
has_dups_target <- any(duplicated(semi_join(target[,by], x, by = by)))
if (!has_dups_x & !has_dups_target) {
# if relevant rows of x and target all unique
x_join <- left_join(x[,by], target, by = by)
out <- .unique_value(x_join[[group]])
} else {
# else, join will result in duplicates so must use alternate method
target_split <- split(target, target[[group]])
out <- any(vapply(target_split, .is_group_subset, x = x, by = by, FALSE))
}
out
}
#' @noRd
.is_group_subset <- function(x, y, by) {
# utility to test whether 'x' is a subset of a single group within 'target'
if (nrow(x) > nrow(y)) {
# if x has more rows than y, then x is not a subset of y
is_subset <- FALSE
} else {
# else..
y$row <- 1:nrow(y) # add row id to y
i <- 1L # initiate counter for focal row of x
m <- 0L # initiate tracker for matching row from y
# loop through rows of x, looking for matching rows in y
while (!is.na(m) & i <= nrow(x)) {
m <- left_join(x[i,], y, by = by)$row[1] # find first matching row
if (!is.na(m)) y <- y[y$row != m,] # remove matching row from y
i <- i + 1L # increment i
}
is_subset <- ifelse(!is.na(m), TRUE, FALSE)
}
is_subset
}
#' @noRd
.unique_value <- function(x) {
!any(is.na(x)) & length(unique(x)) == 1L
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.