Nothing
######################################################################
## CODE AND DOCUMENTATION FOR PACKAGE tuple
######################################################################
#' @name tuple-package
#' @docType package
#' @title
#' Find Matches, or Orphan, Duplicate or Triplicate Values
#' @description
#' This package extends the base R functionality around checking
#' for \code{\link[base]{unique}} and duplicate values in vectors.
#' @details
#' The following changes are documented since the first release
#' of this package on CRAN:
#' \tabular{lll}{
#' Version \tab Change \tab Description \cr
#' 0.3-06 \tab None \tab Initial release to CRAN. \cr
#' 0.4-01 \tab Added \code{\link{\%!in\%}} \tab
#' This function tests for the opposite of the commonly \cr
#' \tab \tab used testing operator \code{"\%in\%"}
#' as documented in \code{\link[base]{match}}. \cr
#' \tab Added documentation \tab
#' Added documentation for the package as a whole. \cr
#' \tab \tab Implemented this change log. \cr
#' \tab Improved documentation \tab
#' Cleaned and otherwise improved documentation \cr
#' \tab \tab that is generated by way of the \pkg{roxygen2}
#' package \cr
#' \tab \tab for existing functions. \cr
#' \tab Added \code{\link{tuplicated}} \tab
#' This function is a major addition to the package. \cr
#' \tab \tab It provides a generic way to find elements of a \cr
#' \tab \tab vector that are replicated n or more times. \cr
#' \tab \tab Fundamentally it depends only on the code for \cr
#' \tab \tab \code{\link{duplicated}} as in the first version
#' of this \cr
#' \tab \tab package released to CRAN. The implementation \cr
#' \tab \tab of \code{\link{triplicated}} has not been changed
#' in this \cr
#' \tab \tab in this update from version 0.3-06, but it will be \cr
#' \tab \tab changed to call \code{\link{tuplicated}}
#' with \code{tuple = 3} \cr
#' \tab \tab in a future release. \cr
#' \tab Added \code{\link{tuplicate}} \tab
#' This function is another major addition. It provides \cr
#' \tab \tab a generic way to find elements of a vector that are \cr
#' \tab \tab replicated exactly n times. It depends on the code \cr
#' \tab \tab for the newly-released
#' \code{\link{tuplicated}}, and on the code \cr
#' \tab \tab for \code{\link{orphan}} as in the initial package
#' released to CRAN. \cr
#' \tab \tab The implementation of
#' \code{\link{triplicate}} has not changed \cr
#' \tab \tab from version 0.3-06, but it will be changed to call \cr
#' \tab \tab \code{\link{tuplicate}} with \code{tuple = 3}
#' in a future release. \cr
#' 0.4-02 \tab Added \code{\link{matchNone}} \tab
#' This function returns a character string, based \cr
#' \tab \tab on the table, that does not appear in the data. \cr
#' }
NULL
#' @title
#' Match All Values
#' @description
#' Extends the functionality of \code{\link[base]{match}} to identify
#' all matching values, instead of just the first one.
#' @details
#' Returns an integer vector of the index in \code{table} for all
#' the matches. The result is not sorted in numerical index order when
#' more than one value is sought to be matched.
#' Instead, the matches of the first value in \code{x} are listed first,
#' followed by matches to the second value in \code{x} and so on.
#' Values of \code{NA} are treated as data.
#' @param x
#' A vector.
#' @param table
#' The lookup table as a vector.
#' @examples
#' matchAll(3, c(1:3, 3, 4:6, 3, NA, 4))
#' matchAll(3:4, c(1:3, 3, 4:6, 3, NA, 4))
#' matchAll(c(NA, 3:4), c(NA, 1:3, 3, 4:6, 3, NA, 4))
#' @keywords match
#' @seealso \code{\link[base]{match}}
#' @export
matchAll <- function(x, table) {
myMatch <- match(x, table)
myRecords <- integer()
while(length(myMatch)>0) {
myRecords <- c(myRecords, myMatch)
table[myMatch] <- ""
myMatch <- match(x, table)
if(any(is.na(myMatch))) {
x <- x[-which(is.na(myMatch))]
myMatch <- myMatch[-which(is.na(myMatch))]
}
}
if(all(is.na(myRecords))) myRecords <- NA
if(length(myRecords)>1 && any(is.na(myRecords)))
myRecords <- myRecords[!is.na(myRecords)]
return(myRecords)
}
#' @title
#' Return a Symbol That Matches No Values
#' @description
#' The tag value is chosen from among special characters so
#' that it does not appear anywhere in the reference input data.
#' The shortest possible tag is chosen.
#' @details
#' This function is used in other packages by the same author
#' to extend missing data handling in R. It provides for
#' flexible missing data identifiers where needed by an
#' S4 class, and similar unmatched identifiers for other
#' dirty data problems.
#' @param x
#' A vector or matrix.
#' @param table
#' The lookup table against which to seek non-matches.
#' This can be a simple vector, or it can be a list of
#' two vectors.
#' @return
#' A string composed of the strings in the \code{table}.
#' The default list choses the first non-matching value
#' out of 179 values that are unlikely to be used in
#' most real sets of data.
#' If only \code{table} is specified, the possible
#' values for a non-matching string, ordered from the
#' most to the least preferable, are returned.
#' @examples
#' my.x <- c(1,2,3,2,3,1,2)
#' matchNone(my.x)
#' matchNone(c(my.x,"."))
#' matchNone(c(my.x,".","!"))
#' matchNone(c(my.x,".","!","/"))
#' matchNone(c(my.x,".","!","/",".."))
#' matchNone(table = ".")
#' @export
matchNone <- function(x, table = list(c(".", "!", "/"), c("NA", "na"))) {
if(is.list(table)) my.blocks1 <- table[[1]]
else my.blocks1 <- table
my.blocks2 <- apply(expand.grid(my.blocks1, my.blocks1), 1,
function(Y) { paste(Y, collapse = "") })
if(is.list(table) && length(table)>1)
my.blocks2 <- c(my.blocks2, table[[2]])
my.blocks3 <- c(apply(expand.grid(my.blocks1, my.blocks2), 1,
function(Y) { paste(Y, collapse = "") }),
apply(expand.grid(my.blocks2, my.blocks1), 1,
function(Y) { paste(Y, collapse = "") }))
my.blocks4 <- apply(expand.grid(my.blocks1, my.blocks2,
my.blocks1), 1,
function(Y) { paste(Y, collapse = "") })
my.preferred <- c(my.blocks1, my.blocks2, my.blocks3, my.blocks4)
if(missing(x)) return(my.preferred)
if(is.factor(x)) my.data <- levels(x)
else my.data <- as.character(x)
my.in <- TRUE
my.counter <- -1
my.sample.size <- 4
while(all(my.in)) {
if(my.counter > -1) {
if(my.counter == 0)
my.preferred <-
apply(expand.grid(my.blocks1, my.blocks3, my.blocks1),
1, function(Y) { paste(Y, collapse = "") })
else my.preferred <-
paste(sample(c(letters, LETTERS, my.blocks1),
my.sample.size), collapse = "")
}
my.in <- my.preferred %in% my.data
my.counter <- my.counter + 1
if(my.counter > 1000) {
my.sample.size <- my.sample.size + 1
my.counter <- 1
}
}
return(my.preferred[!my.in][1])
}
#' @name not-in
#' @title
#' Mismatch Test
#' @description
#' Test whether some data are not in a table.
#' @details
#' This helps avoid code structures like \code{!(x \%in\% table)}.
#' @param x
#' A vector of data.
#' @param table
#' A table of reference values.
#' @keywords time, arithmetic
#' @examples
#' 1:2 %!in% 2:4
#' @keywords match
#' @seealso \code{\link[base]{match}}
#' @export
"%!in%" <- function(
x,
table
) {
!(x %in% table)
}
#' @title
#' Find Orphan Values
#' @description
#' Finds values that occur exactly once in a vector.
#' @details
#' Returns the unique values in the same order that they would be
#' returned in a call to \code{\link[base]{unique}}.
#' @param x
#' A vector.
#' @examples
#' orphan(c(NA, 1:3, 3, 4:6, 3, NA, 4))
#' @keywords orphan, unique
#' @seealso \code{\link[base]{unique}}
#' @export
orphan <- function(x) {
return(x[!(duplicated(x) | duplicated(x, fromLast = TRUE))])
}
#' @title
#' Find Duplicate Values
#' @description
#' Finds values that occur exactly twice in a vector.
#' @details
#' Returns the duplicated values in the same order that they would be
#' returned in a call to \code{\link[tuple]{orphan}}. This fundamentally
#' differs from \code{\link[base]{duplicated}}, which returns
#' a logical vector that is \code{TRUE} when it runs into any but
#' the first occurrence of a value (and is therefore dependent on
#' the direction of testing of the vector).
#' @param x
#' A vector.
#' @examples
#' duplicate(c(NA, 1:3, 3, 4:6, 3, NA, 4))
#' @keywords duplicate, duplicated, repeat, repeated
#' @seealso \code{\link[base]{unique}} for similar output, and
#' \code{\link{duplicated}} for the underlying calculations
#' @export
duplicate <- function(x) {
return(x[duplicated(x)][!(duplicated(x[duplicated(x)]) |
duplicated(x[duplicated(x)], fromLast=TRUE))])
}
#' @title
#' Find Values That Are Repeated At Least Thrice
#' @description
#' Finds values that are repeated at least three times in a vector.
#' @details
#' Returns a logical vector that is \code{TRUE} when it runs into
#' any but the first or second occurrences of a value, analogous
#' to \code{\link[base]{duplicated}}.
#' @param x
#' A vector.
#' @param fromLast
#' A logical indicating if triplication should be considered from
#' the reverse side, i.e., the two last (or rightmost) of identical
#' elements would return \code{FALSE}.
#' @param ...
#' Other optional arguments are ignored.
#' @examples
#' triplicated(c(NA, 1:3, 3, 4:6, 3, NA, 4, 3))
#' @keywords triplicate, triplicated, repeat, repeated
#' @seealso \code{\link[base]{duplicated}}
#' @export
triplicated <- function(x, ..., fromLast = FALSE) {
retVec <- rep(FALSE, length(x))
retVec[which(duplicated(x, fromLast = fromLast))[
which(duplicated(x[which(duplicated(x, fromLast = fromLast))],
fromLast = fromLast))]] <- TRUE
return(retVec)
}
#' @title
#' Find Triplicate Values
#' @description
#' Finds values that occur exactly three times in a vector.
#' @details
#' Returns the triplicated values in the same order that they would be
#' returned in a call to \code{\link{orphan}}. This fundamentally
#' differs from \code{\link{triplicated}}, which returns
#' a logical vector that is \code{TRUE} when it runs into any but
#' the first or second occurrences of a value (and is therefore
#' dependent on the direction of testing of the vector).
#' @param x
#' A vector.
#' @examples
#' triplicate(c(NA, 1:3, 3, 4:6, 3, NA, 4))
#' triplicate(c(NA, 1:3, 3, 4:6, 3, NA, 4, 3))
#' @keywords triplicate, triplicated, repeat, repeated
#' @seealso \code{\link{duplicate}}
#' @export
triplicate <- function(x) {
return(x[triplicated(x)][!(duplicated(x[triplicated(x)]) |
duplicated(x[triplicated(x)], fromLast = TRUE))])
}
#' @title
#' Find Elements That Are Repeated At Least n Times
#' @description
#' Finds elements that are repeated at least n times in a vector.
#' @details
#' Returns a logical vector that is \code{TRUE} when it runs into
#' any but the \code{(n-1)}-st occurrences of an element, analogous
#' to \code{\link[base]{duplicated}}.
#' @param x
#' A vector.
#' @param n
#' An integer.
#' @param fromLast
#' A logical indicating if n-replication should be considered
#' from the right side of the vector. If \code{TRUE},
#' the \code{n-1} last (or rightmost) of replicated
#' identical elements return \code{FALSE}.
#' @param ...
#' Other optional arguments are ignored.
#' @examples
#' x <- c(NA, 1:3, 4:5, rep(6, 6), 3, NA, 4, 3, 3)
#' all(tuplicated(x, 3) == triplicated(x))
#' @keywords n-replicate, n-replicated, repeat, repeated, match
#' @seealso \code{\link[base]{duplicated}}
#' @export
tuplicated <- function(x, n, ..., fromLast = FALSE) {
if(missing(n)) {
warning(paste("Parameter 'n' not specified.",
"Triplicate checking presumed."))
n <- 3
} else if(!is.numeric(n) || length(n) != 1)
stop("Parameter 'n' must be numeric of length 1.")
else if(n < 2)
stop("Parameter 'n' must be >= 2.")
else if(as.integer(n) != n) {
warning(paste("Parameter 'n' is not an integer (",
n, "). Truncating to ",
as.integer(n), ".", sep = ""))
n <- as.integer(n)
}
if(n == 2)
return(duplicated(x, fromLast = fromLast))
retVec <- rep(FALSE, length(x))
retVec[which(tuplicated(x, n - 1, fromLast = fromLast))[
which(duplicated(x[which(
tuplicated(x, n - 1, fromLast = fromLast))],
fromLast = fromLast))]] <- TRUE
return(retVec)
}
#' @title
#' Find n-Replicated Elements
#' @description
#' Finds elements that occur exactly n times in a vector.
#' @details
#' Returns the n-replicated elements in the same order that they would be
#' returned in a call to \code{\link{orphan}}. This fundamentally
#' differs from \code{\link{tuplicated}}, which returns
#' a logical vector that is \code{TRUE} when it runs into any but
#' the \code{(n-1)}-st and fewer occurrences of an element
#' (and is therefore dependent on the direction of testing of the vector).
#' @inheritParams tuplicated
#' @examples
#' x <- c(NA, 1:3, 4:5, rep(6, 6), 3, NA, 4, 3, 3)
#' lapply(2:6, function(X) { tuplicate(x, X) })
#' @keywords n-replicate, n-replicated, repeat, repeated, match
#' @seealso \code{\link{duplicate}}
#' @export
tuplicate <- function(x, n) {
if(missing(n)) {
warning(paste("Parameter 'n' not specified.",
"Triplicate checking presumed."))
n <- 3
} else if(!is.numeric(n) || length(n) != 1)
stop("Parameter 'n' must be numeric of length 1.")
else if(n < 2)
stop("Parameter 'n' must be >= 2.")
else if(as.integer(n) != n) {
warning(paste("Parameter 'n' is not an integer (",
n, "). Truncating to ",
as.integer(n), ".", sep = ""))
n <- as.integer(n)
}
if(n == 2)
return(duplicate(x))
return(orphan(x[tuplicated(x, n)][
!(tuplicated(x[tuplicated(x, n)], n - 1) |
tuplicated(x[tuplicated(x, n)], n - 1, fromLast = TRUE))]))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.