#' Duplicate and Split Out Rows of a dtable with Shared Values
#'
#' Given a table and a vector of length(vector) = nrow(table), duplicate each row accordingly. Optionally, one can also split up a cell based on a string and have each substring fill a unique cell in the new rows.
#' @param dtable Table (data frame, tbl, matrix) to be have rows cloned.
#' @param count.vec Vector of counts, where 1 indicates no change, 2 indicates two copies, etc. (Does not accept 0 for dropping rows).
#' @param split String on which to split unique values into. The result of calling str_count on split.col MUST give the same number of rows as the matching element of count.vec.
#' @param split.col The column index (or name) containing the string in 'split'.
#' @import stringr
#' @export
#' @keywords duplicate row
#' @examples
#' dtable <- data.frame('Col1' = 1:4, 'Col2' = LETTERS[1:4])
#'
#' dtable
#' Col1 Col2
#'1 1 A
#'2 2 B
#'3 3 C
#'4 4 D
#'
#'row_duplicator(dtable = dtable, count.vec = c(1, 2, 2, 1))
#' Col1 Col2
#'1 1 A
#'2 2 B
#'2.1 2 B
#'3 3 C
#'3.1 3 C
#'4 4 D
#'
#'dtable[, 2] <- c('A,B,A', 'C', 'D,E,F', 'G,H')
#'
#'dtable
#' Col1 Col2
#'1 1 A,B,A
#'2 2 C
#'3 3 D,E,F
#'4 4 G,H
#'
#'row_duplicator(dtable = dtable, split = ',', split.col = 'Col2')
#' Col1 Col2
#'1 1 A
#'1.1 1 B
#'1.2 1 A
#'2 2 C
#'3 3 D
#'3.1 3 E
#'3.2 3 F
#'4 4 G
#'4.1 4 H
#'
row_duplicator <- function(dtable, count.vec = NULL, split = NULL, split.col = NULL) {
# A lot can go wrong here, so we need fairly extensive error catching.
# Univerally applicable errors:
if ( any(class(dtable) != 'data.frame') ) { dtable <- as.data.frame(dtable) }
if ( all(!is.null(count.vec), !is.null(split), !is.null(split.col)) ) { stop('Please provide EITHER count.vec OR split and split.col.') }
if ( is.null(dim(dtable)) || any(dim(dtable) == 1) ) { stop('The dtable for duplication must have at least two rows and columns.') }
if ( !is.null(split) ) { if ( is.null(split.col) ) { stop('If "split" is specified, "split.col" must also be specified.') } }
if ( !is.null(split.col) ) { if ( is.null(split) ) { stop('If "split.col" is specified, "split" must also be specified.') } }
if ( all(is.null(count.vec), is.null(split)) ) { stop('You must provide at least one of count.vec or split and split.col') }
# If count.vec is assigned:
if ( !is.null(count.vec) ) {
if ( nrow(dtable) != length(count.vec) ) { stop('The count vector and dtable row number must be equal.') }
if ( !is.vector(count.vec) ) { stop('The count vector must be a vector (1D).') }
}
# If split/split.col is assigned:
if ( !is.null(split) ) {
if ( length(split.col) != 1 ) { stop('The "split.col" must be a single value.') }
if ( is.numeric(split.col) ) { if ( !(split.col %in% 1:ncol(dtable)) ) { stop('The "split.col" value must be an actual column index.') } }
if ( is.character(split.col) ) {
if ( !(split.col %in% colnames(dtable)) ) {
stop('The "split.col" name must be an actual column name.')
split.col <-
which(colnames(dtable) == split.col) %>%
as.double
# For column indexing.
} }
split.count <- stringr::str_count(string = dtable[, split.col], pattern = split) + 1
# To test against any provided count.vec.
if ( all(split.count == 1) ) {
warning('No instances of the splitting pattern found.')
return(dtable)
# Throw a warning and return the table itself.
}
replacement <-
stringr::str_split(string = dtable[, split.col], pattern = split) %>%
unlist
count.vec <- split.count
# Always have a count.vec for downstream, now.
}
result <-
lapply(X = 1:length(count.vec), FUN = function(r) { dtable[rep(r, times = count.vec[r]), ] }) %>%
{ do.call('rbind', .) }
if ( is.null(split) ) {
return(as_tibble(result))
} else {
result[, split.col] <- replacement
return(as_tibble(result))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.