R/row_duplicator.R

Defines functions row_duplicator

Documented in row_duplicator

#' Duplicate and Split Out Rows of a dtable with Shared Values
#'
#' Given a table and a vector of length(vector) = nrow(table), duplicate each row accordingly. Optionally, one can also split up a cell based on a string and have each substring fill a unique cell in the new rows.
#' @param dtable Table (data frame, tbl, matrix) to be have rows cloned.
#' @param count.vec Vector of counts, where 1 indicates no change, 2 indicates two copies, etc. (Does not accept 0 for dropping rows).
#' @param split String on which to split unique values into. The result of calling str_count on split.col MUST give the same number of rows as the matching element of count.vec.
#' @param split.col The column index (or name) containing the string in 'split'.
#' @import stringr
#' @export
#' @keywords duplicate row
#' @examples
#' dtable <- data.frame('Col1' = 1:4, 'Col2' = LETTERS[1:4])
#'
#' dtable
#'  Col1 Col2
#'1    1    A
#'2    2    B
#'3    3    C
#'4    4    D
#'
#'row_duplicator(dtable = dtable, count.vec = c(1, 2, 2, 1))
#'   Col1 Col2
#'1     1    A
#'2     2    B
#'2.1   2    B
#'3     3    C
#'3.1   3    C
#'4     4    D
#'
#'dtable[, 2] <- c('A,B,A', 'C', 'D,E,F', 'G,H')
#'
#'dtable
#'  Col1  Col2
#'1    1 A,B,A
#'2    2     C
#'3    3 D,E,F
#'4    4   G,H
#'
#'row_duplicator(dtable = dtable, split = ',', split.col = 'Col2')
#'    Col1 Col2
#'1      1    A
#'1.1    1    B
#'1.2    1    A
#'2      2    C
#'3      3    D
#'3.1    3    E
#'3.2    3    F
#'4      4    G
#'4.1    4    H
#'
row_duplicator <- function(dtable, count.vec = NULL, split = NULL, split.col = NULL) {

  # A lot can go wrong here, so we need fairly extensive error catching.

  # Univerally applicable errors:
  if ( any(class(dtable) != 'data.frame') ) { dtable <- as.data.frame(dtable) }
  if ( all(!is.null(count.vec), !is.null(split), !is.null(split.col)) ) { stop('Please provide EITHER count.vec OR split and split.col.') }
  if ( is.null(dim(dtable)) || any(dim(dtable) == 1) ) { stop('The dtable for duplication must have at least two rows and columns.') }
  if ( !is.null(split) ) { if ( is.null(split.col) ) { stop('If "split" is specified, "split.col" must also be specified.') } }
  if ( !is.null(split.col) ) { if ( is.null(split) ) { stop('If "split.col" is specified, "split" must also be specified.') } }
  if ( all(is.null(count.vec), is.null(split)) ) { stop('You must provide at least one of count.vec or split and split.col') }


  # If count.vec is assigned:
  if ( !is.null(count.vec) ) {

    if ( nrow(dtable) != length(count.vec) ) { stop('The count vector and dtable row number must be equal.') }
    if ( !is.vector(count.vec) ) { stop('The count vector must be a vector (1D).') }

  }


  # If split/split.col is assigned:
  if ( !is.null(split) ) {

    if ( length(split.col) != 1 ) { stop('The "split.col" must be a single value.') }
    if ( is.numeric(split.col) ) { if ( !(split.col %in% 1:ncol(dtable)) ) { stop('The "split.col" value must be an actual column index.') } }

    if ( is.character(split.col) ) {

      if ( !(split.col %in% colnames(dtable)) ) {

        stop('The "split.col" name must be an actual column name.')

        split.col <-
          which(colnames(dtable) == split.col) %>%
          as.double
        # For column indexing.

        } }

    split.count <- stringr::str_count(string = dtable[, split.col], pattern = split) + 1
    # To test against any provided count.vec.

    if ( all(split.count == 1) ) {

      warning('No instances of the splitting pattern found.')
      return(dtable)
      # Throw a warning and return the table itself.

      }

    replacement <-
      stringr::str_split(string = dtable[, split.col], pattern = split) %>%
      unlist

    count.vec <- split.count
    # Always have a count.vec for downstream, now.

  }


  result <-
    lapply(X = 1:length(count.vec), FUN = function(r) { dtable[rep(r, times = count.vec[r]), ] }) %>%
    { do.call('rbind', .) }


  if ( is.null(split) ) {

    return(as_tibble(result))

  } else {

    result[, split.col] <- replacement

    return(as_tibble(result))

  }

}
danjamesadams/Dantools documentation built on Aug. 24, 2019, 6:15 p.m.