R/consolidate_duplicates.R

Defines functions consolidate consolidate_duplicates

Documented in consolidate consolidate_duplicates

#' Utility function for consolidating duplicates.
#'
#' Groups `df` by the requested columns and evaluates every expression in
#' `summary_parsing` within each group via `summarize()`.
#'
#' @param df Dataframe to consolidate.
#' @param summary_parsing List of quoted expressions (e.g. created with
#'   `rlang::quo()`) that is spliced into `summarize()`. The names of the
#'   list are used as the names of the summary columns.
#' @param ... Columns by which to group for summarize(). Only used when
#'   `by_cols` is left at its default.
#' @param by_cols Alternative argument to allow for passing through the
#'   columns by which to group, either as a list of quosures/symbols or as a
#'   character vector of column names. The default `NA` means "use `...`".
#'
#' @return The data frame returned by `summarize()`: the grouping columns
#'   plus one column per entry of `summary_parsing`.
#'
#' @import dplyr
#'
#' @export
#'
#' @examples
#' consolidate(mtcars, list(mean_hp = rlang::quo(mean(hp))), cyl)
#' consolidate(mtcars, list(mean_hp = rlang::quo(mean(hp))), by_cols = "cyl")
consolidate <- function(df, summary_parsing, ..., by_cols = NA) {
  if (all(is.na(by_cols))) {
    # No explicit grouping columns were handed over: capture `...` instead.
    by_cols <- rlang::enquos(...)
  } else if (is.character(by_cols)) {
    # Strings spliced into group_by() would be treated as constants rather
    # than column references, so turn column names into symbols first.
    by_cols <- rlang::syms(by_cols)
  }

  df %>%
    group_by(!!!by_cols) %>%
    summarize(!!!summary_parsing)
}


#' Consolidate duplicate entries
#'
#' Consolidates the columns for which there are summary functions provided.
#' For columns without summary functions, non-duplicated entries retain their
#' original value, and duplicated entries return NA.
#'
#' Rows count as duplicated when their combination of values in the grouping
#' columns occurs more than once in `df`. All such rows are collapsed with
#' `consolidate()` and appended below the rows that were already unique.
#'
#' @param df Dataframe to consolidate.
#' @param summary_parsing List of quoted expressions (e.g. created with
#'   `rlang::quo()`) to apply in the summarize function.
#' @param ... Columns by which to group for (summarize). Only used when
#'   `by_cols` is left at its default.
#' @param by_cols Alternative argument to allow for passing through the
#'   columns by which to group, either as a list of quosures/symbols or as a
#'   character vector of column names. The default `NA` means "use `...`".
#'
#' @return A data frame holding the non-duplicated rows of `df` followed by
#'   the consolidated rows for the duplicated groups.
#'
#' @import dplyr
#'
#' @export
#'
#' @examples
#' consolidate_duplicates(mtcars, list(wt = rlang::quo(mean(wt))), hp)
consolidate_duplicates <- function(df, summary_parsing, ..., by_cols = NA) {
  if (all(is.na(by_cols))) {
    # No explicit grouping columns were handed over: capture `...` instead.
    by_cols <- rlang::enquos(...)
  } else if (is.character(by_cols)) {
    # Use symbols so the same value works for both select() here and the
    # grouping step inside consolidate().
    by_cols <- rlang::syms(by_cols)
  }

  # Flag every member of a duplicated group, not just the repeats: scanning
  # from the front misses the first occurrence, scanning from the back misses
  # the last one, so the two scans are combined.
  key_cols <- select(df, !!!by_cols)
  is_duplicated <- duplicated(key_cols) | duplicated(key_cols, fromLast = TRUE)

  df_duplicates <- consolidate(
    df[is_duplicated, ],
    summary_parsing = summary_parsing,
    by_cols = by_cols
  )

  # Columns missing from the consolidated rows are filled with NA by
  # bind_rows().
  df[!is_duplicated, ] %>%
    bind_rows(df_duplicates)
}
julianbarg/DataAnalysisTools documentation built on April 9, 2020, 11:43 a.m.