R/disambiguate.R

Defines functions col_rename explain_col_rename compute_disambiguate_cols_recipe get_table_colnames dm_disambiguate_cols_impl dm_disambiguate_cols

Documented in dm_disambiguate_cols

#' Resolve column name ambiguities
#'
#' @description
#' Renames columns so that every column name in a `dm` is unique
#' across its tables.
#'
#' @details
#' Column names that occur more than once among the columns of the `dm`
#' are replaced by a new name that combines the current column name, `.sep`
#' and the name of the table the column belongs to.
#' With the default `.position = "suffix"`, the table name comes last
#' (`<column><.sep><table>`); with `.position = "prefix"`, it comes first
#' (`<table><.sep><column>`).
#' Primary key columns take part in the check for duplicated names.
#' Unless `.quiet = TRUE`, the renames are reported in a message in the form
#' of `dm_rename()` calls.
#'
#' @inheritParams dm_add_pk
#' @inheritParams rlang::args_dots_empty
#' @param .sep The string placed between the column name and the table name
#'   when the new column names are built.
#' @param .quiet Logical.
#'   If `FALSE` (the default), the renamed columns are listed in a message;
#'   pass `TRUE` to suppress this message.
#' @param .position
#'   `r lifecycle::badge("experimental")`
#'   Where the table name is placed in the new column name:
#'   `"suffix"` (the default) appends it, `"prefix"` prepends it.
#'   Prepending table names was the default for versions before 1.0.0.
#'
#' @return A `dm` whose column names are unambiguous.
#'
#' @examplesIf rlang::is_installed("nycflights13")
#' dm_nycflights13() %>%
#'   dm_disambiguate_cols()
#' @export
dm_disambiguate_cols <- function(dm, .sep = ".", ..., .quiet = FALSE,
                                 .position = c("suffix", "prefix")) {
  # Validate the inputs in a fixed order: zoom state, stray dots, position.
  check_not_zoomed(dm)
  check_dots_empty()
  chosen_position <- arg_match(.position)

  # `tables = NULL` means that no table is filtered out.
  dm_disambiguate_cols_impl(
    dm,
    tables = NULL,
    sep = .sep,
    quiet = .quiet,
    position = chosen_position
  )
}

#' Disambiguate column names, optionally for a subset of tables
#'
#' Collects the column names, derives a renaming recipe for the duplicated
#' ones, optionally reports it, and applies it to the `dm`.
#'
#' @param dm the `dm` to process
#' @param tables names of the tables to consider, `NULL` keeps all tables
#' @param sep separator used to build the new column names
#' @param quiet if `TRUE`, the renames are not reported
#' @param position `"prefix"` or `"suffix"`, forwarded to
#'   `compute_disambiguate_cols_recipe()`
#' @return the result of `col_rename()` applied to `dm` and the recipe
#' @noRd
dm_disambiguate_cols_impl <- function(dm, tables, sep = ".", quiet = FALSE, position = "suffix") {
  # Primary key columns are part of the duplicate check (exclude_pk = FALSE).
  colnames_by_table <- get_table_colnames(dm, tables, exclude_pk = FALSE)

  rename_plan <- compute_disambiguate_cols_recipe(
    colnames_by_table,
    sep = sep,
    position = position
  )

  if (!quiet) {
    explain_col_rename(rename_plan)
  }

  col_rename(dm, rename_plan)
}

#' List the column names of the tables in a dm
#'
#' Builds a table of `table`/`column` entries from the dm definition.
#' The list of column names per table is flattened with `unnest_col()`
#' (presumably yielding one row per table/column pair -- verify against the
#' helper).
#'
#' @param dm the `dm` to inspect
#' @param tables names of the tables to keep, `NULL` keeps all tables
#' @param exclude_pk if `TRUE`, entries that match a primary key column of
#'   the same table are dropped from the result
#' @return the table of column names, after the optional primary key removal
#' @noRd
get_table_colnames <- function(dm, tables = NULL, exclude_pk = TRUE) {
  dm_def <- dm_get_def(dm)
  if (!is.null(tables)) {
    dm_def <- dm_def[dm_def$table %in% tables, ]
  }

  all_cols <- unnest_col(
    tibble(table = dm_def$table, column = map(dm_def$data, colnames)),
    "column",
    character()
  )

  if (!exclude_pk) {
    return(all_cols)
  }

  # Bring the primary key definition into the same table/column layout
  # so that it can be matched against the column names.
  pk_def <- dm_get_all_pks_def_impl(dm_def)
  pk_cols <- pk_def[c("table", "pk_col", "autoincrement")]
  pk_cols <- set_names(pk_cols, c("table", "column", "autoincrement"))
  pk_cols <- unnest_col(pk_cols, "column", character())

  # in case of flattening, the primary key columns will never be responsible for the name
  # of the resulting column in the end, so they do not need to be disambiguated
  anti_join(all_cols, pk_cols, by = c("table", "column"))
}

#' Build the renaming recipe for duplicated column names
#'
#' The result is a tibble with one row per table that has columns to rename:
#'   * `table`: the table name
#'   * `renames`: a named list of symbols (names are the new column names,
#'     values are the current columns) to be spliced into
#'     `dm_rename(dm, tbl, new = old)`
#'   * `names`: a tibble with the character columns `new_name` and `column`,
#'     used by `explain_col_rename()` to print the `dm_rename()` instructions
#'
#' @param table_colnames a table with the columns `table` and `column`,
#'   holding the table names and column names of the dm
#' @param sep separator used to create new names for duplicated columns
#' @param position `"prefix"` puts the table name in front of the column name,
#'   any other value puts it behind the column name
#' @noRd
#' @autoglobal
compute_disambiguate_cols_recipe <- function(table_colnames, sep, position = "suffix") {
  # Flag every entry whose column name occurs more than once.
  is_dupe <- vec_duplicate_detect(table_colnames$column)
  ambiguous <- table_colnames[is_dupe, ]

  ambiguous$new_name <- if (position == "prefix") {
    paste0(ambiguous$table, sep, ambiguous$column)
  } else {
    paste0(ambiguous$column, sep, ambiguous$table)
  }

  rename_data <- ambiguous[c("new_name", "column")]
  rename_data$column_sym <- syms(rename_data$column)

  # One row per table, the rename data of that table goes into `renames`.
  per_table <- set_names(
    vec_split(rename_data, ambiguous$table),
    "table", "renames"
  )

  table_chunks <- per_table$renames

  # Character version, for reporting.
  per_table$names <- map(
    table_chunks,
    function(chunk) select(chunk, new_name, column)
  )
  # Named list of symbols (new name -> current column), for renaming.
  per_table$renames <- map(
    table_chunks,
    function(chunk) deframe(select(chunk, -column))
  )

  as_tibble(per_table)
}


#' Report the planned column renames as code
#'
#' Emits a message that lists one `dm_rename()` call per renamed column.
#' Nothing is emitted for an empty recipe.
#'
#' @param recipe created by `compute_disambiguate_cols_recipe`
#' @noRd
#' @autoglobal
explain_col_rename <- function(recipe) {
  # Nothing to report if no column needs a new name.
  if (nrow(recipe) == 0) {
    return(NULL)
  }

  # Expand the `names` tibbles to get the flat list of renames.
  flat_renames <- unnest(recipe, names)
  with_text <- mutate(
    flat_renames,
    text = glue("dm_rename({tick_if_needed(table)}, {tick_if_needed(new_name)} = {tick_if_needed(column)})")
  )
  rename_calls <- pull(with_text, text)

  piped_calls <- glue_collapse(rename_calls, " %>%\n  ")
  message("Renaming ambiguous columns: %>%\n  ", piped_calls)
}

#' Apply a renaming recipe to a dm
#'
#' Walks through the recipe and calls `dm_rename()` once per entry,
#' feeding the result of each call into the next one.
#'
#' @param dm the `dm` to start from, returned as is for an empty recipe
#' @param recipe a recipe with the parallel columns `table` and `renames`,
#'   as created by `compute_disambiguate_cols_recipe`
#' @return the result of the last `dm_rename()` call
#' @noRd
col_rename <- function(dm, recipe) {
  rename_one_table <- function(current_dm, table_name, renames) {
    # Inject the table name and splice the `new = old` pairs.
    dm_rename(current_dm, !!table_name, !!!renames)
  }

  reduce2(
    recipe$table,
    recipe$renames,
    rename_one_table,
    .init = dm
  )
}

Try the dm package in your browser

Any scripts or data that you put into this service are public.

dm documentation built on Nov. 2, 2023, 6:07 p.m.