R/CompareMerge.R

Defines functions compare_merge

Documented in compare_merge

#' Compare and merge specific columns from two DEG data frames
#'
#' Inner joins two DEG data frames on a gene column and compares the two
#' suffixed copies of `compare_col` produced by the join. When every row agrees
#' (rows where both values are `NA` count as agreeing), the two copies are
#' replaced by a single column named `compare_col`, appended after the other
#' joined columns. When they disagree, both suffixed columns are kept and a
#' message is emitted.
#'
#' If the suffixed comparison columns are not both present after the join
#' (for example because `compare_col` exists in only one input), a warning is
#' raised and the joined data frame is returned without any column being
#' merged or removed.
#'
#' @importFrom dplyr inner_join
#' @param df1 First data frame.
#' @param df2 Second data frame.
#' @param by_gene Column name by which to join the data frames, typically
#'   "Gene".
#' @param compare_col Column to compare for identity, which will also be the
#'   name of the merged column.
#' @param suffixes Character vector of length 2 giving the suffixes appended
#'   to non-join column names shared by both data frames.
#' @param df_name Value stored in the `name` column of the result, used to
#'   identify the resulting data frame.
#' @return The inner-joined data frame with an added `name` column. If the
#'   compared columns are identical they are collapsed into a single
#'   `compare_col` column; otherwise both suffixed columns are retained.
#' @examples
#' # Create simulated DESeq2 data
#' DEG_deseq2 <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "up", "down"),
#'   log2FoldChange = c(2.5, -3.2, 0.1, 1.8, -2.5),
#'   pvalue = c(0.01, 0.05, 0.9, 0.02, 0.03)
#' )
#'
#' # Display the first 5 rows of the DESeq2 data
#' head(DEG_deseq2, 5)
#'
#' # Create simulated edgeR data
#' DEG_edgeR <- data.frame(
#'   Gene = c("Gene1", "Gene2", "Gene3", "Gene4", "Gene5"),
#'   change = c("up", "down", "no_change", "no_change", "up"),
#'   log2FoldChange = c(2.3, -3.1, 0.2, 0.1, 2.7),
#'   pvalue = c(0.02, 0.04, 0.8, 0.6, 0.01)
#' )
#'
#' # Display the first 5 rows of the edgeR data
#' head(DEG_edgeR, 5)
#'
#' # Merge the DESeq2 and edgeR data
#' deseq2_edgeR <- compare_merge(
#'   df1 = DEG_deseq2,
#'   df2 = DEG_edgeR,
#'   by_gene = "Gene",
#'   compare_col = "change",
#'   suffixes = c("_1", "_2"),
#'   df_name = "deseq2_edgeR"
#' )
#'
#' @export
compare_merge <- function(df1, df2, by_gene, compare_col, suffixes, df_name) {
  if (!is.character(suffixes) || length(suffixes) != 2L) {
    stop("`suffixes` must be a character vector of length 2.", call. = FALSE)
  }

  # Inner join on the gene column; shared non-key columns receive the suffixes
  merged_df <- dplyr::inner_join(df1, df2, by = by_gene, suffix = suffixes)

  # Names the compared column takes on each side after the join
  col1 <- paste0(compare_col, suffixes[1])
  col2 <- paste0(compare_col, suffixes[2])

  # Without this guard a missing column compares as NULL == NULL, which
  # all() treats as TRUE, and the "merge" would then delete `compare_col`.
  absent <- setdiff(c(col1, col2), names(merged_df))
  if (length(absent) > 0) {
    warning(
      "Column(s) ", paste(absent, collapse = ", "),
      " not found after joining; skipping comparison of '", compare_col, "'.",
      call. = FALSE
    )
  } else {
    left <- merged_df[[col1]]
    right <- merged_df[[col2]]

    # Rows where both sides are NA count as equal; isTRUE() keeps a lone NA
    # from reaching `if` as a non-TRUE/FALSE condition.
    same <- isTRUE(all(left == right | (is.na(left) & is.na(right))))

    if (same) {
      # Drop the suffixed copies first so an empty suffix (where col1 equals
      # compare_col) cannot remove the merged column that is added next.
      merged_df[[col1]] <- NULL
      merged_df[[col2]] <- NULL
      merged_df[[compare_col]] <- left
    } else {
      message("The columns ", col1, " and ", col2, " are not identical.")
    }
  }

  # Tag every row with the supplied identifier
  merged_df$name <- df_name

  merged_df
}

Try the TransProR package in your browser

Any scripts or data that you put into this service are public.

TransProR documentation built on April 4, 2025, 3:16 a.m.