# R/foldChange.R

#' Calculate fold change
#' 
#' Calculates the fold changes between samples for a given set of comparisons.
#' \code{plotFC} plots these fold changes as a bar graph.
#' 
#' 
#' @param tidydf Tidy dataframe of RFI values and their corresponding sample 
#'     and phenotype information. Must contain the columns 'X1' (sample 
#'     names), 'AB' and the column named by \code{RFI_col}. A 'Condition' 
#'     column is also required when \code{samples = TRUE}. There should NOT be 
#'     any technical replicates (samples with the same sample name).
#' @param comparisons Dataframe with 2 columns and n rows, where n is the 
#'     number of desired comparisons. The fold change to be calculated will be
#'     column 1 divided by column 2, for each comparison/row. For \code{plotFC}
#'     only the rows of the comparisons dataframe that you wish to plot should
#'     be given.
#' @param logdata Single logical indicating: \code{calcFC} - whether the fold 
#'     change should be logged (base 2), \code{plotFC} - whether the fold change
#'     has been logged.
#' @param RFI_col Name of column containing RFI values, as string.
#' @param samples Single logical indicating whether the comparison is of 
#'     samples or ABs.
#' @param ABnames Optional argument. Dataframe with exactly the two columns 
#'     'Antibody.Name' and 'Ab.No.'. Merges full antibody names to the output
#'     dataframe (only used when \code{samples = TRUE}).
#' @param fc_df Dataframe of fold changes generated by \code{calcFC}.
#' @param normalised Single logical indicating whether the data have been 
#'     normalised.
#' 
#' 
#' @importFrom assertthat assert_that
#' @importFrom magrittr %>%
#' @importFrom rlang !!
#' @import ggplot2
#' 
#' 
#' @describeIn calcFC Generates a long-format dataframe of fold changes for 
#'     each comparison, with columns 'comp1' and 'comp2' naming the items 
#'     compared, a 'FoldChange' column, an 'AB' (or 'Sample') column and, when 
#'     comparing samples, the 'Condition' of the second sample of each 
#'     comparison.
#' @export 
calcFC <- function(tidydf, comparisons, logdata = FALSE, RFI_col = "RFI", 
                   samples = TRUE, ABnames) {
  
  # check inputs
  assert_that(is.character(RFI_col), length(RFI_col) == 1,
              msg = "Check 'RFI_col' is a single string")
  
  # convert to plain data.frames BEFORE any `[row, col]` indexing: on a tibble
  # such indexing returns a tibble rather than a vector, which silently breaks 
  # the length/membership checks below
  if (inherits(tidydf, "tbl_df")) {
    tidydf <- as.data.frame(tidydf)
  }
  comparisons <- as.data.frame(comparisons, stringsAsFactors = FALSE)
  
  assert_that(
    all(c("X1", RFI_col, "AB") %in% colnames(tidydf)),
    msg = "Check 'tidydf' has columns 'X1', 'AB' and your input for 'RFI_col'")
  
  assert_that(is.logical(logdata), length(logdata) == 1,
              msg = "Check 'logdata' is single logical")
  
  assert_that(ncol(comparisons) >= 2,
              msg = "Check 'comparisons' df has 2 columns")
  
  # names being compared, as character so that factor columns are matched by
  # label and not by their integer codes
  comp1 <- as.character(comparisons[, 1])
  comp2 <- as.character(comparisons[, 2])
  
  if (samples) {
    assert_that(all(c(comp1, comp2) %in% tidydf$X1),
                msg = "Check 'comparisons' df uses sample names")
    # 'Condition' is looked up further down, so fail early if it is absent
    assert_that("Condition" %in% colnames(tidydf),
                msg = "Check 'tidydf' has a 'Condition' column")
  } else {
    assert_that(all(c(comp1, comp2) %in% tidydf$AB),
                msg = "Check 'comparisons' df uses AB names")
  }
   
  if (! missing(ABnames)){
    assert_that(
      all(c("Antibody.Name", "Ab.No.") %in% colnames(ABnames)),
      ncol(ABnames) == 2,
      msg = "Ensure columns 'Antibody.Name' and 'Ab.No.' exist in the 
            'ABnames' dataframe")
  }
  
  if (logdata) {
    # missing values are ignored here; they simply propagate as NA through 
    # log2() instead of triggering the 'negative value' error
    assert_that(!any(tidydf[[RFI_col]] < 0, na.rm = TRUE),
                msg = "Cannot take log of negative RFI values")
  }
  
  
  # convert to wide format: one row per sample (X1), one column per AB
  wide_df <- tidydf %>%
    dplyr::select(X1, !!(as.name(RFI_col)), AB) %>%
    tidyr::spread(value = !!(as.name(RFI_col)), key = AB)
  
  # convert to matrix; drop = FALSE keeps the column name when there is only
  # a single AB
  numeric_mat <- as.matrix(wide_df[, -1, drop = FALSE])
  rownames(numeric_mat) <- as.character(wide_df[, 1])
  ## transpose data if ABs, so that rows are always the items being compared
  if (! samples) {
    numeric_mat <- t(numeric_mat)
  }
  
  if (logdata){
    numeric_mat <- log2(numeric_mat)
  }
  
  
  # calculate fold changes for all comparisons at once
  numerator <- numeric_mat[match(comp1, rownames(numeric_mat)), , 
                           drop = FALSE]
  denominator <- numeric_mat[match(comp2, rownames(numeric_mat)), , 
                             drop = FALSE]
  
  if (logdata){
    # subtract if logdata
    fc_mat <- numerator - denominator
  } else {
    # divide if raw
    fc_mat <- numerator / denominator
  }
  
  ## row names would otherwise be carried over from 'comp1' and may repeat
  rownames(fc_mat) <- NULL
  
  
  # convert back to df
  fc_df <- as.data.frame(fc_mat)
  
  ## add column names (ABs or samples)
  colnames(fc_df) <- colnames(numeric_mat)
  
  ## positions of the fold change columns, recorded before extra columns are
  ## appended
  fc_cols <- seq_len(ncol(numeric_mat))
 
      
  ## add conditions column if comparing samples
  ## (condition of the first row matching the second sample of the comparison)
  if (samples) {
    fc_df$Condition <- as.character(
      tidydf$Condition[match(comp2, as.character(tidydf$X1))])
  }
  
  
  # add comparison columns
  fc_df$comp1 <- comparisons[, 1]
  fc_df$comp2 <- comparisons[, 2]
      
  # change to long form
  if (samples) {
    
    fc_df <- fc_df %>%
      tidyr::gather(!!fc_cols, key = "AB", value = "FoldChange") 
    
  } else {
    
    fc_df <- fc_df %>%
      tidyr::gather(!!fc_cols, key = "Sample", value = "FoldChange") 
  }

  # merge full antibody names (only meaningful when an 'AB' column exists)
  if (samples && ! missing(ABnames)) {
    fc_df <- merge(fc_df, ABnames, by.x = "AB", by.y = "Ab.No.",
                   all.x = TRUE, all.y = FALSE)
  }
  
  fc_df
      
}


#' @describeIn calcFC Creates faceted barplots of fold changes of the 
#'     desired comparisons for each antibody/sample and returns the ggplot 
#'     object. When comparing samples, facets use the 'Antibody.Name' column 
#'     if it is present in \code{fc_df} and the 'AB' column otherwise.
#' @export
plotFC <- function(fc_df, comparisons, logdata = FALSE, normalised = FALSE) {
  
  # check inputs
  fc_cols <- colnames(fc_df)
  
  # all three value columns are required, plus exactly one of 'AB'/'Sample'
  assert_that(all(c("comp1", "comp2", "FoldChange") %in% fc_cols),
              xor("AB" %in% fc_cols, "Sample" %in% fc_cols), 
              msg = "'fc_df' should have one of each of the following columns: 
              'comp1','comp2' & 'FoldChange'. It should also 
              have EITHER a 'AB' or 'Sample' column")
  
  # plain data.frame so that `[, i]` returns a vector (a tibble would return
  # a one-column tibble and fail the character check)
  comparisons <- as.data.frame(comparisons, stringsAsFactors = FALSE)
  
  assert_that(ncol(comparisons) >= 2,
              msg = "Check 'comparisons' df has 2 columns")
  
  assert_that(is.character(comparisons[, 1]),
              is.character(comparisons[, 2]),
              msg = "Check that both columns in the 'comparisons' 
              data frame is of string type")
  
  assert_that(is.logical(logdata), length(logdata) == 1,
              msg = "Check 'logdata' is a single logical")
  
  assert_that(is.logical(normalised), length(normalised) == 1,
              msg = "Check 'normalised' is a single logical")
  
  
  # create titles
  ylab <- "Fold Change"
  
  if (logdata){
    ylab <- paste("Log2", ylab)
  }
  
  if (normalised){
    ylab <- paste("Normalised", ylab)
  }
  
  # labels of the comparisons to keep, in the same format as 'Comp' below
  wanted <- paste(comparisons[, 1], "vs", comparisons[, 2])
  
  # main plot
  gg <- fc_df %>%
    dplyr::mutate(Comp = paste(comp1, "vs", comp2)) %>%
    dplyr::filter(Comp %in% !!wanted) %>%
    ggplot(aes(y = FoldChange, x = Comp)) +
    geom_bar(stat = "identity", position = "dodge") +
    coord_flip() +
    labs(title = "Fold change for each comparison", x = "Comparison", 
         y = ylab) +
    theme(plot.title = element_text(hjust = 0.5))
  
  # facet by antibody or by sample
  if ("AB" %in% fc_cols) {
    
    if ("Antibody.Name" %in% fc_cols) {
      
      gg <- gg + 
        facet_wrap(~Antibody.Name, ncol = 4) 
      
    } else {
      
      # 'Antibody.Name' only exists if 'ABnames' was supplied to calcFC;
      # fall back to the antibody identifier
      gg <- gg + 
        facet_wrap(~AB, ncol = 4)
    }
    
  } else {
    
    gg <- gg +
      facet_wrap(~Sample, ncol = 4)
  }
  
  gg
  
}
# lucyleeow/RPPA documentation built on May 5, 2019, 3:46 a.m.