R/commonSeqsPlot.R

Defines functions commonSeqsPlot

Documented in commonSeqsPlot

#' Common sequences plot
#' 
#' Creates a frequency scatter plot comparing two samples, showing either only 
#' the sequences they have in common or all sequences found in either sample.
#' 
#' @param sample1 A name of a sample in a list of data frames generated by the 
#' LymphoSeq function productiveSeq.  Plotted on the x axis.
#' @param sample2 A name of a sample in a list of data frames generated by the 
#' LymphoSeq function productiveSeq.  Plotted on the y axis.
#' @param productive.aa A list of data frames of productive amino acid sequences 
#' produced by the LymphoSeq function productiveSeq containing the 
#' samples to be compared.
#' @param show A character vector specifying whether only the common sequences 
#' should be shown or all sequences.  Available options are "common" or "all".
#' Any other value is an error.
#' @return Returns a ggplot2 frequency scatter plot of the two samples.  With 
#' \code{show = "common"} only the shared sequences are plotted; with 
#' \code{show = "all"} every sequence found in either sample is plotted, and a 
#' sequence absent from one sample is given a value of 0 for that sample.
#' @details The plot is made using the package ggplot2 and can be reformatted
#' using ggplot2 functions.  See examples below.  An error is raised if any 
#' data frame in \code{productive.aa} contains a missing, empty, stop-codon 
#' containing ("*") or duplicated amino acid sequence, or if \code{sample1} or 
#' \code{sample2} is not a name in \code{productive.aa}.
#' @seealso An excellent resource for examples on how to reformat a ggplot can 
#' be found in the R Graphics Cookbook online (\url{http://www.cookbook-r.com/Graphs/}).
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#' 
#' file.list <- readImmunoSeq(path = file.path)
#' 
#' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid")
#' 
#' commonSeqsPlot("TRB_Unsorted_32", "TRB_Unsorted_83", 
#'    productive.aa = productive.aa)
#' 
#' # Change the X and Y axes to log-10 scale
#' commonSeqsPlot("TRB_Unsorted_32", "TRB_Unsorted_83", 
#'    productive.aa = productive.aa) +
#'    ggplot2::scale_x_log10() + 
#'    ggplot2::scale_y_log10() + 
#'    ggplot2::annotation_logticks(sides = "bl")
#' @export
#' @import ggplot2
commonSeqsPlot <- function(sample1, sample2, productive.aa, show = "common") {
    # Reject unknown values of `show` up front.  Without this, neither branch
    # below runs and the function would hand back something that is not a plot.
    show <- match.arg(show, choices = c("common", "all"))

    # Fail early, naming the offending sample, rather than deep inside merge().
    absent <- setdiff(c(sample1, sample2), names(productive.aa))
    if (length(absent) > 0) {
        stop("Sample(s) not found in productive.aa: ", 
             paste(absent, collapse = ", "), call. = FALSE)
    }

    # Every data frame in the list must hold unique, productive amino acid
    # sequences.  NA is treated as unproductive so that it triggers the
    # informative error below instead of an NA condition in `if`.
    has.invalid <- vapply(productive.aa, function(x) {
        aa <- x[["aminoAcid"]]
        any(is.na(aa) | aa == "" | grepl("\\*", aa) | duplicated(aa))
    }, logical(1))
    if (any(has.invalid)) {
        stop("Your list contains unproductive sequences or has not been aggreated for productive amino acid sequences.  Remove unproductive sequences first using the function productiveSeq with the aggregate parameter set to 'aminoAcid'.", call. = FALSE)
    }

    # aes_string() parses its arguments as R expressions, so column names are
    # wrapped in backticks to keep non-syntactic sample names (for example
    # "Sample-1" or "32_Unsorted") from being misread.
    quoteName <- function(column) paste0("`", column, "`")

    if (show == "common") {
        plot.data <- commonSeqs(samples = c(sample1, sample2), 
                                productive.aa = productive.aa)
        # Columns 2 and 3 of the commonSeqs result are used as x and y.
        x.column <- names(plot.data)[2]
        y.column <- names(plot.data)[3]
    } else {
        # Full outer join on the amino acid sequence.
        plot.data <- merge(productive.aa[[sample1]], productive.aa[[sample2]], 
                           by = "aminoAcid", all = TRUE)
        # Rows present in only one sample come back as NA; set them to 0.
        plot.data[is.na(plot.data)] <- 0
        # merge() suffixes the shared column names with .x / .y; rename the
        # two frequency columns after their samples.
        names(plot.data)[names(plot.data) == "frequencyCount.x"] <- sample1
        names(plot.data)[names(plot.data) == "frequencyCount.y"] <- sample2
        x.column <- sample1
        y.column <- sample2
    }

    ggplot2::ggplot(data = plot.data, 
                    ggplot2::aes_string(x = quoteName(x.column), 
                                        y = quoteName(y.column), 
                                        label = "aminoAcid")) + 
        ggplot2::geom_point() +
        ggplot2::theme_minimal() + 
        ggplot2::labs(x = paste(sample1, "frequency (%)"), 
                      y = paste(sample2, "frequency (%)"))
}
davidcoffey/LymphoSeq documentation built on Dec. 31, 2019, 9:52 p.m.