R/searchPublished.R

Defines functions searchPublished

Documented in searchPublished

#' Search for T cell receptor beta CDR3 amino acid sequences with known antigen 
#' specificity
#' 
#' Search for published T cell receptor beta CDR3 amino acid sequences with 
#' known antigen specificity in a list of data frames.
#' 
#' @param list A list of data frames generated by the LymphoSeq functions 
#' readImmunoSeq or productiveSeq.  "aminoAcid", "frequencyCount", and "count" 
#' are required columns.
#' @return Returns a data frame of each sample name and instance in the sample 
#' that the published TCR sequence appeared along with additional 
#' information including antigen specificity, epitope, HLA type, and PubMed ID 
#' (PMID) for the reference where the sequence was characterized.  The 
#' publishedTRB database is located in a separate package called LymphoSeqDB 
#' that should be loaded automatically.
#' @examples
#' file.path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq")
#' 
#' file.list <- readImmunoSeq(path = file.path)
#' 
#' productive.aa <- productiveSeq(file.list = file.list, aggregate = "aminoAcid")
#' 
#' searchPublished(list = productive.aa)
#' @seealso Refer to the LymphoSeqDB package for details regarding the 
#' publishedTRB database.
#' @export
#' @importFrom plyr llply
#' @import LymphoSeqDB
searchPublished <- function(list) {
    search <- plyr::llply(list, function(x) 
        x[which(x[, "aminoAcid"] %in% LymphoSeqDB::publishedTRB$aminoAcid), ])
    found <- NULL
    l <- 1
    for (l in 1:length(search)) {
        if (nrow(search[[l]]) != 0) {
            search[[l]] <- cbind(rep(names(search)[l], 
                                     nrow(search[[l]])), search[[l]])
            colnames(search[[l]])[1] <- "sample"
            found <- rbind(found, search[[l]])
        }
    }
    if (is.null(found)) {
        message("No sequences found.")
    } else {
        found$prevalence = NULL
        found <- merge(found, LymphoSeqDB::publishedTRB, by = "aminoAcid")
        found <- found[c("sample", setdiff(names(found), "sample"))]
        return(found)
    }
}
davidcoffey/LymphoSeq documentation built on Dec. 31, 2019, 9:52 p.m.