# R/searchDB.R
#
# Defines functions: searchIreceptor, searchDB
#
# Documented in: searchDB, searchIreceptor

#' Search iReceptor for T cell receptor beta CDR3 amino acid sequences with
#' known antigen specificity
#'
#' Looks up every non-missing CDR3 amino acid sequence of a study table in
#' iReceptor by calling \code{searchIreceptor} once per row, then row-binds
#' the per-sequence results into a single tibble.
#'
#' @param study_table A tibble generated by the LymphoSeq functions
#' searchPublished or topSeqs.  It must contain a "sequence_aa" column; rows
#' whose "sequence_aa" is \code{NA} are dropped before querying.  Every column
#' of each remaining row is passed on to \code{searchIreceptor}.
#' @param credential Currently not used by the function body.  It is kept in
#' the signature so that existing calls such as
#' \code{searchDB(study_table, credential = "Adaptive")} keep working.
#' @return Returns a tibble containing the columns of \code{study_table}
#' together with the columns appended by \code{searchIreceptor}: "count",
#' "disease_diagnosis", "pub_ids", "disease_state_sample", "cell_subset" and
#' "ir_project_sample_id".  A queried sequence contributes one row per
#' matching sample, or a single row with \code{NA} annotations when nothing
#' was found.
#' @examples
#' \dontrun{
#' # Requires network access to the iReceptor service.
#' file_path <- system.file("extdata", "TCRB_sequencing", package = "LymphoSeq2")
#'
#' study_table <- readImmunoSeq(path = file_path)
#'
#' productive_aa <- productiveSeq(study_table = study_table, aggregate = "aminoAcid")
#'
#' top_seqs <- topSeqs(productive_table = productive_aa, top = 1)
#' searchDB(study_table = top_seqs, credential = "Adaptive")
#' }
#' @export
#' @import tidyverse httr jsonlite 
searchDB <- function(study_table, credential) {
    # `.progress` is a formal argument of purrr::pmap() (purrr >= 1.0.0), not
    # of the superseded pmap_dfr(); calling pmap() directly keeps the flag
    # from being forwarded to searchIreceptor() as if it were a column value.
    # pmap() returns a list of tibbles, which bind_rows() stacks.
    study_table %>%
        dplyr::filter(!is.na(sequence_aa)) %>%
        purrr::pmap(searchIreceptor, .progress = TRUE) %>%
        dplyr::bind_rows()
}

#' Search for a T cell receptor beta CDR3 amino acid sequence in iReceptor
#'
#' Queries the iReceptor \code{sequences_summary} endpoint for the
#' "sequence_aa" value of a single row and joins the per-sample annotations
#' returned by the service back onto that row.  Intended to be mapped over
#' the rows of a study table by \code{searchDB}.
#'
#' @param ... Named values forming one row of the study table, as supplied by
#' \code{purrr::pmap}.  They are assembled into a tibble and must include
#' "sequence_aa", the CDR3 amino acid sequence to look up.
#' @return Returns a tibble holding the supplied row values plus the columns
#' "count", "disease_diagnosis", "pub_ids", "disease_state_sample",
#' "cell_subset" and "ir_project_sample_id".  There is one row per sample
#' ("ir_project_sample_id") in which the service reported the sequence;
#' "count" is the number of returned items for that sample.  When the
#' service reports nothing, a single row is returned with those columns set
#' to \code{NA}.  An error is raised when "sequence_aa" is missing or the
#' HTTP request fails.
#' @export
#' @import tidyverse httr jsonlite
searchIreceptor <- function(...) {
    sequence_row <- dplyr::tibble(...)

    # Fail before touching the network: without a sequence the request would
    # be sent unfiltered and the final join below could not succeed anyway.
    if (!"sequence_aa" %in% names(sequence_row)) {
        stop("searchIreceptor() requires a `sequence_aa` value.",
             call. = FALSE)
    }

    # NOTE(review): the account name is hard-coded; consider making it
    # configurable (searchDB() accepts a `credential` argument it never uses).
    request <- httr::GET(
        url = "https://ipa1.ireceptor.org/v2/sequences_summary?",
        query = list(username = "shashi_ravishankar",
                     junction_aa = sequence_row$sequence_aa)
    )
    # Surface HTTP failures explicitly instead of letting an error payload be
    # mistaken for "no hits" or fail obscurely inside the JSON parser.
    httr::stop_for_status(request)

    response_list <- jsonlite::fromJSON(
        httr::content(request, as = "text", encoding = "UTF-8"),
        flatten = TRUE
    )

    no_hits <- length(response_list$summary) == 0 ||
        length(response_list$items) == 0

    if (no_hits) {
        # Placeholder row so the sequence still appears in the output.
        response_table <- dplyr::tibble(
            count = NA,
            disease_diagnosis = NA,
            pub_ids = NA,
            disease_state_sample = NA,
            cell_subset = NA,
            ir_project_sample_id = NA,
            junction_aa = sequence_row$sequence_aa
        )
    } else {
        # `items` lists the individual sequence hits, `summary` carries the
        # annotations of the samples they came from.
        response_items <- dplyr::select(
            dplyr::as_tibble(response_list$items),
            junction_aa, ir_project_sample_id
        )
        response_summary <- dplyr::select(
            dplyr::as_tibble(response_list$summary),
            disease_diagnosis, pub_ids, disease_state_sample,
            cell_subset, ir_project_sample_id
        )
        # Collapse to one row per sample/sequence pair, counting the hits and
        # keeping the first annotation value seen for that pair.
        response_table <- dplyr::left_join(response_items,
                                           response_summary,
                                           by = "ir_project_sample_id") %>%
            dplyr::group_by(ir_project_sample_id, junction_aa) %>%
            dplyr::summarize(
                count = dplyr::n(),
                disease_diagnosis = dplyr::first(disease_diagnosis),
                pub_ids = dplyr::first(pub_ids),
                disease_state_sample = dplyr::first(disease_state_sample),
                cell_subset = dplyr::first(cell_subset),
                .groups = "drop"
            )
    }

    # Attach the annotations to the original row values.
    dplyr::left_join(sequence_row,
                     response_table,
                     by = c("sequence_aa" = "junction_aa"))
}
# elulu3/LymphoSeqTest documentation built on Aug. 27, 2022, 5:47 a.m.