This report provides a summary of the IEDB query step for allo-epitope identification. The output csv contain both donor and recipient peptides derived from the "antigen" input alleles that are predicted to bind bind to the user specified "presenting" allele. The user can use these files as input for in the prep_hu_binders.RmD step in to identify only donor derived peptides that contain non self polymorphisms.

Resources and Reference files

The allele lists and amino acid sequences used in this package were downloaded from IPD_IMGT-HLA website on 2021. A description of can be found at HLA nomenclature. The prediction function used in allotopeR is performed via the API to the Immune epitope database.

knitr::opts_chunk$set(echo = TRUE, message = FALSE, warning = FALSE, include = FALSE)

eval_plot <- params$align_plot
library(httr)
library(tidyverse)
library(janitor)
library(here)
library(glue)
library(DT)
library(datasets)
library(hlaR)
library(msa)
#* variables *#
method <- c("netmhciipan", "recommended") # IEDB prediction method
nm_don <- str_c(params$donor_ag,collapse = "_") # donor/presenting antigens bind to output file name
nm_present <- str_c(params$presenting_allele, collapse = "_") 
#* end of variables *#

#* functions *#
find_non_self_binders <- function(data){
    self_peps <- data %>% filter(ag_type == "self")
    allo_peps <- data %>% filter(ag_type == "stim") %>% anti_join(self_peps, by = "peptide")
    return(allo_peps)
}
#* end of functions *#

#* start of reference tables *#
tbl_fasta_human <- read.csv("https://raw.githubusercontent.com/LarsenLab/public-data/master/IEDB/human_all.csv", check.names = FALSE) # fasta reference table, human

tbl_eplet_II <- read.csv(system.file("extdata/example", "MHC_II_test.csv", package = "hlaR"), sep = ",", header = TRUE) %>% filter(row_number() <= 2) # MHC II eplet table
#* end of reference tables *#

#* others *#   
# if(!dir.exists(file.path(here(), "output_httr"))){ # create an output folder if it doesn't exist
#   dir.create(file.path(here(), "output_httr"))
# }
# 
# out_fd <- as.character(glue(here(), "/output_httr/")) 

if(!dir.exists(paste0(path.package("immuntools"), "/output_httr/"))){ # create an output folder if it doesn't exist
  dir.create(file.path(paste0(path.package("immuntools"), "/output_httr/")))
}

out_fd <- as.character(file.path(paste0(path.package("immuntools"), "/output_httr/"))) 
#* end of others *#
# sub set
ref_self <- tbl_fasta_human %>% 
    filter(allele_short_nm %in% c(params$donor_ag, params$self_ag)) %>% 
    distinct(seq, .keep_all = T) %>% 
    group_by(allele_short_nm) %>% 
    slice_max(order_by = length) %>%
    ungroup()

ref_present <- tbl_fasta_human %>% 
    filter(allele_short_nm %in% params$presenting_allele) %>% 
    distinct(seq, .keep_all = T) %>% 
    group_by(allele_short_nm) %>% 
    slice_max(order_by = length) %>%
    ungroup()

allele_query <- ref_present %>% 
    pull(allele_query_nm) %>% 
    str_c(collapse = ",")

# clean labels
donor_ag_label <- params$donor_ag %>% 
                  str_to_lower() %>% 
                  str_c(sep = "_", collapse = T) %>%
                        str_replace(., "TRUE", "-") 

presenting_allele_label <- params$presenting_allele %>% 
                            str_to_lower() %>% 
                            str_c(sep = "_", collapse = T) %>%
                            str_replace(., "TRUE", "-") 
rm(tbl_fasta_human)
query_prep <- ref_self %>%
              mutate(iedb_query_id = str_c("%3E", allele),
                     iedb_query_seq = str_c("", seq),
                     iedb_query_seq = str_replace_all(iedb_query_seq, "\\n", ""),
                     query_fasta = str_c(iedb_query_id, iedb_query_seq))

seq_names <- query_prep %>%  
              rowid_to_column() %>% 
              dplyr::rename(antigen = allele_short_nm,
                            seq_num = rowid) %>% 
              mutate(ag_type = if_else(antigen %in% params$donor_ag, "stim", "self")) %>%
              select(-c(seq, id, query_fasta, iedb_query_id))

seq_names_short <- seq_names %>% 
                    select(antigen, ag_type)
# allele info
allele <- seq_names_short %>% 
                left_join(., ref_self, by = c("antigen" = "allele_short_nm")) %>%
                mutate(allele = str_remove(allele_query_nm, "HLA-")) %>%
                select(ag_type, allele) 
rcpt <- allele %>% filter(ag_type == "self") %>% pull(allele)
don <- allele %>% filter(ag_type == "stim") %>% pull(allele)

if(length(rcpt) == 1){
  rcpt <- c(rcpt, rcpt)
} else{rcpt <- rcpt}

 if(length(don) == 1){
  don <- c(don, don)
} else{don <- don}

rcpt_ag <- str_extract(rcpt[1], "[^*]+") %>% str_remove(., "[0-9]")
don_ag <- str_extract(don[1], "[^*]+") %>% str_remove(., "[0-9]")

# build allele table for eplet mismatch calculation
rcpt_indx <- which(str_detect(colnames(tbl_eplet_II), rcpt_ag))
don_indx <- which(str_detect(colnames(tbl_eplet_II), don_ag))

tbl_eplet_II[c(1:2), ] <- NA
tbl_eplet_II$pair_id <- 1

tbl_eplet_II[1,]$subject_type<- "recipient"
tbl_eplet_II[1,rcpt_indx[1]] <- rcpt[1]
tbl_eplet_II[1,rcpt_indx[2]] <- rcpt[2]

tbl_eplet_II[2,]$subject_type<- "donor"
tbl_eplet_II[2,don_indx[1]] <- don[1]
tbl_eplet_II[2,don_indx[2]] <- don[2]

eplet_mm <- CalEpletMHCII(tbl_eplet_II, ver = 2)$overall_count

rm(allele, don, don_ag, don_indx, rcpt, rcpt_ag, rcpt_indx, tbl_eplet_II)
for(i in 1:length(method)){ # for each method
  for(j in 1:nrow(seq_names)){ # for each antigen
    # set parameter list
    param2httr = list('method' = method[i],
                      'sequence_text' = as.character(seq_names$iedb_query_seq[j]),
                      'allele' = str_c(ref_present$allele_query_nm, collapse = ","),
                      'length' = params$seq_len)
    # send request to api
    res_api <- httr::POST(url = 'http://tools-cluster-interface.iedb.org/tools_api/mhcii/', 
                          body = param2httr)

    # extract content from the request
     cat(httr::content(res_api, "text"), 
         file = glue(out_fd, seq_names$antigen[j], "_", method[i], ".txt"))
  }
}
#* start of netmhciipan *#
  files <- fs::dir_ls(out_fd, glob = "*_netmhciipan.txt")
  netmhciipan <- files %>%
              setNames(nm = .) %>% 
              map_df(~read_tsv(.x, col_types = cols(), col_names = TRUE), .id = "id")  %>%
              mutate(antigen = str_remove(gsub(".*/", "", id), "_netmhciipan.txt")) %>%
              select(-c(id, seq_num)) %>%
                          left_join(seq_names_short, by = "antigen") %>%
                          select(allele, antigen, ag_type, everything()) %>%
                          mutate(binder = if_else(rank <= 3, "strong",
                                                    if_else(rank <=10, "weak", "no")))  

  tt_screen_netpan <- netmhciipan %>% filter(ag_type != "self") %>% nrow(.)

  netmhciipan <- netmhciipan %>%
              filter(binder != "no") %>%
              arrange(match(binder, c("strong", "weak")))

write_csv(netmhciipan, 
          glue("{out_fd}netpan_peps_binder_{nm_don}_in_{nm_present}.csv"))
unlink(files)
#* start of netmhciipan *#

#* start of recommended *#
# please note: binder label for iedb method is derived from adjusted_rank
files <- fs::dir_ls(out_fd, glob = "*_recommended.txt")

recommended <- files %>%
                setNames(nm = .) %>% 
                map_df(~read_tsv(.x, col_types = cols(), col_names = TRUE), .id = "id")  %>%
                mutate(antigen = str_remove(gsub(".*/", "", id), "_recommended.txt")) %>%
                select(-c(id, seq_num)) %>%
                left_join(seq_names_short, by = "antigen") %>%
                select(allele, antigen, ag_type, everything()) %>%
                mutate(binder = if_else(adjusted_rank <= 3, "strong",
                                        if_else(adjusted_rank <= 10, "weak", "no"))) 
 tt_screen_rec <- recommended %>% filter(ag_type != "self") %>% nrow(.)

 recommended <- recommended %>%
              filter(binder != "no") %>%
              arrange(match(binder, c("strong", "weak")))

write_csv(recommended, 
          glue("{out_fd}rec_peps_binder_{nm_don}_in_{nm_present}.csv"))
unlink(files)
#* end of recommended *#
# input antigens
allele_info <- seq_names %>% 
                select(-c(seq_num, length, allele, allele_query_nm)) %>%
                left_join(., ref_self, by = c("antigen" = "allele_short_nm")) %>%
                dplyr::rename(type = ag_type, seq_len = length) %>% 
                select(antigen, type, seq, seq_len ) %>%
                arrange(type)
# peptide of self antigen are excluded
num_len <- ifelse(str_count(params$seq_len, ",") > 0, 
                  str_count(params$seq_len, ",") + 1,
                  1)

netpan <- iedb <- data.frame(method = character(),
                     pep_len = character(),
                     tt_screen = character(),
                     num_binder_noneself = character(),
                     num_uniqcore_noneself = character())

# binder info for each seq_length
# netpan
for(i in 1:num_len){
  tmp <- netmhciipan %>% 
         find_non_self_binders() %>%
         filter(ag_type != "self") %>% 
         filter(length == unique(netmhciipan$length)[i] ) 

  netpan[i,]$method <- "netpan"
  netpan[i,]$pep_len <- unique(tmp$length)
  netpan[i,]$tt_screen <- tt_screen_netpan

  cnt1 <- tmp %>% 
          group_by(binder) %>%
          tally() %>%
          data.frame() 
  cnt2 <- paste(cnt1$binder, cnt1$n, sep = ":")
  netpan[i,]$num_binder_noneself <- ifelse(length(cnt2) == 1,
                                           cnt2,
                                           paste (cnt2[1], cnt2[2], sep = " "))
  rm(cnt1, cnt2)

  cnt1 <- tmp %>% 
          filter(core_peptide != "-") %>%
          group_by(binder) %>% 
          summarise(count = n_distinct(core_peptide)) 

  cnt2 <- paste(cnt1$binder, cnt1$count, sep = ":")
  netpan[i,]$num_uniqcore_noneself <- ifelse(length(cnt2) == 1, cnt2,
                                           paste(cnt2[1], cnt2[2], sep = " "))

  rm(tmp, cnt1, cnt2)

  # rec_peps
  # "The selection IEDB Recommended uses the Consensus approach, combining NN-align, SMM-align, CombLib and Sturniolo if any corresponding predictor is available for the molecule, otherwise NetMHCIIpan is used. The Consensus approach considers a combination of any three of the four methods, if available, where Sturniolo as a final choice." - http://tools.iedb.org/mhcii/help/#Method
  # filter out netpan result from iedb recommended method
  tmp <- recommended %>% 
         find_non_self_binders() %>%
         filter(ag_type != "self" & method != "NetMHCIIpan") %>% 
         filter(length == unique(recommended$length)[i] )

  iedb[i,]$method <- "iedb"
  iedb[i,]$pep_len <- unique(tmp$length)
  iedb[i,]$tt_screen <- tt_screen_rec

  cnt1 <- tmp %>% 
          group_by(binder) %>% 
          tally() %>%
          data.frame() 
  cnt2 <- paste(cnt1$binder, cnt1$n, sep = ":")
  iedb[i,]$num_binder_noneself <- ifelse(length(cnt2) == 1, cnt2,
                                         paste(cnt2[1], cnt2[2], sep = " "))

  rm(cnt1, cnt2)

  cnt1 <- tmp %>% 
          group_by(binder) %>% 
          summarise(count = n_distinct(sturniolo_core)) 

  cnt2 <- paste(cnt1$binder, cnt1$count, sep = ":")
  iedb[i,]$num_uniqcore_noneself <- ifelse(length(cnt2) == 1, cnt2,
                                           paste(cnt2[1], cnt2[2], sep = " "))


  rm(tmp, cnt1, cnt2)
}

# none-self, unique cores
report <- rbind(netpan, iedb) %>% 
          `rownames<-`(seq_len(nrow(netpan)+nrow(iedb))) %>% 
          arrange(method, pep_len) 
ag_peps_net <- netmhciipan %>%  
                find_non_self_binders() %>%
                dplyr::rename(net_binder = binder,
                              net_core = core_peptide,
                              net_allele = allele,
                              net_antigen = antigen,
                              net_rank = rank) %>%
                select(net_allele, net_antigen, peptide, length, net_binder, net_core, net_rank)

ag_peps_rec <- recommended %>% 
                find_non_self_binders() %>%
                dplyr::rename(iedb_binder = binder,
                              iedb_allele = allele,
                              iedb_antigen = antigen,
                              iedb_core = sturniolo_core,
                              iedb_rank = adjusted_rank) %>% 
                filter(method != "NetMHCIIpan") %>%
                select(iedb_allele, iedb_antigen, peptide, iedb_binder, iedb_core, iedb_rank)

ag_peps <- ag_peps_rec %>%  
            right_join(ag_peps_net, by = "peptide") %>% # yes, use right_join
            mutate(iedb_allele = str_remove(iedb_allele, "HLA-"),
                   net_allele = str_remove(net_allele, "HLA-")) %>%
            select(peptide, 
                   net_allele, net_antigen, net_binder, net_core, net_rank, 
                   iedb_allele, iedb_antigen, iedb_binder, iedb_core, iedb_rank)%>%
            distinct() %>%
            arrange(peptide)

# dups on peptide and net_core
peps_dups <- ag_peps %>% 
             get_dupes(peptide, net_core, iedb_core) %>% 
             group_by(peptide, net_core, iedb_core) %>% 
             arrange(net_rank, .by_group=TRUE) %>% 
             mutate(n = row_number()) %>%
             ungroup() %>%
             filter(n == 1) %>% 
             select(-c(dupe_count, n))

peps_uniq <- ag_peps %>%
  filter(!(peptide %in% peps_dups$peptide) & !(net_core %in% peps_dups$net_core) & !(iedb_core %in% peps_dups$iedb_core))

peps_distinct_cores <- data.frame(rbind(peps_dups, peps_uniq)) %>% 
                       arrange(peptide)

rm(find_non_self_binders, ag_peps_net, ag_peps_rec, ag_peps, peps_dups)

Result Tables

antigen info

# antigen info
datatable(allele_info, 
          caption = glue("table 1: total/unique number of eplet mismatch are ", eplet_mm$mm_cnt_tt, "/", eplet_mm$mm_cn_uniq), 
          rownames = FALSE,
          options = list(columnDefs = 
                           list(list(className = 'dt-left', targets = "_all")),
                         lengthChange = FALSE))

binder summary

# summary
datatable(report, 
          caption = "table 2: summary of prediction",
          rownames = FALSE,
          options = list(columnDefs = 
                           list(list(className = 'dt-left', targets = "_all")),
                         lengthChange = FALSE))

distinct cores

# distinct core with full peptide
datatable(peps_distinct_cores, 
          caption = "table 3: distinct core peptides",
          rownames = TRUE,
          filter = list(position = 'top', clear = FALSE),
          extensions = "Buttons", 
          options = list(pageLength = 50,
                         columnDefs = 
                         list(list(className = 'dt-left', targets = "_all")),
                         lengthChange = FALSE,
                         dom = 'Bfrtip',
                         buttons = c('csv', "copy")))
# function to plot alignment with msa package
plot_align <- function(in_dat, out_name){
  write_lines(in_dat, "tmp.fasta")
  mySeqs <- readAAStringSet("tmp.fasta")
  myAlignment <- msa(mySeqs, order = "input")

  msaPrettyPrint(myAlignment, 
                 file = out_name,
                 output = "pdf",
                 showNames = "left",
                 showNumbering = "none",
                 showLogo = "top",
                 showConsensus = "bottom",
                 logoColors = "rasmol",
                 verbose = FALSE,
                 askForOverwrite = FALSE)

  file.remove("tmp.fasta")
}

 for_align <- allele_info %>%
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen)) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) 

 # if only one input donor allele 
 if(length(params$donor_ag) == 1){
   for_align1 <- for_align %>% pull(fa_split) %>% as_vector()

   # seq position 1-50
   short1_1st <- allele_info %>%
               mutate(seq = str_sub(seq,1,50)) %>%
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen)) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) %>%
               pull(fa_split) %>% 
               as_vector()

  # seq position 51-100
  short1_2nd <- allele_info %>%
               mutate(seq = str_sub(seq,51,100)) %>%
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen)) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) %>%
               pull(fa_split) %>% 
               as_vector()

   plot_align(in_dat = for_align1, out_name = paste0(out_fd, "align1.pdf"))
   plot_align(in_dat = short1_1st, out_name = paste0(out_fd, "align1_1st.pdf"))
   plot_align(in_dat = short1_2nd, out_name = paste0(out_fd, "align1_2nd.pdf"))
 }

 # if have both donor input allele
 if(length(params$donor_ag) == 2){
   for_align1 <- for_align %>%
                  filter(antigen == params$donor_ag[1] | type == "self") %>% 
                  pull(fa_split) %>% 
                  as_vector()

   short1_1st <- allele_info %>%
               mutate(seq = str_remove_all(str_sub(seq,1,50), "\n")) %>%
               filter(antigen == params$donor_ag[1] | type == "self") %>% 
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen, "\n")) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) %>%
               pull(fa_split) %>% 
               as_vector()

   short1_2nd <- allele_info %>%
               mutate(seq = str_remove_all(str_sub(seq,51,100), "\n")) %>%
               filter(antigen == params$donor_ag[1] | type == "self") %>% 
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen, "\n")) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) %>%
               pull(fa_split) %>% 
               as_vector()

    for_align2 <- for_align %>%
                  filter(antigen == params$donor_ag[2] | type == "self") %>% 
                  pull(fa_split) %>% 
                  as_vector()

     short2_1st <- allele_info %>%
               mutate(seq = str_remove_all(str_sub(seq,1,50), "\n")) %>%
               filter(antigen == params$donor_ag[2] | type == "self") %>% 
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen, "\n")) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) %>%
               pull(fa_split) %>% 
               as_vector()

   short2_2nd <- allele_info %>%
               mutate(seq = str_remove_all(str_sub(seq,51,100), "\n")) %>%
               filter(antigen == params$donor_ag[2] | type == "self") %>% 
               arrange(desc(type)) %>%
               mutate(allele_fa_nm = str_c(">", type, "_", antigen, "\n")) %>% 
               unite("fa", c(allele_fa_nm, seq), sep = "", remove = F) %>%
               mutate(fa_split = str_split(fa, "\n")) %>%
               pull(fa_split) %>% 
               as_vector()

   plot_align(in_dat = for_align1, out_name = paste0(out_fd, "align1.pdf"))
   plot_align(in_dat = short1_1st, out_name = paste0(out_fd, "align1_1st.pdf"))
   plot_align(in_dat = short1_2nd, out_name = paste0(out_fd, "align1_2nd.pdf"))

   plot_align(in_dat = for_align2, out_name = paste0(out_fd, "align2.pdf"))
   plot_align(in_dat = short2_1st, out_name = paste0(out_fd, "align2_1st.pdf"))
   plot_align(in_dat = short2_2nd, out_name = paste0(out_fd, "align2_2nd.pdf"))
 }

sequence alignment

allele 1

knitr::include_graphics(path = paste0(out_fd, "align1.pdf"))
knitr::include_graphics(path = paste0(out_fd, "align1_1st.pdf"))
knitr::include_graphics(path = paste0(out_fd, "align1_2nd.pdf"))

allele 2

if(length(params$donor_ag) == 2){
knitr::include_graphics(path = paste0(out_fd, "align2.pdf"))
}
if(length(params$donor_ag) == 2){
knitr::include_graphics(path = paste0(out_fd, "align2_1st.pdf"))
}
if(length(params$donor_ag) == 2){
knitr::include_graphics(path = paste0(out_fd, "align2_2nd.pdf"))
}


LarsenLab/immuntools documentation built on July 19, 2023, 9:43 a.m.