#' Utility functions for NetMHC.
#'
#' To use the scripts generated by \code{NetMHC_Script}, NetMHC4.0 and NetMHCIIpan3.2 should be installed locally.
#'
#' @param peptideSet A set of peptide sequences.
#' @param outDir A path to the output directory in which peptides will be exported in FASTA format.
#' @param maxPeptides An integer indicating the maximum number of peptides in each FASTA files. Set \code{NULL} to disable chunking.
#' @param netMHCOutputFileNames A set of tab-delimited NetMHC result files.
#' @param MHCType A character string. "MHCI" or "MHCII".
#' @export
#' @rdname DataPreparation_NetMHC
#' @name DataPreparation_NetMHC
NetMHC_PeptideExport <- function(peptideSet, outDir="./NetMHC/", maxPeptides=NULL){
dir.create(outDir, showWarnings=F, recursive=T)
peptList <- split(peptideSet, nchar(peptideSet))
lengList <- names(peptList)
if(is.null(maxPeptides)){
peptList <- lapply(peptList, list)
}else{
peptList <- lapply(peptList, BBmisc::chunk, chunk.size=maxPeptides)
}
nchunkList <- sapply(peptList, length)
peptList <- purrr::flatten(peptList)
names(peptList) <- unlist(mapply(function(x, y){paste0("Peptides_", x, "mer_", 1:y, ".fasta")}, lengList, nchunkList, SIMPLIFY=F))
mapply(
function(peptSeq, name){seqinr::write.fasta(as.list(peptSeq), peptSeq, file.path(outDir, name))},
peptList, names(peptList), SIMPLIFY=F
)
return(TRUE)
}
#' @export
#' @rdname DataPreparation_NetMHC
#' @name DataPreparation_NetMHC
NetMHC_Script <- function(outDir="./NetMHC/"){
# MHCI
hla_string <- "HLA-A0101,HLA-A0201,HLA-A0301,HLA-A2402,HLA-A2601,HLA-B0702,HLA-B0801,HLA-B2705,HLA-B3901,HLA-B4001,HLA-B5801,HLA-B1501"
script_single <- function(FASTAFileName){
paste0(
"./netMHC -a ", hla_string,
" -l ", gsub("mer", "", unlist(strsplit(FASTAFileName, "_"))[2]),
" -xls -xlsfile ./Peptides/", gsub("fasta", "xls", FASTAFileName),
" -f ./Peptides/", FASTAFileName
)
}
FASTAFileNames <- list.files(path=outDir, pattern="^Peptides_.+fasta$", full.names=F)
FASTAFileNames <- FASTAFileNames[stringr::str_detect(FASTAFileNames, paste0(paste0("_", 8:11, "mer"), collapse="|"))]
scripts <- unlist(lapply(FASTAFileNames, script_single))
writeLines(scripts, file.path(outDir, "NetMHC_Script_MHCI.txt"), sep="\n")
# MHCII
hla_string <- "DRB1_0101,DRB3_0101,DRB4_0101,DRB5_0101,HLA-DPA10103-DPB10101,HLA-DQA10101-DQB10201"
script_single <- function(FASTAFileName){
paste0(
"./netMHCIIpan -a ", hla_string,
" -length ", gsub("mer", "", unlist(strsplit(FASTAFileName, "_"))[2]),
" -xls -xlsfile ./Peptides/", gsub("fasta", "xls", FASTAFileName),
" -f ./Peptides/", FASTAFileName
)
}
FASTAFileNames <- list.files(path=outDir, pattern="^Peptides_.+fasta$", full.names=F)
FASTAFileNames <- FASTAFileNames[stringr::str_detect(FASTAFileNames, paste0(paste0("_", 11:30, "mer"), collapse="|"))]
scripts <- unlist(lapply(FASTAFileNames, script_single))
writeLines(scripts, file.path(outDir, "NetMHC_Script_MHCII.txt"), sep="\n")
return(TRUE)
}
#' @export
#' @rdname DataPreparation_NetMHC
#' @name DataPreparation_NetMHC
NetMHC_Import <- function(netMHCOutputFileNames, MHCType=c("MHCI", "MHCII")){
if(MHCType=="MHCI"){
hlaSuperTypeSet <- c("A01","A02","A03","A24","A26","B07","B08","B27","B39","B44","B58","B62")
thr_w <- 2
thr_s <- 0.5
}
if(MHCType=="MHCII"){
hlaSuperTypeSet <- c("DRB1","DRB3","DRB4","DRB5","DP","DQ")
thr_w <- 10
thr_s <- 2
}
dt_netmhc <- data.table::rbindlist(pbapply::pblapply(netMHCOutputFileNames, data.table::fread))
dt_netmhc <- unique(dt_netmhc, by="ID", fromLast=F)
dt_netmhc <- dt_netmhc[!stringr::str_detect(Peptide, "X"), ]
dt_netmhc <- dt_netmhc[,which(colnames(dt_netmhc) %in% c("Peptide","Rank")), with=F]
colnames(dt_netmhc) <- c("Peptide", hlaSuperTypeSet)
df.netmhc <- dt_netmhc %>%
tidyr::gather(HLARestriction, Rank, -Peptide) %>%
dplyr::mutate(BinderCategory=factor(dplyr::if_else(Rank<=thr_w, 1, 0) + dplyr::if_else(Rank<=thr_s, 1, 0),
levels=c(0, 1, 2),
labels=c("NonBinder", "WeakBinder", "StrongBinder")))
df.netmhc.rank <- df.netmhc %>%
dplyr::select(-BinderCategory) %>%
tidyr::spread(HLARestriction, Rank)
df.netmhc.bind <- df.netmhc %>%
dplyr::mutate(BinderCategory=as.numeric(BinderCategory)) %>%
dplyr::select(-Rank) %>%
tidyr::spread(HLARestriction, BinderCategory)
list("DF"=df.netmhc, "DF_Rank"=df.netmhc.rank, "DF_Bind"=df.netmhc.bind)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.