#' Filter epitopes by taxonomy and host
#'
#' Filters an epitope dataframe by source organism and/or host IDs.
#'
#' @param df *data.table* object of class *LBCE_dt* (generated by [get_LBCE()]),
#' *joined_epit_dt* (generated by [prepare_join_df()]) or
#' *windowed_epit_dt* (generated by [make_window_df()])
#' @param orgIDs vector of organism taxon IDs that we want to retain
#' (using *sourceOrg_id*).
#' @param hostIDs vector of host taxon IDs that we want to retain
#' (using *host_id*).
#' @param removeIDs vector of organism taxon IDs that we want to remove
#' (using *sourceOrg_id*).
#' @param tax_load_file optional taxonomy file (RDS file generated either by this
#' function or by [get_taxonomy()]).
#' @param tax_list optional taxonomy list (to be passed instead of tax_load_file)
#' @param tax_save_folder optional folder saving the taxonomy. Ignored
#' if **tax_load_file** is not _NULL_.
#'
#' @return Epitope dataframe filtered by the criteria in **orgIDs**, **hostIDs** and
#' **removeIDs**.
#'
#' @author Felipe Campelo (\email{f.campelo@@aston.ac.uk})
#'
#' @export
#'
filter_epitopes <- function(df,
orgIDs = NULL,
hostIDs = NULL,
removeIDs = NULL,
tax_load_file = NULL,
tax_list = NULL,
tax_save_folder = NULL) {
# ========================================================================== #
# Sanity checks and initial definitions
id_classes <- c("NULL", "numeric", "integer", "character")
df_classes <- c("LBCE_dt", "joined_epit_dt", "windowed_epit_dt")
assertthat::assert_that(is.data.frame(df),
any(class(df) %in% df_classes),
class(orgIDs) %in% id_classes,
class(hostIDs) %in% id_classes,
class(removeIDs) %in% id_classes,
is.null(tax_load_file) | is.character(tax_load_file),
is.null(tax_list) | is.list(tax_list),
is.null(tax_save_folder) | is.character(tax_save_folder),
is.null(tax_save_folder) | length(tax_save_folder) == 1)
if(!is.null(tax_load_file)) assertthat::assert_that(file.exists(tax_load_file))
# Standardise relevant variables:
df_type <- class(df)[grep("_dt", class(df))]
if(df_type %in% c("LBCE_dt", "joined_epit_dt")){
ids <- data.frame(org = df$sourceOrg_id, host = df$host_id)
} else {
ids <- data.frame(org = df$Info_sourceOrg_id, host = df$Info_host_id)
}
if (is.null(tax_load_file) & is.null(tax_list)){
tax <- get_taxonomy(unique(ids$org), save_folder = tax_save_folder)
} else if (!is.null(tax_list)){
tax <- tax_list
} else {
tax <- readRDS(tax_load_file)
}
# Extract the relevant organism IDs using the taxonomy data
fextr <- function(x, tid){
ul <- c(x$Taxonomy$UID, x$UID)
if(any(tid %in% ul)) return(x$UID)
}
target_org <- unlist(sapply(tax, FUN = fextr, tid = orgIDs))
target_host <- unlist(sapply(tax, FUN = fextr, tid = hostIDs))
target_rm <- unlist(sapply(tax, FUN = fextr, tid = removeIDs))
target_org <- unique(c(target_org, orgIDs))
target_host <- unique(c(target_host, hostIDs))
target_rm <- unique(c(target_rm, removeIDs))
# Get the filtering indices
fmatch <- function(pat, trg){
str <- unlist(strsplit(pat, split = ",", fixed = TRUE))
any(str %in% trg)
}
if (!is.null(orgIDs)){
idx1 <- which(sapply(ids$org, FUN = fmatch, trg = target_org))
ids <- ids[idx1, ]
df <- df[idx1, ]
}
if (!is.null(hostIDs)){
idx2 <- which(sapply(ids$host, FUN = fmatch, trg = target_host))
ids <- ids[idx2, ]
df <- df[idx2, ]
}
if (!is.null(removeIDs)){
idx3 <- which(sapply(ids$org, FUN = fmatch, trg = target_rm))
if (length(idx3) > 0){
ids <- ids[-idx3, ]
df <- df[-idx3, ]
}
}
if(df_type %in% c("LBCE_dt", "joined_epit_dt")){
# Initialize dummy variables to prevent CRAN notes
sourceOrg_id <- protein_id <- epitope_id <- NULL
# Sort data.table (the variable names referred below are internal to df)
df <- df[order(sourceOrg_id, protein_id, epitope_id), ]
} else {
# Initialize dummy variables to prevent CRAN notes
Info_sourceOrg_id <- Info_protein_id <- NULL
Info_epitope_id <- Info_center_pos <- NULL
df <- df[order(Info_sourceOrg_id, Info_protein_id, Info_epitope_id,
Info_center_pos), ]
}
return(df)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.