#' @title Detect and process hypermutated sequences
#'
#' @description
#' This function is a wrapper that will detect and either remove or 'fix'
#' hypermutated sequences depending of the value of the 'fix_with' argument.
#'
#' @details
#' It calls \code{ancestor_processing} to obtain the ancestral sequence to
#' compare the query sequences to, then calls \code{deduplicate_seqs} to remove
#' duplicate sequences for performance reasons, next loops over each unique
#' sequence, comparing it to the ancestral sequence with \code{scan_seq} and
#' finally collates the results.
#'
#' @seealso \code{\link{ancestor_processing}}, \code{\link{deduplicate_seqs}},
#' and \code{\link{scan_seq}}
#'
#' @param dat The sequence data. The structure must match the format produced
#' by read.fasta from the seqinr package. This is a list in which each element
#' represents a single sequence. Each element is of class \code{SeqFastadna}
#' and consists of a vector of single letters of class character with optional
#' attributes names and Annot.
#' @param verbose If TRUE, print the name and p-value of removed/fixed
#' sequences.
#' @param fix_with Either FALSE or a single letter. If not FALSE, then replace
#' the hypermutated base with the letter indicated.
#' @param ancestor Either 'consensus' to indicate that the consensus sequences
#' must be computed, or 'first' to indicate that the first sequence in the
#' dataset should be considered to be the ancestral sequence, or the ancestral
#' sequence itself.
#' @param p_value The p-value used by the one-sided fischer test.
#'
#' @return A list with three elements:
#' \describe{
#' \item{all_mut_pos}{A data.frame that contains all the positions in all the
#' sequences that are either a hypermutation or control position.}
#' \item{seq_results}{A list that stores all the sequences that did not
#' contain any hypermutation and, in the case that the fix_with parameter was
#' set, those sequences with hypermutation that was corrected.}
#' \item{seq_hypermutants}{A list of all the sequences that contains hypermutation.}
#' }
#'
#' @examples
#' result <- remove_hypermut(hd_seqs)
#' names(result)
#' str(result)
#' @export
remove_hypermut <- function(dat, verbose = TRUE, fix_with = FALSE, ancestor = 'consensus', p_value = 0.05){
cons <- ancestor_processing(ancestor, dat)
dat <- cons$dat
cons <- cons$cons
results <- NULL
hypermutants <- NULL
all_mut_pos <- NULL
ddat <- deduplicate_seqs(dat)
for (i in 1:length(ddat)){
result_scan <- rcpp_scan_seq(cons, ddat[[i]]$the_seq, fix_with = fix_with)
c_all_mut_pos <- NULL
for (c_name in ddat[[i]]$dup_name){
c_all_mut_pos <- rbind(c_all_mut_pos,
cbind(data.frame(seq_name = c_name,
stringsAsFactors = FALSE),
result_scan$all_mut_pos))
}
all_mut_pos <- rbind(all_mut_pos, c_all_mut_pos)
if (result_scan$p_value < p_value){
if (verbose) {
if (fix_with != FALSE) {
print(paste("Fixing ", ddat[[i]]$dup_names, " because p value of ", result_scan$p_value,
". Replacing hypermutated positions with ", fix_with,
". Sequences will be left in the seq_results element of the return list and will",
" also appear in the seq_hypermutants element.", sep = ''))
} else {
print(paste("Removing ", ddat[[i]]$dup_names, " because p value of ", result_scan$p_value, sep = ''))
}
}
hypermutants <- rbind(hypermutants,
data.frame(seq_name = ddat[[i]]$dup_names,
the_seq = tolower(paste(result_scan$the_seq, collapse = '')),
stringsAsFactors = FALSE)
)
} else {
results <- rbind(results,
data.frame(seq_name = ddat[[i]]$dup_names,
the_seq = tolower(paste(result_scan$the_seq, collapse = '')),
stringsAsFactors = FALSE)
)
}
}
if (fix_with != FALSE){
results <- rbind(results, hypermutants)
}
results_list <- vector('list', nrow(results))
for (i in 1:nrow(results)){
results_list[[i]] <- results[i,'the_seq']
names(results_list)[i] <- results[i, 'seq_name']
}
if (!is.null(hypermutants)){
hypermutants_list <- vector('list', nrow(hypermutants))
for (i in 1:nrow(hypermutants)){
hypermutants_list[[i]] <- hypermutants[i, 'the_seq']
names(hypermutants_list)[i] <- hypermutants[i, 'seq_name']
}
} else {
hypermutants_list <- NULL
}
return(
list(seq_results = results_list,
seq_hypermutants = hypermutants_list,
all_mut_pos = all_mut_pos)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.