#'CalculateConsensus
#'@description
#'We calculate the consensus information from the MAEGATK results.
#'We set cells that have only alternative reads to 2 (Alternative).
#'We set cells that have only reference reads to 1 (Reference).
#'We set cells that have a mixture of alternative and reference reads to 3 (Both).
#'We set cells that have no reads to 0 (NoCall).
#'
#'Please note. Cells can have reads for the reference of a specific variant and no reads for the alternative.
#'The cell can still have a reads for the other alternative alleles. Then the cell is still considered as 0 (NoCall) for this variant.
#'For example:
#'A cell has at position 3: 0 A reads, 53 T reads, 63 C reads, 148 T reads.
#'For the variant chrM_3_T_A, the cell would have 53 reference reads, but also reads for other variants at this position.
#'To make sure that there is no confusion, the cell is set to NoCall.
# #'@import MatrixGenerics
#'@importFrom SummarizedExperiment rowRanges
#'@param SE SummarizedExperiment object.
#'@param chromosome_prefix The chromosome name used as a prefix.
#'@param verbose Should the function be verbose? Default = FALSE
#'@export
CalculateConsensus <- function(SE, chromosome_prefix = "chrM", verbose = FALSE){
# 0 NoCall = coverage is 0.
# 1 Reference = only reference reads.
# 2 Alternative = only alternative reads of one variant.
# 3 Both = reads for reference and one or more variants.
if(verbose) print("We get the read information per position.")
letter <- c("A", "C", "G", "T")
ref_allele <- as.character(SummarizedExperiment::rowRanges(SE)$refAllele)
reads <- lapply(letter, getReadMatrix, SE = SE, chromosome_prefix = chromosome_prefix)
# Since we have always the same 4 bases, we get all possible combinations by assigning numeric values.
# A = 8, C = 4, G = 2, T = 1.
# If there are several types of reads present at a position, we can simply add the values.
# So, a position with A and T would have a value of 9.
reads[[1]][reads[[1]] > 0] <- 8
reads[[2]][reads[[2]] > 0] <- 4
reads[[3]][reads[[3]] > 0] <- 2
reads[[4]][reads[[4]] > 0] <- 1
if(verbose) print("We add the values together.")
# The row names are the names from the first matrix and not accurate any more.
# The only relevant parts are the position and the reference base.
variants_matrix <- reads[[1]] + reads[[2]] + reads[[3]] + reads[[4]]
rm(reads)
gc()
if(verbose) print("We get the position according to their reference base.")
# Now, we have a list for each set of position with the same base reference.
variants_matrix_ls <- list(A = variants_matrix[grep("_A_", rownames(variants_matrix), value = TRUE), , drop = FALSE],
C = variants_matrix[grep("_C_", rownames(variants_matrix), value = TRUE), , drop = FALSE],
G = variants_matrix[grep("_G_", rownames(variants_matrix), value = TRUE), , drop = FALSE],
T = variants_matrix[grep("_T_", rownames(variants_matrix), value = TRUE), , drop = FALSE],
N = variants_matrix[grep("_N_", rownames(variants_matrix), value = TRUE), , drop = FALSE])
# We check if the N reference is even used. If the variants_matrix_ls[["N"]] is empty (zero rows), we do not perform the consensus determination.
n_binding <- FALSE
if(nrow(variants_matrix_ls[["N"]]) > 0){
n_binding <- TRUE
variants_matrix_ls[["N"]] <- matrix(variants_matrix_ls[["N"]], nrow = 1, ncol = ncol(variants_matrix))
colnames(variants_matrix_ls[["N"]]) <- colnames(variants_matrix)
rownames(variants_matrix_ls[["N"]]) <- paste0(chromosome_prefix, "_3107_N_A")
}
rm(variants_matrix)
gc()
if(verbose) print("Now, we check the consensus value for all positions with the same reference base.")
# Then we can rbind these matrices again and return one large consensus matrix in the end.
if(verbose) print("A")
consensus_a <- lapply(c("C", "G", "T"), get_consensus, ref_base = "A", input_matrix = as.matrix(variants_matrix_ls[[1]]), chromosome_prefix = chromosome_prefix)
consensus_a <- do.call("rbind", consensus_a)
if(verbose) print("C")
consensus_c <- lapply(c("A", "G", "T"), get_consensus, ref_base = "C", input_matrix = as.matrix(variants_matrix_ls[[2]]), chromosome_prefix = chromosome_prefix)
consensus_c <- do.call("rbind", consensus_c)
if(verbose) print("G")
consensus_g <- lapply(c("A", "C", "T"), get_consensus, ref_base = "G", input_matrix = as.matrix(variants_matrix_ls[[3]]), chromosome_prefix = chromosome_prefix)
consensus_g <- do.call("rbind", consensus_g)
if(verbose) print("T")
consensus_t <- lapply(c("A", "C", "G"), get_consensus, ref_base = "T", input_matrix = as.matrix(variants_matrix_ls[[4]]), chromosome_prefix = chromosome_prefix)
consensus_t <- do.call("rbind", consensus_t)
if(n_binding){
if(verbose) print("N")
consensus_n <- lapply(c("A", "C", "G", "T"), get_consensus, ref_base = "N", input_matrix = variants_matrix_ls[[5]], chromosome_prefix = chromosome_prefix)
consensus_n <- do.call("rbind", consensus_n)
} else{
if(verbose) print("N reference not present.")
}
if(verbose) print("Binding the matrices.")
consensus <- rbind(consensus_a, consensus_c, consensus_g, consensus_t)
if(n_binding) consensus <- rbind(consensus, consensus_n)
return(consensus)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.