#' Make tree edges positive
#'
#' If tree edges are zero or negative, makes them a very small positive number
#' (1/1000th of the smallest edge length). This is to prevent
#' \code{\link[ape]{ace}} from breaking. A warning is thrown. Returns a tree
#' with all positive edge lengths.
#'
#' @param tree (\code{ape phylo}).
#'
#' @return tree: (\code{ape phylo}). Tree now has only positive edge lengths.
#' @export
make_all_tree_edges_positive <- function(tree){
check_is_tree(tree)
if (sum(tree$edge.length <= 0) > 0) {
warning(paste0("All non-positive branch lengths changed to small positive ",
"number to be able to perform ancestral reconstruction."))
# Change any edge lengths that are zero to a very small number (so ancestral
# reconstruction doesn't break)
tree$edge.length[tree$edge.length <= 0] <-
min(tree$edge.length[tree$edge.length > 0]) / 1000
}
return(tree)
}
#' Get major alleles
#'
#' Finds the major allele (most common allele) for each variant position in an
#' allele matrix.
#'
#' @param allele_mat Matrix. Allele matrix. Rows are variants. Columns are
#' samples.
#'
#' @return major_allele: Character vector. Gives the major allele for each
#' position. Names are the genomic loci. Values are the nucleotides.
#' @noRd
#' @export
get_major_alleles <- function(allele_mat){
major_allele <- apply(allele_mat, 1, function(x) {
names(which.max(table(x)))
})
names(major_allele) <- rownames(allele_mat)
return(major_allele)
}
#' Remove unknown ancestral states
#'
#' Removes rows from variant matrix where the reference allele (ancestral allele
#' or major allele) is unknown (- or N).
#'
#' @param allele_mat Matrix. Allele matrix. Rows are variants, columns are
#' samples.
#' @param alleles Factor. Vector of reference alleles (ancestral alleles or
#' major alleles)
#' @param ar_results Data.frame. Results from ancestral reconstruction
#'
#' @return A list of three objects:
#' \describe{
#' \item{allele_mat}{Matrix. Rows are variants. Columns are samples.}
#' \item{ar_results}{Data.frame. Variants with unknown ancestral states
#' removed. Rows are variants. If ancestral reconstruction was performed:
#' first column in ancestral allele & second column is probability. If no
#' ancestral reconstruction performed: the only column is the major allele.}
#' \item{removed}{Character. Vector with names of removed samples.}
#' }
#' @noRd
#' @export
remove_unknown_alleles <- function(allele_mat, alleles, ar_results){
check_is_this_class(allele_mat, "matrix")
check_is_this_class(alleles, "factor")
check_is_this_class(ar_results, "data.frame")
unknown <- alleles %in% c("-", "N")
removed <- rownames(allele_mat)[unknown]
if (length(removed) > 0) {
warning(paste(length(removed),
"positions removed because ancestral allele is unknown"))
}
return(list(allele_mat = allele_mat[!unknown, ],
ar_results = ar_results[!unknown, , drop = FALSE],
removed = removed))
}
#' Make binary matrix from allele matrix
#'
#' Returns a binary matrix of variant presence/absence created from an allele
#' matrix and a reference allele (ancestral allele or major allele) vector that
#' can be used in bGWAS. The reference allele is 0 and the non-reference allele
#' is 1.
#'
#' @param allele_mat Matrix. Allele matrix (split by multi-allelic site). Rows
#' are variants. Columns are samples.
#' @param reference_allele Factor. Vector of alleles that are 0 in binary matrix
#' (ancestral allele or major allele). Named vector. Names are genetic loci.
#'
#' @return bin_mat. Matrix. Binary matrix of variant presence/absence.
#' @noRd
#' @export
make_binary_matrix <- function(allele_mat, reference_allele){
check_is_this_class(reference_allele, "factor")
check_is_this_class(allele_mat, "matrix")
# make matrix of reference allele that's the same size as the allele matrix
ref_allele_mat <- replicate(ncol(allele_mat), reference_allele)
# initialize binary matrix
bin_mat <- allele_mat
# if allele is the reference allele, code it as 0
bin_mat[bin_mat == ref_allele_mat] <- 0
# get variant positions
sites <- unique(gsub("\\..*", "", rownames(bin_mat)))
# iterate over each site (to handle multiallelic sites)
bin_mat <- sapply(sites, function(x) {
site <- bin_mat[rownames(bin_mat) == x, ] # get 1st site
# get all bases at that position
bases <- unique(site)
# remove reference (0) and unknown (N) bases
bases <- bases[bases != "0" & bases != "N"]
# create mini-matrix for this variant position where rows are bases and
# columns are samples
binsplit <- matrix(NA, nrow = length(bases), ncol = ncol(bin_mat))
rownames(binsplit) <- bases
# for each base, code that base as 1 and all others as 0
for (b in bases) {
binsite <- site
binsite[binsite != b] <- 0
binsite[binsite == b] <- 1
binsite <- as.numeric(binsite)
binsplit[b, ] <- binsite
}
return(binsplit)
})
# change from list to matrix
bin_mat <- do.call(rbind, bin_mat)
# update rownames and colnames of binary matrix
rownames(bin_mat) <- rownames(allele_mat)
colnames(bin_mat) <- colnames(allele_mat)
# make binary matrix numeric
class(bin_mat) <- "numeric"
return(bin_mat)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.