#' A function to reconstruct the donor haplotypes using data from gametes
#'
#' This function takes gamete data and computes and clusters a distance matrix with a binary method. It
#' anticipates sparse data and replaces NAs with 0.5. It clusters the tree into two groups, i.e., haplotypes.
#' It then categorizes the gamete cells falling into each of the two groups. It reconstructs the original
#' haplotypes by majority vote after inverting the opposite haplotype.
#'
#' @param input_dt A dataframe of sparse gamete data coded with reference and alt alleles (i.e., 0, 1, or NA)
#' @param input_positions vector of SNP positions
#' @param window_indices A window of the gamete data (using segments generated from `split_with_overlap`)
#'
#' @return inferred_output A tibble with the inferred haplotype sorted by position and window
#'
#' @importFrom stats dist hclust cutree
#' @importFrom tibble tibble
#'
reconstruct_hap <- function(input_dt, input_positions, window_indices) {
window_start <- min(window_indices)
window_end <- max(window_indices)
positions_for_window <- input_positions[window_start:window_end]
# compute a distance matrix
d <- dist(t(as.matrix(input_dt)[window_start:window_end,]), method = "binary")
# plug in 0.5 for any NA entries of the distance matrix
d[is.na(d)] <- 0.5
# cluster the distance matrix
tree <- hclust(d, method = "ward.D2")
# cut the tree generated by clustering into two groups (haplotypes)
haplotypes <- cutree(tree, k=2)
# get the names of the gamete cells falling into the two groups
h1_gamete <- names(haplotypes[haplotypes == 1])
h2_gamete <- names(haplotypes[haplotypes == 2])
# reconstruct the original haplotypes by majority vote after inverting the opposite haplotype
h1_inferred <- unname(apply(cbind(input_dt[window_start:window_end, h1_gamete],
invert_bits(input_dt[window_start:window_end, h2_gamete])),
1, function(x) get_mode(x)))
inferred_output <- tibble(index = window_indices, pos = positions_for_window, h1 = h1_inferred)
return(inferred_output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.