Nothing
#' Cluster SNPs with HDBSCAN and identify haplotypes
#'
#' run_hdbscan_haplotyping() performs HDBSCAN clustering of SNPs in region of
#' interest to identify marker groups. Individuals are classified by haplotype
#' combination based on shared combinations of marker group alleles. Returns a
#' comprehensive haplotyping object (HapObject), which can be visualized with reference
#' to phenotype and metadata using crosshap_viz() (set epsilon to 1 as a dummy value).
#'
#' @param vcf Input VCF for region of interest.
#' @param LD Pairwise correlation matrix of SNPs in region (e.g. from PLINK).
#' @param pheno Input numeric phenotype data for each individual.
#' @param MGmin Minimum SNPs in marker groups, MinPts parameter for DBscan.
#' @param minHap Minimum nIndividuals in a haplotype combination.
#' @param hetmiss_as If hetmiss_as = "allele", heterozygous-missing SNPs './N'
#' are recoded as 'N/N', if hetmiss_as = "miss", the site is recoded as missing.
#' @param metadata Metadata input (optional).
#' @param keep_outliers When FALSE, marker group smoothing is performed to
#' remove outliers.
#'
#' @export
#'
#' @returns A comprehensive haplotyping S3 object (HapObject) for each provided
#' epsilon value, needed for clustree_viz() and crosshap_viz().
run_hdbscan_haplotyping <- function(vcf, LD, pheno, MGmin, minHap = 5, hetmiss_as = 'allele', metadata = NULL, keep_outliers = FALSE){
bin_vcf <- dplyr::select(vcf, -c(1,2,4:9)) %>% tibble::column_to_rownames('ID') %>%
dplyr::mutate_all(function(x){base::ifelse(x=='0|0',0,
base::ifelse(x=='1|0'|x=='0|1',1,
base::ifelse(x=='1|1',2,
switch(hetmiss_as, "allele" = base::ifelse(x=='1|.'|x=='.|1',1,
base::ifelse(x=='0|.'|x=='.|0',0,NA)),
"miss" = NA))))})
#Run HDBscan on LD matrix
base::message(paste0("Clustering SNPs into marker groups"))
db40 <- dbscan::hdbscan(LD, minPts = MGmin)
preMGfile <- tibble::tibble(ID=rownames(LD),cluster=db40$cluster) %>%
dplyr::left_join(dplyr::select(vcf, 2:3), by = "ID")
##Identify haplotype frequencies for different marker group combinations
base::message(paste0("Determining haplotypes from marker group clusters"))
phaps_out <- pseudo_haps(preMGfile = preMGfile, bin_vcf = bin_vcf, minHap = minHap, LD = LD, keep_outliers = keep_outliers)
##Build summary object with all relevant haplotyping information
base::message(paste0("Collating haplotype information"))
Varfile <- tagphenos(MGfile = phaps_out$MGfile, bin_vcf, pheno)
HDBHapObject <- base::list(MGmin = db40$minPts,
Hapfile = phaps_out$Hapfile,
Indfile = if(missing(metadata)|is.null(metadata)){
dplyr::left_join(phaps_out$nophenIndfile, pheno, by = "Ind") %>% dplyr::mutate(Metadata = as.character(NA))
}else {
dplyr::left_join(phaps_out$nophenIndfile, pheno, by = "Ind") %>% dplyr::left_join(metadata, by = "Ind")
},
Varfile = Varfile,
MGfile = phaps_out$MGfile,
epsilon = 1)
HDBHapList <- list(HDBHapObject)
return(HDBHapList)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.