#' Extract genotype matrix from a dosage file generated by bcftools
#'
#' @param chr chromosome of the genomic region
#' @param start start coordinate of the genomic region
#' @param end end coordinate of the genomic region
#' @param dosage_file Path to the tabix-indexed dosage file (first four columns need to be CHROM, POS, REF, ALT)
#'
#' @return genotype matrix
#' @export
extractGenotypeMatrixFromDosage <- function(chr, start, end, dosage_file){
#Make a GRanges object
granges = GenomicRanges::GRanges(seqnames = chr, ranges = IRanges::IRanges(start = start, end = end))
#Extract header
header = as.character(read.table(dosage_file, nrows = 1, stringsAsFactors = F))
coltypes = paste0("cccc", paste(rep("d",length(header)-4), collapse = ""))
#Extract genotypes
gt_df = eQTLUtils::scanTabixDataFrame(dosage_file, granges, col_names = header, col_types = coltypes)[[1]]
#Construct variant ids
id_df = dplyr::select(gt_df, CHROM, POS, REF, ALT) %>%
dplyr::mutate(variant_id = paste(paste0("chr", CHROM), POS, REF, ALT, sep = "_"))
#Make genotype matrix
gt_matrix = gt_df[,-c(1:4)] %>% as.matrix()
row.names(gt_matrix) = id_df$variant_id
return(gt_matrix)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.