R/get_vcf_sample_ids.R

Defines functions get_vcf_sample_ids

Documented in get_vcf_sample_ids

#' Get VCF sample ID(s)
#'
#' @inheritParams format_sumstats
#' @importFrom VariantAnnotation scanVcfHeader
#' @return sample_id
#' @keywords internal
get_vcf_sample_ids <- function(path) { 
    header <- VariantAnnotation::scanVcfHeader(file = path)
    sample_id <- tryCatch(expr = {header@samples},
                          error=function(e){NULL})
    if (is.null(sample_id)) {
        header <- readLines(path, 100)
        sample_id <- header[grepl("^##SAMPLE", header)] # gets ##SAMPLE
        if (length(sample_id) == 0) {
            ### Try again with more rows
            header <- readLines(path, 500)
            sample_id <- header[grepl("^##SAMPLE", header)] # gets ##SAMPLE
        }
        # get rid of everything after ID
        sample_id <- gsub(",.*$", "", sample_id)
        # rmv ##SAMPLE=
        sample_id <- base::substr(sample_id, 10, nchar(sample_id))
        # rmv things before equals
        sample_id <- sub(".+=(.+)", "\\1", sample_id)
    }
    if (length(sample_id) == 0) {
        message(
            "No ##SAMPLE row found.",
            "Will infer sample name(s) from data colnames."
        )
        return(NULL)
    } else {
        return(sample_id)
    }
}
neurogenomics/MungeSumstats documentation built on July 17, 2024, 3:14 p.m.