R/makeVCFFromGA4GHResponse.R

Defines functions makeVCFFromGA4GHResponse

Documented in makeVCFFromGA4GHResponse

#' @title makeVCFFromGA4GHResponse function
#' @description Convert \code{DataFrame} output from
#' \code{\link{searchVariants}} and \code{\link{getVariant}} functions to
#' \code{\link{VCF}} class.
#' @details The following columns of \code{variants} are used:
#' \itemize{
#'   \item \code{referenceName} supplies the sequence names of the row
#'   ranges; the remaining range columns are located by
#'   \code{makeGRangesFromDataFrame} using its defaults.
#'   \item \code{id} is stored as the \code{ID} metadata column of the row
#'   ranges.
#'   \item \code{referenceBases} and \code{alternateBases} become the
#'   \code{REF} and \code{ALT} fixed fields.
#'   \item every column whose name starts with \code{"info."} becomes an INFO
#'   field, with the \code{"info."} prefix removed.
#'   \item \code{calls}, when at least one of its elements is non-empty, is
#'   converted to genotype matrices with one column per call set. Entries
#'   equal to \code{"NULL"}, \code{"."} or \code{"None"} are set to
#'   \code{NA}.
#' }
#' @param variants \code{DataFrame} generated by \code{\link{searchVariants}}
#' or \code{\link{getVariant}}.
#' @return \code{\link{VCF}} object. Sample data (\code{colData}) and
#' genotypes (\code{geno}) are only populated when \code{variants$calls}
#' contains call data.
#' @examples
#' host <- "http://1kgenomes.ga4gh.org/"
#' \dontrun{
#' datasetId <- searchDatasets(host, nrows = 1)$id
#' variantSetId <- searchVariantSets(host, datasetId, nrows = 1)$id
#' variants <- searchVariants(host, variantSetId, referenceName = "1",
#'     start = 15000, end = 16000)
#' variants
#'
#' makeVCFFromGA4GHResponse(variants)
#' }
#' @seealso \code{\link{searchVariants}}, \code{\link{getVariant}},
#' \code{\link{VCF}}, \code{\link{DataFrame}}
#' @export makeVCFFromGA4GHResponse
makeVCFFromGA4GHResponse <- function(variants)
{
    rowRanges <- makeGRangesFromDataFrame(variants,
        seqnames.field = "referenceName")
    rowRanges$ID <- variants$id

    fixed <- DataFrame(REF = DNAStringSet(variants$referenceBases),
        ALT = CharacterList(as.list(variants$alternateBases)))

    # drop = FALSE keeps a single "info.*" column as a one-column table.
    # Without it the column collapses to a bare vector and its field name is
    # replaced by an auto-generated one when re-wrapped in DataFrame().
    isInfo <- startsWith(names(variants), "info.")
    info <- DataFrame(variants[, isInfo, drop = FALSE],
        row.names = seq_along(rowRanges))
    # Escape the dot so only the literal "info." prefix is stripped.
    names(info) <- sub("^info\\.", "", names(info))

    hasCalls <- any(lengths(variants$calls) != 0)
    if (!hasCalls) {
        return(VCF(rowRanges = rowRanges, fixed = fixed, info = info))
    }

    # Sample names are read from the first variant's calls.
    # NOTE(review): this presumes every variant lists the same call sets in
    # the same order and that the first variant has calls -- confirm against
    # getGeno() and the server response.
    colData <- DataFrame(row.names = variants$calls[[1]]$callSetName)
    genoFields <- bind_rows(lapply(variants$calls, getGeno))
    geno <- lapply(genoFields, function(x) {
        # One column per sample; the number of rows follows from the length
        # of the field.
        mat <- matrix(x, ncol = nrow(colData))
        # Placeholder strings used for missing values become NA.
        is.na(mat) <- mat == "NULL" | mat == "." | mat == "None"
        mat
    })
    VCF(rowRanges = rowRanges, colData = colData, fixed = fixed,
        info = info, geno = geno)
}
labbcb/GA4GHclient documentation built on May 20, 2019, 7:32 p.m.