R/unrelatedIndividuals.R

setMethod("unrelatedIndividuals", signature(param="VariantFilteringParam"),
          function(param, svparam=ScanVcfParam(), BPPARAM=bpparam("SerialParam")) {

  ## store call for reproducing it later
  callobj <- match.call()
  callstr <- gsub(".local", "unrelatedIndividuals", deparse(callobj))

  ## fetch necessary parameters
  vcfFiles <- param$vcfFiles
  seqInfos <- param$seqInfos
  txdb <- get(param$txdb)
  bsgenome <- get(param$bsgenome)
  sampleNames <- param$sampleNames
  
  if (!exists(as.character(substitute(BPPARAM))))
    stop(sprintf("Parallel back-end function %s given in argument 'BPPARAM' does not exist in the current workspace. Either you did not write correctly the function name or you did not load the package 'BiocParallel'.", as.character(substitute(BPPARAM))))
  
  if (length(vcfFiles) > 1) {
    stop("More than one input VCF file is currently not supported. Please either merge the VCF files into a single one with vcftools, do the variant calling simultaneously on all samples, or proceed analyzing each file separately.")
  } else if (length(vcfFiles) < 1)
    stop("A minimum of 1 vcf file has to be provided")

  annotationCache <- new.env() ## cache annotations when using VariantAnnotation::locateVariants()
  annotated_variants <- VRanges()
  metadata(mcols(annotated_variants)) <- list(cutoffs=CutoffsList(), sortings=CutoffsList())

  open(vcfFiles[[1]])
  n.var <- 0
  flag <- TRUE
  while(flag && nrow(vcf <- readVcf(vcfFiles[[1]], genome=seqInfos[[1]], param=svparam))) {

    # insert an index for each variant in the VCF file
    info(header(vcf)) <- rbind(info(header(vcf)),
                               DataFrame(Number=1, Type="Integer",
                                         Description="Variant index in the VCF file.",
                                         row.names="VCFIDX"))
    info(vcf)$VCFIDX <- (n.var+1):(n.var+nrow(vcf))
    varIDs <- rownames(vcf)

    n.var <- n.var + nrow(vcf)

    ## coerce the VCF object to a VRanges object
    variants <- as(vcf, "VRanges")

    ## since the conversion of VCF to VRanges strips the VCF ID field, let's put it back
    variants$VARID <- varIDs[variants$VCFIDX]

    ## harmonize Seqinfo data between variants, annotations and reference genome
    variants <- .matchSeqinfo(variants, txdb, bsgenome)

    ## annotate variants
    if (length(annotated_variants) > 0)
      annotated_variants <- c(annotated_variants, annotationEngine(variants, param, annotationCache,
                                                                   BPPARAM=BPPARAM))
    else
      annotated_variants <- annotationEngine(variants, param, annotationCache,
                                             BPPARAM=BPPARAM)

    if (length(vcfWhich(svparam)) > 0) ## temporary fix to keep this looping
      flag <- FALSE                    ## structure with access through genomic ranges

    message(sprintf("%d variants processed", n.var))
  }
  close(vcfFiles[[1]])

  gSO <- annotateSO(annotated_variants, sog(param))
  annotated_variants <- addSOmetadata(annotated_variants)

  if (length(annotated_variants) == 0)
    warning("The input VCF file has no variants.")

  annoGroups <- list()
  if (!is.null(mcols(mcols(annotated_variants))$TAB)) {
    mask <- !is.na(mcols(mcols(annotated_variants))$TAB)
    annoGroups <- split(colnames(mcols(annotated_variants))[mask],
                      mcols(mcols(annotated_variants))$TAB[mask])
  }

  ## add functional annotation filters and cutoffs generated by the annotation engine
  funFilters <- FilterRules(lapply(metadata(mcols(annotated_variants))$filters,
                                   function(f) { environment(f) <- baseenv() ; f}))
  active(funFilters) <- FALSE ## by default, functional annotation filters are inactive
  flt <- c(filters(param), funFilters)
  fltMd <- rbind(filtersMetadata(param),
                 DataFrame(Description=sapply(metadata(mcols(annotated_variants))$filters,
                                              attr, "description"),
                           AnnoGroup=sapply(metadata(mcols(annotated_variants))$filters,
                                            attr, "TAB")))
  cutoffs <- metadata(mcols(annotated_variants))$cutoffs
  sortings <- metadata(mcols(annotated_variants))$sortings
  bsgenome <- get(param$bsgenome)

  new("VariantFilteringResults", callObj=callobj, callStr=callstr,
      genomeDescription=as(bsgenome, "GenomeDescription"), inputParameters=param,
      activeSamples=sampleNames, inheritanceModel="unrelated individuals",
      variants=annotated_variants, bamViews=BamViews(), gSO=gSO, filters=flt,
      filtersMetadata=fltMd, cutoffs=cutoffs, sortings=sortings, annoGroups=annoGroups,
      minScore5ss=NA_real_, minScore3ss=NA_real_)
})

Try the VariantFiltering package in your browser

Any scripts or data that you put into this service are public.

VariantFiltering documentation built on Nov. 8, 2020, 7:25 p.m.