tests/testthat/test-vcf.R

context("Test read from VCF file")
outFile <- system.file("tests/correct.Rdata", package = "seqminer")

sysname <- Sys.info()[['sysname']]
systemTestable <- FALSE
if (! sysname %in%  c("Linux", "Windows", "Darwin")) {
  cat("Skip unit-testing tabix, probably your system ", sysname, " is big endian!\n")
} else {
  systemTestable <- TRUE
}

if (file.exists(outFile) && systemTestable) {
  load(outFile, verbose = TRUE)

  test_that("readVCFToMatrixByRange", {
    cat("--------------- test readVCFToMatrixByRange ---------------\n")
    fileName <- system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")

    test.cfh <- readVCFToMatrixByRange(fileName, "1:196621007-196716634", "Nonsynonymous")
    expect_equal(test.cfh, cfh.range)

    test.cfh.nonsyn <- readVCFToMatrixByRange(fileName, "1:196621007-196642234,1:196642235-196716634", "Nonsynonymous")
    expect_equal(test.cfh.nonsyn, cfh.nonsyn)

    test.cfh.nonsyn.2 <- readVCFToMatrixByRange(fileName, c("1:196621007-196642234", "1:196642235-196716634"), "Nonsynonymous")
    expect_equal(test.cfh.nonsyn, cfh.nonsyn)

    test.cfh.syn <- readVCFToMatrixByRange(fileName, "1:196621007-196716634", "Synonymous")
    expect_equal(test.cfh.syn, cfh.syn)
  })

  test_that("test readVCFToMatrixByGene", {
    ## Extract genotypes
    fileName <- system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    geneFile <- system.file("vcf/refFlat_hg19_6col.txt.gz", package = "seqminer")

    test.cfh.syn.2 <- readVCFToMatrixByGene(fileName, geneFile, "CFH", "Synonymous")
    expect_equal(test.cfh.syn.2, cfh.syn.2)

    test.apoe <- readVCFToMatrixByGene(fileName, geneFile, "APOE", "")
    expect_equal(test.apoe, apoe)

    expect_warning(ssss <- readVCFToMatrixByGene(fileName, geneFile, "ssss", ""))
  })

  test_that("test VCFToListByGene", {
    fileName <- system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    geneFile <- system.file("vcf/refFlat_hg19_6col.txt.gz", package = "seqminer")
    test.out.gene.1 <- readVCFToListByGene(fileName, geneFile, "CFH", "Synonymous", c("CHROM", "POS", "ID"), "", "")
    expect_equal(test.out.gene.1, out.gene.1)

    test.out.gene.2 <- readVCFToListByGene(fileName, geneFile, "CFH", "Synonymous", c("CHROM","ID"), c("AC","AN"), "")
    expect_equal(test.out.gene.2, out.gene.2)

    test.out.gene.3 <- readVCFToListByGene(fileName, geneFile, "CFH", "Synonymous", c("CHROM","ID"), c("AC","AN"), c("GT","GQ"))
    expect_equal(test.out.gene.3, out.gene.3)
  })

  test_that("test VCFToListByRange", {
    fileName <- system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    test.out.range.1 <- readVCFToListByRange(fileName, "1:196621007-196716634", "Synonymous", c("CHROM","ID", "POS"), c("AC","AN"), c("GT","GQ"))
    expect_equal(test.out.range.1, out.range.1)
  })

  ## test bcf
  test_that("readVCFToMatrixByRange.BCF", {
    ## cat("--------------- test readVCFToMatrixByRange ---------------\n")
    fileName <- system.file("vcf/all.anno.filtered.extract.bcf.gz", package = "seqminer")
    test.cfh <- readVCFToMatrixByRange(fileName, "1:196621007-196716634", "Nonsynonymous")
    expect_equal(test.cfh, cfh)
  })

  ## test tabix functions
  test_that("tabix", {
    fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    snp <- tabix.read(fileName, "1:196623337-196632470")
    expect_equal(class(snp), "character")
    expect_equal(length(snp), 3)
  })

  test_that("tabix", {
    fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    snp <- tabix.read(fileName, "1:196633607-196633607")
    expect_equal(class(snp), "character")
    expect_equal(length(snp), 0)
  })
  test_that("tabix.read.table", {
    fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    snp <- tabix.read.table(fileName, "1:196623337-196632470")
    expect_equal(class(snp), "data.frame")
    expect_equal(nrow(snp), 3)
    expect_equal(ncol(snp), 12)
  })
  test_that("tabix.read.table", {
    fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    snp <- tabix.read.table(fileName, "1:196633607-196633607")
    expect_equal(class(snp), "data.frame")
    expect_equal(nrow(snp), 0)
    expect_equal(ncol(snp), 0)
  })
  test_that("readBGENToMatrixByRange", {
    fileName = system.file("bgen/all.anno.filtered.extract.bgen", package = "seqminer")
    cfh <- readBGENToMatrixByRange(fileName, "1:196621007-196716634")
    expect_length(cfh , 1)
    expect_equivalent(cfh , cfh.all)   ## compare value not dimnames
  })
  test_that("readBGENToMatrixByGene", {
    fileName = system.file("bgen/all.anno.filtered.extract.bgen", package = "seqminer")
    geneFile = system.file("vcf/refFlat_hg19_6col.txt.gz", package = "seqminer")
    cfh <- readBGENToMatrixByGene(fileName, geneFile, "CFH")
    expect_length(cfh , 1)
    expect_equivalent(cfh , cfh.all)   ## compare value not dimnames
  })

  test_that("readBGENToListByRange", {
    fileName = system.file("bgen/all.anno.filtered.extract.bgen", package = "seqminer")
    cfh <- readBGENToListByRange(fileName, "1:196621007-196716634")
    expect_length(cfh, 8)
  })

  test_that("readBGENToListByGene", {
    fileName = system.file("bgen/all.anno.filtered.extract.bgen", package = "seqminer")
    geneFile = system.file("vcf/refFlat_hg19_6col.txt.gz", package = "seqminer")
    cfh <- readBGENToListByGene(fileName, geneFile, "CFH")
    expect_length(cfh, 8)
  })

  test_that("readPlinkToMatrixByIndex", {
    fileName = system.file("plink/all.anno.filtered.extract.bed", package = "seqminer")
    fileName = sub(fileName, pattern = ".bed", replacement = "")
    sampleIndex = seq(3)
    markerIndex =c(14, 36) ## position 1:196642233 and 1:196659237
    cfh <- readPlinkToMatrixByIndex(fileName, sampleIndex, markerIndex)
    expect_equal(t(cfh), cfh.range[[1]])
  })

  test_that("openPlink", {
    cat("--------------- test openPlink ---------------\n")
    fileName = system.file("plink/all.anno.filtered.extract.bed", package = "seqminer")
    fileName = sub(fileName, pattern = ".bed", replacement = "")
    plinkObj <- openPlink(fileName)
    expect_length(plinkObj, 3)
  })

  test_that("readPlinkBySubscript", {
    cat("--------------- test readPlinkBySubscript ---------------\n")
    fileName = system.file("plink/all.anno.filtered.extract.bed", package = "seqminer")
    filePrefix = sub(fileName, pattern = ".bed", replacement = "")
    plinkObj = openPlink(filePrefix)
    sampleIndex = seq(3)
    markerIndex =c(14, 36)
    cfh <- plinkObj[sampleIndex, markerIndex]
    expect_equal(t(cfh), cfh.range[[1]])
  })

  test_that("readSingleChromosomeVCFToMatrixByRange", {
    cat("--------------- test readSingleChromosomeVCFToMatrixByRange ---------------\n")
    fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    cfh <- readSingleChromosomeVCFToMatrixByRange(fileName, "1:196621007-196716634")
    expect_equivalent(cfh[[1]], t(cfh.all[[1]]))
  })

  test_that("createSingleChromosomeVCFIndex", {
    cat("--------------- test createSingleChromosomeVCFIndex ---------------\n")
    fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
    cfh <- createSingleChromosomeVCFIndex(fileName, indexFileName = tempfile())
    expect(nchar(cfh) > 0)
  })

  test_that("readSingleChromosomeBCFToMatrixByRange", {
    cat("--------------- test readSingleChromosomeBCFToMatrixByRange ---------------\n")
    fileName = system.file("vcf/all.anno.filtered.extract.headerFixed.bcf.gz", package = "seqminer")
    cfh <- readSingleChromosomeBCFToMatrixByRange(fileName, "1:196621007-196716634")
    expect_equivalent(cfh[[1]], t(cfh.all[[1]]))
  })

  test_that("createSingleChromosomeBCFIndex", {
    cat("--------------- test createSingleChromosomeBCFIndex ---------------\n")
    fileName = system.file("vcf/all.anno.filtered.extract.headerFixed.bcf.gz", package = "seqminer")
    cfh <- createSingleChromosomeBCFIndex(fileName, indexFileName = tempfile())
    expect(nchar(cfh) > 0)
  })
}

if (FALSE) {
  ##  code that generate the correct output in a .Rdata file
  rm(list=ls())

  setwd("/net/fantasia/home/zhanxw/mycode/seqminer/seqminer/inst/tests")
  outFile <- "correct.Rdata"

  suppressPackageStartupMessages(library(seqminer))
  fileName = system.file("vcf/all.anno.filtered.extract.vcf.gz", package = "seqminer")
  geneFile = system.file("vcf/refFlat_hg19_6col.txt.gz", package = "seqminer")

  cat("--------------- test readVCFToMatrixByRange ---------------\n")
  try(cfh <- readVCFToMatrixByRange(fileName, "1:196621007-196716634", "Nonsynonymous"))
  try(cfh.nonsyn <- readVCFToMatrixByRange(fileName, "1:196621007-196642234,1:196642235-196716634", "Nonsynonymous"))
  try(cfh.nonsyn.2 <- readVCFToMatrixByRange(fileName, c("1:196621007-196642234", "1:196642235-196716634"), "Nonsynonymous"))
  try(cfh.syn <- readVCFToMatrixByRange(fileName, "1:196621007-196716634", "Synonymous"))

  cat("--------------- test readVCFToMatrixByGene ---------------\n")
  ## Extract genotypes
  try(cfh.syn.2 <- readVCFToMatrixByGene(fileName, geneFile, "CFH", "Synonymous"))
  try(apoe <- readVCFToMatrixByGene(fileName, geneFile, "APOE", ""))
  try(ssss <- readVCFToMatrixByGene(fileName, geneFile, "ssss", ""))

  ## Another way to extract from VCF File
  cat("--------------- test VCFToListByGene ---------------\n")
  try(out.gene.1 <- readVCFToListByGene(fileName, geneFile, "CFH", "Synonymous", c("CHROM", "POS", "ID"), "", ""))
  try(out.gene.2 <- readVCFToListByGene(fileName, geneFile, "CFH", "Synonymous", c("CHROM","ID"), c("AC","AN"), ""))
  try(out.gene.3 <- readVCFToListByGene(fileName, geneFile, "CFH", "Synonymous", c("CHROM","ID"), c("AC","AN"), c("GT","GQ")))
  ##print (t)

  cat("--------------- test VCFToListByRange ---------------\n")
  try(out.range.1 <- readVCFToListByRange(fileName, "1:196621007-196716634", "Synonymous", c("CHROM","ID", "POS"), c("AC","AN"), c("GT","GQ")))
  ## print(t)

  save.image(file = outFile)
  ## load(outFile)
}

Try the seqminer package in your browser

Any scripts or data that you put into this service are public.

seqminer documentation built on Sept. 2, 2023, 9:08 a.m.