tests/testthat/test_coxmeg_gds.R

context("test gds function")

.testPheno <- function(sample.id, seed=25) {
    n <- length(sample.id)
    set.seed(seed)
    time <- rnorm(n, mean=100, sd=10)
    set.seed(seed)
    status <- rbinom(n, 1, 0.4)
    data.frame(family=sample.id, sample.id,
               time, status, stringsAsFactors=FALSE)
}

.testSparseMatrix <- function(n, n_blocks) {
    n_f <- ceiling(n/n_blocks)
    mat_list <- list()
    size <- rep(n_blocks,n_f)
    offd <- 0.5
    for(i in 1:n_f){
        mat_list[[i]] <- matrix(offd,size[i],size[i])
        diag(mat_list[[i]]) <- 1
    }
    sigma <- as.matrix(bdiag(mat_list))
    sigma <- sigma[1:n, 1:n]
    sigma <- as(sigma,'dgCMatrix')
    return(sigma)
}

test_that("coxmeg_gds matches coxmeg_plink", {
    bed = system.file("extdata", "example_null.bed", package = "coxmeg")
    bed = substr(bed,1,nchar(bed)-4)
    pheno.file = system.file("extdata", "ex_pheno.txt", package = "coxmeg")
    cov.file = system.file("extdata", "ex_cov.txt", package = "coxmeg")
    
    ## building a relatedness matrix
    sigma <- .testSparseMatrix(n=3000, n_blocks=5)
    
    re.plink = coxmeg_plink(pheno.file,sigma,type='bd',bed=bed,tmp_dir=tempdir(),cov_file=cov.file, verbose=FALSE)
    
    gdsfile <- tempfile()
    SNPRelate::snpgdsBED2GDS(bed.fn=paste0(bed,".bed"), fam.fn=paste0(bed,".fam"), bim.fn=paste0(bed,".bim"),
                             out.gdsfn=gdsfile, verbose=FALSE)
    gds <- SNPRelate::snpgdsOpen(gdsfile)
    pheno <- read.table(pheno.file, header=FALSE, as.is=TRUE, na.strings="-9")
    cov <- read.table(cov.file, header=FALSE, as.is=TRUE)
    
    re.gds <- coxmeg_gds(gds,pheno,sigma,type='bd',cov=cov,verbose=FALSE)
    expect_equal(re.plink, re.gds, tolerance=1e-5)
    
    SNPRelate::snpgdsClose(gds)
    unlink(gdsfile)
})


test_that("SNPRelate and SeqArray methods match", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    snpfile <- SNPRelate::snpgdsExampleFileName()
    seqfile <- tempfile()
    SeqArray::seqSNP2GDS(snpfile, seqfile, verbose=FALSE)
    
    snp <- SNPRelate::snpgdsOpen(snpfile)
    seq <- SeqArray::seqOpen(seqfile)
    
    snpsel <- .gdsSelectSNP(snp, maf=0.01, missing.rate=0.01, verbose=FALSE)
    seqsel <- .gdsSelectSNP(seq, maf=0.01, missing.rate=0.01, verbose=FALSE)
    expect_equal(snpsel, seqsel)
    
    seqResetFilter(seq, verbose=FALSE)
    sample.id <- seqGetData(seq, "sample.id")[1:50]
    snp.id <- seqGetData(seq, "variant.id")[1:100]
    snpref <- substr(gdsfmt::read.gdsn(gdsfmt::index.gdsn(snp, "snp.allele")), 1, 1)
    seqref <- seqGetData(seq, "$ref")
    allele.swap <- (snpref != seqref)
    swap.sel <- allele.swap[1:100]
    snpgeno <- .gdsGetGeno(snp, sample.id=sample.id, snp.id=snp.id, verbose=FALSE)
    seqgeno <- .gdsGetGeno(seq, sample.id=sample.id, snp.id=snp.id, verbose=FALSE)
    expect_equivalent(snpgeno[,!swap.sel], seqgeno[,!swap.sel])
    expect_equivalent(snpgeno[,swap.sel], 2-seqgeno[,swap.sel])
    
    snplist <- .gdsSNPList(snp)
    seqlist <- .gdsSNPList(seq)
    snplist$chromosome <- as.character(snplist$chromosome)
    expect_equivalent(snplist[!allele.swap,], seqlist[!allele.swap,])
    expect_equivalent(snplist[allele.swap,1:3], seqlist[allele.swap,1:3])
    expect_equal(snplist$afreq[allele.swap], 1-seqlist$afreq[allele.swap])
    
    SNPRelate::snpgdsClose(snp)
    SeqArray::seqClose(seq)
    unlink(seqfile)
})


test_that("SNPRelate and SeqArray coxmeg_gds match", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    snpfile <- SNPRelate::snpgdsExampleFileName()
    seqfile <- tempfile()
    SeqArray::seqSNP2GDS(snpfile, seqfile, verbose=FALSE)
    
    snp <- SNPRelate::snpgdsOpen(snpfile)
    seq <- SeqArray::seqOpen(seqfile)
    
    sample.id <- seqGetData(seq, "sample.id")
    pheno <- .testPheno(sample.id, seed=5)
    
    # covariance matrix
    #covmat <- SNPRelate::snpgdsGRM(snp, verbose=FALSE)
    #sigma <- covmat$grm
    sigma <- .testSparseMatrix(n=nrow(pheno), n_blocks=5)
    
    # set high MAF so test runs faster
    re.snp <- coxmeg_gds(snp, pheno, sigma, type='bd', maf=0.47, verbose=FALSE)
    re.seq <- coxmeg_gds(seq, pheno, sigma, type='bd', maf=0.47, verbose=FALSE)
    allele.swap <- re.snp$summary$allele != re.seq$summary$allele
    expect_equal(re.snp$summary$beta[!allele.swap], re.seq$summary$beta[!allele.swap], tolerance=1e-4)
    expect_equal(re.snp$summary$beta[allele.swap], -re.seq$summary$beta[allele.swap], tolerance=1e-4)
    expect_equal(re.snp$summary$HR[!allele.swap], re.seq$summary$HR[!allele.swap], tolerance=1e-4)
    expect_equal(re.snp$summary$HR[allele.swap], 1/re.seq$summary$HR[allele.swap], tolerance=1e-4)
    expect_equal(re.snp$summary$sd_beta, re.seq$summary$sd_beta, tolerance=1e-4)
    expect_equal(re.snp$summary$p, re.seq$summary$p, tolerance=1e-4)
    
    SNPRelate::snpgdsClose(snp)
    SeqArray::seqClose(seq)
    unlink(seqfile)
})


test_that("SeqArray utils respect variant filters", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    gdsfile <- seqExampleFileName()
    gds <- seqOpen(gdsfile)
    
    x <- .gdsSelectSNP(gds, maf=0.01, verbose=FALSE)
    seqSetFilter(gds, variant.sel=1:500, verbose=FALSE)
    x2 <- .gdsSelectSNP(gds, maf=0.01, verbose=FALSE)
    expect_true(length(x) > length(x2))
    
    # with sample.id
    seqResetFilter(gds, verbose=FALSE)
    samp <- seqGetData(gds, "sample.id")[1:50]
    x <- .gdsSelectSNP(gds, sample.id=samp, maf=0.01, verbose=FALSE)
    seqSetFilter(gds, variant.sel=1:500, verbose=FALSE)
    x2 <- .gdsSelectSNP(gds, sample.id=samp, maf=0.01, verbose=FALSE)
    expect_true(length(x) > length(x2))
    
    seqClose(gds)  
})


test_that("SeqArray method respects sample filters", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    gdsfile <- SeqArray::seqExampleFileName()
    gds <- SeqArray::seqOpen(gdsfile)
    
    sample.id <- SeqArray::seqGetData(gds, "sample.id")
    pheno <- .testPheno(sample.id, seed=7)
    sigma <- .testSparseMatrix(n=nrow(pheno), n_blocks=5)
    
    SeqArray::seqSetFilter(gds, sample.id=sample.id[1:50], variant.sel=1:1000, verbose=FALSE)
    re <- coxmeg_gds(gds, pheno, sigma, type='bd', verbose=FALSE)
    expect_true(re$nsam <= 50)
    
    SeqArray::seqClose(gds)  
})


test_that("SNPRelate method subsets to samples in GDS", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    snpfile <- SNPRelate::snpgdsExampleFileName()
    gds <- SNPRelate::snpgdsOpen(snpfile)
    
    sample.id <- SNPRelate::snpgdsSummary(gds, show=FALSE)$sample.id
    sample.add <- c(sample.id, letters)
    pheno <- .testPheno(sample.add, seed=9)
    sigma <- .testSparseMatrix(n=nrow(pheno), n_blocks=7)
    
    re <- coxmeg_gds(gds, pheno, sigma, type='bd', maf=0.47, verbose=FALSE)
    expect_true(re$nsam <= length(sample.id))
    
    SNPRelate::snpgdsClose(gds)
})


test_that("SeqArray method selects snp.id", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    gdsfile <- SeqArray::seqExampleFileName()
    gds <- SeqArray::seqOpen(gdsfile)
    
    sample.id <- SeqArray::seqGetData(gds, "sample.id")
    pheno <- .testPheno(sample.id, seed=7)
    sigma <- .testSparseMatrix(n=nrow(pheno), n_blocks=5)
    
    snp.id <- SeqArray::seqGetData(gds, "variant.id")[1:100]
    re <- coxmeg_gds(gds, pheno, sigma, type='bd', snp.id=snp.id, maf=0, verbose=FALSE)
    expect_true(nrow(re$summary) <= 100)
    
    SeqArray::seqClose(gds)
})


test_that("SNPRelate method selects snp.id", {
    gdsfmt::showfile.gds(closeall=TRUE, verbose=FALSE)
    snpfile <- SNPRelate::snpgdsExampleFileName()
    gds <- SNPRelate::snpgdsOpen(snpfile)
    
    gdssum <- SNPRelate::snpgdsSummary(gds, show=FALSE)
    pheno <- .testPheno(gdssum$sample.id, seed=9)
    sigma <- .testSparseMatrix(n=nrow(pheno), n_blocks=7)
    
    snp.id <- gdssum$snp.id[1:100]
    re <- coxmeg_gds(gds, pheno, sigma, type='bd', snp.id=snp.id, maf=0, verbose=FALSE)
    expect_true(nrow(re$summary) <= 100)
    
    SNPRelate::snpgdsClose(gds)
})

Try the coxmeg package in your browser

Any scripts or data that you put into this service are public.

coxmeg documentation built on Jan. 13, 2023, 5:07 p.m.