tests/testthat/test-genio.R

# need to load it explicitly for some tests
library(tibble)
library(dplyr)

context("test-genio")

# all examples have 10 rows by construction, hardcode for tests here
n_rows <- 10

test_that("add_ext works", {
    # test that there are errors when crucial data is missing
    expect_error(add_ext()) # all is missing
    expect_error(add_ext('file')) # ext is missing
    expect_error(add_ext(ext='txt')) # file is missing
    
    # create a scenario where we know if the desired extension is already there or not
    ext <- 'bim'
    foExtN <- 'file-that-does-not-exist'
    foExtY <- paste0(foExtN, '.', ext)
    # test that missing extension got added correctly
    expect_equal(foExtY, add_ext(foExtN, ext))
    # test that present extension doesn't get added again
    expect_equal(foExtY, add_ext(foExtY, ext))
    # test that an NA extension doesn't add anything
    expect_equal(foExtN, add_ext(foExtN, NA))
})

test_that("real_path works", {
    # test that there are errors when crucial data is missing
    expect_error(real_path()) # all is missing
    expect_error(real_path('file')) # ext is missing
    expect_error(real_path(ext='txt')) # file is missing
    
    # function returns input when file does not exist
    fi <- 'file-that-does-not-exist'
    expect_equal(fi, real_path(fi, 'fam'))

    # test with real file (uncompressed)
    # file path of interest
    fi <- system.file("extdata", 'sample.fam', package = "genio", mustWork = TRUE)
    # when its correctly specified, real_path does not change it!
    expect_equal(fi, real_path(fi, 'fam'))
    # now we omit the extension, will still work
    fiNoExt <- sub('\\.fam$', '', fi)
    expect_equal(fi, real_path(fiNoExt, 'fam'))
    
    # repeat with a compressed file
    # file path of interest
    fi <- system.file("extdata", 'sample2.fam.gz', package = "genio", mustWork = TRUE)
    # when its correctly specified, real_path does not change it!
    expect_equal(fi, real_path(fi, 'fam'))
    # now we omit the .gz extension, will still work
    fiNoGz <- sub('\\.gz$', '', fi)
    expect_equal(fi, real_path(fiNoGz, 'fam'))
    # now we omit the .fam extension too, will still work
    fiNoGzNoExt <- sub('\\.fam$', '', fiNoGz)
    expect_equal(fi, real_path(fiNoGzNoExt, 'fam'))
    
})

test_that("read_fam works", {
    # test that there are errors when crucial data is missing
    expect_error(read_fam()) # file is missing
    expect_error(read_fam('bogus-file')) # file is non-existent (read_table2 will complain)
    
    # load sample file
    fi <- system.file("extdata", 'sample.fam', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    fam <- read_fam(fi)
    # test that number of columns is as expected
    expect_equal(ncol(fam), length(fam_names))
    # test that names are in right order too
    expect_equal(names(fam), fam_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(fam), n_rows)

    # repeat with missing extension
    fiNoExt <- sub('\\.fam$', '', fi)
    # this should just work (no "expect" test)
    fam <- read_fam(fiNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(fam), length(fam_names))
    # test that names are in right order too
    expect_equal(names(fam), fam_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(fam), n_rows)

    # repeat with compressed file (and true full path)
    fi <- system.file("extdata", 'sample2.fam.gz', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    fam <- read_fam(fi)
    # test that number of columns is as expected
    expect_equal(ncol(fam), length(fam_names))
    # test that names are in right order too
    expect_equal(names(fam), fam_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(fam), n_rows)
    
    # repeat with missing .gz extension
    fiNoGz <- sub('\\.gz$', '', fi)
    # this should just work (no "expect" test)
    fam <- read_fam(fiNoGz)
    # test that number of columns is as expected
    expect_equal(ncol(fam), length(fam_names))
    # test that names are in right order too
    expect_equal(names(fam), fam_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(fam), n_rows)

    # repeat with missing .fam.gz double extension
    fiNoGzNoExt <- sub('\\.fam$', '', fiNoGz)
    # this should just work (no "expect" test)
    fam <- read_fam(fiNoGzNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(fam), length(fam_names))
    # test that names are in right order too
    expect_equal(names(fam), fam_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(fam), n_rows)

})

test_that("read_phen works", {
    # test that there are errors when crucial data is missing
    expect_error(read_phen()) # file is missing
    expect_error(read_phen('bogus-file')) # file is non-existent (read_table2 will complain)
    
    # load sample file
    fi <- system.file("extdata", 'sample.phen', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    phen <- read_phen(fi)
    # test that number of columns is as expected
    expect_equal(ncol(phen), length(phen_names))
    # test that names are in right order too
    expect_equal(names(phen), phen_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(phen), n_rows)

    # repeat with missing extension
    fiNoExt <- sub('\\.phen$', '', fi)
    # this should just work (no "expect" test)
    phen <- read_phen(fiNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(phen), length(phen_names))
    # test that names are in right order too
    expect_equal(names(phen), phen_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(phen), n_rows)

    # repeat with compressed file (and true full path)
    fi <- system.file("extdata", 'sample2.phen.gz', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    phen <- read_phen(fi)
    # test that number of columns is as expected
    expect_equal(ncol(phen), length(phen_names))
    # test that names are in right order too
    expect_equal(names(phen), phen_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(phen), n_rows)
    
    # repeat with missing .gz extension
    fiNoGz <- sub('\\.gz$', '', fi)
    # this should just work (no "expect" test)
    phen <- read_phen(fiNoGz)
    # test that number of columns is as expected
    expect_equal(ncol(phen), length(phen_names))
    # test that names are in right order too
    expect_equal(names(phen), phen_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(phen), n_rows)

    # repeat with missing .phen.gz double extension
    fiNoGzNoExt <- sub('\\.phen$', '', fiNoGz)
    # this should just work (no "expect" test)
    phen <- read_phen(fiNoGzNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(phen), length(phen_names))
    # test that names are in right order too
    expect_equal(names(phen), phen_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(phen), n_rows)

})

test_that("read_bim works", {
    # test that there are errors when crucial data is missing
    expect_error(read_bim()) # file is missing
    expect_error(read_bim('bogus-file')) # file is non-existent (read_table2 will complain)
    
    # load sample file
    fi <- system.file("extdata", 'sample.bim', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    bim <- read_bim(fi)
    # test that number of columns is as expected
    expect_equal(ncol(bim), length(bim_names))
    # test that names are in right order too
    expect_equal(names(bim), bim_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(bim), n_rows)

    # repeat with missing extension
    fiNoExt <- sub('\\.bim$', '', fi)
    # this should just work (no "expect" test)
    bim <- read_bim(fiNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(bim), length(bim_names))
    # test that names are in right order too
    expect_equal(names(bim), bim_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(bim), n_rows)

    # repeat with compressed file (and true full path)
    fi <- system.file("extdata", 'sample2.bim.gz', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    bim <- read_bim(fi)
    # test that number of columns is as expected
    expect_equal(ncol(bim), length(bim_names))
    # test that names are in right order too
    expect_equal(names(bim), bim_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(bim), n_rows)
    
    # repeat with missing .gz extension
    fiNoGz <- sub('\\.gz$', '', fi)
    # this should just work (no "expect" test)
    bim <- read_bim(fiNoGz)
    # test that number of columns is as expected
    expect_equal(ncol(bim), length(bim_names))
    # test that names are in right order too
    expect_equal(names(bim), bim_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(bim), n_rows)

    # repeat with missing .bim.gz double extension
    fiNoGzNoExt <- sub('\\.bim$', '', fiNoGz)
    # this should just work (no "expect" test)
    bim <- read_bim(fiNoGzNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(bim), length(bim_names))
    # test that names are in right order too
    expect_equal(names(bim), bim_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(bim), n_rows)

})

test_that("read_ind works", {
    # test that there are errors when crucial data is missing
    expect_error(read_ind()) # file is missing
    expect_error(read_ind('bogus-file')) # file is non-existent (read_table2 will complain)
    
    # load sample file
    fi <- system.file("extdata", 'sample.ind', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    ind <- read_ind(fi)
    # test that number of columns is as expected
    expect_equal(ncol(ind), length(ind_names))
    # test that names are in right order too
    expect_equal(names(ind), ind_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(ind), n_rows)

    # repeat with missing extension
    fiNoExt <- sub('\\.ind$', '', fi)
    # this should just work (no "expect" test)
    ind <- read_ind(fiNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(ind), length(ind_names))
    # test that names are in right order too
    expect_equal(names(ind), ind_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(ind), n_rows)

    # repeat with compressed file (and true full path)
    fi <- system.file("extdata", 'sample2.ind.gz', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    ind <- read_ind(fi)
    # test that number of columns is as expected
    expect_equal(ncol(ind), length(ind_names))
    # test that names are in right order too
    expect_equal(names(ind), ind_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(ind), n_rows)
    
    # repeat with missing .gz extension
    fiNoGz <- sub('\\.gz$', '', fi)
    # this should just work (no "expect" test)
    ind <- read_ind(fiNoGz)
    # test that number of columns is as expected
    expect_equal(ncol(ind), length(ind_names))
    # test that names are in right order too
    expect_equal(names(ind), ind_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(ind), n_rows)

    # repeat with missing .ind.gz double extension
    fiNoGzNoExt <- sub('\\.ind$', '', fiNoGz)
    # this should just work (no "expect" test)
    ind <- read_ind(fiNoGzNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(ind), length(ind_names))
    # test that names are in right order too
    expect_equal(names(ind), ind_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(ind), n_rows)

})

test_that("read_snp works", {
    # test that there are errors when crucial data is missing
    expect_error(read_snp()) # file is missing
    expect_error(read_snp('bogus-file')) # file is non-existent (read_table2 will complain)
    
    # load sample file
    fi <- system.file("extdata", 'sample.snp', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    snp <- read_snp(fi)
    # test that number of columns is as expected
    expect_equal(ncol(snp), length(snp_names))
    # test that names are in right order too
    expect_equal(names(snp), snp_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(snp), n_rows)

    # repeat with missing extension
    fiNoExt <- sub('\\.snp$', '', fi)
    # this should just work (no "expect" test)
    snp <- read_snp(fiNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(snp), length(snp_names))
    # test that names are in right order too
    expect_equal(names(snp), snp_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(snp), n_rows)

    # repeat with compressed file (and true full path)
    fi <- system.file("extdata", 'sample2.snp.gz', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    snp <- read_snp(fi)
    # test that number of columns is as expected
    expect_equal(ncol(snp), length(snp_names))
    # test that names are in right order too
    expect_equal(names(snp), snp_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(snp), n_rows)
    
    # repeat with missing .gz extension
    fiNoGz <- sub('\\.gz$', '', fi)
    # this should just work (no "expect" test)
    snp <- read_snp(fiNoGz)
    # test that number of columns is as expected
    expect_equal(ncol(snp), length(snp_names))
    # test that names are in right order too
    expect_equal(names(snp), snp_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(snp), n_rows)

    # repeat with missing .snp.gz double extension
    fiNoGzNoExt <- sub('\\.snp$', '', fiNoGz)
    # this should just work (no "expect" test)
    snp <- read_snp(fiNoGzNoExt)
    # test that number of columns is as expected
    expect_equal(ncol(snp), length(snp_names))
    # test that names are in right order too
    expect_equal(names(snp), snp_names)
    # the example had n_rows lines only, check that too
    expect_equal(nrow(snp), n_rows)

})

test_that("write_fam works", {
    # test that there are errors when crucial data is missing
    expect_error(write_fam()) # all is missing
    expect_error(write_fam('file')) # tibble is missing
    expect_error(write_fam(tib=data.frame(id=1))) # file is missing (tib is incomplete too, but that gets tested downstream)
    
    # load sample file
    fi <- system.file("extdata", 'sample.fam', package = "genio", mustWork = TRUE)
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.fam')
    # this should just work (tested earlier)
    fam1 <- read_fam(fi)
    # try writing it back elsewhere
    write_fam(fo, fam1)
    # and read it back again to comare
    fam2 <- read_fam(fo)
    # compare
    expect_equal(fam1, fam2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by randomly reordering data, should automatically reorder too
    fam1_r <- fam1[, sample.int(ncol(fam1))]
    # try writing it back elsewhere
    write_fam(fo, fam1_r)
    # and read it back again to compare
    fam2 <- read_fam(fo)
    # compare
    expect_equal(fam1, fam2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by adding junk columns, should be automatically ignored
    fam1_r <- fam1 # copy first
    fam1_r$junk <- 1 # add a junk column
    # try writing it back elsewhere
    write_fam(fo, fam1_r)
    # and read it back again to compare
    fam2 <- read_fam(fo)
    # compare
    expect_equal(fam1, fam2)
    # delete output when done
    invisible(file.remove(fo))

    # delete a column, test that an error is thrown
    fam1_r <- fam1 # copy first
    fam1_r$id <- NULL # delete this column
    expect_error(write_fam(fo, fam1_r))
})

test_that("write_phen works", {
    # test that there are errors when crucial data is missing
    expect_error(write_phen()) # all is missing
    expect_error(write_phen('file')) # tibble is missing
    expect_error(write_phen(tib=data.frame(id=1))) # file is missing (tib is incomplete too, but that gets tested downstream)
    
    # load sample file
    fi <- system.file("extdata", 'sample.phen', package = "genio", mustWork = TRUE)
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.phen')
    # this should just work (tested earlier)
    phen1 <- read_phen(fi)
    # try writing it back elsewhere
    write_phen(fo, phen1)
    # and read it back again to compare
    phen2 <- read_phen(fo)
    # compare
    expect_equal(phen1, phen2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by randomly reordering data, should automatically reorder too
    phen1_r <- phen1[, sample.int(ncol(phen1))]
    # try writing it back elsewhere
    write_phen(fo, phen1_r)
    # and read it back again to compare
    phen2 <- read_phen(fo)
    # compare
    expect_equal(phen1, phen2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by adding junk columns, should be automatically ignored
    phen1_r <- phen1 # copy first
    phen1_r$junk <- 1 # add a junk column
    # try writing it back elsewhere
    write_phen(fo, phen1_r)
    # and read it back again to compare
    phen2 <- read_phen(fo)
    # compare
    expect_equal(phen1, phen2)
    # delete output when done
    invisible(file.remove(fo))

    # delete a column, test that an error is thrown
    phen1_r <- phen1 # copy first
    phen1_r$id <- NULL # delete this column
    expect_error(write_phen(fo, phen1_r))

    # create a random trait, test that writing and reading it both work
    # random quantitiative trait
    phen1$pheno <- rnorm(n_rows)
    # introduce NAs too!
    phen1$pheno[7:8] <- NA
    # try writing it back elsewhere
    write_phen(fo, phen1)
    # and read it back again to compare
    phen2 <- read_phen(fo)
    # compare
    expect_equal(phen1, phen2)
    # delete output when done
    invisible(file.remove(fo))
})

test_that("write_bim works", {
    # test that there are errors when crucial data is missing
    expect_error(write_bim()) # all is missing
    expect_error(write_bim('file')) # tibble is missing
    expect_error(write_bim(tib=data.frame(id=1))) # file is missing (tib is incomplete too, but that gets tested downstream)
    
    # load sample file
    fi <- system.file("extdata", 'sample.bim', package = "genio", mustWork = TRUE)
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.bim')
    # this should just work (tested earlier)
    bim1 <- read_bim(fi)
    # try writing it back elsewhere
    write_bim(fo, bim1)
    # and read it back again to comare
    bim2 <- read_bim(fo)
    # compare
    expect_equal(bim1, bim2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by randomly reordering data, should automatically reorder too
    bim1_r <- bim1[, sample.int(ncol(bim1))]
    # try writing it back elsewhere
    write_bim(fo, bim1_r)
    # and read it back again to comare
    bim2 <- read_bim(fo)
    # compare
    expect_equal(bim1, bim2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by adding junk columns, should be automatically ignored
    bim1_r <- bim1 # copy first
    bim1_r$junk <- 1 # add a junk column
    # try writing it back elsewhere
    write_bim(fo, bim1_r)
    # and read it back again to comare
    bim2 <- read_bim(fo)
    # compare
    expect_equal(bim1, bim2)
    # delete output when done
    invisible(file.remove(fo))

    # delete a column, test that an error is thrown
    bim1_r <- bim1 # copy first
    bim1_r$id <- NULL # delete this column
    expect_error(write_bim(fo, bim1_r))
})

test_that("write_bim with `append = TRUE` works", {
    # load sample file
    fi <- system.file("extdata", 'sample.bim', package = "genio", mustWork = TRUE)
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.bim')
    
    # this should just work (tested earlier)
    bim1 <- read_bim(fi)

    # here's the awkward part, where we write it back in parts
    # write every two lines
    for ( i in 1 : ( nrow( bim1 ) / 2 ) ) {
        # try writing it back elsewhere
        write_bim(
            fo,
            bim1[ (2*i-1):(2*i), ],
            append = TRUE
        )
    }
    
    # and read it back again to comare
    bim2 <- read_bim(fo)
    # compare
    expect_equal(bim1, bim2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat writing one line at the time
    for ( i in 1 : nrow( bim1 ) ) {
        # try writing it back elsewhere
        write_bim(
            fo,
            bim1[ i, ],
            append = TRUE
        )
    }
    
    # and read it back again to comare
    bim2 <- read_bim(fo)
    # compare
    expect_equal(bim1, bim2)
    # delete output when done
    invisible(file.remove(fo))
})

test_that("write_ind works", {
    # test that there are errors when crucial data is missing
    expect_error(write_ind()) # all is missing
    expect_error(write_ind('file')) # tibble is missing
    expect_error(write_ind(tib=data.frame(id=1))) # file is missing (tib is incomplete too, but that gets tested downstream)
    
    # load sample file
    fi <- system.file("extdata", 'sample.ind', package = "genio", mustWork = TRUE)
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.ind')
    # this should just work (tested earlier)
    ind1 <- read_ind(fi)
    # try writing it back elsewhere
    write_ind(fo, ind1)
    # and read it back again to comare
    ind2 <- read_ind(fo)
    # compare
    expect_equal(ind1, ind2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by randomly reordering data, should automatically reorder too
    ind1_r <- ind1[, sample.int(ncol(ind1))]
    # try writing it back elsewhere
    write_ind(fo, ind1_r)
    # and read it back again to comare
    ind2 <- read_ind(fo)
    # compare
    expect_equal(ind1, ind2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by adding junk columns, should be automatically ignored
    ind1_r <- ind1 # copy first
    ind1_r$junk <- 1 # add a junk column
    # try writing it back elsewhere
    write_ind(fo, ind1_r)
    # and read it back again to comare
    ind2 <- read_ind(fo)
    # compare
    expect_equal(ind1, ind2)
    # delete output when done
    invisible(file.remove(fo))

    # delete a column, test that an error is thrown
    ind1_r <- ind1 # copy first
    ind1_r$id <- NULL # delete this column
    expect_error(write_ind(fo, ind1_r))
})

test_that("write_snp works", {
    # test that there are errors when crucial data is missing
    expect_error(write_snp()) # all is missing
    expect_error(write_snp('file')) # tibble is missing
    expect_error(write_snp(tib=data.frame(id=1))) # file is missing (tib is incomplete too, but that gets tested downstream)
    
    # load sample file
    fi <- system.file("extdata", 'sample.snp', package = "genio", mustWork = TRUE)
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.snp')
    # this should just work (tested earlier)
    snp1 <- read_snp(fi)
    # try writing it back elsewhere
    write_snp(fo, snp1)
    # and read it back again to comare
    snp2 <- read_snp(fo)
    # compare
    expect_equal(snp1, snp2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by randomly reordering data, should automatically reorder too
    snp1_r <- snp1[, sample.int(ncol(snp1))]
    # try writing it back elsewhere
    write_snp(fo, snp1_r)
    # and read it back again to comare
    snp2 <- read_snp(fo)
    # compare
    expect_equal(snp1, snp2)
    # delete output when done
    invisible(file.remove(fo))

    # repeat by adding junk columns, should be automatically ignored
    snp1_r <- snp1 # copy first
    snp1_r$junk <- 1 # add a junk column
    # try writing it back elsewhere
    write_snp(fo, snp1_r)
    # and read it back again to comare
    snp2 <- read_snp(fo)
    # compare
    expect_equal(snp1, snp2)
    # delete output when done
    invisible(file.remove(fo))

    # delete a column, test that an error is thrown
    snp1_r <- snp1 # copy first
    snp1_r$id <- NULL # delete this column
    expect_error(write_snp(fo, snp1_r))
})

test_that("make_fam works", {
    # test that there are errors when crucial data is missing
    expect_error(make_fam()) # all is missing

    # this should work
    fam <- make_fam(n = n_rows)
    # check the tibble
    expect_equal(nrow(fam), n_rows)
    expect_equal(names(fam), fam_names)

    # ultimate test, make sure we can write it and also parse it without issues
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-make', fileext = '.fam')
    # to make comparison exact, change some column modes from numeric to character
    fam$fam <- as.character(fam$fam)
    fam$id <- as.character(fam$id)
    fam$pat <- as.character(fam$pat)
    fam$mat <- as.character(fam$mat)
    # write it
    write_fam(fo, fam)
    # read it
    fam2 <- read_fam(fo)
    # compare
    # NOTE: there's a weird issue with class() here that makes this not work unless it's "fam2[]" specifically
    # https://www.tidyverse.org/articles/2018/12/readr-1-3-1/#tibble-subclass
    expect_equal(fam, fam2[])
    # delete output when done
    invisible(file.remove(fo))

    # try a case where we are missing standard columns, add extra columns
    fam <- tibble(pheno =  0:2, subpop = 2:0, age = 30:32)
    # autocomplete and reorder
    fam <- make_fam(fam)
    # test that columns are as expected
    # first all standard columns, then additions in previous order
    expect_equal(names(fam), c(fam_names, 'subpop', 'age'))
    # test rows for good measure
    expect_equal(nrow(fam), 3)
    # make sure pheno was as we specified above and not overwritten with 0's
    expect_equal(fam$pheno, 0:2)
})

test_that("make_bim works", {
    # test that there are errors when crucial data is missing
    expect_error(make_bim()) # all is missing

    # this should work
    bim <- make_bim(n = n_rows)
    # check the tibble
    expect_equal(nrow(bim), n_rows)
    expect_equal(names(bim), bim_names)

    # ultimate test, make sure we can write it and also parse it without issues
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-make', fileext = '.bim')
    # to make comparison exact, change some column modes from numeric to character
    bim$chr <- as.character(bim$chr)
    bim$id <- as.character(bim$id)
    bim$ref <- as.character(bim$ref)
    bim$alt <- as.character(bim$alt)
    # write it
    write_bim(fo, bim)
    # read it
    bim2 <- read_bim(fo)
    # compare
    # NOTE: there's a weird issue with class() here that makes this not work unless it's "bim2[]" specifically
    # https://www.tidyverse.org/articles/2018/12/readr-1-3-1/#tibble-subclass
    expect_equal(bim, bim2[])
    # delete output when done
    invisible(file.remove(fo))

    # try a case where we are missing standard columns, add extra columns
    bim <- tibble(chr = 1:10, fst = (1:10)/100, maf = fst)
    # autocomplete and reorder
    bim <- make_bim(bim)
    # test that columns are as expected
    # first all standard columns, then additions in previous order
    expect_equal(names(bim), c(bim_names, 'fst', 'maf'))
    # test rows for good measure
    expect_equal(nrow(bim), 10)
    # make sure chr was as we specified above and not overwritten with 1's
    expect_equal(bim$chr, 1:10)
})

test_that("require_files_plink works", {
    # these should all just work (pass existing files)
    expect_silent( require_files_plink('dummy-33-101-0.1') )
    expect_silent( require_files_plink('dummy-4-10-0.1') )
    expect_silent( require_files_plink('dummy-5-10-0.1') )
    expect_silent( require_files_plink('dummy-6-10-0.1') )
    expect_silent( require_files_plink('dummy-7-10-0.1') )
    # try something that doesn't exist, expect to fail
    expect_error( require_files_plink('file-that-does-not-exist') )
})

test_that("delete_files_plink works", {
    # positive control
    # create dummy BED/BIM/FAM files
    file <- 'delete-me-test' # no extension
    # add each extension and create empty files
    file.create( paste0(file, '.bed') )
    file.create( paste0(file, '.bim') )
    file.create( paste0(file, '.fam') )
    
    # delete the BED/BIM/FAM files we just created
    expect_silent( delete_files_plink(file) )

    # negative control
    # there will be warnings for files that didn't exist
    expect_warning( delete_files_plink('file-that-does-not-exist') )
})

test_that("delete_files_phen works", {
    # positive control
    # create dummy PHEN files
    file <- 'delete-me-test' # no extension
    # add each extension and create empty files
    file.create( paste0(file, '.phen') )
    
    # delete the PHEN file we just created
    expect_silent( delete_files_phen(file) )

    # negative control
    # there will be warnings for files that didn't exist
    expect_warning( delete_files_phen('file-that-does-not-exist') )
})

test_that("require_files_plink works", {
    # positive control
    # create dummy PHEN files
    file <- 'delete-me-test' # no extension
    # add each extension and create empty files
    file.create( paste0(file, '.phen') )
    
    # this should work (pass existing files)
    expect_silent( require_files_phen( file ) )
    # delete the PHEN file we just created
    expect_silent( delete_files_phen(file) )
    
    # negative control
    # try something that doesn't exist, expect to fail
    expect_error( require_files_phen('file-that-does-not-exist') )
})

test_that("sex_to_int works", {
    # die if input is missing
    expect_error( sex_to_int() )
        
    # positive control
    # construct a simple test where we know the answers
    sex_char <- c('U', 'M', 'F', 'F', 'M', 'U')
    sex_int <- c(0:2, 2:0)
    expect_equal( sex_to_int(sex_char), sex_int )
    # lowercase should work too
    sex_char <- c('u', 'm', 'f', 'f', 'm', 'u')
    expect_equal( sex_to_int(sex_char), sex_int )
    
    # negative controls
    # these throw warnings now (not fatal)
    expect_warning( sex_to_int(c('A', 'B', 'C', 'D')) ) # all invalid
    expect_warning( sex_to_int(c('U', 'M', 'F', 'A')) ) # only last one is invalid
})

test_that("sex_to_char works", {
    # die if input is missing
    expect_error( sex_to_char() )
        
    # positive control
    # construct a simple test where we know the answers
    sex_int <- c(0:2, 2:0)
    sex_char <- c('U', 'M', 'F', 'F', 'M', 'U')
    expect_equal( sex_to_char(sex_int), sex_char )
    
    # negative controls
    # these throw warnings now (not fatal)
    expect_warning( sex_to_char(4:10) ) # all invalid
    expect_warning( sex_to_char(0:3) ) # only last one is invalid

    # test that functions invert themselves as expected
    expect_equal( sex_to_char( sex_to_int(sex_char) ), sex_char )
    expect_equal( sex_to_int( sex_to_char(sex_int) ), sex_int )
})

test_that("validate_tab_generic works", {
    # load sample file
    fi <- system.file("extdata", 'sample.fam', package = "genio", mustWork = TRUE)
    # this should just work (no "expect" test)
    fam <- read_fam(fi)

    # test that there are errors when crucial data is missing
    expect_error(validate_tab_generic()) # all is missing
    expect_error(validate_tab_generic(tib = fam)) # ext, tib_names are missing
    expect_error(validate_tab_generic(ext = 'fam')) # tib, tib_names are missing
    expect_error(validate_tab_generic(tib_names = fam_names)) # tib, ext are missing
    expect_error(validate_tab_generic(tib = fam, ext = 'fam')) # tib_names are missing
    expect_error(validate_tab_generic(tib = fam, tib_names = fam_names)) # ext missing
    expect_error(validate_tab_generic(ext = 'fam', tib_names = fam_names)) # tib missing

    # when all is present and valid, no errors or warnings are thrown
    expect_silent(validate_tab_generic(tib = fam, ext = 'fam', tib_names = fam_names))

    # more negative controls
    # tibble must be a data.frame (includes tibbles)
    expect_error(validate_tab_generic(tib = 'not-tibble', ext = 'fam', tib_names = fam_names))
    # set wrong columns (so they're missing)
    expect_error(validate_tab_generic(tib = fam, ext = 'fam', tib_names = bim_names))
    # NOTE: ext is only used to print the informative messages, but it is not itself validated or matched against anything
})

test_that("ind_to_fam works", {
    # load sample files
    # FAM (negative control)
    fi <- system.file("extdata", 'sample.fam', package = "genio", mustWork = TRUE)
    fam <- read_fam(fi)
    # IND (positive control)
    fi <- system.file("extdata", 'sample.ind', package = "genio", mustWork = TRUE)
    ind <- read_ind(fi)
    
    # die if input is missing
    expect_error( ind_to_fam() )
    # die if input is not an IND table
    expect_error( ind_to_fam(fam) )
    # convert a proper IND file... (overwrites earlier FAM)
    fam <- ind_to_fam(ind)
    # check that we have the values we expect
    expect_equal( fam$id, ind$id )
    expect_equal( fam$fam, ind$label )
    expect_equal( fam$sex, sex_to_int(ind$sex) ) # needs conversion
    expect_equal( sex_to_char(fam$sex), ind$sex ) # gratuitously test reverse conversion
    # these were default values, check that they are so
    expect_true( all(fam$pat == 0) )
    expect_true( all(fam$mat == 0) )
    expect_true( all(fam$pheno == 0) )
})

test_that("tidy_kinship works", {
    # create a toy kinship example
    n <- 3
    kinship <- matrix(
        c(
            0.5, 0.1, 0.0,
            0.1, 0.6, 0.2,
            0.0, 0.2, 0.7
        ),
        nrow = n
    )
    # add names (best for tidy version)
    colnames(kinship) <- paste0('pop', 1:n)
    rownames(kinship) <- paste0('pop', 1:n)
    
    # this returns tidy version
    kinship_tidy <- tidy_kinship( kinship )
    # test colnames
    expect_equal(
        colnames( kinship_tidy ),
        c('id1', 'id2', 'kinship')
    )
    # test row number
    expect_equal(
        nrow( kinship_tidy ),
        n * ( n + 1 ) / 2
    )
    # construct exact expectation here
    kinship_tidy_expect <- tibble(
        id1 = paste0('pop', c(1, 1, 2, 1:3)),
        id2 = paste0('pop', c(3, 2, 3, 1:3)),
        kinship = c(0, 0.1, 0.2, 0.5, 0.6, 0.7)
    )
    expect_equal(
        kinship_tidy_expect,
        kinship_tidy
    )
    
    # now test unsorted version
    kinship_tidy <- tidy_kinship( kinship, sort = FALSE )
    # test colnames
    expect_equal(
        colnames( kinship_tidy ),
        c('id1', 'id2', 'kinship')
    )
    # test row number
    expect_equal(
        nrow( kinship_tidy ),
        n * ( n + 1 ) / 2
    )
    # construct exact expectation here
    kinship_tidy_expect <- tibble(
        id1 = paste0('pop', c(1, 1, 1, 2, 2, 3)),
        id2 = paste0('pop', c(1, 2, 3, 2, 3, 3)),
        kinship = c(0.5, 0.1, 0, 0.6, 0.2, 0.7)
    )
    expect_equal(
        kinship_tidy_expect,
        kinship_tidy
    )

    # test with sorting but without names
    colnames(kinship) <- NULL
    rownames(kinship) <- NULL
    # this must throw a warning
    expect_warning(
        kinship_tidy <- tidy_kinship( kinship )
    )
    # test colnames
    expect_equal(
        colnames( kinship_tidy ),
        c('id1', 'id2', 'kinship')
    )
    # test row number
    expect_equal(
        nrow( kinship_tidy ),
        n * ( n + 1 ) / 2
    )
    # construct exact expectation here
    kinship_tidy_expect <- tibble(
        id1 = as.character( c(1, 1, 2, 1:3) ),
        id2 = as.character( c(3, 2, 3, 1:3) ),
        kinship = c(0, 0.1, 0.2, 0.5, 0.6, 0.7)
    )
    expect_equal(
        kinship_tidy_expect,
        kinship_tidy
    )
})

test_that("read_eigenvec works", {
    # test that errors occur when key data is missing
    expect_error( read_eigenvec() ) # only file is required
    expect_error( read_eigenvec( 'bogus-file' ) ) # file is non-existent (read_table2 will complain)

    # actual number of eigenvectors in sample file
    r <- 3
    
    # load sample file
    fi <- system.file("extdata", 'sample-gcta.eigenvec', package = "genio", mustWork = TRUE)
    expect_silent(
        data <- read_eigenvec( fi, verbose = FALSE )
    )
    expect_true( is.list(data) )
    expect_equal( length( data ), 2 )
    expect_equal( names( data ), c('eigenvec', 'fam') )
    expect_true( is.matrix( data$eigenvec ) )
    expect_true( is.numeric( data$eigenvec ) )
    expect_equal( nrow( data$eigenvec ), n_rows )
    expect_equal( ncol( data$eigenvec ), r )
    expect_true( is_tibble( data$fam ) )
    expect_equal( nrow( data$fam ), n_rows )
    expect_equal( ncol( data$fam ), 2 )
    expect_equal( colnames( data$fam ), c('fam', 'id') )

    # repeat with plink2-formatted sample file (has a header that should simply be ignored)
    fi <- system.file("extdata", 'sample-plink2.eigenvec', package = "genio", mustWork = TRUE)
    expect_silent(
        data <- read_eigenvec( fi, verbose = FALSE )
    )
    expect_true( is.list(data) )
    expect_equal( length( data ), 2 )
    expect_equal( names( data ), c('eigenvec', 'fam') )
    expect_true( is.matrix( data$eigenvec ) )
    expect_true( is.numeric( data$eigenvec ) )
    expect_equal( nrow( data$eigenvec ), n_rows )
    expect_equal( ncol( data$eigenvec ), r )
    expect_true( is_tibble( data$fam ) )
    expect_equal( nrow( data$fam ), n_rows )
    expect_equal( ncol( data$fam ), 2 )
    expect_equal( colnames( data$fam ), c('fam', 'id') )
})


test_that("write_eigenvec works", {
    # test that there are errors when crucial data is missing
    expect_error( write_eigenvec() ) # all is missing
    expect_error( write_eigenvec( 'file' ) ) # eigenvec is missing
    expect_error( write_eigenvec( eigenvec = matrix(0) ) ) # file is missing (tib is incomplete too, but that gets tested downstream)

    # create small but realistic eigenvec matrix
    # number of individuals
    n <- 10
    # number of desired PCs
    r <- 3
    # number of SNPs
    m <- 100
    # allele frequency (boring, right in the middle)
    p <- 0.5
    # random genotypes
    X <- matrix(
        rbinom( n*m, 2, p ),
        nrow = m,
        ncol = n
    )
    # center and scale (with true ancestral allele freq)
    X <- ( X - 2 * p ) / sqrt( 4 * p * ( 1 - p ) )
    # kinship matrix
    kinship <- crossprod( X )
    # now get eigenvectors
    # NOTE: eigen doesn't give colnames to this matrix!
    eigenvec <- eigen( kinship )$vectors
    # subset to top eigenvectors (columns)
    eigenvec <- eigenvec[ , 1:r ]
    # accompanying dummy fam file
    # entries as character for reader/writer comparisons
    fam <- tibble(
        fam = as.character( 1:n ),
        id = as.character( 1:n )
    )
    # expected output in this extra simple case
    eigenvec_with_names <- eigenvec
    colnames( eigenvec_with_names ) <- 1:r
    eigenvec_final_expected <- bind_cols( fam, as_tibble( eigenvec_with_names ) )
    
    # create a dummy output we'll delete later
    # file base (no extension)
    name <- tempfile( 'delete-me_test-write' )
    # full file path (for manually deleting)
    fo <- paste0( name, '.eigenvec' )
    # write file, producing "final" table of interest
    expect_silent(
        eigenvec_final <- write_eigenvec( name, eigenvec, fam = fam, verbose = FALSE )
    )
    # compare
    expect_equal( eigenvec_final, eigenvec_final_expected )
    # now parse it with our read_eigenvec, this should also agree!
    data <- read_eigenvec( name, verbose = FALSE )
    expect_equal( data$eigenvec, eigenvec_with_names )
    expect_equal( data$fam, fam )
    # delete output when done
    invisible( file.remove(fo) )
    
    # repeat by reordering fam data, should automatically reorder too
    fam_r <- fam[ , 2:1 ]
    expect_silent(
        eigenvec_final <- write_eigenvec( name, eigenvec, fam = fam_r, verbose = FALSE )
    )
    # compare
    expect_equal( eigenvec_final, eigenvec_final_expected )
    # delete output when done
    invisible( file.remove(fo) )
    
    # repeat by adding junk columns, should be automatically ignored
    fam_r <- fam # copy first
    fam_r$junk <- 1 # add a junk column
    expect_silent(
        eigenvec_final <- write_eigenvec( name, eigenvec, fam = fam_r, verbose = FALSE )
    )
    # compare
    expect_equal( eigenvec_final, eigenvec_final_expected )
    # delete output when done
    invisible( file.remove(fo) )

    # other expected errors
    
    # delete a column, test that an error is thrown
    fam_r <- fam # copy first
    fam_r$id <- NULL # delete this column
    expect_error(
        write_eigenvec( name, eigenvec, fam = fam_r, verbose = FALSE )
    )

    # since eigenvec doesn't have fam/id columns, this errors (fam omitted)
    expect_error(
        write_eigenvec( name, eigenvec, verbose = FALSE )
    )
    # but including the "final" eigenvec as input should work
    expect_silent(
        eigenvec_final <- write_eigenvec( name, eigenvec_final_expected, verbose = FALSE )
    )
    # compare
    expect_equal( eigenvec_final, eigenvec_final_expected )
    # delete output when done
    invisible( file.remove(fo) )

    # since the final version and fam overlap in fam/id columns, this succeeds but with a warning
    expect_warning(
        eigenvec_final <- write_eigenvec( name, eigenvec_final_expected, fam = fam, verbose = FALSE )
    )
    # compare
    expect_equal( eigenvec_final, eigenvec_final_expected )
    # delete output when done
    invisible( file.remove(fo) )

    # error when number of individuals doesn't match between eigenvec and fam
    expect_error(
        write_eigenvec( name, eigenvec, fam = fam[ -1, ], verbose = FALSE )
    )
    expect_error(
        write_eigenvec( name, eigenvec[ -1, ], fam = fam, verbose = FALSE )
    )
})

test_that("count_lines works", {
    # test that there are errors when crucial data is missing
    expect_error( count_lines() ) # file is missing
    expect_error( count_lines( 'bogus-file' ) ) # file is non-existent
    
    # load sample file
    fi <- system.file("extdata", 'sample.fam', package = "genio", mustWork = TRUE)
    # count lines!
    expect_silent(
        n_ind_lines <- count_lines( fi, verbose = FALSE )
    )
    expect_equal( n_ind_lines, n_rows )
    expect_true( is.integer( n_ind_lines ) )
    
    # repeat with missing extension
    fi_no_ext <- sub('\\.fam$', '', fi)
    # if we don't add extension back, it should fail (file does not exist)
    expect_error(
        count_lines( fi_no_ext )
    )
    # count lines!
    expect_silent(
        n_ind_lines <- count_lines( fi_no_ext, ext = 'fam', verbose = FALSE )
    )
    expect_equal( n_ind_lines, n_rows )
    expect_true( is.integer( n_ind_lines ) )

    # test on a file with a single line but missing its newline (a potential trouble case for C++ iterators solution)
    fi <- 'no-newline.txt'
    # count lines!
    expect_silent(
        n_lines <- count_lines( fi, verbose = FALSE )
    )
    expect_equal( n_lines, 1 )
    expect_true( is.integer( n_lines ) )
})

test_that("read_matrix works", {
    # test that there are errors when crucial data is missing
    expect_error(read_matrix()) # file is missing
    expect_error(read_matrix('bogus-file')) # file is non-existent (read_table2 will complain)
    
    # load sample file
    fi <- system.file("extdata", 'sample-Q3.txt', package = "genio", mustWork = TRUE)
    # main test
    expect_silent(
        mat <- read_matrix( fi, verbose = FALSE )
    )
    # basic validations
    expect_true( is.matrix( mat ) )
    expect_true( is.numeric( mat ) )
    # dimensions known ahead of time
    expect_equal( nrow( mat ), 10 )
    expect_equal( ncol( mat ), 3 )
    # this happens to be an admixture file, so rows sum to 1
    u <- rep.int( 1, 10 )
    expect_equal( rowSums( mat ), u )
    # demand no dimnames (file contains no such data, anything present must be junk)
    expect_null( dimnames( mat ) )

    # repeat with missing extension
    fiNoExt <- sub('\\.txt$', '', fi)
    expect_silent(
        mat <- read_matrix( fiNoExt, verbose = FALSE )
    )
    expect_true( is.matrix( mat ) )
    expect_true( is.numeric( mat ) )
    expect_equal( nrow( mat ), 10 )
    expect_equal( ncol( mat ), 3 )
    expect_equal( rowSums( mat ), u )
    expect_null( dimnames( mat ) )

    # repeat with compressed file (and true full path)
    fi <- system.file("extdata", 'sample-Q3.txt.gz', package = "genio", mustWork = TRUE)
    expect_silent(
        mat <- read_matrix( fi, verbose = FALSE )
    )
    expect_true( is.matrix( mat ) )
    expect_true( is.numeric( mat ) )
    expect_equal( nrow( mat ), 10 )
    expect_equal( ncol( mat ), 3 )
    expect_equal( rowSums( mat ), u )
    expect_null( dimnames( mat ) )
    
    # repeat with missing .gz extension
    fiNoGz <- sub('\\.gz$', '', fi)
    expect_silent(
        mat <- read_matrix( fiNoGz, verbose = FALSE )
    )
    expect_true( is.matrix( mat ) )
    expect_true( is.numeric( mat ) )
    expect_equal( nrow( mat ), 10 )
    expect_equal( ncol( mat ), 3 )
    expect_equal( rowSums( mat ), u )
    expect_null( dimnames( mat ) )

    # repeat with missing .txt.gz double extension
    fiNoGzNoExt <- sub('\\.txt$', '', fiNoGz)
    expect_silent(
        mat <- read_matrix( fiNoGzNoExt, verbose = FALSE )
    )
    expect_true( is.matrix( mat ) )
    expect_true( is.numeric( mat ) )
    expect_equal( nrow( mat ), 10 )
    expect_equal( ncol( mat ), 3 )
    expect_equal( rowSums( mat ), u )
    expect_null( dimnames( mat ) )
})

test_that("write_matrix works", {
    # test that there are errors when crucial data is missing
    expect_error( write_matrix( ) ) # all is missing
    expect_error( write_matrix( 'file' ) ) # tibble is missing
    expect_error( write_matrix( x = cbind( 1 ) ) ) # file is missing

    # create a small matrix to write
    # square matrix in this case
    x <- matrix(
        rnorm( n_rows^2 ),
        nrow = n_rows,
        ncol = n_rows
    )
    
    # create a dummy output we'll delete later
    fo <- tempfile('delete-me_test-write', fileext = '.txt')
    # write data
    expect_silent(
        write_matrix( fo, x, verbose = FALSE )
    )
    # read it back
    expect_silent(
        x2 <- read_matrix( fo, verbose = FALSE )
    )
    # should be the same!
    expect_equal( x, x2 )
    # delete output when done
    invisible( file.remove( fo ) )

    # test append feature!
    # write first half now
    expect_silent(
        write_matrix( fo, x[ 1:5, ], verbose = FALSE )
    )
    # write second half, appending!
    expect_silent(
        write_matrix( fo, x[ 6:n_rows, ], append = TRUE, verbose = FALSE )
    )
    # read it all back
    expect_silent(
        x2 <- read_matrix( fo, verbose = FALSE )
    )
    # should be the same!
    expect_equal( x, x2 )
    # delete output when done
    invisible( file.remove( fo ) )
    
    
})

Try the genio package in your browser

Any scripts or data that you put into this service are public.

genio documentation built on June 11, 2021, 5:12 p.m.