tests/testthat/test_check_ssm.R

context("Checking sample subject mapping (SSM) file")

ssm_dd <- system.file("extdata", "5b_dbGaP_SubjectSampleMappingDD.xlsx", package = "dbgaptools", mustWork = TRUE)
ssm_ds <- system.file("extdata", "5a_dbGaP_SubjectSampleMappingDS.txt", package = "dbgaptools", mustWork = TRUE)

test_that("Compliant files run error free",{
  expect_null(check_ssm(dsfile=ssm_ds))
  expect_null(check_ssm(dsfile=ssm_ds, ddfile=ssm_dd))
})

test_that("Missing ID columns are detected",{
  expect_error(check_ssm(ssm_ds, sampleID_col="mysample"), "Please check that dsfile contains columns for subject-level and sample-level IDs", fixed=TRUE)
  expect_error(check_ssm(ssm_ds, subjectID_col="mysubject"), "Please check that dsfile contains columns for subject-level and sample-level IDs", fixed=TRUE)  
})

test_that("Non-standard ID column names are detected",{
  ds.rev <- read_ds_file(ssm_ds)
  names(ds.rev)[1:2] <- c("mysubject","mysample")
  ds.rev.fn <- tempfile(fileext=".txt")
  write.table(ds.rev, file=ds.rev.fn, col.names=TRUE, row.names=FALSE,
              quote=FALSE, sep="\t")

  expect_warning(check_ssm(dsfile=ds.rev.fn, sampleID_col="mysample", subjectID_col="mysubject"), "Note preferred subject-level ID column name is 'SUBJECT_ID'", fixed=TRUE)
  expect_warning(check_ssm(dsfile=ds.rev.fn, sampleID_col="mysample", subjectID_col="mysubject"), "Note preferred sample-level ID column name is 'SAMPLE_ID'", fixed=TRUE)
  unlink(ds.rev.fn)
})
                 
test_that("Duplicated sample IDs are detected",{
  ds.rev <- read_ds_file(ssm_ds)
  ds.rev$SAMPLE_ID[3] <- ds.rev$SAMPLE_ID[2]
  ds.rev.fn <- tempfile(fileext=".txt")
  write.table(ds.rev, file=ds.rev.fn, col.names=TRUE, row.names=FALSE,
              quote=FALSE, sep="\t")

  expect_equal(check_ssm(ds.rev.fn)$dup_samples, "S2")
  unlink(ds.rev.fn)
})

test_that("Blank sample IDs are detected",{
  ds.rev <- read_ds_file(ssm_ds)
  ds.rev$SAMPLE_ID[3] <- ""
  ds.rev.fn <-  tempfile(fileext=".txt")
  write.table(ds.rev, file=ds.rev.fn, col.names=TRUE, row.names=FALSE,
              quote=FALSE, sep="\t")

  expect_equal(check_ssm(ds.rev.fn)$blank_idx, 3)
  unlink(ds.rev.fn)
})

test_that("Extra samples and subjects are detected",{
  ds <- read_ds_file(ssm_ds)
  ssm_exp <- ds[,1:2]
  ssm_exp_less <- ssm_exp[-c(3:4),]
  out <- check_ssm(ssm_ds, ssm_exp=ssm_exp_less)
  expect_equal(out$extra_subjects, c("3","4"))
  expect_equal(out$extra_samples, c("S3","S4"))
})

test_that("Missing samples and subjects are detected",{
  ds <- read_ds_file(ssm_ds)
  ssm_exp <- ds[,1:2]
  ext_rows <- c(SUBJECT_ID=999, SAMPLE_ID="S999")
  ssm_exp_more <- rbind(ssm_exp, ext_rows)
  out <- check_ssm(ssm_ds, ssm_exp=ssm_exp_more)
  expect_equal(out$missing_subjects, "999")
  expect_equal(out$missing_samples, "S999")
})

test_that("Mapping differences detected",{
  ds <- read_ds_file(ssm_ds)
  ssm_exp <- ds[,1:2]
  ssm_exp$SAMPLE_ID[3] <- "S999"
  out <- check_ssm(ssm_ds, ssm_exp=ssm_exp)
  expect_equivalent(out$ssm_diffs,
                    data.frame(SUBJECT_ID="3", SAMPLE_ID="S999", stringsAsFactors=FALSE))
})
UW-GAC/dbgaptools documentation built on Nov. 3, 2020, 12:19 a.m.