# tests/testthat/tests.algorithm.r

context("Algorithm")

data("RandomVA3", envir = environment())
RandomVA3 <- get("RandomVA3", envir = environment())
test <- RandomVA3[1:200, ]
train <- RandomVA3[201:400, ]

data("RandomVA5", envir = environment())
RandomVA5 <- get("RandomVA5", envir = environment())

# generate a grouping dataset that can encompass every possible COD
data("SampleCategory3", envir = environment())
SampleCategory3 <- get("SampleCategory3", envir = environment())
data("SampleCategory", envir = environment())
SampleCategory <- get("SampleCategory", envir = environment())
names(SampleCategory) <- names(SampleCategory3)
grouping <- rbind(SampleCategory3, SampleCategory, data.frame(cause="Undetermined", broad_cause="Undetermined"))
grouping <- grouping[!duplicated(grouping$cause), ]
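
# Illustrative sketch only (not exercised by the tests below): the grouping
# table maps each specific cause to a broad category, so assigned or true
# causes can be collapsed before summarizing. Uses base R and the objects
# defined above; the name broad_cause_train is introduced here purely for
# illustration.
broad_cause_train <- grouping$broad_cause[match(train$cause, grouping$cause)]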

test_that("Insilico - ID and output correct", {

  # insilico
  set.seed(13)
  fit1 <- codeVA(data = test, data.type = "customize", model = "InSilicoVA",
                 data.train = train, causes.train = "cause",
                 Nsim = 200, auto.length = FALSE)

  top <- getTopCOD(fit1)
  top5 <- getTopCOD(fit1, n = 5)
  s <- sum(!top$ID %in% test[, 1])
  csmf <- getCSMF(fit1)
  csmf0 <- table(train$cause) / dim(train)[1]
  acc <- getCSMF_accuracy(csmf = csmf[, 1], truth = csmf0)
  acc_from_fit <- getCSMF_accuracy(csmf = fit1, truth = csmf0)
  ss <- as.numeric((acc > 0.5) && (acc <= 1))
  # getCSMF_accuracy() on the fit object can return a vector of accuracies,
  # so check every element (see the note on the accuracy definition after
  # this test block)
  ss_from_fit <- as.numeric(all(acc_from_fit > 0.5) && all(acc_from_fit <= 1))
  indiv_prob <- getIndivProb(fit1)
  indiv_prob_95ci <- getIndivProb(fit1, CI = 0.95)
  ccc <- getCCC(top, test[, 1:2])

  expect_equal(s, 0)
  expect_equal(dim(top)[1], dim(test)[1])
  expect_equal(dim(csmf)[1], length(unique(train$cause)))
  expect_equal(dim(csmf)[2], 5)
  expect_equal(ss, 1)
  expect_equal(ss_from_fit, 1)
  expect_equal(nrow(indiv_prob), nrow(test))
  expect_equal(ncol(indiv_prob), length(unique(train$cause)))
  for (i in 1:nrow(indiv_prob)) {
    expect_equal(sum(indiv_prob[i, ]), 1)
  }
  expect_equal(nrow(indiv_prob_95ci$indiv.prob.lower), nrow(test))
  expect_equal(ncol(indiv_prob_95ci$indiv.prob.lower), length(unique(train$cause)))
  for (i in 1:nrow(indiv_prob_95ci$indiv.prob.lower)) {
    expect_true(all(indiv_prob_95ci$indiv.prob.lower[i, ] < 
                      indiv_prob_95ci$median[i, ]))
  }
  expect_equal(nrow(indiv_prob_95ci$indiv.prob.upper), nrow(test))
  expect_equal(ncol(indiv_prob_95ci$indiv.prob.upper), length(unique(train$cause)))
  for (i in 1:nrow(indiv_prob_95ci$indiv.prob.upper)) {
    expect_true(all(indiv_prob_95ci$indiv.prob.upper[i, ] > 
                      indiv_prob_95ci$median[i, ]))
  }
  expect_true(ccc >= 0 & ccc <= 1)
  expect_equal(names(top5), 
               c("ID", "cause1", "cause2", "cause3", "cause4", "cause5"))
  expect_equal(nrow(top5), nrow(test))
})
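
# Note on getCSMF_accuracy() used above: CSMF accuracy is conventionally
# defined as 1 - sum(|csmf_hat - csmf_true|) / (2 * (1 - min(csmf_true))),
# which equals 1 for a perfect match. A toy base-R illustration with made-up
# numbers (not package output); getCSMF_accuracy() is assumed to follow this
# convention:
csmf_true_toy <- c(A = 0.5, B = 0.3, C = 0.2)
csmf_hat_toy  <- c(A = 0.4, B = 0.4, C = 0.2)
acc_toy <- 1 - sum(abs(csmf_hat_toy - csmf_true_toy)) / (2 * (1 - min(csmf_true_toy)))
# acc_toy evaluates to 0.875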

test_that("InterVA4 - ID and output correct", {

  # interva
  set.seed(13)
  fit2 <- codeVA(data = test, data.type = "customize", model = "InterVA",
                 data.train = train, causes.train = "cause",
                 version = "4.02", HIV = "h", Malaria = "l")
  top <- getTopCOD(fit2)
  top5 <- getTopCOD(fit2, n = 5)
  s <- sum(!top$ID %in% test[, 1])
  csmf <- getCSMF(fit2)
  csmf0 <- table(train$cause) / dim(train)[1]
  # under the InterVA rule the CSMF includes an "Undetermined" category; see
  # the renormalization sketch after this test block
  acc <- getCSMF_accuracy(csmf = csmf, truth = csmf0, undet = "Undetermined")
  ss <- as.numeric((acc > 0.5) && (acc <= 1))
  csmf2 <- getCSMF(fit2, interVA.rule = FALSE)
  acc <- getCSMF_accuracy(csmf = csmf2, truth = csmf0)
  sss <- as.numeric((acc > 0.5) && (acc <= 1))
  indiv_prob <- getIndivProb(fit2)

  expect_equal(s, 0)
  expect_equal(dim(top)[1], dim(test)[1])
  expect_equal(length(csmf), length(unique(train$cause)) + 1)
  expect_equal(length(csmf2), length(unique(train$cause)))
  expect_equal(ss, 1)
  expect_equal(sss, 1)
  expect_equal(names(top5), 
               c("ID", "cause1", "cause2", "cause3", "cause4", "cause5"))
  expect_equal(nrow(top5), nrow(test))
  expect_equal(nrow(indiv_prob), nrow(test))
  expect_equal(ncol(indiv_prob), length(unique(train$cause)))
  for (i in 1:nrow(indiv_prob)) {
    expect_equal(sum(indiv_prob[i, ]), 1)
  }
})
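
# Sketch with toy numbers: under the InterVA rule the returned CSMF includes
# an "Undetermined" category (hence the "+ 1" length check above). A simple
# way to compare against a truth that has no such category is to drop it and
# renormalize; the exact handling inside getCSMF_accuracy(undet = ...) is not
# asserted here, so this is only an illustration.
csmf_with_undet <- c(causeA = 0.5, causeB = 0.3, Undetermined = 0.2)
csmf_renormalized <- csmf_with_undet[names(csmf_with_undet) != "Undetermined"]
csmf_renormalized <- csmf_renormalized / sum(csmf_renormalized)
# csmf_renormalized is c(causeA = 0.625, causeB = 0.375)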

test_that("InterVA5 - ID and output correct", {
  
  # interva
  set.seed(13)
  fit2b <- codeVA(data = RandomVA5[1:10, ], data.type = "WHO2016", model = "InterVA",
                  version = "5", HIV = "h", Malaria = "l", write = FALSE)
  top <- getTopCOD(fit2b)
  top5 <- getTopCOD(fit2b, n = 5)
  csmf <- getCSMF(fit2b)
  indiv_prob <- getIndivProb(fit2b)
  
  expect_equal(sum(csmf), 1)
  expect_equal(names(top5), 
               c("ID", "cause1", "cause2", "cause3", "cause4", "cause5"))
  expect_equal(nrow(top5), nrow(RandomVA5[1:10, ]))
  expect_equal(nrow(indiv_prob), nrow(RandomVA5[1:10, ]))
  expect_true(all(colnames(indiv_prob) %in% names(csmf)))
  for (i in 1:nrow(indiv_prob)) {
    expect_equal(sum(indiv_prob[i, ]), 1)
  }
})

test_that("Tariff - ID and output correct", {


  # tariff
  set.seed(13)
  fit3 <- codeVA(data = test, data.type = "customize", model = "Tariff",
                 data.train = train, causes.train = "cause",
                 nboot.sig = 100)
  top <- getTopCOD(fit3)
  s <- sum(!top$ID %in% test[, 1])
  csmf <- getCSMF(fit3)
  csmf0 <- table(train$cause) / dim(train)[1]
  acc <- getCSMF_accuracy(csmf = csmf, truth = csmf0)
  ss <- as.numeric((acc > 0.5) && (acc <= 1))
 
  expect_equal(s, 0)
  expect_equal(dim(top)[1], dim(test)[1])
  expect_equal(length(csmf), length(unique(train$cause)))
  expect_equal(ss, 1)

})

test_that("Convert Data", {
 id <- c("d1", "d2")
 x <- matrix(c("Yes", "No", "Don't know", 
        "Yes", "Refused to answer", "No"), 
      byrow = TRUE, nrow = 2, ncol = 3)
 x <- cbind(id, x)

 colnames(x) <- c("ID", "S1", "S2", "S3")
 new <- ConvertData(x, yesLabel = "Yes", noLabel = "No", 
      missLabel = c("Don't know", "Refused to answer"))
 new2 <- data.frame(ID = c("d1", "d2"), S1 = c("Y", "Y"), S2 = c("", "."), S3 = c(".", ""))
 expect_equal(new, new2)


})
