library(testthat)
context("plpInterface")
indexFolder <- tempfile("indexFolder")
indexFolder2 <- tempfile("indexFolder")
# given plpData and population
n <- 100+sample(100,1)
populationBuild <- data.frame(rowId = 1:n,
outcomeCount = round(runif(n)))
populationPred <- data.frame(rowId = n+1)
# person 11 is similar as person 1/3/5
covariatesBuild <- data.frame(rowId = sample(populationBuild$rowId, size = 1000, replace = T),
#covariateId = sample(10, 1000, replace = T),
covariateId = c(rep(1,100),rep(2,100),rep(3,100),rep(4,100),rep(5,100),
rep(6,100),rep(7,100),rep(8,100),rep(9,100),rep(10,100)),
covariateValue = rep(1, 1000))
covariatesBuild <- unique(covariatesBuild)
covariatesPred <- data.frame(rowId = rep(n+1, 5),
covariateId = sample(10, 5, replace = T),
covariateValue = rep(1, 5))
covariatesPred <- unique(covariatesPred)
covariates <- rbind(covariatesBuild, covariatesPred)
covariateData <- Andromeda::andromeda(covariates = covariates)
plpData <- list(cohorts = NULL,
outcomes = NULL,
covariateData = covariateData)
covariateDataBuild <- Andromeda::andromeda(covariates = covariatesBuild)
plpDataBuild <- list(cohorts = NULL,
outcomes = NULL,
covariateData = covariateDataBuild)
# create matrix for similarity:
covariatesMat <- matrix(nrow =n+1, ncol = 10, data = rep(0, 10*(n+1)))
for(i in 1:nrow(covariates)){
covariatesMat[covariates$rowId[i], covariates$covariateId[i]] <- 1
}
# euclidean distance
distancEuclidean <- stats::dist(covariatesMat)
# manhattan distance
distancManhattan <- stats::dist(covariatesMat, method = 'manhattan')
# cosine similarity distance
cosineSim <- function(covariatesMat){
cos.sim <- function(ix)
{
A = covariatesMat[ix[1],]
B = covariatesMat[ix[2],]
return( sum(A*B)/sqrt(sum(A^2)*sum(B^2)) )
}
cmb <- expand.grid(i=1:nrow(covariatesMat), j=1:nrow(covariatesMat))
C <- matrix(apply(cmb,1,cos.sim),nrow(covariatesMat),nrow(covariatesMat))
distance <- abs(1-C)
distance[is.nan(distance)] <- 1
return(distance)
}
distanceCosine <- cosineSim(covariatesMat)
distance <- distanceCosine
#distance <- distancManhattan
test_that("buildKnnFromPlpData works when test patient not in plpData works", {
buildKnnFromPlpData(plpData = plpDataBuild, # excludes the test person
population = populationBuild,
indexFolder = indexFolder,
overwrite = TRUE)
expect_true(file.exists(indexFolder))
})
test_that("buildKnnFromPlpData when test patient in plpData works", {
buildKnnFromPlpData(plpData = plpData, # includes the test person
population = populationBuild,
indexFolder = indexFolder2,
overwrite = TRUE)
expect_true(file.exists(indexFolder2))
})
test_that("buildKnnFromPlpData unweighted predictions correct", {
# get the predicted risk for the test when k = 1
# find k near 1 based on ties
val <- max(as.matrix(distance)[n+1,order(as.matrix(distance)[n+1,1:n])[1]])
k <- sum(as.matrix(distance)[n+1,1:n]<=val)
pred1 <- predictKnnUsingPlpData(plpData = plpData,
population = populationPred,
indexFolder = indexFolder,
k = k,
weighted = F,
threads = 1)
manualPred1 <- mean(populationBuild$outcomeCount[populationBuild$rowId%in%order(as.matrix(distance)[n+1,1:n])[1:k]])
expect_equal(pred1$value, manualPred1)
# find k near 10 based on ties
val <-max(as.matrix(distance)[n+1,order(as.matrix(distance)[n+1,1:n])[1:10]])
k <- sum(as.matrix(distance)[n+1,1:n]<=val)
pred10 <- predictKnnUsingPlpData(plpData = plpData,
population = populationPred,
indexFolder = indexFolder,
k = k,
weighted = F,
threads = 1)
# get the predicted risk for the test when k = 1
manualPred10 <- mean(populationBuild$outcomeCount[populationBuild$rowId%in%order(as.matrix(distance)[n+1,1:n])[1:k]])
expect_equal(pred10$value, manualPred10)
})
test_that("buildKnnFromPlpData - test when patient has no covariates", {
populationNoCovs <- data.frame(rowId = n+2)
k <- sum(rowSums(covariatesMat) <= min(rowSums(covariatesMat)))
pred <- predictKnnUsingPlpData(plpData = plpData,
population = populationNoCovs,
indexFolder = indexFolder,
k = k,
weighted = F,
threads = 1)
manualPred <- mean(populationBuild$outcomeCount[populationBuild$rowId%in%which(rowSums(covariatesMat) <= min(rowSums(covariatesMat)))])
#expect_equal(pred$value, manualPred)
expect_equal(pred$value, 0) # seems to be a bug where no covariates goes to 0 risk!
})
test_that("buildKnnFromPlpData - testing correct filtering", {
# result when knn trained without test patient in plpData and pop
predWithoutTestInData <- predictKnnUsingPlpData(plpData = plpData,
population = populationPred,
indexFolder = indexFolder,
k = 3,
weighted = F,
threads = 1)
# result when knn trained with test patient in plpData but not in pop
predWithTestInData <- predictKnnUsingPlpData(plpData = plpData,
population = populationPred,
indexFolder = indexFolder2,
k = 3,
weighted = F,
threads = 1)
# filtering appears to work if these are the same
expect_equal(predWithoutTestInData, predWithTestInData)
})
# Test cleanup
unlink(indexFolder)
unlink(indexFolder2)
Andromeda::close(covariateData)
Andromeda::close(covariateDataBuild)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.