# Copyright 2020 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
library("testthat")
context("Fitting")
plpResultKNN <- runPlp(population = population,
plpData = plpData,
modelSettings = knnSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'knnTest',
saveDirectory = saveLoc)
test_that("covRef is correct size", {
# varImp contains all variables in LR
testthat::expect_equal(nrow(as.data.frame(plpData$covariateData$covariateRef)),
nrow(plpResult$model$varImp))
testthat::expect_equal(nrow(as.data.frame(plpDataReal$covariateData$covariateRef)),
nrow(plpResultReal$model$varImp))
testthat::expect_equal(nrow(as.data.frame(plpData$covariateData$covariateRef)),
nrow(plpResultKNN$model$varImp))
})
test_that("LR, KNN and GBM results have same structure", {
# same output names for LR, KNN and GBM
testthat::expect_equal(names(plpResult),
names(plpResultReal))
testthat::expect_equal(names(plpResultReal),
names(plpResultKNN))
})
test_that("fitting", {
#=====================================
# check fitPlp
#=====================================
testthat::expect_error(fitPlp(population=popualtion, data=plpData, modelSettings=NULL,
cohortId=1, outcomeId=2))
testthat::expect_error(fitPlp(population=NULL, data=plpData,
modelSettings=list(model='test'),
cohortId=1, outcomeId=2))
testthat::expect_error(fitPlp(population=population, data=NULL,
modelSettings=list(model='test'),
cohortId=1, outcomeId=2))
#=====================================
# checking Logistic Regression
#=====================================
model_set <- setLassoLogisticRegression()
testthat::expect_that(model_set, testthat::is_a("modelSettings"))
testthat::expect_length(model_set,3)
testthat::expect_error(setLassoLogisticRegression(variance = -3))
testthat::expect_error(setLassoLogisticRegression(seed = 'F'))
#=====================================
# checking Cox Regression
#=====================================
model_set <- setCoxModel()
testthat::expect_that(model_set, testthat::is_a("modelSettings"))
testthat::expect_length(model_set,3)
testthat::expect_error(setCoxModel(variance = -3))
testthat::expect_error(setCoxModel(seed = 'F'))
#=====================================
# checking Gradient Boosting Machine
#=====================================
gbm_set <- setGradientBoostingMachine(ntrees = 10)
testthat::expect_that(gbm_set, testthat::is_a("modelSettings"))
testthat::expect_length(gbm_set,3)
testthat::expect_error(setGradientBoostingMachine(ntrees = -1))
testthat::expect_error(setGradientBoostingMachine(minRows = 1))
testthat::expect_error(setGradientBoostingMachine(maxDepth = 0))
testthat::expect_error(setGradientBoostingMachine(learnRate = -2))
testthat::expect_error(setGradientBoostingMachine(seed = 'F'))
#=====================================
# checking Random forest
#=====================================
#rf_set <- PatientLevelPrediction::setRandomForest(ntrees=10)
#testthat::expect_that(rf_set, testthat::is_a("modelSettings"))
#testthat::expect_length(rf_set,3)
testthat::expect_error(setRandomForest(ntrees=-1))
testthat::expect_error(setRandomForest(mtries = -4))
testthat::expect_error(setRandomForest(maxDepth = 0))
testthat::expect_error(setRandomForest(varImp = 3))
testthat::expect_error(setRandomForest(seed = 'F'))
#=====================================
# checking Decision Tree
#=====================================
#dt_set <- PatientLevelPrediction::setDecisionTree()
#testthat::expect_that(dt_set, is_a("modelSettings"))
#testthat::expect_length(dt_set,3)
testthat::expect_error(setDecisionTree(maxDepth = 0))
testthat::expect_error(setDecisionTree(minSamplesSplit = 1))
testthat::expect_error(setDecisionTree(minSamplesLeaf = -1))
testthat::expect_error(setDecisionTree(minImpuritySplit = -1))
testthat::expect_error(setDecisionTree(classWeight = 'dfds'))
testthat::expect_error(setDecisionTree(classWeight = 4))
testthat::expect_error(setDecisionTree(seed = 'F'))
#=====================================
# checking Ada Boost
#=====================================
#dt_set <- PatientLevelPrediction::setAdaBoost()
#testthat::expect_that(dt_set, is_a("modelSettings"))
#testthat::expect_length(dt_set,3)
testthat::expect_error(setAdaBoost(nEstimators = 0))
testthat::expect_error(setAdaBoost(learningRate = -1))
testthat::expect_error(setAdaBoost(learningRate = 2))
testthat::expect_error(setAdaBoost(seed = 'F'))
#=====================================
# checking KNN
#=====================================
model_set <- setKNN()
testthat::expect_that(model_set, is_a("modelSettings"))
testthat::expect_length(model_set,3)
testthat::expect_error(setKNN(k = 0))
testthat::expect_error(setKNN(indexFolder = 2372))
#=====================================
# checking Naive Bayes
#=====================================
#model_set <- setNaiveBayes()
#testthat::expect_that(model_set, is_a("modelSettings"))
#testthat::expect_length(model_set,3)
#=====================================
# checking MLP
#=====================================
#gbm_set <- setMLP()
#testthat::expect_that(gbm_set, is_a("modelSettings"))
#testthat::expect_length(gbm_set,3)
testthat::expect_error(setMLP(size = -1))
testthat::expect_error(setMLP(alpha = -1))
testthat::expect_error(setMLP(seed = 'F'))
})
gbmachSet <- setGradientBoostingMachine(ntrees = 10, maxDepth = 3, learnRate = 0.1)
plpResultGbmach <- runPlp(population = population,
plpData = plpData,
modelSettings = gbmachSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'gbmachTest',
saveDirectory = saveLoc)
test_that("GBM working checks", {
# check same structure
testthat::expect_equal(names(plpResultGbmach),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultGbmach$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultGbmach$prediction$value), 0)
testthat::expect_lte(max(plpResultGbmach$prediction$value), 1)
})
rfSet <- setRandomForest(ntrees = 10, maxDepth = 3)
plpResultRF <- runPlp(population = population,
plpData = plpData,
modelSettings = rfSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'rfTest',
saveDirectory = saveLoc)
test_that("RF working checks", {
# check same structure
testthat::expect_equal(names(plpResultRF),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultRF$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultRF$prediction$value), 0)
testthat::expect_lte(max(plpResultRF$prediction$value), 1)
})
abSet <- setAdaBoost(nEstimators = 5)
plpResultAb <- runPlp(population = population,
plpData = plpData,
modelSettings = abSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'abTest',
saveDirectory = saveLoc)
test_that("AdaBoost working checks", {
# check same structure
testthat::expect_equal(names(plpResultAb),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultAb$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultAb$prediction$value), 0)
testthat::expect_lte(max(plpResultAb$prediction$value), 1)
})
nbSet <- setNaiveBayes()
plpResultNb <- runPlp(population = population,
plpData = plpData,
modelSettings = nbSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'nbTest',
saveDirectory = saveLoc)
test_that("AdaBoost working checks", {
# check same structure
testthat::expect_equal(names(plpResultNb),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultNb$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultNb$prediction$value), 0)
testthat::expect_lte(max(plpResultNb$prediction$value), 1)
})
dtSet <- setDecisionTree(maxDepth = 2)
plpResultDt <- runPlp(population = population,
plpData = plpData,
modelSettings = dtSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'dtTest',
saveDirectory = saveLoc)
test_that("Decision tree working checks", {
# check same structure
testthat::expect_equal(names(plpResultDt),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultDt$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultDt$prediction$value), 0)
testthat::expect_lte(max(plpResultDt$prediction$value), 1)
})
mlpSet <- setMLP()
plpResultMlp <- runPlp(population = population,
plpData = plpData,
modelSettings = mlpSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'mlpTest',
saveDirectory = saveLoc)
test_that("MLP working checks", {
# check same structure
testthat::expect_equal(names(plpResultMlp),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultMlp$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultMlp$prediction$value), 0)
testthat::expect_lte(max(plpResultMlp$prediction$value), 1)
})
svmSet <- setSVM(C=1, degree = 1, gamma = 1e-04)
plpResultSvm <- runPlp(population = population,
plpData = plpData,
modelSettings = svmSet,
savePlpData = F,
savePlpResult = F,
saveEvaluation = F,
savePlpPlots = F,
analysisId = 'svmTest',
saveDirectory = saveLoc)
test_that("SVM working checks", {
# check same structure
testthat::expect_equal(names(plpResultSvm),
names(plpResult))
# check prediction same size as pop
testthat::expect_equal(nrow(plpResultSvm$prediction), nrow(population))
# check prediction between 0 and 1
testthat::expect_gte(min(plpResultSvm$prediction$value), 0)
testthat::expect_lte(max(plpResultSvm$prediction$value), 1)
})
test_that("LR cross val weights", {
sim <- simulateCyclopsData(nstrata = 1, nrows = 10000, ncovars = 100, eCovarsPerRow = 0.5, effectSizeSd = 1, model = "logistic")
covariates <- sim$covariates
outcomes <- sim$outcomes
y <- outcomes$y
cyclopsData <- Cyclops::convertToCyclopsData(outcomes,
covariates,
modelType = "lr", addIntercept = TRUE)
cv_fit <- suppressWarnings(fitCyclopsModel(cyclopsData,
prior = createPrior("laplace", useCrossValidation = TRUE),
control = createControl(seed = 666)))
cv_hyperparameter <- getHyperParameter(cv_fit)
fixed_prior <- createPrior("laplace", variance = cv_hyperparameter, useCrossValidation = FALSE)
# get result using weights
set.seed(666)
hold_out <- sample(1:getNumberOfRows(cyclopsData),
size = floor(0.1 * getNumberOfRows(cyclopsData)),
replace = FALSE)
weights <- rep(1.0, getNumberOfRows(cyclopsData))
weights[hold_out] <- 0.0
subset_fit <- suppressWarnings(fitCyclopsModel(cyclopsData,
prior = fixed_prior,
weights = weights))
predict <- predict(subset_fit)
predict <- data.frame(rowId = hold_out, value = predict[hold_out], outcomeCount = y[hold_out])
attr(predict, "metaData")$predictionType <- "binary"
# get results using reduced data
cyclopsData2 <- Cyclops::convertToCyclopsData(outcomes[!outcomes$rowId%in%hold_out,],
covariates[!covariates$rowId%in%hold_out,],
modelType = "lr", addIntercept = TRUE)
subset_fit2 <- suppressWarnings(fitCyclopsModel(cyclopsData2,
prior = fixed_prior))
coefficients <- subset_fit2$estimation$estimate
names(coefficients) <- subset_fit2$coefficientNames
covariateData <- list(covariates = covariates[covariates$rowId%in%hold_out,])
class(covariateData) <- 'CovariateData'
pops <- data.frame(rowId = hold_out, outcomeCount = outcomes$y[hold_out])
predict2 <- predictAndromeda(coefficients = coefficients,
population = pops,
covariateData = covariateData,
modelType = 'logistic')
attr(predict2, "metaData")$predictionType <- "binary"
# does not seem to be the same?
testthat::expect_equal(computeAuc(predict),computeAuc(predict2), tolerance=1)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.