tests/testthat/test-sklearnClassifier.R

test_that("DecisionTree settings work checks", {
  
dtset <- setDecisionTree(
  criterion = list('gini'),
  splitter = list('best'),
  maxDepth = list(4, 10, NULL),
  minSamplesSplit = list(2, 10),
  minSamplesLeaf = list(10, 50),
  minWeightFractionLeaf = list(0),
  maxFeatures = list(100,'sqrt', NULL),
  maxLeafNodes = list(NULL),
  minImpurityDecrease = list(10^-7),
  classWeight = list(NULL),
  seed = sample(1000000,1)
)

expect_equal(dtset$fitFunction, "fitSklearn")

expect_equal(length(dtset$param), 3*2*2*3*1)

expect_equal(unique(unlist(lapply(dtset$param, function(x) x[[1]]))), 'gini')
expect_equal(unique(unlist(lapply(dtset$param, function(x) x[[2]]))), 'best')
expect_equal(length(unique(lapply(dtset$param, function(x) x[[3]]))), 3)

expect_false(attr(dtset$param, 'settings')$requiresDenseMatrix)
expect_equal(attr(dtset$param, 'settings')$name, 'Decision Tree')
expect_equal(attr(dtset$param, 'settings')$pythonModule, 'sklearn.tree')
expect_equal(attr(dtset$param, 'settings')$pythonClass, "DecisionTreeClassifier")


})


test_that("DecisionTree errors as expected", {
  
  expect_error(setDecisionTree(criterion = list('madeup')))
  
  expect_error(setDecisionTree(maxDepth =  list(-1)))
  expect_error(setDecisionTree(minSamplesSplit =  list(-1)))
  expect_error(setDecisionTree(minSamplesLeaf  =  list(-1)))
  
})


test_that("check fit of DecisionTree", {
  
  modelSettings <- setDecisionTree(
    criterion = list('gini'),
    splitter = list('best'),
    maxDepth = list(as.integer(4)),
    minSamplesSplit = list(2),
    minSamplesLeaf = list(10),
    minWeightFractionLeaf = list(0),
    maxFeatures = list('sqrt'),
    maxLeafNodes = list(NULL),
    minImpurityDecrease = list(10^-7),
    classWeight = list(NULL),
    seed = sample(1000000,1)
  )
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'DecisionTree',
    analysisPath = tempdir()
    )
  
  expect_correct_fitPlp(plpModel, trainData)
  # add check for other model design settings
  
})

test_that('fitSklearn errors with wrong covariateData', {
  
  newTrainData <- copyTrainData(trainData)
  class(newTrainData$covariateData) <- 'notCovariateData'
  modelSettings <- setAdaBoost()
  analysisId <- 42
  
  expect_error(fitSklearn(newTrainData,
                          modelSettings,
                          search='grid',
                          analysisId))
})


test_that('AdaBoost fit works', {
  
  modelSettings <- setAdaBoost(nEstimators = list(10),
                               learningRate = list(0.1),
                               )
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'Adaboost',
    analysisPath = tempdir()
  )
  
  expect_correct_fitPlp(plpModel, trainData)
  expect_equal(dir(plpModel$model),"model.json")
  
})


test_that('RandomForest fit works', {
  
  modelSettings <- setRandomForest(ntrees=list(10),
                                   maxDepth=list(4),
                                   minSamplesSplit = list(2),
                                   minSamplesLeaf = list(10),
                                   mtries = list("sqrt"),
                                   maxSamples = list(0.9),
                                   classWeight = list(NULL))
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'RandomForest',
    analysisPath = tempdir()
  )
  
  expect_correct_fitPlp(plpModel, trainData)
  expect_equal(dir(plpModel$model),"model.json")
  
})


test_that('MLP fit works', {
  modelSettings <- setMLP(
    hiddenLayerSizes = list(c(20)),
    alpha = list(1e-6),
    maxIter = list(50),
    epsilon = list(1e-08),
    learningRateInit = list(0.01),
    tol = list(1e-2) # reduce tol so I don't get convergence warnings
  )
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'MLP',
    analysisPath = tempdir()
  )
  
  expect_correct_fitPlp(plpModel, trainData)
  expect_equal(dir(plpModel$model),"model.json")
  
})


test_that('Naive bayes fit works', {
  modelSettings <- setNaiveBayes()
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'Naive bayes',
    analysisPath = tempdir()
  )
  
  expect_correct_fitPlp(plpModel, trainData) 
  expect_equal(dir(plpModel$model),"model.json")
  
})


test_that('Support vector machine fit works', {
  modelSettings <- setSVM(C = list(1),
                          degree = list(1),
                          gamma = list('scale'),
                          classWeight = list(NULL))
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'SVM',
    analysisPath = tempdir()
  )
  
  expect_correct_fitPlp(plpModel, trainData) 
  expect_equal(dir(plpModel$model),"model.json")
  
})

test_that('Sklearn predict works', {
  
  modelSettings <- setAdaBoost(nEstimators = list(10),
                               learningRate = list(0.1),
  )
  
  plpModel <- fitPlp(
    trainData = tinyTrainData, 
    modelSettings = modelSettings,
    analysisId = 'Adaboost',
    analysisPath = tempdir()
  )
  
  predictions <- predictPythonSklearn(plpModel,
                                      testData,
                                      population)
  expect_correct_predictions(predictions, testData)
})
OHDSI/PatientLevelPrediction documentation built on May 3, 2024, 12:11 a.m.