# tests/testthat/test-preprocessingData.R

# Copyright 2021 Observational Health Data Sciences and Informatics
#
# This file is part of PatientLevelPrediction
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

library("testthat")
# Labels the tests in this file as the "preprocessingData" context.
# NOTE(review): context() is deprecated in testthat's 3rd edition — confirm
# which edition this package targets before removing it.
context("preprocessingData")


# Test helper: forwards its three arguments to createPreprocessSettings()
# under that function's own argument names and returns whatever it returns.
#
# minCovariateFraction  passed on as `minFraction`      (default 0.001)
# normalizeData         passed on as `normalize`        (default TRUE)
# removeRedundancy      passed on as `removeRedundancy` (default TRUE)
createDefaultSettings <- function(
  minCovariateFraction = 0.001,
  normalizeData = TRUE,
  removeRedundancy = TRUE
) {
  createPreprocessSettings(
    removeRedundancy = removeRedundancy,
    normalize = normalizeData,
    minFraction = minCovariateFraction
  )
}

# Checks that the settings object built via createDefaultSettings() carries the
# supplied values and the "preprocessSettings" class, and that invalid
# arguments raise an error.
test_that("createPreprocessSettings", {

  # Inputs are drawn at random, so each run exercises one valid combination.
  # TRUE/FALSE are spelled out because T and F are ordinary variables that can
  # be reassigned.
  normalizeDataTest <- sample(c(TRUE, FALSE), 1)
  removeRedundancyTest <- sample(c(TRUE, FALSE), 1)
  # sample(1000, 1) is in 1..1000, so the fraction lies in [1/2000, 1/1001].
  minCovariateFractionTest <- 1 / (1000 + sample(1000, 1))

  settings <- createDefaultSettings(
    minCovariateFraction = minCovariateFractionTest,
    normalizeData = normalizeDataTest,
    removeRedundancy = removeRedundancyTest
  )

  # The values passed in must come back unchanged on the settings object.
  expect_is(settings, "preprocessSettings")
  expect_equal(settings$minFraction, minCovariateFractionTest)
  expect_equal(settings$normalize, normalizeDataTest)
  expect_equal(settings$removeRedundancy, removeRedundancyTest)

  # Invalid minimum fraction: negative, then non-numeric.
  expect_error(createDefaultSettings(minCovariateFraction = -1))
  expect_error(createDefaultSettings(minCovariateFraction = 'dfdfdf'))

  # Invalid removeRedundancy: a string, then NULL.
  expect_error(createDefaultSettings(removeRedundancy = 'dfdfdf'))
  expect_error(createDefaultSettings(removeRedundancy = NULL))

  # Invalid normalizeData: a string, then NULL.
  expect_error(createDefaultSettings(normalizeData = 'dfdfdf'))
  expect_error(createDefaultSettings(normalizeData = NULL))
})

# Checks preprocessData(): the covariate rows it drops, the entries it adds to
# the "metaData" attribute, and that the settings used are stored on the result.
# Relies on a `trainData` object that is not defined in this file.
# The description was previously a copy of the preceding test's name; it now
# names the function actually under test.
test_that("preprocessData", {
  # Number of rows in a table, computed via dplyr::tally().
  countRows <- function(tbl) {
    tbl %>% dplyr::tally() %>% dplyr::pull()
  }
  # The tidyCovariateDataSettings entry of an object's "metaData" attribute.
  tidySettings <- function(covariateData) {
    attr(covariateData, "metaData")$tidyCovariateDataSettings
  }

  attr(trainData$covariateData, "metaData")$preprocessSettings <- NULL # removing for test
  metaLength <- length(attr(trainData$covariateData, "metaData"))
  covSize <- countRows(trainData$covariateData$covariates)
  oldFeatureCount <- countRows(trainData$covariateData$covariateRef)

  # --- Run 1: only the minimum-fraction filter is switched on. ---
  preprocessSettings <- createDefaultSettings(
    minCovariateFraction = 0.01,
    normalizeData = FALSE,
    removeRedundancy = FALSE
  )
  newData <- preprocessData(trainData$covariateData, preprocessSettings)
  tidied <- tidySettings(newData)

  expect_is(newData, 'CovariateData')
  expect_lt(countRows(newData$covariates), covSize)

  # metaData should have tidyCovariateDataSettings + preprocessSettings (so 2 bigger)
  expect_equal(length(attr(newData, "metaData")), metaLength + 2)

  # NOTE(review): a length is never negative, so this assertion cannot fail.
  expect_true(length(tidied$deletedInfrequentCovariateIds) >= 0)
  # Redundancy removal and normalisation were off, so nothing is recorded.
  expect_null(tidied$deletedRedundantCovariateIds)
  expect_null(tidied$normFactors)

  # Remaining features plus the removed infrequent ones must equal the original count.
  newFeatureCount <- countRows(newData$covariateRef) +
    length(tidied$deletedInfrequentCovariateIds)
  expect_equal(newFeatureCount, oldFeatureCount)

  # --- Run 2: no minimum fraction; normalisation and redundancy removal on. ---
  # Kept from the original: the reference count is read again before the second
  # run. Presumably this guards against run 1 having altered trainData — confirm.
  oldFeatureCount <- countRows(trainData$covariateData$covariateRef)

  preprocessSettings <- createDefaultSettings(
    minCovariateFraction = 0,
    normalizeData = TRUE,
    removeRedundancy = TRUE
  )
  newData <- preprocessData(trainData$covariateData, preprocessSettings)
  tidied <- tidySettings(newData)

  expect_length(tidied$deletedInfrequentCovariateIds, 0)
  # NOTE(review): the next two assertions are vacuous (length >= 0 always holds).
  expect_true(length(tidied$deletedRedundantCovariateIds) >= 0)
  expect_true(length(tidied$normFactors) >= 0)

  newFeatureCount <- countRows(newData$covariateRef) +
    length(tidied$deletedRedundantCovariateIds)

  expect_equal(newFeatureCount, oldFeatureCount) # sometimes differ?

  # check settings are saved
  expect_equal(attr(newData, "metaData")$preprocessSettings, preprocessSettings)

})

# Checks the tidyCovariates flag stored in the "metaData" attribute of
# plpResult$prediction. `plpResult` is not defined in this file.
test_that('Did tidy on test', {
  predictionMetaData <- attr(plpResult$prediction, 'metaData')
  expect_true(predictionMetaData$tidyCovariates)
})
# OHDSI/PatientLevelPrediction documentation built on April 27, 2024, 8:11 p.m.