# File: tests/testthat/test-RotterdamBreastCancer_df.R

# OncoDataSets - A Comprehensive Collection of Cancer Types and Cancer-related DataSets
# Version 0.1.0
# Copyright (C) 2024 Renzo Caceres Rossi
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

# Tests for the RotterdamBreastCancer_df data set



library(testthat)

# Structure check: class, dimensions, and column names (in order)
test_that("RotterdamBreastCancer_df loads correctly and has the expected structure", {
  # Expected shape of the dataset, kept together for easy comparison.
  expected_rows <- 2982
  expected_cols <- 15
  expected_names <- c(
    "pid", "year", "age", "meno", "size",
    "grade", "nodes", "pgr", "er", "hormon",
    "chemo", "rtime", "recur", "dtime", "death"
  )

  # The object must inherit from data.frame.
  expect_s3_class(RotterdamBreastCancer_df, "data.frame")

  # Dimensions are asserted separately so a failure points at rows or columns.
  expect_equal(nrow(RotterdamBreastCancer_df), expected_rows)
  expect_equal(ncol(RotterdamBreastCancer_df), expected_cols)

  # Column names must match exactly, including their order.
  expect_equal(names(RotterdamBreastCancer_df), expected_names)
})

# Column type check: one expectation per column, in dataset column order
test_that("RotterdamBreastCancer_df has correct column types", {
  # Maps each column to the name of the base R predicate it must satisfy.
  column_predicates <- c(
    pid = "is.integer", year = "is.integer", age = "is.integer",
    meno = "is.integer", size = "is.factor", grade = "is.integer",
    nodes = "is.integer", pgr = "is.integer", er = "is.integer",
    hormon = "is.integer", chemo = "is.integer", rtime = "is.numeric",
    recur = "is.integer", dtime = "is.numeric", death = "is.integer"
  )

  for (column in names(column_predicates)) {
    predicate_name <- column_predicates[[column]]
    predicate <- match.fun(predicate_name)

    # An explicit label keeps the failing column visible in the report,
    # since the expression itself only mentions the loop variable.
    expect_true(
      predicate(RotterdamBreastCancer_df[[column]]),
      label = sprintf("%s(RotterdamBreastCancer_df$%s)", predicate_name, column)
    )
  }
})

# Missing-value check: every column of the dataset must be free of NA
test_that("RotterdamBreastCancer_df reports NA values in the columns", {
  checked_columns <- c(
    "pid", "year", "age", "meno", "size",
    "grade", "nodes", "pgr", "er", "hormon",
    "chemo", "rtime", "recur", "dtime", "death"
  )

  # Count the NA entries of each column up front (named integer vector).
  na_counts <- vapply(
    checked_columns,
    function(column) sum(is.na(RotterdamBreastCancer_df[[column]])),
    integer(1)
  )

  # One expectation per column; the info text reports how many NAs were found.
  for (column in checked_columns) {
    missing_count <- na_counts[[column]]
    expect_true(
      missing_count == 0,
      info = paste("Found", missing_count, "NA values in", column)
    )
  }
})

# Value-range check for 'size', 'meno', and 'age'
test_that("RotterdamBreastCancer_df has valid value ranges", {
  # 'size': every factor level must be one of the known size categories.
  allowed_sizes <- c("<=20", "20-50", ">50")
  size_levels <- levels(RotterdamBreastCancer_df$size)
  sizes_are_valid <- all(size_levels %in% allowed_sizes)
  expect_true(
    sizes_are_valid,
    info = paste("Invalid values found in 'size'. Found:", paste(size_levels, collapse = ", "))
  )

  # 'meno': only the values 0 and 1 may occur.
  allowed_meno <- c(0, 1)
  observed_meno <- unique(RotterdamBreastCancer_df$meno)
  meno_is_valid <- all(observed_meno %in% allowed_meno)
  expect_true(
    meno_is_valid,
    info = paste("Invalid values found in 'meno'. Found:", paste(observed_meno, collapse = ", "))
  )

  # 'age': every value must be strictly positive.
  ages_are_positive <- all(RotterdamBreastCancer_df$age > 0)
  expect_true(ages_are_positive, info = "Some 'age' values are non-positive.")
})

# Immutability check: reading the columns must leave the dataset untouched
test_that("RotterdamBreastCancer_df remains unchanged after tests", {
  # Reference taken before any column is read.
  snapshot <- RotterdamBreastCancer_df

  # Read-only pass over every column. The NA counts are computed and then
  # discarded: nothing is asserted here, the calls only exercise the data.
  inspected_columns <- c(
    "pid", "year", "age", "meno", "size",
    "grade", "nodes", "pgr", "er", "hormon",
    "chemo", "rtime", "recur", "dtime", "death"
  )
  for (column in inspected_columns) {
    sum(is.na(RotterdamBreastCancer_df[[column]]))
  }

  # The dataset must still be identical to the reference taken above.
  expect_identical(snapshot, RotterdamBreastCancer_df)

  # Coarser follow-up checks on dimensions and column names.
  expect_equal(nrow(snapshot), nrow(RotterdamBreastCancer_df))
  expect_equal(ncol(snapshot), ncol(RotterdamBreastCancer_df))
  expect_equal(names(snapshot), names(RotterdamBreastCancer_df))
})

# Try the OncoDataSets package in your browser
#
# Any scripts or data that you put into this service are public.
#
# OncoDataSets documentation built on April 3, 2025, 6:08 p.m.