tests/testthat/test_summarize_group_cols.R

library(groupdata2)
context("summarize_group_cols()")



test_that("testing summarize_group_cols()", {
  xpectr::set_test_seed(42)

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE)),
    "grp_3" = factor(sample(LETTERS[1:3], size = 25, replace=TRUE)),
    "grp_4" = factor(sample(LETTERS[1:12], size = 25, replace=TRUE))
  )

  # Manual tests

  # First check that the dataset is as we expect it

  ## Testing 'df'                                                           ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(df),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    df[["some_var"]],
    c(0.91481, 0.93708, 0.28614, 0.83045, 0.64175, 0.5191, 0.73659,
      0.13467, 0.65699, 0.70506, 0.45774, 0.71911, 0.93467, 0.25543,
      0.46229, 0.94001, 0.97823, 0.11749, 0.475, 0.56033, 0.90403,
      0.13871, 0.98889, 0.94667, 0.08244),
    tolerance = 1e-4)
  expect_equal(
    df[["grp_1"]],
    structure(c(3L, 2L, 5L, 3L, 5L, 4L, 5L, 2L, 4L, 1L, 5L, 1L, 2L,
      5L, 4L, 2L, 3L, 1L, 5L, 3L, 5L, 5L, 4L, 5L, 4L), .Label = c("1",
      "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    df[["grp_2"]],
    structure(c(3L, 3L, 4L, 7L, 1L, 6L, 6L, 2L, 3L, 5L, 6L, 8L, 7L,
      5L, 7L, 2L, 3L, 7L, 6L, 2L, 1L, 2L, 2L, 4L, 2L), .Label = c("1",
      "2", "3", "4", "5", "6", "7", "8"), class = "factor"))
  expect_equal(
    df[["grp_3"]],
    structure(c(3L, 1L, 2L, 2L, 1L, 2L, 1L, 2L, 2L, 3L, 2L, 1L, 1L,
      1L, 1L, 3L, 1L, 1L, 3L, 3L, 3L, 1L, 2L, 3L, 2L), .Label = c("A",
      "B", "C"), class = "factor"))
  expect_equal(
    df[["grp_4"]],
    structure(c(6L, 2L, 2L, 3L, 10L, 10L, 7L, 7L, 5L, 1L, 6L, 9L, 8L,
      4L, 5L, 5L, 1L, 3L, 6L, 8L, 3L, 3L, 5L, 6L, 7L), .Label = c("A",
      "C", "E", "F", "G", "H", "I", "J", "K", "L"), class = "factor"))
  # Testing column names
  expect_equal(
    names(df),
    c("some_var", "grp_1", "grp_2", "grp_3", "grp_4"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(df),
    c("numeric", "factor", "factor", "factor", "factor"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(df),
    c("double", "integer", "integer", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(df),
    c(25L, 5L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(df)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'df'                                                  ####


  # Manually calculate the summary statistics

  summaries_long <- summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = TRUE)

  expect_equal(
    summaries_long$`Num Groups`,
    c(length(unique(df$grp_1)), length(unique(df$grp_2)),
      length(unique(df$grp_3)), length(unique(df$grp_4)))
  )
  expect_equal(
    summaries_long$`Mean Rows`,
    c(mean(table(df$grp_1)), mean(table(df$grp_2)),
      mean(table(df$grp_3)), mean(table(df$grp_4)))
  )
  expect_equal(
    summaries_long$`Median Rows`,
    c(median(table(df$grp_1)), median(table(df$grp_2)),
      median(table(df$grp_3)), median(table(df$grp_4)))
  )
  expect_equal(
    summaries_long$`Std Rows`,
    c(sd(table(df$grp_1)), sd(table(df$grp_2)),
      sd(table(df$grp_3)), sd(table(df$grp_4)))
  )
  expect_equal(
    summaries_long$`Min Rows`,
    c(min(table(df$grp_1)), min(table(df$grp_2)),
      min(table(df$grp_3)), min(table(df$grp_4)))
  )
  expect_equal(
    summaries_long$`Max Rows`,
    c(max(table(df$grp_1)), max(table(df$grp_2)),
      max(table(df$grp_3)), max(table(df$grp_4)))
  )

  # Compare to wide format
  summaries_wide <- summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = FALSE)

  get_stats_for_row <- function(long_summ, grp){
    r <- long_summ[long_summ[["Group Column"]] == grp, ]
    r <- r[2:length(r)]
    unname(unlist(r))
  }

  expect_equal(
    summaries_wide$grp_1,
    get_stats_for_row(summaries_long, "grp_1")
  )
  expect_equal(
    summaries_wide$grp_2,
    get_stats_for_row(summaries_long, "grp_2")
  )
  expect_equal(
    summaries_wide$grp_3,
    get_stats_for_row(summaries_long, "grp_3")
  )
  expect_equal(
    summaries_wide$grp_4,
    get_stats_for_row(summaries_long, "grp_4")
  )



  # Generate expectations for 'summarize_group_cols'
  # Tip: comment out the gxs_function() call
  # so it is easy to regenerate the tests
  xpectr::set_test_seed(42)
  # xpectr::gxs_function(
  #   fn = summarize_group_cols,
  #   args_values = list(
  #     "data" = list(df, 1:10),
  #     "group_cols" = list(paste0("grp_", 1:4), "grp_5", NA),
  #     "long" = list(TRUE, FALSE, NA, "Nope")
  #   ),
  #   indentation = 2,
  #   copy_env = FALSE
  # )


  ## Testing 'summarize_group_cols'                                           ####
  ## Initially generated by xpectr
  # Testing different combinations of argument values

  # Testing summarize_group_cols(data = df, group_cols =...
  xpectr::set_test_seed(42)
  # Assigning output
  output_19148 <- summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = TRUE)
  # Testing class
  expect_equal(
    class(output_19148),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_19148[["Group Column"]],
    c("grp_1", "grp_2", "grp_3", "grp_4"),
    fixed = TRUE)
  expect_equal(
    output_19148[["Num Groups"]],
    c(5, 8, 3, 10),
    tolerance = 1e-4)
  expect_equal(
    output_19148[["Mean Rows"]],
    c(5, 3.125, 8.33333, 2.5),
    tolerance = 1e-4)
  expect_equal(
    output_19148[["Median Rows"]],
    c(4, 3, 8, 2),
    tolerance = 1e-4)
  expect_equal(
    output_19148[["Std Rows"]],
    c(2.34521, 1.64208, 1.52753, 1.17851),
    tolerance = 1e-4)
  expect_equal(
    output_19148[["IQR Rows"]],
    c(1, 2, 1.5, 1.75),
    tolerance = 1e-4)
  expect_equal(
    output_19148[["Min Rows"]],
    c(3, 1, 7, 1),
    tolerance = 1e-4)
  expect_equal(
    output_19148[["Max Rows"]],
    c(9, 6, 10, 4),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(output_19148),
    c("Group Column", "Num Groups", "Mean Rows", "Median Rows", "Std Rows",
      "IQR Rows", "Min Rows", "Max Rows"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_19148),
    c("character", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_19148),
    c("character", "double", "double", "double", "double", "double",
      "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_19148),
    c(4L, 8L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_19148)),
    character(0),
    fixed = TRUE)

  # Testing summarize_group_cols(data = 1:10, group_cols...
  # Changed from baseline: data = 1:10
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_19370 <- xpectr::capture_side_effects(summarize_group_cols(data = 1:10, group_cols = paste0("grp_", 1:4), long = TRUE), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19370[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'integer'."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19370[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = NULL, group_cols...
  # Changed from baseline: data = NULL
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_12861 <- xpectr::capture_side_effects(summarize_group_cols(data = NULL, group_cols = paste0("grp_", 1:4), long = TRUE), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_12861[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'data': Must be of type 'data.frame', not 'NULL'."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_12861[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: group_cols = "grp_5"
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_18304 <- xpectr::capture_side_effects(summarize_group_cols(data = df, group_cols = "grp_5", long = TRUE), reset_seed = TRUE)
  expect_match(
    xpectr::strip(side_effects_18304[['error']], lowercase = TRUE),
    xpectr::strip("must include the elements {grp_5}.", lowercase = TRUE),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_18304[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: group_cols = NA
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_16417 <- xpectr::capture_side_effects(summarize_group_cols(data = df, group_cols = NA, long = TRUE), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_16417[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'group_cols': Contains missing values (element 1)."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_16417[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: group_cols = NULL
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_15190 <- xpectr::capture_side_effects(summarize_group_cols(data = df, group_cols = NULL, long = TRUE), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_15190[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'group_cols': Must be of type 'character', not 'NULL'."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_15190[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: long = FALSE
  xpectr::set_test_seed(42)
  # Assigning output
  output_17365 <- summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = FALSE)
  # Testing class
  expect_equal(
    class(output_17365),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_17365[["Measure"]],
    c("Num Groups", "Mean Rows", "Median Rows", "Std Rows", "IQR Rows",
      "Min Rows", "Max Rows"),
    fixed = TRUE)
  expect_equal(
    output_17365[["grp_1"]],
    c(5, 5, 4, 2.34521, 1, 3, 9),
    tolerance = 1e-4)
  expect_equal(
    output_17365[["grp_2"]],
    c(8, 3.125, 3, 1.64208, 2, 1, 6),
    tolerance = 1e-4)
  expect_equal(
    output_17365[["grp_3"]],
    c(3, 8.33333, 8, 1.52753, 1.5, 7, 10),
    tolerance = 1e-4)
  expect_equal(
    output_17365[["grp_4"]],
    c(10, 2.5, 2, 1.17851, 1.75, 1, 4),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(output_17365),
    c("Measure", "grp_1", "grp_2", "grp_3", "grp_4"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_17365),
    c("character", "numeric", "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_17365),
    c("character", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_17365),
    c(7L, 5L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_17365)),
    character(0),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: long = NA
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_11346 <- xpectr::capture_side_effects(summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = NA), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_11346[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'long': May not be NA."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_11346[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: long = "Nope"
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_16569 <- xpectr::capture_side_effects(summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = "Nope"), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_16569[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'long': Must be of type 'logical flag', not 'character'."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_16569[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  # Testing summarize_group_cols(data = df, group_cols =...
  # Changed from baseline: long = NULL
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_17050 <- xpectr::capture_side_effects(summarize_group_cols(data = df, group_cols = paste0("grp_", 1:4), long = NULL), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_17050[['error']]),
    xpectr::strip("1 assertions failed:\n * Variable 'long': Must be of type 'logical flag', not 'NULL'."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_17050[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)

  ## Finished testing 'summarize_group_cols'                                  ####
  #

})
LudvigOlsen/R-splitters documentation built on March 7, 2024, 6:59 p.m.