tests/testthat/test_summarizers.R

library(groupdata2)
context("summarizers()")


test_that("testing create_empty_summary_()", {
  xpectr::set_test_seed(42)

  # Regression tests

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "some_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE)),
    "grp_3" = factor(sample(LETTERS[1:3], size = 25, replace=TRUE)),
    "grp_4" = factor(sample(LETTERS[1:12], size = 25, replace=TRUE))
  )

  # EMPTY summary

  ## Testing 'create_empty_summary_(data=df, group_col="gr...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Assigning output
  output_11364 <- create_empty_summary_(data=df, group_col="grp_1")
  # Testing class
  expect_equal(
    class(output_11364),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_11364[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  # Testing column names
  expect_equal(
    names(output_11364),
    "grp_1",
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_11364),
    "factor",
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_11364),
    "integer",
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_11364),
    c(5L, 1L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_11364)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'create_empty_summary_(data=df, group_col="gr...'     ####


  ## Testing 'create_empty_summary_(data=df, group_col=c("...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_19148 <- xpectr::capture_side_effects(create_empty_summary_(data=df, group_col=c("grp_1", "grp_2")), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error']]),
    xpectr::strip("Assertion on 'group_col' failed: Must have length 1."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)
  ## Finished testing 'create_empty_summary_(data=df, group_col=c("...'     ####


  ## Testing 'create_empty_summary_(data = df %>% dplyr::g...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Assigning output
  output_19148 <- create_empty_summary_(data = df %>% dplyr::group_by(.data$grp_3),
                                        group_col = "grp_1")
  # Testing class
  expect_equal(
    class(output_19148),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_19148[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  # Testing column names
  expect_equal(
    names(output_19148),
    "grp_1",
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_19148),
    "factor",
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_19148),
    "integer",
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_19148),
    c(5L, 1L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_19148)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'create_empty_summary_(data = df %>% dplyr::g...'     ####


})

test_that("testing create_size_summary_()", {
  xpectr::set_test_seed(42)

  # Regression tests

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "some_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE))
  )


  ## Testing 'create_size_summary_(data=df, group_col="grp...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Assigning output
  output_19148 <- create_size_summary_(data=df, group_col="grp_1", name="size")
  # Testing class
  expect_equal(
    class(output_19148),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_19148[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    output_19148[["size"]],
    c(6, 6, 4, 7, 2),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(output_19148),
    c("grp_1", "size"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_19148),
    c("factor", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_19148),
    c("integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_19148),
    c(5L, 2L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_19148)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'create_size_summary_(data=df, group_col="grp...'     ####



  ## Testing 'create_size_summary_(data=df, group_col="grp...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Assigning output
  output_19148 <- create_size_summary_(data=df, group_col="grp_2", name="size")
  # Testing class
  expect_equal(
    class(output_19148),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_19148[["grp_2"]],
    structure(1:8, .Label = c("1", "2", "3", "4", "5", "6", "7", "8"),
      class = "factor"))
  expect_equal(
    output_19148[["size"]],
    c(2, 2, 2, 3, 7, 4, 3, 2),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(output_19148),
    c("grp_2", "size"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_19148),
    c("factor", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_19148),
    c("integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_19148),
    c(8L, 2L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_19148)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'create_size_summary_(data=df, group_col="grp...'     ####


  ## Testing 'create_size_summary_(data=df, group_col=NA, ...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_19148 <- xpectr::capture_side_effects(create_size_summary_(data=df, group_col=NA, name="size"), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error']]),
    xpectr::strip("Assertion on 'group_col' failed: May not be NA."),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)
  ## Finished testing 'create_size_summary_(data=df, group_col=NA, ...'     ####


  ## Testing 'create_size_summary_(data=df, group_col="grp...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_19148 <- xpectr::capture_side_effects(create_size_summary_(data=df, group_col="grp_1", name=""), reset_seed = TRUE)
  expect_match(
    xpectr::strip(side_effects_19148[['error']], lowercase = TRUE),
    xpectr::strip("must have at least 1 characters", lowercase = TRUE),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)
  ## Finished testing 'create_size_summary_(data=df, group_col="grp...'     ####


})

test_that("testing create_id_summaries_()", {
  xpectr::set_test_seed(42)

  # Regression tests

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "some_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "some_id_2" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE)),
    "grp_3" = factor(sample(LETTERS[1:3], size = 25, replace=TRUE)),
    "grp_4" = factor(sample(LETTERS[1:12], size = 25, replace=TRUE))
  )

  id_summ <-
    create_id_summaries_(
      data = df,
      group_col = "grp_1",
      id_cols = c("some_id", "some_id_2"),
      name_prefix = "# "
    )

  man_summ <- df %>%
    dplyr::group_by(.data$grp_1) %>%
    dplyr::summarise(
      `# some_id` = length(unique(.data$some_id)),
      `# some_id_2` = length(unique(.data$some_id_2))
    )

  expect_identical(id_summ, man_summ)


  ## Testing 'id_summ'                                                      ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(id_summ),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    id_summ[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    id_summ[["# some_id"]],
    c(2, 3, 4, 4, 4),
    tolerance = 1e-4)
  expect_equal(
    id_summ[["# some_id_2"]],
    c(2, 4, 4, 5, 4),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(id_summ),
    c("grp_1", "# some_id", "# some_id_2"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(id_summ),
    c("factor", "integer", "integer"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(id_summ),
    c("integer", "integer", "integer"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(id_summ),
    c(5L, 3L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(id_summ)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'id_summ'                                             ####


})

test_that("testing create_num_summaries_()", {
  xpectr::set_test_seed(42)

  # Regression tests

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "some_var_2" = runif(25),
    "some_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE))
  )

  num_summ <- create_num_summaries_(
    data = df,
    group_col = "grp_1",
    num_cols = c("some_var", "some_var_2"),
    fns = list(
      "mean" = mean,
      "sum" = sum,
      "sd" = sd,
      "iqr" = IQR
    ),
    rename = FALSE
  )

  man_summ <- df %>%
    dplyr::group_by(.data$grp_1) %>%
    dplyr::summarise(
      some_var_mean = mean(.data$some_var),
      some_var_sum = sum(.data$some_var),
      some_var_sd = sd(.data$some_var),
      some_var_iqr = IQR(.data$some_var),
      some_var_2_mean = mean(.data$some_var_2),
      some_var_2_sum = sum(.data$some_var_2),
      some_var_2_sd = sd(.data$some_var_2),
      some_var_2_iqr = IQR(.data$some_var_2)
    )

  expect_identical(num_summ, man_summ)


  ## Testing 'num_summ'                                                     ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(num_summ),
    c("tbl_df", "tbl", "data.frame"),
    fixed = TRUE)
  # Testing column values
  expect_equal(
    num_summ[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    num_summ[["some_var_mean"]],
    c(0.84165, 0.61504, 0.627, 0.5337, 0.61007),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_sum"]],
    c(1.68329, 3.07518, 4.389, 3.73591, 2.44029),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_sd"]],
    c(0.19315, 0.38385, 0.35016, 0.34623, 0.08878),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_iqr"]],
    c(0.13658, 0.61789, 0.58448, 0.52949, 0.11106),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_2_mean"]],
    c(0.21986, 0.54758, 0.72597, 0.6209, 0.50317),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_2_sum"]],
    c(0.43972, 2.73792, 5.08181, 4.34633, 2.01269),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_2_sd"]],
    c(0.30535, 0.38455, 0.20854, 0.27136, 0.37269),
    tolerance = 1e-4)
  expect_equal(
    num_summ[["some_var_2_iqr"]],
    c(0.21591, 0.51553, 0.2713, 0.37083, 0.43655),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(num_summ),
    c("grp_1", "some_var_mean", "some_var_sum", "some_var_sd", "some_var_iqr",
      "some_var_2_mean", "some_var_2_sum", "some_var_2_sd", "some_var_2_iqr"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(num_summ),
    c("factor", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(num_summ),
    c("integer", "double", "double", "double", "double", "double", "double",
      "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(num_summ),
    c(5L, 9L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(num_summ)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'num_summ'                                            ####

  num_summ_2 <- create_num_summaries_(
    data = df,
    group_col = "grp_1",
    num_cols = c("some_var"),
    fns = list(
      "mean" = mean,
      "sum" = sum,
      "sd" = sd,
      "iqr" = IQR
    ),
    rename = TRUE
  )

  expect_equal(
    colnames(num_summ_2),
    c("grp_1", "mean(some_var)", "sum(some_var)", "sd(some_var)", "iqr(some_var)"),
    fixed = TRUE)

  expect_identical(
    unname(num_summ_2),
    unname(num_summ[, 1:5])
  )


})

test_that("testing create_cat_summaries_()", {
  xpectr::set_test_seed(42)

  # Regression tests

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "a_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "b_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE))
  )

  cat_summ <- create_cat_summaries_(
    data = df,
    group_col = "grp_1",
    cat_cols = c("a_factor", "b_factor"),
    max_cat_prefix_chars = 2,
    name_prefix = "# "
  )

  man_summ <- df %>%
    dplyr::select("grp_1", 2:3) %>%
    tidyr::gather(key="cat_col", value="cat_val", 2:3) %>%
    dplyr::count(.data$grp_1, .data$cat_col, .data$cat_val) %>%
    dplyr::mutate(cat_name = paste0("# ", substr(cat_col, 1,2), "_", cat_val)) %>%
    dplyr::arrange(.data$cat_name) %>%
    tidyr::pivot_wider(id_cols = c("grp_1"), names_from = "cat_name", values_from = "n", values_fill = 0) %>%
    dplyr::arrange(.data$grp_1)

  strip_df <- function(x){
    x <- as.matrix(as.data.frame(x))
    rownames(x) <- NULL
    colnames(x) <- NULL
    x
  }

  expect_equal(
    strip_df(cat_summ),
    strip_df(man_summ)
  )


  ## Testing 'cat_summ'                                                     ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(cat_summ),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    cat_summ[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    cat_summ[["# a__1"]],
    c(1, 1, 0, 1, 1),
    tolerance = 1e-4)
  expect_equal(
    cat_summ[["# a__2"]],
    c(1, 2, 3, 3, 1),
    tolerance = 1e-4)
  expect_equal(
    cat_summ[["# a__3"]],
    c(0, 2, 4, 3, 2),
    tolerance = 1e-4)
  expect_equal(
    cat_summ[["# b__1"]],
    c(1, 1, 4, 2, 2),
    tolerance = 1e-4)
  expect_equal(
    cat_summ[["# b__2"]],
    c(1, 2, 2, 1, 0),
    tolerance = 1e-4)
  expect_equal(
    cat_summ[["# b__3"]],
    c(0, 2, 1, 4, 2),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(cat_summ),
    c("grp_1", "# a__1", "# a__2", "# a__3", "# b__1", "# b__2", "# b__3"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(cat_summ),
    c("factor", "numeric", "numeric", "numeric", "numeric", "numeric",
      "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(cat_summ),
    c("integer", "double", "double", "double", "double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(cat_summ),
    c(5L, 7L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(cat_summ)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'cat_summ'                                            ####


})

test_that("testing create_combined_cat_summary_() and create_combined_cat_summaries_()", {
  xpectr::set_test_seed(42)

  df <- data.frame(
    "some_var" = runif(25),
    "a_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "b_factor" = factor(sample(c("a", "b", "c"), size = 25, replace=TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE))
  )

  cat_summ_a <- create_combined_cat_summary_(
    data = df,
    group_cols = "grp_1",
    cat_col = "a_factor",
    cat_levels = NULL,
    warn_zero_variance = TRUE
  )

  man_summ <- df %>%
    dplyr::count(grp_1, a_factor) %>%
    tidyr::spread(key = "a_factor",
                  value = "n",
                  fill = 0) %>%
    tidyr::gather(key = "a_factor", value = "n", 2:4) %>%
    dplyr::group_by(a_factor) %>%
    dplyr::mutate(n = standardize_(n)) %>%
    dplyr::group_by(grp_1) %>%
    dplyr::summarise(a_factor = mean(n))

  expect_equal(
    as.data.frame(cat_summ_a),
    as.data.frame(man_summ)
  )


  ## Testing 'cat_summ_a'                                                   ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(cat_summ_a),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    cat_summ_a[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    cat_summ_a[["a_factor"]],
    c(-0.67868, 0.10412, 0.14157, 0.66219, -0.22921),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(cat_summ_a),
    c("grp_1", "a_factor"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(cat_summ_a),
    c("factor", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(cat_summ_a),
    c("integer", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(cat_summ_a),
    c(5L, 2L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(cat_summ_a)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'cat_summ_a'                                          ####


  cat_summ_b <- create_combined_cat_summary_(
    data = df,
    group_cols = "grp_1",
    cat_col = "b_factor",
    cat_levels = NULL,
    warn_zero_variance = TRUE
  )

  man_summ <- df %>%
    dplyr::count(grp_1, b_factor) %>%
    tidyr::spread(key = "b_factor",
                  value = "n",
                  fill = 0) %>%
    tidyr::gather(key = "b_factor", value = "n", 2:4) %>%
    dplyr::group_by(b_factor) %>%
    dplyr::mutate(n = standardize_(n)) %>%
    dplyr::group_by(grp_1) %>%
    dplyr::summarise(b_factor = mean(n))

  expect_equal(
    as.data.frame(cat_summ_b),
    as.data.frame(man_summ)
  )


  ## Testing 'cat_summ_b'                                                   ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(cat_summ_b),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    cat_summ_b[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    cat_summ_b[["b_factor"]],
    c(-0.75637, 0.09151, 0.68327, 0.41473, -0.43314),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(cat_summ_b),
    c("grp_1", "b_factor"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(cat_summ_b),
    c("factor", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(cat_summ_b),
    c("integer", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(cat_summ_b),
    c(5L, 2L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(cat_summ_b)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'cat_summ_b'                                          ####

  # Combine and check against wrapper
  cat_summ_ab <- cat_summ_a %>%
    dplyr::left_join(cat_summ_b, by="grp_1")

  cat_summ_from_wrapper <- create_combined_cat_summaries_(
    data = df,
    group_cols = c("grp_1"),
    cat_cols = c("a_factor", "b_factor"),
    cat_levels = NULL,
    warn_zero_variance = TRUE
  )

  expect_identical(
    cat_summ_ab,
    cat_summ_from_wrapper
  )


  ## Testing 'cat_summ_from_wrapper'                                        ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(cat_summ_from_wrapper),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    cat_summ_from_wrapper[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    cat_summ_from_wrapper[["a_factor"]],
    c(-0.67868, 0.10412, 0.14157, 0.66219, -0.22921),
    tolerance = 1e-4)
  expect_equal(
    cat_summ_from_wrapper[["b_factor"]],
    c(-0.75637, 0.09151, 0.68327, 0.41473, -0.43314),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(cat_summ_from_wrapper),
    c("grp_1", "a_factor", "b_factor"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(cat_summ_from_wrapper),
    c("factor", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(cat_summ_from_wrapper),
    c("integer", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(cat_summ_from_wrapper),
    c(5L, 3L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(cat_summ_from_wrapper)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'cat_summ_from_wrapper'                               ####


  # Nested group cols

  cat_summ <- create_combined_cat_summaries_(
    data = df,
    group_cols = c("grp_1", "grp_2"),
    cat_cols = c("a_factor", "b_factor"),
    cat_levels = NULL,
    warn_zero_variance = TRUE
  )

  ## Testing 'cat_summ'                                                     ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(cat_summ),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    cat_summ[["grp_1"]],
    structure(c(1L, 1L, 2L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 3L, 4L, 4L,
                4L, 4L, 4L, 4L, 5L, 5L, 5L),
              .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    cat_summ[["grp_2"]],
    structure(c(1L, 7L, 2L, 4L, 7L, 8L, 2L, 3L, 5L, 7L, 8L, 1L, 2L,
                4L, 5L, 6L, 7L, 6L, 7L, 8L),
              .Label = c("1", "2", "3", "4", "5", "6", "7", "8"), class = "factor"))
  expect_equal(
    cat_summ[["a_factor"]],
    c(-0.14068, 0.02177, -0.14068, 0.02177, -0.23933, 0.41046, 0.41046,
      -0.23933, -0.14068, -0.23933, 0.41046, 0.02177, -0.14068, -0.14068,
      0.31182, -0.14068, -0.23933, 0.41046, 0.02177, -0.23933),
    tolerance = 1e-4)
  expect_equal(
    cat_summ[["b_factor"]],
    c(-0.20403, -0.05859, -0.09097, -0.09097, -0.20403, 0.52494, 0.23406,
      -0.05859, -0.09097, -0.05859, 0.23406, -0.09097, -0.20403, -0.05859,
      0.46017, -0.20403, -0.09097, 0.23406, -0.09097, -0.09097),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(cat_summ),
    c("grp_1", "grp_2", "a_factor", "b_factor"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(cat_summ),
    c("factor", "factor", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(cat_summ),
    c("integer", "integer", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(cat_summ),
    c(20L, 4L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(cat_summ)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'cat_summ'                                            ####


  ## With cat_levels
  # By only using a single level (c)
  # We can check the weighting is correctly applied

  cat_summ <- create_combined_cat_summaries_(
    data = df,
    group_cols = c("grp_1"),
    cat_cols = c("b_factor"),
    cat_levels = list(
      "b_factor" = c("a" = 0, "b" = 0, "c" = 10) # Only c is used!
    ),
    warn_zero_variance = TRUE
  )

  man_summ <- df %>%
    dplyr::count(grp_1, b_factor) %>%
    tidyr::spread(key = "b_factor",
                  value = "n",
                  fill = 0) %>%
    tidyr::gather(key = "b_factor", value = "n", 2:4) %>%
    dplyr::group_by(b_factor) %>%
    dplyr::mutate(n = standardize_(n)) %>%
    dplyr::filter(b_factor == "c") %>%  # Only use c!
    dplyr::group_by(grp_1) %>%
    dplyr::summarise(b_factor = mean(n))


  expect_identical(
    as.data.frame(cat_summ),
    as.data.frame(man_summ)
  )


  ## Testing 'cat_summ'                                                     ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(cat_summ),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    cat_summ[["grp_1"]],
    structure(1:5, .Label = c("1", "2", "3", "4", "5"), class = "factor"))
  expect_equal(
    cat_summ[["b_factor"]],
    c(-1.21356, 0.13484, -0.53936, 1.48324, 0.13484),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(cat_summ),
    c("grp_1", "b_factor"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(cat_summ),
    c("factor", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(cat_summ),
    c("integer", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(cat_summ),
    c(5L, 2L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(cat_summ)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'cat_summ'                                            ####

})

test_that("testing create_cat_name_map_()", {
  xpectr::set_test_seed(42)

  # Regression tests

  # The dataset from the example
  df <- data.frame(
    "some_var" = runif(25),
    "a_factor" = factor(sample(1:3, size = 25, replace=TRUE)),
    "b_factor" = factor(sample(c("a", "b", "c"), size = 25, replace = TRUE)),
    "some_id" = factor(sample(1:7, size = 25, replace=TRUE)),
    "grp_1" = factor(sample(1:5, size = 25, replace=TRUE)),
    "grp_2" = factor(sample(1:8, size = 25, replace=TRUE))
  )


  ## Testing 'create_cat_name_map_( data = df, cat_cols = ...'              ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Assigning output
  output_17193 <- create_cat_name_map_(
      data = df,
      cat_cols = c("a_factor", "b_factor"),
      max_cat_prefix_chars = 5,
      name_prefix = "# "
    )
  # Testing class
  expect_equal(
    class(output_17193),
    "list",
    fixed = TRUE)
  # Testing type
  expect_type(
    output_17193,
    type = "list")
  # Testing values
  expect_equal(
    output_17193[["a_factor"]],
    c(`1` = "# a_fac_1", `2` = "# a_fac_2", `3` = "# a_fac_3"),
    fixed = TRUE)
  expect_equal(
    output_17193[["b_factor"]],
    c(a = "# b_fac_a", b = "# b_fac_b", c = "# b_fac_c"),
    fixed = TRUE)
  # Testing names
  expect_equal(
    names(output_17193),
    c("a_factor", "b_factor"),
    fixed = TRUE)
  # Testing length
  expect_equal(
    length(output_17193),
    2L)
  # Testing sum of element lengths
  expect_equal(
    sum(xpectr::element_lengths(output_17193)),
    6L)
  # Testing element classes
  expect_equal(
    xpectr::element_classes(output_17193),
    c("character", "character"),
    fixed = TRUE)
  # Testing element types
  expect_equal(
    xpectr::element_types(output_17193),
    c("character", "character"),
    fixed = TRUE)
  ## Finished testing 'create_cat_name_map_( data = df, cat_cols = ...'     ####


})

test_that("testing rank_numeric_cols_()", {
  xpectr::set_test_seed(42)

  df = data.frame("a" = c(3, 4, 5, 6, 1, 2, 3),
                  "b" = c(5, 3, 2, 6, 4, 1, 5))

  # Test rank_numeric_cols_()

  expect_identical(
    data.frame("a" = rank(df$a), "b" = rank(df$b)),
    rank_numeric_cols_(data=df))


  ## Testing 'rank_numeric_cols_(data=df)'                                 ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Assigning output
  output_19370 <- rank_numeric_cols_(data=df)
  # Testing class
  expect_equal(
    class(output_19370),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    output_19370[["a"]],
    c(3.5, 5, 6, 7, 1, 2, 3.5),
    tolerance = 1e-4)
  expect_equal(
    output_19370[["b"]],
    c(5.5, 3, 2, 7, 4, 1, 5.5),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(output_19370),
    c("a", "b"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(output_19370),
    c("numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(output_19370),
    c("double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(output_19370),
    c(7L, 2L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(output_19370)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'rank_numeric_cols_(data=df)'                         ####

  # With selected cols only
  expect_identical(
    data.frame("a" = rank(df$a), "b" = df$b),
    rank_numeric_cols_(data=df, cols = "a"))

  # Giving non-existing columns

  ## Testing 'rank_numeric_cols_(data = df, cols = "noope")'                ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing side effects
  # Assigning side effects
  side_effects_19148 <- xpectr::capture_side_effects(rank_numeric_cols_(data = df, cols = "noope"), reset_seed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error']]),
    xpectr::strip("`cols` had unknown names: noope"),
    fixed = TRUE)
  expect_equal(
    xpectr::strip(side_effects_19148[['error_class']]),
    xpectr::strip(c("simpleError", "error", "condition")),
    fixed = TRUE)
  ## Finished testing 'rank_numeric_cols_(data = df, cols = "noope")'       ####


})

test_that("testing mean_rank_numeric_cols_()", {
  xpectr::set_test_seed(42)

  df = data.frame("a" = c(1.3, 2.2, 5.8, 6.2, 3.3, 4.1),
                  "b" = c(2.5, 1.2, 6.5, 5.1, 4.6, 3.2))

  ranks <- mean_rank_numeric_cols_(
    data = df,
    cols = c("a", "b"),
    col_name = "mean_rank",
    rank_weights = NULL,
    already_rank_cols = character(0))


  ## Testing 'ranks'                                                        ####
  ## Initially generated by xpectr
  xpectr::set_test_seed(42)
  # Testing class
  expect_equal(
    class(ranks),
    "data.frame",
    fixed = TRUE)
  # Testing column values
  expect_equal(
    ranks[["a"]],
    c(1, 2, 5, 6, 3, 4), # Ranks (removed the decimals)
    tolerance = 1e-4)
  expect_equal(
    ranks[["b"]],
    c(2, 1, 6, 5, 4, 3),  # Ranks (removed the decimals)
    tolerance = 1e-4)
  expect_equal(
    ranks[["mean_rank"]],
    c(1.5, 1.5, 5.5, 5.5, 3.5, 3.5),
    tolerance = 1e-4)
  # Testing column names
  expect_equal(
    names(ranks),
    c("a", "b", "mean_rank"),
    fixed = TRUE)
  # Testing column classes
  expect_equal(
    xpectr::element_classes(ranks),
    c("numeric", "numeric", "numeric"),
    fixed = TRUE)
  # Testing column types
  expect_equal(
    xpectr::element_types(ranks),
    c("double", "double", "double"),
    fixed = TRUE)
  # Testing dimensions
  expect_equal(
    dim(ranks),
    c(6L, 3L))
  # Testing group keys
  expect_equal(
    colnames(dplyr::group_keys(ranks)),
    character(0),
    fixed = TRUE)
  ## Finished testing 'ranks'                                               ####

  # Only use ranks for a

  ranks <- mean_rank_numeric_cols_(
    data = df,
    cols = c("a", "b"),
    col_name = "mean_rank",
    rank_weights = c("a" = 2, "b" = 0),
    already_rank_cols = character(0))

  expect_equal(ranks$a, ranks$mean_rank)

  # Tell it that a is already a rank column

  ranks <- mean_rank_numeric_cols_(
    data = df,
    cols = c("a", "b"),
    col_name = "mean_rank",
    rank_weights = NULL,
    already_rank_cols = c("a"))

  expect_equal(
    ranks$mean_rank,
    (df$a + ranks$b) / 2
  )

})
LudvigOlsen/groupdata2 documentation built on March 7, 2024, 12:57 p.m.