tests/testthat/test.make_regression_grid.r

context("make_regression_grid")

library(magrittr)
library(tibble)
library(dplyr)
#library(testthat)

rm(list = ls())
set.seed(0)
numcol <- 1:10
numcol_notunique <- rep(1, 10)
numcol_small <- 1:2
intcol <- numcol %>% as.integer()
numcol2 <- c(1:5, 1:5)
numcol3 <- c(rep(1, 5), rep(2, 5))
numcol4 <- numcol %>% sample(numcol %>% length())
numcol5 <- numcol %>% sample(numcol %>% length())
chrcol <- c("q", "w", "e", "r", "t", "y", "u", "i", "o", "p")
faccol <- chrcol %>% as.factor()

#### parameters invalid on its face ####

# bad df
expect_error(
  NULL %>%
    make_regression_grid(),
  "data %>% is.data.frame()",
  fixed = T)

# bad max_columns
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = "a",
      max_correlation = .85,
      beam_width = 1),
  "max_columns %>% is.numeric()",
  fixed = T)

# max_columns too small
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = 0,
      max_correlation = .85,
      beam_width = 1),
  "max_columns > 0",
  fixed = T)

# bad max_correlation
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = 1,
      max_correlation = "a",
      beam_width = 1),
  "max_correlation %>% is.numeric()",
  fixed = T)

# max_correlation too small
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = 1,
      max_correlation = 0,
      beam_width = 1),
  "max_correlation > 0",
  fixed = T)

# max_correlation too large
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = 1,
      max_correlation = 1,
      beam_width = 1),
  "max_correlation < 1",
  fixed = T)

# bad beam_width
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = 1,
      max_correlation = .85,
      beam_width = "a"),
  "beam_width %>% is.numeric()",
  fixed = T)

# beam_width too small
expect_error(
  data.frame(numcol, numcol) %>%
    make_regression_grid(
      max_columns = 1,
      max_correlation = .85,
      beam_width = 0),
  "beam_width > 0",
  fixed = T)

#### after analysis, no grid needed ####

# single column
actual <-
  data.frame(numcol) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1)
expect_equal(actual %>% nrow(), 0)

# single column (v2)
actual <-
  data.frame(chrcol) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1)
expect_equal(actual %>% nrow(), 0)

# no response columns
actual <-
  data.frame(chrcol, chrcol) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1)
expect_equal(actual %>% nrow(), 0)

# after uniqueness filtering, no response columns
actual <-
  data.frame(chrcol, numcol_notunique) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1)
expect_equal(actual %>% nrow(), 0)

# after uniqueness filtering, no self predict
actual <-
  data.frame(numcol, numcol_notunique) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1)
expect_equal(actual %>% nrow(), 0)

# not enough rows
actual <-
  data.frame(numcol_small, numcol_small) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1)
expect_equal(actual %>% nrow(), 0)

#### normal filter/select ####

# simple pair
actual <-
  data.frame(
    a = numcol,
    b = numcol4) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c("b~a", "a~b"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# simple pair (v2)
actual <-
  data.frame(
    a = numcol,
    b = numcol4,
    c = numcol5) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c("b~a", "c~a", "a~b", "c~b", "a~c", "b~c"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# simple pair (v3)
actual <-
  data.frame(
    a = faccol,
    b = numcol) %>%
  make_regression_grid(
    max_columns = 4,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c("b~a"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# trim down max_columns
actual <-
  data.frame(
    a = numcol,
    b = numcol4,
    c = numcol5) %>%
  make_regression_grid(
    max_columns = 4,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c(
      "c~a*b", "b~a*c", "a~b*c", "b~a","c~a", "a~b", "c~b",
      "a~c", "b~c"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# trim down beam_width
actual <-
  data.frame(
    a = numcol,
    b = numcol4,
    c = numcol5) %>%
  make_regression_grid(
    max_columns = 2,
    max_correlation = .85,
    beam_width = 5)
expected <-
  data.frame(
    formula = c("c~a*b", "b~a*c", "a~b*c", "b~a","c~a"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# trim down beam_width (v2)
actual <-
  data.frame(
    a = numcol,
    b = numcol4,
    c = numcol5) %>%
  make_regression_grid(
    max_columns = 2,
    max_correlation = .85,
    beam_width = 5)
expected <-
  data.frame(
    formula = c("c~a*b", "b~a*c", "a~b*c", "b~a","c~a"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# ordered beam
# response is in increasing varibality
# prediction is in decreasing varability
# therefor, more varability predicts less variable responce
actual <-
  data.frame(
    a = numcol2,
    b = numcol,
    c = numcol3) %>%
  make_regression_grid(
    max_columns = 1,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c("c~b", "a~b", "c~a", "b~a", "a~c", "b~c"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# ordered beam (v2)
actual <-
  data.frame(
    a = numcol2,
    b = numcol,
    c = numcol3) %>%
  make_regression_grid(
    max_columns = 2,
    max_correlation = .999,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c(
      "c~b*a", "a~b*c", "b~a*c", "c~b", "a~b", "c~a","b~a",
      "a~c", "b~c"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# self refrence filter
actual <-
  data.frame(
    a = numcol,
    b = numcol) %>%
  make_regression_grid(
    max_columns = 4,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c("b~a", "a~b"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# self refrence filter (v2)
actual <-
  data.frame(
    a = numcol,
    b = numcol,
    c = numcol) %>%
  make_regression_grid(
    max_columns = 4,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c("b~a", "c~a", "a~b", "c~b", "a~c", "b~c"),
    stringsAsFactors = F)
expect_equal(actual, expected)

# corrlation filter
# cor(b,c) = .87 > .85
actual <-
  data.frame(
    a = numcol2,
    b = numcol,
    c = numcol3) %>%
  make_regression_grid(
    max_columns = 2,
    max_correlation = .85,
    beam_width = 1000)
expected <-
  data.frame(
    formula = c(
      "c~b*a", "b~a*c", "c~b", "a~b", "c~a","b~a",
      "a~c", "b~c"),
    stringsAsFactors = F)
expect_equal(actual, expected)
markanewman/mndredge documentation built on May 9, 2019, 5:52 a.m.