tests/testthat/test_manip.R

context('Test manipulation functions')


test_that( 'manip_factor_2_numeric'
  ,{

    fac_num = factor( c(1,3,8) )
    fac_chr = factor( c('foo','bar') )
    fac_chr_ordered = factor( c('a','b','c'), ordered = T )

    expect_identical( manip_factor_2_numeric( fac_num ), c(1,3,8) )
    expect_identical( manip_factor_2_numeric( fac_chr ), c(2,1) )
    expect_identical( manip_factor_2_numeric( fac_chr_ordered ), c(1,2,3) )

})


test_that('manip_bin_numerics'
  ,{

  categoricals = c('cyl', 'vs', 'am', 'gear', 'carb')

  data = mtcars2

  data_new = manip_bin_numerics(data)

  numerics = data_new %>%
    select_if( is.numeric ) %>%
    names()

  expect_true( is_empty(numerics) )
  expect_true( ! is_empty(data_new) )
  expect_identical( names(data_new) , names(data) )
  expect_true( ! 'easyalluvialid' %in% names(data_new) )
  
  bins_from_vec = manip_bin_numerics(data$disp)
  expect_equal( levels(bins_from_vec), c("LL", "ML", "M",  "MH", "HH") )
  
  data_new_cuts = manip_bin_numerics(data, bin_labels = 'cuts')
  
  data_new_median = manip_bin_numerics(data, bin_labels = 'median')
  
  data_new_mean = manip_bin_numerics(data, bin_labels = 'mean')
  
  data_new_min_max = manip_bin_numerics(data, bin_labels = 'min_max')
  
  expect_false( identical(data_new_cuts, data_new_median) )
  
  expect_false( identical(data_new_mean, data_new_median) )
  
})


test_that('manip_bin_numerics no numerics in data'
          ,{

  data = mtcars %>%
    mutate_all( as.factor )

  data_new = manip_bin_numerics(data)

  expect_identical(data, data_new)

})

test_that('manip_bin_numerics zero variance columns'
          ,{

  data = mtcars %>%
    as_tibble() %>%
    mutate( zero_var = 1
            , zero = 0
            , near_zero_var = c( rep(1,nrow(.)-1), 0.9 ) )

  expect_warning(data_new <- manip_bin_numerics(data))

  expect_identical( select(data, zero_var, zero)
                    , select(data_new, zero_var, zero) )

  expect_true( is.factor(data_new$near_zero_var) )

})

test_that('manip_bin_numerics with vector'
          , {

  vec = manip_bin_numerics(mtcars$mpg)
  expect_true( is.factor(vec) )

  vec = manip_bin_numerics( as.factor(mtcars$cyl) )
  expect_identical( vec, as.factor(mtcars$cyl) )
  
  df = tibble( a = rnorm(50), b = rnorm(50), c = seq(1:50) ) %>%
    mutate( c = as.character(c) )
  
  df_v1 = manip_bin_numerics(df)
  
  df_v2 = df %>%
    mutate( a = manip_bin_numerics(a)
            , b = manip_bin_numerics(b)
            , c = as.character( seq(1:50) ) )
  
  expect_identical( df_v1, df_v2)
  
  df_v1 = manip_bin_numerics(df, bin_labels = 'median')
  
  df_v2 = df %>%
    mutate( a = manip_bin_numerics(a, bin_labels = 'median')
            , b = manip_bin_numerics(b, bin_labels = 'median')
            , c = as.character( seq(1:50) ) )
  
  expect_identical( df_v1, df_v2)
  
  df_v1 = manip_bin_numerics(df, bin_labels = 'min_max')
  
  df_v2 = df %>%
    mutate( a = manip_bin_numerics(a, bin_labels = 'min_max')
            , b = manip_bin_numerics(b, bin_labels = 'min_max')
            , c = as.character( seq(1:50) ) )
  
  expect_identical( df_v1, df_v2 )
  
})
 
test_that('manip_bin_numerics_NA',{
  
  v =  rnorm(10)
  x = manip_bin_numerics( c( v, NA) )
  expect_true( is.factor(x) )
  expect_true( 'NA' %in% levels(x) )
  
  test_bin_labels = function( bin_label){
    x = manip_bin_numerics( c( v, NA), bin_labels = bin_label )
    expect_true( 'NA' %in% levels(x) )
    y = manip_bin_numerics( v, bin_labels = bin_label )
    expect_true( all( levels(y) %in% levels(x) ) )
    expect_false( all( levels(x) %in% levels(y) ) )
  }
  
  test_bin_labels('median')
  test_bin_labels('mean')
  test_bin_labels('min_max')
  test_bin_labels('cuts')
  
  df = mtcars2 %>%
    bind_rows( mtcars2['cyl'][1,] ) %>%
    manip_bin_numerics() %>%
    select( - cyl ) %>%
    select_if( is.factor) %>%
    summarise_all( function(x) list(levels(x)) ) %>%
    summarise_all( function(x) 'NA' %in% unlist(x) )
  
  expect_true( all( as.matrix( df[1,] ) ) )
  
})

test_that("most_frequent_lvl", {
  
  lvls <- as.factor(LETTERS[c(7, 7, 7, 1, 4, 8, 9, 14, 20)])
  lvl <- get_most_frequent_lvl(lvls)
  expect_true(lvl == "G")
  
})

test_that("manip_bin_numerics_warning",{
  df <- data.frame(x = c(1,1,2,2,9,9,10),
                   y = c(1,2,3,4,5,6,7))
  expect_warning(manip_bin_numerics(df))
})

Try the easyalluvial package in your browser

Any scripts or data that you put into this service are public.

easyalluvial documentation built on May 29, 2024, 5:32 a.m.