## Tests for numeric utils functions
test_that("getDiscretizedBins returns sane results", {
set.seed(1)
x <- rnorm(100)
# NULL numBins
expect_equal(length(getDiscretizedBins(x, 'equalInterval')), 10)
expect_equal(length(getDiscretizedBins(x, 'quantile')), 10)
expect_equal(length(getDiscretizedBins(x, 'sd')), 6)
# with and without values
bins <- getDiscretizedBins(x, 'quantile', 10, FALSE)
expect_equal(all(is.na(unlist(lapply(bins, FUN = function(x) {x@value})))), TRUE)
bins <- getDiscretizedBins(x, 'quantile', 10, TRUE)
expect_equal(any(is.na(unlist(lapply(bins, FUN = function(x) {x@value})))), FALSE)
# non-numeric, NULL or NA for input
expect_error(getDiscretizedBins(c('a','b','c')))
expect_error(getDiscretizedBins(NA))
expect_error(getDiscretizedBins(NULL))
dates <- as.Date(c('1999-12-11','1999-06-14','1999-02-26','1999-05-24','1999-02-25','1999-09-06',
'1999-07-24','1999-05-29','1999-01-03','1999-06-28','1999-02-13','1999-03-17'))
dateBins <- getDiscretizedBins(dates)
# Test that the bin labels match the bin starts and ends, as well as that the
# dates of bins make sense
expect_equal(length(dateBins), 10)
expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][1], paste0("[",dateBins[[1]]@binStart))
expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][2], paste0(dateBins[[1]]@binEnd, "]"))
expect_equal(dateBins[[1]]@binStart, min(dates))
expect_equal(dateBins[[10]]@binEnd, max(dates))
expect_equal(length(getDiscretizedBins(dates, 'quantile')), 10)
expect_equal(length(getDiscretizedBins(dates, 'sd')), 5)
## different types of dates...
dates <- as.Date(c("1969-06-05T00:00:00", "1969-06-06T00:00:00", "1969-06-07T00:00:00","1969-06-08T00:00:00", "1969-06-09T00:00:00", "1969-06-10T00:00:00",
"1969-06-11T00:00:00", "1969-06-12T00:00:00", "1969-06-13T00:00:00",
"1969-06-14T00:00:00", "1969-06-15T00:00:00", "1969-06-16T00:00:00",
"1969-06-17T00:00:00", "1969-06-18T00:00:00", "1969-06-19T00:00:00",
"1969-06-20T00:00:00", "1969-06-21T00:00:00", "1969-06-22T00:00:00",
"1969-06-23T00:00:00", "1969-06-24T00:00:00"))
dateBins <- getDiscretizedBins(dates)
# Test that the bin labels match the bin starts and ends, as well as that the
# dates of bins make sense
expect_equal(length(dateBins), 10)
expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][1], paste0("[",dateBins[[1]]@binStart))
expect_equal(strsplit(dateBins[[1]]@binLabel, ', ')[[1]][2], paste0(dateBins[[1]]@binEnd, "]"))
expect_equal(dateBins[[1]]@binStart, min(dates))
expect_equal(dateBins[[10]]@binEnd, max(dates))
expect_equal(length(getDiscretizedBins(dates, 'quantile')), 10)
expect_equal(length(getDiscretizedBins(dates, 'sd')), 4)
## return as many bins as possible for these cases
# almost no data
expect_equal(length(getDiscretizedBins(1)), 1)
expect_equal(length(getDiscretizedBins(1, 'quantile')), 1)
expect_equal(length(getDiscretizedBins(1, 'sd')), 1)
x <- rnorm(2)
expect_equal(length(getDiscretizedBins(x)), 10)
expect_equal(sum(unlist(lapply(getDiscretizedBins(x), FUN = function(x){0 != x@value}))), 2)
expect_equal(length(getDiscretizedBins(x, 'quantile')), 10)
expect_equal(sum(unlist(lapply(getDiscretizedBins(x, 'quantile'), FUN = function(x){0 != x@value}))), 2)
expect_equal(length(getDiscretizedBins(x, 'sd')), 2)
# skewed data
x <- rnbinom(100, 10, 0.5)
expect_equal(length(getDiscretizedBins(x)), 10)
expect_equal(length(getDiscretizedBins(x, 'equalInterval', 50)), 50)
expect_equal(length(getDiscretizedBins(x, 'quantile')), 10)
expect_equal(length(getDiscretizedBins(x, 'quantile', 50)) <= 50, TRUE)
expect_equal(length(getDiscretizedBins(x, 'sd')) <= 6, TRUE)
})
test_that("nonZeroRound only returns 0 if it receives one", {
expect_equal(nonZeroRound(0),0)
expect_equal(nonZeroRound(123456789.987654321, 4) == 0, FALSE)
expect_equal(nonZeroRound(0.987654321, 4) == 0, FALSE)
expect_equal(nonZeroRound(0.00000019, 4) == 0, FALSE)
expect_equal(nonZeroRound(0.00000019, 4), 0.0000002)
})
test_that("setNaToZero replaces intended NAs", {
df <- iris
# Add NAs to all columns
nMissing <- 10
df <- as.data.frame(lapply(df, function(x) {x[sample(1:length(x), size=nMissing)] <- NA; return(x)}))
# With specified columns
setNaToZero(df, cols=c("Sepal.Length", "Sepal.Width"))
expect_equal(class(df), 'data.frame')
expect_equal(sum(is.na(df)), 3*nMissing)
expect_equal(sum(df[, c('Sepal.Length', 'Sepal.Width')] == 0), 2*nMissing)
# Err if given a non-numeric column
expect_error(naToZero(df, cols = c("Sepal.Length", "Sepal.Width", "Species")))
# With defualt cols and data.table. Should change only numeric columns
dt <- data.table::as.data.table(df)
setNaToZero(dt)
expect_equal(class(dt), c('data.table','data.frame'))
expect_equal(sum(is.na(dt)), nMissing)
expect_equal(sum(dt[, c('Petal.Length', 'Petal.Width')] == 0), 2*nMissing)
})
test_that("naToZero replaces intended NAs", {
df <- iris
# Add NAs to all columns
nMissing <- 10
df <- as.data.frame(lapply(df, function(x) {x[sample(1:length(x), size=nMissing)] <- NA; return(x)}))
# With specified columns
df <- naToZero(df, cols=c("Sepal.Length", "Sepal.Width"))
expect_equal(class(df), 'data.frame')
expect_equal(sum(is.na(df)), 3*nMissing)
expect_equal(sum(df[, c('Sepal.Length', 'Sepal.Width')] == 0), 2*nMissing)
# With defualt cols and data.table input. Should change only numeric columns
dt <- data.table::as.data.table(df)
dt <- naToZero(dt)
expect_equal(class(dt), c('data.table','data.frame'))
expect_equal(sum(is.na(dt)), nMissing)
expect_equal(sum(dt[, c('Petal.Length', 'Petal.Width')] == 0), 2*nMissing)
# Testing additional object types
# Lists
lst <- lapply(iris, function(x) {x[sample(1:length(x), size=nMissing)] <- NA; return(x)})
lst <- naToZero(lst)
expect_equal(class(lst), 'list')
expect_equal(sum(unlist(lapply(lst, function(x) {sum(is.na(x))}))), nMissing)
expect_equal(sum(unlist(lapply(lst[c('Petal.Length', 'Petal.Width')], function(x) {sum(x==0)}))), 2*nMissing)
# Err if given a non-numeric column
expect_error(naToZero(lst, cols = c("Sepal.Length", "Sepal.Width", "Species")))
# Vector
vec <- c(1,2,3,NA)
vec <- naToZero(vec)
expect_equal(class(vec), 'numeric')
expect_true(all(!is.na(vec)))
expect_equal(vec, c(1,2,3,0))
# Matrix
mat <- matrix(rnorm(36), nrow=6)
mat[sample(1:36, size=nMissing, replace=F)] <- NA
mat <- naToZero(mat)
expect_equal(class(mat), c('matrix', 'array'))
expect_true(any(!is.na(mat)))
expect_equal(sum(mat == 0), nMissing)
# Do nothing to strings
vec <- c('1','2','3',NA)
vec <- naToZero(vec)
expect_equal(class(vec), 'character')
expect_equal(vec, c('1','2','3',NA))
})
test_that("finding and validating numeric columns works", {
df <- iris
# With specified columns
numericCols <- findNumericCols(df)
expect_equal(numericCols, c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'))
# In a data.table
dt <- data.table::as.data.table(df)
numericCols <- findNumericCols(dt)
expect_equal(numericCols, c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'))
validatedCols <- validateNumericCols(dt, c('Sepal.Length', 'Sepal.Width'))
expect_equal(validatedCols, c('Sepal.Length', 'Sepal.Width'))
validatedCols <- validateNumericCols(dt, c(1, 2, 3))
expect_equal(validatedCols, c(1, 2, 3))
# In a list
lst <- as.list(iris)
numericCols <- findNumericCols(lst)
expect_equal(numericCols, c('Sepal.Length', 'Sepal.Width', 'Petal.Length', 'Petal.Width'))
validatedCols <- validateNumericCols(lst, c('Sepal.Length', 'Sepal.Width'))
expect_equal(validatedCols, c('Sepal.Length', 'Sepal.Width'))
validatedCols <- validateNumericCols(lst, c(2, 3, 4))
expect_equal(validatedCols, c(2, 3, 4))
# If no numeric cols
dt_string <- dt[, lapply(.SD, as.character)]
numericCols <- findNumericCols(dt_string)
expect_equal(numericCols, NULL)
lst_string <- lapply(iris, function(x) {x <- as.character(x); return(x)})
numericCols <- findNumericCols(lst_string)
expect_equal(numericCols, NULL)
# validateNumericCols should err if given non-numeric column names
expect_error(validateNumericCols(dt, c('Sepal.Length', 'Species')))
expect_error(validateNumericCols(lst, c('Sepal.Length', 'Species')))
# err if column names do not exist
expect_error(validateNumericCols(dt, c('a', 'Species')))
expect_error(validateNumericCols(lst, c('a', 'Species')))
# err if indices too large
expect_error(validateNumericCols(dt, c(1, 2, 100)))
expect_error(validateNumericCols(lst, c(1, 2, 100)))
# err if indices too small
expect_error(validateNumericCols(dt, c(-1, 2, 100)))
expect_error(validateNumericCols(lst, c(-1, 2, 100)))
# return NULL for NULL input
validatedCols <- validateNumericCols(dt, NULL)
expect_equal(validatedCols, NULL)
validatedCols <- validateNumericCols(lst, NULL)
expect_equal(validatedCols, NULL)
# remove NAs in cols arg
validatedCols <- validateNumericCols(dt, c(1, NA, 4))
expect_equal(validatedCols, c(1, 4))
validatedCols <- validateNumericCols(lst, c(1, NA, 4))
expect_equal(validatedCols, c(1, 4))
})
test_that("signifDigitEpsilon returns appropriate results", {
expect_equal(signifDigitEpsilon(1.23, 3), 0.01)
expect_equal(signifDigitEpsilon(11.0, 3), 0.1)
expect_equal(signifDigitEpsilon(12.3, 3), 0.1)
expect_equal(signifDigitEpsilon(101000, 3), 1000)
expect_equal(signifDigitEpsilon(1.20e-05, 3), 1.0e-07)
expect_equal(signifDigitEpsilon(0.0123e-05, 3), 1.0e-09)
expect_equal(signifDigitEpsilon(-2.34e-02, 3), 1.0e-04)
expect_equal(signifDigitEpsilon(1234567, 7), 1)
expect_equal(signifDigitEpsilon(-1234567, 7), 1)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.