library(delutils)
context("misc_utils")
create_example_dataframe <- function(){
set.seed(42)
mat <- data.frame(group=c('a', 'a', 'a', 'b', 'c', 'c'), vals=runif(6) * 100)
mat$vals[6] <- -400
mat$vals <- round(mat$vals)
row.names(mat) <- paste0('r_', seq(1,6))
return(mat)
}
#### clean_colnames ####
test_that("clean_colnames properly cleans column names", {
# Create a test data frame with different types of whitespace in column names
test_df <- data.frame(" First Name " = c("John", "Jane", "Alice"),
"Last\tName " = c("Doe", "Smith", "Johnson"),
" Age " = c(25, 30, 35),
check.names = FALSE)
# Clean column names using the clean_colnames function
cleaned_df <- clean_colnames(test_df)
# Expected column names after cleaning
expected_colnames <- c("first_name", "last_name", "age")
# Compare the cleaned column names with the expected column names
expect_equal(colnames(cleaned_df), expected_colnames)
})
#### clean_by_levels ####
test_that('clean_by_levels correctly checks input for correct type', {
expect_error(clean_by_levels(dat=matrix()), 'dat needs to be a data.frame not a matrix')
expect_error(clean_by_levels(dat=list()), 'dat needs to be a data.frame not a list')
expect_error(clean_by_levels(dat=data.frame(), n_levels='foo'), 'n_levels needs to be an integer >= 1')
expect_error(clean_by_levels(dat=data.frame(), n_levels=-1), 'n_levels needs to be an integer >= 1')
})
test_that('clean_by_levels cleans a data.frame', {
in_dat <- data.frame(A=rep(1,4), B=seq(1,4), C=c('f1', 'f2', 'f1', 'f1'))
ex_dat <- data.frame(B=seq(1,4), C=c('f1', 'f2', 'f1', 'f1'))
expect_equal(clean_by_levels(dat=in_dat), ex_dat)
})
#### rbind_named_df_list ####
test_that('rbind_named_df_list returns a bound data.frame',{
el <- data.frame(
col_from_list=paste0('Cond',
c(rep(2, 5), rep(3,2))),A=seq(1,7))
l <- list(data.frame(A=seq(1,5)),
data.frame(A=seq(6, 7))) %>%
setNames(., c('Cond2', 'Cond3'))
expect_equal(rbind_named_df_list(l), el)
# Making sure it works with tibbles
l2 <- lapply(l, tibble::as_tibble)
expect_equal(rbind_named_df_list(l2), el)
})
test_that('rbind_named_df_list checks for non data.frames', {
l <- list(matrix(data=NA, ncol=1, nrow=5),
data.frame(A=seq(1,5)),
data.frame(A=seq(6, 7))) %>%
setNames(., c('Cond1', 'Cond2', 'Cond3'))
expect_error(rbind_named_df_list(l),"df_list contains items which are not data.frame or tibble" )
})
test_that('rbind_named_df_list can (gracefully) handle empty data.frames or NULL objects',{
el <- data.frame(col_from_list=paste0('Cond', c(rep(2, 5), rep(3,2))),A=seq(1,7))
l_empty <- list(data.frame(),
data.frame(A=seq(1,5)),
data.frame(A=seq(6, 7))) %>%
setNames(., c('Cond1', 'Cond2', 'Cond3'))
l_null <- list(NULL,
data.frame(A=seq(1,5)),
data.frame(A=seq(6, 7))) %>%
setNames(., c('Cond1', 'Cond2', 'Cond3'))
expect_equal(expect_warning(rbind_named_df_list(l_empty), 'Cond1'), el)
expect_equal(expect_warning(rbind_named_df_list(l_null), 'Cond1'), el)
})
test_that('rbind_named_df_list complains loudly about data.frames with mismatched columns',{
l <- list(data.frame(A=seq(1,5), B=seq(8,12)),
data.frame(A=seq(6, 7))) %>%
setNames(., c('Cond2', 'Cond3'))
expect_error(rbind_named_df_list(l))
})
#### uniquefy ####
test_that('uniquefy (base) function returns a logical vector of uniquefied rows by largest value data.frame',{
mat <- create_example_dataframe()
e_vec <- c(FALSE,TRUE,FALSE,TRUE,TRUE,FALSE)
out_vec <- uniquefy_base(mat, 'group', 'vals')
expect_equal(out_vec, e_vec)
})
test_that('uniquefy by abs max returns a uniquefied (by abs(max)) data.frame',{
mat <- create_example_dataframe()
e_mat <- data.frame(group=c('a', 'b', 'c'),
vals=c(94,83,-400),
row.names = c('r_2','r_4','r_6'))
out_mat <- uniquefy_by_abs_max(mat)
expect_equal(row.names(out_mat), row.names(e_mat))
expect_equal(out_mat$group, e_mat$group)
expect_equal(out_mat$vals, e_mat$vals)
})
test_that('uniquefy by variance returns a uniquefied (by variance) data.frame of expression data',{
map <- data.frame(row.names=paste0('gene', seq(1,6)),
duplicated_ids=c('a','a','b','b','b','c'))
gene_vars <- c(2, 1, .5, 1, 4, 1)
set.seed(42)
mat <- lapply(gene_vars, function(x){
rnorm(10, 0, x)
}) %>% do.call(rbind, .)
row.names(mat) <- row.names(map)
e_vec <- c(TRUE,FALSE,FALSE,FALSE,TRUE,TRUE)
e_mat <- mat[e_vec,]
out_mat <- uniquefy_by_variance(mat, map, 'duplicated_ids')
expect_equal(out_mat, e_mat)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.