context('Exploratory Analysis')
library(testthat)
library(ggplot2)
library(dplyr)
# Silence dplyr's "summarise() has grouped output by ..." messages during the tests.
# Spelled out as FALSE because the shorthand `F` is an ordinary, reassignable variable.
options(dplyr.summarise.inform = FALSE)
library(lubridate)
library(gapminder)
library(nycflights13)
library(forcats)
# library(scales)
source('test_helpers.R')
# Checks that the numeric summary copes with a column that consists entirely of NAs.
# (The test label is kept as-is so existing test filters/reports keep working.)
test_that("rt_explore_categoric_summary_NAs", {
    temp_iris <- iris
    # blank out an entire numeric column to exercise the all-missing case
    temp_iris$Petal.Width <- rep(NA, nrow(temp_iris))
    # The column being blanked is numeric and the fixture file name below indicates a
    # numeric-summary snapshot, so the numeric summary is the function exercised here.
    results <- rt_explore_numeric_summary(dataset=temp_iris)
    expect_true(rt_are_dataframes_equal_from_file(dataframe1=results,
                                                  rds_file='data/rt_explore_numeric_summary_iris_missing.RDS'))
})
# Snapshot test of rt_explore_numeric_summary: once on iris with a few NAs and zeros injected,
# once on the flights dataset; each result is compared against a stored RDS file.
test_that("rt_explore_numeric_summary", {
    iris_modified <- iris
    # missing values: first three rows of Sepal.Width, first row of Petal.Length
    iris_modified[1:3, 'Sepal.Width'] <- NA
    iris_modified[1, 'Petal.Length'] <- NA
    # zeros: first three rows of Sepal.Length, first row of Petal.Width
    iris_modified[1:3, 'Sepal.Length'] <- 0
    iris_modified[1, 'Petal.Width'] <- 0

    iris_summary <- rt_explore_numeric_summary(dataset=iris_modified)
    expect_true(rt_are_dataframes_equal_from_file(dataframe1=iris_summary,
                                                  rds_file='data/rt_explore_numeric_summary_iris.RDS'))

    flights_summary <- rt_explore_numeric_summary(dataset=flights)
    expect_true(rt_are_dataframes_equal_from_file(dataframe1=flights_summary,
                                                  rds_file='data/rt_explore_numeric_summary_flights.RDS'))
})
# Snapshot test of rt_explore_categoric_summary: once on iris with three missing Species values,
# once on the flights dataset; each result is compared against a stored RDS file.
test_that("rt_explore_categoric_summary", {
    iris_modified <- iris
    # knock out the first three Species values so the summary has to deal with NAs
    iris_modified[1:3, 'Species'] <- NA

    iris_summary <- rt_explore_categoric_summary(dataset=iris_modified)
    expect_true(rt_are_dataframes_equal_from_file(dataframe1=iris_summary,
                                                  rds_file='data/rt_explore_categoric_summary_iris.RDS'))

    flights_summary <- rt_explore_categoric_summary(dataset=flights)
    expect_true(rt_are_dataframes_equal_from_file(dataframe1=flights_summary,
                                                  rds_file='data/rt_explore_categoric_summary_flights.RDS'))
})
# Exercises rt_explore_correlations and rt_explore_plot_correlations on the credit dataset:
# the correlation table is compared against a stored RDS file and each plot variant is handed
# to test_save_plot.
test_that("rt_explore_correlations_credit", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # inject a single NA to make sure missing values are handled
    credit_data[1, 'months_loan_duration'] <- NA

    # correlation table with default parameters
    correlations <- rt_explore_correlations(dataset=credit_data)$correlations
    expect_true(rt_are_dataframes_equal_from_file(dataframe1=data.frame(correlations),
                                                  rds_file='data/rt_correlations_credit.RDS'))

    # plot with default parameters
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data))

    # plot of the dataset after it has been passed through rt_pretty_dataset
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_pretty.png',
                   plot=rt_explore_plot_correlations(dataset=rt_pretty_dataset(credit_data)))

    # custom base_size
    test_save_plot(file_name='data/rt_explore_plot_correlations_base_size.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     base_size=16))

    # custom p_value_threshold only
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_pvalue.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     p_value_threshold=0.3))

    # custom corr_threshold only
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_corr_treshold.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     corr_threshold=0.115))

    # custom corr_threshold and p_value_threshold together
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_both_parameters.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     corr_threshold=0.115,
                                                     p_value_threshold=0.3))
})
# Exercises the max_missing_column_perc argument of rt_explore_plot_correlations: one column is
# given slightly fewer NAs than the limit and another slightly more, then each plot variant is
# handed to test_save_plot.
test_that("rt_explore_correlations_credit_min_missing_nas_in_column", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    num_rows <- nrow(credit_data)

    # only include cols that have <= x% missing values
    max_missing_perc <- 0.05

    # rows for a column that stays just under the limit ...
    set.seed(42)
    rows_to_make_na_valid <- sample(num_rows, num_rows * (max_missing_perc - 0.01))
    # ... and rows for a column that goes just over it
    set.seed(43)
    rows_to_make_na_invalid <- sample(num_rows, num_rows * (max_missing_perc + 0.01))

    # plots should include months_loan_duration and exclude age
    credit_data[rows_to_make_na_valid, 'months_loan_duration'] <- NA
    credit_data[rows_to_make_na_invalid, 'age'] <- NA

    # (disabled) correlations <- rt_explore_correlations(dataset=credit_data, max_missing_column_perc=max_missing_perc)

    # missing-column limit only
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_2.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     max_missing_column_perc=max_missing_perc))

    # missing-column limit plus custom p_value_threshold
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_pvalue_2.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     max_missing_column_perc=max_missing_perc,
                                                     p_value_threshold=0.3))

    # missing-column limit plus custom corr_threshold
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_corr_treshold_2.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     max_missing_column_perc=max_missing_perc,
                                                     corr_threshold=0.115))

    # missing-column limit plus both thresholds
    test_save_plot(file_name='data/rt_explore_plot_correlations_credit_both_parameters_2.png',
                   plot=rt_explore_plot_correlations(dataset=credit_data,
                                                     max_missing_column_perc=max_missing_perc,
                                                     corr_threshold=0.115,
                                                     p_value_threshold=0.3))
})
# Tests rt_explore_value_totals on the credit dataset.
# Each section below rebuilds an expected data frame with plain dplyr (count / group_by / mutate)
# and compares it with the function's output for one combination of arguments:
#   - variable only, or variable + second_variable
#   - plain counts, count_distinct, or sum_by_variable
#   - with or without multi_value_delimiter
# The sections reuse `expected_df`, `actual_df` and `temp`, so their order matters.
test_that("rt_explore_value_totals", {
credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
# make sure all col names have spaces
colnames(credit_data) <- test_helper__column_names(credit_data)
##########################################################################################################
# test with factor
# change the levels to verify that the original levels are retained if order_by_count==FALSE
##########################################################################################################
custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)
# synthetic id column used by the count_distinct sections
credit_data$`Id Col` <- 1:nrow(credit_data)
# make sure it handles NAs (one NA in each column that is used below)
credit_data[1, 'Checking Balance Col'] <- NA
credit_data[2, 'Default Col'] <- NA
credit_data[3, 'Id Col'] <- NA
credit_data[4, 'Amount Col'] <- NA
variable <- 'Checking Balance Col'
second_variable <- 'Default Col'
sum_by_variable <- 'Amount Col'
count_distinct <- 'Id Col'
# cannot use sum_by_variable and count_distinct at the same time
expect_error(rt_explore_value_totals(dataset=credit_data,
variable=variable,
sum_by_variable=sum_by_variable,
count_distinct=count_distinct))
####
# single var
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`) %>%
rename(count = n)) %>% as.data.frame()
expected_df$percent <- expected_df$count / sum(expected_df$count)
actual_df <- rt_explore_value_totals(dataset=credit_data, variable=variable)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$count), 1000)
expect_equal(sum(actual_df$percent), 1)
####
# single var - non factor
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`) %>%
rename(count = n)) %>% as.data.frame() %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
arrange(`Checking Balance Col`)
expected_df$percent <- expected_df$count / sum(expected_df$count)
actual_df <- rt_explore_value_totals(dataset=credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
variable=variable)
expect_false(is.factor(actual_df$`Checking Balance Col`))
# values should come back sorted, with the NA group last
expect_identical(c(sort(actual_df$`Checking Balance Col`), NA), actual_df$`Checking Balance Col`)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$count), 1000)
expect_equal(sum(actual_df$percent), 1)
####
# single var - count-distinct
# ALL NAs - Should be counted as 1 for each group
####
expected_df <- suppressWarnings(credit_data %>%
mutate(`Id Col` = NA) %>%
group_by(`Checking Balance Col`) %>%
summarise(count = n_distinct(`Id Col`)) %>%
as.data.frame())
expected_df$percent <- expected_df$count / sum(expected_df$count)
actual_df <- rt_explore_value_totals(dataset=credit_data %>%
mutate(`Id Col` = NA),
variable=variable,
count_distinct=count_distinct)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# 5 groups: the four custom levels plus the NA group, each contributing one distinct (NA) id
expect_equal(sum(actual_df$count), 5)
expect_equal(sum(actual_df$percent), 1)
####
# single var - count-distinct
# ids are unique apart from a single NA, so the distinct counts equal the plain row counts
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`) %>%
rename(count = n)) %>% as.data.frame()
expected_df$percent <- expected_df$count / sum(expected_df$count)
actual_df <- rt_explore_value_totals(dataset=credit_data,
variable=variable,
count_distinct=count_distinct)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$count), 1000)
expect_equal(sum(actual_df$percent), 1)
####
# single var - count-distinct
# set the ids of all `unknown` rows to NA, so there should only be 1 distinct value for `unknown`,
# but the rest of the dataset should be the same as above
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`) %>%
rename(count = n)) %>% as.data.frame()
# row 4 is expected to be the `unknown` level (4th entry of custom_levels)
num_unknowns <- expected_df[4, 'count']
expected_df[4, 'count'] <- 1
expected_df$percent <- expected_df$count / sum(expected_df$count)
temp <- credit_data %>% mutate(`Id Col` = ifelse(`Checking Balance Col` == 'unknown', NA, `Id Col`))
actual_df <- rt_explore_value_totals(dataset=temp, variable=variable, count_distinct = count_distinct)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# there is only 1 distinct unknown, so subtract all unknowns and add back in 1 for the 1 distinct
expect_equal(sum(actual_df$count), 1000 - num_unknowns + 1)
expect_equal(sum(actual_df$percent), 1)
####
# single var - sum by var
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, wt=`Amount Col`) %>%
rename(sum = n)) %>% as.data.frame()
expected_df$percent <- expected_df$sum / sum(expected_df$sum)
actual_df <- rt_explore_value_totals(dataset=credit_data,
variable=variable,
sum_by_variable=sum_by_variable)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE))
expect_equal(sum(actual_df$percent), 1)
####
# single var - multi-value
# transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
# while all other variable counts remain the same
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`) %>%
rename(count = n)) %>% as.data.frame()
# row 1 is expected to be the `< 0 DM` level (1st entry of custom_levels)
num_less_zero <- expected_df[1, 'count']
expected_df[1, 'count'] <-num_less_zero * 2
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- expected_df %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
arrange(`Checking Balance Col`)
# need to convert to a character, otherwise ifelse will convert to numeric factor value
# then convert back to factor
temp <- credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
paste(`Checking Balance Col`, '|', `Checking Balance Col`),
`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
multi_value_delimiter=' \\| ')
expect_true(is.factor(actual_df$`Checking Balance Col`))
# same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
# take the unique values and sort them for the factor levels)
expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# we essentially duplicated the count for `< 0 DM`
expect_equal(sum(actual_df$count), nrow(credit_data) + num_less_zero)
expect_equal(sum(actual_df$percent), 1)
####
# single var - multi-value - sum by
# transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
# while all other variable sums remain the same
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, wt=`Amount Col`) %>%
rename(sum = n)) %>% as.data.frame()
sum_amount_less_zero <- expected_df[1, 'sum']
expected_df[1, 'sum'] <- sum_amount_less_zero * 2
expected_df$percent <- expected_df$sum / sum(expected_df$sum)
expected_df <- expected_df %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
arrange(`Checking Balance Col`)
# need to convert to a character, otherwise ifelse will convert to numeric factor value
# then convert back to factor
temp <- credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
paste(`Checking Balance Col`, '|', `Checking Balance Col`),
`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
sum_by_variable=sum_by_variable,
multi_value_delimiter=' \\| ')
expect_true(is.factor(actual_df$`Checking Balance Col`))
# same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
# take the unique values and sort them for the factor levels)
expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# we essentially duplicated the sum for `< 0 DM`
expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE) + sum_amount_less_zero)
expect_equal(sum(actual_df$percent), 1)
####
# single var - multi-value - count distinct
# transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
# while all other variable counts remain the same
# since we are duplicating "< 0 DM", but counting distinct Id, the counts should be the same as if we
# did not duplicate
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`) %>%
rename(count = n)) %>% as.data.frame()
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- expected_df %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
arrange(`Checking Balance Col`)
# need to convert to a character, otherwise ifelse will convert to numeric factor value
# then convert back to factor
temp <- credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
paste(`Checking Balance Col`, '|', `Checking Balance Col`),
`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
count_distinct=count_distinct,
multi_value_delimiter=' \\| ')
expect_true(is.factor(actual_df$`Checking Balance Col`))
# same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
# take the unique values and sort them for the factor levels)
expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# we are counting distinct ids so we should have the same counts as original
expect_equal(sum(actual_df$count), nrow(credit_data))
expect_equal(sum(actual_df$percent), 1)
# helper for the two-variable sections: sums `group_percent` within each `Checking Balance Col`
# group and returns the per-group totals (each is expected to be 1)
get_group_percent_totals <- function(x) {
suppressWarnings(x %>%
group_by(`Checking Balance Col`) %>%
summarise(group_percent_check = sum(group_percent))) %>%
rt_get_vector('group_percent_check')
}
####
# double var
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`) %>%
rename(count = n))
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = count / sum(count)) %>%
ungroup()) %>% as.data.frame()
actual_df <- rt_explore_value_totals(dataset=credit_data,
variable=variable,
second_variable=second_variable)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$count), nrow(credit_data))
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - non factor
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`) %>%
rename(count = n))
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = count / sum(count)) %>%
ungroup()) %>% as.data.frame() %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
arrange(`Checking Balance Col`)
actual_df <- rt_explore_value_totals(dataset=credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
variable=variable,
second_variable=second_variable)
expect_false(is.factor(actual_df$`Checking Balance Col`))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$count), nrow(credit_data))
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - count-distinct
# this should actually be the same thing as not counting distinct, because NAs get lumped into 1
# and there is only 1 NA for the id field, so it still gets counted as a single distinct
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`) %>%
rename(count = n)) %>% as.data.frame()
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = count / sum(count)) %>%
ungroup()) %>% as.data.frame()
actual_df <- rt_explore_value_totals(dataset=credit_data,
variable=variable,
second_variable=second_variable,
count_distinct=count_distinct)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$count), nrow(credit_data))
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - count-distinct
# set the ids of all `unknown` rows to NA, so there should only be 1 distinct value per `unknown`
# group, but the rest of the dataset should be the same as above
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`) %>%
rename(count = n)) %>% as.data.frame()
# NOTE(review): rows 8 and 9 are presumably the two `unknown` x `Default Col` rows - the positions
# depend on the data in credit.csv, confirm if the fixture changes
num_unknowns <- expected_df[8, 'count'] + expected_df[9, 'count']
expected_df[8, 'count'] <- 1
expected_df[9, 'count'] <- 1
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = count / sum(count)) %>%
ungroup()) %>% as.data.frame()
temp <- credit_data %>% mutate(`Id Col` = ifelse(`Checking Balance Col` == 'unknown', NA, `Id Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
second_variable=second_variable,
count_distinct=count_distinct)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# we changed all of the ids corresponding to checking_balance=unknown to NA, so count distinct
# will count 1 in each group, so subtract number of unknowns and add back in the 2 distinct
expect_equal(sum(actual_df$count), nrow(credit_data) - num_unknowns + 2)
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - sum by var
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`, wt=`Amount Col`) %>%
rename(sum = n)) %>% as.data.frame()
expected_df$percent <- expected_df$sum / sum(expected_df$sum)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = sum / sum(sum)) %>%
ungroup()) %>% as.data.frame()
actual_df <- rt_explore_value_totals(dataset=credit_data,
variable=variable,
second_variable=second_variable,
sum_by_variable=sum_by_variable)
expect_true(is.factor(actual_df$`Checking Balance Col`))
expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE))
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - multi-value
# transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
# while all other variable counts remain the same
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`) %>%
rename(count = n)) %>% as.data.frame()
# NOTE(review): rows 1 and 2 are presumably the two `< 0 DM` x `Default Col` rows - confirm
num_less_zero <- expected_df[1, 'count'] + expected_df[2, 'count']
expected_df[1, 'count'] <- expected_df[1, 'count'] * 2
expected_df[2, 'count'] <- expected_df[2, 'count'] * 2
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- expected_df %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
arrange(`Checking Balance Col`)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = count / sum(count)) %>%
ungroup()) %>% as.data.frame()
# need to convert to a character, otherwise ifelse will convert to numeric factor value
# then convert back to factor
temp <- credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
paste(`Checking Balance Col`, '|', `Checking Balance Col`),
`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
second_variable=second_variable,
multi_value_delimiter=' \\| ')
expect_true(is.factor(actual_df$`Checking Balance Col`))
# same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
# take the unique values and sort them for the factor levels)
expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# the `< 0 DM` rows were counted twice
expect_equal(sum(actual_df$count), nrow(credit_data) + num_less_zero)
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - multi-value - sum by
# transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
# while all other variable sums remain the same
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`, wt=`Amount Col`) %>%
rename(sum = n)) %>% as.data.frame()
sum_less_zero <- expected_df[1, 'sum'] + expected_df[2, 'sum']
expected_df[1, 'sum'] <- expected_df[1, 'sum'] * 2
expected_df[2, 'sum'] <- expected_df[2, 'sum'] * 2
expected_df$percent <- expected_df$sum / sum(expected_df$sum)
expected_df <- expected_df %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
arrange(`Checking Balance Col`)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = sum / sum(sum)) %>%
ungroup()) %>% as.data.frame()
# need to convert to a character, otherwise ifelse will convert to numeric factor value
# then convert back to factor
temp <- credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
paste(`Checking Balance Col`, '|', `Checking Balance Col`),
`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
second_variable=second_variable,
sum_by_variable=sum_by_variable,
multi_value_delimiter=' \\| ')
expect_true(is.factor(actual_df$`Checking Balance Col`))
# same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
# take the unique values and sort them for the factor levels)
expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# the `< 0 DM` amounts were summed twice
expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE) + sum_less_zero)
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
####
# double var - multi-value - count distinct
# transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
# while all other variable counts remain the same
# since we are duplicating "< 0 DM", but counting distinct Id, the counts should be the same as if we
# did not duplicate
####
expected_df <- suppressWarnings(credit_data %>%
count(`Checking Balance Col`, `Default Col`) %>%
rename(count = n)) %>% as.data.frame()
expected_df$percent <- expected_df$count / sum(expected_df$count)
expected_df <- expected_df %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
arrange(`Checking Balance Col`)
expected_df <- suppressWarnings(expected_df %>%
group_by(`Checking Balance Col`) %>%
mutate(group_percent = count / sum(count)) %>%
ungroup()) %>% as.data.frame()
# need to convert to a character, otherwise ifelse will convert to numeric factor value
# then convert back to factor
temp <- credit_data %>%
mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
paste(`Checking Balance Col`, '|', `Checking Balance Col`),
`Checking Balance Col`)) %>%
mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
actual_df <- rt_explore_value_totals(dataset=temp,
variable=variable,
second_variable=second_variable,
count_distinct=count_distinct,
multi_value_delimiter=' \\| ')
expect_true(is.factor(actual_df$`Checking Balance Col`))
# same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
# take the unique values and sort them for the factor levels)
expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
expect_true(rt_are_dataframes_equal(expected_df, actual_df))
# distinct ids are unaffected by the duplication
expect_equal(sum(actual_df$count), nrow(credit_data))
expect_equal(sum(actual_df$percent), 1)
expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
})
# Checks the facet_variable argument of rt_explore_value_totals when the facet column is a
# character column (the CSV is read with stringsAsFactors=FALSE).
# The non-facet numbers are covered by other tests, so each section only verifies that
#   * the NA facet holds the single expected row, and
#   * the 'yes' / 'no' facets equal the function's output on the dataset pre-filtered to that value.
test_that("rt_explore_value_totals__facet_strings", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=FALSE)
    # make sure all col names have spaces
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # give the main variable custom factor levels and add a synthetic id column
    custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)
    credit_data$`Id Col` <- 1:nrow(credit_data)

    # make sure it handles NAs
    credit_data[1, 'Checking Balance Col'] <- NA
    credit_data[2, 'Default Col'] <- NA
    credit_data[3, 'Id Col'] <- NA
    credit_data[4, 'Amount Col'] <- NA

    variable <- 'Checking Balance Col'
    second_variable <- 'Default Col'
    sum_by_variable <- 'Amount Col'
    count_distinct <- 'Id Col'
    comparison_variable <- 'Purpose Col'
    facet_variable <- 'Default Col'

    # rows of a faceted result that belong to one facet label
    facet_rows <- function(faceted_totals, facet_label) {
        filter(faceted_totals, `Default Col` == facet_label)
    }

    # the facet labelled `facet_label` must equal the un-faceted totals (computed with the arguments
    # in `...`) of the rows whose `Default Col` is `default_value`
    expect_facet_equals_filtered <- function(faceted_totals, default_value, facet_label, ...) {
        expected <- rt_explore_value_totals(filter(credit_data, `Default Col` == default_value), ...)
        actual <- select(facet_rows(faceted_totals, facet_label), -`Default Col`)
        expect_true(rt_are_dataframes_equal(expected, actual))
    }

    ######################################################################################################
    # variable
    ######################################################################################################
    value_counts <- rt_explore_value_totals(credit_data,
                                            variable = variable,
                                            facet_variable = facet_variable)
    default_na <- facet_rows(value_counts, 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
    expect_facet_equals_filtered(value_counts, 'yes', 'Default Col - yes',
                                 variable = variable)
    expect_facet_equals_filtered(value_counts, 'no', 'Default Col - no',
                                 variable = variable)

    ######################################################################################################
    # comparison variable
    ######################################################################################################
    value_counts <- rt_explore_value_totals(credit_data,
                                            variable = variable,
                                            second_variable = comparison_variable,
                                            facet_variable = facet_variable)
    default_na <- facet_rows(value_counts, 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
    expect_facet_equals_filtered(value_counts, 'yes', 'Default Col - yes',
                                 variable = variable,
                                 second_variable = comparison_variable)
    expect_facet_equals_filtered(value_counts, 'no', 'Default Col - no',
                                 variable = variable,
                                 second_variable = comparison_variable)

    ######################################################################################################
    # sum_by variable
    ######################################################################################################
    value_counts <- rt_explore_value_totals(credit_data,
                                            variable = variable,
                                            second_variable = comparison_variable,
                                            sum_by_variable = sum_by_variable,
                                            facet_variable = facet_variable)
    default_na <- facet_rows(value_counts, 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$sum, 5951)
    expect_equal(default_na$percent, 1)
    expect_equal(default_na$group_percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
    expect_facet_equals_filtered(value_counts, 'yes', 'Default Col - yes',
                                 variable = variable,
                                 second_variable = comparison_variable,
                                 sum_by_variable = sum_by_variable)
    expect_facet_equals_filtered(value_counts, 'no', 'Default Col - no',
                                 variable = variable,
                                 second_variable = comparison_variable,
                                 sum_by_variable = sum_by_variable)

    ######################################################################################################
    # count_distinct variable
    ######################################################################################################
    value_counts <- rt_explore_value_totals(credit_data,
                                            variable = variable,
                                            second_variable = comparison_variable,
                                            count_distinct = count_distinct,
                                            facet_variable = facet_variable)
    default_na <- facet_rows(value_counts, 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(default_na$group_percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
    expect_facet_equals_filtered(value_counts, 'yes', 'Default Col - yes',
                                 variable = variable,
                                 second_variable = comparison_variable,
                                 count_distinct = count_distinct)
    expect_facet_equals_filtered(value_counts, 'no', 'Default Col - no',
                                 variable = variable,
                                 second_variable = comparison_variable,
                                 count_distinct = count_distinct)
})
test_that("rt_explore_value_totals__facet_factors", {
  # The non-facet numbers are already covered by other unit tests, so this test only verifies
  # that, for each facet value, the faceted result (after dropping the facet column) equals the
  # result of filtering the data to that value and calling the function without a facet.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  # make sure all col names have spaces
  colnames(credit_data) <- test_helper__column_names(credit_data)

  ##########################################################################################################
  # test with factor
  # change the levels to verify that the original levels are retained if order_by_count==FALSE
  ##########################################################################################################
  custom_levels <- c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown")
  credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels = custom_levels)
  credit_data$`Id Col` <- seq_len(nrow(credit_data))

  # make sure it handles NAs
  credit_data[1, "Checking Balance Col"] <- NA
  credit_data[2, "Default Col"] <- NA
  credit_data[3, "Id Col"] <- NA
  credit_data[4, "Amount Col"] <- NA

  # Rows of a faceted result that belong to the facet built from the NA `Default Col` value.
  facet_na_rows <- function(faceted) {
    faceted %>% filter(`Default Col` == "Default Col - NA")
  }

  # For the "yes" and then the "no" facet: filter the raw data to that value, compute the totals
  # without a facet (forwarding `...` to `rt_explore_value_totals()`), and expect them to equal
  # the matching facet rows of `faceted` with the facet column removed.
  expect_facets_match_filtered <- function(faceted, ...) {
    for (facet_value in c("yes", "no")) {
      filtered <- filter(credit_data, `Default Col` == facet_value)
      expected <- rt_explore_value_totals(filtered, variable = "Checking Balance Col", ...)
      actual <- faceted %>%
        filter(`Default Col` == paste0("Default Col - ", facet_value)) %>%
        select(-`Default Col`)
      expect_true(rt_are_dataframes_equal(expected, actual))
    }
  }

  ##########################################################################################################
  # variable
  ##########################################################################################################
  value_counts <- credit_data %>%
    rt_explore_value_totals(variable = "Checking Balance Col",
                            facet_variable = "Default Col")
  default_na <- facet_na_rows(value_counts)
  expect_equal(nrow(default_na), 1)
  expect_equal(default_na$count, 1)
  expect_equal(default_na$percent, 1)
  expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
  expect_facets_match_filtered(value_counts)

  ##########################################################################################################
  # comparison variable
  ##########################################################################################################
  value_counts <- credit_data %>%
    rt_explore_value_totals(variable = "Checking Balance Col",
                            second_variable = "Purpose Col",
                            facet_variable = "Default Col")
  default_na <- facet_na_rows(value_counts)
  expect_equal(nrow(default_na), 1)
  expect_equal(default_na$count, 1)
  expect_equal(default_na$percent, 1)
  expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
  expect_facets_match_filtered(value_counts, second_variable = "Purpose Col")

  ##########################################################################################################
  # sum_by variable
  ##########################################################################################################
  value_counts <- credit_data %>%
    rt_explore_value_totals(variable = "Checking Balance Col",
                            second_variable = "Purpose Col",
                            sum_by_variable = "Amount Col",
                            facet_variable = "Default Col")
  default_na <- facet_na_rows(value_counts)
  expect_equal(nrow(default_na), 1)
  expect_equal(default_na$sum, 5951)
  expect_equal(default_na$percent, 1)
  expect_equal(default_na$group_percent, 1)
  expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
  expect_facets_match_filtered(value_counts,
                               second_variable = "Purpose Col",
                               sum_by_variable = "Amount Col")

  ##########################################################################################################
  # count_distinct variable
  ##########################################################################################################
  value_counts <- credit_data %>%
    rt_explore_value_totals(variable = "Checking Balance Col",
                            second_variable = "Purpose Col",
                            count_distinct = "Id Col",
                            facet_variable = "Default Col")
  default_na <- facet_na_rows(value_counts)
  expect_equal(nrow(default_na), 1)
  expect_equal(default_na$count, 1)
  expect_equal(default_na$percent, 1)
  expect_equal(default_na$group_percent, 1)
  expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")
  expect_facets_match_filtered(value_counts,
                               second_variable = "Purpose Col",
                               count_distinct = "Id Col")
})
test_that("rt_explore_value_totals - bug: sum_by_all_zeros", {
  # Regression test. With a second categoric variable plus a sum_by variable, if every sum_by
  # value inside one primary category is zero, the within-group percentage divides by zero and
  # produces NaN. The same scenario is then repeated with NA instead of 0.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  # make sure all col names have spaces
  colnames(credit_data) <- test_helper__column_names(credit_data)

  ##########################################################################################################
  # Every amount in the '< 0 DM' category is 0
  ##########################################################################################################
  credit_data <- credit_data %>%
    mutate(`Amount Col` = ifelse(`Checking Balance Col` == "< 0 DM", 0, `Amount Col`))

  # sum_by only
  totals <- rt_explore_value_totals(dataset = credit_data,
                                    variable = "Checking Balance Col",
                                    second_variable = NULL,
                                    count_distinct = NULL,
                                    sum_by_variable = "Amount Col",
                                    multi_value_delimiter = NULL)
  expected <- credit_data %>%
    group_by(`Checking Balance Col`) %>%
    summarise(sum = sum(`Amount Col`),
              percent = sum(`Amount Col`) / sum(credit_data$`Amount Col`))
  expect_true(rt_are_dataframes_equal(totals, expected))

  # sum_by with a second variable
  totals <- rt_explore_value_totals(dataset = credit_data,
                                    variable = "Checking Balance Col",
                                    second_variable = "Default Col",
                                    count_distinct = NULL,
                                    sum_by_variable = "Amount Col",
                                    multi_value_delimiter = NULL)
  expected <- credit_data %>%
    group_by(`Checking Balance Col`, `Default Col`) %>%
    summarise(sum = sum(`Amount Col`),
              percent = sum(`Amount Col`) / sum(credit_data$`Amount Col`)) %>%
    mutate(group_percent = sum / sum(sum)) %>%
    ungroup()
  expect_true(rt_are_dataframes_equal(totals, expected))

  # the plots built on the same data must still render
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_missing_sum_by.png",
    plot = rt_explore_plot_value_totals(dataset = credit_data,
                                        variable = "Checking Balance Col",
                                        comparison_variable = "Default Col",
                                        sum_by_variable = "Amount Col")
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_missing_sum_by_stack.png",
    plot = suppressWarnings(rt_explore_plot_value_totals(dataset = credit_data,
                                                         variable = "Checking Balance Col",
                                                         comparison_variable = "Default Col",
                                                         sum_by_variable = "Amount Col",
                                                         view_type = "Stack Percent"))
  )

  # count_distinct on the amount column instead of summing it
  totals <- rt_explore_value_totals(dataset = credit_data,
                                    variable = "Checking Balance Col",
                                    second_variable = "Default Col",
                                    count_distinct = "Amount Col",
                                    multi_value_delimiter = NULL)
  expected <- credit_data %>%
    group_by(`Checking Balance Col`, `Default Col`) %>%
    summarise(count = n_distinct(`Amount Col`))
  expect_true(rt_are_dataframes_equal(totals %>% select(`Checking Balance Col`, `Default Col`, count),
                                      expected))

  ##########################################################################################################
  # Try the same thing but if values in category have NA rather than 0
  ##########################################################################################################
  credit_data <- credit_data %>%
    mutate(`Amount Col` = ifelse(`Checking Balance Col` == "< 0 DM", NA, `Amount Col`))

  # sum_by only
  totals <- rt_explore_value_totals(dataset = credit_data,
                                    variable = "Checking Balance Col",
                                    second_variable = NULL,
                                    count_distinct = NULL,
                                    sum_by_variable = "Amount Col",
                                    multi_value_delimiter = NULL)
  expected <- credit_data %>%
    group_by(`Checking Balance Col`) %>%
    summarise(sum = sum(`Amount Col`, na.rm = TRUE),
              percent = sum(`Amount Col`, na.rm = TRUE) / sum(credit_data$`Amount Col`, na.rm = TRUE))
  expect_true(rt_are_dataframes_equal(totals, expected))

  # sum_by with a second variable
  totals <- rt_explore_value_totals(dataset = credit_data,
                                    variable = "Checking Balance Col",
                                    second_variable = "Default Col",
                                    count_distinct = NULL,
                                    sum_by_variable = "Amount Col",
                                    multi_value_delimiter = NULL)
  expected <- credit_data %>%
    group_by(`Checking Balance Col`, `Default Col`) %>%
    summarise(sum = sum(`Amount Col`, na.rm = TRUE),
              percent = sum(`Amount Col`, na.rm = TRUE) / sum(credit_data$`Amount Col`, na.rm = TRUE)) %>%
    mutate(group_percent = sum / sum(sum, na.rm = TRUE)) %>%
    ungroup()
  expect_true(rt_are_dataframes_equal(totals, expected))

  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_missing_sum_by_nas.png",
    plot = rt_explore_plot_value_totals(dataset = credit_data,
                                        variable = "Checking Balance Col",
                                        comparison_variable = "Default Col",
                                        sum_by_variable = "Amount Col")
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_missing_sum_by_stack_nas.png",
    plot = suppressWarnings(rt_explore_plot_value_totals(dataset = credit_data,
                                                         variable = "Checking Balance Col",
                                                         comparison_variable = "Default Col",
                                                         sum_by_variable = "Amount Col",
                                                         view_type = "Stack Percent"))
  )

  # count_distinct on the amount column instead of summing it
  totals <- rt_explore_value_totals(dataset = credit_data,
                                    variable = "Checking Balance Col",
                                    second_variable = "Default Col",
                                    count_distinct = "Amount Col",
                                    multi_value_delimiter = NULL)
  expected <- credit_data %>%
    group_by(`Checking Balance Col`, `Default Col`) %>%
    summarise(count = n_distinct(`Amount Col`))
  expect_true(rt_are_dataframes_equal(totals %>% select(`Checking Balance Col`, `Default Col`, count),
                                      expected))
})
test_that("rt_explore_plot_value_totals__distinct_variable", {
  # Snapshot tests for `rt_explore_plot_value_totals()` when `count_distinct_variable` is set,
  # first for a single variable and then combined with comparison and facet variables.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  # make sure all col names have spaces
  colnames(credit_data) <- test_helper__column_names(credit_data)

  ##########################################################################################################
  # test with factor
  # change the levels to verify that the original levels are retained if order_by_count==FALSE
  ##########################################################################################################
  custom_levels <- c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown")
  credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels = custom_levels)
  credit_data$`Id Col` <- seq_len(nrow(credit_data))

  # make sure it handles NAs
  credit_data[1, "Checking Balance Col"] <- NA
  credit_data[2, "Default Col"] <- NA
  credit_data[3, "Id Col"] <- NA
  credit_data[4, "Amount Col"] <- NA

  variable <- "Checking Balance Col"
  comparison_variable <- "Default Col"
  sum_by_variable <- "Amount Col"
  count_distinct <- "Id Col"

  # Same data with `Age Col` and `Purpose Col` merged into one `cohort` column, which gives a
  # distinct-count variable whose values repeat across rows. Built once (credit_data is not
  # modified below) and namespaced because tidyr is not attached in this file's header.
  cohort_data <- credit_data %>% tidyr::unite(cohort, `Age Col`, `Purpose Col`)

  ##########################################################################################################
  # single variable
  ##########################################################################################################
  # sum_by_variable and count_distinct_variable together are expected to error
  expect_error(rt_explore_plot_value_totals(dataset = credit_data,
                                            variable = variable,
                                            sum_by_variable = sum_by_variable,
                                            count_distinct_variable = count_distinct))
  # the confidence-interval views are expected to error with count_distinct_variable
  expect_error(rt_explore_plot_value_totals(dataset = credit_data,
                                            variable = variable,
                                            count_distinct_variable = count_distinct,
                                            view_type = "Confidence Interval"))
  expect_error(rt_explore_plot_value_totals(dataset = credit_data,
                                            variable = variable,
                                            count_distinct_variable = count_distinct,
                                            view_type = "Confidence Interval - within Variable"))

  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = FALSE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__purpose.png",
                 plot = rt_explore_plot_value_totals(dataset = cohort_data,
                                                     variable = variable,
                                                     count_distinct_variable = "cohort",
                                                     order_by_count = FALSE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct_order_by_count.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = TRUE))
  # primary variable as character rather than factor
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__char.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data %>%
                                                       mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
                                                     variable = variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = FALSE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__dual.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     count_distinct_variable = count_distinct,
                                                     show_dual_axes = TRUE,
                                                     order_by_count = FALSE))

  ##########################################################################################################
  # comparison variable
  ##########################################################################################################
  expect_error(rt_explore_plot_value_totals(dataset = credit_data,
                                            variable = variable,
                                            comparison_variable = comparison_variable,
                                            sum_by_variable = sum_by_variable,
                                            count_distinct_variable = count_distinct))
  expect_error(rt_explore_plot_value_totals(dataset = credit_data,
                                            variable = variable,
                                            comparison_variable = comparison_variable,
                                            count_distinct_variable = count_distinct,
                                            view_type = "Confidence Interval"))
  expect_error(rt_explore_plot_value_totals(dataset = credit_data,
                                            variable = variable,
                                            comparison_variable = comparison_variable,
                                            count_distinct_variable = count_distinct,
                                            view_type = "Confidence Interval - within Variable"))

  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = FALSE))
  # This snapshot used to be written a second time further down with identical arguments and
  # the same file name; the redundant copy was removed.
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp__dual.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     count_distinct_variable = count_distinct,
                                                     show_dual_axes = TRUE,
                                                     order_by_count = FALSE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp__order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = TRUE))

  # cohort as the distinct-count variable
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp__purpose.png",
                 plot = rt_explore_plot_value_totals(dataset = cohort_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     count_distinct_variable = "cohort",
                                                     show_dual_axes = TRUE,
                                                     order_by_count = FALSE))
  # here the "comparison" column is used as the facet instead of as a comparison
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__temp__distinct__facet.png",
                 plot = rt_explore_plot_value_totals(dataset = cohort_data,
                                                     variable = variable,
                                                     facet_variable = comparison_variable,
                                                     count_distinct_variable = "cohort",
                                                     view_type = "Bar",
                                                     order_by_count = FALSE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__temp__distinct__comp__facet.png",
                 plot = rt_explore_plot_value_totals(dataset = cohort_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = "Phone Col",
                                                     count_distinct_variable = "cohort",
                                                     view_type = "Bar",
                                                     order_by_count = FALSE))

  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp_order_by_count.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = TRUE))
  # primary variable as character rather than factor
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp__char.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data %>%
                                                       mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     count_distinct_variable = count_distinct,
                                                     order_by_count = FALSE))
  # the "comparison" column is used as the facet
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__distinct__comp__facet.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     facet_variable = comparison_variable,
                                                     count_distinct_variable = count_distinct,
                                                     view_type = "Bar",
                                                     order_by_count = FALSE))
})
test_that("rt_get_colors_from_values", {
  # Checks the colors returned by `rt_get_colors_from_values()` for the diamonds `cut` column
  # against the first five `rt_colors()`, for character input, factor input, input containing
  # an NA, and a factor re-levelled by frequency.
  dataset <- diamonds
  custom_colors <- rt_colors()[1:5]
  # Positions within `custom_colors` expected for `cut` in its built-in factor level order.
  # NOTE(review): this looks like colors are assigned alphabetically and returned in level
  # order - confirm against the implementation.
  factor_level_positions <- c(1, 2, 5, 4, 3)

  ##################################################
  # TEST WITHOUT NA
  ##################################################
  # as character
  expect_identical(custom_colors,
                   rt_get_colors_from_values(as.character(dataset[["cut"]])))
  # as factor
  expect_identical(custom_colors[factor_level_positions],
                   rt_get_colors_from_values(dataset[["cut"]]))

  ##################################################
  # TEST WITH NA
  ##################################################
  dataset[1, "cut"] <- NA
  # as character
  expect_identical(custom_colors,
                   rt_get_colors_from_values(as.character(dataset[["cut"]])))
  # as factor
  expect_identical(custom_colors[factor_level_positions],
                   rt_get_colors_from_values(dataset[["cut"]]))

  ##################################################
  # TEST WITH LEVELS ORDERED BY FREQUENCY
  ##################################################
  # frequency order:    Ideal < Premium < Very Good < Good < Fair
  # alphabetical order: "Fair" "Good" "Ideal" "Premium" "Very Good"
  frequency_positions <- c(3, 4, 5, 2, 1)
  by_frequency <- dataset
  by_frequency$cut <- fct_infreq(by_frequency$cut, ordered = TRUE)
  expect_identical(custom_colors[frequency_positions],
                   rt_get_colors_from_values(by_frequency[["cut"]]))
})
test_that("rt_explore_plot_value_counts", {
  # Snapshot tests for single-variable `rt_explore_plot_value_totals()` plots, covering
  # character, factor and ordered-factor inputs, NA values, and ordering by count.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  variable <- "Checking Balance Col"
  # make sure it handles NAs
  credit_data[1, "Checking Balance Col"] <- NA

  ##########################################################################################################
  # primary variable as character
  ##########################################################################################################
  credit_data_chr <- credit_data %>%
    mutate(`Checking Balance Col` = as.character(`Checking Balance Col`))
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data_chr,
                                                     variable = variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_order__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data_chr,
                                                     variable = variable,
                                                     order_by_count = FALSE,
                                                     simple_mode = TRUE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_counts__comparison__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data_chr,
                                                     variable = variable,
                                                     comparison_variable = "Default Col",
                                                     simple_mode = TRUE,
                                                     order_by_count = FALSE,
                                                     base_size = 11))

  ##########################################################################################################
  # primary variable as ordered factor
  ##########################################################################################################
  # explicit level order
  ordered_dataset <- credit_data %>%
    mutate(`Checking Balance Col` = factor(as.character(`Checking Balance Col`),
                                           levels = c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown"),
                                           ordered = TRUE))
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_order__factor.png",
                 plot = rt_explore_plot_value_totals(dataset = ordered_dataset,
                                                     variable = variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  # levels re-ordered by frequency
  ordered_dataset$`Checking Balance Col` <- fct_infreq(ordered_dataset$`Checking Balance Col`, ordered = TRUE)
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_order__ordered.png",
                 plot = rt_explore_plot_value_totals(dataset = ordered_dataset,
                                                     variable = variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  # original factor, ordered by count
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_order__ordered2.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     order_by_count = TRUE,
                                                     base_size = 11))

  ##########################################################################################################
  # the whole 'unknown' category replaced by NA
  ##########################################################################################################
  unknown_as_na <- credit_data %>%
    mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == "unknown",
                                           NA,
                                           as.character(`Checking Balance Col`)))
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_nas.png",
                 plot = rt_explore_plot_value_totals(dataset = unknown_as_na,
                                                     variable = variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))

  ##########################################################################################################
  # original factor column
  ##########################################################################################################
  # variable totals hidden
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_group_totals.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     show_variable_totals = FALSE,
                                                     base_size = 11))
  # plot pretty
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_pretty.png",
                 plot = rt_explore_plot_value_totals(dataset = rt_pretty_dataset(credit_data),
                                                     variable = rt_pretty_text(variable),
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  # plot with order
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_with_order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     order_by_count = TRUE,
                                                     base_size = 11))

  ##########################################################################################################
  # test without factor
  ##########################################################################################################
  credit_data$`Checking Balance Col` <- as.character(credit_data$`Checking Balance Col`)
  # plot without order
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_factor_no_order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  # plot with order
  test_save_plot(file_name = "data/rt_explore_plot_value_counts_no_factor_with_order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     order_by_count = TRUE,
                                                     base_size = 11))
})
test_that("rt_explore_plot_value_counts__facet", {
  # Snapshot tests for `rt_explore_plot_value_totals()` with a facet variable, with and without
  # a comparison variable, across the view types, simple mode, sum_by and count-distinct options.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  # make sure all col names have spaces
  colnames(credit_data) <- test_helper__column_names(credit_data)

  variable <- "Checking Balance Col"
  comparison_variable <- "Credit History Col"
  facet_variable <- "Default Col"

  # make sure it handles NAs
  credit_data[1, variable] <- NA
  credit_data[2, comparison_variable] <- NA
  credit_data[3, facet_variable] <- NA

  ##########################################################################################################
  # default base_size: ordering with and without a comparison variable
  ##########################################################################################################
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__var__facet__order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     facet_variable = facet_variable,
                                                     order_by_count = TRUE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__var__facet__comp__order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     order_by_count = TRUE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__var__facet__comp__no_order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     order_by_count = FALSE))

  ##########################################################################################################
  # variable + facet (no comparison variable)
  ##########################################################################################################
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     facet_variable = facet_variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     facet_variable = facet_variable,
                                                     simple_mode = TRUE,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var__conf.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     facet_variable = facet_variable,
                                                     view_type = "Confidence Interval",
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var__conf__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     facet_variable = facet_variable,
                                                     simple_mode = TRUE,
                                                     view_type = "Confidence Interval",
                                                     order_by_count = FALSE,
                                                     base_size = 11))

  # summing `Amount Col` instead of counting rows
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var_sum__bar.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     sum_by_variable = "Amount Col",
                                                     facet_variable = facet_variable,
                                                     view_type = "Bar",
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var_sum__bar__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     sum_by_variable = "Amount Col",
                                                     facet_variable = facet_variable,
                                                     simple_mode = TRUE,
                                                     view_type = "Bar",
                                                     order_by_count = FALSE,
                                                     base_size = 11))

  # counting distinct values of `Employment Duration Col`
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var_unique__bar.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     count_distinct_variable = "Employment Duration Col",
                                                     facet_variable = facet_variable,
                                                     view_type = "Bar",
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var_unique__bar__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     count_distinct_variable = "Employment Duration Col",
                                                     facet_variable = facet_variable,
                                                     view_type = "Bar",
                                                     simple_mode = TRUE,
                                                     order_by_count = FALSE,
                                                     base_size = 11))

  ##########################################################################################################
  # variable + comparison variable + facet
  ##########################################################################################################
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp__simple.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     simple_mode = TRUE,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp_conf.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     view_type = "Confidence Interval",
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp_conf2.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     view_type = "Confidence Interval - within Variable",
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp_stack.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     view_type = "Stack",
                                                     order_by_count = FALSE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp_stack_order.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     view_type = "Stack",
                                                     order_by_count = TRUE,
                                                     base_size = 11))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_comp_stack_perc.png",
                 plot = rt_explore_plot_value_totals(dataset = credit_data,
                                                     variable = variable,
                                                     comparison_variable = comparison_variable,
                                                     facet_variable = facet_variable,
                                                     view_type = "Stack Percent",
                                                     order_by_count = FALSE,
                                                     base_size = 11))

  ##########################################################################################################
  # test factor order: ordered factors for both the primary and the facet variable
  ##########################################################################################################
  factor_dataset <- credit_data %>%
    mutate(`Checking Balance Col` = factor(as.character(`Checking Balance Col`),
                                           levels = c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown"),
                                           ordered = TRUE),
           `Default Col` = factor(as.character(`Default Col`),
                                  levels = c("yes", "no"),
                                  ordered = TRUE))
  test_save_plot(file_name = "data/rt_explore_plot_value_totals__facet_var_factors.png",
                 plot = rt_explore_plot_value_totals(dataset = factor_dataset,
                                                     variable = variable,
                                                     facet_variable = facet_variable,
                                                     order_by_count = FALSE,
                                                     base_size = 11))
})
test_that("rt_explore_plot_value_counts: logical", {
  # Plots value totals after `Default Col` has been turned into a logical
  # column, first as the plotted variable and then as the comparison variable.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  # blank out one value before the conversion so the logical column has an NA
  credit_data[1, "Default Col"] <- NA
  # the comparison itself yields TRUE/FALSE and propagates the NA
  credit_data_logical <- credit_data %>%
    mutate(`Default Col` = `Default Col` == "yes")

  # logical column as the variable
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__logical.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data_logical,
      variable = "Default Col",
      base_size = 11
    )
  )

  # logical column as the comparison variable
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__logical_comparison.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data_logical,
      variable = "Checking Balance Col",
      comparison_variable = "Default Col",
      base_size = 11
    )
  )
})
test_that("rt_explore_plot_boxplot: logical", {
  # Box plots of `Amount Col` where the logical `Default Col` is used first as
  # the comparison variable and then as the color variable.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  # blank out one value before the conversion so the logical column has an NA
  credit_data[1, "Default Col"] <- NA
  # the comparison itself yields TRUE/FALSE and propagates the NA
  credit_data_logical <- credit_data %>%
    mutate(`Default Col` = `Default Col` == "yes")

  # logical column as the comparison variable
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot__logical.png",
    plot = rt_explore_plot_boxplot(
      dataset = credit_data_logical,
      variable = "Amount Col",
      comparison_variable = "Default Col"
    )
  )

  # logical column as the color variable
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot__logical_color.png",
    plot = rt_explore_plot_boxplot(
      dataset = credit_data_logical,
      variable = "Amount Col",
      comparison_variable = "Checking Balance Col",
      color_variable = "Default Col"
    )
  )
})
test_that("rt_explore_plot_scatter: logical", {
  # Scatter plots of `Amount Col` against `Months Loan Duration Col` with the
  # logical `Default Col` used as the size variable and as the color variable.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  # the comparison itself yields TRUE/FALSE
  credit_data_logical <- credit_data %>%
    mutate(`Default Col` = `Default Col` == "yes")

  # size plot is rendered before any NA is introduced into the logical column
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__logical_size.png",
    plot = rt_explore_plot_scatter(
      dataset = credit_data_logical,
      variable = "Amount Col",
      comparison_variable = "Months Loan Duration Col",
      size_variable = "Default Col"
    )
  )

  # remaining plots run against a logical column that contains one NA
  credit_data_logical[1, "Default Col"] <- NA

  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__logical.png",
    plot = rt_explore_plot_scatter(
      dataset = credit_data_logical,
      variable = "Amount Col",
      comparison_variable = "Months Loan Duration Col"
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__logical_color.png",
    plot = rt_explore_plot_scatter(
      dataset = credit_data_logical,
      variable = "Amount Col",
      comparison_variable = "Months Loan Duration Col",
      color_variable = "Default Col"
    )
  )
})
test_that("rt_explore_plot_value_counts_against_categorical", {
  # Plots value totals of `Checking Balance Col` against `Default Col`, first
  # with a plain factor using custom levels, then with ordered factors, and
  # finally with the comparison variable's level order swapped.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)

  ##########################################################################################################
  # test with factor
  # change the levels to verify that the original levels are retained if order_by_count==FALSE
  ##########################################################################################################
  checking_levels <- c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown")
  credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`,
                                               levels = checking_levels)
  # make sure it handles NAs
  credit_data[1, "Checking Balance Col"] <- NA

  variable <- "Checking Balance Col"
  comparison_variable <- "Default Col"
  sum_by_variable <- "Amount Col"

  # ordered by count, both kinds of totals shown
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comparison_variable_defaults.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = TRUE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE
    )
  )

  # same plot, but dataset and column names are passed through the rt_pretty_* helpers
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comparison_variable_pretty.png",
    plot = rt_explore_plot_value_totals(
      dataset = rt_pretty_dataset(credit_data),
      variable = rt_pretty_text(variable),
      comparison_variable = rt_pretty_text(comparison_variable),
      order_by_count = TRUE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE
    )
  )

  # variable and comparison variable trade places
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comparison_variable_swapped.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = comparison_variable,
      comparison_variable = variable,
      order_by_count = TRUE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE
    )
  )

  # not ordered by count
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comparison_variable_not_order_by_count.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = FALSE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE
    )
  )

  # variable totals hidden
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comp_var_not_show_group_totals.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = FALSE,
      show_variable_totals = FALSE,
      show_comparison_totals = TRUE
    )
  )

  # variable totals and comparison totals both hidden
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comp_var_not_show_comparison_totals.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = FALSE,
      show_variable_totals = FALSE,
      show_comparison_totals = FALSE
    )
  )

  ##########################################################################################################
  # ORDERED FACTORS
  ##########################################################################################################
  credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`,
                                               levels = checking_levels,
                                               ordered = TRUE)
  credit_data$`Default Col` <- factor(credit_data$`Default Col`,
                                      levels = c("no", "yes"),
                                      ordered = TRUE)

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack Percent",
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked_rev.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack Percent",
      order_by_count = FALSE,
      reverse_stack = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked_total.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked_total_rev.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      order_by_count = FALSE,
      reverse_stack = FALSE
    )
  )

  # stacked views that sum `Amount Col` instead of counting rows
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked_amount.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      sum_by_variable = sum_by_variable,
      order_by_count = TRUE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked_amount_rev.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      sum_by_variable = sum_by_variable,
      order_by_count = TRUE,
      reverse_stack = FALSE
    )
  )

  test_save_plot(
    file_name = "data/value_counts__ordered_factor_stacked_amount_no_var_totals.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      sum_by_variable = sum_by_variable,
      order_by_count = TRUE,
      show_variable_totals = FALSE
    )
  )

  test_save_plot(
    file_name = "data/value_counts__ordered_factor_stacked_amount_no_comp_totals.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      sum_by_variable = sum_by_variable,
      order_by_count = TRUE,
      show_comparison_totals = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked_amount2.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack",
      sum_by_variable = sum_by_variable,
      order_by_count = TRUE,
      show_dual_axes = TRUE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_conf.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Confidence Interval",
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_conf2.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Confidence Interval - within Variable",
      order_by_count = FALSE
    )
  )

  # change the order of the secondary/comparison variable
  credit_data$`Default Col` <- factor(credit_data$`Default Col`,
                                      levels = c("yes", "no"),
                                      ordered = TRUE)

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor__swapped_order.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_stacked__swapped_order.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Stack Percent",
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_conf__swapped_order.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Confidence Interval",
      order_by_count = FALSE
    )
  )

  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts__ordered_factor_conf2__swapped_order.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      view_type = "Confidence Interval - within Variable",
      order_by_count = FALSE
    )
  )
})
test_that("rt_explore_plot_value_totals__daul_axes", {
  # Plots value totals of the `diamonds` dataset with `show_dual_axes` toggled,
  # across count / sum-by and with / without a comparison variable.

  # counts, single variable
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__cut__daul.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      show_dual_axes = TRUE
    )
  )

  # counts, with a comparison variable: dual axes on, then off
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__comparison__dual.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      comparison_variable = "color",
      show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__comparison__no_dual.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      comparison_variable = "color",
      show_dual_axes = FALSE
    )
  )

  # sums of `price`, single variable
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__sum_by_total__daul.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      sum_by_variable = "price",
      show_dual_axes = TRUE
    )
  )

  # sums of `price`, with a comparison variable and no totals: dual axes on, then off
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__sum_comparison__daul.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      comparison_variable = "color",
      sum_by_variable = "price",
      show_variable_totals = FALSE,
      show_comparison_totals = FALSE,
      show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__sum_comparison__no_dual.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      comparison_variable = "color",
      sum_by_variable = "price",
      show_variable_totals = FALSE,
      show_comparison_totals = FALSE,
      show_dual_axes = FALSE
    )
  )

  # STACK - should NOT show dual axes regardless if stacked (all percentages)
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__comparison_stacked__dual.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      comparison_variable = "color",
      view_type = "Stack Percent",
      show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__diamonds__comparison_sum_stacked__dual.png",
    plot = rt_explore_plot_value_totals(
      dataset = diamonds,
      variable = "cut",
      comparison_variable = "color",
      sum_by_variable = "price",
      view_type = "Stack Percent",
      show_dual_axes = TRUE
    )
  )
})
test_that("rt_explore_plot_value_totals__conf_intervals", {
  # Walks through the combinations of variable / comparison variable /
  # sum-by variable and, for each, renders the view types that are plotted and
  # asserts (via expect_error) the view types that are expected to be rejected.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)

  ##########################################################################################################
  # test with factor
  # change the levels to verify that the original levels are retained if order_by_count==FALSE
  ##########################################################################################################
  custom_levels <- c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown")
  credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`,
                                               levels = custom_levels)
  # make sure it handles NAs
  credit_data[1, "Checking Balance Col"] <- NA

  variable <- "Checking Balance Col"
  comparison_variable <- "Housing Col"
  sum_by_variable <- "Amount Col"

  ##########################################################################################################
  # VARIABLE ONLY
  # Test c("Bar", "Confidence Interval")
  ##########################################################################################################
  test_save_plot(
    file_name = "data/plot_value_totals__var__bar__dual.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = NULL, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Bar", show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/plot_value_totals__var__CI.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = NULL, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Confidence Interval", show_dual_axes = FALSE
    )
  )
  test_save_plot(
    file_name = "data/plot_value_totals__var__CI__no_vals.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = NULL, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = FALSE, show_comparison_totals = TRUE,
      view_type = "Confidence Interval", show_dual_axes = FALSE
    )
  )

  # confidence intervals on a column whose cells hold several ", "-delimited values
  multi_value_credit_data <- credit_data %>%
    mutate(`Purpose Col` = case_when(
      `Purpose Col` == "car" ~ "car, car_test",
      `Purpose Col` == "business" ~ "business, business_test",
      TRUE ~ as.character(`Purpose Col`)
    )) %>%
    mutate(`Purpose Col` = as.factor(`Purpose Col`))
  test_save_plot(
    file_name = "data/plot_value_totals__conf__multi_value.png",
    plot = rt_explore_plot_value_totals(
      dataset = multi_value_credit_data,
      variable = "Purpose Col",
      comparison_variable = NULL,
      view_type = "Confidence Interval",
      multi_value_delimiter = ", "
    )
  )

  # these view types are expected to error when there is no comparison variable
  expect_error(rt_explore_plot_value_totals(
    dataset = credit_data, variable = variable,
    comparison_variable = NULL, sum_by_variable = NULL,
    order_by_count = FALSE,
    show_variable_totals = TRUE, show_comparison_totals = TRUE,
    view_type = "Confidence Interval - within Variable", show_dual_axes = TRUE
  ))
  expect_error(rt_explore_plot_value_totals(
    dataset = credit_data, variable = variable,
    comparison_variable = NULL, sum_by_variable = NULL,
    order_by_count = FALSE,
    show_variable_totals = TRUE, show_comparison_totals = TRUE,
    view_type = "Stack Percent", show_dual_axes = TRUE
  ))

  ##########################################################################################################
  # VARIABLE, SUM_BY_VARIABLE
  # Test c("Bar")
  ##########################################################################################################
  test_save_plot(
    file_name = "data/plot_value_totals__var__sum_by__bar.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = NULL, sum_by_variable = sum_by_variable,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Bar", show_dual_axes = TRUE
    )
  )
  # these view types are expected to error when summing by a variable
  expect_error(rt_explore_plot_value_totals(
    dataset = credit_data, variable = variable,
    comparison_variable = NULL, sum_by_variable = sum_by_variable,
    order_by_count = FALSE,
    show_variable_totals = TRUE, show_comparison_totals = TRUE,
    view_type = "Confidence Interval", show_dual_axes = TRUE
  ))
  expect_error(rt_explore_plot_value_totals(
    dataset = credit_data, variable = variable,
    comparison_variable = NULL, sum_by_variable = sum_by_variable,
    order_by_count = FALSE,
    show_variable_totals = TRUE, show_comparison_totals = TRUE,
    view_type = "Stack Percent", show_dual_axes = TRUE
  ))

  ##########################################################################################################
  # VARIABLE, COMPARISON_VARIABLE
  # Test c("Bar", "Confidence Interval", "Facet by Comparison", "Confidence Interval - within Variable", "Stack")
  ##########################################################################################################
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__bar.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = comparison_variable, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Bar", show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__CI.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = comparison_variable, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Confidence Interval", show_dual_axes = TRUE
    )
  )
  # the comparison column is passed as the facet variable here
  # (this call previously appeared twice, verbatim, writing the same file)
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__facet.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      facet_variable = comparison_variable, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Bar", show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__CI_var.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = comparison_variable, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Confidence Interval - within Variable", show_dual_axes = TRUE
    )
  )
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__stack.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = comparison_variable, sum_by_variable = NULL,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Stack Percent", show_dual_axes = TRUE
    )
  )

  ##########################################################################################################
  # VARIABLE, COMPARISON_VARIABLE, SUM_BY_VARIABLE
  # Test c("Bar", "Facet by Comparison", "Stack")
  ##########################################################################################################
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__sum__bar.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = comparison_variable, sum_by_variable = sum_by_variable,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Bar", show_dual_axes = TRUE
    )
  )
  # confidence-interval views are expected to error when summing by a variable
  expect_error(rt_explore_plot_value_totals(
    dataset = credit_data, variable = variable,
    comparison_variable = comparison_variable, sum_by_variable = sum_by_variable,
    order_by_count = FALSE,
    show_variable_totals = TRUE, show_comparison_totals = TRUE,
    view_type = "Confidence Interval", show_dual_axes = TRUE
  ))
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__sum__facet.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      facet_variable = comparison_variable, sum_by_variable = sum_by_variable,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Bar", show_dual_axes = TRUE
    )
  )
  expect_error(rt_explore_plot_value_totals(
    dataset = credit_data, variable = variable,
    comparison_variable = comparison_variable, sum_by_variable = sum_by_variable,
    order_by_count = FALSE,
    show_variable_totals = TRUE, show_comparison_totals = TRUE,
    view_type = "Confidence Interval - within Variable", show_dual_axes = TRUE
  ))
  test_save_plot(
    file_name = "data/plot_value_totals__var__comp__sum__stack.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data, variable = variable,
      comparison_variable = comparison_variable, sum_by_variable = sum_by_variable,
      order_by_count = FALSE,
      show_variable_totals = TRUE, show_comparison_totals = TRUE,
      view_type = "Stack Percent", show_dual_axes = TRUE
    )
  )
})
test_that("rt_explore_plot_value_counts_against_categorical_fill", {
  # "Stack Percent" view of `Checking Balance Col` against `Purpose Col`,
  # once on counts and once summing `Amount Col`.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  # make sure it handles NAs
  credit_data[1, "Checking Balance Col"] <- NA

  variable <- "Checking Balance Col"
  comparison_variable <- "Purpose Col"

  # stacked percentages of counts
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comparison_variable_purpose_stack.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      order_by_count = TRUE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE,
      view_type = "Stack Percent"
    )
  )

  # stacked percentages of summed amounts
  test_save_plot(
    file_name = "data/rt_explore_plot_value_counts_comparison_variable_purpose_stack_sum.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      sum_by_variable = "Amount Col",
      order_by_count = TRUE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE,
      view_type = "Stack Percent"
    )
  )
})
test_that("rt_explore_plot_value_totals_sums", {
  # Plots value totals that sum `Amount Col`, then repeats the exercise with the
  # sum-by column set to NA entirely or for whole categories.

  # Reads the credit fixture, applies the test column names and blanks out the
  # first `Checking Balance Col` value; every scenario below starts from this.
  load_credit_data <- function() {
    loaded <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
    colnames(loaded) <- test_helper__column_names(loaded)
    # make sure it handles NAs
    loaded[1, "Checking Balance Col"] <- NA
    loaded
  }

  credit_data <- load_credit_data()
  variable <- "Checking Balance Col"
  sum_by_variable <- "Amount Col"
  comparison_variable <- "Default Col"

  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_sums_defaults.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      sum_by_variable = sum_by_variable
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_sums_non_defaults.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = NULL,
      sum_by_variable = sum_by_variable,
      order_by_count = FALSE,
      show_variable_totals = FALSE,
      show_comparison_totals = FALSE,
      base_size = 16
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_sums_comparison_defaults.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      sum_by_variable = sum_by_variable,
      order_by_count = TRUE,
      show_variable_totals = TRUE,
      show_comparison_totals = TRUE,
      base_size = 14
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_sums_comparison_defaults_no_order.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      sum_by_variable = sum_by_variable,
      order_by_count = FALSE,
      show_variable_totals = TRUE,
      show_comparison_totals = FALSE,
      base_size = 14
    )
  )

  ##########################################################################################################
  # Make sure it handles all NAs for the SUM_BY, or entire categories of NAs
  ##########################################################################################################
  credit_data <- load_credit_data()
  comparison_variable <- "Credit History Col"
  facet_variable <- "Default Col"

  # every value of the sum-by column is NA
  credit_data$`Amount Col` <- NA

  results <- rt_explore_value_totals(
    dataset = credit_data,
    variable = variable,
    sum_by_variable = sum_by_variable
  )
  expect_identical(
    results$`Checking Balance Col` %>% rt_remove_val(NA),
    sort(unique(credit_data$`Checking Balance Col`))
  )
  expect_true(all(results$sum == 0))
  expect_true(all(results$percent == 0))
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_nas__checking_balance.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      sum_by_variable = sum_by_variable
    )
  )

  results <- rt_explore_value_totals(
    dataset = credit_data,
    variable = variable,
    second_variable = comparison_variable,
    sum_by_variable = sum_by_variable
  )
  expect_identical(
    sort(unique(results$`Checking Balance Col`)) %>% rt_remove_val(NA),
    sort(unique(credit_data$`Checking Balance Col`))
  )
  expect_identical(
    sort(unique(results$`Credit History Col`)),
    sort(unique(credit_data$`Credit History Col`))
  )
  expect_true(all(results$sum == 0))
  expect_true(all(results$percent == 0))
  expect_true(all(results$group_percent == 0))
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_nas__comparison.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      sum_by_variable = sum_by_variable
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_nas__facet.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      facet_variable = facet_variable,
      sum_by_variable = sum_by_variable
    )
  )

  # sum-by column is NA for every row of one facet value
  credit_data <- load_credit_data()
  credit_data$`Amount Col` <- ifelse(credit_data$`Default Col` == "yes",
                                     NA,
                                     credit_data$`Amount Col`)
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_facet_nas.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      facet_variable = facet_variable,
      sum_by_variable = sum_by_variable
    )
  )

  # sum-by column is NA for every row of one value of the variable
  credit_data <- load_credit_data()
  credit_data$`Amount Col` <- ifelse(credit_data$`Checking Balance Col` == "unknown",
                                     NA,
                                     credit_data$`Amount Col`)
  #credit_data %>% count(checking_balance, credit_history, default, wt=amount) %>% arrange(default, checking_balance, credit_history) %>% View()
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_variable_nas.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      facet_variable = facet_variable,
      sum_by_variable = sum_by_variable
    )
  )

  # sum-by column is NA for every row of one value of the comparison variable
  credit_data <- load_credit_data()
  credit_data$`Amount Col` <- ifelse(credit_data$`Credit History Col` == "good",
                                     NA,
                                     credit_data$`Amount Col`)
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals__all_comparison_nas.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = comparison_variable,
      facet_variable = facet_variable,
      sum_by_variable = sum_by_variable
    )
  )
})
test_that("rt_explore_plot_value_totals_multivalue_column", {
  # Checks that passing a multi-value delimiter does not change the totals when
  # no cell actually contains the delimiter, then plots a column in which some
  # cells do hold two ", "-delimited values.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)

  expected_totals <- rt_explore_value_totals(
    dataset = credit_data,
    variable = "Purpose Col",
    multi_value_delimiter = NULL
  )
  expect_equal(sum(expected_totals$percent), 1)

  # first test with a delimiter when none of the cols are delimited
  found_totals <- rt_explore_value_totals(
    dataset = credit_data,
    variable = "Purpose Col",
    multi_value_delimiter = ", "
  )
  expect_true(rt_are_dataframes_equal(expected_totals, found_totals))

  # build the expected sum-by totals by hand: weighted count, then share of the grand total
  duration_counts <- credit_data %>%
    count(`Purpose Col`, wt = `Months Loan Duration Col`, sort = TRUE)
  duration_total <- sum(duration_counts$n)
  expected_sum_by_variable <- duration_counts %>%
    rename(sum = n) %>%
    mutate(percent = sum / duration_total) %>%
    arrange(`Purpose Col`) %>%
    as.data.frame()
  expect_equal(sum(expected_sum_by_variable$percent), 1)

  found_sum_by_variable <- rt_explore_value_totals(
    dataset = credit_data,
    variable = "Purpose Col",
    sum_by_variable = "Months Loan Duration Col",
    multi_value_delimiter = ", "
  )
  expect_true(rt_are_dataframes_equal(expected_sum_by_variable, found_sum_by_variable))

  # turn two of the purposes into ", "-delimited multi-value cells
  credit_data <- credit_data %>%
    mutate(`Purpose Col` = case_when(
      `Purpose Col` == "car" ~ "car, car_test",
      `Purpose Col` == "business" ~ "business, business_test",
      TRUE ~ as.character(`Purpose Col`)
    )) %>%
    arrange(`Purpose Col`) %>%
    mutate(`Purpose Col` = as.factor(`Purpose Col`))

  variable <- "Purpose Col"
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_purose_multivalue.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = variable,
      comparison_variable = NULL,
      multi_value_delimiter = ", "
    )
  )
})
test_that("rt_explore_plot_value_totals_multivalue_bug", {
  # Regression test: there was a bug where, because the number of columns the
  # underlying dataset would separate into was hardcoded, any multi-value cell
  # with more than 2 values (e.g. a;b;c) would lose values (e.g. `c` wouldn't
  # get counted).
  ##########################################################################################################
  # CREATE THE DATASET
  ##########################################################################################################
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  # seq_len() rather than 1:nrow(), which would yield c(1, 0) for an empty data frame
  credit_data$`Id Col` <- seq_len(nrow(credit_data))
  # scatter a few NAs across the columns used below
  credit_data$`Purpose Col`[1] <- NA
  credit_data$`Id Col`[2] <- NA
  credit_data$`Amount Col`[3] <- NA
  credit_data$`Purpose Col`[600] <- NA

  # totals before any cell is turned into a multi-value cell
  original_totals <- rt_explore_value_totals(
    dataset = credit_data,
    variable = "Purpose Col",
    multi_value_delimiter = NULL
  )
  # original_sum_by <- credit_data %>% count(purpose, wt=amount)
  # original_sum_by2 <- credit_data %>% count(purpose, default, wt=amount)

  credit_data <- credit_data %>%
    mutate(`Purpose Col` = case_when(
      `Purpose Col` == "car" ~ "car, car_test",
      `Purpose Col` == "business" ~ "business, business_test2, business_test3, business_test4",
      TRUE ~ as.character(`Purpose Col`)
    ))
  # set 2 rows to only 3 values for business, rather than 4
  credit_data$`Purpose Col`[30] <- "business, business_test2, business_test3"
  credit_data$`Purpose Col`[31] <- "business, business_test2, business_test3"
  credit_data$`Purpose Col` <- as.factor(credit_data$`Purpose Col`)

  ##########################################################################################################
  # CREATE THE EXPECTED TOTALS
  ##########################################################################################################
  car_count <- (original_totals %>% filter(`Purpose Col` == "car"))$count
  business_count <- (original_totals %>% filter(`Purpose Col` == "business"))$count
  expected_totals <- data.frame(
    `Purpose Col` = c("car_test", "business_test2", "business_test3", "business_test4"),
    # subtract 2 from business_test4 because we changed index 30/31
    count = c(car_count, business_count, business_count, business_count - 2),
    stringsAsFactors = FALSE,
    check.names = FALSE
  ) %>%
    bind_rows(original_totals %>% mutate(`Purpose Col` = as.character(`Purpose Col`))) %>%
    arrange(`Purpose Col`)
  # recompute the percentages over the combined (original + added) counts
  expected_totals <- expected_totals %>% mutate(percent = count / sum(count))

  ##########################################################################################################
  # VALIDATE EXPECTED IS THE SAME AS ACTUAL
  ##########################################################################################################
  actual_totals <- rt_explore_value_totals(
    dataset = credit_data,
    variable = "Purpose Col",
    multi_value_delimiter = ", "
  )
  expect_true(rt_are_dataframes_equal(expected_totals, actual_totals))
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_purpose_multivalue_4_values.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = "Purpose Col",
      order_by_count = FALSE,
      multi_value_delimiter = ", "
    )
  )

  ##########################################################################################################
  # TEST COMPARISON
  ##########################################################################################################
  #credit_data %>% count(purpose, default)
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_purpose_multivalue_4_values_comp.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = "Purpose Col",
      comparison_variable = "Default Col",
      order_by_count = FALSE,
      multi_value_delimiter = ", "
    )
  )

  ##########################################################################################################
  # TEST SUM-BY
  ##########################################################################################################
  #credit_data %>% count(purpose, default, wt=amount)
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_purpose_multivalue_4_values_sum.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = "Purpose Col",
      comparison_variable = "Default Col",
      sum_by_variable = "Amount Col",
      order_by_count = FALSE,
      multi_value_delimiter = ", "
    ),
    size_inches = c(8, 20)
  )

  ##########################################################################################################
  # TEST COUNT-DISTINCT
  ##########################################################################################################
  test_save_plot(
    file_name = "data/rt_explore_plot_value_totals_purpose_multivalue_4_values_dis.png",
    plot = rt_explore_plot_value_totals(
      dataset = credit_data,
      variable = "Purpose Col",
      comparison_variable = "Default Col",
      #sum_by_variable = 'amount',
      count_distinct_variable = "Id Col",
      order_by_count = FALSE,
      multi_value_delimiter = ", "
    )
  )
})
test_that("rt_explore_plot_categoric_heatmap", {
  # Exercises rt_explore_plot_categoric_heatmap() on the credit and flights
  # data: default counts, counts without percentages, sum-by and
  # count-distinct. test_save_plot() is a project helper defined outside
  # this block.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(credit_data) <- test_helper__column_names(credit_data)
  # seq_len() instead of 1:nrow(): 1:0 would produce c(1, 0) on an empty frame.
  credit_data$`Id Col` <- seq_len(nrow(credit_data))
  # A copy of the purpose column, so x and y carry exactly the same values.
  credit_data$`Purpose2 Col` <- credit_data$`Purpose Col`

  # Manual cross-check: table(credit_data$purpose, credit_data$purpose2)
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__same_variables.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Purpose2 Col"
    ),
    size_inches = c(4, 4)
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__same_variables__no_percentages.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Purpose2 Col",
      include_percentages = FALSE
    ),
    size_inches = c(4, 4)
  )

  # Flights data with `dest` lumped via fct_lump(n = 10).
  # Manual cross-check: table(fct_lump(flights$dest, n = 10), flights$origin) %>% t()
  temp_df <- flights %>% mutate(dest = fct_lump(dest, n = 10))
  colnames(temp_df) <- test_helper__column_names(temp_df)

  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__flights.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = temp_df,
      x_variable = "Origin Col",
      y_variable = "Dest Col"
    )
  )
  # Same pair of variables with the axes swapped.
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__flights2.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = temp_df,
      x_variable = "Dest Col",
      y_variable = "Origin Col"
    )
  )

  # Manual cross-check:
  # flights %>% mutate(dest=fct_lump(dest, n = 10)) %>% count(dest, origin, wt=dep_delay)
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__flights__sum_by.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = temp_df,
      x_variable = "Dest Col",
      y_variable = "Origin Col",
      sum_by_variable = "Dep Delay Col"
    )
  )

  # Manual cross-check:
  # flights %>%
  #   mutate(dest=fct_lump(dest, n = 10)) %>%
  #   group_by(dest, origin) %>%
  #   summarise(n=n_distinct(flight))
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__flights__count_distinct.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = temp_df,
      x_variable = "Dest Col",
      y_variable = "Origin Col",
      count_distinct_variable = "Flight Col"
    )
  )

  # Remove the stray Rplots.pdf if one was left in the working directory.
  if (file.exists("Rplots.pdf")) {
    file.remove("Rplots.pdf")
  }
})
test_that("rt_explore_plot_categoric_heatmap_NAs", {
  # Checks that rt_explore_plot_categoric_heatmap() copes with missing values
  # in the x/y variables, the sum-by variable and the count-distinct variable,
  # once with character columns and once with factor columns.

  # Loads the credit data and injects the NAs shared by both halves of the test.
  load_credit_with_nas <- function(strings_as_factors) {
    credit_data <- read.csv("data/credit.csv", header = TRUE,
                            stringsAsFactors = strings_as_factors)
    # seq_len() instead of 1:nrow(): 1:0 would produce c(1, 0) on an empty frame.
    credit_data$id <- seq_len(nrow(credit_data))
    credit_data[1, "purpose"] <- NA
    credit_data[2, "purpose"] <- NA
    credit_data[2, "credit_history"] <- NA
    credit_data[4, "id"] <- NA
    # Copy first, then blank row 2. The original assigned the NA before the
    # column existed and then overwrote it with this copy, so that assignment
    # had no effect; row 2 is NA either way because `purpose` row 2 is NA.
    credit_data$purpose2 <- credit_data$purpose
    credit_data[2, "purpose2"] <- NA
    colnames(credit_data) <- test_helper__column_names(credit_data)
    credit_data
  }

  # ---- test with strings ----
  credit_data <- load_credit_with_nas(strings_as_factors = FALSE)

  # Manual cross-check: table(credit_data$purpose, credit_data$purpose2)
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__same_variables_NAs.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Purpose2 Col"
    ),
    size_inches = c(4, 4)
  )
  # Manual cross-check:
  # credit_data %>% group_by(purpose, credit_history) %>% summarise(sum=sum(amount))
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__sum_by__NA.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Credit History Col",
      sum_by_variable = "Amount Col"
    )
  )
  # Also blank a value in the sum-by column
  # (row 5 was noted by the original author as: car | poor | 4870).
  credit_data[5, "Amount Col"] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__sum_by__NA2.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Credit History Col",
      sum_by_variable = "Amount Col"
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__count_distinct__NA.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Credit History Col",
      count_distinct_variable = "Id Col"
    )
  )

  # ---- test with factors ----
  credit_data <- load_credit_with_nas(strings_as_factors = TRUE)

  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__same_variables_NAs_factor.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Purpose2 Col"
    ),
    size_inches = c(4, 4)
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__sum_by__NA_factor.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Credit History Col",
      sum_by_variable = "Amount Col"
    )
  )
  # Same sum-by NA injection as in the string half.
  credit_data[5, "Amount Col"] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__sum_by__NA2_factor.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Credit History Col",
      sum_by_variable = "Amount Col"
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_categoric_heatmap__count_distinct__NA_factor.png",
    plot = rt_explore_plot_categoric_heatmap(
      dataset = credit_data,
      x_variable = "Purpose Col",
      y_variable = "Credit History Col",
      count_distinct_variable = "Id Col"
    )
  )

  # Remove the stray Rplots.pdf if one was left in the working directory.
  if (file.exists("Rplots.pdf")) {
    file.remove("Rplots.pdf")
  }
})
test_that("rt_explore_plot_numeric_heatmap", {
  # Numeric-vs-numeric heatmap of age against amount, with default binning,
  # an explicit number of cuts, and custom cut sequences on either/both axes.
  credit_data <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  # Inject missing values: rows 1-2 of age and rows 2-3 of amount
  # (row 2 is therefore missing in both).
  credit_data[1:2, "age"] <- NA
  credit_data[2:3, "amount"] <- NA
  colnames(credit_data) <- test_helper__column_names(credit_data)

  test_save_plot(
    file_name = "data/rt_explore_plot_numeric_heatmap.png",
    plot = rt_explore_plot_numeric_heatmap(
      dataset = credit_data, x_variable = "Age Col", y_variable = "Amount Col"
    ),
    size_inches = c(4, 4)
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_numeric_heatmap__n_cut_10.png",
    plot = rt_explore_plot_numeric_heatmap(
      dataset = credit_data, x_variable = "Age Col", y_variable = "Amount Col",
      n_cuts = 10
    ),
    size_inches = c(4, 4)
  )

  # Hand-picked cut points for each axis.
  age_breaks <- c(18, 21, 25, 30, 50, 100)
  amount_breaks <- c(100, 500, 1000, 2500, 5000, 10000, 100000)

  test_save_plot(
    file_name = "data/rt_explore_plot_numeric_heatmap__custom_x_cut.png",
    plot = rt_explore_plot_numeric_heatmap(
      dataset = credit_data, x_variable = "Age Col", y_variable = "Amount Col",
      x_cut_sequence = age_breaks
    ),
    size_inches = c(4, 4)
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_numeric_heatmap__custom_y_cut.png",
    plot = rt_explore_plot_numeric_heatmap(
      dataset = credit_data, x_variable = "Age Col", y_variable = "Amount Col",
      y_cut_sequence = amount_breaks
    ),
    size_inches = c(4, 4)
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_numeric_heatmap__custom_x_y_cut.png",
    plot = rt_explore_plot_numeric_heatmap(
      dataset = credit_data, x_variable = "Age Col", y_variable = "Amount Col",
      x_cut_sequence = age_breaks,
      y_cut_sequence = amount_breaks
    ),
    size_inches = c(4, 4)
  )

  # Remove the stray Rplots.pdf if one was left in the working directory.
  if (file.exists("Rplots.pdf")) {
    file.remove("Rplots.pdf")
  }
})
test_that("rt_explore_plot_boxplot", {
  # Boxplots of loan duration: standalone, zoomed, log-scaled, split by a
  # comparison variable, colored by a second variable, and with NAs injected
  # into the grouping columns.
  dataset <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(dataset) <- test_helper__column_names(dataset)

  duration_col <- "Months Loan Duration Col"
  default_col <- "Default Col"
  checking_col <- "Checking Balance Col"

  # ---- single boxplot, no comparison variable ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard_zoom_min.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = NULL,
      y_zoom_min = 20, y_zoom_max = NULL, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard_zoom_max.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = NULL,
      y_zoom_min = NULL, y_zoom_max = 40, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard_zoom_both.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = NULL,
      y_zoom_min = 20, y_zoom_max = 40, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard__log_scale_y.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = NULL,
      log_scale_y = TRUE, base_size = 11
    )
  )

  # ---- split by a comparison variable ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison__simple.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      simple_mode = TRUE,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard__log_scale_y__comp.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      log_scale_y = TRUE, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison2.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- comparison variable plus color variable ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__defualt.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard__log_scale_y__color.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      log_scale_y = TRUE, base_size = 11
    )
  )
  # Per the original author: simple_mode should have no effect here.
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__defualt__simple.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      simple_mode = TRUE,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- NAs in the grouping columns (added cumulatively to a copy) ----
  temp_dataset <- dataset
  temp_dataset[1, default_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot__default__NAs.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot__default__NAs__simple.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = default_col,
      simple_mode = TRUE,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  temp_dataset[1, checking_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs2.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  temp_dataset[2, checking_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs3.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  temp_dataset[3, default_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs4.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- dataset and column names passed through the rt_pretty_* helpers ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_pretty.png",
    plot = rt_explore_plot_boxplot(
      dataset = rt_pretty_dataset(dataset),
      variable = rt_pretty_text(duration_col),
      comparison_variable = rt_pretty_text(default_col),
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- zoom with a comparison variable; NA is passed for the unused bound ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_zoom_min.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      y_zoom_min = 20,
      y_zoom_max = NA, # NA instead of NULL on purpose
      base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_zoom_max.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      y_zoom_min = NA, # NA instead of NULL on purpose
      y_zoom_max = 40,
      base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_zoom_both.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      y_zoom_min = 20, y_zoom_max = 40, base_size = 15
    )
  )
})
test_that("rt_explore_plot_boxplot_facet", {
  # Same boxplot scenarios as the non-facet test, with facet_variable added
  # to every call.
  dataset <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(dataset) <- test_helper__column_names(dataset)

  duration_col <- "Months Loan Duration Col"
  default_col <- "Default Col"
  checking_col <- "Checking Balance Col"
  facet_col <- "Phone Col"

  # A facet variable without a comparison variable is expected to error.
  expect_error(
    rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = NULL,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- comparison variable + facet ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_facet__simple.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      simple_mode = TRUE,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison2_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__default_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- NAs in the grouping/facet columns (added cumulatively to a copy) ----
  temp_dataset <- dataset
  temp_dataset[1, default_col] <- NA
  temp_dataset[2, facet_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot__NAs_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot__NAs_facet__simple.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      simple_mode = TRUE,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  temp_dataset[1, checking_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs2_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  temp_dataset[2, checking_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs3_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  temp_dataset[3, default_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs4_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = duration_col, comparison_variable = checking_col,
      color_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- dataset and column names passed through the rt_pretty_* helpers ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_pretty_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = rt_pretty_dataset(dataset),
      variable = rt_pretty_text(duration_col),
      comparison_variable = rt_pretty_text(default_col),
      facet_variable = rt_pretty_text(facet_col),
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # ---- zoom with facets; NA is passed for the unused bound ----
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_zoom_min_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = 20,
      y_zoom_max = NA, # NA instead of NULL on purpose
      base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_zoom_max_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = NA, # NA instead of NULL on purpose
      y_zoom_max = 40,
      base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison_zoom_both_facet.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = duration_col, comparison_variable = default_col,
      facet_variable = facet_col,
      y_zoom_min = 20, y_zoom_max = 40, base_size = 15
    )
  )
})
test_that("rt_explore_plot_boxplot - NA numeric values", {
  # Regression coverage for a bug noted by the original author: the record
  # count printed below the median line showed the total number of records in
  # the group, whereas it should show the number of non-NA values the boxplot
  # is actually based on.
  dataset <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)

  amount_col <- "amount"
  checking_col <- "checking_balance"
  default_col <- "default"

  # Blank `amount` for one whole checking-balance group ...
  dataset <- dataset %>%
    mutate(amount = ifelse(checking_balance == "< 0 DM", NA, amount))
  # ... and for a reproducible random sample of 300 row indices drawn from 1:1000.
  set.seed(42)
  na_rows <- sample(x = 1:1000, size = 300)
  dataset$amount[na_rows] <- NA

  # Manual cross-checks:
  # summary(dataset$amount)
  # nrow(dataset) - sum(is.na(dataset$amount))
  # dataset %>%
  #   group_by(checking_balance) %>%
  #   summarise(med=median(amount, na.rm = TRUE),
  #             cnt_non_na=sum(!is.na(amount)),
  #             cnt=n())

  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_standard__num_nas.png",
    plot = suppressWarnings(
      rt_explore_plot_boxplot(
        dataset = dataset, variable = amount_col, comparison_variable = NULL,
        y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
      )
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_comparison__num_nas.png",
    plot = suppressWarnings(
      rt_explore_plot_boxplot(
        dataset = dataset, variable = amount_col, comparison_variable = checking_col,
        y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
      )
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__defualt__num_nas.png",
    plot = rt_explore_plot_boxplot(
      dataset = dataset, variable = amount_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )

  # Additionally put an NA into the color variable.
  temp_dataset <- dataset
  temp_dataset[6, "default"] <- NA
  # Manual cross-check:
  # temp_dataset %>%
  #   group_by(checking_balance, default) %>%
  #   summarise(med=median(amount, na.rm = TRUE),
  #             cnt_non_na=sum(!is.na(amount)),
  #             cnt=n())
  test_save_plot(
    file_name = "data/rt_explore_plot_boxplot_color__NAs__num_nas.png",
    plot = rt_explore_plot_boxplot(
      dataset = temp_dataset, variable = amount_col, comparison_variable = checking_col,
      color_variable = default_col,
      y_zoom_min = NULL, y_zoom_max = NULL, base_size = 11
    )
  )
})
test_that("rt_explore_plot_histogram", {
  # Histograms of a single numeric variable: bin counts, x-axis zoom, pretty
  # names, and log-scaled x with and without a comparison variable.
  dataset <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(dataset) <- test_helper__column_names(dataset)

  duration_col <- "Months Loan Duration Col"
  amount_col <- "Amount Col"
  default_col <- "Default Col"

  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_standard.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col,
      num_bins = 30, x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_pretty.png",
    plot = rt_explore_plot_histogram(
      dataset = rt_pretty_dataset(dataset), variable = rt_pretty_text(duration_col),
      num_bins = 30, x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_num_bins.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col,
      num_bins = 15, x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )

  # ---- x-axis zoom ----
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_standard_zoom_min.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col,
      num_bins = 30, x_zoom_min = 20, x_zoom_max = NULL, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_standard_zoom_max.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col,
      num_bins = 30, x_zoom_min = NULL, x_zoom_max = 40, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_standard_zoom_both.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col,
      num_bins = 30, x_zoom_min = 20, x_zoom_max = 40, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_standard_zoom_both_num_bins.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col,
      num_bins = 15, x_zoom_min = 20, x_zoom_max = 40, base_size = 15
    )
  )

  # ---- log-scaled x axis ----
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram__log_scale_x__comp.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = amount_col, comparison_variable = default_col,
      num_bins = 15, log_scale_x = TRUE, base_size = 11
    )
  )

  # From here on the comparison column contains one NA.
  dataset[1, default_col] <- NA
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram__log_scale_x.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = amount_col,
      num_bins = 15, log_scale_x = TRUE, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram__log_scale_x__comp__NA.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = amount_col, comparison_variable = default_col,
      num_bins = 15, log_scale_x = TRUE, base_size = 11
    )
  )
})
test_that("rt_explore_plot_histogram_with_categoric_comparison", {
  # Histograms of loan duration broken out by checking balance, covering bin
  # counts, pretty names and x-axis zoom.
  dataset <- read.csv("data/credit.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(dataset) <- test_helper__column_names(dataset)

  duration_col <- "Months Loan Duration Col"
  checking_col <- "Checking Balance Col"

  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_standard.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      num_bins = 30, x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_pretty.png",
    plot = rt_explore_plot_histogram(
      dataset = rt_pretty_dataset(dataset),
      variable = rt_pretty_text(duration_col),
      comparison_variable = rt_pretty_text(checking_col),
      num_bins = 30, x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_num_bins.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      num_bins = 15, x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )
  # No num_bins is passed here; a `density = TRUE` argument was disabled by
  # the original author, so this call relies on the function's defaults.
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_density.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      # density = TRUE,
      x_zoom_min = NULL, x_zoom_max = NULL, base_size = 11
    )
  )

  # ---- x-axis zoom ----
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_zoom_min.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      num_bins = 30, x_zoom_min = 20, x_zoom_max = NULL, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_zoom_max.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      num_bins = 30, x_zoom_min = NULL, x_zoom_max = 40, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_zoom_both.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      num_bins = 30, x_zoom_min = 20, x_zoom_max = 40, base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_histogram_with_comp_zoom_both_num_bins.png",
    plot = rt_explore_plot_histogram(
      dataset = dataset, variable = duration_col, comparison_variable = checking_col,
      num_bins = 15, x_zoom_min = 20, x_zoom_max = 40, base_size = 15
    )
  )
})
test_that("rt_explore_plot_scatterplot", {
  # Scatter plots of median house value against median income from the
  # housing data: alpha, axis swap, pretty names, x/y zoom and log scales.
  dataset <- read.csv("data/housing.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(dataset) <- test_helper__column_names(dataset)

  house_value_col <- "Median House Value Col"
  income_col <- "Median Income Col"

  test_save_plot(
    file_name = "data/rt_explore_plot_scatter.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  # Same plot with the two variables exchanged.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_swap.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = income_col, comparison_variable = house_value_col,
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_pretty.png",
    plot = rt_explore_plot_scatter(
      dataset = rt_pretty_dataset(dataset),
      variable = rt_pretty_text(house_value_col),
      comparison_variable = rt_pretty_text(income_col),
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_alpha.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.1,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 15
    )
  )

  # ---- x-axis zoom ----
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_x_zoom_min.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = 5, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_x_zoom_max.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = 10,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_x_zoom_both.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = 5, x_zoom_max = 10,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )

  # ---- y-axis zoom ----
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_y_zoom_min.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = 200000, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_y_zoom_max.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = 300000,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_y_zoom_both.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = 200000, y_zoom_max = 300000,
      base_size = 11
    )
  )

  # ---- zoom on both axes ----
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_zoom_min_both.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.3,
      x_zoom_min = 10, x_zoom_max = NULL,
      y_zoom_min = 200000, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_all.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.1,
      x_zoom_min = 5, x_zoom_max = 10,
      y_zoom_min = 200000, y_zoom_max = 300000,
      base_size = 15
    )
  )

  # ---- log scales ----
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_log_scale_x.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.1,
      log_scale_x = TRUE,
      base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_log_scale_y.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.1,
      log_scale_y = TRUE,
      base_size = 15
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_log_scale_xy.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.1,
      log_scale_x = TRUE,
      log_scale_y = TRUE,
      base_size = 15
    )
  )
  # Zoom and log scales combined.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_all_log.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset, variable = house_value_col, comparison_variable = income_col,
      alpha = 0.1,
      x_zoom_min = 5, x_zoom_max = 10,
      y_zoom_min = 200000, y_zoom_max = 300000,
      log_scale_x = TRUE,
      log_scale_y = TRUE,
      base_size = 15
    )
  )
})
test_that("rt_explore_plot_scatterplot_size_color", {
  # Scatter plots of median house value against median income with an
  # additional color and/or size variable, numeric as well as categoric.
  dataset <- read.csv("data/housing.csv", header = TRUE, stringsAsFactors = TRUE)
  colnames(dataset) <- test_helper__column_names(dataset)
  variable <- "Median House Value Col"
  comparison_variable <- "Median Income Col"

  # The plot is passed as `plot =` (it was positional here) to match every
  # other test_save_plot() call in this file.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_color.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset,
      variable = variable,
      comparison_variable = comparison_variable,
      color_variable = "Ocean Proximity Col",
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_size.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset,
      variable = variable,
      comparison_variable = comparison_variable,
      size_variable = "Housing Median Age Col",
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  # The same column that is used as a color variable elsewhere in this test
  # is passed as the size variable here.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_size__categoric.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset,
      variable = variable,
      comparison_variable = comparison_variable,
      size_variable = "Ocean Proximity Col",
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_size_color_numeric.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset,
      variable = variable,
      comparison_variable = comparison_variable,
      color_variable = "Total Rooms Col",
      size_variable = "Housing Median Age Col",
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter_size_color_categoric.png",
    plot = rt_explore_plot_scatter(
      dataset = dataset,
      variable = variable,
      comparison_variable = comparison_variable,
      color_variable = "Ocean Proximity Col",
      size_variable = "Housing Median Age Col",
      alpha = 0.3,
      x_zoom_min = NULL, x_zoom_max = NULL,
      y_zoom_min = NULL, y_zoom_max = NULL,
      base_size = 11
    )
  )
})
test_that("rt_explore_plot_scatterplot_jitter", {
    # Saves a jittered scatter plot of iris in which `variable` and `comparison_variable`
    # are deliberately the same column.
    dataset <- iris
    sepal_length <- 'Sepal.Length'
    test_save_plot(file_name='data/rt_explore_plot_scatter_jitter.png',
                   plot=rt_explore_plot_scatter(dataset=dataset,
                                                variable=sepal_length,
                                                comparison_variable=sepal_length,
                                                alpha=0.1,
                                                jitter=TRUE,
                                                base_size=15))
})
test_that("rt_explore_plot_aggregate_2_numerics", {
    dataset <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(dataset) <- test_helper__column_names(dataset)
    # make sure it handles NAs
    dataset[1, 'Months Loan Duration Col'] <- NA

    # Saves one plot. `dataset`, `variable` and `comparison_variable` are read from the enclosing
    # test environment at call time, so reassigning them below switches the columns being plotted.
    # Every case uses show_labels=TRUE and base_size=11. Zoom / log-scale arguments are forwarded
    # through `...` only when a case supplies them. With `suppress_warnings=TRUE` only warnings
    # raised while building the plot are silenced; `test_save_plot` itself is not wrapped.
    save_aggregate_plot <- function(file_name,
                                    aggregation_function,
                                    aggregation_function_name,
                                    aggregation_count_minimum,
                                    show_resampled_confidence_interval,
                                    show_points,
                                    ...,
                                    suppress_warnings=FALSE) {
        warning_wrapper <- if (suppress_warnings) suppressWarnings else identity
        test_save_plot(file_name=file_name,
                       plot=warning_wrapper(rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                                                 variable=variable,
                                                                                 comparison_variable=comparison_variable,
                                                                                 aggregation_function=aggregation_function,
                                                                                 aggregation_function_name=aggregation_function_name,
                                                                                 aggregation_count_minimum=aggregation_count_minimum,
                                                                                 show_resampled_confidence_interval=show_resampled_confidence_interval,
                                                                                 show_points=show_points,
                                                                                 show_labels=TRUE,
                                                                                 ...,
                                                                                 base_size=11)))
    }

    # Note on aggregation_count_minimum=30: need at least 30 samples, otherwise when we bootstrap
    # resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value.
    # The `*_0_min` cases use a minimum of 0 instead.

    ##########################################################################################################
    # Amount vs. Months Loan Duration - Geometric Mean
    ##########################################################################################################
    variable <- 'Amount Col'
    comparison_variable <- 'Months Loan Duration Col'
    aggregation_function <- rt_geometric_mean
    aggregation_function_name <- "Geometric Mean"

    # no aggregation function supplied (file names label these cases "boxplot")
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__boxplot_0_min.png',
                        aggregation_function=NULL,
                        aggregation_function_name=NULL,
                        aggregation_count_minimum=0,
                        show_resampled_confidence_interval=TRUE,
                        show_points=TRUE,
                        x_zoom_min=NULL, x_zoom_max=NULL, y_zoom_min=NULL, y_zoom_max=NULL,
                        suppress_warnings=TRUE)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__boxplot_30_min.png',
                        aggregation_function=NULL,
                        aggregation_function_name=NULL,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=TRUE,
                        show_points=TRUE,
                        x_zoom_min=NULL, x_zoom_max=NULL, y_zoom_min=NULL, y_zoom_max=NULL)

    # geometric mean, with and without confidence interval / points / zoom
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__geometric_mean.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=TRUE,
                        show_points=TRUE,
                        x_zoom_min=NULL, x_zoom_max=NULL, y_zoom_min=NULL, y_zoom_max=NULL)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__geometric_mean__2.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=FALSE,
                        show_points=FALSE,
                        x_zoom_min=10, x_zoom_max=40, y_zoom_min=1900, y_zoom_max=5000)

    # log scales (no zoom arguments are passed in these cases)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__scale_log_x.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=FALSE,
                        show_points=FALSE,
                        log_scale_x=TRUE)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__scale_log_y.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=FALSE,
                        show_points=FALSE,
                        log_scale_y=TRUE)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__scale_log_xy.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=FALSE,
                        show_points=TRUE,
                        log_scale_x=TRUE,
                        log_scale_y=TRUE)

    ##########################################################################################################
    # Months Loan Duration vs. Existing Loans Count - Mean
    ##########################################################################################################
    variable <- 'Months Loan Duration Col'
    comparison_variable <- 'Existing Loans Count Col'
    # NA-tolerant mean (row 1 of 'Months Loan Duration Col' was set to NA above)
    aggregation_function <- function(values) mean(values, na.rm = TRUE)
    aggregation_function_name <- "Mean"

    # no aggregation function supplied; both of these cases suppress plot-construction warnings
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__boxplot_0_min.png',
                        aggregation_function=NULL,
                        aggregation_function_name=NULL,
                        aggregation_count_minimum=0,
                        show_resampled_confidence_interval=TRUE,
                        show_points=TRUE,
                        x_zoom_min=NULL, x_zoom_max=NULL, y_zoom_min=NULL, y_zoom_max=NULL,
                        suppress_warnings=TRUE)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__boxplot_30_min.png',
                        aggregation_function=NULL,
                        aggregation_function_name=NULL,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=TRUE,
                        show_points=TRUE,
                        x_zoom_min=NULL, x_zoom_max=NULL, y_zoom_min=NULL, y_zoom_max=NULL,
                        suppress_warnings=TRUE)

    # mean, with and without confidence interval / points / zoom
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__mean.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=TRUE,
                        show_points=TRUE,
                        x_zoom_min=NULL, x_zoom_max=NULL, y_zoom_min=NULL, y_zoom_max=NULL)
    save_aggregate_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__mean__2.png',
                        aggregation_function=aggregation_function,
                        aggregation_function_name=aggregation_function_name,
                        aggregation_count_minimum=30,
                        show_resampled_confidence_interval=FALSE,
                        show_points=FALSE,
                        x_zoom_min=-1, x_zoom_max=3, y_zoom_min=4, y_zoom_max=25)
})
test_that("rt_explore_plot_categoric_numeric_aggregation", {
    dataset <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(dataset) <- test_helper__column_names(dataset)
    categoric_variable <- 'Checking Balance Col'
    numeric_variable <- 'Amount Col'

    # make sure it handles NAs: row 1 is missing both values, row 2 only the category,
    # row 3 only the number
    dataset[c(1, 2), categoric_variable] <- NA
    dataset[c(1, 3), numeric_variable] <- NA

    # Saves one plot. `dataset`, `categoric_variable`, `numeric_variable`, `color_variable` and
    # `facet_variable` are read from the enclosing test environment at call time, so each section
    # below only needs to reassign `color_variable` / `facet_variable` before its calls.
    # Optional arguments (e.g. `simple_mode`) are forwarded through `...` only when supplied.
    save_aggregation_plot <- function(file_name,
                                      aggregation_type,
                                      show_variable_totals,
                                      show_comparison_totals,
                                      base_size,
                                      ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                          categoric_variable=categoric_variable,
                                                                          numeric_variable=numeric_variable,
                                                                          aggregation_type=aggregation_type,
                                                                          color_variable=color_variable,
                                                                          facet_variable=facet_variable,
                                                                          show_variable_totals=show_variable_totals,
                                                                          show_comparison_totals=show_comparison_totals,
                                                                          ...,
                                                                          base_size=base_size))
    }

    ##########################################################################################################
    # Categoric/Numeric - No Color or Facet
    ##########################################################################################################
    color_variable <- NULL
    facet_variable <- NULL

    # manual check: dataset %>% count(checking_balance, wt=amount) %>% mutate(p=n/sum(n))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total.png',
                          aggregation_type='Total',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total_s.png',
                          aggregation_type='Total',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, simple_mode=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total2.png',
                          aggregation_type='Total',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean.png',
                          aggregation_type='Mean',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean_s.png',
                          aggregation_type='Mean',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, simple_mode=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean2.png',
                          aggregation_type='Mean',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec_s.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, simple_mode=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec2.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median.png',
                          aggregation_type='Median',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median_s.png',
                          aggregation_type='Median',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, simple_mode=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median2.png',
                          aggregation_type='Median',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot_s.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, simple_mode=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot2.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    ##########################################################################################################
    # Categoric/Numeric - Color
    ##########################################################################################################
    color_variable <- 'Default Col'
    facet_variable <- NULL

    # manual check: dataset %>% count(checking_balance, default, wt=amount) %>% mutate(p=n/sum(n))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__color.png',
                          aggregation_type='Total',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__color2.png',
                          aggregation_type='Total',
                          show_variable_totals=FALSE, show_comparison_totals=FALSE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__color.png',
                          aggregation_type='Mean',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__color_s.png',
                          aggregation_type='Mean',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, simple_mode=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__color2.png',
                          aggregation_type='Mean',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, default) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__color.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__color2.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__color.png',
                          aggregation_type='Median',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__color2.png',
                          aggregation_type='Median',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__color.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__color2.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    ##########################################################################################################
    # Categoric/Numeric - Facet
    ##########################################################################################################
    facet_variable <- 'Default Col'
    color_variable <- NULL

    # manual check: dataset %>% count(checking_balance, default, wt=amount) %>% mutate(p=n/sum(n))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet.png',
                          aggregation_type='Total',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet2.png',
                          aggregation_type='Total',
                          show_variable_totals=FALSE, show_comparison_totals=FALSE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet.png',
                          aggregation_type='Mean',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet2.png',
                          aggregation_type='Mean',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, default) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__facet.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__facet2.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet.png',
                          aggregation_type='Median',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet2.png',
                          aggregation_type='Median',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet2.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    ##########################################################################################################
    # Categoric/Numeric - Color/Facet
    ##########################################################################################################
    facet_variable <- 'Default Col'
    color_variable <- 'Phone Col'

    # manual check: dataset %>% count(checking_balance, phone, default, wt=amount) %>% mutate(p=n/sum(n))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet_color.png',
                          aggregation_type='Total',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet_color2.png',
                          aggregation_type='Total',
                          show_variable_totals=FALSE, show_comparison_totals=FALSE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, phone, default) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet_color.png',
                          aggregation_type='Mean',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet_color2.png',
                          aggregation_type='Mean',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, phone, default) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave__facet_color.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave__facet_color2.png',
                          aggregation_type='Average Value Per Record',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    # manual check: dataset %>% group_by(checking_balance, phone, default) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet_color.png',
                          aggregation_type='Median',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet_color2.png',
                          aggregation_type='Median',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)

    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet_color.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=TRUE, show_comparison_totals=TRUE, base_size=11)
    save_aggregation_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet_color2.png',
                          aggregation_type='Boxplot',
                          show_variable_totals=FALSE, show_comparison_totals=TRUE, base_size=15)
})
test_that("rt_explore_plot_scatterplot_labels", {
  # Use only the 2002 rows of gapminder, then replace the column names with
  # whatever the shared test helper returns for this data frame.
  gapminder_2002 <- filter(data.frame(gapminder), year == 2002)
  colnames(gapminder_2002) <- test_helper__column_names(gapminder_2002)

  # Columns passed as `variable` / `comparison_variable` in every plot below.
  primary_column <- "LifeExp Col"
  comparison_column <- "GdpPercap Col"

  # Baseline: no labels, no size mapping, no zoom.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__defaults.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column
    )
  )

  # Single label variable (country).
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__country_label.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = "Country Col"
    )
  )

  # Country label combined with a size variable.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__country_label__size.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = "Country Col",
      size_variable = "Pop Col"
    )
  )

  # Country label combined with x/y zoom limits.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__country_label__zoom.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = "Country Col",
      x_zoom_min = 25000,
      x_zoom_max = 40000,
      y_zoom_min = 75
    )
  )

  # Country label with both a size variable and zoom limits.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__country_label__zoom_size.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = "Country Col",
      size_variable = "Pop Col",
      x_zoom_min = 25000,
      x_zoom_max = 40000,
      y_zoom_min = 75
    )
  )

  # Label taken from the same column that is plotted as `variable`.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__lifeExp_label.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = "LifeExp Col"
    )
  )

  # Label taken from the population column.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__pop_label.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = "Pop Col"
    )
  )

  # Two label variables: country first, then year.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__multi_label.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = c("Country Col", "Year Col")
    )
  )

  # Two label variables in the opposite order, plus size and zoom.
  test_save_plot(
    file_name = "data/rt_explore_plot_scatter__gapminder__multi2_label.png",
    plot = rt_explore_plot_scatter(
      dataset = gapminder_2002,
      variable = primary_column,
      comparison_variable = comparison_column,
      label_variables = c("Year Col", "Country Col"),
      size_variable = "Pop Col",
      x_zoom_min = 25000,
      x_zoom_max = 40000,
      y_zoom_min = 75
    )
  )
})
test_that("rt_plot_funnel", {
  # Scenario 1: four steps with title, subtitle and caption supplied.
  labelled_steps <- c("Step Z", "Step Y", "Step X", "Step W")
  labelled_values <- c(200, 60, 20, 10)

  test_save_plot(
    file_name = "data/rt_plot_funnel_proportionate_FALSE.png",
    plot = rt_funnel_plot(
      step_names = labelled_steps,
      step_values = labelled_values,
      title = "My title",
      subtitle = "My Subtitle",
      caption = "My Caption",
      proportionate = FALSE
    )
  )
  test_save_plot(
    file_name = "data/rt_plot_funnel_proportionate_TRUE.png",
    plot = rt_funnel_plot(
      step_names = labelled_steps,
      step_values = labelled_values,
      title = "My title",
      subtitle = "My Subtitle",
      caption = "My Caption",
      proportionate = TRUE
    )
  )

  # Scenario 2: different step names and values, no title/subtitle/caption
  # arguments passed.
  plain_steps <- c("Step W", "Step X", "Step Y", "Step Z")
  plain_values <- c(2000, 1111, 50, 11)

  test_save_plot(
    file_name = "data/rt_plot_funnel_2_proportionate_FALSE.png",
    plot = rt_funnel_plot(
      step_names = plain_steps,
      step_values = plain_values,
      proportionate = FALSE
    )
  )
  test_save_plot(
    file_name = "data/rt_plot_funnel_2_proportionate_TRUE.png",
    plot = rt_funnel_plot(
      step_names = plain_steps,
      step_values = plain_values,
      proportionate = TRUE
    )
  )
})
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.