# tests/testthat/test_explore.R

context('Exploratory Analysis')
library(testthat)
library(ggplot2)
library(dplyr)
# turn off dplyr's informational message about summarise() grouping so test output stays quiet;
# spelled as FALSE because `F` is an ordinary variable that can be reassigned
options(dplyr.summarise.inform=FALSE)
library(lubridate)
library(gapminder)
library(nycflights13)
library(forcats)
# library(scales)
source('test_helpers.R')


test_that("rt_explore_categoric_summary_NAs", {

    # replace one whole column with NA so the summary has to cope with a column holding no values
    iris_all_na <- iris
    iris_all_na[['Petal.Width']] <- rep(NA, times=nrow(iris_all_na))

    # NOTE(review): the fixture name says "numeric_summary" while the function under test is the
    # categoric summary -- confirm this pairing is intentional
    rds_file <- 'data/rt_explore_numeric_summary_iris_missing.RDS'
    summary_df <- rt_explore_categoric_summary(dataset=iris_all_na)

    expect_true(rt_are_dataframes_equal_from_file(dataframe1=summary_df, rds_file=rds_file))
})

test_that("rt_explore_numeric_summary", {

    # iris with a few NAs and zeros injected into the numeric columns
    iris_modified <- iris
    iris_modified[1:3, 'Sepal.Width'] <- NA
    iris_modified[1, 'Petal.Length'] <- NA
    iris_modified[1:3, 'Sepal.Length'] <- 0
    iris_modified[1, 'Petal.Width'] <- 0

    # summary of the modified iris data must match the stored snapshot
    expect_true(rt_are_dataframes_equal_from_file(
        dataframe1=rt_explore_numeric_summary(dataset=iris_modified),
        rds_file='data/rt_explore_numeric_summary_iris.RDS'))

    # same check against the `flights` dataset
    expect_true(rt_are_dataframes_equal_from_file(
        dataframe1=rt_explore_numeric_summary(dataset=flights),
        rds_file='data/rt_explore_numeric_summary_flights.RDS'))
})

test_that("rt_explore_categoric_summary", {

    # iris with the first three values of the factor column set to NA
    iris_modified <- iris
    iris_modified[1:3, 'Species'] <- NA

    # summary of the modified iris data must match the stored snapshot
    expect_true(rt_are_dataframes_equal_from_file(
        dataframe1=rt_explore_categoric_summary(dataset=iris_modified),
        rds_file='data/rt_explore_categoric_summary_iris.RDS'))

    # same check against the `flights` dataset
    expect_true(rt_are_dataframes_equal_from_file(
        dataframe1=rt_explore_categoric_summary(dataset=flights),
        rds_file='data/rt_explore_categoric_summary_flights.RDS'))
})

test_that("rt_explore_correlations_credit", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # inject a missing value to make sure NAs are handled
    credit_data[1, 'months_loan_duration'] <- NA

    # builds the correlation plot (credit_data unless another dataset is given) and saves it;
    # any extra arguments are forwarded to rt_explore_plot_correlations
    save_correlation_plot <- function(file_name, dataset=credit_data, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_correlations(dataset=dataset, ...))
    }

    # correlation table with default parameters, compared against the stored snapshot
    correlation_results <- rt_explore_correlations(dataset=credit_data)
    expect_true(rt_are_dataframes_equal_from_file(
        dataframe1=data.frame(correlation_results$correlations),
        rds_file='data/rt_correlations_credit.RDS'))

    # plot with default parameters
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit.png')

    # plot of the dataset after passing it through rt_pretty_dataset
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_pretty.png',
                          dataset=rt_pretty_dataset(credit_data))

    # custom base_size
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_base_size.png',
                          base_size=16)

    # custom p_value_threshold
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_pvalue.png',
                          p_value_threshold=0.3)

    # custom corr_threshold
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_corr_treshold.png',
                          corr_threshold=0.115)

    # custom corr_threshold and p_value_threshold together
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_both_parameters.png',
                          corr_threshold=0.115,
                          p_value_threshold=0.3)
})

test_that("rt_explore_correlations_credit_min_missing_nas_in_column", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # only include cols that have <= x% missing values
    max_missing_perc <- 0.05
    num_rows <- nrow(credit_data)

    # one row sample just under the missing-value threshold and one just over it; each draw
    # is seeded separately so the selected rows are reproducible
    set.seed(42)
    rows_to_make_na_valid <- sample(num_rows, num_rows * (max_missing_perc - 0.01))
    set.seed(43)
    rows_to_make_na_invalid <- sample(num_rows, num_rows * (max_missing_perc + 0.01))

    # plots should include months_loan_duration (below the threshold) and exclude age (above it)
    credit_data[rows_to_make_na_valid, 'months_loan_duration'] <- NA
    credit_data[rows_to_make_na_invalid, 'age'] <- NA

    #correlations <- rt_explore_correlations(dataset=credit_data, max_missing_column_perc=max_missing_perc)

    # builds the correlation plot for credit_data with the missing-column limit applied and
    # saves it; any extra arguments are forwarded to rt_explore_plot_correlations
    save_correlation_plot <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_correlations(dataset=credit_data,
                                                         max_missing_column_perc=max_missing_perc,
                                                         ...))
    }

    # only the missing-column limit
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_2.png')

    # custom p_value_threshold
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_pvalue_2.png',
                          p_value_threshold=0.3)

    # custom corr_threshold
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_corr_treshold_2.png',
                          corr_threshold=0.115)

    # custom corr_threshold and p_value_threshold together
    save_correlation_plot(file_name='data/rt_explore_plot_correlations_credit_both_parameters_2.png',
                          corr_threshold=0.115,
                          p_value_threshold=0.3)
})

test_that("rt_explore_value_totals", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # make sure all col names have spaces
    colnames(credit_data) <- test_helper__column_names(credit_data)

    ##########################################################################################################
    # test with factor
    # change the levels to verify that the original levels are retained if order_by_count==FALSE
    ##########################################################################################################
    custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)
    credit_data$`Id Col` <- 1:nrow(credit_data)
    # make sure it handles NAs
    credit_data[1, 'Checking Balance Col'] <- NA
    credit_data[2, 'Default Col'] <- NA
    credit_data[3, 'Id Col'] <- NA
    credit_data[4, 'Amount Col'] <- NA

    variable <- 'Checking Balance Col'
    second_variable <- 'Default Col'
    sum_by_variable <- 'Amount Col'
    count_distinct <- 'Id Col'

    # cannot use sum_by_variable and count_distict at the same time
    expect_error(rt_explore_value_totals(dataset=credit_data,
                                         variable=variable,
                                         sum_by_variable=sum_by_variable,
                                         count_distinct=count_distinct))
    ####
    # single var
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    actual_df <- rt_explore_value_totals(dataset=credit_data, variable=variable)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), 1000)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - non factor
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`) %>%
                                        rename(count = n)) %>% as.data.frame() %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        arrange(`Checking Balance Col`)
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    actual_df <- rt_explore_value_totals(dataset=credit_data %>%
                                             mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
                                         variable=variable)
    expect_false(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(c(sort(actual_df$`Checking Balance Col`), NA), actual_df$`Checking Balance Col`)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), 1000)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - count-distinct
    # ALL NAs - Should be counted as 1 for each group
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        mutate(`Id Col` = NA) %>%
                                        group_by(`Checking Balance Col`) %>%
                                        summarise(count = n_distinct(`Id Col`)) %>%
                                        as.data.frame())
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    actual_df <- rt_explore_value_totals(dataset=credit_data %>%
                                             mutate(`Id Col` = NA),
                                         variable=variable,
                                         count_distinct=count_distinct)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), 5)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - count-distinct
    #
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    actual_df <- rt_explore_value_totals(dataset=credit_data,
                                         variable=variable,
                                         count_distinct=count_distinct)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), 1000)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - count-distinct
    # set all `unknown`s to NA, so there should only be 1 distinct value for `unknown`, but the rest of the
    # dataset should be the same as above
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    num_unknowns <- expected_df[4, 'count']
    expected_df[4, 'count'] <- 1
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    temp <- credit_data %>% mutate(`Id Col` = ifelse(`Checking Balance Col` == 'unknown', NA, `Id Col`))
    actual_df <- rt_explore_value_totals(dataset=temp, variable=variable, count_distinct = count_distinct)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    # there is only 1 distinct unknown, so subtract all unknowns and add back in 1 for the 1 distinct
    expect_equal(sum(actual_df$count), 1000 - num_unknowns + 1)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - sum by var
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, wt=`Amount Col`) %>%
                                        rename(sum = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$sum / sum(expected_df$sum)
    actual_df <- rt_explore_value_totals(dataset=credit_data,
                                         variable=variable,
                                         sum_by_variable=sum_by_variable)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE))
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - multi-var
    # transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
    # while all other variable counts remain the same
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    num_less_zero <- expected_df[1, 'count']
    expected_df[1, 'count'] <-num_less_zero * 2
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- expected_df %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
        arrange(`Checking Balance Col`)

    # need to convert to a character, otherwise ifelse will convert to numeric factor value
    # then convert back to factor
    temp <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
                                                             paste(`Checking Balance Col`, '|', `Checking Balance Col`),
                                                             `Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         multi_value_delimiter=' \\| ')
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    # same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
    # take the unique values and sort them for the factor levels)
    expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    # we essentially duplicated the count for `< 0 DM`
    expect_equal(sum(actual_df$count), nrow(credit_data) + num_less_zero)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - multi-var - sum by
    # transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
    # while all other variable counts remain the same
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, wt=`Amount Col`) %>%
                                        rename(sum = n)) %>% as.data.frame()
    sum_amount_less_zero <- expected_df[1, 'sum']
    expected_df[1, 'sum'] <- sum_amount_less_zero * 2
    expected_df$percent <- expected_df$sum / sum(expected_df$sum)
    expected_df <- expected_df %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
        arrange(`Checking Balance Col`)

    # need to convert to a character, otherwise ifelse will convert to numeric factor value
    # then convert back to factor
    temp <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
                                         paste(`Checking Balance Col`, '|', `Checking Balance Col`),
                                         `Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         sum_by_variable=sum_by_variable,
                                         multi_value_delimiter=' \\| ')
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    # same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
    # take the unique values and sort them for the factor levels)
    expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))

    # we essentially duplicated the sum for `< 0 DM`
    expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE) + sum_amount_less_zero)
    expect_equal(sum(actual_df$percent), 1)

    ####
    # single var - multi-var - count distinct
    # transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
    # while all other variable counts remain the same
    # since we are duplicated "< 0 DM", but counting distinct Id, the counts should be the same as if we
    # did not duplicate
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- expected_df %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
        arrange(`Checking Balance Col`)

    # need to convert to a character, otherwise ifelse will convert to numeric factor value
    # then convert back to factor
    temp <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
                                         paste(`Checking Balance Col`, '|', `Checking Balance Col`),
                                         `Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         count_distinct=count_distinct,
                                         multi_value_delimiter=' \\| ')
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    # same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
    # take the unique values and sort them for the factor levels)
    expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    # we are counting distinct ids so we should have the same counts as original
    expect_equal(sum(actual_df$count), nrow(credit_data))
    expect_equal(sum(actual_df$percent), 1)

    get_group_percent_totals <- function(x) {
        suppressWarnings(x %>%
                             group_by(`Checking Balance Col`) %>%
                             summarise(group_percent_check = sum(group_percent))) %>%
            rt_get_vector('group_percent_check')
    }

    ####
    # double var
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`) %>%
                                        rename(count = n))
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- suppressWarnings(expected_df %>%
        group_by(`Checking Balance Col`) %>%
        mutate(group_percent = count / sum(count)) %>%
        ungroup()) %>% as.data.frame()
    actual_df <- rt_explore_value_totals(dataset=credit_data,
                                         variable=variable,
                                         second_variable=second_variable)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), nrow(credit_data))
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - non factor
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`) %>%
                                        rename(count = n))
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- suppressWarnings(expected_df %>%
                                        group_by(`Checking Balance Col`) %>%
                                        mutate(group_percent = count / sum(count)) %>%
                                        ungroup()) %>% as.data.frame() %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        arrange(`Checking Balance Col`)
    actual_df <- rt_explore_value_totals(dataset=credit_data %>%
                                             mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
                                         variable=variable,
                                         second_variable=second_variable)
    expect_false(is.factor(actual_df$`Checking Balance Col`))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), nrow(credit_data))
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - count-distinct
    # this should actually be the same things as not counting distinct, because NAs get lumped into 1
    # and there is only 1 NA for the id field, so it still gets counted as a single distinct
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- suppressWarnings(expected_df %>%
                                       group_by(`Checking Balance Col`) %>%
                                       mutate(group_percent = count / sum(count)) %>%
                                       ungroup()) %>% as.data.frame()
    actual_df <- rt_explore_value_totals(dataset=credit_data,
                                         variable=variable,
                                         second_variable=second_variable,
                                         count_distinct=count_distinct)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), nrow(credit_data))
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - count-distinct
    # set all `unknown`s to NA, so there should only be 1 distinct value for `unknown`, but the rest of the
    # dataset should be the same as above
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    num_unknowns <- expected_df[8, 'count'] + expected_df[9, 'count']
    expected_df[8, 'count'] <- 1
    expected_df[9, 'count'] <- 1
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- suppressWarnings(expected_df %>%
                                        group_by(`Checking Balance Col`) %>%
                                        mutate(group_percent = count / sum(count)) %>%
                                        ungroup()) %>% as.data.frame()
    temp <- credit_data %>% mutate(`Id Col` = ifelse(`Checking Balance Col` == 'unknown', NA, `Id Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         second_variable=second_variable,
                                         count_distinct=count_distinct)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))

    # we changed all of the ids corresponding to checking_balacne=unknown to NA, so count distinct
    # will count 1 in each group, so subtract number of unknowns and add back in the 2 distinct
    expect_equal(sum(actual_df$count), nrow(credit_data) - num_unknowns + 2)
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - sum by var
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`, wt=`Amount Col`) %>%
                                        rename(sum = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$sum / sum(expected_df$sum)
    expected_df <- suppressWarnings(expected_df %>%
                                        group_by(`Checking Balance Col`) %>%
                                        mutate(group_percent = sum / sum(sum)) %>%
                                        ungroup()) %>% as.data.frame()

    actual_df <- rt_explore_value_totals(dataset=credit_data,
                                         variable=variable,
                                         second_variable=second_variable,
                                         sum_by_variable=sum_by_variable)
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    expect_identical(levels(actual_df$`Checking Balance Col`), custom_levels)
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))

    expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE))
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - multi-var
    # transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
    # while all other variable counts remain the same
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    num_less_zero <- expected_df[1, 'count'] + expected_df[2, 'count']
    expected_df[1, 'count'] <- expected_df[1, 'count'] * 2
    expected_df[2, 'count'] <- expected_df[2, 'count'] * 2
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- expected_df %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
        arrange(`Checking Balance Col`)
    expected_df <- suppressWarnings(expected_df %>%
                                        group_by(`Checking Balance Col`) %>%
                                        mutate(group_percent = count / sum(count)) %>%
                                        ungroup()) %>% as.data.frame()

    # need to convert to a character, otherwise ifelse will convert to numeric factor value
    # then convert back to factor
    temp <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
                                         paste(`Checking Balance Col`, '|', `Checking Balance Col`),
                                         `Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         second_variable=second_variable,
                                         multi_value_delimiter=' \\| ')
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    # same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
    # take the unique values and sort them for the factor levels)
    expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), nrow(credit_data) + num_less_zero)
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - multi-var - sum by
    # transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
    # while all other variable counts remain the same
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`, wt=`Amount Col`) %>%
                                        rename(sum = n)) %>% as.data.frame()
    sum_less_zero <- expected_df[1, 'sum'] + expected_df[2, 'sum']
    expected_df[1, 'sum'] <- expected_df[1, 'sum'] * 2
    expected_df[2, 'sum'] <- expected_df[2, 'sum'] * 2
    expected_df$percent <- expected_df$sum / sum(expected_df$sum)
    expected_df <- expected_df %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
        arrange(`Checking Balance Col`)
    expected_df <- suppressWarnings(expected_df %>%
                                        group_by(`Checking Balance Col`) %>%
                                        mutate(group_percent = sum / sum(sum)) %>%
                                        ungroup()) %>% as.data.frame()

    # need to convert to a character, otherwise ifelse will convert to numeric factor value
    # then convert back to factor
    temp <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
                                         paste(`Checking Balance Col`, '|', `Checking Balance Col`),
                                         `Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         second_variable=second_variable,
                                         sum_by_variable=sum_by_variable,
                                         multi_value_delimiter=' \\| ')
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    # same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
    # take the unique values and sort them for the factor levels)
    expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$sum), sum(credit_data$`Amount Col`, na.rm = TRUE) + sum_less_zero)
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))

    ####
    # double var - multi-var - count distinct
    # transform "< 0 DM" to "< 0 DM | < 0 DM" so that we can parse " | " and "< 0 DM" should be duplicated
    # while all other variable counts remain the same
    # since we are duplicated "< 0 DM", but counting distinct Id, the counts should be the same as if we
    # did not duplicate
    ####
    expected_df <- suppressWarnings(credit_data %>%
                                        count(`Checking Balance Col`, `Default Col`) %>%
                                        rename(count = n)) %>% as.data.frame()
    expected_df$percent <- expected_df$count / sum(expected_df$count)
    expected_df <- expected_df %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`, levels = sort(custom_levels))) %>%
        arrange(`Checking Balance Col`)
    expected_df <- suppressWarnings(expected_df %>%
                                        group_by(`Checking Balance Col`) %>%
                                        mutate(group_percent = count / sum(count)) %>%
                                        ungroup()) %>% as.data.frame()

    # need to convert to a character, otherwise ifelse will convert to numeric factor value
    # then convert back to factor
    temp <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == '< 0 DM',
                                         paste(`Checking Balance Col`, '|', `Checking Balance Col`),
                                         `Checking Balance Col`)) %>%
        mutate(`Checking Balance Col` = factor(`Checking Balance Col`))
    actual_df <- rt_explore_value_totals(dataset=temp,
                                         variable=variable,
                                         second_variable=second_variable,
                                         count_distinct=count_distinct,
                                         multi_value_delimiter=' \\| ')
    expect_true(is.factor(actual_df$`Checking Balance Col`))
    # same levels, but now should be sorted (since there won't necessarily be the same levels, so it will
    # take the unique values and sort them for the factor levels)
    expect_identical(levels(actual_df$`Checking Balance Col`), sort(custom_levels))
    expect_true(rt_are_dataframes_equal(expected_df, actual_df))
    expect_equal(sum(actual_df$count), nrow(credit_data))
    expect_equal(sum(actual_df$percent), 1)
    expect_true(rt_are_numerics_equal(get_group_percent_totals(actual_df), 1, num_decimals = 8))
})

test_that("rt_explore_value_totals__facet_strings", {
    # Checks the `facet_variable` argument of rt_explore_value_totals(): for every facet value, the
    # faceted output must equal an un-faceted run on the data filtered to that value.
    # The data is read with stringsAsFactors=FALSE, so the facet column (`Default Col`) is character.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=FALSE)
    # make sure all col names have spaces
    colnames(credit_data) <- test_helper__column_names(credit_data)

    ##########################################################################################################
    # the primary variable is converted to a factor with custom (non-alphabetical) levels
    ##########################################################################################################
    custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)
    # seq_len() rather than 1:nrow() so that an empty data frame would yield an empty id vector
    credit_data$`Id Col` <- seq_len(nrow(credit_data))
    # make sure it handles NAs (one NA in each column that takes part in the calls below)
    credit_data[1, 'Checking Balance Col'] <- NA
    credit_data[2, 'Default Col'] <- NA
    credit_data[3, 'Id Col'] <- NA
    credit_data[4, 'Amount Col'] <- NA

    # Asserts that the `facet_value` slice of a faceted result (`faceted`) equals the un-faceted result
    # computed on the rows of `credit_data` where `Default Col` == `facet_value`.
    # The faceted output labels its slices as 'Default Col - <value>'.
    # Arguments in `...` are forwarded to the un-faceted rt_explore_value_totals() call and must mirror
    # the arguments (other than `facet_variable`) that were used to build `faceted`.
    expect_facet_matches_filtered <- function(faceted, facet_value, ...) {
        filtered_data <- credit_data %>% filter(`Default Col` == facet_value)
        expected <- rt_explore_value_totals(filtered_data, variable = 'Checking Balance Col', ...)
        facet_label <- paste('Default Col -', facet_value)
        actual <- faceted %>%
            filter(`Default Col` == facet_label) %>%
            select(-`Default Col`)
        # custom label so a failure says which facet slice did not match
        expect_true(rt_are_dataframes_equal(expected, actual),
                    label = paste0("facet slice '", facet_label, "' matching the filtered un-faceted totals"))
    }

    # already have unit tests to check the non-facet numbers, so we only have to verify that
    # if we filter by facet variables, we should get the same values
    ##########################################################################################################
    # variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                facet_variable = 'Default Col')

    # row 2 was given a missing `Default Col` above; the NA facet is expected to hold just that row
    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes')
    expect_facet_matches_filtered(value_counts, 'no')

    ##########################################################################################################
    # comparison variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                second_variable = 'Purpose Col',
                                facet_variable = 'Default Col')

    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes', second_variable = 'Purpose Col')
    expect_facet_matches_filtered(value_counts, 'no', second_variable = 'Purpose Col')

    ##########################################################################################################
    # sum_by variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                second_variable = 'Purpose Col',
                                sum_by_variable = 'Amount Col',
                                facet_variable = 'Default Col')

    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$sum, 5951)
    expect_equal(default_na$percent, 1)
    expect_equal(default_na$group_percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes',
                                  second_variable = 'Purpose Col',
                                  sum_by_variable = 'Amount Col')
    expect_facet_matches_filtered(value_counts, 'no',
                                  second_variable = 'Purpose Col',
                                  sum_by_variable = 'Amount Col')

    ##########################################################################################################
    # count_distinct variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                second_variable = 'Purpose Col',
                                count_distinct = 'Id Col',
                                facet_variable = 'Default Col')

    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(default_na$group_percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes',
                                  second_variable = 'Purpose Col',
                                  count_distinct = 'Id Col')
    expect_facet_matches_filtered(value_counts, 'no',
                                  second_variable = 'Purpose Col',
                                  count_distinct = 'Id Col')
})

test_that("rt_explore_value_totals__facet_factors", {
    # Checks the `facet_variable` argument of rt_explore_value_totals(): for every facet value, the
    # faceted output must equal an un-faceted run on the data filtered to that value.
    # The data is read with stringsAsFactors=TRUE, so the facet column (`Default Col`) is a factor.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # make sure all col names have spaces
    colnames(credit_data) <- test_helper__column_names(credit_data)

    ##########################################################################################################
    # the primary variable is re-levelled with custom (non-alphabetical) levels
    ##########################################################################################################
    custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)
    # seq_len() rather than 1:nrow() so that an empty data frame would yield an empty id vector
    credit_data$`Id Col` <- seq_len(nrow(credit_data))
    # make sure it handles NAs (one NA in each column that takes part in the calls below)
    credit_data[1, 'Checking Balance Col'] <- NA
    credit_data[2, 'Default Col'] <- NA
    credit_data[3, 'Id Col'] <- NA
    credit_data[4, 'Amount Col'] <- NA

    # Asserts that the `facet_value` slice of a faceted result (`faceted`) equals the un-faceted result
    # computed on the rows of `credit_data` where `Default Col` == `facet_value`.
    # The faceted output labels its slices as 'Default Col - <value>'.
    # Arguments in `...` are forwarded to the un-faceted rt_explore_value_totals() call and must mirror
    # the arguments (other than `facet_variable`) that were used to build `faceted`.
    expect_facet_matches_filtered <- function(faceted, facet_value, ...) {
        filtered_data <- credit_data %>% filter(`Default Col` == facet_value)
        expected <- rt_explore_value_totals(filtered_data, variable = 'Checking Balance Col', ...)
        facet_label <- paste('Default Col -', facet_value)
        actual <- faceted %>%
            filter(`Default Col` == facet_label) %>%
            select(-`Default Col`)
        # custom label so a failure says which facet slice did not match
        expect_true(rt_are_dataframes_equal(expected, actual),
                    label = paste0("facet slice '", facet_label, "' matching the filtered un-faceted totals"))
    }

    # already have unit tests to check the non-facet numbers, so we only have to verify that
    # if we filter by facet variables, we should get the same values
    ##########################################################################################################
    # variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                facet_variable = 'Default Col')

    # row 2 was given a missing `Default Col` above; the NA facet is expected to hold just that row
    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes')
    expect_facet_matches_filtered(value_counts, 'no')

    ##########################################################################################################
    # comparison variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                second_variable = 'Purpose Col',
                                facet_variable = 'Default Col')

    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes', second_variable = 'Purpose Col')
    expect_facet_matches_filtered(value_counts, 'no', second_variable = 'Purpose Col')

    ##########################################################################################################
    # sum_by variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                second_variable = 'Purpose Col',
                                sum_by_variable = 'Amount Col',
                                facet_variable = 'Default Col')

    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$sum, 5951)
    expect_equal(default_na$percent, 1)
    expect_equal(default_na$group_percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes',
                                  second_variable = 'Purpose Col',
                                  sum_by_variable = 'Amount Col')
    expect_facet_matches_filtered(value_counts, 'no',
                                  second_variable = 'Purpose Col',
                                  sum_by_variable = 'Amount Col')

    ##########################################################################################################
    # count_distinct variable
    ##########################################################################################################
    value_counts <- credit_data %>%
        rt_explore_value_totals(variable = 'Checking Balance Col',
                                second_variable = 'Purpose Col',
                                count_distinct = 'Id Col',
                                facet_variable = 'Default Col')

    default_na <- value_counts %>% filter(`Default Col` == 'Default Col - NA')
    expect_equal(nrow(default_na), 1)
    expect_equal(default_na$count, 1)
    expect_equal(default_na$percent, 1)
    expect_equal(default_na$group_percent, 1)
    expect_equal(as.character(default_na$`Checking Balance Col`), "1 - 200 DM")

    expect_facet_matches_filtered(value_counts, 'yes',
                                  second_variable = 'Purpose Col',
                                  count_distinct = 'Id Col')
    expect_facet_matches_filtered(value_counts, 'no',
                                  second_variable = 'Purpose Col',
                                  count_distinct = 'Id Col')
})

test_that("rt_explore_value_totals - bug: sum_by_all_zeros", {
    # Scenario under test: a second categoric variable is combined with a sum-by variable, and every
    # sum-by value inside one primary category ('< 0 DM') is zero, so that category's group total is
    # zero and its group percent is a division by zero (NaN).
    # The same set of checks is then repeated with those values set to NA instead of 0.
    raw_credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # make sure all col names have spaces
    colnames(raw_credit) <- test_helper__column_names(raw_credit)

    # Runs the full set of checks against one prepared dataset:
    #   1. sum-by totals for the primary variable alone
    #   2. sum-by totals for primary + second variable (includes group_percent)
    #   3. saves the default and the 'Stack Percent' plots to the given files
    #   4. distinct counts of `Amount Col` for primary + second variable
    # `na_rm` is passed as `na.rm` to every sum() used to build the expected values.
    check_sum_by_scenario <- function(scenario_data, na_rm, plot_file, stacked_plot_file) {

        # 1. single variable
        totals_single <- rt_explore_value_totals(dataset=scenario_data,
                                                 variable='Checking Balance Col',
                                                 second_variable=NULL,
                                                 count_distinct=NULL,
                                                 sum_by_variable='Amount Col',
                                                 multi_value_delimiter=NULL)
        expected_single <- scenario_data %>%
            group_by(`Checking Balance Col`) %>%
            summarise(sum = sum(`Amount Col`, na.rm = na_rm),
                      percent = sum(`Amount Col`, na.rm = na_rm) / sum(scenario_data$`Amount Col`, na.rm = na_rm))
        expect_true(rt_are_dataframes_equal(totals_single, expected_single))

        # 2. two variables; summarise() leaves the result grouped by the primary variable, so the
        # mutate() below computes each row's share within its primary category
        totals_double <- rt_explore_value_totals(dataset=scenario_data,
                                                 variable='Checking Balance Col',
                                                 second_variable='Default Col',
                                                 count_distinct=NULL,
                                                 sum_by_variable='Amount Col',
                                                 multi_value_delimiter=NULL)
        expected_double <- scenario_data %>%
            group_by(`Checking Balance Col`, `Default Col`) %>%
            summarise(sum = sum(`Amount Col`, na.rm = na_rm),
                      percent = sum(`Amount Col`, na.rm = na_rm) / sum(scenario_data$`Amount Col`, na.rm = na_rm)) %>%
            mutate(group_percent = sum / sum(sum, na.rm = na_rm)) %>%
            ungroup()
        expect_true(rt_are_dataframes_equal(totals_double, expected_double))

        # 3. plots
        test_save_plot(file_name=plot_file,
                       plot=rt_explore_plot_value_totals(dataset=scenario_data,
                                                         variable='Checking Balance Col',
                                                         comparison_variable='Default Col',
                                                         sum_by_variable='Amount Col'))
        test_save_plot(file_name=stacked_plot_file,
                       plot=suppressWarnings(rt_explore_plot_value_totals(dataset=scenario_data,
                                                                          variable='Checking Balance Col',
                                                                          comparison_variable='Default Col',
                                                                          sum_by_variable='Amount Col',
                                                                          view_type = 'Stack Percent')))

        # 4. count distinct (no sum-by variable); only the key columns and `count` are compared
        distinct_totals <- rt_explore_value_totals(dataset=scenario_data,
                                                   variable='Checking Balance Col',
                                                   second_variable='Default Col',
                                                   count_distinct='Amount Col',
                                                   multi_value_delimiter=NULL)
        expected_distinct <- scenario_data %>%
            group_by(`Checking Balance Col`, `Default Col`) %>%
            summarise(count = n_distinct(`Amount Col`))
        expect_true(rt_are_dataframes_equal(distinct_totals %>%
                                                select(`Checking Balance Col`, `Default Col`, count),
                                            expected_distinct))
    }

    ##########################################################################################################
    # All amounts in the '< 0 DM' category are 0
    ##########################################################################################################
    zero_amounts <- raw_credit %>%
        mutate(`Amount Col` = ifelse(`Checking Balance Col` == '< 0 DM', 0, `Amount Col`))
    check_sum_by_scenario(scenario_data=zero_amounts,
                          na_rm=FALSE,
                          plot_file='data/rt_explore_plot_value_totals__all_missing_sum_by.png',
                          stacked_plot_file='data/rt_explore_plot_value_totals__all_missing_sum_by_stack.png')

    ##########################################################################################################
    # Try the same thing but if values in category have NA rather than 0
    # (derived from the zeroed data, exactly as the amounts were transformed before)
    ##########################################################################################################
    na_amounts <- zero_amounts %>%
        mutate(`Amount Col` = ifelse(`Checking Balance Col` == '< 0 DM', NA, `Amount Col`))
    check_sum_by_scenario(scenario_data=na_amounts,
                          na_rm=TRUE,
                          plot_file='data/rt_explore_plot_value_totals__all_missing_sum_by_nas.png',
                          stacked_plot_file='data/rt_explore_plot_value_totals__all_missing_sum_by_stack_nas.png')
})

test_that("rt_explore_plot_value_totals__distinct_variable", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # make sure all col names have spaces
    colnames(credit_data) <- test_helper__column_names(credit_data)

    ##########################################################################################################
    # test with factor
    # change the levels to verify that the original levels are retained if order_by_count==FALSE
    ##########################################################################################################
    custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)
    credit_data$`Id Col` <- 1:nrow(credit_data)
    # make sure it handles NAs
    credit_data[1, 'Checking Balance Col'] <- NA
    credit_data[2, 'Default Col'] <- NA
    credit_data[3, 'Id Col'] <- NA
    credit_data[4, 'Amount Col'] <- NA

    variable <- 'Checking Balance Col'
    comparison_variable <- 'Default Col'
    sum_by_variable <- 'Amount Col'
    count_distinct <- 'Id Col'

    ##########################################################################################################
    # single variable
    ##########################################################################################################
    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              sum_by_variable=sum_by_variable,
                                              count_distinct_variable = count_distinct))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              count_distinct_variable=count_distinct,
                                              view_type="Confidence Interval"))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              count_distinct_variable=count_distinct,
                                              view_type="Confidence Interval - within Variable"))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=FALSE))

    temp <- credit_data %>% unite(cohort, `Age Col`, `Purpose Col`)
    # temp %>% group_by(checking_balance) %>% summarise(distinct_cohorts = n_distinct(cohort),
    #                                                   perc_distinct = distinct_cohorts / length(unique(temp$cohort)))
    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__purpose.png',
                   plot=rt_explore_plot_value_totals(dataset=temp,
                                                     variable=variable,
                                                     count_distinct_variable='cohort',
                                                     order_by_count=FALSE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct_order_by_count.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=TRUE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__char.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data %>%
                                                         mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
                                                     variable=variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=FALSE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__dual.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     count_distinct_variable=count_distinct,
                                                     show_dual_axes=TRUE,
                                                     order_by_count=FALSE))

    ##########################################################################################################
    # comparison variable
    ##########################################################################################################
    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=comparison_variable,
                                              sum_by_variable=sum_by_variable,
                                              count_distinct_variable = count_distinct))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=comparison_variable,
                                              count_distinct_variable=count_distinct,
                                              view_type="Confidence Interval"))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=comparison_variable,
                                              count_distinct_variable=count_distinct,
                                              view_type="Confidence Interval - within Variable"))

    # credit_data %>%
    #     group_by(checking_balance, default) %>%
    #     summarise(distinct_cohorts = n_distinct(id)) %>%
    #     ungroup()
    # credit_data %>%
    #     group_by(checking_balance) %>%
    #     summarise(distinct_prim = n_distinct(id)) %>%
    #     ungroup()
    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=FALSE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__dual.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     show_dual_axes=TRUE,
                                                     order_by_count=FALSE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__order.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=TRUE))

    temp <- credit_data %>% unite(cohort, `Age Col`, `Purpose Col`)
    # temp %>%
    #     group_by(checking_balance, default) %>%
    #     summarise(distinct_cohorts = n_distinct(cohort)) %>%
    #     ungroup()
    # temp %>%
    #     group_by(checking_balance) %>%
    #     summarise(distinct_prim = n_distinct(cohort)) %>%
    #     ungroup()
    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__purpose.png',
                   plot=rt_explore_plot_value_totals(dataset=temp,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable='cohort',
                                                     show_dual_axes = TRUE,
                                                     order_by_count=FALSE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__temp__distinct__facet.png',
                   plot=rt_explore_plot_value_totals(dataset=temp,
                                                     variable=variable,
                                                     #comparison_variable=comparison_variable,
                                                     facet_variable=comparison_variable,
                                                     count_distinct_variable='cohort',
                                                     view_type="Bar",
                                                     order_by_count=FALSE))
    # temp %>%
    #     group_by(checking_balance, default, phone) %>%
    #     summarise(distinct_cohorts = n_distinct(cohort)) %>%
    #     ungroup()
    # temp %>%
    #     group_by(checking_balance, phone) %>%
    #     summarise(distinct_prim = n_distinct(cohort)) %>%
    #     ungroup()
    test_save_plot(file_name='data/rt_explore_plot_value_totals__temp__distinct__comp__facet.png',
                   plot=rt_explore_plot_value_totals(dataset=temp,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     facet_variable='Phone Col',
                                                     count_distinct_variable='cohort',
                                                     view_type="Bar",
                                                     order_by_count=FALSE))

    # credit_data %>%
    #     group_by(checking_balance, default) %>%
    #     summarise(distinct_cohorts = n_distinct(id)) %>%
    #     ungroup()
    # credit_data %>%
    #     group_by(checking_balance) %>%
    #     summarise(distinct_prim = n_distinct(id)) %>%
    #     ungroup()
    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp_order_by_count.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=TRUE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__char.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data %>%
                                                         mutate(`Checking Balance Col` = as.character(`Checking Balance Col`)),
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     order_by_count=FALSE))

    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__dual.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     show_dual_axes=TRUE,
                                                     order_by_count=FALSE))

    # test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__facet.png',
    #                plot=rt_explore_plot_value_totals(dataset=credit_data,
    #                                                  variable=variable,
    #                                                  comparison_variable=comparison_variable,
    #                                                  count_distinct_variable=count_distinct,
    #                                                  view_type="Facet by Comparison",
    #                                                  order_by_count=FALSE))
    test_save_plot(file_name='data/rt_explore_plot_value_totals__distinct__comp__facet.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     #comparison_variable=comparison_variable,
                                                     facet_variable=comparison_variable,
                                                     count_distinct_variable=count_distinct,
                                                     view_type="Bar",
                                                     order_by_count=FALSE))
})

test_that("rt_get_colors_from_values", {

    # Reference palette: the first five colors returned by rt_colors().
    palette <- rt_colors()[1:5]
    # Palette positions expected when `cut` is passed as a factor with the
    # levels it has in the diamonds dataset.
    factor_positions <- c(1, 2, 5, 4, 3)
    diamonds_data <- diamonds

    # ---- no missing values -------------------------------------------------
    # character input
    expect_identical(palette,
                     rt_get_colors_from_values(as.character(diamonds_data$cut)))
    # factor input
    expect_identical(palette[factor_positions],
                     rt_get_colors_from_values(diamonds_data$cut))

    # ---- one missing value -------------------------------------------------
    # The expectations are the same as above after a single NA is introduced.
    diamonds_data[1, 'cut'] <- NA
    # character input
    expect_identical(palette,
                     rt_get_colors_from_values(as.character(diamonds_data$cut)))
    # factor input
    expect_identical(palette[factor_positions],
                     rt_get_colors_from_values(diamonds_data$cut))

    # ---- factor re-levelled by frequency -----------------------------------
    # Level order after fct_infreq: Ideal < Premium < Very Good < Good < Fair
    # Alphabetical positions:       Fair=1, Good=2, Ideal=3, Premium=4, Very Good=5
    frequency_positions <- c(3, 4, 5, 2, 1)
    cut_by_frequency <- fct_infreq(diamonds_data$cut, ordered = TRUE)
    expect_identical(palette[frequency_positions],
                     rt_get_colors_from_values(cut_by_frequency))
})

test_that("rt_explore_plot_value_counts", {
    # Each case below builds a plot with rt_explore_plot_value_totals() and
    # passes it to test_save_plot() together with the target file name.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)
    variable <- 'Checking Balance Col'

    # introduce a missing value so the plots below also exercise NA handling
    credit_data[1, variable] <- NA

    # copy of the data with the variable stored as character rather than factor;
    # shared by the first three plots
    credit_data_chr <- credit_data %>%
        mutate(`Checking Balance Col` = as.character(`Checking Balance Col`))

    # character variable, order_by_count=FALSE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_chr,
            variable=variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    # character variable, order_by_count=FALSE, simple_mode=TRUE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_order__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_chr,
            variable=variable,
            order_by_count=FALSE,
            simple_mode=TRUE,
            base_size=11
        )
    )

    # character variable with a comparison variable, simple_mode=TRUE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts__comparison__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_chr,
            variable=variable,
            comparison_variable='Default Col',
            simple_mode=TRUE,
            order_by_count=FALSE,
            base_size=11
        )
    )

    # ordered factor with explicitly supplied levels, order_by_count=FALSE
    credit_data_levels <- credit_data %>%
        mutate(`Checking Balance Col` = factor(
            as.character(`Checking Balance Col`),
            levels=c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown"),
            ordered=TRUE
        ))
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_order__factor.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_levels,
            variable=variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    # same factor re-levelled with fct_infreq(), order_by_count=FALSE
    credit_data_infreq <- credit_data_levels %>%
        mutate(`Checking Balance Col` = fct_infreq(`Checking Balance Col`, ordered=TRUE))
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_order__ordered.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_infreq,
            variable=variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    # original factor column, order_by_count=TRUE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_order__ordered2.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            order_by_count=TRUE,
            base_size=11
        )
    )

    # every 'unknown' value replaced by NA (variable becomes character)
    credit_data_unknown_na <- credit_data %>%
        mutate(`Checking Balance Col` = ifelse(`Checking Balance Col` == 'unknown',
                                               NA,
                                               as.character(`Checking Balance Col`)))
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_nas.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_unknown_na,
            variable=variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    # show_variable_totals=FALSE; order_by_count is left at its default
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_group_totals.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            show_variable_totals=FALSE,
            base_size=11
        )
    )

    # dataset and variable name passed through the rt_pretty_* helpers
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_pretty.png',
        plot=rt_explore_plot_value_totals(
            dataset=rt_pretty_dataset(credit_data),
            variable=rt_pretty_text(variable),
            order_by_count=FALSE,
            base_size=11
        )
    )

    # original factor column, order_by_count=TRUE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_with_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            order_by_count=TRUE,
            base_size=11
        )
    )

    ##########################################################################################################
    # variable converted in place from factor to character
    ##########################################################################################################
    credit_data[[variable]] <- as.character(credit_data[[variable]])

    # character variable, order_by_count=FALSE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_factor_no_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    # character variable, order_by_count=TRUE
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_no_factor_with_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            order_by_count=TRUE,
            base_size=11
        )
    )
})

test_that("rt_explore_plot_value_counts__facet", {
    # Each case below builds a faceted plot with rt_explore_plot_value_totals()
    # and passes it to test_save_plot() together with the target file name.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # column names are replaced with the output of test_helper__column_names()
    colnames(credit_data) <- test_helper__column_names(credit_data)

    variable <- 'Checking Balance Col'
    comparison_variable <- 'Credit History Col'
    facet_variable <- 'Default Col'

    # one missing value in each of the three columns used below
    credit_data[1, variable] <- NA
    credit_data[2, comparison_variable] <- NA
    credit_data[3, facet_variable] <- NA

    ##########################################################################################################
    # default base_size: ordering with and without a comparison variable
    ##########################################################################################################
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__var__facet__order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            facet_variable=facet_variable,
            order_by_count=TRUE
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__var__facet__comp__order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            order_by_count=TRUE
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__var__facet__comp__no_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            order_by_count=FALSE
        )
    )

    ##########################################################################################################
    # facet variable only (no comparison variable), base_size=11
    ##########################################################################################################
    # Manual cross-check kept for reference (NOTE: the snippet appears to use the
    # column names from before the renaming above):
    # credit_data %>%
    #     count(checking_balance, default) %>%
    #     arrange(default, checking_balance) %>%
    #     group_by(default) %>%
    #     mutate(p=n/sum(n, na.rm = TRUE)) %>% ungroup()
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            facet_variable=facet_variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            facet_variable=facet_variable,
            simple_mode=TRUE,
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var__conf.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            facet_variable=facet_variable,
            view_type='Confidence Interval',
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var__conf__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            facet_variable=facet_variable,
            simple_mode=TRUE,
            view_type='Confidence Interval',
            order_by_count=FALSE,
            base_size=11
        )
    )

    # facet variable plus sum_by_variable
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var_sum__bar.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            sum_by_variable='Amount Col',
            facet_variable=facet_variable,
            view_type='Bar',
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var_sum__bar__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            sum_by_variable='Amount Col',
            facet_variable=facet_variable,
            simple_mode=TRUE,
            view_type='Bar',
            order_by_count=FALSE,
            base_size=11
        )
    )

    # Manual cross-check for the summed plots, kept for reference:
    # credit_data %>%
    #     count(checking_balance, default, wt=amount) %>%
    #     arrange(default, checking_balance) %>%
    #     group_by(default) %>%
    #     mutate(p=n/sum(n, na.rm = TRUE)) %>% ungroup()

    # facet variable plus count_distinct_variable (no sum_by_variable)
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var_unique__bar.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            count_distinct_variable='Employment Duration Col',
            facet_variable=facet_variable,
            view_type='Bar',
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var_unique__bar__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            count_distinct_variable='Employment Duration Col',
            facet_variable=facet_variable,
            view_type='Bar',
            simple_mode=TRUE,
            order_by_count=FALSE,
            base_size=11
        )
    )
    # Manual cross-check for the distinct-count plots, kept for reference:
    # credit_data %>%
    #     group_by(checking_balance, default) %>%
    #     summarise(n=n_distinct(employment_duration)) %>%
    #     ungroup() %>%
    #     arrange(default, checking_balance) %>%
    #     group_by(default) %>%
    #     mutate(p=n/sum(n, na.rm = TRUE)) %>% ungroup()

    ##########################################################################################################
    # comparison variable and facet variable together, base_size=11
    ##########################################################################################################
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp__simple.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            simple_mode=TRUE,
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp_conf.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            view_type='Confidence Interval',
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp_conf2.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            view_type='Confidence Interval - within Variable',
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp_stack.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            view_type='Stack',
            order_by_count=FALSE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp_stack_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            view_type='Stack',
            order_by_count=TRUE,
            base_size=11
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_comp_stack_perc.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            view_type='Stack Percent',
            order_by_count=FALSE,
            base_size=11
        )
    )
    # Manual cross-check for the comparison + facet plots, kept for reference:
    # credit_data %>%
    #     count(checking_balance, credit_history, default) %>%
    #     arrange(default, checking_balance, credit_history) %>%
    #     group_by(default, checking_balance) %>%
    #     mutate(p=n/sum(n, na.rm = TRUE)) %>% ungroup() %>% as.data.frame()


    ##########################################################################################################
    # explicit factor level order for both the variable and the facet variable
    ##########################################################################################################
    credit_data_ordered <- credit_data %>%
        mutate(
            `Checking Balance Col` = factor(
                as.character(`Checking Balance Col`),
                levels=c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown"),
                ordered=TRUE
            ),
            `Default Col` = factor(
                as.character(`Default Col`),
                levels=c("yes", "no"),
                ordered=TRUE
            )
        )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__facet_var_factors.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data_ordered,
            variable=variable,
            facet_variable=facet_variable,
            order_by_count=FALSE,
            base_size=11
        )
    )
})

test_that("rt_explore_plot_value_counts: logical", {
    # Load the credit fixture and rename its columns with the shared test helper.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # Introduce a missing value before the conversion so the logical column carries an NA.
    credit_data[1, 'Default Col'] <- NA
    # The comparison itself yields TRUE/FALSE and leaves the missing entry as NA.
    credit_data_logical <- mutate(credit_data, `Default Col` = `Default Col` == 'yes')

    # Logical column as the primary variable.
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__logical.png',
        plot=rt_explore_plot_value_totals(dataset=credit_data_logical,
                                          variable='Default Col',
                                          base_size=11)
    )

    # Logical column as the comparison variable.
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__logical_comparison.png',
        plot=rt_explore_plot_value_totals(dataset=credit_data_logical,
                                          variable='Checking Balance Col',
                                          comparison_variable='Default Col',
                                          base_size=11)
    )
})

test_that("rt_explore_plot_boxplot: logical", {
    # Load the credit fixture and rename its columns with the shared test helper.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # Introduce a missing value before the conversion so the logical column carries an NA.
    credit_data[1, 'Default Col'] <- NA
    # The comparison itself yields TRUE/FALSE and leaves the missing entry as NA.
    credit_data_logical <- mutate(credit_data, `Default Col` = `Default Col` == 'yes')

    # Logical column used to split the boxplots.
    test_save_plot(
        file_name='data/rt_explore_plot_boxplot__logical.png',
        plot=rt_explore_plot_boxplot(dataset=credit_data_logical,
                                     variable='Amount Col',
                                     comparison_variable='Default Col')
    )

    # Logical column used only for colouring; a categoric column does the split.
    test_save_plot(
        file_name='data/rt_explore_plot_boxplot__logical_color.png',
        plot=rt_explore_plot_boxplot(dataset=credit_data_logical,
                                     variable='Amount Col',
                                     comparison_variable='Checking Balance Col',
                                     color_variable = 'Default Col')
    )
})

test_that("rt_explore_plot_scatter: logical", {
    # Load the credit fixture and rename its columns with the shared test helper.
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # The comparison yields the TRUE/FALSE column directly; no NA is present at this point.
    credit_data_logical <- mutate(credit_data, `Default Col` = `Default Col` == 'yes')

    # Logical column mapped to point size (plotted before any NA is introduced).
    test_save_plot(
        file_name='data/rt_explore_plot_scatter__logical_size.png',
        plot=rt_explore_plot_scatter(dataset=credit_data_logical,
                                     variable='Amount Col',
                                     comparison_variable='Months Loan Duration Col',
                                     size_variable = 'Default Col')
    )

    # The remaining plots run against a dataset whose logical column contains an NA.
    credit_data_logical[1, 'Default Col'] <- NA

    # Logical column present in the data but not mapped to any aesthetic.
    test_save_plot(
        file_name='data/rt_explore_plot_scatter__logical.png',
        plot=rt_explore_plot_scatter(dataset=credit_data_logical,
                                     variable='Amount Col',
                                     comparison_variable='Months Loan Duration Col')
    )

    # Logical column (with NA) mapped to point colour.
    test_save_plot(
        file_name='data/rt_explore_plot_scatter__logical_color.png',
        plot=rt_explore_plot_scatter(dataset=credit_data_logical,
                                     variable='Amount Col',
                                     comparison_variable='Months Loan Duration Col',
                                     color_variable = 'Default Col')
    )
})

test_that("rt_explore_plot_value_counts_against_categorical", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    variable <- 'Checking Balance Col'

    # Saves a value-totals plot of `variable` compared against 'Default Col'; any further plot
    # options are forwarded untouched. `credit_data` is resolved each time the helper runs, so
    # the re-levelling done further down is reflected in the later plots.
    save_default_comparison <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                         variable=variable,
                                                         comparison_variable='Default Col',
                                                         ...))
    }

    ##########################################################################################################
    # test with factor
    # use a non-default level order to verify the original levels are retained if order_by_count==FALSE
    ##########################################################################################################
    balance_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=balance_levels)

    # make sure it handles NAs
    credit_data[1, 'Checking Balance Col'] <- NA

    # ordered by count, with both variable and comparison totals shown
    save_default_comparison('data/rt_explore_plot_value_counts_comparison_variable_defaults.png',
                            order_by_count=TRUE,
                            show_variable_totals=TRUE,
                            show_comparison_totals=TRUE)

    # same plot, but on the prettified dataset and prettified column names
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_comparison_variable_pretty.png',
        plot=rt_explore_plot_value_totals(dataset=rt_pretty_dataset(credit_data),
                                          variable=rt_pretty_text(variable),
                                          comparison_variable=rt_pretty_text('Default Col'),
                                          order_by_count=TRUE,
                                          show_variable_totals=TRUE,
                                          show_comparison_totals=TRUE)
    )

    # primary and comparison variables swapped
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_comparison_variable_swapped.png',
        plot=rt_explore_plot_value_totals(dataset=credit_data,
                                          variable='Default Col',
                                          comparison_variable=variable,
                                          order_by_count=TRUE,
                                          show_variable_totals=TRUE,
                                          show_comparison_totals=TRUE)
    )

    # keep the factor's level order instead of ordering by count
    save_default_comparison('data/rt_explore_plot_value_counts_comparison_variable_not_order_by_count.png',
                            order_by_count=FALSE,
                            show_variable_totals=TRUE,
                            show_comparison_totals=TRUE)

    # variable totals hidden, comparison totals shown
    save_default_comparison('data/rt_explore_plot_value_counts_comp_var_not_show_group_totals.png',
                            order_by_count=FALSE,
                            show_variable_totals=FALSE,
                            show_comparison_totals=TRUE)

    # both variable and comparison totals hidden
    save_default_comparison('data/rt_explore_plot_value_counts_comp_var_not_show_comparison_totals.png',
                            order_by_count=FALSE,
                            show_variable_totals=FALSE,
                            show_comparison_totals=FALSE)

    ##########################################################################################################
    # ORDERED FACTORS
    ##########################################################################################################
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`,
                                                 levels=c("< 0 DM", "1 - 200 DM", "> 200 DM", "unknown"),
                                                 ordered=TRUE)
    credit_data$`Default Col` <- factor(credit_data$`Default Col`,
                                        levels=c("no", "yes"),
                                        ordered=TRUE)

    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor.png',
                            order_by_count=FALSE)

    # "Stack Percent" view, with and without reverse_stack=FALSE
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked.png',
                            view_type="Stack Percent",
                            order_by_count=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked_rev.png',
                            view_type="Stack Percent",
                            order_by_count=FALSE,
                            reverse_stack=FALSE)

    # "Stack" view, with and without reverse_stack=FALSE
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked_total.png',
                            view_type="Stack",
                            order_by_count=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked_total_rev.png',
                            view_type="Stack",
                            order_by_count=FALSE,
                            reverse_stack=FALSE)

    # "Stack" view with sum_by_variable set to 'Amount Col'
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked_amount.png',
                            view_type="Stack",
                            sum_by_variable = 'Amount Col',
                            order_by_count=TRUE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked_amount_rev.png',
                            view_type="Stack",
                            sum_by_variable = 'Amount Col',
                            order_by_count=TRUE,
                            reverse_stack=FALSE)
    save_default_comparison('data/value_counts__ordered_factor_stacked_amount_no_var_totals.png',
                            view_type="Stack",
                            sum_by_variable = 'Amount Col',
                            order_by_count=TRUE,
                            show_variable_totals=FALSE)
    save_default_comparison('data/value_counts__ordered_factor_stacked_amount_no_comp_totals.png',
                            view_type="Stack",
                            sum_by_variable = 'Amount Col',
                            order_by_count=TRUE,
                            show_comparison_totals=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked_amount2.png',
                            view_type="Stack",
                            sum_by_variable = 'Amount Col',
                            order_by_count=TRUE,
                            show_dual_axes=TRUE)

    # both confidence-interval views
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_conf.png',
                            view_type="Confidence Interval",
                            order_by_count=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_conf2.png',
                            view_type="Confidence Interval - within Variable",
                            order_by_count=FALSE)

    # change the order of the secondary/comparison variable and repeat the main views
    credit_data$`Default Col` <- factor(credit_data$`Default Col`,
                                        levels=c("yes", "no"),
                                        ordered=TRUE)

    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor__swapped_order.png',
                            order_by_count=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_stacked__swapped_order.png',
                            view_type="Stack Percent",
                            order_by_count=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_conf__swapped_order.png',
                            view_type="Confidence Interval",
                            order_by_count=FALSE)
    save_default_comparison('data/rt_explore_plot_value_counts__ordered_factor_conf2__swapped_order.png',
                            view_type="Confidence Interval - within Variable",
                            order_by_count=FALSE)
})

test_that("rt_explore_plot_value_totals__daul_axes", {

    # Every plot in this test uses the `diamonds` dataset with 'cut' as the primary variable;
    # only the remaining options (forwarded through `...`) differ between cases.
    save_cut_totals <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_value_totals(dataset=diamonds,
                                                         variable='cut',
                                                         ...))
    }

    # single variable, dual axes requested
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__cut__daul.png',
                    show_dual_axes = TRUE)

    # with a comparison variable, dual axes on and off
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__comparison__dual.png',
                    comparison_variable='color',
                    show_dual_axes = TRUE)
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__comparison__no_dual.png',
                    comparison_variable='color',
                    show_dual_axes = FALSE)

    # summing 'price' rather than counting rows
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__sum_by_total__daul.png',
                    sum_by_variable='price',
                    show_dual_axes = TRUE)

    # summing 'price' with a comparison variable and all totals hidden, dual axes on and off
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__sum_comparison__daul.png',
                    comparison_variable='color',
                    sum_by_variable='price',
                    show_variable_totals=FALSE,
                    show_comparison_totals=FALSE,
                    show_dual_axes = TRUE)
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__sum_comparison__no_dual.png',
                    comparison_variable='color',
                    sum_by_variable='price',
                    show_variable_totals=FALSE,
                    show_comparison_totals=FALSE,
                    show_dual_axes = FALSE)

    # STACK PERCENT - dual axes should NOT be shown even when requested (values are all percentages)
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__comparison_stacked__dual.png',
                    comparison_variable='color',
                    view_type="Stack Percent",
                    show_dual_axes = TRUE)
    save_cut_totals('data/rt_explore_plot_value_totals__diamonds__comparison_sum_stacked__dual.png',
                    comparison_variable='color',
                    sum_by_variable='price',
                    view_type="Stack Percent",
                    show_dual_axes = TRUE)
})

test_that("rt_explore_plot_value_totals__conf_intervals", {
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)
    ##########################################################################################################
    # test with factor
    # change the levels to verify that the original levels are retained if order_by_count==FALSE
    ##########################################################################################################
    custom_levels <- c('< 0 DM', '1 - 200 DM', '> 200 DM', 'unknown')
    credit_data$`Checking Balance Col` <- factor(credit_data$`Checking Balance Col`, levels=custom_levels)

    # make sure it handles NAs
    credit_data[1, 'Checking Balance Col'] <- NA
    variable <- 'Checking Balance Col'
    comparison_variable <- 'Housing Col'
    sum_by_variable <- 'Amount Col'

    ##########################################################################################################
    # VARIABLE ONLY
    # Test c("Bar", "Confidence Interval")
    ##########################################################################################################
    test_save_plot(file_name='data/plot_value_totals__var__bar__dual.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=NULL,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__CI.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=NULL,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Confidence Interval",
                                                     show_dual_axes=FALSE))

    test_save_plot(file_name='data/plot_value_totals__var__CI__no_vals.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=NULL,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=FALSE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Confidence Interval",
                                                     show_dual_axes=FALSE))

    multi_value_credit_data <- credit_data %>%
        mutate(`Purpose Col` = case_when(
            `Purpose Col` == 'car' ~ 'car, car_test',
            `Purpose Col` == 'business' ~ 'business, business_test',
            TRUE ~ as.character(`Purpose Col`))) %>%
        mutate(`Purpose Col` = as.factor(`Purpose Col`))

    test_save_plot(file_name='data/plot_value_totals__conf__multi_value.png',
                   plot=rt_explore_plot_value_totals(dataset=multi_value_credit_data,
                                                     variable='Purpose Col',
                                                     comparison_variable = NULL,
                                                     view_type="Confidence Interval",
                                                     multi_value_delimiter=', '))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=NULL,
                                              sum_by_variable=NULL,
                                              order_by_count=FALSE,
                                              show_variable_totals=TRUE,
                                              show_comparison_totals=TRUE,
                                              view_type="Confidence Interval - within Variable",
                                              show_dual_axes=TRUE))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=NULL,
                                              sum_by_variable=NULL,
                                              order_by_count=FALSE,
                                              show_variable_totals=TRUE,
                                              show_comparison_totals=TRUE,
                                              view_type="Stack Percent",
                                              show_dual_axes=TRUE))
    ##########################################################################################################
    # VARIABLE, SUM_BY_VARIABLE
    # Test c("Bar")
    ##########################################################################################################
    test_save_plot(file_name='data/plot_value_totals__var__sum_by__bar.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=NULL,
                                                     sum_by_variable=sum_by_variable,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=NULL,
                                              sum_by_variable=sum_by_variable,
                                              order_by_count=FALSE,
                                              show_variable_totals=TRUE,
                                              show_comparison_totals=TRUE,
                                              view_type="Confidence Interval",
                                              show_dual_axes=TRUE))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=NULL,
                                              sum_by_variable=sum_by_variable,
                                              order_by_count=FALSE,
                                              show_variable_totals=TRUE,
                                              show_comparison_totals=TRUE,
                                              view_type="Stack Percent",
                                              show_dual_axes=TRUE))

    ##########################################################################################################
    # VARIABLE, COMPARISON_VARIABLE
    # Test c("Bar", "Confidence Interval", "Facet by Comparison", "Confidence Interval - within Variable", "Stack")
    ##########################################################################################################
    test_save_plot(file_name='data/plot_value_totals__var__comp__bar.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__CI.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Confidence Interval",
                                                     show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__facet.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     facet_variable=comparison_variable,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__facet.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     facet_variable=comparison_variable,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__CI_var.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Confidence Interval - within Variable",
                                                     show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__stack.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     sum_by_variable=NULL,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Stack Percent",
                                                     show_dual_axes=TRUE))

    ##########################################################################################################
    # VARIABLE, COMPARISON_VARIABLE, SUM_BY_VARIABLE
    # Test c("Bar", "Facet by Comparison", "Stack")
    ##########################################################################################################
    test_save_plot(file_name='data/plot_value_totals__var__comp__sum__bar.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     sum_by_variable=sum_by_variable,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=comparison_variable,
                                              sum_by_variable=sum_by_variable,
                                              order_by_count=FALSE,
                                              show_variable_totals=TRUE,
                                              show_comparison_totals=TRUE,
                                              view_type="Confidence Interval",
                                              show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__sum__facet.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     facet_variable=comparison_variable,
                                                     sum_by_variable=sum_by_variable,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Bar",
                                                     show_dual_axes=TRUE))

    expect_error(rt_explore_plot_value_totals(dataset=credit_data,
                                              variable=variable,
                                              comparison_variable=comparison_variable,
                                              sum_by_variable=sum_by_variable,
                                              order_by_count=FALSE,
                                              show_variable_totals=TRUE,
                                              show_comparison_totals=TRUE,
                                              view_type="Confidence Interval - within Variable",
                                              show_dual_axes=TRUE))

    test_save_plot(file_name='data/plot_value_totals__var__comp__sum__stack.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable=comparison_variable,
                                                     sum_by_variable=sum_by_variable,
                                                     order_by_count=FALSE,
                                                     show_variable_totals=TRUE,
                                                     show_comparison_totals=TRUE,
                                                     view_type="Stack Percent",
                                                     show_dual_axes=TRUE))
})

test_that("rt_explore_plot_value_counts_against_categorical_fill", {
    # Stack-percent views of 'Checking Balance Col' broken down by 'Purpose Col': first using plain
    # row counts, then weighting by 'Amount Col'.
    variable <- 'Checking Balance Col'
    fill_variable <- 'Purpose Col'

    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)
    # blank out one value so that an NA category is part of the plotted data
    credit_data[1, variable] <- NA

    # counts
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_comparison_variable_purpose_stack.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=fill_variable,
            order_by_count=TRUE,
            show_variable_totals=TRUE,
            show_comparison_totals=TRUE,
            view_type="Stack Percent"
        )
    )

    # sums of 'Amount Col'
    test_save_plot(
        file_name='data/rt_explore_plot_value_counts_comparison_variable_purpose_stack_sum.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=fill_variable,
            sum_by_variable='Amount Col',
            order_by_count=TRUE,
            show_variable_totals=TRUE,
            show_comparison_totals=TRUE,
            view_type="Stack Percent"
        )
    )
})

test_that("rt_explore_plot_value_totals_sums", {
    # Reads data/credit.csv from scratch, renames the columns through test_helper__column_names(),
    # and blanks the first 'Checking Balance Col' value so that every scenario below also contains
    # an NA category.
    load_credit_data <- function() {
        fresh_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
        colnames(fresh_data) <- test_helper__column_names(fresh_data)
        fresh_data[1, 'Checking Balance Col'] <- NA
        fresh_data
    }

    variable <- 'Checking Balance Col'
    sum_by_variable <- 'Amount Col'
    comparison_variable <- 'Default Col'
    credit_data <- load_credit_data()

    # only the sum-by column supplied; everything else left at the function defaults
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals_sums_defaults.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            sum_by_variable=sum_by_variable
        )
    )

    # no comparison, ordering/totals switched off, larger base font
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals_sums_non_defaults.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=NULL,
            sum_by_variable=sum_by_variable,
            order_by_count=FALSE,
            show_variable_totals=FALSE,
            show_comparison_totals=FALSE,
            base_size=16
        )
    )

    # with a comparison variable, ordered by count
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals_sums_comparison_defaults.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            sum_by_variable=sum_by_variable,
            order_by_count=TRUE,
            show_variable_totals=TRUE,
            show_comparison_totals=TRUE,
            base_size=14
        )
    )

    # with a comparison variable, original ordering and no comparison totals
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals_sums_comparison_defaults_no_order.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            sum_by_variable=sum_by_variable,
            order_by_count=FALSE,
            show_variable_totals=TRUE,
            show_comparison_totals=FALSE,
            base_size=14
        )
    )

    ##########################################################################################################
    # SUM_BY column that is entirely NA, or NA for whole categories
    ##########################################################################################################
    comparison_variable <- 'Credit History Col'
    facet_variable <- 'Default Col'

    # --- every value of the sum-by column is NA ---
    credit_data <- load_credit_data()
    credit_data[[sum_by_variable]] <- NA
    expected_variable_values <- sort(unique(credit_data[[variable]]))

    results <- rt_explore_value_totals(dataset=credit_data,
                                       variable=variable,
                                       sum_by_variable=sum_by_variable)

    expect_identical(rt_remove_val(results[[variable]], NA), expected_variable_values)
    expect_true(all(results$sum == 0))
    expect_true(all(results$percent == 0))

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__all_nas__checking_balance.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            sum_by_variable=sum_by_variable
        )
    )

    results <- rt_explore_value_totals(dataset=credit_data,
                                       variable=variable,
                                       second_variable=comparison_variable,
                                       sum_by_variable=sum_by_variable)

    expect_identical(rt_remove_val(sort(unique(results[[variable]])), NA),
                     expected_variable_values)
    expect_identical(sort(unique(results[[comparison_variable]])),
                     sort(unique(credit_data[[comparison_variable]])))
    expect_true(all(results$sum == 0))
    expect_true(all(results$percent == 0))
    expect_true(all(results$group_percent == 0))

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__all_nas__comparison.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            sum_by_variable=sum_by_variable
        )
    )

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__all_nas__facet.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            sum_by_variable=sum_by_variable
        )
    )

    # --- sum-by column is NA wherever the facet variable equals 'yes' ---
    credit_data <- load_credit_data()
    credit_data[[sum_by_variable]] <- ifelse(credit_data[[facet_variable]] == 'yes',
                                             NA,
                                             credit_data[[sum_by_variable]])

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__all_facet_nas.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            sum_by_variable=sum_by_variable
        )
    )

    # --- sum-by column is NA wherever the main variable equals 'unknown' ---
    credit_data <- load_credit_data()
    credit_data[[sum_by_variable]] <- ifelse(credit_data[[variable]] == 'unknown',
                                             NA,
                                             credit_data[[sum_by_variable]])

    #credit_data %>% count(checking_balance, credit_history, default, wt=amount) %>% arrange(default, checking_balance, credit_history) %>% View()
    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__all_variable_nas.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            sum_by_variable=sum_by_variable
        )
    )

    # --- sum-by column is NA wherever the comparison variable equals 'good' ---
    credit_data <- load_credit_data()
    credit_data[[sum_by_variable]] <- ifelse(credit_data[[comparison_variable]] == 'good',
                                             NA,
                                             credit_data[[sum_by_variable]])

    test_save_plot(
        file_name='data/rt_explore_plot_value_totals__all_comparison_nas.png',
        plot=rt_explore_plot_value_totals(
            dataset=credit_data,
            variable=variable,
            comparison_variable=comparison_variable,
            facet_variable=facet_variable,
            sum_by_variable=sum_by_variable
        )
    )

})

test_that("rt_explore_plot_value_totals_multivalue_column", {
    # Verifies that supplying `multi_value_delimiter` does not change the totals when no cell is
    # actually delimited, and then plots 'Purpose Col' after some of its values have been turned into
    # delimited multi-value strings.

    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # baseline totals without any delimiter handling
    expected_totals <- rt_explore_value_totals(dataset=credit_data,
                                               variable='Purpose Col',
                                               multi_value_delimiter=NULL)
    expect_equal(sum(expected_totals$percent), 1)

    # first test with a delimiter when none of the cols are delimited
    found_totals <- rt_explore_value_totals(dataset=credit_data,
                                            variable='Purpose Col',
                                            multi_value_delimiter=', ')
    expect_true(rt_are_dataframes_equal(expected_totals, found_totals))

    # independently build the expected sum-by totals with dplyr::count()
    expected_sum_by_variable <- credit_data %>%
        count(`Purpose Col`, wt = `Months Loan Duration Col`, sort = TRUE)
    # compute the denominator up front so the mutate() below does not call sum() while a column
    # that is itself named `sum` is in scope
    grand_total <- sum(expected_sum_by_variable$n)
    expected_sum_by_variable <- expected_sum_by_variable %>%
        rename(sum = n) %>%
        mutate(percent = sum / grand_total) %>%
        arrange(`Purpose Col`) %>%
        as.data.frame()
    expect_equal(sum(expected_sum_by_variable$percent), 1)

    expect_true(rt_are_dataframes_equal(expected_sum_by_variable,
                                        rt_explore_value_totals(dataset=credit_data,
                                                                variable='Purpose Col',
                                                                sum_by_variable='Months Loan Duration Col',
                                                                multi_value_delimiter=', ')))

    # turn 'car' and 'business' into two-value delimited strings, then convert back to a factor
    credit_data <- credit_data %>%
        mutate(`Purpose Col` = case_when(
            `Purpose Col` == 'car' ~ 'car, car_test',
            `Purpose Col` == 'business' ~ 'business, business_test',
            TRUE ~ as.character(`Purpose Col`))) %>%
        arrange(`Purpose Col`) %>%
        mutate(`Purpose Col` = as.factor(`Purpose Col`))

    variable <- 'Purpose Col'
    # the file name spelling ('purose') is a runtime path and is intentionally left untouched
    test_save_plot(file_name='data/rt_explore_plot_value_totals_purose_multivalue.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable=variable,
                                                     comparison_variable = NULL,
                                                     multi_value_delimiter=', '))
})

test_that("rt_explore_plot_value_totals_multivalue_bug", {
    # Regression test: the number of columns a multi-value cell was separated into used to be
    # hardcoded, so any multi-value cell holding more than 2 values (e.g. a;b;c) lost the extra
    # values (e.g. `c` wouldn't get counted).
    ##########################################################################################################
    # CREATE THE DATASET
    ##########################################################################################################
    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # seq_len() rather than 1:nrow() so an empty data frame could never yield c(1, 0)
    credit_data$`Id Col` <- seq_len(nrow(credit_data))
    # sprinkle NAs over the columns used as variable / count-distinct / sum-by below
    credit_data$`Purpose Col`[1] <- NA
    credit_data$`Id Col`[2] <- NA
    credit_data$`Amount Col`[3] <- NA
    credit_data$`Purpose Col`[600] <- NA

    # totals before any value is turned into a multi-value string
    original_totals <- rt_explore_value_totals(dataset=credit_data,
                                               variable='Purpose Col',
                                               multi_value_delimiter=NULL)
    # original_sum_by <-  credit_data %>% count(purpose, wt=amount)
    # original_sum_by2 <-  credit_data %>% count(purpose, default, wt=amount)

    # 'car' becomes a 2-value cell and 'business' becomes a 4-value cell
    credit_data <- credit_data %>%
        mutate(`Purpose Col` = case_when(
            `Purpose Col` == 'car' ~ 'car, car_test',
            `Purpose Col` == 'business' ~ 'business, business_test2, business_test3, business_test4',
            TRUE ~ as.character(`Purpose Col`)))

    # set 2 rows to only 3 values for business, rather than 4
    credit_data$`Purpose Col`[30] <- 'business, business_test2, business_test3'
    credit_data$`Purpose Col`[31] <- 'business, business_test2, business_test3'
    credit_data$`Purpose Col` <- as.factor(credit_data$`Purpose Col`)

    ##########################################################################################################
    # CREATE THE EXPECTED TOTALS
    ##########################################################################################################
    car_count <- (original_totals %>% filter(`Purpose Col` == 'car'))$count
    business_count <- (original_totals %>% filter(`Purpose Col` == 'business'))$count

    # every added value inherits the count of the value it was attached to
    expected_totals <- data.frame(`Purpose Col`=c('car_test', 'business_test2', 'business_test3', 'business_test4'),
                                  # subtract 2 from business_test4 because we changed index 30/31
                                  count=c(car_count, business_count, business_count, business_count - 2),
                                  stringsAsFactors = FALSE,
                                  check.names = FALSE) %>%
        bind_rows(original_totals %>% mutate(`Purpose Col` = as.character(`Purpose Col`))) %>%
        arrange(`Purpose Col`)
    # percentages are recomputed because the total number of counted values has grown
    expected_totals <- expected_totals %>% mutate(percent = count / sum(count))

    ##########################################################################################################
    # VALIDATE EXPECTED IS THE SAME AS ACTUAL
    ##########################################################################################################
    actual_totals <- rt_explore_value_totals(dataset=credit_data,
                                             variable='Purpose Col',
                                             multi_value_delimiter=', ')
    expect_true(rt_are_dataframes_equal(expected_totals, actual_totals))

    test_save_plot(file_name='data/rt_explore_plot_value_totals_purpose_multivalue_4_values.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable='Purpose Col',
                                                     order_by_count=FALSE,
                                                     multi_value_delimiter=', '))

    ##########################################################################################################
    # TEST COMPARISON
    ##########################################################################################################
    #credit_data %>% count(purpose, default)
    test_save_plot(file_name='data/rt_explore_plot_value_totals_purpose_multivalue_4_values_comp.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable='Purpose Col',
                                                     comparison_variable='Default Col',
                                                     order_by_count=FALSE,
                                                     multi_value_delimiter=', '))

    ##########################################################################################################
    # TEST SUM-BY
    ##########################################################################################################
    #credit_data %>% count(purpose, default, wt=amount)
    test_save_plot(file_name='data/rt_explore_plot_value_totals_purpose_multivalue_4_values_sum.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable='Purpose Col',
                                                     comparison_variable='Default Col',
                                                     sum_by_variable = 'Amount Col',
                                                     order_by_count=FALSE,
                                                     multi_value_delimiter=', '),
                   size_inches = c(8, 20))


    ##########################################################################################################
    # TEST COUNT-DISTINCT
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_value_totals_purpose_multivalue_4_values_dis.png',
                   plot=rt_explore_plot_value_totals(dataset=credit_data,
                                                     variable='Purpose Col',
                                                     comparison_variable='Default Col',
                                                     #sum_by_variable = 'amount',
                                                     count_distinct_variable = 'Id Col',
                                                     order_by_count=FALSE,
                                                     multi_value_delimiter=', '))

})

test_that("rt_explore_plot_categoric_heatmap", {
    # Saves heatmaps of two categoric variables: a column against a copy of itself (credit data),
    # then origin vs. destination on the flights data using counts, a sum-by column, and a
    # count-distinct column.

    credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit_data) <- test_helper__column_names(credit_data)

    # seq_len() rather than 1:nrow() so an empty data frame could never yield c(1, 0)
    credit_data$`Id Col` <- seq_len(nrow(credit_data))
    # exact copy of the purpose column, so x and y always agree
    credit_data$`Purpose2 Col` <- credit_data$`Purpose Col`

    # table(credit_data$purpose, credit_data$purpose2)
    # credit_data %>% ggplot(aes(x=purpose)) + geom_bar()
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__same_variables.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Purpose2 Col'),
                   size_inches = c(4, 4))

    # table(credit_data$purpose, credit_data$purpose2)
    # credit_data %>% ggplot(aes(x=purpose)) + geom_bar()
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__same_variables__no_percentages.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Purpose2 Col',
                                                          include_percentages = FALSE),
                   size_inches = c(4, 4))

    # table(fct_lump(flights$dest, n = 10), flights$origin) %>% t()
    # flights %>% ggplot(aes(x=origin)) + geom_bar()
    # lump destinations with fct_lump(n = 10) to keep the heatmap to a manageable number of cells
    temp_df <- flights %>% mutate(dest=fct_lump(dest, n = 10))
    colnames(temp_df) <- test_helper__column_names(temp_df)

    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__flights.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=temp_df,
                                                          x_variable='Origin Col',
                                                          y_variable='Dest Col'))

    # same data with the axes swapped
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__flights2.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=temp_df,
                                                          x_variable='Dest Col',
                                                          y_variable='Origin Col'))

    #flights %>% mutate(dest=fct_lump(dest, n = 10)) %>% count(dest, origin, wt=dep_delay) %>% View()
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__flights__sum_by.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=temp_df,
                                                          x_variable='Dest Col',
                                                          y_variable='Origin Col',
                                                          sum_by_variable = 'Dep Delay Col'))

    # flights %>%
    #     mutate(dest=fct_lump(dest, n = 10)) %>%
    #     group_by(dest, origin) %>%
    #     summarise(n=n_distinct(flight)) %>%
    #     View()
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__flights__count_distinct.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=temp_df,
                                                          x_variable='Dest Col',
                                                          y_variable='Origin Col',
                                                          count_distinct_variable = 'Flight Col'))

    # remove the stray Rplots.pdf if one was left in the working directory
    if(file.exists("Rplots.pdf")) {
        file.remove("Rplots.pdf")
    }
})

test_that("rt_explore_plot_categoric_heatmap_NAs", {
    # Exercises rt_explore_plot_categoric_heatmap() on credit data containing missing values, first
    # with the text columns read as character strings and then again read as factors. Every plot is
    # passed to test_save_plot() under its own file name.

    # Reads data/credit.csv and injects the missing values shared by both variants:
    #   * `purpose` is NA in rows 1 and 2, and `purpose2` is an exact copy of it (NAs included)
    #   * `credit_history` is NA in row 2
    #   * `id` (a running row number) is NA in row 4
    # Earlier revisions also wrote NA into `purpose2` before that column was created; the column was
    # then replaced wholesale by the copy of `purpose`, so that write never had any effect and has
    # been removed. Column names are finally mapped through test_helper__column_names().
    load_credit_with_nas <- function(strings_as_factors) {
        credit_data <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors = strings_as_factors)
        credit_data$id <- seq_len(nrow(credit_data))

        credit_data[1, 'purpose'] <- NA
        credit_data[2, 'purpose'] <- NA
        credit_data[2, 'credit_history'] <- NA
        credit_data[4, 'id'] <- NA
        credit_data$purpose2 <- credit_data$purpose

        colnames(credit_data) <- test_helper__column_names(credit_data)
        credit_data
    }

    # test with strings
    credit_data <- load_credit_with_nas(strings_as_factors = FALSE)

    # same variable on both axes (`Purpose2 Col` mirrors `Purpose Col`)
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__same_variables_NAs.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Purpose2 Col'),
                   size_inches = c(4, 4))

    # cells summed by amount while the axis variables contain NAs
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__sum_by__NA.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Credit History Col',
                                                          sum_by_variable = 'Amount Col'))

    # additionally blank out the summed value itself in row 5
    # (original author's note for that row: car | poor | 4870)
    credit_data[5, 'Amount Col'] <- NA
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__sum_by__NA2.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Credit History Col',
                                                          sum_by_variable = 'Amount Col'))

    # distinct count over the id column, which holds an NA in row 4
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__count_distinct__NA.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Credit History Col',
                                                          count_distinct_variable = 'Id Col'))


    # test with factors (same scenarios, `_factor` file names)
    credit_data <- load_credit_with_nas(strings_as_factors = TRUE)

    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__same_variables_NAs_factor.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Purpose2 Col'),
                   size_inches = c(4, 4))

    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__sum_by__NA_factor.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Credit History Col',
                                                          sum_by_variable = 'Amount Col'))

    # as above: row 5 additionally loses its amount
    credit_data[5, 'Amount Col'] <- NA
    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__sum_by__NA2_factor.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Credit History Col',
                                                          sum_by_variable = 'Amount Col'))

    test_save_plot(file_name='data/rt_explore_plot_categoric_heatmap__count_distinct__NA_factor.png',
                   plot=rt_explore_plot_categoric_heatmap(dataset=credit_data,
                                                          x_variable='Purpose Col',
                                                          y_variable='Credit History Col',
                                                          count_distinct_variable = 'Id Col'))

    # remove Rplots.pdf from the working directory if one is present after plotting
    if(file.exists("Rplots.pdf")) {
        file.remove("Rplots.pdf")
    }
})

test_that("rt_explore_plot_numeric_heatmap", {
    # Covers rt_explore_plot_numeric_heatmap() for age vs. amount on credit data with a few missing
    # values: default binning, an explicit `n_cuts`, and custom cut sequences for x, y, and both.

    credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    # rows 1-2 lose `age` and rows 2-3 lose `amount`, so row 2 is missing both
    credit[1:2, 'age'] <- NA
    credit[2:3, 'amount'] <- NA

    colnames(credit) <- test_helper__column_names(credit)

    # Every scenario plots the same two columns at 4x4 inches; only the binning options passed
    # through `...` differ. The plot stays an unevaluated argument until test_save_plot() uses it.
    save_heatmap <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_numeric_heatmap(dataset = credit,
                                                            x_variable = 'Age Col',
                                                            y_variable = 'Amount Col',
                                                            ...),
                       size_inches = c(4, 4))
    }

    # default binning, then an explicit number of cuts
    save_heatmap('data/rt_explore_plot_numeric_heatmap.png')
    save_heatmap('data/rt_explore_plot_numeric_heatmap__n_cut_10.png', n_cuts = 10)

    # hand-picked bin edges for each axis
    age_breaks <- c(18, 21, 25, 30, 50, 100)
    amount_breaks <- c(100, 500, 1000, 2500, 5000, 10000, 100000)

    save_heatmap('data/rt_explore_plot_numeric_heatmap__custom_x_cut.png',
                 x_cut_sequence = age_breaks)
    save_heatmap('data/rt_explore_plot_numeric_heatmap__custom_y_cut.png',
                 y_cut_sequence = amount_breaks)
    save_heatmap('data/rt_explore_plot_numeric_heatmap__custom_x_y_cut.png',
                 x_cut_sequence = age_breaks,
                 y_cut_sequence = amount_breaks)

    # remove Rplots.pdf from the working directory if one is present after plotting
    stray_pdf <- "Rplots.pdf"
    if(file.exists(stray_pdf)) {
        file.remove(stray_pdf)
    }
})

test_that("rt_explore_plot_boxplot", {
    # Covers rt_explore_plot_boxplot() on the credit data: a single boxplot, comparison and colour
    # groupings, simple mode, log-scaled y axis, y zoom limits (NULL and NA bounds), missing values
    # in the grouping columns, and the rt_pretty_*() variants of the inputs.
    credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit) <- test_helper__column_names(credit)

    duration_col <- 'Months Loan Duration Col'
    default_col <- 'Default Col'
    balance_col <- 'Checking Balance Col'

    # Forwards every argument untouched to rt_explore_plot_boxplot(); the plot stays an unevaluated
    # argument until test_save_plot() uses it.
    save_boxplot <- function(file_name, ...) {
        test_save_plot(file_name=file_name, plot=rt_explore_plot_boxplot(...))
    }

    # ---- single boxplot, no comparison variable ----
    save_boxplot('data/rt_explore_plot_boxplot_standard.png',
                 dataset=credit, variable=duration_col, comparison_variable=NULL,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_standard_zoom_min.png',
                 dataset=credit, variable=duration_col, comparison_variable=NULL,
                 y_zoom_min=20, y_zoom_max=NULL, base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_standard_zoom_max.png',
                 dataset=credit, variable=duration_col, comparison_variable=NULL,
                 y_zoom_min=NULL, y_zoom_max=40, base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_standard_zoom_both.png',
                 dataset=credit, variable=duration_col, comparison_variable=NULL,
                 y_zoom_min=20, y_zoom_max=40, base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_standard__log_scale_y.png',
                 dataset=credit, variable=duration_col, comparison_variable=NULL,
                 log_scale_y=TRUE, base_size=11)

    # ---- grouped by the default column ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_comparison__simple.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 simple_mode=TRUE,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_standard__log_scale_y__comp.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 log_scale_y=TRUE, base_size=11)

    # ---- grouped by checking balance, optionally coloured by default ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison2.png',
                 dataset=credit, variable=duration_col, comparison_variable=balance_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_color__defualt.png',
                 dataset=credit, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_standard__log_scale_y__color.png',
                 dataset=credit, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 log_scale_y=TRUE, base_size=11)
    # simple mode is expected to make no difference once a colour variable is supplied
    save_boxplot('data/rt_explore_plot_boxplot_color__defualt__simple.png',
                 dataset=credit, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 simple_mode=TRUE,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # ---- missing values in the grouping columns, added one step at a time ----
    credit_na <- credit
    credit_na[1, default_col] <- NA

    save_boxplot('data/rt_explore_plot_boxplot__default__NAs.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot__default__NAs__simple.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=default_col,
                 simple_mode = TRUE,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # row 1 now lacks both default and checking balance
    credit_na[1, balance_col] <- NA
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs2.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # row 2 lacks only the checking balance
    credit_na[2, balance_col] <- NA
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs3.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # row 3 lacks only the default value
    credit_na[3, default_col] <- NA
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs4.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # ---- dataset and variable names passed through the rt_pretty_*() helpers ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison_pretty.png',
                 dataset=rt_pretty_dataset(credit),
                 variable=rt_pretty_text(duration_col),
                 comparison_variable=rt_pretty_text(default_col),
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # ---- zoom limits with a comparison variable; NA is used for the open bound ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison_zoom_min.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 y_zoom_min=20,
                 y_zoom_max=NA,  # Check NA
                 base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_comparison_zoom_max.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 y_zoom_min=NA,  # Check NA
                 y_zoom_max=40,
                 base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_comparison_zoom_both.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 y_zoom_min=20, y_zoom_max=40, base_size=15)
})

test_that("rt_explore_plot_boxplot_facet", {
    # Repeats the grouped boxplot scenarios with a facet variable added: comparison and colour
    # groupings, simple mode, missing values (including in the facet column), the rt_pretty_*()
    # variants of the inputs, and y zoom limits.
    credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit) <- test_helper__column_names(credit)

    duration_col <- 'Months Loan Duration Col'
    default_col <- 'Default Col'
    balance_col <- 'Checking Balance Col'
    facet_col <- 'Phone Col'

    # Forwards every argument untouched to rt_explore_plot_boxplot(); the plot stays an unevaluated
    # argument until test_save_plot() uses it.
    save_boxplot <- function(file_name, ...) {
        test_save_plot(file_name=file_name, plot=rt_explore_plot_boxplot(...))
    }

    # a facet variable without a comparison variable must raise an error
    expect_error(rt_explore_plot_boxplot(dataset=credit,
                                         variable=duration_col,
                                         comparison_variable=NULL,
                                         facet_variable=facet_col,
                                         y_zoom_min=NULL,
                                         y_zoom_max=NULL,
                                         base_size=11))

    # ---- complete data ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison_facet.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_comparison_facet__simple.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 simple_mode=TRUE,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_comparison2_facet.png',
                 dataset=credit, variable=duration_col, comparison_variable=balance_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_color__default_facet.png',
                 dataset=credit, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # ---- missing values: row 1 loses default, row 2 loses the facet value ----
    credit_na <- credit
    credit_na[1, default_col] <- NA
    credit_na[2, facet_col] <- NA

    save_boxplot('data/rt_explore_plot_boxplot__NAs_facet.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot__NAs_facet__simple.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 simple_mode=TRUE,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs_facet.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # row 1 additionally loses its checking balance
    credit_na[1, balance_col] <- NA
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs2_facet.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # row 2 additionally loses its checking balance
    credit_na[2, balance_col] <- NA
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs3_facet.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # row 3 loses only the default value
    credit_na[3, default_col] <- NA
    save_boxplot('data/rt_explore_plot_boxplot_color__NAs4_facet.png',
                 dataset=credit_na, variable=duration_col, comparison_variable=balance_col,
                 color_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # ---- dataset and variable names passed through the rt_pretty_*() helpers ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison_pretty_facet.png',
                 dataset=rt_pretty_dataset(credit),
                 variable=rt_pretty_text(duration_col),
                 comparison_variable=rt_pretty_text(default_col),
                 facet_variable=rt_pretty_text(facet_col),
                 y_zoom_min=NULL, y_zoom_max=NULL, base_size=11)

    # ---- zoom limits; NA is used for the open bound ----
    save_boxplot('data/rt_explore_plot_boxplot_comparison_zoom_min_facet.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=20,
                 y_zoom_max=NA,  # Check NA
                 base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_comparison_zoom_max_facet.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=NA,  # Check NA
                 y_zoom_max=40,
                 base_size=15)
    save_boxplot('data/rt_explore_plot_boxplot_comparison_zoom_both_facet.png',
                 dataset=credit, variable=duration_col, comparison_variable=default_col,
                 facet_variable=facet_col,
                 y_zoom_min=20, y_zoom_max=40, base_size=15)
})

test_that("rt_explore_plot_boxplot - NA numeric values", {
    # Regression coverage for the record count printed below the median line: it was reporting the
    # total number of records in the group, whereas it should report the number of non-NA values
    # the boxplot is actually based on.
    credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    amount_col <- 'amount'
    balance_col <- 'checking_balance'
    default_col <- 'default'

    # blank out `amount` for every record in the '< 0 DM' checking-balance group ...
    credit <- credit %>%
        mutate(amount = ifelse(checking_balance == '< 0 DM', NA, amount))

    # ... and at 300 reproducibly sampled positions out of 1:1000
    set.seed(42)
    na_positions <- sample(x=1:1000, size=300)
    credit$amount[na_positions] <- NA

    # Handy when inspecting the expected counts by hand:
    # credit %>%
    #     group_by(checking_balance) %>%
    #     summarise(med=median(amount, na.rm = TRUE),
    #               cnt_non_na=sum(!is.na(amount)),
    #               cnt=n())

    # single boxplot; warnings raised while building the plot are silenced
    test_save_plot(
        file_name='data/rt_explore_plot_boxplot_standard__num_nas.png',
        plot=suppressWarnings(
            rt_explore_plot_boxplot(dataset=credit,
                                    variable=amount_col,
                                    comparison_variable=NULL,
                                    y_zoom_min=NULL,
                                    y_zoom_max=NULL,
                                    base_size=11)
        )
    )

    # grouped by checking balance; warnings silenced as above
    test_save_plot(
        file_name='data/rt_explore_plot_boxplot_comparison__num_nas.png',
        plot=suppressWarnings(
            rt_explore_plot_boxplot(dataset=credit,
                                    variable=amount_col,
                                    comparison_variable=balance_col,
                                    y_zoom_min=NULL,
                                    y_zoom_max=NULL,
                                    base_size=11)
        )
    )

    # grouped by checking balance and coloured by default (warnings not silenced here)
    test_save_plot(
        file_name='data/rt_explore_plot_boxplot_color__defualt__num_nas.png',
        plot=rt_explore_plot_boxplot(dataset=credit,
                                     variable=amount_col,
                                     comparison_variable=balance_col,
                                     color_variable=default_col,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     base_size=11)
    )

    # same plot once the colour column itself contains an NA (row 6)
    credit_na <- credit
    credit_na[6, 'default'] <- NA
    # credit_na %>%
    #     group_by(checking_balance, default) %>%
    #     summarise(med=median(amount, na.rm = TRUE),
    #               cnt_non_na=sum(!is.na(amount)),
    #               cnt=n())
    test_save_plot(
        file_name='data/rt_explore_plot_boxplot_color__NAs__num_nas.png',
        plot=rt_explore_plot_boxplot(dataset=credit_na,
                                     variable=amount_col,
                                     comparison_variable=balance_col,
                                     color_variable=default_col,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     base_size=11)
    )
})

test_that("rt_explore_plot_histogram", {
    # Credit data whose columns are renamed through test_helper__column_names().
    credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit) <- test_helper__column_names(credit)

    duration_column <- 'Months Loan Duration Col'

    # Forward all plotting arguments untouched to rt_explore_plot_histogram()
    # and pass the result to test_save_plot() for the given `file_name`.
    save_histogram <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_histogram(...))
    }

    # ---- bin counts, no zoom ----
    save_histogram('data/rt_explore_plot_histogram_standard.png',
                   dataset=credit, variable=duration_column,
                   num_bins=30,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    # Same arguments, but dataset and column name go through the rt_pretty_*
    # helpers first.
    save_histogram('data/rt_explore_plot_histogram_pretty.png',
                   dataset=rt_pretty_dataset(credit),
                   variable=rt_pretty_text(duration_column),
                   num_bins=30,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    save_histogram('data/rt_explore_plot_histogram_num_bins.png',
                   dataset=credit, variable=duration_column,
                   num_bins=15,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    # ---- x zoom: lower bound only, upper bound only, both ----
    save_histogram('data/rt_explore_plot_histogram_standard_zoom_min.png',
                   dataset=credit, variable=duration_column,
                   num_bins=30,
                   x_zoom_min=20, x_zoom_max=NULL,
                   base_size=15)

    save_histogram('data/rt_explore_plot_histogram_standard_zoom_max.png',
                   dataset=credit, variable=duration_column,
                   num_bins=30,
                   x_zoom_min=NULL, x_zoom_max=40,
                   base_size=15)

    save_histogram('data/rt_explore_plot_histogram_standard_zoom_both.png',
                   dataset=credit, variable=duration_column,
                   num_bins=30,
                   x_zoom_min=20, x_zoom_max=40,
                   base_size=15)

    save_histogram('data/rt_explore_plot_histogram_standard_zoom_both_num_bins.png',
                   dataset=credit, variable=duration_column,
                   num_bins=15,
                   x_zoom_min=20, x_zoom_max=40,
                   base_size=15)

    # ---- log-scaled x axis on the amount column ----
    save_histogram('data/rt_explore_plot_histogram__log_scale_x__comp.png',
                   dataset=credit,
                   variable="Amount Col", comparison_variable='Default Col',
                   num_bins=15,
                   log_scale_x=TRUE,
                   base_size=11)

    # From here on the comparison column holds a missing value in row 1.
    credit[1, "Default Col"] <- NA

    save_histogram('data/rt_explore_plot_histogram__log_scale_x.png',
                   dataset=credit,
                   variable="Amount Col",
                   num_bins=15,
                   log_scale_x=TRUE,
                   base_size=11)

    save_histogram('data/rt_explore_plot_histogram__log_scale_x__comp__NA.png',
                   dataset=credit,
                   variable="Amount Col", comparison_variable='Default Col',
                   num_bins=15,
                   log_scale_x=TRUE,
                   base_size=11)
})

test_that("rt_explore_plot_histogram_with_categoric_comparison", {
    # Credit data whose columns are renamed through test_helper__column_names().
    credit <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(credit) <- test_helper__column_names(credit)

    duration_column <- 'Months Loan Duration Col'
    balance_column <- 'Checking Balance Col'

    # Forward all plotting arguments untouched to rt_explore_plot_histogram()
    # and pass the result to test_save_plot() for the given `file_name`.
    save_histogram <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_histogram(...))
    }

    # ---- bin counts, no zoom ----
    save_histogram('data/rt_explore_plot_histogram_with_comp_standard.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   num_bins=30,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    # Same arguments, but dataset and both column names go through the
    # rt_pretty_* helpers first.
    save_histogram('data/rt_explore_plot_histogram_with_comp_pretty.png',
                   dataset=rt_pretty_dataset(credit),
                   variable=rt_pretty_text(duration_column),
                   comparison_variable=rt_pretty_text(balance_column),
                   num_bins=30,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    save_histogram('data/rt_explore_plot_histogram_with_comp_num_bins.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   num_bins=15,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    # No num_bins is supplied for this case and the `density` argument is
    # disabled, so only the function's defaults are exercised.
    # NOTE(review): the file name still says "density" - confirm whether the
    # argument should be restored or the case renamed.
    save_histogram('data/rt_explore_plot_histogram_with_comp_density.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   #density = TRUE,
                   x_zoom_min=NULL, x_zoom_max=NULL,
                   base_size=11)

    # ---- x zoom: lower bound only, upper bound only, both ----
    save_histogram('data/rt_explore_plot_histogram_with_comp_zoom_min.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   num_bins=30,
                   x_zoom_min=20, x_zoom_max=NULL,
                   base_size=15)

    save_histogram('data/rt_explore_plot_histogram_with_comp_zoom_max.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   num_bins=30,
                   x_zoom_min=NULL, x_zoom_max=40,
                   base_size=15)

    save_histogram('data/rt_explore_plot_histogram_with_comp_zoom_both.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   num_bins=30,
                   x_zoom_min=20, x_zoom_max=40,
                   base_size=15)

    save_histogram('data/rt_explore_plot_histogram_with_comp_zoom_both_num_bins.png',
                   dataset=credit,
                   variable=duration_column,
                   comparison_variable=balance_column,
                   num_bins=15,
                   x_zoom_min=20, x_zoom_max=40,
                   base_size=15)
})

test_that("rt_explore_plot_scatterplot", {
    # Housing data whose columns are renamed through test_helper__column_names().
    housing <- read.csv("data/housing.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(housing) <- test_helper__column_names(housing)

    value_column <- 'Median House Value Col'
    income_column <- 'Median Income Col'

    # Forward all plotting arguments untouched to rt_explore_plot_scatter() and
    # pass the result to test_save_plot() for the given `file_name`.
    save_scatter <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_scatter(...))
    }

    # Most cases plot the same dataset and the same pair of columns; only the
    # remaining arguments vary.
    save_value_vs_income <- function(file_name, ...) {
        save_scatter(file_name,
                     dataset=housing,
                     variable=value_column,
                     comparison_variable=income_column,
                     ...)
    }

    # ---- baseline, swapped columns, pretty names, lighter alpha ----
    save_value_vs_income('data/rt_explore_plot_scatter.png',
                         alpha=0.3,
                         x_zoom_min=NULL, x_zoom_max=NULL,
                         y_zoom_min=NULL, y_zoom_max=NULL,
                         base_size=11)

    save_scatter('data/rt_explore_plot_scatter_swap.png',
                 dataset=housing,
                 variable=income_column,
                 comparison_variable=value_column,
                 alpha=0.3,
                 x_zoom_min=NULL, x_zoom_max=NULL,
                 y_zoom_min=NULL, y_zoom_max=NULL,
                 base_size=11)

    save_scatter('data/rt_explore_plot_scatter_pretty.png',
                 dataset=rt_pretty_dataset(housing),
                 variable=rt_pretty_text(value_column),
                 comparison_variable=rt_pretty_text(income_column),
                 alpha=0.3,
                 x_zoom_min=NULL, x_zoom_max=NULL,
                 y_zoom_min=NULL, y_zoom_max=NULL,
                 base_size=11)

    save_value_vs_income('data/rt_explore_plot_scatter_alpha.png',
                         alpha=0.1,
                         x_zoom_min=NULL, x_zoom_max=NULL,
                         y_zoom_min=NULL, y_zoom_max=NULL,
                         base_size=15)

    # ---- x zoom: lower bound only, upper bound only, both ----
    save_value_vs_income('data/rt_explore_plot_scatter_x_zoom_min.png',
                         alpha=0.3,
                         x_zoom_min=5, x_zoom_max=NULL,
                         y_zoom_min=NULL, y_zoom_max=NULL,
                         base_size=11)

    save_value_vs_income('data/rt_explore_plot_scatter_x_zoom_max.png',
                         alpha=0.3,
                         x_zoom_min=NULL, x_zoom_max=10,
                         y_zoom_min=NULL, y_zoom_max=NULL,
                         base_size=11)

    save_value_vs_income('data/rt_explore_plot_scatter_x_zoom_both.png',
                         alpha=0.3,
                         x_zoom_min=5, x_zoom_max=10,
                         y_zoom_min=NULL, y_zoom_max=NULL,
                         base_size=11)

    # ---- y zoom: lower bound only, upper bound only, both ----
    save_value_vs_income('data/rt_explore_plot_scatter_y_zoom_min.png',
                         alpha=0.3,
                         x_zoom_min=NULL, x_zoom_max=NULL,
                         y_zoom_min=200000, y_zoom_max=NULL,
                         base_size=11)

    save_value_vs_income('data/rt_explore_plot_scatter_y_zoom_max.png',
                         alpha=0.3,
                         x_zoom_min=NULL, x_zoom_max=NULL,
                         y_zoom_min=NULL, y_zoom_max=300000,
                         base_size=11)

    save_value_vs_income('data/rt_explore_plot_scatter_y_zoom_both.png',
                         alpha=0.3,
                         x_zoom_min=NULL, x_zoom_max=NULL,
                         y_zoom_min=200000, y_zoom_max=300000,
                         base_size=11)

    # ---- zoom on both axes ----
    save_value_vs_income('data/rt_explore_plot_scatter_zoom_min_both.png',
                         alpha=0.3,
                         x_zoom_min=10, x_zoom_max=NULL,
                         y_zoom_min=200000, y_zoom_max=NULL,
                         base_size=11)

    save_value_vs_income('data/rt_explore_plot_scatter_all.png',
                         alpha=0.1,
                         x_zoom_min=5, x_zoom_max=10,
                         y_zoom_min=200000, y_zoom_max=300000,
                         base_size=15)

    # ---- log scales (zoom arguments are not supplied in the first three) ----
    save_value_vs_income('data/rt_explore_plot_scatter_log_scale_x.png',
                         alpha=0.1,
                         log_scale_x = TRUE,
                         base_size=15)

    save_value_vs_income('data/rt_explore_plot_scatter_log_scale_y.png',
                         alpha=0.1,
                         log_scale_y = TRUE,
                         base_size=15)

    save_value_vs_income('data/rt_explore_plot_scatter_log_scale_xy.png',
                         alpha=0.1,
                         log_scale_x = TRUE,
                         log_scale_y = TRUE,
                         base_size=15)

    save_value_vs_income('data/rt_explore_plot_scatter_all_log.png',
                         alpha=0.1,
                         x_zoom_min=5, x_zoom_max=10,
                         y_zoom_min=200000, y_zoom_max=300000,
                         log_scale_x = TRUE,
                         log_scale_y = TRUE,
                         base_size=15)
})

test_that('rt_explore_plot_scatterplot_size_color', {
    # Housing data whose columns are renamed through test_helper__column_names().
    housing <- read.csv("data/housing.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(housing) <- test_helper__column_names(housing)

    value_column <- 'Median House Value Col'
    income_column <- 'Median Income Col'

    # Every case uses the same dataset, columns, alpha, (absent) zoom and base
    # size; only the colour/size mapping passed through `...` differs.
    # The plot is handed to test_save_plot() positionally.
    save_mapped_scatter <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       rt_explore_plot_scatter(dataset=housing,
                                               variable=value_column,
                                               comparison_variable=income_column,
                                               ...,
                                               alpha=0.3,
                                               x_zoom_min=NULL,
                                               x_zoom_max=NULL,
                                               y_zoom_min=NULL,
                                               y_zoom_max=NULL,
                                               base_size=11))
    }

    # colour only
    save_mapped_scatter('data/rt_explore_plot_scatter_color.png',
                        color_variable = 'Ocean Proximity Col')

    # size only
    save_mapped_scatter('data/rt_explore_plot_scatter_size.png',
                        size_variable = 'Housing Median Age Col')

    save_mapped_scatter('data/rt_explore_plot_scatter_size__categoric.png',
                        size_variable = 'Ocean Proximity Col')

    # colour and size together
    save_mapped_scatter('data/rt_explore_plot_scatter_size_color_numeric.png',
                        color_variable = 'Total Rooms Col',
                        size_variable = 'Housing Median Age Col')

    save_mapped_scatter('data/rt_explore_plot_scatter_size_color_categoric.png',
                        color_variable = 'Ocean Proximity Col',
                        size_variable = 'Housing Median Age Col')
})

test_that("rt_explore_plot_scatterplot_jitter", {
    # The same iris column is used for both `variable` and
    # `comparison_variable`, with jitter switched on.
    # NOTE(review): confirm that plotting Sepal.Length against itself is
    # intentional rather than a copy/paste slip.
    sepal_column <- 'Sepal.Length'

    test_save_plot(
        file_name='data/rt_explore_plot_scatter_jitter.png',
        plot=rt_explore_plot_scatter(dataset=iris,
                                     variable=sepal_column,
                                     comparison_variable=sepal_column,
                                     alpha=0.1,
                                     jitter=TRUE,
                                     base_size=15)
    )
})

test_that("rt_explore_plot_aggregate_2_numerics", {
    dataset <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(dataset) <- test_helper__column_names(dataset)

    # make sure it handles NAs
    dataset[1, 'Months Loan Duration Col'] <- NA
    variable <- 'Amount Col'
    comparison_variable <- 'Months Loan Duration Col'

    aggregation_function <- rt_geometric_mean
    aggregation_function_name <- "Geometric Mean"

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__boxplot_0_min.png',
                   plot=suppressWarnings(rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=NULL,
                                                             aggregation_function_name=NULL,
                                                             aggregation_count_minimum=0, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=TRUE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=NULL,
                                                             x_zoom_max=NULL,
                                                             y_zoom_min=NULL,
                                                             y_zoom_max=NULL,
                                                             base_size=11)))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__boxplot_30_min.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=NULL,
                                                             aggregation_function_name=NULL,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=TRUE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=NULL,
                                                             x_zoom_max=NULL,
                                                             y_zoom_min=NULL,
                                                             y_zoom_max=NULL,
                                                             base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__geometric_mean.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=TRUE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=NULL,
                                                             x_zoom_max=NULL,
                                                             y_zoom_min=NULL,
                                                             y_zoom_max=NULL,
                                                             base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__geometric_mean__2.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=FALSE,
                                                             show_points=FALSE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=10,
                                                             x_zoom_max=40,
                                                             y_zoom_min=1900,
                                                             y_zoom_max=5000,
                                                             base_size=11))


    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__scale_log_x.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=FALSE,
                                                             show_points=FALSE,
                                                             show_labels=TRUE,
                                                             log_scale_x=TRUE,
                                                             base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__scale_log_y.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=FALSE,
                                                             show_points=FALSE,
                                                             show_labels=TRUE,
                                                             log_scale_y=TRUE,
                                                             base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__scale_log_xy.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=FALSE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             log_scale_x=TRUE,
                                                             log_scale_y=TRUE,
                                                             base_size=11))

    variable <- 'Months Loan Duration Col'
    comparison_variable <- 'Existing Loans Count Col'

    aggregation_function <- function(values) {
        return (mean(values, na.rm = TRUE))
    }
    aggregation_function_name <- "Mean"

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__boxplot_0_min.png',
                   plot=suppressWarnings(rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=NULL,
                                                             aggregation_function_name=NULL,
                                                             aggregation_count_minimum=0, # 0 here (matches the '_0_min' file name): the 30-sample minimum used by the neighbouring cases is not applied; those cases use 30 because bootstrap resampling e.g. a group that has 1 sample would pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=TRUE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=NULL,
                                                             x_zoom_max=NULL,
                                                             y_zoom_min=NULL,
                                                             y_zoom_max=NULL,
                                                             base_size=11)))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__boxplot_30_min.png',
                   plot=suppressWarnings(rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=NULL,
                                                             aggregation_function_name=NULL,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=TRUE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=NULL,
                                                             x_zoom_max=NULL,
                                                             y_zoom_min=NULL,
                                                             y_zoom_max=NULL,
                                                             base_size=11)))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__mean.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=TRUE,
                                                             show_points=TRUE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=NULL,
                                                             x_zoom_max=NULL,
                                                             y_zoom_min=NULL,
                                                             y_zoom_max=NULL,
                                                             base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_aggregate_2_numerics__loan_count__mean__2.png',
                   plot=rt_explore_plot_aggregate_2_numerics(dataset=dataset,
                                                             variable=variable,
                                                             comparison_variable=comparison_variable,
                                                             aggregation_function=aggregation_function,
                                                             aggregation_function_name=aggregation_function_name,
                                                             aggregation_count_minimum=30, # need at least 30 samples, otherwise when we bootstrap resample e.g. with a group that has 1 sample we'd pull e.g. 100 random samples of the same value
                                                             show_resampled_confidence_interval=FALSE,
                                                             show_points=FALSE,
                                                             show_labels=TRUE,
                                                             x_zoom_min=-1,
                                                             x_zoom_max=3,
                                                             y_zoom_min=4,
                                                             y_zoom_max=25,
                                                             base_size=11))

})

test_that("rt_explore_plot_categoric_numeric_aggregation", {

    dataset <- read.csv("data/credit.csv", header=TRUE, stringsAsFactors=TRUE)
    colnames(dataset) <- test_helper__column_names(dataset)

    categoric_variable <- 'Checking Balance Col'
    numeric_variable <- 'Amount Col'

    # make sure it handles NAs
    dataset[1, categoric_variable] <- NA
    dataset[1, numeric_variable] <- NA
    dataset[2, categoric_variable] <- NA
    dataset[3, numeric_variable] <- NA

    color_variable <- NULL
    facet_variable <- NULL

    ##########################################################################################################
    # Categoric/Numeric - No Color or Facet
    ##########################################################################################################
    #dataset %>% count(checking_balance, wt=amount) %>% mutate(p=n/sum(n))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total_s.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      simple_mode=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean_s.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      simple_mode=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec_s.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      simple_mode=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median_s.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      simple_mode=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot_s.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      simple_mode=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    ##########################################################################################################
    # Categoric/Numeric - Color
    ##########################################################################################################
    color_variable <- 'Default Col'
    facet_variable <- NULL

    #dataset %>% count(checking_balance, default, wt=amount) %>% mutate(p=n/sum(n))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=FALSE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__color_s.png',
                  plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                     categoric_variable=categoric_variable,
                                                                     numeric_variable=numeric_variable,
                                                                     aggregation_type='Mean',
                                                                     color_variable=color_variable,
                                                                     facet_variable=facet_variable,
                                                                     show_variable_totals=TRUE,
                                                                     show_comparison_totals=TRUE,
                                                                     simple_mode=TRUE,
                                                                     base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, default) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    ##########################################################################################################
    # Categoric/Numeric - Facet
    ##########################################################################################################
    facet_variable <- 'Default Col'
    color_variable <- NULL

    #dataset %>% count(checking_balance, default, wt=amount) %>% mutate(p=n/sum(n))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=FALSE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, default) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__facet.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave_per_rec__facet2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, default) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    ##########################################################################################################
    # Categoric/Numeric - Color/Facet
    ##########################################################################################################
    facet_variable <- 'Default Col'
    color_variable <- 'Phone Col'

    #dataset %>% count(checking_balance, phone, default, wt=amount) %>% mutate(p=n/sum(n))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet_color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__total__facet_color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Total',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=FALSE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, phone, default) %>% summarise(n=sum(!is.na(amount)), m=mean(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet_color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__mean__facet_color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Mean',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, phone, default) %>% summarise(n=n(), m=sum(amount, na.rm=TRUE) / n())
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave__facet_color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__ave__facet_color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Average Value Per Record',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    #dataset %>% group_by(checking_balance, phone, default) %>% summarise(n=sum(!is.na(amount)), m=median(amount, na.rm=TRUE))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet_color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__median__facet_color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Median',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet_color.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=TRUE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=11))
    test_save_plot(file_name='data/rt_explore_plot_categoric_numeric_aggregation__boxplot__facet_color2.png',
                   plot=rt_explore_plot_categoric_numeric_aggregation(dataset=dataset,
                                                                      categoric_variable=categoric_variable,
                                                                      numeric_variable=numeric_variable,
                                                                      aggregation_type='Boxplot',
                                                                      color_variable=color_variable,
                                                                      facet_variable=facet_variable,
                                                                      show_variable_totals=FALSE,
                                                                      show_comparison_totals=TRUE,
                                                                      base_size=15))

})

test_that("rt_explore_plot_scatterplot_labels", {
    # Saves a series of scatter plots built from the 2002 rows of gapminder,
    # varying the label, size, and zoom arguments.
    gapminder_2002 <- data.frame(gapminder) %>% filter(year == 2002)
    # NOTE(review): test_helper__column_names() is defined outside this chunk; the
    # column names used below suggest it yields names of the form '<Name> Col' -- confirm.
    colnames(gapminder_2002) <- test_helper__column_names(gapminder_2002)

    # Every plot shares the same dataset, variable, and comparison variable;
    # only the output file and the extra arguments forwarded via `...` differ.
    save_scatter <- function(file_name, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_explore_plot_scatter(dataset=gapminder_2002,
                                                    variable='LifeExp Col',
                                                    comparison_variable='GdpPercap Col',
                                                    ...))
    }

    # No optional arguments.
    save_scatter('data/rt_explore_plot_scatter__gapminder__defaults.png')

    # Single label variable, with and without a size variable.
    save_scatter('data/rt_explore_plot_scatter__gapminder__country_label.png',
                 label_variables='Country Col')

    save_scatter('data/rt_explore_plot_scatter__gapminder__country_label__size.png',
                 label_variables='Country Col',
                 size_variable='Pop Col')

    # Zoom arguments, with and without a size variable.
    save_scatter('data/rt_explore_plot_scatter__gapminder__country_label__zoom.png',
                 label_variables='Country Col',
                 x_zoom_min=25000,
                 x_zoom_max=40000,
                 y_zoom_min=75)

    save_scatter('data/rt_explore_plot_scatter__gapminder__country_label__zoom_size.png',
                 label_variables='Country Col',
                 size_variable='Pop Col',
                 x_zoom_min=25000,
                 x_zoom_max=40000,
                 y_zoom_min=75)

    # Other columns used as the label.
    save_scatter('data/rt_explore_plot_scatter__gapminder__lifeExp_label.png',
                 label_variables='LifeExp Col')

    save_scatter('data/rt_explore_plot_scatter__gapminder__pop_label.png',
                 label_variables='Pop Col')

    # Multiple label variables, in both orders.
    save_scatter('data/rt_explore_plot_scatter__gapminder__multi_label.png',
                 label_variables=c('Country Col', 'Year Col'))
    save_scatter('data/rt_explore_plot_scatter__gapminder__multi2_label.png',
                 label_variables=c('Year Col', 'Country Col'),
                 size_variable='Pop Col',
                 x_zoom_min=25000,
                 x_zoom_max=40000,
                 y_zoom_min=75)
})

test_that('rt_plot_funnel', {
    # Saves funnel plots for two sets of step names/values, each rendered once with
    # proportionate=FALSE and once with proportionate=TRUE.

    # Builds the funnel plot from the given steps (plus any extra arguments
    # forwarded through `...`) and hands it to test_save_plot().
    save_funnel <- function(file_name, step_names, step_values, ...) {
        test_save_plot(file_name=file_name,
                       plot=rt_funnel_plot(step_names=step_names, step_values=step_values, ...))
    }

    # First data set: plots carry an explicit title, subtitle, and caption.
    first_steps <- c("Step Z", "Step Y", "Step X", "Step W")
    first_values <- c(200, 60, 20, 10)

    save_funnel('data/rt_plot_funnel_proportionate_FALSE.png',
                step_names=first_steps,
                step_values=first_values,
                title="My title",
                subtitle="My Subtitle",
                caption="My Caption",
                proportionate=FALSE)
    save_funnel('data/rt_plot_funnel_proportionate_TRUE.png',
                step_names=first_steps,
                step_values=first_values,
                title="My title",
                subtitle="My Subtitle",
                caption="My Caption",
                proportionate=TRUE)

    # Second data set: no title/subtitle/caption supplied.
    second_steps <- c("Step W", "Step X", "Step Y", "Step Z")
    second_values <- c(2000, 1111, 50, 11)

    save_funnel('data/rt_plot_funnel_2_proportionate_FALSE.png',
                step_names=second_steps,
                step_values=second_values,
                proportionate=FALSE)
    save_funnel('data/rt_plot_funnel_2_proportionate_TRUE.png',
                step_names=second_steps,
                step_values=second_values,
                proportionate=TRUE)
})
# shane-kercheval/rtools documentation built on July 7, 2022, 8:31 a.m.