# tests/testthat/test_explore_time_series.R

# File-level setup: attach the packages used by the tests below and load the shared
# test helpers (test_save_plot(), test_helper__column_names(), ...).
context('Exploratory Analysis - Time Series')
library(testthat)
library(ggplot2)
library(dplyr)
# Turn off dplyr's summarise() grouping message. FALSE is spelled out because `F` is an
# ordinary variable that can be reassigned.
options(dplyr.summarise.inform=FALSE)
library(lubridate)
library(gapminder)
library(nycflights13)
library(forcats)
# library(scales)
source('test_helpers.R')


# Exercises rt_explore_plot_time_series() on the nycflights13 flights data: default
# record counts, validation of the comparison arguments, aggregated comparison variables,
# y-axis zooming, and colouring by a categorical column. Each plot is handed to
# test_save_plot() together with a target path under data/.
test_that('rt_explore_plot_time_series', {
    # one row per flight with a proper Date column; `cohort` is "<year>-<week number>"
    dataset <- flights %>%
        mutate(date = lubridate::make_date(year, month, day),
               cohort = paste0(year, '-', lubridate::week(date))) %>%
        data.frame() %>%
        select(date, dep_delay, dep_time, origin, cohort)
    # the helper renames the columns; the tests below refer to them as e.g. 'Date Col'
    colnames(dataset) <- test_helper__column_names(dataset)

    variable <- 'Date Col'
    comparison_variable <- 'Dep Delay Col'
    color_variable <- 'Origin Col'

    # NA-tolerant aggregations used as `comparison_function`
    comp_func_sum <- function(x) sum(x, na.rm=TRUE)
    comp_func_mean <- function(x) mean(x, na.rm=TRUE)
    comp_func_median <- function(x) median(x, na.rm=TRUE)

    ##########################################################################################################
    # defaults (no comparison variable)
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_default.png',
                   plot=rt_explore_plot_time_series(dataset=dataset, variable=variable))

    test_save_plot(file_name='data/rt_explore_plot_time_series_default_include_zero_false.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    include_zero_y_axis=FALSE))

    ##########################################################################################################
    # a comparison variable requires both comparison_function and comparison_function_name
    ##########################################################################################################
    expect_error(rt_explore_plot_time_series(dataset=dataset, variable=variable,
                                             comparison_variable=comparison_variable))
    expect_error(rt_explore_plot_time_series(dataset=dataset, variable=variable,
                                             comparison_variable=comparison_variable,
                                             comparison_function=comp_func_sum))
    expect_error(rt_explore_plot_time_series(dataset=dataset, variable=variable,
                                             comparison_variable=comparison_variable,
                                             comparison_function_name='Sum'))

    ##########################################################################################################
    # aggregated comparison variable
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_comparison_sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_sum,
                                                    comparison_function_name='Sum of'))

    test_save_plot(file_name='data/rt_explore_plot_time_series_comparison_mean.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_mean,
                                                    comparison_function_name='Average'))

    test_save_plot(file_name='data/rt_explore_plot_time_series_comparison_median.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_median,
                                                    comparison_function_name='Median'))

    ##########################################################################################################
    # y-axis zoom
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_comparison_median_zoom_min.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_median,
                                                    comparison_function_name='Median',
                                                    y_zoom_min=0))

    test_save_plot(file_name='data/rt_explore_plot_time_series_comparison_median_zoom_minmax.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_median,
                                                    comparison_function_name='Median',
                                                    y_zoom_min=0,
                                                    y_zoom_max=10,
                                                    base_size=15))

    test_save_plot(file_name='data/rt_explore_plot_time_series_zoom_min.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    y_zoom_min=750,
                                                    base_size=15))

    # explicit NULL upper bound combined with a lower bound
    test_save_plot(file_name='data/rt_explore_plot_time_series_dep_time_zoom_min.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable='Dep Time Col',
                                                    comparison_function=comp_func_median,
                                                    comparison_function_name='Median',
                                                    y_zoom_min=1000,
                                                    y_zoom_max=NULL,
                                                    base_size=15))

    ##########################################################################################################
    # colour variable
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_comparison_median_color.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_median,
                                                    comparison_function_name='Median',
                                                    color_variable=color_variable,
                                                    # y_zoom_min=0,
                                                    show_labels=TRUE,
                                                    y_zoom_max=25,
                                                    base_size=15))

    test_save_plot(file_name='data/rt_explore_plot_time_series_count_color.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    color_variable=color_variable))

    # December only, so that daily breaks with points and labels stay readable.
    # `date_breaks_width` is spelled out in full rather than relying on partial
    # argument matching of a truncated name.
    test_save_plot(file_name='data/rt_explore_plot_time_series_count_color_points_labels.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>% filter(month(`Date Col`) == 12),
                                                    variable=variable,
                                                    color_variable=color_variable,
                                                    show_points=TRUE,
                                                    show_labels=TRUE,
                                                    date_breaks_width='1 day'))
})

# Regression test: rt_explore_plot_time_series() previously failed when the time column
# was POSIXct instead of Date.
test_that('rt_explore_plot_time_series__POSIXct', {
    # same flight data as the Date-based tests, with the date column converted to POSIXct
    dataset <- flights %>%
        mutate(date = lubridate::make_date(year, month, day),
               cohort = paste0(year, '-', lubridate::week(date))) %>%
        data.frame() %>%
        select(date, dep_delay, dep_time, origin, cohort) %>%
        mutate(date = as.POSIXct(date))
    colnames(dataset) <- test_helper__column_names(dataset)

    variable <- 'Date Col'

    test_save_plot(file_name='data/rt_explore_plot_time_series_default__POSIXct.png',
                   plot=rt_explore_plot_time_series(dataset=dataset, variable=variable))
})

# Checks the "<year>-Q<quarter>" labels produced by rt_as_year_qtr_format() for both Date
# and POSIXct input. In the expected vector a label appears only where the quarter
# differs from the preceding element, repeated quarters are "", and NA stays NA.
test_that('rt_as_year_qtr_format', {
    # shared input; the trailing NA followed by '2019-01-01' checks that a label is
    # emitted again after a missing value
    dates <- ymd('2019-01-01',
                 '2019-01-02',
                 '2019-02-01',
                 '2019-03-04',
                 '2019-04-01',
                 '2019-04-04',
                 '2019-06-01',
                 '2019-07-01',
                 '2019-08-01',
                 '2019-09-01',
                 '2019-10-01',
                 '2019-11-01',
                 '2019-12-01',
                 '2019-12-31',
                 '2020-01-01',
                 NA,
                 '2019-01-01')
    expected_values <- c("2019-Q1", "", "", "",
                         "2019-Q2", "", "",
                         "2019-Q3", "", "",
                         "2019-Q4", "", "", "",
                         "2020-Q1",
                         NA,
                         "2019-Q1")

    # expect_identical() takes (object, expected); this order keeps failure output labelled correctly
    expect_identical(rt_as_year_qtr_format(dates), expected_values)
    expect_identical(rt_as_year_qtr_format(as.POSIXct(dates)), expected_values)
})

# Covers the `date_floor` options of rt_explore_plot_time_series() (week / month / quarter /
# year) on their own and combined with custom break widths, break label formats,
# aggregated comparison variables and a colour variable. All plots use
# show_points=TRUE and show_labels=TRUE.
test_that('rt_explore_plot_time_series_breaks_floors', {
    flights_by_date <- flights %>%
        mutate(date = lubridate::make_date(year, month, day),
               cohort = paste0(year, '-', lubridate::week(date))) %>%
        data.frame() %>%
        select(date, dep_delay, dep_time, origin, cohort)
    colnames(flights_by_date) <- test_helper__column_names(flights_by_date)

    variable <- 'Date Col'
    comparison_variable <- 'Dep Delay Col'
    color_variable <- 'Origin Col'

    # NA-tolerant aggregations used as `comparison_function`
    comp_func_sum <- function(x) sum(x, na.rm=TRUE)
    comp_func_mean <- function(x) mean(x, na.rm=TRUE)
    comp_func_median <- function(x) median(x, na.rm=TRUE)

    floor_units <- c('week', 'month', 'quarter', 'year')
    # break width exercised for each floor; 'year' has no width/format case
    break_widths <- c(week='2 weeks', month='4 months', quarter='3 months')
    # file-name prefix -> aggregation function and its display name
    aggregations <- list(sum=list(fn=comp_func_sum, label='Sum of'),
                         average=list(fn=comp_func_mean, label='Average'),
                         median=list(fn=comp_func_median, label='Median'))

    # Builds the plot with the options shared by every case plus whatever is passed via
    # `...`, and hands it to test_save_plot() under data/rt_explore_plot_time_series_<file_stem>.png
    save_floor_plot <- function(file_stem, ...) {
        test_save_plot(file_name=paste0('data/rt_explore_plot_time_series_', file_stem, '.png'),
                       plot=rt_explore_plot_time_series(dataset=flights_by_date,
                                                        variable=variable,
                                                        show_labels=TRUE,
                                                        show_points=TRUE,
                                                        ...))
    }

    ##########################################################################################################
    # week/month/quarter/year defaults
    ##########################################################################################################
    for (floor_unit in floor_units) {
        save_floor_plot(floor_unit,
                        date_floor=floor_unit,
                        date_break_format=NULL,
                        date_breaks_width=NULL)
    }

    ##########################################################################################################
    # week/month/quarter widths
    ##########################################################################################################
    for (floor_unit in names(break_widths)) {
        save_floor_plot(paste0(floor_unit, '_width'),
                        date_floor=floor_unit,
                        date_break_format=NULL,
                        date_breaks_width=break_widths[[floor_unit]])
    }

    ##########################################################################################################
    # week/month/quarter formats
    ##########################################################################################################
    for (floor_unit in names(break_widths)) {
        save_floor_plot(paste0(floor_unit, '_format'),
                        date_floor=floor_unit,
                        date_break_format='%Y-%m-%d',
                        date_breaks_width=break_widths[[floor_unit]])
    }

    ##########################################################################################################
    # week/month/quarter/year with an aggregated comparison variable (sum, then average, then median)
    ##########################################################################################################
    for (aggregation_name in names(aggregations)) {
        aggregation <- aggregations[[aggregation_name]]
        for (floor_unit in floor_units) {
            save_floor_plot(paste0(aggregation_name, '_', floor_unit),
                            comparison_variable=comparison_variable,
                            comparison_function=aggregation[['fn']],
                            comparison_function_name=aggregation[['label']],
                            date_floor=floor_unit)
        }
    }

    ##########################################################################################################
    # week/month/quarter/year with a colour variable (record count, then average delay)
    ##########################################################################################################
    for (floor_unit in floor_units) {
        save_floor_plot(paste0(floor_unit, '_color'),
                        color_variable=color_variable,
                        date_floor=floor_unit)

        save_floor_plot(paste0('average_', floor_unit, '_color'),
                        comparison_variable=comparison_variable,
                        comparison_function=comp_func_mean,
                        comparison_function_name='Average',
                        color_variable=color_variable,
                        date_floor=floor_unit)
    }
})

# Exercises weekly flooring and x-axis breaks of rt_explore_plot_time_series on a
# date-time variable. Two subsets of `flights` are used that differ only in their
# last calendar day (2013-02-10 vs 2013-02-11), so the effect of one extra day on
# the weekly buckets/breaks can be compared in the saved plots.
test_that('rt_explore_plot_time_series_breaks_floors_date_time', {
    # Compare calendar dates with calendar dates: `ymd()` returns a Date, so the
    # date-time column is converted with `as_date()` instead of being compared to a
    # Date directly (mixing the two classes in `<=` relies on mismatched Ops methods).
    # NOTE(review): assumes `time_hour` is POSIXct, as documented for
    # nycflights13::flights - confirm.
    dataset_10 <- data.frame(flights) %>%
        filter(as_date(time_hour) <= ymd('2013-02-10'))
    dataset_11 <- data.frame(flights) %>%
        filter(as_date(time_hour) <= ymd('2013-02-11'))

    colnames(dataset_10) <- test_helper__column_names(dataset_10)
    colnames(dataset_11) <- test_helper__column_names(dataset_11)
    # use to verify the subsets / weekly buckets (run before the columns are renamed)
    # max(dataset_10$time_hour)
    # max(dataset_11$time_hour)
    # dataset_11 %>%
    #     mutate(cohort = floor_date(time_hour, unit='weeks',week_start = 1)) %>%
    #     count(cohort)

    variable <- 'Time Hour Col'
    comparison_variable <- 'Dep Delay Col'

    # Aggregation used by the "sum" plots at the end; ignores missing values.
    comp_func_sum <- function(x) {
        return (sum(x, na.rm=TRUE))
    }

    ##########################################################################################################
    # weekly floor - default break width, default and explicit break format
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_210.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_10,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = NULL))

    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_211.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_11,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = NULL))

    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_210_day.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_10,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = NULL))

    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_211_day.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_11,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = NULL))
    ##########################################################################################################
    # weekly floor - explicit break width
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_width_210.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_10,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 weeks'))

    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_width_211.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_11,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 weeks'))

    ##########################################################################################################
    # weekly floor - explicit break width and break format
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_format_210.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_10,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 weeks'))

    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_week_format_211.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_11,
                                                    variable=variable,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 weeks'))

    ##########################################################################################################
    # weekly floor - comparison variable aggregated with a sum
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_sum_week_210.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_10,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_sum,
                                                    comparison_function_name='Sum of',
                                                    show_points = TRUE,
                                                    show_labels = TRUE,
                                                    date_floor = 'week'))

    test_save_plot(file_name='data/rt_explore_plot_time_series_datetime_sum_week_211.png',
                   plot=rt_explore_plot_time_series(dataset=dataset_11,
                                                    variable=variable,
                                                    comparison_variable=comparison_variable,
                                                    comparison_function=comp_func_sum,
                                                    comparison_function_name='Sum of',
                                                    show_points = TRUE,
                                                    show_labels = TRUE,
                                                    date_floor = 'week'))

})

test_that('rt_explore_plot_time_facet_yoy', {
    # bike_traffic <- readr::read_csv("https://raw.githubusercontent.com/rfordatascience/tidytuesday/master/data/2019/2019-04-02/bike_traffic.csv")
    # bike_traffic <- bike_traffic %>% mutate(date = mdy_hms(date))
    dataset <- readRDS('data/bike_traffic.RDS')
    set.seed(42)
    dataset <- dataset %>% sample_n(10000)
    dataset <- dataset %>% mutate(crossing = fct_lump(crossing, 1))
    colnames(dataset) <- test_helper__column_names(dataset)
    #dataset %>% count(crossing)

    variable <- 'Date Col'
    comparison_variable <- 'Bike Count Col'

    comp_func_sum <- function(x) {
        return (sum(x, na.rm=TRUE))
    }

    ##########################################################################################################
    # weekly
    ##########################################################################################################

    ##################
    # Year-over-Year
    ##################
    # use to verify numbers
    # dataset %>%
    #     mutate(date = format_format('%Y-%W')(date)) %>%
    #     count(date) %>% arrange(date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__yoy_week.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    year_over_year = TRUE,
                                                    facet_variable = NULL,
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '8 weeks'))
    ##################
    # count
    ##################
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_week.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '8 weeks'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = format_format('%Y-%W')(date)) %>%
    #     count(date, direction) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_week__yoy.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    year_over_year = TRUE,
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '8 weeks'))

    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_week.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '8 weeks'))
    ##################
    # sum
    ##################
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'week')) %>%
    #     count(date, direction, wt=bike_count) %>% arrange(direction, date) %>% as.data.frame() %>% head(40)
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_week__sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    color_variable = NULL,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '8 weeks'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = format_format('%Y-%W')(date)) %>%
    #     count(date, direction, wt=bike_count) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_week__sum__yoy.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    year_over_year = TRUE,
                                                    color_variable = NULL,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '8 weeks'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'week')) %>%
    #     mutate(`Direction Col` = fct_lump(`Direction Col`, 1)) %>%
    #     count(date, crossing, direction, wt=bike_count) %>% arrange(crossing, direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_week__sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'week',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '8 weeks'))

    ##########################################################################################################
    # monthly
    ##########################################################################################################
    ##################
    # count
    ##################
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'month')) %>%
    #     count(date, direction) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_month.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 months'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'month')) %>%
    #     count(date, direction) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_month__yoy.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    year_over_year = TRUE,
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 months'))
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'month')) %>%
    #     mutate(`Direction Col` = fct_lump(`Direction Col`, 1)) %>%
    #     count(date, crossing, direction) %>% arrange(crossing, direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_month.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 months'))

    ##################
    # sum
    ##################
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'month')) %>%
    #     count(date, direction, wt=bike_count) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_month__sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    color_variable = NULL,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 months'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'month')) %>%
    #     count(date, direction, wt=bike_count) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_month__sum__yoy.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    year_over_year = TRUE,
                                                    color_variable = NULL,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 months'))
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'month')) %>%
    #     mutate(`Direction Col` = fct_lump(`Direction Col`, 1)) %>%
    #     count(date, crossing, direction, wt=bike_count) %>% arrange(crossing, direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_month__sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 months'))

    # to test include_zero_y_axis, lets make the numbers much higher than they are and make sure no values are 0
    comp_func_custom <- function(x) {
        x <- ifelse(x == 0, 100000, x * 10000)
        x <- sum(x, na.rm=TRUE)
        x <- ifelse(x > 30000000, 30000000, x)
        x <- ifelse(x < 1000000, 1000000, x)
        return (x)
    }
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_month__include_zero.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    include_zero_y_axis = FALSE,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_custom,
                                                    comparison_function_name = 'Custom',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'month',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 months'))

    ##########################################################################################################
    # quarterly
    ##########################################################################################################
    ##################
    # count
    ##################
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'quarter')) %>%
    #     count(date, direction) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_quarter.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 quarters'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'quarter')) %>%
    #     count(date, direction) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_quarter__yoy.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    year_over_year = TRUE,
                                                    color_variable = NULL,
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 quarters'))
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'quarter')) %>%
    #     mutate(`Direction Col` = fct_lump(`Direction Col`, 1)) %>%
    #     count(date, crossing, direction) %>% arrange(crossing, direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_quarter.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    comparison_variable = NULL,
                                                    comparison_function = NULL,
                                                    comparison_function_name = NULL,
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 quarters'))

    ##################
    # sum
    ##################
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'quarter')) %>%
    #     count(date, direction, wt=bike_count) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_quarter__sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    color_variable = NULL,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 quarters'))

    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'quarter')) %>%
    #     count(date, direction, wt=bike_count) %>% arrange(direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_quarter__sum__yoy.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    facet_variable = 'Direction Col',
                                                    year_over_year = TRUE,
                                                    color_variable = NULL,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = NULL,
                                                    date_breaks_width = '2 quarters'))
    # use to verify numbers
    # dataset %>%
    #     mutate(date = rt_floor_date_factor(date, 'quarter')) %>%
    #     mutate(`Direction Col` = fct_lump(`Direction Col`, 1)) %>%
    #     count(date, crossing, direction, wt=bike_count) %>% arrange(crossing, direction, date) %>% as.data.frame() %>% View()
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_quarter__sum.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'SUM',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 quarters'))

    # to test include_zero_y_axis, let's make the numbers much higher than they are and make sure no values are 0
    comp_func_custom <- function(x) {
        # Aggregation used to push every plotted value far away from zero:
        # zeros become 100000, everything else is scaled by 10000, the (NA-ignoring) sum is taken,
        # and the total is then forced into the range [1000000, 30000000].
        lower_bound <- 1000000
        upper_bound <- 30000000

        scaled_values <- ifelse(x == 0, 100000, x * 10000)
        total <- sum(scaled_values, na.rm=TRUE)

        # cap at the upper bound first, then raise to the lower bound
        capped_total <- ifelse(total > upper_bound, upper_bound, total)
        ifelse(capped_total < lower_bound, lower_bound, capped_total)
    }
    test_save_plot(file_name='data/rt_explore_plot_time_series__facet_color_quarter__include_zero.png',
                   plot=rt_explore_plot_time_series(dataset=dataset %>%
                                                        mutate(`Direction Col` = fct_lump(`Direction Col`, 1)),
                                                    variable=variable,
                                                    facet_variable = 'Crossing Col',
                                                    color_variable = 'Direction Col',
                                                    include_zero_y_axis = FALSE,
                                                    comparison_variable = comparison_variable,
                                                    comparison_function = comp_func_custom,
                                                    comparison_function_name = 'Custom',
                                                    show_labels = TRUE,
                                                    show_points = TRUE,
                                                    date_floor = 'quarter',
                                                    date_break_format = '%Y-%m-%d',
                                                    date_breaks_width = '2 quarters'))
})

# Renders time-series plots that use an ordered factor (`Origin Col`, with the non-alphabetical level
# order JFK, LGA, EWR) as the color variable: once for the default counts and once for an aggregated
# comparison variable. Each plot is handed to `test_save_plot` under its own file name.
# NOTE: the description is deliberately distinct from the other `rt_explore_plot_time_series` test at the
# top of this file, so that a failure report identifies this block unambiguously.
test_that('rt_explore_plot_time_series__ordered_factor_color', {
    dataset <- data.frame(flights %>%
                              mutate(date = lubridate::make_date(year, month, day),
                                     cohort = paste0(year, '-',
                                                     lubridate::week(date)))) %>%
        select(date, dep_delay, dep_time, origin, cohort) %>%
        # explicit level order so that the factor is ordered, and not simply alphabetical
        mutate(origin = factor(origin, levels = c("JFK", "LGA", "EWR"), ordered=TRUE))
    colnames(dataset) <- test_helper__column_names(dataset)

    variable <- 'Date Col'
    comparison_variable <- 'Dep Delay Col'
    # NA-tolerant sum; `Dep Delay Col` is derived from `dep_delay`, which may contain missing values
    comp_func_sum <- function(x) {
        sum(x, na.rm=TRUE)
    }

    # default aggregation (no comparison variable), one line per origin
    test_save_plot(file_name='data/rt_explore_plot_time_series__colors.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    color_variable='Origin Col'))

    # same coloring, but aggregating the comparison variable with a custom function
    test_save_plot(file_name='data/rt_explore_plot_time_series__colors__comp.png',
                   plot=rt_explore_plot_time_series(dataset=dataset,
                                                    variable=variable,
                                                    color_variable='Origin Col',
                                                    comparison_variable=comparison_variable,
                                                    comparison_function = comp_func_sum,
                                                    comparison_function_name = 'Sum'))
})

# Plots a time series whose date column contains many NA values, with and without a color variable.
test_that('rt_explore_plot_time_series__many_nas', {

    # one row per flight, keeping only the columns this test needs
    flights_with_date <- flights %>%
        mutate(date = lubridate::make_date(year, month, day))
    dataset <- data.frame(flights_with_date) %>%
        select(date, dep_delay, dep_time, origin)

    # blank out every date before 2013-09-01 so that the date column contains many missing values
    rows_before_cutoff <- which(dataset$date < ymd('2013-09-01'))
    dataset$date[rows_before_cutoff] <- NA

    colnames(dataset) <- test_helper__column_names(dataset)

    # a large number of NA dates would distort the count scale, and NA dates cannot be plotted anyway
    test_save_plot(file_name='data/rt_explore_plot_time_series__many_nas.png',
                   plot=rt_explore_plot_time_series(dataset,
                                                    variable = 'Date Col',
                                                    color_variable = NULL))
    test_save_plot(file_name='data/rt_explore_plot_time_series__many_nas__color.png',
                   plot=rt_explore_plot_time_series(dataset,
                                                    variable = 'Date Col',
                                                    color_variable = 'Origin Col'))
})

test_that('rt_explore_plot_conversion_rates', {

    sample_size <- 20000
    conversion_rate <- 0.3

    set.seed(42)
    conversion_data <- data.frame(index=1:sample_size,
                                  first_visit=ymd_hms('2019-01-01 00:00:00') +
                                      days(round(runif(n=sample_size, min=0, max=600))) +
                                      hours(round(runif(n=sample_size, min=0, max=23))) +
                                      minutes(round(runif(n=sample_size, min=0, max=60))) +
                                      seconds(round(runif(n=sample_size, min=0, max=60))))
    set.seed(43)
    conversion_data$converted <- as.logical(rbinom(n=sample_size, size=1, prob=conversion_rate))

    colnames(conversion_data) <- test_helper__column_names(conversion_data)

    get_rand_binom_num <- function(seed, max_num) {
        # Re-seeds the RNG on every call, so the single binomial draw (size `max_num`, probability 0.3)
        # is fully determined by `seed`.
        set.seed(seed)
        single_draw <- rbinom(n=1, size=max_num, prob=0.3)
        return (single_draw)
    }
    get_rand_unif_num <- function(seed, max_num) {
        # Re-seeds the RNG on every call, so the result is fully determined by `seed`: a single uniform
        # draw from [0, max_num], rounded and returned as an integer.
        set.seed(seed)
        raw_draw <- runif(n=1, min=0, max=max_num)
        return (as.integer(round(raw_draw)))
    }

    conversion_data$`Num Days Col` <- map_int(conversion_data$`Index Col`, ~ get_rand_binom_num(., 39))
    conversion_data$`Num Hours Col` <- map_int(conversion_data$`Index Col`, ~ get_rand_unif_num(., 23))
    conversion_data <- conversion_data %>%
        mutate(`Conversion Date Col` = `First Visit Col` +
                   days(`Num Days Col`) +
                   hours(`Num Hours Col`)) %>%
        select(-`Num Days Col`, -`Num Hours Col`)

    conversion_data$`Conversion Date Col`[which(!conversion_data$`Converted Col`)] <- NA
    conversion_data <- conversion_data %>% mutate(group=ifelse(`Index Col` %% 2 == 0, 'A', 'B'))
    conversion_data <- conversion_data %>% select(-`Index Col`, -`Converted Col`)

    mock_reference_date <- max(conversion_data$`First Visit Col`)

    ##########################################################################################################
    # Non-Groups
    ##########################################################################################################
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(6, 7, 10, 14),
                                     snapshot_units='days',
                                     date_floor='month',
                                     color_or_facet='color',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width=NULL))

    # there was a bug where rows were being de-duplicated (e.g. rows with a duplicated First Visit Col value
    # were removed), so the above and below conversion rates, which should be (about) the same, were actually
    # different.
    # the graphs won't be exactly the same, because round_date loses some precision on the date (and therefore
    # on the date-diff between the first-visit and the conversion-date), but they should be close
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month__floor_bug.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data %>%
                                                             mutate(`First Visit Col` = round_date(`First Visit Col`, unit='days')),
                                                         first_date='First Visit Col',
                                                         second_date='Conversion Date Col',
                                                         reference_date=mock_reference_date,
                                                         snapshots=c(6, 7, 10, 14),
                                                         snapshot_units='days',
                                                         date_floor='month',
                                                         color_or_facet='color',
                                                         year_over_year=FALSE,
                                                         y_zoom_min=NULL,
                                                         y_zoom_max=NULL,
                                                         include_zero_y_axis=TRUE,
                                                         show_points=TRUE,
                                                         show_labels=TRUE,
                                                         date_break_format=NULL,
                                                         date_breaks_width=NULL))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month_facet.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(6, 7, 10, 14),
                                     snapshot_units='days',
                                     date_floor='month',
                                     color_or_facet='facet',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width=NULL))
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month_yoy.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(6, 7, 10, 14),
                                     snapshot_units='days',
                                     date_floor='month',
                                     color_or_facet='facet',
                                     year_over_year=TRUE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=FALSE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width=NULL))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month_yoy_ignore_color.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                                         first_date='First Visit Col',
                                                         second_date='Conversion Date Col',
                                                         reference_date=mock_reference_date,
                                                         snapshots=c(6, 7, 10, 14),
                                                         snapshot_units='days',
                                                         date_floor='month',
                                                         color_or_facet='color',
                                                         year_over_year=TRUE,
                                                         y_zoom_min=NULL,
                                                         y_zoom_max=NULL,
                                                         include_zero_y_axis=FALSE,
                                                         show_points=TRUE,
                                                         show_labels=TRUE,
                                                         date_break_format=NULL,
                                                         date_breaks_width=NULL))
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__2.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(7, 14, 21),
                                     snapshot_units='days',
                                     date_floor='month',
                                     color_or_facet='facet',
                                     year_over_year=TRUE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=FALSE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width=NULL))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(1, 2, 3),
                                     snapshot_units='weeks',
                                     date_floor='weeks',
                                     color_or_facet='color',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width='4 weeks'))

    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks2.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data %>%
                                                             filter(`First Visit Col` >= ymd_hms('2020-01-01 00:00:00')),
                                                         first_date='First Visit Col',
                                                         second_date='Conversion Date Col',
                                                         reference_date=mock_reference_date,
                                                         snapshots=c(1, 2, 3),
                                                         snapshot_units='weeks',
                                                         date_floor='weeks',
                                                         color_or_facet='color',
                                                         year_over_year=FALSE,
                                                         y_zoom_min=NULL,
                                                         y_zoom_max=NULL,
                                                         include_zero_y_axis=TRUE,
                                                         show_points=TRUE,
                                                         show_labels=TRUE,
                                                         date_break_format='%Y-%m-%d'
                                                         #date_breaks_width='4 weeks'
                                                         ))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks_facet.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(1, 2, 3),
                                     snapshot_units='weeks',
                                     date_floor='weeks',
                                     color_or_facet='facet',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width='4 weeks'))
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks_yoy.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(1, 2, 3),
                                     snapshot_units='weeks',
                                     date_floor='weeks',
                                     color_or_facet='facet',
                                     year_over_year=TRUE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width='4 weeks'))
    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks_2.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     reference_date=mock_reference_date,
                                     snapshots=c(1, 2, 3),
                                     snapshot_units='weeks',
                                     date_floor='weeks',
                                     color_or_facet='facet',
                                     year_over_year=TRUE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=FALSE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format='%Y-%m-%d',
                                     date_breaks_width='4 weeks'))

    ##########################################################################################################
    # GROUPS
    ##########################################################################################################


    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month__groups.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     group_variable='group',
                                     reference_date=mock_reference_date,
                                     snapshots=c(6, 7, 10, 14),
                                     snapshot_units='days',
                                     date_floor='month',
                                     color_or_facet='color',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width=NULL))


    # there was a bug where rows were being de-duplicated (e.g. rows with a duplicated First Visit Col value
    # were removed), so the above and below conversion rates, which should be (about) the same, were actually
    # different.
    # the graphs won't be exactly the same, because round_date loses some precision on the date (and therefore
    # on the date-diff between the first-visit and the conversion-date), but they should be close
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month__groups__floor_bug.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data %>%
                                                             mutate(`First Visit Col` = round_date(`First Visit Col`, unit='days')),
                                                         first_date='First Visit Col',
                                                         second_date='Conversion Date Col',
                                                         group_variable='group',
                                                         reference_date=mock_reference_date,
                                                         snapshots=c(6, 7, 10, 14),
                                                         snapshot_units='days',
                                                         date_floor='month',
                                                         color_or_facet='color',
                                                         year_over_year=FALSE,
                                                         y_zoom_min=NULL,
                                                         y_zoom_max=NULL,
                                                         include_zero_y_axis=TRUE,
                                                         show_points=TRUE,
                                                         show_labels=TRUE,
                                                         date_break_format=NULL,
                                                         date_breaks_width=NULL))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__days_month_facet__groups.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     group_variable='group',
                                     reference_date=mock_reference_date,
                                     snapshots=c(6, 7, 10, 14),
                                     snapshot_units='days',
                                     date_floor='month',
                                     color_or_facet='facet',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=FALSE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width=NULL))
    ##########################################################################################################
    expect_error(rt_explore_plot_conversion_rates(dataset=conversion_data,
                                                  first_date='First Visit Col',
                                                  second_date='Conversion Date Col',
                                                  group_variable='group',
                                                  reference_date=mock_reference_date,
                                                  snapshots=c(6, 7, 10, 14),
                                                  snapshot_units='days',
                                                  date_floor='month',
                                                  color_or_facet='facet',
                                                  year_over_year=TRUE,
                                                  y_zoom_min=NULL,
                                                  y_zoom_max=NULL,
                                                  include_zero_y_axis=FALSE,
                                                  show_points=TRUE,
                                                  show_labels=TRUE,
                                                  date_break_format=NULL,
                                                  date_breaks_width=NULL))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks__groups.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     group_variable='group',
                                     reference_date=mock_reference_date,
                                     snapshots=c(1, 2, 3),
                                     snapshot_units='weeks',
                                     date_floor='weeks',
                                     color_or_facet='color',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width='4 weeks'))

    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks2__groups.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data %>%
                                                             filter(`First Visit Col` >= ymd_hms('2020-01-01 00:00:00')),
                                                         first_date='First Visit Col',
                                                         second_date='Conversion Date Col',
                                                         group_variable='group',
                                                         reference_date=mock_reference_date,
                                                         snapshots=c(1, 2, 3),
                                                         snapshot_units='weeks',
                                                         date_floor='weeks',
                                                         color_or_facet='color',
                                                         year_over_year=FALSE,
                                                         y_zoom_min=NULL,
                                                         y_zoom_max=NULL,
                                                         include_zero_y_axis=TRUE,
                                                         show_points=TRUE,
                                                         show_labels=TRUE,
                                                         date_break_format='%Y-%m-%d'
                                                         #date_breaks_width='4 weeks'
                                                         ))

    ##########################################################################################################
    test_save_plot(file_name='data/rt_explore_plot_conversion_rates__weeks_weeks_facet__groups.png',
                   plot=rt_explore_plot_conversion_rates(dataset=conversion_data,
                                     first_date='First Visit Col',
                                     second_date='Conversion Date Col',
                                     group_variable='group',
                                     reference_date=mock_reference_date,
                                     snapshots=c(1, 2, 3),
                                     snapshot_units='weeks',
                                     date_floor='weeks',
                                     color_or_facet='facet',
                                     year_over_year=FALSE,
                                     y_zoom_min=NULL,
                                     y_zoom_max=NULL,
                                     include_zero_y_axis=TRUE,
                                     show_points=TRUE,
                                     show_labels=TRUE,
                                     date_break_format=NULL,
                                     date_breaks_width='4 weeks'))
})

test_that('rt_explore_plot_cohorted_adoption', {
    # Builds a reproducible mock data set of first-visit timestamps and (optional)
    # conversion timestamps, then saves the cohorted-adoption plot under several
    # combinations of options.
    n_visitors <- 20000
    p_convert <- 0.3
    window_start <- ymd_hms('2019-01-01 00:00:00')

    # First-visit timestamps are random offsets from `window_start`. The four draws
    # run in this order (days, hours, minutes, seconds) directly after the seed is
    # set, so the random stream is consumed in a fixed sequence.
    set.seed(42)
    offset_days <- days(round(runif(n = n_visitors, min = 0, max = 600)))
    offset_hours <- hours(round(runif(n = n_visitors, min = 0, max = 23)))
    offset_minutes <- minutes(round(runif(n = n_visitors, min = 0, max = 60)))
    offset_seconds <- seconds(round(runif(n = n_visitors, min = 0, max = 60)))
    visit_times <- window_start + offset_days + offset_hours + offset_minutes + offset_seconds

    # Flag which visitors convert at all.
    set.seed(43)
    did_convert <- as.logical(rbinom(n = n_visitors, size = 1, prob = p_convert))

    conversion_data <- data.frame(index = seq_len(n_visitors),
                                  first_visit = visit_times,
                                  converted = did_convert)
    colnames(conversion_data) <- test_helper__column_names(conversion_data)

    # Each visitor's conversion lag is drawn right after seeding the RNG with that
    # visitor's own index, so the lag is a function of the index value alone.
    draw_lag_days <- function(seed, max_num) {
        set.seed(seed)
        rbinom(1, max_num, 0.3)
    }
    draw_lag_hours <- function(seed, max_num) {
        set.seed(seed)
        as.integer(round(runif(n = 1, min = 0, max = max_num)))
    }
    visitor_ids <- conversion_data$`Index Col`
    lag_days <- map_int(visitor_ids, function(id) draw_lag_days(id, 39))
    lag_hours <- map_int(visitor_ids, function(id) draw_lag_hours(id, 23))

    # Conversion timestamp = first visit + lag.
    conversion_data$`Conversion Date Col` <- conversion_data$`First Visit Col` +
        days(lag_days) +
        hours(lag_hours)

    # Visitors that did not convert get a missing conversion date.
    never_converted <- !conversion_data$`Converted Col`
    conversion_data$`Conversion Date Col`[never_converted] <- NA

    # Only the two date columns are handed to the plotting function.
    conversion_data <- conversion_data %>%
        select(`First Visit Col`, `Conversion Date Col`)

    # The latest first visit is used as the reference date for every plot below.
    mock_reference_date <- max(conversion_data$`First Visit Col`)

    # 30-day adoption window, monthly cohorts, no points or labels.
    test_save_plot(
        file_name = 'data/rt_explore_plot_cohorted_adoption__30_days_month.png',
        plot = rt_explore_plot_cohorted_adoption(
            dataset = conversion_data,
            first_date = 'First Visit Col',
            second_date = 'Conversion Date Col',
            reference_date = mock_reference_date,
            last_n_cohorts = 10,
            n_units_after_first_date = 30,
            units = 'days',
            date_floor = 'month',
            y_zoom_min = NULL,
            y_zoom_max = NULL,
            include_zero_y_axis = TRUE,
            show_points = FALSE,
            show_labels = FALSE,
            date_break_format = NULL,
            base_size = 11
        )
    )

    # Same window and cohorts, with y zoom, points, labels and a date format.
    test_save_plot(
        file_name = 'data/rt_explore_plot_cohorted_adoption__30_days_month_options.png',
        plot = rt_explore_plot_cohorted_adoption(
            dataset = conversion_data,
            first_date = 'First Visit Col',
            second_date = 'Conversion Date Col',
            reference_date = mock_reference_date,
            last_n_cohorts = 10,
            n_units_after_first_date = 30,
            units = 'days',
            date_floor = 'month',
            y_zoom_min = NULL,
            y_zoom_max = 0.30,
            include_zero_y_axis = TRUE,
            show_points = TRUE,
            show_labels = TRUE,
            date_break_format = '%Y-%m-%d',
            base_size = 11
        )
    )

    # 3-week window, weekly cohorts. y_zoom_max, show_points, show_labels and
    # date_break_format are not passed, so the function's defaults apply.
    test_save_plot(
        file_name = 'data/rt_explore_plot_cohorted_adoption__3_weeks_week.png',
        plot = rt_explore_plot_cohorted_adoption(
            dataset = conversion_data,
            first_date = 'First Visit Col',
            second_date = 'Conversion Date Col',
            reference_date = mock_reference_date,
            last_n_cohorts = 15,
            n_units_after_first_date = 3,
            units = 'weeks',
            date_floor = 'week',
            y_zoom_min = NULL,
            include_zero_y_axis = TRUE,
            base_size = 11
        )
    )

    # 4-week window, monthly cohorts, separated_colors = FALSE. y_zoom_max,
    # show_labels and date_break_format are not passed (defaults apply).
    test_save_plot(
        file_name = 'data/rt_explore_plot_cohorted_adoption__3_weeks_month_options1.png',
        plot = rt_explore_plot_cohorted_adoption(
            dataset = conversion_data,
            first_date = 'First Visit Col',
            second_date = 'Conversion Date Col',
            reference_date = mock_reference_date,
            last_n_cohorts = 100,
            n_units_after_first_date = 4,
            units = 'weeks',
            date_floor = 'month',
            y_zoom_min = NULL,
            separated_colors = FALSE,
            include_zero_y_axis = TRUE,
            show_points = TRUE,
            base_size = 11
        )
    )

    # Identical to the previous plot except separated_colors = TRUE.
    test_save_plot(
        file_name = 'data/rt_explore_plot_cohorted_adoption__3_weeks_month_options2.png',
        plot = rt_explore_plot_cohorted_adoption(
            dataset = conversion_data,
            first_date = 'First Visit Col',
            second_date = 'Conversion Date Col',
            reference_date = mock_reference_date,
            last_n_cohorts = 100,
            n_units_after_first_date = 4,
            units = 'weeks',
            date_floor = 'month',
            y_zoom_min = NULL,
            separated_colors = TRUE,
            include_zero_y_axis = TRUE,
            show_points = TRUE,
            base_size = 11
        )
    )
})
# shane-kercheval/rtools documentation built on July 7, 2022, 8:31 a.m.