Scripts/App_DataPrep.R

# Libraries ---------------------------------------------------------------
library(tidyverse)
library(sunburstR)
library(KFAS)

# Load Data ---------------------------------------------------------------
data_src <- '/Users/joe.bringley/C3Test/data/HappyMealRawData.csv'
data_src %>% file.exists() # check: should return TRUE
raw_df <- data_src %>% read_csv()


# Data Prep ---------------------------------------------------------------
# Format column names
x <- colnames(raw_df)
x <- gsub(" ", "_", tolower(x))
colnames(raw_df) <- x


# Gather Data -------------------------------------------------------------
df <- raw_df %>%
  filter(!is.na(promotion_id),
         name != 'CLEAN UP') %>%
  data.frame()
# Note: going to drop programs with identical names as they will cause issues
# with filtering
multiple_program_names <- c('Super Mario', 'Hello Kitty', 'Barbie')
promo_tbl <- df %>%
  distinct(promotion_id, name) %>%
  filter(!name %in% multiple_program_names)


# Metrics by Time ---------------------------------------------------------
promo_metrics <- c("hm_restaurants","total_guest_counts",
                  "hm_sales_units", "hm_promo_units", "hm_units_w_promo",
                  "hm_average_price", "hm_adu", "hm_w_promo_adu",
                  "hm_upt", "hm_w_promo_upt", "hm_revenue",
                  "hm_w_toys_sales_units", "hm_w_toys_promo_units","hm_w_toys_w_promo_units",
                  "hm_w_toys_average_price", "hm_w_toys_adu","hm_w_toys_w_promo_adu",
                  "hm_w_toys_upt", "hm_w_toys_w_promo_upt", "hm_w_toys_revenue",
                  "hm_toy_units", "hm_toy_promo_units", "hm_toy_units_w_promo",
                  "hm_toy_average_price", "hm_toy_adu", "hm_toy_w_promo_adu",
                  "hm_toy_upt", "hm_toy_w_promo_upt", "hm_toy_revenue",
                  "hm_guest_counts", "hm_basket_avg", "hm_basket_avg_with_toys",
                  "basket_avg")

# Movie Metrics -----------------------------------------------------------
movie_df <- df %>%
  filter(!is.na(genre))

genre_perf_tbl <- movie_df %>%
  group_by(genre) %>%
  summarise(AvgHMADU = round(mean(hm_adu), 0))


# Pie (Dummy) Data --------------------------------------------------------
pie_tbl <- df %>%
  filter(name == "Lion King") %>%
  mutate(Weekday = as.character(lubridate::wday(date, label = TRUE))) %>%
  group_by(Weekday) %>%
  summarise(TotalADU = sum(hm_adu))


# Bar (Dummy) Data --------------------------------------------------------
bar_tbl <- df %>%
  filter(name %in% c('Lion King', 'Toy Story 4', 'Incredibles 2',
                     'LEGO Batman')) %>%
  group_by(name) %>%
  summarise(AvgADU = mean(hm_adu),
            AvgUPT = mean(hm_upt),
            AvgBasket = mean(hm_basket_avg_with_toys))

# Sunburst Data -----------------------------------------------------------
sunburst_df <- df %>%
  mutate(IsMovie = ifelse(is.na(genre), 'No', 'Yes')) %>%
  group_by(name, IsMovie, year, seasonality, length, category) %>%
  summarise(AvgADU = mean(hm_adu),
            HMADU = list(hm_adu))

sunburst_df$ADUBucket = cut(sunburst_df$AvgADU, breaks = 3, labels = c('Low', 'Medium', 'High'))
# Re arrange Order
sunburst_df <- sunburst_df %>%
  ungroup() %>%
  select(name, ADUBucket, IsMovie, year, seasonality, length, AvgADU, HMADU) %>%
  mutate(AvgADU = round(AvgADU, 0))
# Manual Adjustment to Show Column Name Along with Label
sunburst_df$ADUBucket <- paste0('ADU: ', sunburst_df$ADUBucket)
sunburst_df$IsMovie <- paste0('IsMovie: ', sunburst_df$IsMovie)
sunburst_df$year <- paste0('Year: ', sunburst_df$year)
sunburst_df$seasonality <- paste0('Season: ', sunburst_df$seasonality)
sunburst_df$length <- paste0('Program Length: ', sunburst_df$length)


data_list <- list()
for (i in seq_len((ncol(sunburst_df) -3))) {
  colz <- colnames(sunburst_df)[2: (1 + i)]
  z <- sunburst_df %>%
    group_by(paste(!!!syms(colz), sep = "-")) %>%
    tally() %>%
    rename(Col1 = 1)
  data_list[[i]] <- z
}
sunburst_tbl <- bind_rows(data_list)

sunburst_tbl$depth <- unlist(lapply(strsplit(sunburst_tbl$Col1,"-"),length))

sunburst_shiny <- sunburst_tbl %>%
  arrange(desc(depth), Col1) %>%
  sunburst(elementId = "sunburst_diag") %>%
  add_shiny()

# Parallel Coordinates Data -----------------------------------------------
pc_summarise_cols <- c("hm_restaurants", "total_guest_counts", "hm_sales_units",
                       "hm_promo_units", "hm_adu", "hm_upt", "basket_avg",
                       "hm_guest_counts")
pc_tbl <- df %>%
  group_by(name) %>%
  summarise_at(vars(pc_summarise_cols), funs(mean, median, min, max))


# Forecasting Data --------------------------------------------------------
ex_fitted <- readRDS("~/C3Test/data/kfas_fitted_model.rds")
ex_holdout_model <- readRDS("~/C3Test/data/kfas_holdout_model.rds")
ex_holdout_data <- readRDS("~/C3Test/data/kfas_holdout_data.rds")
# Make predictions
preds <- predict(ex_fitted,
                 newdata=ex_holdout_model,
                 interval = "confidence",
                 se.fit=T,
                 type="response",
                 nsim = 100)
# Compile predictions + data
forecast_data <- cbind(ex_holdout_data$hm_sales_adu, preds$hm_sales_adu)
colnames(forecast_data) <- c("hm_sales_adu", "p.fit", "p.lwr", "p.upr", "se.fit")
forecast_data <- forecast_data[, 1:4]


# Help Data ---------------------------------------------------------------
help_tbl <- read_csv('~/C3Test/data/help.csv')
help_tbl <- help_tbl %>% slice(1:2)

# Save Data ---------------------------------------------------------------
data_dest <- '/Users/joe.bringley/C3Test/data/'
filename_out <- 'AppData.Rda'
data_dest %>% dir.exists() # check: should return TRUE
save(df, promo_tbl, promo_metrics,
     movie_df, genre_perf_tbl, sunburst_tbl,
     sunburst_shiny, pc_tbl, sunburst_df,
     forecast_data, pie_tbl, bar_tbl, help_tbl,
     file = paste0(data_dest, filename_out))
JB-Sandbox/C3Test documentation built on Jan. 19, 2020, 12:23 a.m.