#-- 0.0 markdown options knitr::opts_chunk$set( echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, dpi = 300, tinytex.verbose = TRUE, tidy = FALSE, # cashe.extra = packageVersion('tufte'), fig.align = "center" )
#-- 1.0 libraries ---- # library(magrittr) # library(tidyverse) library(data.table) # library(tidyquant) # library(RMySQL) library(lubridate) # library(ISOweek) # library(MMWRweek) #-- 1.2 Libraries own ---- library(fd.dashboard) #-- 1.3 libraries graphical ---- library(ggpubr) library(knitr) library(ggExtra) library(viridis) library(tufte)
#-- 1.3. Settings ---- Sys.setenv(R_CONFIG_ACTIVE = "default") # use "default" for for production, "development" for prototyping config <- config::get(file = "../inst/golem-config.yml", use_parent = TRUE)
r newthought('The goal')
of this document is to prepare functions for the report 14 Investment Level.
prof_raw_dt <- fd.dashboard::get_clean_profiles() prof_raw_dt %>% str() lead_raw_dt <- fd.dashboard::get_clean_leads() lead_raw_dt %>% str()
prof_dt[, .N, .(min_investment, min_investment_currency)] prof_dt[status == "Active", .(min_investment, min_investment_currency)][, .N, .(min_investment)][order(-N)] prof_usd_dt <- prof_dt[status == "Active" & min_investment_currency == "USD", .(min_investment)] min_investment_max = 1e6 prof_usd_dt <- prof_usd_dt[min_investment <= min_investment_max, ] prof_usd_dt[, `:=` (min_investment = min_investment / 1e3)] prof_usd_dt[order(min_investment)] numbers_of_bins = 9 prof_usd_dt[, `:=` (bins_q = cut(min_investment, breaks = unique(quantile(min_investment, probs = seq.int(0, 1, by = 1/numbers_of_bins))), include.lowest = TRUE))] prof_usd_dt[, .N, .(bins_q)][order(bins_q)] prof_usd_dt[, .N, .(bins_q)][order(bins_q)] %>% ggplot(aes(bins_q, N)) + geom_col() #-- regular bins numbers_of_bins = 9 bin_from = 70 bin_to = 160 bin_by = 10 prof_usd_dt[, `:=` (bins = cut(min_investment, breaks = c(-Inf, seq.int(from = bin_from, to = bin_to, by = bin_by), Inf), include.lowest = TRUE) )] prof_usd_dt[, .N, .(bins)][order(bins)] %>% ggplot(aes(bins, N)) + geom_col() + theme_bw() #-- all currencies min_investment_max_dt <- data.table(min_investment_max = c(10, 10, 100, 100, 100, 100, 10), # in millions min_investment_currency = c("USD", "EUR", "ZAR", "MXN", "MYR", "SGD", "GBP")) min_investment_max_dt[, `:=` (min_investment_max = min_investment_max * 1e6)] setkey(prof_dt, min_investment_currency) setkey(min_investment_max_dt, min_investment_currency) prof_dt <- prof_dt[min_investment_max_dt] prof_dt[, `:=` (outlier = fcase(min_investment > min_investment_max, TRUE, min_investment <= min_investment_max, FALSE))] prof_dt[, `:=` (min_investment = min_investment / 1e3)] prof_dt[, .N, .(outlier, min_investment_currency, status)][, dcast.data.table(.SD, min_investment_currency ~ status + outlier)] prof_dt[outlier == T & min_investment_currency == "MXN" & status == "Active", ] prof_dt[, .N, .(status)] prof_dt[(!outlier), ] #-- chart prof_normal_dt <- prof_dt[!(outlier), ] numbers_of_bins = 9 bin_from = 10 bin_to = 100 bin_by = 20 prof_normal_dt[, bins := NULL] prof_normal_dt[, `:=` (bins = cut(min_investment, breaks = c(-Inf, seq.int(from = bin_from, to = bin_to, by = bin_by), Inf), include.lowest = TRUE)), by = .(min_investment_currency)] prof_normal_dt[, .N, .(bins, min_investment_currency)][min_investment_currency %in% c("USD"), ] %>% str() prof_normal_dt[min_investment_currency %in% c("EUR", "USD", "ZAR", "MYR", "GBP"), .N, .(bins, min_investment_currency)][order(bins)] %>% ggplot(aes(bins, N)) + geom_col(color = config$plot.color, fill = config$plot.color, alpha = 0.4) + theme_bw() + facet_wrap("min_investment_currency") prof_dt[(outlier) & status == "Active", .(profile, sales_rep, website_iso2c, min_investment, min_investment_currency, outlier)] prof_dt[, .N, .(is.na(min_investment), is.na(min_investment_currency))] prof_normal_dt[min_investment < 40e3 & min_investment_currency == "ZAR", min_investment] %>% hist()
#-- red profiles prof_dt <- fd.dashboard::get_clean_profiles() #-- identify outliers min_investment_max_dt <- data.table(min_investment_max = c(10, 10, 10, 100, 100, 100, 100), # in millions min_investment_currency = c("USD", "EUR", "GBP", "ZAR", "MXN", "MYR", "SGD")) #min_investment_max_dt[, `:=` (min_investment_max = min_investment_max * 1e6)] #-- threshold to define outliers tab_01 <- min_investment_max_dt[order(min_investment_max, min_investment_currency)] setkey(prof_dt, min_investment_currency) setkey(min_investment_max_dt, min_investment_currency) prof_min_set_dt <- prof_dt[min_investment_max_dt] prof_min_set_dt[, `:=` (outlier = fcase(min_investment > min_investment_max * 1e6, TRUE, min_investment <= min_investment_max * 1e6, FALSE))] prof_min_set_dt[, `:=` (min_investment = min_investment / 1e3)] prof_min_set_dt[status == "Active", .N, .(profile_id, min_investment, min_investment_currency)] prof_min_set_dt[, .N, .(profile_id, min_investment, min_investment_currency, status)][, .N, .(status)] setkey(prof_min_set_dt, tech_date_start) prof_min_set_dt[, .SD[.N], .(profile_id)][, .N, .(profile_id, min_investment, min_investment_currency, status)][status == "Active"] #-- number of outliers, per currency and status table_02 <- prof_min_set_dt[, .N, .(outlier, min_investment_currency, status, min_investment_max)][, dcast.data.table(.SD, min_investment_currency + min_investment_max ~ status + outlier)][order(-Active_FALSE)] #-- undefined min_investment OR min_investment_currency table_03 <- prof_dt[status == "Active", .N, .(m_inv = is.na(min_investment), m_inv_c = is.na(min_investment_currency), sales_rep)][(m_inv) | (m_inv_c), .(sales_rep, N)][order(-N)] table_04 <- prof_dt[status == "Active", .N, .(m_inv = is.na(min_investment), m_inv_c = is.na(min_investment_currency), sales_rep)][, `:=` (missing_min_inv = fcase( (m_inv == T | m_inv_c == T), TRUE, (m_inv == F & m_inv_c == F), FALSE))][, .(sales_rep, missing_min_inv, N)][, dcast.data.table(.SD, sales_rep ~ missing_min_inv, fill = 0)] setnames(table_04, c("FALSE", "TRUE"), c("OK", "missing")) table_04_sum <- table_04[, lapply(.SD, sum), .SDcols = is.numeric][, `:=` (sales_rep = "Total")] data.table::rbindlist(list(table_04, table_04_sum), use.names = T) #-- charting prof_normal_dt <- prof_min_set_dt[!(outlier), ] bin_from = 10 bin_to = 110 bin_by = 25 currencies <- min_investment_max_dt[, min_investment_currency] prof_normal_dt[, bins := NULL] prof_normal_dt[, `:=` (bins = cut(min_investment, breaks = c(-Inf, seq.int(from = bin_from, to = bin_to, by = bin_by), Inf), include.lowest = TRUE)), by = .(min_investment_currency)] prof_normal_dt[, .N, .(bins, min_investment_currency)][min_investment_currency %in% currencies, ] %>% str() prof_normal_2_dt <- prof_normal_dt[, `:=` (n = 1)][min_investment_currency %in% currencies, ][, `:=` (min_investment_currency = as.factor(min_investment_currency) %>% fct_reorder(.f = min_investment_currency, .x = n, .fun = sum) %>% fct_rev())][] #prof_normal_2_dt %>% str() prof_normal_2_dt$min_investment_currency %>% levels() currencies_sort <- prof_normal_2_dt[, .N, .(min_investment_currency)][order(-N)][, min_investment_currency] prof_normal_2_dt[min_investment_currency %in% currencies_sort[1:3], .N, .(bins, min_investment_currency)][order(bins)] %>% ggplot(aes(bins, N)) + geom_col(color = config$plot.color, fill = config$plot.color, alpha = 0.4) + theme_bw() + facet_wrap("min_investment_currency", scales = "free_y") + labs(title = "Number of Profiles, binned, per currency.", subtitle = "Only Active Profiles.", x = "", y = "")
#-- reduce changes to only those, having changed status or min_investment, "flatten" all other changes, when status is not changed dt <- prof_normal_2_dt setkey(dt, tech_date_start, profile_id) dt[, `:=` (status_prev = shift(status, type = "lag", fill = NA), min_investment_prev = shift(min_investment, type = "lag", fill = NA)), .(profile_id)] dt[profile_id %in% profile_id_mult_inv[1], .(status, status_prev, min_investment, min_investment_prev, tech_date_start)] prof_dt[profile_id %in% profile_id_mult_inv[1], ] x3_dt <- dt[, .(profile_id, status, status_prev, tech_date_start, min_investment, min_investment_prev)][status != status_prev | is.na(status_prev) | min_investment != min_investment_prev,] x3_dt[profile_id %in% profile_id_mult_inv[2], ] x3_dt[status == "Active"][profile_id %in% profile_id_mult_inv[2], ] profile_id_mult_2_inv <- x3_dt[, .N, .(profile_id, min_investment)][, .N, .(profile_id)][N > 1][, profile_id] x3_dt[profile_id %in% profile_id_mult_2_inv[1], ] profile_id_mult_2_inv[1:2]
#-- # uwzględniać leady jednokrotnie - spłaszczyć liczbę profili - tylko zmiana statusu, lub min_investment # profile_id_mult_inv <- prof_normal_2_dt[, .N, .(profile_id, min_investment)][, .N, .(profile_id)][N > 1][order(-N)][, profile_id] prof_normal_2_dt[profile_id %in% profile_id_mult_inv[15], ] profile_id_mult <- prof_normal_2_dt[status == "Active", .N, .(profile_id)][order(-N)][1, profile_id] prof_normal_2_dt[profile_id %in% profile_id_mult, ] prof_normal_2_dt[, .(profile, profile_id, status, tech_date_start, tech_date_end, tech_prev_id, tech_next_id, min_investment, min_investment_currency)] #leads_dt <- fread("../../franchisedirect/dashboards/report_14/bs_leads_clean_view.csv") #leads_dt <- fread("../../franchisedirect/dashboards/report_14/query_result.csv") setkey(leads_dt, profile_id, date) leads_dt[, .N, .(profile_id)] prof_dt <- x3_dt[profile_id %in% profile_id_mult_2_inv[2], ] setkey(prof_dt, profile_id, tech_date_start) x4_dt <- prof_dt[leads_dt, roll = T][, .(profile_id, status, tech_date_start, tech_date_end, min_investment)][tech_date_start >= as.Date("2020-12-16")] x4_dt[, .N, .(profile_id)] x4_dt[, .N, .(date = tech_date_start, min_investment)] x4_dt[, .N, .( min_investment)] rm(x4_dt)
#-- reduce changes to only those, having changed status or min_investment, "flatten" all other changes, when status is not changed dt <- prof_raw_dt setkey(dt, tech_date_start, profile_id) dt[, `:=` (status_prev = shift(status, type = "lag", fill = NA), min_investment_prev = shift(min_investment, type = "lag", fill = NA)), .(profile_id)] dt[profile_id %in% profile_id_mult_inv[1], .(status, status_prev, min_investment, min_investment_prev, tech_date_start)] dt[profile_id %in% profile_x_id, .(profile_id, status, status_prev, tech_date_start, min_investment, min_investment_prev)] x3_dt <- dt[, .(profile_id, status, status_prev, tech_date_start, min_investment, min_investment_prev)][status != status_prev | is.na(status_prev) | min_investment != min_investment_prev | (!is.na(min_investment) & is.na(min_investment_prev)),] x3_dt[profile_id %in% profile_x_id, ] profile_id_mult_inv <- prof_dt[!is.na(min_investment), .N, .(profile_id, min_investment)][, .N, .(profile_id)][N > 1][order(-N)][, profile_id] x3_dt[profile_id %in% profile_id_mult_inv & status == "Active", ] x3_dt[profile_id %in% profile_id_mult_inv[3], ] profile_id_mult_2_inv <- x3_dt[, .N, .(profile_id, min_investment)][, .N, .(profile_id)][N > 1][, profile_id] x3_dt[profile_id %in% profile_id_mult_2_inv[1], ] profile_id_mult_2_inv[1:2]
dt <- prof_raw_dt #-- reduce changes to only those, having changed status, "flatten" all other changes, when status is not changed setkey(dt, tech_date_start, profile_id) dt[, `:=` (status_prev = shift(status, type = "lag", fill = NA), min_investment_prev = shift(min_investment, type = "lag", fill = NA)), .(profile_id)] x3_dt <- dt[, .(profile_id, status, status_prev, tech_date_start, min_investment, min_investment_prev, min_investment_currency)][status != status_prev | is.na(status_prev) | min_investment != min_investment_prev | (!is.na(min_investment) & is.na(min_investment_prev)),] #x3_dt[profile_id %in% profile_id_mult_inv[1], ] #-- analysis window boundary_dates <- data.table(date = c(lubridate::int_start(analysis_period), lubridate::int_end(analysis_period))) boundary_dates[, `:=` (date = as.Date(date))] setkey(boundary_dates, date) #-- limit profile_id to only those, which had Active status, sometime within the analysis window setkey(x3_dt, tech_date_start) b1 <- boundary_dates[x3_dt, roll = -Inf] b2 <- x3_dt[, .SD[boundary_dates, roll = +Inf], .(profile_id)] b3 <- rbindlist(list(b1[, .(profile_id, status, min_investment, date)], b2[, .(profile_id, status, min_investment, date = tech_date_start)])) setkey(b3, date) b4 <- b3[, .SD[date %within% analysis_period & status == "Active", ], .(profile_id) ] b5 <- b4[, .N, .(profile_id)] setkey(b5, profile_id) #-- select only these profile_id, which have at least one day active within the period (the pupose of b5) setkey(x3_dt, profile_id) b6 <- x3_dt[b5][, .(profile_id, status, min_investment, min_investment_currency, date = tech_date_start)] b6[profile_id %in% profile_id_mult_inv[4], ] #-- create dates sequence seq_dates <- seq.Date(from = as.Date(int_start(analysis_period)), to = as.Date(int_end(analysis_period)), by = "day") dates_dt <- data.table(date = seq_dates) setkey(dates_dt, date) setkey(b6, date) b7 <- b6[, .SD[dates_dt, roll = +Inf], .(profile_id)][, .(profile_id, status, min_investment, min_investment_currency, date)][!is.na(status)] b7[profile_id %in% profile_id_mult_inv[4], ] return(b7) #-- calculate bins for min_inv daily b7_profile_id <- b7[, .N, .(profile_id, date)][N > 1, profile_id] b7[profile_id %in% b7_profile_id[1], ] b7[, .N, .(status)] b7[status == "Active" & date > "2020-12-16", .N, .(date, min_investment_currency)] %>% ggplot(aes(date, N, color = min_investment_currency)) + geom_line() bin_from <- 10 bin_to <- 100 bin_by <- 20 b7[, `:=` (min_investment = min_investment / 1e3)] b7[, `:=` (bins = cut(min_investment, breaks = c(-Inf, seq.int(from = bin_from, to = bin_to, by = bin_by), Inf), include.lowest = TRUE)), by = .(min_investment_currency, date)] b7[, `:=` (bins = fct_rev(bins))] currencies <- b7[, unique(min_investment_currency)] b7 <- b7[status == "Active" & date > "2020-12-16" & min_investment_currency %in% currencies, ] b7[, `:=` (n = 1)] b7 <- b7[, `:=` (min_investment_currency = as.factor(min_investment_currency) %>% fct_reorder(.f = min_investment_currency, .x = n, .fun = sum) %>% fct_rev())] b7 <- b7[, `:=` (min_investment_currency = fct_reorder(.f = min_investment_currency, .x = n, .fun = sum) %>% fct_rev())] b7[, .(sum(n)), .(min_investment_currency)][order(-V1)] b7 %>% str() b7[status == "Active" & date > "2020-12-16" & min_investment_currency == "USD", .N, .(date, bins)] %>% ggplot(aes(date, N, color = bins, fill = bins)) + geom_area(alpha = 0.5) + scale_fill_brewer(palette = "Set1") + scale_color_brewer(palette = "Set1") plot_01 <- b7[!is.na(min_investment_currency), .N, .(date, bins, min_investment_currency)] %>% ggplot(aes(date, N, color = bins, fill = bins)) + geom_area(alpha = 0.5) + scale_color_viridis_d(option = "") + scale_fill_viridis_d(option = "D") + # scale_fill_brewer(palette = "Set1") + # scale_color_brewer(palette = "Set1") + facet_wrap("min_investment_currency", scales = "free_y") + theme_bw() + labs(title = "Number of Profiles, binned, per currency.", subtitle = stringr::str_glue("Only Active Profiles."), x = "", y = "") + theme(legend.position = "right", legend.direction = "vertical")
get_profiles_investment_daily <- function(dt, analysis_period){ #-- reduce changes to only those, having changed status, "flatten" all other changes, when status is not changed setkey(dt, tech_date_start, profile_id) dt[, `:=` (status_prev = shift(status, type = "lag", fill = NA), min_investment_k_prev = shift(min_investment_k, type = "lag", fill = NA)), .(profile_id)] x3_dt <- dt[, .(profile_id, status, status_prev, tech_date_start, min_investment_k, min_investment_k_prev, min_investment_currency) ][status != status_prev | is.na(status_prev) | min_investment_k != min_investment_k_prev | (!is.na(min_investment_k) & is.na(min_investment_k_prev)),] #-- analysis window boundary_dates <- data.table(date = c(lubridate::int_start(analysis_period), lubridate::int_end(analysis_period))) boundary_dates[, `:=` (date = as.Date(date))] setkey(boundary_dates, date) #-- limit profile_id to only those, which had Active status, sometime within the analysis window setkey(x3_dt, tech_date_start) b1 <- boundary_dates[x3_dt, roll = -Inf] b2 <- x3_dt[, .SD[boundary_dates, roll = +Inf], .(profile_id)] b3 <- rbindlist(list(b1[, .(profile_id, status, min_investment_k, date)], b2[, .(profile_id, status, min_investment_k, date = tech_date_start)])) setkey(b3, date) b4 <- b3[, .SD[date %within% analysis_period & status == "Active", ], .(profile_id) ] b5 <- b4[, .N, .(profile_id)] setkey(b5, profile_id) #-- select only these profile_id, which have at least one day active within the period (the pupose of b5) setkey(x3_dt, profile_id) b6 <- x3_dt[b5][, .(profile_id, status, min_investment_k, min_investment_currency, date = tech_date_start)] #-- create dates sequence seq_dates <- seq.Date(from = as.Date(int_start(analysis_period)), to = as.Date(int_end(analysis_period)), by = "day") dates_dt <- data.table(date = seq_dates) setkey(dates_dt, date) setkey(b6, date) b7 <- b6[, .SD[dates_dt, roll = +Inf], .(profile_id)][, .(profile_id, status, min_investment_k, min_investment_currency, date)][!is.na(status)] return(b7) } get_investment_bins_daily <- function(dt){ bin_from <- 10 bin_to <- 100 bin_by <- 20 currencies <- prof_inv_dt[, unique(min_investment_currency)] dt[, `:=` (bins = cut(min_investment_k, breaks = c(-Inf, seq.int(from = bin_from, to = bin_to, by = bin_by), Inf), include.lowest = TRUE)), by = .(min_investment_currency, date)] dt[, `:=` (bins = fct_rev(bins))] dt[, `:=` (min_investment_currency = fct_reorder(.f = min_investment_currency, .x = n, .fun = sum) %>% fct_rev())] return(dt) } #-- test functions prof_raw_dt <- fd.dashboard::get_clean_profiles() analysis_period <- "2020-12-16" %--% today() prof_dt <- prof_raw_dt prof_dt[, `:=` (min_investment_k = min_investment / 1e3)] prof_inv_dt <- get_profiles_investment_daily(dt = prof_dt, analysis_period = analysis_period) prof_inv_dt[, `:=` (n = 1)] prof_inv_dt <- get_investment_bins_daily(prof_inv_dt) plot_01 <- prof_inv_dt[!is.na(min_investment_currency) & status == "Active", .N, .(date, bins, min_investment_currency)] %>% ggplot(aes(date, N, color = bins, fill = bins)) + geom_area(alpha = 0.5) + scale_color_viridis_d(option = "D") + scale_fill_viridis_d(option = "D") + facet_wrap("min_investment_currency", scales = "free_y") + theme_bw() + labs(title = "Number of Profiles, binned, per currency.", subtitle = stringr::str_glue("Only Active Profiles."), x = "", y = "") + theme(legend.position = "bottom") + guides(colour = guide_legend(nrow = 1)) setkey(prof_inv_dt, date) prof_inv_dt[, .N, .(min_investment_currency) ] prof_inv_dt[date == max(date) & !is.na(min_investment_currency), lapply(.SD, fivenum), .(min_investment_currency), .SDcols = "min_investment_k"] prof_inv_dt[date == max(date) & !is.na(min_investment_currency) & min_investment_k <= 100 & status == "Active", .(curr = min_investment_currency, min_investment_k)] %>% ggpubr::ggdensity(x = "min_investment_k", y = "..density..", color = "curr", fill = "curr", alpha = 0.3, add = "median") + theme_classic2()
#-- trim leads analysis_period <- "2020-12-01" %--% today() lead_dt <- lead_raw_dt[Date %within% analysis_period] prof_raw_dt[, .N, .(client_id, profile_id)][, .N, .(client_id)][N > 1] prof_raw_dt[, .N, .(client_id, profile_id)][, .N, .(profile_id)][N > 1] prof_raw_dt[profile_id == "0000015c-d713-d671-affd-d7bfba8c0000"] lead_raw_dt[profile_id == "0000015c-d713-d671-affd-d7bfba8c0000"] profile_x_id <- "0000015c-d713-d671-affd-d7bfe6fb0000" prof_raw_dt[profile_id %in% profile_x_id] lead_dt[profile_id %in% profile_x_id] x1 <- get_active_profiles_daily(dt = prof_dt, analysis_period = analysis_period)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.