#-- 0.0 markdown options knitr::opts_chunk$set( echo = FALSE, eval = TRUE, message = FALSE, warning = FALSE, dpi = 300, tinytex.verbose = TRUE, tidy = FALSE, # cashe.extra = packageVersion('tufte'), fig.align = "center" )
#-- 1.0 libraries ---- # library(magrittr) # library(tidyverse) library(data.table) # library(tidyquant) # library(RMySQL) library(lubridate) # library(ISOweek) # library(MMWRweek) #-- 1.2 Libraries own ---- library(fd.dashboard) #-- 1.3 libraries graphical ---- library(ggpubr) library(knitr) library(ggExtra) library(viridis) library(tufte)
#-- 1.3. Settings ---- Sys.setenv(R_CONFIG_ACTIVE = "default") # use "default" for for production, "development" for prototyping config <- config::get(file = "../inst/golem-config.yml", use_parent = TRUE)
r newthought('The goal')
of this document is to prepare functions for the report 14 Investment Level.
lead_raw_dt <- fd.dashboard::get_clean_leads() lead_raw_dt %>% str() lead_mult_3_id <- lead_raw_dt[, .N, .(lead_status, lead_id)][, .N, .(lead_id)][N > 1][order(-N)][N == 3, lead_id] lead_raw_dt[lead_id %in% lead_mult_3_id[1], ] setkey(lead_raw_dt, tech_date_start) lead_latest_entry_dt <- lead_raw_dt[, .SD[.N], .(lead_id)] lead_latest_entry_dt[lead_id %in% lead_mult_3_id[1], ] period <- "2021-01-01" %--% (today() - lubridate::days(1)) period <- "2021-01-01" %--% today() lead_latest_entry_dt[Date %within% period, .N, .(lead_status, website_iso2c)][, dcast.data.table(.SD, lead_status ~ website_iso2c)] lead_latest_entry_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(lead_status, website_iso2c)][order(-N)][, lapply(.SD, sum), .SDcols = is.numeric] lead_latest_entry_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(Date)][order(Date)] %>% ggplot(aes(Date, N)) + geom_line() lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(website_iso2c, lead_status)][order(-N)] lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(Date)][order(Date)] %>% ggplot(aes(Date, N)) + geom_line() lead_raw_dt[Date %within% period, ][Date == max(Date) & website_iso2c %in% c("US"), ] period <- "2021-01-01" %--% (today() - lubridate::days(1)) lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(website_iso2c, lead_status)][order(-N)] lead_us_mult_id_dt <- lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(lead_status, lead_id)][, .N, .(lead_id)][N > 1][order(-N)] setkey(lead_raw_dt, lead_id) setkey(lead_us_mult_id_dt, lead_id) lead_id_mult_sent_dt <- lead_raw_dt[lead_us_mult_id_dt][lead_status == "Sent"][, .(lead_id)] setkey(lead_id_mult_sent_dt, lead_id) x1_dt <- lead_raw_dt[lead_id_mult_sent_dt][, .(lead_id, publish_date, lead_status, tech_date_start)] setkey(x1_dt, tech_date_start) x1_dt[, .(lead_status[3]), .(lead_id)][!is.na(V1)] x1_dt[, `:=` (n = 1)] x1_dt[, `:=` (step = cumsum(n)), .(lead_id)] x1_dt[, .N, .(step)] x1_dt[step == 18, ] x1_dt$step %<>% as.factor() plot_01 <- x1_dt[step %in% (c(1:2)), .(lead_id, lead_status, step)] %>% ggplot(aes(x = step, stratum = lead_status, alluvium = lead_id, fill = lead_status)) + geom_flow(stat = "alluvium") + geom_stratum() + scale_fill_viridis_d(option = "A") + geom_flow(stat = "alluvium", lode.guidance = "frontback", color = "lightgray") + theme_bw() + labs(subtitle = stringr::str_glue("leads with more than one status"), x = "", y = "") + theme(legend.position = "right") #-- all x2_dt <- lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .(lead_id, lead_status, tech_date_start)] x2_dt[, `:=` (n = 1)] x2_dt[, `:=` (step = cumsum(n)), .(lead_id)] x2_dt$step %<>% as.factor() x2_dt <- x2_dt[step %in% (c(1:2)), ] x2_dt <- x2_dt[, .(lead_id, lead_status, step, n)][, `:=` (lead_status = as.factor(lead_status) %>% fct_lump_prop(prop = 0.01))] x2_dt %>% str() x2_dt[, .N, .(lead_status)] plot_02 <- x2_dt[step %in% (c(1:2)), .(lead_id, lead_status, step)] %>% ggplot(aes(x = step, stratum = lead_status, alluvium = lead_id, fill = lead_status)) + geom_flow(stat = "alluvium") + geom_stratum() + scale_fill_viridis_d(option = "B") + geom_flow(stat = "alluvium", lode.guidance = "frontback", color = "lightgray") + theme_bw() + labs(title = "Status changes, January, US website", subtitle = stringr::str_glue("all leads"), x = "", y = "") + theme(legend.position = "right") + guides(fill = guide_legend(ncol = 1)) plot_03 <- ggpubr::ggarrange(plot_02, plot_01, nrow = 2, ncol = 1, common.legend = T, legend = "right") ggsave(filename = "Leads status.png", plot = plot_03, width = 12)
library(sunburstR) sequences <- read.csv( system.file("examples/visit-sequences.csv",package="sunburstR") ,header = FALSE ,stringsAsFactors = FALSE )[1:200,] # create a d2b sunburst sund2b(sequences) # show labels sund2b(sequences, showLabels = TRUE) #-- x4_dt <- x2_dt[step %in% (c(1:2)), .(lead_id, lead_status, step)][, dcast.data.table(.SD, lead_id ~ step, value.var = "lead_status")][!is.na(`2`)][, `:=` (seq = paste(`1`, `2`, sep = "-")), .(lead_id)][] x5_dt <- x4_dt[, .N, .(seq)] sund2b(x5_dt) #-- x6_dt <- lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .(lead_id, lead_status, tech_date_start)] x6_dt[, `:=` (n = 1)] x6_dt[, `:=` (step = cumsum(n)), .(lead_id)] x6_dt$step %<>% as.factor() x6_dt <- x6_dt[step %in% (c(1:2)), ] x7_dt <- x6_dt[, dcast.data.table(.SD, lead_id ~ step, value.var = "lead_status")][, `:=` (seq = fcase(is.na(`2`), `1`, !is.na(`2`), paste(`1`, `2`, sep = "-"))), .(lead_id)] x7_dt[, .N, .(seq)] %>% sund2b( ) ?sund2bTooltip() lead_2_id <- x6_dt[, .N, .(lead_id)][N == 2] setkey(lead_2_id, lead_id) setkey(x6_dt, lead_id) x7_dt <- x6_dt[lead_2_id] x7_dt[, seq := lapply(.SD, paste), .SDcols = c("lead_status"), .(lead_id)][] x7_dt[lead_id == "00000176-bc4a-d410-affe-bcdb7fc30000", (paste(lead_status, sep = "-"))] x7_dt[lead_id == "00000176-bc4a-d410-affe-bcdb7fc30000", (reduce(.x = lead_status, .f = paste, sep = "-"))] x7_dt[lead_id == "00000176-bc4a-d410-affe-bcdb7fc30000", .(seq = reduce(.x = lead_status, .f = paste, sep = "-"))] x6_dt[, .(seq = reduce(.x = lead_status, .f = paste, sep = "-")), .(lead_id)][, .N, .(seq)] %>% sund2b()
# Tue Feb 2 17:06:57 2021 ------------------------------ lead_raw_dt[month == "Jan" & year == 2021, .N, .(lead_status, lead_source)][, dcast.data.table(.SD, lead_status ~ lead_source)] lead_raw_dt[month == "Jan" & year == 2021, .N, .(lead_status, credited_lead)][, dcast.data.table(.SD, lead_status ~ credited_lead)][!is.na(true)] #-- charting credited leads dates_interval <- (today() - months(1)) %--% today() leads_dt <- lead_raw_dt[Date %within% dates_interval, ] #-- which lead_id have status Credited factor_cols <- c("lead_status", "profile_id", "lead_id", "client_id") leads_dt[, (factor_cols) := lapply(.SD, as.factor), .SDcols = factor_cols] leads_dt[, .SD, .SDcols = (colnames(leads_dt) %>% str_subset(pattern = "_id"))] %>% str() leads_credited_dt <- leads_dt[lead_status == "Credited Lead", .N, .(lead_id)][, .(lead_id)] #-- subset to leads, which were later on credited setkey(leads_dt, lead_id) setkey(leads_credited_dt, lead_id) leads_sent_credited_dt <- leads_dt[leads_credited_dt][lead_status == "Credited Lead"] leads_sent_credited_dt <- leads_sent_credited_dt[, `:=` (n = 1), ][!is.na(website_iso2c), ] leads_sent_credited_dt[, `:=` (website_iso2c = fct_reorder(website_iso2c, .x = n, .fun = sum, na.rm = T) %>% fct_rev())] month_selected <- "Jan" leads_sent_credited_dt[, .N, .(Date, web = website_iso2c)] %>% ggplot(aes(Date, N, color = web, fill = web)) + geom_col() + theme_bw() + scale_color_viridis_d(option = "A", alpha = 1) + scale_fill_viridis_d(option = "A", alpha = .6) + labs(title = glue::glue("Number of Credited Leads in {month_selected}."), y = "", x = "")
leads_dt[, .N, .(lead_source, lead_status)] leads_source_dt <- leads_dt[lead_status == "Sent", .N, .(Date, lead_source)][, `:=` (lead_source = as.factor(lead_source) %>% fct_reorder(.x = N, .fun = sum) %>% fct_rev())][] leads_source_dt$lead_source %>% levels() dates_range <- leads_source_dt[, range(Date)] dates_range[2] %<>% + days(20) lead_source_labels_dt <- leads_source_dt[Date < today() & lead_source != "null", {max_date = max(Date); .(N = .SD[Date == max_date, N], max_date)}, .(lead_source)][order(-N)] plot_01 <- leads_source_dt[Date < today() & lead_source != "null"] %>% ggplot(aes(Date, N, color = lead_source)) + geom_line() + geom_point(data = lead_source_labels_dt, mapping = aes(max_date, N, color = lead_source)) + theme_bw() + theme(legend.position = "none") + labs(title = glue::glue("Number of Leads by Source in {month_selected}."), y = "", x = "") plot_02 <- plot_01 + coord_cartesian(xlim = c(dates_range[1], dates_range[2])) + ggrepel::geom_text_repel(data = lead_source_labels_dt, mapping = aes(max_date, N, label = lead_source), nudge_x = 3, direction = "y", force = .5, segment.size = 0.2, hjust = 0, na.rm = T, label.padding = 100, point.padding = 0.5) #-- now facet by website leads_source_web_dt <- leads_dt[lead_status == "Sent", .N, .(Date, lead_source, website_iso2c) ][, `:=` (lead_source = as.factor(lead_source) %>% fct_reorder(.x = N, .fun = sum) %>% fct_rev(), website_iso2c = fct_reorder(.f = website_iso2c, .x = N, .fun = sum) %>% fct_rev())][] plot_11 <- leads_source_web_dt[Date < today() & lead_source != "null" & website_iso2c %in% c("US", "GB")] %>% ggplot(aes(Date, N, color = lead_source, fill = lead_source)) + geom_area(alpha = 0.5) + #geom_point(data = lead_source_labels_dt, mapping = aes(max_date, N, color = lead_source)) + theme_bw() + theme(legend.position = "none") + labs(title = glue::glue("Number of Leads by Source in {month_selected}."), y = "", x = "") + facet_wrap(facets = "website_iso2c", scales = "free_y") plot_11
#-- charting credited leads dates_interval <- (today() - months(1)) %--% today() leads_dt <- lead_raw_dt[Date %within% dates_interval, ] dt <- leads_dt #-- reduce changes to only those, having changed status, "flatten" all other changes, when status is not changed setkey(dt, tech_date_start, lead_id) dt[, `:=` (status_prev = shift(lead_status, type = "lag", fill = NA)), .(lead_id)] x3_dt <- dt[, .(lead_id, lead_status, status_prev, tech_date_start)][lead_status != status_prev | is.na(status_prev),] x3_dt[, .(mult = .N), .(lead_id)][, .N, .(mult)] x3_dt[, .(mult = .N), .(lead_id)][order(-mult)] x3_dt[lead_id == "00000176-cf64-d410-affe-efffaa060000"] x3_dt[lead_id == "00000176-d7a6-dbb8-a7fe-dff6fd1c0000"] #-- logic #-- 1. get lead_id which were credited factor_cols <- c("lead_status", "profile_id", "lead_id", "client_id") dt[, (factor_cols) := lapply(.SD, as.factor), .SDcols = factor_cols] dt[, .SD, .SDcols = (colnames(dt) %>% str_subset(pattern = "_id"))] %>% str() leads_credited_dt <- dt[lead_status == "Credited Lead", .N, .(lead_id)][, .(lead_id)] #-- 2. get earliest "Sent" status per each lead_id x4_sent_dt <- x3_dt[lead_status == "Sent", ] lead_id_mult_sent <- x4_sent_dt[, .N, .(lead_id)][N > 1][, lead_id] x4_sent_dt[lead_id %in% lead_id_mult_sent, ] setkey(x4_sent_dt, tech_date_start) x4_sent_dt[, .N, .(lead_id)][N > 1] x4_sent_dt[lead_id == "00000176-cf64-d410-affe-efffaa060000", ] dt[, .N, .(lead_status, lead_id)][, .N, .(lead_id)][N > 1][order(-N)] dt[lead_status != "Sent", .N, .(lead_status)] dt[lead_status == "Sent", .N, .(lead_id)][N >1] dt_1st_sent_dt <- dt[, .SD[1], .(lead_id)] dt_1st_sent2_dt <- dt[lead_status == "Sent", .SD[1], .(lead_id)] #-- 3. mark, if the lead have been credited with a flag setkey(dt_1st_sent_dt, lead_id) setkey(leads_credited_dt, lead_id) x4_dt <- rbindlist(list(dt_1st_sent_dt[!leads_credited_dt][, `:=` (credited = FALSE)], dt_1st_sent_dt[leads_credited_dt][, `:=` (credited = TRUE)])) x4_dt[, .N, .(credited_lead, credited)] x4_dt[credited_lead == "true" & credited == FALSE, ] x4_dt[, .N, .(credited, Date)][, dcast.data.table(.SD, Date ~ credited, fill = 0)] x4_dt[, .N, .(credited, Date)][, dcast.data.table(.SD, Date ~ credited, fill = 0)][, lapply(.SD, sum), .SDcols = is.integer] # Wed Feb 3 12:23:56 2021 ------------------------------ # - dokończyć budowanie funkcji # 34959 1st sent - 141 credited = 34818 # 34959 - 141
# Wed Feb 17 16:40:52 2021 ------------------------------ lead_raw_dt <- fd.dashboard::get_clean_leads() leads_sent <- mark_credited_leads(lead_raw_dt) leads_credited <- get_credited_leads(leads_sent) leads_credited[Date >= date("2021-01-01")]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.