#-- 0.0 markdown options
knitr::opts_chunk$set(
    echo = FALSE,
    eval = TRUE,
    message = FALSE,
    warning = FALSE,
    dpi = 300,
    tinytex.verbose = TRUE,
    tidy = FALSE, 
#    cashe.extra = packageVersion('tufte'),
    fig.align = "center"
    )
#-- 1.0 libraries ----
# library(magrittr)
# library(tidyverse)
library(data.table)
# library(tidyquant)
# library(RMySQL)
library(lubridate)
# library(ISOweek)
# library(MMWRweek)

#-- 1.2 Libraries own ----
library(fd.dashboard)

#-- 1.3 libraries graphical ----
library(ggpubr)
library(knitr)
library(ggExtra)
library(viridis)
library(tufte)
#-- 1.3. Settings ----
Sys.setenv(R_CONFIG_ACTIVE = "default") # use "default" for for production, "development" for prototyping
config <- config::get(file = "../inst/golem-config.yml", use_parent = TRUE)

Purpose

r newthought('The goal') of this document is to prepare functions for the report 14 Investment Level.

lead_raw_dt <- fd.dashboard::get_clean_leads()
lead_raw_dt %>% str()

lead_mult_3_id <- lead_raw_dt[, .N, .(lead_status, lead_id)][, .N, .(lead_id)][N > 1][order(-N)][N == 3, lead_id]
lead_raw_dt[lead_id %in% lead_mult_3_id[1], ]

setkey(lead_raw_dt, tech_date_start)
lead_latest_entry_dt <- lead_raw_dt[, .SD[.N], .(lead_id)]
lead_latest_entry_dt[lead_id %in% lead_mult_3_id[1], ]

period <- "2021-01-01" %--% (today() - lubridate::days(1))
period <- "2021-01-01" %--% today()

lead_latest_entry_dt[Date %within% period, .N, .(lead_status, website_iso2c)][, dcast.data.table(.SD, lead_status ~ website_iso2c)]
lead_latest_entry_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(lead_status, website_iso2c)][order(-N)][, lapply(.SD, sum), .SDcols = is.numeric]
lead_latest_entry_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(Date)][order(Date)] %>%
    ggplot(aes(Date, N)) +
    geom_line()


lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(website_iso2c, lead_status)][order(-N)]
lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(Date)][order(Date)] %>%
    ggplot(aes(Date, N)) +
    geom_line()

lead_raw_dt[Date %within% period, ][Date == max(Date) & website_iso2c %in% c("US"), ]

period <- "2021-01-01" %--% (today() - lubridate::days(1))
lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(website_iso2c, lead_status)][order(-N)]
lead_us_mult_id_dt <- lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .N, .(lead_status, lead_id)][, .N, .(lead_id)][N > 1][order(-N)]

setkey(lead_raw_dt, lead_id)
setkey(lead_us_mult_id_dt, lead_id)

lead_id_mult_sent_dt <- lead_raw_dt[lead_us_mult_id_dt][lead_status == "Sent"][, .(lead_id)]
setkey(lead_id_mult_sent_dt, lead_id)

x1_dt <- lead_raw_dt[lead_id_mult_sent_dt][, .(lead_id, publish_date, lead_status, tech_date_start)]
setkey(x1_dt, tech_date_start)

x1_dt[, .(lead_status[3]), .(lead_id)][!is.na(V1)]

x1_dt[, `:=` (n = 1)]
x1_dt[, `:=` (step = cumsum(n)), .(lead_id)]
x1_dt[, .N, .(step)]
x1_dt[step == 18, ]


x1_dt$step %<>% as.factor()
plot_01 <- x1_dt[step %in% (c(1:2)), .(lead_id, lead_status, step)] %>%
    ggplot(aes(x = step, stratum = lead_status, alluvium = lead_id, fill = lead_status)) +
    geom_flow(stat = "alluvium") +
    geom_stratum() +
    scale_fill_viridis_d(option = "A") +
    geom_flow(stat = "alluvium", lode.guidance = "frontback",
            color = "lightgray") +
    theme_bw() +
    labs(subtitle = stringr::str_glue("leads with more than one status"), x = "", y = "") +
    theme(legend.position = "right")




#-- all
x2_dt <- lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .(lead_id, lead_status, tech_date_start)]
x2_dt[, `:=` (n = 1)]
x2_dt[, `:=` (step = cumsum(n)), .(lead_id)]
x2_dt$step %<>% as.factor()
x2_dt <- x2_dt[step %in% (c(1:2)), ]

x2_dt <- x2_dt[, .(lead_id, lead_status, step, n)][, `:=` (lead_status = as.factor(lead_status) %>% fct_lump_prop(prop = 0.01))]
x2_dt %>% str()
x2_dt[, .N, .(lead_status)]


plot_02 <- x2_dt[step %in% (c(1:2)), .(lead_id, lead_status, step)] %>%
    ggplot(aes(x = step, stratum = lead_status, alluvium = lead_id, fill = lead_status)) +
    geom_flow(stat = "alluvium") +
    geom_stratum() +
    scale_fill_viridis_d(option = "B") +
    geom_flow(stat = "alluvium", lode.guidance = "frontback",
            color = "lightgray") +
    theme_bw() +
    labs(title = "Status changes, January, US website", subtitle = stringr::str_glue("all leads"), x = "", y = "") +
    theme(legend.position = "right") +
    guides(fill = guide_legend(ncol = 1))


plot_03 <- ggpubr::ggarrange(plot_02, plot_01, nrow = 2, ncol = 1, common.legend = T, legend = "right") 
ggsave(filename = "Leads status.png", plot = plot_03, width = 12)
library(sunburstR)
sequences <- read.csv(
    system.file("examples/visit-sequences.csv",package="sunburstR")
    ,header = FALSE
    ,stringsAsFactors = FALSE
  )[1:200,]

# create a d2b sunburst
  sund2b(sequences)

  # show labels
  sund2b(sequences, showLabels = TRUE)


#--
x4_dt <- x2_dt[step %in% (c(1:2)), .(lead_id, lead_status, step)][, dcast.data.table(.SD, lead_id ~ step, value.var = "lead_status")][!is.na(`2`)][, `:=` (seq = paste(`1`, `2`, sep = "-")), .(lead_id)][]

x5_dt <- x4_dt[, .N, .(seq)]

sund2b(x5_dt)

#--
x6_dt <- lead_raw_dt[Date %within% period & website_iso2c %in% c("US"), .(lead_id, lead_status, tech_date_start)]
x6_dt[, `:=` (n = 1)]
x6_dt[, `:=` (step = cumsum(n)), .(lead_id)]
x6_dt$step %<>% as.factor()
x6_dt <- x6_dt[step %in% (c(1:2)), ]

x7_dt <- x6_dt[, dcast.data.table(.SD, lead_id ~ step, value.var = "lead_status")][, `:=` (seq = fcase(is.na(`2`), `1`, 
                                                                                                       !is.na(`2`), paste(`1`, `2`, sep = "-"))), .(lead_id)]


x7_dt[, .N, .(seq)] %>% sund2b( )

?sund2bTooltip()

lead_2_id <- x6_dt[, .N, .(lead_id)][N == 2]
setkey(lead_2_id, lead_id)
setkey(x6_dt, lead_id)

x7_dt <- x6_dt[lead_2_id]
x7_dt[, seq := lapply(.SD,  paste), .SDcols = c("lead_status"), .(lead_id)][]
x7_dt[lead_id == "00000176-bc4a-d410-affe-bcdb7fc30000", (paste(lead_status, sep = "-"))]
x7_dt[lead_id == "00000176-bc4a-d410-affe-bcdb7fc30000", (reduce(.x = lead_status, .f = paste, sep = "-"))]
x7_dt[lead_id == "00000176-bc4a-d410-affe-bcdb7fc30000", .(seq = reduce(.x = lead_status, .f = paste, sep = "-"))]

x6_dt[, .(seq = reduce(.x = lead_status, .f = paste, sep = "-")), .(lead_id)][, .N, .(seq)] %>%
    sund2b()
# Tue Feb  2 17:06:57 2021 ------------------------------
lead_raw_dt[month == "Jan" & year == 2021, .N, .(lead_status, lead_source)][, dcast.data.table(.SD, lead_status ~ lead_source)]
lead_raw_dt[month == "Jan" & year == 2021, .N, .(lead_status, credited_lead)][, dcast.data.table(.SD, lead_status ~ credited_lead)][!is.na(true)]

#-- charting credited leads
dates_interval <- (today() - months(1)) %--% today()
leads_dt <- lead_raw_dt[Date %within% dates_interval, ]

#-- which lead_id have status Credited
factor_cols <- c("lead_status", "profile_id", "lead_id", "client_id")
leads_dt[, (factor_cols) := lapply(.SD, as.factor), .SDcols = factor_cols]
leads_dt[, .SD, .SDcols = (colnames(leads_dt) %>% str_subset(pattern = "_id"))] %>% str()
leads_credited_dt <- leads_dt[lead_status == "Credited Lead", .N, .(lead_id)][, .(lead_id)]

#-- subset to leads, which were later on credited
setkey(leads_dt, lead_id)
setkey(leads_credited_dt, lead_id)
leads_sent_credited_dt <- leads_dt[leads_credited_dt][lead_status == "Credited Lead"]

leads_sent_credited_dt <- leads_sent_credited_dt[, `:=` (n = 1), ][!is.na(website_iso2c), ]
leads_sent_credited_dt[, `:=` (website_iso2c = fct_reorder(website_iso2c, .x = n, .fun = sum, na.rm = T) %>% fct_rev())]

month_selected <- "Jan"
leads_sent_credited_dt[, .N, .(Date, web = website_iso2c)] %>%
  ggplot(aes(Date, N, color = web, fill = web)) +
  geom_col() +
  theme_bw() +
  scale_color_viridis_d(option = "A", alpha = 1) +
  scale_fill_viridis_d(option = "A", alpha = .6) +
  labs(title = glue::glue("Number of Credited Leads in {month_selected}."), y = "", x = "")
leads_dt[, .N, .(lead_source, lead_status)]

leads_source_dt <- leads_dt[lead_status == "Sent", .N, .(Date, lead_source)][, `:=` (lead_source = as.factor(lead_source) %>% fct_reorder(.x = N, .fun = sum) %>% fct_rev())][] 

leads_source_dt$lead_source %>% levels()

dates_range <- leads_source_dt[, range(Date)]
dates_range[2] %<>% + days(20)

lead_source_labels_dt <- leads_source_dt[Date < today() & lead_source != "null", {max_date = max(Date); .(N = .SD[Date == max_date, N], max_date)}, .(lead_source)][order(-N)]

plot_01 <- leads_source_dt[Date < today() & lead_source != "null"] %>%
  ggplot(aes(Date, N, color = lead_source)) +
  geom_line() + 
  geom_point(data = lead_source_labels_dt, mapping = aes(max_date, N, color = lead_source)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = glue::glue("Number of Leads by Source in {month_selected}."), y = "", x = "")


plot_02 <- plot_01 +
  coord_cartesian(xlim = c(dates_range[1], dates_range[2])) +
  ggrepel::geom_text_repel(data = lead_source_labels_dt, mapping = aes(max_date, N, label = lead_source), 
                           nudge_x = 3, direction = "y", force = .5, segment.size = 0.2, hjust = 0, na.rm = T,
                           label.padding = 100, point.padding = 0.5)

#-- now facet by website
leads_source_web_dt <- leads_dt[lead_status == "Sent", .N, .(Date, lead_source, website_iso2c)
                                ][, `:=` (lead_source = as.factor(lead_source) %>% fct_reorder(.x = N, .fun = sum) %>% fct_rev(),
                                          website_iso2c = fct_reorder(.f = website_iso2c, .x = N, .fun = sum) %>% fct_rev())][] 

plot_11 <- leads_source_web_dt[Date < today() & lead_source != "null" & website_iso2c %in% c("US", "GB")] %>%
  ggplot(aes(Date, N, color = lead_source, fill = lead_source)) +
  geom_area(alpha = 0.5) + 
  #geom_point(data = lead_source_labels_dt, mapping = aes(max_date, N, color = lead_source)) +
  theme_bw() +
  theme(legend.position = "none") +
  labs(title = glue::glue("Number of Leads by Source in {month_selected}."), y = "", x = "") +
  facet_wrap(facets = "website_iso2c", scales = "free_y")

plot_11
#-- charting credited leads
dates_interval <- (today() - months(1)) %--% today()
leads_dt <- lead_raw_dt[Date %within% dates_interval, ]
dt <- leads_dt


       #-- reduce changes to only those, having changed status, "flatten" all other changes, when status is not changed
        setkey(dt, tech_date_start, lead_id)
        dt[, `:=` (status_prev = shift(lead_status, type = "lag", fill = NA)), .(lead_id)]
        x3_dt <- dt[, .(lead_id, lead_status, status_prev, tech_date_start)][lead_status != status_prev | is.na(status_prev),]

x3_dt[, .(mult = .N), .(lead_id)][, .N, .(mult)]
x3_dt[, .(mult = .N), .(lead_id)][order(-mult)]
x3_dt[lead_id == "00000176-cf64-d410-affe-efffaa060000"]
x3_dt[lead_id == "00000176-d7a6-dbb8-a7fe-dff6fd1c0000"]

#-- logic
#-- 1. get lead_id which were credited
factor_cols <- c("lead_status", "profile_id", "lead_id", "client_id")
dt[, (factor_cols) := lapply(.SD, as.factor), .SDcols = factor_cols]
dt[, .SD, .SDcols = (colnames(dt) %>% str_subset(pattern = "_id"))] %>% str()
leads_credited_dt <- dt[lead_status == "Credited Lead", .N, .(lead_id)][, .(lead_id)]

#-- 2. get earliest "Sent" status per each lead_id
x4_sent_dt <- x3_dt[lead_status == "Sent", ]
lead_id_mult_sent <- x4_sent_dt[, .N, .(lead_id)][N > 1][, lead_id]
x4_sent_dt[lead_id %in% lead_id_mult_sent, ]

setkey(x4_sent_dt, tech_date_start)
x4_sent_dt[, .N, .(lead_id)][N > 1]
x4_sent_dt[lead_id == "00000176-cf64-d410-affe-efffaa060000", ]


dt[, .N, .(lead_status, lead_id)][, .N, .(lead_id)][N > 1][order(-N)]
dt[lead_status != "Sent", .N, .(lead_status)]
dt[lead_status == "Sent", .N, .(lead_id)][N >1]

dt_1st_sent_dt <- dt[, .SD[1], .(lead_id)]
dt_1st_sent2_dt <- dt[lead_status == "Sent", .SD[1], .(lead_id)]

#-- 3. mark, if the lead have been credited with a flag
setkey(dt_1st_sent_dt, lead_id)
setkey(leads_credited_dt, lead_id)

x4_dt <- rbindlist(list(dt_1st_sent_dt[!leads_credited_dt][, `:=` (credited = FALSE)],
               dt_1st_sent_dt[leads_credited_dt][, `:=` (credited = TRUE)]))


x4_dt[, .N, .(credited_lead, credited)]
x4_dt[credited_lead == "true" & credited == FALSE, ]
x4_dt[, .N, .(credited, Date)][, dcast.data.table(.SD, Date ~ credited, fill = 0)]
x4_dt[, .N, .(credited, Date)][, dcast.data.table(.SD, Date ~ credited, fill = 0)][, lapply(.SD, sum), .SDcols = is.integer]

# Wed Feb  3 12:23:56 2021 ------------------------------
# - dokończyć budowanie funkcji
# 34959 1st sent - 141 credited = 34818
# 34959 - 141
# Wed Feb 17 16:40:52 2021 ------------------------------
lead_raw_dt <- fd.dashboard::get_clean_leads()

leads_sent <- mark_credited_leads(lead_raw_dt)

leads_credited <- get_credited_leads(leads_sent)
leads_credited[Date >= date("2021-01-01")]


piotrgruszecki/fd.dashboard documentation built on March 21, 2021, 6:16 p.m.