require(BTYD)
require(data.table)
require(modelpipe)
require(lubridate)
require(rquery)
require(rqdatatable)
# params ------------------------------------------------------------------
cal_period <- as.Date("1997-09-30")
# data load ---------------------------------------------------------------
cdnowElog <- system.file("data/cdnowElog.csv", package = "BTYD")
df <- fread(cdnowElog)
df$date <- ymd(df$date)
setnames(df, "sampleid", "custid")
# Functions ---------------------------------------------------------------
calc_days_diff <- function(x, x_1){
out <- difftime(x, x_1, units = "days") %>% as.numeric()
return(out)
}
itt_sd <- function(x, x_1){
n <- calc_days_diff(x, x_1)
n <- n[!is.na(n)]
sd_out <- sum((n - mean(n))^2) / (length(n)-1)
return(sd_out^(1/2))
}
create_cal_frame <- function(cust_id, sales, date, cal_period){
df <- data.table(cust_id, sales, date)
testit::assert("Date entry is not a date.", is.Date(date))
testit::assert("Sales is not numeric.", is.numeric(sales))
testit::assert("Missing values in data.", nrow(df) == nrow(df[complete.cases(df), ]))
df <- df %.>%
rquery::project(.,
sales %:=% sum(sales),
groupby = c("cust_id", "date"))
#summarise calibration data
df_cal <- df[df$date <= cal_period, ]
df_cal <- df_cal %.>%
rquery::project(.,
total_sales = sum(sales),
mean_sales = mean(sales),
trans_cnt = length(date),
days_last = calc_days_diff(.(cal_period), max(date)),
itt_sd = ifelse(length(sales) <= 2, -1, itt_sd(date, shift(date, 1))),
groupby = "cust_id")
#modeling period
df_mdl <- df[df$date > cal_period, ]
df_mdl <- df_mdl %.>%
rquery::project(.,
fut_star = sum(sales),
fut_trans = length(date),
groupby = "cust_id")
df_out <- merge(df_cal, df_mdl, all.x = T, by = "cust_id")
df_out[is.na(df_out)] <- 0
return(df_out)
}
# Data manipulation -------------------------------------------------------
df_cal <- create_cal_frame(df$custid, df$sales, df$date, cal_period)
df_cal <- split_data(df_cal)
train <- df_cal$df_train
train$target <- train$fut_trans
train_sales <- train$fut_star
train <- train[,c("fut_trans", "fut_star") := NULL]
test <- df_cal$df_test
test$target <- test$fut_trans
test_sales <- test$fut_star
test <- test[,c("fut_trans", "fut_star") := NULL]
mdl <- xgb_pois(train,
test)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.