#' Data preparation for the raw data in order to bring them in the format we want
#' @export
#' @param input is a dataset with three columns (customer_id, iri_week, trans_spend)
Data_prep <- function(input) {
library(dplyr)
library(sqldf)
library(data.table)
#input$cust <- as.numeric(input$cust)
colnames(input)[2] <- "date"
# Remove outliers ---------------------------------------------------------
input_outl <- input[ , .N, by = c('cust') ]
lower_bound <- quantile(input_outl$N, 0.025)
upper_bound <- quantile(input_outl$N, 0.975)
outlier_ind <- which(input_outl$N < lower_bound | input_outl$N > upper_bound)
input_outl <- input_outl[-which(rownames(input_outl) %in% outlier_ind),]
df_input= input %>% inner_join(input_outl, by = 'cust')
df_input <- subset(df_input, select = c(1:3))
# Aggregate Data weekly ---------------------------------------------------
input <- df_input[, .(spend=sum(spend)), by=list(cust, date)]
# Calculate average spend per customer ------------------------------------
avg_input_spend <- aggregate(input$spend, by=list(input$cust), FUN=mean)
colnames(avg_input_spend)[1] <- "cust"
colnames(avg_input_spend)[2] <- "spend"
input <- subset(input, select = c("cust","date"))
# Set the last week -------------------------------------------------------
input$T_cal <- max(input$date)
# Create freq, recency and total observ period ----------------------------
input_train <- sqldf("select cust, count(*)-1 as freq, (max(date) - min(date))/7 as recency, (T_cal - min(date))/7 as Tot_Obs from input group by cust", drv = 'SQLite')
input_train <- as.matrix(input_train)
return(list(input_train,avg_input_spend))
#return(input_train)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.