Data_prep: Several data transformations in order to bring the data into the...

Usage Arguments Examples

View source: R/Data_prep.R

Usage

1
Data_prep(input)

Arguments

input

Examples

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
# Prepare a transaction table for downstream modelling.
#
# Steps:
#   1. Coerce `cust` to numeric and rename the second column to "date".
#   2. Count transactions per customer and drop customers whose count lies
#      below the 2.5% or above the 97.5% quantile of those counts.
#   3. Sum `spend` per (cust, date) and compute each remaining customer's
#      mean of those per-date totals.
#   4. Build a per-customer summary (freq, recency, Tot_Obs) via SQL.
#
# Args:
#   input: a data.table of transactions (data.table `[` syntax is used on
#     it). It must contain a `cust` column, its second column is treated as
#     the transaction date, and `spend` must be among the first three
#     columns, because only columns 1:3 are kept after the join.
#     NOTE(review): presumably the layout is (cust, date, spend) - confirm
#     against the caller.
#
# Returns:
#   An unnamed list of length two:
#     [[1]] a matrix with columns cust, freq, recency, Tot_Obs;
#     [[2]] a data.frame with columns cust and spend (mean spend per
#           customer over the per-date totals).
function (input)
{
    input$cust <- as.numeric(input$cust)
    colnames(input)[2] <- "date"

    # Number of transaction rows per customer.
    input_outl <- input[, .N, by = c("cust")]
    lower_bound <- quantile(input_outl$N, 0.025)
    upper_bound <- quantile(input_outl$N, 0.975)

    # Filter with a logical mask rather than `-which(...)`: when no customer
    # is an outlier, `-which(...)` is `integer(0)` and would select zero
    # rows, silently discarding every customer.
    is_outlier <- input_outl$N < lower_bound | input_outl$N > upper_bound
    input_outl <- input_outl[!is_outlier, ]

    # Keep only transactions of retained customers, then drop the joined
    # count column by keeping the first three columns.
    df_input <- inner_join(input, input_outl, by = "cust")
    df_input <- subset(df_input, select = c(1:3))

    # Collapse to one row per customer and date.
    input <- df_input[, .(spend = sum(spend)), by = list(cust, date)]

    avg_input_spend <- aggregate(input$spend, by = list(input$cust),
        FUN = mean)
    colnames(avg_input_spend)[1] <- "cust"
    colnames(avg_input_spend)[2] <- "spend"

    input <- subset(input, select = c("cust", "date"))
    # Latest date in the data, repeated on every row so the SQL can use it.
    input$T_cal <- max(input$date)

    # The SQL refers to the table `input`, so the local variable must keep
    # that name. Differences are divided by 7.
    # NOTE(review): presumably converts days to weeks - confirm the unit of
    # `date`.
    input_train <- sqldf("select cust, count(*)-1 as freq, (max(date) - min(date))/7 as recency, (T_cal - min(date))/7 as Tot_Obs from input group by cust",
        drv = "SQLite")
    input_train <- as.matrix(input_train)

    list(input_train, avg_input_spend)
}

uogss/CLV documentation built on Jan. 29, 2021, 1:50 p.m.