# Nothing
#' R6 Class representing a customer. Otherwise known as the CADF.
#'
#' Call Customer$new() to convert transactional data to CADF format
# Common ways that this class is used:
# 1.) Load transactional data. Make sure to format the dates and pull only the
#     customer ID and the purchase date.
#     If you are testing, send a single customer ID and the data for that
#     single customer to id_to_CADF.
# 2.) If your data contains multiple customer IDs, split the data from step 1
#     using the R split function.
# 3.) Call split.transaction.file_to_CADF.
#' @import utils
#' @import stats
#' @param ... All arguments in list
#' @export
#' @examples
#' library(CADF)
#' data("transactions")
#' customer <- subset(transactions, transactions$ID == 40)
#' today.study.cutoff <- max(customer$PURCHASE_DATE)
#' customer.40.CADF <- Customer$new(customer, today.study.cutoff)
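#' @description
#' Converts the transactional data of a single customer into the CADF format.
#' @param df_customer Data frame of transactions for a single customer, with
#' the customer ID in the first column and the purchase date in the second.
#' @param today The study cutoff date (see `today.study.cutoff` in the
#' examples).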
Customer <- R6::R6Class(
  "Customer",
  public = list(
    #' @field output Stores all information
    #' in R format at the customer level.
    output = NULL,
    #' @field payload Stores all computed customer information
    #' in JSON format for integration into other systems.
    #' This is not quite an API but designed
    #' so that customer information can be imported to other
    #' formats and systems.
    payload = NULL,
    #' @field data A data frame that stores purchase
    #' information for a single customer: the `df_customer` input of
    #' `initialize` plus the working columns computed there.
    data = NULL,
    #' @field id The customer id. This will be the same ID
    #' as provided in the input transaction file.
    id = NULL,
    #' @field study_name A name to associate with the cohort study.
    #' The name can be whatever is easiest to associate with the set
    #' of customer id and dates included in the analysis.
    study_name = "Customer Study",
    #' @field study_begin_date Begin date of the customer study.
    #' In theory this should be min(TRANSACTION_DATE)
    #' for each customer in the dataset.
    study_begin_date = NULL,
    #' @field timing
    #' Monthly timing computes T as months.
    #' Most commonly utilized and is the default.
    timing = NULL,
    #' @field transaction_dates Unique transaction dates for the customer.
    #' After `initialize` they are stored as "YYYY_MM" strings.
    transaction_dates = NULL,
    #' @field transaction_months All unique YYYY-MM transaction months
    #' for the customer. Used for building purchase strings.
    transaction_months = NULL,
    #' @field first_purchase_date First purchase date for the customer.
    first_purchase_date = NULL,
    #' @field last_purchase_date Last purchase date for the customer.
    last_purchase_date = NULL,
    #' @field repeat_customer "Y" when the customer has transactions in
    #' more than one distinct YYYY-MM month, "N" otherwise. (A customer
    #' with only two purchases in January is not a repeat customer.)
    #' Can be used to filter repeat customers.
    repeat_customer = NULL,
    #' @field repeat_customer_by_day Like `repeat_customer`, but based on
    #' distinct transaction dates instead of YYYY-MM months.
    repeat_customer_by_day = NULL,
    #' @field today The `today` value passed to `initialize`.
    today = NULL,
    #' @field T Cancellation time under the strict-quitter rule
    #' (`ps_to_T_strict_quitter`): the customer leaves after the first
    #' period of inactivity. Example purchase string 11001, T=3.
    T = NULL,
    #' @field T_ss Cancellation time under the strict-stayer rule
    #' (`ps_to_T_strict_stayer`): T is the last period of transaction in
    #' the purchase string. Example purchase string 11001, T=5.
    T_ss = NULL,
    #' @field T_custom Cancellation time returned by `ps_to_T_custom`.
    T_custom = NULL,
    #' @field transaction_range_complete Number of characters in
    #' `purchase_string`.
    transaction_range_complete = NULL,
    #' @field purchase_count Number of distinct purchase months.
    purchase_count = NULL,
    #' @field purchase_string Purchase string built by
    #' `create.purchase.string`: "1" if a purchase was made during the
    #' purchase period, "0" otherwise.
    purchase_string = NULL,
    #' @field purchase_string_as_matrix purchase string as matrix
    purchase_string_as_matrix = NULL,
    #' @field recency_string_as_matrix recency string as matrix
    recency_string_as_matrix = NULL,
    #' @field Freq frequency count (number of periods in `purchase_string`)
    Freq = NULL,
    #' @field logistic_modeling_matrix Stores customer's
    #' logistic modeling matrix. (One row for each time period
    #' (T), 1 = purchase; 0 = no purchase)
    logistic_modeling_matrix = NULL,
    #' @field logistic_modeling_matrix_ss Logistic modeling matrix
    #' built from `T_ss`.
    logistic_modeling_matrix_ss = NULL,
    #' @field logistic_modeling_matrix_custom Logistic modeling matrix
    #' built from `T_custom`.
    logistic_modeling_matrix_custom = NULL,
    #' @field survival_modeling_matrix Stores customer's
    #' modeling matrix for survival analysis. For survival analysis '1'
    #' means that the customer has stopped being a customer.
    #' '0' means that the customer is continuing to be a customer.
    survival_modeling_matrix = NULL,
    #' @field survival_modeling_matrix_ss Survival modeling matrix
    #' built from `T_ss`.
    survival_modeling_matrix_ss = NULL,
    #' @field survival_modeling_matrix_custom Survival modeling matrix
    #' built from `T_custom`.
    survival_modeling_matrix_custom = NULL,
    #' @description
    #' Creates a CADF profile for a given customer
    #' based on the input transactional data.
    #' @param df_customer Data frame of transactions for a single customer.
    #' Column 1 holds the customer id and column 2 the purchase date; the
    #' purchase date is also read by the name `PURCHASE_DATE`. The input
    #' must pass `CADF::qc_transactional_data()`.
    #' @param today Study cutoff date. Purchases dated after `today` are
    #' flagged as belonging to the holdout period.
    #' @return A new `Customer` object. Represents customer data
    #' (for a particular id) in the "CADF" format.
    initialize = function(df_customer = NA, today = NA) {
      stopifnot(CADF::qc_transactional_data(df_customer) == 2)
      # TODO: figure out how to handle the `today` argument if it is missing.

      # Time since the customer's first purchase, computed once and reused.
      # Presumably in days (column 2 is expected to be a Date) -- confirm.
      purchase_dates <- df_customer[[2]]
      since_first <- as.numeric(purchase_dates - min(purchase_dates))

      # Tdays: "days from first purchase".
      df_customer$Tdays <- ceiling(since_first)
      # month_yr: purchase date converted to YYYY-MM format.
      df_customer$month_yr <- format(as.Date(purchase_dates), "%Y-%m")
      # Tmonths: months between purchase date and first purchase date,
      # rounded up to the nearest (30-day) month.
      df_customer$Tmonths <- ceiling(since_first / 30)
      # yr_week: per-row "YYYY_WW" label. The week must be pasted as a regular
      # argument; passing it as `collapse` squashes all rows into one string.
      # NOTE(review): "%V" is the ISO week, whose matching year is "%G";
      # "%Y" is kept here to preserve the existing label format.
      df_customer$yr_week <- paste0(
        format(as.Date(purchase_dates), "%Y"),
        "_",
        format(as.Date(purchase_dates), "%V")
      )
      # Tweeks: "weeks from first purchase" (not rounded).
      df_customer$Tweeks <- since_first / 7

      # Migration (non-contractual) modeling
      # (last row fed to modeling dataset).
      # `today` separates the holdout and analysis datasets.
      df_customer$today <- today
      df_customer$diff.years <-
        (df_customer$today - df_customer$PURCHASE_DATE) / 365.25
      # 1 means the purchase falls in the holdout period (after `today`).
      df_customer$target.buy <- ifelse(df_customer$diff.years < 0, 1, 0)
      df_customer$Frequency.baseperiod <-
        nrow(df_customer[df_customer$target.buy == 0, ])
      df_customer$Frequency.holdout <-
        nrow(df_customer[df_customer$target.buy == 1, ])

      # BTYD modeling (weeks).
      df_customer$x <- df_customer$Frequency.baseperiod
      # NOTE(review): divides by 12 although this section is labelled
      # "weeks" and T.cal divides by 7 -- confirm the intended unit.
      df_customer$t.x <- (df_customer$today - df_customer$PURCHASE_DATE) / 12
      df_customer$T.cal <-
        as.numeric(today - min(df_customer$PURCHASE_DATE)) / 7
      # Smallest strictly positive value of x.
      min_positive <- function(x) min(x[x > 0])
      df_customer$recency.studyperiod.years <-
        min_positive(df_customer$diff.years)

      # Sort by purchase date for output and number the purchases.
      df_customer <- df_customer[order(df_customer[[2]]), ]
      df_customer$purchase.num <- seq_len(nrow(df_customer))

      # Customer-level summary fields.
      self$id <- min(df_customer[, 1])
      self$transaction_dates <- unique(df_customer[, 2])
      self$transaction_months <- unique(df_customer$month_yr)
      self$first_purchase_date <- min(df_customer[, 2])
      self$last_purchase_date <- max(df_customer[, 2])
      self$repeat_customer <-
        if (length(unique(self$transaction_months)) > 1) "Y" else "N"
      # Must run before transaction_dates is reformatted to months below.
      self$repeat_customer_by_day <-
        if (length(unique(self$transaction_dates)) > 1) "Y" else "N"
      # NOTE(review): from here on transaction_dates holds "YYYY_MM" strings
      # rather than dates; kept as-is because callers may rely on it.
      self$transaction_dates <- format(self$transaction_dates, "%Y_%m")
      self$today <- today
      # Monthly count: number of distinct "YYYY_MM" values.
      self$purchase_count <- length(unique(self$transaction_dates))

      # PURCHASE STRINGS
      # No special rules are applied; the purchase string reflects the
      # true purchase history. Inputs: the single-customer data frame and
      # the names of its id column and purchase date column.
      id_col <- names(df_customer[1])
      date_col <- names(df_customer[2])
      self$purchase_string <-
        create.purchase.string(df_customer, id_col, date_col)
      self$purchase_string_as_matrix <-
        create.purchase.string(df_customer, id_col, date_col,
                               return.mode = "matrix")
      self$recency_string_as_matrix <-
        create.recency.string(self$purchase_string_as_matrix)
      # Number of purchase periods. The original read the non-existent
      # field `purchase_stringF`, which left Freq as integer(0).
      # NOTE(review): confirm whether Freq should instead count only the
      # periods that contain a purchase.
      self$Freq <- nchar(self$purchase_string)
      self$transaction_range_complete <- nchar(self$purchase_string)

      # CANCELLATION TIME (T)
      # CADF offers different ways to estimate the cancellation time.
      # As T becomes longer, strict quitter will have a tendency to
      # underestimate retention and strict stayer a tendency to
      # overestimate it. If you know your customers come and go at free
      # will you can utilize a migration model or choose a T between
      # strict quitter and strict stayer.
      self$T <- ps_to_T_strict_quitter(self$purchase_string)
      self$T_ss <- ps_to_T_strict_stayer(self$purchase_string)
      self$T_custom <- ps_to_T_custom(self$purchase_string)

      # LOGISTIC MODELING MATRICES
      # Assumes strict/permanent cancellations: the customer relationship
      # starts at time 1 and ends at time N, with no pauses in between.
      # This is usually known as a contractual relationship.
      self$logistic_modeling_matrix <- f_CustomerModelingMatrix(self$T)
      self$logistic_modeling_matrix_ss <-
        f_CustomerModelingMatrix(self$T_ss)
      self$logistic_modeling_matrix_custom <-
        f_CustomerModelingMatrix(self$T_custom)

      # SURVIVAL MODELING MATRICES
      # These flip the 0/1 coding of the logistic modeling matrices.
      self$survival_modeling_matrix <-
        f_CustomerSurvivalModelingMatrix(self$T)
      self$survival_modeling_matrix_ss <-
        f_CustomerSurvivalModelingMatrix(self$T_ss)
      self$survival_modeling_matrix_custom <-
        f_CustomerSurvivalModelingMatrix(self$T_custom)

      # Store the enriched working data frame on the object.
      self$data <- df_customer
      # NOTE(review): `global` and `monthly` are not declared as fields of
      # this class, so both lookups yield NULL here -- confirm what
      # `output` and `payload` are meant to contain.
      self$output <- list(c(self$global, self$monthly))
      self$payload <- jsonlite::toJSON(self$output)
    }
  )
)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.