inst/doc/BTYD-walkthrough.R

## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
concordance=TRUE
)

## ----fig.path="", label="pnbdCalibrationFit", results="hide", echo=FALSE, include=FALSE----
library(knitr)
opts_chunk$set(comment="#")
library(BTYD)
# Set the hardie parameter value here, apply it everywhere
allHardie <- TRUE
data(cdnowSummary)
est.params <- cdnowSummary$est.params
cal.cbs <- cdnowSummary$cbs
pdf(file = 'pnbdCalibrationFit.pdf')
cal.fit <- pnbd.PlotFrequencyInCalibration(params = est.params, 
                                           cal.cbs = cal.cbs, 
                                           censor = 7, 
                                           hardie = allHardie)
dev.off()

## ----message=FALSE, tidy=FALSE------------------------------------------------
cdnowElog <- system.file("data/cdnowElog.csv", package = "BTYD")
elog <- dc.ReadLines(cdnowElog, cust.idx = 2,
                     date.idx = 3, sales.idx = 5)
elog[1:3,]

## ----message=FALSE------------------------------------------------------------
elog$date <- as.Date(elog$date, "%Y%m%d")
elog[1:3,]

## ----results="hide", message=FALSE--------------------------------------------
elog <- dc.MergeTransactionsOnSameDate(elog)

## ----message=FALSE------------------------------------------------------------
end.of.cal.period <- as.Date("1997-09-30")
elog.cal <- elog[which(elog$date <= end.of.cal.period), ]

## ----results="hide", message=FALSE--------------------------------------------
split.data <- dc.SplitUpElogForRepeatTrans(elog.cal)
clean.elog <- split.data$repeat.trans.elog

## ----message=FALSE------------------------------------------------------------
freq.cbt <- dc.CreateFreqCBT(clean.elog)
freq.cbt[1:3,1:5]

## ----results="hide", message=FALSE--------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
cal.cbt <- dc.MergeCustomers(tot.cbt, freq.cbt)

## ----tidy=FALSE, results="hide", message=FALSE--------------------------------
birth.periods <- split.data$cust.data$birth.per
last.dates <- split.data$cust.data$last.date
cal.cbs.dates <- data.frame(birth.periods, last.dates, 
                            end.of.cal.period)
cal.cbs <- dc.BuildCBSFromCBTAndDates(cal.cbt, cal.cbs.dates, 
                                      per="week")

## ----warning=FALSE------------------------------------------------------------
params <- pnbd.EstimateParameters(cal.cbs = cal.cbs, 
                                  hardie = allHardie)
round(params, digits = 3)
LL <- pnbd.cbs.LL(params = params, 
                  cal.cbs = cal.cbs, 
                  hardie = allHardie)
LL

## -----------------------------------------------------------------------------
p.matrix <- c(params, LL)
for (i in 1:2){
  params <- pnbd.EstimateParameters(cal.cbs = cal.cbs, 
                                    par.start = params, 
                                    hardie = allHardie)
  LL <- pnbd.cbs.LL(params = params, 
                    cal.cbs = cal.cbs, 
                    hardie = allHardie)
  p.matrix.row <- c(params, LL)
  p.matrix <- rbind(p.matrix, p.matrix.row)
}
colnames(p.matrix) <- c("r", "alpha", "s", "beta", "LL")
rownames(p.matrix) <- 1:3
round(p.matrix, digits = 3)

## ----fig.path="", label="pnbdTransactionHeterogeneity", results="hide", include=FALSE----
pdf(file = 'pnbdTransactionHeterogeneity.pdf')
pnbd.PlotTransactionRateHeterogeneity(params = params)
dev.off()

## ----fig.path="", label="pnbdDropoutHeterogeneity", results="hide", include=FALSE----
pdf(file = 'pnbdDropoutHeterogeneity.pdf')
pnbd.PlotDropoutRateHeterogeneity(params = params)
dev.off()

## -----------------------------------------------------------------------------
pnbd.Expectation(params = params, t = 52)

## ----tidy=FALSE---------------------------------------------------------------
cal.cbs["1516",]
x <- cal.cbs["1516", "x"]
t.x <- cal.cbs["1516", "t.x"]
T.cal <- cal.cbs["1516", "T.cal"]
pnbd.ConditionalExpectedTransactions(params, 
                                     T.star = 52, 
                                     x, 
                                     t.x, 
                                     T.cal, 
                                     hardie = allHardie)
pnbd.PAlive(params, 
            x, 
            t.x, 
            T.cal, 
            hardie = allHardie)

## ----tidy=FALSE---------------------------------------------------------------
# avoid overflow in LaTeX code block here:
cet <- "pnbd.ConditionalExpectedTransactions"
for (i in seq(10, 25, 5)){
  cond.expectation <- match.fun(cet)(params, 
                                     T.star = 52, 
                                     x = i, 
                                     t.x = 20, 
                                     T.cal = 39, 
                                     hardie = allHardie)
  cat ("x:",i,"\t Expectation:",cond.expectation, fill = TRUE)
}

## ----results="hide", eval=FALSE-----------------------------------------------
#  pnbd.PlotFrequencyInCalibration(params = params,
#                                  cal.cbs = cal.cbs,
#                                  censor = 7,
#                                  hardie = allHardie)

## ----message=FALSE------------------------------------------------------------
elog <- dc.SplitUpElogForRepeatTrans(elog)$repeat.trans.elog
x.star <- rep(0, nrow(cal.cbs))
cal.cbs <- cbind(cal.cbs, x.star)
elog.custs <- elog$cust
for (i in 1:nrow(cal.cbs)){
  current.cust <- rownames(cal.cbs)[i]
  tot.cust.trans <- length(which(elog.custs == current.cust))
  cal.trans <- cal.cbs[i, "x"]
  cal.cbs[i, "x.star"] <-  tot.cust.trans - cal.trans
}
round(cal.cbs[1:3,], digits = 3)

## ----fig.path="", label="pnbdCondExpComp", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
T.star <- 39 # length of the holdout period
censor <- 7  # This censor serves the same purpose described above
x.star <- cal.cbs[,"x.star"]
pdf(file = 'pnbdCondExpComp.pdf')
comp <- pnbd.PlotFreqVsConditionalExpectedFrequency(params, 
                                                    T.star, 
                                                    cal.cbs, 
                                                    x.star, 
                                                    censor, 
                                                    hardie = allHardie)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
round(comp, digits = 3)

## -----------------------------------------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
d.track.data <- rep(0, 7 * 78)
origin <- as.Date("1997-01-01")
for (i in colnames(tot.cbt)){
  date.index <- difftime(as.Date(i), origin) + 1
  d.track.data[date.index] <- sum(tot.cbt[,i])
}
w.track.data <-  rep(0, 78)
for (j in 1:78){
  w.track.data[j] <- sum(d.track.data[(j*7-6):(j*7)])
}

## ----fig.path="", label="pnbdTrackingInc", tidy=FALSE, echo=TRUE, fig.keep='none'----
T.cal <- cal.cbs[,"T.cal"]
T.tot <- 78
n.periods.final <- 78

pdf(file = 'pnbdTrackingInc.pdf')
inc.tracking <- pnbd.PlotTrackingInc(params = params, 
                                     T.cal = T.cal, 
                                     T.tot = T.tot, 
                                     actual.inc.tracking.data = w.track.data, 
                                     n.periods.final = n.periods.final)
dev.off()
round(inc.tracking[,20:25], digits = 3)

## ----fig.path="", label="pnbdTrackingCum", tidy=FALSE, echo=TRUE, fig.keep='none'----
cum.tracking.data <- cumsum(w.track.data)
pdf(file = 'pnbdTrackingCum.pdf')
cum.tracking <- pnbd.PlotTrackingCum(params = params, 
                                     T.cal = T.cal, 
                                     T.tot = T.tot, 
                                     actual.cu.tracking.data = cum.tracking.data, 
                                     n.periods.final = n.periods.final)
dev.off()
round(cum.tracking[,20:25], digits = 3)

## ----fig.path="", label="bgnbdCalibrationFit", results="hide", echo=FALSE, include=FALSE----
data(cdnowSummary);
est.params <- c(0.243, 4.414, 0.793, 2.426);
cal.cbs <- cdnowSummary$cbs;
pdf(file = 'bgnbdCalibrationFit.pdf')
cal.fit <- bgnbd.PlotFrequencyInCalibration(est.params, cal.cbs, 7)
dev.off()

## ----message=FALSE, tidy=FALSE------------------------------------------------
cdnowElog <- system.file("data/cdnowElog.csv", package = "BTYD")
elog <- dc.ReadLines(cdnowElog, cust.idx = 2,
                     date.idx = 3, sales.idx = 5)
elog[1:3,]

## ----message=FALSE------------------------------------------------------------
elog$date <- as.Date(elog$date, "%Y%m%d");
elog[1:3,]

## ----results="hide", message=FALSE--------------------------------------------
elog <- dc.MergeTransactionsOnSameDate(elog);

## ----message=FALSE------------------------------------------------------------
end.of.cal.period <- as.Date("1997-09-30")
elog.cal <- elog[which(elog$date <= end.of.cal.period), ]

## ----results="hide", message=FALSE--------------------------------------------
split.data <- dc.SplitUpElogForRepeatTrans(elog.cal);
clean.elog <- split.data$repeat.trans.elog;

## ----message=FALSE------------------------------------------------------------
freq.cbt <- dc.CreateFreqCBT(clean.elog);
freq.cbt[1:3,1:5]

## ----results="hide", message=FALSE--------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
cal.cbt <- dc.MergeCustomers(tot.cbt, freq.cbt)

## ----tidy=FALSE, results="hide", message=FALSE--------------------------------
birth.periods <- split.data$cust.data$birth.per
last.dates <- split.data$cust.data$last.date
cal.cbs.dates <- data.frame(birth.periods, last.dates, 
                            end.of.cal.period)
cal.cbs <- dc.BuildCBSFromCBTAndDates(cal.cbt, cal.cbs.dates, 
                                      per="week")

## -----------------------------------------------------------------------------
params <- bgnbd.EstimateParameters(cal.cbs);
params
LL <- bgnbd.cbs.LL(params, cal.cbs);
LL

## -----------------------------------------------------------------------------
p.matrix <- c(params, LL);
for (i in 1:2){
params <- bgnbd.EstimateParameters(cal.cbs, params);
LL <- bgnbd.cbs.LL(params, cal.cbs);
p.matrix.row <- c(params, LL);
p.matrix <- rbind(p.matrix, p.matrix.row);
}
colnames(p.matrix) <- c("r", "alpha", "a", "b", "LL");
rownames(p.matrix) <- 1:3;
p.matrix;

## ----fig.path="", label="bgnbdTransactionHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgnbdTransactionHeterogeneity.pdf')
bgnbd.PlotTransactionRateHeterogeneity(params)
dev.off()

## ----fig.path="", label="bgnbdDropoutHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgnbdDropoutHeterogeneity.pdf')
bgnbd.PlotDropoutRateHeterogeneity(params)
dev.off()

## -----------------------------------------------------------------------------
bgnbd.Expectation(params, t=52);

## ----tidy=FALSE---------------------------------------------------------------
cal.cbs["1516",]
x <- cal.cbs["1516", "x"]
t.x <- cal.cbs["1516", "t.x"]
T.cal <- cal.cbs["1516", "T.cal"]
bgnbd.ConditionalExpectedTransactions(params, T.star = 52, 
                                     x, t.x, T.cal)
bgnbd.PAlive(params, x, t.x, T.cal)

## ----tidy=FALSE---------------------------------------------------------------
for (i in seq(10, 25, 5)){
  cond.expectation <- bgnbd.ConditionalExpectedTransactions(
                        params, T.star = 52, x = i,
                        t.x = 20, T.cal = 39)
  cat ("x:",i,"\t Expectation:",cond.expectation, fill = TRUE)
}

## ----results="hide", eval=FALSE-----------------------------------------------
#  bgnbd.PlotFrequencyInCalibration(params, cal.cbs, 7)

## ----message=FALSE------------------------------------------------------------
elog <- dc.SplitUpElogForRepeatTrans(elog)$repeat.trans.elog;
x.star <- rep(0, nrow(cal.cbs));
cal.cbs <- cbind(cal.cbs, x.star);
elog.custs <- elog$cust;
for (i in 1:nrow(cal.cbs)){
  current.cust <- rownames(cal.cbs)[i]
  tot.cust.trans <- length(which(elog.custs == current.cust))
  cal.trans <- cal.cbs[i, "x"]
  cal.cbs[i, "x.star"] <-  tot.cust.trans - cal.trans
}
cal.cbs[1:3,]

## ----fig.path="", label="bgnbdCondExpComp", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
T.star <- 39 # length of the holdout period
censor <- 7  # This censor serves the same purpose described above
x.star <- cal.cbs[,"x.star"]

pdf(file = 'bgnbdCondExpComp.pdf')
comp <- bgnbd.PlotFreqVsConditionalExpectedFrequency(params, T.star,
                                              cal.cbs, x.star, censor)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
comp

## -----------------------------------------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
d.track.data <- rep(0, 7 * 78)
origin <- as.Date("1997-01-01")
for (i in colnames(tot.cbt)){
  date.index <- difftime(as.Date(i), origin) + 1;
  d.track.data[date.index] <- sum(tot.cbt[,i]);
}
w.track.data <-  rep(0, 78)
for (j in 1:78){
  w.track.data[j] <- sum(d.track.data[(j*7-6):(j*7)])
}

## ----fig.path="", label="bgnbdTrackingInc", tidy=FALSE, echo=TRUE, fig.keep='none'----
T.cal <- cal.cbs[,"T.cal"]
T.tot <- 78
n.periods.final <- 78
pdf(file = 'bgnbdTrackingInc.pdf')
inc.tracking <- bgnbd.PlotTrackingInc(params, 
                                      T.cal, 
                                      T.tot, 
                                      w.track.data,
                                      n.periods.final, 
                                      allHardie)
dev.off()
inc.tracking[,20:25]

## ----fig.path="", label="bgnbdTrackingCum", tidy=FALSE, echo=TRUE, fig.keep='none'----

cum.tracking.data <- cumsum(w.track.data)
pdf(file = 'bgnbdTrackingCum.pdf')
cum.tracking <- bgnbd.PlotTrackingCum(params, 
                                      T.cal, 
                                      T.tot, 
                                      cum.tracking.data,
                                      n.periods.final, 
                                      allHardie)
dev.off()
cum.tracking[,20:25]

## ----fig.path="", label="bgbbCalibrationFit", results="hide", echo=FALSE, include=FALSE----
data(donationsSummary)
rf.matrix <- donationsSummary$rf.matrix
params <- bgbb.EstimateParameters(rf.matrix)
pdf(file = 'bgbbCalibrationFit.pdf')
cal.fit <- bgbb.PlotFrequencyInCalibration(params, rf.matrix, 6)
dev.off()

## ----message=FALSE, tidy=FALSE------------------------------------------------
simElog <- system.file("data/discreteSimElog.csv", 
                       package = "BTYD")
elog <- dc.ReadLines(simElog, cust.idx = 1, date.idx = 2)
elog[1:3,]
elog$date <- as.Date(elog$date, "%Y-%m-%d")

max(elog$date);
min(elog$date);
# let's make the calibration period end somewhere in-between
T.cal <- as.Date("1977-01-01")

simData <- dc.ElogToCbsCbt(elog, per="year", T.cal)
cal.cbs <- simData$cal$cbs

freq<- cal.cbs[,"x"]
rec <- cal.cbs[,"t.x"]
trans.opp <- 7 # transaction opportunities
cal.rf.matrix <- dc.MakeRFmatrixCal(freq, rec, trans.opp)
cal.rf.matrix[1:5,]

## -----------------------------------------------------------------------------
data(donationsSummary);
rf.matrix <- donationsSummary$rf.matrix
params <- bgbb.EstimateParameters(rf.matrix);
LL <- bgbb.rf.matrix.LL(params, rf.matrix);
p.matrix <- c(params, LL);
for (i in 1:2){
params <- bgbb.EstimateParameters(rf.matrix, params);
LL <- bgbb.rf.matrix.LL(params, rf.matrix);
p.matrix.row <- c(params, LL);
p.matrix <- rbind(p.matrix, p.matrix.row);
}
colnames(p.matrix) <- c("alpha", "beta", "gamma", "delta", "LL");
rownames(p.matrix) <- 1:3;
p.matrix;

## ----fig.path="", label="bgbbTransactionHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgbbTransactionHeterogeneity.pdf')
bgbb.PlotTransactionRateHeterogeneity(params)
dev.off()

## ----fig.path="", label="bgbbDropoutHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgbbDropoutHeterogeneity.pdf')
bgbb.PlotDropoutRateHeterogeneity(params)
dev.off()

## -----------------------------------------------------------------------------
bgbb.Expectation(params, n=10);

## ----tidy=FALSE---------------------------------------------------------------
# customer A
n.cal = 6
n.star = 10
x = 0
t.x = 0
bgbb.ConditionalExpectedTransactions(params, n.cal, 
                                     n.star, x, t.x)
# customer B
x = 4
t.x = 5
bgbb.ConditionalExpectedTransactions(params, n.cal, 
                                     n.star, x, t.x)

## ----results="hide", eval=FALSE-----------------------------------------------
#  bgbb.PlotFrequencyInCalibration(params, rf.matrix)

## -----------------------------------------------------------------------------
holdout.cbs <- simData$holdout$cbs
x.star <- holdout.cbs[,"x.star"]

## ----fig.path="", label="bgbbCondExpComp", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
n.star <- 5 # length of the holdout period
x.star <- donationsSummary$x.star
pdf(file = 'bgbbCondExpComp.pdf')
comp <- bgbb.PlotFreqVsConditionalExpectedFrequency(params, n.star, 
                                                    rf.matrix, x.star)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
comp

## ----fig.path="", label="bgbbCondExpCompRec", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
pdf(file = 'bgbbCondExpCompRec.pdf')
comp <- bgbb.PlotRecVsConditionalExpectedFrequency(params, n.star, 
                                                   rf.matrix, x.star)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
comp

## ----fig.path="", label="bgbbTrackingInc", tidy=FALSE, echo=TRUE, fig.keep='none'----
inc.track.data <- donationsSummary$annual.trans
n.cal <- 6
xtickmarks <- 1996:2006
pdf(file = 'bgbbTrackingInc.pdf')
inc.tracking <- bgbb.PlotTrackingInc(params, rf.matrix, 
                                     inc.track.data, 
                                     xticklab = xtickmarks)
dev.off()
rownames(inc.tracking) <- c("act", "exp")
inc.tracking

## ----fig.path="", label="bgbbTrackingCum", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
cum.track.data <- cumsum(inc.track.data)
pdf(file = 'bgbbTrackingCum.pdf')
cum.tracking <- bgbb.PlotTrackingCum(params, rf.matrix, cum.track.data, 
                                     xticklab = xtickmarks)
dev.off()
rownames(cum.tracking) <- c("act", "exp")
cum.tracking

Try the BTYD package in your browser

Any scripts or data that you put into this service are public.

BTYD documentation built on Nov. 18, 2021, 1:10 a.m.