Nothing
## ----include=FALSE------------------------------------------------------------
library(knitr)
opts_chunk$set(
concordance=TRUE
)
## ----fig.path="", label="pnbdCalibrationFit", results="hide", echo=FALSE, include=FALSE----
library(knitr)
opts_chunk$set(comment="#")
library(BTYD)
# Set the hardie parameter value here, apply it everywhere
allHardie <- TRUE
data(cdnowSummary)
est.params <- cdnowSummary$est.params
cal.cbs <- cdnowSummary$cbs
pdf(file = 'pnbdCalibrationFit.pdf')
cal.fit <- pnbd.PlotFrequencyInCalibration(params = est.params,
cal.cbs = cal.cbs,
censor = 7,
hardie = allHardie)
dev.off()
## ----message=FALSE, tidy=FALSE------------------------------------------------
cdnowElog <- system.file("data/cdnowElog.csv", package = "BTYD")
elog <- dc.ReadLines(cdnowElog, cust.idx = 2,
date.idx = 3, sales.idx = 5)
elog[1:3,]
## ----message=FALSE------------------------------------------------------------
elog$date <- as.Date(elog$date, "%Y%m%d")
elog[1:3,]
## ----results="hide", message=FALSE--------------------------------------------
elog <- dc.MergeTransactionsOnSameDate(elog)
## ----message=FALSE------------------------------------------------------------
end.of.cal.period <- as.Date("1997-09-30")
elog.cal <- elog[which(elog$date <= end.of.cal.period), ]
## ----results="hide", message=FALSE--------------------------------------------
split.data <- dc.SplitUpElogForRepeatTrans(elog.cal)
clean.elog <- split.data$repeat.trans.elog
## ----message=FALSE------------------------------------------------------------
freq.cbt <- dc.CreateFreqCBT(clean.elog)
freq.cbt[1:3,1:5]
## ----results="hide", message=FALSE--------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
cal.cbt <- dc.MergeCustomers(tot.cbt, freq.cbt)
## ----tidy=FALSE, results="hide", message=FALSE--------------------------------
birth.periods <- split.data$cust.data$birth.per
last.dates <- split.data$cust.data$last.date
cal.cbs.dates <- data.frame(birth.periods, last.dates,
end.of.cal.period)
cal.cbs <- dc.BuildCBSFromCBTAndDates(cal.cbt, cal.cbs.dates,
per="week")
## ----warning=FALSE------------------------------------------------------------
params <- pnbd.EstimateParameters(cal.cbs = cal.cbs,
hardie = allHardie)
round(params, digits = 3)
LL <- pnbd.cbs.LL(params = params,
cal.cbs = cal.cbs,
hardie = allHardie)
LL
## -----------------------------------------------------------------------------
p.matrix <- c(params, LL)
for (i in 1:2){
params <- pnbd.EstimateParameters(cal.cbs = cal.cbs,
par.start = params,
hardie = allHardie)
LL <- pnbd.cbs.LL(params = params,
cal.cbs = cal.cbs,
hardie = allHardie)
p.matrix.row <- c(params, LL)
p.matrix <- rbind(p.matrix, p.matrix.row)
}
colnames(p.matrix) <- c("r", "alpha", "s", "beta", "LL")
rownames(p.matrix) <- 1:3
round(p.matrix, digits = 3)
## ----fig.path="", label="pnbdTransactionHeterogeneity", results="hide", include=FALSE----
pdf(file = 'pnbdTransactionHeterogeneity.pdf')
pnbd.PlotTransactionRateHeterogeneity(params = params)
dev.off()
## ----fig.path="", label="pnbdDropoutHeterogeneity", results="hide", include=FALSE----
pdf(file = 'pnbdDropoutHeterogeneity.pdf')
pnbd.PlotDropoutRateHeterogeneity(params = params)
dev.off()
## -----------------------------------------------------------------------------
pnbd.Expectation(params = params, t = 52)
## ----tidy=FALSE---------------------------------------------------------------
cal.cbs["1516",]
x <- cal.cbs["1516", "x"]
t.x <- cal.cbs["1516", "t.x"]
T.cal <- cal.cbs["1516", "T.cal"]
pnbd.ConditionalExpectedTransactions(params,
T.star = 52,
x,
t.x,
T.cal,
hardie = allHardie)
pnbd.PAlive(params,
x,
t.x,
T.cal,
hardie = allHardie)
## ----tidy=FALSE---------------------------------------------------------------
# avoid overflow in LaTeX code block here:
cet <- "pnbd.ConditionalExpectedTransactions"
for (i in seq(10, 25, 5)){
cond.expectation <- match.fun(cet)(params,
T.star = 52,
x = i,
t.x = 20,
T.cal = 39,
hardie = allHardie)
cat ("x:",i,"\t Expectation:",cond.expectation, fill = TRUE)
}
## ----results="hide", eval=FALSE-----------------------------------------------
# pnbd.PlotFrequencyInCalibration(params = params,
# cal.cbs = cal.cbs,
# censor = 7,
# hardie = allHardie)
## ----message=FALSE------------------------------------------------------------
elog <- dc.SplitUpElogForRepeatTrans(elog)$repeat.trans.elog
x.star <- rep(0, nrow(cal.cbs))
cal.cbs <- cbind(cal.cbs, x.star)
elog.custs <- elog$cust
for (i in 1:nrow(cal.cbs)){
current.cust <- rownames(cal.cbs)[i]
tot.cust.trans <- length(which(elog.custs == current.cust))
cal.trans <- cal.cbs[i, "x"]
cal.cbs[i, "x.star"] <- tot.cust.trans - cal.trans
}
round(cal.cbs[1:3,], digits = 3)
## ----fig.path="", label="pnbdCondExpComp", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
T.star <- 39 # length of the holdout period
censor <- 7 # This censor serves the same purpose described above
x.star <- cal.cbs[,"x.star"]
pdf(file = 'pnbdCondExpComp.pdf')
comp <- pnbd.PlotFreqVsConditionalExpectedFrequency(params,
T.star,
cal.cbs,
x.star,
censor,
hardie = allHardie)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
round(comp, digits = 3)
## -----------------------------------------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
d.track.data <- rep(0, 7 * 78)
origin <- as.Date("1997-01-01")
for (i in colnames(tot.cbt)){
date.index <- difftime(as.Date(i), origin) + 1
d.track.data[date.index] <- sum(tot.cbt[,i])
}
w.track.data <- rep(0, 78)
for (j in 1:78){
w.track.data[j] <- sum(d.track.data[(j*7-6):(j*7)])
}
## ----fig.path="", label="pnbdTrackingInc", tidy=FALSE, echo=TRUE, fig.keep='none'----
T.cal <- cal.cbs[,"T.cal"]
T.tot <- 78
n.periods.final <- 78
pdf(file = 'pnbdTrackingInc.pdf')
inc.tracking <- pnbd.PlotTrackingInc(params = params,
T.cal = T.cal,
T.tot = T.tot,
actual.inc.tracking.data = w.track.data,
n.periods.final = n.periods.final)
dev.off()
round(inc.tracking[,20:25], digits = 3)
## ----fig.path="", label="pnbdTrackingCum", tidy=FALSE, echo=TRUE, fig.keep='none'----
cum.tracking.data <- cumsum(w.track.data)
pdf(file = 'pnbdTrackingCum.pdf')
cum.tracking <- pnbd.PlotTrackingCum(params = params,
T.cal = T.cal,
T.tot = T.tot,
actual.cu.tracking.data = cum.tracking.data,
n.periods.final = n.periods.final)
dev.off()
round(cum.tracking[,20:25], digits = 3)
## ----fig.path="", label="bgnbdCalibrationFit", results="hide", echo=FALSE, include=FALSE----
data(cdnowSummary);
est.params <- c(0.243, 4.414, 0.793, 2.426);
cal.cbs <- cdnowSummary$cbs;
pdf(file = 'bgnbdCalibrationFit.pdf')
cal.fit <- bgnbd.PlotFrequencyInCalibration(est.params, cal.cbs, 7)
dev.off()
## ----message=FALSE, tidy=FALSE------------------------------------------------
cdnowElog <- system.file("data/cdnowElog.csv", package = "BTYD")
elog <- dc.ReadLines(cdnowElog, cust.idx = 2,
date.idx = 3, sales.idx = 5)
elog[1:3,]
## ----message=FALSE------------------------------------------------------------
elog$date <- as.Date(elog$date, "%Y%m%d");
elog[1:3,]
## ----results="hide", message=FALSE--------------------------------------------
elog <- dc.MergeTransactionsOnSameDate(elog);
## ----message=FALSE------------------------------------------------------------
end.of.cal.period <- as.Date("1997-09-30")
elog.cal <- elog[which(elog$date <= end.of.cal.period), ]
## ----results="hide", message=FALSE--------------------------------------------
split.data <- dc.SplitUpElogForRepeatTrans(elog.cal);
clean.elog <- split.data$repeat.trans.elog;
## ----message=FALSE------------------------------------------------------------
freq.cbt <- dc.CreateFreqCBT(clean.elog);
freq.cbt[1:3,1:5]
## ----results="hide", message=FALSE--------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
cal.cbt <- dc.MergeCustomers(tot.cbt, freq.cbt)
## ----tidy=FALSE, results="hide", message=FALSE--------------------------------
birth.periods <- split.data$cust.data$birth.per
last.dates <- split.data$cust.data$last.date
cal.cbs.dates <- data.frame(birth.periods, last.dates,
end.of.cal.period)
cal.cbs <- dc.BuildCBSFromCBTAndDates(cal.cbt, cal.cbs.dates,
per="week")
## -----------------------------------------------------------------------------
params <- bgnbd.EstimateParameters(cal.cbs);
params
LL <- bgnbd.cbs.LL(params, cal.cbs);
LL
## -----------------------------------------------------------------------------
p.matrix <- c(params, LL);
for (i in 1:2){
params <- bgnbd.EstimateParameters(cal.cbs, params);
LL <- bgnbd.cbs.LL(params, cal.cbs);
p.matrix.row <- c(params, LL);
p.matrix <- rbind(p.matrix, p.matrix.row);
}
colnames(p.matrix) <- c("r", "alpha", "a", "b", "LL");
rownames(p.matrix) <- 1:3;
p.matrix;
## ----fig.path="", label="bgnbdTransactionHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgnbdTransactionHeterogeneity.pdf')
bgnbd.PlotTransactionRateHeterogeneity(params)
dev.off()
## ----fig.path="", label="bgnbdDropoutHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgnbdDropoutHeterogeneity.pdf')
bgnbd.PlotDropoutRateHeterogeneity(params)
dev.off()
## -----------------------------------------------------------------------------
bgnbd.Expectation(params, t=52);
## ----tidy=FALSE---------------------------------------------------------------
cal.cbs["1516",]
x <- cal.cbs["1516", "x"]
t.x <- cal.cbs["1516", "t.x"]
T.cal <- cal.cbs["1516", "T.cal"]
bgnbd.ConditionalExpectedTransactions(params, T.star = 52,
x, t.x, T.cal)
bgnbd.PAlive(params, x, t.x, T.cal)
## ----tidy=FALSE---------------------------------------------------------------
for (i in seq(10, 25, 5)){
cond.expectation <- bgnbd.ConditionalExpectedTransactions(
params, T.star = 52, x = i,
t.x = 20, T.cal = 39)
cat ("x:",i,"\t Expectation:",cond.expectation, fill = TRUE)
}
## ----results="hide", eval=FALSE-----------------------------------------------
# bgnbd.PlotFrequencyInCalibration(params, cal.cbs, 7)
## ----message=FALSE------------------------------------------------------------
elog <- dc.SplitUpElogForRepeatTrans(elog)$repeat.trans.elog;
x.star <- rep(0, nrow(cal.cbs));
cal.cbs <- cbind(cal.cbs, x.star);
elog.custs <- elog$cust;
for (i in 1:nrow(cal.cbs)){
current.cust <- rownames(cal.cbs)[i]
tot.cust.trans <- length(which(elog.custs == current.cust))
cal.trans <- cal.cbs[i, "x"]
cal.cbs[i, "x.star"] <- tot.cust.trans - cal.trans
}
cal.cbs[1:3,]
## ----fig.path="", label="bgnbdCondExpComp", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
T.star <- 39 # length of the holdout period
censor <- 7 # This censor serves the same purpose described above
x.star <- cal.cbs[,"x.star"]
pdf(file = 'bgnbdCondExpComp.pdf')
comp <- bgnbd.PlotFreqVsConditionalExpectedFrequency(params, T.star,
cal.cbs, x.star, censor)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
comp
## -----------------------------------------------------------------------------
tot.cbt <- dc.CreateFreqCBT(elog)
d.track.data <- rep(0, 7 * 78)
origin <- as.Date("1997-01-01")
for (i in colnames(tot.cbt)){
date.index <- difftime(as.Date(i), origin) + 1;
d.track.data[date.index] <- sum(tot.cbt[,i]);
}
w.track.data <- rep(0, 78)
for (j in 1:78){
w.track.data[j] <- sum(d.track.data[(j*7-6):(j*7)])
}
## ----fig.path="", label="bgnbdTrackingInc", tidy=FALSE, echo=TRUE, fig.keep='none'----
T.cal <- cal.cbs[,"T.cal"]
T.tot <- 78
n.periods.final <- 78
pdf(file = 'bgnbdTrackingInc.pdf')
inc.tracking <- bgnbd.PlotTrackingInc(params,
T.cal,
T.tot,
w.track.data,
n.periods.final,
allHardie)
dev.off()
inc.tracking[,20:25]
## ----fig.path="", label="bgnbdTrackingCum", tidy=FALSE, echo=TRUE, fig.keep='none'----
cum.tracking.data <- cumsum(w.track.data)
pdf(file = 'bgnbdTrackingCum.pdf')
cum.tracking <- bgnbd.PlotTrackingCum(params,
T.cal,
T.tot,
cum.tracking.data,
n.periods.final,
allHardie)
dev.off()
cum.tracking[,20:25]
## ----fig.path="", label="bgbbCalibrationFit", results="hide", echo=FALSE, include=FALSE----
data(donationsSummary)
rf.matrix <- donationsSummary$rf.matrix
params <- bgbb.EstimateParameters(rf.matrix)
pdf(file = 'bgbbCalibrationFit.pdf')
cal.fit <- bgbb.PlotFrequencyInCalibration(params, rf.matrix, 6)
dev.off()
## ----message=FALSE, tidy=FALSE------------------------------------------------
simElog <- system.file("data/discreteSimElog.csv",
package = "BTYD")
elog <- dc.ReadLines(simElog, cust.idx = 1, date.idx = 2)
elog[1:3,]
elog$date <- as.Date(elog$date, "%Y-%m-%d")
max(elog$date);
min(elog$date);
# let's make the calibration period end somewhere in-between
T.cal <- as.Date("1977-01-01")
simData <- dc.ElogToCbsCbt(elog, per="year", T.cal)
cal.cbs <- simData$cal$cbs
freq<- cal.cbs[,"x"]
rec <- cal.cbs[,"t.x"]
trans.opp <- 7 # transaction opportunities
cal.rf.matrix <- dc.MakeRFmatrixCal(freq, rec, trans.opp)
cal.rf.matrix[1:5,]
## -----------------------------------------------------------------------------
data(donationsSummary);
rf.matrix <- donationsSummary$rf.matrix
params <- bgbb.EstimateParameters(rf.matrix);
LL <- bgbb.rf.matrix.LL(params, rf.matrix);
p.matrix <- c(params, LL);
for (i in 1:2){
params <- bgbb.EstimateParameters(rf.matrix, params);
LL <- bgbb.rf.matrix.LL(params, rf.matrix);
p.matrix.row <- c(params, LL);
p.matrix <- rbind(p.matrix, p.matrix.row);
}
colnames(p.matrix) <- c("alpha", "beta", "gamma", "delta", "LL");
rownames(p.matrix) <- 1:3;
p.matrix;
## ----fig.path="", label="bgbbTransactionHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgbbTransactionHeterogeneity.pdf')
bgbb.PlotTransactionRateHeterogeneity(params)
dev.off()
## ----fig.path="", label="bgbbDropoutHeterogeneity", results="hide", include=FALSE----
pdf(file = 'bgbbDropoutHeterogeneity.pdf')
bgbb.PlotDropoutRateHeterogeneity(params)
dev.off()
## -----------------------------------------------------------------------------
bgbb.Expectation(params, n=10);
## ----tidy=FALSE---------------------------------------------------------------
# customer A
n.cal = 6
n.star = 10
x = 0
t.x = 0
bgbb.ConditionalExpectedTransactions(params, n.cal,
n.star, x, t.x)
# customer B
x = 4
t.x = 5
bgbb.ConditionalExpectedTransactions(params, n.cal,
n.star, x, t.x)
## ----results="hide", eval=FALSE-----------------------------------------------
# bgbb.PlotFrequencyInCalibration(params, rf.matrix)
## -----------------------------------------------------------------------------
holdout.cbs <- simData$holdout$cbs
x.star <- holdout.cbs[,"x.star"]
## ----fig.path="", label="bgbbCondExpComp", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
n.star <- 5 # length of the holdout period
x.star <- donationsSummary$x.star
pdf(file = 'bgbbCondExpComp.pdf')
comp <- bgbb.PlotFreqVsConditionalExpectedFrequency(params, n.star,
rf.matrix, x.star)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
comp
## ----fig.path="", label="bgbbCondExpCompRec", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
pdf(file = 'bgbbCondExpCompRec.pdf')
comp <- bgbb.PlotRecVsConditionalExpectedFrequency(params, n.star,
rf.matrix, x.star)
dev.off()
rownames(comp) <- c("act", "exp", "bin")
comp
## ----fig.path="", label="bgbbTrackingInc", tidy=FALSE, echo=TRUE, fig.keep='none'----
inc.track.data <- donationsSummary$annual.trans
n.cal <- 6
xtickmarks <- 1996:2006
pdf(file = 'bgbbTrackingInc.pdf')
inc.tracking <- bgbb.PlotTrackingInc(params, rf.matrix,
inc.track.data,
xticklab = xtickmarks)
dev.off()
rownames(inc.tracking) <- c("act", "exp")
inc.tracking
## ----fig.path="", label="bgbbTrackingCum", tidy=FALSE, echo=TRUE, size="small", fig.keep='none'----
cum.track.data <- cumsum(inc.track.data)
pdf(file = 'bgbbTrackingCum.pdf')
cum.tracking <- bgbb.PlotTrackingCum(params, rf.matrix, cum.track.data,
xticklab = xtickmarks)
dev.off()
rownames(cum.tracking) <- c("act", "exp")
cum.tracking
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.