R/bgnbd.R

Defines functions bgnbd.cbs.LL bgnbd.LL bgnbd.compress.cbs bgnbd.EstimateParameters bgnbd.pmf bgnbd.pmf.General bgnbd.ConditionalExpectedTransactions bgnbd.PAlive bgnbd.Expectation bgnbd.PlotFrequencyInCalibration bgnbd.PlotFreqVsConditionalExpectedFrequency bgnbd.PlotRecVsConditionalExpectedFrequency bgnbd.PlotTransactionRateHeterogeneity bgnbd.PlotDropoutRateHeterogeneity bgnbd.ExpectedCumulativeTransactions bgnbd.PlotTrackingCum bgnbd.PlotTrackingInc

Documented in bgnbd.cbs.LL bgnbd.compress.cbs bgnbd.ConditionalExpectedTransactions bgnbd.EstimateParameters bgnbd.Expectation bgnbd.ExpectedCumulativeTransactions bgnbd.LL bgnbd.PAlive bgnbd.PlotDropoutRateHeterogeneity bgnbd.PlotFrequencyInCalibration bgnbd.PlotFreqVsConditionalExpectedFrequency bgnbd.PlotRecVsConditionalExpectedFrequency bgnbd.PlotTrackingCum bgnbd.PlotTrackingInc bgnbd.PlotTransactionRateHeterogeneity bgnbd.pmf bgnbd.pmf.General

################################################## BG/NBD estimation, visualization functions

library(hypergeo)

bgnbd.cbs.LL <- function(params, cal.cbs) {
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.cbs.LL")
    
    tryCatch(x <- cal.cbs[, "x"], error = function(e) stop("Error in bgnbd.cbs.LL: cal.cbs must have a frequency column labelled \"x\""))
    tryCatch(t.x <- cal.cbs[, "t.x"], error = function(e) stop("Error in bgnbd.cbs.LL: cal.cbs must have a recency column labelled \"t.x\""))
    tryCatch(T.cal <- cal.cbs[, "T.cal"], error = function(e) stop("Error in bgnbd.cbs.LL: cal.cbs must have a column for length of time observed labelled \"T.cal\""))
    
    if ("custs" %in% colnames(cal.cbs)) {
        
        many_rows = function(vec, nreps) {
            return(rep(1, nreps) %*% t.default(vec))
        }
        
        custs <- cal.cbs[, "custs"]
        logvec = (1:length(custs)) * (custs > 1)
        logvec = logvec[logvec > 0]
        M = sum(logvec > 0)
        for (i in 1:M) {
            cal.cbs = rbind(cal.cbs, many_rows(cal.cbs[logvec[i], ], custs[logvec[i]] - 
                1))
        }
        x = cal.cbs[, "x"]
        t.x = cal.cbs[, "t.x"]
        T.cal = cal.cbs[, "T.cal"]
    }
    
    return(sum(bgnbd.LL(params, x, t.x, T.cal)))
}

bgnbd.LL <- function(params, x, t.x, T.cal) {
    
    beta.ratio = function(a, b, x, y) {
        exp(lgamma(a) + lgamma(b) - lgamma(a + b) - lgamma(x) - lgamma(y) + lgamma(x + 
            y))
    }
    
    max.length <- max(length(x), length(t.x), length(T.cal))
    
    if (max.length%%length(x)) 
        warning("Maximum vector length not a multiple of the length of x")
    if (max.length%%length(t.x)) 
        warning("Maximum vector length not a multiple of the length of t.x")
    if (max.length%%length(T.cal)) 
        warning("Maximum vector length not a multiple of the length of T.cal")
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.LL")
    
    if (any(x < 0) || !is.numeric(x)) 
        stop("x must be numeric and may not contain negative numbers.")
    if (any(t.x < 0) || !is.numeric(t.x)) 
        stop("t.x must be numeric and may not contain negative numbers.")
    if (any(T.cal < 0) || !is.numeric(T.cal)) 
        stop("T.cal must be numeric and may not contain negative numbers.")
    
    r = params[1]
    alpha = params[2]
    a = params[3]
    b = params[4]
    
    x <- rep(x, length.out = max.length)
    t.x <- rep(t.x, length.out = max.length)
    T.cal <- rep(T.cal, length.out = max.length)
    
    A = r * log(alpha) + lgamma(r + x) - lgamma(r) - (r + x) * log(alpha + t.x)
    B = beta.ratio(a, b + x, a, b) * ((alpha + t.x)/(alpha + T.cal))^(r + x) + as.numeric((x > 
        0)) * beta.ratio(a + 1, b + x - 1, a, b)
    LL = sum(A + log(B))
    
    return(LL)
}

bgnbd.compress.cbs <- function(cbs, rounding = 3) {
    
    if (!("x" %in% colnames(cbs))) 
        stop("Error in bgnbd.compress.cbs: cbs must have a frequency column labelled \"x\"")
    if (!("t.x" %in% colnames(cbs))) 
        stop("Error in bgnbd.compress.cbs: cbs must have a recency column labelled \"t.x\"")
    if (!("T.cal" %in% colnames(cbs))) 
        stop("Error in bgnbd.compress.cbs: cbs must have a column for length of time observed labelled \"T.cal\"")
    
    orig.rows <- nrow(cbs)
    
    if (!("custs" %in% colnames(cbs))) {
        custs <- rep(1, nrow(cbs))
        cbs <- cbind(cbs, custs)
    }
    
    other.colnames <- colnames(cbs)[!(colnames(cbs) %in% c("x", "t.x", "T.cal"))]
    
    ## Round x, t.x and T.cal to the desired level
    cbs[, c("x", "t.x", "T.cal")] <- round(cbs[, c("x", "t.x", "T.cal")], rounding)
    
    ## Aggregate every column that is not x, t.x or T.cal by those columns. Do this by
    ## summing entries which have the same x, t.x and T.cal.
    cbs <- as.matrix(aggregate(cbs[, !(colnames(cbs) %in% c("x", "t.x", "T.cal"))], 
        by = list(x = cbs[, "x"], t.x = cbs[, "t.x"], T.cal = cbs[, "T.cal"]), sum))
    
    colnames(cbs) <- c("x", "t.x", "T.cal", other.colnames)
    final.rows <- nrow(cbs)
    message("Data reduced from ", orig.rows, " rows to ", final.rows, " rows.")
    return(cbs)
}

bgnbd.EstimateParameters <- function(cal.cbs, par.start = c(1, 3, 1, 3), max.param.value = 10000) {
    
    dc.check.model.params(c("r", "alpha", "a", "b"), par.start, "bgnbd.EstimateParameters")
    
    bgnbd.eLL <- function(params, cal.cbs, max.param.value) {
        params <- exp(params)
        params[params > max.param.value] = max.param.value
        return(-1 * bgnbd.cbs.LL(params, cal.cbs))
    }
    logparams = log(par.start)
    results = optim(logparams, bgnbd.eLL, cal.cbs = cal.cbs, max.param.value = max.param.value, 
        method = "L-BFGS-B")
    estimated.params <- exp(results$par)
    estimated.params[estimated.params > max.param.value] <- max.param.value
    return(estimated.params)
}

bgnbd.pmf <- function(params, t, x) {
    max.length <- max(length(t), length(x))
    if (max.length%%length(t)) 
        warning("Maximum vector length not a multiple of the length of t")
    if (max.length%%length(x)) 
        warning("Maximum vector length not a multiple of the length of x")
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.pmf")
    
    if (any(t < 0) || !is.numeric(t)) 
        stop("t must be numeric and may not contain negative numbers.")
    if (any(x < 0) || !is.numeric(x)) 
        stop("x must be numeric and may not contain negative numbers.")
    
    t. <- rep(t, length.out = max.length)
    x <- rep(x, length.out = max.length)
    
    return(bgnbd.pmf.General(params, 0, t, x))
}

bgnbd.pmf.General <- function(params, t.start, t.end, x) {
    
    max.length = max(length(t.start), length(t.end), length(x))
    
    if (max.length%%length(t.start)) 
        warning("Maximum vector length not a multiple of the length of t.start")
    if (max.length%%length(t.end)) 
        warning("Maximum vector length not a multiple of the length of t.end")
    if (max.length%%length(x)) 
        warning("Maximum vector length not a multiple of the length of x")
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.pmf.General")
    
    if (any(t.start < 0) || !is.numeric(t.start)) 
        stop("t.start must be numeric and may not contain negative numbers.")
    if (any(t.end < 0) || !is.numeric(t.end)) 
        stop("t.end must be numeric and may not contain negative numbers.")
    if (any(x < 0) || !is.numeric(x)) 
        stop("x must be numeric and may not contain negative numbers.")
    
    t.start = rep(t.start, length.out = max.length)
    t.end = rep(t.end, length.out = max.length)
    x = rep(x, length.out = max.length)
    
    if (any(t.start > t.end)) {
        stop("Error in bgnbd.pmf.General: t.start > t.end.")
    }
    r <- params[1]
    alpha <- params[2]
    a <- params[3]
    b <- params[4]
    equation.part.0 <- rep(0, max.length)
    t = t.end - t.start
    term3 = rep(0, max.length)
    term1 = beta(a, b + x)/beta(a, b) * gamma(r + x)/gamma(r)/factorial(x) * ((alpha/(alpha + 
        t))^r) * ((t/(alpha + t))^x)
    
    for (i in 1:max.length) {
        if (x[i] > 0) {
            ii = c(0:(x[i] - 1))
            summation.term = sum(gamma(r + ii)/gamma(r)/factorial(ii) * ((t[i]/(alpha + 
                t[i]))^ii))
            term3[i] = 1 - (((alpha/(alpha + t[i]))^r) * summation.term)
        }
    }
    term2 = as.numeric(x > 0) * beta(a + 1, b + x - 1)/beta(a, b) * term3
    
    return(term1 + term2)
}


bgnbd.ConditionalExpectedTransactions <- function(params, T.star, x, t.x, T.cal) {
    
    h2f1 <- function(a, b, c, z) {
        lenz <- length(z)
        j = 0
        uj <- 1:lenz
        uj <- uj/uj
        y <- uj
        lteps <- 0
        
        while (lteps < lenz) {
            lasty <- y
            j <- j + 1
            uj <- uj * (a + j - 1) * (b + j - 1)/(c + j - 1) * z/j
            y <- y + uj
            lteps <- sum(y == lasty)
        }
        return(y)
    }
    
    max.length <- max(length(T.star), length(x), length(t.x), length(T.cal))
    
    if (max.length%%length(T.star)) 
        warning("Maximum vector length not a multiple of the length of T.star")
    if (max.length%%length(x)) 
        warning("Maximum vector length not a multiple of the length of x")
    if (max.length%%length(t.x)) 
        warning("Maximum vector length not a multiple of the length of t.x")
    if (max.length%%length(T.cal)) 
        warning("Maximum vector length not a multiple of the length of T.cal")
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.ConditionalExpectedTransactions")
    
    if (any(T.star < 0) || !is.numeric(T.star)) 
        stop("T.star must be numeric and may not contain negative numbers.")
    if (any(x < 0) || !is.numeric(x)) 
        stop("x must be numeric and may not contain negative numbers.")
    if (any(t.x < 0) || !is.numeric(t.x)) 
        stop("t.x must be numeric and may not contain negative numbers.")
    if (any(T.cal < 0) || !is.numeric(T.cal)) 
        stop("T.cal must be numeric and may not contain negative numbers.")
    
    x <- rep(x, length.out = max.length)
    t.x <- rep(t.x, length.out = max.length)
    T.cal <- rep(T.cal, length.out = max.length)
    
    r = params[1]
    alpha = params[2]
    a = params[3]
    b = params[4]
    term1 <- ((a + b + x - 1)/(a - 1))
    term2 <- 1 - ((alpha + T.cal)/(alpha + T.cal + T.star))^(r + x) * h2f1(r + x, 
        b + x, a + b + x - 1, T.star/(alpha + T.cal + T.star))
    term3 <- 1 + as.numeric(x > 0) * (a/(b + x - 1)) * ((alpha + T.cal)/(alpha + 
        t.x))^(r + x)
    out <- term1 * term2/term3
    return(out)
}

bgnbd.PAlive <- function(params, x, t.x, T.cal) {
    
    max.length <- max(length(x), length(t.x), length(T.cal))
    
    if (max.length%%length(x)) 
        warning("Maximum vector length not a multiple of the length of x")
    if (max.length%%length(t.x)) 
        warning("Maximum vector length not a multiple of the length of t.x")
    if (max.length%%length(T.cal)) 
        warning("Maximum vector length not a multiple of the length of T.cal")
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.PAlive")
    
    if (any(x < 0) || !is.numeric(x)) 
        stop("x must be numeric and may not contain negative numbers.")
    if (any(t.x < 0) || !is.numeric(t.x)) 
        stop("t.x must be numeric and may not contain negative numbers.")
    if (any(T.cal < 0) || !is.numeric(T.cal)) 
        stop("T.cal must be numeric and may not contain negative numbers.")
    
    x <- rep(x, length.out = max.length)
    t.x <- rep(t.x, length.out = max.length)
    T.cal <- rep(T.cal, length.out = max.length)
    
    r = params[1]
    alpha = params[2]
    a = params[3]
    b = params[4]
    term1 = (a/(b + x - 1)) * ((alpha + T.cal)/(alpha + t.x))^(r + x)
    return(1/(1 + as.numeric(x > 0) * term1))
}

bgnbd.Expectation <- function(params, t) {
    
    h2f1 <- function(a, b, c, z) {
        lenz <- length(z)
        j = 0
        uj <- 1:lenz
        uj <- uj/uj
        y <- uj
        lteps <- 0
        
        while (lteps < lenz) {
            lasty <- y
            j <- j + 1
            uj <- uj * (a + j - 1) * (b + j - 1)/(c + j - 1) * z/j
            y <- y + uj
            lteps <- sum(y == lasty)
        }
        return(y)
    }
    
    if (any(t < 0) || !is.numeric(t)) 
        stop("t must be numeric and may not contain negative numbers.")
    r = params[1]
    alpha = params[2]
    a = params[3]
    b = params[4]
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.Expectation")
    
    term1 = (a + b - 1)/(a - 1)
    term2 = (alpha/(alpha + t))^r
    term3 = h2f1(r, b, a + b - 1, t/(alpha + t))
    output = term1 * (1 - term2 * term3)
    
    return(output)
}

bgnbd.PlotFrequencyInCalibration <- function(params, cal.cbs, censor, plotZero = TRUE, 
    xlab = "Calibration period transactions", ylab = "Customers", title = "Frequency of Repeat Transactions") {
    
    tryCatch(x <- cal.cbs[, "x"], error = function(e) stop("Error in bgnbd.PlotFrequencyInCalibration: cal.cbs must have a frequency column labelled \"x\""))
    tryCatch(T.cal <- cal.cbs[, "T.cal"], error = function(e) stop("Error in bgnbd.PlotFrequencyInCalibration: cal.cbs must have a column for length of time observed labelled \"T.cal\""))
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.PlotFrequencyInCalibration")
    if (censor > max(x)) 
        stop("censor too big (> max freq) in PlotFrequencyInCalibration.")
    
    x = cal.cbs[, "x"]
    T.cal = cal.cbs[, "T.cal"]
    n.x <- rep(0, max(x) + 1)
    ncusts = nrow(cal.cbs)
    for (ii in unique(x)) {
        # Get number of customers to buy n.x times, over the grid of all possible n.x
        # values (no censoring)
        n.x[ii + 1] <- sum(ii == x)
    }
    n.x.censor <- sum(n.x[(censor + 1):length(n.x)])
    n.x.actual <- c(n.x[1:censor], n.x.censor)  # This upper truncates at censor (ie. if censor=7, 8 categories: {0, 1, ..., 6, 7+}).
    T.value.counts <- table(T.cal)  # This is the table of counts of all time durations from customer birth to end of calibration period.
    T.values <- as.numeric(names(T.value.counts))  # These are all the unique time durations from customer birth to end of calibration period.
    n.T.values <- length(T.values)  # These are the number of time durations we need to consider.
    n.x.expected <- rep(0, length(n.x.actual))  # We'll store the probabilities in here.
    n.x.expected.all <- rep(0, max(x) + 1)  # We'll store the probabilities in here.
    
    for (ii in 0:max(x)) {
        # We want to run over the probability of each transaction amount.
        this.x.expected = 0
        for (T.idx in 1:n.T.values) {
            # We run over all people who had all time durations.
            T = T.values[T.idx]
            if (T == 0) 
                next
            n.T = T.value.counts[T.idx]  # This is the number of customers who had this time duration.
            prob.of.this.x.for.this.T = bgnbd.pmf(params, T, ii)
            expected.given.x.and.T = n.T * prob.of.this.x.for.this.T
            this.x.expected = this.x.expected + expected.given.x.and.T
        }
        n.x.expected.all[ii + 1] = this.x.expected
    }
    n.x.expected[1:censor] = n.x.expected.all[1:censor]
    n.x.expected[censor + 1] = sum(n.x.expected.all[(censor + 1):(max(x) + 1)])
    
    col.names <- paste(rep("freq", length(censor + 1)), (0:censor), sep = ".")
    col.names[censor + 1] <- paste(col.names[censor + 1], "+", sep = "")
    censored.freq.comparison <- rbind(n.x.actual, n.x.expected)
    colnames(censored.freq.comparison) <- col.names
    cfc.plot <- censored.freq.comparison
    if (plotZero == FALSE) 
        cfc.plot <- cfc.plot[, -1]
    n.ticks <- ncol(cfc.plot)
    if (plotZero == TRUE) {
        x.labels <- 0:(n.ticks - 1)
        x.labels[n.ticks] <- paste(n.ticks - 1, "+", sep = "")
    }
    ylim <- c(0, ceiling(max(cfc.plot) * 1.1))
    barplot(cfc.plot, names.arg = x.labels, beside = TRUE, ylim = ylim, main = title, 
        xlab = xlab, ylab = ylab, col = 1:2)
    legend("topright", legend = c("Actual", "Model"), col = 1:2, lwd = 2)
    return(censored.freq.comparison)
}


bgnbd.PlotFreqVsConditionalExpectedFrequency <- function(params, T.star, cal.cbs, 
    x.star, censor, xlab = "Calibration period transactions", ylab = "Holdout period transactions", 
    xticklab = NULL, title = "Conditional Expectation") {
    tryCatch(x <- cal.cbs[, "x"], error = function(e) stop("Error in bgnbd.PlotFreqVsConditionalExpectedFrequency: cal.cbs must have a frequency column labelled \"x\""))
    tryCatch(t.x <- cal.cbs[, "t.x"], error = function(e) stop("Error in bgnbd.PlotFreqVsConditionalExpectedFrequency: cal.cbs must have a recency column labelled \"t.x\""))
    tryCatch(T.cal <- cal.cbs[, "T.cal"], error = function(e) stop("Error in bgnbd.PlotFreqVsConditionalExpectedFrequency: cal.cbs must have a column for length of time observed labelled \"T.cal\""))
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.PlotFreqVsConditionalExpectedFrequency")
    if (censor > max(x)) 
        stop("censor too big (> max freq) in PlotFreqVsConditionalExpectedFrequency.")
    
    if (any(T.star < 0) || !is.numeric(T.star)) 
        stop("T.star must be numeric and may not contain negative numbers.")
    if (any(x.star < 0) || !is.numeric(x.star)) 
        stop("x.star must be numeric and may not contain negative numbers.")
    
    n.bins = censor + 1
    transaction.actual = rep(0, n.bins)
    transaction.expected = rep(0, n.bins)
    bin.size = rep(0, n.bins)
    for (cc in 0:censor) {
        if (cc != censor) {
            this.bin = which(cc == x)
        } else if (cc == censor) {
            this.bin = which(x >= cc)
        }
        n.this.bin = length(this.bin)
        bin.size[cc + 1] = n.this.bin
        transaction.actual[cc + 1] = sum(x.star[this.bin])/n.this.bin
        transaction.expected[cc + 1] = sum(bgnbd.ConditionalExpectedTransactions(params, 
            T.star, x[this.bin], t.x[this.bin], T.cal[this.bin]))/n.this.bin
    }
    col.names = paste(rep("freq", length(censor + 1)), (0:censor), sep = ".")
    col.names[censor + 1] = paste(col.names[censor + 1], "+", sep = "")
    comparison = rbind(transaction.actual, transaction.expected, bin.size)
    colnames(comparison) = col.names
    if (is.null(xticklab) == FALSE) {
        x.labels = xticklab
    }
    if (is.null(xticklab) != FALSE) {
        if (censor < ncol(comparison)) {
            x.labels = 0:(censor)
            x.labels[censor + 1] = paste(censor, "+", sep = "")
        }
        if (censor >= ncol(comparison)) {
            x.labels = 0:(ncol(comparison))
        }
    }
    actual = comparison[1, ]
    expected = comparison[2, ]
    ylim = c(0, ceiling(max(c(actual, expected)) * 1.1))
    plot(actual, type = "l", xaxt = "n", col = 1, ylim = ylim, xlab = xlab, ylab = ylab, 
        main = title)
    lines(expected, lty = 2, col = 2)
    axis(1, at = 1:ncol(comparison), labels = x.labels)
    legend("topleft", legend = c("Actual", "Model"), col = 1:2, lty = 1:2, lwd = 1)
    return(comparison)
}

bgnbd.PlotRecVsConditionalExpectedFrequency <- function(params, cal.cbs, T.star, 
    x.star, xlab = "Calibration period recency", ylab = "Holdout period transactions", 
    xticklab = NULL, title = "Actual vs. Conditional Expected Transactions by Recency") {
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.PlotRecVsConditionalExpectedFrequency")
    
    if (any(T.star < 0) || !is.numeric(T.star)) 
        stop("T.star must be numeric and may not contain negative numbers.")
    if (any(x.star < 0) || !is.numeric(x.star)) 
        stop("x.star must be numeric and may not contain negative numbers.")
    
    tryCatch(x <- cal.cbs[, "x"], error = function(e) stop("Error in bgnbd.PlotRecVsConditionalExpectedFrequency: cal.cbs must have a frequency column labelled \"x\""))
    tryCatch(t.x <- cal.cbs[, "t.x"], error = function(e) stop("Error in bgnbd.PlotRecVsConditionalExpectedFrequency: cal.cbs must have a recency column labelled \"t.x\""))
    tryCatch(T.cal <- cal.cbs[, "T.cal"], error = function(e) stop("Error in bgnbd.PlotRecVsConditionalExpectedFrequency: cal.cbs must have a column for length of time observed labelled \"T.cal\""))
    
    t.values <- sort(unique(t.x))
    n.recs <- length(t.values)
    transaction.actual <- rep(0, n.recs)
    transaction.expected <- rep(0, n.recs)
    rec.size <- rep(0, n.recs)
    
    for (tt in 1:n.recs) {
        this.t.x <- t.values[tt]
        this.rec <- which(t.x == this.t.x)
        n.this.rec <- length(this.rec)
        rec.size[tt] <- n.this.rec
        transaction.actual[tt] <- sum(x.star[this.rec])/n.this.rec
        transaction.expected[tt] <- sum(bgnbd.ConditionalExpectedTransactions(params, 
            T.star, x[this.rec], t.x[this.rec], T.cal[this.rec]))/n.this.rec
    }
    
    comparison <- rbind(transaction.actual, transaction.expected, rec.size)
    colnames(comparison) <- round(t.values, 3)
    
    bins <- seq(1, ceiling(max(t.x)))
    n.bins <- length(bins)
    actual <- rep(0, n.bins)
    expected <- rep(0, n.bins)
    bin.size <- rep(0, n.bins)
    
    x.labels <- NULL
    if (is.null(xticklab) == FALSE) {
        x.labels <- xticklab
    } else {
        x.labels <- 1:(n.bins)
    }
    point.labels <- rep("", n.bins)
    point.y.val <- rep(0, n.bins)
    for (ii in 1:n.bins) {
        if (ii < n.bins) {
            this.bin <- which(as.numeric(colnames(comparison)) >= (ii - 1) & as.numeric(colnames(comparison)) < 
                ii)
        } else if (ii == n.bins) {
            this.bin <- which(as.numeric(colnames(comparison)) >= ii - 1)
        }
        actual[ii] <- sum(comparison[1, this.bin])/length(comparison[1, this.bin])
        expected[ii] <- sum(comparison[2, this.bin])/length(comparison[2, this.bin])
        bin.size[ii] <- sum(comparison[3, this.bin])
    }
    
    ylim <- c(0, ceiling(max(c(actual, expected)) * 1.1))
    plot(actual, type = "l", xaxt = "n", col = 1, ylim = ylim, xlab = xlab, ylab = ylab, 
        main = title)
    lines(expected, lty = 2, col = 2)
    
    axis(1, at = 1:n.bins, labels = x.labels)
    legend("topleft", legend = c("Actual", "Model"), col = 1:2, lty = 1:2, lwd = 1)
    
    return(rbind(actual, expected, bin.size))
}

bgnbd.PlotTransactionRateHeterogeneity <- function(params, lim = NULL) {
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.PlotTransactionRateHeterogeneity")
    shape <- params[1]
    rate <- params[2]
    rate.mean <- round(shape/rate, 4)
    rate.var <- round(shape/rate^2, 4)
    if (is.null(lim)) {
        lim = qgamma(0.99, shape = shape, rate = rate)
    }
    x.axis.ticks <- seq(0, lim, length.out = 100)
    heterogeneity <- dgamma(x.axis.ticks, shape = shape, rate = rate)
    plot(x.axis.ticks, heterogeneity, type = "l", xlab = "Transaction Rate", ylab = "Density", 
        main = "Heterogeneity in Transaction Rate")
    mean.var.label <- paste("Mean:", rate.mean, "    Var:", rate.var)
    mtext(mean.var.label, side = 3)
    return(rbind(x.axis.ticks, heterogeneity))
}

bgnbd.PlotDropoutRateHeterogeneity <- function(params, lim = NULL) {
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.PlotDropoutRateHeterogeneity")
    alpha_param = params[3]
    beta_param = params[4]
    beta_param.mean = round(alpha_param/(alpha_param + beta_param), 4)
    beta_param.var = round(alpha_param * beta_param/((alpha_param + beta_param)^2)/(alpha_param + 
        beta_param + 1), 4)
    if (is.null(lim)) {
        # get right end point of grid
        lim = qbeta(0.99, shape1 = alpha_param, shape2 = beta_param)
    }
    x.axis.ticks = seq(0, lim, length.out = 100)
    heterogeneity = dbeta(x.axis.ticks, shape1 = alpha_param, shape2 = beta_param)
    plot(x.axis.ticks, heterogeneity, type = "l", xlab = "Dropout Probability p", 
        ylab = "Density", main = "Heterogeneity in Dropout Probability")
    mean.var.label = paste("Mean:", beta_param.mean, "    Var:", beta_param.var)
    mtext(mean.var.label, side = 3)
    return(rbind(x.axis.ticks, heterogeneity))
}

bgnbd.ExpectedCumulativeTransactions <- function(params, T.cal, T.tot, n.periods.final) {
    
    dc.check.model.params(c("r", "alpha", "s", "beta"), params, "bgnbd.ExpectedCumulativeTransactions")
    
    if (any(T.cal < 0) || !is.numeric(T.cal)) 
        stop("T.cal must be numeric and may not contain negative numbers.")
    
    if (length(T.tot) > 1 || T.tot < 0 || !is.numeric(T.tot)) 
        stop("T.cal must be a single numeric value and may not be negative.")
    if (length(n.periods.final) > 1 || n.periods.final < 0 || !is.numeric(n.periods.final)) 
        stop("n.periods.final must be a single numeric value and may not be negative.")
    
    intervals <- seq(T.tot/n.periods.final, T.tot, length.out = n.periods.final)
    
    cust.birth.periods <- max(T.cal) - T.cal
    
    expected.transactions <- sapply(intervals, function(interval) {
        if (interval <= min(cust.birth.periods)) 
            return(0)
        sum(bgnbd.Expectation(params, interval - cust.birth.periods[cust.birth.periods <= 
            interval]))
    })
    
    return(expected.transactions)
}


bgnbd.PlotTrackingCum <- function(params, T.cal, T.tot, actual.cu.tracking.data, 
    xlab = "Week", ylab = "Cumulative Transactions", xticklab = NULL, title = "Tracking Cumulative Transactions") {
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.Plot.PlotTrackingCum")
    
    if (any(T.cal < 0) || !is.numeric(T.cal)) 
        stop("T.cal must be numeric and may not contain negative numbers.")
    if (any(actual.cu.tracking.data < 0) || !is.numeric(actual.cu.tracking.data)) 
        stop("actual.cu.tracking.data must be numeric and may not contain negative numbers.")
    
    if (length(T.tot) > 1 || T.tot < 0 || !is.numeric(T.tot)) 
        stop("T.cal must be a single numeric value and may not be negative.")
    
    actual <- actual.cu.tracking.data
    expected <- bgnbd.ExpectedCumulativeTransactions(params, T.cal, T.tot, length(actual))
    
    cu.tracking.comparison <- rbind(actual, expected)
    
    ylim <- c(0, max(c(actual, expected)) * 1.05)
    plot(actual, type = "l", xaxt = "n", xlab = xlab, ylab = ylab, col = 1, ylim = ylim, 
        main = title)
    lines(expected, lty = 2, col = 2)
    if (is.null(xticklab) == FALSE) {
        if (ncol(cu.tracking.comparison) != length(xticklab)) {
            stop("Plot error, xticklab does not have the correct size")
        }
        axis(1, at = 1:ncol(cu.tracking.comparison), labels = xticklab)
    } else {
        axis(1, at = 1:length(actual), labels = 1:length(actual))
    }
    abline(v = max(T.cal), lty = 2)
    
    legend("bottomright", legend = c("Actual", "Model"), col = 1:2, lty = 1:2, lwd = 1)
    
    return(cu.tracking.comparison)
}

bgnbd.PlotTrackingInc <- function(params, T.cal, T.tot, actual.inc.tracking.data, 
    xlab = "Week", ylab = "Transactions", xticklab = NULL, title = "Tracking Weekly Transactions") {
    
    dc.check.model.params(c("r", "alpha", "a", "b"), params, "bgnbd.Plot.PlotTrackingCum")
    
    if (any(T.cal < 0) || !is.numeric(T.cal)) 
        stop("T.cal must be numeric and may not contain negative numbers.")
    if (any(actual.inc.tracking.data < 0) || !is.numeric(actual.inc.tracking.data)) 
        stop("actual.inc.tracking.data must be numeric and may not contain negative numbers.")
    
    if (length(T.tot) > 1 || T.tot < 0 || !is.numeric(T.tot)) 
        stop("T.cal must be a single numeric value and may not be negative.")
    
    actual <- actual.inc.tracking.data
    expected <- dc.CumulativeToIncremental(bgnbd.ExpectedCumulativeTransactions(params, 
        T.cal, T.tot, length(actual)))
    
    ylim <- c(0, max(c(actual, expected)) * 1.05)
    plot(actual, type = "l", xaxt = "n", xlab = xlab, ylab = ylab, col = 1, ylim = ylim, 
        main = title)
    lines(expected, lty = 2, col = 2)
    if (is.null(xticklab) == FALSE) {
        if (length(actual) != length(xticklab)) {
            stop("Plot error, xticklab does not have the correct size")
        }
        axis(1, at = 1:length(actual), labels = xticklab)
    } else {
        axis(1, at = 1:length(actual), labels = 1:length(actual))
    }
    abline(v = max(T.cal), lty = 2)
    
    legend("topright", legend = c("Actual", "Model"), col = 1:2, lty = 1:2, lwd = 1)
    
    return(rbind(actual, expected))
}


 
retina-dot-ai/BTYD documentation built on May 22, 2019, 12:17 p.m.