# tests/testthat/test_correctness_clvdata_s3.R

# Load the cdnow transaction data that the tests in this file build their clv.data objects from
data("cdnow")

# summary ---------------------------------------------------------------------------------------------------------

test_that("Zero repeaters are counted correctly", {
  skip_on_cran()

  # Compares the number and percentage of zero-repeaters reported by summary()
  # with values counted by hand from the raw cdnow transactions.
  #   date.estimation.split: passed on as estimation.split; NULL means no split.
  fct.verify.zero.repeaters <- function(date.estimation.split){
    clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow, estimation.split = date.estimation.split)
    expect_silent(res.sum <- summary(clv.cdnow))
    dt.desc <- res.sum$descriptives.transactions

    # Only reported for the estimation column, holdout and total contain "-"
    expect_true(dt.desc[Name == "Total # zero repeaters", "Holdout"] == "-")
    expect_true(dt.desc[Name == "Total # zero repeaters", "Total"] == "-")

    # Transactions to count by hand: All if there is no split, otherwise only
    #   the ones up to and including the split date
    if(is.null(date.estimation.split)){
      dt.trans <- cdnow
    }else{
      dt.trans <- cdnow[Date <= date.estimation.split]
    }
    # Zero-repeaters are the customers with exactly one transaction
    num.zero.rep <- dt.trans[, .N, by = "Id"][N == 1, .N]
    num.ids <- dt.trans[, uniqueN(Id)]
    perc.zero.rep <- round(100 * num.zero.rep / num.ids, 3)

    # expect_equal() rather than `==`: the summary stores characters which are
    #   converted back to numeric, and it reports both values on failure
    expect_equal(dt.desc[Name == "Total # zero repeaters", as.numeric(Estimation)],
                 num.zero.rep)
    expect_equal(round(dt.desc[Name == "Percentage of zero repeaters", as.numeric(Estimation)], 3),
                 perc.zero.rep)
  }

  fct.verify.zero.repeaters(date.estimation.split = NULL)
  fct.verify.zero.repeaters(date.estimation.split = lubridate::ymd("1997-09-17"))
})

test_that("Summary has no NA", {
  clv.cdnow.holdout <- fct.helper.create.clvdata.cdnow(cdnow, estimation.split = 37)
  clv.cdnow.no.holdout <- fct.helper.create.clvdata.cdnow(cdnow, estimation.split = NULL)

  # Summarizes clv.data for the given Ids (NULL: no selection) and verifies that
  # the transaction descriptives contain no missing values in any form.
  fct.summary.has.no.na <- function(clv.data, Id){
    expect_silent(res.sum <- summary(clv.data, Id=Id))
    dt.desc <- res.sum$descriptives.transactions

    # Actual missing values: Checked explicitly because comparing them to
    #   a string only yields NA and not TRUE
    expect_false(anyNA(dt.desc))

    # Returns characters and cannot convert to numeric because would
    #   surely introduce NAs (converting dates and "-"). Therefore look for
    #   the string representations of missing values.
    expect_false(any(dt.desc == "NA"))
    expect_false(any(dt.desc == "NaN"))
  }

  # All
  fct.summary.has.no.na(clv.cdnow.holdout, Id=NULL)
  fct.summary.has.no.na(clv.cdnow.no.holdout, Id=NULL)
  # Zero-repeater
  fct.summary.has.no.na(clv.cdnow.holdout, Id="3")
  fct.summary.has.no.na(clv.cdnow.no.holdout, Id="3")
  # Not zero-repeater
  fct.summary.has.no.na(clv.cdnow.holdout, Id="1")
  fct.summary.has.no.na(clv.cdnow.no.holdout, Id="1")
  # Mix
  fct.summary.has.no.na(clv.cdnow.holdout, Id=c("1", "3"))
  fct.summary.has.no.na(clv.cdnow.no.holdout, Id=c("1", "3"))

})

test_that("Same transaction summary if all ids or NULL are given", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)
  ids.all <- cdnow[, unique(Id)]

  # Summary without selecting ids vs. summary when explicitly selecting every id
  expect_silent(res.sum.null <- summary(clv.cdnow, Id=NULL))
  expect_silent(res.sum.all <- summary(clv.cdnow, Id=ids.all))

  # Both have to report the same transaction descriptives
  is.same <- all.equal(res.sum.null$descriptives.transactions,
                       res.sum.all$descriptives.transactions)
  expect_true(isTRUE(is.same))
})

test_that("Correct Ids selected", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)
  ids.expected <- c("1", "2", "3")

  # Verifies that exactly the expected ids were selected and that none of them
  # appears more than once.
  fct.verify.selected.ids <- function(ids.selected){
    expect_setequal(ids.selected, ids.expected)
    # anyDuplicated() is 0 only if there is no repeated element
    expect_equal(anyDuplicated(ids.selected), 0L)
  }

  expect_silent(res.sum <- summary(clv.cdnow, Id=ids.expected))
  fct.verify.selected.ids(res.sum$selected.ids)

  # double: Ids given repeatedly may only be selected once
  expect_silent(res.sum <- summary(clv.cdnow, Id=c("1", "2", "3", "3", "3")))
  fct.verify.selected.ids(res.sum$selected.ids)
})

test_that("Different output if ids given", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Transaction descriptives when selecting a single id and when selecting several ids
  expect_silent(dt.desc.single <- summary(clv.cdnow, Id="1")$descriptives.transactions)
  expect_silent(dt.desc.multiple <- summary(clv.cdnow, Id=c("1", "2", "3" ,"99"))$descriptives.transactions)

  # The given column may not be the same in both summaries
  fct.expect.column.differs <- function(col.name){
    expect_false(isTRUE(all.equal(dt.desc.single[, col.name, with=FALSE],
                                  dt.desc.multiple[, col.name, with=FALSE])))
  }

  fct.expect.column.differs("Estimation")
  fct.expect.column.differs("Holdout")
  fct.expect.column.differs("Total")
})

test_that("Holdout is - if customer has no transactions in holdout period", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Customer "2" serves as the customer without holdout transactions:
  #   Every entry of its Holdout column has to be the placeholder "-"
  dt.desc <- summary(clv.cdnow, Id="2")$descriptives.transactions
  is.placeholder <- dt.desc[, "Holdout"] == "-"
  expect_true(all(is.placeholder))
})


# as.data.x ---------------------------------------------------------------------------------------------------------
test_that("Correct data format is returned", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(data.cdnow=cdnow)

  # as.data.frame(): a data.frame which is not also a data.table
  df.trans <- as.data.frame(clv.cdnow)
  expect_false(is.data.table(df.trans))
  expect_true(is.data.frame(df.trans))

  # as.data.table(): a data.table
  dt.trans <- as.data.table(clv.cdnow)
  expect_true(is.data.table(dt.trans))
  # Not checked: expect_true(is.data.frame(dt.trans))
})


test_that("Correct Ids are returned", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(data.cdnow=cdnow)
  ids.requested <- c("1", "2", "999")
  ids.with.unknown <- c(ids.requested, "abc")

  # Exactly the requested customers are in the returned transactions
  df.selected <- as.data.frame(clv.cdnow, Ids = ids.requested)
  dt.selected <- as.data.table(clv.cdnow, Ids = ids.requested)
  expect_setequal(df.selected$Id, ids.requested)
  expect_setequal(dt.selected$Id, ids.requested)

  # Additionally requesting the id "abc" has to raise a warning
  expect_warning(as.data.frame(clv.cdnow, Ids=ids.with.unknown))
  expect_warning(as.data.table(clv.cdnow, Ids=ids.with.unknown))
})

test_that("Returns correct number of transactions for given sample", {
  skip_on_cran()

  clv.cdnow <- fct.helper.create.clvdata.cdnow(data.cdnow=cdnow)

  # Verifies the number of rows returned by the given conversion function
  # (as.data.frame or as.data.table) for every sample.
  fct.verify.correct.number.trans <- function(fct.as.data.x){
    num.default    <- nrow(fct.as.data.x(clv.cdnow))
    num.full       <- nrow(fct.as.data.x(clv.cdnow, sample="full"))
    num.estimation <- nrow(fct.as.data.x(clv.cdnow, sample="estimation"))
    num.holdout    <- nrow(fct.as.data.x(clv.cdnow, sample="holdout"))

    # Without sample: As many rows as the original data
    expect_equal(num.default, nrow(cdnow))

    # Without sample is the same as full
    expect_equal(num.default, num.full)

    # Estimation and holdout together make up full
    expect_equal(num.estimation + num.holdout, num.full)
  }

  fct.verify.correct.number.trans(fct.as.data.x = as.data.frame)
  fct.verify.correct.number.trans(fct.as.data.x = as.data.table)
})

test_that("Always returns a copy", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(data.cdnow=cdnow)
  # Memory address of the transaction data stored in the clv.data object
  orig.address <- address(clv.cdnow@data.transactions)

  # The returned data may not be located at the address of the stored data
  fct.expect.is.copy <- function(data.returned){
    expect_false(orig.address == address(data.returned))
  }

  # data.frame
  #   all customers
  fct.expect.is.copy(as.data.frame(clv.cdnow, sample="full"))
  fct.expect.is.copy(as.data.frame(clv.cdnow, sample="estimation"))
  fct.expect.is.copy(as.data.frame(clv.cdnow, sample="holdout"))
  #   single customer
  fct.expect.is.copy(as.data.frame(clv.cdnow, sample="full", Ids = "1"))
  fct.expect.is.copy(as.data.frame(clv.cdnow, sample="estimation", Ids = "1"))
  fct.expect.is.copy(as.data.frame(clv.cdnow, sample="holdout", Ids = "1"))

  # data.table
  #   all customers
  fct.expect.is.copy(as.data.table(clv.cdnow, sample="full"))
  fct.expect.is.copy(as.data.table(clv.cdnow, sample="estimation"))
  fct.expect.is.copy(as.data.table(clv.cdnow, sample="holdout"))
  #   single customer
  fct.expect.is.copy(as.data.table(clv.cdnow, sample="full", Ids = "1"))
  fct.expect.is.copy(as.data.table(clv.cdnow, sample="estimation", Ids = "1"))
  fct.expect.is.copy(as.data.table(clv.cdnow, sample="holdout", Ids = "1"))
})




# subset ---------------------------------------------------------------------

test_that("Correct data selected", {
  skip_on_cran()
  # clv.data object created with the helper's default estimation split (with holdout)
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)
  cols.all <- c("Id", "Date", "Price")

  # sample = "full" ------------------------------------------------------
  # Not giving any argument is the same as asking for the full sample
  dt.no.args <- subset(clv.cdnow)
  dt.full <- subset(clv.cdnow, sample="full")
  expect_true(isTRUE(all.equal(dt.no.args, dt.full)))

  # Single Id
  expect_silent(dt.sel <- subset(clv.cdnow, Id == "1"))
  expect_setequal(dt.sel$Id, "1")
  expect_true(nrow(dt.sel) == 4)
  expect_setequal(colnames(dt.sel), cols.all)
  # Multiple Ids
  expect_silent(dt.sel <- subset(clv.cdnow, Id %in% c("1", "2")))
  expect_setequal(dt.sel$Id, c("1", "2"))
  expect_true(nrow(dt.sel) == 6)

  # sample = "estimation" ------------------------------------------------
  # Single Id
  expect_silent(dt.sel <- subset(clv.cdnow, Id == "1", sample="estimation"))
  expect_setequal(dt.sel$Id, "1")
  expect_true(nrow(dt.sel) == 3)
  expect_setequal(colnames(dt.sel), cols.all)
  # Multiple Ids
  expect_silent(dt.sel <- subset(clv.cdnow, Id %in% c("1", "2"), sample="estimation"))
  expect_setequal(dt.sel$Id, c("1", "2"))
  expect_true(nrow(dt.sel) == 5)

  # sample = "holdout" ---------------------------------------------------
  # Single Id
  expect_silent(dt.sel <- subset(clv.cdnow, Id == "1", sample="holdout"))
  expect_setequal(dt.sel$Id, "1")
  expect_true(nrow(dt.sel) == 1)
  expect_setequal(colnames(dt.sel), cols.all)
  # Multiple Ids
  expect_silent(dt.sel <- subset(clv.cdnow, Id %in% c("1", "111"), sample="holdout"))
  expect_setequal(dt.sel$Id, c("1", "111"))
  expect_true(nrow(dt.sel) == 2)

  # Subsetting on other columns ------------------------------------------
  # Date: Same rows as when filtering the original data (without its CDs column)
  dt.date.subset <- subset(clv.cdnow, between(Date, "1997-02-02", "1997-10-10"))[order(Id)]
  dt.date.expected <- cdnow[Date>="1997-02-02" & Date <= "1997-10-10", !"CDs"][order(Id)]
  expect_true(isTRUE(all.equal(dt.date.subset, dt.date.expected)))

  # Price: Same rows as when filtering the original data (without its CDs column)
  dt.price.subset <- subset(clv.cdnow, between(Price, 50, 100))[order(Id)]
  dt.price.expected <- cdnow[Price >= 50 & Price <= 100, !"CDs"][order(Id)]
  expect_true(isTRUE(all.equal(dt.price.subset, dt.price.expected)))

  # Selecting columns ----------------------------------------------------
  # Only the columns given in select are returned, in every sample
  #   full
  expect_setequal(colnames(subset(clv.cdnow, select=c("Id"), sample="full")), "Id")
  expect_setequal(colnames(subset(clv.cdnow, select=c("Id", "Date"), sample="full")), c("Id", "Date"))
  #   estimation
  expect_setequal(colnames(subset(clv.cdnow, select=c("Id"), sample="estimation")), "Id")
  expect_setequal(colnames(subset(clv.cdnow, select=c("Id", "Date"), sample="estimation")), c("Id", "Date"))
  #   holdout
  expect_setequal(colnames(subset(clv.cdnow, select=c("Id"), sample="holdout")), "Id")
  expect_setequal(colnames(subset(clv.cdnow, select=c("Id", "Date", "Price"), sample="holdout")),
                  cols.all)

})

test_that("If no holdout, full and estimation are the same", {
  skip_on_cran()
  # No estimation split, hence no holdout period
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow, estimation.split=NULL)

  # Full and estimation sample have to contain the same data
  dt.full <- subset(clv.cdnow, sample="full")
  dt.estimation <- subset(clv.cdnow, sample="estimation")
  expect_true(isTRUE(all.equal(dt.full, dt.estimation)))

  # Asking for the holdout sample has to fail with a message about the missing holdout data
  expect_error(subset(clv.cdnow, sample="holdout"), regexp = "no holdout data")
})


test_that("Same when argument positions are swapped", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Subset expression first and columns second, both given by position
  dt.by.position <- subset(clv.cdnow, Id=="111", c("Id", "Date"))
  # Columns given by name before the subset expression
  dt.select.first <- subset(clv.cdnow, select=c("Id", "Date"), Id=="111")

  expect_true(isTRUE(all.equal(dt.by.position, dt.select.first)))
})


test_that("Always returns a copy of the data", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)
  # Memory address of the transaction data stored in the clv.data object
  orig.address <- address(clv.cdnow@data.transactions)

  # Only the full sample is verified: Holdout and estimation contain different
  #   data and therefore are always different objects

  # Every row selected
  dt.all.rows <- subset(clv.cdnow, subset=TRUE, sample="full")
  expect_false(address(dt.all.rows) == orig.address)

  # Rows of a single customer selected
  dt.single.id <- subset(clv.cdnow, subset=Id=="1", sample="full")
  expect_false(address(dt.single.id) == orig.address)

  # NOTE(review): select is given the comparison Id=="1" and not column
  #   names - confirm that this is intended
  dt.select <- subset(clv.cdnow, select=Id=="1", sample="full")
  expect_false(address(dt.select) == orig.address)
})





# plot ---------------------------------------------------------------------

# . frequency ---------------------------------------------------------------
test_that("frequency plot - actual trans has no 0", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Not counting repeat transactions: There may be no level "0"
  expect_silent(dt.actual <- plot(clv.cdnow, which="frequency",
                                  count.repeat.trans=FALSE, trans.bins=c(1,2,3),
                                  plot=FALSE, verbose=FALSE))
  has.zero.actual <- any(levels(dt.actual$num.transactions) == "0")
  expect_false(has.zero.actual)

  # Counting repeat transactions: There has to be a level "0"
  expect_silent(dt.repeat <- plot(clv.cdnow, which="frequency", count.repeat.trans=TRUE,
                                  plot=FALSE, verbose=FALSE))
  has.zero.repeat <- any(levels(dt.repeat$num.transactions) == "0")
  expect_true(has.zero.repeat)
})

test_that("frequency plot - remaining label is the highest level and disappears if not needed", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Counting the remaining transactions: The label given for them is among the levels
  expect_silent(dt.plot <- plot(clv.cdnow, which="frequency",
                                trans.bins=0:10, label.remaining="AbC123",
                                count.remaining=TRUE,
                                plot=FALSE, verbose=FALSE))
  # NOTE(review): max() on the character levels compares the strings and not
  #   the order of the levels - confirm that this is the intended check
  expect_true(max(levels(dt.plot$num.transactions)) == "AbC123")

  # but disappears if not needed: Without counting the remaining transactions,
  #   all levels are numbers and the largest is the last bin
  expect_silent(dt.plot <- plot(clv.cdnow, which="frequency",
                                trans.bins=0:10, label.remaining="AbC123",
                                count.remaining=FALSE,
                                plot=FALSE, verbose=FALSE))
  expect_true(max(as.numeric(levels(dt.plot$num.transactions))) == 10)
})



# . spending ---------------------------------------------------------------
test_that("Spending plot - different data for different sample", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Plot data without giving a sample and for each of the samples
  expect_silent(dt.default <- plot(clv.cdnow, which="spending", plot=FALSE, verbose=FALSE))
  expect_silent(dt.est     <- plot(clv.cdnow, which="spending", sample="estimation", plot=FALSE, verbose=FALSE))
  expect_silent(dt.all     <- plot(clv.cdnow, which="spending", sample="full", plot=FALSE, verbose=FALSE))
  expect_silent(dt.hold    <- plot(clv.cdnow, which="spending", sample="holdout", plot=FALSE, verbose=FALSE))

  # Not giving a sample yields the estimation sample
  same.default.est <- all.equal(dt.default, dt.est)
  expect_true(isTRUE(same.default.est))

  # Every sample differs from every other sample
  same.est.all <- all.equal(dt.est, dt.all)
  same.est.hold <- all.equal(dt.est, dt.hold)
  same.all.hold <- all.equal(dt.all, dt.hold)
  expect_false(isTRUE(same.est.all))
  expect_false(isTRUE(same.est.hold))
  expect_false(isTRUE(same.all.hold))
})


test_that("Spending plot - ggplot styling works correctly", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  # Plots with no styling, a line width, another geom, and a color
  #   (the color is an argument passed in ...)
  expect_silent(gg.plain     <- plot(clv.cdnow, which="spending", verbose=FALSE))
  expect_silent(gg.linewidth <- plot(clv.cdnow, which="spending", verbose=FALSE, linewidth=0.1))
  expect_silent(gg.point     <- plot(clv.cdnow, which="spending", verbose=FALSE, geom="point"))
  expect_silent(gg.green     <- plot(clv.cdnow, which="spending", verbose=FALSE, color="green"))

  # First layer of each plot
  layer.plain     <- gg.plain$layers[[1]]
  layer.point     <- gg.point$layers[[1]]
  layer.linewidth <- gg.linewidth$layers[[1]]
  layer.green     <- gg.green$layers[[1]]

  # A line is drawn if no geom is given, otherwise the requested geom
  expect_s3_class(layer.plain$geom, "GeomLine")
  expect_s3_class(layer.point$geom, "GeomPoint")
  # Styling arguments end up in the parameters of the layer
  expect_true(layer.linewidth$aes_params[["linewidth"]] == 0.1)
  expect_true(layer.green$aes_params[["colour"]] == "green")
})

test_that("Spending plot - correct num plotted", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)
  cols.expected <- c("Id", "Spending")

  # mean.spending = TRUE
  #   num rows: One per customer in the transaction data
  expect_silent(dt.mean <- plot(clv.cdnow,mean.spending=TRUE, sample="full", which="spending", plot=FALSE, verbose=FALSE))
  expect_setequal(colnames(dt.mean), cols.expected)
  num.customers <- clv.cdnow@data.transactions[, uniqueN(Id)]
  expect_true(nrow(dt.mean) == num.customers)
  expect_setequal(dt.mean$Id, clv.cdnow@data.transactions[, unique(Id)])

  # mean.spending = FALSE
  #   num rows: One per transaction after aggregating same id/date
  expect_silent(dt.single <- plot(clv.cdnow, mean.spending=FALSE, sample="full", plot=FALSE, verbose=FALSE, which="spending"))
  expect_setequal(colnames(dt.single), cols.expected)
  num.aggregated.trans <- nrow(clv.data.aggregate.transactions(cdnow, has.spending = TRUE))
  expect_true(nrow(dt.single) == num.aggregated.trans)
  expect_setequal(dt.single$Id, clv.cdnow@data.transactions[, unique(Id)])
})


# . interpurchasetime -----------------------------------------------------------

test_that("Interpurchasetime plot - zero-repeaters removed", {
  skip_on_cran()
  clv.cdnow <- fct.helper.create.clvdata.cdnow(cdnow)

  expect_silent(dt.ipt <- plot(clv.cdnow, which="interpurchasetime", sample="estimation", plot=FALSE, verbose=FALSE))

  # Format: data.table with one mean interpurchase time per Id and nothing missing
  expect_s3_class(dt.ipt, "data.table")
  expect_setequal(colnames(dt.ipt), c("Id", "mean.interpurchase.time"))
  expect_false(anyNA(dt.ipt))

  # Only strictly positive interpurchase times
  num.positive <- dt.ipt[mean.interpurchase.time>0,  .N]
  num.not.positive <- dt.ipt[mean.interpurchase.time<=0, .N]
  expect_true(num.positive >  0)
  expect_true(num.not.positive == 0)

  # Every Id appears only once
  expect_true(dt.ipt[, uniqueN(Id)] == nrow(dt.ipt))

  # All customers except the zero-repeaters
  #   1432: num zero-repeaters from summary() for split=37, 1411 for split=39
  expect_true(nrow(dt.ipt) == nobs(clv.cdnow) - 1432)
})
# bachmannpatrick/CLVTools documentation built on Oct. 29, 2023, 2:16 p.m.