test02_sarpred.R
In SAR: Smart Adaptive Recommendations

context("SAR prediction")

datapath <- "../resources"
data(ms_usage, package="SAR", envir=environment())

i <- readLines(file.path(datapath, "items.txt"))
u <- readLines(file.path(datapath, "user.txt"))
u2 <- readLines(file.path(datapath, "user2.txt"))
dfu <- subset(ms_usage, user == u)


test_that("Prediction for cooccurrence works",
{
    count3 <- sar(ms_usage, support_threshold=3, similarity="count")

    ipred3 <- read.csv(file.path(datapath, "itempred_count3.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(item_predict(count3, i, k=10), ipred3)
    expect_equal(item_predict(count3, i), ipred3)

    upred3 <- read.csv(file.path(datapath, "userpred_count3_userid_only.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(count3, u, k=10), upred3)
    expect_equal(user_predict(count3, u), upred3)

    upred3 <- read.csv(file.path(datapath, "userpred_count3_userid_plus_events.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(count3, dfu, k=10), upred3)
    expect_equal(user_predict(count3, dfu), upred3)
})

test_that("Prediction for jaccard works",
{
    jac3 <- sar(ms_usage, support_threshold=3, similarity="jac")

    ipred3 <- read.csv(file.path(datapath, "itempred_jac3.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(item_predict(jac3, i, k=10), ipred3)
    expect_equal(item_predict(jac3, i), ipred3)

    upred3 <- read.csv(file.path(datapath, "userpred_jac3_userid_only.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(jac3, u, k=10), upred3)
    expect_equal(user_predict(jac3, u), upred3)

    upred3 <- read.csv(file.path(datapath, "userpred_jac3_userid_plus_events.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(jac3, dfu, k=10), upred3)
    expect_equal(user_predict(jac3, dfu), upred3)
})

test_that("Prediction for lift works",
{
    lift3 <- sar(ms_usage, support_threshold=3, similarity="lift")

    ipred3 <- read.csv(file.path(datapath, "itempred_lift3.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(item_predict(lift3, i, k=10), ipred3)
    expect_equal(item_predict(lift3, i), ipred3)

    upred3 <- read.csv(file.path(datapath, "userpred_lift3_userid_only.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(lift3, u, k=10), upred3)
    expect_equal(user_predict(lift3, u), upred3)

    upred3 <- read.csv(file.path(datapath, "userpred_lift3_userid_plus_events.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(lift3, dfu, k=10), upred3)
    expect_equal(user_predict(lift3, dfu), upred3)
})


test_that("Prediction with new/overlapping user IDs works",
{
    mod <- sar(ms_usage, support_threshold=3, similarity="count")

    ipred3 <- read.csv(file.path(datapath, "itempred_count3.csv"), stringsAsFactors=FALSE,
                       colClasses=c(rep("character", 11), rep("numeric", 10)))

    # new user ID: predicting with transactions should work, without transactions should fail
    dfu_new <- dfu
    dfu_new$user <- "xxxx"

    pred <- user_predict(mod, dfu_new)
    expect_s3_class(pred, "data.frame")
    expect_equal(nrow(pred), 1)

    expect_error(user_predict(mod, dfu_new[1]))

    expect_s3_class(user_predict(mod, dfu_new[-1]), "data.frame")

    # overlapping user IDs: predicting with transactions should work, without transactions should warn and drop
    dfu_ov <- rbind(dfu, dfu_new)

    pred <- user_predict(mod, dfu_ov)
    expect_s3_class(pred, "data.frame")
    expect_equal(nrow(pred), 2)

    expect_warning(pred <- user_predict(mod, dfu_ov[1]))
    expect_s3_class(pred, "data.frame")
    expect_equal(nrow(pred), 1)

    expect_s3_class(user_predict(mod, dfu_ov[-1]), "data.frame")
})


test_that("Prediction with multiple user IDs ordered correctly",
{
    mod <- sar(ms_usage, support_threshold=3, similarity="count")

    uu2 <- c(u, u2)

    expected <- read.csv(file.path(datapath, "userpred_2users_count3_userid_only.csv"),
                         stringsAsFactors=FALSE,
                         colClasses=c(rep("character", 11), rep("numeric", 10)))
    pred <- user_predict(mod, uu2)
    expect_equal(pred, expected)

    pred2 <- user_predict(mod, uu2[2:1])
    expected2 <- expected[2:1,]
    row.names(expected2) <- NULL
    expect_equal(pred2, expected2)

    pred3 <- user_predict(mod, uu2[c(1, 2, 1, 2, 1, 2)])
    expect_equal(pred3, expected)

    expected <- read.csv(file.path(datapath, "userpred_2users_count3_userid_plus_events.csv"),
                         stringsAsFactors=FALSE,
                         colClasses=c(rep("character", 11), rep("numeric", 10)))
    dfuu2 <- rbind(subset(ms_usage, user == u), subset(ms_usage, user == u2))
    pred <- user_predict(mod, dfuu2)
    expect_equal(pred, expected)

    dfu2u <- rbind(subset(ms_usage, user == u2), subset(ms_usage, user == u))
    pred2 <- user_predict(mod, dfu2u)
    expected2 <- expected[2:1,]
    row.names(expected2) <- NULL
    expect_equal(pred2, expected2)
})


test_that("Prediction with new item IDs fails",
{
    mod <- sar(ms_usage)

    dfu_new <- dfu
    dfu_new$item[1] <- "xxxx"
    expect_error(user_predict(mod, dfu_new))
})


test_that("Backfilled and non-backfilled prediction work",
{
    u_bf <- readLines(file.path(datapath, "user_backfill.txt"))

    jac3 <- sar(ms_usage, support_threshold=3, similarity="jaccard")

    upred_nbf <- read.csv(file.path(datapath, "userpred_jac3_nobackfill.csv"), stringsAsFactors=FALSE,
                          colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(jac3, u_bf, backfill=FALSE), upred_nbf)

    upred_bf <- read.csv(file.path(datapath, "userpred_jac3_backfill.csv"), stringsAsFactors=FALSE,
                         colClasses=c(rep("character", 11), rep("numeric", 10)))
    expect_equal(user_predict(jac3, u_bf, backfill=TRUE), upred_bf)
})