####################
# Author: James Hickey
#
# Series of tests for perf_pairwise functionality
#
####################
# Label under which the perf_pairwise tests below are grouped by testthat
context("Testing perf_pairwise")
test_that("Metric ndcg fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")

  # Given: simulated ranking data and a Pairwise model fitted with "ndcg".
  # Items are assigned at random to query groups (about 25 items per group).
  n_items <- 1000
  n_queries <- floor(n_items / 25)
  query_id <- sample(seq_len(n_queries), n_items, replace = TRUE)

  # Predictors: X1 is constant within a query group, X2 differs per item,
  # X3 is unrelated to the target.
  group_effect <- runif(n_queries)
  x1 <- group_effect[query_id]
  x2 <- runif(n_items)
  x3 <- runif(n_items)

  # The target is built from the clean predictors ...
  target <- x1 + x2
  # ... and only afterwards does X2 receive query-level noise
  # (correlated with queries, uncorrelated with items).
  x2 <- x2 + scale(runif(n_queries))[query_id]

  # Perturb the target with noise scaled by the signal-to-noise ratio.
  signal_to_noise <- 5
  noise_width <- sqrt(var(target) / signal_to_noise)
  target <- target + runif(n_items, 0, noise_width)

  ranking_data <- data.frame(
    Y = target,
    query = query_id,
    X1 = x1,
    X2 = x2,
    X3 = x3
  )
  n_obs <- nrow(ranking_data)

  pairwise_dist <- gbm_dist("Pairwise", metric = "ndcg", group = "query")
  fit_params <- training_params(
    num_trees = 200,
    num_train = n_obs,
    id = seq_len(n_obs),
    interaction_depth = 3
  )
  fit <- gbmt(
    Y ~ X1 + X2 + X3,
    data = ranking_data,
    distribution = pairwise_dist,
    train_params = fit_params,
    keep_gbm_data = TRUE,         # keep a copy of the input data in the model
    cv_folds = 5,                 # 5-fold cross validation
    is_verbose = FALSE,           # silent fitting
    par_details = gbmParallel()
  )

  # When perf_pairwise is evaluated on the fit, then no error is raised.
  expect_error(
    perf_pairwise(
      target,
      fit$fit,
      fit$distribution$group_index,
      fit$distribution$metric,
      max_rank = fit$distribution$max_rank
    ),
    regexp = NA
  )
})
test_that("Metric map fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")

  # --- Given: simulated query/item data and a Pairwise "map" fit ----------
  # Query groups hold roughly 25 items each.
  total_items <- 1000
  query_count <- floor(total_items / 25)
  item_query <- sample(seq_len(query_count), total_items, replace = TRUE)

  # X1 depends on the query group alone.
  per_query_value <- runif(query_count)
  feat_query <- per_query_value[item_query]
  # X2 changes from item to item.
  feat_item <- runif(total_items)
  # X3 carries no information about the target.
  feat_noise <- runif(total_items)

  # Target computed before X2 is contaminated with query-correlated noise.
  response <- feat_query + feat_item
  feat_item <- feat_item + scale(runif(query_count))[item_query]

  # Noisy target (signal-to-noise ratio of 5), then thresholded at 1 so the
  # response only takes the values 0 and 1.
  snr <- 5
  noise_max <- sqrt(var(response) / snr)
  response <- response + runif(total_items, 0, noise_max)
  response <- as.numeric(response >= 1)

  sim_data <- data.frame(
    Y = response,
    query = item_query,
    X1 = feat_query,
    X2 = feat_item,
    X3 = feat_noise
  )
  row_count <- nrow(sim_data)

  map_dist <- gbm_dist("Pairwise", metric = "map", group = "query")
  map_params <- training_params(
    num_trees = 200,
    num_train = row_count,
    id = seq_len(row_count),
    interaction_depth = 3
  )
  fit <- gbmt(
    Y ~ X1 + X2 + X3,
    data = sim_data,
    distribution = map_dist,
    train_params = map_params,
    keep_gbm_data = TRUE,   # model retains the input data
    cv_folds = 5,           # cross validation folds
    is_verbose = FALSE,     # no progress output
    par_details = gbmParallel()
  )

  # --- When perf_pairwise is called, then it completes without error ------
  expect_error(
    perf_pairwise(
      response,
      fit$fit,
      fit$distribution$group_index,
      fit$distribution$metric,
      max_rank = fit$distribution$max_rank
    ),
    regexp = NA
  )
})
test_that("Metric mrr fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")

  # Given data and a Pairwise model fitted with the "mrr" metric.
  # Create query groups, with an average size of 25 items each.
  N <- 1000
  num.queries <- floor(N / 25)
  query <- sample(seq_len(num.queries), N, replace = TRUE)

  # X1 is a variable determined by query group only
  query.level <- runif(num.queries)
  X1 <- query.level[query]
  # X2 varies with each item
  X2 <- runif(N)
  # X3 is uncorrelated with target
  X3 <- runif(N)

  # The target (computed before noise is added to X2)
  Y <- X1 + X2

  # Add some random noise to X2 that is correlated with
  # queries, but uncorrelated with items
  X2 <- X2 + scale(runif(num.queries))[query]

  # Add some random noise to target
  SNR <- 5 # signal-to-noise ratio
  sigma <- sqrt(var(Y) / SNR)
  Y <- Y + runif(N, 0, sigma)

  # Threshold the target at 1 so it only takes the values 0 and 1
  Y[Y >= 1] <- 1
  Y[Y < 1] <- 0

  data <- data.frame(Y, query = query, X1, X2, X3)

  # The metric must be "mrr" here: this test previously requested "ndcg",
  # which duplicated the ndcg test and left the mrr path untested.
  dist <- gbm_dist("Pairwise", metric = "mrr", group = "query")
  params <- training_params(
    num_trees = 200,
    num_train = nrow(data),
    id = seq_len(nrow(data)),
    interaction_depth = 3
  )
  fit <- gbmt(
    Y ~ X1 + X2 + X3,             # formula
    data = data,                  # dataset
    distribution = dist,
    train_params = params,
    keep_gbm_data = TRUE,         # store copy of input data in model
    cv_folds = 5,                 # number of cross validation folds
    is_verbose = FALSE,           # don't print progress
    par_details = gbmParallel()
  )

  # When perf_pairwise is called
  # Then runs without error
  expect_error(
    perf_pairwise(
      Y,
      fit$fit,
      fit$distribution$group_index,
      fit$distribution$metric,
      max_rank = fit$distribution$max_rank
    ),
    NA
  )
})
test_that("Metric conc fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")

  # Given: a Pairwise model using the "conc" metric, fitted to simulated
  # data whose items fall into query groups of about 25 items each.
  sample_size <- 1000
  group_count <- floor(sample_size / 25)
  group_of_item <- sample(seq_len(group_count), sample_size, replace = TRUE)

  # Group-level predictor (X1), item-level predictor (X2) and a predictor
  # that is independent of the target (X3).
  group_level <- runif(group_count)
  pred_group <- group_level[group_of_item]
  pred_item <- runif(sample_size)
  pred_irrelevant <- runif(sample_size)

  # Noise-free target from the two informative predictors.
  outcome <- pred_group + pred_item

  # Query-correlated, item-uncorrelated noise is added to X2 after the
  # target has been formed.
  pred_item <- pred_item + scale(runif(group_count))[group_of_item]

  # Target noise sized from the signal-to-noise ratio.
  ratio <- 5
  noise_scale <- sqrt(var(outcome) / ratio)
  outcome <- outcome + runif(sample_size, 0, noise_scale)

  conc_data <- data.frame(
    Y = outcome,
    query = group_of_item,
    X1 = pred_group,
    X2 = pred_item,
    X3 = pred_irrelevant
  )
  rows <- nrow(conc_data)

  conc_dist <- gbm_dist("Pairwise", metric = "conc", group = "query")
  conc_params <- training_params(
    num_trees = 200,
    num_train = rows,
    id = seq_len(rows),
    interaction_depth = 3
  )
  fit <- gbmt(
    Y ~ X1 + X2 + X3,
    data = conc_data,
    distribution = conc_dist,
    train_params = conc_params,
    keep_gbm_data = TRUE,   # input data is stored alongside the model
    cv_folds = 5,           # five cross validation folds
    is_verbose = FALSE,     # suppress progress messages
    par_details = gbmParallel()
  )

  # When perf_pairwise is invoked, then it must not raise an error.
  expect_error(
    perf_pairwise(
      outcome,
      fit$fit,
      fit$distribution$group_index,
      fit$distribution$metric,
      max_rank = fit$distribution$max_rank
    ),
    regexp = NA
  )
})
test_that("Error thrown if metric not recognised", {
  skip("Skipping pairwise")

  # Given: a fitted Pairwise ("conc") model on simulated grouped data.
  # Each query group contains 25 items on average.
  obs_total <- 1000
  queries_total <- floor(obs_total / 25)
  query_idx <- sample(seq_len(queries_total), obs_total, replace = TRUE)

  # X1: fixed within a query group.
  level_by_query <- runif(queries_total)
  var_query <- level_by_query[query_idx]
  # X2: varies per item.
  var_item <- runif(obs_total)
  # X3: uncorrelated with the target.
  var_unrelated <- runif(obs_total)

  # Target is defined first; X2 then gets noise tied to the query groups.
  score <- var_query + var_item
  var_item <- var_item + scale(runif(queries_total))[query_idx]

  # Random noise on the target, governed by the signal-to-noise ratio.
  snr_value <- 5
  noise_upper <- sqrt(var(score) / snr_value)
  score <- score + runif(obs_total, 0, noise_upper)

  model_data <- data.frame(
    Y = score,
    query = query_idx,
    X1 = var_query,
    X2 = var_item,
    X3 = var_unrelated
  )
  data_rows <- nrow(model_data)

  fitted_dist <- gbm_dist("Pairwise", metric = "conc", group = "query")
  fitted_params <- training_params(
    num_trees = 200,
    num_train = data_rows,
    id = seq_len(data_rows),
    interaction_depth = 3
  )
  fit <- gbmt(
    Y ~ X1 + X2 + X3,
    data = model_data,
    distribution = fitted_dist,
    train_params = fitted_params,
    keep_gbm_data = TRUE,   # copy of the input data kept in the model
    cv_folds = 5,           # cross validation with 5 folds
    is_verbose = FALSE,     # no progress printing
    par_details = gbmParallel()
  )

  # When perf_pairwise receives a metric name it does not recognise,
  # then an error is expected.
  expect_error(
    perf_pairwise(
      score,
      fit$fit,
      fit$distribution$group_index,
      "wrong",
      max_rank = fit$distribution$max_rank
    )
  )
})
# (Web-page residue, not part of the test suite:)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.