tests/testthat/test-perf-pairwise.r

####################
# Author: James Hickey
#
# Series of tests for perf_pairwise functionality
#
####################

context("Testing perf_pairwise")
test_that("Metric ndcg  fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")
  # Given data and a fitted pairwise
  # create query groups, with an average size of 25 items each
  N <- 1000
  num.queries <- floor(N/25)
  query <- sample(1:num.queries, N, replace=TRUE)
  
  # X1 is a variable determined by query group only
  query.level <- runif(num.queries)
  X1 <- query.level[query]
  
  # X2 varies with each item
  X2 <- runif(N)
  
  # X3 is uncorrelated with target
  X3 <- runif(N)
  
  # The target
  Y <- X1 + X2
  
  # Add some random noise to X2 that is correlated with
  # queries, but uncorrelated with items
  
  X2 <- X2 + scale(runif(num.queries))[query]
  
  # Add some random noise to target
  SNR <- 5 # signal-to-noise ratio
  sigma <- sqrt(var(Y)/SNR)
  Y <- Y + runif(N, 0, sigma)
  
  data <- data.frame(Y, query=query, X1, X2, X3)
  dist <- gbm_dist("Pairwise", metric="ndcg", group="query")
  params <- training_params(num_trees = 200, num_train = nrow(data), id=seq_len(nrow(data)),
                            interaction_depth = 3)
  
  fit <- gbmt(Y~X1+X2+X3,          # formula
             data=data,     # dataset
             distribution=dist,
             train_params=params,
             keep_gbm_data=TRUE,      # store copy of input data in model
             cv_folds=5,          # number of cross validation folds
             is_verbose = FALSE ,    # don't print progress
             par_details=gbmParallel())  
  
  # When perf_pairwise is called
  # Then runs without error
  expect_error(perf_pairwise(Y, fit$fit, fit$distribution$group_index, 
                             fit$distribution$metric, max_rank=fit$distribution$max_rank), NA)
  
})

test_that("Metric map fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")
  # Given data and a fitted pairwise
  # create query groups, with an average size of 25 items each
  N <- 1000
  num.queries <- floor(N/25)
  query <- sample(1:num.queries, N, replace=TRUE)
  
  # X1 is a variable determined by query group only
  query.level <- runif(num.queries)
  X1 <- query.level[query]
  
  # X2 varies with each item
  X2 <- runif(N)
  
  # X3 is uncorrelated with target
  X3 <- runif(N)
  
  # The target
  Y <- X1 + X2
  
  # Add some random noise to X2 that is correlated with
  # queries, but uncorrelated with items
  
  X2 <- X2 + scale(runif(num.queries))[query]
  
  # Add some random noise to target
  SNR <- 5 # signal-to-noise ratio
  sigma <- sqrt(var(Y)/SNR)
  Y <- Y + runif(N, 0, sigma)
  
  Y[Y >= 1] <- 1
  Y[Y < 1] <- 0
  
  data <- data.frame(Y, query=query, X1, X2, X3)
  dist <- gbm_dist("Pairwise", metric="map", group="query")
  params <- training_params(num_trees = 200, num_train = nrow(data),
                            id=seq_len(nrow(data)),
                            interaction_depth = 3)
  
  fit <- gbmt(Y~X1+X2+X3,          # formula
              data=data,     # dataset
              distribution=dist,
              train_params=params,
              keep_gbm_data=TRUE,      # store copy of input data in model
              cv_folds=5,          # number of cross validation folds
              is_verbose = FALSE ,    # don't print progress
              par_details=gbmParallel())  
  
  # When perf_pairwise is called
  # Then runs without error
  expect_error(perf_pairwise(Y, fit$fit, fit$distribution$group_index, 
                             fit$distribution$metric, max_rank=fit$distribution$max_rank), NA)
  
})

test_that("Metric mrr fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")
  # Given data and a fitted pairwise
  # create query groups, with an average size of 25 items each
  N <- 1000
  num.queries <- floor(N/25)
  query <- sample(1:num.queries, N, replace=TRUE)
  
  # X1 is a variable determined by query group only
  query.level <- runif(num.queries)
  X1 <- query.level[query]
  
  # X2 varies with each item
  X2 <- runif(N)
  
  # X3 is uncorrelated with target
  X3 <- runif(N)
  
  # The target
  Y <- X1 + X2
  
  # Add some random noise to X2 that is correlated with
  # queries, but uncorrelated with items
  
  X2 <- X2 + scale(runif(num.queries))[query]
  
  # Add some random noise to target
  SNR <- 5 # signal-to-noise ratio
  sigma <- sqrt(var(Y)/SNR)
  Y <- Y + runif(N, 0, sigma)
  Y[Y >= 1] <- 1
  Y[Y < 1] <- 0
  
  data <- data.frame(Y, query=query, X1, X2, X3)
  dist <- gbm_dist("Pairwise", metric="ndcg", group="query")
  params <- training_params(num_trees = 200, num_train = nrow(data), id=seq_len(nrow(data)),
                            interaction_depth = 3)
  
  fit <- gbmt(Y~X1+X2+X3,          # formula
              data=data,     # dataset
              distribution=dist,
              train_params=params,
              keep_gbm_data=TRUE,      # store copy of input data in model
              cv_folds=5,          # number of cross validation folds
              is_verbose = FALSE ,    # don't print progress
              par_details=gbmParallel())  
  
  # When perf_pairwise is called
  # Then runs without error
  expect_error(perf_pairwise(Y, fit$fit, fit$distribution$group_index, 
                             fit$distribution$metric, max_rank=fit$distribution$max_rank), NA)
  
})

test_that("Metric conc fit passed to perf_pairwise runs", {
  skip("Skipping pairwise")
  # Given data and a fitted pairwise
  # create query groups, with an average size of 25 items each
  N <- 1000
  num.queries <- floor(N/25)
  query <- sample(1:num.queries, N, replace=TRUE)
  
  # X1 is a variable determined by query group only
  query.level <- runif(num.queries)
  X1 <- query.level[query]
  
  # X2 varies with each item
  X2 <- runif(N)
  
  # X3 is uncorrelated with target
  X3 <- runif(N)
  
  # The target
  Y <- X1 + X2
  
  # Add some random noise to X2 that is correlated with
  # queries, but uncorrelated with items
  
  X2 <- X2 + scale(runif(num.queries))[query]
  
  # Add some random noise to target
  SNR <- 5 # signal-to-noise ratio
  sigma <- sqrt(var(Y)/SNR)
  Y <- Y + runif(N, 0, sigma)
  
  data <- data.frame(Y, query=query, X1, X2, X3)
  dist <- gbm_dist("Pairwise", metric="conc", group="query")
  params <- training_params(num_trees = 200, num_train = nrow(data), id=seq_len(nrow(data)),
                            interaction_depth = 3)
  
  fit <- gbmt(Y~X1+X2+X3,          # formula
              data=data,     # dataset
              distribution=dist,
              train_params=params,
              keep_gbm_data=TRUE,      # store copy of input data in model
              cv_folds=5,          # number of cross validation folds
              is_verbose = FALSE ,    # don't print progress
              par_details=gbmParallel())  
  
  # When perf_pairwise is called
  # Then runs without error
  expect_error(perf_pairwise(Y, fit$fit, fit$distribution$group_index, 
                             fit$distribution$metric, max_rank=fit$distribution$max_rank), NA)
  
})

test_that("Error thrown if metric not recognised", {
  skip("Skipping pairwise")
  # Given a fitted Pairwise distribution
  # create query groups, with an average size of 25 items each
  N <- 1000
  num.queries <- floor(N/25)
  query <- sample(1:num.queries, N, replace=TRUE)
  
  # X1 is a variable determined by query group only
  query.level <- runif(num.queries)
  X1 <- query.level[query]
  
  # X2 varies with each item
  X2 <- runif(N)
  
  # X3 is uncorrelated with target
  X3 <- runif(N)
  
  # The target
  Y <- X1 + X2
  
  # Add some random noise to X2 that is correlated with
  # queries, but uncorrelated with items
  
  X2 <- X2 + scale(runif(num.queries))[query]
  
  # Add some random noise to target
  SNR <- 5 # signal-to-noise ratio
  sigma <- sqrt(var(Y)/SNR)
  Y <- Y + runif(N, 0, sigma)
  
  data <- data.frame(Y, query=query, X1, X2, X3)
  dist <- gbm_dist("Pairwise", metric="conc", group="query")
  params <- training_params(num_trees = 200, num_train = nrow(data),
                            id=seq_len(nrow(data)),
                            interaction_depth = 3)
  
  fit <- gbmt(Y~X1+X2+X3,          # formula
              data=data,     # dataset
              distribution=dist,
              train_params=params,
              keep_gbm_data=TRUE,      # store copy of input data in model
              cv_folds=5,          # number of cross validation folds
              is_verbose = FALSE ,    # don't print progress
              par_details=gbmParallel())  
  
  # When perf_pairwise is called but with an unrecognised metric
  # Then an error is thrown
  expect_error(perf_pairwise(Y, fit$fit, fit$distribution$group_index, 
                             "wrong", max_rank=fit$distribution$max_rank))
})
gbm-developers/gbm3 documentation built on April 28, 2024, 10:04 p.m.