tests/testthat/test-sparse.R

if (requireNamespace("Matrix", quietly=TRUE))
    iris_sparse <- Matrix::Matrix(data.matrix(iris), sparse=TRUE)

test_that("out-of-bag error same using sparse or non-sparse data", {
    skip_if_not_installed("Matrix")
    set.seed(56)
    rf_class_sparse <- train(data=iris_sparse, response_name="Species",
                             classification=TRUE)
    set.seed(56)
    rf_class_df <- train(data=iris, response_name="Species")

    expect_equal(rf_class_df$oob_error, rf_class_sparse$oob_error)

    skip_if_not_installed("Matrix")
    set.seed(56)
    rf_num_sparse <- train(data=iris_sparse, response_name="Sepal.Length")
    set.seed(56)
    rf_num_df <- train(data=iris, response_name="Sepal.Length")

    expect_equal(rf_num_df$oob_error, rf_num_sparse$oob_error)

})

test_that("prediction same using sparse or non-sparse data", {
    skip_if_not_installed("Matrix")
    set.seed(56)
    rf_class_df <- train(data=iris, response_name="Species")
    pred_class_df <- predict(rf_class_df, newdata=iris)$values
    set.seed(56)
    rf_class_sparse <- train(data=iris_sparse, response_name="Species",
                             classification=TRUE)
    pred_class_sparse <- predict(rf_class_sparse, newdata=iris_sparse)$values
    pred_class_sparse <- factor(pred_class_sparse,
                                levels=rf_class_df$response_values,
                                labels=rf_class_df$response_levels)

    expect_equal(pred_class_df, pred_class_sparse)

    set.seed(56)
    rf_num_sparse <- train(data=iris_sparse, response_name="Sepal.Length")
    pred_num_sparse <- predict(rf_num_sparse, newdata=iris_sparse)$values
    set.seed(56)
    rf_num_df <- train(data=iris, response_name="Sepal.Length")
    pred_num_df <- predict(rf_num_df, newdata=iris)$values

    expect_equal(pred_num_df, pred_num_sparse)

})

test_that("prediction same if one of training or testing data is sparse", {
    skip_if_not_installed("Matrix")
    idx <- sample(nrow(iris), 2/3*nrow(iris))
    train_df <- iris[idx, ]
    test_df <- iris[-idx, ]
    train_sparse <- Matrix::Matrix(data.matrix(train_df), sparse=TRUE)
    test_sparse <- Matrix::Matrix(data.matrix(test_df), sparse=TRUE)
  # to convert from sparse-data response to original response
    unique_species <- unique(iris$Species)
    species_map <- setNames(unique_species, as.integer(unique_species))

    set.seed(42)
    rf_df <- train(data=train_df, response_name="Species")
    set.seed(57)
    pred_df_df <- predict(rf_df, newdata=test_df)$values
    set.seed(57)
    pred_df_sparse <- predict(rf_df, newdata=test_sparse)$values

    set.seed(42)
    rf_sparse <- train(data=train_sparse, response_name="Species",
                       classification=TRUE)
    set.seed(57)
    pred_sparse_df <- factor(
        unname(species_map[predict(rf_sparse, newdata=test_sparse)$values]),
        levels=levels(iris$Species)
    )
    set.seed(57)
    pred_sparse_sparse <- factor(
        unname(species_map[predict(rf_sparse, newdata=test_sparse)$values]),
        levels=levels(iris$Species)
    )

    expect_equal(pred_df_df, pred_df_sparse)
    expect_equal(pred_df_sparse, pred_sparse_df)
    expect_equal(pred_sparse_df, pred_sparse_sparse)
})

Try the literanger package in your browser

Any scripts or data that you put into this service are public.

literanger documentation built on Sept. 30, 2024, 9:15 a.m.