tests/testthat/test-performance.R

# This file contains performance tests (speed and memory)
# for different kinds of matrices.

test_that("calculation speed", {
  
  # Don't run these tests on continuous integration systems or CRAN
  skip_on_ci()
  skip_on_cran()
  
  min_time <- 0.01 # seconds, default is 0.5 seconds
  small_mat_dimnames <- list(paste("My big long row name [from a suffix]", 1:3), 
                             paste("My big long col name [from a suffix]", 1:2))
  small_mat_dns <- matrix(1:6, nrow = 3, ncol = 2, dimnames = small_mat_dimnames)
  small_mat_nodns <- matrix(small_mat_dns, nrow = 3, ncol = 2, dimnames = NULL)
  t_small_mat_dns <- t(small_mat_dns)
  t_small_mat_nodns <- t(small_mat_nodns)
  
  small_ijx_dns <- Matrix::Matrix(small_mat_dns, sparse = TRUE)
  small_ijx_nodns <- Matrix::Matrix(small_mat_nodns, sparse = TRUE)
  t_small_ijx_dns <- Matrix::t(small_ijx_dns)
  t_small_ijx_nodns <- Matrix::t(small_ijx_nodns)
  
  
  big_mat_dimnames <- list(paste("My big long row name [from a suffix]", 1:100), 
                           paste("My big long col name [from a suffix]", 1:100))
  big_mat_dns <- matrix(1:(100*100), nrow = 100, ncol = 100, dimnames = big_mat_dimnames) * 0
  big_mat_dns[ , 1] <- 1:100
  big_mat_nodns <- matrix(big_mat_dns, dimnames = NULL)
  t_big_mat_dns <- t(big_mat_dns)
  t_big_mat_nodns <- t(big_mat_nodns)
  big_ijx_dns <- Matrix::Matrix(big_mat_dns, sparse = TRUE)
  big_ijx_nodns <- Matrix::Matrix(big_mat_nodns, sparse = TRUE)
  t_big_ijx_dns <- Matrix::t(big_ijx_dns)
  t_big_ijx_nodns <- Matrix::t(big_ijx_nodns)
  
  suppressWarnings(
    current_sum_small <- bench::mark(matsbyname::sum_byname(small_ijx_dns, small_ijx_dns), time_unit = "ms", min_time = min_time) |> 
      magrittr::extract2("median"))
  future_sum_small = bench::mark(small_ijx_nodns + small_ijx_nodns, time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  speedup_sum_small <- current_sum_small/future_sum_small # 7.4
  expect_true(speedup_sum_small > 1)

  current_prod_small <- bench::mark(matsbyname::matrixproduct_byname(small_ijx_dns, t_small_ijx_dns), time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  future_prod_small = bench::mark(big_ijx_nodns %*% t_big_ijx_nodns, time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  speedup_prod_small <- current_prod_small/future_prod_small
  expect_true(speedup_prod_small > 1) # 15.4
  
  current_sum_big <- bench::mark(matsbyname::sum_byname(big_ijx_dns, big_ijx_dns), time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  future_sum_big = bench::mark(big_ijx_nodns + big_ijx_nodns, time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  speedup_sum_big <- current_sum_big/future_sum_big # 8.6
  # expect_true(speedup_sum_big > 1) # Fails on GitHub actions for macOS

  current_prod_big <- bench::mark(matsbyname::matrixproduct_byname(big_ijx_dns, t_big_ijx_dns), time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  future_prod_big = bench::mark(big_ijx_nodns %*% t_big_ijx_nodns, time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  speedup_prod_big <- current_prod_big/future_prod_big # 22.9
  expect_true(speedup_prod_big > 1)
  
  # compare an object without dimnames going through the matsbyname function to one that does not.
  current_sum_big_nodns <- bench::mark(matsbyname::sum_byname(big_ijx_nodns, big_ijx_nodns), time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  future_sum_big_nodns <- bench::mark(big_ijx_nodns + big_ijx_nodns, time_unit = "ms", min_time = min_time) |> 
    magrittr::extract2("median")
  speedup_sum_big_nodns <- current_sum_big_nodns/future_sum_big_nodns # 3.5
  expect_true(speedup_sum_big_nodns > 1)
})


test_that("object sizes", {
  
  small_mat_dimnames <- list(paste("My big long row name [from a suffix]", 1:3), 
                             paste("My big long col name [from a suffix]", 1:2))
  size_small_mat_dimnames <- object.size(small_mat_dimnames) # 688 bytes
  
  small_mat <- matrix(1:6, nrow = 3, ncol = 2, dimnames = small_mat_dimnames)
  size_small_mat_dns <- object.size(small_mat) # 1048 bytes
  size_small_mat_nodns <- object.size(matrix(small_mat, dimnames = NULL)) # 248 bytes
  expect_true(size_small_mat_nodns < size_small_mat_dns)  
  expect_true(size_small_mat_nodns + size_small_mat_dimnames < size_small_mat_dns) # 248 bytes + 68 bytes = 936 bytes < 1048 bytes

  
  df_small_ijx <- tibble::tribble(~i, ~j, ~x, 
                                  1, 1, 1, 
                                  1, 2, 4, 
                                  2, 1, 2, 
                                  2, 2, 5, 
                                  3, 1, 3, 
                                  3, 2, 6)
  small_ijx_dns <- Matrix::sparseMatrix(i = df_small_ijx$i, j = df_small_ijx$j, x = df_small_ijx$x, 
                                        dimnames = small_mat_dimnames)
  size_small_ijx_dns <- object.size(small_ijx_dns) # 2216 bytes
  small_ijx_nodns <- Matrix::sparseMatrix(i = df_small_ijx$i, j = df_small_ijx$j, x = df_small_ijx$x)
  size_small_ijx_nodns <- object.size(small_ijx_nodns) # 1592 bytes
  expect_true(size_small_ijx_nodns + size_small_mat_dimnames > size_small_ijx_dns) # 1592 bytes + 688 bytes = 2280 bytes > 2216 bytes
  
  
  big_mat_dimnames <- list(paste("My big long row name [from a suffix]", 1:100), 
                           paste("My big long col name [from a suffix]", 1:100))
  size_big_mat_dimnames <- object.size(big_mat_dimnames) # 20960 bytes
  big_mat_dns <- matrix(1:(100*100), nrow = 100, ncol = 100, dimnames = big_mat_dimnames) * 0
  big_mat_dns[ , 1] <- 1:100
  size_big_mat_dns <- object.size(big_mat_dns) # 101288 bytes
  size_big_mat_nodns <- object.size(matrix(big_mat_dns, dimnames = NULL)) # 80216 bytes
  
  df_big_ijx <- data.frame(i = 1:100, j = 1, x = 1:100)
  big_ijx_dns <- Matrix::sparseMatrix(i = df_big_ijx$i, 
                                      j = df_big_ijx$j, 
                                      x = df_big_ijx$x, 
                                      dims = c(100, 100), 
                                      dimnames = big_mat_dimnames)
  size_big_ijx_dns <- object.size(big_ijx_dns) # 24000 bytes
  
  big_ijx_nodns <- Matrix::sparseMatrix(i = df_big_ijx$i, 
                                        j = df_big_ijx$j, 
                                        x = df_big_ijx$x, 
                                        dims = c(100, 100))
  size_big_ijx_nodns <- object.size(big_ijx_nodns) # 3104 bytes

  expect_true(size_big_ijx_nodns < size_big_ijx_dns) # 3104 bytes < 24000 bytes, factor of 7.7 improvement
  
  expect_true(size_big_ijx_nodns < size_big_mat_dns) # 3104 bytes < 101288 bytes, factor of 32 improvement
})


test_that("operations on sparse matrices work in dplyr::mutate()", {
  small_mat <- Matrix::Matrix(c(11, 21, 31, 12, 22, 32), nrow = 3, ncol = 2, sparse = TRUE)
  small_mat_list <- list(small_mat, small_mat, small_mat)
  m1str <- "m1"
  m2str <- "m2"
  m3str <- "m3"
  m4str <- "m4"
  df <- tibble::tibble(m1 = small_mat_list, m2 = small_mat_list)
  res <- df |> 
    dplyr::mutate(
      "{m3str}" := Map(`+`, .data[[m1str]], .data[[m2str]]), 
      "{m4str}" := Map(`%*%`, .data[[m2str]], .data[[m2str]] |> lapply(FUN = Matrix::t))
    )
  expected_m3 <- mapply(`+`, df$m1, df$m2)  
  expect_equal(res$m3, expected_m3)
  expected_m4 <- Map(f = `%*%`, df$m2, df$m2 |> lapply(FUN = Matrix::t))
  expect_equal(res$m4, expected_m4)
  # Check that results are correct
  small_mat_prod <- small_mat %*% Matrix::t(small_mat)
  for (i in 1:nrow(df)) {
    expect_equal(res$m4[[i]], small_mat_prod)  
  }
})

Try the matsbyname package in your browser

Any scripts or data that you put into this service are public.

matsbyname documentation built on May 29, 2024, 8:10 a.m.