# tests/testthat/test-dissimilarity_matrices.R

#============================================================

# data

# Each fixture is seeded before it is drawn so the test data is reproducible.

# 100 x 10 matrix of 0/1 values, built column by column
set.seed(0)
binary_data <- do.call(cbind, lapply(seq_len(10), function(x) sample(0:1, 100, replace = TRUE)))

# 100 x 10 matrix of uniform random numbers
set.seed(1)
continuous_data <- matrix(runif(1000), ncol = 10, nrow = 100)

# two continuous medoids, one column per column of the continuous data
set.seed(2)
MEDOIDS_continuous <- matrix(runif(20), ncol = ncol(continuous_data), nrow = 2)

# two binary medoids, one column per column of the binary data
set.seed(3)
MEDOIDS_binary <- matrix(sample(0:1, 20, replace = TRUE), ncol = ncol(binary_data), nrow = 2)


# data which includes missing values

# Sets 'n_missing' randomly chosen rows to NA in each of the given columns.
# Columns are processed in the order given, so the random draws happen in
# that same order.
insert_NAs <- function(mat, columns, n_missing = 20) {
  for (j in columns) {
    mat[sample(seq_len(nrow(mat)), n_missing), j] <- NA
  }
  mat
}

# columns that receive missing values in both fixtures
NA_columns <- c(1, 3, 4, 7)

# continuous

set.seed(4)
mt_NAs <- insert_NAs(matrix(runif(1000), nrow = 100, ncol = 10), NA_columns)

# binary (continues the random stream started by set.seed(4) above)

binary_NAs <- insert_NAs(binary_data, NA_columns)


#=============================================================


# Namespace-qualified like every other testthat call in this file, so the
# call does not depend on testthat being attached.
# NOTE(review): context() is deprecated in the 3rd edition of testthat --
# confirm which edition the package uses before removing this call.
testthat::context("dissimilarity - matrices")



######################
# dissim_mat function
######################

# complete data ------------


testthat::test_that("in case that the data is binary it returns the correct output for the binary methods", {

  # Every binary method must return a square matrix with one row and one
  # column per observation of 'binary_data'.
  binary_methods <- c("simple_matching_coefficient", "hamming", "jaccard_coefficient", "Rao_coefficient")

  n_obs <- nrow(binary_data)

  for (method in binary_methods) {

    out <- dissim_mat(binary_data, method, upper = TRUE, diagonal = TRUE)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), c(n_obs, n_obs), info = method)
  }
})


testthat::test_that("in case that the data is numeric it returns the correct output for the numeric methods", {

  # Every numeric method must return a square matrix with one row and one
  # column per observation of 'continuous_data'.
  continuous_methods <- c("euclidean", "manhattan", "chebyshev", "canberra", "braycurtis", "pearson_correlation", "mahalanobis", "cosine")

  n_obs <- nrow(continuous_data)

  for (method in continuous_methods) {

    out <- dissim_mat(continuous_data, method, upper = TRUE, diagonal = TRUE)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), c(n_obs, n_obs), info = method)
  }
})



# missing data ------------


testthat::test_that("in case that the data is binary AND INCLUDE MISSING VALUES it returns the correct output for the binary methods", {

  # With NAs in the input, every binary method must still return a square
  # matrix of the expected size that contains no missing values.
  binary_methods <- c("simple_matching_coefficient", "hamming", "jaccard_coefficient", "Rao_coefficient")

  n_obs <- nrow(binary_NAs)

  for (method in binary_methods) {

    out <- dissim_mat(binary_NAs, method, upper = TRUE, diagonal = TRUE)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), c(n_obs, n_obs), info = method)
    testthat::expect_false(anyNA(out), info = method)
  }
})


testthat::test_that("in case that the data is numeric AND INCLUDE MISSING VALUES it returns the correct output for the numeric methods", {

  # "mahalanobis" is left out of this list: a separate test expects it to
  # raise an error when the data contains missing values.
  continuous_methods <- c("euclidean", "manhattan", "chebyshev", "canberra", "braycurtis", "pearson_correlation", "cosine")

  n_obs <- nrow(mt_NAs)

  for (method in continuous_methods) {

    out <- dissim_mat(mt_NAs, method, upper = TRUE, diagonal = TRUE)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), c(n_obs, n_obs), info = method)
    testthat::expect_false(anyNA(out), info = method)
  }
})


testthat::test_that("in case that the data is numeric AND INCLUDE MISSING VALUES AND method = mahalanobis it returns an error", {

  # The mahalanobis method is expected to be rejected for input with NAs.
  testthat::expect_error(dissim_mat(mt_NAs, "mahalanobis", upper = TRUE, diagonal = TRUE))
})


testthat::test_that("in case that the data is numeric AND INCLUDE MISSING VALUES it returns the correct output for the minkowski method", {

  # Minkowski needs the extra 'minkowski_p' argument, hence its own test.
  out <- dissim_mat(mt_NAs, "minkowski", upper = TRUE, diagonal = TRUE, minkowski_p = 1.0)

  # Only the shape is checked here, as in the original test.
  testthat::expect_true(is.matrix(out))
  testthat::expect_equal(dim(out), c(nrow(mt_NAs), nrow(mt_NAs)))
})


testthat::test_that("in case that the data is numeric it returns the correct output for the minkowski method", {

  # Minkowski needs the extra 'minkowski_p' argument, hence its own test.
  out <- dissim_mat(continuous_data, "minkowski", upper = TRUE, diagonal = TRUE, minkowski_p = 1.0)

  # The result must be square, with one row and column per observation.
  testthat::expect_true(is.matrix(out))
  testthat::expect_equal(dim(out), c(nrow(continuous_data), nrow(continuous_data)))
})



testthat::test_that("in case that the data includes missing values (NA) the 'dissim_mat' function returns the correct output ", {

  out <- dissim_mat(mt_NAs, "euclidean", upper = TRUE, diagonal = TRUE)

  # Square matrix of the expected size, with no NA carried into the result.
  testthat::expect_true(is.matrix(out))
  testthat::expect_equal(dim(out), c(nrow(mt_NAs), nrow(mt_NAs)))
  testthat::expect_false(anyNA(out))
})



##########################
# dissim_MEDOIDS function
##########################


# complete data ----------------

testthat::test_that("in case that the data is binary it returns the correct output for the binary methods", {

  # Every binary method must return a matrix with one row per observation
  # and one column per medoid.
  binary_methods <- c("simple_matching_coefficient", "hamming", "jaccard_coefficient", "Rao_coefficient")

  expected_dim <- c(nrow(binary_data), nrow(MEDOIDS_binary))

  for (method in binary_methods) {

    out <- dissim_MEDOIDS(binary_data, method, MEDOIDS_binary)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), expected_dim, info = method)
  }
})


testthat::test_that("in case that the data is numeric it returns the correct output for the numeric methods", {

  # Every numeric method must return a matrix with one row per observation
  # and one column per medoid.
  continuous_methods <- c("euclidean", "manhattan", "chebyshev", "canberra", "braycurtis", "pearson_correlation", "mahalanobis", "cosine")

  expected_dim <- c(nrow(continuous_data), nrow(MEDOIDS_continuous))

  for (method in continuous_methods) {

    out <- dissim_MEDOIDS(continuous_data, method, MEDOIDS_continuous)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), expected_dim, info = method)
  }
})


# missing data ----------------

testthat::test_that("in case that the data is binary AND INCLUDE MISSING VALUES it returns the correct output for the binary methods", {

  # With NAs in the input, every binary method must still return an
  # observations-by-medoids matrix that contains no missing values.
  binary_methods <- c("simple_matching_coefficient", "hamming", "jaccard_coefficient", "Rao_coefficient")

  expected_dim <- c(nrow(binary_NAs), nrow(MEDOIDS_binary))

  for (method in binary_methods) {

    out <- dissim_MEDOIDS(binary_NAs, method, MEDOIDS_binary)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), expected_dim, info = method)
    testthat::expect_false(anyNA(out), info = method)
  }
})


testthat::test_that("in case that the data is numeric AND INCLUDE MISSING VALUES it returns the correct output for the numeric methods", {

  # "mahalanobis" is left out of this list: a separate test expects it to
  # raise an error when the data contains missing values.
  continuous_methods <- c("euclidean", "manhattan", "chebyshev", "canberra", "braycurtis", "pearson_correlation", "cosine")

  expected_dim <- c(nrow(mt_NAs), nrow(MEDOIDS_continuous))

  for (method in continuous_methods) {

    out <- dissim_MEDOIDS(mt_NAs, method, MEDOIDS_continuous)

    # 'info' carries the method name so a failure identifies the metric
    testthat::expect_true(is.matrix(out), info = method)
    testthat::expect_equal(dim(out), expected_dim, info = method)
    testthat::expect_false(anyNA(out), info = method)
  }
})


testthat::test_that("in case that the data is numeric AND INCLUDE MISSING VALUES AND method = mahalanobis it returns an error", {

  # The mahalanobis method is expected to be rejected for input with NAs.
  testthat::expect_error(
    dissim_MEDOIDS(mt_NAs, "mahalanobis", MEDOIDS_continuous)
  )
})


testthat::test_that("in case that the data is numeric AND INCLUDE MISSING VALUES it returns the correct output for the minkowski method", {

  # Minkowski needs the extra 'minkowski_p' argument, hence its own test.
  dist_to_medoids <- dissim_MEDOIDS(mt_NAs, "minkowski", MEDOIDS_continuous, minkowski_p = 1.0)

  # one row per observation, one column per medoid
  wanted_dim <- c(nrow(mt_NAs), nrow(MEDOIDS_continuous))
  shape_ok <- is.matrix(dist_to_medoids) && all(dim(dist_to_medoids) == wanted_dim)

  testthat::expect_true(shape_ok)
})


testthat::test_that("in case that the data is numeric it returns the correct output for the minkowski method", {

  # Minkowski needs the extra 'minkowski_p' argument, hence its own test.
  dist_to_medoids <- dissim_MEDOIDS(continuous_data, "minkowski", MEDOIDS_continuous, minkowski_p = 1.0)

  # one row per observation, one column per medoid
  wanted_dim <- c(nrow(continuous_data), nrow(MEDOIDS_continuous))
  shape_ok <- is.matrix(dist_to_medoids) && all(dim(dist_to_medoids) == wanted_dim)

  testthat::expect_true(shape_ok)
})


testthat::test_that("in case that the data includes missing values (NA) the function returns the correct output taking into account the medoids", {

  out <- dissim_MEDOIDS(mt_NAs, "euclidean", MEDOIDS_continuous)

  # The row count is compared against 'mt_NAs', the matrix actually passed
  # in, rather than against an unrelated fixture that happens to have the
  # same number of rows.
  testthat::expect_true(is.matrix(out))
  testthat::expect_equal(dim(out), c(nrow(mt_NAs), nrow(MEDOIDS_continuous)))
  testthat::expect_false(anyNA(out))
})


  

# Try the ClusterR package in your browser
#
# Any scripts or data that you put into this service are public.
#
# ClusterR documentation built on June 22, 2024, 10:28 a.m.