tests/testthat/test-convert.R

#############################################################################
##
## Copyright 2016 Novartis Institutes for BioMedical Research Inc.
## Licensed under the Apache License, Version 2.0 (the "License");
## you may not use this file except in compliance with the License.
## You may obtain a copy of the License at
##
## http://www.apache.org/licenses/LICENSE-2.0
##
## Unless required by applicable law or agreed to in writing, software
## distributed under the License is distributed on an "AS IS" BASIS,
## WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
## See the License for the specific language governing permissions and
## limitations under the License.
##
#############################################################################

context("Conversion")


truncateVec <- function(x, min, max) {
    x[is.na(x) | x < min] <- min
    x[!is.na(x) & x > max] <- max
    return(x)
}


test_that("Convert H5T_FLOAT works", {
    dtype_double <- h5types$H5T_NATIVE_DOUBLE
    dtype_float <- h5types$H5T_NATIVE_FLOAT
    ## with double input
    Robj <- (-10:10) / 2
    res_double <- convertRoundTrip(Robj, dtype_double)
    res_float <- convertRoundTrip(Robj, dtype_float)
    expect_equal(res_double$input, res_double$output)

    ## with integer input
    Robj_int <- -10:10
    res_int_double <- convertRoundTrip(Robj_int, dtype_double)
    res_int_float <- convertRoundTrip(Robj_int, dtype_float)
    expect_equal(as.double(res_int_double$input), res_int_double$output)
    expect_equal(as.double(res_int_float$input), res_int_float$output)

    ## with 64bit integer input
    Robj_int64 <- as.integer64(-10:10)
    res_int64_double <- convertRoundTrip(Robj_int64, dtype_double)
    res_int64_float <- convertRoundTrip(Robj_int64, dtype_float)
    expect_equal(as.double(res_int64_double$input), res_int64_double$output)
    expect_equal(as.double(res_int64_float$input), res_int64_float$output)
    
})


## Some constants relevant for integers here
SHORT_MIN <- -2^15
SHORT_MAX <- 2^15 - 1
INT_MIN <- as.integer64(-2^31)
INT_MAX <- as.integer64(2^31 - 1)
UINT_MIN <- as.integer64(0)
UINT_MAX <- as.integer64(2^32-1)
INT64_MAX <- as.integer64(2)^62 - 1 + as.integer64(2)^62

test_that("Convert H5T_INTEGER works", {
    dtype_int64 <- h5types$H5T_NATIVE_LLONG
    dtype_uint64 <- h5types$H5T_NATIVE_ULLONG
    dtype_int <- h5types$H5T_NATIVE_INT
    dtype_uint <- h5types$H5T_NATIVE_UINT
    dtype_short <- h5types$H5T_NATIVE_SHORT
    dtype_double <- h5types$H5T_NATIVE_DOUBLE

    robj_int64 <- c(as.integer64(c(-1, 1, 2^31-1, 2^31)), as.integer(2)^62, suppressWarnings(as.integer64(2^64)))
    robj_double <- c(-1, 1, 2^31-1, 2^31, 2^62, 2^64, 2^64+2^30)
    robj_double_int64 <- suppressWarnings(as.integer64(robj_double))
    robj_double_int64[is.na(robj_double_int64)] <- INT64_MAX
    robj_double_int64[robj_double_int64 < 0] <- 0
    
    res_double_uint64 <- convertRoundTrip(robj_double, dtype_uint64)
    res_int64_int64 <- convertRoundTrip(robj_int64, dtype_int64)
    res_int64_int <- suppressWarnings(convertRoundTrip(robj_int64, dtype_int))
    res_int64_uint <- suppressWarnings(convertRoundTrip(robj_int64, dtype_uint))
    res_int64_short <- suppressWarnings(convertRoundTrip(robj_int64, dtype_short))
    suppressWarnings(res_double_int64 <- convertRoundTrip(robj_double, dtype_int64))
    suppressWarnings(res_double_int <- convertRoundTrip(robj_double, dtype_int))
    suppressWarnings(res_double_uint <- convertRoundTrip(robj_double, dtype_uint))
    suppressWarnings(res_double_short <- convertRoundTrip(robj_double, dtype_short))

    expect_equal(res_int64_int64$input, res_int64_int64$output)
    expect_equal(suppressWarnings(as.integer(res_int64_int$input)), res_int64_int$output)
    expect_equal(as.numeric(truncateVec(res_int64_uint$input, UINT_MIN, UINT_MAX)), res_int64_uint$output)
    expect_equal(as.integer(truncateVec(res_int64_short$input, SHORT_MIN, SHORT_MAX)), res_int64_short$output)
    expect_true(sum(res_double_uint64$output - robj_double_int64) == 0)
})

test_that("Convert H5T_ENUM works", {
    ## Need to test logical, factors and enums with arbitrary values
    obj_logical <- c(FALSE, TRUE)
    obj_factor <- factor(paste("test", 1:10, sep=""))
    obj_factor_ext <- factor_ext(rep(-10:10, 2), levels=paste("test", -10:10, sep=""))
    
    ## object intended to provoke an error
    obj_error <- obj_factor_ext
    attr(obj_error, "values") <- 0:10
    obj_character <- paste("test", 1:10)

    ## create the enums for these cases
    enum_logical <- H5T_LOGICAL$new(include_NA=TRUE)
    enum_factor <- H5T_ENUM$new(labels=levels(obj_factor))
    enum_factor_ext <- H5T_ENUM$new(labels=attr(obj_factor_ext, "levels"), values=attr(obj_factor_ext, "values"))
    
    expect_equal(enum_logical$get_values(), as.integer(c(0,1,2)))
    expect_equal(enum_factor$get_values(), 1:10)
    expect_equal(enum_factor_ext$get_values(), -10:10)

    expect_equal(enum_logical$get_labels(), c("FALSE", "TRUE", "NA"))
    expect_equal(enum_factor$get_labels(), levels(obj_factor))
    expect_equal(enum_factor_ext$get_labels(), levels(obj_factor_ext))

    library(utils)
    if(compareVersion(h5version(verbose=FALSE), "1.8.16") >= 0) {
        expect_equal(enum_logical$get_precision(), 2)
        expect_equal(enum_factor$get_precision(), 4)
        expect_equal(enum_factor_ext$get_precision(), 5)
    }
    else { ## in 1.8.14 and below, there is an issue getting the native version of an enum
           ## that is based on an non-native type
        expect_equal(enum_logical$get_precision(), 8)
        expect_equal(enum_factor$get_precision(), 8)
        expect_equal(enum_factor_ext$get_precision(), 8)
    }
    
    res_logical <- convertRoundTrip(obj_logical, enum_logical)
    res_factor <- convertRoundTrip(obj_factor, enum_factor)
    res_factor_ext <- convertRoundTrip(obj_factor_ext, enum_factor_ext)
    expect_equal(res_logical$input, res_logical$output)
    expect_equal(res_factor$input, res_factor$output)
    expect_equal(res_factor_ext$input, res_factor_ext$output)

    res_error <- try(convertRoundTrip(obj_error, enum_factor_ext), silent=TRUE)
    res_character <- try(convertRoundTrip(obj_character, enum_factor_ext), silent=TRUE)
    expect_true(inherits(res_error, "try-error"))
    expect_true(inherits(res_character, "try-error"))

    ## check that enums work appropriate with 64 bit values
    enum_64bit <- H5T_ENUM$new(labels="Test", values=INT_MAX + 1)
    expect_equal(enum_64bit$get_values(), INT_MAX + 1)
})

test_that("Convert H5T_STRING works", {
#    string1.utf8 <- "\x46\x6F\x6F\x20\xC2\xA9\x20\x62\x61\x72\x20\xF0\x9D\x8C\x86\x20\x62\x61\x7A\x20\xE2\x98\x83\x20\x71\x75\x78"
#    Encoding(string1.utf8) <- "UTF-8"
    string.latin1 <- "H\xF6her w\xE4re sch\xF6ner!"
    Encoding(string.latin1) <- "latin1"
    string.utf8 <- enc2utf8(string.latin1)

    stringVec <- paste("test", 1:10)

    dtype_string_unknown <- H5T_STRING$new()$set_size(30)$set_cset("unknown")
    dtype_string_utf8 <- H5T_STRING$new()$set_size(30)$set_cset("UTF-8")
    dtype_string_short_unknown <- H5T_STRING$new()$set_size(10)$set_cset(h5const$H5T_CSET_ASCII)
    dtype_string_short_utf8 <- H5T_STRING$new()$set_size(10)$set_cset(h5const$H5T_CSET_UTF8)
    dtype_string_variable_unknown <- H5T_STRING$new()$set_size(Inf)$set_cset(h5const$H5T_CSET_ASCII)
    dtype_string_variable_utf8 <- H5T_STRING$new()$set_size(Inf)$set_cset(h5const$H5T_CSET_UTF8)

##    res_string1_utf8_unknown <- convertRoundTrip(string1.utf8, dtype_string_unknown)
##    res_string1_utf8_utf8 <- convertRoundTrip(string1.utf8, dtype_string_utf8)
##    res_string1_utf8_short_unknown <- convertRoundTrip(string1.utf8, dtype_string_short_unknown)
##    res_string1_utf8_short_utf8 <- convertRoundTrip(string1.utf8, dtype_string_short_utf8)
##    res_string1_utf8_variable_unknown <- convertRoundTrip(string1.utf8, dtype_string_variable_unknown)
##    res_string1_utf8_variable_utf8 <- convertRoundTrip(string1.utf8, dtype_string_variable_utf8)

    res_string_utf8_unknown <- convertRoundTrip(string.utf8, dtype_string_unknown)
    res_string_utf8_utf8 <- convertRoundTrip(string.utf8, dtype_string_utf8)
    res_string_utf8_short_unknown <- convertRoundTrip(string.utf8, dtype_string_short_unknown)
    res_string_utf8_short_utf8 <- convertRoundTrip(string.utf8, dtype_string_short_utf8)
    res_string_utf8_variable_unknown <- convertRoundTrip(string.utf8, dtype_string_variable_unknown)
    res_string_utf8_variable_utf8 <- convertRoundTrip(string.utf8, dtype_string_variable_utf8)

    res_string_latin1_unknown <- convertRoundTrip(string.latin1, dtype_string_unknown)
    res_string_latin1_utf8 <- convertRoundTrip(string.latin1, dtype_string_utf8)
    res_string_latin1_short_unknown <- convertRoundTrip(string.latin1, dtype_string_short_unknown)
    res_string_latin1_short_utf8 <- convertRoundTrip(string.latin1, dtype_string_short_utf8)
    res_string_latin1_variable_unknown <- convertRoundTrip(string.latin1, dtype_string_variable_unknown)
    res_string_latin1_variable_utf8 <- convertRoundTrip(string.latin1, dtype_string_variable_utf8)

    ## now check only the ones where we expect equality depending on the locale
    if(l10n_info()[["UTF-8"]]) { ## check if the current locale is UTF-8
        expect_equal(res_string_utf8_unknown$input, res_string_utf8_unknown$output)
        expect_equal(res_string_utf8_variable_unknown$input, res_string_utf8_variable_unknown$output)
    }
    if(l10n_info()[["Latin-1"]]) {
        expect_equal(res_string_latin1_unknown$input, res_string_latin1_unknown$output)
        expect_equal(res_string_latin1_variable_unknown$input, res_string_latin1_variable_unknown$output)
    }
    ## now the ones where we always expect equality, because the utf-8 charset is specified in the dtype
    expect_equal(res_string_utf8_utf8$input, res_string_utf8_utf8$output)
    expect_equal(res_string_utf8_variable_utf8$input, res_string_latin1_variable_utf8$output)
    expect_equal(res_string_latin1_utf8$input, res_string_utf8_utf8$output)
    expect_equal(res_string_latin1_variable_utf8$input, res_string_latin1_variable_utf8$output)

    ## for the short, we just check that the length of the returned string is 10
    expect_equal(length(charToRaw(res_string_utf8_short_utf8$output)), 10)
    expect_equal(length(charToRaw(res_string_utf8_short_unknown$output)), 10)
    expect_equal(length(charToRaw(res_string_latin1_short_utf8$output)), 10)
    expect_equal(length(charToRaw(res_string_latin1_short_utf8$output)), 10)

    ## convert an entire vector
    res_stringVec_fixed <- convertRoundTrip(stringVec, dtype_string_utf8)
    res_stringVec_variable <- convertRoundTrip(stringVec, dtype_string_variable_utf8)
    expect_equal(res_stringVec_fixed$input, res_stringVec_fixed$output)
    expect_equal(res_stringVec_variable$input, res_stringVec_variable$output)

    ## convert a string that is NA
    string_NA <- as.character(NA)
    res_stringNA <- convertRoundTrip(string_NA, dtype_string_variable_utf8)
    expect_equal("NA", res_stringNA$output)
})


test_that("Convert R_Complex works", {
    r_cmplx <- complex(real=-5:5, imaginary = 5:(-5))
    dtype_complex <- H5T_COMPLEX$new()
    res <- convertRoundTrip(r_cmplx, dtype_complex)
})


test_that("Convert H5T_COMPOUND works", {
    test_frame <- data.frame(a=1:3, b=4:6)
    test_frame_frame <- test_frame
    test_frame_frame$c <- test_frame

    dtype_int <- h5types$H5T_NATIVE_SHORT
    dtype_cpd <- H5T_COMPOUND$new(dtypes=list(dtype_int, dtype_int), labels=c("a", "b"))
    dtype_cpd_cpd <- H5T_COMPOUND$new(dtypes=list(dtype_int, dtype_int, dtype_cpd), labels=c("a", "b", "c"))

    res_test <- convertRoundTrip(test_frame, dtype_cpd)
    res_test2 <- convertRoundTrip(test_frame_frame, dtype_cpd_cpd)
    
    expect_equal(res_test$input, res_test$output)
    expect_equal(res_test2$input, res_test2$output)
})


test_that("Convert H5T_ARRAY works", {
    dtype_array <- H5T_ARRAY$new(dtype_base=h5types$H5T_NATIVE_LLONG, dims=c(2,3))
    test_data <- array(1:120, dim=c(20,2,3))
    res_test <- convertRoundTrip(test_data, dtype_array, nelem=20)
    expect_equal(as.integer(res_test$input), res_test$output)
    expect_equal(dtype_array$get_array_ndims(), 2)
    expect_equal(dtype_array$get_array_dims(), c(2,3))
})


test_that("Convert H5T_VLEN works", {
    dtype_vlen <- H5T_VLEN$new(dtype_base=h5types$H5T_NATIVE_LLONG)
    test_data <- list(a=1:4, b=3:12, c=c(-1.5, 2.5))
    res_test <- convertRoundTrip(test_data, dtype_vlen)
    res_test_input_as_int <- unname(lapply(res_test$input, as.integer))
    expect_equal(res_test_input_as_int, res_test$output)
})


test_that("guess_dtype", {
    ## test array types
    int_array <- guess_dtype(as.integer(1:4), scalar=TRUE)
    expect_equal(int_array$get_class(), h5const$H5T_ARRAY)
    expect_equal(int_array$get_super()$get_class(), h5const$H5T_INTEGER)
    expect_equal(int_array$get_array_dims(), 4)
    int_array2 <- guess_dtype(matrix(as.integer(1:4), ncol=2), scalar=TRUE)
    expect_equal(int_array2$get_class(), h5const$H5T_ARRAY)
    expect_equal(int_array2$get_super()$get_class(), h5const$H5T_INTEGER)
    expect_equal(int_array2$get_array_dims(), c(2,2))

    ## test data frame
    cpd_type <- guess_dtype(data.frame(a=1:4, b=5:8/2), scalar=FALSE)
    expect_equal(cpd_type$get_class(), h5const$H5T_COMPOUND)
    expect_equal(cpd_type$get_cpd_labels(), c("a", "b"))
    expect_equal(cpd_type$get_cpd_classes(), c(h5const$H5T_INTEGER, h5const$H5T_FLOAT))
    
    ## test list
    list_equal_length_type <- guess_dtype(list(a=1:4, b=5:8/2), scalar=FALSE)
    expect_equal(list_equal_length_type$get_class(), h5const$H5T_COMPOUND)

    list_unequal_length_type <- guess_dtype(list(a=1:4, b=1:8), scalar=FALSE)
    expect_equal(list_unequal_length_type$get_class(), h5const$H5T_VLEN)
    
    ## test factor_ext
    factor_ext_type <- guess_dtype(factor_ext(c("a", "b"), values=2:3))
    expect_equal(factor_ext_type$get_class(), h5const$H5T_ENUM)
    
    ## test factor
    factor_ext_type <- guess_dtype(factor(c("a", "b")))
    expect_equal(factor_ext_type$get_class(), h5const$H5T_ENUM)

    ## test character
    char_type <- guess_dtype("asdf", string_len="estimate")
    expect_equal(char_type$get_class(), h5const$H5T_STRING)
    expect_equal(char_type$get_size(), 5)

    char_10_type <- guess_dtype("asdf", string_len = 10)
    expect_equal(char_10_type$get_class(), h5const$H5T_STRING)
    expect_equal(char_10_type$get_size(), 10)

    char_Inf_type <- guess_dtype("asdf", string_len = Inf)
    expect_equal(char_Inf_type$get_class(), h5const$H5T_STRING)
    expect_equal(char_Inf_type$get_size(), Inf)

    ## test logical
    logical_type <- guess_dtype(TRUE)
    expect_equal(logical_type$get_class(), h5const$H5T_ENUM)
    expect_equal(logical_type$get_labels(), c("FALSE", "TRUE", "NA"))
    
    ## test integer64
    int64_type <- guess_dtype(as.integer64(1))
    expect_equal(int64_type$get_class(), h5const$H5T_INTEGER)
    expect_equal(int64_type$get_size(), 8)
    
    ## test integer
    int_type <- guess_dtype(as.integer(1))
    expect_equal(int_type$get_class(), h5const$H5T_INTEGER)
    expect_equal(int_type$get_size(), 4)

    ## test numeric
    double_type <- guess_dtype(as.double(1))
    expect_equal(double_type$get_class(), h5const$H5T_FLOAT)
    expect_equal(double_type$get_size(), 8)

    ## test complex
    complex_type <- guess_dtype(complex(length.out=1, real=1, imaginary=1))
    expect_equal(complex_type$get_class(), h5const$H5T_COMPOUND)
    expect_equal(complex_type$get_cpd_labels(), c("Real", "Imaginary"))

    ## test matrix; here was a bug where it always created an array for a matrix
    ## that is why we have the test here
    int_dtype <- guess_dtype(matrix(as.integer(1:10), ncol=2))
    expect_equal(int_dtype$get_class(), h5const$H5T_INTEGER)
})

test_that("guess_space", {
    Robj <- matrix(1:100, nrow=10)
    dtype_int <- h5types$H5T_NATIVE_INT
    dtype_array <- H5T_ARRAY$new(dims=10, dtype_base=dtype_int)

    space_int_chunked <- guess_space(Robj, dtype_int, chunked=TRUE)
    expect_equal(space_int_chunked$get_simple_extent_dims(), list(rank=2, dims=c(10,10), maxdims=c(Inf, Inf)))
    space_int_unchunked <- guess_space(Robj, dtype_int, chunked=FALSE)
    expect_equal(space_int_unchunked$get_simple_extent_dims(), list(rank=2, dims=c(10,10), maxdims=c(10, 10)))
    space_array_chunked <- guess_space(Robj, dtype_array, chunked=TRUE)
    expect_equal(space_array_chunked$get_simple_extent_dims(), list(rank=1, dims=c(10), maxdims=c(Inf)))
    space_array_unchunked <- guess_space(Robj, dtype_array, chunked=FALSE)
    expect_equal(space_array_unchunked$get_simple_extent_dims(), list(rank=1, dims=c(10), maxdims=c(10)))
})


test_that("guess_chunks", {
    expect_equal(guess_chunks(c(10, 10), 8, 8 * 200), c(10,10))
    expect_equal(guess_chunks(c(100, 100), 8, 8 * 200), c(15,15))

    expect_equal(guess_chunks(c(5, 2, 4), 8, 8 * 200), c(5, 2, 4))
    expect_equal(guess_chunks(c(5, 2, 50), 8, 8 * 200), c(5, 2, 20))
    expect_equal(guess_chunks(c(50, 2, 50), 8, 8 * 200), c(10, 2, 10))
})

test_that("reorder", {
    array_rank3 <- array(as.integer(1:60), dim=c(3,4,5))
    ## item size is 4 bytes per integer
    array_reorder_c <- array_reorder(array_rank3, c(3,4,5), 2, 4:1, 4)
    array_reorder_r <- array_rank3[, 4:1,]
    expect_equal(array_reorder_c, array_reorder_r)
})


test_that("Logical with and without NA", {
    logical_NA <- c(TRUE, FALSE, NA)
    logical_noNA <- c(TRUE, FALSE)

    dtype_logical_noNA <- h5types$H5T_LOGICAL
    dtype_logical_NA <- h5types$H5T_LOGICAL_NA

    res_noNA_noNA <- convertRoundTrip(logical_noNA, dtype_logical_noNA)  
    res_NA_noNA <- try(convertRoundTrip(logical_NA, dtype_logical_noNA), silent=TRUE)  
    res_noNA_NA <- convertRoundTrip(logical_noNA, dtype_logical_NA)  
    res_NA_NA <- convertRoundTrip(logical_NA, dtype_logical_NA)

    expect_equal(res_noNA_noNA$input, res_noNA_noNA$output)
    expect_true(inherits(res_NA_noNA, "try-error"));
    expect_equal(res_noNA_NA$input, res_noNA_NA$output)
    expect_equal(res_NA_NA$input, res_NA_NA$output)
})

Try the hdf5r package in your browser

Any scripts or data that you put into this service are public.

hdf5r documentation built on Jan. 22, 2023, 1:12 a.m.