if (require(RUnit)) {
library(FeatureHashing)
mapping_value <- structure(c(3789462177, 4122940517, 1079927366, 1505155248, 4103768016,
1576910802, 248868694, 2189134401, 1321560276, 2636986885, 1980993114,
3588767725, 3873367263, 3437882550, 1125161513, 875000041, 1178743966,
1791688646), .Names = c("PlantQn1", "PlantQn2", "PlantQn3", "uptake",
"TypeMississippi", "Treatmentchilled", "PlantMn1", "PlantMn2",
"PlantMn3", "PlantQc1", "PlantQc2", "PlantQc3", "Treatmentnonchilled",
"PlantMc1", "PlantMc2", "PlantMc3", "conc", "TypeQuebec"))
checkTrue(all(hashed.value(names(mapping_value)) %% 2^32 == mapping_value),
"Unexpected hashing result by hashed.value")
m <- hashed.model.matrix(~ ., CO2, hash.size = 2^10, create.mapping = TRUE, transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE)
mapping <- as.list(attr(m, "mapping"))
checkTrue(all(!duplicated(unlist(mapping) %% 2^10 + 1)),
"Unexpected collision of hashing example")
name.candidate <- lapply(seq_along(CO2), function(i) {
if (class(CO2[[i]])[1] == "numeric") rep(names(CO2)[i], nrow(CO2)) else {
paste0(names(CO2)[i], CO2[[i]])
}
})
for(index in seq_along(mapping)) {
i <- hashed.value(names(mapping))[index] %% 2^10 + 1
name <- names(mapping)[index]
col.i <- which(sapply(name.candidate, function(x) name %in% x))
X <- name.candidate[[col.i]]
j <- which(name == X)
value <- hash.sign(names(mapping))[index]
if (class(CO2[[col.i]])[1] == "numeric") {
value <- value * CO2[[col.i]]
}
x <- m[i,]
checkTrue(all(x[j] == value), "Inconsistent hash value between hashed.value, hash.sign and hashed.model.matrix")
checkTrue(all(x[-j] == 0), "Inconsistent hash value between hashed.value, hash.sign and hashed.model.matrix")
}
m <- hashed.model.matrix(~ .^2, CO2, hash.size = 2^10, create.mapping = TRUE,
transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE)
mapping_value <- hash.mapping(m)
mapping_value.expected <- structure(list(PlantQc1 = 2636986885, PlantQn1 = 3789462177,
PlantQc2 = 1980993114, PlantQn2 = 4122940517, "PlantMc3:conc" = 3739801583,
"PlantMc2:conc" = 1395604525, "PlantMn3:conc" = 3235311896,
"PlantMn2:conc" = 540436913, "PlantQc1:TypeQuebec" = 925725157,
"TypeQuebec:conc" = 2746149015, "PlantQn1:TypeQuebec" = 4237186241,
PlantQc3 = 3588767725, "PlantQn1:conc" = 2288578128, "Treatmentchilled:conc" = 255906650,
PlantQn3 = 1079927366, "PlantQc1:conc" = 2251098928, TypeQuebec = 1791688646,
"PlantMc3:Treatmentchilled" = 3891992855, "PlantMn2:Treatmentnonchilled" = 1166626593,
"TypeQuebec:Treatmentchilled" = 2941354564, "PlantMn1:Treatmentnonchilled" = 3205838849,
Treatmentnonchilled = 3873367263, Treatmentchilled = 1576910802,
"PlantMc2:uptake" = 175028363, "PlantMc1:uptake" = 4172577474,
"PlantMn3:uptake" = 4053818018, "TypeQuebec:uptake" = 3178360588,
"PlantQc3:uptake" = 3937745018, uptake = 1505155248, "PlantMc3:TypeMississippi" = 1611851048,
"PlantMn3:TypeMississippi" = 2164201561, PlantMc1 = 3437882550,
PlantMn1 = 248868694, PlantMc2 = 1125161513, PlantMn2 = 2189134401,
PlantMc3 = 875000041, PlantMn3 = 1321560276, "PlantQn3:conc" = 3023324759,
"TypeMississippi:conc" = 1821057777, "PlantQc2:conc" = 714503009,
"PlantQc3:conc" = 1657455447, "PlantQn2:TypeQuebec" = 2781903365,
"PlantQc2:TypeQuebec" = 51079300, "PlantQn3:TypeQuebec" = 862836424,
"PlantQn2:conc" = 900931883, "PlantQc3:TypeQuebec" = 510730956,
"TypeMississippi:Treatmentchilled" = 2281473014, "PlantMn3:Treatmentnonchilled" = 1193828828,
"PlantQc1:Treatmentchilled" = 4048766167, "PlantQc2:Treatmentchilled" = 2534699902,
"PlantQn2:Treatmentnonchilled" = 1386368423, "PlantQn1:Treatmentnonchilled" = 160472416,
"PlantMc3:uptake" = 712540968, "PlantQn2:uptake" = 974267571,
"Treatmentnonchilled:uptake" = 48404478, "PlantQn1:uptake" = 3757368420,
"conc:uptake" = 375043273, "PlantMc1:conc" = 3318696701,
conc = 1178743966, "Treatmentnonchilled:conc" = 4015153694,
"PlantMn1:conc" = 1646616462, "PlantMc2:Treatmentchilled" = 2148207650,
"PlantMc1:Treatmentchilled" = 3227113198, "PlantQc3:Treatmentchilled" = 3565755962,
"TypeQuebec:Treatmentnonchilled" = 1973117262, "TypeMississippi:Treatmentnonchilled" = 4231588217,
"PlantQn3:Treatmentnonchilled" = 1411718622, "PlantMn2:uptake" = 342355816,
"TypeMississippi:uptake" = 31699673, "PlantQn3:uptake" = 4247950021,
"PlantQc2:uptake" = 3035200074, "Treatmentchilled:uptake" = 3235544148,
"PlantQc1:uptake" = 946686067, "PlantMn1:uptake" = 3186749149,
"PlantMc2:TypeMississippi" = 3984179893, "PlantMc1:TypeMississippi" = 517052919,
"PlantMn2:TypeMississippi" = 2154821378, "PlantMn1:TypeMississippi" = 3688281232,
TypeMississippi = 4103768016), .Names = c("PlantQc1", "PlantQn1",
"PlantQc2", "PlantQn2", "PlantMc3:conc", "PlantMc2:conc", "PlantMn3:conc",
"PlantMn2:conc", "PlantQc1:TypeQuebec", "TypeQuebec:conc", "PlantQn1:TypeQuebec",
"PlantQc3", "PlantQn1:conc", "Treatmentchilled:conc", "PlantQn3",
"PlantQc1:conc", "TypeQuebec", "PlantMc3:Treatmentchilled", "PlantMn2:Treatmentnonchilled",
"TypeQuebec:Treatmentchilled", "PlantMn1:Treatmentnonchilled",
"Treatmentnonchilled", "Treatmentchilled", "PlantMc2:uptake",
"PlantMc1:uptake", "PlantMn3:uptake", "TypeQuebec:uptake", "PlantQc3:uptake",
"uptake", "PlantMc3:TypeMississippi", "PlantMn3:TypeMississippi",
"PlantMc1", "PlantMn1", "PlantMc2", "PlantMn2", "PlantMc3", "PlantMn3",
"PlantQn3:conc", "TypeMississippi:conc", "PlantQc2:conc", "PlantQc3:conc",
"PlantQn2:TypeQuebec", "PlantQc2:TypeQuebec", "PlantQn3:TypeQuebec",
"PlantQn2:conc", "PlantQc3:TypeQuebec", "TypeMississippi:Treatmentchilled",
"PlantMn3:Treatmentnonchilled", "PlantQc1:Treatmentchilled",
"PlantQc2:Treatmentchilled", "PlantQn2:Treatmentnonchilled",
"PlantQn1:Treatmentnonchilled", "PlantMc3:uptake", "PlantQn2:uptake",
"Treatmentnonchilled:uptake", "PlantQn1:uptake", "conc:uptake",
"PlantMc1:conc", "conc", "Treatmentnonchilled:conc", "PlantMn1:conc",
"PlantMc2:Treatmentchilled", "PlantMc1:Treatmentchilled", "PlantQc3:Treatmentchilled",
"TypeQuebec:Treatmentnonchilled", "TypeMississippi:Treatmentnonchilled",
"PlantQn3:Treatmentnonchilled", "PlantMn2:uptake", "TypeMississippi:uptake",
"PlantQn3:uptake", "PlantQc2:uptake", "Treatmentchilled:uptake",
"PlantQc1:uptake", "PlantMn1:uptake", "PlantMc2:TypeMississippi",
"PlantMc1:TypeMississippi", "PlantMn2:TypeMississippi", "PlantMn1:TypeMississippi",
"TypeMississippi"))
mapping_value.expected <- unlist(mapping_value.expected) %% 2^10 + 1
mapping_value[names(mapping_value.expected)]
checkTrue(isTRUE(all.equal(mapping_value[names(mapping_value.expected)],
mapping_value.expected)),
"Unexpected hashing result of interaction term")
m2 <- hashed.model.matrix(~ . ^ 2, data = CO2, hash.size = 32, create.mapping = TRUE,
transpose = TRUE, is.dgCMatrix = FALSE, signed.hash = TRUE)
checkTrue(!all(m2@i == 0),
"All hashed indices created by hashed.model.matrix are zero")
checkTrue(sum(m2 %*% rep(1, ncol(m2)) != 0) > 1,
"Incorrect hashed matrix created by hashed.model.matrix")
mapping <- hash.mapping(m2)
mapping.raw <- hashed.value(names(mapping))
names(mapping.raw) <- names(mapping)
is.interaction <- grepl(":", names(mapping), fixed = TRUE)
checkTrue(all(mapping[!is.interaction] == mapping.raw[!is.interaction] %% 32 + 1))
checkTrue(sum(is.na(unlist(lapply(names(mapping), strsplit, "")))) == 0,
"Non-ascii name occurs!")
for(i in grep(":", names(mapping), fixed = TRUE)) {
name <- names(mapping)[i]
key <- strsplit(name, ":")[[1]]
input <- unlist(lapply(mapping.raw[key], intToRaw), use.names = FALSE)
r1 <- hashed.value(rawToChar(input))
checkTrue(r1 %% 32 + 1== mapping[name],
"The hashing result of interaction is not expected!")
}
# check handling of NA
tryCatch(m <- hashed.model.matrix(~ PlAnT, CO2, 8, signed.hash = TRUE,
transpose = TRUE, is.dgCMatrix = FALSE), error = function(e) {
if (class(e)[1] != "std::invalid_argument") stop(e)
if (conditionMessage(e) != "Failed to find the column:PlAnT") stop(e)
})
m <- hashed.model.matrix(~ Plant:Type:Treatment, CO2, create.mapping = TRUE, signed.hash = TRUE)
map <- hash.mapping(m)
map <- map[grepl("\\w+:\\w+:\\w+", names(map))]
checkTrue(all(hashed.interaction.value(names(map)) %% (2^18) + 1== map),
"The hashed.interaction.value gives inconsistent result of hashed.model.matrix")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.