tests/test-hash.mapping.R

# Checks that the indices reported by `hash.mapping()` agree with the
# non-empty columns of the matrix built by `hashed.model.matrix()`.
# The body only runs when RUnit can be attached; otherwise nothing is checked.
if (require(RUnit)) {
  library(FeatureHashing)
  data(test.tag)
  # NOTE(review): nothing visible below draws random numbers (`b` is spelled
  # out literally), so this seed looks like a leftover from generating `b`;
  # confirm before removing.
  set.seed(1)

  # Hash size passed to hashed.model.matrix(); the checks below expect the
  # result to have exactly this many columns.
  hash_size <- 2^6

  # Fixed numeric column, written out literally so the fixture is stable.
  b_values <- c(
    -0.626453810742332, 0.183643324222082, -0.835628612410047,
    1.59528080213779, 0.329507771815361, -0.820468384118015, 0.487429052428485,
    0.738324705129217, 0.575781351653492, -0.305388387156356, 1.51178116845085,
    0.389843236411431, -0.621240580541804, -2.2146998871775, 1.12493091814311,
    -0.0449336090152309, -0.0161902630989461, 0.943836210685299,
    0.821221195098089, 0.593901321217509, 0.918977371608218, 0.782136300731067,
    0.0745649833651906, -1.98935169586337, 0.61982574789471, -0.0561287395290008,
    -0.155795506705329, -1.47075238389927, -0.47815005510862, 0.417941560199702,
    1.35867955152904, -0.102787727342996, 0.387671611559369, -0.0538050405829051,
    -1.37705955682861, -0.41499456329968, -0.394289953710349, -0.0593133967111857,
    1.10002537198388, 0.763175748457544, -0.164523596253587, -0.253361680136508,
    0.696963375404737, 0.556663198673657, -0.68875569454952, -0.70749515696212,
    0.36458196213683, 0.768532924515416, -0.112346212150228, 0.881107726454215,
    0.398105880367068, -0.612026393250771, 0.341119691424425, -1.12936309608079,
    1.43302370170104, 1.98039989850586, -0.367221476466509, -1.04413462631653,
    0.569719627442413, -0.135054603880824, 2.40161776050478, -0.0392400027331692,
    0.689739362450777, 0.0280021587806661, -0.743273208882405, 0.188792299514343,
    -1.80495862889104, 1.46555486156289, 0.153253338211898, 2.17261167036215,
    0.475509528899663, -0.709946430921815, 0.610726353489055, -0.934097631644252,
    -1.2536334002391, 0.291446235517463, -0.443291873218433, 0.00110535163162413,
    0.0743413241516641, -0.589520946188072, -0.568668732818502, -0.135178615123832,
    1.1780869965732, -1.52356680042976, 0.593946187628422, 0.332950371213518,
    1.06309983727636, -0.304183923634301, 0.370018809916288, 0.267098790772231,
    -0.54252003099165, 1.20786780598317, 1.16040261569495, 0.700213649514998,
    1.58683345454085, 0.558486425565304, -1.27659220845804, -0.573265414236886,
    -1.22461261489836, -0.473400636439312
  )
  df <- data.frame(a = test.tag, b = b_values)

  # Hash the comma-separated tags in `a` plus the numeric column `b`, without
  # an intercept, keeping the mapping so it can be inspected afterwards.
  m <- hashed.model.matrix(
    ~ split(a, delim = ",", type = "existence") + b - 1, df, hash_size,
    create.mapping = TRUE, transpose = FALSE, is.dgCMatrix = TRUE
  )
  mapping <- hash.mapping(m)

  # The column-pointer slot has one entry per column plus one.
  checkTrue(length(m@p) == hash_size + 1)
  # Every mapped index must be a valid column of the hashed matrix.
  checkTrue(max(mapping) <= hash_size)
  # Columns holding stored entries vs. distinct mapped indices may differ by
  # at most one. NOTE(review): the reason for the tolerance of 1 is not
  # visible here -- confirm against the package before tightening.
  checkTrue(abs(sum(diff(m@p) != 0) - length(unique(mapping))) <= 1,
            "The hash mapping returned by `hash.mapping` is incorrect")

  # Absolute column sums, computed once and shared by both checks below.
  col_mass <- apply(m, 2, function(v) sum(abs(v)))
  # Every mapped column carries some weight ...
  checkTrue(all(col_mass[mapping] > 0))
  # ... and every column carrying weight is accounted for by the mapping.
  checkTrue(all(which(col_mass != 0) %in% mapping))
}

Try the FeatureHashing package in your browser

Any scripts or data that you put into this service are public.

FeatureHashing documentation built on Oct. 31, 2022, 1:06 a.m.