# R/ritfunContinuous.R In karlkumbier/irfRegression: iterative Random Forests

#### Defines functions getNodeVarsummarize_interactgroupFeaturerandom_intersect_trees_contint2setset2intritfunContinuous

```ritfunContinuous=function(readforest_out, y, tree_depth=5, n_child=2,
n_ritree=500, varnames=NULL, verbose=TRUE,
wt=c('var', 'uniform')){

stopifnot(is.matrix(node_feature))
stopifnot(nrow(node_feature) > 1)

S=set2int(!!node_feature)
M=n_ritree, n_child=n_child, verbose=FALSE, wt=wt)
if (is.null(v)){
if (verbose) print('no interaction detected!')
return(character(0))
} else{
v=int2set(v, p=ncol(node_feature), label=TRUE
, var.names=varnames)
tt=table(v)
tt=sort(tt, decreasing=TRUE)
return(names(tt))
}
}

# input: matrix with sets (binary seq) in rows
# ouput: matrix of integers with nrow(input) rows, ceiling(ncol(output)/32) columns
set2int=function(S){
if (length(dim(S))!=2)
stop('input to set2int() is not a matrix!')

n=nrow(S); p=ncol(S)

store=array(as.integer(0), c(n, n_block))
temp<-foreach(block=1:n_block, .combine=cbind, .multicombine=TRUE)%dopar%{
tmp=packBits(t(S[,((block-1)*32+seq(32))])
, type='integer')
tmp
}
store=as.matrix(temp)
return(store)
}

# input: matrix with integers
int2set=function(v, p, label=FALSE, var.names=NULL){

if (length(dim(v))!=2)
stop('input to int2set() is not a matrix!')

if (is.null(var.names))
var.names=seq(p)

n_block=ncol(v)
S <- foreach(block=1:n_block, .combine=cbind, .multicombine=TRUE)%dopar%{
tmp=matrix(as.integer(intToBits(v[,block])), ncol=32, byrow=TRUE)
tmp
}

# remove trailing zeros
S=S[,1:p]

S=(S==1)
if (!label)
return(S)
else{
if (is.null(dim(S)))
S=as.matrix(t(S), nrow=1)
return(apply(S, 1
, FUN=function(vv){
return(paste(sort((var.names[which(vv)])), collapse='_'))
}
)
)
}
}

random_intersect_trees_cont=function(S, y, readforest_out, D=5, M=100, n_child=3,
verbose=TRUE, wt=NULL){

stopifnot(length(dim(S))==2)
n=nrow(S)
n_block=ncol(S)

# Root node selected with probability proportional to node size or uniformly
if (wt == 'var') {
node_wt <- 1 / getNodeVar(readforest_out\$node_data, y)
} else if (wt == 'uniform') {
node_wt <- rep(1, length(tree_info\$size_node))
}

out=foreach(m=1:M, .combine=rbind, .multicombine=TRUE)%dopar%{
tmp=S[sample(n, 1, prob=node_wt),]
tmp=as.matrix(t(tmp), nrow=1)

for (d in 1:D){
if (is.null(tmp)|(nrow(tmp)==0))
next
j_d=nrow(tmp)
store=matrix(0, nrow=1, ncol=n_block)
for (j in 1:j_d){
select_samples=as.matrix(S[sample(n, n_child, replace=FALSE, prob = node_wt),]);
for (block in 1:n_block){
select_samples[,block]=bitwAnd(tmp[j,block], select_samples[,block])
}
store=rbind(store, select_samples)
}# end for (j in 1:j_d)
store=as.matrix(store[-1,])

na_id=apply(is.na(store), 1, FUN=max)
blank_id=!apply(!!store, 1, FUN=max)
keep_id=!(na_id|blank_id)
tmp=matrix(store[keep_id,], nrow=sum(keep_id))

}# end for (d in 1:D)
if (is.null(tmp)|(nrow(tmp)==0)){
if (verbose){
print(paste('tree ', m, ': no set survived'))
}
tmp=matrix(NA, nrow=1, ncol=n_block)
}
tmp
}# end foreach(m=1:M)

na_id=apply(is.na(out), 1, FUN=max)
blank_id=!apply(!!out, 1, FUN=max)

idx = sum(!(na_id|blank_id))
if (idx == 0){
out=NULL
} else if (idx ==1) {
out = matrix(out[!(na_id|blank_id),], nrow=1)
} else {
out=as.matrix(out[!(na_id|blank_id),])
}
return(out)
}

groupFeature=function(node_feature
, grp
){
grp_names = unique(grp)
node_feature_new = array(FALSE, c(nrow(node_feature), length(grp_names)))
for (g in grp_names){
grp_id = which(grp== g)
if (length(grp_id)==1){
node_feature_new[,grp_names==g] = node_feature[,grp_id]
}
else{
node_feature_new[,grp_names==g] = apply(node_feature[,grp_id], 1, FUN=max)
}
}
node_feature_new = !!node_feature_new
colnames(node_feature_new) <- grp_names

return(node_feature_new)
}

summarize_interact = function(store_out, varnames = NULL){
n_bootstrap = length(store_out)

store = character()

for (b in 1:n_bootstrap){
tmp = store_out[[b]]
if ((length(tmp)>0))
store = c(store, tmp)

}

if (length(store) >= 1){
store_tbl= sort(table(store), decreasing = TRUE)
tt = as.vector(store_tbl)
names(tt) = names(store_tbl)
tt = tt/n_bootstrap
}
else{
return(character(0))
}

if (!is.null(varnames)){
for (j in 1:length(tt)){
tmp = as.numeric(strsplit(names(tt)[j], split='_')[[1]])
names(tt)[j] = paste(varnames[tmp], collapse='_')
}
}
return(tt)
}

getNodeVar <- function(node_data, y) {

nodeVar <- function(in_node)
ifelse(sum(in_node) == 1, 0, var(y[which(in_node == 1)]))
node.vars <- apply(node_data, MAR=1, nodeVar)
return(node.vars)
}
```
karlkumbier/irfRegression documentation built on May 18, 2017, 3:28 p.m.