R/splitstats.R
In itree: Tools for classification and regression trees, with an emphasis on interpretability.

Documented in splitstats

## methods for computing various statistics of a tree's splits.

topNsplitvars <- function(tree,topn=0){
# Internal helper.
# Returns the split variables of every internal (non-leaf) node whose depth
# is at most topn, ordered from shallowest to deepest. topn=0 gives only the
# root split.
	if(!inherits(tree, "itree")) stop("Not legitimate itree object")

	frame <- tree$frame
	internal <- frame[,1]!="<leaf>"

	# The frame's rownames hold the node numbers; floor(log2(number)) is used
	# as the node's depth (node 1 -> 0, nodes 2-3 -> 1, ...).
	node.depth <- floor(log(as.numeric(rownames(frame)),base=2))[internal]

	# Compute the depth ordering once and apply it to both vectors.
	by.depth <- order(node.depth)
	vars <- frame[internal,1][by.depth]

	vars[node.depth[by.depth] <= topn]
}

splitstats <- function(tree,featlist=NULL){
# Externally available function.
# Summarises how each feature in featlist is used by the splits of a tree.
#
# Arguments:
#   tree     - an object inheriting from class "itree".
#   featlist - character vector of feature names to report on. If NULL then
#              the features are taken from the term labels of tree$terms.
#
# Returns a data.frame with one row per entry of featlist (same order) and
# the columns:
#   var             - the feature name (factor)
#   split.ct        - number of splits that use the feature
#   total.inv.depth - sum over the feature's splits of 1/(depth+1),
#                     normalized so that all splits in the tree sum to 1
#                     (higher = more important, 0 = not in the tree)
#   total.node.size - sum over the feature's splits of the node size n,
#                     normalized so that all splits in the tree sum to 1
#   isroot          - 1 if the feature is the root split, else 0
#   isdepth1        - 1 if the feature splits a depth-1 node, else 0
#
# Split variables that are not listed in featlist are skipped; they still
# count towards the normalizing totals. A tree without any split yields a
# table of zeros.
	if(!inherits(tree, "itree"))
		stop("'tree' is not a legitimate itree object!")

	if(is.null(featlist)){
		featlist <- attr(tree$terms, "term.labels")
	}

	ff <- tree$frame
	is.split <- ff[,1]!="<leaf>"

	# The rowname of the frame is the node number. Children of node k are
	# numbered 2k and 2k+1, so the depth is floor(log2(k)); rounding instead
	# would push nodes such as 3, 6 or 7 one level too deep.
	nn <- as.numeric(rownames(ff))
	depth <- floor(log2(nn[is.split]))

	splitvar <- as.character(ff[is.split,1])  #get rid of the leaves
	nodesize <- ff$n[is.split]

	#columns: 1=var (filled in below), 2=count, 3=inverse depth,
	#4=node size, 5=isroot, 6=isdepth1
	statmat <- matrix(0,nrow=length(featlist),ncol=6)

	#row of statmat in which each split is recorded (NA = not in featlist)
	row <- match(splitvar, featlist)
	known <- !is.na(row)

	if(any(known)){
		inv.depth <- 1/(depth+1)
		inv.depth <- inv.depth/sum(inv.depth)
		nodesize <- nodesize/sum(nodesize)

		#per-variable totals of count, inverse depth and node size in one pass
		per.split <- cbind(1, inv.depth, nodesize)[known, , drop=FALSE]
		totals <- rowsum(per.split, group=row[known])
		statmat[as.integer(rownames(totals)),2:4] <- totals

		#root and depth=1; an empty index simply assigns nothing, so a
		#single-split tree leaves isdepth1 at 0 everywhere.
		statmat[row[known & depth==0],5] <- 1
		statmat[row[known & depth==1],6] <- 1
	}

	data.frame(
		var = as.factor(featlist),
		split.ct = statmat[,2],
		total.inv.depth = statmat[,3],
		total.node.size = statmat[,4],
		isroot = statmat[,5],
		isdepth1 = statmat[,6]
	)
}

Any scripts or data that you put into this service are public.

itree documentation built on May 2, 2019, 7:25 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

itree
Tools for classification and regression trees, with an emphasis on interpretability.

R/splitstats.R
In itree: Tools for classification and regression trees, with an emphasis on interpretability.

Defines functions topNsplitvars splitstats

Documented in splitstats

Try the itree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

itree Tools for classification and regression trees, with an emphasis on interpretability.

R/splitstats.R In itree: Tools for classification and regression trees, with an emphasis on interpretability.

Defines functions topNsplitvars splitstats

Documented in splitstats

Try the itree package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

itree
Tools for classification and regression trees, with an emphasis on interpretability.

R/splitstats.R
In itree: Tools for classification and regression trees, with an emphasis on interpretability.