R/extract.R

Defines functions leaf_scores mi_scores print.vistla_hierarchy hierarchy

Documented in hierarchy leaf_scores mi_scores print.vistla_hierarchy

#' Extract the vertex hierarchy from the vistla tree
#'
#' Traverses the vistla tree in a depth-first order and 
#' lists the visited vertices as a data frame.
#' @param x vistla object.
#' @return A data frame of a class \code{vistla_hierarchy}.
#' @note This function effectively prunes the tree off suboptimal paths.
#' @export
hierarchy<-function(x){
 stopifnot(inherits(x,"vistla"))
 if(sum(x$tree$leaf)==0){
  Q<-data.frame(
   name=x$yn,
   score=NA_real_,
   depth=-1,
   leaf=FALSE,
   prv=NA_integer_
  )
  names(Q)<-c("name","score","depth","leaf","prv")
  class(Q)<-c("vistla_hierarchy","data.frame")
  return(Q)
 }
 Q<-subset_tree(x$tree,x$tree$used)
 which(is.na(Q$a))->ti
 Qt<-Q[ti,]
 Qt[Qt$used,]->Qt
 Qt[!duplicated(Qt$b),]->Qt
 nt<-nrow(Qt)
 Qy<-data.frame(a=NA,b=NA,c=NA,score=NA,depth=-1,leaf=FALSE,used=TRUE,prv=NA)
 Qh<-data.frame(
  a=rep(NA,nt),
  b=rep(NA,nt),
  c=Qt$b,
  score=Qt$score,
  depth=0,
  leaf=FALSE,
  used=TRUE,
  prv=1
 )
 Q$prv<-Q$prv+nt+1
 Q$prv[ti]<-match(Q$b[ti],Qh$c)+1
 rbind(Qy,Qh,Q)->Q

 Q$firstsub<-Q$lastsub<-Q$nxtsib<-rep(NA,nrow(Q))
 for(e in 1:nrow(Q)){
  prv<-Q$prv[e]
  if(!is.na(prv)){
   if(is.na(Q$firstsub[prv])) Q$firstsub[prv]<-e
   if(!is.na(Q$lastsub[prv])) Q$nxtsib[Q$lastsub[prv]]<-e
   Q$lastsub[prv]<-e
  }
 }

 Q$trord<-rep(NA,nrow(Q))
 Q$visited<-rep(FALSE,nrow(Q))
 cur<-1
 ord<-1
 while(!is.na(cur))
  cur<-if(Q$visited[cur]){
   if(!is.na(Q$nxtsib[cur])) Q$nxtsib[cur] else Q$prv[cur]
  }else{
   Q$trord[ord]<-cur
   Q$visited[cur]<-TRUE
   ord<-ord+1
   if(!is.na(Q$firstsub[cur])) 
    Q$firstsub[cur] else
     if(!is.na(Q$nxtsib[cur])) Q$nxtsib[cur] else Q$prv[cur]
  }
 
 Q<-subset_tree(Q,Q$trord)
 Q[,c("c","score","depth","leaf","prv")]->Q
 names(Q)[1]<-"name"
 Q$name<-colnames(x$mi)[Q$name]
 Q$name[is.na(Q$name)]<-x$yn
 rownames(Q)<-NULL
 class(Q)<-c("vistla_hierarchy","data.frame")

 return(Q)
}

#' @rdname print.vistla
#' @method print vistla_hierarchy
#' @export
print.vistla_hierarchy<-function(x,...){
 cat("\n\tVistla hierarchy\n\n")
 V<-'| '
 J<-'+'
 cat(sprintf(
   "%s%s%s%s",
   sapply(x$depth+1,function(dc) paste(rep(V,dc),collapse="")),
   J,x$name,
   ifelse(x$depth>0,sprintf(" (%0.2g)",x$score),"")
  ),sep="\n")
 cat("\n")
 invisible(x)
}

#' Extract mutual information score matrix
#'
#' Produces a matrix \eqn{S} where \eqn{S_{ij}} is a 
#'  value of \eqn{I(X_i;X_j)}.
#' This matrix is always calculated as an initial step of the
#'  vistla algorithm and stored in the vistla object.
#' @param x vistla object.
#' @return A symmetric square matrix with mutual information scores between features and root.
#' @export
mi_scores<-function(x){
 stopifnot(inherits(x,"vistla"))
 rbind(cbind(x$mi,x$miY),c(x$miY,NA))->ans
 colnames(ans)[length(x$miY)+1]<-x$yn
 rownames(ans)[length(x$miY)+1]<-x$yn
 ans
}

#' Extract leaf scores of vertex pairs
#'
#' Produces a matrix \eqn{S} where \eqn{S_{ij}} is a score
#'  of the path ending in vertices \eqn{i} and \eqn{j}.
#' Since vistla works on vertex pairs, this value is unique.
#' This can be interpreted as a feature similarity matrix
#'  in context of the current vistla root.
#' @note This function should be called on an unpruned vistla tree,
#'  otherwise the result will be mostly composed of zeroes.
#' @param x vistla object.
#' @return A square matrix with leaf scores of all feature pairs.
#' @export
leaf_scores<-function(x){
 stopifnot(inherits(x,"vistla"))
 x$mi*0->ans
 ans[as.matrix(x$tree[,c("b","c")])]<-x$tree$score
 ans
}

Try the vistla package in your browser

Any scripts or data that you put into this service are public.

vistla documentation built on Oct. 5, 2023, 5:08 p.m.