R/tools.R

Defines functions getNonRejectedFormula getConfirmedFormula plotImpHistory plot.Boruta generateCol TentativeRoughFix getSelectedAttributes attStats

Documented in attStats getConfirmedFormula getNonRejectedFormula getSelectedAttributes plot.Boruta plotImpHistory TentativeRoughFix

# Supplementary routines for Boruta.

#' Extract attribute statistics
#'
#' \code{attStats} shows a summary of a Boruta run in an attribute-centred way.
#' It produces a data frame containing some importance stats as well as the number of hits that attribute scored and the decision it was given.
#' @param x an object of a class Boruta, from which attribute stats should be extracted.
#' @return A data frame containing, for each attribute that was originally in information system, mean, median, maximal and minimal importance, number of hits normalised to number of importance source runs performed and the decision copied from \code{finalDecision}.
#' @note  When using a Boruta object generated by a \code{\link{TentativeRoughFix}}, the resulting data frame will consist a rough-fixed decision.
#' @note \code{x} has to be made with \code{holdHistory} set to \code{TRUE} for this code to run.
#' @export
#' @examples
#' \dontrun{
#' library(mlbench); data(Sonar)
#' #Takes some time, so be patient
#' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
#' print(Bor.son)
#' stats<-attStats(Bor.son)
#' print(stats)
#' plot(normHits~meanImp,col=stats$decision,data=stats)
#' }
attStats<-function(x){
 stopifnot(inherits(x,'Boruta'))
 if(is.null(x$ImpHistory))
  stop('Importance history was not stored during the Boruta run.')
 lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
 colnames(x$ImpHistory)->names(lz)
 mr<-lz$shadowMax; lz[1:(length(lz)-3)]->lz
 t(sapply(lz,function(x) c(mean(x),stats::median(x),min(x),max(x),sum(mr[1:length(x)]<x)/length(mr))))->st
 st<-data.frame(st,x$finalDecision)
 names(st)<-c("meanImp","medianImp","minImp","maxImp","normHits","decision")
 return(st)
}

#' Extract names of the selected attributes
#'
#' \code{getSelectedAttributes} returns a vector of names of attributes selected during a Boruta run.
#' @param x an object of a class Boruta, from which relevant attributes names should be extracted.
#' @param withTentative if set to \code{TRUE}, Tentative attributes will be also returned.
#' @return A character vector with names of the relevant attributes.
#' @export
#' @examples
#' \dontrun{
#' data(iris)
#' #Takes some time, so be patient
#' Boruta(Species~.,data=iris,doTrace=2)->Bor.iris
#' print(Bor.iris)
#' print(getSelectedAttributes(Bor.iris))
#' }
getSelectedAttributes<-function(x,withTentative=FALSE){
 stopifnot(inherits(x,'Boruta'))
 names(x$finalDecision)[
  x$finalDecision%in%(if(!withTentative) "Confirmed" else c("Confirmed","Tentative"))
 ]
}

#' Rough fix of Tentative attributes
#'
#' In some circumstances (too short Boruta run, unfortunate mixing of shadow attributes, tricky dataset\ldots), Boruta can leave some attributes Tentative.
#' \code{TentativeRoughFix} performs a simplified, weaker test for judging such attributes.
#' @param x an object of a class Boruta.
#' @param averageOver Either number of last importance source runs to
#' average over or Inf for averaging over the whole Boruta run.
#' @return A Boruta class object with modified \code{finalDecision} element.
#' Such object has few additional elements:
#' \item{originalDecision}{Original \code{finalDecision}.}
#' \item{averageOver}{Copy of \code{averageOver} parameter.}
#' @details Function claims as Confirmed those attributes that
#' have median importance higher than the median importance of
#' maximal shadow attribute, and the rest as Rejected.
#' Depending of the user choice, medians for the test
#' are count over last round, all rounds or N last
#' importance source runs.
#' @note This function should be used only when strict decision is
#' highly desired, because this test is much weaker than Boruta
#' and can lower the confidence of the final result.
#' @note \code{x} has to be made with \code{holdHistory} set to
#' \code{TRUE} for this code to run.
#' @export
TentativeRoughFix<-function(x,averageOver=Inf){
 stopifnot(inherits(x,'Boruta'))
 if(is.null(x$ImpHistory))
  stop('Importance history was not stored during the Boruta run.')
 if(!is.numeric(averageOver))
  stop('averageOver should be a numeric vector.')
 if(length(averageOver)!=1)
  stop('averageOver should be a one-element vector.')
 if(averageOver<1)
  stop('averageOver should be positive.')

 tentIdx<-which(x$finalDecision=='Tentative')
 if(length(tentIdx)==0){
  warning('There are no Tentative attributes! Returning original object.')
  return(x)
 }

 nRuns<-dim(x$ImpHistory)[1]

 if(averageOver>nRuns)
  averageOver<-nRuns

 impHistorySubset<-x$ImpHistory[(nRuns-averageOver+1):nRuns,]
 medianTentImp<-apply(impHistorySubset[,tentIdx,drop=FALSE],2,stats::median)
 medianShaMaxImp<-stats::median(impHistorySubset[,'shadowMax'])
 medianTentImp>medianShaMaxImp->toOrdain

 ans<-x
 ans$roughfixed<-TRUE
 ans$averageOver<-averageOver
 ans$originalDecision<-x$finalDecision
 ans$finalDecision[tentIdx[toOrdain]]<-'Confirmed'
 ans$finalDecision[tentIdx[!toOrdain]]<-'Rejected'

 return(ans)
}

##generateCol is internally used by plot.Boruta and plotImpHistory
generateCol<-function(x,colCode,col,numShadow){
 #Checking arguments
 if(is.null(col) & length(colCode)!=4)
  stop('colCode should have 4 elements.')
 #Generating col
 if(is.null(col)){
  rep(colCode[4],length(x$finalDecision)+numShadow)->cc
  cc[c(x$finalDecision=='Confirmed',rep(FALSE,numShadow))]<-colCode[1]
  cc[c(x$finalDecision=='Tentative',rep(FALSE,numShadow))]<-colCode[2]
  cc[c(x$finalDecision=='Rejected',rep(FALSE,numShadow))]<-colCode[3]
  col=cc
 }
 return(col)
}

#' Plot Boruta object
#'
#' Default plot method for Boruta objects, showing boxplots of attribute importances over run.
#' @method plot Boruta
#' @param x an object of a class Boruta.
#' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
#' @param sort controls whether boxplots should be ordered, or left in original order.
#' @param whichShadow a logical vector controlling which shadows should be drawn; switches respectively max shadow, mean shadow and min shadow.
#' @param col standard \code{col} attribute. If given, suppresses effects of \code{colCode}.
#' @param xlab X axis label that will be passed to \code{\link{boxplot}}.
#' @param ylab Y axis label that will be passed to \code{\link{boxplot}}.
#' @param ... additional graphical parameter that will be passed to \code{\link{boxplot}}.
#' @note If \code{col} is given and \code{sort} is \code{TRUE}, the \code{col} will be permuted, so that its order corresponds to attribute order in \code{ImpHistory}.
#' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
#' @return Invisible copy of \code{x}.
#' @examples
#' \dontrun{
#' library(mlbench); data(HouseVotes84)
#' na.omit(HouseVotes84)->hvo
#' #Takes some time, so be patient
#' Boruta(Class~.,data=hvo,doTrace=2)->Bor.hvo
#' print(Bor.hvo)
#' plot(Bor.hvo)
#' }
#' @export
plot.Boruta<-function(x,colCode=c('green','yellow','red','blue'),sort=TRUE,whichShadow=c(TRUE,TRUE,TRUE),
  col=NULL,xlab='Attributes',ylab='Importance',...){
 #Checking arguments
 stopifnot(inherits(x,'Boruta'))
 if(is.null(x$ImpHistory))
  stop('Importance history was not stored during the Boruta run.')

 #Removal of -Infs and conversion to a list
 lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
 colnames(x$ImpHistory)->names(lz)

 #Selection of shadow meta-attributes
 numShadow<-sum(whichShadow)
 lz[c(rep(TRUE,length(x$finalDecision)),whichShadow)]->lz

 #Generating color vector
 col<-generateCol(x,colCode,col,numShadow)

 #Ordering boxes due to attribute median importance
 if(sort){
  ii<-order(sapply(lz,stats::median))
  lz[ii]->lz; col<-col[ii]
 }

 #Final plotting
 graphics::boxplot(lz,xlab=xlab,ylab=ylab,col=col,...)
 invisible(x)
}

#' Plot Boruta object as importance history
#'
#' Alternative plot method for Boruta objects, showing matplot of attribute importances over run.
#' @param x an object of a class Boruta.
#' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
#' @param col standard \code{col} attribute, passed to \code{\link{matplot}}. If given, suppresses effects of \code{colCode}.
#' @param type Plot type that will be passed to \code{\link{matplot}}.
#' @param lty Line type that will be passed to \code{\link{matplot}}.
#' @param pch Point mark type that will be passed to \code{\link{matplot}}.
#' @param xlab X axis label that will be passed to \code{\link{matplot}}.
#' @param ylab Y axis label that will be passed to \code{\link{matplot}}.
#' @param ... additional graphical parameter that will be passed to \code{\link{matplot}}.
#' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
#' @return Invisible copy of \code{x}.
#' @examples
#' \dontrun{
#' library(mlbench); data(Sonar)
#' #Takes some time, so be patient
#' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
#' print(Bor.son)
#' plotImpHistory(Bor.son)
#' }
#' @export
plotImpHistory<-function(x,colCode=c('green','yellow','red','blue'),col=NULL,type="l",lty=1,pch=0,
  xlab='Classifier run',ylab='Importance',...){
 #Checking arguments
 stopifnot(inherits(x,'Boruta'))
 if(is.null(x$ImpHistory))
  stop('Importance history was not stored during the Boruta run.')
 col<-generateCol(x,colCode,col,3)

 #Final plotting
 graphics::matplot(0:(nrow(x$ImpHistory)-1),x$ImpHistory,xlab=xlab,ylab=ylab,col=col,type=type,lty=lty,pch=pch,...)
 invisible(x)
}

#' Export Boruta result as a formula
#'
#' Functions which convert the Boruta selection into a formula, so that it could be passed further to other functions.
#' @param x an object of a class Boruta, made using a formula interface.
#' @return Formula, corresponding to the Boruta results.
#' \code{getConfirmedFormula} returns only Confirmed attributes, \code{getNonRejectedFormula} also adds Tentative ones.
#' @note This operation is possible only when Boruta selection was invoked using a formula interface.
#' @rdname getFormulae
#' @export
getConfirmedFormula<-function(x){
 stopifnot(inherits(x,'Boruta'))
 if(is.null(x$call[["formula"]]))
  stop('The model for this Boruta run was not a formula.')
 deparse(x$call[["formula"]][[2]])->dec
 preds<-paste(names(x$finalDecision)[x$finalDecision=='Confirmed'],collapse="+")
 return(stats::as.formula(sprintf('%s~%s',dec,preds)))
}

#' @rdname getFormulae
#' @export
getNonRejectedFormula<-function(x){
 stopifnot(inherits(x,'Boruta'))
 if(is.null(x$call[["formula"]]))
  stop('The model for this Boruta run was not a formula.')
 deparse(x$call[["formula"]][[2]])->dec
 preds<-paste(names(x$finalDecision)[x$finalDecision!='Rejected'],collapse="+")
 return(stats::as.formula(sprintf('%s~%s',dec,preds)))
}

Try the Boruta package in your browser

Any scripts or data that you put into this service are public.

Boruta documentation built on Nov. 12, 2022, 9:06 a.m.