Nothing
# Supplementary routines for Boruta.
#' Extract attribute statistics
#'
#' \code{attStats} shows a summary of a Boruta run in an attribute-centred way.
#' It produces a data frame containing some importance stats as well as the number of hits that attribute scored and the decision it was given.
#' @param x an object of a class Boruta, from which attribute stats should be extracted.
#' @return A data frame containing, for each attribute that was originally in information system, mean, median, maximal and minimal importance, number of hits normalised to number of importance source runs performed and the decision copied from \code{finalDecision}.
#' @note When using a Boruta object generated by a \code{\link{TentativeRoughFix}}, the resulting data frame will consist a rough-fixed decision.
#' @note \code{x} has to be made with \code{holdHistory} set to \code{TRUE} for this code to run.
#' @export
#' @examples
#' \dontrun{
#' library(mlbench); data(Sonar)
#' #Takes some time, so be patient
#' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
#' print(Bor.son)
#' stats<-attStats(Bor.son)
#' print(stats)
#' plot(normHits~meanImp,col=stats$decision,data=stats)
#' }
attStats<-function(x){
stopifnot(inherits(x,'Boruta'))
if(is.null(x$ImpHistory))
stop('Importance history was not stored during the Boruta run.')
lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
colnames(x$ImpHistory)->names(lz)
mr<-lz$shadowMax; lz[1:(length(lz)-3)]->lz
t(sapply(lz,function(x) c(mean(x),stats::median(x),min(x),max(x),sum(mr[1:length(x)]<x)/length(mr))))->st
st<-data.frame(st,x$finalDecision)
names(st)<-c("meanImp","medianImp","minImp","maxImp","normHits","decision")
return(st)
}
#' Extract names of the selected attributes
#'
#' \code{getSelectedAttributes} returns a vector of names of attributes selected during a Boruta run.
#' @param x an object of a class Boruta, from which relevant attributes names should be extracted.
#' @param withTentative if set to \code{TRUE}, Tentative attributes will be also returned.
#' @return A character vector with names of the relevant attributes.
#' @export
#' @examples
#' \dontrun{
#' data(iris)
#' #Takes some time, so be patient
#' Boruta(Species~.,data=iris,doTrace=2)->Bor.iris
#' print(Bor.iris)
#' print(getSelectedAttributes(Bor.iris))
#' }
getSelectedAttributes<-function(x,withTentative=FALSE){
stopifnot(inherits(x,'Boruta'))
names(x$finalDecision)[
x$finalDecision%in%(if(!withTentative) "Confirmed" else c("Confirmed","Tentative"))
]
}
#' Rough fix of Tentative attributes
#'
#' In some circumstances (too short Boruta run, unfortunate mixing of shadow attributes, tricky dataset\ldots), Boruta can leave some attributes Tentative.
#' \code{TentativeRoughFix} performs a simplified, weaker test for judging such attributes.
#' @param x an object of a class Boruta.
#' @param averageOver Either number of last importance source runs to
#' average over or Inf for averaging over the whole Boruta run.
#' @return A Boruta class object with modified \code{finalDecision} element.
#' Such object has few additional elements:
#' \item{originalDecision}{Original \code{finalDecision}.}
#' \item{averageOver}{Copy of \code{averageOver} parameter.}
#' @details Function claims as Confirmed those attributes that
#' have median importance higher than the median importance of
#' maximal shadow attribute, and the rest as Rejected.
#' Depending of the user choice, medians for the test
#' are count over last round, all rounds or N last
#' importance source runs.
#' @note This function should be used only when strict decision is
#' highly desired, because this test is much weaker than Boruta
#' and can lower the confidence of the final result.
#' @note \code{x} has to be made with \code{holdHistory} set to
#' \code{TRUE} for this code to run.
#' @export
TentativeRoughFix<-function(x,averageOver=Inf){
stopifnot(inherits(x,'Boruta'))
if(is.null(x$ImpHistory))
stop('Importance history was not stored during the Boruta run.')
if(!is.numeric(averageOver))
stop('averageOver should be a numeric vector.')
if(length(averageOver)!=1)
stop('averageOver should be a one-element vector.')
if(averageOver<1)
stop('averageOver should be positive.')
tentIdx<-which(x$finalDecision=='Tentative')
if(length(tentIdx)==0){
warning('There are no Tentative attributes! Returning original object.')
return(x)
}
nRuns<-dim(x$ImpHistory)[1]
if(averageOver>nRuns)
averageOver<-nRuns
impHistorySubset<-x$ImpHistory[(nRuns-averageOver+1):nRuns,]
medianTentImp<-apply(impHistorySubset[,tentIdx,drop=FALSE],2,stats::median)
medianShaMaxImp<-stats::median(impHistorySubset[,'shadowMax'])
medianTentImp>medianShaMaxImp->toOrdain
ans<-x
ans$roughfixed<-TRUE
ans$averageOver<-averageOver
ans$originalDecision<-x$finalDecision
ans$finalDecision[tentIdx[toOrdain]]<-'Confirmed'
ans$finalDecision[tentIdx[!toOrdain]]<-'Rejected'
return(ans)
}
##generateCol is internally used by plot.Boruta and plotImpHistory
generateCol<-function(x,colCode,col,numShadow){
#Checking arguments
if(is.null(col) & length(colCode)!=4)
stop('colCode should have 4 elements.')
#Generating col
if(is.null(col)){
rep(colCode[4],length(x$finalDecision)+numShadow)->cc
cc[c(x$finalDecision=='Confirmed',rep(FALSE,numShadow))]<-colCode[1]
cc[c(x$finalDecision=='Tentative',rep(FALSE,numShadow))]<-colCode[2]
cc[c(x$finalDecision=='Rejected',rep(FALSE,numShadow))]<-colCode[3]
col=cc
}
return(col)
}
#' Plot Boruta object
#'
#' Default plot method for Boruta objects, showing boxplots of attribute importances over run.
#' @method plot Boruta
#' @param x an object of a class Boruta.
#' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
#' @param sort controls whether boxplots should be ordered, or left in original order.
#' @param whichShadow a logical vector controlling which shadows should be drawn; switches respectively max shadow, mean shadow and min shadow.
#' @param col standard \code{col} attribute. If given, suppresses effects of \code{colCode}.
#' @param xlab X axis label that will be passed to \code{\link{boxplot}}.
#' @param ylab Y axis label that will be passed to \code{\link{boxplot}}.
#' @param ... additional graphical parameter that will be passed to \code{\link{boxplot}}.
#' @note If \code{col} is given and \code{sort} is \code{TRUE}, the \code{col} will be permuted, so that its order corresponds to attribute order in \code{ImpHistory}.
#' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
#' @return Invisible copy of \code{x}.
#' @examples
#' \dontrun{
#' library(mlbench); data(HouseVotes84)
#' na.omit(HouseVotes84)->hvo
#' #Takes some time, so be patient
#' Boruta(Class~.,data=hvo,doTrace=2)->Bor.hvo
#' print(Bor.hvo)
#' plot(Bor.hvo)
#' }
#' @export
plot.Boruta<-function(x,colCode=c('green','yellow','red','blue'),sort=TRUE,whichShadow=c(TRUE,TRUE,TRUE),
col=NULL,xlab='Attributes',ylab='Importance',...){
#Checking arguments
stopifnot(inherits(x,'Boruta'))
if(is.null(x$ImpHistory))
stop('Importance history was not stored during the Boruta run.')
#Removal of -Infs and conversion to a list
lz<-lapply(1:ncol(x$ImpHistory),function(i) x$ImpHistory[is.finite(x$ImpHistory[,i]),i])
colnames(x$ImpHistory)->names(lz)
#Selection of shadow meta-attributes
numShadow<-sum(whichShadow)
lz[c(rep(TRUE,length(x$finalDecision)),whichShadow)]->lz
#Generating color vector
col<-generateCol(x,colCode,col,numShadow)
#Ordering boxes due to attribute median importance
if(sort){
ii<-order(sapply(lz,stats::median))
lz[ii]->lz; col<-col[ii]
}
#Final plotting
graphics::boxplot(lz,xlab=xlab,ylab=ylab,col=col,...)
invisible(x)
}
#' Plot Boruta object as importance history
#'
#' Alternative plot method for Boruta objects, showing matplot of attribute importances over run.
#' @param x an object of a class Boruta.
#' @param colCode a vector containing colour codes for attribute decisions, respectively Confirmed, Tentative, Rejected and shadow.
#' @param col standard \code{col} attribute, passed to \code{\link{matplot}}. If given, suppresses effects of \code{colCode}.
#' @param type Plot type that will be passed to \code{\link{matplot}}.
#' @param lty Line type that will be passed to \code{\link{matplot}}.
#' @param pch Point mark type that will be passed to \code{\link{matplot}}.
#' @param xlab X axis label that will be passed to \code{\link{matplot}}.
#' @param ylab Y axis label that will be passed to \code{\link{matplot}}.
#' @param ... additional graphical parameter that will be passed to \code{\link{matplot}}.
#' @note This function will throw an error when \code{x} lacks importance history, i.e., was made with \code{holdHistory} set to \code{FALSE}.
#' @return Invisible copy of \code{x}.
#' @examples
#' \dontrun{
#' library(mlbench); data(Sonar)
#' #Takes some time, so be patient
#' Boruta(Class~.,data=Sonar,doTrace=2)->Bor.son
#' print(Bor.son)
#' plotImpHistory(Bor.son)
#' }
#' @export
plotImpHistory<-function(x,colCode=c('green','yellow','red','blue'),col=NULL,type="l",lty=1,pch=0,
xlab='Classifier run',ylab='Importance',...){
#Checking arguments
stopifnot(inherits(x,'Boruta'))
if(is.null(x$ImpHistory))
stop('Importance history was not stored during the Boruta run.')
col<-generateCol(x,colCode,col,3)
#Final plotting
graphics::matplot(0:(nrow(x$ImpHistory)-1),x$ImpHistory,xlab=xlab,ylab=ylab,col=col,type=type,lty=lty,pch=pch,...)
invisible(x)
}
#' Export Boruta result as a formula
#'
#' Functions which convert the Boruta selection into a formula, so that it could be passed further to other functions.
#' @param x an object of a class Boruta, made using a formula interface.
#' @return Formula, corresponding to the Boruta results.
#' \code{getConfirmedFormula} returns only Confirmed attributes, \code{getNonRejectedFormula} also adds Tentative ones.
#' @note This operation is possible only when Boruta selection was invoked using a formula interface.
#' @rdname getFormulae
#' @export
getConfirmedFormula<-function(x){
stopifnot(inherits(x,'Boruta'))
if(is.null(x$call[["formula"]]))
stop('The model for this Boruta run was not a formula.')
deparse(x$call[["formula"]][[2]])->dec
preds<-paste(names(x$finalDecision)[x$finalDecision=='Confirmed'],collapse="+")
return(stats::as.formula(sprintf('%s~%s',dec,preds)))
}
#' @rdname getFormulae
#' @export
getNonRejectedFormula<-function(x){
stopifnot(inherits(x,'Boruta'))
if(is.null(x$call[["formula"]]))
stop('The model for this Boruta run was not a formula.')
deparse(x$call[["formula"]][[2]])->dec
preds<-paste(names(x$finalDecision)[x$finalDecision!='Rejected'],collapse="+")
return(stats::as.formula(sprintf('%s~%s',dec,preds)))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.