#' Applies the Bagging algorithm to a dataset
#'
#' Fits the Bagging algorithm proposed by Breiman in
#' 1996 using classification trees as single classifiers.
#'
#' @param data FLTable
#' @param formula formula specifying the independent and dependent variable columns
#' @param mfinal an integer, the number of iterations
#' for which boosting is run or the number of trees to use.
#' Defaults to mfinal=5 iterations.
#' @param control options that control details of the rpart algorithm. See rpart for more details.
#'
#' @return An object of class "FLBagging" which has details about the trees generated
#' and the number of votes in bag corresponding to every ObsID.
#'
#' @examples
#' flt<-FLTable(getTestTableName("tblDecisionTreeMulti"),
#' "ObsID","VarID","Num_Val")
#' flobj<-bagging(flt, formula = -1~.,mfinal=5)
#' pred <- predict(flobj, flt[1:4])
#' pred$confusion
#' pred$class
#' @seealso \code{\link[adabag]{bagging}} for corresponding R function reference.
#' @export
bagging<-function(formula,data,...){
UseMethod("bagging",data)
}
#' @export
bagging.default <- function (formula,data=list(),...) {
if (!requireNamespace("adabag", quietly = TRUE)){
stop("adabag package needed for bagging. Please install it.",
call. = FALSE)
}
else return(adabag::bagging(formula=formula,data=data,...))
}
#' @export
bagging.FLTable<-function(data,
formula,
control=c(minsplit=10,
maxdepth=5,
cp=0.95),
mfinal=5){
if(class(data) == "FLpreparedData")
x<-rpart.FLpreparedData(data,formula,control,mfinal=mfinal)
else
x<-rpart.FLTable(data,formula,control,mfinal=mfinal)
vfuncName<-"FLBagDecisionTree"
retobj<-sqlStoredProc(getFLConnection(),
vfuncName,
outputParameter=c(AnalysisID="a"),
pInputParameters=x$vinputcols)
AnalysisID<-as.character(retobj[1,1])
sql<-paste0("SELECT * FROM fzzlDecisionTreeMNMD AS a
WHERE AnalysisID = ",fquote(AnalysisID)," ORDER BY 2,4")
ret<-sqlQuery(getFLConnection(),sql)
colnames(ret) <- tolower(colnames(ret))
call<-match.call()
frame<-data.frame(NodeID=ret$nodeid,
n=ret$nodesize,
prob=ret$predictclassprob,
yval=ret$predictclass,
var=ret$splitvarid,
SplitVal=ret$splitval,
leftson=ret$childnodeleft,
rightson=ret$childnoderight,
treelevel=ret$treelevel,
parent=ret$parentnodeid,
Leaf=ret$isleaf,
TreeID=ret$datasetid)
ntrees<-unique(frame$TreeID)
trees<-list()
for(l in 1:length(ntrees)){
trees[[l]]<-subset(frame,TreeID==l)
class(trees[[l]])<-"data.frame"
}
#browser()
votes<-sqlQuery(getFLConnection(),paste0("SELECT ObsID, ObservedClass, PredictedClass, NumOfVotes
FROM fzzlBagDTPred WHERE AnalysisID = ",fquote(AnalysisID),
"ORDER BY 1, 2, 3, 4"))
colnames(votes) <- tolower(colnames(votes))
class<-votes$predictedclass
retobj<-list(trees=trees,
call=call,
formula=formula,
votes=votes,
class=class,
RegrDataPrepSpecs=x$vprepspecs,
data=x$data,
AnalysisID=AnalysisID)
class(retobj)<-"FLbagging"
return(retobj)
}
#' @export
bagging.FLpreparedData <- bagging.FLTable
# print.FLbagging<-function(object){
# for(i in 1:length(object$trees)){
# cat(object$trees[[i]])
# }
# }
# setMethod("show","FLbagging",print.FLbagging)
#' @export
predict.FLbagging<-function(object,newdata=object$data,
scoreTable="",...){ #browser()
if(!is.FLTable(newdata)) stop("scoring allowed on FLTable only")
newdata <- setAlias(newdata,"")
vinputTable <- getTableNameSlot(newdata)
if(scoreTable=="")
scoreTable <- gen_score_table_name("BaggingDTScore")
vRegrDataPrepSpecs <- setDefaultsRegrDataPrepSpecs(x=object$RegrDataPrepSpecs,
values=list(...))
deepx <- FLRegrDataPrep(newdata,depCol=vRegrDataPrepSpecs$depCol,
ExcludeCols=vRegrDataPrepSpecs$excludeCols)
newdatatable <- deepx$table
newdatatable <- setAlias(newdatatable,"")
tablename<- getTableNameSlot(newdatatable)
vobsid <- getVariables(newdatatable)[["obs_id_colname"]]
vvarid <- getVariables(newdatatable)[["var_id_colname"]]
vvalue <- getVariables(newdatatable)[["cell_val_colname"]]
vinputcols<-list()
vinputcols <- c(vinputcols,
TableName=tablename,
ObsIDColName=vobsid,
VarIDColName=vvarid,
ValueColName=vvalue,
AnalysisID=object$AnalysisID,
ScoreTable=scoreTable,
Note=genNote("RandomForestPrediction"))
vfuncName<-"FLBagDecisionTreeScore"
AnalysisID <- sqlStoredProc(getFLConnection(),
vfuncName,
outputParameter=c(AnalysisID="a"),
pInputParams=vinputcols)
sqlSendUpdate(getFLConnection(), paste0("alter table ",scoreTable,
" add probability float, add matrix_id float"))
sqlSendUpdate(getFLConnection(), paste0("update ",scoreTable,
" set matrix_id = 1, probability = NumOfVotes * 1.0 /",length(object$trees)))
x<-sqlQuery(getFLConnection(),paste0("select ObservedClass, \n ",
" PredictedClass from ",scoreTable))
colnames(x) <- c("ObservedClass","PredictedClass")
m<-matrix(nrow = max(x$ObservedClass)-min(x$ObservedClass)+1,
ncol=max(x$PredictedClass)-min(x$PredictedClass)+1)
rownames(m)<-min(x$ObservedClass):max(x$ObservedClass)
colnames(m)<-min(x$PredictedClass):max(x$PredictedClass)
m[is.na(m)]<-0
for(i in 1:length(x$ObservedClass)){
j<-x[i,1]
k<-x[i,2]
m[as.character(j),as.character(k)]<-m[as.character(j),as.character(k)]+1
}
warning("The probability values are only true for predicted class. The sum may not be 1.")
return(list(formula= object$formula,
votes = FLMatrix(scoreTable,1,"matrix_id",vobsid,"PredictedClass","NumOfVotes"),
prob = FLMatrix(scoreTable,1,"matrix_id",vobsid,"PredictedClass","probability"),
class=as.factor(x$PredictedClass),
confusion=m))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.