R/rpart.rules.push.R

#' Writes rule tables required to process rpart rules in SQL to an open RODBC connection.
#'
#'  This function handles the process of pushing tabular versions of
#'  \pkg{rpart} rules to an RODBC connection.  The entire process of generation
#'  and writing is completed with a single call, with all necessary subcalls
#'  handled within this function. 
#'  
#'  Once the tables have been pushed to the database, unpivoted source data can
#'  be processed using the rpart model with SQL code similar to the following:
#'  
#'\preformatted{
#' WITH SOURCE AS
#' (
#'     SELECT
#'     ID,
#'     TYPE,
#'     VALUE
#'     FROM DATA
#'     UNPIVOT
#'     (
#'         VALUE FOR TYPE IN (FIELD1, FIELD2, FIELD3)
#'     )UNPVT
#' ),
#' MATCHES AS 
#' (
#'     SELECT
#'     ID
#'     ,Subrule
#'     ,Variable
#'     ,SR.Value
#'     ,Less
#'     ,Greater
#'     FROM
#'     SOURCE S
#'     LEFT JOIN SUBRULES SR
#'     ON
#'     TYPE = VARIABLE
#'     AND (
#'         S.value = SR.value
#'         OR S.value < SR.Less
#'         OR S.value > SR.Greater
#'         ) 
#'     ),
#' MATCHED_SUBRULES
#' AS (
#'     SELECT
#'     Subrule
#'     ,ID
#'     FROM
#'     MATCHES M
#'     GROUP BY
#'     Subrule
#'     ,ID
#'     ),
#' MATCHED_RULES
#' AS (
#'     SELECT
#'     R.[Rule]
#'     ,MS.*
#'     FROM
#'     RULES AS R
#'     LEFT JOIN MATCHED_SUBRULES MS
#'     ON R.SUBRULE=MS.SUBRULE AND Leaf='TRUE'
#'     )
#' ,
#' COUNTS AS
#' (
#'     SELECT
#'     [RULE]
#'     ,ID
#'     ,MATCH_COUNT=COUNT(DISTINCT SUBRULE)
#'     ,NEEDED_COUNT=(SELECT COUNT(DISTINCT SUBRULE) FROM RULES R WHERE R.[RULE]=MR.[RULE])
#'     FROM
#'     MATCHED_RULES MR
#'     GROUP BY
#'     [RULE]
#'     ,ID
#'     )
#' SELECT 
#' [RULE]
#' ,ID
#' FROM COUNTS
#' WHERE 
#' MATCH_COUNT=NEEDED_COUNT 
#'}
#'
#' The frame is also passed to the database which allows extracting the estimates generated by the rpart model.
#'
#' @param object an rpart object
#' @param connection and open RODBC connection
#' @param rulePrefix A character string to prepend to each rule name to allow for multiple rule sets
#' @param tablePrefix A character string to prepend to each table name to allow for multiple rule sets
#' @export
rpart.rules.push<-function(object,connection,rulePrefix=NULL,tablePrefix=NULL)
{
  require(RODBC)
  rulePrefix<-paste2(rulePrefix,'',sep='.')
  rules<-rpart.rules.table(object)
  rules$Rule<-paste(rulePrefix,rules$Rule,sep='')
  rules$Subrule<-paste(rulePrefix,rules$Subrule,sep='')
  sqlSave(connection,rules,tablename=paste2(tablePrefix,"RULES",sep='_'),append=TRUE,rownames=FALSE)
  subrules<-rpart.subrules.table(object)
  subrules$Subrule<-paste(rulePrefix,subrules$Subrule,sep='')
  sqlSave(connection,subrules,tablename=paste2(tablePrefix,"SUBRULES",sep='_'),append=TRUE,rownames=FALSE)
  frame<-object$frame
  if (!is.null(frame$yval2))
      frame<-cbind(frame[,names(frame) != "yval2"],frame$yval2)
  frame$Rule<-paste(rulePrefix,row.names(frame),sep='')
  sqlSave(connection,frame,tablename=paste2(tablePrefix,"FRAME",sep='_'),append=TRUE,rownames=FALSE)
}

paste2<-function(...,sep=' ')
{
  args<-unlist(list(...))
  result<-paste(args,collapse=sep)
  return(result)
}
cvarrichio/rpart.utils documentation built on May 14, 2019, 12:53 p.m.