R/regTbl_xml.R

#' Function to output 'RegressionTable' element in a PMML file
#'
#' Mapping elements in R object 'lm' to XML nodes
#'
#' @param lmObj An R object representing a fitted multiple linear regression model
#'              that may contains interaction terms.
#'
#' @return An XML file representing 'RegressionTable' element in a PMML file.
#' @export
#'
#' @examples UsedPlat_RegTbl <- regTbl_xml(lmFitUsedPlat)
regTbl_xml <- function(lmObj) {

  modSummary <- summary(lmObj) # extract model summary that contains estimated coefficients
  modTerms <- attributes(lmObj$terms) # extract all attributes regarding 'term'
  modTerms_main <- modTerms$term.labels[modTerms$order == 1] # main effect terms
  modTerms_inter <- modTerms$term.labels[modTerms$order > 1] # interaction terms
  labels_inter <- vector(mode = 'list', length = length(modTerms_inter))
  for (i in seq_along(modTerms_inter)) {
    labels_inter[[i]] <- unlist(strsplit(modTerms_inter[i], ':')) # split components within interaction terms
  }
  modCoef_main <- modSummary$coefficients[-1, 1][modTerms$order == 1]
  modCoef_inter <- modSummary$coefficients[-1, 1][modTerms$order > 1]

  regTbl <- XML::newXMLNode("RegressionTable", attrs = c(intercept = modSummary$coefficients[1, 1]),
                            parent = pmmlRegMod) # pmmlRegMod was globally defined in 'multiReg_pmml()'

  for (i in seq_along(modCoef_main)) {
    XML::newXMLNode("NumericPredictor", attrs = c(coefficient = unname(modCoef_main)[i], exponent = 1,
                                                  name = modTerms_main[i]),
                    parent = regTbl)
  }

  for (i in seq_along(modCoef_inter)) {
    inter_node <- XML::newXMLNode("PredictorTerm", attrs = c(coefficient = unname(modCoef_inter)[i]),
                                  parent = regTbl)
    for (j in seq_along(labels_inter[[i]])) {
      XML::newXMLNode("FieldRef", attrs = c(field = labels_inter[[i]][j]), parent = inter_node)
    }
  }

  # return(regTbl)
}
hongqi0314/PRAuto.PMML documentation built on May 6, 2019, 11:30 a.m.