R/norm_xml.R

#' Generate PMML elements that using 'NormContinuous'
#'
#' Generate the 'NormContinuous' part for the whole PMML transformation segment
#' 'LocalTransformation'.
#'
#' @param featureName Character string indicating name of the variable that is to
#'                    be normalized/transformed.
#' @param featureRange Vector of two float numbers indicating range (clean limits)
#'                     of the variable that is to be transformed.
#' @param hatTips Vector of float numbers representing inner cut points generated
#'                by function 'gen_buckets()',
#' @param newFieldName Character string indicating new name for the derived feature
#' @param purpose Character string indicating type of 'NormContinuous', e.g.,
#'                'normalization', 'hat'.
#'
#' @return XML file representing PMML element 'LocalTransformation' that contains
#'        'DerivedField' with transformation 'NormContinuous'.
#'
#' @export
#'
#' @examples
#' normFICO <- norm_xml(featureName = 'CreditScore', featureRange = c(450, 950),
#'     hatTips = BatchSMB_UsedPlat[[1]]$bands, newFieldName = 'FICO', purpose = 'hat')
norm_xml <- function(featureName, featureRange, hatTips = featureRange, newFieldName = featureName, purpose = "norm") {
  LocalTransformations <- newXMLNode("LocalTransformations")
  if (purpose == "hat") {
    ## combine hatTips and featureRange ------------------------
    hatCuts <- c(featureRange[1], hatTips, featureRange[2])

    ### derive the left hat --------------------------------------------------------------------------------
    DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, "_", 1, sep = ""),
                                                           dataType = "double", optype = "continuous" ),
                                parent = LocalTransformations )
    NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                               outliers = "asMissingValues" ),
                                  parent = DerivedField )
    #### define local variable "newRangeLeft"
    newRangeLeft <- c(1, 1, 0)
    for ( i in seq_along( hatCuts[1:3] ) ) {
      newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[1:3][i],
                                           norm = newRangeLeft[i] ),
                  parent = NormContinuous )
    }

    ### derive hats in the middle -----------------------------------------------------------------------------
    if (length(hatTips) > 2) {
      #### define local variable "newRangeMiddle"
      newRangeMiddle <- c(0, 1, 0)
      for ( i in 1:(length(hatTips) - 2) ) {
        DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, "_", i+1, sep = ""),
                                                               dataType = "double", optype = "continuous" ),
                                    parent = LocalTransformations )
        NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                                   outliers = "asMissingValues" ),
                                      parent = DerivedField )
        for ( j in seq_along( hatTips[c(i, i+1, i+2)] ) ) {
          newXMLNode( "LinearNorm", attrs = c( orig = hatTips[c(i, i+1, i+2)][j],
                                               norm = newRangeMiddle[j] ),
                      parent = NormContinuous )
        }
      }
    }

    ### derive the right hat --------------------------------------------------------------------------------------------
    DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, "_", length(hatTips), sep = ""),
                                                           dataType = "double", optype = "continuous" ),
                                parent = LocalTransformations )
    NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                               outliers = "asMissingValues" ),
                                  parent = DerivedField )
    #### define local variable "newRangeRight"
    newRangeRight <- c(0, 1, 1)
    for ( i in seq_along( hatCuts[(length(hatCuts)-2):length(hatCuts)] ) ) {
      newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[(length(hatCuts)-2):length(hatCuts)][i],
                                           norm = newRangeRight[i] ),
                  parent = NormContinuous )
    }
  } else {
    DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, sep = ""),
                                                           dataType = "double", optype = "continuous" ),
                                parent = LocalTransformations )
    NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                               outliers = "asMissingValues" ),
                                  parent = DerivedField )
    newRange <- c(0, 1)
    for (i in seq_along(featureRange)) {
      newXMLNode( "LinearNorm", attrs = c( orig = featureRange[i], norm = newRange[i] ),
                  parent = NormContinuous )
    }
  }

  return(LocalTransformations)
}
hongqi0314/PRAuto.PMML documentation built on May 6, 2019, 11:30 a.m.