R/normCon_pmml.R

#' Function to generate PMML element that using 'NormContinuous'
#'
#'
#' @param featureName String of characters indicating the variable that is to be normalized.
#' @param featureRange Vector of two float numbers indicating range of the variable
#'                     to be normalized.
#' @param innerKnots Vector of float numbers representing critical values to generate bins,
#'                   output of running 'smbinning'.
#' @param newFieldName String of characters indicating origin of transformed features.
#' @param purpose String of characters indicating purpose of the transformation.
#'
#' @return PMML element 'DerivedField' with transformation 'NormContinuous'
#' @export
#'
#' @examples
normCon_pmml <- function(featureName, featureRange, innerKnots = featureRange, newFieldName = featureName, purpose = "norm") {

  ## define root element 'LocalTransformations'
  LocalTransformations <- newXMLNode("LocalTransformations")
  if (purpose == "hat") {
    ## combine innerKnots and featureRange
    hatCuts <- c(featureRange[1], innerKnots, featureRange[2])

    ### derive the left hat ----
    DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, "_", 1, sep = ""),
                                                           dataType = "double", optype = "continuous" ),
                                parent = LocalTransformations )
    NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                               outliers = "asMissingValues" ),
                                  parent = DerivedField )
    #### define local variable "newRangeLeft"
    newRangeLeft <- c(1, 1, 0)
    for ( i in seq_along( hatCuts[1:3] ) ) {
      newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[1:3][i],
                                           norm = newRangeLeft[i] ),
                  parent = NormContinuous )
    }

    ### derive hats in the middle ----
    if (length(innerKnots) > 2) {
      #### define local variable "newRangeMiddle"
      newRangeMiddle <- c(0, 1, 0)
      for ( i in 1:(length(innerKnots) - 2) ) {
        DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, "_", i+1, sep = ""),
                                                               dataType = "double", optype = "continuous" ),
                                    parent = LocalTransformations )
        NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                                   outliers = "asMissingValues" ),
                                      parent = DerivedField )
        for ( j in seq_along( innerKnots[c(i, i+1, i+2)] ) ) {
          newXMLNode( "LinearNorm", attrs = c( orig = innerKnots[c(i, i+1, i+2)][j],
                                               norm = newRangeMiddle[j] ),
                      parent = NormContinuous )
        }
      }
    }

    ### derive the right hat ----
    DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, "_", length(innerKnots), sep = ""),
                                                           dataType = "double", optype = "continuous" ),
                                parent = LocalTransformations )
    NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                               outliers = "asMissingValues" ),
                                  parent = DerivedField )
    #### define local variable "newRangeRight"
    newRangeRight <- c(0, 1, 1)
    for ( i in seq_along( hatCuts[(length(hatCuts)-2):length(hatCuts)] ) ) {
      newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[(length(hatCuts)-2):length(hatCuts)][i],
                                           norm = newRangeRight[i] ),
                  parent = NormContinuous )
    }
  } else {
    DerivedField <- newXMLNode( "DerivedField", attrs = c( name = paste(purpose, newFieldName, sep = ""),
                                                           dataType = "double", optype = "continuous" ),
                                parent = LocalTransformations )
    NormContinuous <- newXMLNode( "NormContinuous", attrs = c( field = featureName, mapMissingTo = 0,
                                                               outliers = "asMissingValues" ),
                                  parent = DerivedField )
    hatCuts <- unique(c(featureRange[1], innerKnots, featureRange[2]))
    if (length(hatCuts) < 3) {
      newRange <- c(0, 1)
      for (i in seq_along(hatCuts)) {
        newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[i], norm = newRange[i] ),
                    parent = NormContinuous )
      }
    } else {
      if (featureRange[1] < innerKnots[1]) {
        newRange <- c(0, 0, 1)
        for (i in seq_along(hatCuts)) {
          newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[i], norm = newRange[i] ),
                      parent = NormContinuous )
        }
      } else{
        newRange <- c(0, 1, 1)
        for (i in seq_along(hatCuts)) {
          newXMLNode( "LinearNorm", attrs = c( orig = hatCuts[i], norm = newRange[i] ),
                      parent = NormContinuous )
        }
      }
    }
  }

  return(LocalTransformations)
}
hongqi0314/PRAuto.PMML documentation built on May 6, 2019, 11:30 a.m.