pkgname <- "pmml"
source(file.path(R.home("share"), "R", "examples-header.R"))
options(warn = 1)
library('pmml')
base::assign(".oldSearch", base::search(), pos = 'CheckExEnv')
base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv')
cleanEx()
nameEx("add_attributes")
### * add_attributes
flush(stderr()); flush(stdout())
### Name: add_attributes
### Title: Add attribute values to an existing element in a given PMML
### file.
### Aliases: add_attributes
### Keywords: interface
### ** Examples
# Make a sample model:
fit <- lm(Sepal.Length ~ ., data = iris[, -5])
fit_pmml <- pmml(fit)
# Add arbitrary attributes to the 1st 'NumericPredictor' element. The
# attributes are for demostration only (they are not allowed under
# the PMML schema). The command assumes the default namespace.
fit_pmml_2 <- add_attributes(fit_pmml, "/p:PMML/descendant::p:NumericPredictor[1]",
attributes = c(a = 1, b = "b")
)
# Add attributes to the NumericPredictor element which has
# 'Petal.Length' as the 'name' attribute:
fit_pmml_3 <- add_attributes(fit_pmml,
"/p:PMML/descendant::p:NumericPredictor[@name='Petal.Length']",
attributes = c(a = 1, b = "b")
)
# 3 NumericElements exist which have '1' as the 'exponent' attribute.
# Add new attributes to the 3rd one:
fit_pmml_4 <- add_attributes(fit_pmml,
"/p:PMML/descendant::p:NumericPredictor[@exponent='1'][3]",
attributes = c(a = 1, b = "b")
)
# Add attributes to the 1st element whose 'name' attribute contains
# 'Length':
fit_pmml_5 <- add_attributes(fit_pmml,
"/p:PMML/descendant::p:NumericPredictor[contains(@name,'Length')]",
attributes = c(a = 1, b = "b")
)
cleanEx()
nameEx("add_data_field_attributes")
### * add_data_field_attributes
flush(stderr()); flush(stdout())
### Name: add_data_field_attributes
### Title: Add attribute values to an existing DataField element in a given
### PMML file
### Aliases: add_data_field_attributes
### Keywords: interface
### ** Examples
# Make a sample model:
fit <- lm(Sepal.Length ~ ., data = iris[, -5])
fit_pmml <- pmml(fit)
# The resulting model has mining fields with no information besides
# fieldName, dataType and optype. This object is already an xml
# node (not an external text file), so there is no need to convert
# it to an xml node object.
# Create data frame with attribute information:
attributes <- data.frame(c("FlowerWidth", 1), c("FlowerLength", 0),
stringsAsFactors = FALSE
)
rownames(attributes) <- c("displayName", "isCyclic")
colnames(attributes) <- c("Sepal.Width", "Petal.Length")
# Although not needed in this first try, necessary to easily add
# new values later. Removes values as factors so that new values
# added later are not evaluated as factor values and thus rejected
# as invalid.
attributes[] <- lapply(attributes, as.character)
fit_pmml_2 <- add_data_field_attributes(fit_pmml,
attributes,
namespace = "4_4"
)
# Alternative method to add attributes to a single field,
# "Sepal.Width":
fit_pmml_3 <- add_data_field_attributes(
fit_pmml, c(displayName = "FlowerWidth", isCyclic = 1),
"Sepal.Width"
)
mi <- make_intervals(
list("openClosed", "closedClosed", "closedOpen"),
list(NULL, 1, 2), list(1, 2, NULL)
)
mv <- make_values(
list("A", "B", "C"), list(NULL, NULL, NULL),
list("valid", NULL, "invalid")
)
fit_pmml_4 <- add_data_field_children(fit_pmml,
field = "Sepal.Length",
interval = mi, values = mv
)
cleanEx()
nameEx("add_data_field_children")
### * add_data_field_children
flush(stderr()); flush(stdout())
### Name: add_data_field_children
### Title: Add 'Interval' and 'Value' child elements to a given DataField
### element in a given PMML file.
### Aliases: add_data_field_children
### Keywords: interface
### ** Examples
# Make a sample model:
fit <- lm(Sepal.Length ~ ., data = iris[, -5])
fit_pmml <- pmml(fit)
# The resulting model has data fields but with no 'Interval' or Value'
# elements. This object is already an xml node (not an external text
# file), so there is no need to convert it to an xml node object.
# Add an 'Interval' element node by typing it in
fit_pmml_2 <- add_data_field_children(fit_pmml,
field = "Sepal.Length",
intervals = list(newXMLNode("Interval",
attrs = c(closure = "openClosed", rightMargin = 3)
))
)
# Use helper functions to create list of 'Interval' and 'Value'
# elements. We define the 3 Intervals as ,1] (1,2) and [2,
mi <- make_intervals(
list("openClosed", "openOpen", "closedOpen"),
list(NULL, 1, 2), list(1, 2, NULL)
)
# Define 3 values, none with a 'displayValue' attribute and 1 value
# defined as 'invalid'. The 2nd one is 'valid' by default.
mv <- make_values(
list(1.1, 2.2, 3.3), list(NULL, NULL, NULL),
list("valid", NULL, "invalid")
)
# As an example, apply these to the Sepal.Length field:
fit_pmml_3 <- add_data_field_children(fit_pmml, field = "Sepal.Length", intervals = mi, values = mv)
# Only defined 'Interval's:
fit_pmml_3 <- add_data_field_children(fit_pmml, field = "Sepal.Length", intervals = mi)
cleanEx()
nameEx("add_mining_field_attributes")
### * add_mining_field_attributes
flush(stderr()); flush(stdout())
### Name: add_mining_field_attributes
### Title: Add attribute values to an existing MiningField element in a
### given PMML file.
### Aliases: add_mining_field_attributes
### Keywords: interface
### ** Examples
# Make a sample model
fit <- lm(Sepal.Length ~ ., data = iris[, -5])
fit_pmml <- pmml(fit)
# The resulting model has mining fields with no information
# besides fieldName, dataType and optype. This object is
# already an xml node (not an external text file), so there
# is no need to convert it to an xml node object.
# Create data frame with attribute information:
attributes <- data.frame(
c("active", 1.1, "asIs"),
c("active", 2.2, "asIs"),
c("active", NA, "asMissing"),
stringsAsFactors = TRUE
)
rownames(attributes) <- c(
"usageType", "missingValueReplacement",
"invalidValueTreatment"
)
colnames(attributes) <- c(
"Sepal.Width", "Petal.Length",
"Petal.Width"
)
# Although not needed in this first try, necessary to easily
# add new values later:
for (k in 1:ncol(attributes)) {
attributes[[k]] <- as.character(attributes[[k]])
}
fit_pmml <- add_mining_field_attributes(fit_pmml, attributes, namespace = "4_4")
cleanEx()
nameEx("add_output_field")
### * add_output_field
flush(stderr()); flush(stdout())
### Name: add_output_field
### Title: Add Output nodes to a PMML object.
### Aliases: add_output_field
### ** Examples
# Load the standard iris dataset
data(iris)
# Create a linear model and convert it to PMML
mod <- lm(Sepal.Length ~ ., iris)
pmod <- pmml(mod)
# Create additional output nodes
onodes0 <- make_output_nodes(
name = list("OutputField", "OutputField"),
attributes = list(list(
name = "dbl",
optype = "continuous"
), NULL),
expression = list("ln(x)", "ln(x/(1-x))")
)
onodes2 <- make_output_nodes(
name = list("OutputField", "OutputField"),
attributes = list(
list(
name = "F1",
dataType = "double", optype = "continuous"
),
list(name = "F2")
)
)
# Create new pmml objects with the output nodes appended
pmod2 <- add_output_field(
xml_model = pmod, outputNodes = onodes2, at = "End",
xformText = NULL, nodeName = NULL, attributes = NULL,
whichOutput = 1
)
pmod2 <- add_output_field(
xml_model = pmod, outputNodes = onodes0, at = "End",
xformText = NULL, nodeName = NULL,
attributes = NULL, whichOutput = 1
)
# Create nodes with attributes and transformations
pmod3 <- add_output_field(xml_model = pmod2, outputNodes = onodes2, at = 2)
pmod4 <- add_output_field(
xml_model = pmod2, xformText = list("exp(x) && !x"),
nodeName = "Predicted_Sepal.Length"
)
att <- list(datype = "dbl", optpe = "dsc")
pmod5 <- add_output_field(
xml_model = pmod2, nodeName = "Predicted_Sepal.Length",
attributes = att
)
cleanEx()
nameEx("audit")
### * audit
flush(stderr()); flush(stdout())
### Name: audit
### Title: Audit: artificially constructed dataset
### Aliases: audit
### Keywords: datasets
### ** Examples
data(audit, package = "pmml")
cleanEx()
nameEx("file_to_xml_node")
### * file_to_xml_node
flush(stderr()); flush(stdout())
### Name: file_to_xml_node
### Title: Read in a file and parse it into an object of type XMLNode.
### Aliases: file_to_xml_node
### Keywords: interface
### ** Examples
## Not run:
##D # Define some transformations:
##D iris_box <- xform_wrap(iris)
##D iris_box <- xform_z_score(iris_box, xform_info = "column1->d1")
##D iris_box <- xform_z_score(iris_box, xform_info = "column2->d2")
##D
##D # Make a LocalTransformations element and save it to an external file:
##D pmml_trans <- pmml(NULL, transforms = iris_box)
##D write(toString(pmml_trans), file = "xform_iris.pmml")
##D
##D # Later, we may need to read in the PMML model into R
##D # 'lt' below is now a XML Node, as opposed to a string:
##D lt <- file_to_xml_node("xform_iris.pmml")
## End(Not run)
cleanEx()
nameEx("function_to_pmml")
### * function_to_pmml
flush(stderr()); flush(stdout())
### Name: function_to_pmml
### Title: Convert an R expression to PMML.
### Aliases: function_to_pmml
### ** Examples
# Operator precedence and parenthesis
func_pmml <- function_to_pmml("1 + 3/5 - (4 * 2)")
# Nested arbitrary functions
func_pmml <- function_to_pmml("foo(bar(x)) - bar(foo(y-z))")
# If-else expression
func_pmml <- function_to_pmml("if (x==3) { 3 } else { 0 }")
# If-else with boolean output
func_pmml <- function_to_pmml("if (x==3) { TRUE } else { FALSE }")
# Function with string argument types
func_pmml <- function_to_pmml("colors('red','green','blue')")
# Sign in front of expression
func_pmml <- function_to_pmml("-(x/y)")
cleanEx()
nameEx("houseVotes84")
### * houseVotes84
flush(stderr()); flush(stdout())
### Name: houseVotes84
### Title: Modified 1984 United States Congressional Voting Records
### Database
### Aliases: houseVotes84
### Keywords: datasets
### ** Examples
data(houseVotes84, package = "pmml")
cleanEx()
nameEx("make_intervals")
### * make_intervals
flush(stderr()); flush(stdout())
### Name: make_intervals
### Title: Create Interval elements, most likely to add to a DataDictionary
### element.
### Aliases: make_intervals
### ** Examples
# make 3 Interval elements
# we define the 3 Intervals as ,1] (1,2) and [2,
mi <- make_intervals(
list("openClosed", "openOpen", "closedOpen"),
list(NULL, 1, 2), list(1, 2, NULL)
)
cleanEx()
nameEx("make_output_nodes")
### * make_output_nodes
flush(stderr()); flush(stdout())
### Name: make_output_nodes
### Title: Add Output nodes to a PMML object.
### Aliases: make_output_nodes
### ** Examples
# Make two nodes, one with attributes
two_nodes <- make_output_nodes(
name = list("OutputField", "OutputField"),
attributes = list(list(name = "dbl", optype = "continuous"), NULL),
expression = list("ln(x)", "ln(x/(1-x))")
)
cleanEx()
nameEx("make_values")
### * make_values
flush(stderr()); flush(stdout())
### Name: make_values
### Title: Create Values element, most likely to add to a DataDictionary
### element.
### Aliases: make_values
### ** Examples
# define 3 values, none with a 'displayValue' attribute and 1 value
# defined as 'invalid'. The 2nd one is 'valid' by default.
mv <- make_values(
list(1.1, 2.2, 3.3), list(NULL, NULL, NULL),
list("valid", NULL, "invalid")
)
cleanEx()
nameEx("pmml.ARIMA")
### * pmml.ARIMA
flush(stderr()); flush(stdout())
### Name: pmml.ARIMA
### Title: Generate PMML for an ARIMA object the *forecast* package.
### Aliases: pmml.ARIMA
### ** Examples
library(forecast)
# non-seasonal model
data("WWWusage")
mod <- Arima(WWWusage, order = c(3, 1, 1))
mod_pmml <- pmml(mod)
# seasonal model
data("JohnsonJohnson")
mod_02 <- Arima(JohnsonJohnson,
order = c(1, 1, 1),
seasonal = c(1, 1, 1)
)
mod_02_pmml <- pmml(mod_02)
# non-seasonal model exported as StateSpaceModel
data("WWWusage")
mod <- Arima(WWWusage, order = c(3, 1, 1))
mod_pmml <- pmml(mod, ts_type = "statespace")
cleanEx()
nameEx("pmml")
### * pmml
flush(stderr()); flush(stdout())
### Name: pmml
### Title: Generate the PMML representation for R objects.
### Aliases: pmml
### ** Examples
# Build an lm model
iris_lm <- lm(Sepal.Length ~ ., data = iris)
# Convert to pmml
iris_lm_pmml <- pmml(iris_lm)
# Create a data transformation object
iris_trans <- xform_wrap(iris)
# Transform the 'Sepal.Length' variable
iris_trans <- xform_min_max(iris_trans, xform_info = "column1->d_sl")
# Output the tranformation in PMML format
iris_trans_pmml <- pmml(NULL, transforms = iris_trans)
cleanEx()
nameEx("pmml.ada")
### * pmml.ada
flush(stderr()); flush(stdout())
### Name: pmml.ada
### Title: Generate the PMML representation for an ada object from the
### package 'ada'.
### Aliases: pmml.ada
### ** Examples
library(ada)
data(audit)
fit <- ada(Adjusted ~ Employment + Education + Hours + Income, iter = 3, audit)
fit_pmml <- pmml(fit)
cleanEx()
nameEx("pmml.cv.glmnet")
### * pmml.cv.glmnet
flush(stderr()); flush(stdout())
### Name: pmml.cv.glmnet
### Title: Generate the PMML representation for a cv.glmnet object from the
### package 'glmnet'.
### Aliases: pmml.cv.glmnet
### ** Examples
library(glmnet)
# Create a simple predictor (x) and response(y) matrices:
x <- matrix(rnorm(100 * 20), 100, 20)
y <- rnorm(100)
# Build a simple gaussian model:
model1 <- cv.glmnet(x, y)
# Output the model in PMML format:
model1_pmml <- pmml(model1)
# Shift y between 0 and 1 to create a poisson response:
y <- y - min(y)
# Give the predictor variables names (default values are V1,V2,...):
name <- NULL
for (i in 1:20) {
name <- c(name, paste("variable", i, sep = ""))
}
colnames(x) <- name
# Create a simple poisson model:
model2 <- cv.glmnet(x, y, family = "poisson")
# Output the regression model in PMML format at the lambda
# parameter = 0.006:
model2_pmml <- pmml(model2, s = 0.006)
cleanEx()
nameEx("pmml.gbm")
### * pmml.gbm
flush(stderr()); flush(stdout())
### Name: pmml.gbm
### Title: Generate the PMML representation for a gbm object from the
### package 'gbm'.
### Aliases: pmml.gbm
### ** Examples
library(gbm)
data(audit)
mod <- gbm(Adjusted ~ .,
data = audit[, -c(1, 4, 6, 9, 10, 11, 12)],
n.trees = 3, interaction.depth = 4
)
mod_pmml <- pmml(mod)
# Classification example:
mod2 <- gbm(Species ~ .,
data = iris, n.trees = 2,
interaction.depth = 3, distribution = "multinomial"
)
# The PMML will include a regression model to read the gbm object outputs
# and convert to a "response" prediction type.
mod2_pmml <- pmml(mod2)
cleanEx()
nameEx("pmml.glm")
### * pmml.glm
flush(stderr()); flush(stdout())
### Name: pmml.glm
### Title: Generate the PMML representation for a glm object from the
### package 'stats'.
### Aliases: pmml.glm
### ** Examples
data(iris)
mod <- glm(Sepal.Length ~ ., data = iris, family = "gaussian")
mod_pmml <- pmml(mod)
rm(mod, mod_pmml)
data(audit)
mod <- glm(Adjusted ~ Age + Employment + Education + Income, data = audit, family = binomial(logit))
mod_pmml <- pmml(mod)
rm(mod, mod_pmml)
# Create a new 2-class target from a 3-class variable:
data(iris)
dat <- iris[, 1:4]
# Add a new 2-class target "Species_setosa" before passing it to glm():
dat$Species_setosa <- iris$Species == "setosa"
mod <- glm(Species_setosa ~ ., data = dat, family = binomial(logit))
mod_pmml <- pmml(mod)
rm(dat, mod, mod_pmml)
cleanEx()
nameEx("pmml.hclust")
### * pmml.hclust
flush(stderr()); flush(stdout())
### Name: pmml.hclust
### Title: Generate the PMML representation for a hclust object from the
### package 'amap'.
### Aliases: pmml.hclust
### ** Examples
# Cluster the 4 numeric variables of the iris dataset.
library(amap)
library(rattle)
model <- hclusterpar(iris[, -5])
# Get the information about the cluster centers. The last
# parameter of the function used is the number of clusters
# desired.
centerInfo <- centers.hclust(iris[, -5], model, 3)
# Convert to pmml
model_pmml <- pmml(model, centers = centerInfo)
cleanEx()
nameEx("pmml.iForest")
### * pmml.iForest
flush(stderr()); flush(stdout())
### Name: pmml.iForest
### Title: Generate PMML for an iForest object from the *isofor* package.
### Aliases: pmml.iForest
### ** Examples
## Not run:
##D
##D # Build iForest model using iris dataset. Create an isolation
##D # forest with 10 trees. Sample 30 data points at a time from
##D # the iris dataset to fit the trees.
##D library(isofor)
##D data(iris)
##D mod <- iForest(iris, nt = 10, phi = 30)
##D
##D # Convert to PMML:
##D mod_pmml <- pmml(mod)
## End(Not run)
cleanEx()
nameEx("pmml.kmeans")
### * pmml.kmeans
flush(stderr()); flush(stdout())
### Name: pmml.kmeans
### Title: Generate the PMML representation for a kmeans object from the
### package 'stats'.
### Aliases: pmml.kmeans
### ** Examples
ds <- rbind(
matrix(rnorm(100, sd = 0.3), ncol = 2),
matrix(rnorm(100, mean = 1, sd = 0.3), ncol = 2)
)
colnames(ds) <- c("Dimension1", "Dimension2")
cl <- kmeans(ds, 2)
cl_pmml <- pmml(cl)
cleanEx()
nameEx("pmml.ksvm")
### * pmml.ksvm
flush(stderr()); flush(stdout())
### Name: pmml.ksvm
### Title: Generate the PMML representation for a ksvm object from the
### package 'kernlab'.
### Aliases: pmml.ksvm
### ** Examples
# Train a support vector machine to perform classification.
library(kernlab)
model <- ksvm(Species ~ ., data = iris)
model_pmml <- pmml(model, dataset = iris)
cleanEx()
nameEx("pmml.lm")
### * pmml.lm
flush(stderr()); flush(stdout())
### Name: pmml.lm
### Title: Generate the PMML representation for an lm object from the
### package 'stats'.
### Aliases: pmml.lm
### ** Examples
fit <- lm(Sepal.Length ~ ., data = iris)
fit_pmml <- pmml(fit)
cleanEx()
nameEx("pmml.naiveBayes")
### * pmml.naiveBayes
flush(stderr()); flush(stdout())
### Name: pmml.naiveBayes
### Title: Generate the PMML representation for a naiveBayes object from
### the package 'e1071'.
### Aliases: pmml.naiveBayes
### ** Examples
library(e1071)
data(houseVotes84)
house <- na.omit(houseVotes84)
model <- naiveBayes(Class ~ V1 + V2 + V3, data = house, threshold = 0.003)
model_pmml <- pmml(model, dataset = house, predicted_field = "Class")
cleanEx()
nameEx("pmml.neighbr")
### * pmml.neighbr
flush(stderr()); flush(stdout())
### Name: pmml.neighbr
### Title: Generate PMML for a neighbr object from the *neighbr* package.
### Aliases: pmml.neighbr
### ** Examples
## Not run:
##D
##D # Continuous features with continuous target, categorical target,
##D # and neighbor ranking:
##D
##D library(neighbr)
##D data(iris)
##D
##D # Add an ID column to the data for neighbor ranking:
##D iris$ID <- c(1:150)
##D
##D # Train set contains all predicted variables, features, and ID column:
##D train_set <- iris[1:140, ]
##D
##D # Omit predicted variables or ID column from test set:
##D test_set <- iris[141:150, -c(4, 5, 6)]
##D
##D fit <- knn(
##D train_set = train_set, test_set = test_set,
##D k = 3,
##D categorical_target = "Species",
##D continuous_target = "Petal.Width",
##D comparison_measure = "squared_euclidean",
##D return_ranked_neighbors = 3,
##D id = "ID"
##D )
##D
##D fit_pmml <- pmml(fit)
##D
##D
##D # Logical features with categorical target and neighbor ranking:
##D
##D library(neighbr)
##D data("houseVotes84")
##D
##D # Remove any rows with N/A elements:
##D dat <- houseVotes84[complete.cases(houseVotes84), ]
##D
##D # Change all {yes,no} factors to {0,1}:
##D feature_names <- names(dat)[!names(dat) %in% c("Class", "ID")]
##D for (n in feature_names) {
##D levels(dat[, n])[levels(dat[, n]) == "n"] <- 0
##D levels(dat[, n])[levels(dat[, n]) == "y"] <- 1
##D }
##D
##D # Change factors to numeric:
##D for (n in feature_names) {
##D dat[, n] <- as.numeric(levels(dat[, n]))[dat[, n]]
##D }
##D
##D # Add an ID column for neighbor ranking:
##D dat$ID <- c(1:nrow(dat))
##D
##D # Train set contains features, predicted variable, and ID:
##D train_set <- dat[1:225, ]
##D
##D # Test set contains features only:
##D test_set <- dat[226:232, !names(dat) %in% c("Class", "ID")]
##D
##D fit <- knn(
##D train_set = train_set, test_set = test_set,
##D k = 5,
##D categorical_target = "Class",
##D comparison_measure = "jaccard",
##D return_ranked_neighbors = 3,
##D id = "ID"
##D )
##D
##D fit_pmml <- pmml(fit)
## End(Not run)
cleanEx()
nameEx("pmml.nnet")
### * pmml.nnet
flush(stderr()); flush(stdout())
### Name: pmml.nnet
### Title: Generate the PMML representation for a nnet object from package
### 'nnet'.
### Aliases: pmml.nnet
### ** Examples
library(nnet)
fit <- nnet(Species ~ ., data = iris, size = 4)
fit_pmml <- pmml(fit)
rm(fit)
cleanEx()
nameEx("pmml.randomForest")
### * pmml.randomForest
flush(stderr()); flush(stdout())
### Name: pmml.randomForest
### Title: Generate the PMML representation for a randomForest object from
### the package 'randomForest'.
### Aliases: pmml.randomForest
### ** Examples
# Build a randomForest model
library(randomForest)
iris_rf <- randomForest(Species ~ ., data = iris, ntree = 20)
# Convert to pmml
iris_rf_pmml <- pmml(iris_rf)
rm(iris_rf)
cleanEx()
nameEx("pmml.rfsrc")
### * pmml.rfsrc
flush(stderr()); flush(stdout())
### Name: pmml.rfsrc
### Title: Generate the PMML representation for an rfsrc object from the
### package 'randomForestSRC'.
### Aliases: pmml.rfsrc
### ** Examples
## Not run:
##D library(randomForestSRC)
##D
##D data(veteran)
##D
##D veteran_mod <- rfsrc(Surv(time, status) ~ .,
##D data = veteran,
##D ntree = 5, forest = TRUE,
##D membership = TRUE
##D )
##D
##D veteran_mod_pmml <- pmml(veteran_mod)
## End(Not run)
cleanEx()
nameEx("pmml.rpart")
### * pmml.rpart
flush(stderr()); flush(stdout())
### Name: pmml.rpart
### Title: Generate the PMML representation for an rpart object from the
### package 'rpart'.
### Aliases: pmml.rpart
### ** Examples
library(rpart)
fit <- rpart(Species ~ ., data = iris)
fit_pmml <- pmml(fit)
cleanEx()
nameEx("pmml.svm")
### * pmml.svm
flush(stderr()); flush(stdout())
### Name: pmml.svm
### Title: Generate the PMML representation of an svm object from the
### 'e1071' package.
### Aliases: pmml.svm
### ** Examples
## Not run:
##D library(e1071)
##D data(iris)
##D
##D # Classification with a polynomial kernel
##D fit <- svm(Species ~ ., data = iris, kernel = "polynomial")
##D fit_pmml <- pmml(fit)
##D
##D # Regression
##D fit <- svm(Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width, data = iris)
##D fit_pmml <- pmml(fit)
##D
##D # Anomaly detection with one-classification
##D fit <- svm(iris[, 1:4],
##D y = NULL,
##D type = "one-classification"
##D )
##D fit_pmml <- pmml(fit, dataset = iris[, 1:4])
##D
##D # Inlier detection with one-classification
##D fit <- svm(iris[, 1:4],
##D y = NULL,
##D type = "one-classification",
##D detect_anomaly = FALSE
##D )
##D fit_pmml <- pmml(fit, dataset = iris[, 1:4])
## End(Not run)
cleanEx()
nameEx("pmml.xgb.Booster")
### * pmml.xgb.Booster
flush(stderr()); flush(stdout())
### Name: pmml.xgb.Booster
### Title: Generate PMML for a xgb.Booster object from the package
### 'xgboost'.
### Aliases: pmml.xgb.Booster
### ** Examples
## Not run:
##D # Example using the xgboost package example model.
##D
##D library(xgboost)
##D data(agaricus.train, package = "xgboost")
##D data(agaricus.test, package = "xgboost")
##D
##D train <- agaricus.train
##D test <- agaricus.test
##D
##D model1 <- xgboost(
##D data = train$data, label = train$label,
##D max_depth = 2, eta = 1, nthread = 2,
##D nrounds = 2, objective = "binary:logistic"
##D )
##D
##D # Save the tree information in an external file:
##D xgb.dump(model1, "model1.dumped.trees")
##D
##D # Convert to PMML:
##D model1_pmml <- pmml(model1,
##D input_feature_names = colnames(train$data),
##D output_label_name = "prediction1",
##D output_categories = c("0", "1"),
##D xgb_dump_file = "model1.dumped.trees"
##D )
##D
##D # Multinomial model using iris data:
##D model2 <- xgboost(
##D data = as.matrix(iris[, 1:4]),
##D label = as.numeric(iris[, 5]) - 1,
##D max_depth = 2, eta = 1, nthread = 2, nrounds = 2,
##D objective = "multi:softprob", num_class = 3
##D )
##D
##D # Save the tree information in an external file:
##D xgb.dump(model2, "model2.dumped.trees")
##D
##D # Convert to PMML:
##D model2_pmml <- pmml(model2,
##D input_feature_names = colnames(as.matrix(iris[, 1:4])),
##D output_label_name = "Species",
##D output_categories = c(1, 2, 3), xgb_dump_file = "model2.dumped.trees"
##D )
## End(Not run)
cleanEx()
nameEx("rename_wrap_var")
### * rename_wrap_var
flush(stderr()); flush(stdout())
### Name: rename_wrap_var
### Title: Rename a variable in the xform_wrap transform object.
### Aliases: rename_wrap_var
### Keywords: manip methods utilities
### ** Examples
# Load the standard iris dataset, already built into R
data(iris)
# First wrap the data
iris_box <- xform_wrap(iris)
# We wish to refer to the variables "Sepal.Length" and
# "Sepal.Width" as "SL" and "SW"
iris_box <- rename_wrap_var(wrap_object = iris_box, xform_info = "column1->SL")
iris_box <- rename_wrap_var(wrap_object = iris_box, xform_info = "Sepal.Width->SW")
cleanEx()
nameEx("save_pmml")
### * save_pmml
flush(stderr()); flush(stdout())
### Name: save_pmml
### Title: Save a pmml object as an external PMML file.
### Aliases: save_pmml
### Keywords: interface
### ** Examples
## Not run:
##D # Make a gbm model:
##D library(gbm)
##D data(audit)
##D
##D mod <- gbm(Adjusted ~ .,
##D data = audit[, -c(1, 4, 6, 9, 10, 11, 12)],
##D n.trees = 3,
##D interaction.depth = 4
##D )
##D
##D # Export to PMML:
##D pmod <- pmml(mod)
##D
##D # Save to an external file:
##D save_pmml(pmod, "GBMModel.pmml")
## End(Not run)
cleanEx()
nameEx("xform_discretize")
### * xform_discretize
flush(stderr()); flush(stdout())
### Name: xform_discretize
### Title: Discretize a continuous variable as indicated by interval
### mappings in accordance with the PMML element *Discretize*.
### Aliases: xform_discretize
### Keywords: manip
### ** Examples
# First wrap the data
iris_box <- xform_wrap(iris)
## Not run:
##D # Convert the continuous variable "Sepal.Length" to a discrete
##D # variable "dsl". The intervals to be used for this transformation is
##D # given in a file, "intervals.csv", whose content is, for example,:
##D #
##D # 5],val1
##D # (5:6],22
##D # (6,val2
##D #
##D # This will be used to create a discrete variable named "dsl" of dataType
##D # "string" such that:
##D # if(Sepal.length <= 5) then dsl = "val1"
##D # if((Sepal.Lenght > 5) and (Sepal.Length <= 6)) then dsl = "22"
##D # if(Sepal.Length > 6) then dsl = "val2"
##D #
##D # Give "dsl" the value 0 if the input variable value is missing.
##D iris_box <- xform_discretize(iris_box,
##D xform_info = "[Sepal.Length -> dsl][double -> string]",
##D table = "intervals.csv", map_missing_to = "0"
##D )
## End(Not run)
# A different transformation using a list of data frames, of size 1:
t <- list()
m <- data.frame(rbind(
c(
"Petal.Length", "dis_pl", "leftInterval", "leftValue",
"rightInterval", "rightValue"
),
c(
"double", "integer", "string", "double", "string",
"double"
),
c("0)", 0, "open", NA, "Open", 0),
c(NA, 1, "closed", 0, "Open", 1),
c(NA, 2, "closed", 1, "Open", 2),
c(NA, 3, "closed", 2, "Open", 3),
c(NA, 4, "closed", 3, "Open", 4),
c("[4", 5, "closed", 4, "Open", NA)
), stringsAsFactors = TRUE)
# Give column names to make it look nice; not necessary!
colnames(m) <- c(
"Petal.Length", "dis_pl", "leftInterval", "leftValue",
"rightInterval", "rightValue"
)
# A textual representation of the data frame is:
# Petal.Length dis_pl leftInterval leftValue rightInterval rightValue
# 1 Petal.Length dis_pl leftInterval leftValue rightInterval rightValue
# 2 double integer string double string double
# 3 0) 0 open <NA> Open 0
# 4 <NA> 1 closed 0 Open 1
# 5 <NA> 2 closed 1 Open 2
# 6 <NA> 3 closed 2 Open 3
# 7 <NA> 4 closed 3 Open 4
# 8 (4 5 closed 4 Open <NA>
#
# This is a transformation that defines a derived field 'dis_pl'
# which has the integer value '0' if the original field
# 'Petal.Length' has a value less than 0. The derived field has a
# value '1' if the input is greater than or equal to 0 and less
# than 1. Note that the values of the 1st column after row 2 have
# been deliberately given NA values in the middle. This is to
# show that that column is meant for a textual representation of
# the transformation as defined for the method involving external
# files; however in this methodtheir values are not used.
# Add the data frame to a list. The default values and the missing
# values should be given as a vector, each element of the vector
# corresponding to the element at the same index in the list. If
# these values are not given as a vector, they will be used for the
# first list element only.
t[[1]] <- m
def <- c(11)
mis <- c(22)
iris_box <- xform_discretize(iris_box,
xform_info = t, default_value = def,
map_missing_to = mis
)
# Make a simple model to see the effect.
fit <- lm(Petal.Width ~ ., iris_box$data[, -5])
fit_pmml <- pmml(fit, transforms = iris_box)
cleanEx()
nameEx("xform_function")
### * xform_function
flush(stderr()); flush(stdout())
### Name: xform_function
### Title: Add a function transformation to a xform_wrap object.
### Aliases: xform_function
### ** Examples
# Load the standard iris dataset:
data(iris)
# Wrap the data:
iris_box <- xform_wrap(iris)
# Perform a transform on the Sepal.Length field:
# the value is squared and then divided by 100
iris_box <- xform_function(iris_box,
orig_field_name = "Sepal.Length",
new_field_name = "Sepal.Length.Transformed",
expression = "(Sepal.Length^2)/100"
)
# Combine two fields to create another new feature:
iris_box <- xform_function(iris_box,
orig_field_name = "Sepal.Width, Petal.Width",
new_field_name = "Width.Sum",
expression = "Sepal.Width + Sepal.Length"
)
# Create linear model using the derived features:
fit <- lm(Petal.Length ~
Sepal.Length.Transformed + Width.Sum, data = iris_box$data)
# Create pmml from the fit:
fit_pmml <- pmml(fit, transform = iris_box)
cleanEx()
nameEx("xform_map")
### * xform_map
flush(stderr()); flush(stdout())
### Name: xform_map
### Title: Implement a map between discrete values in accordance with the
### PMML element *MapValues*.
### Aliases: xform_map
### Keywords: manip
### ** Examples
# Load the standard audit dataset, part of the pmml package:
data(audit)
# First wrap the data:
audit_box <- xform_wrap(audit)
## Not run:
##D # One of the variables, "Sex", has 2 possible values: "Male"
##D # and "Female". If these string values have to be mapped to a
##D # numeric value, a file has to becreated, say "MapGender.csv"
##D # whose content is, for example:
##D #
##D # Male,1
##D # Female,2
##D #
##D # Transform the variable "Gender" to a variable "d_gender"
##D # such that:
##D # if Sex = "Male" then d_sex = "1"
##D # if Sex = "Female" then d_sex = "0"
##D #
##D # Give "d_sex" the value 0 if the input variable value is
##D # missing.
##D audit_box <- xform_map(audit_box,
##D xform_info = "[Sex -> d_sex][string->integer]",
##D table = "MapGender.csv", map_missing_to = "0"
##D )
## End(Not run)
# Same as above, with an extra variable, but using data frames.
# The top 2 rows gives the variable names and their data types.
# The rest represent the map. So for example, the third row
# indicates that when the input variable "Sex" has the value
# "Male" and the input variable "Employment" has
# the value "PSLocal", the output variable "d_sex" should have
# the value 1.
t <- list()
m <- data.frame(
c("Sex", "string", "Male", "Female"),
c("Employment", "string", "PSLocal", "PSState"),
c("d_sex", "integer", 1, 0),
stringsAsFactors = TRUE
)
t[[1]] <- m
# Give default value as a vector and missing value as a string,
# this is only possible as there is only one map defined. If
# default values is not given, it will simply not be given in
# the PMML file as well. In general, the default values and the
# missing values should be given as a vector, each element of
# the vector corresponding to the element at the same index in
# the list. If these values are not given as a vector, they will
# be used for the first list element only.
audit_box <- xform_map(audit_box,
xform_info = t, default_value = c(3),
map_missing_to = "2"
)
# check what the pmml looks like
fit <- lm(Adjusted ~ ., data = audit_box$data)
fit_pmml <- pmml(fit, transforms = audit_box)
cleanEx()
nameEx("xform_min_max")
### * xform_min_max
flush(stderr()); flush(stdout())
### Name: xform_min_max
### Title: Normalize continuous values in accordance with the PMML element
### *NormContinuous*.
### Aliases: xform_min_max
### Keywords: manip
### ** Examples
# Load the standard iris dataset:
data(iris)
# First wrap the data:
iris_box <- xform_wrap(iris)
# Normalize all numeric variables of the loaded iris dataset to lie
# between 0 and 1. These would normalize "Sepal.Length", "Sepal.Width",
# "Petal.Length", "Petal.Width" to the 4 new derived variables named
# derived_Sepal.Length, derived_Sepal.Width, derived_Petal.Length,
# derived_Petal.Width.
iris_box_1 <- xform_min_max(iris_box)
# Normalize the 1st column values of the dataset (Sepal.Length) to lie
# between 0 and 1 and give the derived variable the name "dsl"
iris_box_1 <- xform_min_max(iris_box, xform_info = "column1 -> dsl")
# Repeat the above operation; adding the new transformed variable to
# the iris_box object
iris_box <- xform_min_max(iris_box, xform_info = "column1 -> dsl")
# Transform Sepal.Width(the 2nd column)
# The new transformed variable will be given the default name
# "derived_Sepal.Width"
iris_box_3 <- xform_min_max(iris_box, xform_info = "column2")
# Repeat the same operation as above, this time using the variable name
iris_box_4 <- xform_min_max(iris_box, xform_info = "Sepal.Width")
# Repeat the same operation as above, assign the transformed variable,
# "derived_Sepal.Width". the value of 0.5 if the input value of the
# "Sepal.Width" variable is missing
iris_box_5 <- xform_min_max(iris_box, xform_info = "Sepal.Width", "map_missing_to=0.5")
# Transform Sepal.Width(the 2nd column) to lie between 2 and 3.
# The new transformed variable will be given the default name
# "derived_Sepal.Width"
iris_box_6 <- xform_min_max(iris_box, xform_info = "column2->[2,3]")
# Repeat the above transformation, this time the transformed variable
# lies between 0 and 10
iris_box_7 <- xform_min_max(iris_box, xform_info = "column2->[,10]")
cleanEx()
nameEx("xform_norm_discrete")
### * xform_norm_discrete
flush(stderr()); flush(stdout())
### Name: xform_norm_discrete
### Title: Normalize discrete values in accordance with the PMML element
### *NormDiscrete*.
### Aliases: xform_norm_discrete
### Keywords: manip
### ** Examples
# Load the standard iris dataset, already available in R
data(iris)
# First wrap the data
iris_box <- xform_wrap(iris)
# Discretize the "Species" variable. This will find all possible
# values of the "Species" variable and define new variables. The
# parameter name used here should be replaced by the new preferred
# parameter name as shown in the next example below.
#
# "Species_setosa" such that it is 1 if
# "Species" equals "setosa", else 0;
# "Species_versicolor" such that it is 1 if
# "Species" equals "versicolor", else 0;
# "Species_virginica" such that it is 1 if
# "Species" equals "virginica", else 0
iris_box <- xform_norm_discrete(iris_box, input_var = "Species")
# Exact same operation performed with a different parameter name.
# Use of this new parameter is the preferred method as the previous
# parameter will be deprecated soon.
iris_box <- xform_wrap(iris)
iris_box <- xform_norm_discrete(iris_box, xform_info = "Species")
cleanEx()
nameEx("xform_wrap")
### * xform_wrap
flush(stderr()); flush(stdout())
### Name: xform_wrap
### Title: Wrap data in a data transformations object.
### Aliases: xform_wrap
### ** Examples
# Load the standard iris dataset
data(iris)
# Make a object for the iris dataset to use with
# transformation functions
iris_box <- xform_wrap(iris)
# Output only the transformations in PMML format.
# This example will output just an empty "LocalTransformations"
# element as no transformations were performed.
trans_pmml <- pmml(NULL, transforms = iris_box)
# The following will also work
trans_pmml_2 <- pmml(, transforms = iris_box)
cleanEx()
nameEx("xform_z_score")
### * xform_z_score
flush(stderr()); flush(stdout())
### Name: xform_z_score
### Title: Perform a z-score normalization on continuous values in
### accordance with the PMML element *NormContinuous*.
### Aliases: xform_z_score
### Keywords: manip methods utilities
### ** Examples
# Load the standard iris dataset, already built into R
data(iris)
# First wrap the data
iris_box <- xform_wrap(iris)
# Perform a z-transform on all numeric variables of the loaded
# iris dataset. These would be Sepal.Length, Sepal.Width,
# Petal.Length, and Petal.Width. The 4 new derived variables
# will be named derived_Sepal.Length, derived_Sepal.Width,
# derived_Petal.Length, and derived_Petal.Width
iris_box_1 <- xform_z_score(iris_box)
# Perform a z-transform on the 1st column of the dataset (Sepal.Length)
# and give the derived variable the name "dsl"
iris_box_2 <- xform_z_score(iris_box, xform_info = "column1 -> dsl")
# Repeat the above operation; adding the new transformed variable
# to the iris_box object
iris_box <- xform_z_score(iris_box, xform_info = "column1 -> dsl")
# Transform Sepal.Width(the 2nd column)
# The new transformed variable will be given the default name
# "derived_Sepal.Width"
iris_box_3 <- xform_z_score(iris_box, xform_info = "column2")
# Repeat the same operation as above, this time using the variable
# name
iris_box_4 <- xform_z_score(iris_box, xform_info = "Sepal.Width")
# Repeat the same operation as above, assign the transformed variable
# "derived_Sepal.Width". The value of 1.0 if the input value of the
# "Sepal.Width" variable is missing. Add the new information to the
# iris_box object.
iris_box <- xform_z_score(iris_box,
xform_info = "Sepal.Width",
"map_missing_to=1.0"
)
### * <FOOTER>
###
cleanEx()
options(digits = 7L)
base::cat("Time elapsed: ", proc.time() - base::get("ptime", pos = 'CheckExEnv'),"\n")
grDevices::dev.off()
###
### Local variables: ***
### mode: outline-minor ***
### outline-regexp: "\\(> \\)?### [*]+" ***
### End: ***
quit('no')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.