tests/testthat/test_pmml_integration_other.R

data(iris)
data(audit)
data("WWWusage")
data("AirPassengers")
data("USAccDeaths")
data("JohnsonJohnson")
data("sunspots")
audit_factor <- audit
audit_factor[, 13] <- as.factor(audit_factor[, 13])
iris_p <- read.csv("iris.csv", stringsAsFactors = TRUE)
audit <- na.omit(audit)
elnino <- read.csv("elnino.csv", stringsAsFactors = TRUE)
heart <- read.csv("heart.csv", stringsAsFactors = TRUE)
glm_issue3543_data <- read.csv("glm_issue3543_data.csv", stringsAsFactors = TRUE)
credit_class <- read.csv("credit_class.csv", stringsAsFactors = TRUE)
covtype2 <- read.csv("covtype2.csv", header = TRUE, stringsAsFactors = TRUE)
credit <- read.csv("credit.csv", stringsAsFactors = TRUE)
credit_class_01 <- read.csv("credit_class_01.csv", stringsAsFactors = TRUE)
audit_nor_logical <- na.omit(read.csv("audit_nor_logical.csv", stringsAsFactors = TRUE))
audit_nor <- na.omit(read.csv("audit_nor.csv", stringsAsFactors = TRUE))
audit_nor_fake_logical <- na.omit(read.csv("audit_nor_fake_logical.csv", stringsAsFactors = TRUE))
random_data_small <- read.csv("random_data_small.csv", stringsAsFactors = TRUE)
iris_nor <- read.csv("iris_nor.csv", stringsAsFactors = TRUE)
bank <- na.omit(read.csv("bank.csv", stringsAsFactors = TRUE))
audit_r_build_in <- na.omit(read.csv("audit_r_build_in.csv", stringsAsFactors = TRUE))
insurance <- na.omit(read.csv("insurance.csv", stringsAsFactors = TRUE))
iris_bin <- read.csv("iris_bin.csv", stringsAsFactors = TRUE)
house_votes <- na.omit(read.csv("house_votes_84.csv", stringsAsFactors = TRUE))
iris_mini_dot <- read.csv("iris_mini_dot.csv", stringsAsFactors = TRUE)
petfood <- read.csv("petfood.csv", stringsAsFactors = TRUE)
job_cat <- read.csv("job_cat.csv", stringsAsFactors = TRUE)
job_cat_index <- read.csv("job_cat_index.csv", stringsAsFactors = TRUE)
iris_nor_logical <- read.csv("iris_nor_logical.csv", stringsAsFactors = TRUE)
factor_40k <- read.csv("factor_40k.csv", stringsAsFactors = TRUE)
numeric_10k <- na.omit(read.csv("numeric_10k.csv", stringsAsFactors = TRUE))
factor_10k <- read.csv("factor_10k.csv", stringsAsFactors = TRUE)
numeric_no_na_10k <- read.csv("numeric_no_na_10k.csv", stringsAsFactors = TRUE)


forecast_with_cpi <- function(model, h) {
  # create data frame of point forecast and CPI
  pr <- forecast(model, h)
  dframe <- data.frame(
    "Predicted_ts_value" = as.numeric(pr$mean),
    "cpi_80_lower" = as.numeric(pr$lower[, 1]),
    "cpi_80_upper" = as.numeric(pr$upper[, 1]),
    "cpi_95_lower" = as.numeric(pr$lower[, 2]),
    "cpi_95_upper" = as.numeric(pr$upper[, 2])
  )
  return(dframe)
}

expect_equal_df <- function(z_pred, r_pred) {
  # expect_equal for data frames with point and CPI

  # rearrange z_pred columns into same order as that of r_pred
  z_pred <- z_pred[c(
    "Predicted_ts_value", "cpi_80_lower",
    "cpi_80_upper", "cpi_95_lower", "cpi_95_upper"
  )]

  expect_equal_nn(z_pred, r_pred)
}

expect_equal_df_2 <- function(z_pred_out, r_pred) {
  # expect_equal for data frames with point and CPI where Zementis output is a JSON string.

  z_pred_out <- z_pred_out[NROW(z_pred_out), ] # only use the last row

  # rearrange z_pred columns into same order as that of r_pred;
  # using names(r_pred) accounts for names with different CPI
  z_pred_out <- z_pred_out[names(r_pred)]

  z_pred_transf <- data.frame(matrix(NA,
    nrow = NCOL(z_pred_out[[1]]),
    ncol = NCOL(r_pred)
  ), stringsAsFactors = TRUE)
  colnames(z_pred_transf) <- names(r_pred)

  for (x in names(r_pred)) {
    z_pred_transf[x] <- as.numeric(t(z_pred_out[[x]]))
  }

  expect_equal_nn(z_pred_transf, r_pred)
}

expect_equal_nn <- function(...) {
  # expect_equal without name checking
  expect_equal(..., check.names = FALSE)
}

svm_ad_predict <- function(fit, newdata) {
  # For e1071::svm anomaly detection models, format the output correctly for matching R with Zementis Server.
  # Output "svm_predict_anomaly" and "anomaly_score" fields for comparison with Zementis Server.
  preds <- predict(fit, newdata = newdata, decision.values = TRUE)
  svm_predict_anomaly <- as.logical(preds)
  anomaly_score <- attr(preds, "decision.values")
  preds_df <- data.frame(svm_predict_anomaly, anomaly_score, stringsAsFactors = FALSE)
  names(preds_df) <- c("svm_predict_anomaly", "anomaly_score")
  return(preds_df)
}

single_col_h_df <- function(h) {
  # For ARIMA models, create a data frame with a single column of h (number of steps ahead) values.
  dframe <- data.frame("h" = c(1:h), stringsAsFactors = TRUE)
  return(dframe)
}

h_20 <- single_col_h_df(20)
h_20_one_line <- data.frame("h" = c(20))

# # Temp files for xgboost
# xgb_tmp_01_save <- tempfile()
# xgb_tmp_01_dump <- tempfile()
#
#
# teardown(unlink(c(xgb_tmp_01_save, xgb_tmp_01_dump), recursive = TRUE))

test_that("AnomalyDetectionModel/iForest PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("isofor")
  library(isofor)

  anomaly_threshold <- 0.6
  fit <- iForest(iris, nt = 7, phi = 30)
  p_fit <- pmml(fit, anomaly_threshold = anomaly_threshold)
  r_pred <- predict(fit, iris)
  r_pred_anomaly <- r_pred >= anomaly_threshold
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$anomalyScore, r_pred, tolerance = 1e-5)
  expect_equal_nn(z_pred$outputs$anomaly, r_pred_anomaly)

  anomaly_threshold <- 0.5
  fit <- iForest(as.matrix(iris[, 1:4]), nt = 6, phi = 27)
  p_fit <- pmml(fit, anomaly_threshold = anomaly_threshold)
  r_pred <- predict(fit, as.matrix(iris[, 1:4]))
  r_pred_anomaly <- r_pred >= anomaly_threshold
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris[, 1:4], up_stat$model_name) # cannot use matrix, must be data frame or file
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$anomalyScore, r_pred, tolerance = 1e-5)
  expect_equal_nn(z_pred$outputs$anomaly, r_pred_anomaly)


  anomaly_threshold <- 0.4
  box_obj <- xform_wrap(audit[, -1])
  box_obj <- xform_norm_discrete(box_obj, xform_info = "Sex")
  box_obj <- xform_function(box_obj, orig_field_name = "Age,Hours", new_field_name = "Agrs", expression = "Age/Hours")
  fit <- iForest(box_obj$data[, -c(1, 7, 9, 10)], nt = 5, phi = 420)
  p_fit <- pmml(fit, transforms = box_obj, anomaly_threshold = anomaly_threshold)
  r_pred <- predict(fit, box_obj$data[, -c(1, 7, 9, 10)])
  r_pred_anomaly <- r_pred >= anomaly_threshold
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$anomalyScore, r_pred)
  expect_equal_nn(z_pred$outputs$anomaly, r_pred_anomaly)
})


test_that("ClusteringModel/stats kmeans PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  
  skip_if_not_installed("clue")
  library(clue)

  fit <- kmeans(audit[, c(2, 7, 9, 10, 12)], 2)
  p_fit <- pmml(fit)
  r_pred <- sprintf("%.0f", cl_predict(fit, audit[, c(2, 7, 9, 10, 12)]))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit[, c(2, 7, 9, 10, 12)], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  fit <- kmeans(iris[, 1:4], 3)
  p_fit <- pmml(fit)
  r_pred <- sprintf("%.0f", cl_predict(fit, iris[, 1:4]))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, "column1->d1")
  box_obj <- xform_z_score(box_obj, "column2->d2")
  box_obj <- xform_z_score(box_obj, "column3->d3")
  box_obj <- xform_z_score(box_obj, "column4->d4")
  box_obj <- xform_min_max(box_obj, "d1->dd1")
  box_obj <- xform_min_max(box_obj, "d2->dd2")
  box_obj <- xform_min_max(box_obj, "d3->dd3")
  box_obj <- xform_min_max(box_obj, "d4->dd4")
  box_obj <- xform_z_score(box_obj, "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, "dd4->ddd4")
  fit <- kmeans(box_obj$data[, 14:17], 3)
  p_fit <- pmml(fit, transform = box_obj)
  r_pred <- sprintf("%.0f", cl_predict(fit, data = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_map(box_obj, xform_info = "[Species->d_Species][string->double]", table = "iris_class_table.csv", default_value = "-1", map_missing_to = "1")
  box_obj <- xform_norm_discrete(box_obj, input_var = "Species")
  fit <- kmeans(box_obj$data[, 14:21], 3)
  p_fit <- pmml(fit, transform = box_obj)
  r_pred <- sprintf("%.0f", cl_predict(fit, data = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_map(box_obj, xform_info = "[Species->d_Species][string->double]", table = "iris_class_full_name_table.csv", default_value = "-1", map_missing_to = "1")
  box_obj <- xform_norm_discrete(box_obj, input_var = "Species")
  fit <- kmeans(box_obj$data[, 14:21], 3)
  p_fit <- pmml(fit, transform = box_obj)
  r_pred <- sprintf("%.0f", cl_predict(fit, data = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)
})


test_that("GeneralRegressionModel/glmnet PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("glmnet")
  library(glmnet)

  fit <- cv.glmnet(data.matrix(audit[, c(2, 7, 9:10)]), data.matrix(audit[, 13]))
  p_fit <- pmml(fit)
  r_pred <- as.vector(predict(fit, data.matrix(audit[, c(2, 7, 9:10)])))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit[, c(2, 7, 9:10)], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  fit <- cv.glmnet(data.matrix(iris[1:4]), data.matrix(iris[5]))
  p_fit <- pmml(fit)
  r_pred <- as.vector(predict(fit, data.matrix(data.matrix(iris[1:4]))))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris[1:4], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  fit <- cv.glmnet(data.matrix(elnino[1:6]), data.matrix(elnino[7]), family = "poisson")
  p_fit <- pmml(fit)
  r_pred <- as.vector(predict(fit, data.matrix(elnino[1:6])))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(elnino[1:6], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  fit <- cv.glmnet(data.matrix(elnino[1:6]), data.matrix(elnino[7]))
  p_fit <- pmml(fit)
  r_pred <- as.vector(predict(fit, data.matrix(elnino[1:6])))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(elnino[1:6], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  box_obj <- xform_wrap(elnino)
  rownames(box_obj$field_data)[7] <- "predictedScore"
  box_obj$field_data["predictedScore", "dataType"] <- "numeric"
  names(box_obj$data)[7] <- "predictedScore"
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_z_score(box_obj, xform_info = "column5->d5")
  box_obj <- xform_z_score(box_obj, xform_info = "column6->d6")
  box_obj <- xform_z_score(box_obj, xform_info = "column7->d7")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_min_max(box_obj, xform_info = "d5->dd5")
  box_obj <- xform_min_max(box_obj, xform_info = "d6->dd6")
  box_obj <- xform_min_max(box_obj, xform_info = "d7->dd7")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd5->ddd5")
  box_obj <- xform_z_score(box_obj, xform_info = "dd6->ddd6")
  box_obj <- xform_z_score(box_obj, xform_info = "dd7->ddd7")
  x <- data.matrix(box_obj$data[1:6])
  y <- data.matrix(box_obj$data[7])
  fit <- cv.glmnet(x, y)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.numeric(predict(fit, x))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(elnino, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_map(box_obj,
    xform_info = "[Species->d_Species][string->double]",
    table = "iris_class_full_name_table.csv", default_value = "-1", map_missing_to = "1"
  )
  x <- data.matrix(box_obj$data[, c(1:3, 6)])
  y <- data.matrix(box_obj$data[4])
  fit <- cv.glmnet(x, y)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.numeric(predict(fit, x))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  box_obj <- xform_wrap(elnino)
  box_obj <- rename_wrap_var(wrap_object = box_obj, xform_info = "temp->predictedScore")
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_z_score(box_obj, xform_info = "column5->d5")
  box_obj <- xform_z_score(box_obj, xform_info = "column6->d6")
  box_obj <- xform_z_score(box_obj, xform_info = "column7->d7")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_min_max(box_obj, xform_info = "d5->dd5")
  box_obj <- xform_min_max(box_obj, xform_info = "d6->dd6")
  box_obj <- xform_min_max(box_obj, xform_info = "d7->dd7")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd5->ddd5")
  box_obj <- xform_z_score(box_obj, xform_info = "dd6->ddd6")
  box_obj <- xform_z_score(box_obj, xform_info = "dd7->ddd7")
  x <- data.matrix(box_obj$data[1:6])
  y <- data.matrix(box_obj$data[7])
  fit <- cv.glmnet(x, y)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.numeric(predict(fit, x))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(elnino, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  x <- data.matrix(iris_p[, c(1:3, 5)])
  y <- data.matrix(iris_p[4])
  box_obj <- xform_wrap(x)
  box_obj <- xform_map(box_obj,
    xform_info = "[class->d_class][string->double]",
    table = "iris_class_index_table.csv", default_value = "-1", map_missing_to = "1"
  )
  box_obj <- xform_norm_discrete(box_obj, input_var = "class")
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_length->dis_pl][double->integer]",
    table = "iris_discretize_pl.csv", map_missing_to = "0", default_value = "1"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_width->dis_pw][double->integer]",
    table = "iris_discretize_pw.csv", map_missing_to = "0", default_value = "1"
  )
  fit <- cv.glmnet(as.matrix(box_obj$data), y)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.numeric(predict(fit, as.matrix(box_obj$data)))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(as.data.frame(x, stringsAsFactors = TRUE), up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)


  x <- data.frame(replicate(20, rnorm(1000)))
  y <- rnorm(1000)
  box_obj <- xform_wrap(x)
  box_obj <- xform_min_max(box_obj, xform_info = "column1->d_X1")
  box_obj <- xform_min_max(box_obj, xform_info = "X2->d_X2")
  box_obj <- xform_min_max(box_obj, xform_info = "X3->myDerived_X3[10,20]")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d_X4")
  box_obj <- xform_z_score(box_obj, xform_info = "X5->d_X5")

  fit <- cv.glmnet(as.matrix(box_obj$data), y)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.numeric(predict(fit, as.matrix(box_obj$data)))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(as.data.frame(x, stringsAsFactors = TRUE), up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$predictedValue, r_pred)
})


test_that("GeneralRegressionModel/stats PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)

  # suppress warning: "glm.fit: fitted probabilities numerically 0 or 1 occurred"
  suppressWarnings(fit <- glm(
    formula = as.factor(Adjusted) ~ Age + Employment + Education + Marital + Occupation + Income + Sex + Deductions + Hours,
    family = binomial(link = logit), audit
  ))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred)


  fit <- glm(Out ~ ., data = glm_issue3543_data)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, glm_issue3543_data)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(glm_issue3543_data, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Out, r_pred)


  suppressWarnings(fit <- glm(
    formula = as.factor(Adjusted) ~ Age + Employment + Education + Marital + Occupation + Income + Sex + Deductions + Hours,
    family = binomial(link = logit), audit
  ))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred)


  fit <- glm(formula = target ~ A1 + A2 + A3, family = binomial(link = logit), data = credit_class)
  p_fit <- pmml(fit)
  r_pred <- unname(predict(fit, credit_class, type = "response"))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit_class, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_pos, r_pred, tolerance = 1e-4)


  fit <- glm(
    formula = Income ~ Age + Employment + Education + Marital + Occupation + Sex + Hours,
    family = Gamma(link = inverse), audit_nor
  )
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit_nor, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Income, r_pred)


  fit <- glm(
    formula = Adjusted ~ Age + Employment + Education + Marital + Occupation + Income + Sex + Deductions + Hours,
    family = gaussian(link = identity), audit
  )
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  fit <- glm(formula = as.factor(fbs) ~ ., family = binomial(link = logit), heart)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, heart, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(heart, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred)


  suppressWarnings(fit <- glm(
    formula = Adjusted ~ Age + Employment + Education + Marital + Occupation + Income + Sex + Deductions + Hours,
    family = poisson(link = log), audit
  ))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  iris_binom <- iris
  iris_binom$y <- I(iris$Species == "setosa")
  # suppress warning: "glm.fit: algorithm did not converge"
  suppressWarnings(fit <- glm(y ~ ., data = iris_binom, family = binomial))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, iris_binom, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_binom, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_y, r_pred)


  box_obj <- xform_wrap(audit)
  box_obj <- xform_z_score(box_obj, "column2->d_Age")
  box_obj <- xform_z_score(box_obj, "column7->d_Income")
  box_obj <- xform_z_score(box_obj, "column9->d_Deductions")
  box_obj <- xform_z_score(box_obj, "column10->d_Hours")
  box_obj <- xform_min_max(box_obj, "d_Age->dd_Age")
  box_obj <- xform_min_max(box_obj, "d_Income->dd_Income")
  box_obj <- xform_min_max(box_obj, "d_Deductions->dd_Deductions")
  box_obj <- xform_min_max(box_obj, "d_Hours->dd_Hours")
  box_obj <- xform_z_score(box_obj, "dd_Age->ddd_Age")
  box_obj <- xform_z_score(box_obj, "dd_Income->ddd_Income")
  box_obj <- xform_z_score(box_obj, "dd_Deductions->ddd_Deductions")
  box_obj <- xform_z_score(box_obj, "dd_Hours->ddd_Hours")
  suppressWarnings(fit <- glm(
    formula = as.factor(Adjusted) ~ ddd_Age + Employment +
      Education + Marital + Occupation + ddd_Income + Sex + ddd_Deductions + ddd_Hours,
    family = binomial(link = logit), box_obj$data
  ))
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- predict(fit, box_obj$data, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred)


  audit$Adjusted <- as.factor(audit$Adjusted)
  box_obj <- xform_wrap(audit)
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d_Age")
  box_obj <- xform_z_score(box_obj, xform_info = "column7->d_Income")
  box_obj <- xform_z_score(box_obj, xform_info = "column9->d_Deductions")
  box_obj <- xform_z_score(box_obj, xform_info = "column10->d_Hours")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Age->dd_Age")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Income->dd_Income")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Deductions->dd_Deductions")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Hours->dd_Hours")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Age->ddd_Age")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Income->ddd_Income")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Deductions->ddd_Deductions")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Hours->ddd_Hours")
  box_obj <- xform_norm_discrete(box_obj, input_var = "Employment")
  box_obj <- xform_map(box_obj,
    xform_info = "[Marital-> d_Marital][string->double]",
    table = "audit_marital_table.csv", default_value = "-1", map_missing_to = "1"
  )

  suppressWarnings(fit <- glm(
    formula = Adjusted ~ ddd_Age + ddd_Income + ddd_Deductions +
      ddd_Hours + d_Marital + Employment_Private + Employment_Consultant +
      Employment_SelfEmp + Employment_PSLocal + Employment_PSState +
      Employment_PSFederal + Employment_Volunteer + Sex + Occupation + Education,
    family = binomial(link = logit), box_obj$data
  ))
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- unname(predict(fit, box_obj$data, type = "response"))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred, tolerance = 1e-2)
})


test_that("MiningModel/ada PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("ada")
  library(ada)

  set.seed(1234)
  fit <- ada(Adjusted ~ Employment + Education + Hours + Income, iter = 3, audit)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit, type = "both")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_0, r_pred$probs[, 1], tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, as.numeric(r_pred$class) - 1)

  set.seed(12345)
  fit <- ada(as.factor(fbs) ~ ., iter = 5, data = heart)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, heart, type = "both")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(heart, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Probability_0, r_pred$probs[, 1])
  expect_equal_nn(z_pred$outputs$Predicted_fbs, as.character(r_pred$class))

  set.seed(1236)
  fit <- ada(target ~ ., iter = 11, data = credit_class)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, credit_class)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit_class, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_target, as.character(r_pred))

  set.seed(1834)
  iris_binom_2 <- iris[iris[, 5] != "setosa", ]
  iris_binom_2[, 5] <- as.factor(levels(iris[, 5])[2:3])[as.numeric(iris[, 5]) - 1]
  fit <- ada(Species ~ ., data = iris_binom_2, iter = 20, nu = 0.9, type = "discrete")
  p_fit <- pmml(fit)
  # remove precision issues by varying the training data randomly
  iris_binom_2[, 1:4] <- iris_binom_2[, 1:4] + runif(4, -0.1, 0.1)
  r_pred <- predict(fit, iris_binom_2, type = "both")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_binom_2[, 1:4], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, as.character(r_pred$class))
  expect_equal_nn(z_pred$outputs$Probability_versicolor, r_pred$probs[, 1])

  set.seed(534)
  fit <- ada(as.factor(Adjusted) ~ Employment + Education + Hours + Income, iter = 3, audit)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, audit)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, as.character(r_pred))


  box_obj <- xform_wrap(audit)
  box_obj <- xform_z_score(box_obj, "column2->d_Age")
  box_obj <- xform_z_score(box_obj, "column7->d_Income")
  box_obj <- xform_z_score(box_obj, "column9->d_Deductions")
  box_obj <- xform_z_score(box_obj, "column10->d_Hours")
  box_obj <- xform_min_max(box_obj, "d_Age->dd_Age")
  box_obj <- xform_min_max(box_obj, "d_Income->dd_Income")
  box_obj <- xform_min_max(box_obj, "d_Deductions->dd_Deductions")
  box_obj <- xform_min_max(box_obj, "d_Hours->dd_Hours")
  box_obj <- xform_z_score(box_obj, "dd_Age->ddd_Age")
  box_obj <- xform_z_score(box_obj, "dd_Income->ddd_Income")
  box_obj <- xform_z_score(box_obj, "dd_Deductions->ddd_Deductions")
  box_obj <- xform_z_score(box_obj, "dd_Hours->ddd_Hours")
  set.seed(12884)
  fit <- ada(as.factor(Adjusted) ~ ddd_Age + ddd_Income + Sex + ddd_Deductions + ddd_Hours, iter = 3, box_obj$data)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- predict(fit, box_obj$data)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, as.character(r_pred))
})


test_that("MiningModel/gbm PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("gbm")
  skip_if_not_installed("caret")
  library(gbm)
  library(caret)

  set.seed(2112)
  audit_dat <- audit[, -c(1, 4, 6, 9, 10, 11, 12)]
  train_index <- createDataPartition(audit_dat$Adjusted, p = .96, list = FALSE, times = 1)
  audit_dat_train <- audit_dat[train_index, ]
  audit_dat_test <- audit_dat[-train_index, ]


  fit <- gbm(Adjusted ~ ., data = audit_dat_train, n.trees = 3, interaction.depth = 4, distribution = "bernoulli")
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = audit_dat_test, n.trees = 3)
  r_pred_2 <- predict(fit, newdata = audit_dat_test, n.trees = 3, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_dat_test, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$BernoulliLinkPrediction, r_pred)
  expect_equal_nn(z_pred$outputs$BernoulliResponsePrediction, r_pred_2)


  fit <- gbm(Adjusted ~ ., data = audit_dat_train, n.trees = 3, interaction.depth = 4, distribution = "gaussian")
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = audit_dat_test, n.trees = 3)
  r_pred_2 <- predict(fit, newdata = audit_dat_test, n.trees = 3, type = "response") - fit$initF
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_dat_test, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$GaussianPrediction, r_pred)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred_2)


  num_rows <- NROW(covtype2)
  covtype2_m <- as.matrix(covtype2)
  y0 <- as.vector(covtype2_m[, "X3"])
  invisible(capture.output(fit <- gbm.fit(covtype2_m[, 1:11], y0,
    distribution = "multinomial", n.trees = 3, interaction.depth = 4
  )))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = covtype2_m[, 1:11], n.trees = 3)[1:num_rows, , ]
  r_pred_2 <- predict(fit, newdata = covtype2_m[, 1:11], n.trees = 3, type = "response")[1:num_rows, , ]
  pr <- vector()
  for (i in 1:nrow(r_pred_2)) {
    pr[i] <- (colnames(r_pred_2)[which(r_pred_2[i, ] == max(r_pred_2[i, ]))])
  }
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(covtype2, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$link_3, r_pred[, 1])
  expect_equal_nn(z_pred$outputs$link_6, r_pred[, 2])
  expect_equal_nn(z_pred$outputs$Probability_3, r_pred_2[, 1])
  expect_equal_nn(z_pred$outputs$Probability_6, r_pred_2[, 2])
  expect_equal_nn(z_pred$outputs$Predicted_y, pr)


  fit <- gbm(target ~ ., data = credit, n.trees = 3, interaction.depth = 4, distribution = "gaussian")
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = credit, n.trees = 3) - fit$initF
  r_pred_2 <- predict(fit, n.trees = 3, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_target, r_pred)
  expect_equal_nn(z_pred$outputs$GaussianPrediction, r_pred_2)


  fit <- gbm(target ~ ., data = credit_class, n.trees = 3, distribution = "multinomial", interaction.depth = 4)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = credit_class, n.trees = 3)[, , 1]
  r_pred_2 <- predict(fit, n.trees = 3, type = "response")[, , 1]
  pr <- vector()
  for (i in 1:nrow(r_pred_2)) {
    pr[i] <- (colnames(r_pred_2)[which(r_pred_2[i, ] == max(r_pred_2[i, ]))])
  }
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit_class, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$link_neg, r_pred[, 1], tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$link_pos, r_pred[, 2], tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_neg, r_pred_2[, 1], tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_pos, r_pred_2[, 2], tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Predicted_target, pr)


  set.seed(12)
  fit <- gbm(target ~ ., data = credit_class_01, n.trees = 3, interaction.depth = 4, distribution = "bernoulli")
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = credit_class_01, n.trees = 3)
  r_pred_2 <- predict(fit, n.trees = 3, type = "response")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit_class_01, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$BernoulliLinkPrediction, r_pred, tolerance = 1e-5)
  expect_equal_nn(z_pred$outputs$BernoulliResponsePrediction, r_pred_2, tolerance = 1e-5)


  fit <- gbm(Species ~ ., data = iris, n.trees = 2, interaction.depth = 3, distribution = "multinomial")
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = iris[1:60, ], n.trees = 2)[1:60, , ]
  r_pred_2 <- predict(fit, newdata = iris[1:60, ], n.trees = 2, type = "response")[1:60, , ]
  pr <- vector()
  for (i in 1:nrow(r_pred_2)) {
    pr[i] <- (colnames(r_pred_2)[which(r_pred_2[i, ] == max(r_pred_2[i, ]))])
  }
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris[1:60, ], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$link_setosa, r_pred[, 1])
  expect_equal_nn(z_pred$outputs$link_versicolor, r_pred[, 2])
  expect_equal_nn(z_pred$outputs$link_virginica, r_pred[, 3])
  expect_equal_nn(z_pred$outputs$Probability_setosa, r_pred_2[, 1])
  expect_equal_nn(z_pred$outputs$Probability_versicolor, r_pred_2[, 2])
  expect_equal_nn(z_pred$outputs$Probability_virginica, r_pred_2[, 3])
  expect_equal_nn(z_pred$outputs$Predicted_Species, pr)


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  fit <- gbm(class ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data, n.trees = 2, interaction.depth = 3, distribution = "multinomial")
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- predict(fit, newdata = box_obj$data, n.trees = 2)[1:4, , ]
  r_pred_2 <- predict(fit, newdata = box_obj$data, n.trees = 2, type = "response")[1:4, , ]
  pr <- vector()
  for (i in 1:nrow(r_pred_2)) {
    pr[i] <- (colnames(r_pred_2)[which(r_pred_2[i, ] == max(r_pred_2[i, ]))])
  }
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p[1:4, ], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$`link_Iris-setosa`, r_pred[, 1])
  expect_equal_nn(z_pred$outputs$`link_Iris-versicolor`, r_pred[, 2])
  expect_equal_nn(z_pred$outputs$`link_Iris-virginica`, r_pred[, 3])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_2[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_2[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_2[, 3])
  expect_equal_nn(z_pred$outputs$Predicted_class, pr)
})


test_that("MiningModel/randomForest PMML output matches R", {
  skip_on_cran()
  skip_on_ci()
  skip_if_not_installed("randomForest")

  library(zementisr)
  library(randomForest)

  audit_nor_logical[, "Sex"] <- as.factor(audit_nor_logical[, "Sex"])
  suppressWarnings(fit <- randomForest(Adjusted ~ ., audit_nor_logical[, -1], ntree = 8))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = audit_nor_logical)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor_logical, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  suppressWarnings(fit <- randomForest(Adjusted ~ ., audit_nor, ntree = 4))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = audit_nor)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  suppressWarnings(fit <- randomForest(Adjusted ~ ., audit_nor_fake_logical, ntree = 5))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = audit_nor_fake_logical)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor_fake_logical, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  suppressWarnings(fit <- randomForest(predictedClass ~ ., random_data_small, ntree = 7))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = random_data_small)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(random_data_small, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_predictedClass, r_pred)


  suppressWarnings(fit <- randomForest(temp ~ ., elnino, ntree = 6))
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = elnino)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(elnino, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_temp, r_pred)


  fit <- randomForest(SEPAL_LE ~ ., data = iris_nor, ntree = 9)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = iris_nor)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_SEPAL_LE, r_pred)


  iris_nor_logical[, 5] <- as.factor(iris_nor_logical[, 5])
  fit <- randomForest(SEPAL_LE ~ ., iris_nor_logical, ntree = 7)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, newdata = iris_nor_logical)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor_logical, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_SEPAL_LE, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  set.seed(123)
  fit <- randomForest(Species ~ Petal.Length + ddd2 + ddd3 + ddd4, box_obj$data, ntree = 7)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, r_pred)


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_width->dis_sw][double->boolean]",
    table = "iris_discretize_bool_sw.csv", map_missing_to = "0", default_value = "1"
  )
  fit <- randomForest(class ~ petal_length + ddd2 + ddd3 + dis_sw, box_obj$data, ntree = 7)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, r_pred)


  box_obj <- xform_wrap(audit_factor)
  box_obj <- xform_z_score(box_obj, "column2->d_Age")
  box_obj <- xform_z_score(box_obj, "column7->d_Income")
  box_obj <- xform_z_score(box_obj, "column9->d_Deductions")
  box_obj <- xform_z_score(box_obj, "column10->d_Hours")
  box_obj <- xform_min_max(box_obj, "d_Age->dd_Age")
  box_obj <- xform_min_max(box_obj, "d_Income->dd_Income")
  box_obj <- xform_min_max(box_obj, "d_Deductions->dd_Deductions")
  box_obj <- xform_min_max(box_obj, "d_Hours->dd_Hours")
  box_obj <- xform_z_score(box_obj, "dd_Age->ddd_Age")
  box_obj <- xform_z_score(box_obj, "dd_Income->ddd_Income")
  box_obj <- xform_z_score(box_obj, "dd_Deductions->ddd_Deductions")
  box_obj <- xform_z_score(box_obj, "dd_Hours->ddd_Hours")
  set.seed(14)
  fit <- randomForest(Adjusted ~ ddd_Age + Employment + Education + Marital +
    Occupation + ddd_Income + Sex + ddd_Deductions + ddd_Hours,
  box_obj$data,
  ntree = 7
  )
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_factor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  box_obj <- xform_wrap(audit_factor[, c("Age", "Marital", "Sex", "Adjusted")])
  box_obj <- xform_norm_discrete(box_obj, xform_info = "Marital")
  box_obj <- xform_norm_discrete(box_obj, xform_info = "Sex")
  audit_box_features <- box_obj$data[!names(box_obj$data) %in% c("Marital", "Sex")]
  fit <- randomForest(Adjusted ~ ., audit_box_features, ntree = 3)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_factor[, c("Age", "Marital", "Sex", "Adjusted")], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  box_obj <- xform_wrap(audit_factor[, c(
    "Age", "Employment", "Income",
    "Deductions", "Marital", "Sex", "Adjusted"
  )])
  box_obj <- xform_norm_discrete(box_obj, xform_info = "Marital")
  box_obj <- xform_norm_discrete(box_obj, xform_info = "Sex")
  box_obj <- xform_function(box_obj,
    orig_field_name = "Age",
    new_field_name = "Age.log",
    expression = "log(Age)"
  )
  box_obj <- xform_function(box_obj,
    orig_field_name = "Income,Age,Deductions",
    new_field_name = "Inc.Ded.Age",
    expression = "(Income-Deductions)/Age"
  )
  audit_box_features <- box_obj$data[!names(box_obj$data) %in% c("Marital", "Sex")]
  fit <- randomForest(Adjusted ~ ., audit_box_features, ntree = 3)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_factor[, c(
    "Age", "Employment", "Income",
    "Deductions", "Marital", "Sex", "Adjusted"
  )], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  set.seed(335)
  fit <- randomForest(Species ~ ddd1 + ddd2 + ddd3 + ddd4, box_obj$data[1:120, ], ntree = 5)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred_class <- as.character(predict(fit, newdata = box_obj$data[121:150, ]))
  r_pred_prob <- unname(predict(fit, newdata = box_obj$data[121:150, ], type = "prob"))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris[121:150, ], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, r_pred_class)
  expect_equal_nn(z_pred$outputs$Probability_setosa, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_versicolor, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$Probability_virginica, r_pred_prob[, 3])
})


test_that("NaiveBayesModel/e1071 PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("e1071")
  library(e1071)

  fit <- naiveBayes(as.factor(Adjusted) ~ Employment + Education + Marital + Occupation + Sex, data = audit_nor)
  p_fit <- pmml(fit, predicted_field = "Adjusted")
  r_pred_class <- predict(fit, newdata = audit_nor)
  r_pred_prob <- predict(fit, newdata = audit_nor, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_0, r_pred_prob[, 1])


  fit <- naiveBayes(BANKCARD ~ GENDER + MARITAL_STATUS + PROFESSION + SAVINGS_ACCOUNT + ONLINE_ACCESS + JOINED_ACCOUNTS,
    data = bank
  )
  p_fit <- pmml(fit, predicted_field = "BANKCARD")
  r_pred_class <- predict(fit, newdata = bank)
  r_pred_prob <- predict(fit, newdata = bank, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(bank, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_BANKCARD, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_NO, r_pred_prob[, 1])


  fit <- naiveBayes(CLASS ~ ., data = iris_nor)
  p_fit <- pmml(fit, predicted_field = "CLASS")
  r_pred_class <- predict(fit, newdata = iris_nor)
  r_pred_prob <- predict(fit, newdata = iris_nor, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_CLASS, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virgin`, r_pred_prob[, 3], tolerance = 1e-4)


  fit <- naiveBayes(Marital ~ ., data = audit[, c(2:8, 10)])
  p_fit <- pmml(fit, predicted_field = "Marital")
  r_pred_class <- predict(fit, newdata = audit[, c(2:8, 10)])
  r_pred_prob <- predict(fit, newdata = audit[, c(2:8, 10)], type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit[, c(2:8, 10)], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Marital, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_Absent, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_Divorced, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$Probability_Married, r_pred_prob[, 3])


  fit <- naiveBayes(Marital ~ ., data = audit_r_build_in[, c(2:8, 10)])
  p_fit <- pmml(fit, predicted_field = "Marital")
  r_pred_class <- predict(fit, newdata = audit_r_build_in[, c(2:8, 10)])
  r_pred_prob <- predict(fit, newdata = audit_r_build_in[, c(2:8, 10)], type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_r_build_in[, c(2:8, 10)], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Marital, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_Absent, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_Divorced, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$Probability_Married, r_pred_prob[, 3])


  fit <- naiveBayes(as.factor(amount_of_claims) ~ gender + domicile, data = insurance)
  p_fit <- pmml(fit, predicted_field = "amount_of_claims")
  r_pred_class <- predict(fit, newdata = insurance)
  r_pred_prob <- predict(fit, newdata = insurance, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(insurance, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_amount_of_claims, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_100, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_500, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$Probability_1000, r_pred_prob[, 3])


  fit <- naiveBayes(as.factor(amount_of_claims) ~ gender + domicile + no_of_claims, data = insurance)
  p_fit <- pmml(fit, predicted_field = "amount_of_claims")
  r_pred_class <- predict(fit, newdata = insurance)
  r_pred_prob <- predict(fit, newdata = insurance, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(insurance, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_amount_of_claims, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_100, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_500, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$Probability_1000, r_pred_prob[, 3])


  fit <- naiveBayes(class ~ ., data = iris_bin)
  p_fit <- pmml(fit, predicted_field = "class")
  r_pred_class <- predict(fit, newdata = iris_bin)
  r_pred_prob <- predict(fit, newdata = iris_bin, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_bin, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])


  fit <- naiveBayes(target ~ ., data = credit_class)
  p_fit <- pmml(fit, predicted_field = "target")
  r_pred_class <- predict(fit, newdata = credit_class)
  r_pred_prob <- predict(fit, newdata = credit_class, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit_class, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_target, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_neg, r_pred_prob[, 1], tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_pos, r_pred_prob[, 2], tolerance = 1e-3)


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_length->dis_pl][double->integer]",
    table = "iris_discretize_pl.csv", map_missing_to = "0", default_value = "1"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_width->dis_pw][double->integer]",
    table = "iris_discretize_pw.csv", map_missing_to = "0", default_value = "1"
  )
  fit <- naiveBayes(class ~ dis_pl + dis_pw + sepal_length + sepal_width, data = box_obj$data)
  p_fit <- pmml(fit, predicted_field = "class", transforms = box_obj)
  r_pred_class <- predict(fit, newdata = box_obj$data)
  r_pred_prob <- predict(fit, newdata = box_obj$data, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_length->dis_pl][double->string]",
    table = "iris_discretize_pl.csv", map_missing_to = "0", default_value = "1"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_width->dis_pw][double->string]",
    table = "iris_discretize_pw.csv", map_missing_to = "0", default_value = "1"
  )

  fit <- naiveBayes(class ~ dis_pl + dis_pw + sepal_length + sepal_width, data = box_obj$data)
  p_fit <- pmml(fit, predicted_field = "class", transforms = box_obj)
  r_pred_class <- predict(fit, newdata = box_obj$data)
  r_pred_prob <- predict(fit, newdata = box_obj$data, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_length->dis_pl][double->string]",
    table = "iris_discretize_pl.csv", map_missing_to = "0", default_value = "1"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_width->dis_pw][double->string]",
    table = "iris_discretize_pw.csv", map_missing_to = "0", default_value = "1"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_length->dis_sl][double->string]",
    table = "iris_discretize_sl.csv", map_missing_to = "0", default_value = "1"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_width->dis_sw][double->string]",
    table = "iris_discretize_sw.csv", map_missing_to = "0", default_value = "1"
  )

  fit <- naiveBayes(class ~ dis_pl + dis_pw + dis_sl + dis_sw, data = box_obj$data)
  p_fit <- pmml(fit, predicted_field = "class", transforms = box_obj)
  r_pred_class <- predict(fit, newdata = box_obj$data)
  r_pred_prob <- predict(fit, newdata = box_obj$data, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_length->dis_pl][double->string]",
    table = "iris_discretize_pl.csv"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_width->dis_pw][double->string]",
    table = "iris_discretize_pw.csv"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_length->dis_sl][double->string]",
    table = "iris_discretize_sl.csv"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_width->dis_sw][double->string]",
    table = "iris_discretize_sw.csv"
  )

  fit <- naiveBayes(class ~ dis_pl + dis_pw + dis_sl + dis_sw, data = box_obj$data)
  p_fit <- pmml(fit, predicted_field = "class", transforms = box_obj)
  r_pred_class <- predict(fit, newdata = box_obj$data)
  r_pred_prob <- predict(fit, newdata = box_obj$data, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_length->dis_pl][double->string]",
    table = "iris_discretize_pl.csv"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[petal_width->dis_pw][double->string]",
    table = "iris_discretize_pw.csv"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_length->dis_sl][double->string]",
    table = "iris_discretize_sl.csv"
  )
  box_obj <- xform_discretize(box_obj,
    xform_info = "[sepal_width->dis_sw][double->string]",
    table = "iris_discretize_sw.csv"
  )
  fit <- naiveBayes(class ~ dis_pl + dis_pw + dis_sl + dis_sw, data = box_obj$data)

  p_fit <- pmml(fit, predicted_field = "class", transforms = box_obj)
  r_pred_class <- predict(fit, newdata = box_obj$data)
  r_pred_prob <- predict(fit, newdata = box_obj$data, type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])
})


test_that("NearestNeighborModel/neighbr PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("neighbr")
  library(neighbr)

  iris_with_id <- iris
  iris_with_id$ID <- c(1:150)

  iris_train <- iris[1:140, ]
  iris_test <- iris[141:150, -c(4, 5)]
  fit <- knn(
    train_set = iris_train, test_set = iris_test, k = 3, categorical_target = "Species",
    continuous_target = "Petal.Width", comparison_measure = "euclidean"
  )
  p_fit <- pmml(fit)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_test, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, fit$test_set_scores$categorical_target)
  expect_equal_nn(z_pred$outputs$Predicted_Petal.Width, fit$test_set_scores$continuous_target)

  #
  #   iris_with_id <- iris
  #   iris_with_id$ID <- c(1:150)
  #   iris_train <- iris_with_id[1:130, -c(4, 5)]
  #   iris_test <- iris_with_id[132:150, -c(4, 5, 6)]
  #   fit <- knn(
  #     train_set = iris_train, test_set = iris_test, k = 5, comparison_measure = "euclidean",
  #     return_ranked_neighbors = 3, id = "ID"
  #   )
  #   p_fit <- pmml(fit)
  #   up_stat <- upload_model(p_fit)
  #   z_pred <- predict_pmml_batch(iris_test, up_stat$model_name)
  #   delete_model(up_stat$model_name)
  #   expect_equal_nn(z_pred$outputs$neighbor1, as.character(fit$test_set_scores$neighbor1))
  #   expect_equal_nn(z_pred$outputs$neighbor2, as.character(fit$test_set_scores$neighbor2))
  #   expect_equal_nn(z_pred$outputs$neighbor3, as.character(fit$test_set_scores$neighbor3))


  iris_train <- iris_with_id[1:130, ]
  iris_test <- iris_with_id[132:150, -c(5, 6)]
  fit <- knn(
    train_set = iris_train, test_set = iris_test, k = 5, categorical_target = "Species",
    comparison_measure = "squared_euclidean", return_ranked_neighbors = 4, id = "ID"
  )
  p_fit <- pmml(fit)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_test, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, fit$test_set_scores$categorical_target)
  # expect_equal_nn(z_pred$outputs$neighbor1, as.character(fit$test_set_scores$neighbor1))
  # expect_equal_nn(z_pred$outputs$neighbor2, as.character(fit$test_set_scores$neighbor2))
  # expect_equal_nn(z_pred$outputs$neighbor3, as.character(fit$test_set_scores$neighbor3))
  # expect_equal_nn(z_pred$outputs$neighbor4, as.character(fit$test_set_scores$neighbor4))


  house_votes_nbr <- house_votes
  feature_names <- names(house_votes_nbr)[!names(house_votes_nbr) %in% c("Class", "ID")]
  for (n in feature_names) {
    levels(house_votes_nbr[, n])[levels(house_votes_nbr[, n]) == "n"] <- 0
    levels(house_votes_nbr[, n])[levels(house_votes_nbr[, n]) == "y"] <- 1
  }
  for (n in feature_names) {
    house_votes_nbr[, n] <- as.numeric(levels(house_votes_nbr[, n]))[house_votes_nbr[, n]]
  }
  house_votes_nbr$ID <- c(1:nrow(house_votes_nbr))

  house_votes_train <- house_votes_nbr[1:100, ]
  house_votes_test <- house_votes_nbr[212:232, !names(house_votes_nbr) %in% c("Class", "ID")]
  fit <- knn(
    train_set = house_votes_train, test_set = house_votes_test, k = 7, categorical_target = "Class",
    comparison_measure = "jaccard", return_ranked_neighbors = 3, id = "ID"
  )
  p_fit <- pmml(fit)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(house_votes_test, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Class, fit$test_set_scores$categorical_target)
  # expect_equal_nn(z_pred$outputs$neighbor1, as.character(fit$test_set_scores$neighbor1))
  # expect_equal_nn(z_pred$outputs$neighbor2, as.character(fit$test_set_scores$neighbor2))
  # expect_equal_nn(z_pred$outputs$neighbor3, as.character(fit$test_set_scores$neighbor3))


  house_votes_train <- house_votes_nbr[1:30, ]
  house_votes_test <- house_votes_nbr[105:232, !names(house_votes_nbr) %in% c("Class", "ID")]
  fit <- knn(
    train_set = house_votes_train, test_set = house_votes_test, k = 4, categorical_target = "Class",
    comparison_measure = "simple_matching", return_ranked_neighbors = 4, id = "ID"
  )
  p_fit <- pmml(fit)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(house_votes_test, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Class, fit$test_set_scores$categorical_target)
  # expect_equal_nn(z_pred$outputs$neighbor1, as.character(fit$test_set_scores$neighbor1))
  # expect_equal_nn(z_pred$outputs$neighbor2, as.character(fit$test_set_scores$neighbor2))
  # expect_equal_nn(z_pred$outputs$neighbor3, as.character(fit$test_set_scores$neighbor3))
  # expect_equal_nn(z_pred$outputs$neighbor4, as.character(fit$test_set_scores$neighbor4))


  # house_votes_train <- house_votes_nbr[2:90, !names(house_votes_nbr) %in% c("Class")]
  # house_votes_test <- house_votes_nbr[195:232, !names(house_votes_nbr) %in% c("Class", "ID")]
  # fit <- knn(
  #   train_set = house_votes_train, test_set = house_votes_test, k = 4, comparison_measure = "tanimoto",
  #   return_ranked_neighbors = 4, id = "ID"
  # )
  # p_fit <- pmml(fit)
  # up_stat <- upload_model(p_fit)
  # z_pred <- predict_pmml_batch(house_votes_test, up_stat$model_name)
  # delete_model(up_stat$model_name)
  # expect_equal_nn(z_pred$outputs$neighbor1, as.character(fit$test_set_scores$neighbor1))
  # expect_equal_nn(z_pred$outputs$neighbor2, as.character(fit$test_set_scores$neighbor2))
  # expect_equal_nn(z_pred$outputs$neighbor3, as.character(fit$test_set_scores$neighbor3))
  # expect_equal_nn(z_pred$outputs$neighbor4, as.character(fit$test_set_scores$neighbor4))
})


test_that("NeuralNetwork/nnet PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("nnet")
  library(nnet)

  audit_nor_factor <- audit_nor
  audit_nor_factor[, 13] <- as.factor(audit_nor[, 13])
  invisible(capture.output(fit <- nnet(Marital ~ ., data = audit_nor_factor[, c(2, 5, 7, 8, 10, 13)], size = 4)))
  p_fit <- pmml(fit)
  r_pred_class <- predict(fit, audit_nor_factor[, c(2, 5, 7, 8, 10, 13)], type = "class")
  r_pred_prob <- predict(fit, audit_nor_factor[, c(2, 5, 7, 8, 10, 13)], type = "raw")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor_factor[, c(2, 5, 7, 8, 10, 13)], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Marital, r_pred_class)
  expect_equal_nn(z_pred$outputs$Probability_Absent, unname(r_pred_prob[, 1]), tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_Divorced, unname(r_pred_prob[, 2]), tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_Married, unname(r_pred_prob[, 3]), tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$`Probability_Married-spouse-absent`, unname(r_pred_prob[, 4]), tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_Unmarried, unname(r_pred_prob[, 5]), tolerance = 1e-3)
  expect_equal_nn(z_pred$outputs$Probability_Widowed, unname(r_pred_prob[, 6]), tolerance = 1e-3)


  invisible(capture.output(fit <- nnet(CLASS ~ ., data = iris_nor, size = 4)))
  p_fit <- pmml(fit)
  r_pred_class <- predict(fit, iris_nor, type = "class")
  r_pred_prob <- unname(predict(fit, iris_nor, type = "raw"))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_CLASS, r_pred_class)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1], tolerance = 1e-5)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versic`, r_pred_prob[, 2], tolerance = 1e-6)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virgin`, r_pred_prob[, 3], tolerance = 1e-5)


  invisible(capture.output(fit <- nnet(Adjusted ~ ., data = audit_nor, size = 4)))
  p_fit <- pmml(fit)
  r_pred <- unlist(as.list(predict(fit, audit_nor)))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_map(box_obj,
    xform_info = "[Species->d_Species][string->double]",
    table = "iris_class_table.csv", default_value = "-1", map_missing_to = "1"
  )
  invisible(capture.output(fit <- nnet(Species ~ ddd1 + ddd2 + ddd3 + ddd4, box_obj$data, size = 5)))
  p_fit <- pmml(fit, transform = box_obj)
  r_pred_class <- predict(fit, box_obj$data, type = "class")
  r_pred_prob <- unname(predict(fit, box_obj$data, type = "raw")[, 1:3])
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, r_pred_class)
  expect_equal_nn(z_pred$outputs$Probability_setosa, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_versicolor, r_pred_prob[, 2], tolerance = 1e-6)
  expect_equal_nn(z_pred$outputs$Probability_virginica, r_pred_prob[, 3], tolerance = 1e-5)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  invisible(capture.output(fit <- nnet(Species ~ ddd1 + ddd2 + ddd3 + ddd4, box_obj$data, size = 3)))
  p_fit <- pmml(fit, transform = box_obj)
  r_pred_class <- predict(fit, box_obj$data, type = "class")
  r_pred_prob <- unname(predict(fit, box_obj$data, type = "raw")[, 1:3])
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, r_pred_class)
  expect_equal_nn(z_pred$outputs$Probability_setosa, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_versicolor, r_pred_prob[, 2], tolerance = 1e-6)
  expect_equal_nn(z_pred$outputs$Probability_virginica, r_pred_prob[, 3], tolerance = 1e-6)
})


test_that("RegressionModel/nnet PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("nnet")
  library(nnet)

  fit <- multinom(as.factor(Adjusted) ~ ., data = audit_nor, trace = F)
  p_fit <- pmml(fit)
  r_pred_class <- predict(fit, audit_nor)
  r_pred_prob <- unname(predict(fit, audit_nor, type = "probs"))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred_prob, tolerance = 1e-4)


  fit <- multinom(CLASS ~ ., data = iris_nor, trace = F)
  p_fit <- pmml(fit)
  r_pred_class <- predict(fit, iris_nor)
  r_pred_prob <- predict(fit, iris_nor, type = "probs")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_CLASS, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1], tolerance = 1e-4)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versic`, r_pred_prob[, 2], tolerance = 1e-4)


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  fit <- multinom(class ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data, trace = F)
  p_fit <- pmml(fit, transform = box_obj)
  r_pred_class <- predict(fit, data = box_obj$data)
  r_pred_prob <- predict(fit, box_obj$data, type = "probs")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1], tolerance = 1e-4)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versic`, r_pred_prob[, 2], tolerance = 1e-4)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_map(box_obj,
    xform_info = "[Species->d_Species][string->double]",
    table = "iris_class_table.csv", default_value = "-1", map_missing_to = "1"
  )
  fit <- multinom(Species ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data, trace = F)
  p_fit <- pmml(fit, transform = box_obj)
  r_pred_class <- predict(fit, data = box_obj$data)
  r_pred_prob <- predict(fit, box_obj$data, type = "probs")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, as.character(r_pred_class))
  expect_equal_nn(z_pred$outputs$Probability_setosa, r_pred_prob[, 1], tolerance = 1e-4)
  expect_equal_nn(z_pred$outputs$Probability_versicolor, r_pred_prob[, 2], tolerance = 1e-4)
})


test_that("SupportVectorMachineModel/kernlab PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("kernlab")
  library(kernlab)

  fit <- ksvm(target ~ ., data = credit, kernel = "rbfdot", model_name = "ksvm")
  p_fit <- pmml(fit, data = credit)
  r_pred <- as.numeric(predict(fit, credit))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(credit, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_target, r_pred, tolerance = 1e-4)


  fit <- ksvm(CLASS ~ ., data = iris_nor, kernel = "rbfdot")
  p_fit <- pmml(fit, data = iris_nor)
  r_pred <- as.character(predict(fit, iris_nor))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_CLASS, r_pred)


  invisible(capture.output(fit <- ksvm(CLASS ~ ., data = iris_nor, kernel = "vanilladot")))
  p_fit <- pmml(fit, data = iris_nor)
  r_pred <- as.character(predict(fit, iris_nor))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_CLASS, r_pred)


  fit <- ksvm(Adjusted ~ ., data = audit[1:900, ], kernel = "rbfdot")
  p_fit <- pmml(fit, data = audit[1:900, ])
  r_pred <- as.numeric(predict(fit, audit[1:900, ]))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit[1:900, ], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  fit <- ksvm(as.factor(purchase) ~ ., data = petfood)
  p_fit <- pmml(fit, data = petfood)
  r_pred <- as.character(predict(fit, petfood))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(petfood, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_purchase, r_pred)


  fit <- ksvm(as.factor(Adjusted) ~ ., data = audit[1:900, ], kernel = "rbfdot")
  p_fit <- pmml(fit, data = audit[1:900, ])
  r_pred <- as.character(predict(fit, audit[1:900, ]))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit[1:900, ], up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)


  fit <- ksvm(PRE_1 ~ ., data = job_cat, kernel = "rbfdot")
  p_fit <- pmml(fit, data = job_cat)
  r_pred <- as.character(predict(fit, job_cat))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(job_cat, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_PRE_1, r_pred)


  fit <- ksvm(as.factor(PRE_1) ~ ., data = job_cat_index, kernel = "rbfdot")
  p_fit <- pmml(fit, data = job_cat_index)
  r_pred <- as.character(predict(fit, job_cat_index))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(job_cat_index, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_PRE_1, r_pred)


  box_obj <- xform_wrap(iris)
  box_obj <- xform_z_score(box_obj, "column1->d1")
  box_obj <- xform_z_score(box_obj, "column2->d2")
  box_obj <- xform_z_score(box_obj, "column3->d3")
  box_obj <- xform_z_score(box_obj, "column4->d4")
  box_obj <- xform_min_max(box_obj, "d1->dd1")
  box_obj <- xform_min_max(box_obj, "d2->dd2")
  box_obj <- xform_min_max(box_obj, "d3->dd3")
  box_obj <- xform_min_max(box_obj, "d4->dd4")
  box_obj <- xform_z_score(box_obj, "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, "dd4->ddd4")
  fit <- ksvm(Species ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data)
  p_fit <- pmml(fit, dataset = box_obj$data, transform = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Species, r_pred)


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_map(box_obj,
    xform_info = "[class->d_class][string->double]",
    table = "iris_p_class_table.csv", default_value = "-1", map_missing_to = "1"
  )
  box_obj <- xform_norm_discrete(box_obj, xform_info = "class")
  fit <- ksvm(class ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data)
  p_fit <- pmml(fit, dataset = box_obj$data, transform = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, r_pred)


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  box_obj <- xform_map(box_obj,
    xform_info = "[class->d_class][string->double]",
    table = "iris_p_class_table.csv", default_value = "-1", map_missing_to = "1"
  )
  box_obj <- xform_norm_discrete(box_obj, input_var = "class")
  fit <- ksvm(class ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data)
  p_fit <- pmml(fit, dataset = box_obj$data, transform = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, r_pred)


  box_obj <- xform_wrap(audit_factor)
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d_Age")
  box_obj <- xform_z_score(box_obj, xform_info = "column7->d_Income")
  box_obj <- xform_z_score(box_obj, xform_info = "column9->d_Deductions")
  box_obj <- xform_z_score(box_obj, xform_info = "column10->d_Hours")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Age->dd_Age")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Income->dd_Income")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Deductions->dd_Deductions")
  box_obj <- xform_min_max(box_obj, xform_info = "d_Hours->dd_Hours")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Age->ddd_Age")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Income->ddd_Income")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Deductions->ddd_Deductions")
  box_obj <- xform_z_score(box_obj, xform_info = "dd_Hours->ddd_Hours")
  box_obj <- xform_norm_discrete(box_obj, input_var = "Employment")
  box_obj <- xform_map(box_obj,
    xform_info = "[Marital-> d_Marital][string->double]",
    table = "audit_marital_table.csv", default_value = "-1", map_missing_to = "1"
  )
  fit <- ksvm(Adjusted ~ ddd_Age + ddd_Income + ddd_Deductions +
    ddd_Hours + d_Marital + Employment_Private + Employment_Consultant +
    Employment_SelfEmp + Employment_PSLocal + Employment_PSState +
    Employment_PSFederal + Employment_Volunteer + Sex + Occupation +
    Education, data = box_obj$data)

  p_fit <- pmml(fit, dataset = box_obj$data, transform = box_obj)
  r_pred <- as.character(predict(fit, newdata = box_obj$data))
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_factor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred)
})


test_that("TreeModel/rpart PMML output matches R", {
  skip_on_cran()
  skip_on_ci()

  library(zementisr)
  skip_if_not_installed("rpart")
  library(rpart)

  fit <- rpart(as.factor(Adjusted) ~ Employment + Education + Marital + Occupation + Sex, data = audit_nor)
  p_fit <- pmml(fit)
  r_pred_class <- as.character(predict(fit, audit_nor, type = "class"))
  r_pred_prob <- predict(fit, audit_nor, type = "prob")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(audit_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_Adjusted, r_pred_class)
  expect_equal_nn(z_pred$outputs$Probability_0, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred_prob[, 2])


  fit <- rpart(temp ~ ., data = elnino)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, elnino)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(elnino, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_temp, r_pred)


  fit <- rpart(CLASS ~ ., data = iris_nor)
  p_fit <- pmml(fit)
  r_pred_class <- as.character(predict(fit, iris_nor, type = "class"))
  r_pred_prob <- predict(fit, iris_nor, type = "prob")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_nor, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_CLASS, r_pred_class)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versic`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virgin`, r_pred_prob[, 3])


  fit <- rpart(fbs ~ ., data = heart)
  p_fit <- pmml(fit)
  r_pred <- predict(fit, heart)
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(heart, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_fbs, r_pred)


  fit <- rpart(as.factor(fbs) ~ ., data = heart)
  p_fit <- pmml(fit)
  r_pred_class <- as.character(predict(fit, heart, type = "class"))
  r_pred_prob <- predict(fit, heart, type = "prob")
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(heart, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_fbs, r_pred_class)
  expect_equal_nn(z_pred$outputs$Probability_0, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$Probability_1, r_pred_prob[, 2])


  box_obj <- xform_wrap(iris_p)
  box_obj <- xform_z_score(box_obj, xform_info = "column1->d1")
  box_obj <- xform_z_score(box_obj, xform_info = "column2->d2")
  box_obj <- xform_z_score(box_obj, xform_info = "column3->d3")
  box_obj <- xform_z_score(box_obj, xform_info = "column4->d4")
  box_obj <- xform_min_max(box_obj, xform_info = "d1->dd1")
  box_obj <- xform_min_max(box_obj, xform_info = "d2->dd2")
  box_obj <- xform_min_max(box_obj, xform_info = "d3->dd3")
  box_obj <- xform_min_max(box_obj, xform_info = "d4->dd4")
  box_obj <- xform_z_score(box_obj, xform_info = "dd1->ddd1")
  box_obj <- xform_z_score(box_obj, xform_info = "dd2->ddd2")
  box_obj <- xform_z_score(box_obj, xform_info = "dd3->ddd3")
  box_obj <- xform_z_score(box_obj, xform_info = "dd4->ddd4")
  fit <- rpart(class ~ ddd1 + ddd2 + ddd3 + ddd4, data = box_obj$data)
  p_fit <- pmml(fit, transforms = box_obj)
  r_pred_class <- as.character(predict(fit, box_obj$data, type = "class"))
  r_pred_prob <- predict(fit, box_obj$data, type = "prob")[, 1:3]
  up_stat <- upload_model(p_fit)
  z_pred <- predict_pmml_batch(iris_p, up_stat$model_name)
  delete_model(up_stat$model_name)
  expect_equal_nn(z_pred$outputs$Predicted_class, r_pred_class)
  expect_equal_nn(z_pred$outputs$`Probability_Iris-setosa`, r_pred_prob[, 1])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-versicolor`, r_pred_prob[, 2])
  expect_equal_nn(z_pred$outputs$`Probability_Iris-virginica`, r_pred_prob[, 3])
})

Try the pmml package in your browser

Any scripts or data that you put into this service are public.

pmml documentation built on March 18, 2022, 5:49 p.m.