`vtreat` transforms can be hosted on `rquery`. This allows transforms at scale.
# Setup: attach vtreat and record which optional packages can be loaded, so
# later steps can tell whether their dependencies are available.
library("vtreat")

# TRUE when the rquery namespace can be loaded.
eval_examples <- requireNamespace("rquery", quietly = TRUE)

# TRUE when rquery and rqdatatable can both be loaded.
eval_rqdt <- eval_examples &&
  requireNamespace("rqdatatable", quietly = TRUE)

# TRUE when rquery, DBI and RSQLite can all be loaded.
eval_db <- eval_examples &&
  requireNamespace("DBI", quietly = TRUE) &&
  requireNamespace("RSQLite", quietly = TRUE)

# `db` stays NULL unless the database packages are available; in that case it
# becomes a connection to an in-memory SQLite database.
db <- NULL
if (eval_db) {
  db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
}
Classification example.
# Classification example: a six-row training frame with a categorical
# column `x` (containing an NA), a numeric column `z` (containing an NA) and
# a logical outcome `y`.
dTrainC <- data.frame(
  x = c("a", "a", "a", "b", NA, "b"),
  z = c(1, 2, NA, 4, 5, 6),
  y = c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE),
  stringsAsFactors = FALSE
)
# Row identifier, later passed through the database run via `extracols`.
dTrainC$id <- seq_len(nrow(dTrainC))

# Design the treatment plan for outcome `y`, treating TRUE as the target.
treatmentsC <- designTreatmentsC(
  dTrainC,
  c("x", "z"),
  "y",
  outcometarget = TRUE
)

# Apply the plan in memory and display the treated frame.
prepare(treatmentsC, dTrainC) %.>%
  knitr::kable(.)

# Convert the treatment plan into an rquery plan (requires rquery).
rqplan <- as_rquery_plan(list(treatmentsC))
# Run the classification plan inside the database.
# NOTE: needs `db` to be a live connection; setup leaves it NULL when
# DBI/RSQLite are unavailable, in which case these calls cannot succeed.
source_data <- rquery::rq_copy_to(
  db,
  "dTrainC",
  dTrainC,
  overwrite = TRUE,
  temporary = TRUE
)

# Apply the rquery plan to the copied table, keeping the `id` column.
rest <- rquery_prepare(
  db,
  rqplan,
  source_data,
  "dTreatedC",
  extracols = "id"
)

# Pull the result table back into R and display it.
resd <- DBI::dbReadTable(db, rest$table_name)
resd %.>%
  knitr::kable(.)

# Clean up: drop both the input table and the result table.
rquery::rq_remove_table(db, source_data$table_name)
rquery::rq_remove_table(db, rest$table_name)
Regression example.
# Regression example: same inputs as the classification frame, but the
# outcome `y` is numeric (the logical values coerced to 0/1).
dTrainR <- data.frame(
  x = c("a", "a", "a", "b", NA, "b"),
  z = c(1, 2, NA, 4, 5, 6),
  y = as.numeric(c(FALSE, FALSE, TRUE, FALSE, TRUE, TRUE)),
  stringsAsFactors = FALSE
)
# Row identifier, later passed through the database run via `extracols`.
dTrainR$id <- seq_len(nrow(dTrainR))

# Design the treatment plan for the numeric outcome `y`.
treatmentsN <- designTreatmentsN(dTrainR, c("x", "z"), "y")

# Apply the plan in memory and display the treated frame.
prepare(treatmentsN, dTrainR) %.>%
  knitr::kable(.)

# Convert the treatment plan into an rquery plan (requires rquery).
rqplan <- as_rquery_plan(list(treatmentsN))
# Run the regression plan inside the database.
# NOTE: needs `db` to be a live connection; setup leaves it NULL when
# DBI/RSQLite are unavailable, in which case these calls cannot succeed.
source_data <- rquery::rq_copy_to(
  db,
  "dTrainR",
  dTrainR,
  overwrite = TRUE,
  temporary = TRUE
)

# Disabled diagnostic block: kept for reference, never executed.
# It requests the operator pipeline instead of running it (return_ops = TRUE),
# prints it, and renders it as a diagram.
if (FALSE) {
  ops <- rquery_prepare(
    db,
    rqplan,
    source_data,
    "dTreatedN",
    extracols = "id",
    return_ops = TRUE
  )
  cat(format(ops))
  ops %.>%
    rquery::op_diagram(.) %.>%
    DiagrammeR::grViz(.)
  # sql <- rquery::to_sql(ops, db)
  # cat(sql)
}

# Apply the rquery plan to the copied table, keeping the `id` column.
rest <- rquery_prepare(
  db,
  rqplan,
  source_data,
  "dTreatedN",
  extracols = "id"
)

# Pull the result table back into R and display it.
resd <- DBI::dbReadTable(db, rest$table_name)
resd %.>%
  knitr::kable(.)

# Clean up: drop both the input table and the result table.
rquery::rq_remove_table(db, source_data$table_name)
rquery::rq_remove_table(db, rest$table_name)
y-free example.
# y-free example: the same input columns with no outcome column at all.
dTrainZ <- data.frame(
  x = c("a", "a", "a", "b", NA, "b"),
  z = c(1, 2, NA, 4, 5, 6),
  stringsAsFactors = FALSE
)
# Row identifier, later passed through the database run via `extracols`.
dTrainZ$id <- seq_len(nrow(dTrainZ))

# Design a treatment plan from the input columns only (no outcome supplied).
treatmentsZ <- designTreatmentsZ(dTrainZ, c("x", "z"))

# Apply the plan in memory and display the treated frame.
prepare(treatmentsZ, dTrainZ) %.>%
  knitr::kable(.)

# Convert the treatment plan into an rquery plan (requires rquery).
rqplan <- as_rquery_plan(list(treatmentsZ))
# Run the y-free plan inside the database.
# NOTE: needs `db` to be a live connection; setup leaves it NULL when
# DBI/RSQLite are unavailable, in which case these calls cannot succeed.
source_data <- rquery::rq_copy_to(
  db,
  "dTrainZ",
  dTrainZ,
  overwrite = TRUE,
  temporary = TRUE
)

# Apply the rquery plan to the copied table, keeping the `id` column.
rest <- rquery_prepare(
  db,
  rqplan,
  source_data,
  "dTreatedZ",
  extracols = "id"
)

# Pull the result table back into R and display it.
resd <- DBI::dbReadTable(db, rest$table_name)
resd %.>%
  knitr::kable(.)

# Clean up: drop both the input table and the result table.
rquery::rq_remove_table(db, source_data$table_name)
rquery::rq_remove_table(db, rest$table_name)
# Teardown: close the database connection, but only if one was opened
# (`db` is NULL when no connection was created).
if (!is.null(db)) {
  DBI::dbDisconnect(db)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.