library("ggplot2")
# Custom variable coders handed to vtreat through the `customCoders` argument.
# Each entry maps a coder key to the vtreat function implementing it: the
# "PiecewiseV" keys use vtreat::solve_piecewise and the "knearest" keys use
# vtreat::square_window.
# NOTE(review): the "c."/"n." prefixes and the ".num" suffix presumably follow
# vtreat's custom-coder naming scheme (outcome type / input column type) --
# confirm against the vtreat documentation.
customCoders <- list(
  "c.PiecewiseV.num" = vtreat::solve_piecewise,
  "n.PiecewiseV.num" = vtreat::solve_piecewise,
  "c.knearest.num" = vtreat::square_window,
  "n.knearest.num" = vtreat::square_window
)

# Treatment codes passed as `codeRestriction` to the cross-frame experiments.
# It lists the two custom coders above alongside other vtreat code names.
codeRestriction <- c(
  "PiecewiseV",
  "knearest",
  "poolN",
  "poolC",
  "NonDecreasingV",
  "NonIncreasingV",
  "clean",
  "isBAD",
  "catB",
  "catP"
)
# Build the synthetic example data.
# NOTE: no RNG seed is set in the visible code, so the noise columns, `y`,
# `yc` and the train/test split differ from run to run.

# Numeric input on a regular grid from 0 to 15 in steps of 0.01.
d <- data.frame(x_numeric = seq(0, 15, by = 0.01))
# Categorical version of the input: x rounded to one decimal, as a label.
d$x_cat <- paste0("l_", round(d$x_numeric, digits = 1))
# Noise-free signal.
d$y_ideal <- sin(d$x_numeric)
# Random permutations of the two input columns (sampled without replacement),
# i.e. columns with the same values but shuffled against the rows.
d$x_numeric_noise <- d$x_numeric[sample.int(nrow(d), nrow(d), replace = FALSE)]
d$x_cat_noise <- d$x_cat[sample.int(nrow(d), nrow(d), replace = FALSE)]
# Observed numeric outcome: the signal plus Gaussian noise scaled by 0.5.
d$y <- d$y_ideal + 0.5 * rnorm(nrow(d))
# Logical outcome: TRUE where the observed y exceeds 0.5.
d$yc <- d$y > 0.5
# Train/test flag: rows whose uniform draw is >= 0.2 are marked as training.
d$is_train <- runif(nrow(d)) >= 0.2

# Two-row check frame exercising missing and unseen values:
# both numeric inputs are NA, one category is a new level and one is NA.
dcheck <- d[1:2, ]
dcheck$x_numeric <- NA_real_
dcheck$x_cat[1] <- "new_level"
dcheck$x_cat[2] <- NA_character_
dcheck

head(d)
summary(d)

# Observed points colored by the logical outcome, with the ideal curve and
# the 0.5 threshold used to define `yc`.
ggplot(data = d) +
  geom_point(aes(x = x_numeric, y = y, color = yc), alpha = 0.5) +
  geom_line(aes(x = x_numeric, y = y_ideal), color = "lightblue") +
  geom_hline(yintercept = 0.5, color = "red")
# Numeric-outcome experiment: build treatments for outcome `y` from the
# training rows only, using the custom coders and code restriction.
cfn <- vtreat::mkCrossFrameNExperiment(
  d[d$is_train, , drop = FALSE],
  c("x_numeric", "x_numeric_noise", "x_cat", "x_cat_noise"),
  "y",
  customCoders = customCoders,
  codeRestriction = codeRestriction,
  verbose = FALSE
)
cfn$treatments
vtreat::variable_values(cfn$treatments$scoreFrame)
# or directly
vtreat::value_variables_N(
  d[d$is_train, , drop = FALSE],
  c("x_numeric", "x_numeric_noise", "x_cat", "x_cat_noise"),
  "y"
)

# Apply the treatment plan to every row (train and test) and copy the two
# custom-coded columns back onto `d` for plotting.
prepared <- vtreat::prepare(cfn$treatments, d)
d$x_numeric_PiecewiseV <- prepared$x_numeric_PiecewiseV
d$x_numeric_knearest <- prepared$x_numeric_knearest

# Ideal curve (light blue) against the PiecewiseV-coded variable.
ggplot(data = d) +
  # geom_point(aes(x = x_numeric, y = y)) +
  geom_line(aes(x = x_numeric, y = y_ideal), color = "lightblue") +
  geom_line(aes(x = x_numeric, y = x_numeric_PiecewiseV)) +
  ggtitle("y_ideal as a function of x_numeric_PiecewiseV")

# Ideal curve (light blue) against the knearest-coded variable.
ggplot(data = d) +
  # geom_point(aes(x = x_numeric, y = y)) +
  geom_line(aes(x = x_numeric, y = y_ideal), color = "lightblue") +
  geom_line(aes(x = x_numeric, y = x_numeric_knearest)) +
  ggtitle("y_ideal as a function of x_numeric_knearest")

# PiecewiseV-coded variable versus observed and ideal y, on training rows.
WVPlots::ScatterHist(
  d[d$is_train, , drop = FALSE],
  "x_numeric_PiecewiseV", "y",
  "x_numeric_PiecewiseV versus observed y on train",
  smoothmethod = "identity",
  estimate_sig = TRUE
)
WVPlots::ScatterHist(
  d[d$is_train, , drop = FALSE],
  "x_numeric_PiecewiseV", "y_ideal",
  "x_numeric_PiecewiseV versus ideal y on train",
  smoothmethod = "identity",
  estimate_sig = TRUE
)

# The same two comparisons on the held-out (non-training) rows.
WVPlots::ScatterHist(
  d[!d$is_train, , drop = FALSE],
  "x_numeric_PiecewiseV", "y",
  "x_numeric_PiecewiseV versus observed y on test",
  smoothmethod = "identity",
  estimate_sig = TRUE
)
WVPlots::ScatterHist(
  d[!d$is_train, , drop = FALSE],
  "x_numeric_PiecewiseV", "y_ideal",
  "x_numeric_PiecewiseV versus ideal y on test",
  smoothmethod = "identity",
  estimate_sig = TRUE
)

# Apply the plan to the check frame holding NA and previously unseen values.
vtreat::prepare(cfn$treatments, dcheck)
# Categorical-outcome experiment: build treatments for the logical outcome
# `yc` (target value TRUE) from the training rows only, using the same
# custom coders and code restriction as the numeric experiment.
cfc <- vtreat::mkCrossFrameCExperiment(
  d[d$is_train, , drop = FALSE],
  c("x_numeric", "x_numeric_noise", "x_cat", "x_cat_noise"),
  "yc",
  TRUE,
  customCoders = customCoders,
  codeRestriction = codeRestriction,
  verbose = FALSE
)
cfc$treatments
vtreat::variable_values(cfc$treatments$scoreFrame)
# or directly
vtreat::value_variables_C(
  d[d$is_train, , drop = FALSE],
  c("x_numeric", "x_numeric_noise", "x_cat", "x_cat_noise"),
  "yc",
  TRUE
)

# Apply the plan to the check frame holding NA and previously unseen values.
vtreat::prepare(cfc$treatments, dcheck)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.