In this example we fit a piecewise linear function to example data.
Please see here for a discussion of the methodology.
library("RcppDynProg") library("ggplot2") set.seed(2018) g <- 100 d <- data.frame( x = 0.05*(1:(3*g))) # ordered in x d$y_ideal <- sin((0.3*d$x)^2) d$y_observed <- d$y_ideal + 0.25*rnorm(length(d$y_ideal)) plt1 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed)) + ylab("y") + ggtitle("raw data", subtitle = "dots: observed values, dashed line: unobserved true values") print(plt1) x_cuts <- solve_for_partition(d$x, d$y_observed, penalty = 1) print(x_cuts) d$estimate <- approx(x_cuts$x, x_cuts$pred, xout = d$x, method = "linear", rule = 2)$y d$group <- as.character(findInterval(d$x, x_cuts[x_cuts$what=="left", "x"]))
print(sum((d$y_observed - d$y_ideal)^2))
print(sum((d$estimate - d$y_ideal)^2))
print(sum((d$estimate - d$y_observed)^2))
plt2 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed, color = group)) + geom_line(aes(y = estimate, color = group)) + ylab("y") + ggtitle("RcppDynProg piecewise linear estimate", subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + theme(legend.position = "none") print(plt2)
use_vtreat <- requireNamespace("vtreat", quietly = TRUE)
Here we show many of the vtreat
custom variable coders.
An application of these coders can be found here.
spline_variable <- function(varName, x, y, w = NULL) { if(is.null(w)) { w <- numeric(n) + 1 } nknots <- 100 spline <- stats::smooth.spline(x, y, w = w, nknots = nknots, keep.data = FALSE, keep.stuff = FALSE, cv = TRUE)$fit estimate <- stats::predict(spline, x)$y return(estimate) } customCoders = list('c.PiecewiseV.num' = vtreat::solve_piecewise, 'n.PiecewiseV.num' = vtreat::solve_piecewise, 'c.knearest.num' = vtreat::square_window, 'n.knearest.num' = vtreat::square_window, 'c.spline.num' = spline_variable, 'n.spline.num' = spline_variable, 'c.PiecewiseLin.num' = RcppDynProg::piecewise_linear, 'n.PiecewiseLin.num' = RcppDynProg::piecewise_linear, 'c.PiecewiseC.num' = RcppDynProg::piecewise_constant, 'n.PiecewiseC.num' = RcppDynProg::piecewise_constant) codeRestriction = c("PiecewiseV", "knearest", "spline", "PiecewiseLin", "PiecewiseC") trt <- vtreat::designTreatmentsN( d, 'x', 'y_observed', customCoders = customCoders, codeRestriction = codeRestriction, verbose = FALSE) knitr::kable(trt$scoreFrame[, c("varName", "rsq", "sig")]) dt <- vtreat::prepare(trt, d) dt$y_observed <- NULL d <- cbind(d, dt)
print(sum((d$x_PiecewiseV - d$y_ideal)^2)) print(sum((d$x_PiecewiseV - d$y_observed)^2))
print(sum((d$x_knearest - d$y_ideal)^2)) print(sum((d$x_knearest - d$y_observed)^2))
print(sum((d$x_PiecewiseLin - d$y_ideal)^2)) print(sum((d$x_PiecewiseLin - d$y_observed)^2))
print(sum((d$x_spline - d$y_ideal)^2)) print(sum((d$x_spline - d$y_observed)^2))
plt3 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed)) + geom_line(aes(y = x_PiecewiseV)) + ylab("y") + ggtitle("vtreat PiecewiseV estimate", subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + theme(legend.position = "none") print(plt3) plt4 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed)) + geom_line(aes(y = x_knearest)) + ylab("y") + ggtitle("vtreat k-nearest estimate", subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + theme(legend.position = "none") print(plt4) plt5 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed)) + geom_line(aes(y = x_PiecewiseLin)) + ylab("y") + ggtitle("vtreat RcppDynProg piecewise linear estimate", subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + theme(legend.position = "none") print(plt5) plt6 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed)) + geom_line(aes(y = x_PiecewiseC)) + ylab("y") + ggtitle("vtreat RcppDynProg piecewise constant estimate", subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + theme(legend.position = "none") print(plt6) plt7 <- ggplot(data= d, aes(x = x)) + geom_line(aes(y = y_ideal), linetype=2) + geom_point(aes(y = y_observed)) + geom_line(aes(y = x_spline)) + ylab("y") + ggtitle("spline estimate", subtitle = "dots: observed values, segments: observed group means, dashed line: unobserved true values") + theme(legend.position = "none") print(plt7)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.