library("RcppDynProg")
library("ggplot2")

# Data from: 
# https://finance.yahoo.com/quote/%5EGSPC/history?period1=-630950400&period2=1546416000&interval=1d&filter=history&frequency=1d
sp500 <- read.csv("^GSPC.csv")
sp500$Date <- as.Date(sp500$Date)
sp500$x <- as.numeric(sp500$Date)
sp500$log_price <- log(sp500$Adj.Close)


sp500recent <- sp500[sp500$Date >= as.Date('2000-01-01'), ]


# find penalty
sp500recent$permuted <- sp500recent$log_price[sample.int(nrow(sp500recent), 
                                                         nrow(sp500recent), 
                                                         replace = FALSE)]
# look for a penalty that prefers 1 segment on permuted data
lb <- 0
ub <- 100
while(TRUE) {
  soln <- solve_for_partition(sp500recent$x, sp500recent$permuted, penalty = ub, max_k = 3)
  if(nrow(soln)==2) {
    break
  }
  lb <- ub
  ub <- 10*ub
}
while(TRUE) {
  penalty <- ceiling((lb+ub)/2)
  if(penalty>=ub) {
    break
  }
  soln <- solve_for_partition(sp500recent$x, sp500recent$permuted, penalty = penalty, max_k = 3)
  if(nrow(soln)==2) {
    ub <- penalty
  } else {
    lb <- penalty
  }
}
print(penalty)

soln <- solve_for_partition(sp500recent$x, sp500recent$log_price, penalty = penalty)
sp500recent$estimate <- exp(approx(soln$x, soln$pred, 
                             xout = sp500recent$x, 
                             method = "linear", rule = 2)$y)
sp500recent$group <- as.character(
  findInterval(sp500recent$x, soln[soln$what=="left", "x"]))

ggplot(data = sp500recent, aes(x = Date)) +
  geom_line(aes(y=Adj.Close), color = "darkgray") +
  geom_line(aes(y=estimate, color = group)) +
  ggtitle("segment approximation of historic sp500recent data",
          subtitle = paste("per-segment penalty =", penalty)) +
  theme(legend.position = "none") +
  scale_color_brewer(palette = "Dark2") +
  scale_y_log10()
penalty <- 1

soln <- solve_for_partition(sp500recent$x, sp500recent$log_price, penalty = penalty)
sp500recent$estimate <- exp(approx(soln$x, soln$pred, 
                             xout = sp500recent$x, 
                             method = "linear", rule = 2)$y)
sp500recent$group <- as.character(
  findInterval(sp500recent$x, soln[soln$what=="left", "x"]))

ggplot(data = sp500recent, aes(x = Date)) +
  geom_line(aes(y=Adj.Close), color = "darkgray") +
  geom_line(aes(y=estimate, color = group)) +
  ggtitle("segment approximation of historic sp500 data",
          subtitle = paste("per-segment penalty =", penalty)) +
  theme(legend.position = "none") +
  scale_color_brewer(palette = "Dark2") +
  scale_y_log10()
penalty <- 5

soln <- solve_for_partition(sp500$x, sp500$log_price, penalty = penalty)
sp500$estimate <- exp(approx(soln$x, soln$pred, 
                             xout = sp500$x, 
                             method = "linear", rule = 2)$y)

sp500$group <- as.character(
  findInterval(sp500$x, soln[soln$what=="left", "x"]))

ggplot(data = sp500, aes(x = Date)) +
  geom_line(aes(y=Adj.Close), color = "darkgray") +
  geom_line(aes(y=estimate, group = group), color = "darkgreen") +
  ggtitle("segment approximation of historic sp500 data",
          subtitle = paste("per-segment penalty =", penalty)) +
  theme(legend.position = "none") +
  scale_color_manual(values = colors) +
  scale_y_log10()

Naive gaps (TODO: need to find breakpoints that are good for the no-gap solution).

sl <- soln[soln$what=='left', ]
fit <- vtreat:::encode_x_as_lambdas(
  sp500$x, min(sp500$x), max(sp500$x),
  sl$x)
vars <- setdiff(colnames(fit), "intercept")
fit$y <- sp500$log_price
fmla <- wrapr::mk_formula("y", vars)
model <- lm(fmla, data = fit)
sp500$pred <- exp(predict(model, newdata = fit))

ggplot(data = sp500, aes(x = Date)) +
  geom_line(aes(y=Adj.Close), color = "darkgray") +
  geom_line(aes(y=pred), color = "darkgreen") +
  ggtitle("segment approximation (no gaps) of historic sp500 data",
          subtitle = paste("per-segment penalty =", penalty)) +
  theme(legend.position = "none") +
  scale_color_brewer(palette = "Dark2") +
  scale_y_log10()

Fit piecewise constant on delta series.

penalty <- 0.01

sp500$delta_log_price <- c(0, sp500$log_price[-1] - sp500$log_price[-nrow(sp500)])

soln <- solve_for_partitionc(sp500$x, sp500$delta_log_price, penalty = penalty)
sl <- soln[soln$what=='left', ]
fit <- vtreat:::encode_x_as_lambdas(
  sp500$x, min(sp500$x), max(sp500$x),
  sl$x)
vars <- setdiff(colnames(fit), "intercept")
fit$y <- sp500$log_price
fmla <- wrapr::mk_formula("y", vars)
model <- lm(fmla, data = fit)
sp500$pred <- exp(predict(model, newdata = fit))

ggplot(data = sp500, aes(x = Date)) +
  geom_line(aes(y=Adj.Close), color = "darkgray") +
  geom_line(aes(y=pred), color = "darkgreen") +
  ggtitle("segment approximation (slope ests, no gaps) of historic sp500 data",
          subtitle = paste("per-segment penalty =", penalty)) +
  theme(legend.position = "none") +
  scale_color_brewer(palette = "Dark2") +
  scale_y_log10()


WinVector/RcppDynProg documentation built on Aug. 26, 2023, 4:47 a.m.