inst/doc/segmentr.R

## ----setup, include = FALSE----------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----message=FALSE-------------------------------------------------------
require(segmentr)
require(tidyr)
require(tibble)
require(dplyr)
require(lubridate)
require(magrittr)
require(purrr)

## ------------------------------------------------------------------------
data(berlin)
as_tibble(berlin, rownames="station") %>%
  mutate(`..`="..") %>%
  select(station, `2010-01-01`:`2010-01-03`, `..`, `2011-12-29`:`2011-12-31`)

## ------------------------------------------------------------------------
berlin %>%
  colMeans() %>%
  enframe("time", "temperature") %>%
  mutate_at(vars(time), ymd) %>%
  with(plot(time, temperature, cex=0.2))

## ------------------------------------------------------------------------
plot_results <- function(results, data) {
  dates <- colnames(data) %>% ymd()
  
  data %>%
    colMeans() %>%
    enframe("time", "temperature") %>%
    mutate_at(vars(time), ymd) %>%
    with({
      plot(time, temperature, cex=0.2)
      abline(v=dates[results$changepoints], col="red", lty=2)
    })
}

plot_results(list(changepoints=c(200, 360, 570)), berlin)

## ------------------------------------------------------------------------
lm_likelihood <- function (data) {
  fit <- t(data) %>%
    as_tibble() %>%
    rowid_to_column() %>%
    gather(station, temperature, -rowid) %>%
    with(lm(temperature ~ rowid))
    
  -mean(fit$residuals ^ 2)
}

c(lm_likelihood(berlin[, 2:3]), lm_likelihood(berlin[, 1:150]), lm_likelihood(berlin))

## ------------------------------------------------------------------------
results <- segment(
  berlin,
  likelihood = lm_likelihood,
  algorithm = "hierarchical"
)

results

## ------------------------------------------------------------------------
plot_results(results, berlin)

## ------------------------------------------------------------------------
plot_curve <- function(expr, from, to, points = 100, plot_func=plot, ...) {
  x <- floor(seq(from, to, length.out = 100))
  y <- map_dbl(x, expr)
  plot_func(x, y, ...)
}

plot_curve(~ exp(0.3*(. - 50)) + exp(0.3 * (-. + 50)), from = 0, to = 100, type="l")

## ------------------------------------------------------------------------
penalized_likelihood <- auto_penalize(berlin, lm_likelihood)
results <- segment(
  berlin,
  likelihood = penalized_likelihood,
  algorithm = "hierarchical"
)
results

## ------------------------------------------------------------------------
plot_results(results, berlin)

## ------------------------------------------------------------------------
penalized_likelihood <- auto_penalize(berlin, lm_likelihood, big_segment_penalty = 1000)
results <- segment(
  berlin,
  likelihood = penalized_likelihood,
  algorithm = "hierarchical"
)
results

## ------------------------------------------------------------------------
plot_results(results, berlin)

## ------------------------------------------------------------------------
monthly_berlin <- berlin %>%
  as_tibble(rownames = "station") %>%
  gather(time, temperature, -station) %>%
  mutate(month = floor_date(ymd(time), "month")) %>%
  group_by(station, month) %>%
  summarize(temperature = mean(temperature)) %>%
  spread(month, temperature) %>% {
    stations <- .$station
    result <- as.matrix(.[, -1])
    rownames(result) <- stations
    result
  }

monthly_berlin %>%
  colMeans() %>%
  enframe("time", "temperature") %>%
  mutate_at(vars(time), ymd) %>%
  with(plot(time, temperature, cex=0.2))

## ------------------------------------------------------------------------
penalized_likelihood <- auto_penalize(monthly_berlin, lm_likelihood, small_segment_penalty = 100)

results <- segment(
  monthly_berlin,
  likelihood = penalized_likelihood,
  algorithm = "exact"
)

results

## ------------------------------------------------------------------------
plot_results(results, monthly_berlin)

## ------------------------------------------------------------------------
rsquared_likelihood <- function (data) {
  as_tibble(t(data)) %>%
    rowid_to_column() %>%
    gather(station, temperature, -rowid) %>%
    with(lm(temperature ~ rowid)) %>%
    summary %>%
    .$adj.r.squared
}

c(rsquared_likelihood(berlin[, 2:3]), rsquared_likelihood(berlin[, 1:150]), rsquared_likelihood(berlin))

## ------------------------------------------------------------------------
penalized_likelihood <- auto_penalize(berlin, rsquared_likelihood)
results <- segment(
  berlin,
  likelihood = penalized_likelihood,
  algorithm = "hierarchical"
)
results

## ------------------------------------------------------------------------
plot_results(results, berlin)

## ------------------------------------------------------------------------
penalized_likelihood <- auto_penalize(berlin, rsquared_likelihood, small_segment_penalty = 1.5)
results <- segment(
  berlin,
  likelihood = penalized_likelihood,
  algorithm = "hierarchical"
)
results

## ------------------------------------------------------------------------
plot_results(results, berlin)

## ------------------------------------------------------------------------
sub_berlin <- berlin[, 1:547]
penalized_likelihood <- auto_penalize(sub_berlin, rsquared_likelihood)
results <- segment(
  sub_berlin,
  likelihood = penalized_likelihood,
  algorithm = "hierarchical"
)
results

## ------------------------------------------------------------------------
plot_results(results, sub_berlin)

Try the segmentr package in your browser

Any scripts or data that you put into this service are public.

segmentr documentation built on Aug. 29, 2019, 1:03 a.m.