comparison-pelt.R
In fastcpd: Fast Change Point Detection via Sequential Gradient Descent

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>", eval = FALSE, cache = FALSE,
  warning = FALSE, fig.width = 8, fig.height = 5
)
library(fastcpd)

## ----logistic-regression-setup------------------------------------------------
#  #' Cost function for Logistic regression, i.e. binomial family in GLM.
#  #'
#  #' @param data Data to be used to calculate the cost values. The last column is
#  #'     the response variable.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Cost value for the corresponding segment of data.
#  cost_glm_binomial <- function(data, family = "binomial") {
#    data <- as.matrix(data)
#    p <- dim(data)[2] - 1
#    out <- fastglm::fastglm(
#      as.matrix(data[, 1:p]), data[, p + 1],
#      family = family
#    )
#    return(out$deviance / 2)
#  }
#  
#  #' Implementation of vanilla PELT for logistic regression type data.
#  #'
#  #' @param data Data to be used for change point detection.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param cost Cost function to be used to calculate cost values.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list consisting of two: change point locations and negative log
#  #'     likelihood values for each segment.
#  pelt_vanilla_binomial <- function(data, beta, cost = cost_glm_binomial) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:m)
#      {
#        k <- set[i] + 1
#        cval[i] <- 0
#        if (t - k >= p - 1) cval[i] <- suppressWarnings(cost(data[k:t, ]))
#      }
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      Fobj <- c(Fobj, min_val)
#    }
#    cp <- cp_set[[n + 1]]
#    nLL <- 0
#    cp_loc <- unique(c(0, cp, n))
#    for (i in 1:(length(cp_loc) - 1))
#    {
#      seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#      data_seg <- data[seg, ]
#      out <- fastglm::fastglm(
#        as.matrix(data_seg[, 1:p]), data_seg[, p + 1],
#        family = "binomial"
#      )
#      nLL <- out$deviance / 2 + nLL
#    }
#  
#    output <- list(cp, nLL)
#    names(output) <- c("cp", "nLL")
#    return(output)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #'
#  #' @param data_new New data point used to update the coeffient.
#  #' @param coef Previous coeffient to be updated.
#  #' @param cum_coef Summation of all the past coefficients to be used in
#  #'     averaging.
#  #' @param cmatrix Hessian matrix in gradient descent.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'     the Hessian matrix.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list of values containing the new coefficients, summation of
#  #'     coefficients so far and all the Hessian matrices.
#  cost_logistic_update <- function(
#      data_new, coef, cum_coef, cmatrix, epsilon = 1e-10) {
#    p <- length(data_new) - 1
#    X_new <- data_new[1:p]
#    Y_new <- data_new[p + 1]
#    eta <- X_new %*% coef
#    mu <- 1 / (1 + exp(-eta))
#    cmatrix <- cmatrix + (X_new %o% X_new) * as.numeric((1 - mu) * mu)
#    lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#    coef <- coef - solve(cmatrix + epsilon * diag(1, p), lik_dev)
#    cum_coef <- cum_coef + coef
#    return(list(coef, cum_coef, cmatrix))
#  }
#  
#  #' Calculate negative log likelihood given data segment and guess of
#  #' coefficient.
#  #'
#  #' @param data Data to be used to calculate the negative log likelihood.
#  #' @param b Guess of the coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Negative log likelihood.
#  neg_log_lik_binomial <- function(data, b) {
#    p <- dim(data)[2] - 1
#    X <- data[, 1:p, drop = FALSE]
#    Y <- data[, p + 1, drop = FALSE]
#    u <- as.numeric(X %*% b)
#    L <- -Y * u + log(1 + exp(u))
#    return(sum(L))
#  }
#  
#  #' Find change points using dynamic programming with pruning and SeGD.
#  #'
#  #' @param data Data used to find change points.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param trim Propotion of the data to ignore the change points at the
#  #'     beginning, ending and between change points.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing potential change point locations and negative log
#  #'     likelihood for each segment based on the change points guess.
#  segd_binomial <- function(data, beta, B = 10, trim = 0.025) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#  
#    # choose the initial values based on pre-segmentation
#  
#    index <- rep(1:B, rep(n / B, B))
#    coef.int <- matrix(NA, B, p)
#    for (i in 1:B)
#    {
#      out <- fastglm::fastglm(
#        as.matrix(data[index == i, 1:p]),
#        data[index == i, p + 1],
#        family = "binomial"
#      )
#      coef.int[i, ] <- coef(out)
#    }
#    X1 <- data[1, 1:p]
#    cum_coef <- coef <- matrix(coef.int[1, ], p, 1)
#    e_eta <- exp(coef %*% X1)
#    const <- e_eta / (1 + e_eta)^2
#    cmatrix <- array((X1 %o% X1) * as.numeric(const), c(p, p, 1))
#  
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#  
#      for (i in 1:(m - 1))
#      {
#        coef_c <- coef[, i]
#        cum_coef_c <- cum_coef[, i]
#        cmatrix_c <- cmatrix[, , i]
#        out <- cost_logistic_update(data[t, ], coef_c, cum_coef_c, cmatrix_c)
#        coef[, i] <- out[[1]]
#        cum_coef[, i] <- out[[2]]
#        cmatrix[, , i] <- out[[3]]
#        k <- set[i] + 1
#        cval[i] <- 0
#        if (t - k >= p - 1) {
#          cval[i] <-
#            neg_log_lik_binomial(data[k:t, ], cum_coef[, i] / (t - k + 1))
#        }
#      }
#  
#      # the choice of initial values requires further investigation
#  
#      cval[m] <- 0
#      Xt <- data[t, 1:p]
#      cum_coef_add <- coef_add <- coef.int[index[t], ]
#      e_eta_t <- exp(coef_add %*% Xt)
#      const <- e_eta_t / (1 + e_eta_t)^2
#      cmatrix_add <- (Xt %o% Xt) * as.numeric(const)
#  
#      coef <- cbind(coef, coef_add)
#      cum_coef <- cbind(cum_coef, cum_coef_add)
#      cmatrix <- abind::abind(cmatrix, cmatrix_add, along = 3)
#  
#      # Adding a momentum term (TBD)
#  
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      coef <- coef[, ind2, drop = FALSE]
#      cum_coef <- cum_coef[, ind2, drop = FALSE]
#      cmatrix <- cmatrix[, , ind2, drop = FALSE]
#      Fobj <- c(Fobj, min_val)
#    }
#  
#    # Remove change-points close to the boundaries
#  
#    cp <- cp_set[[n + 1]]
#    if (length(cp) > 0) {
#      ind3 <- (seq_len(length(cp)))[(cp < trim * n) | (cp > (1 - trim) * n)]
#      cp <- cp[-ind3]
#    }
#  
#    nLL <- 0
#    cp_loc <- unique(c(0, cp, n))
#    for (i in 1:(length(cp_loc) - 1))
#    {
#      seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#      data_seg <- data[seg, ]
#      out <- fastglm::fastglm(
#        as.matrix(data_seg[, 1:p]), data_seg[, p + 1],
#        family = "binomial"
#      )
#      nLL <- out$deviance / 2 + nLL
#    }
#  
#    output <- list(cp, nLL)
#    names(output) <- c("cp", "nLL")
#    return(output)
#  }

## ----poisson-regression-setup-------------------------------------------------
#  #' Cost function for Poisson regression.
#  #'
#  #' @param data Data to be used to calculate the cost values. The last column is
#  #'     the response variable.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Cost value for the corresponding segment of data.
#  cost_glm_poisson <- function(data, family = "poisson") {
#    data <- as.matrix(data)
#    p <- dim(data)[2] - 1
#    out <- fastglm::fastglm(as.matrix(data[, 1:p]), data[, p + 1], family = family)
#    return(out$deviance / 2)
#  }
#  
#  #' Implementation of vanilla PELT for poisson regression type data.
#  #'
#  #' @param data Data to be used for change point detection.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param cost Cost function to be used to calculate cost values.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list consisting of two: change point locations and negative log
#  #'     likelihood values for each segment.
#  pelt_vanilla_poisson <- function(data, beta, cost = cost_glm_poisson) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:m)
#      {
#        k <- set[i] + 1
#        if (t - k >= p - 1) cval[i] <- suppressWarnings(cost(data[k:t, ])) else cval[i] <- 0
#      }
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      Fobj <- c(Fobj, min_val)
#      # if (t %% 100 == 0) print(t)
#    }
#    cp <- cp_set[[n + 1]]
#    output <- list(cp)
#    names(output) <- c("cp")
#  
#    return(output)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #'
#  #' @param data_new New data point used to update the coeffient.
#  #' @param coef Previous coeffient to be updated.
#  #' @param cum_coef Summation of all the past coefficients to be used in
#  #'     averaging.
#  #' @param cmatrix Hessian matrix in gradient descent.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'     the Hessian matrix.
#  #' @param G Upper bound for the coefficient.
#  #' @param L Winsorization lower bound.
#  #' @param H Winsorization upper bound.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list of values containing the new coefficients, summation of
#  #'     coefficients so far and all the Hessian matrices.
#  cost_poisson_update <- function(data_new, coef, cum_coef, cmatrix, epsilon = 0.001, G = 10^10, L = -20, H = 20) {
#    p <- length(data_new) - 1
#    X_new <- data_new[1:p]
#    Y_new <- data_new[p + 1]
#    eta <- X_new %*% coef
#    mu <- exp(eta)
#    cmatrix <- cmatrix + (X_new %o% X_new) * min(as.numeric(mu), G)
#    lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#    coef <- coef - solve(cmatrix + epsilon * diag(1, p), lik_dev)
#    coef <- pmin(pmax(coef, L), H)
#    cum_coef <- cum_coef + coef
#    return(list(coef, cum_coef, cmatrix))
#  }
#  
#  #' Calculate negative log likelihood given data segment and guess of
#  #' coefficient.
#  #'
#  #' @param data Data to be used to calculate the negative log likelihood.
#  #' @param b Guess of the coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Negative log likelihood.
#  neg_log_lik_poisson <- function(data, b) {
#    p <- dim(data)[2] - 1
#    X <- data[, 1:p, drop = FALSE]
#    Y <- data[, p + 1, drop = FALSE]
#    u <- as.numeric(X %*% b)
#    L <- -Y * u + exp(u) + lfactorial(Y)
#    return(sum(L))
#  }
#  
#  #' Find change points using dynamic programming with pruning and SeGD.
#  #'
#  #' @param data Data used to find change points.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param trim Propotion of the data to ignore the change points at the
#  #'     beginning, ending and between change points.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'    the Hessian matrix.
#  #' @param G Upper bound for the coefficient.
#  #' @param L Winsorization lower bound.
#  #' @param H Winsorization upper bound.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing potential change point locations and negative log
#  #'     likelihood for each segment based on the change points guess.
#  segd_poisson <- function(data, beta, B = 10, trim = 0.03, epsilon = 0.001, G = 10^10, L = -20, H = 20) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#  
#    # choose the initial values based on pre-segmentation
#  
#    index <- rep(1:B, rep(n / B, B))
#    coef.int <- matrix(NA, B, p)
#    for (i in 1:B)
#    {
#      out <- fastglm::fastglm(x = as.matrix(data[index == i, 1:p]), y = data[index == i, p + 1], family = "poisson")
#      coef.int[i, ] <- coef(out)
#    }
#    X1 <- data[1, 1:p]
#    cum_coef <- coef <- pmin(pmax(matrix(coef.int[1, ], p, 1), L), H)
#    e_eta <- exp(coef %*% X1)
#    const <- e_eta
#    cmatrix <- array((X1 %o% X1) * as.numeric(const), c(p, p, 1))
#  
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:(m - 1))
#      {
#        coef_c <- coef[, i]
#        cum_coef_c <- cum_coef[, i]
#        cmatrix_c <- cmatrix[, , i]
#        out <- cost_poisson_update(data[t, ], coef_c, cum_coef_c, cmatrix_c, epsilon = epsilon, G = G, L = L, H = H)
#        coef[, i] <- out[[1]]
#        cum_coef[, i] <- out[[2]]
#        cmatrix[, , i] <- out[[3]]
#        k <- set[i] + 1
#        cum_coef_win <- pmin(pmax(cum_coef[, i] / (t - k + 1), L), H)
#        if (t - k >= p - 1) cval[i] <- neg_log_lik_poisson(data[k:t, ], cum_coef_win) else cval[i] <- 0
#      }
#  
#      # the choice of initial values requires further investigation
#  
#      cval[m] <- 0
#      Xt <- data[t, 1:p]
#      cum_coef_add <- coef_add <- pmin(pmax(coef.int[index[t], ], L), H)
#      e_eta_t <- exp(coef_add %*% Xt)
#      const <- e_eta_t
#      cmatrix_add <- (Xt %o% Xt) * as.numeric(const)
#      coef <- cbind(coef, coef_add)
#      cum_coef <- cbind(cum_coef, cum_coef_add)
#      cmatrix <- abind::abind(cmatrix, cmatrix_add, along = 3)
#  
#      # Adding a momentum term (TBD)
#  
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      coef <- coef[, ind2, drop = FALSE]
#      cum_coef <- cum_coef[, ind2, drop = FALSE]
#      cmatrix <- cmatrix[, , ind2, drop = FALSE]
#      Fobj <- c(Fobj, min_val)
#    }
#  
#    # Remove change-points close to the boundaries
#  
#    cp <- cp_set[[n + 1]]
#    if (length(cp) > 0) {
#      ind3 <- (1:length(cp))[(cp < trim * n) | (cp > (1 - trim) * n)]
#      if (length(ind3) > 0) cp <- cp[-ind3]
#    }
#  
#    cp <- sort(unique(c(0, cp)))
#    index <- which((diff(cp) < trim * n) == TRUE)
#    if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
#    cp <- cp[cp > 0]
#  
#    # nLL <- 0
#    # cp_loc <- unique(c(0,cp,n))
#    # for(i in 1:(length(cp_loc)-1))
#    # {
#    #   seg <- (cp_loc[i]+1):cp_loc[i+1]
#    #   data_seg <- data[seg,]
#    #   out <- fastglm(as.matrix(data_seg[, 1:p]), data_seg[, p+1], family="Poisson")
#    #   nLL <- out$deviance/2 + nLL
#    # }
#  
#    # output <- list(cp, nLL)
#    # names(output) <- c("cp", "nLL")
#  
#    output <- list(cp)
#    names(output) <- c("cp")
#  
#    return(output)
#  }
#  
#  # Generate data from poisson regression models with change-points
#  #' @param n Number of observations.
#  #' @param d Dimension of the covariates.
#  #' @param true.coef True regression coefficients.
#  #' @param true.cp.loc True change-point locations.
#  #' @param Sigma Covariance matrix of the covariates.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing the generated data and the true cluster
#  #'    assignments.
#  data_gen_poisson <- function(n, d, true.coef, true.cp.loc, Sigma) {
#    loc <- unique(c(0, true.cp.loc, n))
#    if (dim(true.coef)[2] != length(loc) - 1) stop("true.coef and true.cp.loc do not match")
#    x <- mvtnorm::rmvnorm(n, mean = rep(0, d), sigma = Sigma)
#    y <- NULL
#    for (i in 1:(length(loc) - 1))
#    {
#      mu <- exp(x[(loc[i] + 1):loc[i + 1], , drop = FALSE] %*% true.coef[, i, drop = FALSE])
#      group <- rpois(length(mu), mu)
#      y <- c(y, group)
#    }
#    data <- cbind(x, y)
#    true_cluster <- rep(1:(length(loc) - 1), diff(loc))
#    result <- list(data, true_cluster)
#    return(result)
#  }

## ----penalized-linear-regression-setup----------------------------------------
#  #' Cost function for penalized linear regression.
#  #'
#  #' @param data Data to be used to calculate the cost values. The last column is
#  #'     the response variable.
#  #' @param lambda Penalty coefficient.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Cost value for the corresponding segment of data.
#  cost_lasso <- function(data, lambda, family = "gaussian") {
#    data <- as.matrix(data)
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    out <- glmnet::glmnet(as.matrix(data[, 1:p]), data[, p + 1], family = family, lambda = lambda)
#    return(deviance(out) / 2)
#  }
#  
#  #' Implementation of vanilla PELT for penalized linear regression type data.
#  #'
#  #' @param data Data to be used for change point detection.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param cost Cost function to be used to calculate cost values.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list consisting of two: change point locations and negative log
#  #'     likelihood values for each segment.
#  pelt_vanilla_lasso <- function(data, beta, B = 10, cost = cost_lasso, family = "gaussian") {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    index <- rep(1:B, rep(n / B, B))
#    err_sd <- act_num <- rep(NA, B)
#    for (i in 1:B)
#    {
#      cvfit <- glmnet::cv.glmnet(as.matrix(data[index == i, 1:p]), data[index == i, p + 1], family = family)
#      coef <- coef(cvfit, s = "lambda.1se")[-1]
#      resi <- data[index == i, p + 1] - as.matrix(data[index == i, 1:p]) %*% as.numeric(coef)
#      err_sd[i] <- sqrt(mean(resi^2))
#      act_num[i] <- sum(abs(coef) > 0)
#    }
#    err_sd_mean <- mean(err_sd) # only works if error sd is unchanged.
#    act_num_mean <- mean(act_num)
#    beta <- (act_num_mean + 1) * beta # seems to work but there might be better choices
#  
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:m)
#      {
#        k <- set[i] + 1
#        if (t - k >= 1) cval[i] <- suppressWarnings(cost(data[k:t, ], lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1)))) else cval[i] <- 0
#      }
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      Fobj <- c(Fobj, min_val)
#      if (t %% 100 == 0) print(t)
#    }
#    cp <- cp_set[[n + 1]]
#    # nLL <- 0
#    # cp_loc <- unique(c(0,cp,n))
#    # for(i in 1:(length(cp_loc)-1))
#    # {
#    #  seg <- (cp_loc[i]+1):cp_loc[i+1]
#    #  data_seg <- data[seg,]
#    #  out <- glmnet(as.matrix(data_seg[, 1:p]), data_seg[, p+1], lambda=lambda, family=family)
#    #  nLL <- deviance(out)/2 + nLL
#    # }
#    # output <- list(cp, nLL)
#    output <- list(cp)
#    names(output) <- c("cp")
#    return(output)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #' @param a Coefficient to be updated.
#  #' @param lambda Penalty coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Updated coefficient.
#  soft_threshold <- function(a, lambda) {
#    sign(a) * pmax(abs(a) - lambda, 0)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #'
#  #' @param data_new New data point used to update the coeffient.
#  #' @param coef Previous coeffient to be updated.
#  #' @param cum_coef Summation of all the past coefficients to be used in
#  #'     averaging.
#  #' @param cmatrix Hessian matrix in gradient descent.
#  #' @param lambda Penalty coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list of values containing the new coefficients, summation of
#  #'     coefficients so far and all the Hessian matrices.
#  cost_lasso_update <- function(data_new, coef, cum_coef, cmatrix, lambda) {
#    p <- length(data_new) - 1
#    X_new <- data_new[1:p]
#    Y_new <- data_new[p + 1]
#    mu <- X_new %*% coef
#    cmatrix <- cmatrix + X_new %o% X_new
#    # B <- as.vector(cmatrix_inv%*%X_new)
#    # cmatrix_inv <- cmatrix_inv - B%o%B/(1+sum(X_new*B))
#    lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#    coef <- coef - solve(cmatrix, lik_dev)
#    nc <- norm(cmatrix, type = "F") # the choice of norm affects the speed. Spectral norm is more accurate but slower than F norm.
#    coef <- soft_threshold(coef, lambda / nc)
#    cum_coef <- cum_coef + coef
#    return(list(coef, cum_coef, cmatrix))
#  }
#  
#  #' Calculate negative log likelihood given data segment and guess of
#  #' coefficient.
#  #'
#  #' @param data Data to be used to calculate the negative log likelihood.
#  #' @param b Guess of the coefficient.
#  #' @param lambda Penalty coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Negative log likelihood.
#  neg_log_lik_lasso <- function(data, b, lambda) {
#    p <- dim(data)[2] - 1
#    X <- data[, 1:p, drop = FALSE]
#    Y <- data[, p + 1, drop = FALSE]
#    resi <- Y - X %*% b
#    L <- sum(resi^2) / 2 + lambda * sum(abs(b))
#    return(L)
#  }
#  
#  #' Find change points using dynamic programming with pruning and SeGD.
#  #'
#  #' @param data Data used to find change points.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param trim Propotion of the data to ignore the change points at the
#  #'     beginning, ending and between change points.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'    the Hessian matrix.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing potential change point locations and negative log
#  #'     likelihood for each segment based on the change points guess.
#  segd_lasso <- function(data, beta, B = 10, trim = 0.025, epsilon = 1e-5, family = "gaussian") {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#  
#    # choose the initial values based on pre-segmentation
#  
#    index <- rep(1:B, rep(n / B, B))
#    coef.int <- matrix(NA, B, p)
#    err_sd <- act_num <- rep(NA, B)
#    for (i in 1:B)
#    {
#      cvfit <- glmnet::cv.glmnet(as.matrix(data[index == i, 1:p]), data[index == i, p + 1], family = family)
#      coef.int[i, ] <- coef(cvfit, s = "lambda.1se")[-1]
#      resi <- data[index == i, p + 1] - as.matrix(data[index == i, 1:p]) %*% as.numeric(coef.int[i, ])
#      err_sd[i] <- sqrt(mean(resi^2))
#      act_num[i] <- sum(abs(coef.int[i, ]) > 0)
#    }
#    err_sd_mean <- mean(err_sd) # only works if error sd is unchanged.
#    act_num_mean <- mean(act_num)
#    beta <- (act_num_mean + 1) * beta # seems to work but there might be better choices
#  
#    X1 <- data[1, 1:p]
#    cum_coef <- coef <- matrix(coef.int[1, ], p, 1)
#    eta <- coef %*% X1
#    # c_int <- diag(1/epsilon,p) - X1%o%X1/epsilon^2/(1+sum(X1^2)/epsilon)
#    # cmatrix_inv <- array(c_int, c(p,p,1))
#    cmatrix <- array(X1 %o% X1 + epsilon * diag(1, p), c(p, p, 1))
#  
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#  
#      for (i in 1:(m - 1))
#      {
#        coef_c <- coef[, i]
#        cum_coef_c <- cum_coef[, i]
#        # cmatrix_inv_c <- cmatrix_inv[,,i]
#        cmatrix_c <- cmatrix[, , i]
#        k <- set[i] + 1
#        out <- cost_lasso_update(data[t, ], coef_c, cum_coef_c, cmatrix_c, lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1)))
#        coef[, i] <- out[[1]]
#        cum_coef[, i] <- out[[2]]
#        # cmatrix_inv[,,i] <- out[[3]]
#        cmatrix[, , i] <- out[[3]]
#        if (t - k >= 2) cval[i] <- neg_log_lik_lasso(data[k:t, ], cum_coef[, i] / (t - k + 1), lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1))) else cval[i] <- 0
#      }
#  
#      # the choice of initial values requires further investigation
#  
#      cval[m] <- 0
#      Xt <- data[t, 1:p]
#      cum_coef_add <- coef_add <- coef.int[index[t], ]
#      # cmatrix_inv_add <- diag(1/epsilon,p) - Xt%o%Xt/epsilon^2/(1+sum(Xt^2)/epsilon)
#  
#      coef <- cbind(coef, coef_add)
#      cum_coef <- cbind(cum_coef, cum_coef_add)
#      # cmatrix_inv <- abind::abind(cmatrix_inv, cmatrix_inv_add, along=3)
#      cmatrix <- abind::abind(cmatrix, Xt %o% Xt + epsilon * diag(1, p), along = 3)
#  
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      coef <- coef[, ind2, drop = FALSE]
#      cum_coef <- cum_coef[, ind2, drop = FALSE]
#      cmatrix <- cmatrix[, , ind2, drop = FALSE]
#      # cmatrix_inv <- cmatrix_inv[,,ind2,drop=FALSE]
#      Fobj <- c(Fobj, min_val)
#    }
#  
#    # Remove change-points close to the boundaries and merge change-points
#  
#    cp <- cp_set[[n + 1]]
#    if (length(cp) > 0) {
#      ind3 <- (1:length(cp))[(cp < trim * n) | (cp > (1 - trim) * n)]
#      if (length(ind3) > 0) cp <- cp[-ind3]
#    }
#  
#    cp <- sort(unique(c(0, cp)))
#    index <- which((diff(cp) < trim * n) == TRUE)
#    if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
#    cp <- cp[cp > 0]
#  
#    # nLL <- 0
#    # cp_loc <- unique(c(0,cp,n))
#    # for(i in 1:(length(cp_loc)-1))
#    # {
#    #  seg <- (cp_loc[i]+1):cp_loc[i+1]
#    #  data_seg <- data[seg,]
#    #  out <- fastglm(as.matrix(data_seg[, 1:p]), data_seg[, p+1], family="binomial")
#    #  nLL <- out$deviance/2 + nLL
#    # }
#  
#    output <- list(cp)
#    names(output) <- c("cp")
#    return(output)
#  }
#  
#  # Generate data from penalized linear regression models with change-points
#  #' @param n Number of observations.
#  #' @param d Dimension of the covariates.
#  #' @param true.coef True regression coefficients.
#  #' @param true.cp.loc True change-point locations.
#  #' @param Sigma Covariance matrix of the covariates.
#  #' @param evar Error variance.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing the generated data and the true cluster
#  #'    assignments.
#  data_gen_lasso <- function(n, d, true.coef, true.cp.loc, Sigma, evar) {
#    loc <- unique(c(0, true.cp.loc, n))
#    if (dim(true.coef)[2] != length(loc) - 1) stop("true.coef and true.cp.loc do not match")
#    x <- mvtnorm::rmvnorm(n, mean = rep(0, d), sigma = Sigma)
#    y <- NULL
#    for (i in 1:(length(loc) - 1))
#    {
#      Xb <- x[(loc[i] + 1):loc[i + 1], , drop = FALSE] %*% true.coef[, i, drop = FALSE]
#      add <- Xb + rnorm(length(Xb), sd = sqrt(evar))
#      y <- c(y, add)
#    }
#    data <- cbind(x, y)
#    true_cluster <- rep(1:(length(loc) - 1), diff(loc))
#    result <- list(data, true_cluster)
#    return(result)
#  }

## ----logistic-regression------------------------------------------------------
#  set.seed(1)
#  p <- 5
#  x <- matrix(rnorm(300 * p, 0, 1), ncol = p)
#  
#  # Randomly generate coefficients with different means.
#  theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
#  
#  # Randomly generate response variables based on the segmented data and
#  # corresponding coefficients
#  y <- c(
#    rbinom(125, 1, 1 / (1 + exp(-x[1:125, ] %*% theta[1, ]))),
#    rbinom(300 - 125, 1, 1 / (1 + exp(-x[(125 + 1):300, ] %*% theta[2, ])))
#  )
#  
#  segd_binomial(cbind(x, y), (p + 1) * log(300) / 2, B = 5)$cp
#  #> [1] 125
#  
#  fastcpd.binomial(
#    cbind(y, x),
#    segment_count = 5,
#    beta = "BIC",
#    cost_adjustment = "BIC",
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 125
#  
#  pelt_vanilla_binomial(cbind(x, y), (p + 1) * log(300) / 2)$cp
#  #> [1]   0 125
#  
#  fastcpd.binomial(
#    cbind(y, x),
#    segment_count = 5,
#    vanilla_percentage = 1,
#    beta = "BIC",
#    cost_adjustment = "BIC",
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 125

## ----poisson-regression-------------------------------------------------------
#  set.seed(1)
#  n <- 1500
#  d <- 5
#  rho <- 0.9
#  Sigma <- array(0, c(d, d))
#  for (i in 1:d) {
#    Sigma[i, ] <- rho^(abs(i - (1:d)))
#  }
#  delta <- c(5, 7, 9, 11, 13)
#  a.sq <- 1
#  delta.new <-
#    delta * sqrt(a.sq) / sqrt(as.numeric(t(delta) %*% Sigma %*% delta))
#  true.cp.loc <- c(375, 750, 1125)
#  
#  # regression coefficients
#  true.coef <- matrix(0, nrow = d, ncol = length(true.cp.loc) + 1)
#  true.coef[, 1] <- c(1, 1.2, -1, 0.5, -2)
#  true.coef[, 2] <- true.coef[, 1] + delta.new
#  true.coef[, 3] <- true.coef[, 1]
#  true.coef[, 4] <- true.coef[, 3] - delta.new
#  
#  out <- data_gen_poisson(n, d, true.coef, true.cp.loc, Sigma)
#  data <- out[[1]]
#  g_tr <- out[[2]]
#  beta <- log(n) * (d + 1) / 2
#  
#  segd_poisson(
#    data, beta, trim = 0.03, B = 10, epsilon = 0.001, G = 10^10, L = -20, H = 20
#  )$cp
#  #> [1]  380  751 1136 1251
#  
#  fastcpd.poisson(
#    cbind(data[, d + 1], data[, 1:d]),
#    beta = beta,
#    cost_adjustment = "BIC",
#    epsilon = 0.001,
#    segment_count = 10,
#    r.progress = FALSE
#  )@cp_set
#  #> [1]  380  751 1136 1251
#  
#  pelt_vanilla_poisson(data, beta)$cp
#  #> [1]    0  374  752 1133
#  
#  fastcpd.poisson(
#    cbind(data[, d + 1], data[, 1:d]),
#    segment_count = 10,
#    vanilla_percentage = 1,
#    beta = beta,
#    cost_adjustment = "BIC",
#    r.progress = FALSE
#  )@cp_set
#  #> [1]  374  752 1133

## ----penalized-linear-regression----------------------------------------------
#  set.seed(1)
#  n <- 1000
#  s <- 3
#  d <- 50
#  evar <- 0.5
#  Sigma <- diag(1, d)
#  true.cp.loc <- c(100, 300, 500, 800, 900)
#  seg <- length(true.cp.loc) + 1
#  true.coef <- matrix(rnorm(seg * s), s, seg)
#  true.coef <- rbind(true.coef, matrix(0, d - s, seg))
#  out <- data_gen_lasso(n, d, true.coef, true.cp.loc, Sigma, evar)
#  data <- out[[1]]
#  beta <- log(n) / 2 # beta here has different meaning
#  
#  segd_lasso(data, beta, B = 10, trim = 0.025)$cp
#  #> [1] 100 300 520 800 901
#  
#  fastcpd.lasso(
#    cbind(data[, d + 1], data[, 1:d]),
#    epsilon = 1e-5,
#    beta = beta,
#    cost_adjustment = "BIC",
#    pruning_coef = 0,
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 100 300 520 800 901
#  
#  pelt_vanilla_lasso(data, beta, cost = cost_lasso)$cp
#  #> [1] 100
#  #> [1] 200
#  #> [1] 300
#  #> [1] 400
#  #> [1] 500
#  #> [1] 600
#  #> [1] 700
#  #> [1] 800
#  #> [1] 900
#  #> [1] 1000
#  #> [1]   0 103 299 510 800 900
#  
#  fastcpd.lasso(
#    cbind(data[, d + 1], data[, 1:d]),
#    vanilla_percentage = 1,
#    epsilon = 1e-5,
#    beta = beta,
#    cost_adjustment = "BIC",
#    pruning_coef = 0,
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 103 299 510 800 900

## ----ref.label = knitr::all_labels(), echo = TRUE-----------------------------
#  knitr::opts_chunk$set(
#    collapse = TRUE, comment = "#>", eval = FALSE, cache = FALSE,
#    warning = FALSE, fig.width = 8, fig.height = 5
#  )
#  library(fastcpd)
#  #' Cost function for Logistic regression, i.e. binomial family in GLM.
#  #'
#  #' @param data Data to be used to calculate the cost values. The last column is
#  #'     the response variable.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Cost value for the corresponding segment of data.
#  cost_glm_binomial <- function(data, family = "binomial") {
#    data <- as.matrix(data)
#    p <- dim(data)[2] - 1
#    out <- fastglm::fastglm(
#      as.matrix(data[, 1:p]), data[, p + 1],
#      family = family
#    )
#    return(out$deviance / 2)
#  }
#  
#  #' Implementation of vanilla PELT for logistic regression type data.
#  #'
#  #' @param data Data to be used for change point detection.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param cost Cost function to be used to calculate cost values.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list consisting of two: change point locations and negative log
#  #'     likelihood values for each segment.
#  pelt_vanilla_binomial <- function(data, beta, cost = cost_glm_binomial) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:m)
#      {
#        k <- set[i] + 1
#        cval[i] <- 0
#        if (t - k >= p - 1) cval[i] <- suppressWarnings(cost(data[k:t, ]))
#      }
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      Fobj <- c(Fobj, min_val)
#    }
#    cp <- cp_set[[n + 1]]
#    nLL <- 0
#    cp_loc <- unique(c(0, cp, n))
#    for (i in 1:(length(cp_loc) - 1))
#    {
#      seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#      data_seg <- data[seg, ]
#      out <- fastglm::fastglm(
#        as.matrix(data_seg[, 1:p]), data_seg[, p + 1],
#        family = "binomial"
#      )
#      nLL <- out$deviance / 2 + nLL
#    }
#  
#    output <- list(cp, nLL)
#    names(output) <- c("cp", "nLL")
#    return(output)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #'
#  #' @param data_new New data point used to update the coeffient.
#  #' @param coef Previous coeffient to be updated.
#  #' @param cum_coef Summation of all the past coefficients to be used in
#  #'     averaging.
#  #' @param cmatrix Hessian matrix in gradient descent.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'     the Hessian matrix.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list of values containing the new coefficients, summation of
#  #'     coefficients so far and all the Hessian matrices.
#  cost_logistic_update <- function(
#      data_new, coef, cum_coef, cmatrix, epsilon = 1e-10) {
#    p <- length(data_new) - 1
#    X_new <- data_new[1:p]
#    Y_new <- data_new[p + 1]
#    eta <- X_new %*% coef
#    mu <- 1 / (1 + exp(-eta))
#    cmatrix <- cmatrix + (X_new %o% X_new) * as.numeric((1 - mu) * mu)
#    lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#    coef <- coef - solve(cmatrix + epsilon * diag(1, p), lik_dev)
#    cum_coef <- cum_coef + coef
#    return(list(coef, cum_coef, cmatrix))
#  }
#  
#  #' Calculate negative log likelihood given data segment and guess of
#  #' coefficient.
#  #'
#  #' @param data Data to be used to calculate the negative log likelihood.
#  #' @param b Guess of the coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Negative log likelihood.
#  neg_log_lik_binomial <- function(data, b) {
#    p <- dim(data)[2] - 1
#    X <- data[, 1:p, drop = FALSE]
#    Y <- data[, p + 1, drop = FALSE]
#    u <- as.numeric(X %*% b)
#    L <- -Y * u + log(1 + exp(u))
#    return(sum(L))
#  }
#  
#  #' Find change points using dynamic programming with pruning and SeGD.
#  #'
#  #' @param data Data used to find change points.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param trim Propotion of the data to ignore the change points at the
#  #'     beginning, ending and between change points.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing potential change point locations and negative log
#  #'     likelihood for each segment based on the change points guess.
#  segd_binomial <- function(data, beta, B = 10, trim = 0.025) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#  
#    # choose the initial values based on pre-segmentation
#  
#    index <- rep(1:B, rep(n / B, B))
#    coef.int <- matrix(NA, B, p)
#    for (i in 1:B)
#    {
#      out <- fastglm::fastglm(
#        as.matrix(data[index == i, 1:p]),
#        data[index == i, p + 1],
#        family = "binomial"
#      )
#      coef.int[i, ] <- coef(out)
#    }
#    X1 <- data[1, 1:p]
#    cum_coef <- coef <- matrix(coef.int[1, ], p, 1)
#    e_eta <- exp(coef %*% X1)
#    const <- e_eta / (1 + e_eta)^2
#    cmatrix <- array((X1 %o% X1) * as.numeric(const), c(p, p, 1))
#  
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#  
#      for (i in 1:(m - 1))
#      {
#        coef_c <- coef[, i]
#        cum_coef_c <- cum_coef[, i]
#        cmatrix_c <- cmatrix[, , i]
#        out <- cost_logistic_update(data[t, ], coef_c, cum_coef_c, cmatrix_c)
#        coef[, i] <- out[[1]]
#        cum_coef[, i] <- out[[2]]
#        cmatrix[, , i] <- out[[3]]
#        k <- set[i] + 1
#        cval[i] <- 0
#        if (t - k >= p - 1) {
#          cval[i] <-
#            neg_log_lik_binomial(data[k:t, ], cum_coef[, i] / (t - k + 1))
#        }
#      }
#  
#      # the choice of initial values requires further investigation
#  
#      cval[m] <- 0
#      Xt <- data[t, 1:p]
#      cum_coef_add <- coef_add <- coef.int[index[t], ]
#      e_eta_t <- exp(coef_add %*% Xt)
#      const <- e_eta_t / (1 + e_eta_t)^2
#      cmatrix_add <- (Xt %o% Xt) * as.numeric(const)
#  
#      coef <- cbind(coef, coef_add)
#      cum_coef <- cbind(cum_coef, cum_coef_add)
#      cmatrix <- abind::abind(cmatrix, cmatrix_add, along = 3)
#  
#      # Adding a momentum term (TBD)
#  
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      coef <- coef[, ind2, drop = FALSE]
#      cum_coef <- cum_coef[, ind2, drop = FALSE]
#      cmatrix <- cmatrix[, , ind2, drop = FALSE]
#      Fobj <- c(Fobj, min_val)
#    }
#  
#    # Remove change-points close to the boundaries
#  
#    cp <- cp_set[[n + 1]]
#    if (length(cp) > 0) {
#      ind3 <- (seq_len(length(cp)))[(cp < trim * n) | (cp > (1 - trim) * n)]
#      cp <- cp[-ind3]
#    }
#  
#    nLL <- 0
#    cp_loc <- unique(c(0, cp, n))
#    for (i in 1:(length(cp_loc) - 1))
#    {
#      seg <- (cp_loc[i] + 1):cp_loc[i + 1]
#      data_seg <- data[seg, ]
#      out <- fastglm::fastglm(
#        as.matrix(data_seg[, 1:p]), data_seg[, p + 1],
#        family = "binomial"
#      )
#      nLL <- out$deviance / 2 + nLL
#    }
#  
#    output <- list(cp, nLL)
#    names(output) <- c("cp", "nLL")
#    return(output)
#  }
#  #' Cost function for Poisson regression.
#  #'
#  #' @param data Data to be used to calculate the cost values. The last column is
#  #'     the response variable.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Cost value for the corresponding segment of data.
#  cost_glm_poisson <- function(data, family = "poisson") {
#    data <- as.matrix(data)
#    p <- dim(data)[2] - 1
#    out <- fastglm::fastglm(as.matrix(data[, 1:p]), data[, p + 1], family = family)
#    return(out$deviance / 2)
#  }
#  
#  #' Implementation of vanilla PELT for poisson regression type data.
#  #'
#  #' @param data Data to be used for change point detection.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param cost Cost function to be used to calculate cost values.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list consisting of two: change point locations and negative log
#  #'     likelihood values for each segment.
#  pelt_vanilla_poisson <- function(data, beta, cost = cost_glm_poisson) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:m)
#      {
#        k <- set[i] + 1
#        if (t - k >= p - 1) cval[i] <- suppressWarnings(cost(data[k:t, ])) else cval[i] <- 0
#      }
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      Fobj <- c(Fobj, min_val)
#      # if (t %% 100 == 0) print(t)
#    }
#    cp <- cp_set[[n + 1]]
#    output <- list(cp)
#    names(output) <- c("cp")
#  
#    return(output)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #'
#  #' @param data_new New data point used to update the coeffient.
#  #' @param coef Previous coeffient to be updated.
#  #' @param cum_coef Summation of all the past coefficients to be used in
#  #'     averaging.
#  #' @param cmatrix Hessian matrix in gradient descent.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'     the Hessian matrix.
#  #' @param G Upper bound for the coefficient.
#  #' @param L Winsorization lower bound.
#  #' @param H Winsorization upper bound.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list of values containing the new coefficients, summation of
#  #'     coefficients so far and all the Hessian matrices.
#  cost_poisson_update <- function(data_new, coef, cum_coef, cmatrix, epsilon = 0.001, G = 10^10, L = -20, H = 20) {
#    p <- length(data_new) - 1
#    X_new <- data_new[1:p]
#    Y_new <- data_new[p + 1]
#    eta <- X_new %*% coef
#    mu <- exp(eta)
#    cmatrix <- cmatrix + (X_new %o% X_new) * min(as.numeric(mu), G)
#    lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#    coef <- coef - solve(cmatrix + epsilon * diag(1, p), lik_dev)
#    coef <- pmin(pmax(coef, L), H)
#    cum_coef <- cum_coef + coef
#    return(list(coef, cum_coef, cmatrix))
#  }
#  
#  #' Calculate negative log likelihood given data segment and guess of
#  #' coefficient.
#  #'
#  #' @param data Data to be used to calculate the negative log likelihood.
#  #' @param b Guess of the coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Negative log likelihood.
#  neg_log_lik_poisson <- function(data, b) {
#    p <- dim(data)[2] - 1
#    X <- data[, 1:p, drop = FALSE]
#    Y <- data[, p + 1, drop = FALSE]
#    u <- as.numeric(X %*% b)
#    L <- -Y * u + exp(u) + lfactorial(Y)
#    return(sum(L))
#  }
#  
#  #' Find change points using dynamic programming with pruning and SeGD.
#  #'
#  #' @param data Data used to find change points.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param trim Propotion of the data to ignore the change points at the
#  #'     beginning, ending and between change points.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'    the Hessian matrix.
#  #' @param G Upper bound for the coefficient.
#  #' @param L Winsorization lower bound.
#  #' @param H Winsorization upper bound.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing potential change point locations and negative log
#  #'     likelihood for each segment based on the change points guess.
#  segd_poisson <- function(data, beta, B = 10, trim = 0.03, epsilon = 0.001, G = 10^10, L = -20, H = 20) {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#  
#    # choose the initial values based on pre-segmentation
#  
#    index <- rep(1:B, rep(n / B, B))
#    coef.int <- matrix(NA, B, p)
#    for (i in 1:B)
#    {
#      out <- fastglm::fastglm(x = as.matrix(data[index == i, 1:p]), y = data[index == i, p + 1], family = "poisson")
#      coef.int[i, ] <- coef(out)
#    }
#    X1 <- data[1, 1:p]
#    cum_coef <- coef <- pmin(pmax(matrix(coef.int[1, ], p, 1), L), H)
#    e_eta <- exp(coef %*% X1)
#    const <- e_eta
#    cmatrix <- array((X1 %o% X1) * as.numeric(const), c(p, p, 1))
#  
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:(m - 1))
#      {
#        coef_c <- coef[, i]
#        cum_coef_c <- cum_coef[, i]
#        cmatrix_c <- cmatrix[, , i]
#        out <- cost_poisson_update(data[t, ], coef_c, cum_coef_c, cmatrix_c, epsilon = epsilon, G = G, L = L, H = H)
#        coef[, i] <- out[[1]]
#        cum_coef[, i] <- out[[2]]
#        cmatrix[, , i] <- out[[3]]
#        k <- set[i] + 1
#        cum_coef_win <- pmin(pmax(cum_coef[, i] / (t - k + 1), L), H)
#        if (t - k >= p - 1) cval[i] <- neg_log_lik_poisson(data[k:t, ], cum_coef_win) else cval[i] <- 0
#      }
#  
#      # the choice of initial values requires further investigation
#  
#      cval[m] <- 0
#      Xt <- data[t, 1:p]
#      cum_coef_add <- coef_add <- pmin(pmax(coef.int[index[t], ], L), H)
#      e_eta_t <- exp(coef_add %*% Xt)
#      const <- e_eta_t
#      cmatrix_add <- (Xt %o% Xt) * as.numeric(const)
#      coef <- cbind(coef, coef_add)
#      cum_coef <- cbind(cum_coef, cum_coef_add)
#      cmatrix <- abind::abind(cmatrix, cmatrix_add, along = 3)
#  
#      # Adding a momentum term (TBD)
#  
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      coef <- coef[, ind2, drop = FALSE]
#      cum_coef <- cum_coef[, ind2, drop = FALSE]
#      cmatrix <- cmatrix[, , ind2, drop = FALSE]
#      Fobj <- c(Fobj, min_val)
#    }
#  
#    # Remove change-points close to the boundaries
#  
#    cp <- cp_set[[n + 1]]
#    if (length(cp) > 0) {
#      ind3 <- (1:length(cp))[(cp < trim * n) | (cp > (1 - trim) * n)]
#      if (length(ind3) > 0) cp <- cp[-ind3]
#    }
#  
#    cp <- sort(unique(c(0, cp)))
#    index <- which((diff(cp) < trim * n) == TRUE)
#    if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
#    cp <- cp[cp > 0]
#  
#    # nLL <- 0
#    # cp_loc <- unique(c(0,cp,n))
#    # for(i in 1:(length(cp_loc)-1))
#    # {
#    #   seg <- (cp_loc[i]+1):cp_loc[i+1]
#    #   data_seg <- data[seg,]
#    #   out <- fastglm(as.matrix(data_seg[, 1:p]), data_seg[, p+1], family="Poisson")
#    #   nLL <- out$deviance/2 + nLL
#    # }
#  
#    # output <- list(cp, nLL)
#    # names(output) <- c("cp", "nLL")
#  
#    output <- list(cp)
#    names(output) <- c("cp")
#  
#    return(output)
#  }
#  
#  # Generate data from poisson regression models with change-points
#  #' @param n Number of observations.
#  #' @param d Dimension of the covariates.
#  #' @param true.coef True regression coefficients.
#  #' @param true.cp.loc True change-point locations.
#  #' @param Sigma Covariance matrix of the covariates.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing the generated data and the true cluster
#  #'    assignments.
#  data_gen_poisson <- function(n, d, true.coef, true.cp.loc, Sigma) {
#    loc <- unique(c(0, true.cp.loc, n))
#    if (dim(true.coef)[2] != length(loc) - 1) stop("true.coef and true.cp.loc do not match")
#    x <- mvtnorm::rmvnorm(n, mean = rep(0, d), sigma = Sigma)
#    y <- NULL
#    for (i in 1:(length(loc) - 1))
#    {
#      mu <- exp(x[(loc[i] + 1):loc[i + 1], , drop = FALSE] %*% true.coef[, i, drop = FALSE])
#      group <- rpois(length(mu), mu)
#      y <- c(y, group)
#    }
#    data <- cbind(x, y)
#    true_cluster <- rep(1:(length(loc) - 1), diff(loc))
#    result <- list(data, true_cluster)
#    return(result)
#  }
#  #' Cost function for penalized linear regression.
#  #'
#  #' @param data Data to be used to calculate the cost values. The last column is
#  #'     the response variable.
#  #' @param lambda Penalty coefficient.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Cost value for the corresponding segment of data.
#  cost_lasso <- function(data, lambda, family = "gaussian") {
#    data <- as.matrix(data)
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    out <- glmnet::glmnet(as.matrix(data[, 1:p]), data[, p + 1], family = family, lambda = lambda)
#    return(deviance(out) / 2)
#  }
#  
#  #' Implementation of vanilla PELT for penalized linear regression type data.
#  #'
#  #' @param data Data to be used for change point detection.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param cost Cost function to be used to calculate cost values.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list consisting of two: change point locations and negative log
#  #'     likelihood values for each segment.
#  pelt_vanilla_lasso <- function(data, beta, B = 10, cost = cost_lasso, family = "gaussian") {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    index <- rep(1:B, rep(n / B, B))
#    err_sd <- act_num <- rep(NA, B)
#    for (i in 1:B)
#    {
#      cvfit <- glmnet::cv.glmnet(as.matrix(data[index == i, 1:p]), data[index == i, p + 1], family = family)
#      coef <- coef(cvfit, s = "lambda.1se")[-1]
#      resi <- data[index == i, p + 1] - as.matrix(data[index == i, 1:p]) %*% as.numeric(coef)
#      err_sd[i] <- sqrt(mean(resi^2))
#      act_num[i] <- sum(abs(coef) > 0)
#    }
#    err_sd_mean <- mean(err_sd) # only works if error sd is unchanged.
#    act_num_mean <- mean(act_num)
#    beta <- (act_num_mean + 1) * beta # seems to work but there might be better choices
#  
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#      for (i in 1:m)
#      {
#        k <- set[i] + 1
#        if (t - k >= 1) cval[i] <- suppressWarnings(cost(data[k:t, ], lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1)))) else cval[i] <- 0
#      }
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      Fobj <- c(Fobj, min_val)
#      if (t %% 100 == 0) print(t)
#    }
#    cp <- cp_set[[n + 1]]
#    # nLL <- 0
#    # cp_loc <- unique(c(0,cp,n))
#    # for(i in 1:(length(cp_loc)-1))
#    # {
#    #  seg <- (cp_loc[i]+1):cp_loc[i+1]
#    #  data_seg <- data[seg,]
#    #  out <- glmnet(as.matrix(data_seg[, 1:p]), data_seg[, p+1], lambda=lambda, family=family)
#    #  nLL <- deviance(out)/2 + nLL
#    # }
#    # output <- list(cp, nLL)
#    output <- list(cp)
#    names(output) <- c("cp")
#    return(output)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #' @param a Coefficient to be updated.
#  #' @param lambda Penalty coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Updated coefficient.
#  soft_threshold <- function(a, lambda) {
#    sign(a) * pmax(abs(a) - lambda, 0)
#  }
#  
#  #' Function to update the coefficients using gradient descent.
#  #'
#  #' @param data_new New data point used to update the coeffient.
#  #' @param coef Previous coeffient to be updated.
#  #' @param cum_coef Summation of all the past coefficients to be used in
#  #'     averaging.
#  #' @param cmatrix Hessian matrix in gradient descent.
#  #' @param lambda Penalty coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list of values containing the new coefficients, summation of
#  #'     coefficients so far and all the Hessian matrices.
#  cost_lasso_update <- function(data_new, coef, cum_coef, cmatrix, lambda) {
#    p <- length(data_new) - 1
#    X_new <- data_new[1:p]
#    Y_new <- data_new[p + 1]
#    mu <- X_new %*% coef
#    cmatrix <- cmatrix + X_new %o% X_new
#    # B <- as.vector(cmatrix_inv%*%X_new)
#    # cmatrix_inv <- cmatrix_inv - B%o%B/(1+sum(X_new*B))
#    lik_dev <- as.numeric(-(Y_new - mu)) * X_new
#    coef <- coef - solve(cmatrix, lik_dev)
#    nc <- norm(cmatrix, type = "F") # the choice of norm affects the speed. Spectral norm is more accurate but slower than F norm.
#    coef <- soft_threshold(coef, lambda / nc)
#    cum_coef <- cum_coef + coef
#    return(list(coef, cum_coef, cmatrix))
#  }
#  
#  #' Calculate negative log likelihood given data segment and guess of
#  #' coefficient.
#  #'
#  #' @param data Data to be used to calculate the negative log likelihood.
#  #' @param b Guess of the coefficient.
#  #' @param lambda Penalty coefficient.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return Negative log likelihood.
#  neg_log_lik_lasso <- function(data, b, lambda) {
#    p <- dim(data)[2] - 1
#    X <- data[, 1:p, drop = FALSE]
#    Y <- data[, p + 1, drop = FALSE]
#    resi <- Y - X %*% b
#    L <- sum(resi^2) / 2 + lambda * sum(abs(b))
#    return(L)
#  }
#  
#  #' Find change points using dynamic programming with pruning and SeGD.
#  #'
#  #' @param data Data used to find change points.
#  #' @param beta Penalty coefficient for the number of change points.
#  #' @param B Initial guess on the number of change points.
#  #' @param trim Propotion of the data to ignore the change points at the
#  #'     beginning, ending and between change points.
#  #' @param epsilon Small adjustment to avoid singularity when doing inverse on
#  #'    the Hessian matrix.
#  #' @param family Family of the distribution.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing potential change point locations and negative log
#  #'     likelihood for each segment based on the change points guess.
#  segd_lasso <- function(data, beta, B = 10, trim = 0.025, epsilon = 1e-5, family = "gaussian") {
#    n <- dim(data)[1]
#    p <- dim(data)[2] - 1
#    Fobj <- c(-beta, 0)
#    cp_set <- list(NULL, 0)
#    set <- c(0, 1)
#  
#    # choose the initial values based on pre-segmentation
#  
#    index <- rep(1:B, rep(n / B, B))
#    coef.int <- matrix(NA, B, p)
#    err_sd <- act_num <- rep(NA, B)
#    for (i in 1:B)
#    {
#      cvfit <- glmnet::cv.glmnet(as.matrix(data[index == i, 1:p]), data[index == i, p + 1], family = family)
#      coef.int[i, ] <- coef(cvfit, s = "lambda.1se")[-1]
#      resi <- data[index == i, p + 1] - as.matrix(data[index == i, 1:p]) %*% as.numeric(coef.int[i, ])
#      err_sd[i] <- sqrt(mean(resi^2))
#      act_num[i] <- sum(abs(coef.int[i, ]) > 0)
#    }
#    err_sd_mean <- mean(err_sd) # only works if error sd is unchanged.
#    act_num_mean <- mean(act_num)
#    beta <- (act_num_mean + 1) * beta # seems to work but there might be better choices
#  
#    X1 <- data[1, 1:p]
#    cum_coef <- coef <- matrix(coef.int[1, ], p, 1)
#    eta <- coef %*% X1
#    # c_int <- diag(1/epsilon,p) - X1%o%X1/epsilon^2/(1+sum(X1^2)/epsilon)
#    # cmatrix_inv <- array(c_int, c(p,p,1))
#    cmatrix <- array(X1 %o% X1 + epsilon * diag(1, p), c(p, p, 1))
#  
#    for (t in 2:n)
#    {
#      m <- length(set)
#      cval <- rep(NA, m)
#  
#      for (i in 1:(m - 1))
#      {
#        coef_c <- coef[, i]
#        cum_coef_c <- cum_coef[, i]
#        # cmatrix_inv_c <- cmatrix_inv[,,i]
#        cmatrix_c <- cmatrix[, , i]
#        k <- set[i] + 1
#        out <- cost_lasso_update(data[t, ], coef_c, cum_coef_c, cmatrix_c, lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1)))
#        coef[, i] <- out[[1]]
#        cum_coef[, i] <- out[[2]]
#        # cmatrix_inv[,,i] <- out[[3]]
#        cmatrix[, , i] <- out[[3]]
#        if (t - k >= 2) cval[i] <- neg_log_lik_lasso(data[k:t, ], cum_coef[, i] / (t - k + 1), lambda = err_sd_mean * sqrt(2 * log(p) / (t - k + 1))) else cval[i] <- 0
#      }
#  
#      # the choice of initial values requires further investigation
#  
#      cval[m] <- 0
#      Xt <- data[t, 1:p]
#      cum_coef_add <- coef_add <- coef.int[index[t], ]
#      # cmatrix_inv_add <- diag(1/epsilon,p) - Xt%o%Xt/epsilon^2/(1+sum(Xt^2)/epsilon)
#  
#      coef <- cbind(coef, coef_add)
#      cum_coef <- cbind(cum_coef, cum_coef_add)
#      # cmatrix_inv <- abind::abind(cmatrix_inv, cmatrix_inv_add, along=3)
#      cmatrix <- abind::abind(cmatrix, Xt %o% Xt + epsilon * diag(1, p), along = 3)
#  
#      obj <- cval + Fobj[set + 1] + beta
#      min_val <- min(obj)
#      ind <- which(obj == min_val)[1]
#      cp_set_add <- c(cp_set[[set[ind] + 1]], set[ind])
#      cp_set <- append(cp_set, list(cp_set_add))
#      ind2 <- (cval + Fobj[set + 1]) <= min_val
#      set <- c(set[ind2], t)
#      coef <- coef[, ind2, drop = FALSE]
#      cum_coef <- cum_coef[, ind2, drop = FALSE]
#      cmatrix <- cmatrix[, , ind2, drop = FALSE]
#      # cmatrix_inv <- cmatrix_inv[,,ind2,drop=FALSE]
#      Fobj <- c(Fobj, min_val)
#    }
#  
#    # Remove change-points close to the boundaries and merge change-points
#  
#    cp <- cp_set[[n + 1]]
#    if (length(cp) > 0) {
#      ind3 <- (1:length(cp))[(cp < trim * n) | (cp > (1 - trim) * n)]
#      if (length(ind3) > 0) cp <- cp[-ind3]
#    }
#  
#    cp <- sort(unique(c(0, cp)))
#    index <- which((diff(cp) < trim * n) == TRUE)
#    if (length(index) > 0) cp <- floor((cp[-(index + 1)] + cp[-index]) / 2)
#    cp <- cp[cp > 0]
#  
#    # nLL <- 0
#    # cp_loc <- unique(c(0,cp,n))
#    # for(i in 1:(length(cp_loc)-1))
#    # {
#    #  seg <- (cp_loc[i]+1):cp_loc[i+1]
#    #  data_seg <- data[seg,]
#    #  out <- fastglm(as.matrix(data_seg[, 1:p]), data_seg[, p+1], family="binomial")
#    #  nLL <- out$deviance/2 + nLL
#    # }
#  
#    output <- list(cp)
#    names(output) <- c("cp")
#    return(output)
#  }
#  
#  # Generate data from penalized linear regression models with change-points
#  #' @param n Number of observations.
#  #' @param d Dimension of the covariates.
#  #' @param true.coef True regression coefficients.
#  #' @param true.cp.loc True change-point locations.
#  #' @param Sigma Covariance matrix of the covariates.
#  #' @param evar Error variance.
#  #' @keywords internal
#  #'
#  #' @noRd
#  #' @return A list containing the generated data and the true cluster
#  #'    assignments.
#  data_gen_lasso <- function(n, d, true.coef, true.cp.loc, Sigma, evar) {
#    loc <- unique(c(0, true.cp.loc, n))
#    if (dim(true.coef)[2] != length(loc) - 1) stop("true.coef and true.cp.loc do not match")
#    x <- mvtnorm::rmvnorm(n, mean = rep(0, d), sigma = Sigma)
#    y <- NULL
#    for (i in 1:(length(loc) - 1))
#    {
#      Xb <- x[(loc[i] + 1):loc[i + 1], , drop = FALSE] %*% true.coef[, i, drop = FALSE]
#      add <- Xb + rnorm(length(Xb), sd = sqrt(evar))
#      y <- c(y, add)
#    }
#    data <- cbind(x, y)
#    true_cluster <- rep(1:(length(loc) - 1), diff(loc))
#    result <- list(data, true_cluster)
#    return(result)
#  }
#  set.seed(1)
#  p <- 5
#  x <- matrix(rnorm(300 * p, 0, 1), ncol = p)
#  
#  # Randomly generate coefficients with different means.
#  theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
#  
#  # Randomly generate response variables based on the segmented data and
#  # corresponding coefficients
#  y <- c(
#    rbinom(125, 1, 1 / (1 + exp(-x[1:125, ] %*% theta[1, ]))),
#    rbinom(300 - 125, 1, 1 / (1 + exp(-x[(125 + 1):300, ] %*% theta[2, ])))
#  )
#  
#  segd_binomial(cbind(x, y), (p + 1) * log(300) / 2, B = 5)$cp
#  #> [1] 125
#  
#  fastcpd.binomial(
#    cbind(y, x),
#    segment_count = 5,
#    beta = "BIC",
#    cost_adjustment = "BIC",
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 125
#  
#  pelt_vanilla_binomial(cbind(x, y), (p + 1) * log(300) / 2)$cp
#  #> [1]   0 125
#  
#  fastcpd.binomial(
#    cbind(y, x),
#    segment_count = 5,
#    vanilla_percentage = 1,
#    beta = "BIC",
#    cost_adjustment = "BIC",
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 125
#  set.seed(1)
#  n <- 1500
#  d <- 5
#  rho <- 0.9
#  Sigma <- array(0, c(d, d))
#  for (i in 1:d) {
#    Sigma[i, ] <- rho^(abs(i - (1:d)))
#  }
#  delta <- c(5, 7, 9, 11, 13)
#  a.sq <- 1
#  delta.new <-
#    delta * sqrt(a.sq) / sqrt(as.numeric(t(delta) %*% Sigma %*% delta))
#  true.cp.loc <- c(375, 750, 1125)
#  
#  # regression coefficients
#  true.coef <- matrix(0, nrow = d, ncol = length(true.cp.loc) + 1)
#  true.coef[, 1] <- c(1, 1.2, -1, 0.5, -2)
#  true.coef[, 2] <- true.coef[, 1] + delta.new
#  true.coef[, 3] <- true.coef[, 1]
#  true.coef[, 4] <- true.coef[, 3] - delta.new
#  
#  out <- data_gen_poisson(n, d, true.coef, true.cp.loc, Sigma)
#  data <- out[[1]]
#  g_tr <- out[[2]]
#  beta <- log(n) * (d + 1) / 2
#  
#  segd_poisson(
#    data, beta, trim = 0.03, B = 10, epsilon = 0.001, G = 10^10, L = -20, H = 20
#  )$cp
#  #> [1]  380  751 1136 1251
#  
#  fastcpd.poisson(
#    cbind(data[, d + 1], data[, 1:d]),
#    beta = beta,
#    cost_adjustment = "BIC",
#    epsilon = 0.001,
#    segment_count = 10,
#    r.progress = FALSE
#  )@cp_set
#  #> [1]  380  751 1136 1251
#  
#  pelt_vanilla_poisson(data, beta)$cp
#  #> [1]    0  374  752 1133
#  
#  fastcpd.poisson(
#    cbind(data[, d + 1], data[, 1:d]),
#    segment_count = 10,
#    vanilla_percentage = 1,
#    beta = beta,
#    cost_adjustment = "BIC",
#    r.progress = FALSE
#  )@cp_set
#  #> [1]  374  752 1133
#  set.seed(1)
#  n <- 1000
#  s <- 3
#  d <- 50
#  evar <- 0.5
#  Sigma <- diag(1, d)
#  true.cp.loc <- c(100, 300, 500, 800, 900)
#  seg <- length(true.cp.loc) + 1
#  true.coef <- matrix(rnorm(seg * s), s, seg)
#  true.coef <- rbind(true.coef, matrix(0, d - s, seg))
#  out <- data_gen_lasso(n, d, true.coef, true.cp.loc, Sigma, evar)
#  data <- out[[1]]
#  beta <- log(n) / 2 # beta here has different meaning
#  
#  segd_lasso(data, beta, B = 10, trim = 0.025)$cp
#  #> [1] 100 300 520 800 901
#  
#  fastcpd.lasso(
#    cbind(data[, d + 1], data[, 1:d]),
#    epsilon = 1e-5,
#    beta = beta,
#    cost_adjustment = "BIC",
#    pruning_coef = 0,
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 100 300 520 800 901
#  
#  pelt_vanilla_lasso(data, beta, cost = cost_lasso)$cp
#  #> [1] 100
#  #> [1] 200
#  #> [1] 300
#  #> [1] 400
#  #> [1] 500
#  #> [1] 600
#  #> [1] 700
#  #> [1] 800
#  #> [1] 900
#  #> [1] 1000
#  #> [1]   0 103 299 510 800 900
#  
#  fastcpd.lasso(
#    cbind(data[, d + 1], data[, 1:d]),
#    vanilla_percentage = 1,
#    epsilon = 1e-5,
#    beta = beta,
#    cost_adjustment = "BIC",
#    pruning_coef = 0,
#    r.progress = FALSE
#  )@cp_set
#  #> [1] 103 299 510 800 900
Any scripts or data that you put into this service are public.
fastcpd documentation built on May 29, 2024, 8:36 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
fastcpd
Fast Change Point Detection via Sequential Gradient Descent

inst/doc/comparison-pelt.R
In fastcpd: Fast Change Point Detection via Sequential Gradient Descent

Try the fastcpd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastcpd Fast Change Point Detection via Sequential Gradient Descent

inst/doc/comparison-pelt.R In fastcpd: Fast Change Point Detection via Sequential Gradient Descent

Try the fastcpd package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastcpd
Fast Change Point Detection via Sequential Gradient Descent

inst/doc/comparison-pelt.R
In fastcpd: Fast Change Point Detection via Sequential Gradient Descent