R/sid.R

Defines functions sid

Documented in sid

#' @title Deconvolution
#'
#' @description Deconvolution for time series
#' @param df
#' @param clip_lambda
#' @param use_local_level
#' @param h
#'
#' @return
#' @export
#'
#' @import magrittr
#' @importFrom tibble enframe
#' @importFrom dplyr group_by tally left_join mutate select bind_cols filter
#' @importFrom tidyr pivot_longer
#' @importFrom purrr pull
#'
#' @examples
#' \dontrun{
#' train %>%
#'  filter(key == 21063431) %>%
#'  select(key, value) %>%
#'  group_by(key) %>%
#'  sid(use_local_level = TRUE, h = 5)
#'  }
sid <- function(df, clip_lambda = 20, use_local_level = TRUE, h = 5){

  # clip_lambda is a helper that restrict the right side of the parameter space
  # use_local_level is true if the mean is to be found via local level trend
  # if false then global mean is used.

  # dataframe of value and tally
  df_counts <- df %>%
    group_by(value) %>%
    tally()
  # seq value to fill missings with 0
  df_filled <- seq(min(df_counts$value), max(df_counts$value)) %>%
    enframe(name = NULL) %>%
    left_join(df_counts, by = "value") %>%
    mutate(n = ifelse(is.na(n), 0, n))
  #get counts
  counts <- df_filled %>%
    pull(n)
  # get N of counts
  N <- length(counts)
  # do deconvolution
  lambda <- seq(-4, 5.0, .01)
  tau <- exp(lambda)
  result <- deconv(tau = tau, y = counts, n = N, c0=1, pDegree = 6, ignoreZero = TRUE)
  stats <- result$stats
  g <- result$stats[,"g"]
  g <- g/sum(g)
  fE <- result$P %*% g
  fE <- counts[1] * fE/fE[1]
  log_counts <- log(counts)

  d <- data.frame(lambda = lambda, g = stats[, "g"], SE.g = stats[, "SE.g"], tg = stats[, "tg"])
  gPost <- sapply(seq_len(N), function(i) local({tg <- d$tg * result$P[i, ]; tg / sum(tg)}))
  # df_post is a lookup table of posteriors. The mean must be found either globally or by
  # local level optimizations
  df_post <- as_tibble(gPost) %>%
    cbind(theta = result$stats[, 'theta']) %>%
    tidyr::pivot_longer(cols = -theta, names_to = "mean", values_to = "gPost") %>%
    mutate(mean = as.factor(as.integer(str_sub(mean, start = 2L)) - 1L)) %>%
    select(mean, theta, gPost) %>%
    filter(theta < clip_lambda)

  # choose the correct mean
  if(use_local_level == TRUE){
    value <- df$value

    S <- function(x) {
      a <- x[1]
      if(a < 0 | a > 1)
        return(Inf)
      n <- length(value)
      lambda <- numeric(n+1)
      error <- numeric(n)
      lambda[1] <- x[2]
      for(i in 1:n) {
        error[i]    <- value[i]-lambda[i]
        lambda[i+1] <- (1-a)*lambda[i] + a*value[i]
      }
      return(sum(error^2))
    }

    #op <- optim(par = c(0.5,2L), S, lower=c(0,0), upper=c(1,Inf), method = "L-BFGS-B")
    op <- optim(c(0.5,2L), S)
    alpha <- op$par[1]
    lvl <- op$par[2]
    nrs <- length(value)
    lvl_vector = c(lvl)

    for(i in 1:length(value)){
      lvl_vector[i + 1] <- alpha * value[i] + (1 - alpha) * lvl_vector[i]
    }

    lambda <- last(lvl_vector[-length(lvl_vector)]) # for some reason have a trailing value
    h <- h
    y <- numeric(h)
    for(i in 1:h)
    {
      y[i] <- rpois(1,lambda) # introduces randomness in the forecasted mean
      lambda <- (1-alpha)*lambda + alpha*y[i]
    }

    seq_h <- seq(from = 1, to = h)
    vec_fc <- y %>%
      enframe(name = NULL, value = "fc_mean") %>%
      bind_cols(hrzn = seq_h )

    df_post <- df_post %>% mutate(mean = as.integer(mean) - 1)

    df_fc <- vec_fc %>%
      left_join(df_post, by = c("fc_mean" = "mean"))

  }else{
    chosen_mean = round(mean(df$value))
    df_post <- df_post %>% mutate(mean = as.integer(mean))
    df_post <- df_post %>% filter(mean == 2)
  }

  # return full df_post
  # return chosen posterior
  # return forecast
  # fitted local level

  return(df_fc)
}
alexhallam/sids documentation built on Feb. 2, 2020, 12:04 a.m.