#' @title Deconvolution
#'
#' @description Deconvolution for time series
#' @param df
#' @param clip_lambda
#' @param use_local_level
#' @param h
#'
#' @return
#' @export
#'
#' @import magrittr
#' @importFrom tibble enframe
#' @importFrom dplyr group_by tally left_join mutate select bind_cols filter
#' @importFrom tidyr pivot_longer
#' @importFrom purrr pull
#'
#' @examples
#' \dontrun{
#' train %>%
#' filter(key == 21063431) %>%
#' select(key, value) %>%
#' group_by(key) %>%
#' sid(use_local_level = TRUE, h = 5)
#' }
sid <- function(df, clip_lambda = 20, use_local_level = TRUE, h = 5){
# clip_lambda is a helper that restrict the right side of the parameter space
# use_local_level is true if the mean is to be found via local level trend
# if false then global mean is used.
# dataframe of value and tally
df_counts <- df %>%
group_by(value) %>%
tally()
# seq value to fill missings with 0
df_filled <- seq(min(df_counts$value), max(df_counts$value)) %>%
enframe(name = NULL) %>%
left_join(df_counts, by = "value") %>%
mutate(n = ifelse(is.na(n), 0, n))
#get counts
counts <- df_filled %>%
pull(n)
# get N of counts
N <- length(counts)
# do deconvolution
lambda <- seq(-4, 5.0, .01)
tau <- exp(lambda)
result <- deconv(tau = tau, y = counts, n = N, c0=1, pDegree = 6, ignoreZero = TRUE)
stats <- result$stats
g <- result$stats[,"g"]
g <- g/sum(g)
fE <- result$P %*% g
fE <- counts[1] * fE/fE[1]
log_counts <- log(counts)
d <- data.frame(lambda = lambda, g = stats[, "g"], SE.g = stats[, "SE.g"], tg = stats[, "tg"])
gPost <- sapply(seq_len(N), function(i) local({tg <- d$tg * result$P[i, ]; tg / sum(tg)}))
# df_post is a lookup table of posteriors. The mean must be found either globally or by
# local level optimizations
df_post <- as_tibble(gPost) %>%
cbind(theta = result$stats[, 'theta']) %>%
tidyr::pivot_longer(cols = -theta, names_to = "mean", values_to = "gPost") %>%
mutate(mean = as.factor(as.integer(str_sub(mean, start = 2L)) - 1L)) %>%
select(mean, theta, gPost) %>%
filter(theta < clip_lambda)
# choose the correct mean
if(use_local_level == TRUE){
value <- df$value
S <- function(x) {
a <- x[1]
if(a < 0 | a > 1)
return(Inf)
n <- length(value)
lambda <- numeric(n+1)
error <- numeric(n)
lambda[1] <- x[2]
for(i in 1:n) {
error[i] <- value[i]-lambda[i]
lambda[i+1] <- (1-a)*lambda[i] + a*value[i]
}
return(sum(error^2))
}
#op <- optim(par = c(0.5,2L), S, lower=c(0,0), upper=c(1,Inf), method = "L-BFGS-B")
op <- optim(c(0.5,2L), S)
alpha <- op$par[1]
lvl <- op$par[2]
nrs <- length(value)
lvl_vector = c(lvl)
for(i in 1:length(value)){
lvl_vector[i + 1] <- alpha * value[i] + (1 - alpha) * lvl_vector[i]
}
lambda <- last(lvl_vector[-length(lvl_vector)]) # for some reason have a trailing value
h <- h
y <- numeric(h)
for(i in 1:h)
{
y[i] <- rpois(1,lambda) # introduces randomness in the forecasted mean
lambda <- (1-alpha)*lambda + alpha*y[i]
}
seq_h <- seq(from = 1, to = h)
vec_fc <- y %>%
enframe(name = NULL, value = "fc_mean") %>%
bind_cols(hrzn = seq_h )
df_post <- df_post %>% mutate(mean = as.integer(mean) - 1)
df_fc <- vec_fc %>%
left_join(df_post, by = c("fc_mean" = "mean"))
}else{
chosen_mean = round(mean(df$value))
df_post <- df_post %>% mutate(mean = as.integer(mean))
df_post <- df_post %>% filter(mean == 2)
}
# return full df_post
# return chosen posterior
# return forecast
# fitted local level
return(df_fc)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.