Nothing
################################################################################
# IMPLEMENTED DISTRIBUTIONS
.DISTR_TYPES = c("continuous", "discrete")
.DISCR_DISTR = c("poisson", "nbinom")
.CONT_DISTR = c("gaussian")
################################################################################
# PARAMETERS FOR PMF CONVOLUTION AND SMOOTHING
.TOLL = 1e-15
.RTOLL = 1e-9
.ALPHA_SMOOTHING = 1e-9
.LAP_SMOOTHING = FALSE
################################################################################
# CHECK INPUT
# Function to check values allowed in S.
.check_S <- function(S) {
if(!identical(sort(unique(as.vector(S))), c(0,1)) ){
stop("Input error in S: S must be a matrix containing only 0s and 1s.")
}
if(!all(colSums(S)>1)){
stop("Input error in S: all bottom level forecasts must aggregate into an upper.")
}
if(nrow(unique(S))!=nrow(S)){
warning("S has some repeated rows.")
}
# Check that each bottom has a corresponding row with with one 1 and the rest 0s.
if (nrow(unique(S[rowSums(S)==1,])) < ncol(S)) {
stop("Input error in S: there is at least one bottom that does not have a row with one 1 and the rest 0s.")
}
}
# Function to check aggregation matrix A
.check_A <- function(A) {
if (!all(A %in% c(0,1))) {
stop("Input error in A: A must be a matrix containing only 0s and 1s.")
}
if(any(colSums(A)==0)){
stop("Input error in A: some columns do not have any 1.
All bottom level forecasts must aggregate into an upper.")
}
if(nrow(unique(A))!=nrow(A)){
warning("A has some repeated rows.")
}
}
# Check if it is a covariance matrix (i.e. symmetric p.d.)
.check_cov <- function(cov_matrix, Sigma_str,pd_check=FALSE,symm_check=FALSE) {
# Check if the matrix is square
if (!is.matrix(cov_matrix) || nrow(cov_matrix) != ncol(cov_matrix)) {
stop(paste0(Sigma_str, " is not square"))
}
# Check if the matrix is positive semi-definite
if(pd_check){
eigen_values <- eigen(cov_matrix, symmetric = TRUE)$values
if (any(eigen_values <= 0)) {
stop(paste0(Sigma_str, " is not positive semi-definite"))
}
}
if(symm_check){
# Check if the matrix is symmetric
if (!isSymmetric(cov_matrix)) {
stop(paste0(Sigma_str, " is not symmetric"))
}
}
# Check if the diagonal elements are non-negative
if (any(diag(cov_matrix) < 0)) {
stop(paste0(Sigma_str, ": some elements on the diagonal are negative"))
}
# If all checks pass, return TRUE
return(TRUE)
}
# Checks if the input is a real number
.check_real_number <- function(x) {
return(length(x)==1 & is.numeric(x))
}
# Checks if the input is a positive number
.check_positive_number <- function(x) {
return(length(x)==1 && is.numeric(x) && x > 0)
}
# Check that the distr is implemented
.check_implemented_distr <- function(distr) {
if (!(distr %in% c(.DISCR_DISTR, .CONT_DISTR))) {
stop(paste(
"Input error: the distribution must be one of {",
paste(c(.DISCR_DISTR, .CONT_DISTR), collapse = ', '), "}"))
}
}
# Check the parameters of distr
.check_distr_params <- function(distr, params) {
.check_implemented_distr(distr)
if (!is.list(params)) {
stop("Input error: the parameters of the distribution must be given as a list.")
}
switch(
distr,
"gaussian" = {
mean = params$mean
sd = params$sd
if (!.check_real_number(mean)) {
stop("Input error: mean of Gaussian must be a real number")
}
if (!.check_positive_number(sd)) {
stop("Input error: sd of Gaussian must be a positive number")
}
},
"poisson" = {
lambda = params$lambda
if (!.check_positive_number(lambda)) {
stop("Input error: lambda of Poisson must be a positive number")
}
},
"nbinom" = {
size = params$size
prob = params$prob
mu = params$mu
# Check that size is specified, and that is a positive number
if (is.null(size)) {
stop("Input error: size parameter for the nbinom distribution must be specified")
}
if (!.check_positive_number(size)) {
stop("Input error: size of nbinom must be a positive number")
}
# Check that exactly one of prob, mu is specified
if (!is.null(prob) & !is.null(mu)) {
stop("Input error: prob and mu for the nbinom distribution are both specified ")
} else if (is.null(prob) & is.null(mu)) {
stop("Input error: either prob or mu must be specified")
} else {
if (!is.null(prob)) {
if (!.check_positive_number(prob) | prob > 1) {
stop("Input error: prob of nbinom must be positive and <= 1")
}
} else if (!is.null(mu)) {
if (!.check_positive_number(mu)) {
stop("Input error: mu of nbinom must be positive")
}
}
}
},
)
}
# Check that the samples are discrete
.check_discrete_samples <- function(samples) {
if (!isTRUE(all.equal(unname(samples), as.integer(samples)))) {
stop("Input error: samples are not all discrete")
}
}
# Check input for BUIS (and for MH)
# base_forecasts, in_type, and distr must be list
.check_input_BUIS <- function(A, base_forecasts, in_type, distr) {
.check_A(A)
n_tot_A <- ncol(A)+nrow(A)
# Check in_type
if (!is.list(in_type)) {
stop("Input error: in_type must be a list")
}
if (!(n_tot_A == length(in_type))) {
stop("Input error: ncol(A)+nrow(A) != length(in_type)")
}
for(i in 1:n_tot_A){
if (!(in_type[[i]] %in% c("params", "samples"))) {
stop("Input error: in_type[[",i,"]] must be either 'samples' or 'params'")
}
}
# Check distr and base forecasts
if (!is.list(distr)) {
stop("Input error: distr must be a list")
}
if (!(n_tot_A == length(distr))) {
stop("Input error: ncol(A)+nrow(A) != length(distr)")
}
if (!is.list(base_forecasts)) {
stop("Input error: base_forecasts must be a list")
}
if (!(n_tot_A == length(base_forecasts))) {
stop("Input error: ncol(A)+nrow(A) != length(base_forecasts)")
}
for(i in 1:n_tot_A){
if (in_type[[i]] == "params") {
.check_distr_params(distr[[i]], base_forecasts[[i]])
} else if (in_type[[i]] == "samples") {
if (!(distr[[i]] %in% .DISTR_TYPES)) {
stop(paste(
"Input error: the distribution must be one of {",
paste(.DISTR_TYPES, collapse = ', '), "}"))
}
if (distr[[i]] == "discrete") {
.check_discrete_samples(base_forecasts[[i]])
}
# TODO: check sample size?
} else {
stop("Input error: in_type[[",i,"]] must be either 'samples' or 'params'")
}
}
}
# Check input for TDcond
.check_input_TD <- function(A, fc_bottom, fc_upper,
bottom_in_type, distr,
return_type) {
.check_A(A)
n_b = ncol(A) # number of bottom TS
n_u = nrow(A) # number of upper TS
if (!(bottom_in_type %in% c("pmf", "samples", "params"))) {
stop("Input error: bottom_in_type must be either 'pmf', 'samples', or 'params'")
}
if (!(return_type %in% c("pmf", "samples", "all"))) {
stop("Input error: return_type must be either 'pmf', 'samples', or 'all'")
}
if (length(fc_bottom) != n_b) {
stop("Input error: length of fc_bottom does not match with A")
}
# If Sigma is a number, transform into a matrix
if (length(fc_upper$Sigma) == 1) {
fc_upper$Sigma = as.matrix(fc_upper$Sigma)
}
# Check the dimensions of mu and Sigma
if (length(fc_upper$mu) != n_u | any(dim(fc_upper$Sigma) != c(n_u, n_u))) {
stop("Input error: the dimensions of the upper parameters do not match with A")
}
# Check that Sigma is a covariance matrix (symmetric positive semi-definite)
.check_cov(fc_upper$Sigma, "Upper covariance matrix", symm_check=TRUE)
# If bottom_in_type is not "params" but distr is specified, throw a warning
if (bottom_in_type %in% c("pmf", "samples") & !is.null(distr)) {
warning(paste0("Since bottom_in_type = '", bottom_in_type, "', the input distr is ignored"))
}
# If bottom_in_type is params, distr must be one of the implemented discrete distr.
# Also, check the parameters
if (bottom_in_type == "params") {
if (is.null(distr)) {
stop("Input error: if bottom_in_type = 'params', distr must be specified")
}
if (!(distr %in% .DISCR_DISTR)) {
stop(paste0("Input error: distr must be one of {",
paste(.DISCR_DISTR, collapse = ', '), "}"))
}
for (i in 1:n_b) {
.check_distr_params(distr, fc_bottom[[i]])
}
}
}
# Check importance sampling weights
.check_weights <- function(w, n_eff_min=200, p_n_eff=0.01) {
warning = FALSE
warning_code = c()
warning_msg = c()
n = length(w)
n_eff = n
# 1. w==0
if (all(w==0)) {
warning = TRUE
warning_code = c(warning_code, 1)
warning_msg = c(warning_msg,
"Importance Sampling: all the weights are zeros. This is probably caused by a strong incoherence between bottom and upper base forecasts.")
}else{
# Effective sample size
w = w / sum(w)
n_eff = 1 / sum(w^2)
# 2. n_eff < threshold
if (n_eff < n_eff_min) {
warning = TRUE
warning_code = c(warning_code, 2)
warning_msg = c(warning_msg,
paste0("Importance Sampling: effective_sample_size= ", round(n_eff,2), " (< ", n_eff_min,")."))
}
# 3. n_eff < p*n, e.g. p = 0.05
if (n_eff < p_n_eff*n) {
warning = TRUE
warning_code = c(warning_code, 3)
warning_msg = c(warning_msg,
paste0("Importance Sampling: effective_sample_size= ", round(n_eff,2), " (< ", round(p_n_eff * 100, 2),"%)."))
}
}
res = list(warning = warning,
warning_code = warning_code,
warning_msg = warning_msg,
n_eff = n_eff)
return(res)
}
################################################################################
# SAMPLE
# Sample from one of the implemented distributions
.distr_sample <- function(params, distr, n) {
.check_distr_params(distr, params)
switch(
distr,
"gaussian" = {
mean = params$mean
sd = params$sd
samples = stats::rnorm(n = n, mean = mean, sd = sd) },
"poisson" = {
lambda = params$lambda
samples = stats::rpois(n = n, lambda = lambda) },
"nbinom" = {
size = params$size
prob = params$prob
mu = params$mu
if (!is.null(prob)) {
samples = stats::rnbinom(n = n, size = size, prob = prob)
} else if (!is.null(mu)) {
samples = stats::rnbinom(n = n, size = size, mu = mu)
}
},
)
return(samples)
}
# Sample from a multivariate Gaussian distribution with specified mean and cov. matrix
.MVN_sample <- function(n_samples, mu, Sigma) {
n = length(mu)
if (any(dim(Sigma) != c(n,n))) {
stop("Dimension of mu and Sigma are not compatible!")
}
.check_cov(Sigma, "Sigma", pd_check = FALSE, symm_check = FALSE)
Z = matrix(stats::rnorm(n*n_samples), ncol = n)
Ch <- tryCatch(base::chol(Sigma),
error = function(e) stop(paste0(e,"check the covariance in .MVN_sample, the Cholesky fails.")))
samples = Z %*% Ch + matrix(mu, nrow = n_samples, ncol = n, byrow = TRUE)
return(samples)
}
# Compute the MVN density
.MVN_density <- function(x, mu, Sigma, max_size_x=5e3, suppress_warnings=TRUE) {
# save dimension of mu
n = length(mu)
# Check Sigma
if (any(dim(Sigma) != c(n,n))) {
stop("Dimension of mu and Sigma are not compatible!")
}
.check_cov(Sigma, "Sigma", pd_check = FALSE, symm_check = FALSE)
# x must be a matrix with ncol = n (nrow is the number of points to evaluate)
# or a vector with length n (in which case it is transformed into a matrix)
if(is.vector(x)){
if (length(x)!=n) stop("Length of x must be the same of mu")
x <- matrix(x, ncol=length(x))
} else if (is.matrix(x)) {
if (ncol(x)!=n) stop("The number of columns of x must be equal to the length of mu")
} else {
stop("x must be either a vector or a matrix")
}
# Compute Cholesky of Sigma
chol_S <- tryCatch(base::chol(Sigma),
error = function(e) stop(paste0(e,"check the covariance in .MVN_density, the Cholesky fails.")))
# Constant of the loglikelihood (computed here because it is always the same)
const <- -sum(log(diag(chol_S))) - 0.5 * n * log(2 *pi)
# This part breaks down the evaluation of the density eval into batches, for memory
rows_x <- nrow(x)
if(rows_x > max_size_x){
logval <- rep(0, rows_x)
# Compute how many batches we need
num_backsolves <- rows_x %/% max_size_x
if(!suppress_warnings){
warning_msg <- paste0("x has ",rows_x," rows, the density evaluation is broken down into ",num_backsolves," pieces for memory preservation.")
warning(warning_msg)
}
for(j in seq(num_backsolves)){
idx_to_select <- (1+(j-1)*max_size_x):((j)*max_size_x)
# Do one backsolve for each batch
tmp <- backsolve(chol_S, t(x[idx_to_select,]) - mu, transpose = TRUE)
rss <- colSums(tmp^2)
# Update the logval for those indices
logval[idx_to_select] <- const - 0.5 * rss
}
# Last indices: if the number of rows of x is not exactly divided by the size of the batches
remainder <- rows_x %% max_size_x
if(remainder !=0){
idx_to_select <- (1+(num_backsolves)*max_size_x):(remainder+(num_backsolves)*max_size_x)
# Do backsolve on the remaining indices
tmp <- backsolve(chol_S, t(x[idx_to_select,]) - mu, transpose = TRUE)
rss <- colSums(tmp^2)
logval[idx_to_select] <- const - 0.5 * rss
}
}else{
tmp <- backsolve(chol_S, t(x) - mu, transpose = TRUE)
rss <- colSums(tmp^2)
logval <- const - 0.5 * rss
}
return(exp(logval))
}
# Resample from weighted sample
.resample <- function(S_, weights, num_samples = NA) {
if (is.na(num_samples)) {
num_samples = length(weights)
}
if(nrow(S_)!=length(weights))
stop("Error in .resample: nrow(S_) != length(weights)")
tmp_idx = sample(x = 1:nrow(S_), num_samples, replace = TRUE, prob = weights)
return(S_[tmp_idx, ])
}
################################################################################
# Miscellaneous
# Compute the pmf of the distribution specified by distr and params at the points x
.distr_pmf <- function(x, params, distr) {
.check_distr_params(distr, params)
switch(
distr,
"gaussian" = {
mean = params$mean
sd = params$sd
pmf = stats::dnorm(x = x, mean = mean, sd = sd) },
"poisson" = {
lambda = params$lambda
pmf = stats::dpois(x = x, lambda = lambda) },
"nbinom" = {
size = params$size
prob = params$prob
mu = params$mu
if (!is.null(prob)) {
pmf = stats::dnbinom(x = x, size = size, prob = prob)
} else if (!is.null(mu)) {
pmf = stats::dnbinom(x = x, size = size, mu = mu)
}
},
)
return(pmf)
}
.shape <- function(m) {
print(paste0("(", nrow(m), ",", ncol(m), ")"))
}
################################################################################
# Functions for tests
.gen_gaussian <- function(params_file, seed=NULL) {
if (!is.null(seed)) set.seed(seed)
params = utils::read.csv(file = params_file, header = FALSE)
out = list()
for (i in 1:nrow(params)) {
out[[i]] = stats::rnorm(n=1e6, mean=params[[1]][i], sd=params[[2]][i])
}
return(out)
}
.gen_poisson <- function(params_file, seed=NULL) {
if (!is.null(seed)) set.seed(seed)
params = utils::read.csv(file = params_file, header = FALSE)
out = list()
for (i in 1:nrow(params)) {
out[[i]] = stats::rpois(n=1e6, lambda=params[[1]][i])
}
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.