Nothing
# function to infuse remove outliers template
#' @importFrom stringi stri_replace
infuse_remove_outliers_tmp = function(data_file,
working_folder,
output_file,
n_seasonal,
IQ_factor,
x){
# define template
remove_outliers_tmpl = "DataFile data_file_value\nDataDirectory data_directory_value\nOutputFile output_file_value\ninterpolate no\nseasonalsignal seasonal_signal_value\nhalfseasonalsignal half_seasonal_signal_value\nestimateoffsets estimate_offsets_value\nIQ_factor iq_factor_value\nPhysicalUnit mm\n"
# replace data_file_value by data_file
tpl_1 = stringi::stri_replace( str = remove_outliers_tmpl, regex = "data_file_value" , replacement = data_file)
tpl_2 = stringi::stri_replace( str = tpl_1,regex = "data_directory_value" , replacement = working_folder)
tpl_3 = stringi::stri_replace( str = tpl_2,regex = "output_file_value" , replacement = output_file)
tpl_4 = stringi::stri_replace( str = tpl_3,regex = "seasonal_signal_value" , replacement = if (n_seasonal > 0) "yes" else "no")
tpl_5 = stringi::stri_replace( str = tpl_4,regex = "half_seasonal_signal_value" , replacement = if (n_seasonal > 1) "yes" else "no")
tpl_6 = stringi::stri_replace( str = tpl_5,regex = "estimate_offsets_value" , replacement = if (!is.null(x$jumps[1])) "yes" else "no")
tpl_7 = stringi::stri_replace( str = tpl_6,regex = "iq_factor_value" , replacement = sprintf('%.2f', IQ_factor))
tpl_7
}
# function to infuse estimate trend template
#' @importFrom stringi stri_replace
infuse_estimate_trend_tmp = function(data_file,
working_folder,
output_file,
noise_model,
n_seasonal,
likelihood_method = "AmmarGrag",
x){
# define template differently based on the specified ci method
if(likelihood_method == "FullCov"){
estimate_trend_tmpl = "DataFile data_file_value\nDataDirectory data_directory_value\nOutputFile output_file_value\ninterpolate no\nPhysicalUnit mm\nScaleFactor 1.0\nNoiseModels noise_models_value\nseasonalsignal seasonal_signal_value\nhalfseasonalsignal half_seasonal_signal_value\nestimateoffsets estimate_offsets_value\nestimatepostseismic no\nestimateslowslipevent no\nRandomiseFirstGuess no\nTimeNoiseStart 1\nDegreePolynomial degree_polynomial_value\nLikelihoodMethod FullCov\nJSON yes\n"
}else if(likelihood_method == "AmmarGrag"){
estimate_trend_tmpl = "DataFile data_file_value\nDataDirectory data_directory_value\nOutputFile output_file_value\ninterpolate no\nPhysicalUnit mm\nScaleFactor 1.0\nNoiseModels noise_models_value\nseasonalsignal seasonal_signal_value\nhalfseasonalsignal half_seasonal_signal_value\nestimateoffsets estimate_offsets_value\nestimatepostseismic no\nestimateslowslipevent no\nRandomiseFirstGuess no\nTimeNoiseStart 1\nDegreePolynomial degree_polynomial_value\nLikelihoodMethod AmmarGrag\nJSON yes\n"
}
# replace data_file_value by data_file
tpl_1 = stringi::stri_replace( str = estimate_trend_tmpl, regex = "data_file_value" , replacement = data_file)
tpl_2 = stringi::stri_replace( str = tpl_1,regex = "data_directory_value" , replacement = working_folder)
tpl_3 = stringi::stri_replace( str = tpl_2,regex = "output_file_value" , replacement = output_file)
tpl_4 = stringi::stri_replace( str = tpl_3,regex = "noise_models_value" , replacement = noise_model)
tpl_5 = stringi::stri_replace( str = tpl_4,regex = "seasonal_signal_value" , replacement = if (n_seasonal > 0) "yes" else "no")
tpl_6 = stringi::stri_replace( str = tpl_5,regex = "half_seasonal_signal_value" , replacement = if (n_seasonal > 1) "yes" else "no")
tpl_7 = stringi::stri_replace( str = tpl_6,regex = "estimate_offsets_value" , replacement = if (!is.null(x$jumps[1])) "yes" else "no")
tpl_8 = stringi::stri_replace( str = tpl_7,regex = "degree_polynomial_value" , replacement = 1)
if(grepl(pattern = "RandomWalkGGM", x = noise_model)){
tpl_9 = paste0(tpl_8, "GGM_1mphi 6.9e-06\n")
}else{
tpl_9 = tpl_8
}
tpl_9
}
#' Estimate a stochastic model based on the MLE and the Hector implementation.
#'
#' @param x A \code{gnssts} object
#' @param n_seasonal An \code{integer} specifying the number of seasonal component in the time series.
#' @param model_string A \code{string} specifying the model to be estimated.
#' @param likelihood_method A \code{string} taking either value "FullCov" or "AmmarGrag" that specify the method for the Likelihood computation.
#' @param cleanup A \code{boolean} specifying if the files created by the estimation procedure should be cleaned.
#' @return A \code{gnsstsmodel} object.
#' @export
#' @importFrom rjson fromJSON
#' @importFrom stringi stri_rand_strings
#' @importFrom wv wvar
#' @examples
#' \dontrun{
#' cola = PBO_get_station(station_name = "COLA", column = "dE", time_range = c(51130, 52000))
#' fit_mle = estimate_hector(x = cola,
#' n_seasonal = 1,
#' model_string = "wn+matern")
#'
#' }
#'
estimate_hector <- function(
x,
n_seasonal = 1,
model_string,
likelihood_method = "AmmarGrag",
cleanup = TRUE) {
if (!("gnssts" %in% class(x))) {
stop("x must be an object of type 'gnssts")
}
if (n_seasonal > 2) {
stop("Hector supports only seasonal and halfseasonal signals ( 0 <= n_seasonal <= 1 )")
}
model = create_model_descriptor(model_string)
# create temporary folders
working_folder = paste(tempdir(), stringi::stri_rand_strings(n=1,length = 16), sep = "/")
if (cleanup == F) {
message(paste("Working in", working_folder))
}
# create working folder
dir.create(working_folder, showWarnings = F, recursive = T)
# store time series
write.gnssts(x, filename = paste(working_folder, "ts.mom", sep = "/"), format = "mom")
# create hector configuration file
cfg_file = paste(working_folder, "estimate.ctl", sep="/")
output_file = paste(working_folder, "output.mom", sep="/")
cfg = infuse_estimate_trend_tmp(data_file = "ts.mom",
working_folder = working_folder,
output_file = output_file,
noise_model = gen_hector_model(model),
n_seasonal = n_seasonal,
x = x,
likelihood_method = likelihood_method)
# write cfg file
write(cfg, file = cfg_file)
# define command to run hector on signal with ctl file
cmd = sprintf("cd %s; %s '%s'", working_folder, "estimatetrend", cfg_file)
# run hector assuming that estimatetrend is accesible in the PATH
timing = system.time({out = system(cmd, intern = TRUE)})
# parse json
json = rjson::fromJSON(file = paste(working_folder, "estimatetrend.json", sep="/"))
# collect deterministic parameters
# hector does not put the bias in the json file, parse the output
bias_raw = find_one_output(out, "bias\\s+:\\s+([+-]?\\d+\\.\\d+) \\+/- ([+-]?\\d+\\.\\d+)")
bias = as.numeric(bias_raw[1,2:3])
beta_hat = c(
bias[1],
json[["trend"]] / 365.25, # hector gives the trend in unit/year
json[["Sa_sin"]],
json[["Sa_cos"]],
json[["Ssa_sin"]], # if not enabled they are just null
json[["Ssa_cos"]],
json[["jumps_sizes"]]
)
beta_std = c(
bias[2],
json[["trend_sigma"]] / 365.25, # hector gives the trend in unit/year
json[["Sa_sin_sigma"]],
json[["Sa_cos_sigma"]],
json[["Ssa_sin_sigma"]], # if not enabled they are just null
json[["Ssa_cos_sigma"]],
json[["jumps_sigmas"]]
)
# set names of beta std
names(beta_std) = sprintf("std_%s", names(beta_hat))
# transform beta hat to vector
beta_hat = as.vector(unlist(beta_hat))
names(beta_hat) <- c("bias", "trend", rep(c("A*cos(U)", "A*sin(U)"), n_seasonal), rep("jump", length(x$jumps)))
# collect stochastic parameters
theta_hat = gen_pick_params_hector(model, json)
if (cleanup == TRUE) {
unlink(working_folder, recursive = TRUE)
}
# compute functional model, residuals and wv
t_nogap = x$t[1]:tail(x$t,1)
which_data = is.element(t_nogap, x$t)
X = create_A_matrix(t_nogap, x$jumps, n_seasonal)
x_hat = X %*% beta_hat
rsd = x$y - X[which_data, ] %*% beta_hat # compute residuals where I have data
wv_rsd = wvar(rsd)
# create the output object
ret = list()
ret$model = model
# functional parameters
ret$beta_hat = beta_hat
ret$beta_std = beta_std
# stochastic parameter
ret$theta_hat = theta_hat
ret$theta_std = rep(NA, length(theta_hat)) # TODO: CI for theta not collected
# original time series
ret$input = list()
ret$input$jumps = x$jumps
ret$input$t_orig = x$t
ret$input$x_orig = x$y
# fitted functional model
ret$output = list()
ret$output$t_nogap = t_nogap
ret$output$x_hat = x_hat
residuals
ret$residuals = rsd
# ww of the residuals
ret$wv_residuals = wv_rsd
# timing
ret$estimation_time = timing
class(ret) <- "gnsstsmodel"
ret
}
#' Remove outliers from a gnssts object using Hector
#'
#' @param x A \code{gnssts} object
#' @param n_seasonal An \code{integer} specifying the number of seasonal component in the time series.
#' @param IQ_factor A \code{double} specifying the number used to scale the interquartile range and corresponding to the argument \code{IQ_factor} in Hector removeoutliers.ctl
#' @param cleanup An \code{boolean} specifying if temporary files should be cleaned.
#' @return A \code{gnssts} object.
#' @export
#' @importFrom stringi stri_rand_strings
#'
#'
#' @examples
#' phase = 0.45
#' amplitude = 2.5
#' sigma2_wn = 15
#' bias = 0
#' trend = 5/365.25
#' cosU = amplitude*cos(phase)
#' sinU = amplitude*sin(phase)
#' n= 2*365
#' # define time at which there are jumps
#' jump_vec = c(100, 200)
#' jump_height = c(10, 20)
#' # generate residuals
#' eps = rnorm(n = n, sd = sqrt(sigma2_wn))
#' # add trend, gaps and sin
#' A = create_A_matrix(1:length(eps), jump_vec, n_seasonal = 1)
#' # define beta
#' x_0 = c(bias, trend, jump_height, cosU, sinU)
#' # create time series
#' yy = A %*% x_0 + eps
#' plot(yy, type="l")
#' n_outliers = 30
#' set.seed(123)
#' id_outliers=sample(150:350, size = n_outliers)
#' val_outliers = rnorm(n = n_outliers, mean = max(yy)+10, sd = 5)
#' yy[id_outliers] = val_outliers
#' plot(yy, type="l")
#' # save signal in temp
#' gnssts_obj = create.gnssts(t = 1:length(yy), y = yy, jumps = jump_vec)
#' \dontrun{
#' clean_yy = remove_outliers_hector(x=gnssts_obj, n_seasonal = 1)
#' plot(clean_yy$t, clean_yy$y, type="l")
#' }
remove_outliers_hector <- function(x, n_seasonal, IQ_factor = 3, cleanup = TRUE) {
if (!("gnssts" %in% class(x))) {
stop("x must be an object of type 'gnssts")
}
if (n_seasonal > 2) {
stop("Hector supports only seasonal and halfseasonal signals ( 0 <= n_seasonal <= 1 )")
}
# create temporary folders
working_folder = paste(tempdir(), stri_rand_strings(n=1,length = 16), sep = "/")
if (cleanup == FALSE) {
message(paste("Working in", working_folder))
}
dir.create(working_folder, showWarnings = FALSE, recursive = TRUE)
# store time series
write.gnssts(x, filename = paste(working_folder, "ts.mom", sep = "/"), format = "mom")
cfg_file = paste(working_folder, "removeoutliers.ctl", sep="/")
cfg = infuse_remove_outliers_tmp(data_file = "ts.mom",
working_folder = working_folder,
output_file = "ts_out.mom",
n_seasonal = n_seasonal,
IQ_factor = IQ_factor,
x = x)
# infuse using the template removeoutliers_tmpl accesible in R/sysdata.rda
# cfg = infuse(
# file_or_string = removeoutliers_tmpl,
# list(
# data_file = "ts.mom",
# data_dir = working_folder,
# output_file = "ts_out.mom",
# seasonalsignal = if (n_seasonal > 0) "yes" else "no",
# halfseasonalsignal = if (n_seasonal > 1) "yes" else "no",
# estimateoffsets = if (!is.null(x$jumps[1])) "yes" else "no",
# IQ_factor = sprintf('%.2f', IQ_factor)
# )
# )
write(cfg, file = cfg_file)
# run hector assuming that removeoutliers is accesible in the PATH
cmd = sprintf("cd %s; %s '%s'", working_folder, 'removeoutliers', cfg_file)
timing = system.time({out = system(cmd, intern = TRUE)})
y = read.gnssts(
filename = paste(working_folder, "ts_out.mom", sep = "/"),
format = "mom"
)
if (cleanup == TRUE) {
unlink(working_folder, recursive = T)
}
y
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.