R/TS_LG_object.R

#'  Prepare a time series for a local Gaussian inspection
#'
#' @details This function will for a given (sample from a) time series
#'     create a directory that will be used to store all the files
#'     that occur during the local Gaussian analysis.  The function
#'     will in addition create a local info-file to take care of the
#'     subsequent bookkeeping.  Moreover, this function will also
#'     maintain a global info-file in the \code{main_dir}-directory,
#'     where information about the top-level part of the
#'     directory-structure is stored -- in order to avoid the
#'     despicable situation that more than one directory stores
#'     information about the exact same set of data.  The sample will
#'     be nudged a tiny bit (always using 1 as the seed-value) if ties
#'     are detected in it.  The algorithm ensures that this nudge is
#'     of a small order compared to the original values.
#'
#' @param TS_data The time series data we will work upon.  This can
#'     either be an observed or a simulated time series.  If
#'     \code{TS_data} are of class "TS_simulated", i.e. it has been
#'     generated by \code{TS_sample}, then the information stored in
#'     it will be used to create \code{save_dir}.  Note that
#'     \code{TS_data} can be univariate or multivariate.  A univariate
#'     time series can be given as a vector, whereas a multivariate
#'     must have the observations along the rows and the variables
#'     along the columns.  (The program terminates if the number of
#'     rows are lower than the number of columns.)
#'
#' @param details This can be used to add a reminder that will be
#'     shown under the interactive investigation later on.  The
#'     default value \code{NULL} will imply that no information is
#'     shown.
#'
#' @param main_dir The main directory into which the information will
#'     be stored.  Default value \code{c("~", "LG_DATA")}, i.e. a
#'     specially designed directory in the home directory of your
#'     file-system.  If the proposed default directory does not
#'     exists, then \code{TS_LG_object} will ask for permission to
#'     create it, but otherwise it is a requirement that only existing
#'     directories can be used.  This is done as a precaution against
#'     accidentally ending up with unintended data-directories all
#'     over the file-system. Note that the default value is given as a
#'     vector in order to avoid issues related to operative system
#'     dependent values for the file separator.  The argument can also
#'     be given as a character-string.
#' 
#' @param save_dir The sub-directory of \code{main_dir} where all the
#'     stuff related to \code{TS_data} will be saved.  Default value
#'     \code{NULL}, but with the following defaults in the code for
#'     what to replace it with: When \code{TS_data} has been created
#'     by \code{TS_sample}, a value for \code{save_dir} will be
#'     created from the information in \code{TS_data}, and any attempt
#'     from the user to create another name will be outright ignored.
#'     If no value is given for \code{save_dir} (and none can be
#'     computed from \code{TS_data}), then the default value from
#'     \code{LG_default} will be used to create \code{save_dir}.  If the
#'     user specifies \code{save_dir} (for a time-series not
#'     originating from \code{TS_sample}), then that name will be
#'     used, but only if no previous directories happens to have that
#'     name too -- if that should be the case, the program will
#'     terminate and inform the user about it.
#' 
#' @inheritParams TS_LG_normalisation
#' 
#' @return This function will take care of some file-handling before
#'     it returns a two-component list to the work-flow, containing
#'     the following nodes:
#'
#' \describe{
#'
#' \item{TS_done_before}{A logical value that reveals whether or not
#'     the time series from \code{TS_data} already was stored in the
#'     folder \code{main_dir}. }
#'
#' \item{result}{A list whose format depends upon whether or not
#'     \code{TS_data} was created by \code{TS_sample} -- and some of
#'     the content are only connected to the internal work-flow of
#'     this function.  The four parts of \code{result} that always is
#'     present is \code{TS_key} (the origin of the time series),
#'     \code{TS} (the values), \code{N} (the number of observations),
#'     and \code{save_dir} (the path to the save-directory).  These
#'     four values will be used by the functions that analyses
#'     \code{TS} based upon Local Gaussian Approximations and Local
#'     Gaussian Spectral Densities.}
#'
#' }
#' 
#' @export

TS_LG_object <- function (
    TS_data,
    details = NULL,
    main_dir = c("~", "LG_DATA"),
    save_dir = NULL,
    .remove_ties = TRUE) {
    ##  A sanity check to see if multivariate 'TS_data' given as an
    ##  array looks like it should, i.e. fewer columns than rows.
    if (is.array(TS_data)) {
        ##  Correct dimension?
        if (length(dim(TS_data)) >2)
            error(.argument = "TS_data",
                  c("The dimension of the array 'TS_data' is ",
                    length(dim(TS_data)),
                    ".  It should not be higher than 2."))
        ##  Correct format if dimension equal to 2?
        .nrow <- dim(TS_data)[1]
        .ncol <- dim(TS_data)[2]
        if (.ncol >= .nrow)
            error(.argument = "TS_data",
                  c("The observations in the array 'TS_data' should ",
                    "be along the rows, but the shape of 'TS_data'",
                    "implies that this might not be the case, i.e. ",
                    "it has ",
                    .ncol,
                    " columns and only ",
                    .nrow,
                    " rows..."))
        kill(.nrow, .ncol)
    }
    ##  Collect 'main_dir' to one string if it is given as a vector.
    if (length(main_dir) > 1) {
        main_dir <- paste(
            main_dir,
            collapse = .Platform$file.sep)
    }
    ##  Extract a couple of Boolean objects related to 'main_dir'.
    main_dir_boolean <- {main_dir == paste(LG_default$main_dir,
                                           collapse = .Platform$file.sep)}
    main_dir_exists_boolean <- dir.exists(main_dir)
    ##  If 'main_dir' does not exists: Create it if it is the default
    ##  directory, otherwise stop the program.
    if (! main_dir_exists_boolean)
        if (main_dir_boolean) {
            ##  First time initiation, create directory.
            dir.create(main_dir)
        } else {
            error(.argument = "main_dir",
                  c("The directory '",
                    main_dir,
                    "' doesn't exist!",
                    "Create it and try once more."))
        }
    kill(main_dir_exists_boolean, main_dir_boolean)
    ##  Construct the path to the content-file.
    content_path <-
        file.path(main_dir,
                  LG_default$content_file_name)
    ##  Extract a Boolean object related to the content-file.
    content_boolean <- file.exists(content_path)
    ##  Load the content file if it exists, in order to get hold of
    ##  'TS_content', otherwise initiate an empty content-list
    if (content_boolean) {
        load(file = content_path) # This gives us 'TS_content'
    } else {
        TS_content <- list() #  First time initiation.
    }
    kill(content_boolean)
    ##  Collect the arguments related to the adjustment.
    .comp_arg_names <- setdiff(
        x = names(formals(TS_LG_normalisation)),
        y = "TS")
    .comp_arg <- structure(
        .Data = lapply(X = .comp_arg_names,
                       FUN = function(x)
                           eval(bquote(get(.(x)))) ),
        .Names = .comp_arg_names)
    ###------------------------------------------------------###
    ##  Investigate the content of 'TS_data', in particular with
    ##  regard to its origin.  Data simulated by 'TS_sample' will in
    ##  'TS_content' be sorted in sub-lists based on the keys that
    ##  generated them, while other data will be placed in a sub-list
    ##  named with 'LG_default$other_TS_dir_prefix'.  The next level
    ##  in the list-structure will be the lists that contains the
    ##  interesting stuff derived from 'TS_data'.
    ###------------------------------------------------------###
    ##  Create a Boolean object related to 'TS_data'.
    TS_simulated_boolean <- 
        any(class(TS_data) == LG_default$class$TS)
    ##  Based on 'TS_simulated_boolean', extract the time series 'TS',
    ##  and record the 'TS_key' to be used as identification later on.
    if (TS_simulated_boolean) {
        TS <- TS_data$TS
        TS_key <- TS_data$spy_report$envir$TS_key
        .multivariate_TS <- attributes(TS)$.multivariate_TS
    } else {
        ##  Create a standardised array for TS, based on whether or not
        ##  the time series is univariate or multivariate.
        .multivariate_TS <- nested_if(
            if_list = list(
                is.array(TS_data),
                length(dim(TS_data)) == 2),
            expr_not_all_TRUE = FALSE)
        ##  Compute the dimension and the dimension-names
        .dim <- if (.multivariate_TS) {
            c(dim(TS_data), 1)  ##  Add content as the last one...
        } else
            c(length(TS_data), 1, 1)
        .dimnames <- list(
            observations = paste(
                "t",
                1:.dim[1],
                sep = ""),
            variables =
                if (.multivariate_TS) {
                    paste(
                        "Y",
                        1:.dim[2],
                        sep = "")
                } else
                    "Y",
            content = LG_default$sample.prefix)
        TS <- structure(
            .Data = array(data = TS_data,
                          dim = .dim,
                          dimnames = .dimnames),
            .multivariate_TS = .multivariate_TS,
            class = LG_default$class$array)
        kill(.dim, .dimnames)
        TS_key <- LG_default$other_TS_dir_prefix
    }
    ##  Create attributes to simplify the code later on when dealing
    ##  with the different cases that must be investigated.  The
    ##  attributes should be added both to 'TS' and to the result to
    ##  be stored in the 'info'-file (at the end of the function.)
    .variables_data <- list(
        .variables = dimnames(TS)$variables,
        .nr_variables = length(dimnames(TS)$variables),
        .original_variable_names =
            if (.multivariate_TS)
                colnames(TS_data),
        .variable_pairs =
            local({
                .variables <- dimnames(TS)$variables
                .l <- 1:length(.variables)
                .ind <- expand.grid(first = .l, second = .l)
                paste(.variables[.ind[, "first"]],
                      .variables[.ind[, "second"]],
                      sep = "_")
            }),
        .bivariate_pairs = 
            if (.multivariate_TS) {
                as.vector(combn(
                    x = dimnames(TS)$variables,
                    m = 2,
                    FUN = paste,
                    collapse = "_"))
            } else
                NA_character_,
        .bivariate_pairs_II = 
            if (.multivariate_TS) {
                as.vector(combn(
                    x = dimnames(TS)$variables,
                    m = 2,
                    FUN = function(x) {
                        paste(x[2],
                              x[1],
                              sep = "_")
                    }))
            } else
                NA_character_,
        .univariate_pairs = 
            paste(dimnames(TS)$variables,
                  dimnames(TS)$variables,
                  sep = "_"))
    ##  Add the attributes to 'TS'.
    attributes(TS) <- c(
        attributes(TS),
        .variables_data)
    ##  Investigate whether or not the time series already has been
    ##  registered in 'TS_content'.  Check everything, regardless of
    ##  source, record the sub_list if a match is obtained.  Check
    ##  first if a match occurs when no attributes are present 'TS',
    ##  and for any matches proceed to check if the computational
    ##  attributes are identical too.
    TS_copy <- as.vector(TS)
    attributes(TS_copy)$TS_for_analysis <- NULL
    old_TS <- new.env()
    for (lev1 in seq_along(TS_content))
        for (lev2 in seq_along(TS_content[[lev1]])) {
            ##  Read old `TS`-data from file.
            load(file = paste(c(main_dir,
                                TS_content[[c(lev1, lev2)]][["TS"]]),
                              collapse = .Platform$file.sep),
                 envir = old_TS)
            if (identical(x = TS_copy,
                          y = as.vector(old_TS$TS))) {
                ##  Get hold of the old computational arguments.
                .comp_arg_old <- attributes(attributes(
                    old_TS$TS)$TS_for_analysis)[.comp_arg_names]
                ##  Compare old and new computational arguments.
                if (identical(.comp_arg_old, .comp_arg))
                    result <- TS_content[[lev1]][[lev2]]
            }
        }
    kill(TS_copy, old_TS, lev1, lev2, .comp_arg_old)
    ###------------------------------------------------------###
    ##  Reminder: If no match for 'TS' where found in the loop above,
    ##  then 'result' will still not have been created _within_ this
    ##  function frame.  A Boolean object can thus be created based on
    ##  'exists', but take care to specify 'inherits=FALSE' to avoid
    ##  erroneous conclusions due to the existence of an object named
    ##  'result' in some frame at a higher level.
    ###------------------------------------------------------###
    ##  Create a Boolean object related to the test of 'TS'.
    TS_done_before <- 
        exists(x = "result", inherits = FALSE)    
    ##  If no match where found for 'TS', we need to do a bunch of
    ##  stuff in order to create 'result' from scratch.
    if (! TS_done_before) {
        ##  If 'save_dir' is given, check to see that it does not
        ##  already exist a directory with that name.  (The default
        ##  value 'NULL' will not create a breakdown of the code.)
        if (length(list.dirs(file.path(main_dir, save_dir))) != 0)
            error(.argument = "save_dir",
                  paste("There already exists a directory named ",
                        sQuote(save_dir),
                        ", that contains computations for some ",
                        "other time series!",
                        sep = ""))
        ##  Create a `save_dir` based on `TS` and `.comp_arg`.
        if (is.null(save_dir))
            save_dir <- digest::digest(list(TS, .comp_arg))
        ##  Create a vector with "save 'TS'-details."
        save_TS_file <- c(save_dir,
                          LG_default$global["TS"])
        ##  Initiate 'result' by recording some common stuff.
        result <- c(
            list(
                TS_key = TS_key,
                TS = save_TS_file,
                block = LG_default$class$block %in% class(TS),
                details = details,
                N = length(dimnames(TS)$observations)),
            .variables_data)
        ##  Add more to 'result' based on 'TS_simulated_boolean'.
        if (TS_simulated_boolean) {
            ##  'TS_data' generated by 'TS_sample'.
            ##  Add the rest of 'TS_data', except 'TS', to 'result'.
            result <- c(result,
                        TS_data[which(names(TS_data) != "TS")])
            ##  Initiate 'save_dir' based on values from 'TS_data',
            ##  overwrite any values provided by lazy ignorant users
            ##  that didn't read the documentation.
            ###------------------------------------------------------###
            ##  The case when 'TS_data' was not generated by
            ##  'TS_sample'.
            ###------------------------------------------------------###
        } else {
            ##  Check how many times the "other"-part of 'TS_content'
            ##  has recorded that a default has been used in the
            ##  creation of 'save_dir', remember that 'TRUE' is
            ##  converted to '1' in a sum.
            previous_1 <- 0
            for(index in seq_along(TS_content[[TS_key]]))
                previous_1 <-
                    previous_1 +
                        TS_content[[TS_key]][[index]][["default_used_for_dir"]]
            ##  Create the proposed value for 'save_dir', that will be
            ##  used if the user didn't specify one.
            save_dir_default <- 
                paste(TS_key,
                      str_sub(
                          paste("__",
                                previous_1 + 1,
                                sep = ""),
                          start = - 3),
                      sep = "")
            ##  Investigate if the user did provide a value for
            ##  'save_dir' and update according to that.
            if (is.null(save_dir)) {
                save_dir <- save_dir_default
                result$default_used_for_dir <- TRUE
            } else {
                ##  Take into account the existence of anal retentive
                ##  users that might take pleasure from entering every
                ##  itty-bitty details themselves, including the next
                ##  default-name to be used.
                result$default_used_for_dir <-
                    identical(save_dir, save_dir_default)
            }
        }
        kill(TS_simulated_boolean)
        ##  Add a vector to the result with the directories needed in
        ##  order to create the path to our destination.
        result$save_dir <- structure(
            .Data = save_dir,
            .Names = "ts.dir")
        ##  Create a normalised version 'TS_for_analysis'.
        TS_for_analysis <- TS_LG_normalisation(
            TS = TS,
            .remove_ties = .remove_ties)
        kill(.remove_ties)
        ##  Add 'OK_attribute'
        attr(TS_for_analysis, which = "OK_attribute") <-
            LG_default$OK_attribute
        ##  Add the additional attributes from 'TS'.
        attributes(TS_for_analysis) <- c(
            attributes(TS_for_analysis),
            local({
                .include <-
                    ! names(attributes(TS)) %in% names(attributes(TS_for_analysis))
                attributes(TS)[.include]
            }))
        ##  Extend the attributes of 'TS', in order to simplify the
        ##  code later on.
        attributes(TS) <- c(
            attributes(TS),
            bootstrap = FALSE,
            list(TS_for_analysis = TS_for_analysis))
        kill(TS_for_analysis)
        ##  Append 'result' to 'TS_content'.
        TS_content[[TS_key]][[save_dir]] <- result
        ##  Create the new directory, and save 'TS'.
        dir.create(path = file.path(main_dir, save_dir))
        save(TS, file = paste(c(main_dir,
                                save_TS_file),
                              collapse = .Platform$file.sep))
        ##  Save the revised 'TS_content' to file.
        save(TS_content, file = content_path)
        ##  Create an info-file in our new directory, and store an
        ##  'info'-object containing 'result'.
        info <- list(TS_info = result)
        save(info,
             file =
                 file.path(main_dir,
                           save_dir,
                           LG_default$info_file_name))
    }
    ##  Return a list with the values 'TS_done_before' and an adjusted
    ##  version of 'result' (`main_dir` added at the end).
    return(
        list(TS_done_before = TS_done_before,
             TS_info = c(result,
                         list(main_dir = main_dir))))
}
LAJordanger/localgaussSpec documentation built on May 6, 2023, 4:31 a.m.