#' @title Check that the times of a dataset are evenly sampled
#' @aliases is_evenly_sampled
#'
#' @param data dataset to check
#' @param period period to check the times against (if `NULL`, first check to
#' see if there is a known `period` set in the metadata, otherwise assumes 1)
#' @param tol tolerance for the period
#'
#' @return `TRUE` or `FALSE`
#'
#' @export
is_equitimed <- function(data, period = NULL, tol = 1e-06)
{
stopifnot(check_data_format(data))
full_times <- get_full_times(data, period, tol)
times <- get_times_from_data(data)
isTRUE(all.equal(times, full_times))
}
#' @export
is_evenly_sampled <- is_equitimed
#' @title Insert rows if necessary so that time series are evenly sampled
#' @aliases make_evenly_sampled
#'
#' @param data dataset to modify
#' @inheritParams is_equitimed
#' @param method one of `c("mean", "method", "closest")` that determines how
#' the rows of the original data will get coerced into the output here.
#' @inheritParams base::mean
#'
#' @return the dataset, with rows coerced according to the equitimed time
#' indices, and additional empty rows inserted if needed
#'
#' @details First, `get_full_times()` computes the sequence of time index values
#' at a regular sampling interval of period. These will be the final time
#' index values for the output. *Some* set of rows of the original dataset
#' will map to each of these time indices.
#'
#' The `method` argument determines how these rows get coerced:
#' \describe{
#' \item{mean}{the values in the rows are averaged together using `mean`}
#' \item{median}{the values in the rows are averaged together using `median`}
#' \item{closest}{the values in the row that is closest in time to the
#' desired time index are used.}
#' }
#'
#' @export
make_equitimed <- function(data, period = NULL, tol = 1e-06,
method = c("mean", "method", "closest"),
na.rm = TRUE)
{
stopifnot(check_data_format(data))
full_times <- get_full_times(data, period, tol)
if (is.null(full_times))
{
stop("Unable to construct an evenly spaced time index.")
}
times <- get_times_from_data(data)
if (isTRUE(all.equal(times, full_times)))
{
message("Dataset is already evenly sampled in time.")
return(invisible(data))
}
# generate empty matrices to hold final abundance and covariates
abundance <- matrix(NA, nrow = length(full_times), ncol = NCOL(data$abundance))
covariates <- data$covariates[0, , drop = FALSE]
# compute separation between times and full_times
times_dist <- outer(times, full_times, function(a, b) {abs(b - a)})
# fill abundance and covariates
method <- match.arg(method)
switch(method,
mean = {
idx <- times_dist <= tol
for (i in seq_along(full_times))
{
abundance[i, ] <- colMeans(data$abundance[idx[, i], , drop = FALSE], na.rm = na.rm)
covariates[i, ] <- purrr::map_dfc(data$covariates[idx[, i], , drop = FALSE], mean, na.rm = TRUE)
}
},
median = {
idx <- times_dist <= tol
for (i in seq_along(full_times))
{
abundance[i, ] <- apply(data$abundance[idx[, i], , drop = FALSE], 2, median, na.rm = na.rm)
covariates[i, ] <- purrr::map_dfc(data$covariates[idx[, i], , drop = FALSE], median, na.rm = TRUE)
}
},
closest = {
idx <- apply(times_dist, 2, which.min)
abundance <- data$abundance[idx,]
covariates <- data$covariates[idx,]
})
# restore column names and convert to tibbles
colnames(abundance) <- colnames(data$abundance)
abundance <- tibble::as_tibble(abundance)
covariates <- tibble::as_tibble(covariates)
# make sure times column is properly filled
time_var <- resolve_covariate_variable(data, "timename")
if (is.null(time_var))
{
# make sure timename variable is unique
new_col_names <- vctrs::vec_as_names(c(colnames(covariates), "time"),
repair = "unique", quiet = TRUE)
time_var <- tail(new_col_names, 1)
data$metadata$timename <- time_var
}
covariates[time_var] <- full_times
# assemble data to return
out <- list(abundance = abundance,
covariates = covariates,
metadata = data$metadata)
attr(out, "class") <- "matssdata"
return(out)
}
#' @export
make_evenly_sampled <- make_equitimed
#' @title Check if a dataset has integer times
#'
#' @param data dataset to check
#'
#' @return `TRUE` or `FALSE`
#'
#' @details If the times are already integer or Date, true. Otherwise FALSE,
#' with a message if times are missing, or if times could potentially be
#' rounded.
#'
#' @export
has_integer_times <- function(data)
{
# check for existence of times
times <- get_times_from_data(data)
if (is.null(times))
{
message("Dataset is missing times.")
return(FALSE)
}
# check for integer times
if (is.integer(times) || inherits(times, "Date"))
{
return(TRUE)
} else if (all(is.wholenumber(times))) {
message("Dataset has close to integer times, but they need to be rounded.\n",
"Perhaps you want to call ", usethis::ui_code("make_integer_times()"), ".\n")
return(FALSE)
}
# otherwise
return(FALSE)
}
#' @title Add a time variable with integer values for evenly sampled data
#'
#' @param data dataset to modify
#' @inheritParams is_equitimed
#'
#' @return the dataset, with integer times
#'
#' @details First, check if the data are evenly sampled in time. If not, we
#' exit early. Next, if the times are already integer or Date, we don't do
#' anything. If the times are numeric, but roundable to integer, we round.
#' Otherwise, we add a new variable to `covariates` from 1:n and designate
#' this variable as the `timename`.
#'
#' @export
make_integer_times <- function(data, period = NULL, tol = 1e-06)
{
times <- get_times_from_data(data)
# do checks based on existing times
if (!is.null(times))
{
# check for equitimed
if (!is_equitimed(data, period, tol))
{
stop(c("Dataset is not evenly sampled in time.\n",
"Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
}
# check for integer times
if (is.integer(times))
{
message("Dataset is evenly sampled with integer times already.")
return(invisible(data))
} else if (inherits(times, "Date")) {
message("Dataset is evenly sampled with `Date` formatted times already.")
return(invisible(data))
} else if (all(is.wholenumber(times))) {
message("Dataset is evenly sampled with (close to) integer times already.")
message("Rounding times to integer and replacing them...")
time_var <- data$metadata$timename
data$covariates[time_var] <- as.integer(round(times))
return(invisible(data))
}
}
# add time
times <- seq_len(NROW(data$abundance))
if (is.null(data$covariates)) # create covariates
{
time_var <- "time"
data$covariates <- tibble::tibble(time_var = times)
} else {
new_col_names <- vctrs::vec_as_names(c(colnames(data$covariates), "time"),
repair = "unique", quiet = TRUE)
time_var <- tail(new_col_names, 1)
data$covariates[time_var] <- times
}
data$metadata$timename <- time_var
message("Integer times created in variable ", usethis::ui_code(time_var), ".")
return(invisible(data))
}
#' Check for missing samples
#' @aliases is_fully_sampled
#'
#' @description Some analyses may require evenly sampled data without missing
#' values. `has_missing_samples` checks that the dataset is equitimed, and
#' then for missing values within `abundance` (and optionally, `covariates`)
#'
#' `is_full_sampled()` does the same check, but returns `TRUE` if there are
#' NO missing samples.
#'
#' @inheritParams is_equitimed
#' @param check_covariates `TRUE` or `FALSE` (whether to check covariates, too)
#'
#' @return `TRUE` or `FALSE`
#'
#' @export
has_missing_samples <- function(data, period = NULL, tol = 1e-06,
check_covariates = FALSE)
{
if (!is_equitimed(data, period, tol))
{
message(c("Dataset is not evenly sampled in time.\n",
"Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
return(TRUE)
}
# check abundance
if (any(is.na(data$abundance)))
{
message("Dataset has NA values in ", usethis::ui_code("abundance"), ".")
return(TRUE)
}
# check covariates
if (check_covariates && any(is.na(data$covariates)))
{
message("Dataset has NA values in ", usethis::ui_code("covariates"), ".")
return(TRUE)
}
return(FALSE)
}
#' @export
is_fully_sampled <- function(data, period = NULL, tol = 1e-06,
check_covariates = FALSE)
{
return(!has_missing_samples(data, period, tol, check_covariates))
}
#' @title Impute missing samples using linear interpolation
#'
#' @param data dataset to modify
#' @inheritParams is_equitimed
#' @param interpolate_covariates `TRUE` or `FALSE` (whether to do covariates, too)
#'
#' @return the dataset, with interpolated samples
#'
#' @details First, check if the data are evenly sampled in time. If not, we
#' exit early. Next, apply `forecast::na.interp()` to each variable that has
#' non-finite values.
#'
#' @export
interpolate_missing_samples <- function(data, period = NULL, tol = 1e-06,
interpolate_covariates = FALSE)
{
interpolate_tbl <- function(df)
{
finite_cols_idx <- apply(is.na(df), 2, all)
# replace all non finite values with NA
for (j in which(!finite_cols_idx))
{
x <- df[[j]]
x[!is.finite(x)] <- NA
interpolated <- forecast::na.interp(x)
df[[j]] <- as.numeric(interpolated)
class(df[[j]]) <- class(x)
}
return(df)
}
if (!is_equitimed(data, period, tol))
{
stop(c("Dataset is not evenly sampled in time.\n",
"Perhaps you want to call ", usethis::ui_code("make_equitimed()"), " first.\n"))
}
data$abundance <- interpolate_tbl(data$abundance)
if (interpolate_covariates)
{
data$covariates <- interpolate_tbl(data$covariates)
}
return(invisible(data))
}
#' get the complete time index, filling in gaps where necessary, and using the
#' period to establish the sampling frequency
#'
#' @noRd
get_full_times <- function(data, period = NULL, tol = 1e-06)
{
times <- get_times_from_data(data)
if (is.null(times))
{
stop("Dataset does not appear to have a times variable.\n",
"Check", usethis::ui_code("covariates"), " and ",
usethis::ui_code("metadata$timename"), ".\n")
}
period <- get_period_from_data(data, period)
full_times <- tryCatch(tidyr::full_seq(times, period, tol),
error = function(e) {
message(e$message)
return(NULL)
})
return(full_times)
}
#' extract the period, given the value from the metadata field, and a value
#' specified by the user. The flowchart is:
#' (1) if user has supplied non-null `period`, use that
#' (2) if metadata period is non-null, use that
#' (3) use a default value of 1 and print a message
#'
#' @noRd
get_period_from_data <- function(data, period = NULL)
{
if (is.null(period))
{
period <- data$metadata$period
if (is.null(period))
{
message("No time period found. Assuming period = 1.")
period <- 1
}
}
return(period)
}
#' @noRd
is.wholenumber <- function(x, tol = .Machine$double.eps ^ 0.5)
{
abs(x - round(x)) < tol
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.