# ---------------------------------------------------------------
#
# This file contain a list of functions to work with SITS tables
# SITS tables are the main structures of the "sits" package
# They contain both the satellite image time series and its metadata
#
# A sits table is a tibble with pre-defined columns that
# has the metadata and data for each time series. The columns are
# <longitude, latitude, start_date, end_date, label, coverage, time_series>
# Most functions on the sits package use a sits table as input (with additional parameters)
# and a sits table as output. This allows for chaining of operation on time series.
# The package provides the generic method sits_apply to apply a
# 1D generic function to a time series and specific methods for
# common tasks such as missing values removal and smoothing.
#
# The functions on this file work with sits tables, but do not change
# the values of time series. For 1D functions that change the values of
# the image time series, please see the file "sits_filters".R
#
# ---------------------------------------------------------------
#' @title Create a sits table to store the time series information
#' @name sits_table
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description this function returns an empty sits table.
#' A sits table is a tibble with pre-defined columns that
#' has the metadata and data for each time series. The columns are
#' <longitude, latitude, start_date, end_date, label, coverage, time_series>
#' Most functions on the sits package use a sits table as input (with additional parameters)
#' and a sits table as output. This allows for chaining of operation on time series.
#'
#' @return result.tb a tibble in SITS format
#' @export
sits_table <- function () {
result.tb <- tibble::tibble(longitude = double(),
latitude = double (),
start_date = as.Date(character()),
end_date = as.Date(character()),
label = character(),
coverage = character(),
time_series = list()
)
class (result.tb) <- append (class(result.tb), "sits_table")
return (result.tb)
}
#' @title Aligns dates of time series to a reference date
#' @name sits_align
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description converts the time indexes of a set of sits tables to a single reference year.
#' This function is useful to join many time series from different years to a single year,
#' which is required by methods that combine many time series, such as clustering methods.
#' The reference year is taken from the date of the start of the time series
#' available in the coverage.
#'
#' @param data.tb tibble - input SITS table (useful for chaining functions)
#' @param ref_dates the dates to align the time series
#' @return data1.tb tibble - the converted SITS table (useful for chaining functions)
#' @export
#'
sits_align <- function (data.tb, ref_dates) {
# function to shift a time series in time
shift_ts <- function(d, k) dplyr::bind_rows(utils::tail(d,k), utils::head(d,-k))
# get the reference date
start_date <- lubridate::as_date(ref_dates[1])
# create an output table
data1.tb <- sits_table()
# add a progress bar
message("Aligning samples time series intervals...")
progress_bar <- utils::txtProgressBar(min = 0, max = nrow(data.tb), style = 3)
for (i in 1:nrow(data.tb)) {
# extract the time series
row <- data.tb[i,]
ts <- row$time_series[[1]]
# rows that do not match the number of reference dates are discarded
if(length(ref_dates) != nrow(ts)) {
next
}
# in what direction do we need to shift the time series?
sense <- lubridate::yday(lubridate::as_date (ts[1,]$Index)) - lubridate::yday(lubridate::as_date(start_date))
# find the date of minimum distance to the reference date
idx <- which.min(abs((lubridate::as_date (ts$Index) - lubridate::as_date(start_date))/lubridate::ddays(1)))
# do we shift time up or down?
if (sense < 0) shift <- -(idx - 1) else shift <- (idx - 1)
# shift the time series to match dates
if (idx != 1) ts <- shift_ts(ts, -(idx - 1))
# convert the time index to a reference year
first_date <- lubridate::as_date(ts[1,]$Index)
# change the dates to the reference dates
ts1 <- dplyr::mutate (ts, Index = ref_dates)
# save the resulting row in the output table
row$time_series[[1]] <- ts1
row$start_date <- lubridate::as_date(ref_dates[1])
row$end_date <- ref_dates[length(ref_dates)]
data1.tb <- dplyr::bind_rows(data1.tb, row)
# update progress bar
utils::setTxtProgressBar(progress_bar, i)
}
close(progress_bar)
return (data1.tb)
}
#' @title returns the names of the bands of a time series
#' @name sits_bands
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description finds the names of the bands of time series in a sits table
#'
#' @param data.tb a valid sits table
#' @return result.vec a string vector with the names of the bands
#' @export
#'
sits_bands <- function (data.tb) {
result.vec <- data.tb[1,]$time_series[[1]] %>%
colnames() %>% .[2:length(.)]
return (result.vec)
}
#' @title Return the dates of a sits table
#' @name sits_dates
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description returns a vector containing the dates of a sits table
#'
#' @param data.tb a tibble in SITS format with time series for different bands
#' @return table a tibble in SITS format with values of time indexes
#' @export
sits_dates <- function (data.tb) {
values <- data.tb$time_series %>%
data.frame() %>%
tibble::as_tibble() %>%
dplyr::select (dplyr::starts_with ("Index")) %>%
t() %>%
as.vector() %>%
lubridate::as_date()
return (values)
}
#' @title Group different time series for the same lat/long coordinate
#' @name sits_group_bylatlong
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Takes a sits table in which different time references
#' for the same lat/long coordinate has been separated, and groups them together.
#' This function is useful por plotting together all time series associated to
#' the same location and is also useful to regroup series that have been split
#' to produce yearly samples that are used to define patterns
#'
#' @param data.tb tibble - input SITS table
#' @return data1.tb tibble - the converted SITS table with time series grouped by latlong
#' @export
#'
sits_group_bylatlong <- function (data.tb) {
#create a sits table to store the output
out.tb <- sits_table()
#find out how many distinct lat/long locations exist in the data
locs <- dplyr::distinct(data.tb, latitude, longitude)
# process each lat/long location
locs %>%
purrrlyr::by_row ( function (loc) {
long = as.double (loc$longitude) # select longitude
lat = as.double (loc$latitude) # select latitude
# filter only those rows with the same label
rows <- dplyr::filter (data.tb, longitude == long, latitude == lat)
# make an initial guess for the start and end dates
start_date <- rows[1,]$start_date
end_date <- rows[1,]$end_date
# get the first time series
time_series <- rows[1,]$time_series[[1]]
# are there more time series for the same location?
if (nrow(rows) > 1) {
rows %>%
utils::tail (n = -1) %>%
purrrlyr::by_row (function(row) {
# adjust the start and end dates
if (row$start_date < start_date) start_date <- row$start_date
if (row$end_date > end_date) end_date <- row$end_date
# get the time series and join it with the previous ones
t <- row$time_series[[1]]
time_series <<- dplyr::bind_rows(time_series, t)
})
}
ts.lst <- tibble::lst()
ts.lst[[1]] <- time_series
out.tb <<- tibble::add_row (out.tb,
longitude = long,
latitude = lat,
start_date = as.Date(start_date),
end_date = as.Date(end_date),
label = "NoClass",
coverage = rows[1,]$coverage,
time_series = ts.lst)
})
return (out.tb)
}
#' @title Merge two satellite image time series
#' @name sits_merge
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function merges the time series of two STIS tables.
#' To merge two series, we consider that they contain different
#' attributes but refer to the same coverage, and spatio-temporal location.
#' This function is useful to merge different bands of the same spatio-temporal locations.
#' For example, one may want to put the raw and smoothed bands for the same set of locations
#' in the same table.
#'
#' @param data1.tb the first SITS table to be merged
#' @param data2.tb the second SITS table to be merged
#' @return result.tb a merged SITS tibble with a nested set of time series
#' @export
sits_merge <- function(data1.tb, data2.tb) {
# are the names of the bands different?
ensurer::ensure_that(data1.tb, !(any(sits_bands(.) %in% sits_bands(data2.tb)) | any(sits_bands(data2.tb) %in% sits_bands(.))),
err_desc = "sits_merge: cannot merge two sits tables with bands with the same names")
# if some parameter is empty returns the another one
if (NROW(data1.tb) == 0)
return (data2.tb)
if (NROW(data2.tb) == 0)
return (data1.tb)
# verify if data1.tb and data2.tb has the same number of rows
ensurer::ensure_that(data1.tb, NROW(.) == NROW(data2.tb),
err_desc = "sits_merge: cannot merge two sits tables with different numbers of rows")
# prepare result
result.tb <- data1.tb
# merge time series
result.tb$time_series <- purrr::map2 (data1.tb$time_series, data2.tb$time_series, function (ts1, ts2) {
ts3 <- dplyr::bind_cols(ts1, dplyr::select(ts2, -Index))
return (ts3)
})
return (result.tb)
}
#' @title Prunes dates of time series to fit an interval
#' @name sits_prune
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description prunes the times series contained a set of sits tables
#' to an interval. This function is useful to constrain the different samples
#' of land cover to the same interval (usually, one year)
#'
#' @param data.tb tibble - input SITS table
#' @param min_interval a string describing the min interval (in days) bellow which the samples are discarded.
#' @param max_interval a string describing the max interval (in days) above which the samples are proned.
#' @return pruned.tb tibble - the converted SITS table
#' @export
#'
sits_prune <- function (data.tb, min_interval = "349 days", max_interval = "365 days") {
#does the input data exist?
.sits_test_table (data.tb)
pruned.tb <- sits_table()
discarded.tb <- sits_table()
#
message("Processing...")
# add a progress bar
i <- 0
progress_bar <- utils::txtProgressBar(min = 0, max = nrow(data.tb), style = 3)
data.tb %>%
purrrlyr::by_row (function (row) {
ts <- row$time_series[[1]]
row_interval <- lubridate::as_date(row$end_date) - lubridate::as_date(row$start_date)
# data interval is greater than maximum interval. Trying to cut it.
if ( row_interval >= lubridate::as.duration(max_interval)) {
# extract the time series
ts <- row$time_series[[1]]
# find the first date which exceeds the required max_interval
idx <- which.max (lubridate::as_date(ts$Index) - lubridate::as_date(row$start_date) >= lubridate::as.duration(max_interval))
# prune the time series to fit inside the required max_interval
ts1 <- ts[1:(idx - 1),]
# save the pruned time series
row$time_series[[1]] <- ts1
# store the new end date
row$end_date <- ts1[nrow(ts1),]$Index
}
# verifies if resulting time series satisfies min_interval requirement. If don't discard sample.
# Else, stores the resulting row in the SITS table
row_interval <- lubridate::as_date(row$end_date) - lubridate::as_date(row$start_date)
if ( row_interval < lubridate::as.duration(min_interval))
discarded.tb <<- dplyr::bind_rows(discarded.tb, row)
else
pruned.tb <<- dplyr::bind_rows(pruned.tb, row)
# update progress bar
i <<- i + 1
utils::setTxtProgressBar(progress_bar, i)
})
close(progress_bar)
if (nrow(discarded.tb) > 0){
message("The following sample(s) has(have) been discarded:\n")
print(tibble::as_tibble(discarded.tb))
}
return (pruned.tb)
}
#' @title Add new SITS bands.
#' @name sits_mutate
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @description Adds new bands and preserves existing in a sits_table's time series,
#' using dplyr::mutate function
#' @param data.tb a valid sits table
#' @param ... Name-value pairs of expressions. Use NULL to drop a variable.
#' @return result.tb a sits_table with same samples and the new bands
#' @export
sits_mutate <- function(data.tb, ...){
result.tb <- data.tb
result.tb$time_series <- result.tb$time_series %>% purrr::map(function(ts.tb) {
ts_computed.tb <- dplyr::mutate(ts.tb, ...)
return(ts_computed.tb)
})
return(result.tb)
}
#' @title Rename bands of a sits table
#' @name sits_rename
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description replaces the names of the bands of a satellite image time series
#'
#' @param data.tb a SITS table with a list of SITS time series
#' @param bands_new a list of new band names
#' @return out.tb a SITS table with a list of renamed bands for the time series
#' @export
sits_rename <- function (data.tb, bands_new) {
#does the input data exist?
.sits_test_table (data.tb)
ensurer::ensure_that(bands_new, !purrr::is_null(.), err_desc = "sits_rename: New band names should be provided")
ensurer::ensure_that(data.tb, length(sits_bands(.)) == length (bands_new),
fail_with = function (e) stop(e),
err_desc = "sits_rename: Please provide names for all input bands")
# rename the time series
out.ts <- data.tb$time_series %>%
purrr::map (function (ts) {
ts_out <- ts
colnames (ts_out) <- c("Index", bands_new)
return (ts_out)
})
out.tb <- dplyr::select (data.tb, latitude, longitude, start_date, end_date, label, coverage)
out.tb$time_series <- out.ts
return (out.tb)
}
#' @title Filter bands on a SITS table
#' @name sits_select
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description returns a sits table with the selected bands
#'
#' @param data.tb a sits table with the time series of the selected bands
#' @param bands a vector of bands
#' @return result.tb a tibble in SITS format with the selected bands
#' @export
sits_select <- function (data.tb, bands) {
# verify if bands exists in data.tb
ensurer::ensure_that(data.tb, all(bands %in% sits_bands(.)),
err_desc = "sits_select: some band(s) not found in input data")
# prepare result SITS table
result.tb <- data.tb
# select the chosen bands for the time series
result.tb$time_series <- data.tb$time_series %>%
purrr::map (function (ts) ts[, c("Index", bands)])
# return the result
return (result.tb)
}
#' @title Add new SITS bands and drops existing.
#' @name sits_transmute
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @description Adds new bands and drops existing in a sits_table's time series,
#' using dplyr::transmute function
#' @param data.tb a valid sits table
#' @param ... Name-value pairs of expressions.
#' @return result.tb a sits_table with same samples and the new bands
#' @export
sits_transmute <- function(data.tb, ...){
result.tb <- data.tb
result.tb$time_series <- result.tb$time_series %>% purrr::map(function(ts.tb) {
ts_computed.tb <- dplyr::transmute(ts.tb, ...)
if (!("Index" %in% colnames(ts_computed.tb)))
ts_computed.tb <- dplyr::bind_cols(dplyr::select(ts.tb, Index), ts_computed.tb)
return(ts_computed.tb)
})
return(result.tb)
}
#' @title Apply a function over SITS bands.
#' @name sits_apply
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @description `sits_apply` returns a sits_table with the same samples points and new bands computed by `fun`,
#' `fun_index` functions. These functions must be defined inline and are called by `sits_ts_apply` for each band,
#' whose vector values is passed as the function argument.
#'
#' `fun` function may either return a vector or a list of vectors. In the first case, the vector will be the new values
#' of the corresponding band. In the second case, the returned list must have names, and each element vector will
#' generate a new band which name composed by concatenating original band name and the corresponding list element name.
#'
#' If a suffix is provided in `bands_suffix`, all resulting bands names will end with provided suffix separated by a ".".
#'
#' @param data.tb a valid sits table
#' @param fun a function with one parameter as input and a vector or list of vectors as output.
#' @param fun_index a function with one parameter as input and a Date vector as output.
#' @param bands_suffix a string informing the resulting bands name's suffix.
#' @return data.tb a sits_table with same samples and the new bands
#' @export
sits_apply <- function(data.tb, fun, fun_index = function(index){ return(index) }, bands_suffix = "") {
# veify if data.tb has values
.sits_test_table(data.tb)
# computes fun and fun_index for all time series and substitutes the original time series data
data.tb$time_series <- data.tb$time_series %>%
purrr::map(function(ts.tb) {
ts_computed.lst <- dplyr::select(ts.tb, -Index) %>%
purrr::map(fun)
# append bands names' suffixes
if (nchar(bands_suffix) != 0)
names(ts_computed.lst) <- paste0(bands, ".", bands_suffix)
# unlist if there are more than one result from `fun`
if (is.recursive(ts_computed.lst[[1]]))
ts_computed.lst <- unlist(ts_computed.lst, recursive = FALSE)
# convert to tibble
ts_computed.tb <- tibble::as_tibble(ts_computed.lst)
# compute Index column
ts_computed.tb <- dplyr::mutate(ts_computed.tb, Index = fun_index(ts.tb$Index))
# reorganizes time series tibble
return(dplyr::select(ts_computed.tb, Index, dplyr::everything()))
})
return(data.tb)
}
#' @title Return the values of a given SITS table as a list of matrices according to a specified format.
#' @name sits_values
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description this function returns only the values of a sits table (according a specified format).
#' This function is useful to use packages such as ggplot, dtwclust, or kohonen that
#' require values that are rowwise or colwise organised.
#'
#' @param data.tb a tibble in SITS format with time series for different bands
#' @param bands string - a group of bands whose values are to be extracted. If no bands is informed extract ALL bands.
#' @param format string - either "cases_dates_bands" or "bands_cases_dates" or "bands_dates_cases"
#' @return table a tibble in SITS format with values
#' @family STIS table functions
#' @export
sits_values <- function(data.tb, bands = NULL, format = "cases_dates_bands"){
ensurer::ensure_that(format, . == "cases_dates_bands" || . == "bands_cases_dates" || . == "bands_dates_cases",
err_desc = "sits_values: valid format parameter are 'cases_dates_bands', 'bands_cases_dates', or 'bands_dates_cases'")
if (purrr::is_null(bands))
bands <- sits_bands(data.tb)
# equivalent to former sits_values_rows()
# used in sits_cluster input data
# list elements: bands, matrix's rows: cases, matrix's cols: dates
if (format == "cases_dates_bands") {
# populates result
values.lst <- data.tb$time_series %>%
purrr::map(function (ts) {
data.matrix(dplyr::select(ts, dplyr::one_of(bands)))
})
# another kind of sits_values_rows()
# used in sits_kohonen input
# list elements: bands, matrix's rows: cases, matrix's cols: dates
} else if (format == "bands_cases_dates") {
values.lst <- bands %>% purrr::map(function (band) {
data.tb$time_series %>%
purrr::map(function (ts) {
dplyr::select(ts, dplyr::one_of(band))
}) %>%
data.frame() %>%
tibble::as_tibble() %>%
as.matrix() %>% t()
})
names(values.lst) <- bands
# equivalent to former sits_values_cols()
# list elements: bands, matrix's rows: dates, matrix's cols: cases
} else if (format == "bands_dates_cases") {
values.lst <- bands %>% purrr::map(function (band) {
data.tb$time_series %>%
purrr::map(function (ts) {
dplyr::select(ts, dplyr::one_of(band))
}) %>%
data.frame() %>%
tibble::as_tibble() %>%
as.matrix()
})
names(values.lst) <- bands
}
return (values.lst)
}
#' @title Spread matches from a sits matches tibble
#' @name sits_spread_matches
#' @author Victor Maus, \email{vwmaus1@@gmail.com}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Alexandre Xavier Ywata de Carvalho, \email{alexandre.ywata@@ipea.gov.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Given a SITS tibble with a matches, returns a tibble whose columns have
#' the reference label and the TWDTW distances for each temporal pattern.
#'
#' @param data.tb a SITS matches tibble
#' @return result.tb a tibble where whose columns have the reference label and the TWDTW distances for each temporal pattern
#' @export
sits_spread_matches <- function(data.tb){
# Get best TWDTW aligniments for each class
data.tb$matches <- data.tb$matches %>%
purrr::map(function (data.tb){
data.tb %>%
dplyr::group_by(predicted) %>%
dplyr::summarise(distance=min(distance))
})
# Select best match and spread pred to columns
result.tb <- data.tb %>%
dplyr::transmute(original_row = 1:NROW(.), reference = label, matches = matches) %>%
tidyr::unnest(matches, .drop = FALSE) %>%
tidyr::spread(key = predicted, value = distance)
return(result.tb)
}
#' @title Spread matches from a sits matches tibble
#' @name .sits_test_table
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Tests if a SITS table exists or has data inside
#'
#' @param data.tb a SITS table
#' @return returns TRUE if data.tb has data.
#'
.sits_test_table <- function (data.tb) {
ensurer::ensure_that(data.tb, !purrr::is_null(.),
err_desc = "input data not provided")
ensurer::ensure_that(data.tb, NROW(.) > 0,
err_desc = "input data is empty")
return (TRUE)
}
#' @title Create an empty distance table to store the results of distance metrics
#' @name sits_distance_table
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Create an empty distance table to store the results of distance metrics
#'
#' @param patterns.tb a SITS table with a set of patterns
#' @return distances.tb a tibble to store the distances between a time series and a set of patterns
#' @export
#'
sits_distance_table <- function (patterns.tb) {
distances.tb <- tibble::tibble(
original_row = integer(),
reference = character())
distances.tb <- tibble::as_tibble (distances.tb)
labels <- (dplyr::distinct(patterns.tb, label))$label
bands <- sits_bands (patterns.tb)
for (l in 1:length(labels))
for (b in 1:length(bands)) {
measure <- paste0 (labels[l], ".", bands[b])
distances.tb [measure] = double()
}
return (distances.tb)
}
#' @title Create an empty distance table to store the results of distance metrics
#' @name sits_distance_table_from_data
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Create an empty distance table to store the results of distance metrics
#'
#' @param data.tb a SITS table with a data set
#' @return distances.tb a tibble to store the distances between a time series and a set of patterns
#' @export
#'
sits_distance_table_from_data <- function (data.tb) {
distances.tb <- tibble::tibble(
original_row = 1:NROW(data.tb),
reference = data.tb$label)
return (distances.tb)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.