#' @title Create a sits tibble to store the time series information
#' @name .sits_tibble
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function returns an empty sits tibble.
#' Sits tibbles are the main structures of sits package.
#' They contain both the satellite image time series and its metadata.
#' A sits tibble is a tibble with pre-defined columns that
#' has the metadata and data for each time series. The columns are
#' <longitude, latitude, start_date, end_date, label, cube, time_series>.
#' Most functions of sits package get a sits tibble as input
#' (with additional parameters)
#' and return another sits tibble as output.
#' This allows chaining functions over sits tibbles.
#'
#' @return A sits tibble.
#' @export
.sits_tibble <- function() {
result <- tibble::tibble(longitude = double(),
latitude = double(),
start_date = as.Date(character()),
end_date = as.Date(character()),
label = character(),
cube = character(),
time_series = list()
)
class(result) <- append(class(result), c("sits", "sits_ts_tbl"), after = 0)
return(result)
}
#' @title Aligns dates of time series to a reference date
#' @name sits_align_dates
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Converts the time indexes of a set of sits
#' tibble to a single reference year.
#' This function is useful to join many time series from
#' different years to a single year,
#' which is required by methods that combine many time series,
#' such as clustering methods.
#' The reference year is taken from the date of the start of the time series
#' available in the data cube.
#'
#' @param data Input sits tibble (useful for chaining functions).
#' @param ref_dates Dates to align the time series.
#' @return The converted sits tibble
#' @export
sits_align_dates <- function(data, ref_dates) {
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
.sits_test_tibble(data)
# function to shift a time series in time
shift_ts <- function(d, k) dplyr::bind_rows(utils::tail(d,k),
utils::head(d,-k))
# get the reference date
start_date <- lubridate::as_date(ref_dates[1])
# create an output tibble
data1.tb <- .sits_tibble()
rows.lst <- purrr::pmap(list(data$longitude,
data$latitude,
data$label,
data$cube,
data$time_series),
function(long, lat, lab, cb, ts) {
# only rows that match reference dates are kept
if (length(ref_dates) == nrow(ts)) {
# in what direction to shift the time series?
sense <- lubridate::yday(lubridate::as_date(ts[1,]$Index)) -
lubridate::yday(lubridate::as_date(start_date))
# find the date of minimum distance to the reference date
idx <- which.min(abs((lubridate::as_date(ts$Index)
- lubridate::as_date(start_date))/lubridate::ddays(1)))
# do we shift time up or down?
if (sense < 0) shift <- -(idx - 1) else shift <- (idx - 1)
# shift the time series to match dates
if (idx != 1) ts <- shift_ts(ts, -(idx - 1))
# convert the time index to a reference year
first_date <- lubridate::as_date(ts[1,]$Index)
# change the dates to the reference dates
ts1 <- dplyr::mutate(ts, Index = ref_dates)
# save the resulting row in the output tibble
row <- tibble::tibble(
longitude = long,
latitude = lat,
start_date = lubridate::as_date(ref_dates[1]),
end_date = ref_dates[length(ref_dates)],
label = lab,
cube = cb,
time_series = list(ts1))
}
return(row)
})
data1.tb <- dplyr::bind_rows(data1.tb, rows.lst)
return(data1.tb)
}
#' @title Apply a function over sits bands.
#' @name sits_apply
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @description Apply a 1D generic function to a time series
#' and specific methods for common tasks,
#' such as missing values removal and smoothing.
#' `sits_apply()` returns a sits tibble with the same samples
#' and new bands computed by `fun`, `fun_index` functions.
#' These functions must be defined inline; they are called by `sits_apply`
#' for each band, whose vector values is passed as the function argument.
#' The `fun` function may either return a vector or a list of vectors.
#' In the first case, the vector will be the new values
#' of the corresponding band.
#' In the second case, the returned list must have names,
#' and each element vector will generate a new band which name composed
#' by concatenating original band name and the corresponding list element name.
#'
#' If a suffix is provided in `bands_suffix`, all resulting band
#' names will end with provided suffix separated by a ".".
#'
#' @param data Valid sits tibble
#' @param fun Function with one parameter as input
#' and a vector or list of vectors as output.
#' @param fun_index Function with one parameter as input
#' and a Date vector as output.
#' @param bands_suffix String informing the suffix of the resulting bands.
#' @param multicores Number of cores to be used
#' @return A sits tibble with same samples and the new bands.
#' @examples
#' # Get a time series
#' data(point_ndvi)
#' # apply a normalization function
#' point2 <- sits_apply (point_ndvi,
#' fun = function (x) { (x - min (x))/(max(x) - min(x))})
#' @export
sits_apply <- function(data,
fun,
fun_index = function(index){ return(index) },
bands_suffix = "",
multicores = 1) {
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# verify if data is valid
.sits_test_tibble(data)
# computes fun and fun_index for all time series
data$time_series <- data$time_series %>%
purrr::map(function(ts.tb) {
ts_computed.lst <- dplyr::select(ts.tb, -Index) %>%
purrr::map(fun)
# append bands names' suffixes
if (nchar(bands_suffix) != 0)
names(ts_computed.lst) <- paste0(names(ts_computed.lst), ".",
bands_suffix)
# unlist if there are more than one result from `fun`
if (is.recursive(ts_computed.lst[[1]]))
ts_computed.lst <- unlist(ts_computed.lst, recursive = FALSE)
# convert to tibble
ts_computed.tb <- tibble::as_tibble(ts_computed.lst)
# compute Index column
ts_computed.tb <- dplyr::mutate(ts_computed.tb,
Index = fun_index(ts.tb$Index))
# reorganizes time series tibble
return(dplyr::select(ts_computed.tb, Index, dplyr::everything()))
})
return(data)
}
#' @title Informs the names of the bands of a time series
#' @name sits_bands
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Finds the names of the bands of time series in a sits tibble
#' or in a metadata cube
#'
#' @param data Valid sits tibble (time series or a cube)
#' @return A string vector with the names of the bands.
#'
#' @examples
#' # Retrieve the set of samples for Mato Grosso (provided by EMBRAPA)
#' # show the bands
#' sits_bands(samples_mt_6bands)
#' @export
sits_bands <- function(data) {
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# is this a cube metadata?
if ("timeline" %in% names(data))
bands <- data$bands[[1]]
# is this a sits tibble with the time series?
if ("time_series" %in% names(data))
bands <- sits_time_series(data) %>%
colnames() %>% .[2:length(.)]
return(bands)
}
#' @title Breaks a set of time series into equal intervals
#' @name sits_break
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function breaks a set of time series
#' into equal time intervals. This function is useful to produce
#' a set of time series with the same number of samples, which is
#' required for building a set of samples for classification.
#'
#' @param data A sits tibble.
#' @param timeline Timeline associated with the data cube.
#' @param start_date Starting date within an interval.
#' @param end_date Ending date within an interval.
#' @param interval Interval for breaking the series.
#' @return A sits tibble broken into equal intervals.
#' @examples
#' points.tb <- sits_break(point_ndvi, timeline_modis_392,
#' "2000-08-28", "2016-08-12")
#' @export
sits_break <- function(data,
timeline,
start_date,
end_date,
interval = "12 month"){
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# create a tibble to store the results
newdata <- .sits_tibble()
# get the dates
subset_dates.lst <- sits_timeline_match(timeline,
as.Date(start_date),
as.Date(end_date),
interval = interval)
# break the data into intervals
lapply(seq_len(nrow(data)), function(i) {
subset_dates.lst %>%
purrr::map(function(date){
point.tb <- .sits_extract(data[i,],
as.Date(date[1]),
as.Date(date[2]))
if (nrow(point.tb) > 0)
newdata <<- dplyr::bind_rows(newdata, point.tb)
})
})
# prune the results to get the same number of samples
newdata <- sits_prune(newdata)
return(newdata)
}
#' @title Return the dates of a sits tibble
#' @name sits_dates
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Returns a vector containing the dates of a sits tibble.
#'
#' @param data A tibble in sits format with time series for different bands.
#' @return A tibble with values of time indexes.
#' @examples
#' # get a point and print its dates
#' sits_dates(point_mt_6bands)
#' @export
sits_dates <- function(data) {
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
return(sits_time_series_dates(data))
}
#' @title Merge two satellite image time series
#' @name sits_merge
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function merges the time series of two sits tibbles.
#' To merge two series, we consider that they contain different
#' attributes but refer to the same data cube, and spatio-temporal location.
#' This function is useful to merge different bands of the same locations.
#' For example, one may want to put the raw and smoothed bands
#' for the same set of locations in the same tibble.
#'
#' @param data1.tb The first sits tibble to be merged.
#' @param data2.tb The second sits tibble to be merged.
#' @return A merged sits tibble with a nested set of time series.
#' @examples
#' \donttest{
#' #' # Retrieve a time series with values of NDVI
#' data(point_ndvi)
#' # Filter the point using the whittaker smoother
#' point_ws.tb <- sits_whittaker(point_ndvi, lambda = 3.0)
#' # Plot the two points to see the smoothing effect
#' plot(sits_merge(point_ndvi, point_ws.tb))
#' }
#' @export
sits_merge <- function(data1.tb, data2.tb) {
# backward compatibility
if ("coverage" %in% names(data1.tb))
data1.tb <- .sits_tibble_rename(data1.tb)
if ("coverage" %in% names(data2.tb))
data2.tb <- .sits_tibble_rename(data2.tb)
# if some parameter is empty returns the another one
if (NROW(data1.tb) == 0)
return(data2.tb)
if (NROW(data2.tb) == 0)
return(data1.tb)
# verify if data1.tb and data2.tb has the same number of rows
assertthat::assert_that(NROW(data1.tb) == NROW(data2.tb),
msg = "sits_merge: cannot merge tibbles of different sizes")
# are the names of the bands different?
# if they are not
bands1 <- sits_bands(data1.tb)
bands2 <- sits_bands(data2.tb)
if (any(bands1 %in% bands2) || any(bands2 %in% bands1)) {
if ( any(grepl(".new", bands1)) || any(grepl(".new", bands2)))
bands2 <- paste0(bands2, ".new2")
else
bands2 <- paste0(bands2, ".new")
data2.tb <- sits_rename(data2.tb, bands2)
}
# prepare result
result <- data1.tb
# merge time series
result$time_series <- purrr::map2(data1.tb$time_series,
data2.tb$time_series,
function(ts1.tb, ts2.tb) {
ts3.tb <- dplyr::bind_cols(ts1.tb, dplyr::select(ts2.tb, -Index))
return(ts3.tb)
})
return(result)
}
#' @title Add new sits bands.
#' @name sits_mutate_bands
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @description Adds new bands and preserves existing in the time series
#' of a sits tibble using \code{dplyr::mutate} function.
#' @param data Valid sits tibble.
#' @param ... Expressions written as `name = value`.
#' See \code{dplyr::mutate()} help for more details.
#' @return A sits tibble with same samples and the selected bands.
#' @examples
#' \donttest{
#' # Retrieve data for time series with label samples in Mato Grosso in Brazil
#' data (samples_mt_6bands)
#' # Generate a new image with the SAVI (Soil-adjusted vegetation index)
#' savi.tb <- sits_mutate_bands(samples_mt_6bands,
#' savi = (1.5*(nir - red)/(nir + red + 0.5)))
#' }
#' @export
sits_mutate_bands <- function(data, ...){
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# verify if data has values
.sits_test_tibble(data)
# compute mutate for each time_series tibble
proc_fun <- function(...){
data$time_series <- data$time_series %>%
purrr::map(function(ts.tb) {
ts_computed.tb <- ts.tb %>%
dplyr::mutate(...)
return(ts_computed.tb)
})
}
# compute mutate for each time_series tibble
data$time_series <- proc_fun(...)
return(data)
}
#' @title Checks that the timeline of all time series of a data set are equal
#' @name sits_prune
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function tests if all time series in a sits tibble
#' have the same number of samples, and returns a time series whose indices
#' match the majority of the samples.
#'
#' @param data Either a sits tibble or a raster metadata.
#' @return A pruned sits tibble.
#' @export
sits_prune <- function(data) {
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
.sits_test_tibble(data)
# create a vector to store the number of indices per time series
n_samples <- vector()
data$time_series %>%
purrr::map(function(t) {
n_samples[length(n_samples) + 1] <<- NROW(t)
})
# check if all time indices are equal to the median
if (all(n_samples == stats::median(n_samples))) {
message("Success!! All samples have the same number of time indices")
return(data)
}
else{
message("Some samples of time series do not have the same time indices
as the majority of the data - see log file")
# save the wrong data in a log file
ind1 <- which(n_samples != stats::median(n_samples))
msg_log <- paste0("Lines with wrong number of samples are ",ind1)
data_err.tb <- data[ind1, ]
# return the time series that have the same number of samples
ind2 <- which(n_samples == stats::median(n_samples))
return(data[ind2, ])
}
}
#' @title Names of the bands of a time series
#' @name sits_rename
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Set the names of the bands of time series in a sits tibble.
#'
#' @param data Valid sits tibble.
#' @param names String vector with the names of the new bands.
#' @return A sits tibble with the new names for the bands.
#' @examples
#' # Retrieve a time series with one band
#' data(point_ndvi)
#' # Rename the band
#' ndvi1.tb <- sits_rename (point_ndvi, names = c("veg_index"))
#' # print the names of the new band
#' sits_bands(ndvi1.tb)
#' @export
sits_rename <- function(data, names){
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# verify if the number of bands informed is the same
# as the actual number of bands in input data
assertthat::assert_that(length(names) == length(sits_bands(data)),
msg = "sits_bands: input bands and informed bands have different sizes.")
# proceed rename and return invisible
data$time_series <- data$time_series %>%
purrr::map(function(ts){
names(ts) <- c("Index", names)
return(ts)
})
return(data)
}
#' @title Sample a percentage of a time series
#' @name sits_sample
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Takes a sits tibble with different labels and
#' returns a new tibble. For a given field as a group criterion,
#' this new tibble contains a given number or percentage
#' of the total number of samples per group.
#' Parameter n: number of random samples with reposition.
#' Parameter frac: a fraction of random samples without reposition.
#' If frac > 1, no sampling is done.
#'
#' @param data Input sits tibble.
#' @param n Number of samples to pick from each group of data.
#' @param frac Percentage of samples to pick from each group of data.
#' @return A sits tibble with a fixed quantity of samples.
#' @examples
#' # Retrieve a set of time series with 2 classes
#' data(cerrado_2classes)
#' # Print the labels of the resulting tibble
#' sits_labels(cerrado_2classes)
#' # Samples the data set
#' data <- sits_sample(cerrado_2classes, n = 10)
#' # Print the labels of the resulting tibble
#' sits_labels(data)
#' @export
sits_sample <- function(data, n = NULL, frac = NULL){
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# verify if data is valid
.sits_test_tibble(data)
# verify if either n or frac is informed
assertthat::assert_that(!(purrr::is_null(n) & purrr::is_null(frac)),
msg = "sits_sample: neither n or frac parameters informed")
# prepare sampling function
sampling_fun <- if (!purrr::is_null(n))
function(tb) {
if (nrow(tb) >= n) return(dplyr::sample_n(tb, size = n,
replace = FALSE))
else return(tb)
}
else if (frac <= 1)
function(tb) tb %>% dplyr::sample_frac(size = frac, replace = FALSE)
else
function(tb) tb %>% dplyr::sample_frac(size = frac, replace = TRUE)
# compute sampling
result <- .sits_tibble()
labels <- sits_labels(data)$label
labels %>%
purrr::map(function(l){
tb_l <- dplyr::filter (data, label == l)
tb_s <- sampling_fun(tb_l)
result <<- dplyr::bind_rows(result, tb_s)
})
return(result)
}
#' @title Filter bands on a sits tibble
#' @name sits_select_bands
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Returns a sits tibble with the selected bands.
#'
#' @param data A sits tibble metadata and data on time series.
#' @param bands Names of the selected bands.
#' @return A tibble in sits format with the selected bands.
#' @examples
#' # Retrieve a set of time series with 2 classes
#' data(cerrado_2classes)
#' # Print the original bands
#' sits_bands(cerrado_2classes)
#' # Select only the "ndvi" band
#' data <- sits_select_bands(cerrado_2classes, "ndvi")
#' # Print the labels of the resulting tibble
#' sits_bands(data)
#' @export
sits_select_bands <- function(data, bands) {
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
assertthat::assert_that(all(bands %in% sits_bands(data)),
msg = paste0("sits_select_bands: missing bands: ",
paste(bands[!bands %in% sits_bands(data)], collapse = ", ")))
# prepare result sits tibble
result <- data
# select the chosen bands for the time series
result$time_series <- data$time_series %>%
purrr::map(function(ts) ts[, c("Index", bands)])
# return the result
return(result)
}
#' @title Retrieve the dates of time series for a row of a sits tibble
#' @name sits_time_series_dates
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Returns the dates of the time series associated to a sits tibble
#'
#' @param data A sits tibble with one or more time series.
#' @return A vector of dates
#' @examples
#' # Retrieve a set of time series with 2 classes
#' data(cerrado_2classes)
#' # Retrieve the dates of the first time series
#' sits_time_series_dates(cerrado_2classes)
#' @export
sits_time_series_dates <- function(data) {
return(data$time_series[[1]]$Index)
}
#' @title Retrieve time series for a row of a sits tibble
#' @name sits_time_series
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Returns the time series associated to a row of the a sits tibble
#'
#' @param data A sits tibble with one or more time series.
#' @return A tibble in sits format with the time series.
#' @examples
#' # Retrieve a set of time series with 2 classes
#' data(cerrado_2classes)
#' # Retrieve the first time series
#' sits_time_series(cerrado_2classes)
#' @export
sits_time_series <- function(data) {
return(data$time_series[[1]])
}
#' @title Add new sits bands and drops existing.
#' @name sits_transmute_bands
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @description Adds new bands and drops existing in the time series
#' of a sits tibble using dplyr::transmute function.
#' @param data A sits tibble.
#' @param ... Pair expressions in the format `name = value`.
#' See \code{\link[dplyr]{mutate}} help for more details.
#' @return A sits tibble with same samples and the new bands.
#'
#' @examples
#' \donttest{
#' # Retrieve data for time series with label samples in Mato Grosso
#' data(samples_mt_6bands)
#' # Generate a new image with the SAVI (Soil-adjusted vegetation index)
#' savi.tb <- sits_transmute_bands(samples_mt_6bands,
#' savi = (1.5*(nir - red)/(nir + red + 0.5)))
#' }
#'
#' @export
sits_transmute_bands <- function(data, ...){
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
# verify if data is valid
.sits_test_tibble(data)
# tricky to include "Index" column and expand `...` arguments
proc_fun <- function(..., Index = Index){
Index <- quote(Index)
purrr::map(data$time_series, function(ts.tb) {
ts_computed.tb <- dplyr::transmute(ts.tb, !!(Index), ...)
})
}
# compute transmute for each time_series tibble
data$time_series <- proc_fun(...)
return(data)
}
#' @title Return the values of a given sits tibble as a list of matrices.
#' @name sits_values
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function returns only the values of a sits tibble
#' (according a specified format).
#' This function is useful to use packages such as ggplot2, dtwclust, or kohonen
#' that require values that are rowwise or colwise organised.
#'
#' @param data A sits tibble with time series for different bands.
#' @param bands A string with a group of bands whose
#' values are to be extracted. If no bands are informed
#' extract ALL bands.
#' @param format A string with either "cases_dates_bands"
#' or "bands_cases_dates" or "bands_dates_cases".
#'
#' @return A sits tibble with values.
#' @examples
#' # Retrieve a set of time series with 2 classes
#' data(cerrado_2classes)
#' # retrieve the values split by bands
#' sits_values(cerrado_2classes[1:2,], format = "bands_dates_cases")
#' @export
sits_values <- function(data, bands = NULL, format = "cases_dates_bands"){
assertthat::assert_that(format == "cases_dates_bands" ||
format == "bands_cases_dates" ||
format == "bands_dates_cases",
msg = "sits_values: valid format parameter are
'cases_dates_bands', 'bands_cases_dates', or 'bands_dates_cases'")
# backward compatibility
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
if (purrr::is_null(bands))
bands <- sits_bands(data)
# equivalent to former sits_values_rows()
# used in sits_cluster input data
# list elements: bands, matrix's rows: cases, matrix's cols: dates
if (format == "cases_dates_bands") {
# populates result
values.lst <- data$time_series %>%
purrr::map(function(ts) {
data.matrix(dplyr::select(ts, dplyr::one_of(bands)))
})
# another kind of sits_values_rows()
# used in sits_kohonen input
# list elements: bands, matrix's rows: cases, matrix's cols: dates
} else if (format == "bands_cases_dates") {
values.lst <- bands %>% purrr::map(function(band) {
data$time_series %>%
purrr::map(function(ts) {
dplyr::select(ts, dplyr::one_of(band))
}) %>%
data.frame() %>%
tibble::as_tibble() %>%
as.matrix() %>% t()
})
names(values.lst) <- bands
# equivalent to former sits_values_cols()
# list elements: bands, matrix's rows: dates, matrix's cols: cases
} else if (format == "bands_dates_cases") {
values.lst <- bands %>% purrr::map(function(band) {
data$time_series %>%
purrr::map(function(ts) {
dplyr::select(ts, dplyr::one_of(band))
}) %>%
data.frame() %>%
tibble::as_tibble() %>%
as.matrix()
})
names(values.lst) <- bands
}
return(values.lst)
}
#' @title Create partitions of a data set
#' @name .sits_create_folds
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#' @author Alexandre Ywata, \email{alexandre.ywata@@ipea.gov.br}
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Split a sits tibble into k groups, based on the label.
#'
#' @param data A sits tibble to be partitioned.
#' @param folds Number of folds.
.sits_create_folds <- function(data, folds = 5) {
# verify if data exists
.sits_test_tibble(data)
# splits the data into k groups
data$folds <- caret::createFolds(data$label, k = folds,
returnTrain = FALSE, list = FALSE)
return(data)
}
#' @title Extract a subset of the data based on dates
#' @name .sits_extract
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Returns a vector containing the dates of a sits tibble.
#'
#' @param row.tb A sits tibble.
#' @param start_date Starting date of the time series segment.
#' @param end_date End date of the time series segment.
#' @return A tibble in sits format with the chosen subset.
.sits_extract <- function(row.tb, start_date, end_date) {
# create a tibble to store the results
subset.tb <- .sits_tibble()
# filter the time series by start and end dates
ts <- sits_time_series(row.tb)
indexes <- dplyr::between(ts$Index, start_date, end_date)
if (any(indexes)) {
sub.ts <- ts[indexes, ]
# store the subset of the time series in a list
ts.lst <- tibble::lst()
ts.lst[[1]] <- sub.ts
# create a new row of the output tibble
subset.tb <- tibble::add_row(subset.tb,
longitude = row.tb$longitude,
latitude = row.tb$latitude,
start_date = as.Date(start_date),
end_date = as.Date(end_date),
label = row.tb$label,
cube = row.tb$cube,
time_series = ts.lst)
}
return(subset.tb)
}
#' @title Tests if a sits tibble is valid
#' @name .sits_test_tibble
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Tests if a sits tibble exists or has data inside.
#'
#' @param data A sits tibble.
#' @return Returns TRUE if data has data.
.sits_test_tibble <- function(data) {
assertthat::assert_that(!purrr::is_null(data),
msg = "input data not provided")
assertthat::assert_that(NROW(data) > 0,
msg = "input data is empty")
if ("coverage" %in% names(data))
data <- .sits_tibble_rename(data)
names <- c("longitude", "latitude", "start_date", "end_date",
"label", "cube", "time_series")
assertthat::assert_that(all(names %in% colnames(data)),
msg = "data input is not a valid sits tibble")
return(TRUE)
}
#' @title Store the results of CSV samples that could not be read
#' @name .sits_tibble_csv
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Create an empty tibble to store the results of classification.
#'
#' @return A tibble to store the result of classifications.
.sits_tibble_csv <- function() {
result <- tibble::tibble(longitude = double(),
latitude = double(),
start_date = as.Date(character()),
end_date = as.Date(character()),
label = character()
)
return(result)
}
#' @title Create an empty tibble to store the results of predictions
#' @name .sits_tibble_prediction
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @author Rolf Simoes, \email{rolf.simoes@@inpe.br}
#'
#' @description Create a tibble to store the results of predictions.
#' @param data A tibble with the input data.
#' @param class_info.tb A tibble with the information on classification.
#' @param pred.mtx The result of the classification
#' (one class per column and one row per interval).
#' @param interval The time interval between two classifications.
#' @return A tibble storing the predictions.
.sits_tibble_prediction <- function(data, class_info.tb, pred.mtx, interval) {
# retrieve the list of reference dates
# this list is a global one and it is created based on the samples
ref_dates.lst <- class_info.tb$ref_dates[[1]]
# retrieve the global timeline
timeline_global <- class_info.tb$timeline[[1]]
# size of prediction tibble
nrows <- length(ref_dates.lst)
# get the labels of the data
labels <- class_info.tb$labels[[1]]
n_labels <- length(labels)
# create a named vector with integers match the class labels
int_labels <- c(1:n_labels)
names(int_labels) <- labels
# compute pred.vec
pred.vec <- names(int_labels[max.col(pred.mtx)])
class_idx <- 1
predicted.lst <- purrr::pmap(
list(data$start_date, data$end_date, data$time_series),
function(row_start_date, row_end_date, row_time_series) {
# get the timeline of the row
timeline_row <- lubridate::as_date(row_time_series$Index)
# the timeline of the row may be different from the global timeline
# this happens when we are processing samples with different dates
if (timeline_row[1] != timeline_global[1]) {
# what is the reference start date?
ref_start_date <- lubridate::as_date(row_start_date)
# what is the reference end date?
ref_end_date <- lubridate::as_date(row_end_date)
# what are the reference dates to do the classification?
ref_dates.lst <- sits_timeline_match(timeline_row,
ref_start_date,
ref_end_date,
interval)
}
# store the classification results
pred_row.lst <- ref_dates.lst %>%
purrr::map(function(rd){
pred_row <- tibble::tibble(
from = as.Date(rd[1]),
to = as.Date(rd[2]),
class = pred.vec[class_idx],
probs = list(pred.mtx[class_idx,])
)
class_idx <<- class_idx + 1
return(pred_row)
})
# transform the list into a tibble
predicted.tb <- dplyr::bind_rows(pred_row.lst)
return(predicted.tb)
})
data$predicted <- predicted.lst
class(data) <- append(class(data), "predicted", after = 0)
return(data)
}
#' @title Rename a tibble to use "cube" instead of "coverage"
#' @name .sits_tibble_rename
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#' @param data Tibble with
.sits_tibble_rename <- function(data)
{
# is the input data a valid sits tibble?
assertthat::assert_that("coverage" %in% names(data),
msg = "sits_tibble_rename: input data does not have a coverage column ")
data <- data %>% dplyr::rename(cube = coverage)
return(data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.