#' @title Define the information required for classifying time series
#' @name .sits_timeline_class_info
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Time series classification requires a series of steps:
#' (a) Provide labelled samples that will be used as training data.
#' (b) Provide information on how the classification will be performed,
#' including data timeline, temporal interval,
#' and start and end dates per interval.
#' (c) Clean the training data to ensure it meets the specifications
#' of the classification info.
#' (d) Use the clean data to train a machine learning classifier.
#' (e) Classify non-labelled data sets.
#'
#' This function provides support for step (b). It requires the user
#' to provide a timeline, the classification interval,
#' and the start and end dates of
#' the reference period. The results is a tibble with information
#' that allows the user to perform steps (c) to (e).
#'
#' @param cube Description on the data cube being classified.
#' @param samples Samples used for training the classification model.
#' @param interval Interval between two sucessive classifications.
#' @return A tibble with the classification information.
.sits_timeline_class_info <- function(cube, samples, interval){
# find the bands of the samples
bands <- sits_bands(samples)
# check if bands of the samples are consistent with the bands of the cube
assertthat::assert_that(all(bands %in% cube$bands[[1]]),
msg = "Cube does not have all bands for the samples")
# find the timeline
timeline <- sits_timeline(cube)
# find the labels
labels <- sits_labels(samples)$label
# what is the reference start date?
ref_start_date <- lubridate::as_date(samples[1,]$start_date)
# what is the reference end date?
ref_end_date <- lubridate::as_date(samples[1,]$end_date)
# obtain the reference dates that match the patterns in the full timeline
ref_dates.lst <- sits_timeline_match(timeline,
ref_start_date,
ref_end_date,
interval)
# obtain the indexes of the timeline that match the reference dates
dates_index.lst <- .sits_timeline_match_indexes(timeline, ref_dates.lst)
# find the number of the samples
nsamples <- dates_index.lst[[1]][2] - dates_index.lst[[1]][1] + 1
class_info <- tibble::tibble(
bands = list(bands),
labels = list(labels),
interval = interval,
timeline = list(timeline),
num_samples = nsamples,
ref_dates = list(ref_dates.lst),
dates_index = list(dates_index.lst)
)
return(class_info)
}
#' @title Find the time index of the blocks to be extracted for classification
#' @name .sits_timeline_index_blocks
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Obtains the indexes of the blocks to be extracted
#' for each time interval associated with classification.
#'
#' @param class_info.tb Tibble with information required for classification.
#' @return List with indexes of the input data set
#' associated to each time interval
#' used for classification.
.sits_timeline_index_blocks <- function(class_info.tb) {
# find the subsets of the input data
dates_index.lst <- class_info.tb$dates_index[[1]]
#retrieve the timeline of the data
timeline <- class_info.tb$timeline[[1]]
# retrieve the bands
bands <- class_info.tb$bands[[1]]
#retrieve the time index
time_index.lst <- .sits_timeline_index_from_dates(dates_index.lst,
timeline,
bands)
return(time_index.lst)
}
#' @title Test if starting date fits with the timeline
#' @name .sits_timeline_is_valid_start_date
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description A timeline is a list of dates where observations are available.
#' This function estimates if a date is valid by comparing it to the timeline.
#' If the date's estimate is not inside the timeline
#' and the difference between the date and the first date of timeline is
#' greater than the acquisition interval of the timeline,
#' then we conclude the date is not valid.
#'
#' @param date A date.
#' @param timeline A vector of reference dates.
#' @return Is this is valid starting date?
.sits_timeline_is_valid_start_date <- function(date, timeline){
# is the date inside the timeline?
if (date %within% lubridate::interval(timeline[1],
timeline[length(timeline)]))
return(TRUE)
# what is the difference in days between the last two days of the timeline?
timeline_diff <- as.integer(timeline[2] - timeline[1])
# if the difference in days in the timeline is smaller than the difference
# between the reference date and the first date of the timeline, then
# we assume the date is valid
if (abs(as.integer(date - timeline[1])) <= timeline_diff ) return(TRUE)
return(FALSE)
}
#' @title Test if end date fits inside the timeline
#' @name .sits_timeline_is_valid_end_date
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description A timeline is a list of dates where observations are available.
#' This function estimates if a date is valid by comparing it to the timeline.
#' If the date's estimate
#' is not inside the timeline and the difference between the date and the last
#' date of timeline is
#' greater than the acquisition interval of the timeline,
#' then we conclude the date is not valid.
#'
#' @param date A Date.
#' @param timeline A vector of reference dates.
#' @return Nearest date.
.sits_timeline_is_valid_end_date <- function(date, timeline){
# is the date inside the timeline?
if (date %within% lubridate::interval(timeline[1],
timeline[length(timeline)]))
return(TRUE)
# what is the difference in days between the last two days of the timeline?
n_times <- length(timeline)
timeline_diff <- as.integer(timeline[n_times] - timeline[n_times - 1])
# if the difference in days in the timeline is smaller than the difference
# between the reference date and the last date of the timeline, then
# we assume the date is valid
if (abs(as.integer(date - timeline[n_times])) <= timeline_diff)
return(TRUE)
return(FALSE)
}
#' @title Find dates in the input data cube that match those of the patterns
#' @name sits_timeline_match
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description For correct classification, the input time series
#' should be aligned to that of the reference data set
#' (usually a set of patterns).
#' This function aligns these data sets so that shape
#' matching works correctly
#'
#' @param timeline Timeline of input observations (vector).
#' @param ref_start_date Reference for starting the classification.
#' @param ref_end_date Reference for ending the classification.
#' @param interval Period between two classification.
#' @return A list of breaks that will be applied to the input data set.
#' @examples
#' # get a timeline for MODIS data
#' data("timeline_2000_2017")
#' # get a set of subsets for a period of 10 years
#' ref_start_date <- lubridate::ymd("2000-08-28")
#' ref_end_date <- lubridate::ymd("2000-08-13")
#' subset_dates.lst <- sits_timeline_match(timeline_2000_2017,
#' ref_start_date, ref_end_date)
#' @export
sits_timeline_match <- function(timeline, ref_start_date,
ref_end_date, interval = "12 month"){
# make sure the timelines is a valid set of dates
timeline <- lubridate::as_date(timeline)
#define the input start and end dates
input_start_date <- timeline[1]
# input_end_date <- timeline[length(timeline)]
# how many instances are there per interval?
num_inst <- .sits_timeline_num_instances(timeline, interval)
# what is the expected start and end dates based on the patterns?
ref_st_mday <- as.character(lubridate::mday(ref_start_date))
ref_st_month <- as.character(lubridate::month(ref_start_date))
year_st_date <- as.character(lubridate::year(input_start_date))
est_start_date <- lubridate::as_date(paste0(year_st_date,"-",
ref_st_month,"-",ref_st_mday))
# find the actual starting date by searching the timeline
start_date <- timeline[which.min(abs(est_start_date - timeline))]
# is the start date a valid one?
assertthat::assert_that(
.sits_timeline_is_valid_start_date(start_date, timeline),
msg = "sits_timeline_match: start date in not inside timeline")
# obtain the subset dates to break the input data set
# adjust the dates to match the timeline
subset_dates.lst <- list()
# what is the expected end date of the classification?
end_date <- timeline[which(timeline == start_date) + (num_inst - 1)]
# is the start date a valid one?
assertthat::assert_that(!(is.na(end_date)),
msg = "sits_timeline_match: start and end date do not match timeline /n
Please compare your timeline with your samples")
# go through the timeline of the data
# find the reference dates for the classification
while (!is.na(end_date)) {
# add the start and end date
dt_len <- length(subset_dates.lst)
subset_dates.lst[[dt_len + 1]] <- c(start_date, end_date)
# estimate the next end date based on the interval
next_st_date <- as.Date(start_date + lubridate::as.duration(interval))
# define the estimated end date of the input data based on the patterns
# find the actual start date by searching the timeline
start_date <- timeline[which.min(abs(next_st_date - timeline))]
# estimate
end_date <- timeline[which(timeline == start_date) + (num_inst - 1)]
}
# is the end date a valid one?
end_date <- subset_dates.lst[[length(subset_dates.lst)]][2]
assertthat::assert_that(
.sits_timeline_is_valid_end_date(end_date, timeline),
msg = "sits_timeline_match: end date in not inside timeline")
return(subset_dates.lst)
}
#' @title Find indexes in a timeline that match the reference dates
#' @name .sits_timeline_match_indexes
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description For correct classification, the time series of the input data
#' should be aligned to that of the reference data set
#' (usually a set of patterns).
#' This function aligns these data sets so that shape
#' matching works correctly
#'
#' @param timeline Timeline of input observations (vector).
#' @param ref_dates.lst Breaks to be applied to the input data set.
#' @return A list of indexes that match the reference dates
#' to the timelines.
.sits_timeline_match_indexes <- function(timeline, ref_dates.lst){
dates_index.lst <- ref_dates.lst %>%
purrr::map(function(date_pair) {
start_index <- which(timeline == date_pair[1])
end_index <- which(timeline == date_pair[2])
dates_index <- c(start_index, end_index)
return(dates_index)
})
return(dates_index.lst)
}
#' @title Find number of instances, given a timeline and an interval
#' @name .sits_timeline_num_instances
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function retrieves the number of samples
#'
#' @param timeline Timeline of input observations (vector).
#' @param interval Period to match the data to the patterns.
#' @return The number of instances during the chosen interval.
.sits_timeline_num_instances <- function(timeline, interval = "12 month"){
start_date <- timeline[1]
next_interval_date <- lubridate::as_date(lubridate::as_date(start_date)
+ lubridate::as.duration(interval))
times <- timeline[(next_interval_date - timeline) > 0]
end_date <- timeline[which.max(times)]
return(which(timeline == end_date) - which(timeline == start_date) + 1)
}
#' @title Provide a list of indexes to extract data from a
#' distance table for classification
#' @name .sits_timeline_distance_indexes
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Given a list of time indexes that indicate the
#' start and end of the values to be extracted to classify each band,
#' obtain a list of indexes that will be used to extract values from a
#' combined distance tibble (which has all the bands put together).
#'
#' @param class_info.tb Tibble with classification information.
#' @param ntimes Number of time instances.
#' @return List of values to be extracted for each classification interval.
.sits_timeline_distance_indexes <- function(class_info.tb, ntimes) {
# find the subsets of the input data
dates_index.lst <- class_info.tb$dates_index[[1]]
# find the number of the samples
nsamples <- class_info.tb$num_samples
#retrieve the timeline of the data
timeline <- class_info.tb$timeline[[1]]
# retrieve the bands
bands <- class_info.tb$bands[[1]]
nbands <- length(bands)
#retrieve the time index
time_index.lst <- .sits_timeline_index_from_dates(dates_index.lst,
timeline,
bands)
size_lst <- nbands*ntimes + 2
select.lst <- purrr::map(time_index.lst, function(idx) {
# for a given time index, build the data.table to be classified
# build the classification matrix extracting the relevant columns
select <- logical(length = size_lst)
select[1:2] <- TRUE
for (b in 1:nbands) {
i1 <- idx[(2*b - 1)] + 2
i2 <- idx[2*b] + 2
select[i1:i2] <- TRUE
}
return(select)
})
return(select.lst)
}
#' @title Provide a list of indexes to extract data from a raster-derived
#' data table for classification
#' @name .sits_timeline_raster_indexes
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description Given a list of time indexes that indicate the start and end
#' of the values to be extracted to classify each band,
#' obtain a list of indexes that will be used to extract values
#' from a combined distance tibble
#' (which has all the bands put together).
#'
#' @param cube Data cube with input data set.
#' @param samples Tibble with samples used for classification.
#' @param interval Classification interval.
#' @return List of values to be extracted for each classification interval.
.sits_timeline_raster_indexes <- function(cube, samples, interval) {
# define the classification info parameters
class_info <- .sits_timeline_class_info(cube, samples, interval)
# define the time indexes required for classification
time_index.lst <- .sits_timeline_index_blocks(class_info)
# find the length of the timeline
ntimes <- length(sits_timeline(cube))
# get the bands in the same order as the samples
nbands <- length(sits_bands(samples))
size_lst <- nbands*ntimes + 2
select.lst <- purrr::map(time_index.lst, function(idx) {
# for a given time index, build the data.table to be classified
# build the classification matrix extracting the relevant columns
select <- logical(length = size_lst)
select[1:2] <- TRUE
for (b in 1:nbands) {
i1 <- idx[(2*b - 1)] + 2
i2 <- idx[2*b] + 2
select[i1:i2] <- TRUE
}
return(select)
})
return(select.lst)
}
#' @title Create a list of time indexes from the dates index
#' @name .sits_timeline_index_from_dates
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @param dates_index.lst A list of dates with the subsets of the input data.
#' @param timeline The timeline of the data set.
#' @param bands Bands used for classification.
#' @return The subsets of the timeline.
.sits_timeline_index_from_dates <- function(dates_index.lst, timeline, bands) {
# transform the dates index (a list of dates) to a list of indexes
# this speeds up extracting the distances for classification
n_bands <- length(bands)
time_index.lst <- dates_index.lst %>%
purrr::map(function(idx){
index_ts <- vector()
for (i in 1:n_bands) {
idx1 <- idx[1] + (i - 1)*length(timeline)
index_ts[length(index_ts) + 1 ] <- idx1
idx2 <- idx[2] + (i - 1)*length(timeline)
index_ts[length(index_ts) + 1 ] <- idx2
}
return(index_ts)
})
return(time_index.lst)
}
#' @title Obtains the timeline
#' @name sits_timeline
#' @author Gilberto Camara, \email{gilberto.camara@@inpe.br}
#'
#' @description This function returns the timeline for a given cube.
#'
#' @param data A sits tibble (either a sits tibble or a raster metadata).
#' @param index The index to obtain the timeline (in case of raster bricks)
#' @export
sits_timeline <- function(data, index = 1){
timeline <- NULL
# is this a cube metadata?
if ("timeline" %in% names(data))
timeline <- lubridate::as_date(sits_cube_timeline(data, index))
# is this a sits tibble with the time series?
if ("time_series" %in% names(data))
timeline <- lubridate::as_date(sits_time_series_dates(data))
assertthat::assert_that(!purrr::is_null(timeline),
msg = "sits_timeline: input does not contain a valid timeline")
return(timeline)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.