#' @title Load sample time series from Rdata files
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#' @description Load (and filter) sample time series from Rdata files.
#'
#' @param x A character. A path to a file of samples time series.
#' @param sat A Length-one character. The satellite or sensor name in c("Landsat8", "MOD13").
#' @param expected_nrow A lengh-one inetger. Keep the samples with this number of time steps.
#' @return A tibble or a list of tibbles.
#' @export
load_samples <- function(x, sat, expected_nrow){
stopifnot(is.character(x))
# util functions
# test if a vector elements are the same. Taken from https://stackoverflow.com/questions/4752275/test-for-equality-among-all-elements-of-a-single-vector
zero_range <- function(x, tol = .Machine$double.eps ^ 0.5) {
if (length(x) == 1) return(TRUE)
x <- range(x) / mean(x)
isTRUE(all.equal(x[1], x[2], tolerance = tol))
}
# validate the time series of a SITS sample
is_ts_valid <- function(ts_tb, expected_nrow){
if (!dplyr::is.tbl(ts_tb)) return(FALSE)
if (nrow(ts_tb) != expected_nrow || ncol(ts_tb) < 2) return(FALSE)
if (!any(vapply(dplyr::select(ts_tb, -1), is.numeric, logical(1)))) return(FALSE)
# are there any NA?
if (any(vapply(ts_tb, function(x) any(is.na(unlist(ts_tb))), logical(1))))
return(FALSE)
# are all the observations the same? This could be NAs, etc
if (any(vapply(dplyr::select(ts_tb, -1), zero_range, logical(1)))) return(FALSE)
return(TRUE)
}
# body
if (is.na(x) || length(x) == 0) {
return(NA)
}else if (length(x) == 1) {
samples.tb <- NULL
load(x)
if (is.null(samples.tb))
return(NA)
end_date <- label <- longitude <- latitude <- start_date <- time_series <- valid <- NULL
res <- samples.tb %>%
ensurer::ensure_that(all(c("end_date", "label", "longitude",
"latitude", "start_date", "time_series") %in% colnames(.)),
err_desc = "Missing columns.") %>%
dplyr::arrange(longitude, latitude, start_date, end_date, label) %>%
dplyr::mutate(valid = purrr::map_lgl(time_series, is_ts_valid, expected_nrow)) %>%
dplyr::filter(valid == TRUE) %>%
dplyr::select(-valid)
if (!is.null(sat))
res <- res %>%
dplyr::mutate(time_series = purrr::map(time_series, compute_indexes,
sat = sat))
return(res)
} else {
return(lapply(x, load_samples, sat, expected_nrow))
}
}
#' @title Process a shapefile of validated samples.
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#' @description Get validated PRODES samples and split them by label in year
#' into a table (csv). These processed samples are ready for other script to
#' retrieve time-series
#'
#' @param shp_path A length-one character. Path to a shapefile.
#' @param out_dir A length-one character. Path to a folder.
#' @return A vector of paths to CSV files
process_valid_shp <- function(shp_path, out_dir){
# read shp, drop geometry, fix column names
shp <- shp_path %>%
sf::st_read(quiet = TRUE, stringsAsFactors = FALSE)
coords <- shp %>%
sf::st_coordinates() %>%
dplyr::as_tibble()
time_serie <- NULL
csv <- shp %>%
ensurer::ensure_that(all(c("time_serie") %in% colnames(.))) %>%
dplyr::mutate(longitude = coords$X, latitude = coords$Y) %>%
dplyr::rename(time_series = time_serie) %>%
sf::st_set_geometry(value = NULL) %>%
dplyr::as_tibble()
# check for a valid id field
id_col <- csv %>%
dplyr::select(tidyselect::starts_with('tmp_id'))
if (nrow(id_col) > 0 && ncol(id_col) > 0) {
# take the last tmp_id used
id_col <- as.integer(as.vector(unlist(id_col[, ncol(id_col)])))
if (length(id_col) != length(unique(id_col)))
# recompute id if it is invalid
id_col <- 1:nrow(csv)
}else{
id_col <- 1:nrow(csv)
}
id <- NULL
csv <- csv %>%
dplyr::mutate(id = id_col) %>%
dplyr::select(-tidyselect::starts_with("tmp_id")) %>%
dplyr::select(id, tidyselect::everything())
# update date ranges
start_md <- "-08-01" # "-08-20"
end_md <- "-08-31" # "-08-19"
res <- csv[0,]
for (y in 2013:2017) {
label_all_years <- c("FLORESTA", "NAO_FLORESTA", "NAO_FLORESTA2",
"HIDROGRAFIA", paste0('d', y))
label <- NULL
res <- csv %>%
ensurer::ensure_that("label" %in% colnames(.)) %>%
dplyr::filter(label %in% label_all_years) %>%
dplyr::mutate(start_date = paste0(y - 1, start_md)) %>%
dplyr::mutate(end_date = paste0(y, end_md)) %>%
dplyr::bind_rows(res)
}
for (y in 2014:2017) {
label_some_years <- c("flood")
res <- csv %>% dplyr::filter(label %in% label_some_years) %>%
dplyr::mutate(start_date = paste0(y - 1, start_md)) %>%
dplyr::mutate(end_date = paste0(y, end_md)) %>%
dplyr::bind_rows(res)
}
# re-label
res <- res %>%
dplyr::mutate(label = dplyr::recode(label,
"FLORESTA" = "forest" ,
"NAO_FLORESTA" = "no forest",
"NAO_FLORESTA2" = "no forest 2",
"HIDROGRAFIA" = "water",
"DESMATAMENTO" = 'deforestation',
"d2012" = 'deforestation',
"d2013" = 'deforestation',
"d2014" = 'deforestation',
"d2015" = 'deforestation',
"d2016" = 'deforestation',
"d2017" = 'deforestation',
"d2018" = 'deforestation',
"flood" = "flood"))
# write a file per year
csv_files <- character()
for (d in unique(res$start_date)) {
start_date <- NULL
csv_d <- res %>%
dplyr::filter(start_date == d)
fname <- file.path(out_dir, paste0(substr(basename(shp_path), 1,
nchar(basename(shp_path)) - 4),
'_', d, '.csv'))
csv_files <- append(csv_files, fname)
print(paste0("Writing ", nrow(csv_d), " samples to file ", fname))
print(table(csv_d$label))
utils::write.csv(csv_d, file = fname, quote = FALSE, row.names = FALSE)
}
return(csv_files)
}
#' @title Get time series of sample points from SITS' bricks.
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#' @description Get validated PRODES samples (from a CSV file) and retrieve time
#' series for each one
#'
#' @param cpath A length-one character. Path to a CSV file.
#' @param path_bricks A character. Path to a directory with Landsdat8-MODIS bricks.
#' @param brick_prefix A character. Prefix to the name of the Landsdat8-MODIS bricks.
#' @param class_bands A character. The name of the bands to process.
#' @param scale_factor A list. Scale factor of the image band files.
#' @param missing_values A list. Values representing missing pixels for each band.
#' @param minimum_values A list. Minimum values of each band.
#' @param maximum_values A list. Maximum values of each band.
#' @param suffix A length-one character. A suffix to append to the filename of the results
#' @param max_time_diff A length-one numeric. The maximum difference (in days) between the fist day of the bricks and the first day of the CSV sample file.
#' @param cov_name A length-one character. Name for the sits coverage to create.
#' @param time_len A length-one integer. The lenght of the brick's time line.
#' @param time_by A length-one inetger. The number of days between observations in the time line.
#' @return A vector of paths to Rdata files
get_timeseries <- function(cpath, path_bricks, brick_prefix, class_bands,
scale_factor = NULL, missing_values = NULL,
minimum_values = NULL, maximum_values = NULL,
suffix = "", max_time_diff = 30,
cov_name = "SITS coverage", time_len = 23,
time_by = 16){
stopifnot(length(cpath) == 1)
prodes_year_start <- "-08-01"
# SITS default parameteres
sits_conf <- config::get(file = system.file("extdata", "config.yml", package = "sits"))
if (is.null(scale_factor))
scale_factor <- sits_conf$RASTER_scale_factor$LANDSAT
if (is.null(missing_values))
missing_values <- sits_conf$RASTER_missing_value$LANDSAT
if (is.null(minimum_values))
minimum_values <- sits_conf$RASTER_minimum_value
if (is.null(maximum_values))
maximum_values <- sits_conf$RASTER_maximum_value
# get metadata from brick's name
pathrow <- cpath %>%
basename() %>%
stringr::str_extract(pattern = "_[0-9]{3}_[0-9]{3}_") %>%
strsplit(split = '_', fixed = TRUE) %>%
unlist() %>%
paste(collapse = '')
csv_start_date <- cpath %>%
basename() %>%
stringr::str_extract(pattern = "[0-9]{4}-[0-9]{2}-[0-9]{2}")
if (class(try(as.Date(csv_start_date))) == "try-error" ||
is.na(try(as.Date(csv_start_date))))
stop("Invalid start_date in file name ", cpath)
# build a data.frame of bricks' metadata
band <- dif_time <- start_date <- NULL
brick_tb <- path_bricks %>%
list.files(pattern = paste0(brick_prefix, pathrow, '_*'),
full.names = TRUE) %>%
get_brick_md() %>%
dplyr::as_tibble() %>%
ensurer::ensure_that(all(c("band", "year", "start_date") %in% colnames(.)),
err_desc = "Missing bands") %>%
dplyr::mutate(dif_time = abs(as.numeric(difftime(as.Date(start_date),
as.Date(csv_start_date), units = 'days')))) %>%
dplyr::filter(pathrow == pathrow,
dif_time < max_time_diff,
band %in% class_bands) %>%
ensurer::ensure_that(nrow(brick_tb) > 0,
err_desc = sprintf("No brick found for scene %s and date %s.",
pathrow, csv_start_date))
# A brick should contain one year worth of images of a single path/row of a single band
time_line <- brick_tb %>%
dplyr::pull(start_date) %>%
unique() %>%
ensurer::ensure_that(length(.) == 1) %>%
# These motherfuckers throw a WARNING:
# lubridate::date()
# lubridate::as_date()
as.Date() %>%
seq(by = time_by, length.out = time_len)
# get a sits coverage
raster_cov <- sits::sits_coverage(service = "RASTER",
name = cov_name,
bands = brick_tb$band,
scale_factor = scale_factor,
missing_values = missing_values,
minimum_values = minimum_values,
maximum_values = maximum_values,
timeline = time_line,
files = brick_tb$path)
# get samples
#------------------------------------------------------------------------------
# NOTE: workaround for testing
# samples_csv <- process_valid_shp(shp_path = cpath, out_dir = tempdir())
# samples.tb <- sits::sits_get_data(file = samples_csv[1], coverage = raster_cov)
#------------------------------------------------------------------------------
samples.tb <- sits::sits_get_data(file = cpath, coverage = raster_cov)
fp <- file.path(dirname(cpath),
paste0(basename(tools::file_path_sans_ext(cpath)), suffix,
".Rdata"))
save(samples.tb, file = fp)
csv_paths <- character()
csv_paths <- append(csv_paths, fp)
return(csv_paths)
}
#' @title Get MODIS time series for validates sample points.
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#' @description Get validated PRODES samples (from a CSV file) and retrieve time
#' series for each one from WTSS server.
#'
#' @param cpath A length-one character. Path to a CSV file.
#' @return A length-one character. Path to the result file.
get_timeseries_modis <- function(cpath){
wtss_coverage <- sits::sits_coverage(service = "WTSS-INPE", name = "MOD13Q1")
samples.tb <- sits::sits_get_data(coverage = wtss_coverage, file = cpath)
rd_file <- file.path(paste0(substr(cpath, 1, nchar(cpath) - 4), '_mod13q1.Rdata'))
save(samples.tb, file = rd_file)
return(rd_file)
}
#' @title Validate CSV files of samples.
#' @author Alber Sanchez, \email{alber.ipia@@inpe.br}
#' @description Validate SITS sample CSV files.
#'
#' @param csv_paths A character. Paths to a CSV file.
#' @return A character.
#' @export
validate_csv <- function(csv_paths){
res <- lapply(csv_paths, function(x){
r <- character()
if (!file.exists(x)) {
warning(paste0("File ", x, " does not exist"))
return(paste0("File ", x, " does not exist"))
}
csv <- utils::read.csv(x)
valid_cn <- c("id", "longitude", "latitude", "start_date", "end_date", "label")
miss_f <- setdiff(valid_cn, colnames(csv))
if (length(miss_f) > 0) {
warning(paste0("There are missing fields in ", basename(x)))
r <- paste0("There are missing fields in ", basename(x))
}
return(r)
})
return(unlist(res))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.