#' @title Retrieve list of available S2 products.
#' @description The function retrieves the list of available Sentinel-2
#' products satisfying given search criteria.
#' @details **Important**: to search and download from Google Cloud, Google Cloud SDK
#' must be installed and configured following the indications in
#' \url{https://cloud.google.com/sdk/docs/install}.
#' Notice also that querying Google Cloud can be very slow (while downloading
#' them is generally faster than from SciHub).
#' @param spatial_extent A valid spatial object object of class `sf`,
#' `sfc` or `sfg`
#' @param tile `string array` Sentinel-2 Tiles to be considered string (5-length character)
#' @param orbit `string array` Sentinel-2 orbit numbers to be considered
#' @param time_interval Dates to be considered, as a temporal vector (class [POSIXct] or
#' [Date], or string in `YYYY-mm-dd` format) of length 1 (specific day) or 2 (time interval).
#' @param time_period (optional) Character:
#' * "full" (default) means that all
#' the images included in the time window are considered;
#' * "seasonal" means that only the single seasonal periods in the
#' window are used (i.e., with a time window from 2015-06-01 to
#' 2017-08-31, the periods 2015-06-01 to 2015-08-31, 2016-06-01
#' to 2016-08-31 and 2017-06-01 to 2017-08-31 are considered).
#' @param level Character vector with one of the following:
#' - "auto" (default): check if level-2A is available on SciHub:
#' if so, list it; if not, list the corresponding level-1C
#' product
#' - "L1C": list available level-1C products
#' - "L2A": list available level-2A products
#' @param server (deprecate) Character vector of length 1, with the names of
#' the servers on which SAFE archives are searched.
#' Currently, only `"gcloud"` (Google Cloud) is supported.
#' Old `"scihub"` (ESA Sentinel Hub) can no more be used, since November 2023,
#' when the Copernicus Sentinel Data is no longer available and has been
#' replaced by the Copernicus Data Space Ecosystem.
#' See also the section "Details".
#' @param apihub _deprecated_
#' @param service _deprecated_
#' @param max_cloud Integer number (0-100) containing the maximum cloud
#' level of the tiles to be listed (default: no filter).
#' @param availability Character argument, determining which products have
#' to be returned:
#' - `"online"` : only archive names already available for download are returned;
#' - `"lta"`: only archive names stored in the
#' Long Term Archive
#' are returned;
#' - `"check"`: all archive names are returned, checking if they are
#' available or not for download (see "Value" to know
#' how to distinguish each other);
#' - `"ignore"` (default): all archive names are returned, without doing the check
#' (running the function is faster).
#' If not provided, `"ignore"` is the default value unless
#' `server = c("scihub","gcloud")` (in which case `"check"` is used).
#' @param tmpdir (optional) Path where intermediate files (xml) will be created
#' (it is used only if argument `server` includes `"gcloud"`).
#' Default is a temporary directory.
#' If `tmpdir` is a non-empty folder, a random subdirectory will be used.
#' @param output_type Deprecated (use `as.data.table` to obtain a data.table).
#' @return An object of class [safelist].
#' The attribute `online` contains logical values: in case
#' `availability != "ignore"`, values are TRUE / FALSE for
#' products available for download / stored in the Long Term Archive;
#' otherwise, values are set to NA.
#' @author Lorenzo Busetto, phD (2019) - Inspired by
#' function `getSentinel_query` of package
#' [`getSpatialData`](https://github.com/16EAGLE/getSpatialData) by J. Schwalb-Willmann
#' @author Luigi Ranghetti, phD (2019)
#' @references L. Ranghetti, M. Boschetti, F. Nutini, L. Busetto (2020).
#' "sen2r": An R toolbox for automatically downloading and preprocessing
#' Sentinel-2 satellite data. _Computers & Geosciences_, 139, 104473.
#' \doi{10.1016/j.cageo.2020.104473}, URL: \url{https://sen2r.ranghetti.info/}.
#' @note License: GPL 3.0
#' @import data.table
#' @importFrom methods is as
#' @importFrom sf st_as_sfc st_sfc st_point st_as_text st_bbox st_coordinates
#' st_geometry st_intersection st_geometry st_convex_hull st_transform st_cast
#' st_union st_centroid st_is_valid st_make_valid
#' @importFrom httr RETRY authenticate content
#' @importFrom XML htmlTreeParse saveXML xmlRoot
#' @importFrom utils head read.table
#' @export
#'
#' @examples
#' \donttest{
#'
#' pos <- sf::st_sfc(sf::st_point(c(9.85,45.81)), crs = 4326)
#' time_window <- as.Date(c("2020-08-01", "2020-08-11"))
#'
#' # Full-period list
#' if (is_gcloud_configured()) {
#' example_s2_list <- s2_list(
#' spatial_extent = pos,
#' tile = "32TNR",
#' time_interval = time_window,
#' orbit = "065"
#' )
#' } else {
#' example_s2_list <- as(character(), "safelist")
#' }
#' print(example_s2_list)
#' # Print the dates of the retrieved products
#' safe_getMetadata(example_s2_list, "sensing_datetime")
#'
#' # Seasonal-period list
#' if (is_gcloud_configured()) {
#' example_s2_list <- s2_list(
#' spatial_extent = pos,
#' tile = "32TNR",
#' time_interval = time_window,
#' time_period = "seasonal"
#' )
#' } else {
#' example_s2_list <- as(character(), "safelist")
#' }
#' print(example_s2_list)
#' # Print the dates of the retrieved products
#' safe_getMetadata(example_s2_list, "sensing_datetime")
#'
#' }
s2_list <- function(spatial_extent = NULL,
tile = NULL,
orbit = NULL, # spatial parameters
time_interval = c(Sys.Date() - 10, Sys.Date()),
time_period = "full", # temporal parameters
level = "auto",
server = "gcloud",
apihub = NA,
service = "apihub",
max_cloud = 100,
availability,
tmpdir = NA,
output_type = "deprecated") {
if (!level %in% c("auto", "L2A", "L1C")) {
print_message(
type = "error",
"`level` must be \"auto\", \"L2A\" or \"L1C\""
)
}
if (!time_period %in% c("full", "seasonal")) {
print_message(
type = "error",
"`level` must be \"full\" or \"seasonal\""
)
}
if (missing(availability)) {
if (length(server) == 2 && server[1] == "scihub" && server[2] == "gcloud") {
availability <- "check"
} else {
availability <- "ignore"
}
}
if (!availability %in% c("ignore", "check", "online", "lta")) {
print_message(
type = "error",
"`availability` must be one among \"online\", \"lta\", ",
"\"check\" and \"ignore\""
)
}
# check the used service
if (!service %in% c("apihub", "dhus")) {
print_message(
type = "error",
"Argument 'service' can be only \"apihub\" or \"dhus\"; ",
"switching to \"apihub\"."
)
service <- "apihub"
}
if (inherits(try(as.Date(time_interval), silent = TRUE), "try-error")) {
print_message(
type = "error",
"`time_interval` must be of class `Date`, `POSIXct` or `character` ",
"cohercible to Date (YYYY-mm-dd)."
)
} else if (inherits(time_interval, "character")) {
time_interval <- as.Date(time_interval)
}
if (anyNA(match(server, c("gcloud", "s2cogs")))) {
print_message(
type = "error",
"`server` must be \"gcloud\" (\"apihub\" is no longer available)."
)
}
# to avoid NOTE on check
. <- online <- id_tile <- id_orbit <-
sensing_datetime <- ingestion_datetime <- centroid <- footprint <- NULL
# Do not use the {s2} spherical geometry package (to avoid errors)
# (when the interface between {sf} and {s2} will be stable, this should be removed)
if (requireNamespace("sf", quietly = TRUE)) {
try({
invisible(capture.output(sf_use_s2_prev <- sf::sf_use_s2(FALSE)))
on.exit(invisible(capture.output(sf::sf_use_s2(sf_use_s2_prev))))
}, silent = TRUE)
}
# convert input NA arguments in NULL
for (a in c("spatial_extent","tile","orbit","time_interval","apihub")) {
if (suppressWarnings(all(is.na(get(a))))) {
assign(a,NULL)
}
}
# check if spatial_extent was provided
spatial_extent_exists <- if (!exists("spatial_extent")) {
FALSE
} else if (is.null(spatial_extent)) {
FALSE
} else if (is(spatial_extent, "POLYGON")) {
if (length(spatial_extent) == 0) {
FALSE
} else {
TRUE
}
} else {
TRUE
}
# verify that spatial_extent is `sf` or `sfc`, and convert to `sfc`
if (spatial_extent_exists) {
if (!inherits(spatial_extent, c("sf", "sfc"))){
print_message(
type = "error",
"`spatial_extent` is not a `sf` or `sfc` object."
)
} else {
spatial_extent <- st_transform(
st_geometry(spatial_extent),
4326
)
}
}
# if not, retrieve it from tile
if (all(!spatial_extent_exists, is.null(tile))) {
print_message(
type = "error",
"At least one parameter among spatial_extent and tile must be specified."
)
}
# load tiles borders if needed.
if (any(!spatial_extent_exists, is.null(tile), "gcloud" %in% server)) {
# extract and import tiles kml
s2tiles <- s2_tiles()
}
# determine required tiles if needed
if (is.null(tile)) {
tile <- tiles_intersects(spatial_extent, .s2tiles = s2tiles)
}
# take the the selected tiles as extent if needed
# (this will result in the selection of more tiles, cause to overlapping
# areas; it is filtered in s2_download, but it is slow: FIXME).
# It is not possible to use tile centroids, because tile of external areas
# of orbits could not be included).
if (!spatial_extent_exists & !is.null(tile)) {
spatial_extent <- suppressWarnings(
sf::st_cast(sf::st_geometry(s2tiles[s2tiles$tile_id %in% tile,]), "POLYGON")
)
}
spatial_extent <- suppressMessages(sf::st_union(spatial_extent))
if (any(!st_is_valid(spatial_extent))) {
spatial_extent <- st_make_valid(spatial_extent)
}
# checks on dates
# TODO add checks on format
if (length(time_interval) == 1) {
time_interval <- rep(time_interval,2)
}
# split time_interval in case of seasonal download
time_intervals <- if (time_period == "full") {
data.frame(
"start" = time_interval[1],
"end" = time_interval[2],
stringsAsFactors = FALSE
)
} else if (time_period == "seasonal") {
data.frame(
"start" = seq(time_interval[1], time_interval[2], by = "year"),
"end" = rev(seq(time_interval[2], time_interval[1], by = "-1 year")),
stringsAsFactors = FALSE
)
}
if (!is.null(orbit)) {
orbit <- as.integer(orbit)
if (any(is.na(orbit))) {
print_message(
type = "error",
"`orbit` must be integer or cohercible to integer."
)
}
}
if (!is.numeric(max_cloud)) {
print_message(
type = "error",
"`max_cloud` must be integer [0,100]."
)
}
# List of output dt (one per server method)
out_dt_list <- list()
## ESA SciHub specific methods
if ("scihub" %in% server) {
out_dt_list[["scihub"]] <- .s2_list_scihub(
spatial_extent = spatial_extent,
time_intervals = time_intervals,
tile = tile,
orbit = orbit,
max_cloud = max_cloud,
apihub = apihub,
service = service,
availability = availability,
.s2tiles = s2tiles
)
out_dt_list[["scihub"]][,server:=rep("scihub", nrow(out_dt_list[["scihub"]]))]
}
## Google Cloud specific methods
if ("gcloud" %in% server) {
out_dt_list[["gcloud"]] <- .s2_list_gcloud(
time_intervals = time_intervals,
tile = tile,
orbit = orbit,
level = level,
max_cloud = max_cloud,
tmpdir = tmpdir
)
out_dt_list[["gcloud"]][,server:=rep("gcloud", nrow(out_dt_list[["gcloud"]]))]
}
## AWS s2-COGs specific methods
if ("s2cogs" %in% server) {
if (eval(parse(text = 'requireNamespace("sen2r.addons", quietly = TRUE)'))) {
out_dt_list[["s2cogs"]] <- eval(parse(text = paste0(
"sen2r.addons::.s2_list_s2cogs(",
" spatial_extent = spatial_extent,",
" time_intervals = time_intervals,",
# " s2bands = s2bands,", # TODO implement!
" tile = tile,",
" orbit = orbit,",
" max_cloud = max_cloud,",
" tmpdir = tmpdir",
")"
)))
} else {
print_message(
type = "error",
"Please install package 'sen2r.addons' to use server = \"s2cogs\"."
)
}
}
## Merge dt (in case of multiple servers)
# order by the order of input "server" argument (this is used in duplicated:
# first "server" value is taken in case of douplicated availability)
out_dt <- rbindlist(out_dt_list)
out_dt$server <- factor(out_dt$server, levels=server) # order by argument server
if (nrow(out_dt) == 0) {return(as(setNames(character(0), character(0)), "safelist"))}
# compute date (to ignore duplicated dates)
out_dt[,date := as.Date(substr(as.character(out_dt$sensing_datetime), 1, 10))]
out_names <- copy(names(out_dt)); out_names <- out_names[out_names != "server"]
# fix footprint topology errors
invalid_entries <- out_dt[,which(!st_is_valid(st_as_sfc(footprint, crs = 4326)))]
if (length(invalid_entries) > 0) {
out_footprint_fixed <- st_as_text(st_make_valid(st_as_sfc(out_dt[invalid_entries, footprint], crs = 4326)))
out_dt[invalid_entries, footprint := out_footprint_fixed]
}
if (nrow(out_dt) == 0) {return(as(setNames(character(0), character(0)), "safelist"))}
# first, order by level (L2A, then L1C) and ingestion time (newer first)
out_dt <- out_dt[order(-level,server,-ingestion_datetime),]
# second, order by availability (LTA SciHub products are always used as last choice)
out_dt <- rbind(
out_dt[is.na(online) | online == TRUE,],
out_dt[online == FALSE,]
)
if (level == "L1C") {
out_dt <- out_dt[level == "1C",]
} else if (level == "L2A") {
out_dt <- out_dt[grepl("^2Ap?$", level),]
} # for level = "auto", do nothing because unuseful products are filtered below
# filter (univocity)
suppressWarnings({
out_dt[,centroid := st_centroid(st_as_sfc(footprint, crs = 4326))]
})
out_dt <- out_dt[,head(.SD, 1), by = .(
date, id_tile, id_orbit,
apply(round(st_coordinates(centroid), 2), 1, paste, collapse = " ")
)]
out_dt <- out_dt[,out_names,with=FALSE]
if (nrow(out_dt) == 0) {return(as(setNames(character(0), character(0)), "safelist"))}
out_dt <- out_dt[order(sensing_datetime),]
out_dt[,date := NULL]
# filter by availability
if (availability == "online") {
out_dt <- out_dt[online == TRUE,]
} else if (availability == "lta") {
out_dt <- out_dt[online == FALSE,]
}
# return output
if (output_type =="data.table") { # deprecated
out_dt
} else if (output_type =="data.frame") { # deprecated
as.data.frame(out_dt)
} else if (output_type =="vector") { # deprecated
as.character(as(out_dt, "safelist"))
} else {
as(out_dt, "safelist")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.