#' Export reflectance time-series from the Landsat record using rgee
#'
#' This function exports surface reflectance time series for a set of point
#' coordinates from the full Landsat Collection 2 record using Google Earth
#' Engine (GEE). The resulting time-series can then be processed using the
#' remainder of the LandsatTS workflow.\cr\cr
#' For polygon geometries consider using lsat_get_pixel_centers() to generate
#' pixel center coordinates for all pixels within a given polygon first. \cr\cr
#' Please note: Unlike other functions in this package, this function does
#' NOT return the time-series as an object; instead, it returns a list of the
#' GEE tasks issued for the export. The actual time-series are exported as CSV
#' files via GEE to the user's Google Drive. Exporting this way allows for
#' more efficient scheduling and larger exports, and does not require the R
#' session to keep running in the background while the requests are processed
#' on the GEE. \cr\cr
#' The progress of the exports can be monitored using the list of tasks returned
#' in combination with ee_monitoring() from the rgee package, or simply by
#' using the task overview in the GEE web code-editor
#' (https://code.earthengine.google.com).
#'
#' @param pixel_coords_sf Simple feature object of point coordinates for the
#' sample.
#' @param sample_id_from The column name that specifies the unique sample
#' identifier in pixel_coords_sf (defaults to "sample_id" as generated by
#' lsat_get_pixel_centers).
#' @param chunks_from Column name in pixel_coords_sf used to divide the exports
#' into chunks. Overrides chunk division by size (see max_chunk_size).
#' @param this_chunk_only Name of a specific chunk to be exported. Useful for
#' re-exporting a single chunk should the export fail for some reason.
#' @param max_chunk_size Maximum number of sample coordinates to be exported in
#' each chunk. Defaults to 250.
#' @param drive_export_dir Folder on the user's Google Drive to export the
#' records to. Defaults to "lsatTS_export".
#' @param file_prefix Optional prefix for the exported file names. Defaults to
#' "lsatTS_export".
#' @param start_doy Optional first day of year to extract for. Defaults to 152.
#' @param end_doy Optional last day of year to extract for. Defaults to 243.
#' @param start_date Optional extraction start date (as string,
#' format YYYY-MM-DD). Defaults to "1984-01-01".
#' @param end_date Optional extraction end date (as string, format YYYY-MM-DD).
#' Defaults to today's date.
#' @param buffer_dist Buffer distance around the sample coordinates. If greater
#' than 0, lsat_get_pixel_centers() is used to find all Landsat pixel centers
#' within the specified (square) buffer around each point in pixel_coords_sf.
#' Can be slow if the number of points is large. Defaults to 0 m. See the
#' buffered export in the examples.
#' @param scale Scale for the extraction. Defaults to the 30 m nominal Landsat
#' pixel size.
#' @param mask_value Optional masking value for global surface water mask.
#' Defaults to 0.
#' @import geojsonio
#' @return List of initiated rgee tasks.
#'
#' @author Jakob J. Assmann and Richard Massey
#' @export lsat_export_ts
#'
#' @examples
#' # Only run example if "rgee" is installed
#' if (requireNamespace("rgee", quietly = TRUE)) {
#'
#' # Using sf, dplyr and rgee
#' library(sf)
#' library(dplyr)
#' library(rgee)
#'
#' # Initialize GEE
#' ee_Initialize()
#'
#' # Generate test points
#' test_points_sf <- st_sfc(st_point(c(-149.6026, 68.62574)),
#' st_point(c(-149.6003, 68.62524)),
#' st_point(c(-75.78057, 78.87038)),
#' st_point(c(-75.77098, 78.87256)),
#' st_point(c(-20.56182, 74.47670)),
#' st_point(c(-20.55376, 74.47749)), crs = 4326) %>%
#' st_sf() %>%
#' mutate(sample_id = c("toolik_1",
#' "toolik_2",
#' "ellesmere_1",
#' "ellesmere_1",
#' "zackenberg_1",
#' "zackenberg_2"),
#' region = c("toolik", "toolik",
#' "ellesmere", "ellesmere",
#' "zackenberg", "zackenberg"))
#'
#' # Export time-series using lsat_export_ts()
#' task_list <- lsat_export_ts(test_points_sf)
#'
#' # Export time-series using a chunk size of 2
#' task_list <- lsat_export_ts(test_points_sf, max_chunk_size = 2)
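#'
#' # Export time-series with a square buffer around each sample point. A
#' # minimal sketch using an illustrative 30 m buffer; this runs
#' # lsat_get_pixel_centers() internally and can be slow for many points.
#' task_list <- lsat_export_ts(test_points_sf, buffer_dist = 30)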
#'
#' # Export time-series in chunks by column
#' task_list <- lsat_export_ts(test_points_sf, chunks_from = "region")
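#'
#' # Monitor the progress of the submitted tasks, e.g. for the first task
#' ee_monitoring(task_list[[1]])
#'
#' # Once the exports have completed, the time-series are available as CSV
#' # files in the Google Drive folder (by default "lsatTS_export"). A minimal
#' # sketch for reading them into R after downloading the folder locally:
#' # ts_files <- list.files("lsatTS_export", pattern = ".csv$", full.names = TRUE)
#' # lsat_ts <- do.call(rbind, lapply(ts_files, read.csv))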
#'
#' # Closing bracket for the "rgee" check
#' }
lsat_export_ts <- function(pixel_coords_sf,
sample_id_from = "sample_id",
chunks_from = NULL,
this_chunk_only = NULL,
max_chunk_size = 250,
drive_export_dir = "lsatTS_export",
file_prefix = "lsatTS_export",
start_doy = 152,
end_doy = 243,
start_date = "1984-01-01",
end_date = "today",
buffer_dist = 0,
scale = 30,
mask_value = 0
){
# confirm rgee is initialized
tryCatch(rgee::ee_user_info(quiet = TRUE),
         error = function(e) {
           stop("rgee not initialized!\nPlease initialize rgee. ",
                "See: https://r-spatial.github.io/rgee/index.html")
         })
# Check whether end_date was supplied; if not, set it to today's date.
if(end_date == "today") end_date <- as.character(Sys.Date())
# Turn s2 off in sf for backwards compatibility
sf::sf_use_s2(FALSE)
# Check whether pixel_coords_sf is an sf object with an sfc of points
if(!("sfc_POINT" %in% class(sf::st_geometry(pixel_coords_sf)))) {
stop("Invalid argument supplied for pixel_coords_sf!\n",
"Please supply an object with 'sfc_POINT' geometries.")
}
# Check whether the number of point coordinates exceeds 100,000
if(length(sf::st_geometry(pixel_coords_sf)) > 100000){
cat(crayon::red("Warning: Extraction requested for more than 100000 point locations!\n"))
warning("Extraction requested for more than 100000 point locations!")
answer <- readline("Would you like to continue nonetheless? (not recommended!) [y/n]: ")
if(answer == "n"){
cat("Okay, stopping extraction.\n")
return(NULL)
} else if(answer == "y"){
cat("Okay, continuing...\n")
} else {
cat("Invalid answer, stopping extraction.\n")
return(NULL)
}
}
# Check whether the column exists if a column selector was supplied
if((sample_id_from != "sample_id") & !(sample_id_from %in% names(pixel_coords_sf))) {
stop("Invalid columns specificed for 'sample_id_from': ", sample_id_from)
}
# Prep Landsat Time series
bands <- list("SR_B1",
"SR_B2",
"SR_B3",
"SR_B4",
"SR_B5",
"SR_B6",
"SR_B7",
"QA_PIXEL",
"QA_RADSAT")
BAND_LIST <- rgee::ee$List(bands)
# Add-on asset and bands: JRC Global Surface Water (max_extent band, used as water mask)
ADDON <- rgee::ee$Image('JRC/GSW1_0/GlobalSurfaceWater')$
float()$unmask(mask_value)
ADDON_BANDLIST <- rgee::ee$List(list("max_extent"));
# Blank image for "SR_B6" to replace in collections earlier than LS8
ZERO_IMAGE <- rgee::ee$Image(0)$select(list("constant"),
list("SR_B6"))$selfMask()
# Set image properties to export
PROPERTIES <- list("CLOUD_COVER",
"COLLECTION_NUMBER",
"DATE_ACQUIRED",
"GEOMETRIC_RMSE_MODEL",
"LANDSAT_PRODUCT_ID",
'LANDSAT_SCENE_ID',
"PROCESSING_LEVEL",
"SPACECRAFT_ID",
"SUN_ELEVATION")
# Landsat Surface Reflectance collections
ls5_1 <- rgee::ee$ImageCollection("LANDSAT/LT05/C02/T1_L2")
ls5_2 <- rgee::ee$ImageCollection("LANDSAT/LT05/C02/T2_L2")
ls7_1 <- rgee::ee$ImageCollection("LANDSAT/LE07/C02/T1_L2")
ls7_2 <- rgee::ee$ImageCollection("LANDSAT/LE07/C02/T2_L2")
ls8_1 <- rgee::ee$ImageCollection("LANDSAT/LC08/C02/T1_L2")
ls8_2 <- rgee::ee$ImageCollection("LANDSAT/LC08/C02/T2_L2")
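# Combine the Landsat band list with the add-on band list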
ALL_BANDS <- BAND_LIST$cat(ADDON_BANDLIST)
# merge all collections into one
LS_COLL <- ls5_1$
merge(ls7_1$
merge(ls8_1$
merge(ls5_2$
merge(ls7_2$
merge(ls8_2)))))$
filterDate(start_date, end_date)$
filter(rgee::ee$Filter$calendarRange(start_doy,
end_doy,
"day_of_year"))$
#.filterBounds(table)
map(function(image){
image = rgee::ee$Algorithms$If(image$bandNames()$size()$
eq(rgee::ee$Number(10)),
image,
image$addBands(ZERO_IMAGE))
return(image)})$
map(function(image) {return(image$addBands(ADDON, ADDON_BANDLIST))})$
select(ALL_BANDS)$
map(function(image){ return(image$float())} )
# Check if buffer_dist was specified; if so, retrieve all pixel centers
# within the buffer using lsat_get_pixel_centers
if(buffer_dist > 0){
pixel_coords_sf_buffered <- pixel_coords_sf %>%
split(sf::st_drop_geometry(pixel_coords_sf)[,sample_id_from]) %>%
purrr::map(lsat_get_pixel_centers,
buffer = buffer_dist + 15,
pixel_prefix_from = sample_id_from) %>%
dplyr::bind_rows()
# re-add chunks_from column to data frame
pixel_coords_sf_buffered$sample_id_original <-
gsub("(.*)_[0-9]*$",
"\\1",
sf::st_drop_geometry(pixel_coords_sf_buffered)[,"sample_id"])
names(pixel_coords_sf_buffered)[
names(pixel_coords_sf_buffered) == "sample_id_original"] <-
paste0(sample_id_from, "_original")
names(pixel_coords_sf)[names(pixel_coords_sf) == sample_id_from] <-
paste0(sample_id_from, "_original")
pixel_coords_sf_buffered <- pixel_coords_sf %>%
sf::st_drop_geometry() %>%
dplyr::full_join(pixel_coords_sf_buffered, .)
pixel_coords_sf <- pixel_coords_sf_buffered
}
# Check if chunks_from was specified; if not, determine chunks by size
if(!is.null(chunks_from)){
if(!(chunks_from %in% colnames(pixel_coords_sf))) {
stop("Invalid colum name specified for chunks_from: ", chunks_from)
}
} else {
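# Assign consecutive chunk ids, with at most max_chunk_size samples per chunk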
n_chunks <- floor(nrow(pixel_coords_sf) / max_chunk_size) + 1
pixel_coords_sf$chunk_id <-
paste0("chunk_",
sort(rep(1:n_chunks, max_chunk_size)))[1:nrow(pixel_coords_sf)]
chunks_from <- "chunk_id"
}
# Check if this_chunk_only was specified; if so, remove all other chunks
if(!is.null(this_chunk_only)){
if(!(this_chunk_only %in% (pixel_coords_sf %>%
sf::st_drop_geometry() %>%
as.data.frame() %>%
.[,chunks_from]))){
stop("Could not find chunk specified: ",
this_chunk_only)
}
pixel_coords_sf <- pixel_coords_sf[
(sf::st_drop_geometry(pixel_coords_sf) %>%
as.data.frame() %>% .[, chunks_from]) == this_chunk_only,]
}
# Status:
cat(paste0("Exporting time-series for ",
nrow(pixel_coords_sf),
" pixels",
" in ",
length(unique(sf::st_drop_geometry(pixel_coords_sf) %>%
as.data.frame() %>%
.[,chunks_from])),
" chunks.\n"))
# Retrieve time-series by chunk
task_list <- pixel_coords_sf %>%
split(., sf::st_drop_geometry(.)[,chunks_from]) %>%
purrr::map(function(chunk){
# Status
cat(paste0("Submitting task to EE for chunk_id: ",
sf::st_drop_geometry(chunk) %>%
as.data.frame() %>%
.[1, chunks_from],
".\n"))
# Upload chunk to EE, keeping only the necessary columns to reduce size
ee_chunk <- rgee::sf_as_ee(chunk[,c("geometry",
sample_id_from,
chunks_from)])
# Retrieve Landsat time-series
ee_chunk_export <- ee_chunk$map(function(feature){
return(
# Create an ImageCollection containing a single empty image
# This will ensure all bands are present in the export
rgee::ee$ImageCollection$fromImages(
list(rgee::ee$Image(list(0,0,0,0,0,0,0,0,0,0))$
select(list(0,1,2,3,4,5,6,7,8,9), ALL_BANDS)$
copyProperties(ls8_1$first())))$
# Merge with the time-series extracted from the Landsat collection
merge(LS_COLL$filterBounds(feature$geometry()))$
# For each image in the collection ....
map(function(image){
# Create a feature
return(rgee::ee$Feature(feature$geometry(),
# fill it with the point value extracted with
# reduceRegion with first() reducer at scale
image$reduceRegion(rgee::ee$Reducer$first(),
feature$geometry(),
scale))$
# copy the image properties to the feature
# (incl. date and image metadata) as specified above
copyProperties(image, PROPERTIES)$
# assign pixel and chunk id columns for identification
set(sample_id_from, feature$get(sample_id_from))$
set(chunks_from, feature$get(chunks_from)))
}))
})$flatten()
# Prepare export task
chunk_task <- rgee::ee_table_to_drive(
collection = ee_chunk_export,
description = paste0("lsatTS_export_",
sf::st_drop_geometry(chunk) %>%
as.data.frame() %>%
.[1, chunks_from]),
folder = drive_export_dir,
fileNamePrefix = paste0(file_prefix,
"_",
sf::st_drop_geometry(chunk) %>%
as.data.frame() %>%
.[1, chunks_from]),
timePrefix = F,
fileFormat = "csv")
# Submit export task
chunk_task$start()
# Return the export task
return(chunk_task)
})
# Status update
cat(crayon::green("Done!\n"))
cat("You can monitor the progress of the task(s)",
"using rgee's ee_monitoring() or the GEE WebAPI.\n")
return(task_list)
}
## EOF