R/lsat_export_ts.R

Defines functions lsat_export_ts

Documented in lsat_export_ts

#' Export reflectance time-series from the Landsat record using rgee
#'
#' This function exports surface reflectance time series for a set of
#' point-coordinates from the whole Landsat Collection 2 record using the Google
#' Earth Engine (GEE). The resulting time-series can then be processed
#' using the remainder of the LandsatTS workflow.\cr\cr
#' For polygon geometries consider using lsat_get_pixel_centers() to generate
#' pixel center coordinates for all pixels within a given polygon first. \cr\cr
#' Please note: Unlike other functions in this package, this function does
#' NOT return the time-series as an object, instead it returns a list of GEE
#' tasks issued for the export. The actual time-series are exported as CSV
#' objects via  GEE to the user's Google Drive. This way of exporting allows
#' for a more efficient scheduling, larger exports, and does not require the R
#' session to continue to run in the background while the requests are processed
#' on the GEE. \cr\cr
#' The progress of the exports can be monitored using the list of tasks returned
#' in combination with ee_monitoring() from the rgee package, or simply by
#' using the task overview in the GEE web code-editor
#' (https://code.earthengine.google.com).
#'
#' @param pixel_coords_sf Simple feature object of point coordinates for the
#'   sample.
#' @param sample_id_from The column name that specifies the unique sample
#'   identifier in pixel_coords_sf (defaults to "sample_id" as generated by
#'   lsat_get_pixel_centers).
#' @param chunks_from Column name in pixel_coords_sf to divide the exports into
#'   chunks. Over-rides chunk division by size (see max_chunk_size).
#' @param this_chunk_only Name of a specific chunk to be exported. Useful for
#'   re-exporting a single chunk should the export fail for some reason.
#' @param max_chunk_size Maximum number of sample coordinates to be exported in
#'   each chunk. Defaults to 250.
#' @param drive_export_dir Folder on the user's Google Drive to export the
#'   records to. Defaults to "lsatTS_export".
#' @param file_prefix Optional file_prefix for the exported files.
#' @param start_doy Optional first day of year to extract for. Defaults to 152.
#' @param end_doy Optional last day of year to extract for. Defaults to 243.
#' @param start_date Optional extraction start date (as string,
#'   format YYYY-MM-DD). Defaults to "1984-01-01".
#' @param end_date Optional extraction end date (as string, format YYYY-MM-DD).
#'   Defaults to today's date.
#' @param buffer_dist Buffer distance around sample coordinates. Wrapper for
#'   lsat_get_pixel_centers() to find all Landsat pixel centers around each
#'   point in pixel_coords_sf within the specified buffer distance (square)).
#'   Can be slow if the number of points is large. Defaults to 0 m.
#' @param scale scale for extraction. Defaults to 30 m nominal Landsat pixel
#'   size.
#' @param mask_value Optional masking value for global surface water mask.
#'   Defaults to 0.
#' @import geojsonio
#' @return List of initiated rgee tasks.
#'
#' @author Jakob J. Assmann and Richard Massey
#' @export lsat_export_ts
#'
#' @examples
#' # Only run example if "rgee" is installed
#' if (requireNamespace("rgee", quietly = TRUE)) { 
#' 
#' # Using sf, dplyr and rgee
#' library(sf)
#' library(dplyr)
#' library(rgee)
#'
#' # Initialize GEE
#' ee_Initialize()
#'
#' # Generate test points
#' test_points_sf <- st_sfc(st_point(c(-149.6026, 68.62574)),
#'                          st_point(c(-149.6003, 68.62524)),
#'                          st_point(c(-75.78057, 78.87038)),
#'                          st_point(c(-75.77098, 78.87256)),
#'                          st_point(c(-20.56182, 74.47670)),
#'                          st_point(c(-20.55376, 74.47749)), crs = 4326) %>%
#'   st_sf() %>%
#'   mutate(sample_id = c("toolik_1",
#'                       "toolik_2",
#'                       "ellesmere_1",
#'                       "ellesmere_1",
#'                       "zackenberg_1",
#'                       "zackenberg_2"),
#'          region = c("toolik", "toolik",
#'                     "ellesmere", "ellesmere",
#'                     "zackenberg", "zackenberg"))
#'
#' # Export time-series using lsat_export_ts()
#' task_list <- lsat_export_ts(test_points_sf)
#'
#' # Export time-series using with a chunk size of 2
#' task_list <- lsat_export_ts(test_points_sf, max_chunk_size = 2)
#'
#' # Export time-series in chunks by column
#' task_list <- lsat_export_ts(test_points_sf, chunks_from = "region")
#' 
#' # Closing bracket for the "rgee" check   
#' }

lsat_export_ts <- function(pixel_coords_sf,
                             sample_id_from = "sample_id",
                             chunks_from = NULL,
                             this_chunk_only = NULL,
                             max_chunk_size = 250,
                             drive_export_dir = "lsatTS_export",
                             file_prefix = "lsatTS_export",
                             start_doy = 152,
                             end_doy = 243,
                             start_date = "1984-01-01",
                             end_date = "today",
                             buffer_dist = 0,
                             scale = 30,
                             mask_value = 0
                             ){

  # confirm rgee is initialized
  tryCatch(rgee::ee_user_info(quiet = T),
           error = function(e) {
           stop("rgee not initialized!\nPlease intialize
                rgee. See: https://r-spatial.github.io/rgee/index.html")
             })

  # Check whether end_date was supplied and if not set to today's.
  if(end_date == "today") end_date <- as.character(Sys.Date())

  # Turn s2 off in sf for backwards compatibility
  sf::sf_use_s2(FALSE)

  # Check whether pixel_coords_sf is an sf object with an sfc of points
  if(!("sfc_POINT" %in% class(sf::st_geometry(pixel_coords_sf)))) {
    stop("Invalid argument supplied for pixel_coords_sf!\n",
         "Please supply an object with 'sfc_POINT' geometries.")
  }
  # Check wether number of point coordinates exceeds 100 000
  if(length(sf::st_geometry(pixel_coords_sf)) > 100000){
    cat(crayon::red("Warning: Extraction requested for more than 100000 point locations!\n"))
    warning("Extraction requested for more than 100000 point locations!")
    answer <- readline("Would you like to continue nonetheless? (not recommended!) [y/n]: ")
    if(answer == "n"){
      cat("Okay, stopping extraction.\n")
      return(NULL)
    } else if(answer == "y"){
      cat("Okay, continuing...\n")
    } else {
      cat("Invalid answer, stopping extraction.\n")
      return(NULL)
    }
  }

  # Check whether columns exists if column selectors were supplied
  if((sample_id_from != "sample_id") & !(sample_id_from %in% names(pixel_coords_sf))) {
    stop("Invalid columns specificed for 'sample_id_from': ", sample_id_from)
  }

  # Prep Landsat Time series
  bands <- list("SR_B1",
                "SR_B2",
                "SR_B3",
                "SR_B4",
                "SR_B5",
                "SR_B6",
                "SR_B7",
                "QA_PIXEL",
                "QA_RADSAT")
  BAND_LIST <- rgee::ee$List(bands)

  # addon assets and bands
  ADDON <- rgee::ee$Image('JRC/GSW1_0/GlobalSurfaceWater')$
    float()$unmask(mask_value)
  ADDON_BANDLIST <- rgee::ee$List(list("max_extent"));

  # Blank image for "SR_B6" to replace in collections earlier than LS8
  ZERO_IMAGE <- rgee::ee$Image(0)$select(list("constant"),
                                         list("SR_B6"))$selfMask()

  # Set image properties to export
  PROPERTIES <- list("CLOUD_COVER",
                     "COLLECTION_NUMBER",
                     "DATE_ACQUIRED",
                     "GEOMETRIC_RMSE_MODEL",
                     "LANDSAT_PRODUCT_ID",
                     'LANDSAT_SCENE_ID',
                     "PROCESSING_LEVEL",
                     "SPACECRAFT_ID",
                     "SUN_ELEVATION")

  # Landsat Surface Reflectance collections
  ls5_1 <- rgee::ee$ImageCollection("LANDSAT/LT05/C02/T1_L2")
  ls5_2 <- rgee::ee$ImageCollection("LANDSAT/LT05/C02/T2_L2")
  ls7_1 <- rgee::ee$ImageCollection("LANDSAT/LE07/C02/T1_L2")
  ls7_2 <- rgee::ee$ImageCollection("LANDSAT/LE07/C02/T2_L2")
  ls8_1 <- rgee::ee$ImageCollection("LANDSAT/LC08/C02/T1_L2")
  ls8_2 <- rgee::ee$ImageCollection("LANDSAT/LC08/C02/T2_L2")

  ALL_BANDS <- BAND_LIST$cat(ADDON_BANDLIST)

  # merge all collections into one
  LS_COLL <- ls5_1$
    merge(ls7_1$
         merge(ls8_1$
                merge(ls5_2$
                       merge(ls7_2$
                              merge(ls8_2)))))$
    filterDate(start_date, end_date)$
    filter(rgee::ee$Filter$calendarRange(start_doy,
                                         end_doy,
                                         "day_of_year"))$
    #.filterBounds(table)
    map(function(image){
      image = rgee::ee$Algorithms$If(image$bandNames()$size()$
                                 eq(rgee::ee$Number(10)),
                               image,
                               image$addBands(ZERO_IMAGE))
      return(image)})$
    map(function(image) {return(image$addBands(ADDON, ADDON_BANDLIST))})$
    select(ALL_BANDS)$
    map(function(image){ return(image$float())} )

  # Check if buffer_dist was specified if yes, retrieve points in buffer using
  # lsat_get_pixel_centers
  if(buffer_dist > 0){
    pixel_coords_sf_buffered <- pixel_coords_sf %>%
      split(sf::st_drop_geometry(pixel_coords_sf)[,sample_id_from]) %>%
      purrr::map(lsat_get_pixel_centers,
                 buffer = buffer_dist + 15,
                 pixel_prefix_from = sample_id_from) %>%
      dplyr::bind_rows()
    # re-add chunks_from column to data frame
    pixel_coords_sf_buffered$sample_id_original <-
      gsub("(.*)_[0-9]*$",
           "\\1",
           sf::st_drop_geometry(pixel_coords_sf_buffered)[,"sample_id"])
    names(pixel_coords_sf_buffered)[
      names(pixel_coords_sf_buffered) == "sample_id_original"] <-
      paste0(sample_id_from, "_original")
    names(pixel_coords_sf)[names(pixel_coords_sf) == sample_id_from] <-
      paste0(sample_id_from, "_original")
    pixel_coords_sf_buffered <- pixel_coords_sf %>%
      sf::st_drop_geometry() %>%
      dplyr::full_join(pixel_coords_sf_buffered, .)
    pixel_coords_sf <- pixel_coords_sf_buffered
  }

  # Check if chunks_from was specified, if not determine chunks
  if(!is.null(chunks_from)){
    if(!(chunks_from %in% colnames(pixel_coords_sf))) {
      stop("Invalid colum name specified for chunks_from: ", chunks_from)
    }
  } else {
    n_chunks <- floor(nrow(pixel_coords_sf) / max_chunk_size) + 1
    pixel_coords_sf$chunk_id <-
      paste0("chunk_",
             sort(rep(1:n_chunks, max_chunk_size)))[1:nrow(pixel_coords_sf)]
    chunks_from <- "chunk_id"
  }

  # Check if this_chunk_only was specified if so remove all other chunks
  if(!is.null(this_chunk_only)){
    if(!(this_chunk_only %in% (pixel_coords_sf %>%
                               sf::st_drop_geometry() %>%
                               as.data.frame() %>%
                               .[,chunks_from]))){
      stop("Could not find chunk specified: ",
                  this_chunk_only)
    }
    pixel_coords_sf <- pixel_coords_sf[
      (sf::st_drop_geometry(pixel_coords_sf) %>%
        as.data.frame() %>% .[, chunks_from]) == this_chunk_only,]
  }

  # Status:
  cat(paste0("Exporting time-series for ",
             nrow(pixel_coords_sf),
             " pixels",
             " in ",
             length(unique(sf::st_drop_geometry(pixel_coords_sf) %>%
                             as.data.frame() %>%
                             .[,chunks_from])),
             " chunks.\n"))

  # Retrieve time-series by chunk
  task_list <- pixel_coords_sf %>%
    split(., sf::st_drop_geometry(.)[,chunks_from]) %>%
    purrr::map(function(chunk){
      # Status
      cat(paste0("Submitting task to EE for chunk_id: ",
                 sf::st_drop_geometry(chunk) %>%
                   as.data.frame() %>%
                   .[1, chunks_from],
                 ".\n"))
      # Upload chunk to sf to reduce size keep only necessary columns
      ee_chunk <- rgee::sf_as_ee(chunk[,c("geometry",
                                          sample_id_from,
                                          chunks_from)])
      # Retrieve Landsat time-series
      ee_chunk_export <- ee_chunk$map(function(feature){
          return(
            # Create FC containing a single empty image
            # This will ensure all bands are present in the export
            rgee::ee$ImageCollection$fromImages(
            list(rgee::ee$Image(list(0,0,0,0,0,0,0,0,0,0))$
                   select(list(0,1,2,3,4,5,6,7,8,9), ALL_BANDS)$
                   copyProperties(ls8_1$first())))$
              # Merge with extraction of time-series form Landsat collection
              merge(LS_COLL$filterBounds(feature$geometry()))$
                # For each image in the collection ....
                map(function(image){
                  # Create a feature
                return(rgee::ee$Feature(feature$geometry(),
                                  # fill it with the point value extracted with
                                  # reduceRegion with first() reducer at scale
                                  image$reduceRegion(rgee::ee$Reducer$first(),
                                                     feature$geometry(),
                                                     scale))$
                         # copy the image properties to the feature
                         # (incl. date and image metadata) as specified above
                         copyProperties(image, PROPERTIES)$
                         # assign a pixel and chunk id columns for identification
                         set(sample_id_from, feature$get(sample_id_from))$
                         set(chunks_from, feature$get(chunks_from)))
              }))
        })$flatten()
      # Prepare export task
      chunk_task <- rgee::ee_table_to_drive(
        collection = ee_chunk_export,
        description = paste0("lsatTS_export_",
                             sf::st_drop_geometry(chunk) %>%
                             as.data.frame() %>%
                               .[1, chunks_from]),
        folder = drive_export_dir,
        fileNamePrefix = paste0(file_prefix,
                                "_",
                                sf::st_drop_geometry(chunk) %>%
                                  as.data.frame() %>%
                                  .[1, chunks_from]),
        timePrefix = F,
        fileFormat = "csv")
      # Submit export task
      chunk_task$start()

      # Return nothing
      return(chunk_task)
    })
  # Status update
  cat(crayon::green("Done!\n"))
  cat("You can monitor the progress of the task(s)",
      "using rgee's ee_monitoring() or the GEE WebAPI.\n")
  return(task_list)
}

## EOF
logan-berner/lsatTS documentation built on Oct. 21, 2024, 12:23 a.m.