knitr::opts_chunk$set(echo = TRUE, 
                      eval = TRUE)
library(reticulate)
library(makeDataCube)
library(tidyverse)
library(sf)
library(unixtools)
library(lubridate)
library(parallel)
# Find Python on this machine (location may vary across users)

import("pylandsat") # Tip: this loads the (first) Python environment containing pylandsat (installed via `pip install pylandsat`). 

# If a specific environment is required, use: 
#
# `use_python(<python path>, required = TRUE)` # Where <python path> could be, for instance, "anaconda3/bin/python"
# or
# `use_condaenv(<environment name>)`
# or
# `use_virtualenv(<environment name>)`
# instead of `import`.
#
# Note: changing Python version requires restarting RStudio.

py_config()
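
If a specific environment must be pinned rather than discovered, the import above can be preceded by an explicit selection at the start of a fresh R session (reticulate binds to a Python interpreter on first use). A minimal sketch, assuming a conda environment named "landsat" with pylandsat installed (the environment name is hypothetical):

# Sketch: pin reticulate to a specific conda environment. Run this in a
# fresh session, before any import(). "landsat" is a hypothetical name.
library(reticulate)
use_condaenv("landsat", required = TRUE)
pylandsat <- import("pylandsat")
py_config() # confirm which interpreter was selected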

Description

This document generates a data cube of Level-2 Landsat data over a user-defined area and study period, using only Landsat scenes whose cloud cover falls below a user-defined threshold. The Landsat Level-1 scenes are downloaded from the public dataset hosted on Google Cloud, processed to Level-2, and assembled into a data cube with the FORCE software.

Requirements

Installation

This script requires certain programs to be installed on your computer in order to work. The guide below walks you through this process.

Credentials

This script connects to several data sources, some of which require credentials, typically a username and a password. Please follow this guide to obtain yours:

Visual workflow

(Figure: workflow diagram)

Inputs

# Extent of the area of interest: a vector with xmin, xmax, ymin, ymax, in degrees
ext <- c(-62.225300382434575,-62.115437101184575,1.7039980451907109,1.7973382355954262)
# Minimum and maximum cloud cover of the Landsat images (images outside this cloud cover range will not be downloaded)
cld <- c(0,50)
# Tier level of Landsat data (indicates the quality of the data)
tiers <- 'T1'
# Sensors of interest
# Valid sensors: LT04 - Landsat 4 TM, LT05 - Landsat 5 TM, LE07 - Landsat 7 ETM+,
# LC08 - Landsat 8 OLI, S2A - Sentinel-2A MSI, S2B - Sentinel-2B MSI
sensors <- c('LC08', 'LE07', 'LT05', 'LT04')

# The folder where the data cube (and auxiliary data) will be stored
forcefolder <- normalizePath(file.path('..', 'data'))
# A directory where all temporary R files should be stored
Rtmpfolder <- normalizePath(file.path('..', 'data'))

## ==== These inputs are read from the document's header ====
# Start date of the study period: year, month, day
starttime <- date_to_vec(params$starttime)
# End date of the study period: year, month, day
endtime <- date_to_vec(params$endtime)
# ===== Parallelization parameters =====

# Level 1 download
# We'll parallelize this process by splitting the time domain into a number of
# subintervals. The data corresponding to each subinterval will be downloaded
# by a different core.
nsubintervals <- 1

# Level 2 processing
# These parameters are written in the configuration file
# The parallelization is managed directly by FORCE
# See the tutorials to learn more about the processes / thread balance:
# https://force-eo.readthedocs.io/en/latest/howto/l2-ard.html#parallel-processing
nproc <- '6'
nthread <- '1'
# Generate the date subintervals
subintervals <- partition_dates(starttime, endtime, nsubintervals)
starttimes <- lapply(int_start(subintervals), date_to_vec) # Extract starttimes as vectors
endtimes <- lapply(int_end(subintervals), date_to_vec) # Extract endtimes as vectors
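
As a quick sanity check before launching any downloads, the extent vector can be turned into a bounding-box polygon with sf (already loaded above) and inspected. This is an optional sketch, not part of the workflow:

# Optional sanity check: build a WGS84 bounding box from the extent vector
# (recall that ext is ordered xmin, xmax, ymin, ymax)
aoi <- sf::st_as_sfc(sf::st_bbox(c(xmin = ext[1], xmax = ext[2],
                                   ymin = ext[3], ymax = ext[4]),
                                 crs = sf::st_crs(4326)))
aoi # print the polygon; plot(aoi) shows it graphically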

Generate the directory structure

# Set the folder to store temporary data
unixtools::set.tempdir(Rtmpfolder)
# Set the upper memory limit for raster data. The raster package will move data to disk if usage exceeds this limit (defaults to 100 MB).
# options(rasterMaxMemory = 1e10)

# Generate folder structure
fldrs <- setFolders(forcefolder)

# TODO: these assignments are redundant, can be removed
tmpfolder <- fldrs['tmpfolder']
l1folder <- fldrs['l1folder']
l2folder <- fldrs['l2folder']
queuefolder <- fldrs['queuefolder']
queuefile <- fldrs['queuefile']
demfolder <- fldrs['demfolder']
wvpfolder <- fldrs['wvpfolder']
logfolder <- fldrs['logfolder']
paramfolder <- fldrs['paramfolder']
demlogfile <- fldrs['demlogfile']
wvplogfile <- fldrs['wvplogfile']
metafolder <- fldrs['metafolder']
S2auxfolder <- fldrs['S2auxfolder']
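
If keeping the individual variables is still convenient despite the TODO above, a compact alternative (a sketch, assuming fldrs is the named character vector returned by setFolders()) is:

# Sketch: create all of the variables above in one step
list2env(as.list(fldrs), envir = environment())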

Generate a parameter file

paramfile <- 'l2param.prm'

cfg <- gen_params(FILE_QUEUE = file.path(fldrs['queuefolder'], fldrs['queuefile']),
                  DIR_LEVEL2 = fldrs['l2folder'],
                  DIR_LOG = fldrs['logfolder'],
                  DIR_TEMP = fldrs['tmpfolder'],
                  FILE_DEM = file.path(fldrs['demfolder'], 'srtm.vrt'),
                  ORIGIN_LON = '-90',
                  ORIGIN_LAT = '60',
                  RESAMPLING = 'NN',
                  DIR_WVPLUT = fldrs['wvpfolder'],
                  RES_MERGE = 'REGRESSION',
                  NPROC = nproc,
                  NTHREAD = nthread,
                  DELAY = '10',
                  OUTPUT_DST = 'TRUE',
                  OUTPUT_VZN = 'TRUE',
                  OUTPUT_HOT = 'TRUE',
                  OUTPUT_OVV = 'TRUE',
                  DEM_NODATA = '-32768',
                  TILE_SIZE = '3000',
                  BLOCK_SIZE = '300',
                  as.list = TRUE)

export_params(cfg, file = file.path(paramfolder, paramfile), overwrite = TRUE)
export_params(cfg, file = file.path(paramfolder, paste0(paramfile, ".bak")), overwrite = TRUE)

# Expected outputs:
# {paramfolder}/l2param.prm (typically data/param/l2param.prm) # Parameter file
# {paramfolder}/l2param.prm.bak (typically data/param/l2param.prm.bak) # Parameter file backup
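
To verify that the configuration landed on disk as intended, the generated file can be printed back; an optional check:

# Optional check: inspect the first lines of the generated parameter file
head(readLines(file.path(paramfolder, paramfile)), 20)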

Search Landsat scenes, download and add to queue

# Update metadata folder only if asked to
if(params$updatemeta) systemf("force-level1-csd -u %s", metafolder)
# TODO: this can be done only once
# Run only if required
if(params$updatel1) {

# Auxiliary variable
queuepath <- cfg$FILE_QUEUE

# Auxiliary function
# This is just getScenes() with all parameters pre-set, except for starttime
# and endtime, so that it can be called from the mcmapply() functional below.
# The underlying call looks like:
# force-level1-csd -c 0,50 -d 20001101,20010528 -s LC08,LE07,LT05,LT04 data/misc/meta data/level1 data/level1/queue.txt data/temp/file55666e2730f1.shp
getScenesAux <- function(starttime, endtime) {
  getScenes(ext, queuepath, l1folder, metafolder, tmpfolder,
            cld = cld, starttime = starttime, endtime = endtime,
            tiers = tiers, sensors = sensors)
}

# If nsubintervals != 1, this runs in parallel
mcmapply(getScenesAux, starttimes, endtimes, mc.cores = nsubintervals)

# If nsubintervals == 1, the whole chunk is equivalent to:
# getScenes(ext, queuepath, l1folder, metafolder, tmpfolder, cld = cld, starttime = starttime, endtime = endtime, tiers = tiers, sensors = sensors)

}
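
Note that mcmapply() parallelizes via forking, which is unavailable on Windows (there the call only works with mc.cores = 1, i.e. serially). A portable alternative is a PSOCK cluster; the sketch below assumes the helper and variables defined in the chunk above:

# Sketch: portable (non-fork) parallel download with a PSOCK cluster
cl <- parallel::makeCluster(nsubintervals)
parallel::clusterEvalQ(cl, library(makeDataCube)) # getScenes() on each worker
parallel::clusterExport(cl, c("ext", "queuepath", "l1folder", "metafolder",
                              "tmpfolder", "cld", "tiers", "sensors"))
parallel::clusterMap(cl, getScenesAux, starttimes, endtimes)
parallel::stopCluster(cl)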

Download the Digital Elevation Model (DEM)

# Download the DEM over the study area
flsDEM <- dllDEM(ext, dl_dir = demfolder, logfile = demlogfile)

# Expected outputs:
# A collection of files like
# {demfolder}/S04W043.hgt (typically: misc/dem/[lat][lon].hgt)
# TODO: this can be done only once
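
Before building the VRT in the next chunk, it can be worth confirming that the tiles actually arrived; an optional sketch:

# Optional check: list the downloaded SRTM tiles
list.files(demfolder, pattern = "\\.hgt$")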

Generate a GDAL Virtual Format (VRT)

# Generate a VRT
# Note that this requires the {demfolder}/*.hgt files generated in the DEM chunk above
#dllVRT(demfolder)
systemf("find %s -name '*.hgt' > %s", demfolder, file.path(demfolder, "srtm.txt"))

systemf("gdalbuildvrt -input_file_list %s %s", file.path(demfolder, 'srtm.txt'), file.path(demfolder, 'srtm.vrt'))

# Expected outputs:
# {demfolder}/srtm.txt (typically: misc/dem/srtm.txt)
# {demfolder}/srtm.vrt (typically: misc/dem/srtm.vrt)
# TODO: check if this can be done only once
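
The two system calls above require find and gdalbuildvrt to be on the PATH. A portable alternative (a sketch using sf's GDAL bindings, with sf already loaded above) avoids shelling out:

# Sketch: build the same VRT without shelling out, via sf's GDAL bindings
hgt <- list.files(demfolder, pattern = "\\.hgt$", full.names = TRUE)
sf::gdal_utils("buildvrt", source = hgt,
               destination = file.path(demfolder, "srtm.vrt"))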

Download the Water Vapor Database (WVP) data

dllWVP(wvpfolder, logfile = wvplogfile, endtime = endtime)

# Expected outputs:
# {wvpfolder}/wrs-2-land.coo (typically: misc/wvp/wrs-2-land.coo)
# {wvpfolder}/wvp-global.tar.gz
# A large collection of files like:
# {wvpfolder}/WVP_0000-01-00.txt
# TODO: check if this can be done only once
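
Since the water vapor database rarely needs refreshing, one way to address the TODO above (a sketch, keyed on the archive named in the expected outputs) is to skip the download when that archive is already present:

# Sketch: download the WVP data only if the archive is not already on disk
if (!file.exists(file.path(wvpfolder, "wvp-global.tar.gz"))) {
  dllWVP(wvpfolder, logfile = wvplogfile, endtime = endtime)
}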

Process the Level-1 data to Level-2 and generate a VRT

if(params$updatel2) process2L2(paramfolder, paramfile, l2folder)
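
After FORCE finishes, a quick way to confirm that Level-2 products were written is to count the files under the Level-2 folder; an optional sketch:

# Optional check: count files written under the Level-2 folder
length(list.files(l2folder, recursive = TRUE))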

