```r
knitr::opts_chunk$set(echo = TRUE, eval = TRUE)

library(reticulate)
library(makeDataCube)
library(tidyverse)
library(sf)
library(unixtools)
library(lubridate)
library(parallel)
```
```r
# Find Python on this machine (the location may vary across users)
import("pylandsat")
# Tip: this loads the (first) Python environment containing pylandsat
# (installed via `pip install pylandsat`).
# If a specific environment is required, use:
#
# `use_python(<python path>, required = TRUE)`
# where <python path> could be, for instance, "anaconda3/bin/python"
# or
# `use_condaenv(<environment name>)`
# or
# `use_virtualenv(<environment name>)`
# instead of `import`.
#
# Note: changing the Python version requires restarting RStudio.
py_config()
```
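If `import("pylandsat")` fails, it can help to test for the module explicitly before importing. A minimal sketch using reticulate's `py_module_available()` (the error message text is illustrative):

```r
# Check that the active Python environment provides pylandsat before importing
if (!py_module_available("pylandsat")) {
  stop("pylandsat not found; install it with `pip install pylandsat`, ",
       "or point reticulate at the right environment with use_python() / use_condaenv()")
}
```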
This document generates a data cube of Level-2 Landsat data over a user-defined area and study period, using only Landsat scenes below a user-defined cloud-cover threshold. The Landsat Level-1 scenes are downloaded from the public dataset hosted on Google Cloud, processed to Level-2, and assembled into a data cube using the FORCE software.
This script requires certain programs to be installed on your computer in order to work. The guide below will help you through this process.

Install the required Python packages:

```sh
pip install pylandsat
```

and

```sh
pip install shapely
```

Missing system libraries can be installed with:

```sh
sudo apt-get install <library name>
```
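As a quick sanity check (an optional sketch, not part of the original workflow), the snippet below verifies that the command-line tools used later in this document are on the PATH; the tool names are taken from the chunks further down:

```r
# Optional check: are the external tools used below available on the PATH?
tools <- c("force-level1-csd", "gdalbuildvrt")
found <- nzchar(Sys.which(tools))
if (!all(found)) {
  warning("Missing tools: ", paste(tools[!found], collapse = ", "))
}
```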
This script connects to several data sources. Some of them require credentials, typically a username and a password. Please follow this guide to get yours:
Generate a `.netrc` credentials file by executing `makeDataCube::EartDataLogin()` in your R console and following the instructions.
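A quick way to confirm the credentials file was created (a minimal sketch; `~/.netrc` is the standard location on Unix-like systems):

```r
# Check that the credentials file exists in the home directory
netrc <- file.path(Sys.getenv("HOME"), ".netrc")
if (!file.exists(netrc)) message("No .netrc found; run makeDataCube::EartDataLogin() first")
```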
```r
# Extent of the area of interest, vector with xmin, xmax, ymin, ymax in degrees
ext <- c(-62.225300382434575, -62.115437101184575, 1.7039980451907109, 1.7973382355954262)

# Minimum and maximum cloud cover of the Landsat images
# (images outside this cloud cover range will not be downloaded)
cld <- c(0, 50)

# Tier level of Landsat data (gives information about the quality of the data)
tiers <- 'T1'

# Sensors of interest
# Valid sensors: LT04 - Landsat 4 TM, LT05 - Landsat 5 TM, LE07 - Landsat 7 ETM+,
# LC08 - Landsat 8 OLI, S2A - Sentinel-2A MSI, S2B - Sentinel-2B MSI
sensors <- c('LC08', 'LE07', 'LT05', 'LT04')

# The folder where the data cube (and auxiliary data) will be stored
forcefolder <- normalizePath(file.path('..', 'data'))

# A directory where all temporary R files should be stored
Rtmpfolder <- normalizePath(file.path('..', 'data'))

## ==== These inputs are read from the document's header ====

# Start date of the study period: year, month, day
starttime <- date_to_vec(params$starttime)

# End date of the study period: year, month, day
endtime <- date_to_vec(params$endtime)
```
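For reference, `date_to_vec()` converts a date into the year/month/day vector format used throughout this document. A small illustration (the exact return format is assumed from how `starttime` and `endtime` are used below):

```r
# Illustration (assumed behavior, based on the usage of starttime/endtime below):
# date_to_vec("2000-11-01") should yield a vector like c(2000, 11, 1)
date_to_vec("2000-11-01")
```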
```r
# ===== Parallelization parameters =====

# Level 1 download
# We'll parallelize this process by splitting the time domain in a number of
# subintervals. The data corresponding to each subinterval will be downloaded
# by a different core
nsubintervals <- 1

# Level 2 processing
# These parameters are written in the configuration file
# The parallelization is managed directly by FORCE
# See the tutorials to learn more about the processes / thread balance:
# https://force-eo.readthedocs.io/en/latest/howto/l2-ard.html#parallel-processing
nproc <- '6'
nthread <- '1'
```
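One way to pick `nsubintervals` on a multi-core machine (a sketch, shown commented out so the value chosen above stands; the one-core-per-subinterval mapping mirrors the `mc.cores = nsubintervals` call further down):

```r
# Optional alternative: use up to all-but-one of the available cores for the
# Level 1 download (parallel is already loaded in the setup chunk)
# nsubintervals <- max(1, detectCores() - 1)
```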
```r
# Generate the date subintervals
subintervals <- partition_dates(starttime, endtime, nsubintervals)
starttimes <- lapply(int_start(subintervals), date_to_vec) # Extract starttimes as vectors
endtimes <- lapply(int_end(subintervals), date_to_vec)     # Extract endtimes as vectors
```
```r
# Set the folder to store temporary data
unixtools::set.tempdir(Rtmpfolder)

# Set the upper memory limit for raster data. Raster will move data to disk
# if it exceeds the upper limit (defaults to 100 MB).
# options(rasterMaxMemory = 1e10)

# Generate folder structure
fldrs <- setFolders(forcefolder)

# TODO: these assignments are redundant, can be removed
tmpfolder <- fldrs['tmpfolder']
l1folder <- fldrs['l1folder']
l2folder <- fldrs['l2folder']
queuefolder <- fldrs['queuefolder']
queuefile <- fldrs['queuefile']
demfolder <- fldrs['demfolder']
wvpfolder <- fldrs['wvpfolder']
logfolder <- fldrs['logfolder']
paramfolder <- fldrs['paramfolder']
demlogfile <- fldrs['demlogfile']
wvplogfile <- fldrs['wvplogfile']
metafolder <- fldrs['metafolder']
S2auxfolder <- fldrs['S2auxfolder']
```
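A quick way to confirm that the folder structure was created (a minimal sketch; the element names are taken from the assignments above):

```r
# Optional check: the main directories created by setFolders() should exist
dirs <- fldrs[c('tmpfolder', 'l1folder', 'l2folder', 'queuefolder',
                'demfolder', 'wvpfolder', 'logfolder', 'paramfolder')]
stopifnot(all(dir.exists(dirs)))
```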
```r
paramfile <- 'l2param.prm'

cfg <- gen_params(FILE_QUEUE = file.path(fldrs['queuefolder'], fldrs['queuefile']),
                  DIR_LEVEL2 = fldrs['l2folder'],
                  DIR_LOG = fldrs['logfolder'],
                  DIR_TEMP = fldrs['tmpfolder'],
                  FILE_DEM = file.path(fldrs['demfolder'], 'srtm.vrt'),
                  ORIGIN_LON = '-90',
                  ORIGIN_LAT = '60',
                  RESAMPLING = 'NN',
                  DIR_WVPLUT = fldrs['wvpfolder'],
                  RES_MERGE = 'REGRESSION',
                  NPROC = nproc,
                  NTHREAD = nthread,
                  DELAY = '10',
                  OUTPUT_DST = 'TRUE',
                  OUTPUT_VZN = 'TRUE',
                  OUTPUT_HOT = 'TRUE',
                  OUTPUT_OVV = 'TRUE',
                  DEM_NODATA = '-32768',
                  TILE_SIZE = '3000',
                  BLOCK_SIZE = '300',
                  as.list = TRUE)

export_params(cfg, file = file.path(paramfolder, paramfile), overwrite = TRUE)
export_params(cfg, file = file.path(paramfolder, paste0(paramfile, ".bak")), overwrite = TRUE)

# Expected outputs:
# {paramfolder}/l2param.prm (typically data/param/l2param.prm)
#   Parameter file
# {paramfolder}/l2param.prm.bak (typically data/param/l2param.prm.bak)
#   Parameter file backup
```
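To double-check what was written, the exported configuration can be read back as plain text (a sketch; FORCE parameter files are plain `KEY = VALUE` text):

```r
# Optional: inspect the first lines of the exported parameter file
head(readLines(file.path(paramfolder, paramfile)), 10)
```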
```r
# Update metadata folder only if asked to
if (params$updatemeta) systemf("force-level1-csd -u %s", metafolder)
# TODO: this can be done only once
```
```r
# Run only if required
if (params$updatel1) {
  # Auxiliary variable
  queuepath <- cfg$FILE_QUEUE

  # Auxiliary function
  # This is just getScenes with all parameters pre-passed, with the exception
  # of starttime and endtime.
  # The purpose is to call it from an mcmapply functional.
  # Example of the underlying call:
  # force-level1-csd -c 0,50 -d 20001101,20010528 -s LC08,LE07,LT05,LT04 data/misc/meta data/level1 data/level1/queue.txt data/temp/file55666e2730f1.shp
  getScenesAux <- function(starttime, endtime) {
    getScenes(ext, queuepath, l1folder, metafolder, tmpfolder,
              cld = cld, starttime = starttime, endtime = endtime,
              tiers = tiers, sensors = sensors)
  }

  # If nsubintervals != 1, this runs in parallel
  mcmapply(getScenesAux, starttimes, endtimes, mc.cores = nsubintervals)

  # If nsubintervals == 1, the whole chunk is equivalent to:
  # getScenes(ext, queuepath, l1folder, metafolder, tmpfolder, cld = cld,
  #           starttime = starttime, endtime = endtime, tiers = tiers,
  #           sensors = sensors)
}
```
```r
# Download DEM over the study area
flsDEM <- dllDEM(ext, dl_dir = demfolder, logfile = demlogfile)

# Expected outputs:
# A collection of files like
# {demfolder}/S04W043.hgt (typically: misc/dem/[lat][lon].hgt)

# TODO: this can be done only once
```
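A quick check that the tiles arrived (a minimal sketch):

```r
# Optional check: count the downloaded SRTM tiles
hgt <- list.files(demfolder, pattern = "\\.hgt$")
message(length(hgt), " DEM tiles in ", demfolder)
```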
```r
# Generate a VRT
# Note that it requires the {demfolder}/*.hgt files generated in the DEM chunk
# dllVRT(demfolder)
systemf("find %s -name '*.hgt' > %s", demfolder, file.path(demfolder, "srtm.txt"))
systemf("gdalbuildvrt -input_file_list %s %s",
        file.path(demfolder, 'srtm.txt'),
        file.path(demfolder, 'srtm.vrt'))

# Expected outputs:
# {demfolder}/srtm.txt (typically: misc/dem/srtm.txt)
# {demfolder}/srtm.vrt (typically: misc/dem/srtm.vrt)

# TODO: check if this can be done only once
```
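The resulting mosaic can be inspected without leaving R (a sketch using `sf::gdal_utils()`, which wraps `gdalinfo`; sf is already loaded in the setup chunk):

```r
# Optional: print gdalinfo metadata for the freshly built VRT
gdal_utils("info", file.path(demfolder, "srtm.vrt"))
```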
```r
dllWVP(wvpfolder, logfile = wvplogfile, endtime = endtime)

# Expected outputs:
# {wvpfolder}/wrs-2-land.coo (typically: misc/wvp/wrs-2-land.coo)
# {wvpfolder}/wvp-global.tar.gz
# A large collection of files like:
# {wvpfolder}/WVP_0000-01-00.txt

# TODO: check if this can be done only once
```
```r
if (params$updatel2) process2L2(paramfolder, paramfile, l2folder)
```
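Once FORCE finishes, the Level-2 products land in `l2folder`. A quick way to list them (a sketch; the exact file layout depends on the FORCE version and configuration):

```r
# Optional: peek at the generated Level-2 data cube files
head(list.files(l2folder, recursive = TRUE), 20)
```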