R/download_Copernicus.R

Defines functions downloadCopernicus

Documented in downloadCopernicus

#' @title Download COPERNICUS marine products
#'
#' @description
#' Download products from Copernicus Marine Environment Monitoring Service (CMEMS).
#' More information about the available data in \url{https://marine.copernicus.eu/}.
#'
#' @usage
#' downloadCopernicus(startDate, endDate, time, time2=NULL, var, minDepth=0.493, maxDepth=0.4942,
#' outDir, outName, Cuser, Cpssw, Cproduct, CproductId, maxLon, minLon, maxLat, minLat,
#' frequency = c("daily", "all"), parallel=FALSE, numCores=NULL)
#'
#' @param startDate Date or character, indicating the begin date to extract the data (Format [yyyy-mm-dd]).
#' @param endDate Date or character, indicating the end date to extract the data (Format [yyyy-mm-dd]).
#' @param time character, indicating the hours, minutes, and seconds of the startDate (Format [HH:MM:SS]).
#' @param time2 character, indicating the hours, minutes, and seconds of the endDate (Format [HH:MM:SS]).
#' If \code{NULL} (the default), \code{time2} is set to \code{time}.
#' @param var Character with the variable name (Only one variable is allowed).
#' @param minDepth Numeric value specifying the minimum depth. Allowed interval [0 ; 2e31]
#' @param maxDepth Numeric value specifying the maximum depth. Allowed interval [0 ; 2e31]
#' @param outDir Character with the output path where downloaded files should be stored.
#' @param outName Character indicating the output file name.
#' @param Cuser Character with the CMEMS user name.
#' @param Cpssw Character with the CMEMS password.
#' @param Cproduct Character with the CMEMS service id.
#' @param CproductId Character with the CMEMS product id.
#' @param maxLon Max. longitude of the area/region to extract. Allowed interval [-180 ; 180]
#' @param minLon Min. longitude of the area/region to extract. Allowed interval [-180 ; 180]
#' @param maxLat Max. latitude of the area/region to extract. Allowed interval [-90 ; 90]
#' @param minLat Min. latitude of the area/region to extract. Allowed interval [-90 ; 90]
#' @param frequency Select the download frequency between `daily` and `all`. `daily`  will download one file per day,
#' whereas `all` will download all the data in one file.
#' @param parallel An optional logical argument indicating whether the download should be run in parallel or not. Default \code{FALSE}
#' @param numCores An integer indicating the number of cores to be used in parallel execution
#'
#' @details Python and motuclient are needed. To check if both are correctly installed run:
#'  \code{system("python --version")} and \code{system("motuclient --version")}.
#' If target files are already present in the \code{outDir} folder, they are not downloaded.
#'
#' More information:
#' \itemize{
#'    \item \url{https://help.marine.copernicus.eu/en/articles/4899195-how-to-write-and-run-a-script-to-download-a-subset-of-a-dataset-from-the-copernicus-marine-data-store}
#'    \item \url{https://help.marine.copernicus.eu/en/articles/4799385-can-i-download-copernicus-marine-data-via-r-or-matlab}
#' }
#'
#' @section Parallelization:
#' If `parallel`=TRUE then the `parallel` / `doParallel` / `foreach` / `doRNG` packages
#' will be used to run the computation over multiple cores of the computer. To
#' use this component you need to install those packages using:
#' `install.packages(c("foreach", "doParallel", "doRNG"))`. It is advised that
#' you do not set `numCores` to be greater than one less than the number of cores
#' on your machine.
#'
#' @examples
#' \dontrun{
#' # Python command generated by CMEMS. Further information in:
#' # https://help.marine.copernicus.eu/en/articles/4899195-how-to-write-and-run-a-script-to-download-a-subset-of-a-dataset-from-the-copernicus-marine-data-store
#'
#' command = "python -m motuclient --motu https://nrt.cmems-du.eu/motu-web/Motu
#' --service-id GLOBAL_ANALYSIS_FORECAST_WAV_001_027-TDS --product-id global-analysis-forecast-wav-001-027
#' --longitude-min -180 --longitude-max 79 --latitude-min -40 --latitude-max 90
#' --date-min '2021-11-25 00:00:00' --date-max '2021-11-25 00:00:00' --variable VHM0 --variable VHM0_SW1
#' --out-dir <OUTPUT_DIRECTORY> --out-name <OUTPUT_FILENAME> --user <USERNAME> --pwd <PASSWORD>"
#'
#' system(command)
#'
#'
#' # Function inputs
#' startDate = "2021-01-01"
#' endDate   = "2021-02-01"
#' time      = "12:00:00"
#' var       = "VHM0"
#' outDir    = "C:/Use/Escritorio/tmp"
#' outName   = "Global_"
#' Cuser     = <your user name>
#' Cpssw     = <your password>
#' Cproduct  = "GLOBAL_ANALYSIS_FORECAST_WAV_001_027-TDS"
#' CproductId = "global-analysis-forecast-wav-001-027"
#' minLon    = -75
#' maxLon    = 20
#' minLat    = -30
#' maxLat    = 20
#'
#' # Daily and in sequential
#' downloadCopernicus(startDate, endDate, time, var = var, outDir = outDir, outName = outName,
#' Cuser = Cuser, Cpssw = Cpssw, Cproduct = Cproduct, CproductId=CproductId,
#' maxLon = maxLon, minLon = minLon, maxLat = maxLat, minLat = minLat, frequency = "daily")
#'
#' # All data together
#' downloadCopernicus(startDate, endDate, time, var = var, outDir = outDir, outName = outName,
#' Cuser = Cuser, Cpssw = Cpssw, Cproduct = Cproduct, CproductId=CproductId,
#' maxLon = maxLon, minLon = minLon, maxLat = maxLat, minLat = minLat, frequency = "all")
#'
#' # Daily and in parallel
#' downloadCopernicus(startDate, endDate, time, var = var, outDir = outDir, outName = outName,
#'                   Cuser = Cuser, Cpssw = Cpssw, Cproduct = Cproduct, CproductId=CproductId,
#'                   maxLon = maxLon, minLon = minLon, maxLat = maxLat, minLat = minLat,
#'                   frequency = "daily", parallel = TRUE, numCores = 4)
#' }
#'
#' @export
downloadCopernicus <- function(startDate, endDate, time, time2=NULL, var, minDepth=0.493, maxDepth=0.4942,
                               outDir, outName, Cuser, Cpssw, Cproduct, CproductId,
                               maxLon, minLon, maxLat, minLat, frequency = c("daily", "all"),
                               parallel=FALSE, numCores=NULL){

  # ---- Validate all arguments BEFORE any side effect (folder creation, cluster) ----
  frequency <- match.arg(frequency)
  stopifnot("Error: only logical values (T/F) are allowed in 'parallel' argument." =
              is.logical(parallel) && length(parallel) == 1L && !is.na(parallel))
  stopifnot("Error: only one variable is allowed in 'var' argument (length(var)>1)." = length(var) == 1)

  startDay <- as.Date(startDate, format = "%Y-%m-%d")
  endDay   <- as.Date(endDate, format = "%Y-%m-%d")
  stopifnot("Error: invalid 'startDate' or 'endDate' format." = length(startDay) == 1L && !is.na(startDay))
  stopifnot("Error: invalid 'startDate' or 'endDate' format." = length(endDay) == 1L && !is.na(endDay))
  if (startDay > endDay)
    stop("Error: 'startDate' must not be later than 'endDate'.", call. = FALSE)

  # Normalised "yyyy-mm-dd" strings used for the command line and the file names
  startLabel <- format(startDay, "%Y-%m-%d")
  endLabel   <- format(endDay, "%Y-%m-%d")

  # The end time defaults to the start time
  if (is.null(time2))
    time2 <- time

  # ---- Output folder ----
  # Create nested folders too, and fail loudly: if the folder cannot be created the
  # retry loop below would otherwise never terminate.
  if (!dir.exists(outDir) && !dir.create(outDir, recursive = TRUE))
    stop("Error: could not create the output folder given in 'outDir'.", call. = FALSE)
  if (!endsWith(outDir, "/"))
    outDir <- paste0(outDir, "/")

  # ---- Helpers ----
  # Build the motuclient call(s). Vectorised over dateMin / dateMax / outFile.
  # Every free-text value is shell-quoted: "date time" must reach motuclient as ONE
  # argument, and paths / credentials may contain spaces or shell metacharacters.
  buildCommand <- function(dateMin, dateMax, outFile) {
    paste("python -m motuclient --motu http://nrt.cmems-du.eu/motu-web/Motu --service-id",
          shQuote(Cproduct), "--product-id", shQuote(CproductId),
          "--longitude-min", minLon, "--longitude-max", maxLon,
          "--latitude-min", minLat, "--latitude-max", maxLat,
          "--date-min", shQuote(paste(dateMin, time)),
          "--date-max", shQuote(paste(dateMax, time2)),
          "--depth-min", minDepth, "--depth-max", maxDepth,
          "--variable", shQuote(var),
          "--out-dir", shQuote(outDir), "--out-name", shQuote(outFile),
          "--user", shQuote(Cuser), "--pwd", shQuote(Cpssw))
  }

  # Run `command` until `target` exists. Files already present are skipped.
  # NOTE: the number of attempts is unbounded (as in the original implementation);
  # after every 10th failed attempt the loop pauses for an extra minute.
  # This helper deliberately has no free variables so that it can be shipped to
  # parallel workers by foreach without dragging the caller's environment along.
  retryDownload <- function(command, target, product, dateLabel) {
    tryIndx <- 0
    while (!file.exists(target)) {
      print(paste0("Download try N: ", tryIndx, "  --  Product: ", product, " --  Date: ", dateLabel))
      system(command)
      Sys.sleep(2)
      tryIndx <- tryIndx + 1
      if (tryIndx %% 10 == 0) Sys.sleep(60)
    }
    invisible(tryIndx)
  }

  if (frequency == "all") { # Download all the data together
    dateName <- gsub("-", "", paste(startLabel, endLabel, sep = "_"))
    # The "_.nc" suffix is kept on purpose so previously downloaded files are still recognised
    outNameTmp <- paste0(outName, var, "_", dateName, "_", ".nc")
    retryDownload(buildCommand(startLabel, endLabel, outNameTmp),
                  paste0(outDir, outNameTmp), Cproduct, dateName)

  } else { # One file per day, sequentially or in parallel
    dayLabels <- as.character(seq.Date(startDay, endDay, by = "day"))
    fileNames <- paste0(outName, var, "_", dayLabels, ".nc")
    targets   <- paste0(outDir, fileNames)
    commands  <- buildCommand(dayLabels, dayLabels, fileNames)

    if (parallel) {
      # Check dependencies (only needed for the parallel path)
      if (!all(requireNamespace("parallel", quietly = TRUE),
               requireNamespace("doParallel", quietly = TRUE),
               requireNamespace("foreach", quietly = TRUE))) {
        stop("Packages 'parallel', 'doParallel', and 'foreach' required for parallelization!", call. = FALSE)
      }

      # Number of workers: default is all cores but one, and never fewer than one
      # (detectCores() may return 1 or NA)
      if (is.null(numCores))
        numCores <- max(1L, parallel::detectCores() - 1L, na.rm = TRUE)

      # snow-style sockets on Windows, forked workers on Unix-like systems
      clusterType <- switch(.Platform$OS.type,
                            windows = "PSOCK",
                            unix    = "FORK",
                            stop("Only 'snow' and 'multicore' clusters allowed!"))
      cl <- parallel::makeCluster(numCores, type = clusterType)
      # Release the workers even if a download task raises an error
      on.exit(parallel::stopCluster(cl), add = TRUE)
      doParallel::registerDoParallel(cl, cores = numCores)

      cat("*** Running in parallel, wait...  \n" )

      `%dopar%` <- foreach::`%dopar%`
      foreach::foreach(i = seq_along(dayLabels), .verbose = FALSE, .errorhandling = "stop") %dopar% {
        retryDownload(commands[i], targets[i], Cproduct, dayLabels[i])
      }
    } else {
      # Sequential path needs no extra packages
      for (i in seq_along(dayLabels))
        retryDownload(commands[i], targets[i], Cproduct, dayLabels[i])
    }
  }
  cat("Done!")
}
GranadoIgor/miscellaneousR documentation built on Sept. 20, 2023, 12:34 p.m.