R/pathogens.R

Defines functions fd_pathogens_defid2

Documented in fd_pathogens_defid2

# fd_pathogens_defid2

#' Download the DEFID2 database
#'
#' Download the Database of European Forest Insect and Disease Disturbances.
#'
#' Data may be freely used for research, study, or teaching, but be cited
#' appropriately (see references below).
#'
#' @param agent a character vector with the desired forest insect(s) and/or
#' disease(s). The default '\code{all}' retrieves every agent
#' @param host a character vector with the desired host tree(s) species. The
#' default '\code{all}' retrieves every tree
#' @param symptoms a character vector with the desired symptom(s). The default
#' '\code{all}' retrieves every symptom
#' @param country a character vector with the desired country(ies). The default
#' '\code{all}' retrieves every country
#' @param geometry a string with '\code{polygon}' to retrieve polygon data, or
#' '\code{point}' to retrieve point data
#' @param quiet if \code{TRUE}, suppress any message or progress bar
#'
#' @return \code{sf} object with \code{MULTIPOLYGON} or \code{POINT} geometry
#'
#' @importFrom stats na.omit
#' @export
#'
#' @details
#'
#' This function will download the DEFID2 database to the temporary directory
#' once per session. After it's downloaded, the queries to the database are
#' faster than the first time.
#'
#' Note that 99.6% of the observations correspond to _Picea abies_.
#' Also, 99.3% of the observations are in Czechia.
#'
#' The data comprises over 650,000 georeferenced records, which can be retrieved
#' as points or polygons, representing insects and diseases that occurred between
#' 1963 and 2021 in European Forests.
#'
#' Please, cite the data with the reference below.
#'
#' @references Forzieri G, Dutrieux LP, Elia A, Eckhardt B, Caudullo G, Taboada FÁ,
#'  Andriolo A, Bălacenoiu F, Bastos A, Buzatu A, Castedo Dorado F, Dobrovolný L,
#'  Duduman M, Fernandez-Carillo A, Hernández-Clemente R, Hornero A, Ionuț S,
#'  Lombardero MJ, Junttila S, Lukeš P, Marianelli L, Mas H, Mlčoušek M, Mugnai F,
#'  Nețoiu C, Nikolov C, Olenici N, Olsson P, Paoli F, Paraschiv M, Patočka Z,
#'  Pérez-Laorga E, Quero JL, Rüetschi M, Stroheker S, Nardi D, Ferenčík J,
#'  Battisti A, Hartmann H, Nistor C, Cescatti A, Beck PSA (2023).
#'  The Database of European Forest Insect and Disease Disturbances: DEFID2.
#'  Global Change Biology
#'
#' @examples
#' \donttest{
#' # Get the entire database (takes some seconds/minutes)
#' defid2_sf <- fd_pathogens_defid2()
#'
#' # Get data for Spain and Portugal
#' defid2_iberia_sf <- fd_pathogens_defid2(country = c("Spain", "Portugal"))
#'
#' }
fd_pathogens_defid2 <- function(agent = "all",
                                host = "all",
                                symptoms = "all",
                                country = "all",
                                geometry = "polygon",
                                quiet    = FALSE) {

  # 1. Download file
  ## 1.1. File url
  download_url <- "https://jeodpp.jrc.ec.europa.eu/ftp/jrc-opendata/FOREST/DISTURBANCES/DEFID2/VER1-0/defid2.gpkg"
  file_path <- stringr::str_glue("{tempdir()}/{basename(download_url)}")
  ## 1.2. Download file if doesn't exist
  if (!quiet) cli::cli_progress_step("Downloading data...", "Downloaded", "Download failed")
  dwld <- fdi_download(download_url, file_path)
  if (!dwld) {
    cli::cli_process_failed()
    return(cli::cli_alert_danger("`fd_pathogens_defid2()` failed to retrieve the data. Service might be currently unavailable"))
  }
    if (!quiet) cli::cli_progress_step("Preparing data...", "Prepared")

  # 2. Prepare query
  ## 2.1. Agents
  if (any(agent != "all")) {
    tmp.agent <- paste0("('", paste0(agent, collapse = "', '"), "')")
    agent_qr <- stringr::str_glue("agents IN {tmp.agent}")
  } else {
    agent_qr <- NA
  }
  ## 2.2. Hosts
  if (any(host != "all")) {
    tmp.host <- paste0("('", paste0(host, collapse = "', '"), "')")
    host_qr <- stringr::str_glue("hosts IN {tmp.host}")
  } else {
    host_qr <- NA
  }
  ## 2.3. Symptoms
  if (any(symptoms != "all")) {
    tmp.symptoms <- paste0("('", paste0(symptoms, collapse = "', '"), "')")
    symptoms_qr <- stringr::str_glue("symptoms IN {tmp.symptoms}")
  } else {
    symptoms_qr <- NA
  }
  ## 2.4. Countries
  if (any(country != "all")) {
    tmp.country <- paste0("('", paste0(country, collapse = "', '"), "')")
    country_qr <- stringr::str_glue("country IN {tmp.country}", sep = ",")
  } else {
    country_qr <- NA
  }
  ## 2.5. Build Query
  ### Data frame with semi-queries
  query_df <- data.frame(
    clause = c("agent", "host", "country", "symptoms"),
    query  = c(agent_qr, host_qr, country_qr, symptoms_qr)
  ) |> na.omit()
  ### Build final clauses query
  tmp.query <- ""
  if (nrow(query_df) == 1) {
    tmp.query <- stringr::str_glue("WHERE {query_df[1,2]}")
  } else if (nrow(query_df) > 1) {
    tmp.query <- stringr::str_glue("WHERE {query_df[1,2]}")
    for (i in 2:nrow(query_df)) {
      tmp.query <- stringr::str_glue("{tmp.query} AND {query_df[i,2]}")
    }
    tmp.query <- stringr::str_glue("{tmp.query}")
  }

  # 3. Read into R
  if (geometry == "polygon") {
    data_sf <- sf::read_sf(
      dsn   = file_path,
      query = stringr::str_glue("SELECT * FROM exact_polygons {tmp.query};")
    )
  } else if (geometry == "point") {
    data_sf <- sf::read_sf(
      dsn   = file_path,
      query = stringr::str_glue("SELECT * FROM exact_points {tmp.query};")
    )
  } else {
    cli::cli_abort("Invalid geometry. Please select polygon or point.")
  }
  if (!quiet) cli::cli_process_done()
  return(data_sf)
}

Try the forestdata package in your browser

Any scripts or data that you put into this service are public.

forestdata documentation built on June 8, 2025, 12:43 p.m.