get_galah: Get data using 'galah::atlas_occurrences()'

View source: R/get_galah.R

get_galah R Documentation

Get data using galah::atlas_occurrences()

Description

Get data using galah::atlas_occurrences()

Usage

get_galah(
  aoi = NULL,
  save_dir = NULL,
  get_new = FALSE,
  name = "galah",
  data_map = NULL,
  node = "ALA",
  qry = NULL,
  check_rel_metres = TRUE,
  filter_inconsistent = TRUE,
  ...
)

Arguments

aoi

Optional simple feature (sf). Used to limit the occurrences returned via galah::galah_geolocate()

save_dir

Character. Path to directory into which to save outputs. If NULL, results will be saved to here::here("out", "ds", "galah"). The file will be named galah.parquet.

get_new

Logical. If FALSE, will attempt to load from existing save_dir.

name

Character. data_name value in envImport::data_map (or other data_map)

data_map

Dataframe or NULL. Mapping of fields to retrieve. See example envImport::data_map

node

Character. Name of atlas to use (see galah::atlas_occurrences()). Doesn't seem to work with node = "GBIF" and untested on other nodes.

qry

NULL or an object of class data_request, created using galah::galah_call()

check_rel_metres

Logical. Ensure that coordinateUncertaintyInMetres is no less than generalisationInMetres?

filter_inconsistent

Logical. If TRUE, inconsistencies between the occurrenceStatus column and either organismQuantity or individualCount are removed. e.g. a record with occurrenceStatus == "ABSENT" but individualCount == 1 would be filtered.

...

Passed to envImport::file_prep()

Value

Dataframe of occurrences, which is also saved as a file in save_dir. A .bib file is created when download_reason_id != 10.

Examples


  library("envImport")

  # example outputs are cached inside the installed package
  out_dir <- file.path(system.file(package = "envImport"), "examples")

  ## config -------
  # remember the currently configured atlas so it can be restored at the end
  old_atlas <- galah::galah_config()$atlas$region

  # credentials are read from environment variables
  galah::galah_config(
    email = Sys.getenv("GBIF_email"),
    username = Sys.getenv("GBIF_user"),
    password = Sys.getenv("GBIF_pwd"),
    caching = TRUE,
    download_reason_id = 10 # testing
  )

  galah::galah_config(atlas = "GBIF")


  # Australian Bustards--------
    # in the year 2000

  ## 01: atlas = gbif --------

  # result is cached on disk so the query only runs once
  save_file <- fs::path(out_dir, "qry01", "qry01.rds")

  if (!file.exists(save_file)) {

    qry01 <- galah::galah_call() %>%
      galah::galah_identify("Ardeotis australis") %>%
      galah::galah_filter(year == 2000) %>%
      galah::atlas_occurrences() %>%
      dplyr::collect()

    # the 'qry01' sub-directory may not exist yet; create it before writing
    fs::dir_create(fs::path_dir(save_file))

    rio::export(qry01, save_file)

  } else {

    # import as a tibble, matching how qry02 is imported below
    qry01 <- rio::import(save_file, setclass = "tibble")

  }


  ## 02: atlas = ala ----------
  galah::galah_config(atlas = "ALA")

  galah::galah_config(email = Sys.getenv("ALA_email"))

  # 'qry' is reused for qry02, qry03 and qry04
  qry <- galah::galah_call() %>%
    galah::galah_identify("Ardeotis australis") %>%
    galah::galah_filter(year == 2000)

  # result is cached on disk so the query only runs once
  save_file <- fs::path(out_dir, "qry02", "qry02.rds")

  if (!file.exists(save_file)) {

    qry02 <- qry %>%
      galah::atlas_occurrences()

    # the 'qry02' sub-directory may not exist yet; create it before writing
    fs::dir_create(fs::path_dir(save_file))

    rio::export(qry02, save_file)

  } else {

    qry02 <- rio::import(save_file, setclass = "tibble")

  }

  # similar (but not identical) # of records
  nrow(qry01)
  nrow(qry02)


  ## 03: get_galah ---------

  # same query as qry02, but run through get_galah with a data_map
  qry03 <- get_galah(
    save_dir = fs::path(out_dir, "qry03"),
    data_map = data_map,
    qry = qry
  )

  # again, not quite the same number of records
  nrow(qry02)
  nrow(qry03)

  # get_galah removes (via envImport::remap_data_names) records with missing
  # dates, lat or long - see the arguments to envImport::remap_data_names.
  # Filtering qry02 on those columns gives the same result as qry03
  nrow(
    dplyr::filter(
      qry02,
      !is.na(eventDate),
      !is.na(decimalLatitude),
      !is.na(decimalLongitude)
    )
  )

  # names from data_map
  names(qry02)
  names(qry03)

  ## 04: get_galah with profile -------

  # same query again, this time with the CSDM profile applied
  qry04 <- get_galah(
    save_dir = fs::path(out_dir, "qry04"),
    data_map = data_map,
    qry = galah::apply_profile(qry, CSDM)
  )

  # lost some records due to the profile
  nrow(qry04)


  ############################################

  # Combine data --------
  ## get_galah for aoi -------
  bio_all_galah <- get_galah(aoi = envImport::aoi
                             , save_dir = out_dir
                             , data_map = data_map
                             , sub_dir = "bio_all"
                             )

  ## get_tern for aoi --------
  bio_all_tern <- get_tern(aoi = envImport::aoi
                           , save_dir = out_dir
                           , data_map = data_map
                           , sub_dir = "bio_all"
                           )

  ## or using get_data -------
  # to get galah, tern and gbif with a single call pattern
  datas <- c("galah", "tern", "gbif")

  # galah and tern already run from above
  # (get_new = FALSE, so those saved results should be reused)

  temp <- purrr::map(datas
                     , \(x) get_data(x
                                     , save_dir = out_dir
                                     , get_new = FALSE
                                     , aoi = envImport::aoi
                                     , data_map = data_map
                                     , sub_dir = "bio_all"
                                     , previous_key = "0057643-240626123714530"
                                     )
                     )

  ## single dataset --------
  # anchor the pattern so only files ending in '.parquet' are picked up
  # (e.g. not 'x.parquet.bak')
  bio_all_files <- fs::dir_ls(fs::path(out_dir, "bio_all")
                              , regexp = "\\.parquet$"
                              )

  bio_all <- purrr::map_dfr(bio_all_files, \(x) rio::import(x))


  # not run: diagnostics / workaround for when column classes differ
  # between the data sources
  if(FALSE) {

    # check for misaligned classes
    check <- purrr::map_dfr(temp
                            , \(x) purrr::map(x, class)
                            ) %>%
      purrr::map_dfr(\(x) length(unique(na.omit(x))) == 1) %>%
      tidyr::pivot_longer(everything()) %>%
      dplyr::filter(!value)

    use_schema <- arrow::schema(bio_all)

    # force 'quantity' to string so all files can be read with one schema
    use_schema$quantity <- arrow::Field$create("quantity", arrow::string())

    bio_all <- arrow::open_dataset(bio_all_files
                                   , schema = use_schema
                                   ) %>%
      dplyr::collect()

  }


  # 'bio_all' is now the sum of its components
  nrow(bio_all) == sum(purrr::map_dbl(temp, nrow))

  # clean up -------
  # restore the atlas that was configured before this example ran
  # (captured in 'old_atlas' during the config step)
  galah::galah_config(atlas = old_atlas)

Acanthiza/envImport documentation built on Aug. 14, 2024, 8:18 a.m.