get_data | R Documentation |
data_name
data
Import data, running get_data_name
to requery original data source,
if get_new
. Data is saved to (and imported from)
file.path(save_dir, data_name, paste0(data_name, "_raw.rds"))
. save_dir
and get_new
can be passed via ...
to get_data_name
, otherwise, default
values from get_data_name
are used
(respectively:
here::here("out", "ds", data_name, paste0(data_name, "_raw.rds"))
and
FALSE
)
get_data(data_name, ...)
data_name |
Character. Name of data source. e.g. 'tern' or 'galah'. |
... |
Passed to get_data_name |
Dataframe, either loaded from save_dir
or from a new query to
data_name
. If new data is queried, .rds results file will be created,
overwriting if necessary. A log of timing and number of records is written to save_dir
.
Other Help with combining data sources:
remap_data_names()
# library("envImport")

# Cached query results live under the installed package's 'examples' folder
out_dir <- file.path(system.file(package = "envImport"), "examples")

## config -------
# Remember the currently configured atlas region so it can be reinstated
# once the examples have run
old_atlas <- galah::galah_config()$atlas$region

# Credentials are read from environment variables rather than hard-coded
galah::galah_config(
  email = Sys.getenv("GBIF_email"),
  username = Sys.getenv("GBIF_user"),
  password = Sys.getenv("GBIF_pwd"),
  caching = TRUE,
  download_reason_id = 10 # testing
)

# Point galah at GBIF for the first query
galah::galah_config(atlas = "GBIF")
# Australian Bustards--------
# in the year 2000 (matches the `year == 2000` filter used by the queries)

## 01: atlas = gbif --------
# The query is only sent when no cached result exists; otherwise the cached
# file is re-imported.
save_file <- fs::path(out_dir, "qry01", "qry01.rds")

if (!file.exists(save_file)) {

  qry01 <- galah::galah_call() %>%
    galah::galah_identify("Ardeotis australis") %>%
    galah::galah_filter(year == 2000) %>%
    galah::atlas_occurrences() %>%
    dplyr::collect()

  # Ensure the target folder exists before writing the cache file
  # (a no-op when it is already there)
  fs::dir_create(fs::path_dir(save_file))

  rio::export(qry01, save_file)

} else {

  qry01 <- rio::import(save_file)

}
## 02: atlas = ala ----------
# Switch atlas, then register the e-mail address used for ALA
galah::galah_config(atlas = "ALA")
galah::galah_config(email = Sys.getenv("ALA_email"))

# 'qry' is built once here and reused by qry02 and the later get_galah() calls
qry <- galah::galah_call() %>%
  galah::galah_identify("Ardeotis australis") %>%
  galah::galah_filter(year == 2000)

save_file <- fs::path(out_dir, "qry02", "qry02.rds")

if (!file.exists(save_file)) {

  qry02 <- qry %>%
    galah::atlas_occurrences()

  # Ensure the target folder exists before writing the cache file
  # (a no-op when it is already there)
  fs::dir_create(fs::path_dir(save_file))

  rio::export(qry02, save_file)

} else {

  # re-import the cached result as a tibble
  qry02 <- rio::import(save_file, setclass = "tibble")

}

# similar (but not identical) # of records
nrow(qry01)
nrow(qry02)
## 03: get_galah ---------
# Same query as qry02, this time run through get_galah() with data_map
qry03 <- get_galah(
  save_dir = fs::path(out_dir, "qry03"),
  data_map = data_map,
  qry = qry
)

# once more the record counts differ slightly
nrow(qry02)
nrow(qry03)

# get_galah drops (via envImport::remap_data_names) records with missing
# dates, latitude or longitude - see the arguments of
# envImport::remap_data_names.
# Applying the same filter to qry02 gives the same count as qry03
nrow(
  dplyr::filter(
    qry02,
    !is.na(eventDate),
    !is.na(decimalLatitude),
    !is.na(decimalLongitude)
  )
)

# column names: raw result vs. names taken from data_map
names(qry02)
names(qry03)
## 04: get_galah with profile -------
# As for qry03, but the CSDM profile is applied to the query first
qry04 <- get_galah(
  save_dir = fs::path(out_dir, "qry04"),
  data_map = data_map,
  qry = qry %>%
    galah::apply_profile(CSDM)
)

# the profile removes some records
nrow(qry04)
############################################
# Combine data --------

## get_galah for aoi -------
# Both calls below share save_dir and sub_dir ("bio_all")
bio_all_galah <- get_galah(
  aoi = envImport::aoi,
  save_dir = out_dir,
  data_map = data_map,
  sub_dir = "bio_all"
)

## get_tern for aoi --------
bio_all_tern <- get_tern(
  aoi = envImport::aoi,
  save_dir = out_dir,
  data_map = data_map,
  sub_dir = "bio_all"
)
## or using get_data -------
# one get_data() call per data source: galah, tern and gbif
datas <- c("galah", "tern", "gbif")

# galah and tern already run from above; get_new = FALSE is passed for
# every source
temp <- purrr::map(
  datas,
  function(x) {
    get_data(
      x,
      save_dir = out_dir,
      get_new = FALSE,
      aoi = envImport::aoi,
      data_map = data_map,
      sub_dir = "bio_all",
      previous_key = "0057643-240626123714530"
    )
  }
)
## single dataset --------
# Collect the parquet files in the shared 'bio_all' folder. The pattern is
# anchored with '$' so only names that END in '.parquet' are picked up (not,
# e.g., 'x.parquet.bak').
bio_all_files <- fs::dir_ls(
  fs::path(out_dir, "bio_all"),
  regexp = "\\.parquet$"
)

# import each file and row-bind the results
bio_all <- purrr::map_dfr(bio_all_files, \(x) rio::import(x))

# Optional diagnostics, deliberately disabled: never run as part of the example
if (FALSE) {

  # check for misaligned classes
  # (columns whose class is not the same across the elements of 'temp')
  check <- purrr::map_dfr(temp, \(x) purrr::map(x, class)) %>%
    purrr::map_dfr(\(x) length(unique(na.omit(x))) == 1) %>%
    tidyr::pivot_longer(dplyr::everything()) %>%
    dplyr::filter(!value)

  # re-read the files with an explicit schema in which 'quantity' is a string
  use_schema <- arrow::schema(bio_all)
  use_schema$quantity <- arrow::Field$create("quantity", arrow::string())

  bio_all <- arrow::open_dataset(bio_all_files, schema = use_schema) %>%
    dplyr::collect()

}

# 'bio_all' is now the sum of its components
nrow(bio_all) == sum(purrr::map_dbl(temp, nrow))
# clean up -------
# return to original atlas
# ('old_atlas' holds the region read from galah::galah_config() in the config
# section, before the atlas was switched for these examples)
galah::galah_config(atlas = old_atlas)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.