# jobs/col.R

# remotes::install_github("cboettig/prov")
library(prov)
library(tidyverse)
library(Hmisc)
library(fs)
devtools::load_all()

# Fetch the Catalogue of Life annual Darwin Core Archive ----
# `contentid::store()` registers the download in the local content store and
# returns its content-based identifier; `contentid::resolve()` then returns a
# location for that content (expected here to be the local copy just stored).
in_url <- "https://download.checklistbank.org/col/annual/2022_dwca.zip"
#in_url <- paste0("http://www.catalogueoflife.org/DCA_Export/zip-fixed/", 2021, "-annual.zip")
id <- contentid::store(in_url)
in_file <- contentid::resolve(id)

# Expose the stored archive as data/<archive file name> through a link.
path <- file.path("data", basename(in_url))
# link_create() cannot create a link inside a directory that does not exist,
# so make sure the parent directory is there (no-op when it already is).
fs::dir_create(fs::path_dir(path))
if (!fs::link_exists(path)) {
  path <- fs::link_create(in_file, path)
}




# hash-based memoizer for file-based workflow ----
# Preprocessing is skipped when the content identifier of the input archive
# already appears somewhere in the provenance record on disk.
# NOTE(review): this reads "col_schema.json", while the write_prov() calls in
# this script write to "schema.json" -- confirm the two are meant to differ.
has_id <- FALSE
if (fs::file_exists("col_schema.json")) {
  #prov <- jsonlite::read_json("schema.json")
  prov <- readLines("col_schema.json")
  # `id` is a literal identifier, not a pattern: match it verbatim so that no
  # character in it can ever be interpreted as regular-expression syntax.
  has_id <- any(grepl(id, prov, fixed = TRUE))
}

if (!has_id) {
  preprocess_col(path)
}


#code <- c("R/col.R")

# Write a provenance record for one Catalogue of Life output directory.
#
# The records written by this script share every metadata field except the
# output directory, the title and the description, so the shared fields are
# kept here in one place instead of being repeated per call.
#
# @param out_dir Directory whose contents (listed recursively) are recorded
#   as the outputs.
# @param title Title of the record.
# @param description Description of the record.
# @return The value returned by `prov::write_prov()`.
write_col_prov <- function(out_dir, title, description) {
  raw_base <- "https://github.com/boettiger-lab/taxadb-cache/raw/master/"
  col_home <- "https://www.catalogueoflife.org/"

  # NOTE(review): dir_ls(recurse = TRUE) lists sub-directories as well as
  # files, and paste0() on an empty listing yields the bare base URL --
  # confirm both are acceptable for the directories recorded here.
  out_paths <- fs::dir_ls(out_dir, recurse = TRUE)

  prov::write_prov(
    #data_in = "https://download.checklistbank.org/col/annual/2022_dwca.zip",
    #code = code,
    data_out = paste0(raw_base, out_paths),
    title = title,
    description = description,
    license = "http://creativecommons.org/licenses/by/4.0/",
    identifier = "https://doi.org/10.48580/dfq8",
    url = col_home,
    creator = list(
      "type" = "Organization",
      name = "Catalogue Of Life",
      url = col_home,
      id = col_home
    ),
    version = "22.12",
    issued = Sys.Date(),
    prov = "schema.json",
    schema = "http://schema.org",
    # Add to the existing record rather than replacing it.
    append = TRUE
  )
}

# Darwin Core formatted taxonomic names.
write_col_prov(
  out_dir = "data/2022/dwc_col",
  title = "v22.12_dwc_col",
  description = "Darwin Core formatted version of Catalogue Of Life Taxonomic Names, created by rOpenSci"
)

# Common (vernacular) names.
write_col_prov(
  out_dir = "data/2022/common_col",
  title = "v22.12_common_col",
  description = "Common Names Catalogue Of Life Taxonomic Names, created by rOpenSci"
)

# Run the companion script; the path is relative to the working directory and
# source() evaluates it in the calling (here: global) environment, so it can
# see the variables defined above.
# NOTE(review): presumably this post-processes the provenance JSON written
# above -- confirm against jsonld-append.R.
source("jsonld-append.R")
# boettiger-lab/taxadb-cache documentation built on March 20, 2023, 10:09 p.m.