# tests/testthat/test-repoFinder.R

requireNamespace("readr")
requireNamespace("BeeBDC")


# Create small fixture files in tempdir() for repoFinder to search; each tibble
# below is written to disk straight after it is defined.
## ALA data: four occurrence records, all with scientificName "APIDAE",
## institutionCode "QM" and country "Australia".
ALA_data <- dplyr::tribble(
              ~scientificName,  ~family, ~subfamily, ~genus, ~subgenus, ~subspecies, ~species, ~specificEpithet, ~infraspecificEpithet, ~acceptedNameUsage, ~taxonRank, ~scientificNameAuthorship, ~identificationQualifier, ~higherClassification, ~identificationReferences, ~typeStatus, ~previousIdentifications, ~identifiedBy, ~dateIdentified, ~decimalLatitude, ~decimalLongitude, ~verbatimLatitude, ~verbatimLongitude,       ~stateProvince,    ~country, ~continent,                        ~locality, ~island, ~county, ~municipality, ~countryCode, ~`dcterms:license`,             ~eventDate, ~eventTime, ~day, ~month, ~year,       ~basisOfRecord, ~`dcterms:type`, ~occurrenceStatus, ~recordNumber, ~recordedBy, ~eventID, ~samplingProtocol, ~samplingEffort, ~individualCount, ~organismQuantity, ~coordinatePrecision, ~coordinateUncertaintyInMeters, ~spatiallyValid, ~catalogNumber, ~datasetID, ~institutionCode, ~datasetName, ~otherCatalogNumbers,                                          ~occurrenceID, ~collectionID, ~verbatimEventDate, ~associatedTaxa, ~associatedOrganisms, ~fieldNotes,   ~sex, ~rights, ~rightsHolder, ~`dcterms:accessRights`, ~associatedReferences, ~`dcterms:bibliographicCitation`, ~references, ~informationWithheld, ~isDuplicateOf,                                                                                                                                                                                                                                      ~assertions,        ~occurrenceYear, ~duplicateStatus, ~associatedOccurrences, ~locationRemarks,
                     "APIDAE", "Apidae",         NA,     NA,        NA,          NA,       NA,      "australis",                    NA,                 NA,   "family",                        NA,                       NA,                    NA,                        NA,          NA,                       NA,            NA,              NA,            -22.7,             133.4,                NA,                 NA, "Northern Territory", "Australia",         NA,                 "60 k S Ti Tree",      NA,      NA,            NA,         "AU",            "CC-BY", "1996-10-02T14:00:00Z",         NA,   3L,    10L, 1996L, "PRESERVED_SPECIMEN",              NA,         "PRESENT",            NA,          NA,       NA,                NA,              NA,               NA,                NA,                   NA,                             NA,            TRUE,      "T157061",         NA,             "QM",           NA,             2089363L, "urn:lsid:ozcam.taxonomy.org.au:QM.Entomology:T157061",            NA,                 NA,              NA,                   NA,          NA,     NA,      NA,            NA,                      NA,                    NA,                               NA,          NA,                   NA,             NA, "COORDINATE_UNCERTAINTY_METERS_INVALID | MISSING_GEOREFERENCEDBY | MISSING_GEOREFERENCEPROTOCOL | MISSING_GEOREFERENCESOURCES | MISSING_GEOREFERENCEVERIFICATIONSTATUS | MISSING_GEOREFERENCE_DATE | MISSING_TAXONRANK | TAXON_MATCH_HIGHERRANK", "1995-12-31T13:00:00Z",               NA,                     NA,               NA,
                     "APIDAE", "Apidae",         NA,     NA,        NA,          NA,       NA,     "essingtoni",                    NA,                 NA,   "family",                        NA,                       NA,                    NA,                        NA,          NA,                       NA,            NA,              NA,            -15.5,             128.7,                NA,                 NA,  "Western Australia", "Australia",         NA, "Middle Springs 15k N Kunanurra",      NA,      NA,            NA,         "AU",            "CC-BY", "1985-09-11T14:00:00Z",         NA,  12L,     9L, 1985L, "PRESERVED_SPECIMEN",              NA,         "PRESENT",            NA,          NA,       NA,                NA,              NA,               NA,                NA,                   NA,                             NA,            TRUE,      "T157059",         NA,             "QM",           NA,             2089361L, "urn:lsid:ozcam.taxonomy.org.au:QM.Entomology:T157059",            NA,                 NA,              NA,                   NA,          NA, "MALE",      NA,            NA,                      NA,                    NA,                               NA,          NA,                   NA,             NA, "COORDINATE_UNCERTAINTY_METERS_INVALID | MISSING_GEOREFERENCEDBY | MISSING_GEOREFERENCEPROTOCOL | MISSING_GEOREFERENCESOURCES | MISSING_GEOREFERENCEVERIFICATIONSTATUS | MISSING_GEOREFERENCE_DATE | MISSING_TAXONRANK | TAXON_MATCH_HIGHERRANK", "1984-12-31T13:00:00Z",               NA,                     NA,               NA,
                     "APIDAE", "Apidae",         NA,     NA,        NA,          NA,       NA,        "cassiae",                    NA,                 NA,   "family",                        NA,                       NA,                    NA,                        NA,          NA,                       NA,            NA,              NA,            -23.7,             149.9,                NA,                 NA,         "Queensland", "Australia",         NA,                       "Duaringa",      NA,      NA,            NA,         "AU",            "CC-BY", "1987-09-06T14:00:00Z",         NA,   7L,     9L, 1987L, "PRESERVED_SPECIMEN",              NA,         "PRESENT",            NA,          NA,       NA,                NA,              NA,               NA,                NA,                   NA,                             NA,            TRUE,      "T157064",         NA,             "QM",           NA,             2089366L, "urn:lsid:ozcam.taxonomy.org.au:QM.Entomology:T157064",            NA,                 NA,              NA,                   NA,          NA, "MALE",      NA,            NA,                      NA,                    NA,                               NA,          NA,                   NA,             NA, "COORDINATE_UNCERTAINTY_METERS_INVALID | MISSING_GEOREFERENCEDBY | MISSING_GEOREFERENCEPROTOCOL | MISSING_GEOREFERENCESOURCES | MISSING_GEOREFERENCEVERIFICATIONSTATUS | MISSING_GEOREFERENCE_DATE | MISSING_TAXONRANK | TAXON_MATCH_HIGHERRANK", "1986-12-31T13:00:00Z",               NA,                     NA,               NA,
                     "APIDAE", "Apidae",         NA,     NA,        NA,          NA,       NA,      "australis",                    NA,                 NA,   "family",                        NA,                       NA,                    NA,                        NA,          NA,                       NA,            NA,              NA,            -23.7,             149.9,                NA,                 NA,         "Queensland", "Australia",         NA,                       "Duaringa",      NA,      NA,            NA,         "AU",            "CC-BY", "1996-10-12T14:00:00Z",         NA,  13L,    10L, 1996L, "PRESERVED_SPECIMEN",              NA,         "PRESENT",            NA,          NA,       NA,                NA,              NA,               NA,                NA,                   NA,                             NA,            TRUE,      "T157060",         NA,             "QM",           NA,             2089362L, "urn:lsid:ozcam.taxonomy.org.au:QM.Entomology:T157060",            NA,                 NA,              NA,                   NA,          NA,     NA,      NA,            NA,                      NA,                    NA,                               NA,          NA,                   NA,             NA, "COORDINATE_UNCERTAINTY_METERS_INVALID | MISSING_GEOREFERENCEDBY | MISSING_GEOREFERENCEPROTOCOL | MISSING_GEOREFERENCESOURCES | MISSING_GEOREFERENCEVERIFICATIONSTATUS | MISSING_GEOREFERENCE_DATE | MISSING_TAXONRANK | TAXON_MATCH_HIGHERRANK", "1995-12-31T13:00:00Z",               NA,                     NA,               NA
              )

# Save the ALA fixture as "data.csv" inside the session temp directory
write.csv(
  x = ALA_data,
  file = file.path(tempdir(), "data.csv"),
  row.names = FALSE
)


## GBIF data - placeholder rows that are NOT a real GBIF export, because GBIF
## data refused to be copy-pasted. Six pan-trap records from study "Ball01".
GBIF_data <- dplyr::tribble(
  ~studyID,           ~siteID, ~year,     ~date,                           ~animalID, ~abundance, ~abundanceMethod, ~samplingMethod, ~numCensus, ~samplingIntensity,                                                                                                                                                                                   ~censusType,     ~fieldDist,  ~flowering, ~decimalLatitude, ~decimalLongitude,                ~studyLocation, ~habitatType,                ~siteDescription,
  "Ball01", "Bastrop Gardens", 2013L, "2013/5/22",             "Agapostemon angelicus",         1L,      "Abundance",      "Pan Trap",         1L,                24L, "50 pan traps (painted blue, yellow or left white as in LeBuhn et al) placed in x formation 1 m apart from center of the 50 m2 plot, vegetation measured as described for blue vane trapped.", "not measured", "flowering",        30.157397,        -97.491703, "Bastrop, Bastrop County, TX",           NA, "Agriculture. Community garden",
  "Ball01", "Bastrop Gardens", 2013L, "2013/5/22",               "Agapostemon texanus",         1L,      "Abundance",      "Pan Trap",         1L,                24L, "50 pan traps (painted blue, yellow or left white as in LeBuhn et al) placed in x formation 1 m apart from center of the 50 m2 plot, vegetation measured as described for blue vane trapped.", "not measured", "flowering",        30.157397,        -97.491703, "Bastrop, Bastrop County, TX",           NA, "Agriculture. Community garden",
  "Ball01", "Bastrop Gardens", 2013L, "2013/5/22",            "Anthophora californica",         1L,      "Abundance",      "Pan Trap",         1L,                24L, "50 pan traps (painted blue, yellow or left white as in LeBuhn et al) placed in x formation 1 m apart from center of the 50 m2 plot, vegetation measured as described for blue vane trapped.", "not measured", "flowering",        30.157397,        -97.491703, "Bastrop, Bastrop County, TX",           NA, "Agriculture. Community garden",
  "Ball01", "Bastrop Gardens", 2013L, "2013/5/22",                "Ceratina shinnersi",         1L,      "Abundance",      "Pan Trap",         1L,                24L, "50 pan traps (painted blue, yellow or left white as in LeBuhn et al) placed in x formation 1 m apart from center of the 50 m2 plot, vegetation measured as described for blue vane trapped.", "not measured", "flowering",        30.157397,        -97.491703, "Bastrop, Bastrop County, TX",           NA, "Agriculture. Community garden",
  "Ball01", "Bastrop Gardens", 2013L, "2013/5/22",                  "Ceratina strenua",         1L,      "Abundance",      "Pan Trap",         1L,                24L, "50 pan traps (painted blue, yellow or left white as in LeBuhn et al) placed in x formation 1 m apart from center of the 50 m2 plot, vegetation measured as described for blue vane trapped.", "not measured", "flowering",        30.157397,        -97.491703, "Bastrop, Bastrop County, TX",           NA, "Agriculture. Community garden",
  "Ball01", "Bastrop Gardens", 2013L, "2013/5/22", "Lasioglossum (Dialictus) sp.TX-14",        12L,      "Abundance",      "Pan Trap",         1L,                24L, "50 pan traps (painted blue, yellow or left white as in LeBuhn et al) placed in x formation 1 m apart from center of the 50 m2 plot, vegetation measured as described for blue vane trapped.", "not measured", "flowering",        30.157397,        -97.491703, "Bastrop, Bastrop County, TX",           NA, "Agriculture. Community garden"
) 

# Save the GBIF stand-in fixture as "occurrence.txt" inside the session temp directory
write.table(
  x = GBIF_data,
  file = file.path(tempdir(), "occurrence.txt"),
  row.names = FALSE
)


## iDigBio data - placeholder rows that are NOT a real iDigBio export, because
## iDigBio data refused to be copy-pasted. The rows are the same placeholder
## survey records already defined as GBIF_data above, so that tibble is reused
## here instead of repeating the identical literal.
iDigBio_data <- GBIF_data

# Save the iDigBio stand-in fixture as "occurrence_raw.csv" inside the session temp directory
write.csv(
  x = iDigBio_data,
  file = file.path(tempdir(), "occurrence_raw.csv"),
  row.names = FALSE
)


## SCAN data: four "apis mellifera" specimen records with institutionCode "CAS"
## and collectionCode "ANTWEB" (three from Davis, California; one from Chogoria, Kenya).
SCAN_data <- dplyr::tribble(
                     ~id, ~institutionCode, ~collectionCode,         ~ownerInstitutionCode,                          ~collectionID,      ~basisOfRecord,                  ~occurrenceID,      ~catalogNumber, ~otherCatalogNumbers,   ~kingdom,      ~phylum,    ~class,        ~order,  ~family,  ~scientificName, ~taxonID, ~scientificNameAuthorship, ~genus, ~specificEpithet, ~taxonRank, ~infraspecificEpithet,  ~identifiedBy, ~dateIdentified, ~identificationReferences, ~identificationRemarks, ~taxonRemarks, ~identificationQualifier, ~typeStatus,    ~recordedBy, ~recordNumber, ~eventDate, ~year, ~month, ~day, ~startDayOfYear, ~endDayOfYear,        ~verbatimEventDate, ~occurrenceRemarks, ~habitat, ~fieldNumber, ~informationWithheld, ~dataGeneralizations, ~dynamicProperties, ~associatedTaxa, ~associatedOccurrences, ~reproductiveCondition, ~establishmentMeans, ~lifeStage,     ~sex, ~individualCount, ~samplingProtocol, ~samplingEffort, ~preparations,        ~country, ~stateProvince, ~county, ~municipality,  ~locality,                  ~locationRemarks, ~decimalLatitude, ~decimalLongitude, ~geodeticDatum, ~coordinateUncertaintyInMeters, ~verbatimCoordinates, ~georeferencedBy, ~georeferenceProtocol, ~georeferenceSources, ~georeferenceVerificationStatus, ~georeferenceRemarks, ~minimumElevationInMeters, ~maximumElevationInMeters, ~minimumDepthInMeters, ~maximumDepthInMeters, ~verbatimDepth, ~verbatimElevation, ~disposition, ~language, ~recordEnteredBy,       ~modified,                                             ~rights,                                     ~rightsHolder, ~accessRights,                                       ~recordId,                                                                        ~references,
               13775122L,            "CAS",        "ANTWEB",        "UCDC, Davis, CA, USA", "dc646193-9811-40ce-872a-f005f7dc059a", "PreservedSpecimen",     "CAS:ANTWEB:casent0106100",     "casent0106100",                   NA, "Animalia", "Arthropoda", "Insecta", "Hymenoptera", "apidae", "apis mellifera",  235783L,          "Linnaeus, 1758", "Apis",      "mellifera",         NA,                    NA,             NA,              NA,                        NA,                     NA,            NA,                       NA,          NA,    "P.S. Ward",            NA,  "6/28/05", 2005L,     6L,  28L,            179L,            NA, "28 Jun 2005/29 Jun 2005",                 NA,       NA,   "PSW15443",                   NA,                   NA,                 NA,              NA,                     NA,                     NA,                  NA,         NA, "worker",               NA,                NA,              NA,         "pin", "United States",   "California",      NA,            NA,    "Davis", "coordinates obtained from Label",            38.54,        -121.75667,             NA,                             NA,                   NA,               NA,                    NA,                   NA,                              NA,                   NA,                       15L,                        NA,                    NA,                    NA,             NA,                 NA,           NA,        NA,               NA, "4/30/21 14:36", "http://creativecommons.org/publicdomain/zero/1.0/", "The California Academy of Sciences - AntWeb.org",            NA, "urn:uuid:46a46727-6535-4e70-88e7-a42c98f806ed", "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=13775122",
               13775123L,            "CAS",        "ANTWEB",        "UCDC, Davis, CA, USA", "dc646193-9811-40ce-872a-f005f7dc059a", "PreservedSpecimen", "CAS:ANTWEB:casent0106100-d01", "casent0106100-d01",                   NA, "Animalia", "Arthropoda", "Insecta", "Hymenoptera", "apidae", "apis mellifera",  235783L,          "Linnaeus, 1758", "Apis",      "mellifera",         NA,                    NA,             NA,              NA,                        NA,                     NA,            NA,                       NA,          NA,    "P.S. Ward",            NA,  "6/28/05", 2005L,     6L,  28L,            179L,            NA, "28 Jun 2005/29 Jun 2005",                 NA,       NA,   "PSW15443",                   NA,                   NA,                 NA,              NA,                     NA,                     NA,                  NA,         NA, "worker",               NA,                NA,              NA,   "100% EtOH", "United States",   "California",      NA,            NA,    "Davis", "coordinates obtained from Label",            38.54,        -121.75667,             NA,                             NA,                   NA,               NA,                    NA,                   NA,                              NA,                   NA,                       15L,                        NA,                    NA,                    NA,             NA,                 NA,           NA,        NA,               NA, "4/30/21 14:36", "http://creativecommons.org/publicdomain/zero/1.0/", "The California Academy of Sciences - AntWeb.org",            NA, "urn:uuid:d6ff3ddb-4695-4aaf-ab89-c251ab2fc7e6", "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=13775123",
               13775124L,            "CAS",        "ANTWEB",        "UCDC, Davis, CA, USA", "dc646193-9811-40ce-872a-f005f7dc059a", "PreservedSpecimen", "CAS:ANTWEB:casent0106100-d11", "casent0106100-d11",                   NA, "Animalia", "Arthropoda", "Insecta", "Hymenoptera", "apidae", "apis mellifera",  235783L,          "Linnaeus, 1758", "Apis",      "mellifera",         NA,                    NA,             NA,              NA,                        NA,                     NA,            NA,                       NA,          NA,    "P.S. Ward",            NA,  "6/28/05", 2005L,     6L,  28L,            179L,            NA, "28 Jun 2005/29 Jun 2005",                 NA,       NA,   "PSW15443",                   NA,                   NA,                 NA,              NA,                     NA,                     NA,                  NA,         NA, "worker",               NA,                NA,              NA,   "100% EtOH", "United States",   "California",      NA,            NA,    "Davis", "coordinates obtained from Label",            38.54,        -121.75667,             NA,                             NA,                   NA,               NA,                    NA,                   NA,                              NA,                   NA,                       15L,                        NA,                    NA,                    NA,             NA,                 NA,           NA,        NA,               NA, "4/30/21 14:36", "http://creativecommons.org/publicdomain/zero/1.0/", "The California Academy of Sciences - AntWeb.org",            NA, "urn:uuid:4504a6e6-ab1c-4de7-ac43-a3588a63ae90", "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=13775124",
               13852774L,            "CAS",        "ANTWEB", "CAS, San Francisco, CA, USA", "dc646193-9811-40ce-872a-f005f7dc059a", "PreservedSpecimen",     "CAS:ANTWEB:casent0249086",     "casent0249086",                   NA, "Animalia", "Arthropoda", "Insecta", "Hymenoptera", "apidae", "apis mellifera",  235783L,          "Linnaeus, 1758", "Apis",      "mellifera",         NA,                    NA, "C. Schöning",              NA,                        NA,                     NA,            NA,                       NA,          NA, "C. Schöning",            NA,   "9/1/09", 2009L,     9L,   1L,            244L,            NA,                "1-Sep-09",                 NA,       NA,     "N109-9",                   NA,                   NA,                 NA,              NA,                     NA,                     NA,                  NA,         NA, "worker",               NA,                NA,              NA,         "pin",         "Kenya",             NA,      NA,            NA, "Chogoria",                                NA,         -0.20222,          37.50083,             NA,                             NA,                   NA,               NA,                    NA,                   NA,                              NA,                   NA,                     2277L,                        NA,                    NA,                    NA,             NA,                 NA,           NA,        NA,               NA, "4/30/21 15:00", "http://creativecommons.org/publicdomain/zero/1.0/", "The California Academy of Sciences - AntWeb.org",            NA, "urn:uuid:964139d3-fb07-4a57-9b7c-e9a5830f4c82", "https://scan-bugs.org:443/portal/collections/individual/index.php?occid=13852774"
               )

# Save the SCAN fixture as "occurrences.csv" inside the session temp directory
write.csv(
  x = SCAN_data,
  file = file.path(tempdir(), "occurrences.csv"),
  row.names = FALSE
)



# Run repoFinder against the session temp directory, where the fixture files
# above were written.
testOut <- BeeBDC::repoFinder(path = tempdir())


# Pull the first four list elements out by position so that each one can be
# type-checked in its own test below.
ALAout     <- testOut[[1L]]
GBIFout    <- testOut[[2L]]
iDigBioout <- testOut[[3L]]
SCANout    <- testOut[[4L]]


# expected results - should all be present and all character values
# ALAout (element 1 of the repoFinder output) must be of type character.
testthat::test_that(
  desc = "repoFinder ALA expected",
  code = {
    testthat::expect_type(object = ALAout, type = "character")
  }
)

# GBIFout (element 2 of the repoFinder output) must be of type character.
testthat::test_that(
  desc = "repoFinder GBIF expected",
  code = {
    testthat::expect_type(object = GBIFout, type = "character")
  }
)

# iDigBioout (element 3 of the repoFinder output) must be of type character.
testthat::test_that(
  desc = "repoFinder iDigBio expected",
  code = {
    testthat::expect_type(object = iDigBioout, type = "character")
  }
)

# SCANout (element 4 of the repoFinder output) must be of type character.
testthat::test_that(
  desc = "repoFinder SCAN expected",
  code = {
    testthat::expect_type(object = SCANout, type = "character")
  }
)

# The object returned by repoFinder as a whole must be of type list.
testthat::test_that(
  desc = "repoFinder expected class",
  code = {
    testthat::expect_type(object = testOut, type = "list")
  }
)

# Try the BeeBDC package in your browser
#
# Any scripts or data that you put into this service are public.
#
# BeeBDC documentation built on Nov. 4, 2024, 9:06 a.m.