#' Retrieve records from the MyCoPortal
#' @param taxon character string specifying the taxon name (e.g., species name, family name or higher taxon). To query higher taxa, e.g. on order level, I recommend using \code{\link{mycoportal_hightax}}
#' @param country character string specifying country, e.g., "USA"
#' @param state character string specifying state, e.g., "Massachusetts"
#' @param county character string specifying county, e.g., "Worcester"
#' @param locality character string specifying locality, e.g., "Harvard Forest"
#' @param elevation_from character string, meter, e.g., "1000"
#' @param elevation_to character string, meter
#' @param host character string specifying host species, e.g., "Betula alba"
#' @param collection either "all" or a vector or integers with number corresponding to collection. For a list of collections use function \code{getCollections()}
#' @param taxon_type integer, one of 1 to 5 representing "Family or Scientific Name", "Scientific Name only", "Family Only", "Higher Taxonomy", Common Name"
#' @param north_lat character string, coordinate e.g., "45"
#' @param south_lat character string, coordinate
#' @param west_lon character string, coordinate, e.g., "-72"
#' @param east_lon character string, coordinate
#' @param point_lat character string, coordinate
#' @param point_lon character string, coordinate
#' @param radius character string, km, e.g., "50"
#' @param collector character string specifying collector name
#' @param collector_num character string specifying collector number
#' @param coll_date1 character string specifying collection data from, e.g., "19 August 1926"
#' @param coll_date2 character string specifying collection data from, e.g., "19 August 2018"
#' @param syns logical, if TRUE synonyms from MycoBank and IndexFungorum are searched
#' @param port default is 4445L
#' @param remoteServerAddr default is "localhost
#' @param verbose logical
#' @param screenshot logical, whether screenshot of results should be displayed in Viewer
#' @param browserName character string specifying the browser to use, recommended: "chrome"
#' @param wait numeric specifying the seconds to wait for website to load, recommended 2 for good internet connections;
#' higher otherwise. It would be good to first look up the number of pages for a species and to compare it with the function output to see whether loading times are sufficient.
#'
#' @return x an object of class "\code{\link{records}}" with the following components:
#' \item{nr.records}{A numeric giving the number of records retrieved}
#' \item{citation}{A character string with the recommended citation from the website}
#' \item{query}{A list of the user arguments used}
#' \item{records}{A data.frame with the query records results}
#' \item{db}{A character string specifying the database (currently only MyCoPortal)}
#'
#' @details Interface to the web database MyCoPortal. Query records available by various user specifications.
#' @references \url{http://mycoportal.org/portal/index.php}
#' @references A. N. Miller and S. T. Bates The Mycology Collections Portal (MyCoPortal) (2017) Cryptogamie Mycologie, 37:15-36.
#'
#' @import RSelenium httr RCurl
#' @importFrom XML htmlParse xpathApply xmlValue
#' @importFrom crayon red
#'
#' @author Franz-Sebastian Krah
#'
#' @examples
#' \dontrun{
#' ## Download Amanita muscaria observations and plot visualize data
#' am.dist <- mycoportal(taxon = "Amanita muscaria")
#' plot_distmap(x = am.dist, mapdatabase = "world", interactive = FALSE)
#' plot_distmap(x = am.dist, mapdatabase = "state", interactive = FALSE)
#' plot_distmap(x = am.dist, mapdatabase = "world", interactive = TRUE)
#' plot_datamap(x = am.dist, mapdatabase = "world")
#' plot_recordstreemap(x = am.dist, log = FALSE)
#' }
#' @export
mycoportal <- function(taxon = "Amanita muscaria",
country = "",
state = "",
county = "",
locality = "",
elevation_from = "",
elevation_to = "",
host = "",
taxon_type = 1,
collection = "all",
north_lat = "",
south_lat = "",
west_lon = "",
east_lon = "",
point_lat = "",
point_lon = "",
radius = "",
collector = "",
collector_num = "",
coll_date1 = "",
coll_date2 = "",
syns = TRUE,
verbose = TRUE,
screenshot = TRUE,
port = 4445L,
browserName = "chrome",
remoteServerAddr = "localhost",
wait = 2) {
# TESTS -----------------------------------------------------
# test internet conectivity
if(!url.exists("r-project.org") == TRUE)
stop( "Not connected to the internet. Please create a stable connection and try again." )
if(!is.character(getURL("http://mycoportal.org/portal/index.php")))
stop(" Database is not available : http://mycoportal.org/portal/index.php")
if(taxon_type == "4"){
cat(red("*mycoportal* may be unstable for higher taxon queries.","\n",
"If you encounter stabilitiy issues, try *mycoportal_hightax*"))
}
if(missing(taxon))
stop("At least a species name has to be specified")
if(length(grep("_", taxon))>0)
taxon <- gsub("_", " ", taxon)
## Test if Docker is running
out <- exec_internal("docker", args = c("ps", "-q"), error = FALSE)
if(out$status != 0)
stop("Docker not available. Please start Docker! https://www.docker.com")
## Wait should not be smaller than 2 seconds
wait <- ifelse(wait<=2, 2, wait)
# Initialize session -----------------------------------------------------
if(verbose){
cat("Initialize server\n")
}
start_docker_try(verbose = ifelse(verbose >= 1, TRUE, FALSE), max_attempts = 5, wait = wait)
## Set up remote
dr <- remoteDriver(remoteServerAddr = "localhost", port = port, browserName = browserName)
Sys.sleep(wait-1)
## Open connection; run server
out <- capture.output(dr$open(silent = FALSE))
Sys.sleep(2)
if(verbose > 1)
cat(out)
if(dr$getStatus()$ready)
cat(dr$getStatus()$message[1], "\n")
if(!dr$getStatus()$ready)
stop("Remote server is not running \n Please check if Docker is running!")
# Open Website -----------------------------------------------------------
cat(ifelse(verbose, "Open website\n", ""))
## Navigate to website
if(collection == "all"){ #usually it is convenient to query all collections
dr$navigate("http://mycoportal.org/portal/collections/harvestparams.php")
Sys.sleep(wait+1)
}else{ ## however, user may want specific collections
dr$navigate("http://mycoportal.org/portal/collections/index.php")
Sys.sleep(wait+1)
## uncheck all collections
button <- dr$findElement('xpath', "//*[@id='dballcb']")
button$clickElement()
ctn <- getCollections()
nonus <- grep("Addis", ctn)
## check the desired ones
for(i in collection){
button <- dr$findElement('xpath', paste0("//*[@id='specobsdiv']/form/div[",
ifelse(i < nonus, 2,3),
"]/table/tbody/tr[",
i,
"]/td[2]/input"))
button$clickElement()
Sys.sleep(1)
}
}
## Enter user query ------------------------------------------------------------------
cat(ifelse(verbose, "Send user query to website:\n", ""))
# input arguments (will be exportet)
argg <- do.call(c, as.list(match.call()))
query <- argg <- argg[-1]
argg <- do.call(c, argg)
print(argg)
## Fill elements: user defined query input
## Checkbox: Show results in table view
# [this has to be checked always, otherwise table download will not work]
button <- dr$findElement('xpath', "//*[@id='showtable']")
button$clickElement()
## Checkbox: Include Synonyms from Taxonomic Thesaurus
if(syns){
button <- dr$findElement('xpath', "//*[@id='harvestparams']/div[3]/span/input")
button$clickElement()
}
## Taxon type
webElem <- dr$findElement(using = 'xpath', paste0("//*[@id='taxontype']/option[", taxon_type ,"]"))
webElem$clickElement()
## Taxon
webElem <- dr$findElement('id', "taxa")
webElem$sendKeysToElement(list(taxon))
## Country
webElem <- dr$findElement('id', "country")
webElem$sendKeysToElement(list(country))
## State
webElem <- dr$findElement('id', "state")
webElem$sendKeysToElement(list(state))
## County
webElem <- dr$findElement('id', "county")
webElem$sendKeysToElement(list(county))
## Locality
webElem <- dr$findElement('id', "locality")
webElem$sendKeysToElement(list(locality))
## Elevation lower border
webElem <- dr$findElement('id', "elevlow")
webElem$sendKeysToElement(list(elevation_from))
## Elevation upper border
webElem <- dr$findElement('id', "elevhigh")
webElem$sendKeysToElement(list(elevation_to))
## Host (Plant species name)
webElem <- dr$findElement('id', "assochost")
webElem$sendKeysToElement(list(host))
##### Latitude and Longitude
## North latitude border
webElem <- dr$findElement('id', "upperlat")
webElem$sendKeysToElement(list(north_lat))
## South latitude border
webElem <- dr$findElement('id', "bottomlat")
webElem$sendKeysToElement(list(south_lat))
## West longitude border
webElem <- dr$findElement('id', "leftlong")
webElem$sendKeysToElement(list(west_lon))
## East longitude border
webElem <- dr$findElement('id', "rightlong")
webElem$sendKeysToElement(list(east_lon))
##### Point-Radius Search
## Latitude of point
webElem <- dr$findElement('id', "pointlat")
webElem$sendKeysToElement(list(point_lat))
## Longitude of point
webElem <- dr$findElement('id', "pointlong")
webElem$sendKeysToElement(list(point_lon))
## Radius in km
webElem <- dr$findElement('id', "radiustemp")
webElem$sendKeysToElement(list(radius))
##### Collector Criteria
## Collector name
webElem <- dr$findElement('id', "collector")
webElem$sendKeysToElement(list(collector))
## Collector number
webElem <- dr$findElement('id', "collnum")
webElem$sendKeysToElement(list(collector_num))
## Date record was found (from)
webElem <- dr$findElement('id', "eventdate1")
webElem$sendKeysToElement(list(coll_date1))
## Date record was found (to)
webElem <- dr$findElement('id', "eventdate2")
webElem$sendKeysToElement(list(coll_date2))
# Press Enter -----------------------------------------------------
webElem$sendKeysToElement(list(key = "enter"))
Sys.sleep(wait+2)
if(screenshot)
dr$screenshot(display = TRUE, useViewer = TRUE)
# Test whether results were found --------------------------------
res <- htmlParse(dr$getPageSource()[[1]])
res <- xpathApply(res, "//div", xmlValue)
res <- grep("No records found matching the query", res)
if(length(res)>0){
# close server
dr$close()
## stop docker
cat(ifelse(verbose, "Stop Server\n", ""))
system(
"docker stop $(docker ps -a -q)",
ignore.stdout = TRUE,
ignore.stderr = TRUE
)
cat(red(paste0(paste(rep("#", 43), collapse = ""),
"\n### No records for this query ###\n",
paste(rep("#", 43), collapse = ""))))
opt <- options(show.error.messages=FALSE)
on.exit(options(opt))
return(records(nr.records = 0,
citation = "Not applicable",
query = query,
records = data.frame(NULL),
db = "MyCoPortal"))
}
# Download tables -------------------------------------------------
nr.p <- nr_pages(dr)
cat(ifelse(verbose, paste("Downloading", nr.p, "pages\n"), ""))
cat(red("Make sure you have a stable internet connection!\n"))
## Download tables in page-wise batches
tabs <- list()
for (i in 0:(nr.p-1)) {
tabs[[i + 1]] <- retry_next_page_download(
z = i,
remdriver = dr,
max_attempts = 5,
wait_seconds = wait
)
}
## Rbind all tables
tabs <- do.call(rbind, tabs)
cat(nrow(tabs), "records were downloaded \n")
## Add coordinates as lon lat column
tabs$coord <- stringr::str_extract(tabs$Locality, "-?\\d*\\.\\d*\\s\\-?\\d*\\.\\d*")
coords <- data.frame(do.call(rbind, strsplit(tabs$coord , " ")))
names(coords) <- c("lat", "lon")
coords <- suppressWarnings(apply(coords, 2, function(x) as.numeric(as.character(x))))
tabs <- data.frame(tabs, coords)
tabs$spec <- stringr::word(tabs$Scientific.Name, 1,2)
# Close Website and Server ------------------------------------------------
cat(ifelse(verbose, "Close website and quit server\n", ""))
## Close Website
dr$close()
## Stop docker
stop_docker()
## Return downloaded query results as data.frame
if(collection != "all"){
ctn <- getCollections()
collection <- ctn[collection]
collection <- paste(collection, collapse = "; ")
}else{
collection <- "All available collections used"
}
cit <-
paste0(
"Biodiversity occurrence data published by: <",
collection,
"> (Accessed through MyCoPortal Data Portal, http//:mycoportal.org/portal/index.php, ",
Sys.Date(),
")"
)
records(
nr.records = nrow(tabs),
citation = cit,
query = query,
records = tabs,
db = "MyCoPortal"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.