# R/gbif.R

# Author: Robert J. Hijmans
# Date : December 2009-2011
# Version 1.0
# Licence GPL v3

# 2011-12-04
# implemented tryCatch to deal with poor responses from the GBIF server
# suggestion and revised code provided by John Baumgartner

# 2013-01-15
# translate ISO2 codes to full country names
# add "cloc"

# 2014-03-08
# new version, using the JSON API


# translate an extent to the '&decimalLatitude=...&decimalLongitude=...'
# part of the query string, clipped to the global extent
.getExtGBIF <- function(ext) {
	if (!is.null(ext)) { 
		ext <- round(extent(ext), 5)
		global <- extent(-180,180,-90,90)
		ex <- intersect(ext, global)
		if (!is.null(ex)) {
			ex <- paste0('&decimalLatitude=', ex@ymin,',', ex@ymax, '&decimalLongitude=', ex@xmin, ',', ex@xmax)
		} else {
			warning('invalid extent')
		}
	} else {
		ex <- NULL
	}
	return(ex)
} 
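
# example of the query string produced (extent() is from the raster package):
# .getExtGBIF(extent(-10, 10, -5, 5))
# # "&decimalLatitude=-5,5&decimalLongitude=-10,10"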

# combine genus and species into the URL-encoded name used in the query
# string ('%20' for internal spaces, e.g. 'genus species var. xxx')
.fixNameGBIF <- function(genus, species) {
	genus <- trim(genus)
	species <- trim(species)
	# collapse runs of spaces, then URL-encode the remaining single spaces
	spec <- gsub(' +', ' ', species)
	spec <- gsub(' ', '%20', spec)
	spec <- paste(genus, '+', spec, sep='')
	return(spec)
}
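
# example of the encoded name produced:
# .fixNameGBIF('solanum', 'acaule var. acaule')
# # "solanum+acaule%20var.%20acaule"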


gbif <- function(genus, species='', ext=NULL, args=NULL, geo=TRUE, sp=FALSE, removeZeros=FALSE, download=TRUE, ntries=5, nrecs=300, start=1, end=Inf) {
	
	
	if (! requireNamespace('jsonlite')) { stop('You need to install the jsonlite package to use this function') }

	tmpfile <- paste(tempfile(), '.json', sep='')
	ex <- .getExtGBIF(ext)
	spec <- .fixNameGBIF(genus, species)
	if (sp) geo <- TRUE
	# 'hasCoordinate' is the v1 (JSON) API argument for georeferenced records
	if (geo) { cds <- '&hasCoordinate=true' } else { cds <- '' }

	base <- "http://api.gbif.org/v1/occurrence/search?"
	
	if (!is.null(args)) {
		args <- trim(as.character(args))
		args <- paste('&', paste(args, collapse='&'), sep='')
	}
	
	ntries <- min(max(ntries, 1), 100)

	url1 <- paste(base, "scientificname=", spec, '&limit=1', cds, ex, args, sep='')
	test <- try(download.file(url1, tmpfile, quiet=TRUE))
	if (inherits(test, 'try-error')) {
		stop('GBIF server did not respond')
	}
	json <- scan(tmpfile, what='character', quiet=TRUE, sep='\n', encoding='UTF-8')
	x <- jsonlite::fromJSON(json)
	if (!download) {
		if (is.null(x$count)) {
			return(0)
		} else {
			return(x$count)
		}
	} else {
		cnt <- ifelse(is.null(x$count), 0, x$count)
		message(cnt, ' records found')
		if (cnt == 0) {
			return(NULL)
		}
		if (cnt > 200000) {
			stop('The number of records is larger than the maximum for download via this service (200,000)')
		}		
	}

	
	
	start <- max(1, start)
	stopifnot(start <= end)
	nrecs <- min(max(nrecs, 1), 300)  # the occurrence API returns at most 300 records per request
	url1 <- paste(base, "scientificname=", spec, '&limit=', format(nrecs, scientific=FALSE), cds, ex, args, sep='')
	
	g <- list()
	breakout <- FALSE
	np <- i <- 1
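	# page through the results 'nrecs' records at a time, using the API's
	# limit/offset arguments, until 'endOfRecords' (or the 'end' argument) is reached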
	while (TRUE) {
		if (start+nrecs >= end) {
			nrecs <- end - start + 1
			url1 <- paste(base, "scientificname=", spec, '&limit=', format(nrecs, scientific=FALSE), cds, ex, args, sep='')
			breakout <- TRUE
		}	
	
		aurl <- paste(url1, '&offset=', format(start-1, scientific=FALSE), sep='')
		
		# wrap the progress display after 20 chunks
		if (np > 20) {
			np <- 1
			message('')
		}
		message(paste(start-1, '-', sep=''), appendLF = FALSE) 
		flush.console()
		tries <- 0
		#======= if the download fails due to server problems, keep trying =======#
		while (TRUE) {
			tries <- tries + 1
			if (tries > ntries) {
				warning('GBIF did not return the data in ', ntries, ' tries for:')
				print(aurl)
				breakout <- TRUE
				break
			}
			test <- try(download.file(aurl, tmpfile, quiet=TRUE))
			if (inherits(test, 'try-error')) {
				message('download failure, trying again...')
			} else {
				json <- scan(tmpfile, what='character', quiet=TRUE, sep='\n', encoding='UTF-8')
				# replace control characters (bell, vertical tab) that break fromJSON
				json <- chartr('\a\v', '  ', json)
				x <- jsonlite::fromJSON(json)
				if (is.null(x$count)) {
					x$count <- 0
					if (i == 1) warning('no records found')
					breakout <- TRUE
					break
				}
				r <- x$results
				# drop nested (data.frame / list) columns that do not fit in a flat data.frame
				r <- r[, !sapply(r, function(v) class(v)[1]) %in% c('data.frame', 'list')]
				rownames(r) <- NULL
				g[[i]] <- r
				break
			}
		}
		start <- start + nrecs
		np <- np + 1
		i <- i + 1
		if (breakout) break
		if (x$endOfRecords) break
	}
	
	message(min(end, x$count), ' records downloaded')

	if (length(g) == 0) {
		return(NULL)
	} else if (length(g) == 1) {
		z <- g[[1]]
	} else {
		z <- do.call(bind, g)
	}
	cn <- colnames(z)
	cn <- gsub('decimalLatitude', 'lat', cn)
	cn <- gsub('decimalLongitude', 'lon', cn)
	cn <- gsub('stateProvince', 'adm1', cn)
	cn <- gsub('county', 'adm2', cn)
	cn <- gsub('countryCode', 'ISO2', cn)  # must run before the 'country' substitution below
	cn <- gsub('country', 'fullCountry', cn)
	colnames(z) <- cn

	if (('lat' %in% cn) & ('lon' %in% cn)) {
		# tolerate decimal commas before coercing coordinates to numeric
		z[,'lon'] <- as.numeric(gsub(',', '.', z[,'lon']))
		z[,'lat'] <- as.numeric(gsub(',', '.', z[,'lat']))
		# flag records where either coordinate is exactly zero
		k <- apply(z[, c('lon', 'lat')], 1, function(x) isTRUE(any(x == 0)))
		
		if (removeZeros) {
			if (geo) {
				z <- z[!k, ]
			} else {
				z[k, c('lat', 'lon')] <- NA 
			}
		} else {
			z[k, c('lat', 'lon')] <- NA 
		}
	} else {
		sp <- FALSE
	}
	
	if (nrow(z) > 0) {
	
		if ('ISO2' %in% cn) {
			# translate ISO2 codes to full country names (first column of raster's ccodes())
			iso <- ccodes()
			i <- match(z$ISO2, iso[, 'ISO2'])
			z$country <- iso[i, 1]
		}
		
		# 'cloc' combines the locality fields, from most to least specific
		vrs <- c('locality', 'adm1', 'adm2', 'country', 'continent')
		vrs <- vrs[vrs %in% colnames(z)]
		if (length(vrs) > 0) {
			fullloc <- trim(as.matrix(z[, vrs]))
			fullloc <- apply(fullloc, 1, function(x) paste(x, collapse=', '))
			fullloc <- gsub("NA, ", "", fullloc)
			fullloc <- gsub(", NA", "", fullloc)
			fullloc <- gsub('\"', "", fullloc)
			z$cloc <- fullloc
		} else {
			z$cloc <- NA
		}
		if (sp) {
			z <- z[!(is.na(z$lon) | is.na(z$lat)), ]
			if (nrow(z) > 0 ) {
				coordinates(z) <- ~lon+lat
			}
		}
	}	

	z <- z[, sort(colnames(z))]
	d <- as.Date(Sys.time())
	z <- cbind(z, downloadDate=d)
	
	#	if (inherits(ext, 'SpatialPolygons')) { overlay	}
	try(file.remove(tmpfile), silent=TRUE)
	
	return(z)
}

#sa <- gbif('solanum')
#sa <- gbif('solanum', '*')
#sa <- gbif('solanum', 'acaule*')
#sa <- gbif('solanum', 'acaule var acaule')
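
# a minimal sketch of other options (assumes internet access and a
# responsive GBIF server; extent() is from the raster package):
# gbif('solanum', 'acaule*', download=FALSE)                  # record count only
# gbif('solanum', 'acaule*', ext=extent(-80, -60, -25, -10))  # spatial subset
# gbif('solanum', 'acaule*', geo=TRUE, sp=TRUE)               # SpatialPointsDataFrame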
