# Simplest example
library(zoon)
# A workflow takes one module for each of the five module types. Here every
# module is named without arguments.
work1 <- workflow(
  occurrence = UKAnophelesPlumbeus,
  covariate = UKBioclim,
  process = OneHundredBackground,
  model = LogisticRegression,
  output = PrintMap
)
# Help
# Look up the module list, then the help page of a single module.
GetModuleList()
ModuleHelp(UKBioclim)

# Modules with arguments
# A module that takes arguments is written as a call inside workflow().
ModuleHelp(SpOcc)
work2 <- workflow(
  occurrence = SpOcc(
    species = "Loxia scotica",
    extent = c(-10, 10, 45, 65)
  ),
  covariate = UKBioclim,
  process = OneHundredBackground,
  model = LogisticRegression,
  output = PrintMap
)
# Combining modules
# Do you want one analysis? Wrap the modules in Chain().
?Chain
work3 <- workflow(
  occurrence = Chain(
    SpOcc(
      species = "Eresus kollari",
      extent = c(-10, 10, 45, 65)
    ),
    SpOcc(
      species = "Eresus sandaliatus",
      extent = c(-10, 10, 45, 65)
    )
  ),
  covariate = UKBioclim,
  process = OneHundredBackground,
  model = LogisticRegression,
  output = PrintMap
)
# Or many separate analyses: pass the modules as a list() instead.
work4 <- workflow(
  occurrence = list(
    SpOcc(
      species = "Eresus kollari",
      extent = c(-10, 10, 45, 65)
    ),
    SpOcc(
      species = "Eresus sandaliatus",
      extent = c(-10, 10, 45, 65)
    )
  ),
  covariate = UKBioclim,
  process = OneHundredBackground,
  model = LogisticRegression,
  output = PrintMap
)

# This should be in a module, but isn't.
# Plot the two report entries side by side, one panel per species.
par(mfrow = c(1, 2))
plot(work4$report[[1]], main = "Eresus kollari")
plot(work4$report[[2]], main = "Eresus sandaliatus")
# Crossvalidation
# Same workflow as the simplest example, but with a different process module
# and an output module that reports performance measures instead of a map.
ModuleHelp(BackgroundAndCrossvalid)
work5 <- workflow(
  occurrence = UKAnophelesPlumbeus,
  covariate = UKBioclim,
  process = BackgroundAndCrossvalid,
  model = LogisticRegression,
  output = PerformanceMeasures
)
work5$report
# A larger example
# Note: only one module type can be given as a list.
# Many module types can be chained.
# Model modules can be in a list, but not in a chain.
work6 <- workflow(
  occurrence = Chain(
    SpOcc(
      species = "Eresus kollari",
      extent = c(-10, 10, 45, 65)
    ),
    SpOcc(
      species = "Eresus sandaliatus",
      extent = c(-10, 10, 45, 65)
    )
  ),
  covariate = UKBioclim,
  process = BackgroundAndCrossvalid(k = 2),
  model = list(LogisticRegression, RandomForest),
  output = Chain(PrintMap, PerformanceMeasures)
)
str(work6, max.level = 1)

# One panel per model. The indexing below takes the first report element as
# the thing to plot and the second as the object holding $auc (presumably the
# PrintMap and PerformanceMeasures outputs, in chain order -- confirm).
par(mfrow = c(1, 2))
plot(
  work6$report[[1]][[1]],
  main = paste(
    "Logistic Regression: AUC = ",
    round(work6$report[[1]][[2]]$auc, 2)
  )
)
plot(
  work6$report[[2]][[1]],
  main = paste(
    "Random forest: AUC = ",
    round(work6$report[[2]][[2]]$auc, 2)
  )
)
# Building modules
# A model module
# Fits a binomial GAM of df$value on every covariate column.
#
# df: data frame with the columns value, type, fold, longitude, latitude,
#     followed by the covariate columns (column 6 to the last column).
# A model module can have other arguments as well.
#
# Returns the fitted model object. The object's class must have a predict
# method available. If it doesn't, define one inside the module (see
# BiomodModel at the link below for an example):
# https://github.com/zoonproject/modules/blob/master/R/BiomodModel.R
NewModule <- function(df) {
  zoon:::GetPackage("gam")

  cov_cols <- 6:ncol(df)
  # Coerce and rename explicitly so that a single covariate column still ends
  # up as a data frame carrying its original column name.
  covs <- as.data.frame(df[, cov_cols])
  names(covs) <- names(df)[cov_cols]

  gam::gam(
    formula = df$value ~ .,
    data = covs,
    family = binomial
  )
}
# Build the module file from the function, drop the in-memory function, then
# load the module back from the file (presumably the one BuildModule wrote
# into dir = "." -- confirm) and use it as the model in a workflow.
BuildModule(
  NewModule,
  type = "Model",
  dir = ".",
  description = "My cool new module"
)
rm(NewModule)
LoadModule("NewModule.R")

work1 <- workflow(
  occurrence = UKAnophelesPlumbeus,
  covariate = UKBioclim,
  process = OneHundredBackground,
  model = NewModule,
  output = PrintMap
)
################################################################################################################
# What follows is a simple example of each module type.
# If you wish to build a module you can use these as an outline
# The names and structure of input arguments are important
# and must match these, even if some arguments aren't used.
# The class, structure and column names of the return value are also constrained.
# Structure of an occurrence module
# Input can be anything.
#
# species:   forwarded to occ() as `query`.
# extent:    forwarded to occ() as `geometry`.
# databases: forwarded to occ() as `from`. Defaults to "gbif".
#
# Must return a data frame with the columns longitude, latitude, value, type
# and fold. No other columns, and the names must match exactly.
#   value: 1/0 for presence/absence, or numbers for abundance etc.
#   type:  mostly a "presence", "presence/absence" or "abundance" string.
#          Not settled yet.
#   fold:  just a column of 1s. It is currently needed (though it may be
#          moved out of modules later).
SpOcc <- function(species, extent, databases = "gbif") {
  # Package and column names are strings; as bare symbols they would be
  # looked up as (undefined) variables.
  zoon:::GetPackage("spocc")

  raw <- occ2df(
    occ(query = species, geometry = extent, from = databases, limit = 10e5)
  )

  occurrence <- raw[, c("longitude", "latitude")]
  occurrence$value <- 1
  occurrence$type <- "presence"
  occurrence$fold <- 1
  occurrence
}
# Then run BuildModule to create the module file properly.
BuildModule(
  SpOcc,
  type = "Occurrence",
  dir = ".",
  # Be a good citizen! Describe your module well, including its parameters.
  # These are autobuilt into the documentation.
  # Note: required arguments such as df in the model module above should not
  # be documented.
  description = "My cool new module species occurrence module",
  # Each parameter description must be a quoted string.
  paras = list(
    species = "The species name",
    extent = "latitudinal, longitudinal extent as numeric vector",
    databases = "Character vector of databases to use from gbif, inat, ebird. Defaults to just gbif."
  )
)
# Structure of a covariate module
# Any input is allowed in general; this example takes file names.
#
# filenames: either a character value, which is passed straight to raster(),
#            or a list, in which case raster() is applied to each element and
#            the results are combined with stack().
#
# Must return a raster object: layer, stack or brick.
# Any other kind of `filenames` is an error. Without the final branch the
# function would fall through and return the raster() function itself.
LocalRaster <- function(filenames) {
  if (is.character(filenames)) {
    layers <- raster(filenames)
  } else if (is.list(filenames)) {
    layers <- stack(lapply(filenames, raster))
  } else {
    stop(
      "`filenames` must be a character value or a list, not an object of class '",
      class(filenames)[1], "'",
      call. = FALSE
    )
  }
  layers
}
# Structure of process module
# Input and output are the same.
# Must accept and return a list. The input list must be called data.
#   data$df:  a data frame with value, type, fold, longitude, latitude,
#             followed by the covariate columns.
#   data$ras: a raster layer, stack or brick.
# Other input arguments are ok.
#
# This example leaves the data as it is and only renames the covariate
# columns (column 6 onwards) of the data frame to names(ras).
NoProcess <- function(data) {
  df <- data$df
  ras <- data$ras

  # Build the covariate index with seq_len() so that a data frame without any
  # covariate columns yields an empty index instead of the backwards 6:5.
  n_fixed <- 5L
  cov_cols <- n_fixed + seq_len(max(ncol(df) - n_fixed, 0L))
  if (length(cov_cols) > 0) {
    names(df)[cov_cols] <- names(ras)
  }

  list(df = df, ras = ras)
}
# Structure of an output module
# Input args are a model object called model and a raster object called ras
# + Any other arguments
# I imagine this will change so it expects the occurrence data as well
SameTimePlaceMap <- function (model, ras) {
# Output can be anything. Go nuts.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.