Nothing
#'Combine trait datasets
#'
#'Method for function `rbind()` to append objects of class 'traitdata' to each
#'other.
#'
#'@param ... two or more objects of class traitdata.
#'@param metadata a list of metadata entries which are to be added as
#' dataset-level information.
#'@param datasetID a vector of the same length as number of objects. If `NULL`
#' (default), object names will be returned as ID.
#'@param metadata_as_columns logical (defaults to FALSE) or vector of columns to
#' return. If TRUE , the output will contain the "author", "license",
#' "datasetName" and (autogenerated) "datasetID" name, if those are provided.
#' If character vector, the output will contain the listed columns.
#'@param drop FALSE by default. If true, columns that are not present in all
#' datasets will be dropped.
#'
#'@details Metadata are ideally already included in the datasets as attributes
#' (see `?as.traitdata`). The function `rbind.traitdata()` takes a list of
#' lists as its metadata argument. The outer list must have the same length as
#' the provided objects to combine, with each entry containing objects
#' according to the terms of the Ecological Traitdata Standard
#' (http://ecologicaltraitdata.github.io/ETS/#metadata-vocabulary).
#'
#' A lookup table for dataset details will be appended as an attribute to the
#' output dataset, linked to each entry via the field `datasetID`. It can be
#' accessed by calling `attributes(<dataset>)$datasets`.
#'
#'@export
#'@importFrom data.table rbindlist as.data.table
#'
#' @examples
#'
#' pulldata("carabids")
#'
#' dataset1 <- as.traitdata(carabids,
#' taxa = "name_correct",
#' traits = c("body_length", "antenna_length", "metafemur_length"),
#' units = "mm",
#' keep = c(datasetID = "source_measurement", measurementRemarks = "note"),
#' metadata = as.metadata(
#' bibliographicCitation = c(
#' "van der Plas et al. (2017) Methods in Ecol. & Evol., doi: 10.1111/2041-210x.12728"
#' ),
#' author = "Fons van der Plas",
#' license = "http://creativecommons.org/publicdomain/zero/1.0/"
#' )
#' )
#'
#'
#' traits1 <- as.thesaurus(
#' body_length = as.trait("body_length",
#' expectedUnit = "mm", valueType = "numeric",
#' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"),
#' antenna_length = as.trait("antenna_length",
#' expectedUnit = "mm", valueType = "numeric",
#' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"),
#' metafemur_length = as.trait("metafemur_length",
#' expectedUnit = "mm", valueType = "numeric",
#' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length")
#')
#'
#' dataset1Std <- standardize.traits(dataset1, thesaurus = traits1)
#'
#' # occurrence table:
#'
#' pulldata("heteroptera_raw")
#'
#' dataset2 <- as.traitdata(heteroptera_raw,
#' taxa = "SpeciesID",
#' traits = c("Body_length", "Antenna_Seg1", "Antenna_Seg2",
#' "Antenna_Seg3", "Antenna_Seg4", "Antenna_Seg5", "Hind.Femur_length"),
#' units = "mm",
#' keep = c(sex = "Sex", references = "Source", lifeStage = "Wing_development"),
#' metadata = as.metadata(
#' bibliographicCitation = "Gossner et al. (2015) Ecology, 96:1154. doi: 10.1890/14-2159.1",
#' author = "Martin Gossner",
#' license = "http://creativecommons.org/publicdomain/zero/1.0/"
#' )
#' )
#'
#'dataset2 <- mutate.traitdata(dataset2,
#' antenna_length = Antenna_Seg1 + Antenna_Seg2 + Antenna_Seg3 + Antenna_Seg4 + Antenna_Seg5
#' )
#'
#'
#' traits2 <- as.thesaurus(
#' Body_length = as.trait("body_length",
#' expectedUnit = "mm", valueType = "numeric",
#' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Body_length"),
#' antenna_length = as.trait("antenna_length",
#' expectedUnit = "mm", valueType = "numeric",
#' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Antenna_length"),
#' Hind.Femur_length = as.trait("metafemur_length",
#' expectedUnit = "mm", valueType = "numeric",
#' identifier = "http://t-sita.cesab.org/BETSI_vizInfo.jsp?trait=Femur_length")
#')
#'
#' dataset2Std <- standardize_traits(dataset2, thesaurus = traits2)
#'
#' database <- rbind(dataset1Std, dataset2Std,
#' datasetID = c("vanderplas17", "gossner15"),
#' metadata_as_columns = c("author"))
#' head(database)
rbind.traitdata <- function(...,
metadata = NULL,
datasetID = NULL,
metadata_as_columns = FALSE,
drop = NULL # drop columns that are not present in all datasets
) {
# extract object names of input
input_names <- deparse(substitute(x(...)))
input_names <- strsplit(gsub("[)]", "", gsub("[x(]", "", gsub( " ", "", input_names))), "[,]")[[1]]
input_names <- as.factor(input_names)
# compose list of input objects
input <- list(...)
if(is.null(metadata)) {
metadata <- lapply(input, function(x) attributes(x)$metadata)
has_metadata <- sapply(input, function(x) !is.null(attributes(x)$metadata))
has_id <- sapply(input, function(x) !is.null(attributes(x)$metadata$datasetID))
} else {
metadata <- list(NULL)
}
#add datasetID value as column in core data,
for(i in 1:length(metadata)) {
# get datasetID from metadata, provided vector, or object names (in that order)
if(is.null(metadata[[i]]$datasetID)) {
if(!is.null(datasetID) && length(datasetID) == length(input)) {
metadata[[i]]$datasetID <- datasetID[i]
} else {
metadata[[i]]$datasetID <- input_names[[i]]
}
}
input[[i]]$datasetID <- metadata[[i]]$datasetID
}
names(metadata) <- sapply(metadata, function(x) x$datasetID)
names(input) <- sapply(metadata, function(x) x$datasetID)
##### check for compatibility of used terms in datasets
terms_used <- lapply(input, colnames)
traits_standardized <- sapply(terms_used, function(x) c("traitNameStd") %in% x )
taxa_standardized <- sapply(terms_used, function(x) c("ScientificNameStd") %in% x )
# Check if trait names are compatible
traits <- lapply(input, function(x) levels(x$traitName))
if(all(traits_standardized)) { traitsStd <- lapply(input, function(x) levels(x$traitNameStd)) } else { traitsStd <- NA }
if(length(unlist(traits)) == length(unique(unlist(traits))) && length(unlist(traitsStd)) == length(unique(unlist(traitsStd)))) {
warning("There seems to be no overlap in trait names of the provided datasets. \nIt is recommended to map 'traitNameStd' of each dataset to the same thesaurus or ontology!")
}
# check if taxon names are compatible
taxa <- lapply(input, function(x) levels(x$ScientificName))
if(all(taxa_standardized) ) {taxaStd <- lapply(input, function(x) levels(x$ScientificNameStd)) } else { taxaStd <- NA }
if(length(unlist(taxa)) == length(unique(unlist(taxa))) && length(unlist(taxaStd)) == length(unique(unlist(taxaStd))) ) {
warning("There seems to be no overlap in taxon names of the provided datasets!\nIt is recommended to map 'ScientificNameStd' of each dataset to the same thesaurus or ontology!")
}
# rbind data objects in input list
input <- lapply(input, data.table::as.data.table)
out <- data.table::rbindlist(input, use.names = TRUE, fill = TRUE)
out <- as.data.frame(out)
# match metadata attributes according to datasetID
if(isTRUE(metadata_as_columns) | is.character(metadata_as_columns) && metadata_as_columns %in% names(metadata[[1]]) ) {
##### make metadata lookup table according to input in metadata_as_column
if(is.character(metadata_as_columns)) {
attr_table <- do.call(rbind.data.frame, lapply(metadata, function(x) lapply(x[unique(c("datasetID", metadata_as_columns))], function(y) if(is.null(y)) "NA" else y )))
} else {
attr_table <- do.call(rbind.data.frame, lapply(metadata, function(x) lapply(x[c("datasetID", "datasetName", "author", "license")], function(y) if(is.null(y)) "NA" else y )))
}
# merge lookup table
out <- merge(out, attr_table, by = "datasetID")
}
# sort columns according to glossary of terms
out <- out[, order(match(names(out), glossary$columnName) )]
# maintain attributes
attribs <- attributes(..1)
attribs$names <- attributes(out)$names
attribs$row.names <- seq_along(out[,1])
attributes(out) <- attribs
attr(out, "metadata") <- metadata
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.