#' get geo info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD geo section
getGeoNeotoma2 <- function(site){
if (!requireNamespace("neotoma2", quietly = TRUE)) {
stop(
"Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
call. = FALSE
)
}
if (!requireNamespace("lubridate", quietly = TRUE)) {
stop(
"Package 'lubridate' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
call. = FALSE
)
}
if(!methods::is(site,"site")){
stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
}
#initialize geo object
geo <- list()
#get geo metadata
sitemeta <- neotoma2::as.data.frame(site)
geo$latitude <- mean(sitemeta$lat,na.rm = TRUE)
geo$longitude <- mean(sitemeta$long,na.rm = TRUE)
geo$siteName <- sitemeta$sitename
geo$description <- sitemeta$description
geo$neotomaSiteId <- as.character(sitemeta$siteid)
if(!is.na(sitemeta$area)){
geo$lakeArea <- sitemeta$area
}
if(!is.na(sitemeta$elev)){
geo$elevation <- sitemeta$elev
}
if(!is.na(sitemeta$notes)){
geo$notes <- sitemeta$notes
}
return(geo)
}
#' get pub info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD pub section
getPubNeotoma2 <- function(site){
if (!requireNamespace("neotoma2", quietly = TRUE)) {
stop(
"Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
call. = FALSE
)
}
if(!methods::is(site,"site")){
stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
}
pubMeta <- neotoma2::get_publications(datasetid = site@collunits[[1]]@datasets[[1]]@datasetid)
nPubs <- length(pubMeta)
if(nPubs == 0){
return(NA)
}
#initialize pub object
pub <- vector(mode = "list",length = nPubs)
#lipd names converter (move this into data)
#nc <- googlesheets4::read_sheet("1Z44xjSxEDlWnThvYLsHFS9aFAN0FnYh2EdroMs9qe_Q")
#plug it in
for(p in 1:length(pub)){
thisDf <- neotoma2::as.data.frame(pubMeta@publications[[p]])
ndf <- names(thisDf)
for(n in ndf){
neo <- as.character(thisDf[n])
lipdname <- as.character(nc[nc$neotomaname == n,2])
if(!is.na(neo) & neo != "NA"){
if(lipdname == "author"){
auts <- str_split(neo,pattern = "; ")[[1]]
aut <- vector(mode = "list",length = length(auts))
for(aa in 1:length(auts)){
aut[[aa]]$name <- auts[[aa]][1]
}
pub[[p]][[lipdname]] <- aut
}else{
pub[[p]][[lipdname]] <- neo
}
}
}
}
return(pub)
}
#' get chronData info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD chronData section
getChronDataNeotoma2 <- function(site){
if (!requireNamespace("neotoma2", quietly = TRUE)) {
stop(
"Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
call. = FALSE
)
}
if(!methods::is(site,"site")){
stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
}
# #get conversion table (change this)
# cconv <- googlesheets4::read_sheet(ss = "1Z44xjSxEDlWnThvYLsHFS9aFAN0FnYh2EdroMs9qe_Q",sheet = "chronColumns")
#
nChronData <- length(site@collunits)
chronData <- vector(mode = "list",length(nChronData))
for(cd in seq_len(nChronData)){
#get all chroncontrols for this collunit
tcu <- site@collunits[[cd]]@chronologies@chronologies
ncc <- purrr::map_dbl(tcu, ~ nrow(.x$chroncontrols))
if(all(ncc == 0)){#no chroncontrols
return(NULL)
}
if(any(ncc == 0)){
tcu <- tcu[ncc >0]
}
nchronologies <- length(tcu)
mt <- vector(mode = "list",nchronologies)
for(cc in seq_len(nchronologies)){
ac4lipd <- tcu[[cc]]@chroncontrols %>%
dplyr::arrange(depth)
#calculate new columns
ac4lipd$uncertainty <- rowMeans(abs(ac4lipd$chroncontrolage - cbind(ac4lipd$agelimityounger,ac4lipd$agelimitolder)),na.rm = TRUE)
ac4lipd$depth_top <- ac4lipd$depth - ac4lipd$thickness/2
ac4lipd$depth_bottom <- ac4lipd$depth + ac4lipd$thickness/2
#rename
ac4lipd <- ac4lipd %>%
dplyr::rename(ageYoung = agelimityounger,
ageOld = agelimitolder,
age = chroncontrolage,
age_type = chroncontroltype,
ageUnc = uncertainty,
neotomaChronConrolId = chroncontrolid)
i14c <- which(grepl(pattern = "carbon",ac4lipd$age_type,ignore.case = TRUE) & !grepl(pattern = "calib",ac4lipd$age_type,ignore.case = TRUE))
#separate into age and age14C
if(length(i14c) > 0){
ac4lipd$age14C <- NA
ac4lipd$age14C[i14c] <- ac4lipd$age[i14c]
ac4lipd$age[i14c] <- NA
#and uncertainty
ac4lipd$age14CUnc <- NA
ac4lipd$age14CUnc[i14c] <- ac4lipd$ageUnc[i14c]
ac4lipd$ageUnc[i14c] <- NA
}
ac4lipd <- ac4lipd %>%
dplyr::select(depth,starts_with("depth"),thickness,age,starts_with("age"),everything())
#create chronData measurement table
acn <- names(ac4lipd)
mt[[cc]] <- list()
for(cn in seq_along(acn)){#iterate through columns
mt[[cc]][[acn[cn]]] <- list()
mt[[cc]][[acn[cn]]]$number <- cn
mt[[cc]][[acn[cn]]]$variableName <- acn[cn]
mt[[cc]][[acn[cn]]]$values <- as.vector(as.matrix(ac4lipd[cn]))
wr <- which(acn[cn] == cconv$variableName)
if(length(wr) == 1){
mt[[cc]][[acn[cn]]]$units <- cconv$units[wr]
}else{
mt[[cc]][[acn[cn]]]$units <- "unknown"
}
if(length(wr) == 1){
mt[[cc]][[acn[cn]]]$description <- cconv$description[wr]
}
mt[[cc]][[acn[cn]]]$TSid <- paste0("neotomaChronologyID_",site@collunits@collunits[[cd]]@chronologies@chronologies[[cc]]@chronologyid,acn[cn])
}
}
chronData[[cd]]$measurementTable <- mt
}
return(chronData)
}
uget <- function(x){
u <- as.character(na.omit(unique(x)))
if(length(u) == 1){
return(u)
}else{
return(NA)
}
}
#' get paleoData info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD paleoData section
getPaleoDataNeotoma2 <- function(site){
if (!requireNamespace("neotoma2", quietly = TRUE)) {
stop(
"Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
call. = FALSE
)
}
if(!methods::is(site, "site")){
stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
}
#get sample data from neotoma
sampData <- neotoma2::samples(site)
#collection units map to paleoData objects
uCol <- unique(sampData$collunitid)
npaleoData <- length(uCol)
paleoData <- vector(mode = "list",npaleoData)
for(cc in seq_len(npaleoData)){
#filter to just this cc
tsd <- dplyr::filter(sampData,collunitid == uCol[cc])
#get units and other column metadata
colMeta2get <- c("units","ecologicalgroup","taxongroup","taxonid")
allMeta <- list()
for(i in colMeta2get){
attemp <- tidyr::pivot_wider(tsd,
id_cols = c("depth","age","agetype"),
names_from = c("variablename","elementtype","context"),
values_from = i)
dataOut <- purrr::map_chr(purrr::array_tree(attemp,2),uget)
#remove agetype
dataOut <- dataOut[-3]
allMeta[[i]] <- dataOut
}
#deal with ageUnits
ageUnits <- unique(attemp$agetype)
wd <- which(names(allMeta$units) == "depth")
if(length(wd) == 1){
allMeta$units[wd] <- "cm"
}
wd <- which(names(allMeta$units) == "age")
if(length(wd) == 1 & length(ageUnits) == 1){
allMeta$units[wd] <- ageUnits
}
#find columns
rs <- tidyr::pivot_wider(tsd,
id_cols = c("depth","age"),
names_from = c("variablename","elementtype","context"),
values_from = c("value"))
if(any(purrr::map(rs,class) == "list")){
stop("Failed to uniquely pivot the neotoma download")
}
rs[is.null(rs)] <- NA
#create measurement table
#prep the names
acn <- names(rs) %>%
stringr::str_remove("_NA") %>%
stringr::str_remove_all("[^A-Za-z0-9_]")
names(rs) <- acn
mt <- vector(mode = "list",length = npaleoData)
# mt[[cc]] <- list()
for(cn in seq_along(acn)){#iterate through columns
mt[[1]][[acn[cn]]] <- list()
mt[[1]][[acn[cn]]]$number <- cn
mt[[1]][[acn[cn]]]$variableName <- acn[cn]
mt[[1]][[acn[cn]]]$values <- as.matrix(rs[,cn])
mt[[1]][[acn[cn]]]$TSid <- createTSid()
#add additional column metadata
for(i in colMeta2get){
mt[[1]][[acn[cn]]][[i]] <- allMeta[[i]][cn]
#check names
checkName <- names(allMeta[[i]][cn]) %>%
stringr::str_remove("_NA") %>%
stringr::str_remove_all("[^A-Za-z0-9_]")
if(length(checkName) > 0){
if(checkName != acn[cn]){
stop(paste("variable name mismatch",checkName,"!=",acn[cn]))
}
}
#print(allMeta[[i]][cn])
}
}
paleoData[[cc]]$measurementTable <- mt
}
return(paleoData)
}
#build into lipd file
#' Convert a neotoma dataset into a LiPD object
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#'
#' @return a LiPD object
#' @export
#'
#' @importFrom methods is
#'
#' @examples
#'
#' \dontrun{
#' B <- neotoma2::get_sites(sitename = "Bambili 2")
#' D <- neotoma2::get_downloads(B)
#' L <- neotoma2lipd(D)
#' }
neotoma2lipd <- function(site){
if (!requireNamespace("neotoma2", quietly = TRUE)) {
stop(
"Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
call. = FALSE
)
}
#initialize LiPD
if(methods::is(site, "sites")){
if(length(site) == 1){
site <- site@sites[[1]]
}else if(length(site) > 1){
stop("There are multiple sites in this object, please select just 1 site to convert to LiPD")
}else{
stop("There are no sites in this sites object")
}
}
if(!methods::is(site, "site")){
stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
}
L <- list()
L$geo <- getGeoNeotoma2(site)
L$pub <- getPubNeotoma2(site)
L$paleoData <- getPaleoDataNeotoma2(site)
L$chronData <- getChronDataNeotoma2(site)
sn <- L$geo$siteName
fa <- strsplit(L$pub[[1]]$author[[1]]$name,",")[[1]][1]
py <- L$pub[[1]]$year
L$dataSetName <- paste(sn,fa,py,sep = ".") %>%
str_remove_all("[^a-zA-Z0-9.]")
L$datasetId <- paste0("neotomaSiteId_",site@siteid)
timestamp <- lubridate::now(tzone = "UTC")
version = "1.0.0"
thisChange <- list(version = "1.0.0",
curator = "lipd2neotoma",
timestamp = paste(timestamp,lubridate::tz(timestamp)),
notes = "Starting the changelog")
L$changelog <- list(thisChange)
L$lipdVersion <- 1.3
dataContributor <- neotoma2::get_contacts(familyname = fa)
L$dataContributor <- try(dataContributor@contacts[[1]]$contactid,silent = TRUE)
L$originalDataUrl <- paste0("https://data.neotomadb.org/datasets/",site@collunits@collunits[[1]]@datasets@datasets[[1]]@datasetid)
#make this work with multiple collection units and datasets
L$dataDoi <- site@collunits@collunits[[1]]@datasets@datasets[[1]]@doi %>%
unlist() %>%
paste(collapse = "; ")
L$createdBy <- "neotoma2lipd"
#take a guess at archive type
if(grepl(L$geo$siteName,pattern = "lake",ignore.case = T)){
L$archiveType <- "LakeSediment"
}else if(grepl(L$geo$siteName,pattern = "bog",ignore.case = T) |
grepl(L$geo$siteName,pattern = "fen",ignore.case = T) |
grepl(L$geo$siteName,pattern = "peat",ignore.case = T)){
L$archiveType <- "Peat"
}else{
L$archiveType <- "unknown"
}
L <- new_lipd(L)
return(L)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.