R/neotoma2lipd.R

Defines functions neotoma2lipd getPaleoDataNeotoma2 uget getChronDataNeotoma2 getPubNeotoma2 getGeoNeotoma2

Documented in getChronDataNeotoma2 getGeoNeotoma2 getPaleoDataNeotoma2 getPubNeotoma2 neotoma2lipd

#' get geo info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD geo section
getGeoNeotoma2 <- function(site){

  if (!requireNamespace("neotoma2", quietly = TRUE)) {
    stop(
      "Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
      call. = FALSE
    )
  }

  if (!requireNamespace("lubridate", quietly = TRUE)) {
    stop(
      "Package 'lubridate' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
      call. = FALSE
    )
  }

  if(!methods::is(site,"site")){
    stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
  }

  #initialize geo object
  geo <- list()

  #get geo metadata
  sitemeta <- neotoma2::as.data.frame(site)

  geo$latitude <- mean(sitemeta$lat,na.rm = TRUE)
  geo$longitude <-  mean(sitemeta$long,na.rm = TRUE)
  geo$siteName <- sitemeta$sitename
  geo$description <- sitemeta$description
  geo$neotomaSiteId <- as.character(sitemeta$siteid)

  if(!is.na(sitemeta$area)){
    geo$lakeArea <- sitemeta$area
  }

  if(!is.na(sitemeta$elev)){
    geo$elevation <- sitemeta$elev
  }

  if(!is.na(sitemeta$notes)){
    geo$notes <- sitemeta$notes
  }

  return(geo)
}

#' get pub info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD pub section
getPubNeotoma2 <- function(site){

  if (!requireNamespace("neotoma2", quietly = TRUE)) {
    stop(
      "Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
      call. = FALSE
    )
  }


  if(!methods::is(site,"site")){
    stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
  }

  pubMeta <- neotoma2::get_publications(datasetid = site@collunits[[1]]@datasets[[1]]@datasetid)

  nPubs <- length(pubMeta)

  if(nPubs == 0){
    return(NA)
  }

  #initialize pub object
  pub <- vector(mode = "list",length = nPubs)

  #lipd names converter (move this into data)
  #nc <- googlesheets4::read_sheet("1Z44xjSxEDlWnThvYLsHFS9aFAN0FnYh2EdroMs9qe_Q")

  #plug it in
  for(p in 1:length(pub)){
    thisDf <- neotoma2::as.data.frame(pubMeta@publications[[p]])
    ndf <- names(thisDf)
    for(n in ndf){
      neo <- as.character(thisDf[n])
      lipdname <- as.character(nc[nc$neotomaname == n,2])
      if(!is.na(neo) & neo != "NA"){
        if(lipdname == "author"){
          auts <- str_split(neo,pattern = "; ")[[1]]
          aut <- vector(mode = "list",length = length(auts))
          for(aa in 1:length(auts)){
            aut[[aa]]$name <- auts[[aa]][1]
          }
          pub[[p]][[lipdname]] <- aut
        }else{
          pub[[p]][[lipdname]] <- neo
        }
      }
    }

  }

  return(pub)
}

#' get chronData info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD chronData section
getChronDataNeotoma2 <- function(site){


  if (!requireNamespace("neotoma2", quietly = TRUE)) {
    stop(
      "Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
      call. = FALSE
    )
  }


  if(!methods::is(site,"site")){
    stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
  }

  # #get conversion table (change this)
  # cconv <- googlesheets4::read_sheet(ss = "1Z44xjSxEDlWnThvYLsHFS9aFAN0FnYh2EdroMs9qe_Q",sheet = "chronColumns")
  #


  nChronData <- length(site@collunits)

  chronData <- vector(mode = "list",length(nChronData))
  for(cd in seq_len(nChronData)){
    #get all chroncontrols for this collunit
    tcu <- site@collunits[[cd]]@chronologies@chronologies
    ncc <- purrr::map_dbl(tcu, ~ nrow(.x$chroncontrols))
    if(all(ncc == 0)){#no chroncontrols
      return(NULL)
    }

    if(any(ncc == 0)){
      tcu <- tcu[ncc >0]
    }

    nchronologies <- length(tcu)
    mt <- vector(mode = "list",nchronologies)
    for(cc in seq_len(nchronologies)){

      ac4lipd <- tcu[[cc]]@chroncontrols %>%
        dplyr::arrange(depth)

      #calculate new columns

      ac4lipd$uncertainty <- rowMeans(abs(ac4lipd$chroncontrolage - cbind(ac4lipd$agelimityounger,ac4lipd$agelimitolder)),na.rm = TRUE)

      ac4lipd$depth_top <- ac4lipd$depth - ac4lipd$thickness/2
      ac4lipd$depth_bottom <- ac4lipd$depth + ac4lipd$thickness/2

      #rename
      ac4lipd <- ac4lipd %>%
        dplyr::rename(ageYoung = agelimityounger,
                      ageOld = agelimitolder,
                      age = chroncontrolage,
                      age_type = chroncontroltype,
                      ageUnc = uncertainty,
                      neotomaChronConrolId = chroncontrolid)

      i14c <- which(grepl(pattern = "carbon",ac4lipd$age_type,ignore.case = TRUE) & !grepl(pattern = "calib",ac4lipd$age_type,ignore.case = TRUE))

      #separate into age and age14C
      if(length(i14c) > 0){
        ac4lipd$age14C <- NA
        ac4lipd$age14C[i14c] <- ac4lipd$age[i14c]
        ac4lipd$age[i14c] <- NA
        #and uncertainty
        ac4lipd$age14CUnc <- NA
        ac4lipd$age14CUnc[i14c] <- ac4lipd$ageUnc[i14c]
        ac4lipd$ageUnc[i14c] <- NA
      }

      ac4lipd <- ac4lipd %>%
        dplyr::select(depth,starts_with("depth"),thickness,age,starts_with("age"),everything())



      #create chronData measurement table
      acn <- names(ac4lipd)
      mt[[cc]] <- list()

      for(cn in seq_along(acn)){#iterate through columns
        mt[[cc]][[acn[cn]]] <- list()
        mt[[cc]][[acn[cn]]]$number <- cn
        mt[[cc]][[acn[cn]]]$variableName <- acn[cn]
        mt[[cc]][[acn[cn]]]$values <- as.vector(as.matrix(ac4lipd[cn]))


        wr <- which(acn[cn] == cconv$variableName)
        if(length(wr) == 1){
          mt[[cc]][[acn[cn]]]$units <- cconv$units[wr]
        }else{
          mt[[cc]][[acn[cn]]]$units <- "unknown"
        }
        if(length(wr) == 1){
          mt[[cc]][[acn[cn]]]$description <- cconv$description[wr]
        }
        mt[[cc]][[acn[cn]]]$TSid <- paste0("neotomaChronologyID_",site@collunits@collunits[[cd]]@chronologies@chronologies[[cc]]@chronologyid,acn[cn])

      }

    }
    chronData[[cd]]$measurementTable <- mt

  }

  return(chronData)


}

uget <- function(x){
  u <- as.character(na.omit(unique(x)))
  if(length(u) == 1){
    return(u)
  }else{
    return(NA)
  }
}

#' get paleoData info from neotoma
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#' @importFrom methods is
#'
#' @return LiPD paleoData section
getPaleoDataNeotoma2 <- function(site){

  if (!requireNamespace("neotoma2", quietly = TRUE)) {
    stop(
      "Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
      call. = FALSE
    )
  }


  if(!methods::is(site, "site")){
    stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
  }


  #get sample data from neotoma
  sampData <- neotoma2::samples(site)

  #collection units map to paleoData objects
  uCol <- unique(sampData$collunitid)
  npaleoData <- length(uCol)

  paleoData <- vector(mode = "list",npaleoData)
  for(cc in seq_len(npaleoData)){
    #filter to just this cc
    tsd <- dplyr::filter(sampData,collunitid == uCol[cc])

    #get units and other column metadata

    colMeta2get <- c("units","ecologicalgroup","taxongroup","taxonid")

    allMeta <- list()
    for(i in colMeta2get){

      attemp <- tidyr::pivot_wider(tsd,
                                   id_cols = c("depth","age","agetype"),
                                   names_from = c("variablename","elementtype","context"),
                                   values_from = i)


      dataOut <- purrr::map_chr(purrr::array_tree(attemp,2),uget)

      #remove agetype
      dataOut <- dataOut[-3]

      allMeta[[i]] <- dataOut
    }

    #deal with ageUnits
    ageUnits <- unique(attemp$agetype)

    wd <- which(names(allMeta$units) == "depth")
    if(length(wd) == 1){
      allMeta$units[wd] <- "cm"
    }
    wd <- which(names(allMeta$units) == "age")
    if(length(wd) == 1 & length(ageUnits) == 1){
      allMeta$units[wd] <- ageUnits
    }
    #find columns
    rs <- tidyr::pivot_wider(tsd,
                             id_cols = c("depth","age"),
                             names_from = c("variablename","elementtype","context"),
                             values_from = c("value"))

    if(any(purrr::map(rs,class) == "list")){
      stop("Failed to uniquely pivot the neotoma download")
    }

    rs[is.null(rs)] <- NA

    #create measurement table
    #prep the names
    acn <- names(rs) %>%
      stringr::str_remove("_NA") %>%
      stringr::str_remove_all("[^A-Za-z0-9_]")

    names(rs) <- acn


    mt <- vector(mode = "list",length = npaleoData)

    # mt[[cc]] <- list()

    for(cn in seq_along(acn)){#iterate through columns
      mt[[1]][[acn[cn]]] <- list()
      mt[[1]][[acn[cn]]]$number <- cn
      mt[[1]][[acn[cn]]]$variableName <- acn[cn]
      mt[[1]][[acn[cn]]]$values <- as.matrix(rs[,cn])
      mt[[1]][[acn[cn]]]$TSid <- createTSid()
      #add additional column metadata
      for(i in colMeta2get){
        mt[[1]][[acn[cn]]][[i]] <- allMeta[[i]][cn]
        #check names
        checkName <- names(allMeta[[i]][cn]) %>%
          stringr::str_remove("_NA") %>%
          stringr::str_remove_all("[^A-Za-z0-9_]")

          if(length(checkName) > 0){
            if(checkName != acn[cn]){
              stop(paste("variable name mismatch",checkName,"!=",acn[cn]))
            }
          }


        #print(allMeta[[i]][cn])
      }

    }


    paleoData[[cc]]$measurementTable <- mt

  }

  return(paleoData)


}

#build into lipd file

#' Convert a neotoma dataset into a LiPD object
#'
#' @param site a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package
#'
#' @return a LiPD object
#' @export
#'
#' @importFrom methods is
#'
#' @examples
#'
#' \dontrun{
#' B <- neotoma2::get_sites(sitename = "Bambili 2")
#' D <- neotoma2::get_downloads(B)
#' L <- neotoma2lipd(D)
#' }
neotoma2lipd <- function(site){

  if (!requireNamespace("neotoma2", quietly = TRUE)) {
    stop(
      "Package 'neotoma2' must be installed to use this function. Install it from github using `remotes::install_github('neotomadb/neotoma2')`",
      call. = FALSE
    )
  }

  #initialize LiPD
  if(methods::is(site, "sites")){
    if(length(site) == 1){
      site <- site@sites[[1]]
    }else if(length(site) > 1){
      stop("There are multiple sites in this object, please select just 1 site to convert to LiPD")
    }else{
      stop("There are no sites in this sites object")
    }
  }
  if(!methods::is(site, "site")){
    stop("You must enter a single 'site' (not multiple 'sites') object generated by the 'neotoma2' package")
  }


  L <- list()

  L$geo <- getGeoNeotoma2(site)
  L$pub <- getPubNeotoma2(site)
  L$paleoData <- getPaleoDataNeotoma2(site)
  L$chronData <- getChronDataNeotoma2(site)

  sn <- L$geo$siteName
  fa <- strsplit(L$pub[[1]]$author[[1]]$name,",")[[1]][1]
  py <- L$pub[[1]]$year

  L$dataSetName <- paste(sn,fa,py,sep = ".") %>%
    str_remove_all("[^a-zA-Z0-9.]")

  L$datasetId <- paste0("neotomaSiteId_",site@siteid)
  timestamp <- lubridate::now(tzone = "UTC")
  version = "1.0.0"
  thisChange <- list(version = "1.0.0",
                     curator = "lipd2neotoma",
                     timestamp = paste(timestamp,lubridate::tz(timestamp)),
                     notes  =  "Starting the changelog")

  L$changelog <- list(thisChange)

  L$lipdVersion <- 1.3

  dataContributor <- neotoma2::get_contacts(familyname = fa)
  L$dataContributor <- try(dataContributor@contacts[[1]]$contactid,silent = TRUE)
  L$originalDataUrl <- paste0("https://data.neotomadb.org/datasets/",site@collunits@collunits[[1]]@datasets@datasets[[1]]@datasetid)

  #make this work with multiple collection units and datasets
  L$dataDoi <- site@collunits@collunits[[1]]@datasets@datasets[[1]]@doi %>%
    unlist() %>%
    paste(collapse = "; ")

  L$createdBy <- "neotoma2lipd"

  #take a guess at archive type
  if(grepl(L$geo$siteName,pattern = "lake",ignore.case = T)){
    L$archiveType <- "LakeSediment"
  }else if(grepl(L$geo$siteName,pattern = "bog",ignore.case = T) |
           grepl(L$geo$siteName,pattern = "fen",ignore.case = T) |
           grepl(L$geo$siteName,pattern = "peat",ignore.case = T)){
    L$archiveType <- "Peat"
  }else{
    L$archiveType <- "unknown"
  }



  L <- new_lipd(L)

  return(L)




}
nickmckay/lipdR documentation built on April 13, 2025, 5:58 p.m.