# Vignette setup: merge each chunk's source and output into a single block.
knitr::opts_chunk$set(
  collapse = TRUE
)

# Attach fortedata with library() so a missing package fails loudly here
# rather than returning FALSE (as require() does) and breaking later chunks.
library(fortedata)

# include utils.R functions that are not exported from fortedata

#' Read a CSV file from the fortedata `extdata` directory
#'
#' @param ... Path components below `extdata`, forwarded to `system.file()`.
#' @return The parsed file, converted with `weak_as_tibble()`.
#' @noRd
read_csv_file <- function(...) {
  weak_as_tibble(
    read.csv(
      # mustWork = TRUE turns a missing file into an immediate error.
      system.file("extdata", ..., package = "fortedata", mustWork = TRUE),
      # Empty strings, and ONLY empty strings, should be interpreted as
      # missing values.
      na.strings = "",
      stringsAsFactors = FALSE
    )
  )
}

#' weak_tibble - use tibble() if available but fall back to
#' data.frame() if necessary
#'
#' Not a user-facing function.
#'
#' @param ... Columns, forwarded to `data.frame()`.
#' @param .force_df If `TRUE`, always return a plain data frame.
#' @return A tibble when the tibble package is installed and `.force_df` is
#'   `FALSE`; otherwise a data frame.
#' @noRd
weak_tibble <- function(..., .force_df = FALSE) {
  no_tibble <- !suppressWarnings(requireNamespace("tibble", quietly = TRUE))
  out <- data.frame(..., stringsAsFactors = FALSE)
  if (!(.force_df || no_tibble)) {
    out <- weak_as_tibble(out)
  }
  out
}

#' weak_as_tibble - use as_tibble() if available but fall back to
#' as.data.frame() if necessary
#'
#' @param ... Object (and further arguments) forwarded to
#'   `tibble::as_tibble()` or `as.data.frame()`.
#' @param .force_df If `TRUE`, always return a plain data frame.
#' @return A tibble when the tibble package is installed and `.force_df` is
#'   `FALSE`; otherwise a data frame.
#' @noRd
weak_as_tibble <- function(..., .force_df = FALSE) {
  no_tibble <- !suppressWarnings(requireNamespace("tibble", quietly = TRUE))
  if (.force_df || no_tibble) {
    as.data.frame(..., stringsAsFactors = FALSE)
  } else {
    tibble::as_tibble(...)
  }
}

#' Data tables' metadata.
#'
#' Reads `forte_table_metadata.csv` and optionally restricts it to one table.
#'
#' @param table Name of the table whose metadata is wanted, or `NULL`
#'   (the default) to return the metadata for every table.
#' @return The metadata rows, converted with `weak_as_tibble()`.
#' @noRd
fd_metadata <- function(table = NULL) {
  md <- read_csv_file("forte_table_metadata.csv")
  if (!is.null(table)) {
    # `%in%` never returns NA. With `==`, rows whose `table` entry is missing
    # came back as all-NA rows and also defeated the emptiness check below.
    md <- md[md$table %in% table, , drop = FALSE]
    if (nrow(md) < 1) {
      stop("Table ", table, " is not present in metadata.")
    }
  }
  weak_as_tibble(md)
}

#' Split the SubplotID column into more useful individual columns
#'
#' Adds `replicate` (character 1 of `subplot_id`), `plot` (character 3, as an
#' integer) and `subplot` (character 4).
#'
#' @param df A data frame with a `subplot_id` column.
#' @return `df` with the three extra columns.
#' @noRd
split_subplot_id <- function(df) {
  stopifnot("subplot_id" %in% names(df))
  df$replicate <- substr(df$subplot_id, 1, 1)
  df$plot <- as.integer(substr(df$subplot_id, 3, 3))
  df$subplot <- substr(df$subplot_id, 4, 4)
  df
}

#' Report the usage conditions attached to a data set
#'
#' Warns when the data are unpublished, then prints the citation and the
#' contact person as messages.
#'
#' @param x The data object; returned unchanged.
#' @param published Have the data been published?
#' @param contact_person Who to contact about using the data.
#' @param citation Citation for the data.
#' @return `x`, invisibly.
#' @noRd
data_conditions <- function(x, published = FALSE, contact_person, citation) {
  if (!published) {
    warning("These data are unpublished. Please contact ",
            contact_person, " to ask about using")
  }
  message("Data citation: ", citation)
  message("Contact person: ", contact_person)
  # add the above information to `x` as attributes...
  invisible(x)
}
At the core of FoRTE is the collection of heterogeneous data from many different instruments, requiring multiple approaches to measure different environmental variables. Below we outline the collection and preparation procedures for all data products in `fortedata`.
Each data product listed includes a Data Preparation section that covers:
1) the location of the raw data in Google Drive; 2) the format and structure of the raw data; 3) any pre-processing of the data; and 4) an outline of the process needed to move from raw data to package-ready data.
CST data are located in the `/data/lidar` directory on Google Drive.
The raw data from the PCL instrument are in the form of a two-column .csv file containing raw distance-from-instrument values (i.e., lidar pulse returns) and intensity values for each measured transect. These data are processed externally using `forestr` in R, and the output for each transect comprises several files: 1) a two-row .csv file containing a header row of CST metrics and a second row containing the values for those metrics; 2) an adjusted leaf area/vegetation area hit grid matrix; 3) a three-column file of x, z, and VAI; 4) a summary matrix file containing x, z, and column-specific values (NEED TO ADJUST); and 5) a hit grid plot of VAI.
The first of these files, the two-row CST metrics files for each transect, are collated into a single file in which each row represents one transect: `canopy_structural_traits.csv` in `/inst/extdata/`, the source file from which `fd_canopy_structure` draws.
# Read the collated canopy structural traits table shipped with fortedata
cst <- read_csv_file("canopy_structural_traits.csv")

# Show the structure of the table
str(cst)
This section summarizes general formatting and preparation guidelines for all data shared and uploaded to fortedata.
forte_table_metadata.csv
(if not already listed)

# call forte_table_metadata
fd_metadata()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.