#----------------------------------------------------------
# Reset R's brain
# Note: rm(list = ls()) only removes the objects listed by ls();
# attached packages and options() are left untouched.
#----------------------------------------------------------
rm(list = ls())
#-------------------------------------------------
# Make sure the working directory is set to the desired location
#-------------------------------------------------
getwd()

#-------------------------------------------------
# If the working directory is incorrect
# Uncomment the following command and change the directory path
# before execution
#-------------------------------------------------
# setwd("<path_to_dir>")
#-------------------------------------------------
# Make sure the rtry package is installed before loading
#-------------------------------------------------
library(rtry)

# Report which version of rtry is in use
packageVersion("rtry")
#-------------------------------------------------
# Obtain and print the path to the sample dataset within the rtry package
#-------------------------------------------------
path_to_data <- system.file(
  "testdata", "data_TRY_15160.txt",
  package = "rtry"
)
path_to_data
#-------------------------------------------------
# Import TRY data requests into data frames
#-------------------------------------------------
TRYdata1 <- rtry_import(path_to_data)
# There are two ways to view the imported data:
# Method 1: Print the first 6 rows of the data using the head() function
head(TRYdata1)
# Method 2: Open the imported data in the data viewer using the View()
# function (only available in RStudio)
# Guarded with interactive() so the script can also be run non-interactively.
if (interactive()) {
  View(TRYdata1)
}
# Import another TRY dataset and view the data
# Obtain and print the path to the second sample dataset within the
# rtry package
path_to_data <- system.file(
  "testdata", "data_TRY_15161.txt",
  package = "rtry"
)
path_to_data

# Import the second TRY data request into a data frame
TRYdata2 <- rtry_import(path_to_data)

# Print the first rows, then open the data viewer
# (viewer call guarded so the script can also be run non-interactively)
head(TRYdata2)
if (interactive()) {
  View(TRYdata2)
}
#-------------------------------------------------
# Explore the imported data using rtry_explore()
# 1. Group the input data based on TraitID and TraitName
# 2. Group the input data based on AccSpeciesID, AccSpeciesName,
#    TraitID and TraitName
# 3. Group the input data based on DataID, DataName, TraitID and TraitName,
#    and sort by TraitID
# Note: An entry whose TraitID is NA is an ancillary data entry
#-------------------------------------------------
# Explore TRYdata1
TRYdata1_explore_trait <- rtry_explore(TRYdata1, TraitID, TraitName)

# Print the entire data frame
TRYdata1_explore_trait
# View(TRYdata1_explore_trait)

TRYdata1_explore_species <- rtry_explore(
  TRYdata1,
  AccSpeciesID, AccSpeciesName, TraitID, TraitName
)
TRYdata1_explore_species
# View(TRYdata1_explore_species)

TRYdata1_explore_anc <- rtry_explore(
  TRYdata1,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
TRYdata1_explore_anc
# View(TRYdata1_explore_anc)
# Explore TRYdata2

# Group the input data based on TraitID and TraitName
TRYdata2_explore_trait <- rtry_explore(TRYdata2, TraitID, TraitName)
TRYdata2_explore_trait
# View(TRYdata2_explore_trait)

# Group the input data based on AccSpeciesID, AccSpeciesName,
# TraitID and TraitName
# Note: An entry whose TraitID is NA is an ancillary data entry
TRYdata2_explore_species <- rtry_explore(
  TRYdata2,
  AccSpeciesID, AccSpeciesName, TraitID, TraitName
)
TRYdata2_explore_species
# View(TRYdata2_explore_species)

# Group the input data based on DataID, DataName, TraitID and TraitName
# Then sort the output by TraitID using the sortBy argument
TRYdata2_explore_anc <- rtry_explore(
  TRYdata2,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
TRYdata2_explore_anc
# View(TRYdata2_explore_anc)
# Combine TRYdata1 and TRYdata2 by rows
TRYdata <- rtry_bind_row(TRYdata1, TRYdata2)

# View the combined data
TRYdata
head(TRYdata)
# View(TRYdata)
# Explore the combined TRYdata

# Group the input data based on TraitID and TraitName
TRYdata_explore_trait <- rtry_explore(TRYdata, TraitID, TraitName)
TRYdata_explore_trait
# View(TRYdata_explore_trait)

# Group the input data based on AccSpeciesID, AccSpeciesName,
# TraitID and TraitName
# Note: An entry whose TraitID is NA is an ancillary data entry
TRYdata_explore_species <- rtry_explore(
  TRYdata,
  AccSpeciesID, AccSpeciesName, TraitID, TraitName
)
TRYdata_explore_species
# View(TRYdata_explore_species)

# Group the input data based on DataID, DataName, TraitID and TraitName
# Then sort the output by TraitID using the sortBy argument
TRYdata_explore_anc <- rtry_explore(
  TRYdata,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
TRYdata_explore_anc
# View(TRYdata_explore_anc)
# Remove the column V28 from the combined data
workdata <- rtry_remove_col(TRYdata, V28)

# Select relevant columns directly
workdata <- rtry_select_col(
  workdata,
  ObsDataID, ObservationID, AccSpeciesID, AccSpeciesName,
  ValueKindName, TraitID, TraitName, DataID, DataName,
  OriglName, OrigValueStr, OrigUnitStr, StdValue, UnitName,
  OrigObsDataID, ErrorRisk, Comment
)
# First identify relevant ancillary data using the rtry_explore function
workdata_explore_anc <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
workdata_explore_anc
# View(workdata_explore_anc)

# Select all trait records and only the following ancillary data
#   59   Latitude
#   60   Longitude
#   61   Altitude
#   6601 Sampling date
#   327  Exposition
#   413  Plant developmental status / plant age / maturity / plant life stage
#   1961 Health status of plants (vitality)
#   113  Reference / source
workdata <- rtry_select_row(
  workdata,
  TraitID > 0 | DataID %in% c(59, 60, 61, 6601, 327, 413, 1961, 113)
)
workdata
# View(workdata)

# Double check if all the traits and necessary ancillary data are selected
workdata_explore_anc <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
workdata_explore_anc
# View(workdata_explore_anc)
# Save workdata_unfiltered as backup
workdata_unfiltered <- workdata

# Load workdata_unfiltered
# (re-run from here to restart the exclusion steps from the unfiltered copy)
workdata <- workdata_unfiltered
#-------------------------------------------------
# Select the rows where DataID is 413, i.e. the data containing the plant
# development status
# Then explore the unique values of the OrigValueStr within the selected data
#-------------------------------------------------
tmp_unfiltered <- rtry_select_row(workdata, DataID %in% 413)
tmp_unfiltered <- rtry_explore(
  tmp_unfiltered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue, Comment,
  sortBy = OrigValueStr
)
tmp_unfiltered
# View(tmp_unfiltered)

#-------------------------------------------------
# Criteria
# 1. DataID equals to 413
# 2. OrigValueStr equals to "juvenile" or "saplings"
#-------------------------------------------------
workdata <- rtry_exclude(
  workdata,
  (DataID %in% 413) & (OrigValueStr %in% c("juvenile", "saplings")),
  baseOn = ObservationID
)
workdata
# View(workdata)

#-------------------------------------------------
# Double check the workdata to ensure the excluding worked as expected
# Select the rows where DataID is 413, i.e. the data containing the plant
# development status
# Then explore the unique values of the OrigValueStr within the selected data
#-------------------------------------------------
tmp_filtered <- rtry_select_row(workdata, DataID %in% 413)
tmp_filtered <- rtry_explore(
  tmp_filtered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue, Comment,
  sortBy = OrigValueStr
)
tmp_filtered
# View(tmp_filtered)
#-------------------------------------------------
# To further confirm whether the trait and/or ancillary data were also
# removed, explore the data
# Group the input data based on DataID, DataName, TraitID and TraitName
# Then sort the output by TraitID using the sortBy argument
#-------------------------------------------------
workdata_explore_anc_filtered <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
workdata_explore_anc_filtered
# View(workdata_explore_anc_filtered)
#-------------------------------------------------
# Select only the geo-referenced observations, i.e. with DataID 59 Latitude
# Set getAncillary to TRUE to obtain (keep) all traits and ancillary data
#-------------------------------------------------
workdata <- rtry_select_row(workdata, DataID %in% 59, getAncillary = TRUE)

#-------------------------------------------------
# Explore the selected geo-referenced observations
# Select the rows that contain DataID 59, i.e. latitude information
# Then explore the unique values of the StdValue within the selected data
#-------------------------------------------------
tmp_unfiltered <- rtry_select_row(workdata, DataID %in% 59)
tmp_unfiltered <- rtry_explore(
  tmp_unfiltered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue, Comment,
  sortBy = StdValue
)
tmp_unfiltered
# View(tmp_unfiltered)

#-------------------------------------------------
# Exclude observations using latitude information
# Criteria
# 1. DataID equals to 59
# 2. StdValue smaller than 40 or NA
#-------------------------------------------------
workdata <- rtry_exclude(
  workdata,
  (DataID %in% 59) & (StdValue < 40 | is.na(StdValue)),
  baseOn = ObservationID
)
workdata
# View(workdata)

#-------------------------------------------------
# Double check the workdata to ensure the excluding worked as expected
# Select the rows where DataID is 59 (Latitude)
# Then explore the unique values of the StdValue within the selected data
# Sort the exploration by StdValue
#-------------------------------------------------
tmp_filtered <- rtry_select_row(workdata, DataID %in% 59)
tmp_filtered <- rtry_explore(
  tmp_filtered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue, Comment,
  sortBy = StdValue
)
tmp_filtered
# View(tmp_filtered)
#-------------------------------------------------
# Select only the geo-referenced observations with DataID 60 Longitude
# Set getAncillary to TRUE to obtain (keep) all traits and ancillary data
#-------------------------------------------------
workdata <- rtry_select_row(workdata, DataID %in% 60, getAncillary = TRUE)

#-------------------------------------------------
# Select the rows that contain DataID 60, i.e. longitude information
# Then explore the unique values of the StdValue within the selected data
#-------------------------------------------------
tmp_unfiltered <- rtry_select_row(workdata, DataID %in% 60)
tmp_unfiltered <- rtry_explore(
  tmp_unfiltered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue, Comment,
  sortBy = StdValue
)
tmp_unfiltered
# View(tmp_unfiltered)

#-------------------------------------------------
# Exclude observations using longitude information
# Criteria
# 1. DataID equals to 60
# 2. StdValue smaller than 10 or larger than 60 or NA
#-------------------------------------------------
workdata <- rtry_exclude(
  workdata,
  (DataID %in% 60) & (StdValue < 10 | StdValue > 60 | is.na(StdValue)),
  baseOn = ObservationID
)
workdata
# View(workdata)

#-------------------------------------------------
# Double check the workdata to ensure the excluding worked as expected
# Select the rows where DataID is 60 (Longitude)
# Then explore the unique values of the StdValue within the selected data
# Sort the exploration by StdValue
#-------------------------------------------------
tmp_filtered <- rtry_select_row(workdata, DataID %in% 60)
tmp_filtered <- rtry_explore(
  tmp_filtered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue, Comment,
  sortBy = StdValue
)
tmp_filtered
# View(tmp_filtered)
#-------------------------------------------------
# Group the input data based on DataID, DataName, TraitID and TraitName
# Then sort the output by TraitID using the sortBy argument
#-------------------------------------------------
tmp_unfiltered <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
tmp_unfiltered
# View(tmp_unfiltered)

#-------------------------------------------------
# Criteria
# 1. DataID equals to 7222, 7223 or 6598
#-------------------------------------------------
workdata <- rtry_exclude(
  workdata,
  DataID %in% c(7222, 7223, 6598),
  baseOn = ObsDataID
)
workdata
# View(workdata)

#-------------------------------------------------
# Double check the workdata to ensure the excluding worked as expected
# Group the input data based on DataID, DataName, TraitID and TraitName
# Then sort the output by TraitID using the sortBy argument
#-------------------------------------------------
tmp_filtered <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName,
  sortBy = TraitID
)
tmp_filtered
# View(tmp_filtered)
#-------------------------------------------------
# Exclusion based on StdValue
#-------------------------------------------------

#-------------------------------------------------
# Select the rows where DataID is 6582, 6583 and 6584, i.e. the data
# containing the SLA information
# Then explore the unique values of the StdValue within the selected data
#-------------------------------------------------
tmp_unfiltered <- rtry_select_row(workdata, DataID %in% c(6582, 6583, 6584))
tmp_unfiltered <- rtry_explore(
  tmp_unfiltered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue,
  UnitName, Comment,
  sortBy = StdValue
)
tmp_unfiltered
# View(tmp_unfiltered)

#-------------------------------------------------
# Criteria
# 1. DataID equals to 6582, 6583 or 6584
# 2. StdValue smaller than 5
#-------------------------------------------------
workdata <- rtry_exclude(
  workdata,
  (DataID %in% c(6582, 6583, 6584)) & (StdValue < 5),
  baseOn = ObsDataID
)
workdata
# View(workdata)

#-------------------------------------------------
# Double check the workdata to ensure the excluding worked as expected
# Select the rows where DataID is 6582, 6583 and 6584, i.e. the data
# containing the SLA information
# Then explore the unique values of the StdValue within the selected data
#-------------------------------------------------
tmp_filtered <- rtry_select_row(workdata, DataID %in% c(6582, 6583, 6584))
tmp_filtered <- rtry_explore(
  tmp_filtered,
  DataID, DataName, OriglName, OrigValueStr, OrigUnitStr, StdValue,
  UnitName, Comment,
  sortBy = StdValue
)
tmp_filtered
# View(tmp_filtered)
#-------------------------------------------------
# Exclusion based on ErrorRisk
#-------------------------------------------------

#-------------------------------------------------
# Group the input data based on DataID, DataName, TraitID, TraitName
# and ErrorRisk
# Then sort the output by ErrorRisk using the sortBy argument
#-------------------------------------------------
tmp_unfiltered <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName, ErrorRisk,
  sortBy = ErrorRisk
)
tmp_unfiltered
# View(tmp_unfiltered)

#-------------------------------------------------
# Criteria
# 1. ErrorRisk larger than or equal to 3
#-------------------------------------------------
workdata <- rtry_exclude(workdata, ErrorRisk >= 3, baseOn = ObsDataID)
workdata
# View(workdata)

#-------------------------------------------------
# Double check the workdata to ensure the excluding worked as expected
# Group the input data based on DataID, DataName, TraitID, TraitName
# and ErrorRisk
# Then sort the output by ErrorRisk using the sortBy argument
#-------------------------------------------------
tmp_filtered <- rtry_explore(
  workdata,
  DataID, DataName, TraitID, TraitName, ErrorRisk,
  sortBy = ErrorRisk
)
tmp_filtered
# View(tmp_filtered)
#-------------------------------------------------
# Duplicate removal based on OrigObsDataID
#-------------------------------------------------

#-------------------------------------------------
# The TRY database provides an identifier, OrigObsDataID, for duplicate
# entries
# This identifier is used within rtry_remove_dup() for duplicates removal
# Note: if the column OrigObsDataID has been removed, the function will
# not work
#-------------------------------------------------
workdata <- rtry_remove_dup(workdata)
#-------------------------------------------------
# Exclude
# 1. All entries with "" in TraitID
# 2. Potential categorical traits that don't have a StdValue
# 3. Traits that have not yet been standardized in TRY
# Note: The complete.cases() is used to ensure the cases are complete,
#       i.e. have no missing values
#-------------------------------------------------
num_traits <- rtry_select_row(
  workdata,
  complete.cases(TraitID) & complete.cases(StdValue)
)
num_traits
# View(num_traits)
# Select the columns for transformation
num_traits <- rtry_select_col(
  num_traits,
  ObservationID, AccSpeciesID, AccSpeciesName,
  TraitID, TraitName, StdValue, UnitName
)
num_traits
# View(num_traits)
#-------------------------------------------------
# To transform long table into wide table on traits while keeping the
# ancillary data, the ancillary data needs to be added manually as
# additional columns before proceeding.
# Take latitude and longitude as example, extract these information from
# the input data using the function rtry_select_anc
#-------------------------------------------------
# Extract the unique value of latitude (DataID 59) and the corresponding
# ObservationID
workdata_lat <- rtry_select_anc(workdata, 59)

# Extract the unique value of longitude (DataID 60) and the corresponding
# ObservationID
workdata_lon <- rtry_select_anc(workdata, 60)
#-------------------------------------------------
# To merge the extracted ancillary data with the numerical traits
# Merge the relevant data frames based on the ObservationID using
# rtry_join_left (left join)
#-------------------------------------------------
# First attach the latitude ...
num_traits_georef <- rtry_join_left(
  num_traits, workdata_lat,
  baseOn = ObservationID
)

# ... then attach the longitude to the result
num_traits_georef <- rtry_join_left(
  num_traits_georef, workdata_lon,
  baseOn = ObservationID
)
num_traits_georef
# View(num_traits_georef)
#-------------------------------------------------
# Perform wide table transformation on TraitID, TraitName and UnitName
# With cell values to be the mean values calculated for StdValue
#-------------------------------------------------
num_traits_georef_wider <- rtry_trans_wider(
  num_traits_georef,
  names_from = c(TraitID, TraitName, UnitName),
  values_from = c(StdValue),
  values_fn = list(StdValue = mean)
)
num_traits_georef_wider
# View(num_traits_georef_wider)
#-------------------------------------------------
# Export the data into a CSV file
# Note: If the specified output directory does not exist, it will be
#       created automatically.
#-------------------------------------------------
# The file is written into the session's temporary directory
output_file <- file.path(tempdir(), "workdata_wider_traits.csv")
rtry_export(num_traits_georef_wider, output_file)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.