Purpose:

The International Soil Carbon Network Database version 3.1 (ISCN3.1) is a collection of layer-level soil observations pulled together to explain and investigate soil carbon stocks worldwide. ISCN 3.0 was published in 2015 (archived with EDI in 2022); version 3.1 addresses some known shortcomings and removes the ISCN-inferred soil organic carbon to emphasize the fit-for-purpose decisions needed for end use.

There are 29 contributing data sets with 434,119 layer-level observations across 56,926 geo-referenced locations and 10,934 un-referenced locations.

In this document we will present summary statistics for each contributing data set and illustrate how to work with the ISCN3.

library(SOCDRaH2)
library(tidyverse)
library(knitr)
library(ggplot2)
library(ggmap)
library(kableExtra)
# Folder handed to both ISCN reader calls below
dataDir <- "~/Documents/Datasets/ISCN"

# Run every chunk, but keep the source code out of the rendered report
knitr::opts_chunk$set(echo = FALSE, eval = TRUE)

# Original-format read, kept so the effect of the cleaning can be compared and
# confirmed (argument name is spelled exactly as the reader is called here)
ISCN_old <- ISCN3(orginalFormat = TRUE, dataDir = dataDir)

# The 'new' clean data set that the tables and figures below are based on
ISCN3.ls <- ISCN3_3(data_dir = dataDir)
#Create an overall summary of the entire collection.

# Count the distinct site/coordinate combinations of one ISCN table for every
# dataset submission, split by whether a finite lat/long pair is present.
#   observations: a table with dataset_name_sub, site_name and lat/long columns
#   table_level:  suffix for the output column names ("layer" or "profile")
# Returns one row per dataset_name_sub with geolocated_*/unlocated_* counts.
count_sites_by_location <- function(observations, table_level) {
  observations %>%
    # only the site identifiers and the coordinates are needed
    select(dataset_name_sub, site_name,
           `lat (dec. deg)`, `long (dec. deg)`) %>%
    # many rows share a site; keep each combination once
    unique() %>%
    # TRUE only when both latitude and longitude are finite
    mutate(has_lat_long = is.finite(`lat (dec. deg)` + `long (dec. deg)`)) %>%
    # tally located vs unlocated sites within each submission
    group_by(dataset_name_sub, has_lat_long) %>%
    tally() %>%
    # turn the flag into the name of the column the tally will land in
    mutate(my_label = if_else(has_lat_long,
                              paste0("geolocated_", table_level),
                              paste0("unlocated_", table_level))) %>%
    select(-has_lat_long) %>%
    # one row per submission, one column per label
    pivot_wider(names_from = my_label, values_from = n)
}

# Layer-table counts joined to profile-table counts; full join so that a
# submission present in only one of the two tables is still listed
site_locations <- full_join(
  count_sites_by_location(ISCN3.ls$layer, "layer"),
  count_sites_by_location(ISCN3.ls$profile, "profile"),
  by = "dataset_name_sub"
)

# Per-dataset summary: study metadata joined to the site counts computed in
# site_locations, ordered by the number of sites found in the layer table.
table_summary <- ISCN3.ls$study %>%
  #reduce size of dataset by only looking at necessary columns
  select(dataset_name, `dataset_type (dataset_type)`, dataset_description) %>%
  #remove duplicate entries of datasets
  unique() %>%
  #combine lat/long counts from site_locations table with respective datasets
  right_join(site_locations, by = c('dataset_name' = 'dataset_name_sub')) %>%
  #total layer site count for EACH dataset. The joined table takes its grouping
  #from the (ungrouped) study table, so a plain sum() here would collapse both
  #columns into one grand total repeated on every row; rowSums() keeps the
  #arithmetic row-wise and treats a missing count as zero.
  mutate(layer_site_count = rowSums(across(c(unlocated_layer, geolocated_layer)),
                                    na.rm = TRUE)) %>%
  #sort rows in ascending order based on layer site count
  arrange(layer_site_count)


# sum(table_summary$geolocated_layer, na.rm = TRUE)
# sum(table_summary$unlocated_layer, na.rm = TRUE)

knitr::kable(table_summary,
  caption = 'Site counts for each contributing dataset submision (dataname_sub) in the profile and layer data table.')

National Cooperative Soil Characterization Database (11 September 2014) (NRCS Sept/2014)

Layer-level soils data of pedons from the NRCS-Soil Survey Lab, National Cooperative Soil Characterization Database (version date: 11 September 2014). Sites are concentrated in the United States with a smaller number of international sites. NRCS Sept/2014 has 373,326 layer-level and 26,096 profile-level observations across 44,896 and 13,016 sites, respectively. Please see @Nrcs2014a and @Nrcs2014b for additional details.

ISCN vs 3 to 3.3 changes

Future issues

#This first data collection will be extensively commented, subsequent data sets will follow similar patterns.

# Name of the dataset summarised in this section
current_dataset <- "NRCS Sept/2014"

# Study-level metadata for this dataset: keep only columns that hold at least
# one non-missing value, then transpose so each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(function(column) !all(is.na(column)))) %>%
  t()

# Profile rows of this submission (matched on dataset_name_sub), with
# completely empty columns removed
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

# Layer rows of this submission, same treatment as the profile table
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

##Pull out layers with more then one row
# duplicate_layers <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  # filter(length(layer_name) >= 2)

#random_slice <- layer %>%
#  slice_sample(n=100)

#ordered_slice <- layer[200:240,]

##Check to see if there is different/new information in the layer table that is not in the profile
##This code is left in for manual checks
# profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
# 
# series <- profile_info_layer %>%
#  select(profile_name, soil_taxon, soil_series) %>%
#  unique() %>%
#  anti_join(profile)



# layer_sites <- ISCN3.ls$layer %>% 
# filter(dataset_name_sub == current_dataset) %>%
# select(site_name)
# layer_sites_unique <- unique(layer_sites)
# Study metadata table for the current dataset
knitr::kable(
  study,
  caption = paste("Citations for the ", current_dataset, " data set.")
)

# Factor summary for the layer table: count each value of each factor column
layer %>%
  select(where(is.factor)) %>%
  # long format: one row per (column name, factor value) pair
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  # group on every column so tally() counts identical pairs
  group_by(across(everything())) %>%
  tally() %>%
  # most common first
  arrange(desc(n)) %>%
  # the full table is over 42000 rows, so keep three rows per remaining group
  slice_head(n = 3) %>%
  knitr::kable(caption = "Layer-level catagorical variable counts.")

# Same factor summary for the profile table, skipped when it has no rows
if (nrow(profile) > 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    group_by(across(everything())) %>%
    tally() %>%
    arrange(desc(n)) %>%
    # the full table is over 17000 rows, so it is trimmed the same way
    slice_head(n = 3) %>%
    knitr::kable(caption = "Profile-level catagorical variable counts (subset of all catagories).")
}
# Unique coordinate/datum combinations reported in the layer table, keeping
# only rows where latitude and longitude are both finite
locations <- layer %>%
  select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# When the profile table has rows for this dataset, put its coordinates in
# front of the layer coordinates and de-duplicate the combined set
if (any(ISCN3.ls$profile$dataset_name_sub == current_dataset)) {
  locations <- bind_rows(
    profile %>%
      select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))),
    locations
  ) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# World outline with the locations hex-binned on top; fill is on a log10 scale
ggplot(data = locations) +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group),
    colour = "grey", fill = "white"
  ) +
  geom_hex(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    bins = 200
  ) +
  scale_fill_gradient(name = "Site count", trans = "log10") +
  ggtitle(current_dataset) +
  theme(axis.title = element_blank())
#Layer level histograms and depth plots

#NRCS is so large that we only look at a representative subset here
#Fixed seed so the same sites are drawn every time the document is rendered
set.seed(42)
#all site names that appear in either the layer or the profile table
subset_site <- unique(c(layer$site_name, profile$site_name))
#draw up to 1000 sites; capping at the number available keeps sample() from
#failing if this code is reused on a dataset with fewer than 1000 sites
subset_site <- sample(x = subset_site, size = min(1000, length(subset_site)))

#temp <- layer %>%
#  filter(site_name %in% subset_site)


#Again, the dataset is so large that we are subsetting

# Names of the layer columns starting with bd, c_tot, loi or oc
oc_bd <- layer %>%
  select(starts_with("bd"), starts_with("c_tot"),
         starts_with("loi"), starts_with("oc")) %>%
  names()

# Long table (one row per layer and variable) for the sampled sites only
plot.df <- layer %>%
  filter(site_name %in% subset_site) %>%
  select(dataset_name_sub, site_name, profile_name,
         `layer_top (cm)`, `layer_bot (cm)`,
         all_of(oc_bd)) %>%
  # only the numeric members of oc_bd become measurements
  pivot_longer(cols = where(is.numeric) & all_of(oc_bd),
               names_to = "variable", values_to = "measurements")

# Histogram of every variable, each panel on its own scales
plot.df %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free")

# Measurement against depth: the top and bottom of each layer both become
# depth values, so a line runs across every layer's boundaries
plot.df %>%
  pivot_longer(cols = all_of(c("layer_top (cm)", "layer_bot (cm)")),
               names_to = "layer_variable", values_to = "depth") %>%
  filter(is.finite(depth + measurements)) %>%
  # one line per dataset/site/profile combination
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line(alpha = 0.8) +
  facet_wrap(~ variable, scales = "free")

# Names of the layer columns starting with p_, n_, ph_, cec, bs or ecec
p_n_ph <- layer %>%
  select(starts_with("p_"), starts_with("n_"), starts_with("ph_"),
         starts_with("cec"), starts_with("bs"), starts_with("ecec")) %>%
  names()

# Long table (one row per layer and variable) for the sampled sites only
plot.df <- layer %>%
  filter(site_name %in% subset_site) %>%
  select(dataset_name_sub, site_name, profile_name,
         `layer_top (cm)`, `layer_bot (cm)`,
         all_of(p_n_ph)) %>%
  # only the numeric members of p_n_ph become measurements
  pivot_longer(cols = where(is.numeric) & all_of(p_n_ph),
               names_to = "variable", values_to = "measurements")

# Histogram of every variable, each panel on its own scales
plot.df %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free")

# Measurement against depth, using both layer bounds as depth values
plot.df %>%
  pivot_longer(cols = all_of(c("layer_top (cm)", "layer_bot (cm)")),
               names_to = "layer_variable", values_to = "depth") %>%
  filter(is.finite(depth + measurements)) %>%
  # one line per dataset/site/profile combination
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line(alpha = 0.8) +
  facet_wrap(~ variable, scales = "free")

# Every remaining numeric layer column: not already plotted above and not a
# layer bound or a coordinate
other <- layer %>%
  select(where(is.numeric)) %>%
  names() %>%
  setdiff(c(oc_bd, p_n_ph,
            "layer_top (cm)", "layer_bot (cm)",
            "lat (dec. deg)", "long (dec. deg)"))

# Long table (one row per layer and variable) for the sampled sites only
plot.df <- layer %>%
  filter(site_name %in% subset_site) %>%
  select(dataset_name_sub, site_name, profile_name,
         `layer_top (cm)`, `layer_bot (cm)`,
         all_of(other)) %>%
  pivot_longer(cols = where(is.numeric) & all_of(other),
               names_to = "variable", values_to = "measurements")

# Histogram of every variable, each panel on its own scales
plot.df %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free")

# Measurement against depth, using both layer bounds as depth values
plot.df %>%
  pivot_longer(cols = all_of(c("layer_top (cm)", "layer_bot (cm)")),
               names_to = "layer_variable", values_to = "depth") %>%
  filter(is.finite(depth + measurements)) %>%
  # one line per dataset/site/profile combination
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line(alpha = 0.8) +
  facet_wrap(~ variable, scales = "free")

# Drop the plotting helpers so later sections start from a clean slate
rm(plot.df, oc_bd, other, p_n_ph, subset_site)

Worldwide soil carbon and nitrogen data (Worldwide soil carbon and nitrogen data)

This data set is located in the United States. Worldwide soil carbon and nitrogen data has 21972 layer-level and 7423 profile-level observations across 1930 and 1834 sites, respectively. Please see @Zinke1986 for additional details and if you are using ISCN3 please cite.

ISCN 3 to 3.3

Future issues

# Name of the dataset summarised in this section
current_dataset <- "Worldwide soil carbon and nitrogen data"

# Study metadata: drop columns with no values, transpose for display
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(function(column) !all(is.na(column)))) %>%
  t()

# Profile and layer rows of this submission, without all-missing columns
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

# Unique coordinate/datum combinations from the layer table with finite
# latitude and longitude
locations <- layer %>%
  select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add the profile-table coordinates (placed first) when this dataset has
# profile rows, then de-duplicate the combined set
if (any(ISCN3.ls$profile$dataset_name_sub == current_dataset)) {
  locations <- bind_rows(
    profile %>%
      select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))),
    locations
  ) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_WSCN <- layer %>%
# group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
# filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Study metadata table for the current dataset
knitr::kable(
  study,
  caption = paste("Citations for the ", current_dataset, " data set.")
)

# Count each value of each factor column in the layer table and show the
# three most frequent rows per remaining group
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  group_by(across(everything())) %>%
  tally() %>%
  arrange(desc(n)) %>%
  slice_head(n = 3) %>%
  knitr::kable(caption = "Layer-level catagorical variable counts.")

# Same summary for the profile table, skipped when it has no rows
if (nrow(profile) > 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    group_by(across(everything())) %>%
    tally() %>%
    arrange(desc(n)) %>%
    slice_head(n = 3) %>%
    knitr::kable(caption = "Profile-level catagorical variable counts.")
}
# World outline with the locations hex-binned on top; fill is on a log10 scale
ggplot(data = locations) +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group),
    colour = "grey", fill = "white"
  ) +
  geom_hex(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    bins = 200
  ) +
  scale_fill_gradient(name = "Site count", trans = "log10") +
  ggtitle(current_dataset) +
  theme(axis.title = element_blank())

# Histograms of the numeric profile columns, leaving out the coordinates and
# any column whose name starts with layer_
profile %>%
  pivot_longer(
    cols = where(is.numeric) &
      !ends_with("(dec. deg)") & !starts_with("layer_"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free") +
  ggtitle("Profile measurements")

# Histograms of the numeric layer columns, leaving out only the coordinates
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free") +
  ggtitle("Layer measurements")

# Depth profile of every numeric layer measurement, one panel per variable.
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #pull out all the numerical columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/lon
                      #create the paired measurement-variable tables
                      values_to = 'measurements', names_to = 'variable') %>%
         pivot_longer(cols = c('layer_top (cm)',
                               'layer_bot (cm)'), #pull out the layers
                      #create a paired depth-variable
                      values_to = 'depth', names_to = 'layer_variable') %>%
         filter(is.finite(measurements + depth))) +
  #make nifty line plots that span the layer boundaries
  #alpha is a fixed transparency, so it is set outside aes(); inside aes() it
  #is treated as a data mapping, which adds a meaningless legend and lets the
  #alpha scale choose the opacity instead of using 0.5
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name)),
            alpha = 0.5) +
  facet_wrap(~variable, scales = 'free') +
  labs(title = 'Depth profile')

USGS Site-specific soil C (USGS_S3C)

This data set is located in the United States. USGS_S3C has 10434 layer-level and 1674 profile-level observations across 1068 and 748 geolocated sites, respectively. Please see @Buell2004 for additional details.

ISCN vs 3 to 3.3 changes

Future issues

# Name of the dataset summarised in this section
current_dataset <- "USGS_S3C"

# Study metadata: drop columns with no values, transpose for display
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(function(column) !all(is.na(column)))) %>%
  t()

# Profile and layer rows of this submission, without all-missing columns
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

# Unique coordinate/datum combinations from the layer table with finite
# latitude and longitude
locations <- layer %>%
  select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add the profile-table coordinates (placed first) when this dataset has
# profile rows, then de-duplicate the combined set
if (any(ISCN3.ls$profile$dataset_name_sub == current_dataset)) {
  locations <- bind_rows(
    profile %>%
      select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))),
    locations
  ) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

##Pull out layers with more then one row
# duplicate_layers_usgs <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Study metadata table for the current dataset
knitr::kable(
  study,
  caption = paste("Citations for the ", current_dataset, " data set.")
)

# Count each value of each factor column in the layer table and show the
# three most frequent rows per remaining group
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  group_by(across(everything())) %>%
  tally() %>%
  arrange(desc(n)) %>%
  slice_head(n = 3) %>%
  knitr::kable(caption = "Layer-level catagorical variable counts.")

# Same summary for the profile table, skipped when it has no rows
if (nrow(profile) > 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    group_by(across(everything())) %>%
    tally() %>%
    arrange(desc(n)) %>%
    slice_head(n = 3) %>%
    knitr::kable(caption = "Profile-level catagorical variable counts.")
}
# US state outlines with the locations hex-binned on top; log10 fill scale
ggplot(data = locations) +
  geom_polygon(
    data = map_data("state"),
    mapping = aes(x = long, y = lat, group = group),
    colour = "grey", fill = "white"
  ) +
  geom_hex(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    bins = 200
  ) +
  scale_fill_gradient(name = "Site count", trans = "log10") +
  ggtitle(current_dataset) +
  theme(axis.title = element_blank())

# Histograms of the numeric layer columns, leaving out only the coordinates
# (no bin count is given, so ggplot2 picks its default)
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~ variable, scales = "free")

# Histograms of the numeric profile columns, leaving out the coordinates and
# any column whose name starts with layer
profile %>%
  pivot_longer(
    cols = where(is.numeric) &
      !ends_with("(dec. deg)") & !starts_with("layer"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free")

# Measurement against depth: first pivot the measurements (not layer bounds,
# not coordinates), then pivot both layer bounds into a single depth column
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = all_of(c("layer_top (cm)", "layer_bot (cm)")),
    names_to = "layer_variable", values_to = "depth"
  ) %>%
  filter(is.finite(measurements + depth)) %>%
  # one line per dataset/site/profile combination
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~ variable, scales = "free")

Alaska Deep Soil Carbon Project (AK DSC Project SOC stock computation)

This data set is located in the United States. AK DSC Project SOC stock computation has 9002 layer-level and 3159 profile-level observations across 743 and 743 sites, respectively. Please see @Johnson2011 for additional details.

ISCN 3 to 3.3 changes

Future issues

Tables and figures

# Name of the dataset summarised in this section
current_dataset <- "AK DSC Project SOC stock computation"

# Study metadata: drop columns with no values, transpose for display
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(function(column) !all(is.na(column)))) %>%
  t()

# Profile and layer rows of this submission, without all-missing columns
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

# Unique coordinate/datum combinations from the layer table with finite
# latitude and longitude
locations <- layer %>%
  select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add the profile-table coordinates (placed first) when this dataset has
# profile rows, then de-duplicate the combined set
if (any(ISCN3.ls$profile$dataset_name_sub == current_dataset)) {
  locations <- bind_rows(
    profile %>%
      select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))),
    locations
  ) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

##Pull out layers with more then one row
# duplicate_layers_akdsc <- layer %>%
 # group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
 # filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Study metadata table for the current dataset
knitr::kable(
  study,
  caption = paste("Citations for the ", current_dataset, " data set.")
)

# Count each value of each factor column in the layer table and show the
# three most frequent rows per remaining group
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  group_by(across(everything())) %>%
  tally() %>%
  arrange(desc(n)) %>%
  slice_head(n = 3) %>%
  knitr::kable(caption = "Layer-level catagorical variable counts.")

# Same summary for the profile table, skipped when it has no rows
if (nrow(profile) > 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    group_by(across(everything())) %>%
    tally() %>%
    arrange(desc(n)) %>%
    slice_head(n = 3) %>%
    knitr::kable(caption = "Profile-level catagorical variable counts.")
}
# US state outlines with the locations hex-binned on top; log10 fill scale
# NOTE(review): the maps 'state' database appears to cover only the contiguous
# United States - confirm it is the intended backdrop for an Alaskan dataset
ggplot(data = locations) +
  geom_polygon(
    data = map_data("state"),
    mapping = aes(x = long, y = lat, group = group),
    colour = "grey", fill = "white"
  ) +
  geom_hex(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    bins = 200
  ) +
  scale_fill_gradient(name = "Site count", trans = "log10") +
  ggtitle(current_dataset) +
  theme(axis.title = element_blank())

# Histograms of the numeric layer columns, leaving out only the coordinates
# (no bin count is given, so ggplot2 picks its default)
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~ variable, scales = "free")

# Histograms of the numeric profile columns, leaving out the coordinates and
# any column whose name starts with layer
profile %>%
  pivot_longer(
    cols = where(is.numeric) &
      !ends_with("(dec. deg)") & !starts_with("layer"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram(bins = 20) +
  facet_wrap(~ variable, scales = "free")

# Measurement against depth: first pivot the measurements (not layer bounds,
# not coordinates), then pivot both layer bounds into a single depth column
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = all_of(c("layer_top (cm)", "layer_bot (cm)")),
    names_to = "layer_variable", values_to = "depth"
  ) %>%
  filter(is.finite(measurements + depth)) %>%
  # one line per dataset/site/profile combination
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~ variable, scales = "free")

Jorgensen_NPS (Jorgensen_NPS)

This data set is located in the United States. Jorgensen_NPS has 2531 layer-level and 0 profile-level observations across 529 and 0 sites, respectively. Please see @Jorgenson2008 for additional details and cite.

ISCN 3 to 3.3 changes

Future issues

# Name of the dataset summarised in this section
current_dataset <- "Jorgensen_NPS"

# Study metadata: drop columns with no values, transpose for display
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(function(column) !all(is.na(column)))) %>%
  t()

# Profile and layer rows of this submission, without all-missing columns
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

# Unique coordinate/datum combinations from the layer table with finite
# latitude and longitude
locations <- layer %>%
  select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add the profile-table coordinates (placed first) when this dataset has
# profile rows, then de-duplicate the combined set
if (any(ISCN3.ls$profile$dataset_name_sub == current_dataset)) {
  locations <- bind_rows(
    profile %>%
      select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))),
    locations
  ) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

##Pull out layers with more then one row
# duplicate_layers_j_nps <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Study metadata table for the current dataset
knitr::kable(
  study,
  caption = paste("Citations for the ", current_dataset, " data set.")
)

# Count each value of each factor column in the layer table and show the
# three most frequent rows per remaining group
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  group_by(across(everything())) %>%
  tally() %>%
  arrange(desc(n)) %>%
  slice_head(n = 3) %>%
  knitr::kable(caption = "Layer-level catagorical variable counts.")

# Full (unsorted, untrimmed) factor counts for the profile table, skipped
# when it has no rows
if (nrow(profile) > 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = "Profile-level catagorical variable counts.")
}
# Alaska outline (world map rows whose subregion is 'Alaska') with the
# locations hex-binned on top; the x axis is limited to -200..-100
ggplot(data = locations) +
  geom_polygon(
    data = map_data("world") %>% filter(subregion == "Alaska"),
    mapping = aes(x = long, y = lat, group = group),
    colour = "grey", fill = "white"
  ) +
  geom_hex(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    bins = 20
  ) +
  scale_fill_gradient(name = "Site count", trans = "log10") +
  scale_x_continuous(limits = c(-200, -100)) +
  ggtitle(current_dataset) +
  theme(axis.title = element_blank())

# Histograms of the numeric layer columns, leaving out only the coordinates
# (no bin count is given, so ggplot2 picks its default)
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  filter(is.finite(measurements)) %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~ variable, scales = "free")

# ggplot(data = profile %>%
#          pivot_longer(cols = where(is.numeric) &
#                         !ends_with('(dec. deg)') &
#                         !starts_with('layer'), #pull out the numerics that aren't lat/long
#                       values_to = 'measurements', names_to = 'variable') %>%
#          filter(is.finite(measurements))) +
#   geom_histogram(aes(x = measurements), bins = 20) + #make a histrogram
#  facet_wrap(~variable,scales = 'free')

# Measurement against depth: first pivot the measurements (not layer bounds,
# not coordinates), then pivot both layer bounds into a single depth column
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") & !ends_with("(dec. deg)"),
    names_to = "variable", values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = all_of(c("layer_top (cm)", "layer_bot (cm)")),
    names_to = "layer_variable", values_to = "depth"
  ) %>%
  filter(is.finite(measurements + depth)) %>%
  # one line per dataset/site/profile combination, spanning layer boundaries
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~ variable, scales = "free")

Northern Circumpolar Soil Carbon Database (Northern Circumpolar Soil Carbon Database (NCSCD))

This data set is located in the United States. Northern Circumpolar Soil Carbon Database (NCSCD) has 1907 layer-level and 1020 profile-level observations across 476 and 471 sites, respectively.

obs

Please see @Hugelius2013 for additional details and if you are using ISCN3 please cite.

# Name of the dataset summarised in this section
current_dataset <- "Northern Circumpolar Soil Carbon Database (NCSCD)"

# Study metadata: drop columns with no values, transpose for display
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(function(column) !all(is.na(column)))) %>%
  t()

# Profile and layer rows of this submission, without all-missing columns
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(function(column) !all(is.na(column))))

# Unique coordinate/datum combinations from the layer table with finite
# latitude and longitude
locations <- layer %>%
  select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add the profile-table coordinates (placed first) when this dataset has
# profile rows, then de-duplicate the combined set
if (any(ISCN3.ls$profile$dataset_name_sub == current_dataset)) {
  locations <- bind_rows(
    profile %>%
      select(all_of(c("lat (dec. deg)", "long (dec. deg)", "datum (datum)"))),
    locations
  ) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_ncscd <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >=2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Future issues

Jorgensen_ARCN (Jorgensen_ARCN)

This data set is located in the United States. Jorgensen_ARCN has 1108 layer-level and 0 profile-level observations across 316 and 0 sites, respectively. Please see @Jorgenson2008 for additional details.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Jorgensen_ARCN"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_j_arcn <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Future issues

Bockheim (Bockheim)

Please see @Bockheim1998, @Bockheim1999, @Bockheim2001, @Bockheim2003 and @Bockheim2004 for additional details. This data set is located in the United States. Bockheim has 1611 layer-level and 46 profile-level observations across 126 and 24 sites, respectively.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Bockheim"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_bock <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Future issues

Permafrost_RCN (Permafrost_RCN)

This data set is located in the United States. Permafrost_RCN has 3696 layer-level and 0 profile-level observations across XX and 0 sites, respectively. Please see @Harden2012 for additional details and if you are using ISCN3 please cite. Cite the individual author if using their data; in addition, cite Harden if using that data as part of the broader Permafrost RCN dataset.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Permafrost_RCN"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Layer-level locations are disabled for this data set.
# NOTE(review): presumably the coordinate columns are empty here and are
# therefore dropped from `layer` above -- confirm before re-enabling.
# locations <- layer %>%
#               select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`)%>%
#   unique() %>%
#   filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Profile-level coordinates, if this data set has profile records. The result
# is deliberately NOT bound onto an existing `locations`: with the layer-level
# step disabled, that object would still hold the coordinates of the
# previously processed data set. `nrow(profile)` is used as the guard because
# `profile` is already filtered and the check can never evaluate to NA.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_p_rcn <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# ggplot(locations) +
#   geom_polygon(data = map_data('world'), aes(x=long, y = lat, group = group)) +
#   geom_point(aes(x=`long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
#   theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Future issues

Lehmann_NE_US_soils (Lehmann NE US soils)

This data set is located in the United States. Lehmann NE US soils has 172 layer-level and 0 profile-level observations across 172 and 0 sites, respectively. The Lehmann NE US soils dataset references @Schmidt2000.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Lehmann NE US soils"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_leh_ne <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# ggplot(data = layer %>%
#          pivot_longer(cols = where(is.numeric) & #pull out all the numerical colums
#                         !starts_with('layer') & #as long as they aren't layer bounds
#                         !ends_with('(dec. deg)'), #and aren't lat/lon
#                       #create the paired measurement-variable tables
#                       values_to = 'measurements', names_to = 'variable') %>%
#          pivot_longer(cols = c('layer_top (cm)',
#                                'layer_bot (cm)',), #pull out the layers
#                       #create a paired depth-variable
#                       values_to='depth', names_to = 'layer_variable')) +
#   #make nifty line plots that span the layer boundaries
#   geom_line(aes(x = depth, y = measurements,
#                  group = paste(dataset_name_sub, site_name, profile_name))) +
#   facet_wrap(~variable,scales = 'free')

Fixed issues

Future issues

None we are aware of

USDA-FS_NRS_LandscapeCarbonInventory (USDA-FS NRS Landscape Carbon Inventory)

This data set is located in the United States. USDA-FS NRS Landscape Carbon Inventory has 1510 layer-level and 0 profile-level observations across 143 and 0 sites, respectively. The USDA-FS NRS Landscape Carbon Inventory dataset references @Cole2013.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "USDA-FS NRS Landscape Carbon Inventory"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_fs_nrs <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Future issues

None we are aware of

Jorgensen_YKDE (Jorgensen_YKDE)

This data set is located in the United States. Jorgensen_YKDE has 697 layer-level and 0 profile-level observations across 60 and 0 sites, respectively. The Jorgensen_YKDE dataset references @Jorgenson2000.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Jorgensen_YKDE"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_j_ykde <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# ggplot(data = layer %>%
#          pivot_longer(cols = where(is.numeric) & #pull out all the numerical colums
#                         !starts_with('layer') & #as long as they aren't layer bounds
#                         !ends_with('(dec. deg)'), #and aren't lat/lon
#                       #create the paired measurement-variable tables
#                       values_to = 'measurements', names_to = 'variable') %>%
#          pivot_longer(cols = c('layer_top (cm)',
#                                'layer_bot (cm)',), #pull out the layers
#                       #create a paired depth-variable
#                       values_to='depth', names_to = 'layer_variable')) +
#   #make nifty line plots that span the layer boundaries
#   geom_line(aes(x = depth, y = measurements,
#                  group = paste(dataset_name_sub, site_name, profile_name))) +
#   facet_wrap(~variable,scales = 'free')

Fixed issues

Future issues

Boby_Mack (Boby_Mack)

This data set is located in the United States. Boby_Mack has 482 layer-level and 0 profile-level observations across 38 and 0 sites, respectively. See @Boby2010 for additional details and if you are using ISCN3 please cite.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Boby_Mack"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_boby_mack <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Below is a list of known issues that were addressed between ISCN 3 and ISCN 3.3:

+ Repeated information on soil carbon stocks was removed from the layer and profile information of the data set.
+ There are no duplicate rows that needed to be removed; however, the soil carbon stock columns gap-filled by ISCN were removed by ISCN3.5 cleaning.
+ The site note in the data set profile was expanded to a formal citation and added to the ISCN bib file.
+ Note that data recorded as "Unknown" or "?" were left as is and NOT shifted to an NA value.
+ In profile, the ISCN SOC to 1m computation rows were removed, getting rid of duplicate data.

Future issues

None we are aware of

Lehmann_Soil_CBC_1 (Lehmann Soil C&BC #1)

This data set is located in the United States. Lehmann Soil C&BC #1 has 139 layer-level and 0 profile-level observations across 31 and 0 sites, respectively. The Lehmann Soil C&BC #1 dataset references @Schmidt2000.

# Subset the ISCN3 tables to a single contributing data set.
current_dataset <- "Lehmann Soil C&BC #1"

# Study-level metadata for this data set; columns holding no values are
# dropped and the result is transposed.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~ any(!is.na(.)))) %>%
  t()

# Profile- and layer-level records, keeping only columns that have at least
# one non-missing value.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~ any(!is.na(.))))

# Unique layer-level coordinates where both latitude and longitude are finite.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Add profile-level coordinates when this data set has profile records.
# `profile` is already filtered to the data set, so its row count answers the
# question without ever evaluating `if (NA)`, which a direct `any(... ==
# current_dataset)` test does when `dataset_name_sub` contains missing values
# and no row matches.
if (nrow(profile) != 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_leh_cbc <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation/metadata table for the current data set. paste0() keeps the caption
# free of the extra separator spaces paste() would insert around the name.
study %>%
  knitr::kable(
    caption = paste0("Citations for the ", current_dataset, " data set.")
  )

# Count how often each value of every factor column occurs in the layer table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = "column_name") %>%
  count(column_name, value) %>%
  knitr::kable(caption = "Layer-level categorical variable counts.")

# Same tabulation for the profile table; skipped when there are no profiles.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = "column_name") %>%
    count(column_name, value) %>%
    knitr::kable(caption = "Profile-level categorical variable counts.")
}
# World base map with this data set's sampling locations overlaid in red.
locations %>%
  ggplot() +
  geom_polygon(
    data = map_data("world"),
    mapping = aes(x = long, y = lat, group = group)
  ) +
  geom_point(
    mapping = aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
    color = "red",
    size = 3
  ) +
  theme(axis.title = element_blank())

# Histogram of every numeric layer column except latitude/longitude, one
# panel per variable.
layer %>%
  pivot_longer(
    cols = where(is.numeric) & !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  ggplot() +
  geom_histogram(aes(x = measurements)) +
  facet_wrap(~variable, scales = "free")

# Depth profiles: each numeric measurement (layer bounds and lat/long
# excluded) is paired with both the top and bottom depth of its layer, and
# the points of one profile are joined into a line.
layer %>%
  pivot_longer(
    cols = where(is.numeric) &
      !starts_with("layer") &
      !ends_with("(dec. deg)"),
    names_to = "variable",
    values_to = "measurements"
  ) %>%
  pivot_longer(
    cols = c("layer_top (cm)", "layer_bot (cm)"),
    names_to = "layer_variable",
    values_to = "depth"
  ) %>%
  ggplot() +
  geom_line(aes(
    x = depth,
    y = measurements,
    group = paste(dataset_name_sub, site_name, profile_name)
  )) +
  facet_wrap(~variable, scales = "free")

Fixed issues

Future issues

None we are aware of

USGS_Harden (USGS Harden)

This data set is located in the United States. USGS Harden has 1203 layer-level and 18 profile-level observations across 23 and 6 sites, respectively. The USGS Harden dataset references @Harden2008a, @ODonnell2011, @Manies2004, @Harden2008b, @Harden2006 and @Neff2005.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "USGS Harden"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_usgs_harden <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

Bonanza_LTER (Bonanza LTER)

This data set is located in the United States. Bonanza LTER has 583 layer-level and 44 profile-level observations across 20 and 15 sites, respectively. The Bonanza_LTER dataset references @Cleve1993 and @Yarie2013.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "Bonanza LTER"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations (profile rows first, as before) when
# this data set has profile records. The guard mirrors the other data set
# sections, so an empty profile table no longer breaks the column selection.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_bonanza_lter <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table. The nrow() guard matches the other
# data set sections and skips the table when there are no profile records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Below is a list of known issues that were addressed between ISCN 3 and ISCN 3.3

Future issues

Lu_LTER (Lu_LTER)

The Lu_LTER data set in ISCN3 contains 103 layer-level observations and 14 profile-level observations after cleaning for ISCN3.5. The Lu_LTER data set references @Ping2000, @Ping2002, @Ping2004, @Ping2005, and @Michaelson1996.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "Lu_LTER"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed. (The layer table used to be row-bound
# to itself here, which unique() immediately undid.)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_lu_lter <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Below is a list of known issues that were addressed between ISCN 3 and ISCN 3.3

Future issues

None we are aware of

HeckmanSwanstonBiscuitBurn (Heckman/Swanston Biscuit Burn)

This data set is located in the United States. Heckman/Swanston Biscuit Burn has 28 layer-level and 0 profile-level observations across 13 and 0 sites, respectively. The Heckman/Swanston Biscuit Burn dataset references @Heckman2013.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "Heckman/Swanston Biscuit Burn"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_heck_swanston <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

Kane (Kane)

This data set is located in the United States. Kane has 574 layer-level and 0 profile-level observations across 12 and 0 sites, respectively. Please see @Kane2004, @Kane2005, @Kane2009, and @Valentine2004 for additional details.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "Kane"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed. (The layer table used to be row-bound
# to itself here, which unique() immediately undid.)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_kane <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Below is a list of known issues that were addressed between ISCN 3 and ISCN 3.3

+ Rows weren't removed from ISCN3 because there were no duplicated entries within the layer-level and profile-level information.
+ Columns were removed from ISCN3 where information was not available throughout the data set.
+ Unclear information on vegetation remains in the vegclass_local section of the layer-level and profile-level information.
+ This data set required the use of a map centered around Alaska.
+ In profile, the ISCN SOC to 1m computation rows were removed, getting rid of duplicate data.

Future issues

Lu_PIMA (Lu_PIMA)

This dataset contains 69 layer-level and 2 profile-level observations across 11 and 2 unique latitude-longitude locations in the layer-level and profile-level tables, respectively. The Lu_PIMA data set references @Ping2010.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "Lu_PIMA"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed. (The layer table used to be row-bound
# to itself here, which unique() immediately undid.)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_lu_pima <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

USGS Harden Yazoo (USGS Harden Yazoo)

This data set is located in the United States. USGS Harden Yazoo has 234 layer-level and 17 profile-level observations across 4 and 3 sites, respectively. The USGS Harden Yazoo dataset references @Harden1999 and @Huntington1998.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "USGS Harden Yazoo"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed. (The layer table used to be row-bound
# to itself here, which unique() immediately undid.)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_usgs_yazoo <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

Vogel (Vogel)

This data set is located in the United States. Vogel has 230 layer-level and 0 profile-level observations across 8 and 0 sites, respectively. The Vogel dataset references @Kane2009, @Vogel2007, and @Vogel2008.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "Vogel"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed.
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_vogel <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
# Citation / study-level metadata for the current data set. paste0() is used
# so the caption does not pick up the extra spaces paste() would insert
# around the data set name.
study %>%
  knitr::kable(
    caption = paste0('Citations for the ', current_dataset, ' data set.')
  )

# Number of occurrences of each level in every factor column of the layer
# table.
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  count(column_name, value) %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

# Same tabulation for the profile table, skipped when the data set has no
# profile-level records.
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    count(column_name, value) %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
# Map of the geo-referenced sampling locations (red points) drawn over the
# world outline returned by map_data('world').
ggplot(locations) +
  geom_polygon(data = map_data('world'),
               aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`),
             color = 'red', size = 3) +
  theme(axis.title = element_blank())

# Histograms of every numeric layer-level column except latitude/longitude,
# one free-scaled panel per variable.
layer %>%
  pivot_longer(cols = where(is.numeric) & !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  ggplot(aes(x = measurements)) +
  geom_histogram() +
  facet_wrap(~variable, scales = 'free')

# Depth profiles: one line per dataset/site/profile for each numeric
# measurement, drawn against the layer bounds.
layer %>%
  # pair every numeric measurement (not a layer bound, not lat/long) with
  # its variable name
  pivot_longer(cols = where(is.numeric) &
                 !starts_with('layer') &
                 !ends_with('(dec. deg)'),
               values_to = 'measurements', names_to = 'variable') %>%
  # stack the top and bottom bounds so each layer contributes two depth
  # points; the stray trailing comma inside c() was removed because it
  # leaves an empty argument that base c() rejects
  pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
               values_to = 'depth', names_to = 'layer_variable') %>%
  ggplot(aes(x = depth, y = measurements,
             group = paste(dataset_name_sub, site_name, profile_name))) +
  geom_line() +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Below is a list of known issues that were addressed between ISCN 3 and ISCN 3.3

Future issues

USGS Muhs (USGS Muhs)

This data set is located in the United States. USGS Muhs has 395 layer-level and 0 profile-level observations across 6 and 0 sites, respectively. The USGS Muhs dataset references @Muhs2003.

# Subset the ISCN tables to a single contributing data set.
current_dataset <- "USGS Muhs"

# Study-level metadata: keep only the populated columns and transpose so each
# field becomes a row of the citation table.
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

# Profile-level records for this data set; columns that are entirely NA are
# dropped.
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Layer-level records for this data set; columns that are entirely NA are
# dropped.
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

# Unique locations reported in the layer table; rows without a finite
# latitude and longitude are removed. (The layer table used to be row-bound
# to itself here, which unique() immediately undid.)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

# Fold in the profile-level locations when this data set has profile records.
# The guard tests nrow(profile) rather than any(<column> == current_dataset):
# any() yields NA (and if() then errors) when the column contains NA and no
# row matches.
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# Disabled diagnostic: pull out layers with more than one row
# duplicate_layers_usgs_muhs <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

# Disabled diagnostic: check whether the layer table carries profile
# information that is not in the profile table
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

None we are aware of

HeckmanLithosequence (Heckman lithosequence)

This data set is located in the United States. Heckman lithosequence has 46 layer-level and 12 profile-level observations across 4 and 4 sites, respectively. The Heckman lithosequence dataset references @Heckman2009.

#subset the data
current_dataset <- "Heckman lithosequence"

#study-level (citation) record for this data set; columns that are entirely NA
#are dropped and the result is transposed so that each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

#profile-level rows for this data set, keeping only columns that hold data
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#layer-level rows for this data set, keeping only columns that hold data
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#distinct geolocated points reported in the layer table
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  #the sum is only finite when both coordinates are present and finite
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

#add points reported in the profile table; skip when the subset has no rows,
#because the all-NA column filter above then drops every column (any() of an
#empty vector is FALSE) and the select() below would fail.
#nrow() is used instead of any(... == current_dataset) so that NA entries in
#dataset_name_sub cannot turn the condition into NA
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_heck_lith <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

Schuur (Schuur)

This data set is located in the United States. Schuur has 153 layer-level and 4 profile-level observations across 1 and 1 sites, respectively. The Schuur data set references @Schuur2007.

#subset the data
current_dataset <- "Schuur"

#study-level (citation) record for this data set; columns that are entirely NA
#are dropped and the result is transposed so that each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

#profile-level rows for this data set, keeping only columns that hold data
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#layer-level rows for this data set, keeping only columns that hold data
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#distinct geolocated points reported in the layer table
#(the layer table used to be bound to itself here; unique() made that a no-op)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  #the sum is only finite when both coordinates are present and finite
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

#add points reported in the profile table; skip when the subset has no rows,
#because the all-NA column filter above then drops every column (any() of an
#empty vector is FALSE) and the select() below would fail
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_schuur <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

Fixed issues

In profile, the ISCN SOC to 1m computation rows were removed, getting rid of duplicate data.

Future issues

None that we are aware of

Myers_Smith (Myers-Smith)

This data set is located in the United States. Myers-Smith has 231 layer-level and 0 profile-level observations across 1 and 0 sites, respectively. The Myers-Smith dataset references @MyersSmithHarden2007 and @MyersSmithMcGuire2007.

#subset the data
current_dataset <- "Myers-Smith"

#study-level (citation) record for this data set; columns that are entirely NA
#are dropped and the result is transposed so that each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

#profile-level rows for this data set, keeping only columns that hold data
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#layer-level rows for this data set, keeping only columns that hold data
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#distinct geolocated points reported in the layer table
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  #the sum is only finite when both coordinates are present and finite
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

#add points reported in the profile table; skip when the subset has no rows,
#because the all-NA column filter above then drops every column (any() of an
#empty vector is FALSE) and the select() below would fail.
#nrow() is used instead of any(... == current_dataset) so that NA entries in
#dataset_name_sub cannot turn the condition into NA
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_myer_smith <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

OakRidgeNationalLab_Loblolly_DWJ (Oak Ridge National Lab_Loblolly_DWJ)

The Oak Ridge National Lab_Loblolly_DWJ dataset references @Parr2006. This data set is located in the United States. Oak Ridge National Lab_Loblolly_DWJ has 102 layer-level and 0 profile-level observations across 1 and 0 sites, respectively.

#subset the data
current_dataset <- "Oak Ridge National Lab_Loblolly_DWJ"

#study-level (citation) record for this data set; columns that are entirely NA
#are dropped and the result is transposed so that each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

#profile-level rows for this data set, keeping only columns that hold data
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#layer-level rows for this data set, keeping only columns that hold data
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#distinct geolocated points reported in the layer table
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  #the sum is only finite when both coordinates are present and finite
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

#add points reported in the profile table; skip when the subset has no rows,
#because the all-NA column filter above then drops every column (any() of an
#empty vector is FALSE) and the select() below would fail.
#nrow() is used instead of any(... == current_dataset) so that NA entries in
#dataset_name_sub cannot turn the condition into NA
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_oak_ridge <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

OakRidgeNationalLab_TDE (Oak Ridge National Lab_TDE)

This data set is located in the United States. Oak Ridge National Lab_TDE has 1176 layer-level and 0 profile-level observations across 1 and 0 sites, respectively. The Oak Ridge National Lab_TDE dataset references @Froberg2008.

#subset the data
current_dataset <- "Oak Ridge National Lab_TDE"

#study-level (citation) record for this data set; columns that are entirely NA
#are dropped and the result is transposed so that each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

#profile-level rows for this data set, keeping only columns that hold data
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#layer-level rows for this data set, keeping only columns that hold data
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#distinct geolocated points reported in the layer table
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  #the sum is only finite when both coordinates are present and finite
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

#add points reported in the profile table; skip when the subset has no rows,
#because the all-NA column filter above then drops every column (any() of an
#empty vector is FALSE) and the select() below would fail.
#nrow() is used instead of any(... == current_dataset) so that NA entries in
#dataset_name_sub cannot turn the condition into NA
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_oak_tde <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

Fixed issues

Future issues

None that we are aware of

University of Michigan Biological Station Forest Accelerated Succession ExperimenT (UMBS_FASET)

This data set is located in the United States. UMBS_FASET has 405 layer-level and 137 profile-level observations across 1 and 1 sites, respectively. The University of Michigan Biological Station Forest Accelerated Succession Experiment (UMBS_FASET) data set is uncited and unpublished.

#subset the data
current_dataset <- "UMBS_FASET"

#study-level (citation) record for this data set; columns that are entirely NA
#are dropped and the result is transposed so that each field is shown as a row
study <- ISCN3.ls$study %>%
  filter(dataset_name == current_dataset) %>%
  select(where(~any(!is.na(.)))) %>%
  t()

#profile-level rows for this data set, keeping only columns that hold data
profile <- ISCN3.ls$profile %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#layer-level rows for this data set, keeping only columns that hold data
layer <- ISCN3.ls$layer %>%
  filter(dataset_name_sub == current_dataset) %>%
  select(where(~any(!is.na(.))))

#distinct geolocated points reported in the layer table
#(the layer table used to be bound to itself here; unique() made that a no-op)
locations <- layer %>%
  select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
  unique() %>%
  #the sum is only finite when both coordinates are present and finite
  filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))

#add points reported in the profile table; skip when the subset has no rows,
#because the all-NA column filter above then drops every column (any() of an
#empty vector is FALSE) and the select() below would fail
if (nrow(profile) > 0) {
  locations <- profile %>%
    select(`lat (dec. deg)`, `long (dec. deg)`, `datum (datum)`) %>%
    bind_rows(locations) %>%
    unique() %>%
    filter(is.finite(`lat (dec. deg)` + `long (dec. deg)`))
}

# #Pull out layers with more then one row
# duplicate_layers_umbs <- layer %>%
#  group_by(dataset_name_sub, site_name, profile_name, layer_name) %>%
#  filter(length(layer_name) >= 2)

#Check to see if there is different/new information in the layer table that is not in the profile
#profile_info_layer <- layer %>%
#  select(any_of(setdiff(names(profile), c("layer_top (cm)", "layer_bot (cm)", "soc (g cm-2)")))) %>%
#  unique()
#
#series <- profile_info_layer %>% 
#  select(profile_name, soil_taxon, soil_series) %>% 
#  unique() %>%
#  anti_join(profile)
#citation information for the data set
#paste0() is used because the literal pieces already carry their own spaces
study %>%
  knitr::kable(caption = paste0('Citations for the ', current_dataset, ' data set.'))

#count how often each value of each layer-level factor column occurs
layer %>%
  select(where(is.factor)) %>%
  pivot_longer(cols = everything(), names_to = 'column_name') %>%
  group_by(across(everything())) %>%
  tally() %>%
  knitr::kable(caption = 'Layer-level categorical variable counts.')

#same tally for the profile-level factor columns, when there are profile rows
if (nrow(profile) != 0) {
  profile %>%
    select(where(is.factor)) %>%
    pivot_longer(cols = everything(), names_to = 'column_name') %>%
    group_by(across(everything())) %>%
    tally() %>%
    knitr::kable(caption = 'Profile-level categorical variable counts.')
}
#map of the geolocated points drawn over a world outline
ggplot(locations) +
  geom_polygon(data = map_data('world'), aes(x = long, y = lat, group = group)) +
  geom_point(aes(x = `long (dec. deg)`, y = `lat (dec. deg)`), color = 'red', size = 3) +
  theme(axis.title = element_blank())

#one histogram per numeric layer column, excluding lat/long
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) &
                        !ends_with('(dec. deg)'), #numerics that aren't lat/long
                      values_to = 'measurements', names_to = 'variable')) +
  #bins = 30 is the stat_bin() default; stating it avoids the default-bins message
  geom_histogram(aes(x = measurements), bins = 30) +
  facet_wrap(~variable, scales = 'free')

#measurement-against-depth lines, one facet per measured variable
ggplot(data = layer %>%
         pivot_longer(cols = where(is.numeric) & #all the numeric columns
                        !starts_with('layer') & #as long as they aren't layer bounds
                        !ends_with('(dec. deg)'), #and aren't lat/long
                      #create the paired measurement-variable table
                      values_to = 'measurements', names_to = 'variable') %>%
         #each measurement is repeated at the top and the bottom depth of its layer
         pivot_longer(cols = c('layer_top (cm)', 'layer_bot (cm)'),
                      values_to = 'depth', names_to = 'layer_variable')) +
  #lines are grouped per profile so they span the layer boundaries
  geom_line(aes(x = depth, y = measurements,
                group = paste(dataset_name_sub, site_name, profile_name))) +
  facet_wrap(~variable, scales = 'free')

#remove data from the layer that appears in the profile that is not the top/bottom/soc
#select all numerical data, pivot and histogram
#select all factor data and tally

Fixed issues

Below is a list of known issues that were addressed between ISCN 3 and ISCN 3.3 for the University of Michigan Biological Station Forest Accelerated Succession Experiment (UMBS_FASET) data set.

Future issues



ISCN/SOCDRaHR2 documentation built on May 26, 2023, 6:44 a.m.