In forestgeo/fgeo.analyze: Analyze ForestGEO Data

# Turn on the `fgeo.quiet` option for this session
# (presumably silences fgeo messages -- confirm in the fgeo documentation).
options(fgeo.quiet = TRUE)

# Chunk defaults for the README: figures go under man/figures/ with a
# "README-" prefix, output lines are prefixed with "#>", and source and
# output are collapsed into a single block.
knitr::opts_chunk$set(
  fig.path = "man/figures/README-",
  comment = "#>",
  collapse = TRUE
)

Analyze ForestGEO data

fgeo.analyze provides functions to analyze ForestGEO data.

Installation

Install the latest stable version of fgeo.analyze from CRAN with:

# `these_repos` must be defined before it is used; point it at a CRAN mirror.
these_repos <- c(CRAN = "https://cloud.r-project.org")
install.packages("fgeo.analyze", repos = these_repos)

Install the development version of fgeo.analyze with:

# Needs devtools; uncomment the next line if it is not installed yet.
# install.packages("devtools")
devtools::install_github(repo = "forestgeo/fgeo.analyze")

Or install all fgeo packages in one step.

Example

library(dplyr)
library(fgeo.x)
library(fgeo.tool)
library(fgeo.analyze)

Abundance

Your data may have multiple stems per treeid and even multiple measures per stemid (if trees have buttresses).

# A single stem of a buttressed tree may be measured more than once.
# The main stem has the highest `HOM`, then the largest `DBH`; in this
# example the main stems are the second and fourth rows.
vft <- tibble(
  CensusID = c(1, 1, 1, 1),
  TreeID   = c("1", "1", "2", "2"),
  StemID   = c("1.1", "1.1", "2.1", "2.2"),
  DBH      = c(88, 10, 20, 30),
  HOM      = c(130, 160, 130, 130)
)

Fundamentally, abundance() counts rows. All of these results are the same:

# Each of these calls reports the number of rows in `vft`.
nrow(vft)
vft %>% count()
vft %>% summarize(n = n())
vft %>% abundance()

But that result is likely not what you expect. Instead, you likely expect this:

# Count the distinct values of `TreeID` instead of counting rows.
vft %>% summarize(n = n_distinct(TreeID))

As shown above, you can get a correct result by combining summarize() and n_distinct() (from the dplyr package). But abundance() includes some useful additional features (see ?abundance()). This code conveys your intention more clearly, i.e. to calculate tree abundance by counting the number of main stems:

# Pick the main stems, show them, then count them.
main_stems <- pick_main_stem(vft)
main_stems
abundance(main_stems)

If you have data from multiple censuses, then you can compute by census (or any other group).

vft2 <- tribble(
  ~CensusID, ~TreeID, ~StemID, ~DBH, ~HOM,
          1,     "1",   "1.1",   10,  130,
          1,     "1",   "1.2",   20,  130,  # Main stem
          2,     "1",   "1.1",   12,  130,
          2,     "1",   "1.2",   22,  130   # Main stem
)
# Group by census so that the following steps work per census.
by_census <- vft2 %>% group_by(CensusID)
main_stems_by_census <- pick_main_stem(by_census)
main_stems_by_census
abundance(main_stems_by_census)

Often you will need to first subset data (e.g. by status or DBH) and then count.

# Keep only the rows with `DBH` above 20, then count.
over20 <- main_stems_by_census %>% filter(DBH > 20)
abundance(over20)

Basal area

If trees have buttresses, then you may need to pick the main stemid of each stem so you do not count the same stem more than once.

# Two censuses of the same two trees; some stems have more than one row.
vft3 <- tribble(
  ~CensusID, ~TreeID, ~StemID, ~DBH, ~HOM,
          1,     "1",   "1.1",   88,  130,
          1,     "1",   "1.1",   10,  160,  # Main stem
          1,     "2",   "2.1",   20,  130,
          1,     "2",   "2.2",   30,  130,  # Main stem
          2,     "1",   "1.1",   98,  130,
          2,     "1",   "1.1",   20,  160,  # Main stem
          2,     "2",   "2.1",   30,  130,
          2,     "2",   "2.2",   40,  130,  # Main stem
)
# The parenthesized assignment assigns and prints in one step, so the
# result does not need to be printed a second time.
(main_stemids <- pick_main_stemid(vft3))
basal_area(main_stemids)

basal_area() also allows you to compute by groups.

# Group the picked stems by census before computing basal area.
by_census <- main_stemids %>% group_by(CensusID)
basal_area(by_census)

But if you want to compute on a subset of data, then you need to pick the data first.

# Keep rows whose `DBH` lies in the closed range [10, 20].
ten_to_twenty <- filter(by_census, between(DBH, 10, 20))
basal_area(ten_to_twenty)

Abundance and basal area aggregated by year

Example data.

# Eight rows: two censuses (2001 and 2002) of two trees with two stems each.
vft <- tibble(
  PlotName = rep("luq", 8),
  CensusID = rep(1:2, each = 4),
  TreeID = rep(rep(1:2, each = 2), times = 2),
  StemID = rep(c(1.1, 1.2, 2.1, 2.2), times = 2),
  Status = c(
    "alive", "dead", "alive", "alive",
    "alive", "gone", "dead", "dead"
  ),
  DBH = c(10L, NA, 20L, 30L, 20L, NA, NA, NA),
  Genus = rep("Gn", 8),
  SpeciesName = rep("spp", 8),
  ExactDate = rep(c("2001-01-01", "2002-01-01"), each = 4),
  PlotCensusNumber = rep(1:2, each = 4),
  Family = rep("f", 8),
  Tag = rep(rep(1:2, each = 2), times = 2),
  HOM = rep(130L, 8)
)

vft

Abundance by year.

# Abundance by year, restricted by the given `DBH` conditions.
vft %>% abundance_byyr(DBH >= 10, DBH < 20)
vft %>% abundance_byyr(DBH >= 10)

Basal area by year.

# Basal area by year, restricted by the given `DBH` condition.
vft %>% basal_area_byyr(DBH >= 10)

Demography

# Two example datasets shipped with fgeo.x, used below as the first and
# second census passed to the demography functions.
census1 <- fgeo.x::tree5
census2 <- fgeo.x::tree6

Demography functions output a list that you can convert to a more convenient dataframe with as_tibble().

# Default call: prints the list output.
recruitment_ctfs(census1, census2)

# Same computation with `quiet = TRUE`, converted to a tibble.
recruitment_ctfs(census1, census2, quiet = TRUE) %>%
  as_tibble()

The exception is split2: this argument creates a complex data structure that as_tibble() cannot handle.

# Expected to fail: with `split2`, the result is too complex for
# `as_tibble()` to convert.
as_tibble(
  recruitment_ctfs(
    census1, census2, 
    split1 = census1$sp, 
    split2 = census1$quadrat,  # `as_tibble()` can't handle this
    quiet = TRUE
  )
)

Instead, combine the multiple grouping variables with interaction() and pass the result to split1. This approach allows you to use any number of grouping variables and the output always works with as_tibble().

# Recommended: fold both grouping variables into a single factor and pass
# it as `split1`.
by_sp_and_quadrat <- interaction(census1$sp, census1$quadrat)

recruitment_ctfs(
  census1, census2,
  split1 = by_sp_and_quadrat,
  quiet = TRUE
) %>%
  as_tibble()

The same applies for other demography functions.

# Mortality, grouped by the combined species-by-quadrat factor.
mortality_ctfs(
  census1, census2,
  split1 = by_sp_and_quadrat,
  quiet = TRUE
) %>%
  as_tibble()

A simple way to separate the grouping variables is with tidyr::separate().

# Growth, grouped by the combined species-by-quadrat factor.
growth <- growth_ctfs(
  census1, census2,
  quiet = TRUE, split1 = by_sp_and_quadrat
)
as_tibble(growth)

# Split the combined `groups` column back into its two parts.
tidyr::separate(
  as_tibble(growth),
  groups,
  into = c("species", "quadrats")
)

Species-habitat associations

# Keep alive trees (status "A") with a dbh of 10 mm or more
tree <- download_data("luquillo_tree5_random")
census <- tree %>% filter(status == "A", dbh >= 10)
# Keep only species with more than 50 rows in `census`
pick <- census %>%
  add_count(sp) %>%
  filter(n > 50)

# Habitat data: use your own or derive it from elevation data
elevation <- download_data("luquillo_elevation")
habitat <- fgeo_habitat(elevation, gridsize = 20, n = 4)

tt_test_result <- tt_test(pick, habitat)

# Printed as is: a list of matrices
tt_test_result

# Converted to a dataframe
as_tibble(tt_test_result)

# A short summary to help you interpret the results
summary(tt_test_result)