R/data_sum.R

Defines functions data_sum FUN

Documented in data_sum

#' Mangrove Data Summary
#'
#' Output summaries for plot areas, species observation, and tree measurement
#' parameters. Every summary is computed once per grouping column named in
#' \code{group.by}.
#'
#' The basal area of each tree is derived as \code{DBH^2 * pi / 40000}, i.e.
#' \code{DBH} is taken to be in centimetres and the basal area is in square
#' metres. \code{Plot size} is summed into \code{Plot area (sqm)} and divided
#' by 10000 to obtain \code{Plot area (ha)}.
#'
#' @param data Main data frame obtained from \code{\link{data_prep}}. The
#'   columns \code{SITE}, \code{PLOT #}, \code{Plot size}, \code{Species} and
#'   \code{DBH} are required; \code{Height (m)} is summarised when present.
#' @param group.by String or list to categorize at which spatial level of
#'   grouping the data analysis should be. Each element must be the name of a
#'   column of \code{data}.
#'
#' @return Invisibly, a named list holding the data frames described below.
#'   Each data frame is also assigned into the global environment under the
#'   same name, where \code{[name of cluster]} is the lower-cased grouping
#'   column name:
#'   \enumerate{
#'     \item \code{[name of cluster].spe}: number of sites and plots, plot
#'       areas, and number of species observed.
#'     \item \code{[name of cluster].pn}: individuals observed per species and
#'       number of plots in which that species was observed.
#'     \item \code{[name of cluster].meas}: DBH, Height, and BA summary for all
#'       species.
#'     \item \code{[name of cluster].meas.spread}: DBH, Height, and BA summary
#'       per species.
#'     \item \code{[name of cluster].bastem}: stand basal area and stem density
#'       computations per species.
#'   }
#'
#' @keywords mangrove data summary
#'
#' @export

# Function for data summary
data_sum <- function(data = data,
                     group.by = group.by) {

  # The defaults are self-referential, so an omitted argument would otherwise
  # fail with an opaque "promise already under evaluation" error
  if (missing(data) || missing(group.by)) {
    stop("Both `data` and `group.by` must be supplied.", call. = FALSE)
  }

  # Declare variables; unlist() lets `group.by` be a list as well as a vector
  x <- as.data.frame(data)
  clustlvls <- as.character(unlist(group.by))

  # Fail early with a readable message when an expected column is absent
  needed <- c(clustlvls, "SITE", "PLOT #", "Plot size", "Species", "DBH")
  absent <- setdiff(needed, colnames(x))
  if (length(absent) > 0) {
    stop("Column(s) not found in `data`: ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Compute for basal areas of each individual
  x$BA <- (x$DBH^2 * pi) / 40000

  # Height statistics are only reported when the column exists
  has_height <- "Height (m)" %in% colnames(x)

  # Standard error of the mean
  std_error <- function(v) stats::sd(v) / sqrt(length(v))

  # One-row summary (min, max, mean, SE) of DBH, Height and BA for one group
  measure_stats <- function(piece) {
    dbh <- piece[["DBH"]]
    ba <- piece[["BA"]]

    ## Diameter at breast height (min, max, mean, and SE)
    out <- data.frame(
      `DBH.min` = round(min(dbh), digits = 2),
      `DBH.max` = round(max(dbh), digits = 2),
      `DBH.mean (cm)` = round(mean(dbh), digits = 3),
      `DBH.se` = round(std_error(dbh), digits = 3),
      check.names = FALSE
    )

    ## Vertical tree height (min, max, mean, and SE)
    if (has_height) {
      height <- piece[["Height (m)"]]
      out[["Height.min"]] <- min(height)
      out[["Height.max"]] <- max(height)
      out[["Height.mean"]] <- round(mean(height), digits = 2)
      out[["Height.se"]] <- round(std_error(height), digits = 3)
    }

    ## Basal areas (min, max, mean, and SE)
    out[["BA.min"]] <- round(min(ba), digits = 5)
    out[["BA.max"]] <- round(max(ba), digits = 5)
    out[["BA.mean"]] <- round(mean(ba), digits = 5)
    out[["BA.se"]] <- round(std_error(ba), digits = 5)

    out
  }

  # Reshape a (cluster, Species, measures...) data frame into one row per
  # species with one column per "<cluster value>_<measure>" combination
  spread_by_species <- function(df, lvl) {
    long <- reshape::melt(df, id.vars = c(lvl, "Species"))
    long$variable <- paste(long[[lvl]], long$variable, sep = "_")
    reshape::cast(long, Species ~ variable, fill = "-")
  }

  results <- list()

  ## A for-loop to process level(s) of clusters
  for (lvl in clustlvls) {
    print(unique(x[[lvl]]))

    # Site location summary ----

    ## Number of plots and plot area per site within the cluster
    s.all <- plyr::ddply(x,
                         plyr::.(x[[lvl]], SITE, `PLOT #`),
                         plyr::summarize,
                         `n plots` = dplyr::n_distinct(`PLOT #`),
                         `Plot size` = mean(`Plot size`))
    colnames(s.all)[1] <- lvl

    ## Number of sites, plots, and total area per cluster
    s.sum <- plyr::ddply(s.all,
                         plyr::.(s.all[[lvl]]),
                         plyr::summarize,
                         `Number of sites` = dplyr::n_distinct(SITE),
                         `Number of plots` = sum(`n plots`),
                         `Plot area (sqm)` = sum(`Plot size`),
                         `Plot area (ha)` = sum(`Plot size`) / 10000)
    colnames(s.sum)[1] <- lvl

    ## Number of species observed per cluster
    s.spe <- plyr::ddply(x,
                         plyr::.(x[[lvl]]),
                         plyr::summarize,
                         `Species observed` = dplyr::n_distinct(Species))
    colnames(s.spe)[1] <- lvl

    ## Combine two data frames into one
    s.sumspe <- merge(s.sum, s.spe, by = lvl)

    # Plot summary: individuals observed per species and number of plots
    # where that particular species was observed ----

    ## Number of individuals per species observed for the declared cluster
    plot.n <- as.data.frame(dplyr::count(x, x[[lvl]], Species))
    colnames(plot.n) <- c(lvl, "Species", "Individuals observed")

    ## Number of plots where a particular species was observed
    plot.p <- as.data.frame(
      stats::aggregate(x[["PLOT #"]],
                       list(x[[lvl]], x[["Species"]]),
                       FUN = function(v) length(unique(v)))
    )
    colnames(plot.p) <- c(lvl, "Species", "Observed in n plots")

    ## Combine both counts and lay them out one row per species
    plot.pn <- merge(plot.p, plot.n, by = c(lvl, "Species"))
    plot.pn.spread <- spread_by_species(plot.pn, lvl)

    # Tree parameter measurements summary for whole cluster ----
    meas <- plyr::ddply(x, plyr::.(x[[lvl]]), measure_stats)
    colnames(meas)[1] <- lvl

    # Tree parameter measurements summary for whole cluster per species ----
    meas.spe <- plyr::ddply(x, plyr::.(x[[lvl]], Species), measure_stats)
    colnames(meas.spe)[1] <- lvl
    meas.spe.spread <- spread_by_species(meas.spe, lvl)

    # Stem density and Stand Basal Area ----

    ## Sampled area of each cluster, selected by name rather than position
    areas <- s.sumspe[, c(lvl, "Plot area (sqm)", "Plot area (ha)")]

    ## Merge number of individuals observed with the sampled area
    stem <- merge(plot.n, areas, by = lvl)

    ## Stem density per hectare, rounded up to a whole number. The count is
    ## scaled by 10000 sqm/ha and must therefore be divided by the area in
    ## square metres; dividing by hectares inflates the result 10000-fold.
    stem$`Stem density (stems/ha)` <- ceiling(
      (stem$`Individuals observed` * 10000) / stem$`Plot area (sqm)`
    )

    ## Summarizes Basal Areas per species for the given cluster
    ba <- plyr::ddply(x, plyr::.(x[[lvl]], Species), plyr::summarize,
                      `BA` = sum(BA))
    colnames(ba)[1] <- lvl

    ## Merge basal area per species with the sampled area
    ba <- merge(ba, areas, by = lvl)

    ## Compute for stand basal area per hectare
    ba$`Stand basal area (m2/ha)` <- round(ba$BA / ba$`Plot area (ha)`,
                                            digits = 3)

    ## Merge basal area and stem density, one row per species
    bastem <- merge(
      ba[, c(lvl, "Species", "Stand basal area (m2/ha)")],
      stem[, c(lvl, "Species", "Stem density (stems/ha)")],
      by = c(lvl, "Species")
    )
    bastem.spread <- spread_by_species(bastem, lvl)

    # Name the outputs and return them back to the global environment ----
    outputs <- list(spe = s.sumspe,
                    pn = plot.pn.spread,
                    meas = meas,
                    meas.spread = meas.spe.spread,
                    bastem = bastem.spread)
    names(outputs) <- paste(tolower(lvl), names(outputs), sep = ".")

    for (nm in names(outputs)) {
      assign(nm, outputs[[nm]], pos = .GlobalEnv)
    }

    results <- c(results, outputs)
  }

  invisible(results)
}
ppcadelina/bucs documentation built on April 4, 2020, 5:52 a.m.