bucs: Bakawans Under Constrained Survey (BUCS) package

Documented in compute_IVI

#' Mangrove Importance Value Index computation
#'
#' Computes the relative frequency, relative dominance, and relative density
#' of every species within each group, and sums the three into the Importance
#' Value Index (IVI) per species.
#'
#' A plot is identified by the combination of \code{SITE} and \code{PLOT #},
#' both when counting the plots in which a species occurs and when summing the
#' surveyed plot area. Basal area per tree is \code{pi * DBH^2 / 40000} (the
#' divisor presumably converts a DBH in centimetres to square metres; confirm
#' against \code{\link{data_prep}}).
#'
#' @param data Processed data frame obtained from \code{\link{data_prep}}.
#'   It must contain the columns \code{Species}, \code{SITE}, \code{PLOT #},
#'   \code{Plot size}, \code{DBH}, and every column named in \code{group.by}.
#' @param group.by Character vector or list of column names. The analysis is
#'   repeated once for each name, grouping the data by that column.
#'
#' @return Invisibly, a named list with one element per grouping level. Each
#'   element is named \code{[lower-cased level].ivi} and holds the wide-format
#'   table (one row per species, one column per group and measure, with
#'   \code{"-"} where a species was not observed). As side effects, the
#'   function also
#'   \enumerate{
#'     \item assigns each of those tables to the global environment under the
#'       same \code{[lower-cased level].ivi} name, and
#'     \item prints the long-format table of RF, RDom, RD and IVI values to
#'       the console.
#'   }
#'
#' @keywords importance value index, relative density, relative frequency, relative dominance
#'
#' @export

compute_IVI <- function(data = data,
                        group.by = group.by) {

  # The historical defaults are self-referential, so an omitted argument would
  # otherwise fail with the cryptic "promise already under evaluation".
  if (missing(data) || missing(group.by)) {
    stop("Both `data` and `group.by` must be supplied.", call. = FALSE)
  }

  x <- as.data.frame(data)

  # Accept a character vector or a list of names; flattening avoids indexing
  # the data frame with a length-one list.
  clustlvls <- as.character(unlist(group.by, use.names = FALSE))

  # Fail early, and by name, instead of deep inside an aggregation call.
  required <- unique(c(clustlvls, "Species", "SITE", "PLOT #", "Plot size",
                       "DBH"))
  absent <- setdiff(required, colnames(x))
  if (length(absent) > 0) {
    stop("`data` is missing required column(s): ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Percentage share of `value` within each group, rounded as before
  # (four digits on the proportion, then scaled to percent).
  cluster_share <- function(tbl, value) {
    totals <- plyr::ddply(tbl, ".bucs_cluster", function(d) {
      data.frame(total = sum(d[[value]]))
    })
    total <- totals$total[match(tbl$.bucs_cluster, totals$.bucs_cluster)]
    round(tbl[[value]] / total, digits = 4) * 100
  }

  results <- list()

  for (lvl in clustlvls) {

    # Helper columns with fixed, syntactic names so every aggregation can
    # group by name rather than by a deparsed expression.
    work <- x
    work$.bucs_cluster <- x[[lvl]]
    work$.bucs_plotno <- x[["PLOT #"]]
    work$.bucs_ba <- (x[["DBH"]]^2 * pi) / 40000

    ## Plot bookkeeping: one row per plot (SITE + PLOT #) within each group,
    ## then the number of plots and the total plot area per group.
    plots <- plyr::ddply(
      work, c(".bucs_cluster", "SITE", ".bucs_plotno"),
      function(d) data.frame(plot_area = mean(d[["Plot size"]]))
    )
    area <- plyr::ddply(plots, ".bucs_cluster", function(d) {
      data.frame(total_plots = nrow(d),
                 sum_plot_area = sum(d[["plot_area"]]))
    })

    ## Per group and species: plots occupied, summed basal area, stem count.
    sp <- plyr::ddply(work, c(".bucs_cluster", "Species"), function(d) {
      plot_ids <- unique(d[, c("SITE", ".bucs_plotno"), drop = FALSE])
      data.frame(n_plots = nrow(plot_ids),
                 sum_ba = sum(d[[".bucs_ba"]]),
                 n = nrow(d))
    })

    ## Frequency, dominance and density of each species in its group.
    at <- match(sp$.bucs_cluster, area$.bucs_cluster)
    sp$freq <- sp$n_plots / area$total_plots[at]
    sp$dom <- sp$sum_ba / area$sum_plot_area[at]
    sp$den <- sp$n / area$sum_plot_area[at]

    ## Importance Value Index = sum of the three relative measures.
    ivi <- sp[, c(".bucs_cluster", "Species")]
    colnames(ivi)[1] <- lvl
    ivi$RF <- cluster_share(sp, "freq")
    ivi$RDom <- cluster_share(sp, "dom")
    ivi$RD <- cluster_share(sp, "den")
    ivi$IVI <- ivi$RF + ivi$RDom + ivi$RD

    ### Long format, with the group name prefixed to each measure ...
    ivi.melt <- reshape::melt(ivi, id.vars = c(lvl, "Species"))
    ivi.melt$variable <- paste(ivi.melt[[lvl]], ivi.melt$variable, sep = "_")

    ### ... then back to wide format, unobserved combinations shown as "-".
    ivi.spread <- reshape::cast(ivi.melt, Species ~ variable, fill = "-")

    # Console report of the long-format table.
    cat("\n Per", tolower(lvl), "Importance Value Index: \n")
    cat("\n")
    print.noquote(ivi, row.names = FALSE)

    # Kept for backward compatibility: callers rely on `<level>.ivi` appearing
    # in the global environment. The same table is also collected for return.
    out_name <- paste(tolower(lvl), "ivi", sep = ".")
    assign(out_name, ivi.spread, pos = .GlobalEnv)
    results[[out_name]] <- ivi.spread
  }

  invisible(results)
}