# guide/fordataanalysis.R

##*## Backbone script  ##*##

# For a more comprehensive, step-by-step guide,
# kindly read "guide.pdf" located inside the "guide" repository

##### 0. Guides and Installation #####

# Install devtools only when it is missing, so re-running this section does
# not reinstall a package you already have.
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools")
}

# Download and install the BUCS package from GitHub.
devtools::install_github("ppcadelina/bucs")


##### 1. DATA PREPARATION #####

## REMEMBER to always load the library whenever running this ##
library(bucs)

# 1.1 Change the working directory (run ONE of 1.1a / 1.1b).
## 1.1a Run this to launch the directory selector.
## NOTE: choose.dir() is only available in R for Windows; use 1.1b elsewhere.
setwd(choose.dir())

## 1.1b Or set it manually: edit the text between the quotes, then run.
setwd("D:\\mangrove")

# 1.2 Point to where the Excel file is (run ONE of 1.2a / 1.2b).
## 1.2a Run this to launch the file selector.
excelpath <- file.choose()

## 1.2b Or set it manually: edit the text between the quotes, then run.
excelpath <- "datatrial.xlsx" ## example using datatrial.xlsx

# 1.3 Input the number of tabs that DO NOT contain plot measurement data.
nontabs <- 2 ## example using datatrial.xlsx

# 1.4 Name the general location where the survey took place.
# Edit the text between the quotes.
location <- "Feelipines" ## example using datatrial.xlsx

# If the location is already included in your Excel sheet, just add its column
# name to the cluster-level list below, and leave the location blank.

# 1.5 Indicate the columns containing the spatial information in your data.
# These will also be the basis for the different levels of analysis later on.
# Edit the text between the quotation marks.
levels <- c("Province", "Municipality") ## example using datatrial.xlsx


##**## BASE_RUN FUNCTION ##**##
# RUN this if your columns use the default names expected by data_prep().
data_prep(excelpath,
          nontabs,
          location,
          levels = levels)

# If your columns are named differently, you can set the names within the
# function call. In our example the column names do not match the defaults,
# so we state them explicitly here.

##**## RUN FUNCTION ##**##
# Example using "datatrial.xlsx"
data_prep(excelpath = excelpath,
          nontabs = nontabs,
          location = location,
          levels = levels,
          sitename = "Site Name",
          site = "Site",
          plotnumber = "Plot number",
          plotsize = "Plot size",
          species = "Species",
          height = "Height",
          dbh = "DBH",
          gbh = "GBH")


# OUTPUTS:
# 1. 'm.data', the main data frame; check its first few rows here:
head(m.data)
# 2. 'm.saps', the observations with a DBH below 5; check its first rows here:
head(m.saps)

### Optional: save the current data frame into a .csv file.
# You may change the file name if you wish.
# FALSE is spelled out because the shorthand F is an ordinary variable that
# can be reassigned.
write.csv(m.data, file = "m_data.csv", row.names = FALSE)
##*## End of STEP 1 ##*##



##### 2. DATA SUMMARY #####
# Produces summary outputs for the parameters that were collected.

## Optional: if the data frame was saved as a .csv in step 1,
## it can be loaded back with this call.
m.data <- read.csv(file = "m_data.csv")

# To summarise by a single grouping, set one column name.
group.by <- "Municipality" ## example using datatrial.xlsx

# To summarise by several groupings, list every column name instead.
# Names are case sensitive.
group.by <- c("LOCATION", "Province") ## example using datatrial.xlsx

##**## RUN FUNCTION ##**##
data_sum(data = m.data, group.by = group.by)

# OUTPUTS include:
# 1. [name of cluster].spe = sites, plot areas, and n species observed
# 2. [name of cluster].pn = individuals observed per species and the number
#    of plots in which that species was observed
# 3. [name of cluster].meas = DBH, Height, and BA summary for all species
# 4. [name of cluster].meas.spread = DBH, Height, and BA summary per species
# 5. [name of cluster].bastem = stand basal area and stem density computations

##*## End of STEP 2 ##*##



##### 3. DIVERSITY INDICES  #####

### 3.1. COMPUTATION ###

## Indicate the level of grouping. You can choose to run only one level
group.by <- "Municipality" ## example using datatrial.xlsx

## OR make a list of the levels you want to check
group.by <- c("LOCATION", "Province") ## example using datatrial.xlsx


##**## RUN FUNCTION ##**##
compute_DIV(data = m.data, group.by = group.by)

## OUTPUTS should include:
## 1. [name of cluster].div = a data frame containing the values for the
##    diversity indices PER site
head(location.div) ## example using datatrial.xlsx

## 2. [name of cluster].div.sum = a data frame containing the values for the
##    diversity indices
head(location.div.sum) ## example using datatrial.xlsx

## OPTIONAL: You can export the data frame into a csv file.
## The exported object matches the file name (location-level summary).
write.csv(location.div.sum, file = "location_diversitysummary.csv",
          row.names = FALSE)


### 3.2. VISUALIZATION ###

## You could plot just one level. Use a .div data frame that exists for the
## group.by you ran above (e.g. municipality.div if you grouped by
## "Municipality").
dataframes <- location.div ## example using datatrial.xlsx

## OR plot multiple levels at once (PROVIDED YOU HAVE THE PROPER .div data
## frame for each level)
dataframes <- list(location.div, province.div) ## example using datatrial.xlsx

##**## RUN FUNCTION ##**##
plot_DIV(data = dataframes)

## OUTPUT is an object named `diversityindex`, a box-plot that shows the
## diversity indices computed for each site per cluster level
plot(diversityindex)

### If you want to export the plot into a file, you may run this function.
### This will export the plot to a .png file
ggplot2::ggsave(filename = "div.png", plot = diversityindex,
                device = function(...) png(..., units = "in", res = 600),
                width = 6, height = 5)

# REMINDER: what you see in the plots window will not necessarily match the
# exported file. Check the actual file and adjust the sizes accordingly.

##*## End of STEP 3 ##*##



##### 4. IMPORTANCE VALUE INDEX  #####

### 4.1. COMPUTATION ###

## Indicate the level of grouping. You can choose to run only one level
group.by <- "Municipality" ## example using datatrial.xlsx

## OR make a list of the levels you want to check. The example here is
## Location (entire data) and per province
group.by <- c("LOCATION", "Province") ## example using datatrial.xlsx

##**## RUN FUNCTION ##**##
compute_IVI(data = m.data, group.by = group.by)

## OUTPUTS should include:
## 1. [name of cluster].ivi = a data frame containing the importance values
##    per species
head(location.ivi) # example

## 2. and an output in the console

## OPTIONAL: You can export the data frame into a csv file
write.csv(location.ivi, file = "location_ivi.csv", row.names = FALSE) # example

### 4.2. VISUALIZATION ###

## Run just one level (.ivi)
dataframes <- location.ivi ## example using datatrial.xlsx

## You can run multiple levels at once (provided you have the .ivi data
## frame for each level)
dataframes <- list(location.ivi, province.ivi) ## example using datatrial.xlsx

##**## RUN FUNCTION ##**##
plot_IVI(data = dataframes)


## OUTPUT is an object named `importancevalues`, a horizontal stacked bar graph
## showing RD, RDom, and RF stacked together, with IVI printed at the end
plot(importancevalues)

### If you want to export the plot into a file, you may run this function.
### This will export the plot to a .png file
ggplot2::ggsave(filename = "ivi.png", plot = importancevalues,
                device = function(...) png(..., units = "in", res = 600),
                width = 6, height = 5)

# REMINDER: what you see in the plots window will not necessarily match the
# exported file. Check the actual file and adjust the sizes accordingly.

##*## End of STEP 4 ##*##




##### 5. WILDLINGS AND SAPLINGS DATA PREPARATION  ####

## Change this if your data for wildlings and saplings are in a file separate
## from the main file.

## 5.1a Run this to launch the file selector
excelpath <- file.choose()
## 5.1b Or set it manually: edit the text between the quotes, then run.
excelpath <- "datatrial.xlsx" ## example using datatrial.xlsx

## You may skip this IF you already set `excelpath` in STEP 1.

# If you want the output for BOTH saplings and wildlings, execute this code.
sheet <- c("Saplings", "Wildlings")

# If you want just ONE output, set 'sheet' to either "Saplings" or
# "Wildlings" instead.
sheet <- "Saplings"

# Set the cluster level to compare (ONLY ONE)
clustlvl <- "Province" ## example using datatrial.xlsx

##**## RUN FUNCTION ##**##
data_sapwld(excelpath, sheet, clustlvl)

# OUTPUTS include `m.saps` and/or `m.wilds`, depending on `sheet`.
# Each check below is guarded with exists() so that requesting only one sheet
# does not stop the script with an "object not found" error.

# 1. `m.saps`, a data frame for "Saplings"
if (exists("m.saps")) {
  head(m.saps) # Check output for saplings
}

# and/or 2. `m.wilds`, a data frame for "Wildlings"
if (exists("m.wilds")) {
  head(m.wilds) # Check output for wildlings
}

### You may save the available data frames (as .csv) using the code below:
if (exists("m.wilds")) {
  write.csv(m.wilds, "wildlings.csv", row.names = FALSE) ## For Wildlings data
}
if (exists("m.saps")) {
  write.csv(m.saps, "saplings.csv", row.names = FALSE)   ## For Saplings data
}

##*## END OF STEP 5 ##*##




##### 6. DBH FREQUENCY TABLE AND FIGURE #####

# Grouping level for the table and figure. Only one level may be chosen.
cluster <- "Province" ## example using datatrial.xlsx

# Built-in default ranges: "5-15", "15-30", ">30".
# To use your own ranges, list the lower bound of each range below.
# The example gives "5-10", "10-15", "15-20", ">20".
breaks <- c(5, 10, 15, 20)

##**## RUN FUNCTION ##**##
# RUN this one to keep the default ranges.
mdfrq(data = m.data, cluster = cluster)

# RUN this one instead to apply the ranges you set in `breaks`.
mdfrq(data = m.data, cluster = cluster, breaks = breaks)


# OUTPUTS should include:
# 1. `[cluster].freq`, the DBH frequency table (a data frame) for the chosen
#    cluster
head(province.freq) # example, to check

# 2. `[cluster].frqplot`, a simple histogram of how many trees fall within
#    each tree-diameter range.

# Display the histogram with:
plot(province.frqplot)


### To export the plot into a file, run the call below.
### It writes the plot to a .png file.
ggplot2::ggsave(
  filename = "histogram.png",
  plot = province.frqplot,
  device = function(...) png(..., units = "in", res = 600),
  width = 6,
  height = 5
)

# REMINDER: the plots window will not necessarily match the exported file.
# Check the actual file and adjust the sizes accordingly.

##*## END OF STEP 6 ##*##
# ppcadelina/bucs documentation built on April 4, 2020, 5:52 a.m.