#' Mangrove Data Preparation
#'
#' Reads a single Excel workbook whose tabs hold the data gathered in a
#' plot-method survey of mangrove stands, stacks the plot tabs into one data
#' frame, derives any missing DBH from GBH (DBH = GBH / pi), and splits the
#' records into trees (DBH of 5 cm or more) and saplings (DBH below 5 cm).
#'
#' @param excelpath String giving the path of the Excel file. Required.
#' @param nontabs Number of tabs in the Excel file that are not part of the plot data. These tabs must be located after all tabs with plot data.
#' @param location Name of the general location where the survey took place. When given, a "LOCATION" column is added and used as the first spatial level.
#' @param levels List (or character vector) of column names set for varying spatial levels.
#' @param sitename Column name indicating the name of the site. Default is "SITE NAME".
#' @param site Column name indicating the tag or code for the site. Default is "SITE".
#' @param plotnumber Column name indicating the plot tag for each site. Default is "PLOT #".
#' @param plotsize Column name indicating the area of plots. Default is "Plot size".
#' @param species Column name indicating the type of species. Default is "Species".
#' @param height Column name indicating the height measured. Default is "Height (m)". Optional.
#' @param dbh Column name indicating measured diameter at breast height. Default is "DBH (cm)". Optional IF GBH is present.
#' @param gbh Column name indicating measured girth at breast height. Default is "GBH (cm)". Optional IF DBH is present.
#'
#' @return Called mainly for its side effects: a summary is printed to the
#' console and two data frames are assigned in the global environment,
#' 'm.data' (observations with DBH of 5 cm or more) and 'm.saps'
#' (observations with DBH below 5 cm). The sapling data frame is also
#' returned invisibly.
#'
#' @keywords data preparation
#'
#' @export
data_prep<- function(excelpath=excelpath,
                     nontabs=0,
                     location=NA,
                     sitename = 'SITE NAME',
                     site = 'SITE',
                     plotnumber = 'PLOT #',
                     plotsize = 'Plot size',
                     species = 'Species',
                     gbh = 'GBH (cm)',
                     dbh = 'DBH (cm)',
                     height = 'Height (m)',
                     levels = list())
{
  # The default `excelpath = excelpath` refers to itself, so an omitted
  # argument would otherwise fail with an obscure promise-recursion error.
  if (missing(excelpath)) {
    stop("'excelpath' must be supplied.", call. = FALSE)
  }

  # Number of leading tabs that contain plot data.
  n_tabs <- length(readxl::excel_sheets(excelpath)) - nontabs
  if (n_tabs < 1) {
    stop("No plot data tabs left to read after excluding 'nontabs'.",
         call. = FALSE)
  }

  # Read every plot tab, keeping all values as character for now so that
  # tabs with differently typed columns can be stacked.
  sheets <- lapply(seq_len(n_tabs), function(tab) {
    sheet <- readxl::read_excel(excelpath, sheet = tab)
    sheet[] <- lapply(sheet, as.character)
    sheet
  })
  # Later tabs are placed first, matching the historical row order of the
  # output.
  man_data <- as.data.frame(dplyr::bind_rows(rev(sheets)))

  # Fail early, and by name, when a mandatory column is absent.
  required <- c(sitename, site, plotnumber, plotsize, species)
  absent <- setdiff(required, colnames(man_data))
  if (length(absent) > 0) {
    stop("Column(s) not found in the Excel data: ",
         paste(absent, collapse = ", "), call. = FALSE)
  }

  # Returns the first of `candidates` present in `data` as numeric, or an
  # all-NA numeric vector when none of them exists.
  numeric_column <- function(data, candidates) {
    for (name in candidates) {
      if (name %in% colnames(data)) {
        return(as.numeric(data[[name]]))
      }
    }
    rep(NA_real_, nrow(data))
  }

  # Copy user-named columns into the standard column names used below.
  man_data[["SITE NAME"]] <- man_data[[sitename]]
  man_data[["SITE"]] <- man_data[[site]]
  man_data[["PLOT #"]] <- man_data[[plotnumber]]
  man_data[["Plot size"]] <- as.numeric(man_data[[plotsize]])
  man_data[["Species"]] <- man_data[[species]]
  man_data[["GBH"]] <- numeric_column(man_data, c(gbh, "GBH"))
  man_data[["DBH"]] <- numeric_column(man_data, c(dbh, "DBH"))
  if (height %in% colnames(man_data)) {
    man_data[["Height (m)"]] <- as.numeric(man_data[[height]])
  }

  # Rows without a site code carry no usable observation.
  man_data <- man_data[!is.na(man_data[["SITE"]]), , drop = FALSE]
  rownames(man_data) <- NULL
  if (nrow(man_data) == 0) {
    stop("No rows with a site code were found in the plot data.",
         call. = FALSE)
  }

  # Spatial levels must be a plain character vector to be usable as a
  # column index; a list (including the default empty list) is not.
  levels <- as.character(unlist(levels))
  if (length(location) == 1L && !is.na(location)) {
    # Record the general location and use it as the outermost level.
    man_data[["LOCATION"]] <- location
    levels <- c("LOCATION", levels)
  }
  absent_levels <- setdiff(levels, colnames(man_data))
  if (length(absent_levels) > 0) {
    stop("Level column(s) not found in the data: ",
         paste(absent_levels, collapse = ", "), call. = FALSE)
  }

  # Derive DBH from girth wherever DBH itself was not recorded.
  no_dbh <- is.na(man_data[["DBH"]])
  man_data[["DBH"]][no_dbh] <- man_data[["GBH"]][no_dbh] / pi
  ## IF both GBH and DBH are not present, function will stop.
  if (anyNA(man_data[["DBH"]])) {
    stop("Blanks detected in both DBH and GBH columns.")
  }

  # Saplings are below 5 cm DBH; everything else (including exactly 5 cm)
  # is kept as a tree so that no observation falls between the two sets.
  is_sapling <- man_data[["DBH"]] < 5
  man_saps <- man_data[is_sapling, , drop = FALSE]
  man_data <- man_data[!is_sapling, , drop = FALSE]

  # Removes all tree data columns that contain at least one NA/NaN
  man_data <- man_data[, colSums(is.na(man_data)) == 0, drop = FALSE]

  # Columns that must survive the NA filter to build the outputs.
  tree_cols <- c(levels, "SITE NAME", "SITE", "PLOT #", "Plot size",
                 "Species", "DBH")
  sap_cols <- c(levels, "SITE NAME", "SITE", "PLOT #", "Species", "DBH")
  incomplete <- setdiff(tree_cols, colnames(man_data))
  if (length(incomplete) > 0) {
    stop("Missing values found in required column(s): ",
         paste(incomplete, collapse = ", "), call. = FALSE)
  }

  # This will print out the summary results to the console
  cat("\n ---------------------------------------------------")
  cat("\n Mangrove data preparation summary")
  cat("\n ---------------------------------------------------\n\n")
  ## Prints out column names
  cat("Column names: ")
  cat("\n ")
  print(names(man_data))
  ## Prints out list of sites
  cat("\n List of sites:")
  cat("\n ")
  print(unique(man_data[["SITE"]]))
  ## Prints out list of species
  cat("\n List of species observed:")
  cat("\n ")
  print(unique(man_data[["Species"]]))

  ## Height is exported only when it is present and complete for all trees
  ## (an incomplete height column was removed by the NA filter above).
  if ("Height (m)" %in% colnames(man_data)) {
    tree_cols <- c(tree_cols, "Height (m)")
  }
  man_data <- man_data[, tree_cols, drop = FALSE]
  man_saps <- man_saps[, sap_cols, drop = FALSE]

  ## Outputs the data frames back to the global environment. The final
  ## assign() also provides the invisible return value (the sapling data).
  assign("m.data", man_data, pos = .GlobalEnv)
  assign("m.saps", man_saps, pos = .GlobalEnv)
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.