R/utils.R

Defines functions mod_iter pct_dif st_err coef_det z_trans graph_mat_all graph_matrix density_specific density_all_stands density_summary density_calc density_conv circ_area defined_period_annual_growth nbhd_captured non_na_len tri_opp circ_area segment_area

if(getRversion() >= "2.15.1")  
  utils::globalVariables(c(".", "year", "tree_id", "begin_dif", "end_dif", 
                           "stand_id", "species", "dbh", "annual_growth", 
                           "dbh_diff", "year_diff", "begin_size", "dist", 
                           "abh", "y_azim", "st_convex_hull", "stands_locs_raw", 
                           "gather", "location_id", "deg_min_sec", "conv_unit", 
                           "corner_id", "coord", "dec_deg", "spread", "stand_locs_raw", 
                           "stand_utm", "st_as_sf", "mapview", "stand_polygons"))

#==============================
# Calculate circle segment area
#==============================

segment_area <- function(a, r){
  return(0.5 * (a - sin(a)) * (r ^ 2))
}

#=========================
# Calculate area of circle
#=========================

circ_area <- function(radius){
  pi * (radius ^ 2)
}

#===================================================
# Calculate opposite length of right-angled triangle
#===================================================

tri_opp <- function(hyp, adj){
  return(sqrt((hyp ^ 2) - (adj ^ 2)))
}

#==================================
# Get variable length excluding NAs
#==================================

non_na_len <- function(x){length(na.omit(x))}

#=======================================================
# Calculate proportion of neighborhood captured in stand
#=======================================================

nbhd_captured <- function(coord_data, x_limit, y_limit, radius){
  
  result <- coord_data %>%
    mutate(
      
      # Calculate distance to each stand boundary
      left_dist = x_coord,
      right_dist = x_limit - x_coord,
      bot_dist = y_coord,
      top_dist = y_limit - y_coord,
      
      # Calculate smallest vertical and horizontal distance to stand boundary
      tb_dist = if_else(bot_dist <= top_dist, bot_dist, top_dist),
      lr_dist = if_else(left_dist <= right_dist, left_dist, right_dist),
      
      # For edge trees, calculate angle to sector with external segment
      tb_sec_ang = ifelse(tb_dist < radius, 2 * acos(tb_dist / radius), 0),
      lr_sec_ang = ifelse(lr_dist < radius, 2 * acos(lr_dist / radius), 0),
      
      # For corner trees, calculate angle to largest internal sector
      int_sec_ang = ifelse(tb_dist < radius & lr_dist < radius,
                           1.5 * pi - acos(lr_dist / radius) -
                             acos(tb_dist / radius), 0),
      
      # Calculate areas of all segments defined by sector angles
      tb_seg_area = segment_area(tb_sec_ang, radius),
      lr_seg_area = segment_area(lr_sec_ang, radius),
      int_seg_area = segment_area(int_sec_ang, radius),
      
      # For corner trees, calculate internal area excluding segment
      int_tri_area = ifelse(int_seg_area > 0, 0.5 *
                              (tri_opp(radius, lr_dist) + tb_dist) * 
                              (tri_opp(radius, tb_dist) + lr_dist), 0),
      
      # Calculate total internal area for all trees
      total_int_area = ifelse(int_seg_area > 0, int_seg_area + int_tri_area,
                              circ_area(radius) - (tb_seg_area + lr_seg_area)),
      
      # Convert to proportion of total neighborhood area
      prop_area_inc = total_int_area / circ_area(radius)) %>%
    
    # Remove unneeded columns
    select(-c(left_dist, right_dist, bot_dist, top_dist, tb_dist, lr_dist,
              tb_sec_ang, lr_sec_ang, int_sec_ang, tb_seg_area, lr_seg_area,
              int_seg_area, int_tri_area, total_int_area))
  
  # Return output
  return(result)
  
}

#==============================================
# Calculate annual growth over a defined period
#==============================================

# Function requires a data frame where separate measurements of the same tree
# appear in different rows. The data frame needs to have a column called "tree_id"
# that contains unique tree ID values, a column named "year" containing the year
# of the measurement, and a column named "dbh" containing the dbh measurements. The
# function also requires the user to provide the beginning and end date (as a 4 
# digit year) of the period over which annual growth rates are desired

defined_period_annual_growth <- function(data, begin, end){
  
  # Calculate differences between each measurement and begin and end
  data <- data %>%
    mutate(
      begin_dif = abs(begin - year),
      end_dif = abs(end - year))
  
  # Subset measurements earlier than begin
  before <- data %>%
    filter(year <= begin) %>%
    group_by(tree_id) %>%
    filter(begin_dif == min(begin_dif))
  
  # Subset measurements later than end
  after <- data %>%
    filter(year >= end) %>%
    group_by(tree_id) %>%
    filter(end_dif == min(end_dif))
  
  # Combine subsets
  before_after <- rbind(before, after)
  
  # Calculate annual growth
  period_growth = before_after %>% 
    group_by(tree_id) %>% 
    arrange(year) %>%
    summarize(
      stand_id = stand_id[1],
      species = species[1],
      begin_size = dbh[1],
      annual_growth = (dbh[2] - dbh[1]) / (year[2] - year[1])
    ) %>% 
    filter(!is.na(annual_growth)) %>%
    mutate(size_corr_growth = sqrt(annual_growth / begin_size))
  
  # Return new data frame
  period_growth
}

#======================
# Calculate circle area
#======================

circ_area <- function(radius){
  pi * (radius ^ 2)
}

#================================================
# Calculate densities (m^2/hectare) from abh sums
#================================================

# This function takes abh values and converts them into densities with units
# of m^2/hectare. The formula looks surprisingly simple because it is a 
# simplified version of: abh/10000 * 10000/neighborhood_area. The abh values
# are in cm^2 so must be divided by 10000 to convert to m^2, then to get the 
# density for a hectare we divide a hectare (10000 m^2) by the size of our
# sample area (neighborhood). The 10000s cancel to give this simple formula

density_conv <- function(dens, radius){
  dens / circ_area(radius)
}

#=================================================
# Summarize density by species from abh data frame
#=================================================

# Takes a data frame where each row represents a different tree in the stand
# and each column represents a different set of coordinates for which a summary
# of neighborhood density is desired. In each column, the rows representing
# trees that are not in the neighborhood of that tree should contain NA, while
# other cells contain the size of the tree in that row. There is also an 
# additional column for the species identity of the trees. The function returns
# a data frame where each row represents a set of coordinates and each column
# represents a different summary of density at those coordinates

density_calc <- function(data, radius){
  
  # Create list of species
  sps_list <- sort(unique(data$species))
  
  # Create output data frame
  output <- as.data.frame(matrix(nrow = ncol(data) - 1,
                                 ncol = length(sps_list) + 1))
  colnames(output)[1] <- "all_density"
  for(i in 1:length(sps_list)){
    colnames(output)[i + 1] <- paste(sps_list[i], "density", sep = "_")
  }
  
  if(nrow(output) > 1){
    
    # Add density including all species to output
    output[, "all_density"] <- apply(data[, 2:ncol(data)], 2, sum, na.rm = T)
    
    # Loop through species adding species-specific densities to output
    for(sps in 1:length(sps_list)){
      one_sps <- data %>%
        filter(species == sps_list[sps])
      output[, sps + 1] <- apply(one_sps[, 2:ncol(data)], 2, sum, na.rm = T)
    }
  } else {
    
    # Add density including all species to output
    output[, "all_density"] <- sum(data[, 2], na.rm = T)
    
    # Loop through species adding species-specific densities to output
    for(sps in 1:length(sps_list)){
      one_sps <- data %>%
        filter(species == sps_list[sps])
      output[, sps + 1] <- sum(one_sps[, 2], na.rm = T)
    }
  }
  
  # Convert densities to m^2/hectare
  output <- output %>%
    mutate_all(density_conv, radius)
  
  # Return output
  output
}

#===============================
# Calculate neighborhood density
#===============================

# Takes a set of mapping data and returns a species-specific summary of
# neighborhood density for each tree in the mapping dataset. Required inputs
# are a data frame of mapping data, the name of the stand for which 
# neighborhood densities are to be calculated (to iterate over all stands,
# see "density_all_stands" function which calls "density_summary") entered as
# a character string, and the radius of the desired neighborhood size.

density_summary <- function(mapping, stand, radius){
  
  # Convert species column to factor
  mapping$species <- as.factor(mapping$species)
  
  # Extract relevant coordinates
  coords <- mapping %>%
    filter(stand_id == stand) %>%
    mutate(abh = circ_area(dbh / 2)) %>%
    select(tree_id, species, abh, x_coord, y_coord)
  
  # Create matrix of distances between each tree pair
  dist_mat <- as.matrix(dist(coords[, 4:5], diag = T, upper = T))
  
  # Create matrix where each column contains abh measurements of all trees
  nbhds <- matrix(coords$abh, nrow = nrow(coords), ncol = nrow(coords))
  
  # Change abh values of trees not in each neighborhood to NA
  nbhds[which(dist_mat > radius | dist_mat == 0)] <- NA
  
  # Add species information
  all <-  cbind(coords$species, as.data.frame(nbhds))
  colnames(all)[1] <- "species"
  
  # Summarize density data
  output <- density_calc(all, radius)
  
  # Add tree_id and x/y coordinate columns
  output <- cbind(coords$tree_id, coords$x_coord, coords$y_coord, output)
  colnames(output)[1:3] <- c("tree_id", "x_coord", "y_coord")
  
  # Remove focals whose neighborhood overlaps with stand boundary
  output <- output %>%
    filter(x_coord >= radius & x_coord <= 100 - radius & y_coord >= radius & y_coord <= 100 - radius)
  
  # Return output
  output
}

#======================================================================
# Calculate neighborhood density for all trees in a multi-stand dataset
#======================================================================

density_all_stands <- function(all_stands, radius){
  
  # Identify unique stands in dataset
  stand_ids <- unique(all_stands$stand_id)
  
  # Loop through stands
  for(stand_num in 1:length(stand_ids)){
    
    # If first stand, make the output table
    if(stand_num == 1){
      
      output <- density_summary(all_stands,
                                stand = stand_ids[stand_num],
                                radius = radius)
    } else { 
      
      # If not first stand, append results to those from earlier stands
      new_stand <- density_summary(all_stands,
                                   stand = stand_ids[stand_num],
                                   radius = radius)
      
      output <- bind_rows(output, new_stand)
    }
  }
  
  # Change any NA to 0
  output[, 4:ncol(output)][is.na(output[, 4:ncol(output)])] <- 0
  
  # Return output
  output
}

#=================================================================
# Calculate neighborhood density for any user-provided coordinates
#=================================================================

# Returns neighborhood density for either a single set of coordinates or
# a vector of provided coordinates. By setting the focal_coords argument
# equal to "grid" you can also return estimates of density at every
# intersection of a 5 m grid. Required inputs are a mapping data frame,
# a stand name (as character string), neighborhood radius, and the
# coordinates for which density measurements are desired (entered as a 
# data frame with two columns: x_coord, and y_coord)

density_specific <- function(mapping, stand, radius, focal_coords){
  
  # Extract relevant tree data
  coords <- mapping %>%
    filter(stand_id == stand) %>%
    mutate(abh = circ_area(dbh / 2)) %>%
    select(species, abh, x_coord, y_coord)
  
  if(any(focal_coords == "grid")){
    
    x_coord <- rep(seq(0, 100, 5), each = 21)
    y_coord <- rep(seq(0, 100, 5), times = 21)
    input_coords <- cbind(x_coord, y_coord)
  } else {
    
    input_coords <- focal_coords
  }
  
  # Create matrix of distances between each focal point and each tree
  all_coords <- rbind(coords[, 3:4], input_coords) 
  dist_mat <- as.matrix(dist(all_coords, diag = T, upper = T))
  dist_mat <- dist_mat[1:nrow(coords), (nrow(coords) + 1):ncol(dist_mat)]
  
  # Create matrix where each column contains abh measurements of all trees
  nbhds <- matrix(coords$abh, nrow = nrow(coords), ncol = nrow(input_coords))
  
  # Change abh values of trees not in each neighborhood to NA
  nbhds[which(dist_mat > radius)] <- NA
  
  # Add species information
  all <-  cbind(coords$species, as.data.frame(nbhds))
  colnames(all)[1] <- "species"
  
  # Summarize density data
  output <- density_calc(all, radius)
  
  # Add input coordinates to output
  output <- cbind(input_coords, output)
  
  # Return output
  output
}

#==========================================================================
# Generate matrix from mapping data with one row for every interacting pair
#==========================================================================

graph_matrix <- function(mapping, stand, radius){
  
  # Determine if stand exists
  if(stand %in% mapping$stand_id == F){
    print("Error: stand ID not found in mapping data")
  } else {
    
    # Convert species column to factor
    mapping$species <- as.factor(mapping$species)
    
    # Subset to focal stand
    one_stand <- mapping %>%
      filter(stand_id == stand) %>%
      mutate(abh = circ_area(dbh / 2)) %>%
      select(tree_id, stand_id, species, dbh, abh, size_cat, x_coord, y_coord)
    
    # Create distance, species, and size matrices
    dist_mat <- as.matrix(dist(one_stand[, c("x_coord", "y_coord")], diag = T, upper = T))
    sps_mat <- matrix(one_stand$species, nrow = nrow(one_stand), ncol = nrow(one_stand))
    size_mat_dens <- matrix(one_stand$abh, nrow = nrow(one_stand), ncol = nrow(one_stand))
    size_mat <- matrix(one_stand$dbh, nrow = nrow(one_stand), ncol = nrow(one_stand))
    cat_mat <- matrix(one_stand$size_cat, nrow = nrow(one_stand), ncol = nrow(one_stand))
    
    # Convert values of non-competitors to NA
    non_comp <- which(dist_mat > radius | dist_mat == 0)
    dist_mat[non_comp] <- NA
    sps_mat[non_comp] <- NA
    size_mat_dens[non_comp] <- NA
    size_mat[non_comp] <- NA
    cat_mat[non_comp] <- NA
    
    # Create dist, species and size vectors not including NAs
    prox <- dist_mat[!is.na(dist_mat)]
    sps_comp <- sps_mat[!is.na(sps_mat)]
    size_comp <- size_mat[!is.na(size_mat)]
    size_cat_comp <- cat_mat[!is.na(cat_mat)]
    comp_dat <- data.frame(prox, sps_comp, size_comp, size_cat_comp)
    
    # Get vector of how many rows each focal tree should have
    repeats <- unname(apply(dist_mat, 2, non_na_len))
    
    # Create data frame with repeated rows
    all_rows <- one_stand[rep(1:nrow(one_stand), repeats), ]
    
    # Combine focal and comp data
    all_dat <- cbind(all_rows, comp_dat)
    
    # Add species information
    nbhds <-  cbind(one_stand$species, as.data.frame(size_mat_dens))
    colnames(nbhds)[1] <- "species"
    
    # Summarize density data
    dens <- density_calc(nbhds, radius)
    
    # Add tree_id column
    dens <- cbind(one_stand$tree_id, dens)
    colnames(dens)[1] <- "tree_id"
    dens$tree_id <- as.character(dens$tree_id)
    
    # Add density information to all_dat
    output <- left_join(all_dat, dens)
    
    # Return output
    output
  }
}

#=================================
# Apply graph_matrix to all stands
#=================================

graph_mat_all <- function(all_stands, radius){
  
  # Identify unique stands in dataset
  stand_ids <- unique(all_stands$stand_id)
  
  # Loop through stands
  for(stand_num in 1:length(stand_ids)){
    
    # If first stand, make the output table
    if(stand_num == 1){
      
      output <- graph_matrix(all_stands,
                                stand = stand_ids[stand_num],
                                radius = radius)
    } else { 
      
      # If not first stand, append results to those from earlier stands
      new_stand <- graph_matrix(all_stands,
                                   stand = stand_ids[stand_num],
                                   radius = radius)
      
      output <- bind_rows(output, new_stand)
    }
  }
  
  # Replace NA densities with zero
  dens_cols <- grep("density", names(output))
  output[, dens_cols][is.na(output[, grep("density", names(output))])] <- 0
  
  # Return output
  output
}

#===============================
# Standardize values in a matrix
#===============================

z_trans <- function(x){(x - mean(x)) / sd(x)}

#=======================================
# Calculate coefficient of determination
#=======================================

coef_det <- function(x){
  1 - (sum((x$observations - x$predictions)^2) / 
         sum((x$observations - mean(x$observations))^2))
}

#=========================
# Calculate standard error
#=========================

st_err <- function(x){sd(x, na.rm = T) / sqrt(length(na.omit(x)))}

#============================================================================
# Calculate percentage of values in a vector greater or less than a threshold
#============================================================================

pct_dif <- function(x, direction, value){
  if(direction == "greater"){
    sum(x > value) * 100 / length(x)
  } else if(direction == "less"){
    sum(x < value) * 100 / length(x)
  } else {
    print("direction input not recognized")
  }
}


#============================================
# Fit regularized regression model many times
#============================================

# This function takes as input a design matrix already formatted for the 
# regularized regression model, a data frame of the tree growth and neighborhood
# data used to build the design matrix, and the desired number of runs for the
# model. It fits the model this many times and outputs a matrix containing the
# fitted parameter values, coefficient of determination for the training data, 
# and the cross-validated mean square error for each model run. The function 
# also keeps track of the best model (lowest MSE) and includes this model object
# in the list output

mod_iter <- function(des_mat, growth_dat, runs){
  
  # Create output table
  results <- matrix(NA, nrow = runs, ncol = ncol(des_mat) + 3)
  
  # Iterate model fitting process
  for(i in 1:runs){
    
    # Fit model
    mod <- cv.glmnet(des_mat, y = growth_dat$size_corr_growth, family = "gaussian")
    
    # Store model coefficients
    results[i, 1:(ncol(des_mat) + 1)] <- as.vector(as.matrix(coef(mod)))
    
    # Make data frame of growth predictions and observations
    int_pred <- predict(mod, newx = des_mat, s = "lambda.1se")
    int_obs <- growth_dat %>% select(tree_id, size_corr_growth)
    comparison <- cbind(int_obs, int_pred)
    colnames(comparison)[2:3] <- c("obs", "pred")
    comparison <- comparison %>%
      group_by(tree_id) %>%
      summarize(observations = obs[1],
                predictions = mean(pred))
    
    # Store coefficient of determination
    results[i, ncol(des_mat) + 2] <- coef_det(comparison)
    
    # Store mean square error of 1se model
    results[i, ncol(des_mat) + 3] <- mod$cvm[mod$lambda == mod$lambda.1se]
    
    # Remove added intercept column and name remaining columns
    colnames(results) <- c(rownames(coef(mod)), "R_square", "MSE")
    
    # Store/update best model
    if(i == 1){
      best_mod <- mod
    } else if(mod$cvm[mod$lambda == mod$lambda.1se] < 
              best_mod$cvm[best_mod$lambda == best_mod$lambda.1se]){
      best_mod <- mod
    }
    
  }
  
  # Convert results to data frame and output results table and best mod
  results <- as.data.frame(results)
  output <- list(results, best_mod)
  
}
sgraham9319/ForestPlotR documentation built on April 15, 2022, 4:01 p.m.