R/function_UpstreamGeoData.R
In HYPEtools: Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE

Documented in UpstreamGeoData

#' 
#' Calculate upstream sums and averages of selected GeoData contents
#'
#' Function to calculate upstream sums and averages for selected variables of imported GeoData.txt files. 
#'
#' @param subid Integer vector of SUBIDs for which to calculate upstream properties (must exist in \code{gd}). 
#' If \code{NULL} (default), upstream areas for all SUBIDs will be calculated.
#' @param gd A data frame containing a column with SUBIDs and a column with areas, e.g. an imported 'GeoData.txt' file.
#' @param bd A data frame with bifurcation connections, e.g. an imported 'BranchData.txt' file. Optional argument.
#' @param olake.slc Integer,SLC class number which represents outlet lake fractions. Mandatory for weighted averaging of outlet lake depths. 
#' @param bd.weight Logical, if set to \code{TRUE}, flow weights will be applied for areas upstream of stream bifurcations. See 
#' \code{\link{AllUpstreamSubids}} for further details on flow fraction computation.
#' @param signif.digits Integer, number of significant digits to round upstream variables to. See also \code{\link{signif}}. 
#' Set to \code{NULL} to prevent rounding. 
#' @param progbar Logical, display a progress bar while calculating SLC class fractions. Adds overhead to calculation time but useful 
#' when \code{subid} is \code{NULL} or contains many SUBIDs.
#' 
#' @details
#' \code{UpstreamGeoData} calculates upstream averages or sums of selected variables in a GeoData data frame, including branch connections 
#' in case of stream bifurcations but not including potential irrigation links or groundwater flows. Averages are weighted by sub-catchment area, with 
#' the exception of outlet lake depths and rural household emission concentrations provided in GeoData variables 'lake_depth', 'loc_tn', 
#' and 'loc_tp'. Outlet lake depths are weighted by outlet lake area and the GeoData column with 
#' SLC class fractions for outlet lakes must be provided in function argument \code{col.olake.slc}. Rural household emissions are weighted by 
#' emission volume as provided in column 'loc_vol'. Elevation and slope standard deviations are 
#' averaged if the corresponding mean values exist (sample means are required to calculate overall means of standard deviations).
#' 
#' Currently, the following variables are considered:
#' \describe{
#'   \item{Area-weighted average}{elev_mean, slope_mean, buffer, close_w, latitude, longitude, all SLC classes, lake depths, elev_std, slope_std}
#'   \item{Volume-weighted average}{loc_tn, loc_tp}
#'   \item{Sum}{area, rivlen, loc_vol}
#' }
#' 
#' @return
#' \code{UpstreamGeoData} returns a data frame with the same number of columns as argument \code{gd} and number of rows corresponding to number of 
#' SUBIDs in argument \code{subid}, with updated upstream columns marked with a leading 'UP_' in the column names.
#' 
#' @seealso
#' \code{\link{UpstreamSLCClasses}}
#' \code{\link{SumUpstreamArea}}
#' \code{\link{AllUpstreamSubids}}
#' 
#' @examples
#' te <- ReadGeoData(filename = system.file("demo_model", "GeoData.txt", package = "HYPEtools"))
#' # Upstream stats for domain outlet
#' UpstreamGeoData(subid = OutletSubids(te), gd = te, olake.slc = 1, progbar = FALSE)
#' 
#' @importFrom pbapply pblapply pbsapply
#' @importFrom stats weighted.mean
#' @export

UpstreamGeoData <- function(subid = NULL, gd, bd = NULL, olake.slc = NULL, bd.weight = FALSE, signif.digits = 5, progbar = TRUE) {
  
  # extract column positions of subid and area in gd
  pos.sbd <- which(toupper(names(gd)) == "SUBID")
  pos.area <- which(toupper(names(gd)) == "AREA")
  
  # check if gd contains necessary information
  if (length(pos.sbd) == 0) {
    stop("No SUBID column found in 'gd'. Exiting.")
  }
  if (length(pos.area) == 0) {
    stop("No AREA column found in 'gd'. Exiting.")
  }
  
  # warn if argument specification indicates user confusion
  if (is.null(bd) && bd.weight) {
    warning("Bifurcation weights requested with 'bd.weight' but 'bd' not specified. Ignoring request.")
    bd.weight <- FALSE
  }
  
  # conditional: fill subid vector if not user-provided, otherwise check that all subids are in gd and get row numbers
  if (is.null(subid)) {
    subid <- gd[, pos.sbd]
    row.sbd <- NULL
  } else {
    row.sbd <- match(x = c(subid), table = gd[, pos.sbd])
    if (any(is.na(row.sbd))) {
      stop("At least one SUBID in 'subid' not found in 'gd'. Exiting.")
    }
  }
  
  # safety measure: force type of area to numeric to prevent integer overflow when summing below
  gd[, pos.area] <- as.numeric(gd[, pos.area])
  
  # create vectors of treated geodata column positions for weighted means, sums, weighted standard deviations
  # unknown/untreated columns will be returned unchanged
  pos.wmean <- c(which(tolower(names(gd)) %in% c("elev_mean", "slope_mean", "buffer", "close_w", "latitude", "longitude")), 
                 which(toupper(substr(names(gd), 1, 3)) == "SLC"))
  # lake depths are special, because they will be weighted by lake area, if olake slc is provided by user
  pos.wmean.ldepth <- which(tolower(names(gd)) == "lake_depth")
  pos.wmean.slc.olake <- which(toupper(names(gd)) == paste0("SLC_", olake.slc))
  pos.wmean.lconc <- which(tolower(names(gd))  %in% c("loc_tn", "loc_tp"))
  # loc_vol is extracted two times, first for using it as weight for loc_tn/p, then for summing
  pos.wmean.lvol <- which(tolower(names(gd))  %in% c("loc_vol"))
  pos.sum <- which(tolower(names(gd)) %in% c("area", "rivlen", "loc_vol"))
  pos.wsd.elev <- which(tolower(names(gd)) %in% c("elev_std", "elev_mean"))
  pos.wsd.slope <- which(tolower(names(gd)) %in% c("slope_std", "slope_mean"))
  
  # warn user if upstream lake_depth cannot be calculated
  if (length(pos.wmean.ldepth) == 1 && is.null(olake.slc)) {
    warning("'lake_depth' found in GeoData, but no outlet lake SLC class provided in argument 'olake.slc'. Skipping upstream 'lake_depth'.")
  }
  if (length(pos.wmean.ldepth) == 0 && !is.null(olake.slc)) {
    warning("Outlet lake SLC class provided in argument 'olake.slc', but no 'lake_depth' found in GeoData. Skipping upstream 'lake_depth'.")
  }
  if (length(pos.wmean.ldepth) == 1 && !is.null(olake.slc) && length(pos.wmean.slc.olake) == 0) {
    warning("'lake_depth' found in GeoData, but outlet lake SLC class provided in argument 'olake.slc' does not exist. Skipping upstream 'lake_depth'.")
  }
  
  # warn user if rural household releases cannot be calculated
  if (length(pos.wmean.lconc) >= 1 && is.null(pos.wmean.lvol)) {
    warning("'loc_tn' and/or 'loc_tp' found in GeoData, but no corresponding 'loc_vol'. Skipping upstream rural household releases.")
  }
  
  # get a list of upstream SUBIDs for all SUBIDs in subid
  # conditional: use the progress bar version of lapply if requested by user
  if (progbar) {
    cat("\nFinding upstream SUBIDs.\n")
    up.sbd <- pblapply(subid, function(x, g, b) {AllUpstreamSubids(subid = x, g, b, get.weights = bd.weight)}, g = gd, b = bd)
  } else {
    up.sbd <- lapply(subid, function(x, g, b) {AllUpstreamSubids(subid = x, g, b, get.weights = bd.weight)}, g = gd, b = bd)
  }
  
  ########################
  ### internal functions applied below
  
  # internal function to calculate area-weighted means for one group of upstream areas (one element in up.sbd)
  # x: vector of upstream subids
  WeightedMean <- function(x, g, p.sbd, p.wmean, p.area) {
    
    # extract dataframe with areas and variables in x for which to calculate weighted means
    if (bd.weight) {
      df.wmean <- g[g[, p.sbd] %in% x[, 1], c(p.area, p.wmean)]
      df.wmean[, 1] <- df.wmean[, 1] * x[, 2]
    } else {
      df.wmean <- g[g[, p.sbd] %in% x, c(p.area, p.wmean)]
    }
    
    # averaging only necessary if more than one subid, also avoids NaN result if stddev is 0
    if (nrow(df.wmean) > 1) {
      
      # area-weighted mean of all columns, with exception handling where all weights are 0
      res <- apply(as.data.frame(df.wmean[, -1]), 2, weighted.mean, w = df.wmean[, 1])
      if (all(df.wmean[, 1] == 0)) {
        res[] <- 0
      }
      
      # catch
    } else {
      res <- as.numeric(df.wmean[, -1])
      names(res) <- names(df.wmean)[-1]
    }
    
    return(res)
  }
  
  # internal function to calculate area-weighted standard deviations, different to the above because there are just 2 weighted sd
  # variables, slope_sd and elev_sd, and the column positions are variable-specific combinations of mean and sd (see e.g. pos.wsd.slope)
  # used with sapply below
  WeightedSd   <- function(x, g, p.sbd, p.wsd, p.area) {
    
    # extract dataframe with areas and variables in x for which to calculate weighted stdevs
    if (bd.weight) {
      df.wsd <- g[g[, p.sbd] %in% x[, 1], c(p.area, p.wsd)]
      df.wsd[, 1] <- df.wsd[, 1] * x[, 2]
    } else {
      df.wsd <- g[g[, p.sbd] %in% x, c(p.area, p.wsd)]
    }
    
    # averaging only necessary if more than one subid, also avoids NaN result if stddev is 0
    if (nrow(df.wsd) > 1) {
      
      # area-weighted std.dev. of variable, see also http://stackoverflow.com/questions/9222056/existing-function-to-combine-standard-deviations-in-r
      res <- sqrt(weighted.mean(df.wsd[, 3]^2 + df.wsd[, 2]^2, df.wsd[, 1]) - weighted.mean(df.wsd[, 2], df.wsd[, 1])^2)
      
    } else {
      res <- df.wsd[, 3]
    }
    
    return(res)
  }
  
  # internal function to calculate sums
  Sum <- function(x, g, p.sum, p.sbd) {
    # extract dataframe with areas and variables in x, for which to calculate weighted stdevs
    if (bd.weight) {
      df.sum <- data.frame(g[g[, p.sbd] %in% x[, 1], p.sum])
      for (i in 1:ncol(df.sum)) {
        df.sum[, i] <- df.sum[, i] * x[, 2]
      }
    } else {
      df.sum <- data.frame(g[g[, p.sbd] %in% x, p.sum])
    }
    # columnwise sum
    res <- colSums(df.sum)
    return(res)
  }
  
  ##############################################
  
  

  # apply area-weighted mean function to all SUBIDs in variable 'subid', for all relevant variables
  # conditional: use the progress bar version of sapply if set by function argument
  if (length(pos.wmean) > 0) {
    if (progbar) {
      cat("\nCalculating upstream area-weighted means.\n")
      te <- pbsapply(up.sbd, WeightedMean, g = gd, p.sbd = pos.sbd, p.wmean = pos.wmean, p.area = pos.area)
    } else {
      te <- sapply(up.sbd, WeightedMean, g = gd, p.sbd = pos.sbd, p.wmean = pos.wmean, p.area = pos.area)
    }
    # create result dataframe, conditional on if the was just one variable to be summed, because the apply result is a vector then, not a dataframe..
    if(length(pos.wmean) > 1) {
      up.wmean <- data.frame(SUBID = subid, t(te))
    } else {
      up.wmean <- data.frame(SUBID = subid, te)
      names(up.wmean)[2] <- names(gd)[pos.wmean]
    }
    rm(te)
  } else {
    up.wmean <- NULL
  }
  
  # olake-area weighted mean for lake depths
  if (length(pos.wmean.ldepth) == 1 && length(pos.wmean.slc.olake) == 1) {
    if (progbar) {
      cat("\nCalculating upstream lake-area-weighted lake depths.\n")
      up.wmean.ldepth <- data.frame(SUBID = subid, LAKE_DEPTH = pbsapply(up.sbd, WeightedMean, g = gd, p.sbd = pos.sbd, p.wmean = pos.wmean.ldepth, p.area = pos.wmean.slc.olake))
    } else {
      up.wmean.ldepth <- data.frame(SUBID = subid, LAKE_DEPTH = sapply(up.sbd, WeightedMean, g = gd, p.sbd = pos.sbd, p.wmean = pos.wmean.ldepth, p.area = pos.wmean.slc.olake))
    }
    # if no lakes exist, all weights are 0 and NaN is computed. Replace those with zeros
    up.wmean.ldepth[is.nan(up.wmean.ldepth[, 2]), 2] <- 0
  } else {
    up.wmean.ldepth <- NULL
  }
  
  # volume-weighted rural household concentrations
  if (length(pos.wmean.lconc) >= 1 && !is.null(pos.wmean.lvol)) {
    if (progbar) {
      cat("\nCalculating upstream volume-weighted rural household releases.\n")
      te <- pbsapply(up.sbd, WeightedMean, g = gd, p.sbd = pos.sbd, p.wmean = pos.wmean.lconc, p.area = pos.wmean.lvol)
    } else {
      te <- sapply(up.sbd, WeightedMean, g = gd, p.sbd = pos.sbd, p.wmean = pos.wmean.lconc, p.area = pos.wmean.lvol)
    }
    # create result dataframe, conditional on if the was just one variable to be summed, because the apply result is a vector then, not a dataframe..
    if(length(pos.wmean.lconc) > 1) {
      up.wmean.lconc <- data.frame(SUBID = subid, t(te))
    } else {
      up.wmean.lconc <- data.frame(SUBID = subid, te)
      names(up.wmean.lconc)[2] <- names(gd)[pos.wmean.lconc]
    }
    rm(te)
  } else {
    up.wmean.lconc <- NULL
  }
  
  # apply area-weighted sd function to all SUBIDs in variable 'subid'
  # do separately for slope and elev, if they exist in gd
  if (length(pos.wsd.elev) == 2) {
    if (progbar) {
      cat("\nCalculating upstream area-weighted elevation standard deviations.\n")
      up.wsd.elev <- data.frame(SUBID = subid, ELEV_STD = pbsapply(up.sbd, WeightedSd, g = gd, p.sbd = pos.sbd, p.wsd = pos.wsd.elev, p.area = pos.area))
    } else {
      up.wsd.elev <- data.frame(SUBID = subid, ELEV_STD = sapply(up.sbd, WeightedSd, g = gd, p.sbd = pos.sbd, p.wsd = pos.wsd.elev, p.area = pos.area))
    }
  } else {
    up.wsd.elev <- NULL
  }
  
  if (length(pos.wsd.slope) == 2) {
    if (progbar) {
      cat("\nCalculating upstream area-weighted slope standard deviations.\n")
      up.wsd.slope <- data.frame(SUBID = subid, SLOPE_STD = pbsapply(up.sbd, WeightedSd, g = gd, p.sbd = pos.sbd, p.wsd = pos.wsd.slope, p.area = pos.area))
    } else {
      up.wsd.slope <- data.frame(SUBID = subid, SLOPE_STD = sapply(up.sbd, WeightedSd, g = gd, p.sbd = pos.sbd, p.wsd = pos.wsd.slope, p.area = pos.area))
    }
  } else {
    up.wsd.slope <- NULL
  }
  
  # apply sum function to all SUBIDs in variable 'subid', for all relevant variables
  if (length(pos.sum) > 0) {
    if (progbar) {
      cat("\nCalculating upstream sums.\n")
      te <- pbsapply(up.sbd, Sum, g = gd, p.sum = pos.sum, p.sbd = pos.sbd)
    } else {
      te <- sapply(up.sbd, Sum, g = gd, p.sum = pos.sum, p.sbd = pos.sbd)
    }
    # create result dataframe, conditional on if the was just one variable to be summed, because the apply result is a vector then, not a dataframe..
    if(length(pos.sum) > 1) {
      up.sum <- data.frame(SUBID = subid, t(te))
    } else {
      up.sum <- data.frame(SUBID = subid, te)
      names(up.sum)[2] <- names(gd)[pos.sum]
    }
  } else {
    up.sum <- NULL
  }
  
  
  #######################################################
  
  ## post-processing
  if (progbar) {
    cat("\nPost-processing.\n")
    }
  
  # round to requested number of digits, conditional on existing results for lake_depth and stddev variables
  # the data frame dummy column adding and removing is a workaround for single-column cases (names get messed up because 
  # apply returns a names vector then..)
  if (!is.null(signif.digits)) {
    if (!is.null(up.wmean)) {
      te <- apply(data.frame(1, up.wmean[, -1]), 2, signif, digits = signif.digits)
      # te is a vector for single-subid cases
      if (length(subid) == 1) {
        up.wmean[, -1] <- te[-1]
      } else {
        up.wmean[, -1] <- te[, -1]
      }
    }
    if (!is.null(up.wmean.ldepth)) {
      up.wmean.ldepth[, -1] <- signif(up.wmean.ldepth[, -1], digits = signif.digits)
    }
    if (!is.null(up.wmean.lconc)) {
      te <- apply(data.frame(1, up.wmean.lconc[, -1]), 2, signif, digits = signif.digits)
      # te is a vector for single-subid cases
      if (length(subid) == 1) {
        up.wmean.lconc[, -1] <- te[-1]
      } else {
        up.wmean.lconc[, -1] <- te[, -1]
      }
    }
    if (!is.null(up.wsd.elev)) {
      up.wsd.elev[, -1] <- signif(up.wsd.elev[, -1], digits = signif.digits)
    }
    if (!is.null(up.wsd.slope)) {
      up.wsd.slope[, -1] <- signif(up.wsd.slope[, -1], digits = signif.digits)
    }
    if (!is.null(up.sum)) {
      te <- apply(data.frame(1, up.sum[, -1]), 2, signif, digits = signif.digits)
      if (length(subid) == 1) {
        up.sum[, -1] <- te[-1]
      } else {
        up.sum[, -1] <- te[, -1]
      }
    }
  }
  
  ## copy all upstream calculations to result GeoData, replacing the originals, conditional on presence of argument subid
  # create result dataframe
  if (is.null(row.sbd)) {
    res <- gd
  } else {
    res <- gd[row.sbd, ]
  }
  # update result dataframe with upstream variables
  res[, pos.wmean] <- up.wmean[, -1]
  if (!is.null(up.wmean.ldepth)) {
    res[, pos.wmean.ldepth] <- up.wmean.ldepth[2]
    }
  if (!is.null(up.wmean.lconc)) {
    res[, pos.wmean.lconc] <- up.wmean.lconc[, -1]
    }
  if (!is.null(up.wsd.elev)) {
    res[, which(tolower(names(gd)) == "elev_std")] <- up.wsd.elev[, -1]
    }
  if (!is.null(up.wsd.slope)) {
    res[, which(tolower(names(gd)) == "slope_std")] <- up.wsd.slope[, -1]
    }
  res[, pos.sum] <- up.sum[, -1]
  
  
  # rename upstream variables to clarify they are upstream values
  pos.up <- c(pos.wmean, pos.sum, pos.wmean.ldepth, pos.wmean.lconc, 
              if (length(pos.wsd.elev) == 2) which(tolower(names(gd)) == "elev_std") else NULL, 
              if (length(pos.wsd.slope) == 2) which(tolower(names(gd)) == "slope_std") else NULL)
  names(res)[pos.up] <- paste0("UP_", names(res)[pos.up])
  
  # return result
  return(res)
}
Any scripts or data that you put into this service are public.
HYPEtools documentation built on June 8, 2025, 11:02 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
HYPEtools
Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE

R/function_UpstreamGeoData.R
In HYPEtools: Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE

Defines functions UpstreamGeoData

Documented in UpstreamGeoData

Try the HYPEtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HYPEtools Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE

R/function_UpstreamGeoData.R In HYPEtools: Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE

Defines functions UpstreamGeoData

Documented in UpstreamGeoData

Try the HYPEtools package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

HYPEtools
Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE

R/function_UpstreamGeoData.R
In HYPEtools: Tools for Processing and Analyzing Files from the Hydrological Catchment Model HYPE