R/ms_conversions.R

Defines functions ms_conversions

Documented in ms_conversions

#' Convert MacroSheds concentration units and molecular representations
#'
#' MacroSheds concentration data are represented in either mass, moles, or equivalents
#' per liter (mg/L, mol/L, or uEq/L, respectively; note that one variable, "ANC", is represented by default in Eq/L). Use this function to convert among
#' these units. You may also use this function to convert SI prefixes, e.g. mg/L to kg/L,
#' or to convert between molecular and atomic representations for some elements and compounds.
#' For example, nitrate (NO3) can be converted to nitrate-N and vice versa.
#'
#' @author Spencer Rhea
#' @author Mike Vlah, \email{vlahm13@@gmail.com}
#' @author Wes Slaughter
#' @param d \code{data.frame}. A \code{data.frame} in MacroSheds format (see details),
#'    containing stream_chemistry or precip_chemistry data. A \code{tibble} in
#'    MacroSheds format is generated by [ms_load_product()].
#' @param convert_units_from (named) character vector.
#'    If named, names are variable_codes and values are the units of those variables (see [ms_load_variables()]).
#'    Omit variables that don't need to be converted. Or provide a single unit as an unnamed character if all
#'    variables are in the same unit.
#' @param convert_units_to (named) character vector. If named, names are
#'    variable_codes and values are the units those variables should be converted to (see [ms_load_variables()]).
#'    Omit variables that don't need to be converted. Or provide a single unit as an unnamed character if all
#'    variables will be converted to the same unit.
#' @param convert_molecules character vector. Molecules that will be
#'    converted according to the atomic mass of their main constituent. For example,
#'    this can be used to convert NO3-N to NO3, or NO3 to NO3-N. See details.
#' @return returns a \code{tibble} in MacroSheds format, containing concentration data converted to new units.
#' @details In MacroSheds, concentrations of the following molecules are represented
#'    according to the atomic mass of their primary constituent atom: NO3, NO2, NO3_NO2 (variable code for NO3 + NO2), NH4, NH3, NH3_NH4,
#'    SO4, SiO2, SiO3, PO4, orthophosphate. For example, if MacroSheds retrieved
#'    SiO2 concentration in mg/L from some domain, we converted it to mg/L SiO2-Si (coded as "SiO2_Si").
#'    You could use this function to convert it back to SiO2 by setting convert_molecules = 'SiO2_Si'.
#'    You could then convert it to SiO2-Si again by setting convert_molecules = 'SiO2'. See examples
#'    for more.
#'
#'    Default units for all variables can be seen via [ms_load_variables()].
#'
#'    MacroSheds format (only var and val columns required as inputs to this function):
#' | header value  | column_definition |
#' | ------------- | ----------------- |
#' | date          | Date in YYYY-mm-dd |
#' | site_code     | A unique identifier for each MacroSheds site, identical to primary source site code where possible. See [ms_load_sites()]. |
#' | grab_sample   | Boolean integer indicating whether the observation was obtained via grab sample or installed sensor. 1 = TRUE (grab sample), 0 = FALSE (installed sensor). |
#' | var           | Variable code. See [ms_load_variables()]. |
#' | val           | Data value. See [ms_load_variables()] for units. |
#' | ms_status     | Boolean integer. 0 = clean value. 1 = questionable value. See "Technical Validation" section of [the MacroSheds data paper](https://aslopubs.onlinelibrary.wiley.com/doi/full/10.1002/lol2.10325) for details. |
#' | ms_interp     | Boolean integer. 0 = measured or imputed by primary source. 1 = interpolated by MacroSheds. See "Temporal Imputation and Aggregation" section of [the MacroSheds data paper](https://aslopubs.onlinelibrary.wiley.com/doi/full/10.1002/lol2.10325) for details. |
#' | val_err       | The combined standard uncertainty associated with the corresponding data point, if estimable. See "Detection Limits and Propagation of Uncertainty" section of [the MacroSheds data paper](https://aslopubs.onlinelibrary.wiley.com/doi/full/10.1002/lol2.10325) for details. |
#' @seealso [ms_load_product()], [ms_load_variables()]
#' @export
#' @examples
#' ms_root = 'data/macrosheds'
#' ms_download_core_data(macrosheds_root = ms_root,
#'                       domains = 'hbef')
#' d <- ms_load_product(macrosheds_root = ms_root,
#'                      prodname = 'stream_chemistry',
#'                      domains = 'hbef',
#'                      filter_vars = c('NO3_N', 'Na', 'Mg', 'SO4_S'))
#'
#' # Convert all variables from mg/L to ug/L (micrograms per liter)
#' converted_data <- ms_conversions(d = d,
#'                                  convert_units_from = 'mg/l',
#'                                  convert_units_to = 'ug/l')
#'
#' # Convert mg/L NO3-N to mg/L NO3
#' converted_data <- ms_conversions(d = d,
#'                                  convert_units_from = 'mg/l',
#'                                  convert_units_to = 'mg/l',
#'                                  convert_molecules = 'NO3_N')
#'
#' # Convert from mg/L to mmol/L
#' converted_data <- ms_conversions(d = d,
#'                                  convert_units_from = 'mg/l',
#'                                  convert_units_to = 'mmol/l')
#'
#' # Convert from mg/L to mEq/L
#' converted_data <- ms_conversions(d = d,
#'                                  convert_units_from = 'mg/l',
#'                                  convert_units_to = 'meq/l')
#'
#' # Convert variables to different units
#' converted_data <- ms_conversions(d = d,
#'                                  convert_units_from = c('NO3_N' = 'mg/l',
#'                                                         'Na' = 'mg/l',
#'                                                         'Mg' = 'mg/l',
#'                                                         'SO4_S' = 'mg/l'),
#'                                  convert_units_to = c('NO3_N' = 'meq/l',
#'                                                       'Na' = 'ug/l',
#'                                                       'Mg' = 'umol/l',
#'                                                       'SO4_S' = 'g/l'))

ms_conversions <- function(d,
                           convert_units_from = 'mg/l',
                           convert_units_to,
                           convert_molecules){

    # NOTE(review): attaching dplyr from inside a function alters the caller's
    # search path. It is kept because the pipe and the unqualified dplyr verbs
    # below rely on it being available.
    library("dplyr", quietly = TRUE)

    requireNamespace('macrosheds', quietly = TRUE)

    # Variable metadata (unit, chemical formula, valence) for every variable
    # that is not expressed as a flux.
    ms_vars <- macrosheds::ms_vars_ts %>%
        filter(unit != 'kg/ha/d') %>%
        dplyr::select(variable_code, unit, molecule, valence) %>%
        distinct()

    # Return the formula recorded in ms_vars for a variable code, or the code
    # itself when no formula is recorded. Tolerates codes that are absent from
    # ms_vars or listed more than once (first recorded formula wins).
    lookup_formula <- function(code){
        recorded <- ms_vars$molecule[ms_vars$variable_code %in% code]
        recorded <- recorded[! is.na(recorded)]
        if(length(recorded) == 0) return(code)
        recorded[1]
    }

    #checks
    cm <- ! missing(convert_molecules)
    cuF <- ! missing(convert_units_from) && ! is.null(convert_units_from)
    cuT <- ! missing(convert_units_to) && ! is.null(convert_units_to)

    # Both unit arguments are required. Checking this up front also covers the
    # case where neither is given, which would otherwise fail further down with
    # an unhelpful "argument is missing" error.
    if(! cuF || ! cuT){
        stop('convert_units_from and convert_units_to must be supplied together')
    }
    if(length(convert_units_from) != length(convert_units_to)){
        stop('convert_units_from and convert_units_to must have the same length')
    }

    # Variable codes with any sampling prefix removed; used for all matching.
    vars <- ms_drop_var_prefix_(d$var)

    if(any(! vars %in% ms_vars$variable_code)){
        not_ms_var <- unique(vars[!vars %in% ms_vars$variable_code])
        stop('Unrecognized variable(s): ', paste0(paste(not_ms_var, collapse = ', ')))
    }

    if(any(duplicated(names(convert_units_from)))){
        stop('duplicated names in convert_units_from')
    }
    if(any(duplicated(names(convert_units_to)))){
        stop('Duplicated names in convert_units_to')
    }

    vars_convertable <- ms_vars %>%
        filter(variable_code %in% !!vars) %>%
        pull(unit) %>%
        tolower()

    if(length(convert_units_from) == 1 && length(convert_units_to) == 1){
        # A single unit pair is applied to every variable in d, so every
        # variable must be a mass concentration.
        if(! all(vars_convertable == 'mg/l')){
            stop('Unable to convert non-concentration variables.')
        }
    } else {
        if(! all(vars %in% names(convert_units_from)) || ! all(vars %in% names(convert_units_to))){
            stop('When specifying individual variable conversions, all variables in `d` must be accounted for.')
        }
        cu_shared_names <- base::intersect(names(convert_units_from),
                                           names(convert_units_to))

        if(length(cu_shared_names) != length(convert_units_to)){
            stop('Names of convert_units_from and convert_units_to must match.')
        }
    }

    # These checks run before lower-casing so that "M" can be detected.
    if(any(grepl('M', c(convert_units_from, convert_units_to)))){
        stop('Specify molarity as "mol", rather than "M"')
    }
    if(any(grepl('moles', c(convert_units_from, convert_units_to)))){
        stop('Specify molarity as "mol", rather than "moles"')
    }

    convert_units_from <- tolower(convert_units_from)
    convert_units_to <- tolower(convert_units_to)

    # Parallel vectors: element_molecule[i] is whole_molecule[i] expressed as
    # its primary constituent element.
    whole_molecule <- c('NO3', 'NO2', 'SO4', 'PO4', 'SiO2', 'SiO3', 'NH4', 'NH3',
                        'NO3_NO2', 'NH3_NH4', 'orthophosphate')
    element_molecule <- c('NO3_N', 'NO2_N', 'SO4_S', 'PO4_P', 'SiO2_Si', 'SiO3_Si', 'NH4_N',
                          'NH3_N', 'NO3_NO2_N', 'NH3_NH4_N', 'orthophosphate_P')

    if(cm){
        whole_to_element <- which(whole_molecule %in% convert_molecules)
        element_to_whole <- which(element_molecule %in% convert_molecules)

        if(length(element_to_whole) == 0 && length(whole_to_element) == 0){
            stop(paste0('convert_molecules must be one of: ', paste(whole_molecule, collapse = ' '),
                        ' or: ', paste(element_molecule, collapse = ' ')))
        }
    } else {
        convert_molecules <- NULL
        whole_to_element <- integer(0)
        element_to_whole <- integer(0)
    }

    using_conv_mol_wrong <- ! convert_molecules %in% unique(vars)
    if(any(using_conv_mol_wrong)){
        these_ones_questionable <- convert_molecules[using_conv_mol_wrong]
        warning('These variables are not in `d$var`: ',
                paste(these_ones_questionable, collapse = ', '),
                ".\n  Note that convert_molecules is used to specify forms you're converting *from*")
    }

    # Primary constituent element of each whole molecule.
    molecular_conversion_map <- list(
        NH4 = 'N',
        NO3 = 'N',
        NO2 = 'N',
        NH3 = 'N',
        NH3_NH4 = 'N',
        SiO2 = 'Si',
        SiO3 = 'Si',
        SO4 = 'S',
        PO4 = 'P',
        orthophosphate = 'P',
        NO3_NO2 = 'N'
    )

    # handle molecular conversions, like NO3 -> NO3_N
    for(whole_code in whole_molecule[whole_to_element]){

        # Look the element up by molecule name; a positional lookup would
        # return the element of an unrelated molecule.
        element <- molecular_conversion_map[[whole_code]]
        is_target <- vars == whole_code

        d$val[is_target] <- convert_molecule(x = d$val[is_target],
                                             from = lookup_formula(whole_code),
                                             to = element)

        d$var[is_target] <- paste0(d$var[is_target], '_', element)
    }

    # handle molecular conversions, like NO3_N -> NO3
    for(idx in element_to_whole){

        element_code <- element_molecule[idx]
        is_target <- vars == element_code

        to_molec <- whole_molecule[idx]
        if(to_molec == 'orthophosphate') to_molec <- 'PO4'

        d$val[is_target] <- convert_molecule(x = d$val[is_target],
                                             from = lookup_formula(element_code),
                                             to = to_molec)

        # Drop the trailing "_<element>" suffix whatever its length, so that
        # two-letter elements such as Si are handled too.
        d$var[is_target] <- sub('_[^_]+$', '', d$var[is_target])
    }

    # Turn a single input into a named vector with all variables in dataframe
    if(length(convert_units_from) == 1){
        all_vars <- unique(vars)
        convert_units_from <- rep(convert_units_from, length(all_vars))
        names(convert_units_from) <- all_vars
        convert_units_to <- rep(convert_units_to, length(all_vars))
        names(convert_units_to) <- all_vars
    }

    # `vars` still holds the pre-conversion codes, so the names of the unit
    # vectors keep matching rows even after d$var was renamed above.
    for(i in seq_along(convert_units_from)){

        unitfrom <- convert_units_from[i]
        unitto <- convert_units_to[i]
        v <- names(unitfrom)
        is_target <- vars == v

        # Converts input to grams if the final unit contains grams
        g_conver <- FALSE
        if(grepl('mol|eq', unitfrom) && grepl('g', unitto) || v %in% convert_molecules){

            d$val[is_target] <- convert_to_gl(
                x = d$val[is_target],
                input_unit = unitfrom,
                formula = lookup_formula(v),
                ms_vars = ms_vars
            )

            g_conver <- TRUE
        }

        #convert prefix
        d$val[is_target] <- convert_unit(
            x = d$val[is_target],
            input_unit = unitfrom,
            output_unit = unitto
        )

        #Convert to mol or eq if that is the output unit
        if(grepl('mol|eq', unitto)){
            d$val[is_target] <- convert_from_gl(
                x = d$val[is_target],
                input_unit = unitfrom,
                output_unit = unitto,
                molecule = v,
                g_conver = g_conver,
                ms_vars = ms_vars
            )
        }
    }

    # The val_err column, if present, is removed from the returned data.
    d <- dplyr::select(d, -any_of('val_err'))

    return(d)
}
MacroSHEDS/macrosheds documentation built on Oct. 30, 2024, 11:15 a.m.