R/BCG.Level.Membership.R

Defines functions BCG.Level.Membership

Documented in BCG.Level.Membership

#' @title BCG Level Membership
#' 
#' @description Biological Condition Gradient Level assignment given metric 
#' memberships.
#' 
#' @details Input is metric memberships and a rules tables.  
#' 
#' Output is a data frame with the membership for each row to each Level (1:6).
#' 
#' Minimum of:
#' 
#' * 1- sum of previous levels
#' 
#' * Rule0 memberships
#' 
#' * max of Rule1 (Alternate1) rules (and min of Rule2 (Alternate2) rules)
#' 
#' That is, perform calculations in this order:
#' 
#' 1. Min of Rule2 (Alternate2) metric memberships
#' 
#' 2. Max of Rule1 (Alternate1) rules and the above result.
#' 
#' 3. Min of: Rule0, the above results, and 1 - the sum of previous levels.
#' 
#' Some exceptions exist for particular models.
#' 
#' |Index_Name       |INDEX_CLASS|
#' |:----------------|:----------|
#' |CT_BCG_2015      |fish02     |
#' |CT_BCG_2015      |fish03     |
#' |BCG_NMSandyRivers|bugs       |
#'  
#' These exceptions are mostly hard coded into the function but gather some 
#' information with the parameter col_EXC_RULE from the rules table.  A future
#' update may fully automate this process.
#' 
#' 2021 saw the introduction of Median Exception rule.  
#' For the Pacific Northwest some metrics were grouped and the 2nd of 3 values
#' is used and the other 2 values tossed when determining level membership.  
#' This equates to using the median of the 3 values.  This is handled by 
#' including "MEDIAN" in the Exc_Rule column in Rules.xlsx.
#' 
#' 2024 added SMALL2 and SMALL3 Exception rules.
#' For New Mexico BCG some metrics are grouped so use the 2nd or 3rd smallest
#' value instead of the minimum.  As above, this is handled by including 
#' "SMALL2" or "SMALL3" in the Exc_Rule column in Rules.xlsx.
#' 
#' Deprecated col_SITE_TYPE for col_INDEX_CLASS in v2.0.0.9001.
#' @md
#' 
#' @param df.metric.membership Data frame of metric memberships 
#' (long format, the same as the output of BCG.Metric.Membership).
#' @param df.rules Data frame of BCG model rules.
#' @param col_SAMPLEID column name for sample id. Default = SAMPLEID
#' @param col_INDEX_NAME column name for index name. Default = INDEX_NAME
#' @param col_INDEX_CLASS column name for site type.Default = INDEX_CLASS
#' @param col_LEVEL column name for level.  Default = LEVEL
#' @param col_METRIC_NAME column name for metric name. Default = METRIC_NAME
#' @param col_RULE_TYPE column name for rule type (e.g., Rule0, Rule1, or 
#' Rule2). Default = RULE_TYPE 
#' @param col_MEMBERSHIP column name for metric membership. Default = MEMBERSHIP
#' @param col_EXC_RULE column name for exception rules. Default = EXC_RULE 
#' @param ... Arguments passed to `BCG.MetricMembership` used internally
#'
#' @return Returns a data frame of results in the wide format.
#' 
#' @examples
#' # library(readxl)
#' # library(BioMonTools)
#' 
#' # Calculate Metrics
#' df_samps_bugs <- readxl::read_excel(
#'                            system.file("extdata/Data_BCG_PugLowWilVal.xlsx"
#'                                              , package="BCGcalc")
#'                            , guess_max = 10^6)
#' myDF <- df_samps_bugs
#' myCols <- c("Area_mi2", "SurfaceArea", "Density_m2", "Density_ft2")
#' # populate missing columns prior to metric calculation
#' col_missing <- c("INFRAORDER", "HABITAT", "ELEVATION_ATTR", "GRADIENT_ATTR"
#'                  , "WSAREA_ATTR", "HABSTRUCT", "UFC")
#' myDF[, col_missing] <- NA
#' df_met_val_bugs <- BioMonTools::metric.values(myDF
#'                                               , "bugs"
#'                                               , fun.cols2keep = myCols)
#' 
#' 
#' # Import Rules
#' df_rules <- readxl::read_excel(system.file("extdata/Rules.xlsx"
#'                                            , package = "BCGcalc")
#'                       , sheet="Rules") 
#' 
#' # Calculate Metric Memberships
#' df_met_memb <- BCG.Metric.Membership(df_met_val_bugs, df_rules)
#' 
#' # Calculate Level Memberships
#' df_lev_memb <- BCG.Level.Membership(df_met_memb, df_rules)
#' 
#' # Show results
#' #View(df_lev_memb)
#' 
#' # Save Results
#' write.table(df_lev_memb
#'              , file.path(tempdir(), "Level_Membership.tsv")
#'              , row.names = FALSE
#'              , col.names = TRUE
#'              , sep = "\t")
#~~~~~~~~~~~~~~~~~~~~~~~~~~
# QC
# library(BCGcalc)
# library(readxl)
# #  Calculate Metrics
# df.samps.bugs <- read_excel(system.file("extdata/Data_BCG_PacNW.xlsx"
#                                         , package="BCGcalc")
#                             , guess_max = 10^6)
# myDF <- df.samps.bugs
# df.metric.values.bugs <- BioMonTools::metric.values(myDF, "bugs")
# 
# # Import Rules
# df.rules <- read_excel(system.file("extdata/Rules.xlsx"
#                              , package="BCGcalc"), sheet="BCG_PacNW_v1_500ct")
# # Calculate Membership
# df.metric.membership <- BCG.Metric.Membership(df.metric.values.bugs, df.rules)
# #
# input.shape <- "long"
# df.Level.Membership <- BCG.Level.Membership(df.metric.membership, df.rules)
# # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# df.metric.membership <- as.data.frame(df.metric.membership)
# df.rules <- as.data.frame(df.rules)
# 
# # Convert names to upper case
# names(df.metric.membership) <- toupper(names(df.metric.membership))
# names(df.rules) <- toupper(names(df.rules))
# 
# # INDEX_CLASS to lowercase
# df.metric.membership[,"INDEX_CLASS"] <- tolower(df.metric.membership[
#,"INDEX_CLASS"])
# df.rules[,"INDEX_CLASS"] <- tolower(df.rules[,"INDEX_CLASS"])
# 
# # Drop extra columns from df.metric.membership
# # (otherwise duplicates in merge)
# col.drop <- c("NUMERIC_RULES", "SYMBOL", "LOWER", "UPPER", "INCREASE"
#, "DESCRIPTION")
# col.keep <- names(df.metric.membership)[!(names(df.metric.membership) %in% 
#col.drop)]
# 
# # merge metrics and rules
# df.merge <- merge(df.metric.membership[,col.keep], df.rules
#       , by.x=c("INDEX_NAME", "INDEX_CLASS", "LEVEL", "METRIC_NAME", "RULE_TYPE")
#     , by.y=c("INDEX_NAME", "INDEX_CLASS", "LEVEL", "METRIC_NAME", "RULE_TYPE"))
# 
# 
# df.merge <- df.merge[df.merge$SAMPLEID=="06029CSR_Bug_2006-09-27_0", ]
# 
# 
# df.lev <- dplyr::summarise(dplyr::group_by(df.merge, SAMPLEID, INDEX_NAME
#                                            , INDEX_CLASS, LEVEL)
#                            #
#                            # Min of Alt2
#         , MembCalc_Alt2_min=min(MEMBERSHIP[RULE_TYPE == "Alt2"], na.rm=TRUE)
#                          # Max of Alt1
#          , MembCalc_Alt1_max=max(MEMBERSHIP[RULE_TYPE == "Alt1"], na.rm=TRUE)
#                          # Min of Rule0 (with alt above)
#        , MembCalc_Rule0_min=min(MEMBERSHIP[RULE_TYPE == "Rule0"], na.rm=TRUE)
# )
# View(df.merge)
# View(df.lev)
# 
# # convert from tibble to df
# df.lev <- as.data.frame(df.lev)
# # replace Inf and -Inf with NA
# df.lev[!is.finite(df.lev[,"MembCalc_Alt2_min"]), "MembCalc_Alt2_min"] <- NA
# df.lev[!is.finite(df.lev[,"MembCalc_Alt1_max"]), "MembCalc_Alt1_max"] <- NA
# # this one shouldn't happen.  Use zero just in case.
# df.lev[!is.finite(df.lev[,"MembCalc_Rule0_min"]), "MembCalc_Rule0_min"] <- 0
# 
# View(df.lev)
# 
# df.lev[,"MembCalc_Alt12_max"] <- apply(df.lev[,c("MembCalc_Alt2_min"
# , "MembCalc_Alt1_max")]
#                                        , 1, max, na.rm=TRUE)
# View(df.lev)
# 
# df.lev[!is.finite(df.lev[,"MembCalc_Alt12_max"]), "MembCalc_Alt12_max"] <- NA
# View(df.lev)
# 
# df.lev[,"Level.Membership"] <- apply(df.lev[,c("MembCalc_Alt12_max"
# , "MembCalc_Rule0_min")]
#                                      , 1, min, na.rm=TRUE)
# View(df.lev)
#~~~~~~~~~~~~~~~~~~~~~~~~~~
#' @export
BCG.Level.Membership <- function(df.metric.membership
                                 , df.rules
                                 , col_SAMPLEID = "SAMPLEID"
                                 , col_INDEX_NAME = "INDEX_NAME"
                                 , col_INDEX_CLASS = "INDEX_CLASS"
                                 , col_LEVEL = "LEVEL"
                                 , col_METRIC_NAME = "METRIC_NAME"
                                 , col_RULE_TYPE = "RULE_TYPE"
                                 , col_EXC_RULE = "EXC_RULE"
                                 , col_MEMBERSHIP = "MEMBERSHIP"
                                 , ...) {
  #
  boo_QC <- FALSE
  if (isTRUE(boo_QC)) {
    df.metric.membership <- df_met_memb
    df.rules <- df_rules
    col_SAMPLEID <- "SAMPLEID"
    col_INDEX_NAME <- "INDEX_NAME"
    col_INDEX_CLASS <- "INDEX_CLASS"
    col_LEVEL <- "LEVEL"
    col_METRIC_NAME <- "METRIC_NAME"
    col_RULE_TYPE <- "RULE_TYPE"
    col_EXC_RULE <- "EXC_RULE"
    col_MEMBERSHIP <- "MEMBERSHIP"
    a <- c(col_INDEX_NAME
           , col_INDEX_CLASS
           , col_LEVEL
           , col_METRIC_NAME
           , col_RULE_TYPE
           , col_EXC_RULE)
  }## IF ~ boo_QC ~ END
  
  # QC----
  # DEPRECATE SITE_TYPE.
  if (exists("col_SITE_TYPE")) {
    col_INDEX_CLASS <- col_SITE_TYPE
    msg <- "The parameter 'col_SITE_TYPE' was deprecated in v2.0.0.9001. \n
    Use 'col_INDEX_CLASS' instead."
    message(msg)
  } ## IF ~ col_SITE_TYPE
  
  # # convert membership to long format if provided
  # # Metrics to long
  # if (input.shape=="wide") {##IF.input.shape.START
  #   df.long <- reshape2::melt(df.metric.membership, id.vars=c("SAMPLEID"
  # , "INDEX_NAME", "INDEX_CLASS")
  #                             , variable.name="METRIC_NAME"
  # , value.name="METRIC_VALUE")
  # } else {
  #   df.long <- df.metric.membership
  # }##IF.input.shape.END
  
  # Convert to data.frame (from Tibble)
  df.metric.membership <- as.data.frame(df.metric.membership)
  df.rules <- as.data.frame(df.rules)
  
  # Convert names to upper case
  names(df.metric.membership) <- toupper(names(df.metric.membership))
  names(df.rules) <- toupper(names(df.rules))
  
  # QC, Columns
  # col_SAMPLEID
  # col_INDEX_NAME
  # col_INDEX_CLASS
  # col_LEVEL
  # col_METRIC_NAME
  # col_RULE_TYPE
  
  # INDEX_CLASS to lowercase
  df.metric.membership[, col_INDEX_CLASS] <- tolower(df.metric.membership[
                                                               , col_INDEX_CLASS])
  df.rules[, col_INDEX_CLASS] <- tolower(df.rules[, col_INDEX_CLASS])
  
  # EXC_RULE to uppercase
  df.metric.membership[, col_EXC_RULE] <- toupper(df.metric.membership[
                                                                , col_EXC_RULE])
  df.rules[, col_EXC_RULE] <- toupper(df.rules[, col_EXC_RULE])
  
  # RULE_TYPE to uppercase
  df.metric.membership[, col_RULE_TYPE] <- toupper(df.metric.membership[
    , col_RULE_TYPE])
  df.rules[, col_RULE_TYPE] <- toupper(df.rules[, col_RULE_TYPE])
  
  # Drop extra columns from df.metric.membership
  # (otherwise duplicates in merge)
  col.drop <- c("NUMERIC_RULES"
                , "SYMBOL"
                , "LOWER"
                , "UPPER"
                , "INCREASE"
                , "DESCRIPTION")
  col.keep <- names(df.metric.membership)[!(names(df.metric.membership) %in% 
                                                                      col.drop)]
  # MERGE----
  # merge metrics and rules
  df.merge <- merge(df.metric.membership[, col.keep]
                    , df.rules
                    , by.x = c(col_INDEX_NAME
                               , col_INDEX_CLASS
                               , col_LEVEL
                               , col_METRIC_NAME
                               , col_RULE_TYPE
                               , col_EXC_RULE)
                    , by.y = c(col_INDEX_NAME
                               , col_INDEX_CLASS
                               , col_LEVEL
                               , col_METRIC_NAME
                               , col_RULE_TYPE
                               , col_EXC_RULE))
  # Match on all to ensure using the same rules
  
  nrow_metmemb <- nrow(df.metric.membership)
  nrow_merge <- nrow(df.merge)
  if (nrow_metmemb != nrow_merge) {
    msg <- paste("Rules and Metric Membership not matching."
                 , paste0(nrow_metmemb, " = rows Metric Membership")
                 , paste0(nrow_merge, " = rows after merge with Rules")
                 , sep = "\n")
    message(msg)
  }## IF ~ nrow QC
  
  # Min of Rule2 (Alt2)
  # Max of Rule1 (Alt1) (with Min of Rule2 (Alt2))
  # Min of Rule0 (with alt above)
  
  # QC
  if (nrow(df.merge) == 0) {
    msg <- "Merging of Metric Membership and Rules data frames failed.
    Check columns col_INDEX_NAME, col_INDEX_CLASS, col_LEVEL, col_METRIC_NAME, col_RULE_TYPE, and col_EXC_RULE."
    stop(msg)
  }## IF ~ nrow(df.merge) ~ END
  
  # dplyr fix 1 ----
  # Ensure have correct names for summarise(group_by))
  names(df.merge)[names(df.merge) == col_SAMPLEID] <- "SAMPLEID"
  names(df.merge)[names(df.merge) == col_INDEX_NAME] <- "INDEX_NAME"
  names(df.merge)[names(df.merge) == col_INDEX_CLASS] <- "INDEX_CLASS"
  names(df.merge)[names(df.merge) == col_LEVEL] <- "LEVEL"
  names(df.merge)[names(df.merge) == col_RULE_TYPE] <- "RULE_TYPE"
  names(df.merge)[names(df.merge) == col_MEMBERSHIP] <- "MEMBERSHIP"
  ## Exceptions
  names(df.merge)[names(df.merge) == col_EXC_RULE] <- "EXC_RULE"
  
  
  # EXCEPTIONS ----
  # no harm done in allowing to run if not present
  
  ## EXC_RULE, MEDIAN ----
  df_er_median <- dplyr::filter(df.merge, EXC_RULE == "MEDIAN")
  df_er_median_calc <- dplyr::summarise(dplyr::group_by(df_er_median
                                                     , SAMPLEID
                                                     , INDEX_NAME
                                                     , INDEX_CLASS
                                                     , LEVEL
                                                     , RULE_TYPE
                                                      )
                                      , .groups = "drop_last"
                #
                # Calc MEDIAN
                , MEMBERSHIP = median(MEMBERSHIP, na.rm = TRUE)
                # , MEMBERSHIP_MEDIAN = median(MEMBERSHIP, na.rm = TRUE)
                # , MEMBERSHIP_COUNT = dplyr::n()
  )##summarise ~ MEDIAN
  # This assumes have 3 values
  # Rename MEMBERSHIP_MEDIAN and drop Count
  #
  # Update df.merge
  # Remove EXC_RULE == "MEDIAN"
  df.merge <- dplyr::filter(df.merge, EXC_RULE != "MEDIAN" | is.na(EXC_RULE))
  # Add new memberships back to df.merge
  df.merge <- dplyr::bind_rows(df.merge, df_er_median_calc)
  # 2024-01-03
  # Median same as Small2 when have 3 values
  # Median came first
  
  ## EXC_RULE, SMALL2----
  df_er_small2 <- dplyr::filter(df.merge, EXC_RULE == "SMALL2")
  # default sort in arrange is ascending (NA are at end)
  # group
  # filter for 2nd row 
  df_er_small2_calc <- dplyr::group_by(df_er_small2
                                       , SAMPLEID
                                       , INDEX_NAME
                                       , INDEX_CLASS
                                       , LEVEL) %>%
    dplyr::arrange(MEMBERSHIP) %>%
    dplyr::filter(dplyr::row_number() == 2)
  #
  # Update df.merge
  # Remove EXC_RULE == "SMALL2"
  df.merge <- dplyr::filter(df.merge, EXC_RULE != "SMALL2" | is.na(EXC_RULE))
  # Add new memberships back to df.merge
  df.merge <- dplyr::bind_rows(df.merge, df_er_small2_calc)
  
  ## EXC_RULE, SMALL3----
  # Need to handle Rule01 (max) [part of Small3 not a separate rule]
  df_er_small3_rule0 <- dplyr::filter(df.merge
                                      , EXC_RULE == "SMALL3" 
                                      & RULE_TYPE == "RULE0")
  df_er_small3_rule1 <- dplyr::filter(df.merge
                                      , EXC_RULE == "SMALL3" 
                                      & RULE_TYPE == "RULE1")
  # Rule1 before Rule0
  # Rule1, max then add to rule0
  ## not using summarize and max to maintain column order
  ## mutate to Rule0 to ensure not issues later if no other Rule0
  df_er_small3_rule1_calc <- dplyr::group_by(df_er_small3_rule1
                                             , SAMPLEID
                                             , INDEX_NAME
                                             , INDEX_CLASS
                                             , LEVEL) %>%
    dplyr::arrange(-MEMBERSHIP) %>% # desc
    dplyr::filter(dplyr::row_number() == 1) %>% 
    dplyr::mutate(RULE_TYPE = "RULE0")
  # Add Rule1 max to Rule0
  df_er_small3_rule0_calc <- dplyr::bind_rows(df_er_small3_rule0
                                              , df_er_small3_rule1_calc)
  # Small3 calc
  # default sort in arrange is ascending (NA are at end)
  # group
  # filter for 3rd row 
  df_er_small3_calc <- dplyr::group_by(df_er_small3_rule0_calc
                                             , SAMPLEID
                                             , INDEX_NAME
                                             , INDEX_CLASS
                                             , LEVEL) %>%
    dplyr::arrange(MEMBERSHIP) %>%
    dplyr::filter(dplyr::row_number() == 3)
  #
  # Update df.merge
  # Remove EXC_RULE == "SMALL3"
  df.merge <- dplyr::filter(df.merge, EXC_RULE != "SMALL3" | is.na(EXC_RULE))
  # Add new memberships back to df.merge
  df.merge <- dplyr::bind_rows(df.merge, df_er_small3_calc)
  
  
  # Summarize ----
  # Will get lots of warnings, SampIDs without alt 1 or alt 2 rules
  suppressWarnings(
    df.lev <- dplyr::summarise(dplyr::group_by(df.merge
                                               , SAMPLEID
                                               , INDEX_NAME
                                               , INDEX_CLASS
                                               , LEVEL
                                               )
                               , .groups = "drop_last"
                              #
                # Min of Rule2 (Alt2)
                , MembCalc_Rule2_min = min(MEMBERSHIP[RULE_TYPE == "RULE2"]
                                        , na.rm = TRUE)
                # Max of Rule1 (Alt1)
                , MembCalc_Rule1_max = max(MEMBERSHIP[RULE_TYPE == "RULE1"]
                                          , na.rm = TRUE)
                # Min of Rule0 (with alt above)
                , MembCalc_Rule0_min = min(MEMBERSHIP[RULE_TYPE == "RULE0"]
                                           , na.rm = TRUE)
                # Exceptions for CT
                , MembCalc_Exc0_min = min(MEMBERSHIP[EXC_RULE == "EXCMEM0"]
                                          , na.rm = TRUE)
                , MembCalc_Exc1_max = max(MEMBERSHIP[EXC_RULE == "EXCMEM1"]
                                          , na.rm = TRUE)
                , MembCalc_Exc2_min = min(MEMBERSHIP[EXC_RULE == "EXCMEM2"]
                                          , na.rm = TRUE)
                

    )## summarise ~ END
  )## suppressWarnings ~ END

  # dplyr fix 2 ----
  # Change names back to variable inputs
  names(df.lev)[names(df.lev) == "SAMPLEID"] <- toupper(col_SAMPLEID)
  names(df.lev)[names(df.lev) == "INDEX_NAME"] <- toupper(col_INDEX_NAME)
  names(df.lev)[names(df.lev) == "INDEX_CLASS"] <- toupper(col_INDEX_CLASS)
  names(df.lev)[names(df.lev) == "LEVEL"] <- toupper(col_LEVEL)
  names(df.lev)[names(df.lev) == "RULE_TYPE"] <- toupper(col_RULE_TYPE)
  ## Exceptions
  names(df.lev)[names(df.lev) == "EXC_RULE"] <- toupper(col_EXC_RULE)
  
  # convert from tibble to df
  df.lev <- as.data.frame(df.lev)
  # replace Inf and -Inf with NA
  df.lev[!is.finite(df.lev[, "MembCalc_Rule2_min"]), "MembCalc_Rule2_min"] <- NA
  df.lev[!is.finite(df.lev[, "MembCalc_Rule1_max"]), "MembCalc_Rule1_max"] <- NA
  # this one shouldn't happen.  Use zero just in case.
  df.lev[!is.finite(df.lev[, "MembCalc_Rule0_min"]), "MembCalc_Rule0_min"] <- 0
  ## Exceptions
  df.lev[!is.finite(df.lev[, "MembCalc_Exc0_min"]), "MembCalc_Exc0_min"] <- NA
  df.lev[!is.finite(df.lev[, "MembCalc_Exc1_max"]), "MembCalc_Exc1_max"] <- NA
  df.lev[!is.finite(df.lev[, "MembCalc_Exc2_min"]), "MembCalc_Exc2_min"] <- NA
  
  # dplyr fix
  # Have to do outside of dplyr to get rid of Inf and -Inf
  
  # Need to suppress warnings again
  suppressWarnings(
    df.lev[,"MembCalc_Rule12_max"] <- apply(df.lev[, c("MembCalc_Rule2_min"
                                                      , "MembCalc_Rule1_max")]
                                          , 1
                                          , max
                                          , na.rm = TRUE)
  )
  # replace Inf with NA
  df.lev[!is.finite(df.lev[,"MembCalc_Rule12_max"])
         , "MembCalc_Rule12_max"] <- NA
  
  # Final Calc
  # df.lev[,"Level.Membership"] <- min(df.lev[,"MembCalc_Rule12_max"]
  #                                      , df.lev[,"MembCalc_Rule0_min"]
  #                                      , na.rm=TRUE)
  
  # Assign Level ----
  df.lev[,"Level.Membership"] <- apply(df.lev[,c("MembCalc_Rule12_max"
                                                 , "MembCalc_Rule0_min")]
                                       , 1, min, na.rm = TRUE)
  
  
  # ## Exceptions, Level Mem ----
  boo_exceptions <- FALSE
  #
  if (isTRUE(boo_exceptions)) {
    ## CT_F1_L4
    boo_CT_F1_L4 <- df.lev[, col_INDEX_NAME] == "BCG_CT_2015" &
      df.lev[, col_INDEX_CLASS] == "fish01" &
      df.lev[, col_LEVEL] == 4
    ### Exc1, Recalc
    df.lev[boo_CT_F1_L4, "MembCalc_Exc1_max"] <- max(c(
      df.lev[boo_CT_F1_L4, "MembCalc_Exc1_max"]
      , df.lev[boo_CT_F1_L4, "MembCalc_Exc0_min"])
      , na.rm = TRUE)
    ### replace Inf with NA
    df.lev[!is.finite(df.lev[, "MembCalc_Exc1_max"]), "MembCalc_Exc1_max"] <- NA
    ### Final Lev, Recalc
    df.lev[boo_CT_F1_L4, "Level.Membership"] <- min(c(
      df.lev[boo_CT_F1_L4, "MembCalc_Exc1_max"]
      , df.lev[boo_CT_F1_L4, "MembCalc_Exc2_min"])
      , na.rm = TRUE)
    #
    ## CT_F23_L2 (Combine fish02 and fish03)
    boo_CT_F23_L4 <- df.lev[, col_INDEX_NAME] == "BCG_CT_2015" &
      (df.lev[, col_INDEX_CLASS] == "fish02" | 
         df.lev[, col_INDEX_CLASS] == "fish03") &
      df.lev[, col_LEVEL] == 2
    # Final Lev, Recalc
    df.lev[boo_CT_F23_L4, "Level.Membership"] <- min(c(
      df.lev[boo_CT_F23_L4, "MembCalc_Exc0_min"]
      , df.lev[boo_CT_F23_L4, "MembCalc_Exc1_max"])
      , na.rm = TRUE)
    #
  }## IF ~ boo_exceptions ~ END
  
  # replace Inf with NA (Redo, just in case for base and exceptions)
  df.lev[!is.finite(df.lev[, "Level.Membership"]), "Level.Membership"] <- NA

  # add extra to "Level"
  df.lev[, col_LEVEL] <- paste0("L", df.lev[, col_LEVEL])
  
  # column fix
  ## Ensure have expected values
  names(df.lev)[names(df.lev) == col_SAMPLEID] <- "SAMPLEID"
  names(df.lev)[names(df.lev) == col_INDEX_NAME] <- "INDEX_NAME"
  names(df.lev)[names(df.lev) == col_INDEX_CLASS] <- "INDEX_CLASS"
  names(df.lev)[names(df.lev) == col_LEVEL] <- "LEVEL"
  
  # Convert to wide format
  df.lev.wide <- reshape2::dcast(df.lev
                                 , SAMPLEID + INDEX_NAME + INDEX_CLASS
                                 ~ LEVEL
                                 , value.var = "Level.Membership"
                                 )
  
  # Column fix
  ## Return to input parameters
  names(df.lev.wide)[names(df.lev.wide) == "SAMPLEID"] <- toupper(col_SAMPLEID)
  names(df.lev.wide)[names(df.lev.wide) == "INDEX_NAME"] <- toupper(
                                                                col_INDEX_NAME)
  names(df.lev.wide)[names(df.lev.wide) == "INDEX_CLASS"] <- toupper(
                                                                  col_INDEX_CLASS)
  names(df.lev.wide)[names(df.lev.wide) == "LEVEL"] <- toupper(col_LEVEL)
  
  
  # __EXCEPTIONS ----
  # Not sure if need here or elsewhere
  
  
  # Add missing Levels and sort L1:L6
  col.Levels <- c(paste0("L",1:6))
  col.Other <- names(df.lev.wide)[!(names(df.lev.wide) %in% col.Levels)]
  col.Levels.Present <- names(df.lev.wide)[(names(df.lev.wide) %in% col.Levels)]
  col.Levels.Absent  <- col.Levels[!col.Levels %in% names(df.lev.wide)]
  # Add missing Level columns
  df.lev.wide[, col.Levels.Absent] <- 0
   # Sort columns
  df.subtotal <- df.lev.wide[,c(col.Other, col.Levels)]
  # rename L1:6 with .sub
  col.rename <- names(df.subtotal) %in% col.Levels
  col.sub <- paste0(names(df.subtotal)[col.rename], ".Sub")
  names(df.subtotal)[col.rename] <- col.sub
  
  # Calculate Final scoring ----
  ## Need to consider other final scores (use apply)
  ### Level Membership affected by previous level assignments.
  # 20180613, added "round" 8 for floating point error 
  # (e.g., a value of 1.1E-16).
  # 20230602, add check no L5 and trigger to calc L5 instead of L6
  
  # Num Rules by model
  df.rules.numruleslev <- dplyr::summarize(dplyr::group_by(df.rules
                                                           , INDEX_NAME
                                                           , INDEX_CLASS)
                                           , .groups = "drop_last"
                                           , rules_lev_min = min(LEVEL
                                                                 , na.rm = TRUE)
                                           , rules_lev_max = max(LEVEL
                                                                 , na.rm = TRUE)
                                    )## summarize ~ END
  # Merge num rules
  df.subtotal <- merge(df.subtotal
                       , df.rules.numruleslev
                       , by = c("INDEX_NAME", "INDEX_CLASS")
                       , all.x = TRUE
                       )
  # name columns so can remove later
  col.ruleslev <- c("rules_lev_min", "rules_lev_max")
  
  rnd_dig <- 8
  
  df.subtotal[, "L1"] <- df.subtotal[, "L1.Sub"]
  
  df.subtotal[, "L2"] <- apply(df.subtotal[, c("L1", "L2.Sub")]
                               , 1
                               , function(x) min(round(1 - x[1], rnd_dig)
                                                 , x[2]
                                                 , na.rm = TRUE))
  
  df.subtotal[, "L3"] <- apply(df.subtotal[, c("L1", "L2", "L3.Sub")]
                               , 1
                               , function(x) min(round(1 - sum(x[1]
                                                               , x[2]
                                                               , na.rm = TRUE)
                                                       , rnd_dig)
                                                 , x[3]
                                                 , na.rm = TRUE))
  
  df.subtotal[, "L4"] <- apply(df.subtotal[, c("L1", "L2", "L3", "L4.Sub")]
                               , 1
                              , function(x) min(round(1 - sum(x[1]
                                                            , x[2]
                                                            , x[3]
                                                            , na.rm = TRUE)
                                                      , rnd_dig)
                                                , x[4], na.rm = TRUE))
  
  df.subtotal[, "L5"] <- apply(df.subtotal[, c("L1"
                                              , "L2"
                                              , "L3"
                                              , "L4"
                                              , "L5.Sub")]
                               , 1
                               , function(x) min(round(1 - sum(x[1]
                                                               , x[2]
                                                               , x[3]
                                                               , x[4]
                                                               , na.rm = TRUE)
                                                       , rnd_dig)
                                                 , x[5], na.rm = TRUE))
  
 
  # Exception for only 4 rules
  # df.subtotal[, "L5"] <- ifelse(df.subtotal[, "rules_lev_max"] == 4
  #                               , 1 - sum(df.subtotal[, c("L1", "L2", "L3", "L4")])
  #                               , df.subtotal[, "L5"])
  boo_L5fix <- df.subtotal[, "rules_lev_max"] == 4
  if (sum(boo_L5fix) > 0) {
    df.subtotal[boo_L5fix, "L5"] <- apply(df.subtotal[boo_L5fix
                                           , c("L1", "L2", "L3", "L4")]
                               , 1
                               , function(x) round(1 - sum(x[1]
                                                           , x[2]
                                                           , x[3]
                                                           , x[4]
                                                           , x[5]
                                                           , na.rm = TRUE)
                                                   , rnd_dig))
  }## IF ~ L5fix
  
  
  
  df.subtotal[, "L6"] <- apply(df.subtotal[,c("L1"
                                              , "L2"
                                              , "L3"
                                              , "L4"
                                              , "L5")]
                               , 1
                               , function(x) round(1 - sum(x[1]
                                                           , x[2]
                                                           , x[3]
                                                           , x[4]
                                                           , x[5]
                                                           , na.rm = TRUE)
                                                   , rnd_dig))

  
  # Return RESULTS ####
  # Remove sub fields
  df.results <- df.subtotal[, !(names(df.subtotal) %in% c(col.sub, col.ruleslev))]
  # Results are for each SAMPLEID, INDEX_NAME, INDEX_CLASS, and 
  #                                                  LEVEL Assignment/Membership
  # create output
  return(df.results)
  #
}##FUNCTION.END
leppott/BCGcalc documentation built on April 12, 2024, 8:35 a.m.