R/classification-functions.R

Defines functions combined_seasons_minta combined_probability_classes getObsIDX9r getObsIDX8rB getMINTA_ntaxa_aspt getUbias9r_new getZbias_9r getAvgEQR_SprAut getClassarray_aspt getClassarray_ntaxa getMostProbableClass getProbClassLabelFromEQR getUbias8r_new getObsIDXniner getObsIDX8r sdobs_one_year_new getZObs_r_new compute_RjAj getWeighted_proportion_Rj computeScoreProportions

# Begin Exclude Linting
# Classification functions for RIVPACS III+ for GB

# 1. Compute the proportions of table with sumofRows divide by each element in the row
computeScoreProportions <- function(ScoreDFrame) {
  # ScoreDFrame <- as.matrix(ScoreDFrame)
  for (i in 1:nrow(ScoreDFrame)) {
    ScoreDFrame[i, ] <- ScoreDFrame[i, ] / (rowSums(ScoreDFrame[, ])[i])
  }
  return(as.data.frame(ScoreDFrame))
}

# 2.  Multiply each sites probabilities with these scores, i.e. Pi*Qij, use computeScoreProportions() function
getWeighted_proportion_Rj <- function(allProbabilities, computedScoreProp) { # j = 1 to 5,
  return(as.matrix(allProbabilities) %*% as.matrix(computedScoreProp))
}

# 3. RjAj  - #Multiply Rj by Aj, note each row of Aj is for NTAXA, ASPT, so transpose to multiply by Rj
# This function is the same as function "getAdjustedExpected"

compute_RjAj <- function(Rj, Aj) {
  temp_mult <- as.data.frame(Rj %*% t(Aj))
  colnames(temp_mult) <- c("NTAXA", "ASPT")
  return(temp_mult)
}

## WITH RALPH

# Sec 6.3.2.1

# New by Ralph
# ZObsir = = ZNormir * SDObsi # Random deviate for potential average observed value of index i in simulation r

getZObs_r_new <- function(sdobs, N_sim, seed, set_seed) { # N_sim = no. of simulations
  if(!seed) {
    set.seed(set_seed[2])
  }
  dframe <- as.data.frame(sdobs * stats::rnorm(N_sim, 0, 1)) ## With a mean of 0.0 and SD of 1.0 for index i in simulation r
  return(dframe)
}

# Section 6.3.2.1b
# Calculation of standard deviation for SDObs_i, for single year runs
# 5.8 Get SDObs_i = sqtr((SDRep_i)^2 + (SDTSeas_i)^2)/NObsSeas
# Single-season run

sdobs_one_year_new <- function(SDRep_i, SDTSeas_i, NObs_seas) { # NObs_seas = 1
  return(
    sqrt((SDRep_i^2 + SDTSeas_i^2) / NObs_seas)
  )
}

# for ntaxa
getObsIDX8r <- function(ObsIDX8, znorm_ir) { # needs 10,000 simulations, and store these for each index(ntaxa, aspt) for each season (autumn, spring)
  # Do this and store these 10,000 times. Have a matrix array to store them
  # ZObs8r <- getZObs_ir(znorm_ir, SDObs_ir)
  return(
    (sqrt(ObsIDX8) + znorm_ir)^2
  )
}

# for aspt, we compute 9r
getObsIDXniner <- function(ObsIDX9, znorm_ir) {
  # needs 10,000 simulations, and store these for each index(ntaxa, aspt) for each season (autumn, spring)
  # Do this and store these 10,000 times. Have a matrix array to store them
  # ZObs8r <- getZObs_ir(znorm_ir, SDObs_ir)
  return(ObsIDX9 + znorm_ir)
}

# Next Bias correction foir Ubias8
getUbias8r_new <- function(N_sim, Ubias8, seed, set_seed) { # N_sim = no. of simulations, and  Ubias8 = 1.62
  if(!seed) {
    set.seed(set_seed[3])
  }
  dframe <- as.data.frame(stats::rpois(N_sim, Ubias8)) ## With a mean of 0.0 and SD of 1.0 for index i in simulation r
  return(dframe)
}

# Step 2. Now let us do the uncertainty in the reference Adjusted Expected values = Exp_ref
# use the function getZObs_r_new <- function (sdobs, N_sim), with SDExp_i of that index

# Function to get the classification value for the class for WHPT Abundance
# Return the class labels of probability of class i.e. use 1 = H, 2 = G, 3 = M, 4 = P, 5 = B
getProbClassLabelFromEQR <- function(area) {
  if(area == "iom") {
    probClassFrame <- data.frame(class = c("E", "G", "M", "P", "B"))
  } else {
    probClassFrame <- data.frame(class = c("H", "G", "M", "P", "B"))
    }
  return(probClassFrame)
}

# Find tne most probable class
getMostProbableClass <- function(dframe) {
  return(colnames(dframe)[apply(dframe, 1, which.max)])
}

# Find the class array of each site
getClassarray_ntaxa <- function(EQR_ntaxa) {
  EQR_ntaxa[, 1][EQR_ntaxa >= 0.8] <- 1 # class = H
  EQR_ntaxa[, 1][EQR_ntaxa >= 0.68 & EQR_ntaxa < 0.8] <- 2 # class = G
  EQR_ntaxa[, 1][EQR_ntaxa >= 0.56 & EQR_ntaxa < 0.68] <- 3 # class = M
  EQR_ntaxa[, 1][EQR_ntaxa >= 0.47 & EQR_ntaxa < 0.56] <- 4 # class = P
  EQR_ntaxa[, 1][EQR_ntaxa >= 0.0 & EQR_ntaxa < 0.47] <- 5 # class = B
  EQR_ntaxa[, 1][EQR_ntaxa < 0.0] <- 9 # Default if no condition is satisfied?
  EQR_ntaxa[, 1][is.na(EQR_ntaxa)] <- 9 # Default if observation is missing
  return(EQR_ntaxa)
}

# Find the class array
getClassarray_aspt <- function(EQR_aspt) {
  EQR_aspt[, 1][EQR_aspt >= 0.97] <- 1 # class = H
  EQR_aspt[, 1][EQR_aspt >= 0.86 & EQR_aspt < 0.97] <- 2 # class = G
  EQR_aspt[, 1][EQR_aspt >= 0.72 & EQR_aspt < 0.86] <- 3 # class = M
  EQR_aspt[, 1][EQR_aspt >= 0.59 & EQR_aspt < 0.72] <- 4 # class = P
  EQR_aspt[, 1][EQR_aspt >= 0.0 & EQR_aspt < 0.59] <- 5 # class = B
  EQR_aspt[, 1][EQR_aspt < 0.0] <- 9 # Default if no condition is satisfied?
  EQR_aspt[, 1][is.na(EQR_aspt)] <- 9 # Default if observation is missing
  return(EQR_aspt)
}
# Find the averages of both spr and autum, declare a function to compute this
getAvgEQR_SprAut <- function(EQR_spr, EQR_aut, k, row_name = FALSE) {
  eqr_av_spr <- colMeans(EQR_spr)
  eqr_av_aut <- colMeans(EQR_aut)
  eqr_av <- cbind(eqr_av_spr, eqr_av_aut)
  if (row_name == TRUE) {
    rownames(eqr_av) <- c(paste0("TST-", k))
  }
  return(eqr_av)
}

# 5.4 Calculate Zbias9r - Random number deviate from a standard Normal distribution of mean 0.0 and SD of 1.0
# zbias mean and standard deviation
getZbias_9r <- function(N_sims, zbias_mean, zbias_sd, seed, set_seed) {
  if(!seed) {
    set.seed(set_seed[1])
  }
  ZNorm_ir_whpt <- stats::rnorm(N_sims, 0, 1)
  return(ZNorm_ir_whpt)
}

# 5.5 Calculating Ubias9r -abundance weighted wHPT taxa of the ubias8r, ubias_8r is a list/dframe of n simulations, so loop around
getUbias9r_new <- function(u_9a, u_9b, u_9c, obsIDX_9, N_runs, ubias_8r, seed, set_seed) {
  ubias_8r[ubias_8r == 0] <- 1
  # if(ubias_8r>0) {
  rnorm_runs <- getZbias_9r(N_runs, seed = seed, set_seed = set_seed)
  rep_u9a <- rep(u_9a, N_runs)
  rep_u9b <- rep(u_9b, N_runs)
  rep_obsIDX_9 <- rep(obsIDX_9, N_runs)
  rep_u9c <- rep(u_9c, N_runs)
  return(rep_u9a + rep_u9b * rep_obsIDX_9 + rnorm_runs * (rep_u9c / sqrt(ubias_8r)))
}

# getMINTA_ntaxa_aspt() - Function to calculate MINTA - minimum taxa - he worst of spr aut EQR for NTAXA, ASPT
# Each dataframe has 10,000 simulated EQR - ntaxa and aspt
getMINTA_ntaxa_aspt <- function(ntaxa_EQR, aspt_EQR) {
  localDFrame <- ntaxa_EQR # as.matrix(0, nrow=nrow(ntaxa_EQR), ncol=1)
  # localDFrame <- as.matrix(0, nrow = nrow(ntaxa_EQR), byrow = TRUE)
  for (i in 1:length(ntaxa_EQR)) {
    localDFrame[i] <- max(ntaxa_EQR[i], aspt_EQR[i])
  }

  return(localDFrame)
} # getMINTA_ntaxa_aspt

# section 6.3.2.1b,
# 6.1 Get ObsIDX8r = (???(ObsIDX8) + ZObs8r)^2   = rth simulated value for observed weighted WHPT NTAXA , ObsIDX8 is an input value, getZObs_ir = ZObs8r

getObsIDX8rB <- function(ObsIDX8, znorm_ir) { # needs 10,000 simulations, and store these for each index(ntaxa, aspt) for each season (autumn, spring)
  # Do this and store these 10,000 times. Have a matrix array to store them
  # ZObs8r <- getZObs_ir(znorm_ir, SDObs_ir)
  return((sqrt(ObsIDX8) + znorm_ir)^2)
}

# 6.2. Get ObsIDX9r = ObsIDX9 + ZObs9r, the rth simulated  for observed weighted WHPT ASPT, ObsIDX9 is user-supplied value
getObsIDX9r <- function(ObsIDX9, znorm_ir) { # needs 10,000 simulations, and store these for each index(ntaxa, aspt) for each season (autumn, spring)
  # Do this and store these 10,000 times. Have a matrix array to store them
  # ZObs9r <- getZObs_ir(znorm_ir, SDObs_ir)
  return((ObsIDX9 + znorm_ir))
}

combined_probability_classes <- function(spr_eqrs = NULL,
                                         sum_eqrs = NULL,
                                         aut_eqrs = NULL,
                                         aspt = TRUE,
                                         ntaxa = FALSE,
                                         n_runs = n_runs,
                                         predictions = NULL,
                                         area = NULL,
                                         k = NULL) {
  EQR_avg <- data.frame(rowMeans(cbind(spr_eqrs, sum_eqrs, aut_eqrs),
                                 na.rm = TRUE))
  if(aspt) {
  class_array_combined <- getClassarray_aspt(EQR_avg)
  }
  if(ntaxa) {
    class_array_combined <- getClassarray_ntaxa(EQR_avg)
  }
  prob_class_comb <- matrix(0, ncol = 1, nrow = 5)
  # Process probabilities
  for (i in 1:5) {
    prob_class_comb[i] <- (100 / n_runs) *
      sum(class_array_combined[class_array_combined == i, ] / i)
  }
  prob_class_comb <- t(prob_class_comb)
  # Rename the columns to H G M P B
  colnames(prob_class_comb) <- getProbClassLabelFromEQR(area)[, 1]
  rownames(prob_class_comb) <- as.character(predictions[k, "SITE"])
  # Find most probable class, i.e the maximum, and add it to the site
  mostProb <- getMostProbableClass(prob_class_comb)
  prob_class_comb <- cbind(prob_class_comb, mostProb)
  probability_classes <- data.frame()
  probability_classes <- rbind(probability_classes, prob_class_comb)
  eqr <- data.frame("EQR" = mean(EQR_avg[,1]))
  probability_classes <- cbind(probability_classes, eqr)
  return(probability_classes)
}

combined_seasons_minta <- function(spr_aspt = NULL,
                                   sum_aspt = NULL,
                                   aut_aspt = NULL,
                                   spr_ntaxa = NULL,
                                   sum_ntaxa = NULL,
                                   aut_ntaxa = NULL,
                                   predictions = NULL,
                                   area = NULL,
                                   k = NULL,
                                   n_runs = n_runs){

    aspt <- spr_aspt[,1]
    for (i in 1:nrow(spr_aspt)) {
      aspt[i] <- max(c(spr_aspt[i,], aut_aspt[i,], sum_aspt[i,]),na.rm = TRUE)
    }

    ntaxa <- spr_ntaxa[,1]
    for (i in 1:nrow(spr_ntaxa)) {
      ntaxa[i] <- max(c(spr_aspt[i,], aut_aspt[i,], sum_aspt[i,]),na.rm = TRUE)
    }

    aspt <- data.frame(aspt)
    ntaxa <- data.frame(ntaxa)
    aspt_array <- getClassarray_aspt(aspt)
    ntaxa_array <- getClassarray_ntaxa(ntaxa)
    minta_ntaxa_aspt <- getMINTA_ntaxa_aspt(
      as.matrix(aspt_array),
      as.matrix(ntaxa_array)
    )

    minta_prob_class <- matrix(0, ncol = 1, nrow = 5)
    for (i in 1:5) {
      minta_prob_class[i] <- 100 * sum(minta_ntaxa_aspt[minta_ntaxa_aspt == i, ] / i) / n_runs
    }
    aa <- t(minta_prob_class) # aut
    colnames(aa) <- getProbClassLabelFromEQR(area)[, 1]
    rownames(aa) <- as.character(predictions[k, "SITE"])
    # Find most probable MINTA class, i.e the maximum, and add it to the site
    mostProb <- getMostProbableClass(aa)
    aa <- cbind(aa, mostProb)
    return(list(aa,minta_ntaxa_aspt))
  }



# End Exclude Linting
aquaMetrics/rict documentation built on March 1, 2025, 1:31 p.m.