R/Algorithms_assessment.R


#' @name Assessment_via_cluster
#' @title Calculate error metrics for all algorithms per cluster
#' @param pred prediction of Chla
#' @param meas in-situ measurement of Chla
#' @param memb membership value matrix
#' @param metrics Metrics to be calculated
#' @param log10 Should pred and meas be log10-transformed? (default as \code{FALSE})
#' @param total Whether to calculate summarized metrics (default as \code{TRUE})
#' @param hard.mode If \code{FALSE}, the membership values are used to calculate validation metrics
#' @param cal.precision Whether to calculate precision (only supported for vectorized metrics), default as \code{FALSE}
#' @param valid.definition The definition of a valid prediction, default as \code{list(negative=FALSE, percent = 0.6)}.
#'   Invalid predictions are counted in \code{Valid_percent} but are not removed when calculating error metrics.
#' 
#' \code{negative=FALSE} means negative values are considered invalid predictions, while \code{percent} is the
#'   error-percentage tolerance: only predictions within \code{(meas - meas*percent, meas + meas*percent)} are valid.
#' 
#' @param na.process Whether to process NA values and to report the percentage of valid predictions (\code{Valid_percent})
#' @param plot.col option to plot col result for selected metrics (default as \code{FALSE})
#' 
#' @note If \code{cal.precision} is \code{TRUE}, \code{hard.mode} is forced to \code{TRUE}. In that case,
#'   the mean and sd are calculated per cluster in hard mode based on the result from \link{cal.metrics.vector}.
#'   
#' @export
#' @return Results of \code{Assessment_via_cluster} are returned as a list including:
#' \item{Metrics}{A list of the selected metric values for all algorithms. \code{Valid_percent} 
#'   will be included if \code{na.process} is set as \code{TRUE}}
#' \item{res_plot}{Bar plots (via ggplot) of the metric values for every cluster.}
#' \item{res_plot_dt}{Data.frame used for plotting \code{res_plot}, kept in case other plot types are needed.}
#' \item{res_plot_facet}{\code{res_plot} added on \code{facet_wrap}.}
#' \item{input}{input parameters of \link{Assessment_via_cluster}}
#' 
#' @family Algorithm assessment
#' 
#' @import ggplot2  
#' @importFrom stats runif quantile
#' @importFrom reshape2 melt
#' 
#' @examples 
#' library(FCMm) 
#' library(ggplot2) 
#' library(magrittr)
#' library(stringr)
#' data("Nechad2015")
#' x <- Nechad2015[,3:11]
#' wv <- gsub("X","",names(x)) %>% as.numeric
#' set.seed(1234)
#' w <- sample(1:nrow(x), 100) 
#' x <- x[w, ]
#' names(x) <- wv
#' nb = 4 # Obtained from the vignette "Cluster a new dataset by FCMm"
#' set.seed(1234)
#' FD <- FuzzifierDetermination(x, wv, stand=FALSE)
#' result <- FCM.new(FD, nb, fast.mode = TRUE)
#' p.spec <- plot_spec(result, show.stand=TRUE)
#' print(p.spec$p.cluster.spec)
#' Chla <- Nechad2015$X.Chl_a..ug.L.[w]
#' Chla[Chla >= 999] <- NA
#' dt_Chla <- run_all_Chla_algorithms(x) %>% as.data.frame
#' dt_Chla <- data.frame(Chla_true = Chla, 
#' BR_Gil10 = dt_Chla$BR_Gil10, 
#' OC4_OLCI = dt_Chla$OC4_OLCI, 
#' OCI_Hu12 = dt_Chla$OCI_Hu12, 
#' NDCI_Mi12= dt_Chla$NDCI_Mi12) %>% round(3)
#' w = which(!is.na(dt_Chla$Chla_true))
#' dt_Chla = dt_Chla[w,]
#' memb = result$res.FCM$u[w,] %>% round(4)
#' Asses_soft <- Assessment_via_cluster(pred = dt_Chla[,-1], 
#' meas = dt_Chla[,1], memb = memb, log10 = TRUE, hard.mode = FALSE, 
#' na.process = TRUE, plot.col = TRUE)
#' Asses_soft$res_plot_facet
#' knitr::kable(Asses_soft$MAE %>% round(3))
#' knitr::kable(Asses_soft$MAPE %>% round(2))
#' 
#' 
Assessment_via_cluster <- function(pred, meas, memb,
                                   metrics = c('MAE','MAPE'),
                                   log10 = FALSE, 
                                   total = TRUE,
                                   hard.mode= TRUE,
                                   cal.precision = FALSE,
                                   valid.definition = list(negative=FALSE, percent = 0.6),
                                   na.process = FALSE,
                                   plot.col = FALSE){
  
  pred <- as.data.frame(pred)
  meas <- as.data.frame(meas)
  
  c.value = ifelse(log10, mean(log10(meas[,1]), na.rm=TRUE), mean(meas[,1], na.rm=TRUE))
  
  if(nrow(pred) != nrow(meas) | nrow(pred) != nrow(memb))
    stop('Rows of input are different!')
  
  if(!na.process){
    if(anyNA(pred) | anyNA(meas) | anyNA(memb)){
      stop('na.process is FALSE, but the predicted, measured, or membership values contain NA values!')
    }
  }else{
    if(anyNA(meas)){
      stop('na.process is TRUE, but the measured values contain NA values!')
    }
    if(length(which(meas == 0)) > 0){
      stop('na.process is TRUE, but the measured values contain ZERO values!')
    }
  }
  
  if(hard.mode == TRUE){
    for(i in 1:length(metrics))
      metrics[i] <- match.arg(metrics[i], cal.metrics.names())
  }else{
    for(i in 1:length(metrics))
      metrics[i] <- match.arg(metrics[i], cal.metrics.vector.names())
  }
  
  if(cal.precision == TRUE){ # precision calculation is only for vectorized metrics
    for(i in 1:length(metrics)){
      metrics[i] <- match.arg(metrics[i], cal.metrics.vector.names())
    }
    hard.mode = TRUE # force hard mode so mean and sd can be calculated in each cluster
  }
  
  # generate the output dataframe  
  model_names <- colnames(pred)
  cluster_names <- colnames(memb)
  cluster_crisp <- apply(memb,1,which.max)
  
  validation <- matrix(data=0,
                       nrow=length(cluster_names), 
                       ncol=length(model_names)) %>% as.data.frame()
  colnames(validation) <- model_names
  rownames(validation) <- cluster_names
  
  # output is a list, create it!
  result <- list()
  if(cal.precision == TRUE){
    
    for(i in 1:length(metrics)){
      result[[metrics[i]]] <- validation
      precision_name <- paste0(metrics[i], '_p')
      result[[precision_name]] <- validation
    }
    
  }else{ # no need for precision
    
    for(i in 1:length(metrics))
      result[[metrics[i]]] <- validation
  
  }
  
  
  if(na.process){
    result[["Valid_percent"]] <- validation
  }
  
  #Hard mode####
  for(model in 1:ncol(pred)){
    for(cluster in 1:ncol(memb)){

      # if(colnames(pred)[model] == "Chla_blend" & cluster == 1){
      #   print(model)
      # }
      
      w <- which(cluster_crisp == cluster)
      x <- meas[w,] # true
      y <- pred[w, model] # pred
      num_raw <- length(x)
      
      if(na.process){
        
        #item-valid####
        w_finite <- which(is.finite(y) & y>0)
        
        # The default percent of valid.definition is 0.6, i.e., two times the GOSC requirement
        # If valid.definition is set as NULL, this step is skipped
        if(!is.null(valid.definition)) {
          w_finite2 <- def_validation(x, y, valid.definition, log10)
          num_new <- length(w_finite2)
        }else {
          w_finite2 <- w_finite
          num_new <- length(w_finite)
        }
        
        x <- x[w_finite]
        y <- y[w_finite]
        
        # x <- x[w_finite2]
        # y <- y[w_finite2]
        
        if(num_new > num_raw)
          stop("Error! The subset sample number is larger than the raw one.")
      }
      
      #---------------------# 
      # calculate precision #
      #---------------------#
      if(cal.precision == TRUE){
      
        for(metric in metrics){
          
          if(metric == "RATIO") {
            center.value = 1
          }else {
            center.value = 0
          }
          
          precision_name <- paste0(metric, '_p')
          metric_value <- cal.metrics.vector(x, y, metric, 
                                             log10 = log10, 
                                             c.value = c.value)
          if(log10){
            x_ = log10(x)
            y_ = log10(y)
          }else{
            x_ = x
            y_ = y
          }
          # quant <- quantile(metric_value, c(0.001, 0.999))
          # metric_value_ = metric_value[metric_value > quant[1] & metric_value < quant[2]]
          metric_value_ = metric_value # for cluster-specific metrics, the sample size is too small for quantile trimming
          result[[metric]][cluster, model] <- mean(metric_value_, na.rm=TRUE)
          if(length(metric_value) == 0){
            result[[precision_name]][cluster, model] <- NA
          }else{
            # if(median(y_, na.rm=TRUE) / median(x_, na.rm=TRUE) > 10){
            #   # if the error are extremely biased, for instance, 10 fold than measurement,
            #   #   the precision (standard deviation) will reduce since they are uncorrectly high.
            #   #   Given that, the precision for this condition is assigned to NA values.
            #   #   Same as total calculation.
            #   result[[precision_name]][cluster, model] <- NA
            # }else{
            #   result[[precision_name]][cluster, model] <- sd(metric_value_, na.rm=TRUE)
            # }
            #item-precision####
            result[[precision_name]][cluster, model] <- 
              cal_precision(metric_value_, center.value, w_finite2, x_=x_)
          }
        }
           
      }else{ # no need for precision
        
        for(metric in metrics){
          
          # if(colnames(pred)[model] == "Chla_blend"){
          #   print(model)
          # }
          
          result[[metric]][cluster, model] <- cal.metrics(x, y, metric, 
                                                          log10 = log10, 
                                                          c.value = c.value)
        }
        
      }
      
      if(na.process){
        result[["Valid_percent"]][cluster, model] <- num_new / num_raw * 100
      }
    }
    
    if(total == TRUE){
      
      x <- meas[, 1]
      y <- pred[, model]
      num_raw <- length(x)
      
      if(na.process){
        #total-valid####
        w_finite <- which(is.finite(y) & y > 0)
        
        if(!is.null(valid.definition)) {
          w_finite2 <- def_validation(x, y, valid.definition, log10)
          num_new <- length(w_finite2)
        }else {
          w_finite2 <- w_finite
          num_new <- length(w_finite)
        }
        
        x <- x[w_finite]
        y <- y[w_finite]
        
        # x <- x[w_finite2]
        # y <- y[w_finite2]
        
      }
      
      for(metric in metrics){
        
        if(rownames(result[[metric]])[nrow(result[[metric]])] != 'SUM'){
          result[[metric]] %<>% rbind(.,NA)
          rownames(result[[metric]])[nrow(result[[metric]])] <- 'SUM'
        }
        
        # calculate precision for total
        if(cal.precision == TRUE){
          
          precision_name <- paste0(metric, '_p')
          if(rownames(result[[precision_name]])[nrow(result[[precision_name]])] != 'SUM'){
            result[[precision_name]] %<>% rbind(.,NA)
            rownames(result[[precision_name]])[nrow(result[[precision_name]])] <- 'SUM'
          }
          
          # note: this inner loop recomputes the SUM row for every metric, overriding the outer loop's current metric
          for(metric in metrics){
            
            if(metric == "RATIO") {
              center.value = 1
            }else {
              center.value = 0
            }
            
            metric_value <- cal.metrics.vector(x, y, metric, 
                                               log10 = log10,
                                               c.value = c.value)
            # For the total calculation, it would be unfair if outliers were included.
            # A quantile filter limits the statistics to the range between 0.1% and 99.9%.
            quant <- quantile(metric_value, c(0.001, 0.999))
            metric_value_ = metric_value[metric_value > quant[1] & metric_value < quant[2]]
            result[[metric]]['SUM', model] <- mean(metric_value_, na.rm=TRUE)
            precision_name <- paste0(metric, '_p')
            if(log10){
              x_ = log10(x)
              y_ = log10(y)
            }else{
              x_ = x
              y_ = y
            }
            # if(median(y_, na.rm=TRUE) / median(x_, na.rm=TRUE) > 10){
            #   result[[precision_name]]['SUM', model] <- NA
            # }else{
            #   result[[precision_name]]['SUM', model] <- sd(metric_value_, na.rm=TRUE)
            # }
            #total-precision####
            result[[precision_name]]['SUM', model] <- 
              cal_precision(metric_value_, center.value, w_finite2, x_=x_)
          }
          
        }else{ # no need for precision  total
          
          result[[metric]]['SUM', model] <- cal.metrics(x, y, metric, 
                                                        log10 = log10,
                                                        c.value = c.value)  
          
        }
        
      }
      
      if(na.process){
        result[["Valid_percent"]]['SUM', model] <- num_new / num_raw * 100
      }
    }
  }
  
  
  
  
  #Fuzzy mode####
  if(hard.mode == FALSE){
    
    result_fz <- result
    
    for(i in 1:ncol(memb)){
      for(j in 1:ncol(pred)){
        for(metric in metrics){
          
          x <- meas[, 1]
          y <- pred[, j]
          
          w_cluster <- which(cluster_crisp == i)
          
          if(all(is.na(y[w_cluster]))){ # all predictions from model j in cluster i are NA; return the metric as NA
            
            result_fz[[metric]][i,j] <- NA
            
          }else{
            
            if(na.process){
              w <- which(is.finite(y))
              x <- x[w]
              y <- y[w]
              memb_ <- memb[w,]
            }else{
              memb_ <- memb # no NA processing: use the full membership matrix
            }
            
            if(dim(memb_)[1] != length(x))
              stop("The fuzzy metrics are calculated with different rows from memb and pred")
            
            Er <- cal.metrics.vector(x, y, metric, 
                                     log10 = log10,
                                     c.value = c.value)

            result_fz[[metric]][i,j] <- sum(memb_[,i] * Er, na.rm=TRUE) / sum(memb_[,i], na.rm=TRUE)
            
          } # ENDIF
    
        }
      }
    }
    
    result <- result_fz
    
  }
  
  # Plot work
  if(plot.col == TRUE){
    
    res_plot <- list()
    res_plot_dt <- list()
    
    num.model <- ncol(pred)
    set.seed(1234)
    ind = runif(num.model) %>% sort.int(., index.return=TRUE) %>% .$ix
    cp = Spectral(num.model)[ind]

    for(metric in names(result)){
      
      tmp <- result[[metric]]
      
      tmp <- cbind(x=rownames(tmp), tmp)
      
      tmp <- melt(tmp, id="x", variable.name='Models')
      
      p <- ggplot(tmp) + 
        geom_col(aes(x=x,y=value,group=Models,fill=Models),
                 position="dodge") + 
        scale_fill_manual(values=cp) + 
        labs(y=metric) + 
        theme_bw() + 
        theme(axis.text.x.bottom = element_text(angle = 90, hjust=1))
      
      res_plot[[metric]] <- p
      
      names(tmp)[3] <- metric
      res_plot_dt[[metric]] <- tmp
      
    }
    
    result$res_plot <- res_plot
    result$res_plot_dt <- res_plot_dt
    
    tmp <- data.frame()
    for(i in 1:length(res_plot_dt)){
      if(i == 1){
        tmp = res_plot_dt[[1]]
      }else{
        tmp <- data.frame(tmp, res_plot_dt[[i]][,3])
        names(tmp)[ncol(tmp)] <- names(res_plot_dt[[i]])[3]
      }
    }
    tmpp <- melt(tmp, id=c("x","Models"), variable.name="Metric")
    
    result$res_plot_facet <- 
      ggplot(tmpp) + 
      geom_col(aes(x=x,y=value,group=Models,fill=Models), position='dodge') + 
      scale_fill_manual(values=cp) + 
      labs(x=NULL, y=NULL) + 
      theme_bw() + 
      theme(axis.text.x.bottom = element_text(angle = 90, hjust=1)) + 
      facet_wrap(~Metric, scales="free_y")
    
  }else{
    
    result$res_plot <- NULL
    result$res_plot_dt <- NULL
    result$res_plot_facet <- NULL
    
  }
  
  result$input <- list(
    
    pred           = pred,
    meas           = meas,
    memb           = memb,
    metrics        = metrics,
    log10          = log10,
    total          = total,
    hard.mode      = hard.mode,
    cal.precision  = cal.precision,
    na.process     = na.process
    
  )
  
  return(result)
  
}
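# Illustrative sketch of the two aggregation modes used above (toy numbers, not executed by the package):
# in hard mode, cal.metrics() is applied only to the samples whose crisp cluster (which.max of memb) is k;
# in fuzzy mode, the per-sample error vector from cal.metrics.vector() is averaged with the membership
# column of cluster k as weights, as in `sum(memb_[,i] * Er) / sum(memb_[,i])` above.
#
#   err <- c(0.2, 0.5, 0.1, 0.4)          # per-sample errors of one algorithm (e.g., absolute errors)
#   u_k <- c(0.9, 0.1, 0.7, 0.3)          # membership of each sample to cluster k
#   hard_k  <- mean(err[u_k >= 0.5])      # hard mode (simplified two-cluster case): crisp members only
#   fuzzy_k <- sum(u_k * err) / sum(u_k)  # fuzzy mode: membership-weighted mean error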

#' @noRd
#' @param metric_value_ metric_value_ of x and y. It is a vector
#' @param center.value the center value of metric. For instance, the value of BIAS is 0
#'   but the RATIO is 1
#' @param w_finite2 the index of valid value based on function \code{def_validation}
#' @param coef coefficient in calculating precision. Default is 2.
#' @param x_ Actual value
cal_precision <- function(metric_value_, center.value, w_finite2, coef = 2, x_) {
  
  # if(length(na.omit(metric_value_)) <= 2) {
  #   result <- sd(metric_value_, na.rm=TRUE) + abs(mean(metric_value_, na.rm=TRUE) - center.value) / coef
  # }else {
  #   result <- {density(metric_value_, na.rm = TRUE) %$% x[which.max(y)]}
  #   result <- abs(result) / coef + sd(metric_value_, na.rm=TRUE)
  # }
  
  ## just sd
  # result <- sd(metric_value_[w_finite2], na.rm = TRUE)
  
  ## sd plus abs of the average of decentralized er over a fixed value
  # result <- sd(metric_value_, na.rm=TRUE) + 
  #   abs(mean(metric_value_, na.rm=TRUE) - center.value) / 3
  
  ## sd multiplied by the absolute deviation of the mean error from the center value, scaled by the range of the true values
  result <- sd( metric_value_, na.rm=TRUE ) *
    abs( mean(metric_value_, na.rm=TRUE) - center.value ) /
    diff( range(x_, na.rm=TRUE) )
  
  return(result)
}
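# Worked sketch of the precision measure implemented in cal_precision() (toy numbers, not executed by the package):
# precision = sd(metric_value) * |mean(metric_value) - center.value| / diff(range(x_)).
#
#   mv <- c(0.10, 0.25, 0.15, 0.30)                      # per-sample metric values (e.g., BIAS), center.value = 0
#   x_ <- c(1.2, 2.5, 0.8, 3.1)                          # (log10-scaled) true values in the cluster
#   p  <- sd(mv) * abs(mean(mv) - 0) / diff(range(x_))   # same expression as in cal_precision()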

#' @noRd
#' @param x actual value
#' @param y predicted value
#' @param valid.definition list of definition \code{list(negative=FALSE, percent = 0.6)}
#' @param log10 log10
def_validation <- function(x, y, 
                           valid.definition = list(negative=FALSE, percent = 0.6), 
                           log10) {
  
  if(!log10) {
    tmp <- cbind(
      lower = x - x * valid.definition$percent,
      upper = x + x * valid.definition$percent
    )
    bounds <- apply(tmp, 1, sort)
    lower <- bounds[1, ]
    upper <- bounds[2, ]
  }else {
    lgx <- log10(x) 
    # lower  <- 10^lgx / 10^(lgx*valid.definition$percent)
    # upper  <- 10^lgx * 10^(lgx*valid.definition$percent)
    tmp <- cbind(
      lower = lgx - lgx * valid.definition$percent,
      upper = lgx + lgx * valid.definition$percent
    )
    bounds <- 10^apply(tmp, 1, sort)
    lower <- bounds[1, ]
    upper <- bounds[2, ]
  }
  
  if(valid.definition$negative == FALSE) { # means the negative value is invalid
    if(!log10) {
      lower[lower < 0] <- 0
    }
  }
  w_finite2 <- which(y > lower & y < upper)
  
  return(w_finite2)
  
}
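# Worked example of def_validation() with the default valid.definition (toy numbers, not executed by the package):
# with percent = 0.6 and log10 = FALSE, a prediction is valid when it lies within (meas - 0.6*meas, meas + 0.6*meas).
#
#   x <- c(10, 20, 50)                                          # measured values
#   y <- c(12, 45, 49)                                          # predictions: 12 and 49 are valid, 45 is not
#   def_validation(x, y, list(negative = FALSE, percent = 0.6), log10 = FALSE)
#   # returns the valid indices c(1, 3)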


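# Internal (undocumented) variant of Assessment_via_cluster: for each cluster and algorithm it computes
# a penalized BIAS and a penalized CRPE ((sd + mean absolute value) divided by the valid-prediction fraction),
# converts each row to a 0-50 score via Score_algorithms_sort2(), and returns the summed score together with
# the optimal algorithm per cluster.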
Assessment_via_cluster2 <- function(pred, meas, memb, 
                                    log10 = TRUE, total = TRUE,
                                    valid.definition = list(negative = FALSE, percent = 0.6)) {
  
  cluster <- apply(memb, 1, which.max)
  cluster_names <- colnames(memb)
  cluster <- cluster_names[cluster]
  model_names <- colnames(pred)
  
  if(log10) {
    pred <- log10(pred)
    meas <- log10(meas)
  }
  
  if(total) {
    
    result <- matrix(NA, ncol=ncol(pred), nrow = ncol(memb) + 1)
    colnames(result) <- model_names
    rownames(result) <- c(cluster_names, "SUM")
      
  }else {
    
    result <- matrix(NA, ncol=ncol(pred), nrow = ncol(memb))
    colnames(result) <- model_names
    rownames(result) <- c(cluster_names)
    
  }
  
  result_bias <- result_crpe <- result
  
  c.value <- mean(meas)
  
  for(i in rownames(result)) {
    
    for(j in colnames(result)) {
      
      if(i == "SUM") {
        w_sel <- 1:nrow(pred)
      }else {
        w_sel <- which(cluster %in% i)
      }
      
      x <- meas[w_sel]
      y <- pred[w_sel, j]
      
      lower <- x * (1 - valid.definition$percent)
      upper <- x * (1 + valid.definition$percent)
      
      w_valid <- which( y > lower & y < upper)
      
      Valid_percent <- length(w_valid) / length(x)
      
      bias <- x - y
      crpe <- 2 * (x - y) / (x + c.value) * 100
      
      result_bias[i, j] <- (sd(bias, na.rm=TRUE) + mean(abs(bias), na.rm = TRUE)) / Valid_percent
      result_crpe[i, j] <- (sd(crpe, na.rm=TRUE) + mean(abs(crpe), na.rm = TRUE)) / Valid_percent
      
      if(is.infinite(result_bias[i, j])) result_bias[i, j] <- NA
      if(is.infinite(result_crpe[i, j])) result_crpe[i, j] <- NA
      
    }
    
  }
  
  score_bias <- apply(result_bias, 1, function(x) Score_algorithms_sort2(x, max.score = 50)) %>% t
  score_crpe <- apply(result_crpe, 1, function(x) Score_algorithms_sort2(x, max.score = 50)) %>% t
  score_total <- score_bias + score_crpe
  colnames(score_total) <- model_names
  if(total) score_total <- score_total[-nrow(score_total), ] 
  
  Opt_algorihtm <- apply(score_total, 1, function(x) names(which.max(x)))
  
  return(list(
    result_bias = result_bias,
    result_crpe = result_crpe,
    score_bias  = score_bias,
    score_crpe  = score_crpe,
    score_total = score_total,
    Opt_algorihtm = Opt_algorihtm
  ))
  
}










#' @name Score_algorithms_interval
#' @title Score a vector of error metrics for algorithms by the interval
#' @param x Input vector of error metrics (NA values are allowed)
#' @param trim whether to run a trim process to calculate mean and standard deviation of 
#'   input vector x (Default as \code{FALSE})
#' @param reward.punishment Whether to conduct the reward and punishment mechanism in scoring
#'   system (Default as \code{TRUE})
#' @param decreasing the order of the good metric to be evaluated. For instance, MAE should use
#'   \code{decreasing = TRUE} (Default) since the algorithm performs better when MAE becomes smaller. 
#'   However, when comes to \code{Rsquare} from linear regression (maximum is 1), it should be
#'   \code{FALSE}
#' @param hundred.percent A constraint for percentage metrics (e.g., \code{Valid_percent}): when \code{TRUE},
#'   the input \code{x} must lie between 0 and 100.
#' @export
#' 
#' @return Results of \code{Score_algorithms_interval()} are returned as a list including:
#' \item{p}{A ggplot list of the scoring result.}
#' \item{score}{The final score determined by the interval-based scoring.}
#' \item{u}{Trimmed mean of input x with \code{NA} values removed.}
#' \item{bds}{Up and low boundaries for determining scores.}
#' \item{x}{The input x.}
#' 
#' @family Algorithm assessment
#' 
#' @importFrom stats qt sd
#' @import ggplot2
#' 
#' @examples 
#' set.seed(1234)
#' x = runif(10)
#' result = Score_algorithms_interval(x)
#' 

Score_algorithms_interval <- function(x, trim=FALSE, reward.punishment=TRUE, 
                             decreasing=TRUE, hundred.percent=FALSE
                             ){
  
  x <- as.numeric(x)
  
  if(length(x) <= 4){
    stop("The number of candidates is less than or equal to 4; there is no need to use this assessment system.")
  }else{
    if(anyNA(x)){
      n <- length(which(is.na(x) == FALSE))
      n_trim <- ceiling(n/2*0.1)
    }else{
      n <- length(x)
      n_trim <- ceiling(n/2*0.1)
    }
  }
  
  if(trim==TRUE){
    x_ = sort(x)[(n_trim+1):(n-n_trim)]
    u <- mean(x_, na.rm=TRUE)
    sig <- sd(x_, na.rm=TRUE)
  }else{
    u <- mean(x, na.rm=TRUE)
    sig <- sd(x, na.rm=TRUE)
  }
  
  tCL <- qt(0.975,length(x)-1) # 95% Student distribution
  
  if(hundred.percent == FALSE){
    UB = u + tCL*sig/sqrt(n)
    LB = u - tCL*sig/sqrt(n)
    bds = c(UB,LB)
  }else{
    bds = c(50,60,75,95)
    decreasing = FALSE
    reward.punishment = FALSE
  }
  
  tmp = data.frame(x=seq(1,length(x)), y=x)
  
  p <- ggplot() + 
    geom_point(data=tmp, aes(x,y)) + 
    geom_hline(yintercept=u,color='blue') + 
    geom_hline(yintercept=bds,color='blue',linetype=2) + 
    scale_x_continuous(breaks=seq(length(x))) +
    labs(x='Model ID', y='Error metric') + 
    theme_bw()
  
  score <- x * 0
  
  if(hundred.percent == TRUE){
    if(max(x) > 100 | min(x) < 0){
      stop("The input x has a number larger than 100 in Valid_percent metric!")
    }
    score[which(x >= 95)]          = 0
    score[which(x >= 75 & x < 95)] = -0.5 * 2
    score[which(x >= 60 & x < 75)] = -1   * 2   
    score[which(x >= 50 & x < 60)] = -1.5 * 2
    score[which(x <  50)]          = -2   * 2
    
  }else{
    
    if(decreasing == TRUE){ # The smaller value, the better performance
      score[which(x >= UB)] <- 0
      score[which(x < UB & x >= LB)] <- 1
      score[which(x < LB)] <- 2
      # Reward and punishment mechanism for worst and best model
      if(reward.punishment == TRUE){
        score[which.max(x)] <- score[which.max(x)] - 0.5
        score[which.min(x)] <- score[which.min(x)] + 0.5
      }    
    }else{
      score[which(x >= UB)] <- 2
      score[which(x < UB & x >= LB)] <- 1
      score[which(x < LB)] <- 0
      # Reward and punishment mechanism for worst and best model
      if(reward.punishment == TRUE){
        score[which.max(x)] <- score[which.max(x)] + 0.5
        score[which.min(x)] <- score[which.min(x)] - 0.5
      }  
    }
    
    # punish the NA results with -1 score (hundred.percent is FALSE in this branch)
    score[which(is.na(x))] <- -1
    
    
  }
  
  result <- list(p     = p,
                 score = score,
                 u     = u,
                 bds   = bds,
                 x     = x)
  
  return(result)
}
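# Illustrative sketch of the interval-based scoring above (toy numbers, not executed by the package):
# the 95% Student-t interval around the mean defines the lower/upper bounds, and each algorithm receives
# 0 / 1 / 2 points depending on which side of the interval its metric falls (decreasing = TRUE: smaller is better),
# plus the +/-0.5 reward/punishment for the single best and worst algorithms.
#
#   x  <- c(0.30, 0.45, 0.20, 0.80, 0.50)   # e.g., MAE of five candidate algorithms
#   u  <- mean(x); s <- sd(x); n <- length(x)
#   LB <- u - qt(0.975, n - 1) * s / sqrt(n)
#   UB <- u + qt(0.975, n - 1) * s / sqrt(n)
#   # x >= UB -> 0, LB <= x < UB -> 1, x < LB -> 2; then which.min(x) gains 0.5 and which.max(x) loses 0.5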


#' @name Score_algorithms_sort
#' @title Score a vector of error metrics for algorithms by the sort
#' 
#' @param x Input vector of error metrics (NA values are allowed)
#' @param decreasing the order of the good metric to be evaluated. For instance, MAE should use
#'   \code{decreasing = TRUE} (Default) since the algorithm performs better when MAE becomes smaller. 
#'   However, when comes to \code{Rsquare} from linear regression (maximum is 1), it should be
#'   \code{FALSE}
#'   
#' @export
#' 
#' @return Results of \code{Score_algorithms_sort()} are returned as a vector presenting score values.
#' 
#' @family Algorithm assessment
#' 
#' @examples 
#' set.seed(1234)
#' x = runif(10)
#' result = Score_algorithms_sort(x)
Score_algorithms_sort <- function(x, decreasing = TRUE, max.score = 100/4){
  
  x <- as.numeric(x)
  
  # decreasing == TRUE for smaller metrics that are better
  if(decreasing == TRUE){
    na.last = FALSE
  }else{
    na.last = TRUE
  }
  
  r <- sort.int(x, decreasing=decreasing, index.return = TRUE, na.last = na.last)
  
  score <- rep(NA, length(x))

  for(i in 1:length(x)){
    if(is.na(x[i])){
      score[i] = 0
    }else{
      score[i] = which(x[i] == r$x)[1] # take the first matched rank in case of ties
    }
  }
  
  score <- scales::rescale(score, to = c(0, max.score))
  
  # plot(density(x, na.rm=TRUE))
  # plot(density(score, na.rm=TRUE))
  
  return(score)

}

#' @rdname Score_algorithms_sort
#' @param max.score The max.score for \code{scales::rescale}. The default is \code{100/4}.
#' @export
#' @return Results of \code{Score_algorithms_sort()} and \code{Score_algorithms_sort2()} are 
#'   returned as a vector presenting score values.
#' @importFrom scales rescale
Score_algorithms_sort2 <- function(x, decreasing = TRUE, max.score = 100/4){
  
  x <- as.numeric(x)
  
  # decreasing == TRUE for smaller metrics that are better
  if(decreasing == TRUE){
    x_ <- -x
  }else{
    x_ <- x
  }
  
  score <- scales::rescale(x_, to = c(0, max.score))
  
  score[is.na(score)] <- 0
  
  return(score)
  
}
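# Illustrative contrast of the two sort-based scorers (toy numbers, not executed by the package):
# Score_algorithms_sort() ranks the metric values and rescales the ranks to [0, max.score], so the score
# depends only on the ordering; Score_algorithms_sort2() rescales the (negated) metric values themselves,
# so the score also reflects the spacing between algorithms.
#
#   x <- c(0.2, 0.21, 0.9)                    # three candidate metric values (smaller is better)
#   Score_algorithms_sort(x,  max.score = 25) # rank-based:  25, 12.5, 0
#   Score_algorithms_sort2(x, max.score = 25) # value-based: 25, ~24.6, 0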


#' @name Getting_Asses_results
#' @title Get the result of function Assessment_via_cluster
#' @description This function mainly uses the function \link{Assessment_via_cluster} to get
#'   assessments in both fuzzy and hard mode. Specifically, it returns the accuracy and precision of 
#'   \code{MAE}, \code{CMAPE}, \code{BIAS}, and \code{CMRPE}, which serve as the input of 
#'   the function \link{Scoring_system}.
#' @param sample.size Sample size. This supports running the function 
#'   \link{Assessment_via_cluster} in a bootstrap way. The number should not be larger than the row number 
#'   of \code{pred}.
#' @param replace Logical. Whether to sample with replacement; default as \code{FALSE}
#' @param pred Prediction matrix or data.frame
#' @param meas Measured (actual) matrix or data.frame
#' @param memb Membership matrix
#' @param metrics_used The metric combination used in the function. Default is \code{1}.
#' 
#' If \code{metrics_used = 1} then the used metrics are \code{c("MAE", "CMAPE", "BIAS", "CMRPE")}
#' 
#' If \code{metrics_used = 2} then the used metrics are \code{c("MAE", "CMAPE", "BIAS", "CMRPE", "RATIO")}
#' 
#' @param cluster Cluster vector. It can be derived from the parameter \code{memb}. Will be deprecated later.
#' @param seed Seed number for fixing the random process. See \code{help(set.seed)} for more details.
#' @param log10 pass to \link{Sampling_by_sort}.
#' @param valid.definition The definition of a valid prediction, default as \code{list(negative=FALSE, percent = 0.6)}.
#'   Invalid predictions are counted in \code{Valid_percent} but are not removed when calculating error metrics.
#' 
#' \code{negative=FALSE} means negative values are considered invalid predictions, while \code{percent} is the
#'   error-percentage tolerance: only predictions within \code{(meas - meas*percent, meas + meas*percent)} are valid.
#'   
#' @note The row numbers of \code{pred}, \code{meas}, \code{memb}, and \code{cluster} should be the same. 
#'   This function is designed for the bootstrapping process of Chla algorithm assessment. Therefore, 
#'   some parameters of \link{Assessment_via_cluster} are fixed, such as \code{log10 = TRUE} and 
#'   \code{na.process = TRUE}. Given that, this function may not be exported later to avoid confusion.
#' @export
#' @return A list containing fuzzy and hard results from \link{Assessment_via_cluster}
#' @family Algorithm assessment
#' 
#' @examples 
#' library(FCMm) 
#' library(ggplot2) 
#' library(magrittr)
#' library(stringr)
#' data("Nechad2015")
#' x <- Nechad2015[,3:11]
#' wv <- gsub("X","",names(x)) %>% as.numeric
#' set.seed(1234)
#' w <- sample.int(nrow(x), 300)
#' x <- x[w, ]
#' names(x) <- wv
#' nb = 4 # Obtained from the vignette "Cluster a new dataset by FCMm"
#' set.seed(1234)
#' FD <- FuzzifierDetermination(x, wv, do.stand=TRUE)
#' result <- FCM.new(FD, nb, fast.mode = TRUE)
#' p.spec <- plot_spec(result, show.stand=TRUE)
#' print(p.spec$p.cluster.spec)
#' Chla <- Nechad2015$X.Chl_a..ug.L.[w]
#' Chla[Chla >= 999] <- NA
#' dt_Chla <- run_all_Chla_algorithms(x) %>% as.data.frame
#' dt_Chla <- data.frame(Chla_true = Chla,
#' BR_Gil10 = dt_Chla$BR_Gil10, 
#' OC4_OLCI = dt_Chla$OC4_OLCI, 
#' OCI_Hu12 = dt_Chla$OCI_Hu12, 
#' NDCI_Mi12= dt_Chla$NDCI_Mi12) %>% round(3)
#' w = which(!is.na(dt_Chla$Chla_true))
#' dt_Chla = dt_Chla[w,]
#' memb = result$res.FCM$u[w,] %>% round(4)
#' cluster =  result$res.FCM$cluster[w]
#' Asses_results <- Getting_Asses_results(sample.size=length(cluster), 
#' pred = dt_Chla[,-1], meas = data.frame(dt_Chla[,1]), memb = memb, 
#' cluster = cluster)
#' 
Getting_Asses_results <- function(sample.size, replace = FALSE,
                                  pred, meas, memb,
                                  metrics_used = 1,
                                  cluster = apply(memb, 1, which.max), 
                                  seed = NULL, log10 = TRUE,
                                  valid.definition = list(negative=FALSE, percent = 0.6)){
  
  if(metrics_used == 1) {
    metrics = c("MAE", "CMAPE", "BIAS", "CMRPE")
  }else if(metrics_used == 2) {
    metrics = c("MAE", "CMAPE", "BIAS", "CMRPE", "RATIO")
  }
  
  meas <- data.frame(meas)
  
  if(sample.size > nrow(pred))
    stop("Enter a smaller sample size to subset the data set.")
  
  if(var(c(nrow(pred), nrow(meas), nrow(memb), length(cluster))) != 0)
    stop("Row numbers of pred, meas, memb, and cluster are different.")

  # Stratified sampling by cluster
  if(is.null(seed)){
    w <- Sampling_via_cluster(cluster, num=sample.size, log10 = log10,
                              replace=replace, order.value = unlist(meas))
  }else{
    set.seed(as.numeric(seed))
    w <- Sampling_via_cluster(cluster, num=sample.size, log10 = log10,
                              replace=replace, order.value = unlist(meas))
  }
  
  w <- sort(w)
  
  pred_ <- pred[w,]
  meas_ <- meas[w,]
  memb_ <- memb[w,]
  cluster_ <- cluster[w]
  
  Asses_fz <-  Assessment_via_cluster(pred=pred_,
                                      meas=meas_,
                                      memb=memb_,
                                      metrics = metrics,
                                      log10 = TRUE,
                                      hard.mode = FALSE,
                                      na.process = TRUE,
                                      plot.col = FALSE,
                                      valid.definition = valid.definition)
  
  Asses_p <-   Assessment_via_cluster(pred=pred_,
                                      meas=meas_,
                                      memb=memb_,
                                      metrics = metrics,
                                      log10 = TRUE,
                                      hard.mode = TRUE,
                                      cal.precision = TRUE,
                                      na.process = TRUE,
                                      plot.col = FALSE,
                                      valid.definition = valid.definition)
  
  result <- list(
                  # Asses_hd=Asses_hd, 
                  Asses_fz = Asses_fz,
                  Asses_p  = Asses_p, 
                  metrics = metrics,
                  valid.definition = valid.definition
                  )
  
  return(result)
}
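# Note on the two assessments returned above: Asses_fz (fuzzy mode) supplies the accuracy metrics and
# Valid_percent, while Asses_p (hard mode with cal.precision = TRUE) supplies the *_p precision tables.
# A minimal usage sketch, assuming pred/meas/memb are prepared as in the examples above:
#
#   Asses_results <- Getting_Asses_results(sample.size = nrow(memb), pred = pred, meas = meas, memb = memb)
#   Score <- Scoring_system(Asses_results)  # reads Asses_results$Asses_fz$MAE, Asses_results$Asses_p$BIAS_p, ...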


#' @name Scoring_system
#' 
#' @title The main function for algorithms scoring based on accuracy, precision, and effectiveness.
#' 
#' @param Inputs The list returned from the function \link{Getting_Asses_results}
#' @param method The method selected to score algorithms: 
#'   \itemize{
#'     \item \code{sort-based} (default) which is scored by the sort of accuracy and precision 
#'       metrics (see more in \link{Score_algorithms_sort}). 
#'     \item \code{sort-based2} which is scored by rescaling the accuracy and precision 
#'       metrics directly (see more in \link{Score_algorithms_sort2}). 
#'     \item \code{interval-based} which is relatively scored by the interval of accuracy and 
#'       precision (used by Brewin et al. (2015) and Neil et al. (2019)). 
#'       See more in \link{Score_algorithms_interval}).
#'   }
#' @param param_sort The parameters of function \link{Score_algorithms_sort}
#' @param param_interval The parameters of function \link{Score_algorithms_interval}
#' @param remove.negative Option to replace the negative score as zero (default as \code{FALSE})
#' @param Times Parameter of \code{Scoring_system_bootstrap}. The number of bootstrap runs of 
#'   \code{Scoring_system} (default as \code{1000})
#' @param replace Parameter of \code{Scoring_system_bootstrap}. The sample method for bootstrap running.
#'   Default as \code{TRUE}. See more details in \link{sample}.
#' @param accuracy.metrics accuracy used metrics, default as \code{c("MAE", "CMAPE")}
#' @param precision.metrics precision used metrics, default as \code{c("BIAS", "CMRPE")}
#' 
#' @export
#' 
#' @return The result of \code{Scoring_system} includes:
#' \itemize{
#'   \item \strong{Total_score} Data.frame of final score result with algorithm as column and cluster as row.
#'   \item \strong{Accuracy} Data.frame of \code{Accuracy} score with algorithm as column and cluster as row.
#'   \item \strong{Precision} Data.frame of \code{Precision} score with algorithm as column and cluster as row.
#'   \item \strong{Effectiveness} Data.frame of \code{Effectiveness} score with algorithm as column and cluster as row.
#'   \item \strong{Accuracy_list} List including data.frames of used \code{Accuracy} metrics.
#'   \item \strong{Precision_list} List including data.frames of used \code{Precision} metrics.
#'   \item \strong{Total_score.melt} Melted data.frame of \strong{Total_score} for plotting.
#'   \item \strong{Opt_algorithm} The optimal algorithm names for each cluster.
#'   \item \strong{Inputs} Inputs of this function.
#' } 
#' 
#' @note 
#'   \code{Scoring_system_bootstrap} is the bootstrap mode of \code{Scoring_system}, which is useful when
#'     the outcome is unstable for a large number of samples. The default bootstrap time in \code{Scoring_system_bootstrap}
#'     is set as \code{1000}, and its result is a list of several aggregated data.frames and standard deviations.
#'   
#' @details  
#' The \code{Accuracy} and \code{Precision} are newly defined in the \code{FCMm} package (following 
#'   Hooker et al. (2005)):
#' \itemize{
#'   \item \code{Accuracy} is the estimation of how close the result of the experiment is to the 
#'     true value.
#'   \item \code{Precision} is the estimation of how exactly the result is determined, independently 
#'     of any true value.
#' }
#' In other words, \code{Accuracy} is telling a story truthfully and \code{Precision} is how similarly
#'   the story is told over and over again.
#' Here we use AE, a vector over samples, for instance:
#' \itemize{
#'   \item \code{Accuracy} is the aggregation of AE (whether mean or median; in the fuzzy calculation process 
#'     we use the mean to some extent). 
#'   \item \code{Precision} is actually the \strong{stability} of AE (reproducibility), which means the error
#'     produced by the algorithm is under certain control.
#' }
#' Finally, the function will multiply the total score (\code{Accuracy} + \code{Precision}) by the 
#'   effectiveness (i.e., Valid_percent returned by \link{Assessment_via_cluster}).
#' 
#' @family Algorithm assessment
#' 
#' @importFrom reshape2 melt
#' @importFrom magrittr %>%
#' 
#' @references
#' \itemize{
#'   \item Hooker S B. Second SeaWiFS HPLC analysis round-robin experiment (SeaHARRE-2)[M]. 
#'     National Aeronautics and Space Administration, Goddard Space Flight Center, 2005. 
#'   \item Seegers B N, Stumpf R P, Schaeffer B A, et al. Performance metrics for the assessment 
#'     of satellite data products: an ocean color case study[J]. Optics express, 2018, 26(6): 7404-7422.
#'   \item Neil C, Spyrakos E, Hunter P D, et al. A global approach for chlorophyll-a retrieval 
#'     across optically complex inland waters based on optical water types[J]. Remote Sensing of 
#'     Environment, 2019, 229: 159-178.
#'   \item Brewin R J W, Sathyendranath S, Müller D, et al. The Ocean Colour Climate Change 
#'     Initiative: III. A round-robin comparison on in-water bio-optical algorithms[J]. Remote Sensing of 
#'     Environment, 2015, 162: 271-294.
#'   \item Moore T S, Dowell M D, Bradt S, et al. An optical water type framework for selecting and 
#'     blending retrievals from bio-optical algorithms in lakes and coastal waters[J]. Remote sensing of 
#'     environment, 2014, 143: 97-111.
#' } 
#' 
#' @examples 
#' library(FCMm) 
#' library(ggplot2) 
#' library(magrittr)
#' library(stringr)
#' data("Nechad2015")
#' x <- Nechad2015[,3:11]
#' wv <- gsub("X","",names(x)) %>% as.numeric
#' set.seed(1234)
#' w <- sample.int(nrow(x))
#' x <- x[w, ]
#' names(x) <- wv
#' nb = 4 # Obtained from the vignette "Cluster a new dataset by FCMm"
#' set.seed(1234)
#' FD <- FuzzifierDetermination(x, wv, do.stand=TRUE)
#' result <- FCM.new(FD, nb, fast.mode = TRUE)
#' p.spec <- plot_spec(result, show.stand=TRUE)
#' print(p.spec$p.cluster.spec)
#' Chla <- Nechad2015$X.Chl_a..ug.L.[w]
#' Chla[Chla >= 999] <- NA
#' dt_Chla <- run_all_Chla_algorithms(x) %>% as.data.frame
#' dt_Chla <- data.frame(Chla_true = Chla,
#' BR_Gil10 = dt_Chla$BR_Gil10, 
#' OC4_OLCI = dt_Chla$OC4_OLCI, 
#' OCI_Hu12 = dt_Chla$OCI_Hu12, 
#' NDCI_Mi12= dt_Chla$NDCI_Mi12) %>% round(3)
#' w = which(!is.na(dt_Chla$Chla_true))
#' dt_Chla = dt_Chla[w,]
#' memb = result$res.FCM$u[w,] %>% round(4)
#' cluster =  result$res.FCM$cluster[w]
#' Asses_results <- Getting_Asses_results(sample.size=length(cluster), 
#' pred = dt_Chla[,-1], meas = data.frame(dt_Chla[,1]), memb = memb, 
#' cluster = cluster)
#' Score = Scoring_system(Asses_results)
#' # show the total score table
#' knitr::kable(round(Score$Total_score, 2))
#' 
Scoring_system <- function(Inputs, # return of Getting_Asses_results
                           method = 'sort-based',
                           param_sort = list(decreasing = TRUE, max.score = NULL),
                           param_interval = list(trim=FALSE, reward.punishment=TRUE,
                                             decreasing=TRUE, hundred.percent=FALSE),
                           remove.negative = FALSE,
                           accuracy.metrics = c( "MAE", "CMAPE" ),
                           precision.metrics = c( "BIAS", "CMRPE" )){
  
  Asses_fz <- Inputs$Asses_fz
  Asses_p  <- Inputs$Asses_p # precision assessment (hard mode is used when calculating precision)
  valid.definition <- Inputs$valid.definition
  n_metrics <- length(Inputs$metrics)
  metrics <- Inputs$metrics
  
  if(is.null(param_sort$max.score)) {
    param_sort$max.score = 100/n_metrics  
  }
  
  if("RATIO" %in% names(Inputs$Asses_fz)) {
    Inputs$Asses_fz$RATIO = abs(Inputs$Asses_fz$RATIO - 1)
  }
  
  # scoring method definition
  method = match.arg(method, c('sort-based', 'sort-based2', 'interval-based'))
  if(method == 'sort-based') {
    Score_algorithms <- function(x) {
      return(Score_algorithms_sort(x, 
                                   decreasing = param_sort$decreasing,
                                   max.score = param_sort$max.score))
    }
  }else if(method == 'interval-based') {
    Score_algorithms <- function(x) {
      a = param_interval
      r = Score_algorithms_interval(x, 
                                    trim=a$trim, 
                                    reward.punishment = a$reward.punishment,
                                    decreasing=a$decreasing, 
                                    hundred.percent = a$hundred.percent)
      return(r$score)
    }
  }else if(method == "sort-based2") {
    Score_algorithms <- function(x) {
      return(Score_algorithms_sort2(x, 
                                    decreasing = param_sort$decreasing, 
                                    max.score = param_sort$max.score))
    }
  }else {
    stop('Method selection error.')
  }
  
  #---------------# 
  # Accuracy part #
  #---------------#
  Accuracy_list <- list()
  a.metric <- Inputs$metrics[Inputs$metrics %in% accuracy.metrics]
  j = 1
  for(metric in a.metric) {
    tmp <- Asses_fz[[metric]]
    tmp_score <- tmp * NA
    for(i in 1:nrow(tmp)) {
      tmp_score[i, ] <- Score_algorithms(tmp[i, ])
    }
    Accuracy_list[[j]] <- tmp_score
    names(Accuracy_list)[j] <- metric
    rm(tmp, tmp_score)
    j = j + 1
  }
  Accuracy <- Accuracy_list[[1]] * 0
  for(i in 1:length(Accuracy_list)) {
    Accuracy <- Accuracy + Accuracy_list[[i]]
  }
  
  #----------------#
  # Precision part #
  #----------------#
  Precision_list <- list()
  p.metric <- Inputs$metrics[Inputs$metrics %in% precision.metrics]
  j = 1
  for(metric in p.metric) {
    metric_name <- sprintf("%s_p",metric)
    tmp <- Asses_p[[metric_name]]
    tmp_score <- tmp * NA
    for(i in 1:nrow(tmp)) {
      tmp_score[i, ] <- Score_algorithms(abs(tmp[i, ]))
    }
    Precision_list[[j]] <- tmp_score
    names(Precision_list)[j] <- metric
    rm(tmp, tmp_score)
    j = j + 1
  }
  Precision <- Precision_list[[1]] * 0
  for(i in 1:length(Precision_list)) {
    Precision <- Precision + Precision_list[[i]]
  }
  
  #-------------#
  # total score # 
  #-------------#
  Total_score <- (Accuracy + Precision) * Asses_fz$Valid_percent / 100
  
  if(remove.negative == TRUE){
    w = which(Total_score < 0, arr.ind=TRUE)
    for(i in 1:nrow(w))
      Total_score[w[i,1],w[i,2]] <- 0
  }
  
  w = which(is.na(Total_score), arr.ind=TRUE)
  if(nrow(w) != 0){
    for(i in 1:nrow(w))
      Total_score[w[i,1],w[i,2]] <- 0
  }
  
  # output optimal algorithm names
  Opt_algorithm <- Total_score[-nrow(Total_score), ] %>% 
    apply(., 1, which.max) %>% 
    names(Total_score)[.] %>% 
    setNames(., rownames(Total_score)[-nrow(Total_score)]) 
  
  Opt_algorithm.sec <- Total_score[-nrow(Total_score), ] %>% 
    apply(., 1, which.max.sec) %>% 
    names(Total_score)[.] %>% 
    setNames(., rownames(Total_score)[-nrow(Total_score)]) 
  
  Total_score.melt <- melt(cbind(x=rownames(Total_score), Total_score), id='x')
  
  result <- list(
                 Total_score           = Total_score,
                 Accuracy              = Accuracy,
                 Precision             = Precision,
                 Effectiveness         = Asses_fz$Valid_percent,
                 Accuracy_list         = Accuracy_list,
                 Precision_list        = Precision_list,
                 Total_score.melt      = Total_score.melt,
                 Opt_algorithm         = Opt_algorithm,
                 Opt_algorithm.sec     = Opt_algorithm.sec,
                 Inputs                = Inputs,
                 valid.definition      = valid.definition
                )
  
  return(result)
  
}
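# Illustrative sketch of how the final score is assembled in Scoring_system() (toy numbers, not executed by the package):
# each accuracy metric and each precision metric contributes up to max.score = 100 / n_metrics points, and the
# summed score is multiplied by the effectiveness (Valid_percent / 100).
#
#   accuracy  <- 25 + 20                             # e.g., MAE score + CMAPE score of one algorithm in one cluster
#   precision <- 22 + 18                             # e.g., BIAS_p score + CMRPE_p score
#   total     <- (accuracy + precision) * 85 / 100   # with 85% valid predictions, the total score is 72.25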



#' @export
#' @rdname Scoring_system
#' @param metrics_used The metric combination used in the function. Default is \code{1}.
#' 
#' If \code{metrics_used = 1} then the used metrics 
#'   are \code{c("MAE", "CMAPE", "BIAS", "CMRPE")}
#' 
#' If \code{metrics_used = 2} (dont use this) then the used metrics 
#'   are \code{c("MAE", "CMAPE", "BIAS", "CMRPE", "RATIO")}
#' 
#' @param dont_blend Whether to skip the algorithm blending process. Default is \code{FALSE}.
#'   This is useful when you just want to score the candidate algorithms.
#'   
#' @param verbose Show the iteration message.
#' 
#' @return The result of \code{Scoring_system_bootstrap} includes:
#' \itemize{
#'   \item \strong{Times} The times of bootstrap running.
#'   \item \strong{Score_all_clusters} The total score for algorithms across all clusters.
#'   \item \strong{Score_list} All times of bootstrapping results are recorded in it.
#'   \item \strong{Score_list_melt} Melted \code{Score_list}.
#'   \item \strong{Opt_algorithm_list} The optimal algorithm for every run.
#'   \item \strong{Opt_algorithm} The optimal algorithm defined by mode of \code{Opt_algorithm_list}
#'     for each cluster.
#'   \item \strong{Remove_algorithm} The algorithms to be removed when blending.
#'   \item \strong{plot_col} The col plot of \code{Score_list_melt}.
#'   \item \strong{plot_scatter} The scatter plot of measured and predicted Chla concentration
#'     colored by clusters.
#'   \item \strong{plot_scatter_opt} The scatter plot of measured and predicted Chla concentration
#'     colored by clusters for optimized algorithms.
#'   \item \strong{Blend_result} The results from the inherent function \code{Chla_algorithms_blend}.
#'   \item \strong{dt_Chla} Data.frame with the combination of candidate algorithms and blended results.
#'   \item \strong{Chla_blend} The blended Chla concentration by score results.
#'   \item \strong{Results_of_scoring_system} A list including all results of \link{Scoring_system} function.
#'   \item \strong{metric_results} A result of \link{Assessment_via_cluster} which includes the Chla blend results.
#' } 
#' 
#' @importFrom stats aggregate
#' @importFrom stringr str_split str_extract_all str_detect
#' 
#' @examples 
#' # Examples of `Scoring_system_bootstrap`
#' 
#' set.seed(1234)
#' Score_boo <- Scoring_system_bootstrap(Times = 3, Asses_results) 
#' # try to set large `Times` when using your own data
#' 
#' # Show the bar plot of scores
#' Score_boo$plot_col
#' 
#' # Show the scatter plot of measure-estimation pairs
#' Score_boo$plot_scatter
#' 
#' # Show error metrics
#' knitr::kable(round(Score_boo$metric_results$MAE, 2), caption = "MAE")
#' knitr::kable(round(Score_boo$metric_results$CMAPE, 2), caption = "CMAPE")
#' knitr::kable(round(Score_boo$metric_results$BIAS, 2), caption = "BIAS")
#' knitr::kable(round(Score_boo$metric_results$CMRPE, 2), caption = "CMRPE")
#' 
#' # you should see the blended estimations outperform the other candidates
#' 
Scoring_system_bootstrap <- function(Times = 1000, 
                                     Inputs, replace = TRUE, 
                                     method = 'sort-based',
                                     metrics_used = 1,
                                     param_sort = list(decreasing = TRUE, max.score = NULL),
                                     param_interval = list(trim=FALSE, reward.punishment=TRUE,
                                                           decreasing=TRUE, hundred.percent=FALSE),
                                     remove.negative = FALSE,
                                     dont_blend = FALSE,
                                     verbose = TRUE){
  
  if(Times <= 1) stop("Times should be larger than 1")
  
  pred = round(Inputs$Asses_fz$input$pred, 4)
  meas = round(Inputs$Asses_fz$input$meas, 4)
  memb = round(Inputs$Asses_fz$input$memb, 4)
  cluster = apply(memb, 1, which.max)
  K = ncol(memb)
  metrics = Inputs$metrics
  
  valid.definition = Inputs$valid.definition
  
  Score_list <- NULL
  Results_of_scoring_system <- list()
  
  for(i in 1:Times){
    
    # if(verbose) cat(i, "/", Times, "\n")
    
    if(verbose) {
      
      cat(".")
      if(i %% 20 == 0) cat(i, "[", Times,"]\n") 
      if(i == Times) cat("\n")
    }
    
    Asses_list <- Getting_Asses_results(sample.size= nrow(memb),
                                        metrics_used = metrics_used,
                                        pred, meas, memb, replace = replace,
                                        valid.definition = valid.definition)
    Score <- Scoring_system(Inputs = Asses_list,
                            method = method,
                            param_sort = param_sort,
                            param_interval = param_interval,
                            remove.negative = remove.negative)
    
    Results_of_scoring_system[[i]] <- Score
    
    if(i == 1){
      Score_list <- Score$Total_score.melt
      names(Score_list)[ncol(Score_list)] <- i
      Opt_algorithm_list <- Score$Opt_algorithm
      Opt_algorithm.sec_list <- Score$Opt_algorithm.sec
    }else{
      Score_list <- cbind(Score_list, Score$Total_score.melt[,3])
      names(Score_list)[ncol(Score_list)] <- i
      Opt_algorithm_list <- rbind(Opt_algorithm_list, Score$Opt_algorithm)
      Opt_algorithm.sec_list <- rbind(Opt_algorithm.sec_list, Score$Opt_algorithm.sec)
    }
  
  }
  
  # Opt algorithms for each time
  Opt_algorithm_list <- cbind(seq(1, Times), Opt_algorithm_list)
  Opt_algorithm_list <- data.frame(Opt_algorithm_list, stringsAsFactors = FALSE)
  colnames(Opt_algorithm_list) <- c("Times", names(Score$Opt_algorithm))
  
  Opt_algorithm.sec_list <- cbind(seq(1, Times), Opt_algorithm.sec_list)
  Opt_algorithm.sec_list <- data.frame(Opt_algorithm.sec_list, stringsAsFactors = FALSE)
  colnames(Opt_algorithm.sec_list) <- c("Times", names(Score$Opt_algorithm))
  

  # Calculate the score statistic information
  u_arr   = apply(Score_list[, 3:ncol(Score_list)], 1, mean) %>% round(2)
  sig_arr = apply(Score_list[, 3:ncol(Score_list)], 1, sd) %>% round(2)
  ymin    = u_arr - sig_arr
  ymax    = u_arr + sig_arr
  
  res_Score <- cbind(Score_list[,1:2],
                     value    = u_arr,
                     sig_arr  = sig_arr) %>% level_to_variable
  Score_all_clusters = res_Score[res_Score$x == "SUM", -1]
  res_Score = res_Score[!(res_Score$x %in% 'SUM'),]
  x_nums <- names(table(res_Score$x)) %>%
    stringr::str_extract_all("[:digit:]+", simplify = TRUE) %>%
    as.vector()
  x_prefix <- names(table(res_Score$x)) %>%
    stringr::str_extract_all("[:letter:]+", simplify = TRUE) %>% 
    table() %>% .[which.max(.)] %>% names()
  x_space <- names(table(res_Score$x)) %>%
    stringr::str_detect("[:blank:]") %>% all()
  x_space <- ifelse(x_space, " ", "")
  x_levels <- paste0(x_prefix, x_space, sort(as.numeric(x_nums)))
  res_Score$x_f = factor(res_Score$x, levels = x_levels)
  
  # The optimal algorithm defined by maximum scores per cluster
  # Opt_algorithm = rep("", K) %>% setNames(., x_levels)
  # for(i in x_levels) {
  #   Opt_algorithm[i] <- res_Score[res_Score$x == i, c("x", "variable", "value")] %$%
  #     variable[which.max(value)]
  # }
  Opt_algorithm <- apply(Opt_algorithm_list[,-1], 2, function(x) table(x) %>% which.max() %>% names() )
  Opt_algorithm.sec <- apply(Opt_algorithm.sec_list[,-1], 2, function(x) table(x) %>% which.max() %>% names() )

  ## To-do: 
  ## In some cases, the optimal algorithm may not return finite values, especially for image rasters,
  ##   mostly because of inappropriate atmospheric correction. Given that, it is necessary to select 
  ##   several (two or more) substitutes as a backup.
  
  # define the position of optimal and removed algorithms
  res_Score$pos_opt.sec <- res_Score$pos_opt <- res_Score$pos_removed <- res_Score$sig_arr * NA
  for(i in 1:length(Opt_algorithm)){
    w = which(res_Score$x == names(Opt_algorithm)[i] & res_Score$variable == Opt_algorithm[i])
    res_Score$pos_opt[w] = res_Score$value[w]
  }
  for(i in 1:length(Opt_algorithm.sec)){
    w = which(res_Score$x == names(Opt_algorithm.sec)[i] & res_Score$variable == Opt_algorithm.sec[i])
    res_Score$pos_opt.sec[w] = res_Score$value[w]
  }
  
  # The removed algorithms defined by the lower half res_Score per cluster
  remove_ratio = 1/2
  if(remove_ratio > 1) stop("The remove ratio should be between zero and one!")
  for(i in levels(res_Score$x_f)){
    tmpdt = res_Score[res_Score$x == i, ]
    tmpdt = tmpdt[tmpdt$variable %in% Opt_algorithm, ]
    num_removed = floor(length(unique(tmpdt$variable)) * remove_ratio)
    w_removed = which(res_Score$x == i & res_Score$value <= sort(tmpdt$value)[num_removed])
    res_Score$pos_removed[w_removed] = res_Score$value[w_removed]
  }
  ws_removed <- which(!is.na(res_Score$pos_removed))
  w_cancel <- which(!(res_Score$variable[ws_removed] %in% Opt_algorithm))
  res_Score$pos_removed[ws_removed][w_cancel] <- NA
 
  # These algorithms are not taken into account when blending
  Remove_algorithm <- list()
  for(i in 1:length(unique(res_Score$x))){
    Remove_algorithm[[i]] <- res_Score$variable[which(!is.na(res_Score$pos_removed) & res_Score$x == unique(res_Score$x)[i])]
  }
  
  # add box
  res_Score$box <- res_Score$sig_arr * NA
  w_box <- which(res_Score$variable %in% Opt_algorithm)
  res_Score$box[w_box] <- res_Score$value[w_box]
  
  # add levels to res_Score$variable as the sort of colnames(pred)
  res_Score$variable <- factor(res_Score$variable, levels = colnames(pred))
  
  # plot Score_list
  num.model <- Score_list$variable %>% unique %>% length
  set.seed(1234)
  ind = runif(num.model) %>% sort.int(., index.return=TRUE) %>% .$ix
  
  dodge <- position_dodge(width=0.9)
  
  # plot_col <- ggplot(data=res_Score) +
  #   geom_col(aes(x=x_f, y=value, group=variable, fill=variable), position=dodge) + 
  #   geom_errorbar(aes(x=x_f, ymin=value-sig_arr, ymax=value+sig_arr, group=variable),
  #                 position=dodge, width=0.6) + 
  #   scale_fill_manual(values=Spectral(num.model)[ind]) +
  #   labs(y='Score', fill='Algorithm', x=NULL)+
  #   facet_wrap(~x_f, scales='free_x') + 
  #   theme_bw() + 
  #   theme(axis.text.x.bottom = element_blank(),
  #         axis.ticks.x.bottom = element_blank(),
  #         strip.background = element_rect(fill='white', color='white'),
  #         strip.text = element_text(face='bold'),
  #         text = element_text(size=13),
  #         legend.position = "right")
  #plot column for score####
  plot_col <- 
    ggplot() +
    geom_col(data=res_Score, 
             aes(x=x_f, y=value, group=variable, fill=variable),
             alpha=0.5, position=dodge) + 
    geom_col(data=res_Score, 
             aes(x=x_f, y=box, group=variable, fill=variable), color="gray20", 
             position=dodge) +
    geom_errorbar(data=res_Score, 
                  aes(x=x_f, ymin=value-sig_arr, ymax=value+sig_arr, group=variable),
                  position=dodge, width=0.6, alpha=1) + 
    geom_point(data=res_Score, 
               aes(x=x_f, 
                   # y=pos_opt, 
                   y= pos_opt*0-4,
                   group=variable),
               position=dodge, color="darkgreen", fill="darkgreen", shape=23, stroke = 0.5, size=2.5) + 
    geom_point(data=res_Score, 
               aes(x=x_f, 
                   # y=pos_removed, 
                   y=pos_removed*0-5, 
                   group=variable),
               position=dodge, color="darkred", fill="darkred", shape=24, stroke = 0.5, size=2) + 
    scale_fill_manual(values=Spectral(num.model)[ind]) +
    scale_y_continuous(expand = expansion(0.03)) + 
    scale_x_discrete(expand = expansion(add = 0.5)) + 
    labs(y='Score', fill='Algorithm', x=NULL)+
    facet_wrap(~x_f, scales='free_x', nrow=3) + 
    theme_bw() + 
    theme(axis.text.x.bottom = element_blank(),
          axis.ticks.x.bottom = element_blank(),
          # panel.grid.major.x = element_blank(),
          # strip.background = element_rect(fill='white', color='white'),
          strip.background = element_blank(), 
          strip.text = element_text(face='bold'),
          panel.spacing.y = unit(0.5, "lines"),
          text = element_text(size=13),
          # legend.position = c(1,0),
          legend.position = "right",
          # legend.justification = c(1,0),
          legend.justification = c(0.5, 0.5),
          legend.text = element_text(size=8),
          legend.title = element_text(size=9, face="bold")) +
    # guides(fill = guide_legend(ncol=3))
    guides(fill = guide_legend(ncol=1))
  
  
  #-----------------------#
  #                       #
  ######Blending work######
  #                       #
  #-----------------------#
  
  if(dont_blend) {
    
    Chla_blend <- NULL
    plot_scatter <- NULL
    plot_scatter_opt <- NULL
    Blend_result <- NULL
    metric_results <- NULL
    Chla_plots <- NULL
    
  }else { # run blending
    
    dt_Chla_opt <- pred[, Opt_algorithm]
    Blend_result <- Chla_algorithms_blend(dt_Chla_opt, memb, Opt_algorithm, Remove_algorithm)
    Chla_blend <- Blend_result$Chla_blend
    
    # draw a scatter plot colored by clusters
    Chla_limits <- (range(meas) * c(0.9, 1.1)) %>% round(4)
    if(Chla_limits[1] <= 0) Chla_limits[1] = 0.1
    Chla_plots <- data.frame(Chla_true=meas, 
                             cluster = paste0(x_prefix, x_space, cluster), 
                             pred, Chla_blend)
    Chla_plots$cluster <- factor(Chla_plots$cluster, levels = x_levels, ordered = TRUE)
    levels(Chla_plots$cluster) <- x_nums
    names(Chla_plots)[1] <- "Chla_true"
    dt_Chla_ <- reshape2::melt(Chla_plots, id=c("Chla_true", "cluster"))
    dt_Chla_$value_opt <- NA
    for(i in x_nums){
      w = which(dt_Chla_$cluster == i & dt_Chla_$variable == Opt_algorithm[x_levels[as.numeric(i)]])
      dt_Chla_$value_opt[w] <- dt_Chla_$value[w]
    }
    w = which(dt_Chla_$variable == "Chla_blend")
    dt_Chla_$value_opt[w] <- dt_Chla_$value[w]
    
    cp = RdYlBu(K)
    names(cp) <- x_nums
    
    levels(dt_Chla_$variable) %<>% gsub("Chla_blend", "Blend_FCMm", .)
    
    #plot scatter for all####
    plot_scatter <- 
      ggplot() + 
      geom_abline(intercept=0, slope=1, linetype=2) + 
      geom_point(data=dt_Chla_, 
                 aes(x=Chla_true, y=value, color=cluster), 
                 # shape = "circle open", stroke = 1, 
                 alpha=0.3) + 
      geom_point(data=dt_Chla_, 
                 aes(x=Chla_true, y=value_opt, fill=cluster), 
                 shape = "circle filled", color = "black",
                 alpha=1) + 
      geom_rug(data=dt_Chla_, 
               aes(x=Chla_true, y=value, color=cluster), alpha=0.2, show.legend = F) + 
      scale_x_log10(limits=Chla_limits) + scale_y_log10(limits=Chla_limits) +
      labs(x='Measured Chla [ug/L]', y='Predicted Chla [ug/L]', color='OWT', fill="OWT") + 
      scale_color_manual(values=cp) +
      scale_fill_manual(values=cp) + 
      theme_bw() + 
      facet_wrap(~variable) +
      # facet_grid(variable~cluster) + 
      theme(legend.position = "right",
            strip.background = element_rect(fill='white', color='black'),
            strip.text = element_text(face='bold'),
            text = element_text(size=13)) + 
      guides(col = guide_legend(ncol=1))
    
    dt_Chla_sub <- dt_Chla_[dt_Chla_$variable %in% c(
      "Blend_FCMm", unique(Opt_algorithm)
    ),]
    #plot scatter for optimals####
    plot_scatter_opt <- 
      ggplot() + 
      geom_abline(intercept=0, slope=1, linetype=2) + 
      geom_point(data=dt_Chla_sub, 
                 aes(x=Chla_true, y=value, color=cluster), 
                 # shape = "circle open", stroke = 1, 
                 alpha=0.3) + 
      geom_point(data=dt_Chla_sub, 
                 aes(x=Chla_true, y=value_opt, fill=cluster), 
                 shape = "circle filled", color = "black",
                 alpha=1) + 
      geom_rug(data=dt_Chla_sub, 
               aes(x=Chla_true, y=value, color=cluster), alpha=0.2, show.legend = F) + 
      scale_x_log10(limits=Chla_limits) + scale_y_log10(limits=Chla_limits) +
      labs(x='Measured Chla [ug/L]', y='Predicted Chla [ug/L]', color='OWT', fill="OWT") + 
      scale_color_manual(values=cp) +
      scale_fill_manual(values=cp) + 
      theme_bw() + 
      # facet_wrap(~variable) +
      facet_grid(variable~cluster) +
      theme(legend.position = "right",
            strip.background = element_rect(fill='white', color='white'),
            strip.text.x = element_text(face='bold', angle = 0),
            strip.text.y = element_text(face='bold', angle = 45, hjust=1),
            axis.text.x.bottom = element_text(angle = 45),
            text = element_text(size=13)) + 
      guides(col = guide_legend(ncol=1))
    
    metric_results <- Assessment_via_cluster(
      pred = cbind(Chla_plots[, c(unique(Opt_algorithm))], Chla_blend = Chla_blend),
      meas = Chla_plots$Chla_true,
      memb = memb,
      metrics = metrics,
      log10 = TRUE,
      total = TRUE,
      hard.mode = TRUE,
      na.process = TRUE,
      valid.definition = valid.definition
    )
    
  } # run blending
  
  
  #####return outputs#####
  result = list(
    Times              = Times,
    Score_all_clusters = Score_all_clusters,
    Score_list         = Score_list, 
    Score_list_melt    = res_Score,
    Opt_algorithm_list = Opt_algorithm_list,
    Opt_algorithm      = Opt_algorithm,
    Opt_algorithm.sec  = Opt_algorithm.sec, 
    Remove_algorithm   = Remove_algorithm,
    plot_col           = plot_col,
    plot_scatter       = plot_scatter,
    plot_scatter_opt   = plot_scatter_opt,
    Blend_result       = Blend_result, 
    dt_Chla            = Chla_plots,
    Chla_blend         = Chla_blend,
    Results_of_scoring_system = Results_of_scoring_system,
    metric_results     = metric_results,
    misc_scatter       = list(dt_Chla_ = dt_Chla_, 
                              dt_Chla_sub = dt_Chla_sub, 
                              Chla_limits = Chla_limits,
                              cp = cp)
  )
  
  return(result)
  
}
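
# A minimal usage sketch of the returned list (kept commented out; `Score` is a
# hypothetical object holding the list assembled above):
# print(Score$plot_col)          # per-cluster score bars with optimal/removed markers
# print(Score$plot_scatter_opt)  # measured vs. predicted Chla for optimal algorithms and the blend
# head(Score$Chla_blend)         # membership-blended Chla estimates
# Score$metric_results           # Assessment_via_cluster() output for the optimal and blended Chla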
  

#' @name Chla_algorithms_blend
#' 
#' @title Algorithms blending via membership values based on optimal candidates
#' 
#' @param dt_Chla_opt The data.frame includes estimation by optimal algorithms per cluster
#' @param memb Membership values
#' @param Opt_algorithm Optimal algorithm name
#' @param Remove_algorithm Removed algorithm name
#' 
#' @noRd
#' @return The result of \code{Chla_algorithms_blend} is a list including:
#'   \itemize{
#'     \item \strong{modified_memb_list} The vector recording the modified membership values across all samples.
#'     \item \strong{Chla_blend} The final blended Chla result after modification.
#'     \item \strong{new_memb} The new membership values after modification.
#'   }
#' 
#' @importFrom stringr str_split
#' @importFrom magrittr %>%
Chla_algorithms_blend <- function(dt_Chla_opt, memb, Opt_algorithm, Remove_algorithm){
  
  if(length(unique(c(
    length(Opt_algorithm),
    ncol(dt_Chla_opt),
    ncol(memb)
  ))) != 1){
    stop("The input dt_Chla_opt, memb, and Opt_algorithm should have same length or column number!")
  }
  
  if(nrow(dt_Chla_opt) != nrow(memb)) 
    stop("The row number of dt_Chla_opt and memb should be equal!")

  # if(all(Opt_algorithm %in% names(dt_Chla_opt))){
  #   stop("The Opt_algorithm should be parts of dt_Chla_opt colnames")
  # }
  
  if(!(is.list(Remove_algorithm) | is.null(Remove_algorithm))){
    stop("Remove_algorithm should be a list or NULL.")
  }
  
  cluster <- apply(memb, 1, which.max)
  
  dt_Chla_opt <- round(dt_Chla_opt, 4)
  memb        <- round(memb, 4)
  
  dt_Chla_opt_ <- dt_Chla_opt
  memb_     <- memb
  modified_memb_list <- rep(NA, nrow(memb_))
  
  # modify the membership values
  for(i in 1:nrow(memb_)){
    # removed algorithms
    if(!is.null(Remove_algorithm)){
      w_rm <- which(str_split(names(dt_Chla_opt_), "\\.", simplify = TRUE)[, 1] %in%
                      Remove_algorithm[[cluster[i]]])
      modified_memb <- sum(memb_[i, w_rm])
    }else{
      w_rm = NULL
      modified_memb <- 0
    }
    modified_memb_list[i] <- modified_memb
    memb_[i, w_rm] <- 0
    cluster_pos <- as.numeric(cluster[i])
    memb_[i, cluster_pos] <- memb_[i, cluster_pos] + modified_memb
    
    # avoid failures: zero out memberships tied to NA or non-positive predictions
    w_fail <- which(is.na(dt_Chla_opt[i,] * memb_[i, ]) | (dt_Chla_opt[i,] * memb_[i, ]) <= 0)
    if(length(w_fail) == 0){
      modified_memb <- 0
    }else{
      modified_memb <- sum(memb_[i, w_fail])
    }
    modified_memb_list[i] <- modified_memb_list[i] + modified_memb
    memb_[i, w_fail] <- 0
    memb_[i, cluster_pos] <- memb_[i, cluster_pos] + modified_memb
    
    # renormalize so that the memberships sum to one
    memb_bias = 1 - sum(memb_[i, ])
    memb_[i, cluster_pos] = memb_[i, cluster_pos] + memb_bias
  }
  
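  # blended Chla: row-wise sum of the optimal-algorithm predictions weighted by the adjusted memberships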
  Chla_blend <- apply(dt_Chla_opt_ * memb_, 1, function(x) sum(x, na.rm=TRUE)) 
  
  result <- list(
    modified_memb_list = modified_memb_list,
    Chla_blend         = Chla_blend,
    new_memb           = memb_
  )
  
  return(result)
  
}

#' @name Chla_algorithms_blend2
#' @noRd
Chla_algorithms_blend2 <- function(dt_Chla_opt, memb,
                                   Opt_algorithm,
                                   Remove_algorithm,
                                   verbose = FALSE) {
  
  # Opt_algorithm <- Score$Opt_algorithm
  # dt_memb -> memb
  # dt_Chla[, Opt_algorithm] -> dt_Chla_opt
  # res_Score <- Score$Score_list_melt
  # Remove_algorithm <- def_remove_algorithm(res_Score, Opt_algorithm, remove_ratio = 1/4)
  
  if(length(unique(c(
    length(Opt_algorithm),
    ncol(dt_Chla_opt),
    ncol(memb)
  ))) != 1){
    stop("The input dt_Chla_opt, memb, and Opt_algorithm should have same length or column number!")
  }
  
  if(nrow(dt_Chla_opt) != nrow(memb)) 
    stop("The row number of dt_Chla_opt and memb should be equal!")
  
  # if(all(Opt_algorithm %in% names(dt_Chla_opt))){
  #   stop("The Opt_algorithm should be parts of dt_Chla_opt colnames")
  # }
  
  if(!(is.list(Remove_algorithm) | is.null(Remove_algorithm))){
    stop("Remove_algorithm should be a list or NULL.")
  }
  
  # modify the membership values
  colnames(dt_Chla_opt) <- 
    gsub(paste0(paste0("\\.", 1:length(Opt_algorithm)), collapse = "|"), "", colnames(dt_Chla_opt))
  
  dt_cluster <- apply(memb, 1, which.max)
  dt_Chla_new <- dt_Chla_opt
  memb_new    <- memb
  
  # move membership values from Remove_algorithm to Opt_algorithm per row
  
  if(verbose)
    cat("Membership value transformation: \n")
  
  for(clus in unique(dt_cluster)) {
    
    w_row <- which(as.vector(dt_cluster) == clus)
    w_col_rm  <- which(Opt_algorithm %in% Remove_algorithm[[clus]])
    
    if(length(w_row) == 1) {
      m_rm_sum <- sum(memb[w_row, w_col_rm])
    } else {
      m_rm_sum <- apply(memb[w_row, w_col_rm], 1, sum)
    }
    
    memb_new[w_row, w_col_rm] <- 0
    memb_new[w_row, clus]     <- m_rm_sum + memb[w_row, clus]
    
    if(verbose) 
      cat(sprintf("trans %s -> %s\n", paste0(w_col_rm, collapse = " "), clus))
    
  }
  
  # move membership values of NA or negative estimations to the dominant cluster per row
  
  memb_new2 <- memb_new
  w_na <- which(is.na(dt_Chla_opt) | dt_Chla_opt < 0, arr.ind = TRUE)
  memb_new2[w_na] <- 0
  
  for(i in 1:ncol(memb)) {
    
    if(length(which(dt_cluster == i)) == 1) {
      tmp <- sum(memb_new2[dt_cluster == i, ])
    } else {
      tmp <- rowSums(memb_new2[dt_cluster == i, ])  
    }
    
    memb_new2[dt_cluster == i, i] <- memb_new2[dt_cluster == i, i] + (1 - tmp)
    
  }
  
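  # mask non-positive predictions, then blend: row-wise sum of predictions weighted by the adjusted memberships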
  dt_Chla_opt[dt_Chla_opt <= 0] <- NA
  Chla_blend <- apply(dt_Chla_opt * memb_new2, 1, function(x) sum(x, na.rm=TRUE)) 
  
  return(list(
    Chla_blend = Chla_blend,
    new_memb   = memb_new2
  ))
  
}



  
#' @name def_remove_algorithm
#' @title Define the removed algorithms
#' @param res_Score The melted score data.frame (i.e., \code{Score_list_melt} from the scoring result)
#' @param Opt_algorithm Names of the optimal algorithms per cluster
#' @param remove_ratio Proportion of the lowest-scoring candidates removed per cluster (default \code{1/2})
#' @return A list of the algorithms to be removed, one element per cluster
#' @noRd
def_remove_algorithm <- function(res_Score, Opt_algorithm, remove_ratio = 1/2) {
  
  # Score_boo$Score_list_melt -> res_Score
  # Score_boo$Opt_algorithm -> Opt_algorithm
  # The removed algorithms defined by the lower half res_Score per cluster
  # remove_ratio = 1/2
  Remove_algorithm <- list()
  if(remove_ratio < 0 | remove_ratio > 1) stop("The remove ratio should be between zero and one!")
  for(i in 1:length(levels(res_Score$x_f))){
    owt <- levels(res_Score$x_f)[i]
    tmpdt = res_Score[res_Score$x == owt, ]
    tmpdt = tmpdt[tmpdt$variable %in% Opt_algorithm, ]
    num_removed = floor(length(unique(tmpdt$variable)) * remove_ratio)
    w_removed = which(tmpdt$value <= sort(tmpdt$value)[num_removed])
    Remove_algorithm[[i]] <- as.vector(tmpdt$variable[w_removed])
  }
  
  return(Remove_algorithm)
  
}
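
# A minimal, commented sketch of the selection rule in def_remove_algorithm()
# (scores and names below are hypothetical):
# scores      <- c(Alg_A = 12, Alg_B = 7, Alg_C = 3, Alg_D = 9)
# num_removed <- floor(length(scores) * 1/2)           # 2
# names(scores)[scores <= sort(scores)[num_removed]]   # "Alg_B" "Alg_C"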