qualityr: Calculate Quality Statistics

Documented in asample

#' Develop an Attribute Sampling Plan
#'
#' @param AQL Acceptance Quality Limit and is the defined as the quality level that is the worst tolerable
#' process average when continuing series of lots is submitted for acceptance sampling.
#' The AQL represents the poorest level of quality for the manufacturer's process that the customer would consider to be
#' acceptable as a process average and is thus a property of the manufacture's  process and is not a property of the sampling
#' plan.  Note that the designation of an AQL does not suggest that this is a desirable quality level only that the process
#' averages are consistently better than the AQL.  Furthermore, the AQL is not intended to be a specification on the product,
#' nor it is a target value for the manufacture's production process.  The AQL is simply a standard against which to judge the
#' lots as the manufacture's intent is to operate at a fallout level that is considerably better than the AQL.  The value used
#' is a rounded percentage value.  For example, if a 1 percent non conformance is used for the process average acceptance then
#' ALQ = 1 AND NOT 0.01.  AQLs are reported in percentage values.
#' @param CI Confidence Interval (CI) = 1-Manufacturer's Risk(alpha).  Denotes 1 - the probability that a good lot will be
#' rejected, or 1 - the probability that a process producing acceptable value of quality characteristic will be rejected as
#' performing unsatisfactory.  This is also known as 1 - Type-I error.  Unlike AQL and RQL, CI is a non-percentage value.
#' For example, a 95 percent confidence level for an associated AQL would be CI = 0.95.
#' @param RQL Reject able Quality Level and is the poorest level of quality that the consumer is willing to accept in an
#' individual lot.  Note that the RQL is not a characteristic of the sampling plan, but is a level of lot quality specified
#' by the consumer.  Alternate names for the RQL are the Lot Tolerance Percent Defective (LTPD) and the Limiting Quality Level
#' (LQL).  Similar to AQL, the RQL is percentage value and must be less than the AQL.
#' @param Power The power is the probability of correctly rejecting the null hypothesis and is denoted as
#' Power = 1 - beta.  Beta is the probability of accepting a lot of poor quality, or allowing a process that is
#' operating in an unsatisfactory manner relative to some quality characteristic to continue in operation.
#' Beta is also known as the Type-II error.  Similar to CI, Power is a non-percentage value and 1-Power must be less
#' than the CI.
#' @param N 500,000 by default.  N = total lot size.  If using the default value the hyper geometric reduces to the binomial
#' in which the sampling plan can be thought of as with replacement.  However, for small lot size AND where sampling is done
#' without replacement N should be defined appropriately.
#' @param n 1,000 by default.  n is simply a defined starting point for the sample size.  In the event that n is greater than
#' N then n will be set to equal N.  Depending on how small the AQL and RQL get or how large the CI and Power get n may need to
#' be increased to capture sample sizes that exceed 1,000.
#' @param c Acceptance Number.  Unless otherwise specified an acceptance number (c) is not used and the 4-5 parameters needed
#' to calculate a sample size is used.  However, c can be a single value or a vector of values in the event that one needs to
#' compare sampling plans with different levels of acceptance numbers.
#' @param listed FALSE by default.  If TRUE the output is a list that contains the data frames from the computations as well as
#' the plot in case further analysis is needed from the output.  If FALSE, the output is only the OC curve.  By default this
#' is set to FALSE and only the OC curve is provided in the output.
#' @param oc TRUE by default.  If TRUE an OC curve will be generated.  If FALSE an OC curve will not be gernated any only
#' raw data from the sampling plan will be reported.  NOTE:  This is really only meant to prevent the computations for the OC
#' curve from running if the OC curve isn't needed and just the raw sampling plan.
#' @return The sampling plan from the paramaters of \code{AQL}, \code{CI}, \code{RQL}, \code{Power}, \code{N}, &/or \code{c}
#' @export
#' @examples
#' asample(AQL = 1, CI = 0.95, RQL = 5, Power = 0.90)
#' asample(AQL = 1, CI = 0.95, RQL = 5, Power = 0.90, N = 1000, c = 0:3, listed = TRUE, oc = FALSE)
#' @import dplyr
#' @import ggplot2
#' @importFrom stats phyper dhyper qhyper


asample <- function(AQL, CI, RQL,Power,
                    N = 500000,
                    n = 1000,
                    c = NULL,
                    listed = FALSE,
                    oc = TRUE){

  if(n <= 0) {
    print(paste0("n = ", n))
    stop('The argument "n" must be positive & not 0')
  }

  if(n > N){
    n <- N
  }

  if(RQL <= AQL | RQL <= 0 | AQL <= 0 | RQL/100 >= 1 | AQL/100 >= 1) {
    stop('Insure that 0 < AQL < RQL < 1')
  }

  if((1-Power) >= CI | Power <= 0 | CI <= 0 | Power >= 1 | CI >= 1){
    stop('Insure that 0 < beta < CI < 1')
  }

  # Hypergeometric: Type-A/B

  if(is.null(c)){

    sampling_plan <- data_frame()

    for(i in 1:n){
      c <- qhyper(p = CI,
                  m = N*AQL/100,
                  n = N*(1-AQL/100),
                  k = i)

      alpha <- phyper(q = c,
                      m = N*AQL/100,
                      n = N*(1-AQL/100),
                      k = i)

      beta <- phyper(q = c,
                     m = N*RQL/100,
                     n = N*(1-RQL/100),
                     k = i)

      step <- data_frame("n" = i, "c" = c, "alpha" = alpha, "beta" = beta)

      sampling_plan <- rbind(sampling_plan, step)
    }

    sampling_plan <-  sampling_plan %>%
      dplyr::filter(beta <= (1-(Power))) %>%
      dplyr::filter(n == min(n)) %>%
      dplyr::mutate(AQL = AQL,
                    RQL = RQL,
                    CI = round(alpha, 3),
                    Beta = round(beta, 3),
                    AQL_Label = paste0("[", AQL, ", ", CI*100, "%]"),
                    RQL_Label = paste0("[", RQL, ", ", Beta*100, "%]"))

    if(dim(sampling_plan)[1] == 0){
      stop('Base unit "n" needs to be increased beyond the default / set value. \n
        Try by a factor of 5 first so long as "n" <= "N". \n
        OR "N" is too small and 100% sampling is required based on the paramaters used.')
    }

    if(oc){

      aql <- seq(0.01, 10, 0.01)
      OC.a <- data_frame()

      for(i in aql){
        prob_acc <- phyper(q = sampling_plan$c,
                           m = N*i/100,
                           n = N*(1-i/100),
                           k = sampling_plan$n)

        prob_acc_df <- data_frame("Lot Percent Defective" = i,
                                  "Probability of Acceptance" = prob_acc)

        OC.a <- rbind(OC.a, prob_acc_df)
      }

      Type_A_OC <- ggplot(OC.a, aes(x = `Lot Percent Defective`, y = `Probability of Acceptance`)) +
        geom_path(color = "black", lwd = 1.5, alpha = 0.70) +
        geom_point(data = sampling_plan, inherit.aes = FALSE,
                   aes(x = AQL, y = alpha),
                   color = "Blue",
                   size = 4) +
        geom_point(data = sampling_plan, inherit.aes = FALSE,
                   aes(x = RQL, y = beta),
                   color = "Red",
                   size = 4) +
        geom_text(data = sampling_plan, inherit.aes = FALSE,
                  aes(x = AQL, y = alpha, label = AQL_Label),
                  color = "Blue",
                  nudge_x = 0.4,
                  nudge_y = 0.03) +
        geom_text(data = sampling_plan, inherit.aes = FALSE,
                  aes(x = RQL, y = beta, label = RQL_Label),
                  color = "Red",
                  nudge_x = 0.4,
                  nudge_y = 0.03) +
        scale_x_continuous(breaks = seq(0, 10, 1)) +
        scale_y_continuous(breaks = seq(0, 1, 0.1)) +
        labs(title = "Attribute Type-A/B OC Curve",
             subtitle = paste0("Sample Size (n) = ", sampling_plan$n,
                               ", Acceptance Number (c) = ", sampling_plan$c,
                               "\n inputs: [N, AQL, CI, RQL, Power] = ",
                               "[", N, ", ",
                               AQL, ", ",
                               round(sampling_plan$alpha, 2)*100, "%, ",
                               RQL, ", ",
                               round(1-sampling_plan$beta, 2)*100, "%]")) +
        theme_classic() +
        theme(axis.title.x = element_text(face = "bold", size = 14, vjust=-1),
              axis.title.y = element_text(face = "bold", size = 14, vjust=3),
              plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
              plot.subtitle = element_text(face = "italic", size = 10, color = "red",
                                           hjust = 0.5),
              axis.text = element_text(size = 10, color = "black")
        )

      Full.Computation.List <- list(
        "Sampling.Plan" = sampling_plan[,c(1, 2, 5, 6, 7, 8)],
        "OC.Curve.Data" = OC.a,
        "OC.Curve" = Type_A_OC)

    } else {

      Full.Computation.List <- list(
        "Sampling.Plan" = sampling_plan[,c(1, 2, 5, 6, 7, 8)])

    }

    if (listed & oc){

      return(Full.Computation.List)

    } else if (!listed & oc) {

      return(Full.Computation.List[[3]])

    } else {

      return(Full.Computation.List)

    }

  }

  if(!is.null(c)){

    #C Sampling Plan

    base_input <- data_frame(AQL = AQL, CI = CI)

    sampling_plan_j <- data_frame()

    for(j in c){

      C <- c(j)

      sampling_plan_i <- data_frame()

      for(i in 1:n){

        alpha <- phyper(q = C,
                        m = N*AQL/100,
                        n = N*(1-AQL/100),
                        k = i)

        beta <- phyper(q = C,
                       m = N*RQL/100,
                       n = N*(1-RQL/100),
                       k = i)

        step <- data_frame("n" = i, "C" = C, "alpha" = alpha, "beta" = beta)

        sampling_plan_i <- rbind(sampling_plan_i, step)
      }

      sampling_plan_i <- sampling_plan_i %>%
        dplyr::filter(beta <= (1-(Power))) %>%
        dplyr::filter(n == min(n))

      sampling_plan_j <- rbind(sampling_plan_j, sampling_plan_i)

    }

    sampling_plan <- sampling_plan_j %>%
      dplyr::mutate(AQL = AQL,
                    RQL = RQL,
                    CI = round(alpha, 2),
                    Beta = round(beta, 2),
                    AQL_Label = ifelse(CI > base_input$CI, NA,
                                       paste0("[", CI*100, "%]")),
                    RQL_Label = paste0("[", RQL, ", ", Beta*100, "%]"),
                    n.C = paste0(n, " / ", C))

    if(dim(sampling_plan)[1] == 0){
      stop('Unable to calculate a sampling plan for these paramaters.')
    }

    if(oc){

      OC.a <- data_frame()
      aql <- seq(0.001, 10, 0.01)

      for(k in 1:length(sampling_plan$C)){

        OC.k <- data_frame()

        c.k <- sampling_plan$C[k]

        n.k <- sampling_plan$n[k]

        for(i in aql){

          prob_acc <- phyper(q = c.k,
                             m = N*i/100,
                             n = N*(1-i/100),
                             k = n.k)

          prob_acc_df <- data_frame(C = c.k,
                                    "Lot Percent Defective" = i,
                                    "Probability of Acceptance" = prob_acc)

          OC.k <- rbind(OC.k, prob_acc_df)
        }

        OC.a <- rbind(OC.a, OC.k)

      }

      OC.a <- OC.a %>%
        dplyr::mutate(C = as.character(C))

      min.AQL <- OC.a %>%
        dplyr::arrange(C) %>%
        dplyr::group_by(C) %>%
        dplyr::filter(`Probability of Acceptance` >= CI) %>%
        dplyr::filter(`Lot Percent Defective` == max(`Lot Percent Defective`)) %>%
        dplyr::mutate(adj.AQL_Label = ifelse(`Lot Percent Defective` > base_input$AQL,
                                             NA,
                                             paste0("[",
                                                    format(round(`Lot Percent Defective`, 1), nsmall = 1),
                                                    "]")))

      C.0_Type <- ggplot(OC.a, aes(x = `Lot Percent Defective`,
                                   y = `Probability of Acceptance`,
                                   color = C,
                                   fill = C)) +
        geom_path(aes(colour = C), lwd = 1) +
        scale_color_discrete(name = "n / c", labels = sampling_plan$n.C) +
        geom_hline(yintercept = CI,
                   color = "Black",
                   linetype = 5,
                   alpha = 0.2) +
        geom_vline(xintercept = AQL,
                   color = "Blue",
                   linetype = 5,
                   alpha = 0.2) +
        geom_label(inherit.aes = FALSE,
                   data = base_input,
                   aes(x = AQL, label = AQL, y = 1.07),
                   fontface = "bold",
                   fill = "Blue",
                   color = "White",
                   size = 5) +
        geom_label(inherit.aes = FALSE,
                   data = base_input,
                   aes(x = 10, label = CI, y = CI),
                   fontface = "bold",
                   fill = "Black",
                   color = "White",
                   size = 5) +
        geom_point(data = sampling_plan, inherit.aes = FALSE,
                   aes(x = AQL, y = alpha),
                   color = "Blue",
                   size = 2,
                   alpha = 0.4) +
        geom_point(data = sampling_plan, inherit.aes = FALSE,
                   aes(x = RQL, y = beta),
                   color = "Red",
                   size = 4) +
        geom_point(data = min.AQL, inherit.aes = FALSE,
                   aes(x = `Lot Percent Defective`, y = `Probability of Acceptance`),
                   color = "Black",
                   size = 2, shape = 8) +
        geom_text(data = sampling_plan, inherit.aes = FALSE,
                  aes(x = AQL, y = alpha,
                      label = ifelse(is.na(AQL_Label), "", AQL_Label)),
                  color = "Blue", alpha = 1,
                  nudge_x = -0.1,
                  fontface = "bold",
                  size = 3,
                  angle = 270,
                  nudge_y = -0.04) +
        geom_text(data = sampling_plan, inherit.aes = FALSE,
                  aes(x = sampling_plan$RQL[1],
                      y = sampling_plan$beta[1],
                      label = sampling_plan$RQL_Label[1]),
                  color = "Red",
                  nudge_x = 0.4,
                  nudge_y = 0.03) +
        geom_text(data = min.AQL, inherit.aes = FALSE,
                  aes(x = `Lot Percent Defective`, y = `Probability of Acceptance`,
                      label = ifelse(is.na(adj.AQL_Label), "", adj.AQL_Label)),
                  color = "Black",
                  angle = 270,
                  fontface = "bold",
                  size = 3,
                  nudge_x = 0,
                  nudge_y = 0.06) +
        scale_x_continuous(breaks = seq(0, 10, 1)) +
        scale_y_continuous(breaks = seq(0, 1, 0.1)) +
        labs(title = paste0("Attribute c Defined OC Curve(s)"),
             subtitle = paste0("inputs: [N, AQL, CI, RQL, Power] = ",
                               "[", N, ", ",
                               AQL, ", ",
                               CI*100, "%, ",
                               RQL, ", ",
                               Power*100, "%]"),
             caption = "***Note: With c = 0 sampling plans, AQL and CI are essentially ignored!",
             color = "n / c") +
        theme_classic() +
        theme(axis.title.x = element_text(face = "bold", size = 14, vjust=-1),
              axis.title.y = element_text(face = "bold", size = 14, vjust=3),
              plot.title = element_text(face = "bold", size = 18, hjust = 0.5),
              plot.subtitle = element_text(face = "italic", size = 10, color = "red",
                                           hjust = 0.5),
              plot.caption = element_text(face = "italic", size = 8, color = "blue",
                                          hjust = 0),
              axis.text = element_text(size = 10, color = "black"))


      Full.Computation.List <- list(
        "Sampling.Plan" <- sampling_plan[,c(1, 2, 5, 6, 7, 8)],
        "OC.Curve.Data" <- OC.a,
        "OC.Curve" <- C.0_Type,
        "Adjusted.Min.AQL" <- min.AQL)

    } else {

      Full.Computation.List <- list(
        "Sampling.Plan" <- sampling_plan[,c(1, 2, 5, 6, 7, 8)])

    }


    if (listed & oc){

      return(Full.Computation.List)

    } else if (!listed & oc) {

      return(Full.Computation.List[[3]])

    } else {

      return(Full.Computation.List)

    }

  }

}