```{R setup, include = FALSE} library(knitr) opts_chunk$set(dev = "png", fig.align = "center", fig.width = 6, fig.height = 5, out.width = "80%", dev.args = list(pointsize = 12), par = TRUE, # needed for setting hook collapse = TRUE, # collapse input & ouput code in chunks warning = FALSE)
# Introduction `qcc` is a contributed R package for **statistical quality control charts** which provides: - Shewhart quality control charts for continuous, attribute and count data - Cusum and EWMA charts - Operating characteristic curves - Process capability analysis - Pareto chart and cause-and-effect chart - Multivariate control charts. This document gives a quick tour of `qcc` (version `r packageVersion("qcc")`) functionalities. Further details are provided in Scrucca (2004). This vignette is written in R Markdown using the [knitr](https://cran.r-project.org/package=knitr) package for production. ```{R, message = FALSE, echo = 1} library(qcc) # qcc.options(bg.margin = "#F8F8F8") cat(qcc:::qccStartupMessage(), sep = "")
data(pistonrings) diameter <- qccGroups(data = pistonrings, diameter, sample) head(diameter) (q1 <- qcc(diameter[1:25,], type = "xbar", newdata = diameter[26:40,])) plot(q1, fill = FALSE) plot(q1, chart.all = FALSE) plot(q1, add.stats = FALSE) plot(qcc(diameter[1:25,], type = "xbar", newdata = diameter[26:40,], confidence.level = 0.99))
Western Electric rules:
q1 <- qcc(diameter[1:25,], type = "xbar", newdata = diameter[26:40,], rules = 1:4) plot(q1) plot(q1, fill = FALSE)
In a Shewhart control chart the x-axis represents the rationale subgroups. This means that in the data collection process "subgroups or samples should be selected so that if assignable causes are present, the chance for differences between subgroups will be maximized, while the chance for differences due to these assignable causes within a subgroup will be minimized" (Montgomery, 2013, Sec. 5.3.4).
Typically, for control charts used in production processes the time order of production is a logical basis for rational subgroups. In this case a user may want to represent the date of production along the x-axis.
(dates <- seq(Sys.Date() - nrow(diameter)+1, Sys.Date(), by = 1)) q = qcc(diameter[1:25,], type = "xbar", newdata = diameter[26:40,]) plot(q, xtime = dates)
Further fine-tuning is also available as shown in the example below:
plot(q, xtime = dates, xlab = NULL, add.stats = FALSE) + scale_x_date(date_breaks = "1 week", date_labels = "%d-%b")
Note than we must remove the summary statistics at bottom (add.stats = FALSE
) in order to change the x-axis settings.
(q2 <- qcc(diameter[1:25,], type = "R")) plot(q2) (q3 <- qcc(diameter[1:25,], type = "R", newdata = diameter[26:40,])) plot(q3)
(q4 <- qcc(diameter[1:25,], type = "S")) plot(q4) (q5 <- qcc(diameter[1:25,], type = "S", newdata = diameter[26:40,])) plot(q5)
out <- c(9, 10, 30, 35, 45, 64, 65, 74, 75, 85, 99, 100) diameter2 <- qccGroups(data = pistonrings[-out,], diameter, sample) plot(qcc(diameter2[1:25,], type = "xbar")) plot(qcc(diameter2[1:25,], type = "R"))
q = qcc(diameter2[1:25,], type = "xbar", rules = 1:4) plot(qcc(diameter2[1:25,], type = "xbar", newdata = diameter2[26:40,], rules = 1:4))
data(orangejuice) (q = with(orangejuice, qcc(D[trial], sizes = size[trial], type = "p"))) plot(q) (q = with(orangejuice, qcc(D[trial], sizes = size[trial], type = "np"))) plot(q)
Remove out-of-control points (see help(orangejuice)
for the reasons):
inc <- setdiff(which(orangejuice$trial), c(15,23)) (q = with(orangejuice, qcc(D[inc], sizes = size[inc], type = "p", newdata = D[!trial], newsizes = size[!trial]))) plot(q)
data(orangejuice2) (q = with(orangejuice2, qcc(D[trial], sizes = size[trial], type = "p", newdata = D[!trial], newsizes = size[!trial]))) plot(q)
data(circuit) (q = with(circuit, qcc(x[trial], sizes = size[trial], type = "c"))) plot(q)
Remove out-of-control points (see help(circuit)
for the reasons)
inc <- setdiff(which(circuit$trial), c(6,20)) (q = with(circuit, qcc(x[inc], sizes = size[inc], type = "c", labels = inc, newdata = x[!trial], newsizes = size[!trial], newlabels = which(!trial)))) plot(q) (q = with(circuit, qcc(x[inc], sizes = size[inc], type = "u", labels = inc, newdata = x[!trial], newsizes = size[!trial], newlabels = which(!trial)))) plot(q)
data(pcmanufact) (q = with(pcmanufact, qcc(x, sizes = size, type = "u"))) plot(q)
data(viscosity) (q = with(viscosity, qcc(viscosity[trial], type = "xbar.one"))) plot(q) (q = with(viscosity, qcc(viscosity[trial], type = "xbar.one", std.dev = "SD"))) plot(q) # batch 4 is out-of-control because of a process temperature controller # failure; remove it and recompute viscosity <- viscosity[-4,] (q = with(viscosity, qcc(viscosity[trial], type = "xbar.one", newdata = viscosity[!trial], rules = 1:4))) plot(q)
In this example we show how to extend the package by defining a new control chart, i.e. a standardized p chart (type = "p.std"
).
Function to compute group statistics and center:
stats.p.std <- function(data, sizes) { data <- as.vector(data) sizes <- as.vector(sizes) pbar <- sum(data)/sum(sizes) z <- (data/sizes - pbar)/sqrt(pbar*(1-pbar)/sizes) list(statistics = z, center = 0) }
Function to compute within-group standard deviation:
sd.p.std <- function(data, sizes, ...) { return(1) }
Function to compute control limits based on normal approximation:
limits.p.std <- function(center, std.dev, sizes, nsigmas = NULL, conf = NULL) { if(is.null(conf)) { lcl <- -nsigmas ucl <- +nsigmas } else { if(conf > 0 & conf < 1) { nsigmas <- qnorm(1 - (1 - conf)/2) lcl <- -nsigmas ucl <- +nsigmas } else stop("invalid 'conf' argument.") } limits <- matrix(c(lcl, ucl), ncol = 2) rownames(limits) <- rep("", length = nrow(limits)) colnames(limits) <- c("LCL", "UCL") return(limits) }
Example with simulated data: ```{R, echo = -1} set.seed(20171102)
n <- c(rep(50,5), rep(100,5), rep(25, 5))
x <- rbinom(length(n), n, 0.2)
(q = qcc(x, type = "p", size = n)) plot(q)
(q = qcc(x, type = "p.std", size = n)) plot(q)
# Operating Characteristic Curves An operating characteristic curve graphically provides information about the probability of not detecting a shift in the process. The function `ocCurves()` is a generic function which calls the proper function depending on the type of input `'qcc'` object. ```{R} data(pistonrings) diameter <- qccGroups(data = pistonrings, diameter, sample) q <- qcc(diameter, type = "xbar", nsigmas = 3) (beta <- ocCurves(q)) plot(beta) plot(beta, what = "ARL") data(orangejuice) q <- with(orangejuice, qcc(D[trial], sizes = size[trial], type = "p")) (beta <- ocCurves(q)) plot(beta) plot(beta, what = "ARL") data(circuit) q <- with(circuit, qcc(x[trial], sizes = size[trial], type = "c")) (beta <- ocCurves(q)) plot(beta) plot(beta, what = "ARL")
data(pistonrings) diameter <- qccGroups(diameter, sample, data = pistonrings) (q = cusum(diameter[1:25,], decision.interval = 4, se.shift = 1)) plot(q) (q <- cusum(diameter[1:25,], newdata = diameter[26:40,])) plot(q) plot(q, chart.all = FALSE) plot(q, xtime = seq(Sys.Date() - nrow(diameter)+1, Sys.Date(), by = 1), add.stats = FALSE) + scale_x_date(date_breaks = "4 days", date_labels = "%d-%b")
data(pistonrings) diameter <- qccGroups(data = pistonrings, diameter, sample) (q = ewma(diameter[1:25,], lambda = 0.2, nsigmas = 3)) plot(q) (q = ewma(diameter[1:25,], lambda = 0.2, nsigmas = 2.7, newdata = diameter[26:40,])) plot(q) plot(q, xtime = seq(Sys.Date() - nrow(diameter)+1, Sys.Date(), by = 1), add.stats = FALSE) + scale_x_date(date_breaks = "4 days", date_labels = "%d-%b")
data(viscosity) (q = with(viscosity, ewma(viscosity[trial], lambda = 0.2, nsigmas = 2.7, newdata = viscosity[!trial]))) plot(q)
data(pistonrings) diameter <- qccGroups(data = pistonrings, diameter, sample) q <- qcc(diameter[1:25,], type = "xbar", nsigmas = 3) (pc = processCapability(q, spec.limits = c(73.95,74.05))) plot(pc) (pc = processCapability(q, spec.limits = c(73.95,74.05), target = 74.02)) plot(pc) (pc = processCapability(q, spec.limits = c(73.99,74.01))) plot(pc) (pc = processCapability(q, spec.limits = c(73.99, 74.1))) plot(pc)
Multivariate subgrouped data from Ryan (2011, Table 9.2) with $p = 2$ variables, $m = 20$ samples, and $n = 4$ sample size for each sample:
data(RyanMultivar) (q <- mqcc(RyanMultivar, type = "T2")) ellipseChart(q) ellipseChart(q, show.id = TRUE) q <- mqcc(RyanMultivar, type = "T2", pred.limits = TRUE)
Ryan (2011) discussed Xbar-charts for single variables computed adjusting the confidence level of the $T^2$ chart:
with(RyanMultivar, qcc(X1, type = "xbar", confidence.level = q$confidence.level^(1/2))) with(RyanMultivar, qcc(X2, type = "xbar", confidence.level = q$confidence.level^(1/2)))
Generate new "in control" data:
Xnew <- list(X1 = matrix(NA, 10, 4), X2 = matrix(NA, 10, 4)) for(i in 1:4) { x <- MASS::mvrnorm(10, mu = q$center, Sigma = q$cov) Xnew$X1[,i] <- x[,1] Xnew$X2[,i] <- x[,2] } (q <- mqcc(RyanMultivar, type = "T2", newdata = Xnew, pred.limits = TRUE)) ellipseChart(q, show.id = TRUE)
Generate new "out of control" data:
Xnew <- list(X1 = matrix(NA, 10, 4), X2 = matrix(NA, 10, 4)) for(i in 1:4) { x <- MASS::mvrnorm(10, mu = 1.2*q$center, Sigma = q$cov) Xnew$X1[,i] <- x[,1] Xnew$X2[,i] <- x[,2] } (q <- mqcc(RyanMultivar, type = "T2", newdata = Xnew, pred.limits = TRUE)) ellipseChart(q, show.id = TRUE)
Individual observations data:
data(boiler) (q <- mqcc(boiler, type = "T2.single", confidence.level = 0.999))
Generate new "in control" data:
boilerNew <- MASS::mvrnorm(10, mu = q$center, Sigma = q$cov) mqcc(boiler, type = "T2.single", confidence.level = 0.999, newdata = boilerNew, pred.limits = TRUE)
Generate new "out of control" data:
boilerNew <- MASS::mvrnorm(10, mu = 1.01*q$center, Sigma = q$cov) mqcc(boiler, type = "T2.single", confidence.level = 0.999, newdata = boilerNew, pred.limits = TRUE)
Recompute by providing "robust" estimates for the means and the covariance matrix:
rob <- MASS::cov.rob(boiler) mqcc(boiler, type = "T2.single", center = rob$center, cov = rob$cov)
defect <- c(80, 27, 66, 94, 33) names(defect) <- c("price code", "schedule date", "supplier code", "contact num.", "part num.") (pc = paretoChart(defect, ylab = "Error frequency")) plot(pc)
causeEffectDiagram(cause = list(Measurements = c("Micrometers", "Microscopes", "Inspectors"), Materials = c("Alloys", "Lubricants", "Suppliers"), Personnel = c("Shifts", "Supervisors", "Training", "Operators"), Environment = c("Condensation", "Moisture"), Methods = c("Brake", "Engager", "Angle"), Machines = c("Speed", "Lathes", "Bits", "Sockets")), effect = "Surface Flaws")
In the following simulated data are used to describe some models for process variation. For further details see Wetherill, G.B. and Brown, D.W. (1991) Statistical Process Control, New York, Chapman and Hall, Chapter 3.
```{R, echo = FALSE} set.seed(123) # set seed for reproducibility
## Simple random variation $x_{ij} = \mu + \sigma_W \epsilon_{ij}$ ```{R} mu <- 100 sigma_W <- 10 epsilon <- rnorm(500) x <- matrix(mu + sigma_W*epsilon, ncol = 10, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
$x_{ij} = \mu + \sigma_B u_i + \sigma_W \epsilon_{ij}$
mu <- 100 sigma_W <- 10 sigma_B <- 5 epsilon <- rnorm(500) u <- as.vector(sapply(rnorm(50), rep, 10)) x <- mu + sigma_B*u + sigma_W*epsilon x <- matrix(x, ncol = 10, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
$x_{ij} = \mu + W_i + \sigma_W \epsilon_{ij}$
where $W_i = \rho W_{i-1} + \sigma_B u_i = \sigma_B u_i + \rho \sigma_B u_{i-1} + \rho^2 \sigma_B u_{i-2} + \ldots$,
and $W_0 = 0$.
mu <- 100 rho <- 0.9 sigma_W <- 10 sigma_B <- 5 epsilon <- rnorm(500) u <- rnorm(500) W <- rep(0,100) for(i in 2:length(W)) W[i] <- rho*W[i-1] + sigma_B*u[i] x <- mu + sigma_B*u + sigma_W*epsilon x <- matrix(x, ncol = 10, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
Assume we have 3 working turns of 8 hours each for each working day, so $8 \times 3 = 24$ points in time, and at each point we sample 5 units.
$x_{ij} = \mu + W_i + \sigma_W \epsilon_{ij}$
where $W_i$ ($i = 1,\ldots,8$) is the cycle.
mu <- 100 sigma_W <- 10 epsilon <- rnorm(120, sd = 0.3) W <- c(-4, 0, 1, 2, 4, 2, 0, -2) # assumed workers cycle W <- rep(rep(W, rep(5,8)), 3) x <- mu + W + sigma_W*epsilon x <- matrix(x, ncol = 5, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
$x_{ij} = \mu + W_i + \sigma_W \epsilon_{ij}$
where $W_i = 0.2 i$
mu <- 100 sigma_W <- 10 epsilon <- rnorm(500) W <- rep(0.2*1:100, rep(5,100)) x <- mu + W + sigma_W*epsilon x <- matrix(x, ncol = 10, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
$x_{ij} = \mu_1 p + \mu_2 (1-p) + \sigma_W \epsilon_{ij}$
where $p = \Pr(\text{Process #1})$.
mu1 <- 90 mu2 <- 110 sigma_W <- 10 epsilon <- rnorm(500) p <- rbinom(50, 1, 0.5) mu <- mu1*p + mu2*(1-p) x <- rep(mu, rep(10, length(mu))) + sigma_W*epsilon x <- matrix(x, ncol = 10, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
$x_{ij} = \mu_i + \sigma_W \epsilon_{ij}$
where $\mu_i$ is the mean of the process for state $i$ ($i = 1,\ldots,k)$.
mu <- rep(c(95,110,100,90), c(20,35,25,20)) sigma_W <- 10 epsilon <- rnorm(500) x <- rep(mu, rep(5, length(mu))) + sigma_W*epsilon x <- matrix(x, ncol = 10, byrow = TRUE) plot(qcc(x, type = "xbar", rules = 1:4)) plot(qcc(x, type = "R", rules = 1:4)) plot(qcc(x, type = "S", rules = 1:4))
Montgomery, D.C. (2013) Introduction to Statistical Quality Control, 7th ed. New York: John Wiley & Sons.
Ryan, T. P. (2011), Statistical Methods for Quality Improvement, 3rd ed. New York: John Wiley & Sons.
Scrucca, L. (2004) qcc: an R package for quality control charting and statistical process control. R News 4/1, 11-17.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.