library(MASS) # for ginv function
library(qualityTools) # for taguchiDesign function
library(tidyverse) # for data handling, pipes, and plotting
library(highcharter) # for an alternative to ggplot
library(cowplot) # for formatting plots in diffMTS
computeMDs <- function(good, bad) {
# This function computes Mahalanobis Distances (MDs) for "good" and "bad" groups.
# Takes two data frames as inputs and produces a list of MD.good and MD.bad -- the object
# returned by this function can be ingested by plotMDs
if (!is.data.frame(good) || !is.data.frame(bad)) {
stop("At least one input is not a data frame")
} else {
# Compute means and sd's for "good" group
xbars <- colMeans(good)
sds <- apply(good, 2, sd)
pinv <- 1/ncol(good)
Rinv <- ginv(cor(good))
# Compute MDs for bad group by using means and sds of "good" group as a baseline
z0s <- apply(bad, 1, function(x) (x-xbars)/sds)
MD.bad <- pinv * diag( (t(z0s) %*% Rinv %*% z0s) )
# Compute MDs for good group
z0s.good <- apply(good, 1, function(x) (x-xbars)/sds)
MD.good <- pinv * diag( (t(z0s.good) %*% Rinv %*% z0s.good) )
return(list(MD.bad=MD.bad, MD.good=MD.good))
}
}
plotMDs <- function(computeMDs_obj, type="bars") {
bar.df <- data.frame(
index=seq(1, length(computeMDs_obj$MD.good) + length(computeMDs_obj$MD.bad), 1),
md=c(computeMDs_obj$MD.good, computeMDs_obj$MD.bad),
group=c(rep("normal", length(computeMDs_obj$MD.good)), rep("abnormal",length(computeMDs_obj$MD.bad) ))
)
if (type == "bars") {
bar.df %>% ggplot() + geom_bar(aes(x=index, y=md), stat="identity") +
ggtitle("Comparison of MDs between Normal and Abnormal Groups")
} else if (type == "hc_scatter") {
hchart(bar.df, "scatter", hcaes(x = index, y = md, group = group)) %>% hc_colors(c("red","blue"))
} else if (type == "hc_column") {
highchart() %>% hc_chart(type = "column") %>% hc_add_series(data = bar.df$md, name = "MD")
}
}
generateTDO <- function(good) {
# This function can take as input either the good or bad data frame, since both have same ncol.
# Alternatively, you can just give it the number of IVs directly and it will still work.
if (!is.data.frame(good)) {
ivs <- good
} else {
ivs <- ncol(good)
}
if (ivs %in% c(2:3)) {
tdo <- taguchiDesign("L4_2")@design[,1:ivs] %>% replace(.=="2",0)
} else if (ivs %in% c(4:7)) {
tdo <- taguchiDesign("L8_2")@design[,1:ivs] %>% replace(.=="2",0)
} else if (ivs %in% c(8:11)) {
tdo <- taguchiDesign("L12_2")@design[,1:ivs] %>% replace(.=="2",0)
} else if (ivs %in% c(12:15)) {
tdo <- taguchiDesign("L16_2")@design[,1:ivs] %>% replace(.=="2",0)
} else if (ivs==1 || ivs > 15) {
stop(paste0("Number of independent variables ",ivs," is not supported"))
}
return(tdo=tdo)
}
# Functions to compute Taguchi Signal to Noise (SN)
ltb <- function(x) {
-10*log10( (1/(length(x))) * sum(1/(x^2) ))
} # Larger-the-better
stb <- function(x) {
-10*log10( (1/(length(x))) * sum( (x^2) ))
} # Smaller-the-better
dyn.sn <- function(x, y) {
stb(x) - ltb(y)
}
runTaguchi <- function(good, bad, tdo) {
# This function takes the data frames of good and bad observations + the Taguchi Orthogonal Array design (tdo)
# created by the generateTDO function and runs all signal-to-noise experiments on the MDs.
# It returns a matrix of experimental results, with one row per Taguchi experiment, and one column
# per predictor/independent variable.
all.results <- NULL
for (i in 1:(nrow(tdo)) ) {
exp.bad <- 0; exp.good <- 0
exp.bad <- as.data.frame(mapply(`*`, bad, tdo[i,]))
exp.bad <- exp.bad[, colSums(exp.bad != 0) > 0] # drop all columns that contain all zeroes
exp.good <- as.data.frame(mapply(`*`, good, tdo[i,]))
exp.good <- exp.good[, colSums(exp.good != 0) > 0] # drop all columns that contain all zeroes
if (is.vector(exp.good)) { # this indicates there is only ONE active column in the Taguchi array
xbars <- mean(exp.good)
sds <- sd(exp.good)
# pinv will be 1/1 because only one predictor
Rinv <- ginv(cor(as.matrix(exp.good)))
z0s <- (exp.bad-xbars)/sds
as.vector(t(z0s)*z0s) -> results
all.results <- rbind(all.results, results)
} else if (is.data.frame(exp.good)) { # there are MULTIPLE active columns to process in the Taguchi array
xbars <- colMeans(exp.good)
sds <- apply(exp.good, 2, sd)
pinv <- 1/ncol(exp.good)
Rinv <- ginv(cor(exp.good))
z0s <- apply(exp.bad, 1, function(x) (x-xbars)/sds) # scale bad group based on xbar/sd of good group
as.vector(pinv * diag( (t(z0s) %*% Rinv %*% z0s) )) -> results
all.results <- rbind(all.results, results)
}
}
rownames(all.results) <- NULL
return(all.results)
}
addSN <- function(taguchiResults, method="ltb") {
# This function takes the results from runTaguchi, and appends a SN column to it
# based on whether you want larger-the-better, smaller-the-better, or dynamic (stb-ltb) SN.
if (is.data.frame(taguchiResults)) {
if (method == "stb") {
taguchiResults %>% mutate(sn=apply(taguchiResults,1,stb)) -> sn.df
} else if (method == "dyn") {
taguchiResults %>% mutate(sn=apply(taguchiResults,1,dyn)) -> sn.df
} else {
taguchiResults %>% mutate(sn=apply(taguchiResults,1,ltb)) -> sn.df
}
} else {
stop("The addSN function requires a data frame containing Taguchi experiment results")
}
return(sn.df)
}
calcGains <- function(tdo, df) {
# This function calculates average SN for each predictor/independent variable when it was used (Level 1)
# and also when it was not used (Level 2) in each of the Taguchi experiments. The difference between
# these two averages as you move from Level 1 to Level 2 is called the gain.
# For example, if the first predictor/independent variable was used in 3 of 8 Taguchi experiments, the
# Level 1 SN for X1 would be the average of these three SNs, while the Level 2 SN would be the average of
# SNs for the 5 experiments where X1 was not used.
level1 <- colMeans(apply(tdo, 2, function(x) x*df$sn))
level2s <- as.data.frame(matrix(as.numeric(!tdo), nrow=nrow(tdo)))
level2 <- colMeans(apply(level2s, 2, function(x) x*df$sn))
gains.df <- as.data.frame(cbind(level1, level2, gain=level1-level2), rownames=colnames(x))
gains.df$var <- colnames(good)
return(gains.df)
}
plotGains <- function(tdo, gains.df, type="bars") {
# This function plots the output of calcGains. Negative gain indicates that the predictor does not
# enhance the discriminatory power of the MTS. On the main effects plot, downward sloping lines
# indicate predictors that do not enhance the discriminatory power.
if (type == "bars") {
gains.df %>% ggplot() + geom_bar(aes(x=var, y=gain), stat="identity") +
ggtitle("Gain in SN Ratios for Each Variable")
} else if (type == "maineffects") {
gains.df %>% select(-gain) %>%
pivot_longer(cols=starts_with("level"), names_to="level", values_to="value") %>%
ggplot(aes(level, value, group=var)) + geom_line(lwd=2) +
facet_wrap(~var,ncol=ncol(good))
} else if (type == "hc_scatter") {
hchart(gains.df, "scatter", hcaes(x = var, y = gain, group = sign(gain))) %>%
hc_colors(c("red","blue")) %>% hc_add_series(showInLegend = FALSE)
} else if (type == "hc_column") {
highchart() %>% hc_chart(type = "column") %>% hc_add_series(data = gains.df$gain, name = "gain")
}
}
recommend <- function(gains.df, type="print") {
# This function tells you what variables to use for discriminating between "good" and "bad" groups.
# If argument "pretty" is used, it generates a flextable containing the recommended variables.
if (type == "print") {
gains.df %>% filter(gain > 0) %>% select(var) %>% as.list()
} else if (type == "pretty") {
gains.df %>% filter(gain > 0) %>% select(var) %>% regulartable() %>% autofit()
}
}
diffMTS <- function(mts1, mts2) {
# This function takes two objects from compareMDs: your INITIAL set of MDs from good and bad groups with all potential
# predictor variables included, and your FINAL set of MDs after feature selection.
compare <- data.frame(index = seq(1, length(unlist(mts1)), 1),
md.old = c(mts1$MD.good, mts1$MD.bad),
md.new = c(mts2$MD.good, mts2$MD.bad))
compare %>% mutate(diff = md.new - md.old) -> compare
g1 <- compare %>% ggplot() + geom_line(aes(x = index, y = md.old), color="blue") +
geom_line(aes(x = index, y = md.new), color="red", lwd=2)+
ggtitle("Discrimination Power After Feature Selection (Red)")
g2 <- compare %>% ggplot() + geom_histogram(aes(x=diff)) + geom_vline(xintercept=2, col="red") +
ggtitle("Differences in Mahalanobis Distance (MD) after Feature Selection")
cowplot::plot_grid(g1, g2)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.