# Global knitr chunk options for this vignette: echo the code, collapse
# source and output into a single block, and prefix output lines with "#>".
knitr::opts_chunk$set(
  comment = "#>",
  collapse = TRUE,
  echo = TRUE
)
# Attach all packages used in this vignette.  `library()` is preferred over
# `require()` for mandatory dependencies: it stops with an error if the
# package is missing instead of silently returning FALSE.
library(monogamy)
library(dplyr)
library(ggplot2)
library(knitr)
library(mgcv)
library(tidyr)
library(purrr)
This example from the mgcv package demonstrates fitting a monotonic GAM model with two different settings for the basis-dimension parameter `k`.
# Simulate noisy observations of a monotone logistic curve, then fit
# monotonic splines with two basis sizes (k = 5 and k = 10) and compare
# the fitted curves and their residuals.
set.seed(20200410)
x <- runif(100) * 4 - 1
x <- sort(x)
f <- exp(4 * x) / (1 + exp(4 * x))   # true (monotone) mean function
y <- f + rnorm(100) * 0.1
#par(mfrow = c(1, 2))
plot(x, y)

# Small basis: fit with k = 5 and overlay the fitted curve in green.
fit_k5 <- mspline(x, y, 5)
lines(x, predict(fit_k5, x), col = "#7fc97f")
residfv_5 <- residuals(fit_k5)

# Larger basis: refit with k = 10 and overlay in blue on the same panel.
fit_k10 <- mspline(x, y, 10)
lines(x, predict(fit_k10, x), col = "#386cb0")
legend("bottomright", lty = 1, paste0("k=", c(5, 10)),
       col = c("#7fc97f", "#386cb0"))
residfv_10 <- residuals(fit_k10)

# Residual diagnostics for both fits, plotted against x.
plot(x = x, y = residfv_5, main = "Residuals vs x plot",
     col = "#7fc97f", ylab = "residual")
points(x = x, y = residfv_10, col = "#386cb0", pch = 16)
legend("bottomright", pch = c(1, 16), paste0("k=", c(5, 10)),
       col = c("#7fc97f", "#386cb0"))
This is an artificial data set, though, and the ground truth is that the curve is monotonic. Here we present some real data where the relationships between the variables are expected to be monotonic but may not be.
# Load the image-quality data set shipped with the package and display
# its first few rows.
data(imagemetrics)
first_rows <- head(imagemetrics)
knitr::kable(first_rows, digits = 3)
Here we have, in long format, a set of image quality metrics for a set of 180 distorted binary images. There are 12 "ground truth" images and 15 distorted versions of each. A number of people were surveyed for their opinion of the quality of each distorted image compared to the baseline, on a scale from 0 to 100. The mean rating for each image is in the column MOS — designated the 'Mean Opinion Score' — and the standard deviation of the ratings is captured in the column std. We then compute a set of image quality metrics whose scores are in the column 'score' and whose names are in the column 'metric'.
Here are two example images the respondents would have been shown: the baseline image on the left and the distorted image on the right. This image had a MOS rating of 52.
We expect there to be a correspondence between the image quality metrics and the MOS and that it should be monotonic, though it may be curved and may have a lot of noise in it. The quality of the images is very low compared to what might be typical for full color or grayscale images.
# Human-readable facet labels, keyed by the metric codes in the data.
metric_names <- c(
  `catsim`     = "CatSIM (Cohen)",
  `msssim`     = "MS-SSIM",
  `AdjRand`    = "Adjusted Rand",
  `catsimrand` = "CatSIM (AdjRand)",
  `catsimacc`  = "CatSIM (Accuracy)",
  `Accuracy`   = "Accuracy",
  `cwssim`     = "CW-SSIM",
  `Cohen`      = "Cohen's kappa",
  `catsimjacc` = "CatSIM (Jaccard)",
  `Jaccard`    = "Jaccard"
)

# Scatterplot of MOS against each metric's score, faceted by metric, with
# a GCV-selected GAM smooth (blue) and a straight-line fit (pink).
# `qplot()` is deprecated as of ggplot2 3.4, so the plot is built with
# `ggplot()` + `geom_point()`; the fixed aesthetics that qplot wrapped in
# `I()` become plain geom_point() arguments.
ggplot(imagemetrics, aes(x = score, y = MOS)) +
  geom_point(color = "#b2df8a", alpha = 0.8, size = 0.75) +
  theme_bw() +
  geom_smooth(
    method = "gam", se = FALSE, color = "#1f78b4",
    method.args = list(method = "GCV.Cp")
  ) +
  geom_smooth(method = "lm", se = FALSE, color = "#fb9a99") +
  ylab("MOS") +
  xlab(NULL) +
  facet_wrap(~metric, labeller = as_labeller(metric_names)) +
  theme(
    strip.background = element_rect(fill = alpha("#a6cee3", 0.3)),
    axis.text.x = element_text(angle = 45, hjust = 1)
  )
There is a GAM model on the plot and it's fitted by generalized cross validation, but it has a problem: the fit really ought to be monotonic, though it's interesting if it isn't. We should have some sense of how well a monotonic fit works for this data and then can judge which metric best explains the variation in MOS. We can get a sense of how well each method relates to the MOS rating by looking at their correlations:
# Per-metric correlations between the metric's score and the MOS rating,
# using three different correlation measures, sorted by Spearman's rho.
groupcorr <- imagemetrics %>%
  group_by(metric) %>%
  summarize(
    Spearman = cor(MOS, score, method = "spearman", use = "complete.obs"),
    Kendall  = cor(MOS, score, method = "kendall",  use = "complete.obs"),
    Pearson  = cor(MOS, score, method = "pearson",  use = "complete.obs")
  ) %>%
  arrange(Spearman)
knitr::kable(groupcorr, digits = 3)
Because we want a monotonic relationship, a non-parametric correlation such as Spearman's says more about the correspondence of the metrics to the MOS in the way that we are interested in. However, we are probably also interested in a flexible model explaining their relationship, so here we fit a monotonic GAM with k = 20 and compare the deviance explained by that model to an unconstrained GAM with the same parameters.
# Fit a monotonic spline (monogamy::mspline) of MOS on a metric's score.
fit_mspline <- function(df, k = 20, lower = NA, upper = NA) {
  mspline(x = df[["score"]], y = df[["MOS"]], k = k,
          lower = lower, upper = upper)
}

# Fit the analogous unconstrained GAM (cubic regression spline) with mgcv.
fit_mgcv <- function(df, k = 20, ...) {
  pass_df <- data.frame(x = df[["score"]], y = df[["MOS"]])
  mgcv::gam(y ~ 1 + s(x, k = k, bs = "cr"), data = pass_df, ...)
}

# Display labels for the metrics.  NOTE(review): these are matched to the
# models purely by position, so this vector must stay in the exact order
# that `group_by(metric) %>% nest()` produces the groups; using
# `metricmodels$metric` directly (the commented line below) would be safer.
metricnames <- c(
  "CatSIM (Cohen)", "CatSIM (AdjRand)", "CatSIM (Accuracy)", "Accuracy",
  "Adjusted Rand", "Cohen's kappa", "MS-SSIM", "CW-SSIM",
  "CatSIM (Jaccard)", "Jaccard"
)

# One nested data frame per metric; fit both models and compute the
# proportion of variance/deviance each explains.  The original code also
# computed `varMOS`, an unused duplicate of `totalvar`; it is dropped here.
metricmodels <- imagemetrics %>%
  group_by(metric) %>%
  nest() %>%
  mutate(
    msplinemodel = map(data, fit_mspline),
    totalvar = map_dbl(data, ~ var(.x$MOS)),
    residuals = map(msplinemodel, residuals),
    msplineVAR = map_dbl(residuals, var),
    # "Deviance explained" for the Gaussian monotone fit:
    # 1 - residual variance / total variance.
    msplineDevExp = 1 - msplineVAR / totalvar,
    # possibly() yields NA instead of aborting the pipeline if a fit fails.
    mgcvmodel = map(data, possibly(fit_mgcv, otherwise = NA)),
    mgcvDevExp = 1 - map_dbl(mgcvmodel, ~ .x$deviance / .x$null.deviance)
  )
metricmodels$label <- metricnames
## metricmodels$label <- metricmodels$metric

knitr::kable(
  x = metricmodels %>% ungroup() %>%
    select(label, mgcvDevExp, msplineDevExp),
  col.names = c("Metric", "GAM % Deviance", "mono-GAM % Deviance"),
  digits = 3
)
For most of the models, there is not much difference between the fit of the monotonic GAM and the unconstrained GAM. A discrepancy between the two indicates there is some region where the image quality metric fails to be monotonic in its relation to the MOS.
One can ask about fitting the reverse, explaining the metric by the MOS, and that table can be seen here, though these results cannot be directly compared across the metrics because the response variable is not the same:
# Reverse-direction monotone fit: model a metric's score as a monotonic
# function of MOS.
fit_revmspline <- function(df, k = 20, lower = NA, upper = NA) {
  mspline(y = df[["score"]], x = df[["MOS"]], k = k,
          lower = lower, upper = upper)
}

# Reverse-direction unconstrained GAM via mgcv.
fit_revmgcv <- function(df, k = 20, ...) {
  pass_df <- data.frame(x = df[["MOS"]], y = df[["score"]])
  mgcv::gam(y ~ 1 + s(x, k = k, bs = "cr"), data = pass_df, ...)
}

# As before: nest by metric, fit both models, and compare the fraction of
# deviance explained — here the response is the metric score, not MOS.
revmetricmodels <- imagemetrics %>%
  group_by(metric) %>%
  nest() %>%
  mutate(
    msplinemodel = map(data, fit_revmspline),
    totalvar = map_dbl(data, ~ var(.x$score)),
    residuals = map(msplinemodel, residuals),
    msplineVAR = map_dbl(residuals, var),
    msplineDevExp = 1 - msplineVAR / totalvar,
    mgcvmodel = map(data, possibly(fit_revmgcv, otherwise = NA)),
    mgcvDevExp = 1 - map_dbl(mgcvmodel, ~ .x$deviance / .x$null.deviance)
  )
revmetricmodels$label <- metricnames
## revmetricmodels$label <- revmetricmodels$metric

knitr::kable(
  x = revmetricmodels %>% ungroup() %>%
    select(label, mgcvDevExp, msplineDevExp),
  col.names = c("Metric", "GAM % Deviance", "mono-GAM % Deviance"),
  digits = 3
)
We can plot the monotonic GAMs and unconstrained GAMs for each metric here:
# For each metric, draw the monotone fit (plot method for mspline, red)
# and overlay the unconstrained mgcv fit in blue.
# `seq_len()` replaces `1:nrow(...)`, which misbehaves on zero rows.
par(mfrow = c(1, 2))
for (i in seq_len(nrow(metricmodels))) {
  plot(metricmodels$msplinemodel[[i]],
       xlab = metricmodels$metric[i], # metricmodels$label[i],
       ylab = "MOS", pch = 16, xlim = c(0, 1), ylim = c(0, 100))
  # Fitted values come back in data order, so sort by x before drawing.
  mgcvx <- metricmodels$mgcvmodel[[i]]$model$x
  mgcvy <- metricmodels$mgcvmodel[[i]]$fitted.values
  rearr <- order(mgcvx)
  lines(mgcvx[rearr], mgcvy[rearr], col = "blue")
  legend("bottomright", lty = 1, c("Mono", "Uncons"),
         col = c("red", "blue"))
}
For several of these, there is no difference between the constrained and unconstrained models since they were already monotonic.
We can also investigate the fits when the relationship is modeled in the other direction.
# Same comparison for the reverse-direction models.  Bug fix: the loop now
# iterates over the rows of revmetricmodels itself — the original used
# nrow(metricmodels), which only worked because the two tables happen to
# have the same number of rows.
par(mfrow = c(1, 2))
for (i in seq_len(nrow(revmetricmodels))) {
  plot(revmetricmodels$msplinemodel[[i]],
       ylab = revmetricmodels$metric[i], # revmetricmodels$label[i],
       xlab = "MOS", pch = 16, xlim = c(0, 100), ylim = c(0, 1))
  # Sort the mgcv fitted values by x so lines() draws a single curve.
  mgcvx <- revmetricmodels$mgcvmodel[[i]]$model$x
  mgcvy <- revmetricmodels$mgcvmodel[[i]]$fitted.values
  rearr <- order(mgcvx)
  lines(mgcvx[rearr], mgcvy[rearr], col = "blue")
  legend("bottomright", lty = 1, c("Mono", "Uncons"),
         col = c("red", "blue"))
}
These diagnostics can be inspected for completeness, but they all seem to check out, as expected.
# Run mgcv's standard diagnostics (gam.check) for each unconstrained fit,
# labelling each block of output with the metric's name.  A newline is
# appended to the label — the original cat() had none, so the metric name
# ran straight into the gam.check output.  `seq_len()` replaces `1:nrow()`.
for (i in seq_len(nrow(metricmodels))) {
  cat(metricmodels$metric[i], "\n") # metricmodels$label[i]
  mgcv::gam.check(metricmodels$mgcvmodel[[i]])
  cat("\n\n\n\n\n")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.