In gzt/monogamy: Monotone Spline Fitting with MGCV

knitr::opts_chunk$set(
                      collapse = TRUE,
                      echo = TRUE,
  comment = "#>"
)

require(monogamy)
library(dplyr)
library(ggplot2)
library(knitr)
library(mgcv)
library(tidyr)
library(purrr)

Basic Model Fitting

This example from the mgcv package demonstrates fitting a monotonic GAM model with two different settings for the k parameter.

set.seed(20200410)
x <- runif(100) * 4 - 1
x <- sort(x)
f <- exp(4 * x) / (1 + exp(4 * x))
y <- f + rnorm(100) * 0.1
#par(mfrow = c(1, 2))
plot(x, y)
fv <- mspline(x, y, 5)
lines(x, predict(fv, x), col = "#7fc97f")
residfv_5 <- residuals(fv)
fv <- mspline(x, y, 10)
 lines(x, predict(fv, x), col = "#386cb0")
legend("bottomright", lty = 1, paste0("k=", c(5, 10)),
       col = c("#7fc97f", "#386cb0"))

residfv_10 <- residuals(fv)
plot(x = x, y = residfv_5, main = "Residuals vs x plot",
     col = "#7fc97f", ylab = "residual")
points(x = x, y = residfv_10, col = "#386cb0", pch = 16)
legend("bottomright", pch = c(1, 16), paste0("k=", c(5, 10)),
       col = c("#7fc97f", "#386cb0"))

This is an artificial data set, though, and the ground truth is that the curve is monotonic. Here we present some real data where the relationships between the variables are expected to be monotonic but may not be.

Application to Image Comparison Data

data(imagemetrics)
knitr::kable(head(imagemetrics), digits = 3)

Here we have, in long format, a set of image quality metrics for a set of 180 distorted binary images. There are 12 "ground truth" images and 15 distorted versions of them. A number of people were surveyed for their opinion of the quality of the distorted images compared to the baseline on a scale from 0 - 100. The mean for each image is in the column MOS - designated the 'Mean Opinion Score' or MOS - and the standard deviation of their rating is captured in the column std. We then compute a set of image quality metrics whose scores are in 'score' and whose names are in 'metric'.

Baseline Image #1 Distorted Image #1-8

Here are two example images the respondents would have been shown: the baseline image on the left and the distorted image on the right. This image had a MOS rating of 52.

We expect there to be a correspondence between the image quality metrics and the MOS and that it should be monotonic, though it may be curved and may have a lot of noise in it. The quality of the images is very low compared to what might be typical for full color or grayscale images.

metric_names <- c(
  `catsim` = "CatSIM (Cohen)",
  `msssim` = "MS-SSIM",
  `AdjRand` = "Adjusted Rand",
  `catsimrand` = "CatSIM (AdjRand)",
  `catsimacc` = "CatSIM (Accuracy)",
  `Accuracy` = "Accuracy",
  `cwssim` = "CW-SSIM",
  `Cohen` = "Cohen's kappa",
  `catsimjacc` = "CatSIM (Jaccard)",
  `Jaccard` = "Jaccard"
)
qplot(
  x = score, y = MOS, color = I("#b2df8a"), alpha = I(0.8), size = I(0.75),
  data = imagemetrics) +
  theme_bw() + geom_smooth(
    method = "gam", se = FALSE, color = "#1f78b4",
    method.args = list(method = "GCV.Cp")
    ) +
    geom_smooth(method="lm", se = FALSE, color = "#fb9a99") +
  ylab("MOS") + xlab(NULL) +
  facet_wrap(~metric, labeller = as_labeller(metric_names)) +
  theme(
    strip.background = element_rect(fill = alpha("#a6cee3", 0.3)),
    axis.text.x = element_text(angle = 45, hjust = 1)
  ) +
    NULL

There is a GAM model on the plot and it's fitted by generalized cross validation, but it has a problem: the fit really ought to be monotonic, though it's interesting if it isn't. We should have some sense of how well a monotonic fit works for this data and then can judge which metric best explains the variation in MOS. We can get a sense of how well each method relates to the MOS rating by looking at their correlations:

groupcorr <- imagemetrics %>% group_by(metric) %>%
    summarize(Spearman = cor(MOS,score, method="spearman", use = "complete.obs"),
              Kendall = cor(MOS, score, method="kendall", use = "complete.obs"),
              Pearson = cor(MOS, score, method="pearson", use = "complete.obs")) %>%
    arrange(Spearman)

knitr::kable(groupcorr,digits=3)

A non-parametric correlation such as Spearman's, since we want a monotonic relationship, says more about the correspondence of the metrics to the MOS in the way that we are interested in. However, we are probably interested in a flexible model explaining their relationship, so here we fit a monotonic GAM with k = 20 and compare the deviance explained by the model to an unconstrained GAM with the same parameters.

fit_mspline <- function(df, k = 20, lower = NA, upper = NA) {
    mspline(x = df[["score"]], y = df[["MOS"]],
            k = k, lower = lower, upper = upper)
}

fit_mgcv <- function(df, k = 20, ...) {
     x <- df[["score"]]
     y <- df[["MOS"]]
     pass_df <- data.frame(x = x, y = y)
    mgcv::gam(y ~ 1+ s(x, k = k, bs = "cr"), data = pass_df,...)
}

metricnames <- c("CatSIM (Cohen)",
                 "CatSIM (AdjRand)",
                 "CatSIM (Accuracy)",
                 "Accuracy",
                 "Adjusted Rand",
                 "Cohen's kappa",
                 "MS-SSIM",
                 "CW-SSIM",
                 "CatSIM (Jaccard)",
                 "Jaccard"
)

imagemetrics %>% group_by(metric) %>% nest() %>%
    mutate(msplinemodel = map(data, fit_mspline),
           totalvar = map_dbl(data, ~ var(.x$MOS)),
           residuals = map(msplinemodel, residuals),
           varMOS = map_dbl(data, ~ var(.x$MOS)),
           msplineVAR = map_dbl(residuals, var),
           msplineDevExp = 1 - msplineVAR/totalvar,
           mgcvmodel = map(data, possibly(fit_mgcv, otherwise = NA)),
           mgcvDevExp = 1 - map_dbl(mgcvmodel, ~.x$deviance/.x$null.deviance)) ->
    metricmodels
metricmodels$label <- metricnames
## metricmodels$label <- metricmodels$metric
knitr::kable(x = metricmodels %>%
                 ungroup() %>%
                 select(label, mgcvDevExp, msplineDevExp),
            col.names = c( "Metric", "GAM % Deviance", "mono-GAM % Deviance"),
             digits = 3)

For most of the models, there is not much difference between the fit of the monotonic GAM and the unconstrained GAM. A discrepancy between the two indicates there is some region where the image quality metric fails to be monotonic in its relation to the MOS.

One can ask about fitting the reverse, explaining the metric by the MOS, and that table can be seen here, though these results cannot be directly compared across the metrics because the response variable is not the same:

fit_revmspline <- function(df, k = 20, lower = NA, upper = NA) {
    mspline(y = df[["score"]], x = df[["MOS"]],
            k = k, lower = lower, upper = upper)
}

fit_revmgcv <- function(df, k = 20, ...) {
     y <- df[["score"]]
     x <- df[["MOS"]]
     pass_df <- data.frame(x = x, y = y)
    mgcv::gam(y ~ 1+ s(x, k = k, bs = "cr"), data = pass_df,...)
}

imagemetrics %>% group_by(metric) %>% nest() %>%
    mutate(msplinemodel = map(data, fit_revmspline),
           totalvar = map_dbl(data, ~ var(.x$score)),
           residuals = map(msplinemodel, residuals),
           msplineVAR = map_dbl(residuals, var),
           msplineDevExp = 1 - msplineVAR/totalvar,
           mgcvmodel = map(data, possibly(fit_revmgcv, otherwise = NA)),
           mgcvDevExp = 1 - map_dbl(mgcvmodel, ~.x$deviance/.x$null.deviance)) ->
    revmetricmodels
revmetricmodels$label <- metricnames
## revmetricmodels$label <- revmetricmodels$metric
knitr::kable(x = revmetricmodels %>%
                ungroup() %>%
                 select(label, mgcvDevExp, msplineDevExp),
            col.names = c( "Metric", "GAM % Deviance", "mono-GAM % Deviance"),
             digits = 3)

Plotting the monotonic GAM

We can plot the monotonic GAMs and unconstrained GAMs for each metric here:

par(mfrow = c(1,2))
for (i in 1:nrow(metricmodels)) {
    plot(metricmodels$msplinemodel[[i]],
         xlab = metricmodels$metric[i], #metricmodels$label[i],
         ylab = "MOS",
         pch = 16, xlim = c(0,1), ylim = c(0,100))
    mgcvx <- metricmodels$mgcvmodel[[i]]$model$x
    mgcvy <- metricmodels$mgcvmodel[[i]]$fitted.values
    rearr <- order(mgcvx)
    lines(mgcvx[rearr], mgcvy[rearr], col = "blue")
    legend("bottomright", lty = 1, c("Mono", "Uncons"),
           col = c("red", "blue"))
}

For several of these, there is no difference between the constrained and unconstrained models since they were already monotonic.

Plots with MOS as the x-axis

We can also investigate if we modeled it the other direction.

par(mfrow = c(1,2))
for (i in 1:nrow(metricmodels)) {
    plot(revmetricmodels$msplinemodel[[i]],
         ylab = revmetricmodels$metric[i],#revmetricmodels$label[i],
         xlab = "MOS", pch = 16, xlim = c(0,100), ylim = c(0,1))
    mgcvx <- revmetricmodels$mgcvmodel[[i]]$model$x
    mgcvy <- revmetricmodels$mgcvmodel[[i]]$fitted.values
    rearr <- order(mgcvx)
    lines(mgcvx[rearr], mgcvy[rearr], col = "blue")
    legend("bottomright", lty = 1, c("Mono", "Uncons"),
           col = c("red", "blue"))
}

Unconstrained GAM diagnostics

These diagnostics can be inspected for completeness, but they all seem to check out, as expected.

for (i in 1:nrow(metricmodels)) {
    cat( metricmodels$metric[i]) #metricmodels$label[i])
    mgcv::gam.check(metricmodels$mgcvmodel[[i]])
    cat("\n\n\n\n\n")
}