inst/doc/cmds-variables.R

## ----knitr options, echo=FALSE, message=FALSE, warning=FALSE------------------
knitr::opts_chunk$set(
  message = FALSE,
  fig.height = 6, fig.align = "center"
)

## ----diabetes pca-------------------------------------------------------------
x <- scale(heplots::Diabetes[, -6L], center = TRUE, scale = TRUE)
s <- svd(x)
r <- length(s$d)

## ----inertia------------------------------------------------------------------
# inertia of the (scaled) data
sum(x^2)
# inertia of the case and variable factors
sum(s$u^2)
sum(s$v^2)
# inertia of the diagonal factor
sum(s$d^2)

## ----case geometry, fig.width=5.5---------------------------------------------
# distances between cases
x.dist <- dist(x)
# distances between cases (principal coordinates)
s.dist <- dist(s$u[, 1:2] %*% diag(s$d[1:2]))
# scatterplot
plot(
  x = as.vector(x.dist),
  y = as.vector(s.dist),
  xlim = c(0, 8), ylim = c(0, 8),
  asp = 1, pch = 19, cex = .5,
  xlab = "Case distances in centered and scaled data",
  ylab = "Case point distances in planar biplot"
)
lines(x = c(0, 8), y = c(0, 8))

## ----variable geometry, fig.width=5.5-----------------------------------------
# correlations between variables
x.cor <- cor(x)
# magnitudes of variable vectors
s.len <- apply(s$v[, 1:2] %*% diag(s$d[1:2]), 1, norm, "2")
# cosines between variables (principal coordinates)
s.cor <- (s$v[, 1:2] / s.len) %*% diag(s$d[1:2]^2) %*% t(s$v[, 1:2] / s.len)
# scatterplot
plot(
  x = as.vector(x.cor[lower.tri(x.cor)]),
  y = as.vector(s.cor[lower.tri(s.cor)]),
  xlim = c(-1, 1), ylim = c(-1, 1),
  asp = 1, pch = 19, cex = .5,
  xlab = "Variable correlations in centered and scaled data",
  ylab = "Variable vector cosines in planar biplot"
)
lines(x = c(-1, 1), y = c(-1, 1))

## ----multidimensional scaling of cities, fig.width=5.5------------------------
d <- as.matrix(UScitiesD)
cent <- diag(1, nrow(d)) - matrix(1/nrow(d), nrow(d), nrow(d))
d.cent <- -.5 * cent %*% (d^2) %*% cent
d.cmds <- svd(d.cent)
d.coord <- d.cmds$u[, 1:2] %*% diag(sqrt(d.cmds$d[1:2]))
# scatterplot
plot(
  x = as.vector(UScitiesD),
  y = as.vector(dist(d.coord)),
  xlim = c(0, max(UScitiesD)), ylim = c(0, max(UScitiesD)),
  asp = 1, pch = 19, cex = .5,
  xlab = "City road distances",
  ylab = "Point distances in planar CMDS"
)
lines(x = c(0, max(UScitiesD)), y = c(0, max(UScitiesD)))

## ----multidimensional scaling of cities plot, fig.height=5, fig.width=7-------
plot(
  d.coord, pch = NA, asp = 1,
  xlab = "First principal coordinate", ylab = "Second principal coordinate"
)
text(d.coord, labels = rownames(d), cex = .9)

## ----multidimensional scaling of diabetes, fig.width=5.5----------------------
# covariances and standard deviations
c <- cov(x)
s <- diag(sqrt(diag(c)))
# eigendecomposition of covariance matrix
c.eigen <- eigen(c)
# artificial coordinates
c.coord <- c.eigen$vectors[, 1:2] %*% diag(sqrt(c.eigen$values[1:2]))
# scatterplot
c.inner <- c.coord %*% t(c.coord)
plot(
  x = as.vector(c[lower.tri(c)]),
  y = as.vector(c.inner[lower.tri(c.inner)]),
  xlim = range(c[lower.tri(c)]), ylim = range(c[lower.tri(c)]),
  asp = 1, pch = 19, cex = .5,
  xlab = "Measurement covariances in unscaled data",
  ylab = "Vector inner products in planar CMDS"
)
lines(x = range(c[lower.tri(c)]), y = range(c[lower.tri(c)]))

## ----multidimensional scaling of diabetes plot, fig.width=7-------------------
c <- cor(heplots::Diabetes[, -6L])
c.eigen <- eigen(c)
c.coord <- c.eigen$vectors[, 1:2] %*% diag(sqrt(c.eigen$values[1:2]))
plot(
  c.coord, pch = NA, asp = 1,
  xlab = "First principal coordinate", ylab = "Second principal coordinate"
)
arrows(0, 0, c.coord[, 1L], c.coord[, 2L])
text(c.coord, labels = rownames(c), cex = .9)

## ----big guns-----------------------------------------------------------------
library(ordr)
library(dplyr)

## ----QS top university rankings-----------------------------------------------
data(qswur_usa)
print(qswur_usa)

## ----calibrate rankings-------------------------------------------------------
qswur_usa %>%
  filter(year == 2020L) %>%
  select(institution, starts_with("rk_")) %>%
  mutate_at(
    vars(starts_with("rk_")),
    ~ match(., sort(unique(as.numeric(.))))
  ) %>%
  filter_at(vars(starts_with("rk_")), ~ ! is.na(.)) ->
  qswur_usa2020
print(qswur_usa2020)

## ----Kendall rank correlations, fig.width=7-----------------------------------
corr <- cor(select(qswur_usa2020, starts_with("rk_")), method = "kendall")
heatmap(corr, scale = "none")

## ----multidimensional scaling of variables, fig.width=7, eval=FALSE, echo=FALSE----
#  corr.eigen <- eigen(corr)
#  corr.coord <- corr.eigen$vectors %*% diag(sqrt(corr.eigen$values))
#  plot(corr.coord, pch = NA, asp = 1, xlab = "", ylab = "")
#  arrows(0, 0, corr.coord[, 1], corr.coord[, 2])
#  text(corr.coord, labels = rownames(corr))

## ---- fig.width=7-------------------------------------------------------------
eigen_ord(corr) %>%
  as_tbl_ord() %>%
  augment_ord() %>%
  mutate_rows(metric = stringr::str_remove(name, "rk_")) %>%
  confer_inertia(1) ->
  c_eigen
c_eigen %>%
  ggbiplot() +
  theme_minimal() +
  geom_unit_circle() +
  geom_rows_vector() +
  geom_rows_text_radiate(aes(label = metric)) +
  scale_x_reverse(expand = expansion(add = .4)) +
  scale_y_continuous(expand = expansion(add = .3)) +
  ggtitle("Kendall correlations between university rankings",
          "CMDS correlation monoplot")

## -----------------------------------------------------------------------------
c_eigen %>%
  fortify(.matrix = "rows") %>%
  select(-name, -.matrix)

## ---- fig.width=7-------------------------------------------------------------
c_eigen %>%
  ggbiplot(aes(x = 2, y = 3)) +
  theme_minimal() +
  geom_unit_circle() +
  geom_rows_vector() +
  geom_rows_text_radiate(aes(label = metric)) +
  scale_x_continuous(expand = expansion(add = .5)) +
  scale_y_continuous(expand = expansion(add = .5)) +
  ggtitle("Kendall correlations between university rankings",
          "CMDS correlation monoplot, second & third principal coordinates")

Try the ordr package in your browser

Any scripts or data that you put into this service are public.

ordr documentation built on Oct. 21, 2022, 1:07 a.m.