knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
Sys.setenv(LANGUAGE = "en") # Force locale
## Install extra packages (if needed)
# install.packages("folio")

library(nexus)

Reference Groups

Provenance studies typically rely on two approaches, which can be used together:

For the sake of the demonstration, let's assume that a third of the samples are of unknown provenance: missing values (NA) can be used to specify that a sample does not belong to any group.

## Data from Wood and Liu 2023
data("bronze", package = "folio")
dynasty <- as.character(bronze$dynasty) # Save original data for further use

## Randomly add missing values
set.seed(12345) # Set seed for reproductibility
n <- nrow(bronze)
bronze$dynasty[sample(n, size = n / 3)] <- NA

When coercing a data.frame to a CompositionMatrix object, nexus allows to specify whether an observation belongs to a specific group (or not):

## Use the third column (dynasties) for grouping
coda <- as_composition(bronze, parts = 4:11, groups = 3)

Alternatively, group() allows to set groups of an existing CompositionMatrix.

## Create a composition data matrix
coda <- as_composition(bronze, parts = 4:11)

## Use the third dynasties for grouping
coda <- group(coda, by = bronze$dynasty)

Once groups have been defined, they can be used by further methods (e.g. plotting). Note that for better readability, you can select only some of the parts (e.g. major elements):

## Select major elements
major <- coda[, is_element_major(coda)]

## Compositional bar plot
barplot(major, order_rows = "Cu", space = 0)
## Matrix of ternary plots
pairs(coda)

Log-Ratio Analysis

## CLR
clr <- transform_clr(coda, weights = TRUE)

## PCA
lra <- pca(clr)

## Visualize results
viz_individuals(
  x = lra, 
  extra_quali = group_names(clr),
  color = c("#004488", "#DDAA33", "#BB5566"),
  hull = TRUE
)

viz_variables(lra)

Discriminant Analysis

## Subset training data
train <- coda[is_assigned(coda), ]

## ILR
ilr_train <- transform_ilr(train)

## MANOVA
fit <- manova(ilr_train ~ group_names(ilr_train))
summary(fit)

The MANOVA results suggest that there are statistically significant differences between groups.

## LDA
discr <- MASS::lda(ilr_train, grouping = group_names(ilr_train))
plot(discr)
## Back transform results
transform_inverse(discr$means, origin = ilr_train)
## Subset unassigned samples
test <- coda[!is_assigned(coda), ]
ilr_test <- transform_ilr(test)

## Predict group membership
results <- predict(discr, ilr_test)

## Assess the accuracy of the prediction
(ct <- table(
  predicted = results$class, 
  expected = dynasty[!is_assigned(coda)]
))
diag(proportions(ct, margin = 1))

## Total percent correct
sum(diag(proportions(ct)))


tesselle/nexus documentation built on June 1, 2025, 9:04 p.m.