Nothing
#' Compute Peak Persistence Diagram
#'
#' This computes the peak persistence diagram over a range of
#' lambda. This can help determine the proper elasticity (penalty).
#' This can be slow and recommended to run in parallel
#'
#' @param f A numeric matrix of shape \eqn{M \times N} specifying a sample of
#' \eqn{N} curves observed on a grid of size \eqn{M}.
#' @param time A numeric vector of length \eqn{M} specifying the common grid on
#' which all curves `f` have been observed.
#' @param max_lam maximum value of lambda. Defaults to `2`
#' @param num_lam number of steps. Defaults to `10`
#' @param pt the percentile of negative curvature of raw data Defaults to `.15`
#' @param penalty_method A string specifying the penalty term used in the
#' formulation of the cost function to minimize for alignment. Choices are
#' `"roughness"` which uses the norm of the second derivative, `"geodesic"`
#' which uses the geodesic distance to the identity and `"norm"` which uses
#' the Euclidean distance to the identity. Defaults to `"roughness"`.
#' @param centroid_type A string specifying the type of centroid to align to.
#' Choices are `"mean"` or `"median"`. Defaults to `"mean"`.
#' @param center_warpings A boolean specifying whether to center the estimated
#' warping functions. Defaults to `TRUE`.
#' @param smooth_data A boolean specifying whether to smooth curves using a box
#' filter. Defaults to `FALSE`.
#' @param sparam An integer value specifying the number of times to apply the
#' box filter. Defaults to `25L`. This is used only when `smooth_data = TRUE`.
#' @param parallel A boolean specifying whether to run calculations in parallel.
#' Defaults to `TRUE`.
#' @param cores number of cores in parallel (default=-1, means all cores)
#' @param optim_method A string specifying the algorithm used for optimization.
#' Choices are `"DP"`, `"DPo"`, and `"RBFGS"`. Defaults to `"DP"`.
#' @param max_iter An integer value specifying the maximum number of iterations.
#' Defaults to `20L`.
#'
#' @return lam_opt optimal lam
#'
#' @keywords srsf alignment
#' @references Kim, Woo Min, Sutanoy Dasgupta, and Anuj Srivastava. "Peak-persistence diagrams for estimating shapes and functions from noisy data." arXiv preprint arXiv:2305.04826 (2023).
#'
#' @export
#' @examples
#' \dontrun{
#' out <- ppd(simu_data$f, simu_data$time)
#' }
ppd <- function(f,
time,
max_lam = 2,
num_lam = 10,
pt = 0.15,
penalty_method = c("roughness", "geodesic", "norm"),
centroid_type = c("mean", "median"),
center_warpings = TRUE,
smooth_data = FALSE,
sparam = 25L,
parallel = TRUE,
cores = -1,
optim_method = c("DP", "DPo", "DP2", "RBFGS"),
max_iter = 20L){
penalty_method <- rlang::arg_match(penalty_method)
centroid_type <- rlang::arg_match(centroid_type)
optim_method <- rlang::arg_match(optim_method)
# obtain a lambda candidate set
lam_vec <- seq(0, max_lam, length.out = num_lam)
fns <- list()
for (i in 1:num_lam){
obj <- time_warping(f, time, lam_vec[i],
penalty_method, centroid_type, center_warpings,
smooth_data, sparam, parallel, cores,
optim_method, max_iter)
fns[[i]] <- obj$fn
}
# peak persistent diagram
# get the trehsold for significant peak
diff_t = mean(diff(time))
taus = c()
# compute tau values
for (i in 1:ncol(f)){
idx = findpeaks(f[, i])[,2]
df2 = gradient(gradient(f[,i], diff_t), diff_t)
tau = -df2 / max(-df2)
tau[tau < 0] = 0
taus = c(taus, c(tau[idx]))
}
th = stats::quantile(taus, pt)
obj <- getPPDinfo(time, fns, lam_vec, th)
persistent_peak_labels <- getPersistentPeaks(t(obj$IndicatorMatrix))
# choose optimal lambda
ref_row = rep(0, ncol(obj$IndicatorMatrix))
ref_row[persistent_peak_labels] <- 1
n_lams = nrow(obj$IndicatorMatrix)
hamming_distances = rep(0, n_lams)
exact_match_indices = c()
for (i in 1:n_lams){
comp = !is.nan(obj$IndicatorMatrix[i,])
# check for exact match
if (identical(comp, ref_row)){
exact_match_indices = c(exact_match_indices, i)
}
# calculate Hamming distance
hamming_distances[i] = sum(comp != ref_row)
}
if (length(exact_match_indices) > 0){
idx_opt = min(exact_match_indices)
} else {
min_distance_indices = which.min(hamming_distances)
idx_opt = min(min_distance_indices)
}
# draw PPD barchart
drawPPDBarChart(obj$IndicatorMatrix, obj$Heights, lam_vec, idx_opt)
# draw PPD surface
drawPPDSurface(time, lam_vec, obj$FNm, obj$Heights, obj$Locs, obj$IndicatorMatrix, obj$Labels, idx_opt)
return(lam_vec[idx_opt])
}
getPPDinfo <- function(t, Fa, lam, th){
n_lams = length(lam)
# compute the mean of each function in FN over its rows
FNm = matrix(0, nrow=length(t), ncol=n_lams)
for (i in 1:n_lams){
FNm[,i] = rowMeans(Fa[[i]])
}
# find indices of lal maxima in the first function's mean
idxMaxFirst = findpeaks(FNm[,1])[,2]
# Initialize Labels and Locations for the first function
Labels = list()
Locs = list()
Labels[[1]] = seq(1, length(idxMaxFirst))
Locs[[1]] = idxMaxFirst
# Initialize the maximum label number
labelMax = max(Labels[[1]])
# process each function to assign labels and locate peaks
for (i in 1:(n_lams-1)){
currentLabel = Labels[[i]]
obj1 = peak_successor(Fa[[i]], Fa[[i+1]], currentLabel, labelMax)
Labels[[i+1]] <- obj1$Labels
labelMax = obj1$labelMax
# find peak location is the next function's mean
idxMaxNext = findpeaks(FNm[, i+1])[,2]
Locs[[i+1]] <- idxMaxNext
}
# preprocess data to compute indicatorMatrix
obj2 = PreprocessingForPPD(t, lam , Labels, Locs, labelMax, FNm, th)
if (all(is.nan(obj2$Heights2))){
warning('All peaks are ignored. A smaller threshold is required.')
}
obj = list()
obj$IndicatorMatrix = obj2$IndicatorMatrix
obj$Curvatures = obj2$Curvatures
obj$Heights = obj2$Heights
obj$Locs = Locs
obj$Labels = Labels
obj$FNm = FNm
return(obj)
}
peak_successor <- function(f1, f2, labels1, labelMax){
# combine f1 and f2 into a 3D array and compute the mean across the second dimension
F <- array(c(f1, f2), dim = c(nrow(f1), ncol(f1), 2))
fm <- apply(F, c(1, 3), mean)
# compute peak ranges and labels for fm[,1]
obj1 = computePeakRanges(fm[,1])
ranges = obj1$ranges
idx_max1 = obj1$idx_max
# compute indices of local maxima in fm[,2]
idx_max2 = findpeaks(fm[,2])[,2]
if (length(idx_max1) == 0){
labels2 = labelMax + seq(1,length(idx_max2))
} else {
# assign labels to peaks in fm[,2] based on matching ranges in fm[,1]
labels2 = assignLabelsToPeaks(idx_max2, ranges, idx_max1, labels1)
# ensure no overlapping labels in labels2
labels2 = resolveOverlappingLabels(labels2, idx_max1, idx_max2, labels1)
# assign new labels to unmatched peaks in fm[, 2]
unmatched = labels2 == 0
labels2[unmatched] = labelMax + seq(1,sum(unmatched))
labelMax = labelMax + sum(unmatched)
}
obj = list()
obj$Labels = labels2
obj$labelMax = labelMax
return(obj)
}
computePeakRanges <- function(data){
# computes peak ranges defined by adjacent minima in the data
idx_max = findpeaks(data)[,2]
if (length(idx_max) == 0){
warning("No peaks found in f1")
obj = list()
obj$ranges = c()
obj$idx_max = c()
return(obj)
}
idx_min = findpeaks(-1*data)[,2]
idx_min = c(1, length(data), idx_min)
idx_min = sort(unique(idx_min))
ranges = matrix(0, nrow=length(idx_max), ncol=2)
for (i in 1:length(idx_max)){
ranges[i,1] = max(idx_min[idx_min<idx_max[i]])
ranges[i,2] = min(idx_min[idx_min>idx_max[i]])
}
# remove degenerate ranges
idx = which(ranges[,1] == ranges[,2])
if (length(idx) > 0){
ranges = ranges[-idx,]
}
obj = list()
obj$ranges = ranges
obj$idx_max = idx_max
return(obj)
}
assignLabelsToPeaks <- function(idx_max2, ranges, idx_max1, labels1){
# assigns labels to peaks in idx_max2 based on matching ranges in idx_max1
labels = rep(0, length(idx_max2))
for (i in 1:length(idx_max2)){
# find the range in fm[,1] that contains the current peak in fm[,2]
in_range = idx_max2[i] >= ranges[,1] & idx_max2[i] <= ranges[,2]
matching_ranges = which(in_range)
if (length(matching_ranges) > 0){
# choose the cloest peak if multiple ranges match
if (length(matching_ranges) > 1){
closest_idx = which.min(abs(idx_max1[matching_ranges] - idx_max2[i]))
matching_range = matching_ranges[closest_idx]
} else {
matching_range = matching_ranges
}
labels[i] = labels1[matching_range]
}
}
return(labels)
}
resolveOverlappingLabels <- function(labels, idx_max1, idx_max2, labels1){
# ensures no overlapping labels in the assigned labels
unique_labels = unique(labels[labels > 0])
for (label in unique_labels){
duplicates = which(labels==label)
if (length(duplicates) > 1){
# keep the cloest peak and reset others
distances = abs(idx_max1[labels==label] - idx_max2[duplicates])
min_idx = which.min(distances)
duplicates = duplicates[-min_idx]
labels[duplicates] = 0
}
}
return(labels)
}
PreprocessingForPPD <- function(t, lam, Labels, Locs, labelMax, FNm, th){
K = length(lam)
# Initialize output matrices
IndicatorMatrix = matrix(NaN, nrow=K, ncol=labelMax)
curvatures = matrix(0, nrow=K, ncol=labelMax)
heights = matrix(NaN, nrow=K, ncol=labelMax)
# assume t is uniformly spaced; compute time step
dx = t[2] - t[1]
# process each function
for (i in 1:K){
# extract the function values for the current parameter lambda
fnm = FNm[, i]
# compute negative curvature (second derivative)
negCurvature = -gradient(gradient(fnm, dx), dx)
# ensure non-negative curvature values
negCurvature[negCurvature < 0] <- 0
# normalize non-negative curvature to [0,1] if possible
maxNegCurvature = max(negCurvature)
if (maxNegCurvature > 0){
negCurvature = negCurvature / maxNegCurvature
}
# retrieve peak locations and labels for the current function
locsCurrent = Locs[[i]]
labelsCurrent = Labels[[i]]
# select negative curvature values at specified peak locations
negCurvSelected = negCurvature[locsCurrent]
# update curvatures and heights matrices at the appropriate labels
curvatures[i, labelsCurrent] = negCurvSelected
heights[i, labelsCurrent] = fnm[locsCurrent]
# apply threshold to select significant peaks based on curvature
significantLabels = labelsCurrent[negCurvSelected >= th]
# update the indicator matrix for significant peaks
IndicatorMatrix[i, significantLabels] =1
}
# compute heights 2 by multiplying heights with the indicator matrix
heights2 = IndicatorMatrix * heights
obj = list()
obj$IndicatorMatrix = IndicatorMatrix
obj$curvatures = curvatures
obj$Heights = heights
obj$Heights2 = heights2
return(obj)
}
getPersistentPeaks <- function(IndicatorMatrix){
if (length(IndicatorMatrix) == 0){
Clt2 = c()
return(Clt2)
}
# count the number of ones (occurrences) for each peak (ignore NaNs)
occurrenceCounts = rowSums(IndicatorMatrix, na.rm = TRUE)
data = c(occurrenceCounts, 0)
if (all(data==0) | length(unique(occurrenceCounts)) == 1){
Clt2 = c()
return(Clt2)
}
# compute pairwise distances between observations
pairwiseDistances <- stats::dist(data)
hc <- stats::hclust(pairwiseDistances, method = "ward.D2")
clusterAssignments = hc$labels2
referenceCluster = clusterAssignments[length(clusterAssignments)]
clusterAssignments = clusterAssignments[-length(clusterAssignments)]
# identify indices where cluster assignments differ from the reference
Clt2 = which(clusterAssignments != referenceCluster)
return(Clt2)
}
drawPPDBarChart <- function(IndicatorMatrix, Heights, lam, idx_opt){
lam_diff = lam[2] - lam[1]
len_lam = nrow(IndicatorMatrix)
labelMax = ncol(IndicatorMatrix)
plot(c(lam[1], lam[length(lam)]), c(0.5, labelMax+0.5), type = "n", xlab = "lambda", ylab = "Peak Index",
main = "", yaxt='n')
for (i in 1:len_lam){
label_all_peaks = which(!is.nan(Heights[i,]))
label_persistent_peaks = which(!is.nan(IndicatorMatrix[i,]))
if (sum(label_all_peaks) == 0){
next
}
for (j in 1:length(label_all_peaks)){
x = lam[i]
y = label_all_peaks[j]
x1 = x - lam_diff/2
x2 = x + lam_diff/2
y1 = y - 0.5
y2 = y + 0.5
if (y %in% label_persistent_peaks){
graphics::rect(x1, y1, x2, y2, col="black")
} else {
graphics::rect(x1, y1, x2, y2, col="#717171")
}
}
}
for (j in 1:labelMax){
graphics::abline(h = j+0.5, col = "blue", lwd = .5)
}
graphics::abline(v = lam[idx_opt], col = "magenta", lty=2, lwd = 2)
ticks = seq(1,labelMax)
graphics::axis(side = 2, at = ticks)
}
drawPPDSurface <- function(t,lam,FNm,Heights,Locs,IndicatorMatrix,Labels,idx_opt){
n_lams = length(lam)
labelMax = ncol(IndicatorMatrix)
LocationMatrix_full = matrix(NaN, nrow=n_lams, ncol=labelMax)
for (i in 1:n_lams){
LocationMatrix_full[i, Labels[[i]]] = Locs[[i]]
}
LocationMatrix_sig = LocationMatrix_full * IndicatorMatrix
HeightMatrix_full = Heights
HeightMatrix_sig = HeightMatrix_full * IndicatorMatrix
if (requireNamespace("plot3Drgl", quietly = TRUE)){
plot3D::persp3D(
x = t,
y = lam,
z = FNm,
col = viridisLite::viridis(128),
plot = FALSE,
main = "",
xlab="t",
ylab="g_lambda",
zlab="lambda",
ticktype = "detailed",
box = FALSE
) +
plot3D::lines3D(
x = t,
y = lam[idx_opt]*rep(1,length(t)),
z = FNm[, idx_opt],
col = "magenta",
lty=2,
lwd = 3,
add = TRUE,
plot = FALSE
)
for (j in 1:labelMax){
# find non-NaN indices for full location matrix and plot
idx_full = which(!is.nan(LocationMatrix_full[,j]))
plot3D::points3D(
x = t[LocationMatrix_full[idx_full, j]],
y = lam[idx_full],
z = HeightMatrix_full[idx_full, j],
col = "black",
lty = 1,
lwd = 1.5,
add = TRUE,
plot = FALSE
)
# find non-naN indices for significant location matrix and plot
idx_sig = which(!is.nan(LocationMatrix_sig[, j]))
plot3D::points3D(
x = t[LocationMatrix_sig[,j]],
y = lam[idx_sig],
z = HeightMatrix_sig[idx_sig, j],
col = "black",
lty = 2,
lwd = 2,
add = TRUE,
plot = FALSE
)
}
plot3Drgl::plotrgl()
rgl::par3d("windowRect" = c(0, 0, 640, 640))
rgl::grid3d(c("x", "y+", "z"))
rgl::axes3d(c('x--', "y--", 'z'))
rgl::title3d(xlab = "t", ylab = "g_lambda", zlab="lamda")
} else {
graphics::image(t, lam, FNm, xlab="t", main="g_lambda",
ylab = "lambda", col = viridisLite::viridis(128))
graphics::lines(t, lam[idx_opt]*rep(1, length(t)), col="magenta", lwd=2)
for (j in 1:labelMax){
# find non-NaN indices for full location matrix and plot
idx_full = which(!is.nan(LocationMatrix_full[,j]))
graphics::points(t[LocationMatrix_full[idx_full, j]], lam[idx_full],
col="black", lwd=1.5)
# find non-naN indices for significant location matrix and plot
idx_sig = which(!is.nan(LocationMatrix_sig[, j]))
graphics::points(t[LocationMatrix_sig[,j]], lam[idx_sig], col="black",
lwd = 2)
}
}
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.