#' Compare subsets of data against others
#'
#' Divide an angular data set into subsets and compare the characteristics of each subset against the remaining angles, to test whether they might plausibly share the same distribution.
#' @param data Vector of angles.
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param include.noise Boolean indicator: should cluster 0 (noise cluster) be included in the output?
#' @return A matrix containing the size of the subset tested; the Rayleigh test score (\code{\link{rayleigh.test}}); Watson mean test score (\code{\link{watson.common.mean.test}}); Wallraff concentration test score (\code{\link{wallraff.concentration.test}});
#' Mardia-Watson-Wheeler large-sample test of common distribution (\code{\link{mww.common.dist.LS}}); and randomised Watson two-sample test of common distribution (\code{\link{same.dist.watson.rand}}).
#' @export
#' @examples
#' c.res <- compare.clusters(q, dbscan(centres, eps = 2.5, MinPts = 4)$cluster)
compare.clusters <- function(data, clusts, include.noise = F, reps = 999) {
cr <- c()
if (include.noise) {start <- 0} else {start <- 1}
for (i in start:max(clusts)) {
q4 <- data[clusts == i]
q4.x <- data[clusts != i]
qs <- list(q4, q4.x)
ql <- c(length(q4), length(q4.x))
cr <- rbind(cr, c(subset.size = length(q4),
rayleigh.unif = rayleigh.test(q4)$p.value,
rayleigh.unif.x = rayleigh.test(q4.x)$p.value,
same.mean = watson.common.mean.test(qs)$p.val,
same.conc = wallraff.concentration.test(qs)$p.val,
same.dist.mww.ls = mww.common.dist.LS(cs.unif.scores(qs), ql)$p.val,
same.dist.watson.rand = watson.two.test.rand(q4, q4.x, NR = reps, show.progress = F)$p.val))
}
rownames(cr) <- sort(unique(clusts))[(start + 1):length(unique(clusts))]
cr
}
#' Sample statistics of subsets of data
#'
#' Divide an angular data set into subsets and obtain sample statistics for each subset.
#' @param data Vector of angles.
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param include.noise Boolean indicator: should cluster 0 (noise cluster) be included in the output?
#' @return A matrix containing the index and size of the subset tested; sample statistics as generated by \code{bc.sample.statistics}; von Mises and Jones-Pewsey maximum-likelihood estimators; AIC and BIC scores for ML von Mises and Jones-Pewsey distributions.
#' @export
#' @examples
#' c.stats <- cluster.sample.stats(q, dbscan(centres, eps = 2.5, MinPts = 4)$cluster)
cluster.sample.stats <- function(data, clusts, include.noise = F) {
cr <- c()
if (include.noise) {start <- 0} else {start <- 1}
for (i in start:max(clusts)) {
q4 <- data[clusts == i]
q4.x <- data[clusts != i]
bc <- bc.sample.statistics(q4)
bc.x <- bc.sample.statistics(q4.x)
if (length(q4) < 5) {
s <- c(id = i, size = length(q4), theta.bar = bc$mu, mean.res = bc$rho, bc.kappa = A1inv(bc$rho), kurtosis = bc$alpha2, vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA, vm.AIC = NA, jp.AIC = NA, vm.BIC = NA, jp.BIC = NA)
sx <- c(id = -i, size = length(q4.x), theta.bar = bc.x$mu, mean.res = bc.x$rho, bc.kappa = A1inv(bc.x$rho), kurtosis = bc.x$alpha2, vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA, vm.AIC = NA, jp.AIC = NA, vm.BIC = NA, jp.BIC = NA)
} else {
vm <- mle.vonmises(q4)
mle <- JP.mle(q4)
vm.x <- mle.vonmises(q4.x)
mle.x <- JP.mle(q4.x)
comp <- JP.psi.info(q4)$comparison
comp.x <- JP.psi.info(q4.x)$comparison
s <- c(id = i, size = length(q4), theta.bar = bc$mu, mean.res = bc$rho, bc.kappa = A1inv(bc$rho), kurtosis = bc$alpha2, vm.mu = vm$mu %% (pi*2), vm.kappa = vm$kappa, jp.mu = mle$mu, jp.kappa = mle$kappa, jp.psi = mle$psi, vm.AIC = comp[4,1], jp.AIC = comp[4,2], vm.BIC = comp[5,1], jp.BIC = comp[5,2])
sx <- c(id = -i, size = length(q4.x), theta.bar = bc.x$mu, mean.res = bc.x$rho, bc.kappa = A1inv(bc.x$rho), kurtosis = bc.x$alpha2, vm.mu = vm.x$mu %% (pi*2), vm.kappa = vm.x$kappa, jp.mu = mle.x$mu, jp.kappa = mle.x$kappa, jp.psi = mle.x$psi, vm.AIC = comp.x[4,1], jp.AIC = comp.x[4,2], vm.BIC = comp.x[5,1], jp.BIC = comp.x[5,2])
}
cr <- rbind(cr, s, sx)
}
rownames(cr) <- cr[,1]
cr[,-1]
}
#' Sample statistics of subsets of data, per quadrant.
#'
#' Divide an angular data set into subsets and compare the characteristics of each subset against every other subset, to test whether they might plausibly share the same distribution.
#' @param data Vector of angles.
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param include.noise Boolean indicator: should cluster 0 (noise cluster) be included in the output?
#' @return A matrix containing the size of the subset tested; the Rayleigh test score (\code{\link{rayleigh.test}}); Watson mean test score (\code{\link{watson.common.mean.test}}); Wallraff concentration test score (\code{\link{wallraff.concentration.test}});
#' Mardia-Watson-Wheeler large-sample test of common distribution (\code{\link{mww.common.dist.LS}}); and randomised Watson two-sample test of common distribution (\code{\link{same.dist.watson.rand}}).
#' @export
#' @examples
#' c.stats <- cluster.quad.tests(q, dbscan(centres, eps = 2.5, MinPts = 4)$cluster)
cluster.quad.tests <- function(data, clusts, include.noise = F, reps = 999) {
cr <- c()
rn <- c()
qc <- c()
if (include.noise) {start <- 0} else {start <- 1}
# quadrants are based on modal angle from full data set
mx <- circular(as.numeric(names(which.max(table(round(data, 1))))))
cutpoints <- sort(circular(mx + c(pi/4, 3*pi/4, 5*pi/4, 7*pi/4)) %% (2*pi))
quadrant <- rep(0, length(data))
quadrant[data > cutpoints[1] & data < cutpoints[2]] <- 1
quadrant[data > cutpoints[3] & data < cutpoints[4]] <- 1
q.4 <- (4*data) %% (2*pi)
for (i in start:max(clusts)) {
q.4.a <- q.4[(quadrant == 0) & (clusts == i)]
q.4.b <- q.4[(quadrant == 1) & (clusts == i)]
bc.a <- bc.sample.statistics(q.4.a, symmetric = F)
if (length(q.4.a) < 5) {
sa <- c(size = length(q.4.a), theta.bar = bc.a$mu, mean.res = bc.a$rho, bc.kappa = A1inv(bc.a$rho), kurtosis = bc.a$alpha2, vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA)
} else {
# get bias-corrected & ML point estimates
vm <- mle.vonmises(q.4.a, bias = T)
vm$mu <- vm$mu %% (2*pi)
jp <- JP.mle(q.4.a)
sa <- c(size = length(q.4.a), theta.bar = bc.a$mu, mean.res = bc.a$rho, bc.kappa = A1inv(bc.a$rho), kurtosis = bc.a$alpha2, vm.mu = vm$mu %% (pi*2), vm.kappa = vm$kappa, jp.mu = jp$mu, jp.kappa = jp$kappa, jp.psi = jp$psi)
}
bc.b <- bc.sample.statistics(q.4.b, symmetric = F)
if (length(q.4.b) < 5) {
sb <- c(size = length(q.4.b), theta.bar = bc.b$mu, mean.res = bc.b$rho, bc.b.kappa = A1inv(bc.b$rho), kurtosis = bc.b$alpha2, vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA)
} else {
# get bias-corrected & ML point estimates
vm <- mle.vonmises(q.4.b, bias = T)
vm$mu <- vm$mu %% (2*pi)
jp <- JP.mle(q.4.b)
sb <- c(size = length(q.4.b), theta.bar = bc.b$mu, mean.res = bc.b$rho, bc.b.kappa = A1inv(bc.b$rho), kurtosis = bc.b$alpha2, vm.mu = vm$mu %% (pi*2), vm.kappa = vm$kappa, jp.mu = jp$mu, jp.kappa = jp$kappa, jp.psi = jp$psi)
}
if (length(q.4.a) < 5 | length(q.4.b) < 5) {
qc.n <- c(rayleigh.unif.a = rayleigh.test(q.4.a)$p.value,
rayleigh.unif.b = rayleigh.test(q.4.b)$p.value,
same.mean = NA, same.conc = NA, same.dist.mww.ls = NA, same.dist.watson.rand = NA)
} else {
qs <- list(q.4.a, q.4.b)
ql <- c(length(q.4.a), length(q.4.b))
qc.n <- c(rayleigh.unif.a = rayleigh.test(q.4.a)$p.value,
rayleigh.unif.b = rayleigh.test(q.4.b)$p.value,
same.mean = watson.common.mean.test(qs)$p.val,
same.conc = wallraff.concentration.test(qs)$p.val,
same.dist.mww.ls = mww.common.dist.LS(cs.unif.scores(qs), ql)$p.val,
same.dist.watson.rand = watson.two.test.rand(q.4.a, q.4.b, NR = reps, show.progress = F)$p.val)
}
cr <- rbind(cr, sa, sb)
rn <- rbind(rn, paste(i, "a", sep = ""), paste(i, "b", sep = ""))
qc <- rbind(qc, qc.n)
}
rownames(cr) <- rn
rownames(qc) <- sort(unique(clusts))[(start + 1):length(unique(clusts))]
list(stats = cr, comparison = qc)
}
#' Compare all clusters for similar distribution
#'
#' Divide an angular data set into subsets, then subdivide those into perpendicular quadrants and obtain sample statistics for each subset.
#' @param data Vector of raw angles (not quartered/wrapped angles).
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param reps Number of randomisation tests to apply
#' @return A three-dimensional array containing p-values for tests of similar mean, concentration and distribution.
#' @export
#' @examples
#' db <- dbscan(centres, eps = 2.5, MinPts = 4)$cluster
#' res <- compare.cluster.dists(q.4, db, reps = 99)
compare.cluster.dists <- function(data, clusts, reps = 999, show.progress = F) {
n <- length(unique(clusts))
res <- array(NA, dim = c(n,n,4), dimnames = list(c(unique(clusts)), c(unique(clusts)), c("mean", "conc", "dist.mww.LS", "dist.wats.rand")))
if (show.progress) {pb <- txtProgressBar(min = 0, max = n, style = 3)}
for (i in 1:max(clusts)-1) {
for (j in (i+1):max(clusts)) {
qi <- data[clusts == i]
qj <- data[clusts == j]
qs <- list(qi, qj)
ql <- c(length(qi), length(qj))
m <- watson.common.mean.test(qs)$p.val
c <- wallraff.concentration.test(qs)$p.val
d.mww <- mww.common.dist.LS(cs.unif.scores(qs), ql)$p.val
d.wats.r <- watson.two.test.rand(qi, qj, NR = reps, show.progress = F)$p.val
res[i+1, j+1, 1] <- m
res[j+1, i+1, 1] <- m
res[i+1, j+1, 2] <- c
res[j+1, i+1, 2] <- c
res[i+1, j+1, 3] <- d.mww
res[j+1, i+1, 3] <- d.mww
res[i+1, j+1, 4] <- d.wats.r
res[j+1, i+1, 4] <- d.wats.r
if (show.progress) {setTxtProgressBar(pb, j)}
}
}
res
}
#' Plot cluster-by-cluster heatmap of p-values
#'
#' For a matrix of p-values, as generated by \code{\link{compare.cluster.dists}}, plot a heatmap highlighting p-values less than 0.05 or less than 0.1.
#' @param m1 Matrix of p-values to be plotted.
#' @param m2 Optional: Second matrix of p-values. If supplied, the lower triangular matrix of \code{m1} is overwritten with that of \code{m2} and the two are plotted together.
#' @export
#' @examples
#' cluster.pval.heatmap(res[,,1], res[,,2])
cluster.pval.heatmap <- function(m1, m2) {
if (!missing(m2)) {m1[lower.tri(m1)] <- m2[lower.tri(m2)]}
if (is.null(rownames(m1))) {rownames(m1) <- c(1:ncol(m1))}
if (is.null(colnames(m1))) {colnames(m1) <- c(1:nrow(m1))}
image(c(as.numeric(rownames(m1))), c(as.numeric(colnames(m1))), m1, col = c("orangered", "gold", "khaki1"), breaks = c(0, 0.05, 0.1, 1), xlab = "", ylab = "")
for (x in 1:ncol(m1)) {
xx <- colnames(m1)[x]
abline(h = x - 0.5)
for (y in 1:nrow(m1)) {
yy <- as.numeric(colnames(m1)[y])
text(xx, yy, round(m1[x,y],2), cex = 0.8)
abline(v = y - 0.5)
}
}
}
#' Expectation-maximization algorithm for mixture of von Mises distributions
#'
#' For a vector of angles, will return the parameters of a mixture of von Mises distributions, using an EM algorithm.
#' @param x Vector of angles to be fitted
#' @param k Number of von Mises components to be fitted
#' @param max.runs Maximum number of iterations to attempt. Default is 1000.
#' @param conv Maximum difference in log-likelihood between successive iterations before convergence is considered to have occurred. Default is 0.00001.
#' @return List containing k, with k estimates of mu, kappa, alpha (proportion of population belonging to each component), log-likelihood found, number of iterations required, and first 10 iterations. Can be passed to \code{\link{plot.EM.vonmises}} to be displayed graphically.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
EM.vonmises <- function(x, k, max.runs = 1000, conv = 0.00001) {
x <- circular(x)
# provide starting values for mu, kappa, alpha
mu <- circular(runif(k, 0, max(x)))
kappa <- runif(k,0,1)
alpha <- runif(k,0,1)
alpha <- alpha/sum(alpha) # normalise to sum to 1
# Support function - calculate log-likelihood
log.likelihood <- function(x, mu, kappa, alpha, k) {
l <- matrix(nrow = k, ncol = length(x))
for (i in 1:k) {
l[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
}
sum(log(colSums(l)))
}
log.lh <- log.likelihood(x, mu, kappa, alpha, k)
new.log.lh <- abs(log.lh) + 100
n = 0
# create array to store initial values
first.10 <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
log.lh = log.lh)
while ((abs(log.lh - new.log.lh) > conv) && (n < max.runs)) {
# Estimation - calculate z_ij
z <- matrix(0, ncol = length(x), nrow = k)
for (i in 1:k){
z[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
}
all.z <- colSums(z)
for (i in 1:k){
z[i,] <- z[i,] / all.z
}
# Maximisation - update parameters
for (i in 1:k) {
alpha[i] <- sum(z[i,]) / length (x)
mu[i] <- atan2(sum(z[i,] * sin(x)), sum(z[i,] * cos(x)))
kappa[i] <- A1inv(sum(z[i,] * (cos(x - mu[i]))) / sum(z[i,]))
# correct for negative kappa if necessary
if (kappa[i] < 0) {
kappa[i] <- abs(kappa[i])
mu[i] <- mu[i] + pi
}
}
# calculate log-likelihoods for comparison
log.lh <- new.log.lh
new.log.lh <- log.likelihood(x, mu, kappa, alpha, k)
n <- n + 1
# save first 10 iterations
if (n < 11) {
next.iter <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
log.lh = new.log.lh)
first.10 <- rbind(first.10, next.iter)
}
}
# Output: if model hasn't converged, show error message
# if it has, output the parameters & first 10 iterations
if ((abs(log.lh - new.log.lh) > conv)) {
cat ("Data hasn't converged after", n, "iterations; \n",
"Difference in log-likelihoods is",
round(abs(log.lh - new.log.lh),6))
} else {
row.names(first.10) <- first.10[,1]
list(k = k, mu = mu %% (2*pi), kappa = kappa, alpha = alpha,
log.lh = new.log.lh)
}
}
#' Classify points using EM mixture model
#'
#' Winner-takes-all classification of points based on a von Mises mixture model obtained using \code{\link{EM.vonmises}}.
#' @param data Set of angular data
#' @param model Fitted mixture model from \code{\link{EM.vonmises}}.
#' @return Vector of cluster numbers
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
#' cl <- EM.clusters(ex1, em1)
#' plot(ex1[cl == 1], stack = T)
#' points(ex1[cl == 2], col = "blue", stack = T)
EM.clusters <- function(data, model) {
components <- matrix(nrow = model$k, ncol = length(data))
for (i in 1:model$k) {
components[i, ] <- dvonmises(data, circular(model$mu[i]), model$kappa[i])
}
apply(components, 2, which.max)
}
#' Plot mixture von Mises model found by EM algorithm.
#'
#' Plot original data with individual components and fitted mixture model.
#' @param varToPlot Set of angular data
#' @param modelToPlot Fitted mixture model from \code{\link{EM.vonmises}}.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
#' plot.EM.vonmises(ex1, em1)
plot.EM.vonmises <- function(varToPlot, modelToPlot, h.breaks = 20) {
varToPlot = matrix(varToPlot) # convert from circular data
# get density of each component
x <- circular(seq(0, 2*pi, 0.01))
components <- matrix(nrow = modelToPlot$k, ncol = length(x))
for (i in 1:modelToPlot$k) {
components[i,] <- dvonmises(x, circular(modelToPlot$mu[i]), modelToPlot$kappa[i]) * modelToPlot$alpha[i]
}
mixt <- colSums(components)
# plot original data
y.max <- max(c(mixt, hist(varToPlot, plot = F, breaks = h.breaks)$density)) * 1.1 # rescale y axis
labl <- paste("Mixture of", modelToPlot$k,"von Mises")
hist(varToPlot, freq = F, ylim = c(0, y.max), main = "", col = "lightgrey", xlab = labl, xlim = c(0, 2*pi), breaks = h.breaks)
# plot vM components
for (i in 1:modelToPlot$k) {
lines(matrix(x), components[i,], col = i+1, lwd = 2)
}
# add mixture model
lines(matrix(x), mixt, lwd = 2)
}
#' Expectation-maximization algorithm for mixture of uniform and von Mises distributions
#'
#' For a vector of angles, will return the parameters of a mixture of von Mises distributions with one component constrained to be a continuous circular uniform distribution, using an EM algorithm.
#' @param x Vector of angles to be fitted
#' @param k Number of von Mises components to be fitted
#' @param max.runs Maximum number of iterations to attempt. Default is 1000.
#' @param conv Maximum difference in log-likelihood between successive iterations before convergence is considered to have occurred. Default is 0.00001.
#' @return List containing k, with k estimates of mu, kappa, alpha (proportion of population belonging to each component), log-likelihood found, number of iterations required, and first 10 iterations. Can be passed to \code{\link{plot.EM.vonmises}} to be displayed graphically.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
EM.u.vonmises <- function(x, k, max.runs = 1000, conv = 0.00001) {
# E-M algorithm with one component fixed as uniform
x <- circular(x)
# provide starting values for mu, kappa, alpha
mu <- circular(runif(k, 0, max(x)))
kappa <- c(0, runif(k-1,0,1))
alpha <- runif(k,0,1)
alpha <- alpha/sum(alpha) # normalise to sum to 1
# Support function - calculate log-likelihood
log.likelihood <- function(x, mu, kappa, alpha, k) {
l <- matrix(nrow = k, ncol = length(x))
for (i in 1:k) {
l[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
}
sum(log(colSums(l)))
}
log.lh <- log.likelihood(x, mu, kappa, alpha, k)
new.log.lh <- abs(log.lh) + 100
n = 0
# create array to store initial values
first.10 <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
log.lh = log.lh)
while ((abs(log.lh - new.log.lh) > conv) && (n < max.runs)) {
# Estimation - calculate z_ij
z <- matrix(0, ncol = length(x), nrow = k)
for (i in 1:k){
z[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
}
all.z <- colSums(z)
for (i in 1:k){
z[i,] <- z[i,] / all.z
}
# Maximisation - update parameters
for (i in 1:k) {
alpha[i] <- sum(z[i,]) / length (x)
mu[i] <- atan2(sum(z[i,] * sin(x)), sum(z[i,] * cos(x)))
kappa[i] <- A1inv(sum(z[i,] * (cos(x - mu[i]))) / sum(z[i,]))
# correct for negative kappa if necessary
if (kappa[i] < 0) {
kappa[i] <- abs(kappa[i])
mu[i] <- mu[i] + pi
}
}
# Sort parameters by kappa for identifiability
alpha <- alpha[order(kappa)]
mu <- mu[order(kappa)]
z <- z[order(kappa),]
kappa <- kappa[order(kappa)]
# fix smallest kappa as 0 (uniform)
kappa[1] <- 0
# calculate log-likelihoods for comparison
log.lh <- new.log.lh
new.log.lh <- log.likelihood(x, mu, kappa, alpha, k)
n <- n + 1
# save first 10 iterations
if (n < 11) {
next.iter <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
log.lh = new.log.lh)
first.10 <- rbind(first.10, next.iter)
}
}
# Output: if model hasn't converged, show error message
# if it has, output the parameters & first 10 iterations
if ((abs(log.lh - new.log.lh) > conv)) {
cat ("Data hasn't converged after", n, "iterations; \n",
"Difference in log-likelihoods is",
round(abs(log.lh - new.log.lh),6))
} else {
row.names(first.10) <- first.10[,1]
list(k = k, mu = mu %% (2*pi), kappa = kappa, alpha = alpha,
log.lh = new.log.lh)
}
}
#' Mixture von Mises P-P plot
#'
#' Produces a P-P plot of the data against a specified mixture of von Mises distribution, to graphically assess the goodness of fit of the model.
#' @param data Vector of angles (in radians) to be fitted against the von Mises distribution.
#' @param mu1 Mean direction parameter for first von Mises component
#' @param kappa1 Concentration parameter for first von Mises component Must be between 0 and 1.
#' @param mu2 Mean direction parameter for second von Mises component
#' @param kappa2 Concentration parameter for second von Mises component Must be between 0 and 1.
#' @param prop Proportion of distribution assigned to first von Mises component. Must be between 0 and 1.
#' @return Vector of residuals.
#' @export
#' @examples
#' r.mvm <- rmixedvonmises(200, circular(pi), circular(0), kap, 0, prop)
#' m.pp.res <- mvM.PP(r.mvm, circular(pi), kap, circular(0),0, prop)
#' mean(m.pp.res); sd(m.pp.res)
mvM.PP <- function(data, mu1, kappa1, mu2, kappa2, prop) {
edf <- ecdf(data)
tdf <- pmixedvonmises(data, mu1, mu2, kappa1, kappa2, prop, from = circular(0), tol = 1e-06)
plot.default(tdf, edf(data), pch = 20, xlim = c(0, 1), ylim = c(0, 1), xlab = "mixture von Mises distribution function", ylab = "Empirical distribution function")
lines(c(0, 1), c(0, 1), lwd = 2, col = "lightseagreen")
edf(data) - tdf
}
#' Estimate stabilising parameter p for mean-shift clustering of circular data
#'
#' Automation of graphical method of estimating stabilising parameter to be used in mean-shift clustering algorithms. Calculates correlation of kernel density estimate for successive values of p, selecting as the optimum the maximum value of p that is less than 1 when rounded to a certain degree of precision.
#' @param data Set of angular data to be clustered.
#' @param dp How many decimal places of convergence is required before p is accepted?
#' @param plot Boolean: plot the correlation curve or not? Default is T.
#' @return Suggested value of p to be used in mean-shift clustering functions such as \code{\link{MSclust.NB}}
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' p <- MSclust.p.est(ex1, dp = 4)
MSclust.p.est <- function(data, dp = 4, plot = T) {
MSBC.kde <- function(theta, data, p, beta) {
b <- (1 - cos(theta - data))
kf <- (1 - (b/beta))^p
kf[b > beta] <- 0
sum(kf)
}
# get kde for all data points
beta <- sd.circular(data)
kde <- list()
for (j in 1:50) {
kde[[j]] <- sapply(data, FUN = MSBC.kde, data = data, p = j, beta = beta)
}
p.corr <- c()
for (j in 1:49) {
p.corr[j] <- cor(kde[[j]], kde[[j+1]])
}
p <- length(p.corr[round(p.corr, dp) < 1])
plot(c(1:49), p.corr, pch = 20, type = "o", xlab = "p", ylab = "Correlation between kde with p & p+1")
abline(h = 1, col = "red")
abline(v = p, col = "red")
as.numeric(p)
}
#' Mean-shift clustering of circular data: non-blurring mean shift
#'
#' Iterative function to cluster angular data about an unspecified number of modes.
#' @param data Set of angular data to be clustered.
#' @param p Stabilising parameter p, estimated using \code{\link{MSclust.p.est}}.
#' @param conv.lim Required degree of convergence: when the degree of correlation between successive iterations is closer to 1 than this, the data is deemed to have converged. Default is 0.00001.
#' @return Matrix containing all iterations of the data set.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' p <- MSclust.p.est(ex1, dp = 4)
#' nb <- MSclust.NB(ex1, p = 7, conv.lim = 0.000001)
MSclust.NB <- function(data, p, conv.lim = 0.00001, max.runs = 20) {
n <- length(data)
beta <- sd.circular(data)
# set all data ponts as initial values of cluster centres
vi <- matrix(data, nrow = 1)
conv <- c(0)
t <- 1
while (conv[t] < (1-conv.lim) & t < max.runs) {
vi.new <- c()
for (i in 1:n) {
kd <- apply(cbind(beta - (1 - cos(vi[t,i]-data)),0),1,max)^(p-1)
upper <- kd * sin(data) / n
lower <- kd * cos(data) / n
vi.new[i] <- atan2(sum(upper), sum(lower)) %% (2*pi)
}
vi <- rbind(vi, t = vi.new)
conv[t+1] <- cor(vi[t,], vi[t+1,])
t <- t+1
}
vi
}
#' Summarise clusters from a mean-shift clustering
#'
#' Uses hierarchical clustering algorithm to extract mean, mean resultant length, and estimated kappa for each cluster.
#' @param clusts Vector or matrix of clustered values, as produced by \code{\link{MSclust.NB}}.
#' @return List containing a vector of cluster numbers and a matrix containing estimated mu, rho and kappa for each cluster.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#' rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' p <- MSclust.p.est(ex1, dp = 4)
#' nb <- MSclust.NB(ex1, p = 7, conv.lim = 0.000001)
#' res <- MSclust.summ(nb)
MSclust.summ <- function(clusts, minPts = 5) {
final <- clusts[nrow(clusts),]
hcl <- hclust(dist(final), method = "single")
clusters <- as.data.frame(cbind(cluster = cutree(hcl, h = mean(hcl$height)),
org = clusts[1,], mode = final))
cts <- count(clusters$cluster)
res <- ddply(clusters, .(cluster), summarize,
mu = mean.circular(circular(mode)) %% (2*pi),
rho = rho.circular(circular(org)))
res <- cbind(res, kappa = A1inv(res[,3]), n.points = count(clusters$cluster)$freq)
list(summ = res, clusters = clusters$cluster)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.