R/Clustering.R

#' Compare subsets of data against others
#'
#' Divide an angular data set into subsets and compare the characteristics of each subset against the remaining angles, to test whether they might plausibly share the same distribution. 
#' @param data Vector of angles.
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param include.noise Boolean indicator: should cluster 0 (noise cluster) be included in the output?
#' @param reps Number of replicates for the randomised Watson two-sample test. Default is 999.
#' @return A matrix containing the size of the subset tested; Rayleigh test scores for the subset and for the remaining angles (\code{\link{rayleigh.test}}); Watson mean test score (\code{\link{watson.common.mean.test}}); Wallraff concentration test score (\code{\link{wallraff.concentration.test}});
#' Mardia-Watson-Wheeler large-sample test of common distribution (\code{\link{mww.common.dist.LS}}); and randomised Watson two-sample test of common distribution (\code{\link{watson.two.test.rand}}).
#' @export
#' @examples
#' c.res <- compare.clusters(q, dbscan(centres, eps = 2.5, MinPts = 4)$cluster)
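#' # A self-contained sketch on simulated data (the cluster labels below are
#' # invented for illustration; rvonmises is from the circular package):
#' q2 <- c(rvonmises(60, circular(pi/2), 8), rvonmises(60, circular(pi), 2))
#' c.res2 <- compare.clusters(q2, rep(1:2, each = 60), reps = 99)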
compare.clusters <- function(data, clusts, include.noise = F, reps = 999) {
    cr <- c()
    if (include.noise) {start <- 0} else {start <- 1}
    
    for (i in start:max(clusts)) {
        q4 <- data[clusts == i]
        q4.x <- data[clusts != i]
        qs <- list(q4, q4.x)
        ql <- c(length(q4), length(q4.x))
        
        cr <- rbind(cr, c(subset.size = length(q4),
                          rayleigh.unif = rayleigh.test(q4)$p.value,
                          rayleigh.unif.x = rayleigh.test(q4.x)$p.value,
                          same.mean = watson.common.mean.test(qs)$p.val,
                          same.conc = wallraff.concentration.test(qs)$p.val,
                          same.dist.mww.ls = mww.common.dist.LS(cs.unif.scores(qs), ql)$p.val,
                          same.dist.watson.rand = watson.two.test.rand(q4, q4.x, NR = reps, show.progress = F)$p.val))
    }
    rownames(cr) <- start:max(clusts)
    cr
}


#' Sample statistics of subsets of data
#'
#' Divide an angular data set into subsets and obtain sample statistics for each subset. 
#' @param data Vector of angles.
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param include.noise Boolean indicator: should cluster 0 (noise cluster) be included in the output?
#' @return A matrix containing the index and size of the subset tested; sample statistics as generated by \code{bc.sample.statistics}; von Mises and Jones-Pewsey maximum-likelihood estimators; AIC and BIC scores for ML von Mises and Jones-Pewsey distributions.
#' @export
#' @examples
#' c.stats <- cluster.sample.stats(q, dbscan(centres, eps = 2.5, MinPts = 4)$cluster)
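#' # A self-contained sketch on simulated data (cluster labels invented):
#' q2 <- c(rvonmises(60, circular(pi/2), 8), rvonmises(60, circular(pi), 2))
#' c.stats2 <- cluster.sample.stats(q2, rep(1:2, each = 60))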
cluster.sample.stats <- function(data, clusts, include.noise = F) {
    
    cr <- c()
    if (include.noise) {start <- 0} else {start <- 1}
    
    for (i in start:max(clusts)) {
        q4 <- data[clusts == i]
        q4.x <- data[clusts != i]
        
        bc <- bc.sample.statistics(q4)
        bc.x <- bc.sample.statistics(q4.x)
        
        if (length(q4) < 5) {
            s <- c(id = i, size = length(q4), theta.bar = bc$mu, mean.res = bc$rho,
                   bc.kappa = A1inv(bc$rho), kurtosis = bc$alpha2,
                   vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA,
                   vm.AIC = NA, jp.AIC = NA, vm.BIC = NA, jp.BIC = NA)
            sx <- c(id = -i, size = length(q4.x), theta.bar = bc.x$mu, mean.res = bc.x$rho,
                    bc.kappa = A1inv(bc.x$rho), kurtosis = bc.x$alpha2,
                    vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA,
                    vm.AIC = NA, jp.AIC = NA, vm.BIC = NA, jp.BIC = NA)
        } else {
            vm <- mle.vonmises(q4)
            mle <- JP.mle(q4)
            
            vm.x <- mle.vonmises(q4.x)
            mle.x <- JP.mle(q4.x)
            
            comp <- JP.psi.info(q4)$comparison
            comp.x <- JP.psi.info(q4.x)$comparison
            
            s <- c(id = i, size = length(q4), theta.bar = bc$mu, mean.res = bc$rho,
                   bc.kappa = A1inv(bc$rho), kurtosis = bc$alpha2,
                   vm.mu = vm$mu %% (2*pi), vm.kappa = vm$kappa,
                   jp.mu = mle$mu, jp.kappa = mle$kappa, jp.psi = mle$psi,
                   vm.AIC = comp[4,1], jp.AIC = comp[4,2], vm.BIC = comp[5,1], jp.BIC = comp[5,2])
            sx <- c(id = -i, size = length(q4.x), theta.bar = bc.x$mu, mean.res = bc.x$rho,
                    bc.kappa = A1inv(bc.x$rho), kurtosis = bc.x$alpha2,
                    vm.mu = vm.x$mu %% (2*pi), vm.kappa = vm.x$kappa,
                    jp.mu = mle.x$mu, jp.kappa = mle.x$kappa, jp.psi = mle.x$psi,
                    vm.AIC = comp.x[4,1], jp.AIC = comp.x[4,2], vm.BIC = comp.x[5,1], jp.BIC = comp.x[5,2])
        }
        cr <- rbind(cr, s, sx)
    }
    rownames(cr) <- cr[,1]
    cr[,-1]
}


#' Sample statistics and comparison tests for subsets of data, per quadrant
#'
#' Divide an angular data set into subsets, split each subset into two pairs of perpendicular quadrants (based on the modal angle of the full data set), and compare the quartered angles of the two quadrant groups within each subset, to test whether they might plausibly share the same distribution. 
#' @param data Vector of angles.
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param include.noise Boolean indicator: should cluster 0 (noise cluster) be included in the output?
#' @param reps Number of replicates for the randomised Watson two-sample test. Default is 999.
#' @return A list containing a matrix of sample statistics for each quadrant group, and a matrix of comparison tests per subset: Rayleigh test scores (\code{\link{rayleigh.test}}); Watson mean test score (\code{\link{watson.common.mean.test}}); Wallraff concentration test score (\code{\link{wallraff.concentration.test}});
#' Mardia-Watson-Wheeler large-sample test of common distribution (\code{\link{mww.common.dist.LS}}); and randomised Watson two-sample test of common distribution (\code{\link{watson.two.test.rand}}).
#' @export
#' @examples
#' c.stats <- cluster.quad.tests(q, dbscan(centres, eps = 2.5, MinPts = 4)$cluster)
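#' # A self-contained sketch on simulated data (cluster labels invented):
#' q2 <- c(rvonmises(80, circular(pi/2), 8), rvonmises(80, circular(pi), 4))
#' c.quad <- cluster.quad.tests(q2, rep(1:2, each = 80), reps = 99)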
cluster.quad.tests <- function(data, clusts, include.noise = F, reps = 999) {
    cr <- c()
    rn <- c()
    qc <- c()
    
    if (include.noise) {start <- 0} else {start <- 1}
    
    # quadrants are based on modal angle from full data set
    mx <- circular(as.numeric(names(which.max(table(round(data, 1))))))
    cutpoints <- sort(circular(mx + c(pi/4, 3*pi/4, 5*pi/4, 7*pi/4)) %% (2*pi))
    quadrant <- rep(0, length(data))
    quadrant[data > cutpoints[1] & data < cutpoints[2]] <- 1
    quadrant[data > cutpoints[3] & data < cutpoints[4]] <- 1
    q.4 <- (4*data) %% (2*pi)
    
    for (i in start:max(clusts)) {
        q.4.a <- q.4[(quadrant == 0) & (clusts == i)]
        q.4.b <- q.4[(quadrant == 1) & (clusts == i)]
        
        bc.a <- bc.sample.statistics(q.4.a, symmetric = F)
        if (length(q.4.a) < 5) {
            sa <- c(size = length(q.4.a), theta.bar = bc.a$mu, mean.res = bc.a$rho, bc.kappa = A1inv(bc.a$rho), kurtosis = bc.a$alpha2, vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA)
        } else {
            # get bias-corrected & ML point estimates
            vm <-  mle.vonmises(q.4.a, bias = T)
            vm$mu <- vm$mu %% (2*pi)
            jp <- JP.mle(q.4.a)
            
            sa <- c(size = length(q.4.a), theta.bar = bc.a$mu, mean.res = bc.a$rho, bc.kappa = A1inv(bc.a$rho), kurtosis = bc.a$alpha2, vm.mu = vm$mu, vm.kappa = vm$kappa, jp.mu = jp$mu, jp.kappa = jp$kappa, jp.psi = jp$psi)
        }
        
        bc.b <- bc.sample.statistics(q.4.b, symmetric = F)
        if (length(q.4.b) < 5) {
            sb <- c(size = length(q.4.b), theta.bar = bc.b$mu, mean.res = bc.b$rho, bc.kappa = A1inv(bc.b$rho), kurtosis = bc.b$alpha2, vm.mu = NA, vm.kappa = NA, jp.mu = NA, jp.kappa = NA, jp.psi = NA)
        } else {
            # get bias-corrected & ML point estimates
            vm <-  mle.vonmises(q.4.b, bias = T)
            vm$mu <- vm$mu %% (2*pi)
            jp <- JP.mle(q.4.b)
            
            sb <- c(size = length(q.4.b), theta.bar = bc.b$mu, mean.res = bc.b$rho, bc.kappa = A1inv(bc.b$rho), kurtosis = bc.b$alpha2, vm.mu = vm$mu, vm.kappa = vm$kappa, jp.mu = jp$mu, jp.kappa = jp$kappa, jp.psi = jp$psi)
        }
        
        if (length(q.4.a) < 5 | length(q.4.b) < 5) {
            qc.n <- c(rayleigh.unif.a = rayleigh.test(q.4.a)$p.value,
                      rayleigh.unif.b = rayleigh.test(q.4.b)$p.value,
                      same.mean = NA, same.conc = NA, same.dist.mww.ls = NA, same.dist.watson.rand = NA)
        } else {
            qs <- list(q.4.a, q.4.b)
            ql <- c(length(q.4.a), length(q.4.b))
            qc.n <- c(rayleigh.unif.a = rayleigh.test(q.4.a)$p.value,
                      rayleigh.unif.b = rayleigh.test(q.4.b)$p.value,
                      same.mean = watson.common.mean.test(qs)$p.val,
                      same.conc = wallraff.concentration.test(qs)$p.val,
                      same.dist.mww.ls = mww.common.dist.LS(cs.unif.scores(qs), ql)$p.val,
                      same.dist.watson.rand = watson.two.test.rand(q.4.a, q.4.b, NR = reps, show.progress = F)$p.val)
        }    
        cr <- rbind(cr, sa, sb)
        rn <- rbind(rn, paste(i, "a", sep = ""), paste(i, "b", sep = ""))
        qc <- rbind(qc, qc.n)
    }
    rownames(cr) <- rn
    rownames(qc) <- start:max(clusts)
    list(stats = cr, comparison = qc)
}


#' Compare all clusters for similar distribution
#'
#' Divide an angular data set into subsets and compare every pair of subsets, to test whether they might plausibly share the same distribution. 
#' @param data Vector of raw angles (not quartered/wrapped angles).
#' @param clusts Vector of indices denoting the cluster to which each angle has been assigned.
#' @param reps Number of replicates for the randomised Watson two-sample test. Default is 999.
#' @param show.progress Boolean: display a progress bar while running? Default is F.
#' @return A three-dimensional array containing p-values for pairwise tests of common mean, common concentration, and common distribution (Mardia-Watson-Wheeler large-sample test and randomised Watson two-sample test).
#' @export
#' @examples
#' db <- dbscan(centres, eps = 2.5, MinPts = 4)$cluster
#' res <- compare.cluster.dists(q.4, db, reps = 99)
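#' # slices of the result pair naturally with the heatmap plot:
#' cluster.pval.heatmap(res[,,1], res[,,2])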
compare.cluster.dists <- function(data, clusts, reps = 999, show.progress = F) {
    n <- length(unique(clusts))
    res <- array(NA, dim = c(n,n,4), dimnames = list(c(unique(clusts)), c(unique(clusts)), c("mean", "conc", "dist.mww.LS", "dist.wats.rand")))
    
    if (show.progress) {pb <- txtProgressBar(min = 0, max = n, style = 3)}
    # clusters are assumed to be labelled 0 (noise) to max(clusts)
    for (i in 0:(max(clusts) - 1)) {
        for (j in (i+1):max(clusts)) {
            qi <- data[clusts == i]
            qj <- data[clusts == j]
            qs <- list(qi, qj)
            ql <- c(length(qi), length(qj))
            
            m <- watson.common.mean.test(qs)$p.val
            conc <- wallraff.concentration.test(qs)$p.val
            d.mww <- mww.common.dist.LS(cs.unif.scores(qs), ql)$p.val
            d.wats.r <- watson.two.test.rand(qi, qj, NR = reps, show.progress = F)$p.val
            
            # store each p-value in both triangles of its slice
            res[i+1, j+1, ] <- res[j+1, i+1, ] <- c(m, conc, d.mww, d.wats.r)
            if (show.progress) {setTxtProgressBar(pb, j)}
        }
    }
    res
}


#' Plot cluster-by-cluster heatmap of p-values
#'
#' For a matrix of p-values, as generated by \code{\link{compare.cluster.dists}}, plot a heatmap highlighting p-values less than 0.05 or less than 0.1.
#' @param m1 Matrix of p-values to be plotted.
#' @param m2 Optional: Second matrix of p-values. If supplied, the lower triangular matrix of \code{m1} is overwritten with that of \code{m2} and the two are plotted together.
#' @export
#' @examples
#' cluster.pval.heatmap(res[,,1], res[,,2])
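#' # A self-contained sketch with an invented symmetric matrix of p-values:
#' m <- matrix(runif(16), 4, 4)
#' m[lower.tri(m)] <- t(m)[lower.tri(m)]; diag(m) <- 1
#' cluster.pval.heatmap(m)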
cluster.pval.heatmap <- function(m1, m2) {
    
    if (!missing(m2)) {m1[lower.tri(m1)] <- m2[lower.tri(m2)]}
    if (is.null(rownames(m1))) {rownames(m1) <- c(1:nrow(m1))}
    if (is.null(colnames(m1))) {colnames(m1) <- c(1:ncol(m1))}
    
    image(as.numeric(rownames(m1)), as.numeric(colnames(m1)), m1, col = c("orangered", "gold", "khaki1"), breaks = c(0, 0.05, 0.1, 1), xlab = "", ylab = "")
    
    # overlay each cell with its rounded p-value; rows run along the x axis,
    # matching the orientation used by image() above
    for (x in 1:nrow(m1)) {
        xx <- as.numeric(rownames(m1)[x])
        abline(v = xx - 0.5)
        for (y in 1:ncol(m1)) {
            yy <- as.numeric(colnames(m1)[y])
            text(xx, yy, round(m1[x,y],2), cex = 0.8)
            abline(h = yy - 0.5)
        }
    }
}


#' Expectation-maximization algorithm for mixture of von Mises distributions
#'
#' For a vector of angles, will return the parameters of a mixture of von Mises distributions, using an EM algorithm.
#' @param x Vector of angles to be fitted
#' @param k Number of von Mises components to be fitted
#' @param max.runs Maximum number of iterations to attempt. Default is 1000.
#' @param conv Convergence threshold: the algorithm is considered to have converged once the difference in log-likelihood between successive iterations falls below this value. Default is 0.00001.
#' @return List containing k, with k estimates of mu, kappa, alpha (proportion of population belonging to each component), log-likelihood found, number of iterations required, and first 10 iterations. Can be passed to \code{\link{plot.EM.vonmises}} to be displayed graphically.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
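#' # Fits depend on the random starting values; a common workaround (a sketch,
#' # not part of the fitted API) is to keep the best of several restarts:
#' fits <- lapply(1:5, function(i) EM.vonmises(ex1, k = 2))
#' ok <- !sapply(fits, is.null)   # non-converged runs return NULL
#' best <- fits[ok][[which.max(sapply(fits[ok], `[[`, "log.lh"))]]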
EM.vonmises <- function(x, k, max.runs = 1000, conv = 0.00001) {
    
    x <- circular(x)
    # provide starting values for mu, kappa, alpha
    mu <- circular(runif(k, 0, max(x)))
    kappa <- runif(k,0,1)
    alpha <- runif(k,0,1)
    alpha <- alpha/sum(alpha) # normalise to sum to 1
    
    # Support function - calculate log-likelihood
    
    log.likelihood <- function(x, mu, kappa, alpha, k) {
        l <- matrix(nrow = k, ncol = length(x))
        for (i in 1:k) {
            l[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
        }
        sum(log(colSums(l)))
    }
    
    log.lh <- log.likelihood(x, mu, kappa, alpha, k)
    new.log.lh <- abs(log.lh) + 100   # guarantee at least one iteration
    n <- 0
    
    # create array to store initial values
    first.10 <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
                  log.lh = log.lh)
    
    while ((abs(log.lh - new.log.lh) > conv) && (n < max.runs)) {
        
        # Estimation - calculate z_ij
        z <- matrix(0, ncol = length(x), nrow = k)
        for (i in 1:k){
            z[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
        }
        all.z <- colSums(z)
        for (i in 1:k){
            z[i,] <- z[i,] / all.z
        }
        
        # Maximisation - update parameters
        for (i in 1:k) {
            alpha[i] <- sum(z[i,]) / length (x)
            mu[i] <- atan2(sum(z[i,] * sin(x)), sum(z[i,] * cos(x)))
            kappa[i] <- A1inv(sum(z[i,] * (cos(x - mu[i]))) / sum(z[i,]))
            
            # correct for negative kappa if necessary
            if (kappa[i] < 0) {
                kappa[i] <- abs(kappa[i])
                mu[i] <- mu[i] + pi
            }
        }
        
        # calculate log-likelihoods for comparison
        log.lh <- new.log.lh
        new.log.lh <- log.likelihood(x, mu, kappa, alpha, k)
        n <- n + 1
        
        # save first 10 iterations
        if (n < 11) {
            next.iter <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
                           log.lh = new.log.lh)
            first.10 <- rbind(first.10, next.iter)
        }
    }
    
    # Output: if model hasn't converged, show error message
    #         if it has, output the parameters & first 10 iterations
    if ((abs(log.lh - new.log.lh) > conv)) {
        cat("Model hasn't converged after", n, "iterations; \n",
            "difference in log-likelihoods is", 
            round(abs(log.lh - new.log.lh), 6))
    } else {
        row.names(first.10) <- first.10[,1]
        list(k = k, mu = mu %% (2*pi), kappa = kappa, alpha = alpha,
             log.lh = new.log.lh, n = n, first.10 = first.10)
    }
}


#' Classify points using EM mixture model
#'
#' Winner-takes-all classification of points based on a von Mises mixture model obtained using \code{\link{EM.vonmises}}.
#' @param data Set of angular data
#' @param model Fitted mixture model from \code{\link{EM.vonmises}}.
#' @return Vector of cluster numbers
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
#' cl <- EM.clusters(ex1, em1)
#' plot(ex1[cl == 1], stack = T)
#' points(ex1[cl == 2], col = "blue", stack = T)
EM.clusters <- function(data, model) {
    # score each component by alpha_i * f_i(x), proportional to the posterior
    # probability of membership; assign each point to the highest-scoring one
    components <- matrix(nrow = model$k, ncol = length(data))
    for (i in 1:model$k) {
        components[i, ] <- model$alpha[i] * dvonmises(data, circular(model$mu[i]), model$kappa[i])
    }
    apply(components, 2, which.max)
}


#' Plot mixture von Mises model found by EM algorithm.
#'
#' Plot original data with individual components and fitted mixture model.
#' @param varToPlot Set of angular data
#' @param modelToPlot Fitted mixture model from \code{\link{EM.vonmises}}.
#' @param h.breaks Number of breaks for the underlying histogram. Default is 20.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.vonmises(ex1, k = 2)
#' plot.EM.vonmises(ex1, em1)
plot.EM.vonmises <- function(varToPlot, modelToPlot, h.breaks = 20) {
    
    varToPlot <- matrix(varToPlot)       # convert from circular data
    
    # get density of each component
    x <- circular(seq(0, 2*pi, 0.01))
    components <- matrix(nrow = modelToPlot$k, ncol = length(x))
    for (i in 1:modelToPlot$k) {
        components[i,] <- dvonmises(x, circular(modelToPlot$mu[i]), modelToPlot$kappa[i]) * modelToPlot$alpha[i]
    }
    mixt <- colSums(components)
    
    # plot original data
    y.max <- max(c(mixt, hist(varToPlot, plot = F, breaks = h.breaks)$density)) * 1.1 # rescale y axis
    labl <- paste("Mixture of", modelToPlot$k,"von Mises")
    hist(varToPlot, freq = F, ylim = c(0, y.max), main = "", col = "lightgrey", xlab = labl, xlim = c(0, 2*pi), breaks = h.breaks)
    
    # plot vM components
    for (i in 1:modelToPlot$k) {
        lines(matrix(x), components[i,], col = i+1, lwd = 2)
    }
    
    # add mixture model
    lines(matrix(x), mixt, lwd = 2)
}


#' Expectation-maximization algorithm for mixture of uniform and von Mises distributions
#'
#' For a vector of angles, will return the parameters of a mixture of von Mises distributions with one component constrained to be a continuous circular uniform distribution, using an EM algorithm.
#' @param x Vector of angles to be fitted
#' @param k Number of von Mises components to be fitted
#' @param max.runs Maximum number of iterations to attempt. Default is 1000.
#' @param conv Convergence threshold: the algorithm is considered to have converged once the difference in log-likelihood between successive iterations falls below this value. Default is 0.00001.
#' @return List containing k, with k estimates of mu, kappa, alpha (proportion of population belonging to each component), log-likelihood found, number of iterations required, and first 10 iterations. Can be passed to \code{\link{plot.EM.vonmises}} to be displayed graphically.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' em1 <- EM.u.vonmises(ex1, k = 2)
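#' # A sketch with explicit uniform "noise" (rcircularuniform is from the
#' # circular package); the uniform component should absorb the noise points:
#' ex2 <- c(rvonmises(100, circular(pi), 5), rcircularuniform(40))
#' em2 <- EM.u.vonmises(ex2, k = 2)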
EM.u.vonmises <- function(x, k, max.runs = 1000, conv = 0.00001) {
    
    # E-M algorithm with one component fixed as uniform
    x <- circular(x)
    # provide starting values for mu, kappa, alpha
    mu <- circular(runif(k, 0, max(x)))
    kappa <- c(0, runif(k-1,0,1))
    alpha <- runif(k,0,1)
    alpha <- alpha/sum(alpha) # normalise to sum to 1
    
    # Support function - calculate log-likelihood
    
    log.likelihood <- function(x, mu, kappa, alpha, k) {
        l <- matrix(nrow = k, ncol = length(x))
        for (i in 1:k) {
            l[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
        }
        sum(log(colSums(l)))
    }
    
    log.lh <- log.likelihood(x, mu, kappa, alpha, k)
    new.log.lh <- abs(log.lh) + 100   # guarantee at least one iteration
    n <- 0
    
    # create array to store initial values
    first.10 <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
                  log.lh = log.lh)
    
    while ((abs(log.lh - new.log.lh) > conv) && (n < max.runs)) {
        
        # Estimation - calculate z_ij
        z <- matrix(0, ncol = length(x), nrow = k)
        for (i in 1:k){
            z[i,] <- alpha[i] * dvonmises(x, mu[i], kappa[i])
        }
        all.z <- colSums(z)
        for (i in 1:k){
            z[i,] <- z[i,] / all.z
        }
        
        # Maximisation - update parameters
        for (i in 1:k) {
            alpha[i] <- sum(z[i,]) / length (x)
            mu[i] <- atan2(sum(z[i,] * sin(x)), sum(z[i,] * cos(x)))
            kappa[i] <- A1inv(sum(z[i,] * (cos(x - mu[i]))) / sum(z[i,]))   
            
            # correct for negative kappa if necessary
            if (kappa[i] < 0) {
                kappa[i] <- abs(kappa[i])
                mu[i] <- mu[i] + pi
            }
            
        }
        
        # Sort parameters by kappa for identifiability
        alpha <- alpha[order(kappa)]
        mu <- mu[order(kappa)]
        z <- z[order(kappa),]
        kappa <- kappa[order(kappa)]
        # fix smallest kappa as 0 (uniform)
        kappa[1] <- 0
        
        # calculate log-likelihoods for comparison
        log.lh <- new.log.lh
        new.log.lh <- log.likelihood(x, mu, kappa, alpha, k)
        n <- n + 1
        
        # save first 10 iterations
        if (n < 11) {
            next.iter <- c(iter = n, mu = mu, kappa = kappa, alpha = alpha,
                           log.lh = new.log.lh)
            first.10 <- rbind(first.10, next.iter)
        }
    }
    
    # Output: if model hasn't converged, show error message
    #         if it has, output the parameters & first 10 iterations
    if ((abs(log.lh - new.log.lh) > conv)) {
        cat("Model hasn't converged after", n, "iterations; \n",
            "difference in log-likelihoods is", 
            round(abs(log.lh - new.log.lh), 6))
    } else {
        row.names(first.10) <- first.10[,1]
        list(k = k, mu = mu %% (2*pi), kappa = kappa, alpha = alpha,
             log.lh = new.log.lh, n = n, first.10 = first.10)
    }
}


#' Mixture von Mises P-P plot
#'
#' Produces a P-P plot of the data against a specified mixture of two von Mises distributions, to graphically assess the goodness of fit of the model.
#' @param data Vector of angles (in radians) to be fitted against the von Mises distribution.
#' @param mu1 Mean direction parameter for first von Mises component
#' @param kappa1 Concentration parameter for first von Mises component
#' @param mu2 Mean direction parameter for second von Mises component
#' @param kappa2 Concentration parameter for second von Mises component
#' @param prop Proportion of distribution assigned to first von Mises component. Must be between 0 and 1.
#' @return Vector of residuals.
#' @export
#' @examples
#' # kap and prop below are invented for illustration
#' kap <- 4; prop <- 0.6
#' r.mvm <- rmixedvonmises(200, circular(pi), circular(0), kap, 0, prop)
#' m.pp.res <- mvM.PP(r.mvm, circular(pi), kap, circular(0), 0, prop)
#' # points close to the diagonal indicate a good fit
#' mean(m.pp.res); sd(m.pp.res)
mvM.PP <- function(data, mu1, kappa1, mu2, kappa2, prop) {
    edf <- ecdf(data)
    tdf <- pmixedvonmises(data, mu1, mu2, kappa1, kappa2, prop, from = circular(0), tol = 1e-06)
    plot.default(tdf, edf(data), pch = 20, xlim = c(0, 1), ylim = c(0, 1), xlab = "mixture von Mises distribution function", ylab = "Empirical distribution function")
    lines(c(0, 1), c(0, 1), lwd = 2, col = "lightseagreen")
    edf(data) - tdf
}



#' Estimate stabilising parameter p for mean-shift clustering of circular data
#'
#' Automation of a graphical method of estimating the stabilising parameter to be used in mean-shift clustering algorithms. Calculates the correlation between kernel density estimates for successive values of p, selecting as the optimum the largest p for which that correlation is still less than 1 when rounded to a given precision.
#' @param data Set of angular data to be clustered.
#' @param dp How many decimal places of convergence are required before p is accepted?
#' @param plot Boolean: plot the correlation curve or not? Default is T.
#' @return Suggested value of p to be used in mean-shift clustering functions such as \code{\link{MSclust.NB}}
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' p <- MSclust.p.est(ex1, dp = 4)
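#' p2 <- MSclust.p.est(ex1, dp = 4, plot = FALSE)   # suppress the diagnostic plot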
MSclust.p.est <- function(data, dp = 4, plot = T) {
    
    MSBC.kde <- function(theta, data, p, beta) {
        
        b <- (1 - cos(theta - data))
        kf <- (1 - (b/beta))^p
        kf[b > beta] <- 0
        sum(kf)
    }
    
    # get kde for all data points
    beta <- sd.circular(data)
    kde <- list()
    for (j in 1:50) {
        kde[[j]] <- sapply(data, FUN = MSBC.kde, data = data, p = j, beta = beta)
    }
    p.corr <- c()
    for (j in 1:49) {
        p.corr[j] <- cor(kde[[j]], kde[[j+1]])
    }
    
    # number of p values for which the kde has not yet stabilised
    # (correlation with the kde at p+1 still rounds below 1)
    p <- length(p.corr[round(p.corr, dp) < 1])
    if (plot) {
        plot(c(1:49), p.corr, pch = 20, type = "o", xlab = "p", ylab = "Correlation between kde with p & p+1")
        abline(h = 1, col = "red")
        abline(v = p, col = "red")
    }
    as.numeric(p)
}


#' Mean-shift clustering of circular data: non-blurring mean shift
#'
#' Iterative function to cluster angular data about an unspecified number of modes.
#' @param data Set of angular data to be clustered.
#' @param p Stabilising parameter p, estimated using \code{\link{MSclust.p.est}}.
#' @param conv.lim Required degree of convergence: when the correlation between successive iterations is closer to 1 than this, the data is deemed to have converged. Default is 0.00001.
#' @param max.runs Maximum number of iterations to attempt. Default is 20.
#' @return Matrix containing all iterations of the data set.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' p <- MSclust.p.est(ex1, dp = 4)
#' nb <- MSclust.NB(ex1, p = 7, conv.lim = 0.000001)
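#' # each row of nb is one iteration; the final row holds the converged modes
#' plot(circular(nb[nrow(nb), ]), stack = TRUE)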
MSclust.NB <- function(data, p, conv.lim = 0.00001, max.runs = 20) {
    n <- length(data)
    beta <- sd.circular(data)
    
    # set all data points as initial values of the cluster centres
    vi <- matrix(data, nrow = 1)
    conv <- c(0)
    
    t <- 1
    while (conv[t] < (1-conv.lim) & t < max.runs) {
        vi.new <- c()
        for (i in 1:n) {
            # truncated kernel weights for the mean-shift update
            kd <- pmax(beta - (1 - cos(vi[t,i] - data)), 0)^(p-1)
            upper <- kd * sin(data) / n
            lower <- kd * cos(data) / n
            vi.new[i] <- atan2(sum(upper), sum(lower)) %% (2*pi)
        }
        vi <- rbind(vi, t = vi.new)
        conv[t+1] <- cor(vi[t,], vi[t+1,])
        t <- t+1
    }
    vi
}


#' Summarise clusters from a mean-shift clustering
#'
#' Uses a hierarchical clustering algorithm to extract the mean, mean resultant length, and estimated kappa for each cluster.
#' @param clusts Matrix of clustered values (one iteration per row), as produced by \code{\link{MSclust.NB}}.
#' @param minPts Intended minimum number of points per cluster (currently unused).
#' @return List containing a vector of cluster numbers and a matrix containing estimated mu, rho and kappa for each cluster.
#' @export
#' @examples
#' ex1 <- c(rvonmises(120 * 0.3, mu = circular(pi/2), kappa = 10),
#'          rvonmises(120 * 0.7, mu = circular(pi), kappa = 3))
#' p <- MSclust.p.est(ex1, dp = 4)
#' nb <- MSclust.NB(ex1, p = 7, conv.lim = 0.000001)
#' res <- MSclust.summ(nb)
MSclust.summ <- function(clusts, minPts = 5) {
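    # note: relies on count() and ddply() from the plyr package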
    
    final <- clusts[nrow(clusts),]
    
    hcl <- hclust(dist(final), method = "single")
    clusters <- as.data.frame(cbind(cluster = cutree(hcl, h = mean(hcl$height)),
                                    org = clusts[1,], mode = final))
    cts <- count(clusters$cluster)
    res <- ddply(clusters, .(cluster), summarize,
                 mu = mean.circular(circular(mode)) %% (2*pi),
                 rho = rho.circular(circular(org)))
    res <- cbind(res, kappa = A1inv(res[,3]), n.points = cts$freq)
    list(summ = res, clusters = clusters$cluster)
}