Nothing
#' Fréchet Analysis of Variance
#'
#' Given sets of manifold-valued data \eqn{X^{(1)}_{1:{n_1}}, X^{(2)}_{1:{n_2}}, \ldots, X^{(m)}_{1:{n_m}}},
#' performs analysis of variance to test equality of distributions. This means, small \eqn{p}-value implies that
#' at least one of the equalities does not hold.
#'
#' @param ... S3 objects of \code{riemdata} class for manifold-valued data.
#' @param nperm the number of permutations for resampling-based test.
#' @param maxiter maximum number of iterations to be run.
#' @param eps tolerance level for stopping criterion.
#'
#' @return a (list) object of \code{S3} class \code{htest} containing: \describe{
#' \item{statistic}{a test statistic.}
#' \item{p.value}{\eqn{p}-value under \eqn{H_0}.}
#' \item{alternative}{alternative hypothesis.}
#' \item{method}{name of the test.}
#' \item{data.name}{name(s) of provided sample data.}
#' }
#'
#' @examples
#' #-------------------------------------------------------------------
#' # Example on Sphere : Uniform Samples
#' #
#' # Each of 4 classes consists of 20 uniform samples from uniform
#' # density on 2-dimensional sphere S^2 in R^3.
#' #-------------------------------------------------------------------
#' ## PREPARE DATA OF 4 CLASSES
#' ndata = 200
#' class1 = list()
#' class2 = list()
#' class3 = list()
#' class4 = list()
#' for (i in 1:ndata){
#' tmpxy = matrix(rnorm(4*2, sd=0.1), ncol=2)
#' tmpz = rep(1,4)
#' tmp3d = cbind(tmpxy, tmpz)
#' tmp = tmp3d/sqrt(rowSums(tmp3d^2))
#'
#' class1[[i]] = tmp[1,]
#' class2[[i]] = tmp[2,]
#' class3[[i]] = tmp[3,]
#' class4[[i]] = tmp[4,]
#' }
#' obj1 = wrap.sphere(class1)
#' obj2 = wrap.sphere(class2)
#' obj3 = wrap.sphere(class3)
#' obj4 = wrap.sphere(class4)
#'
#' ## RUN THE ASYMPTOTIC TEST
#' riem.fanova(obj1, obj2, obj3, obj4)
#'
#' \donttest{
#' ## RUN THE PERMUTATION TEST WITH MANY PERMUTATIONS
#' riem.fanovaP(obj1, obj2, obj3, obj4, nperm=999)
#' }
#'
#' @references
#' \insertRef{dubey_frechet_2019}{Riemann}
#'
#' @name riem.fanova
#' @concept inference
#' @rdname riem.fanova
NULL
#' @rdname riem.fanova
#' @export
riem.fanova <- function(..., maxiter=50, eps=1e-5){
## PREPARE
# data
datalist = base::list(...)
k = length(datalist)
for (i in 1:k){
riemobj = datalist[[i]]
if (!inherits(riemobj, "riemdata")){
stop(paste0("* riem.fanova : ",i,"-th input should be an object of 'riemdata' class."))
}
}
# hypothesis testing argument
DNAME = ""
DOBJ = as.list(substitute(list(...)))[-1L]
for (i in 1:(k-1)){
DNAME = paste0(DNAME, as.character(DOBJ[[i]]), ", ")
}
DNAME = paste0(DNAME, "and ",as.character(DOBJ[[k]]))
MNAME = riemobj$name
# iteration
myiter = max(50, round(maxiter))
myeps = min(max(as.double(eps),0),1e-5)
## COMPUTATION
# Step 1. compute pooled frechet objective
pooled.data = list()
for (i in 1:k){
pooled.data = c(pooled.data, datalist[[i]]$data)
}
pooled.ndata = length(pooled.data)
pooled.weight = rep(1/pooled.ndata, pooled.ndata)
frechet.pooled = inference_mean_intrinsic(MNAME, pooled.data, pooled.weight, myiter, myeps)
# Step 2. compute individual frechet objective
frechet.each = list()
for (i in 1:k){
tgt.ndata = length(datalist[[i]]$data)
tgt.weight = rep(1/tgt.ndata, tgt.ndata)
frechet.each[[i]] = inference_mean_intrinsic(MNAME, datalist[[i]]$data, tgt.weight, myiter, myeps)
}
# Step 3. get distance information only and compute via 'common_fanova'
dist.pooled = as.vector(frechet.pooled$distvec)
dist.class = list()
for (i in 1:k){
dist.class[[i]] = as.vector(frechet.each[[i]]$distvec)
}
statinfo = common_fanova(dist.pooled, dist.class, MNAME, DNAME)
# RETURN
return(statinfo)
}
#' @rdname riem.fanova
#' @export
riem.fanovaP <- function(..., maxiter=50, eps=1e-5, nperm=99){
## PREPARE
# data
datalist = base::list(...)
k = length(datalist)
for (i in 1:k){
riemobj = datalist[[i]]
if (!inherits(riemobj, "riemdata")){
stop(paste0("* riem.fanovaP : ",i,"-th input should be an object of 'riemdata' class."))
}
}
# hypothesis testing argument
DNAME = ""
DOBJ = as.list(substitute(list(...)))[-1L]
for (i in 1:(k-1)){
DNAME = paste0(DNAME, as.character(DOBJ[[i]]), ", ")
}
DNAME = paste0(DNAME, "and ",as.character(DOBJ[[k]]))
MNAME = riemobj$name
# parameters
myiter = max(50, round(maxiter))
myeps = min(max(as.double(eps),0),1e-5)
myperm = max(19, round(nperm))
## COMPUTATION
# Step 1. compute pooled frechet objective
pooled.data = list()
for (i in 1:k){
pooled.data = c(pooled.data, datalist[[i]]$data)
}
pooled.ndata = length(pooled.data)
pooled.weight = rep(1/pooled.ndata, pooled.ndata)
frechet.pooled = inference_mean_intrinsic(MNAME, pooled.data, pooled.weight, myiter, myeps)
# Step 2. compute individual frechet objective
frechet.each = list()
vec.ndata = rep(0,k)
for (i in 1:k){
tgt.ndata = length(datalist[[i]]$data)
tgt.weight = rep(1/tgt.ndata, tgt.ndata)
vec.ndata[i] = tgt.ndata
frechet.each[[i]] = inference_mean_intrinsic(MNAME, datalist[[i]]$data, tgt.weight, myiter, myeps)
}
# Step 3. get distance information only and compute via 'common_fanova'
dist.pooled = as.vector(frechet.pooled$distvec)
dist.class = list()
for (i in 1:k){
dist.class[[i]] = as.vector(frechet.each[[i]]$distvec)
}
statinfo = common_fanova(dist.pooled, dist.class, MNAME, DNAME)
Tnow = as.double(statinfo$statistic)
# Step 4. Monte Carlo simulation via permutation
Tvec = rep(0,myperm)
for (i in 1:myperm){
# permutation index generation
randidx = aux_shuffle(vec.ndata)
# compute individual-class statistic
for (j in 1:k){
tgt.data = pooled.data[randidx[[j]]]
tgt.ndata = vec.ndata[j]
tgt.weight = rep(1/tgt.ndata, tgt.ndata)
frechet.tmp = inference_mean_intrinsic(MNAME, tgt.data, tgt.weight, myiter, myeps)
dist.class[[j]] = as.vector(frechet.tmp$distvec)
}
# compute the statistic
statnow = common_fanova(dist.pooled, dist.class, MNAME, DNAME)
Tvec[i] = as.double(statnow$statistic)
}
############################################################
# WRAP AND RETURN
statinfo$p.value = (sum(Tvec >= Tnow)+1)/(myperm+1)
return(statinfo)
}
#' @keywords internal
#' @noRd
common_fanova <- function(distall, distvecs, manifold, dataname){
# get some parameters
k = length(distvecs) # number of classes
n = length(distall)
# get local information
vec.nj = rep(0,k)
vec.Vj = rep(0,k)
vec.sig2j = rep(0,k)
for (j in 1:k){
distj = as.vector(distvecs[[j]])
nj = length(distj)
vec.nj[j] = nj
vec.Vj[j] = sum(distj^2)/nj
vec.sig2j[j] = (sum(distj^4)/nj) - ((sum(distj^2)/nj)^2)
}
# get global information
Vp = sum(distall^2)/n
lbdj = vec.nj/n
# compute statistics
Fn = Vp - sum(lbdj*vec.Vj)
Un = 0
for (j in 1:(k-1)){
for (l in (j+1):k){
Un = Un + ((lbdj[j]*lbdj[l])/(vec.sig2j[j]*vec.sig2j[l]))*((vec.Vj[j]-vec.Vj[l])^2)
}
}
term1 = (n*Un)/sum(vec.nj/vec.sig2j)
term2 = (n*(Fn^2))/sum((vec.nj^2)*vec.sig2j)
thestat = term1+term2
# compute p-value
pvalue = stats::pchisq(thestat, df=(k-1), lower.tail = FALSE)
# return output
mfdname = wrap_mfd2full(manifold)
hname = paste0("Frechet Analysis of Variance on ",mfdname," Manifold")
Ha = "at least one of equalities does not hold."
names(thestat) = "Tn"
res = list(statistic=thestat, p.value=pvalue, alternative = Ha, method=hname, data.name=dataname)
class(res) = "htest"
return(res)
}
# set.seed(777)
# ntest = 1000
# pvals.a = rep(0,ntest)
# pvals.p = rep(0,ntest)
#
# for (i in 1:ntest){
# X = cbind(matrix(rnorm(30*2, sd=0.1),ncol=2), rep(1,30))
# Y = cbind(matrix(rnorm(30*2, sd=0.1),ncol=2), rep(1,30))
# Xnorm = X/sqrt(rowSums(X^2))
# Ynorm = Y/sqrt(rowSums(Y^2))
#
# Xriem = wrap.sphere(Xnorm)
# Yriem = wrap.sphere(Ynorm)
# pvals.a[i] = riem.fanova(Xriem, Yriem)$p.value
# pvals.p[i] = riem.fanovaP(Xriem, Yriem, nperm=999)$p.value
# print(paste0("iteration ",i,"/",ntest," complete.."))
# }
#
# round(sum((pvals.a <= 0.05))/ntest, 5) # asymptotic theory is nice
# round(sum((pvals.p <= 0.05))/ntest, 5) # but PERMUTATION SEEMS TO WORK BETTER! (0.052)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.