# R/inference_fanova.R In Riemann: Learning with Data on Riemannian Manifolds

#### Documented in riem.fanovariem.fanovaP

#' Fréchet Analysis of Variance
#'
#' Given sets of manifold-valued data \eqn{X^{(1)}_{1:{n_1}}, X^{(2)}_{1:{n_2}}, \ldots, X^{(m)}_{1:{n_m}}},
#' performs analysis of variance to test equality of distributions. This means, small \eqn{p}-value implies that
#' at least one of the equalities does not hold.
#'
#' @param ... S3 objects of \code{riemdata} class for manifold-valued data.
#' @param nperm the number of permutations for resampling-based test.
#' @param maxiter maximum number of iterations to be run.
#' @param eps tolerance level for stopping criterion.
#'
#' @return a (list) object of \code{S3} class \code{htest} containing: \describe{
#' \item{statistic}{a test statistic.}
#' \item{p.value}{\eqn{p}-value under \eqn{H_0}.}
#' \item{alternative}{alternative hypothesis.}
#' \item{method}{name of the test.}
#' \item{data.name}{name(s) of provided sample data.}
#' }
#'
#' @examples
#' #-------------------------------------------------------------------
#' #            Example on Sphere : Uniform Samples
#' #
#' #  Each of 4 classes consists of 20 uniform samples from uniform
#' #  density on 2-dimensional sphere S^2 in R^3.
#' #-------------------------------------------------------------------
#' ## PREPARE DATA OF 4 CLASSES
#' ndata  = 200
#' class1 = list()
#' class2 = list()
#' class3 = list()
#' class4 = list()
#' for (i in 1:ndata){
#'   tmpxy = matrix(rnorm(4*2, sd=0.1), ncol=2)
#'   tmpz  = rep(1,4)
#'   tmp3d = cbind(tmpxy, tmpz)
#'   tmp  = tmp3d/sqrt(rowSums(tmp3d^2))
#'
#'   class1[[i]] = tmp[1,]
#'   class2[[i]] = tmp[2,]
#'   class3[[i]] = tmp[3,]
#'   class4[[i]] = tmp[4,]
#' }
#' obj1 = wrap.sphere(class1)
#' obj2 = wrap.sphere(class2)
#' obj3 = wrap.sphere(class3)
#' obj4 = wrap.sphere(class4)
#'
#' ## RUN THE ASYMPTOTIC TEST
#' riem.fanova(obj1, obj2, obj3, obj4)
#'
#' \donttest{
#' ## RUN THE PERMUTATION TEST WITH MANY PERMUTATIONS
#' riem.fanovaP(obj1, obj2, obj3, obj4, nperm=999)
#' }
#'
#' @references
#' \insertRef{dubey_frechet_2019}{Riemann}
#'
#' @name riem.fanova
#' @concept inference
#' @rdname riem.fanova
NULL

#' @rdname riem.fanova
#' @export
riem.fanova <- function(..., maxiter=50, eps=1e-5){
## PREPARE
#  data
datalist = base::list(...)
k = length(datalist)
for (i in 1:k){
riemobj = datalist[[i]]
if (!inherits(riemobj, "riemdata")){
stop(paste0("* riem.fanova : ",i,"-th input should be an object of 'riemdata' class."))
}
}
# hypothesis testing argument
DNAME = ""
DOBJ  = as.list(substitute(list(...)))[-1L]
for (i in 1:(k-1)){
DNAME = paste0(DNAME, as.character(DOBJ[[i]]), ", ")
}
DNAME = paste0(DNAME, "and ",as.character(DOBJ[[k]]))
MNAME = riemobj$name # iteration myiter = max(50, round(maxiter)) myeps = min(max(as.double(eps),0),1e-5) ## COMPUTATION # Step 1. compute pooled frechet objective pooled.data = list() for (i in 1:k){ pooled.data = c(pooled.data, datalist[[i]]$data)
}
pooled.ndata   = length(pooled.data)
pooled.weight  = rep(1/pooled.ndata, pooled.ndata)
frechet.pooled = inference_mean_intrinsic(MNAME, pooled.data, pooled.weight, myiter, myeps)

# Step 2. compute individual frechet objective
frechet.each = list()
for (i in 1:k){
tgt.ndata  = length(datalist[[i]]$data) tgt.weight = rep(1/tgt.ndata, tgt.ndata) frechet.each[[i]] = inference_mean_intrinsic(MNAME, datalist[[i]]$data, tgt.weight, myiter, myeps)
}

# Step 3. get distance information only and compute via 'common_fanova'
dist.pooled = as.vector(frechet.pooled$distvec) dist.class = list() for (i in 1:k){ dist.class[[i]] = as.vector(frechet.each[[i]]$distvec)
}
statinfo = common_fanova(dist.pooled, dist.class, MNAME, DNAME)

# RETURN
return(statinfo)
}

#' @rdname riem.fanova
#' @export
riem.fanovaP <- function(..., maxiter=50, eps=1e-5, nperm=99){
## PREPARE
#  data
datalist = base::list(...)
k = length(datalist)
for (i in 1:k){
riemobj = datalist[[i]]
if (!inherits(riemobj, "riemdata")){
stop(paste0("* riem.fanovaP : ",i,"-th input should be an object of 'riemdata' class."))
}
}
# hypothesis testing argument
DNAME = ""
DOBJ  = as.list(substitute(list(...)))[-1L]
for (i in 1:(k-1)){
DNAME = paste0(DNAME, as.character(DOBJ[[i]]), ", ")
}
DNAME = paste0(DNAME, "and ",as.character(DOBJ[[k]]))
MNAME = riemobj$name # parameters myiter = max(50, round(maxiter)) myeps = min(max(as.double(eps),0),1e-5) myperm = max(19, round(nperm)) ## COMPUTATION # Step 1. compute pooled frechet objective pooled.data = list() for (i in 1:k){ pooled.data = c(pooled.data, datalist[[i]]$data)
}
pooled.ndata   = length(pooled.data)
pooled.weight  = rep(1/pooled.ndata, pooled.ndata)
frechet.pooled = inference_mean_intrinsic(MNAME, pooled.data, pooled.weight, myiter, myeps)

# Step 2. compute individual frechet objective
frechet.each = list()
vec.ndata    = rep(0,k)
for (i in 1:k){
tgt.ndata    = length(datalist[[i]]$data) tgt.weight = rep(1/tgt.ndata, tgt.ndata) vec.ndata[i] = tgt.ndata frechet.each[[i]] = inference_mean_intrinsic(MNAME, datalist[[i]]$data, tgt.weight, myiter, myeps)
}

# Step 3. get distance information only and compute via 'common_fanova'
dist.pooled = as.vector(frechet.pooled$distvec) dist.class = list() for (i in 1:k){ dist.class[[i]] = as.vector(frechet.each[[i]]$distvec)
}
statinfo = common_fanova(dist.pooled, dist.class, MNAME, DNAME)
Tnow     = as.double(statinfo$statistic) # Step 4. Monte Carlo simulation via permutation Tvec = rep(0,myperm) for (i in 1:myperm){ # permutation index generation randidx = aux_shuffle(vec.ndata) # compute individual-class statistic for (j in 1:k){ tgt.data = pooled.data[randidx[[j]]] tgt.ndata = vec.ndata[j] tgt.weight = rep(1/tgt.ndata, tgt.ndata) frechet.tmp = inference_mean_intrinsic(MNAME, tgt.data, tgt.weight, myiter, myeps) dist.class[[j]] = as.vector(frechet.tmp$distvec)
}
# compute the statistic
statnow = common_fanova(dist.pooled, dist.class, MNAME, DNAME)
Tvec[i] = as.double(statnow$statistic) } ############################################################ # WRAP AND RETURN statinfo$p.value = (sum(Tvec >= Tnow)+1)/(myperm+1)
return(statinfo)
}
#' @keywords internal
#' @noRd
common_fanova <- function(distall, distvecs, manifold, dataname){
# get some parameters
k = length(distvecs)  # number of classes
n = length(distall)

# get local information
vec.nj    = rep(0,k)
vec.Vj    = rep(0,k)
vec.sig2j = rep(0,k)
for (j in 1:k){
distj = as.vector(distvecs[[j]])
nj    = length(distj)

vec.nj[j]    = nj
vec.Vj[j]    = sum(distj^2)/nj
vec.sig2j[j] = (sum(distj^4)/nj) - ((sum(distj^2)/nj)^2)
}

# get global information
Vp   = sum(distall^2)/n
lbdj = vec.nj/n

# compute statistics
Fn = Vp - sum(lbdj*vec.Vj)
Un = 0
for (j in 1:(k-1)){
for (l in (j+1):k){
Un = Un + ((lbdj[j]*lbdj[l])/(vec.sig2j[j]*vec.sig2j[l]))*((vec.Vj[j]-vec.Vj[l])^2)
}
}
term1   = (n*Un)/sum(vec.nj/vec.sig2j)
term2   = (n*(Fn^2))/sum((vec.nj^2)*vec.sig2j)
thestat = term1+term2

# compute p-value
pvalue = stats::pchisq(thestat, df=(k-1), lower.tail = FALSE)

# return output
mfdname  = wrap_mfd2full(manifold)
hname    = paste0("Frechet Analysis of Variance on ",mfdname," Manifold")
Ha       = "at least one of equalities does not hold."
names(thestat) = "Tn"

res   = list(statistic=thestat, p.value=pvalue, alternative = Ha, method=hname, data.name=dataname)
class(res) = "htest"
return(res)
}

# set.seed(777)
# ntest = 1000
# pvals.a = rep(0,ntest)
# pvals.p = rep(0,ntest)
#
# for (i in 1:ntest){
#   X = cbind(matrix(rnorm(30*2, sd=0.1),ncol=2), rep(1,30))
#   Y = cbind(matrix(rnorm(30*2, sd=0.1),ncol=2), rep(1,30))
#   Xnorm = X/sqrt(rowSums(X^2))
#   Ynorm = Y/sqrt(rowSums(Y^2))
#
#   Xriem = wrap.sphere(Xnorm)
#   Yriem = wrap.sphere(Ynorm)
#   pvals.a[i] = riem.fanova(Xriem, Yriem)$p.value # pvals.p[i] = riem.fanovaP(Xriem, Yriem, nperm=999)$p.value
#   print(paste0("iteration ",i,"/",ntest," complete.."))
# }
#
# round(sum((pvals.a <= 0.05))/ntest, 5) # asymptotic theory is nice
# round(sum((pvals.p <= 0.05))/ntest, 5) # but PERMUTATION SEEMS TO WORK BETTER! (0.052)


## Try the Riemann package in your browser

Any scripts or data that you put into this service are public.

Riemann documentation built on June 20, 2021, 5:07 p.m.