#NNCTFunctions.R;
#Contains the ancillary functions used in NNCT calculations, such as NNCT for two classes of points
#################################################################
#####AUXILIARY FUNCTIONS#################
#################################################################
#in all these functions
#data sets are either matrices or data frames
#' @import stats
#' @import MASS
#' @import graphics
#' @importFrom Rdpack reprompt
#' @importFrom pcds Dist
#'
#' @title Interpoint Distance Matrix
#'
#' @description
#' This function computes and returns the distance matrix computed by using the specified distance measure to
#' compute the distances between the rows of the set of points \code{x} and \code{y} using the
#' \code{\link[stats]{dist}} function in the \code{stats} package of the standard R distribution.
#' If \code{y} is provided (default=\code{NULL}) it yields a matrix of distances between the rows of \code{x} and
#' rows of \code{y}. Otherwise, it provides a square matrix with i,j-th entry being the distance between row
#' \eqn{i} and row \eqn{j} of \code{x}.
#' This function is different from the \code{\link[stats]{dist}} function in the \code{stats} package.
#' \code{dist} returns the distance matrix in a lower triangular form, and \code{ipd.mat} returns in a full matrix.
#' \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @param x A set of points in matrix or data frame form where points correspond to the rows.
#' @param y A set of points in matrix or data frame form where points correspond to the rows (default=\code{NULL}).
#' @param \dots Additional parameters to be passed on the \code{dist} function.
#'
#' @return A distance matrix whose i,j-th entry is the distance between row \eqn{i} of \code{x} and row \eqn{j} of \code{y} if \code{y} is provided,
#' otherwise i,j-th entry is the distance between rows \eqn{i} and \eqn{j} of \code{x}.
#'
#' @seealso \code{\link[stats]{dist}}, \code{\link{ipd.mat.euc}}, \code{\link{dist.std.data}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-3
#' X<-matrix(runif(3*n),ncol=3)
#' mtd<-"euclidean" #try also "maximum", "manhattan", "canberra", "binary"
#' ipd.mat(X,method=mtd)
#' ipd.mat(X,method="minkowski",p=6)
#'
#' n<-5
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd.mat(X,Y,method=mtd)
#' ipd.mat(X[1,],Y,method=mtd)
#' ipd.mat(c(.1,.2,.3),Y,method=mtd)
#' ipd.mat(X[1,],Y[3,],method=mtd)
#'
#' #1D data points
#' X<-as.matrix(runif(3)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(3) would not work
#' ipd.mat(X)
#'
#' Y<-as.matrix(runif(5))
#' ipd.mat(X,Y)
#' ipd.mat(X[1,],Y)
#' ipd.mat(X[1,],Y[3,])
#'
#' @export
ipd.mat <- function(x,y=NULL, ...)
{
if (!is.numeric(x) | (!is.null(y) & !is.numeric(y)))
{stop('first argument and (if provided) second argument must be of type numeric')}
if (is.null(y))
{dis<-dist(x,...)
ipd<-dist2full(dis)
} else
{
ifelse(is.vector(x),x<-matrix(x,ncol=length(x)),x<-as.matrix(x))
ifelse(is.vector(y),y<-matrix(y,ncol=length(y)),y<-as.matrix(y))
xy<-rbind(x,y)
nx<-nrow(x)
ny<-nrow(y)
dis<-dist(xy,...)
ipd0<-dist2full(dis)
ipd<-ipd0[1:nx,(nx+1):(nx+ny)]
}
ipd
} #end for the function
#'
#################################################################
#' @title Converts a lower triangular distance matrix to a full distance matrix
#'
#' @description
#' Converts a lower triangular distance matrix to a full distance matrix with zeroes in the diagonal.
#' The input is usually the result of the \code{\link[stats]{dist}} function in the \code{stats} package.
#' This function is adapted from Everitt's book (\insertCite{everitt:2004;textual}{nnspat})
#'
#' @param dis A lower triangular matrix, resulting from the \code{\link[stats]{dist}}
#' function in the \code{stats} package
#'
#' @return A square (symmetric) distance matrix with zeroes in the diagonal.
#'
#' @seealso \code{\link[stats]{dist}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-3
#' X<-matrix(runif(3*n),ncol=3)
#' dst<-dist(X)
#' dist2full(dst)
#'
#' @export
dist2full <- function(dis) {
n<-attr(dis,"Size")
full<-matrix(0,n,n)
full[lower.tri(full)]<-dis
full+t(full)
} #end for the function
#################################################################
#' @title Euclidean Interpoint Distance Matrix
#'
#' @description
#' Returns the Euclidean interpoint distance (IPD) matrix of a given the set of points \code{x} and \code{y} using
#' two for loops with the \code{\link{euc.dist}} function of the current package.
#' If \code{y} is provided (default=\code{NULL}) it yields a matrix of Euclidean distances between the rows of \code{x} and rows of \code{y},
#' otherwise it provides a square matrix with i,j-th entry being the Euclidean distance between row \eqn{i} and row \eqn{j}
#' of \code{x}. This function is different from the \code{\link{ipd.mat}} function in this package.
#' \code{\link{ipd.mat}} returns the full distance matrix for a variety of distance metrics (including the
#' Euclidean metric), while \code{\link{ipd.mat.euc}} uses the Euclidean distance metric only.
#' \code{ipd.mat.euc(X)} and \code{ipd.mat(X)} yield the same output for a set of points \code{X},
#' as the default metric in \code{\link{ipd.mat}} is also \code{"euclidean"}.
#'
#' @param x A set of points in matrix or data frame form where points correspond to the rows.
#' @param y A set of points in matrix or data frame form where points correspond to the rows (default=\code{NULL}).
#'
#' @return A distance matrix whose i,j-th entry is the Euclidean distance between row \eqn{i} of \code{x} and
#' row \eqn{j} of \code{y} if \code{y} is provided, otherwise i,j-th entry is
#' the Euclidean distance between rows \eqn{i} and \eqn{j} of \code{x}.
#'
#' @seealso \code{\link[stats]{dist}}, \code{\link{ipd.mat.euc}}, \code{\link{dist.std.data}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-3
#' X<-matrix(runif(3*n),ncol=3)
#' ipd.mat.euc(X)
#'
#' n<-5
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd.mat.euc(X,Y)
#' ipd.mat.euc(X[1,],Y)
#' ipd.mat.euc(c(.1,.2,.3),Y)
#' ipd.mat.euc(X[1,],Y[3,])
#'
#' #1D data points
#' X<-as.matrix(runif(3)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(3) would not work
#' ipd.mat.euc(X)
#'
#' Y<-as.matrix(runif(5))
#' ipd.mat.euc(X,Y)
#' ipd.mat.euc(X[1,],Y)
#' ipd.mat.euc(X[1,],Y[3,])
#'
#' @export
ipd.mat.euc <- function(x,y=NULL)
{
ifelse(is.vector(x),x<-matrix(x,ncol=length(x)),x<-as.matrix(x))
nx<-nrow(x)
if (nx==1)
{ipd<-euc.dist(x,x)
} else
{
if (is.null(y))
{
ipd<-matrix(0,nx,nx)
for (i in 1:(nx-1))
{
for (j in (i+1):nx)
{
ipd[i,j]<-euc.dist(x[i,],x[j,]);
ipd[j,i]<-ipd[i,j]
}
}
} else
{
ifelse(is.vector(y),y<-matrix(y,ncol=length(y)),y<-as.matrix(y))
ny<-nrow(y)
ipd<-matrix(0,nx,ny)
for (i in 1:nx)
{
for (j in 1:ny)
{
ipd[i,j]<-euc.dist(x[i,],y[j,]);
}
}
}
}
ipd
} #end for the function
#################################################################
#' @title The Euclidean distance between two vectors, matrices, or data frames
#'
#' @description Returns the Euclidean distance between \code{x} and \code{y} which can be vectors
#' or matrices or data frames of any dimension (\code{x} and \code{y} should be of same dimension).
#'
#' This function is equivalent to \code{\link[pcds]{Dist}} function in the \code{pcds} package but is
#' different from the \code{\link[stats]{dist}} function in the \code{stats} package of the standard R
#' distribution.
#' \code{dist} requires its argument to be a data matrix and \code{\link[stats]{dist}} computes and returns the distance matrix computed
#' by using the specified distance measure to compute the distances between the rows of a data matrix
#' (\insertCite{S-Book:1998;textual}{nnspat}),
#' while \code{euc.dist} needs two arguments to find the distances between.
#' For two data matrices \code{A} and \code{B},
#' \code{dist(rbind(as.vector(A),as.vector(B)))} and \code{euc.dist(A,B)} yield the same result.
#'
#' @param x,y Vectors, matrices or data frames (both should be of the same type).
#'
#' @return Euclidean distance between \code{x} and \code{y}
#'
#' @seealso \code{\link[stats]{dist}} from the base package \code{stats} and
#' \code{\link[pcds]{Dist}} from the package \code{pcds}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' B<-c(1,0); C<-c(1/2,sqrt(3)/2);
#' euc.dist(B,C);
#' euc.dist(B,B);
#'
#' x<-runif(10)
#' y<-runif(10)
#' euc.dist(x,y)
#'
#' xm<-matrix(x,ncol=2)
#' ym<-matrix(y,ncol=2)
#' euc.dist(xm,ym)
#'
#' euc.dist(xm,xm)
#'
#' dat.fr<-data.frame(b=B,c=C)
#' euc.dist(dat.fr,dat.fr)
#' euc.dist(dat.fr,cbind(B,C))
#'
#' @export
euc.dist <- function(x,y)
{
x<-as.matrix(x)
y<-as.matrix(y)
dis<-sqrt(sum((x-y)^2))
dis
} #end of the function
#'
#################################################################
#' @title The incidence matrix \code{W} for the NN digraph
#'
#' @description
#' Returns the \eqn{W=(w_ij)} matrix which is used to compute \eqn{Q}, \eqn{R} and \eqn{T} values in the NN structure.
#' \eqn{w_{ij}=I(} point eqn{j} is a NN of point \eqn{i))} i.e. \eqn{w_{ij}=1} if point \eqn{j} is a NN of point \eqn{i} and 0 otherwise.
#'
#' The argument \code{ties} is a logical argument (default=\code{FALSE}) to take ties into account or not. If \code{TRUE} the function
#' takes ties into account by making \eqn{w_{ij}=1/m} if point \eqn{j} is a NN of point \eqn{i}
#' and there are \eqn{m} tied NNs and 0 otherwise. If \code{FALSE}, \eqn{w_{ij}=1} if point \eqn{j} is a NN of point \eqn{i} and 0 otherwise.
#' The matrix \eqn{W} is equivalent to \eqn{A=(a_{ij})} matrix with \eqn{k=1}, i.e., \code{Wmat(X)=aij.mat(X,k=1)}.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param ties A logical parameter (default=\code{FALSE}) to take ties into account in computing the \eqn{W} matrix,
#' so if it is \code{TRUE}, \eqn{w_{ij}=1/m} if point \eqn{j} is a NN of point \eqn{i} and there are \eqn{m} tied NNs and 0 otherwise
#' and if \code{FALSE}, \eqn{w_{ij}=1} if point \eqn{j} is a NN of point \eqn{i} and 0 otherwise.
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return The incidence matrix \eqn{W=(w_ij)} where \eqn{w_{ij}=I(} point eqn{j} is a NN of point \eqn{i))},
#' i.e. \eqn{w_{ij}=1} if point \eqn{j} is a NN of point \eqn{i} and 0 otherwise.
#'
#' @seealso \code{\link{aij.mat}}, \code{\link{aij.nonzero}}, and \code{\link{aij.theta}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-3
#' X<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(X)
#' Wmat(ipd)
#' Wmat(X,is.ipd = FALSE)
#'
#' n<-5
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' Wmat(ipd)
#' Wmat(Y,is.ipd = FALSE)
#' Wmat(Y,is.ipd = FALSE,method="max")
#'
#' Wmat(Y,is.ipd = FALSE)
#' aij.mat(Y,k=1)
#'
#' #1D data points
#' X<-as.matrix(runif(5)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' Wmat(ipd)
#' Wmat(X,is.ipd = FALSE)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(15)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' Wmat(ipd,ties=TRUE)
#' Wmat(Y,ties=TRUE,is.ipd = FALSE)
#'
#' @export
Wmat <- function(x,ties=FALSE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
W<-matrix(0,n,n)
for (i in 1:n)
{
if (ties==FALSE)
{
W[i,ipd[i,]==sort(ipd[i,])[2]]<-1
} else
{
mindis<-sort(ipd[i,])[2]
ind.md<-which(ipd[i,]==mindis)
lties<-sum(ind.md!=i)
W[i,ind.md]<-1/lties
}
W[i,i]<-0
}
W
} #end for the function
#################################################################
# funsQandR
#'
#' @title Functions for the Number of Shared NNs, Shared NN vector and the number of reflexive NNs
#'
#' @description
#' Four functions: \code{Qval}, \code{Qvec}, \code{sharedNN} and \code{Rval}.
#'
#' \code{Qval} returns the \eqn{Q} value, the number of points with shared nearest neighbors (NNs), which occurs when two or
#' more points share a NN, for data in any dimension.
#'
#' \code{Qvec} returns the Q-value and also yields the Qv vector \eqn{Qv=(Q_0,Q_1,\ldots)} as well for data in any
#' dimension, where \eqn{Q_j} is the number of points shared as a NN by \eqn{j} other points.
#'
#' \code{sharedNN} returns the \code{vector} of number of points with shared NNs, \eqn{Q=(Q_0,Q_1,\ldots)} where \eqn{Q_i} is
#' the number of points that are NN to \eqn{i} points, and if a point is a NN of \eqn{i} points, then there are \eqn{i(i-1)}
#' points that share a NN. So \eqn{Q=\sum_{i>1} i(i-1)Q_i}.
#'
#' \code{Rval} returns the number of reflexive NNs, R (i.e., twice the number of reflexive NN pairs).
#'
#' These quantities are used, e.g., in computing the variances and covariances of the entries of the
#' nearest neighbor contingency tables used for Dixon's tests and other NNCT tests.
#' The input must be the incidence matrix, \eqn{W}, of the NN digraph.
#'
#' @param W The incidence matrix, \eqn{W}, for the NN digraph
#'
#' @return \code{Qval} returns the \eqn{Q} value
#' \code{Qvec} returns a \code{list} with two elements
#' \item{q}{the \eqn{Q} value, the number of shared NNs}
#' \item{qvec}{the \code{vector} of \eqn{Q_j} values}
#' \code{sharedNN} returns a \code{matrix} with 2 rows, where first row is the \eqn{j} values and second row is
#' the corresponding vector of \eqn{Q_j} values
#' \code{Rval}{the \eqn{R} value, the number of reflexive NNs}
#'
#' See the description above for the details of these quantities.
#'
#' @seealso \code{\link{Tval}}, \code{\link{QRval}}, \code{\link{sharedNNmc}} and \code{\link{Ninv}}
#'
#' @name funsQandR
NULL
#'
#' @rdname funsQandR
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #Examples for Qval
#' #3D data points
#' n<-10
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' Qval(W)
#'
#' #1D data points
#' X<-as.matrix(runif(10)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(10) would not work
#' ipd<-ipd.mat(X)
#' W<-Wmat(ipd)
#' Qval(W)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(15)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd,ties=TRUE)
#' Qval(W)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(15)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd,ties=TRUE)
#' Qval(W)
#'
#' @export
Qval <- function(W)
{
W<-ceiling(W) #this is to correct for the ties in W matrix
a<-colSums(W)
Q<-sum(a*a-a)
Q
} #end for the function
#'
#' @rdname funsQandR
#'
#' @examples
#' #Examples for Qvec
#' #3D data points
#' n<-10
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' Qvec(W)
#'
#' #2D data points
#' n<-15
#' Y<-matrix(runif(2*n),ncol=2)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' Qvec(W)
#'
#' #1D data points
#' X<-as.matrix(runif(15)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(15) would not work
#' ipd<-ipd.mat(X)
#' W<-Wmat(ipd)
#' Qvec(W)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(15)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd,ties=TRUE)
#' Qvec(W)
#'
#' @export
Qvec <- function(W)
{
W<-ceiling(W) #this is to correct for the ties in W matrix
colsumW<-colSums(W)
tb.csW<-table(colsumW)
Nv<-as.numeric(tb.csW)
Nlen<-length(Nv)
Nfac<-(1:Nlen-1)*(1:Nlen-2) #this is the i*(i-1) vector
qval<-sum(Nfac*Nv)
list(q=qval,qvec=Nv)
} #end for the function
#'
#' @rdname funsQandR
#'
#' @examples
#' #Examples for sharedNN
#' #3D data points
#' n<-10
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' sharedNN(W)
#' Qvec(W)
#'
#' #1D data points
#' X<-as.matrix(runif(15)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' W<-Wmat(ipd)
#' sharedNN(W)
#' Qvec(W)
#'
#' #2D data points
#' n<-15
#' Y<-matrix(runif(2*n),ncol=2)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' sharedNN(W)
#' Qvec(W)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd,ties=TRUE)
#' sharedNN(W)
#'
#' @export
sharedNN <- function(W)
{
W<-ceiling(W) #this is to correct for the ties in W matrix
CS<-colSums(W)
tb.cs<-table(CS)
sh.nn1<-as.numeric(labels(tb.cs)[[1]])
sh.nn2<-as.numeric(tb.cs)
res<-rbind(sh.nn1,sh.nn2)
row.names(res)<-c()
res
} #end for the function
#'
#' @rdname funsQandR
#'
#' @examples
#' #Examples for Rval
#' #3D data points
#' n<-10
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' Rval(W)
#'
#' #1D data points
#' X<-as.matrix(runif(15)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' W<-Wmat(ipd)
#' Rval(W)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd,ties=TRUE)
#' Rval(W)
#'
#' @export
Rval <- function(W)
{
W<-ceiling(W) #this is to correct for the ties in W matrix
sum(W*t(W))
} #end for the function
#'
#################################################################
#' @title Vector of Shared NNs and Number of Reflexive NNs
#'
#' @description Returns the \code{Qvec} and \code{R} where \eqn{Qvec=(Q_0,Q_1,\ldots)} with
#' \eqn{Q_j} is the number of points shared as a NN
#' by \eqn{j} other points i.e. number of points that are NN of \eqn{i} points, for \eqn{i=0,1,2,\ldots}
#' and \code{R} is the number of reflexive pairs where A and B are reflexive iff they are NN to each other.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return Returns a \code{list} with two elements
#' \item{Qvec}{vector of \eqn{Q_j} values}
#' \item{R}{number of reflexive points}
#'
#' @seealso \code{\link{Qval}}, \code{\link{Qvec}}, \code{\link{sharedNN}}, \code{\link{Rval}}
#' and \code{\link{QRval}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' sharedNN(W)
#' Qvec(W)
#' Ninv(ipd)
#' Ninv(Y,is.ipd = FALSE)
#' Ninv(Y,is.ipd = FALSE,method="max")
#'
#' #1D data points
#' n<-15
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' W<-Wmat(ipd)
#' sharedNN(W)
#' Qvec(W)
#' Ninv(ipd)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ny<-nrow(Y)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' sharedNN(W)
#' Qvec(W)
#' Ninv(ipd)
#'
#' @export
Ninv <- function(x,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
inv.nn <- rep(0,n);
Rf <- 0;
NN.ls <- list()
for (i in 1:n)
NN.ls[[i]] <- NN(ipd,i); #list of indices of NNs of subject i
for (k in 1:n)
{
NN.indk<-NN.ls[[k]]
L<-length(NN.indk)
for (j in 1:L)
{
inv.nn[NN.indk[j]] <- inv.nn[NN.indk[j]]+1
if ( sum(k==NN.ls[[NN.indk[j] ]])>0 )
Rf<- Rf+1
}
}
tb.inv.nn<-table(inv.nn)
Nv<-as.numeric(tb.inv.nn)
list(Qvec=Nv,
R=Rf
)
} #end for the function
#'
#################################################################
#' @title Number of Shared and Reflexive NNs
#'
#' @description Returns the \eqn{Q} and \eqn{R} values where \eqn{Q} is the number of points shared as a NN
#' by other points i.e. number of points that are NN of other points (which occurs when two or
#' more points share a NN, for data in any dimension) and \eqn{R} is the number of reflexive pairs
#' where A and B are reflexive iff they are NN to each other.
#'
#' These quantities are used, e.g., in computing the variances and covariances of the entries of the
#' nearest neighbor contingency tables used for Dixon's tests and other NNCT tests.
#'
#' @param njr A \code{list} that is the output of \code{\link{Ninv}} (with first entry in the \code{list} is \code{vector} of number of shared NNs
#' and second is the \eqn{R} value, number of reflexive points)
#'
#' @return A \code{list} with two elements
#' \item{Q}{the \eqn{Q} value, the number of shared NNs}
#' \item{R}{the \eqn{R} value, the number of reflexive NNs}
#'
#' @seealso \code{\link{Qval}}, \code{\link{Qvec}}, \code{\link{sharedNN}}, \code{\link{Rval}}
#' and \code{\link{Ninv}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' ninv<-Ninv(ipd)
#' QRval(ninv)
#' W<-Wmat(ipd)
#' Qvec(W)$q
#'
#' #1D data points
#' n<-15
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' ninv<-Ninv(ipd)
#' QRval(ninv)
#' W<-Wmat(ipd)
#' Qvec(W)$q
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ny<-nrow(Y)
#' ipd<-ipd.mat(Y)
#' ninv<-Ninv(ipd)
#' QRval(ninv)
#' W<-Wmat(ipd)
#' Qvec(W)$q
#'
#' @export
QRval <- function(njr)
{
nj<-njr[[1]]
njlen<-length(nj)
njfac<-(1:njlen-1)*(1:njlen-2) #this is the i*(i-1) vector
q<-sum(njfac*nj)
r<-njr[[2]]
list(Q=q,R=r)
} #end for the function
#'
#################################################################
#' @title \eqn{T} value in NN structure
#'
#' @description Returns the \eqn{T} value, which is the number of triplets \eqn{(z_i, z_j, z_k)} with
#' "\eqn{NN(z_i) = NN(z_j) = z_k} and \eqn{NN(z_k) = z_j}" where \eqn{NN(\cdot)} is the nearest neighbor function.
#' Note that in the NN digraph, \eqn{T+R} is the sum of the indegrees of the points in the reflexive pairs.
#'
#' This quantity (together with \eqn{Q} and \eqn{R}) is used in computing the variances and covariances of the entries of the
#' reflexivity contingency table. See (\insertCite{ceyhan:NNreflexivity2017;textual}{nnspat}) for further
#' details.
#'
#' @param W The incidence matrix, \eqn{W}, for the NN digraph
#' @param R The number of reflexive NNs (i.e., twice the number of reflexive NN pairs)
#'
#' @return Returns the \eqn{T} value. See the description above for the details of this quantity.
#'
#' @seealso \code{\link{Qval}}, \code{\link{Qvec}}, \code{\link{sharedNN}} and \code{\link{Rval}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-10
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd)
#' R<-Rval(W)
#' Tval(W,R)
#'
#' #1D data points
#' X<-as.matrix(runif(15)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' W<-Wmat(ipd)
#' R<-Rval(W)
#' Tval(W,R)
#'
#' #with ties=TRUE in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' W<-Wmat(ipd,ties=TRUE)
#' R<-Rval(W)
#' Tval(W,R)
#'
#' @export
Tval <- function(W,R)
{
W<-ceiling(W) #this is to correct for the ties in W matrix
n<-sum(W)
ref.ind<-which(as.vector(W*t(W)>0))
col.ref.ind<-ceiling(ref.ind/n)
sum(W[,col.ref.ind])
RT<-sum(W[,col.ref.ind]) #sum of the indegrees of points in the reflexive pairs
Tval<-RT-R
Tval
} #end for the function
#'
#################################################################
#' @title Finding the index of the NN of a given point
#'
#' @description
#' Returns the index (or indices) of the nearest neighbor(s) of subject \eqn{i} given data set or IPD matrix \code{x}.
#' It will yield a \code{vector} if there are ties, and subject indices correspond to rows (i.e. rows \code{1:n} ) if \code{x}
#' is the data set and to rows or columns if \code{x} is the IPD matrix.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param i index of (i.e., row number for) the subject whose NN is to be found.
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return Returns the index (indices) i.e. row number(s) of the NN of subject \eqn{i}
#'
#' @seealso \code{\link{kNN}} and \code{\link{NNsub}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' NN(ipd,1)
#' NN(Y,1,is.ipd = FALSE)
#' NN(ipd,5)
#' NN(Y,5,is.ipd = FALSE)
#' NN(Y,5,is.ipd = FALSE,method="max")
#'
#' #1D data points
#' X<-as.matrix(runif(15)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' NN(ipd,1)
#' NN(ipd,5)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ny<-nrow(Y)
#' ipd<-ipd.mat(Y)
#' for (i in 1:ny)
#' cat(i,":",NN(ipd,i),"|",NN(Y,i,is.ipd = FALSE),"\n")
#'
#' @export
NN <- function(x,i,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
if (n<=1 || i>n)
{labind<-NA
return(labind)}
D<-max(ipd)
ind <- 1:ncol(ipd)
dis.row <- ipd[i,]
min.dis <- min(dis.row[-i])
dis.row[i] <- 2*D #to avoid if min distance is zero!
labind <- ind[dis.row==min.dis]
labind
} #end for the function
#'
#################################################################
#' @title Finding the indices of the \code{k} NNs of a given point
#'
#' @description
#' Returns the indices of the \code{k} nearest neighbors of subject \eqn{i} given data set or IPD matrix \code{x}.
#' Subject indices correspond to rows (i.e. rows \code{1:n} ) if \code{x} is the data set and to rows or columns
#' if \code{x} is the IPD matrix.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @inheritParams NN
#' @param k Integer specifying the number of NNs (of subject \eqn{i}).
#'
#' @return Returns the indices (i.e. row numbers) of the \code{k} NNs of subject \eqn{i}
#'
#' @seealso \code{\link{NN}}, \code{\link{NNdist}} and \code{\link{NNdist2cl}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' k<-sample(1:5,1)
#' k
#' NN(ipd,1)
#' kNN(ipd,1,k)
#' kNN(Y,1,k,is.ipd = FALSE)
#' kNN(Y,1,k,is.ipd = FALSE,method="max")
#'
#' NN(ipd,5)
#' kNN(ipd,5,k)
#' kNN(Y,5,k,is.ipd = FALSE)
#'
#' #1D data points
#' X<-as.matrix(runif(15)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' kNN(ipd,3,k)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(30)*10),ncol=3)
#' ny<-nrow(Y)
#' ipd<-ipd.mat(Y)
#' for (i in 1:ny)
#' cat(i,":",kNN(ipd,i,k),"\n")
#'
#' @export
kNN <- function(x,i,k,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
if (n<=1 || max(i,k+1)>n)
{knndis<-NA
return(knndis)}
D<-max(ipd)
ind <- 1:ncol(ipd)
dis.row <- ipd[i,]
knndis <- order(dis.row)[2:(k+1)]
knndis
} #end for the function
#'
#################################################################
#' @title Distances between subjects and their NNs
#'
#' @description
#' Returns the distances between subjects and their NNs. The output is an \eqn{n \times 2} matrix where \eqn{n} is the data size
#' and first column is the subject index and second column contains the corresponding distances to NN subjects.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return Returns an \eqn{n \times 2} matrix where \eqn{n} is data size (i.e. number of subjects) and first column is the subject
#' index and second column is the NN distances.
#'
#' @seealso \code{\link{kthNNdist}}, \code{\link{kNNdist}}, and \code{\link{NNdist2cl}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' NNdist(ipd)
#' NNdist(Y,is.ipd = FALSE)
#' NNdist(Y,is.ipd = FALSE,method="max")
#'
#' #1D data points
#' X<-as.matrix(runif(5)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' NNdist(ipd)
#' NNdist(X,is.ipd = FALSE)
#'
#' @export
NNdist <- function(x,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
if (n<=1)
{min.dis<-NA
return(c(1,min.dis))}
min.dis<-vector()
for (i in 1:n)
{
dis.row <- ipd[i,]
min.dis <- c(min.dis,min(dis.row[-i]))
}
subj.ind<-1:n
cbind(subj.ind,min.dis)
} #end for the function
#'
#################################################################
# funs.kNNdist
#'
#' @title Functions for the \eqn{k^{th}} and \code{k} NN distances
#'
#' @description
#' Two functions: \code{kthNNdist} and \code{kNNdist}.
#'
#' \code{kthNNdist} returns the distances between subjects and their \eqn{k^{th}} NNs. The output is an \eqn{n \times 2} matrix where
#' \eqn{n} is the data size and first column is the subject index and second column contains the corresponding
#' distances to \eqn{k^{th}} NN subjects.
#'
#' \code{kNNdist} returns the distances between subjects and their \code{k} NNs.
#' The output is an \eqn{n \times (k+1)} matrix where
#' \eqn{n} is the data size and first column is the subject index and the remaining \code{k} columns contain the corresponding
#' distances to \code{k} NN subjects.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param k Integer specifying the number of NNs (of subjects).
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return \code{kthNNdist} returns an \eqn{n \times 2} matrix where \eqn{n} is data size (i.e. number of subjects) and
#' first column is the subject index and second column is the \eqn{k^{th}} NN distances.
#'
#' \code{kNNdist} returns an \eqn{n \times (k+1)} matrix where \eqn{n} is data size (i.e. number of subjects) and
#' first column is the subject index and the remaining \code{k} columns contain the corresponding
#' distances to \code{k} NN subjects.
#'
#' @seealso \code{\link{NNdist}} and \code{\link{NNdist2cl}}
#'
#' @name funs.kNNdist
NULL
#'
#' @rdname funs.kNNdist
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #Examples for kthNNdist
#' #3D data points, gives NAs when n<=k
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' kthNNdist(ipd,3)
#' kthNNdist(Y,3,is.ipd = FALSE)
#' kthNNdist(ipd,5)
#' kthNNdist(Y,5,is.ipd = FALSE)
#' kthNNdist(Y,3,is.ipd = FALSE,method="max")
#'
#' #1D data points
#' X<-as.matrix(runif(5)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' kthNNdist(ipd,3)
#'
#' @export
kthNNdist <- function(x,k,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
kth.dis<-vector()
for (i in 1:n)
{
dis.row <- ipd[i,]
kth.dis <- c(kth.dis,dis.row[order(dis.row)[k+1]] )
}
subj.ind<-1:n
res<-list(
kth.nndist=cbind(subj.ind,kth.dis),
k=k)
res
} #end for the function
#'
#' @rdname funs.kNNdist
#'
#' @examples
#' #Examples for kNNdist
#' #3D data points, gives NAs if n<=k for n,n+1,...,kNNs
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' kNNdist(ipd,3)
#' kNNdist(ipd,5)
#' kNNdist(Y,5,is.ipd = FALSE)
#'
#' kNNdist(Y,5,is.ipd = FALSE,method="max")
#'
#' kNNdist(ipd,1)
#' kthNNdist(ipd,1)
#'
#' #1D data points
#' X<-as.matrix(runif(5)) # need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(5) would not work
#' ipd<-ipd.mat(X)
#' kNNdist(ipd,3)
#'
#' @export
kNNdist <- function(x,k,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
kdist<-vector()
for (i in 1:n)
{
dist.row <- ipd[i,]
kdist <- rbind(kdist,dist.row[order(dist.row)[2:(k+1)]] )
}
res<-list(
knndist=cbind(1:n,kdist),
k=k)
res
} #end for the function
#'
#################################################################
#' @title Distances between subjects from class \eqn{i} and their NNs from class \eqn{j}
#'
#' @description
#' Returns the distances between subjects from class \eqn{i} and their nearest neighbors (NNs) from class \eqn{j}.
#' The output is a \code{list} with first entry (\code{nndist}) being an \eqn{n_i \times 3} matrix where \eqn{n_i} is the size of class \eqn{i}
#' and first column is the subject index in class \eqn{i}, second column is the subject index in NN class \eqn{j},
#' and third column contains the corresponding distances of each class \eqn{i} subject to its NN among class \eqn{j}
#' subjects. Class \eqn{i} is labeled as base class and class \eqn{j} is labeled as NN class.
#'
#' The argument \code{within.class.ind} is a logical argument (default=\code{FALSE}) to determine the indexing of
#' the class \eqn{i} subjects. If \code{TRUE}, index numbering of subjects is within the class,
#' from 1 to class size (i.e., \code{1:n_i}), according to their order in the original data;
#' otherwise, index numbering within class is just the indices in the original data.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param i,j class label of base class and NN classes, respectively.
#' @param lab The \code{vector} of class labels (numerical or categorical)
#' @param within.class.ind A logical parameter (default=\code{FALSE}). If \code{TRUE}, index numbering of subjects
#' is within the class, from 1 to class size (i.e., \code{1:n_i}), according to their order in the original data;
#' otherwise, index numbering within class is just the indices in the original data.
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return Returns a \code{list} with three elements
#' \item{nndist}{\eqn{n_i \times 3} matrix where \eqn{n_i} is the size of class \eqn{i} and first column is the subject index in
#' class \eqn{i}, second column is the subject index in NN class \eqn{j}, and third column contains the corresponding
#' distances of each class \eqn{i} subject to its NN among class \eqn{j} subjects.}
#' \item{base.class}{label of base class}
#' \item{nn.class}{label of NN class}
#'
#' @seealso \code{\link{kthNNdist}}, \code{\link{kNNdist}}, and \code{\link{NNdist2cl}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' table(clab)
#' NNdist2cl(ipd,1,2,clab)
#' NNdist2cl(Y,1,2,clab,is.ipd = FALSE)
#'
#' NNdist2cl(ipd,1,2,clab,within = TRUE)
#'
#' #three class case
#' clab<-sample(1:3,n,replace=TRUE) #class labels
#' table(clab)
#' NNdist2cl(ipd,2,1,clab)
#'
#' #1D data points
#' n<-15
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' table(clab)
#' NNdist2cl(ipd,1,2,clab)
#' NNdist2cl(X,1,2,clab,is.ipd = FALSE)
#'
#' @export
NNdist2cl <- function(x,i,j,lab,within.class.ind=FALSE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
if (any(lab==i)*any(lab==j)==0)
{stop('given labels i and j are not among the class labels')}
if (within.class.ind==FALSE)
{
ns<-length(lab) #sample size of data for which IPDM is computed
i.ind<-(1:ns)[lab==i]
j.ind<-(1:ns)[lab==j]
ni<-length(i.ind)
} else
{
ni<-sum(lab==i)
nj<-sum(lab==j)
i.ind<-1:ni
j.ind<-1:nj
}
ipdij<-matrix(ipd[lab==i,lab==j],nrow=ni)
min.dist<-nnj.ind<-vector()
for (ind in 1:ni)
{
dist.row <- ipdij[ind,]
min.dist.row<-min(dist.row)
nnj.ind<-c(nnj.ind,j.ind[which(dist.row==min.dist.row)])
min.dist <- c(min.dist,min.dist.row)
}
res<-list(
nndist=cbind(i.ind,nnj.ind,min.dist),
base.class=i,
nn.class=j)
res
} #end for the function
#'
#################################################################
# funs.kNNdist2cl
#'
#' @title Functions for the \eqn{k^{th}} and \code{k} NN distances
#'
#' @description
#' Two functions: \code{kthNNdist2cl} and \code{kNNdist2cl}.
#'
#' \code{kthNNdist2cl} returns the distances between subjects from class \eqn{i} and their \eqn{k^{th}} NNs from class \eqn{j}.
#' The output is a \code{list} with first entry (\code{kth.nndist}) is an \eqn{n_i \times 3} matrix where \eqn{n_i} is the size of class \eqn{i}
#' and first column is the subject index for class \eqn{i},
#' second column is the index of the \eqn{k^{th}} NN of class \eqn{i} subjects among class \eqn{j} subjects and third column
#' contains the corresponding \eqn{k^{th}} NN distances. The other entries in the \code{list} are labels of base class and NN class
#' and the value of \code{k}, respectively.
#'
#' \code{kNNdist2cl} returns the distances between subjects from class \eqn{i} and their \code{k} NNs from class \eqn{j}.
#' The output is a \code{list} with first entry (\code{ind.knndist}) is an \eqn{n_i \times (k+1)} matrix where \eqn{n_i} is the size of class \eqn{i},
#' first column is the indices of class \eqn{i} subjects, 2nd to \eqn{(k+1)}-st columns are the indices of \code{k} NNs of class \eqn{i}
#' subjects among class \eqn{j} subjects. The second \code{list} entry (\code{knndist}) is an \eqn{n_i \times k} matrix where \eqn{n_i} is the
#' size of class \eqn{i} and the columns are the \code{k}NN distances of class \eqn{i} subjects to class \eqn{j} subjects.
#' The other entries in the \code{list} are labels of base class and NN class and the value of \code{k}, respectively.
#'
#' The argument \code{within.class.ind} is a logical argument (default=\code{FALSE}) to determine the indexing of the class \eqn{i}
#' subjects. If \code{TRUE}, index numbering of subjects is within the class, from 1 to class size (i.e., \code{1:n_i}),
#' according to their order in the original data; otherwise, index numbering within class is just the indices
#' in the original data.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param k Integer specifying the number of NNs (of subjects).
#' @param i,j class label of base class and NN classes, respectively.
#' @param lab The \code{vector} of class labels (numerical or categorical)
#' @param within.class.ind A logical parameter (default=\code{FALSE}). If \code{TRUE}, index numbering of subjects is within the class, from 1 to class size (i.e., \code{1:n_i}),
#' according to their order in the original data; otherwise, index numbering within class is just the indices
#' in the original data.
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return \code{kthNNdist2cl} returns the \code{list} of elements
#' \item{kth.nndist}{\eqn{n_i \times 3} matrix where \eqn{n_i} is the size of class \eqn{i}
#' and first column is the subject index for class \eqn{i}, second column is the index of the \code{k}-th NN of class \eqn{i}
#' subjects among class \eqn{j} subjects and third column contains the corresponding \code{k}-th NN distances,
#' , returned by \code{Zseg.ind.ct} only}
#' \item{base.class}{label of base class}
#' \item{nn.class}{label of NN class}
#' \item{k}{value of \code{k} in \code{k}NN}
#'
#' \code{kNNdist2cl} returns the \code{list} of elements
#' \item{ind.knndist}{\eqn{n_i \times (k+1)} matrix where \eqn{n_i} is the size of class \eqn{i}, first column is the indices of class \eqn{i}
#' subjects, 2nd to \eqn{(k+1)}-st columns are the indices of \eqn{k} NNs of class \eqn{i} subjects among class \eqn{j} subjects.}
#' \item{knndist}{\eqn{n_i \times k} matrix where \eqn{n_i} is the size of class \eqn{i} and the columns are the \eqn{k}NN distances of class \eqn{i}
#' subjects to class \eqn{j} subjects.}
#' \item{base.class}{label of base class}
#' \item{nn.class}{label of NN class}
#' \item{k}{value of \code{k} in \code{k}NN}
#'
#' @seealso \code{\link{NNdist2cl}}, \code{\link{kthNNdist}} and \code{\link{kNNdist}}
#'
#' @name funs.kNNdist2cl
NULL
#'
#' @rdname funs.kNNdist2cl
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #Examples for kthNNdist2cl
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' table(clab)
#' kthNNdist2cl(ipd,3,1,2,clab)
#' kthNNdist2cl(Y,3,1,2,clab,is.ipd = FALSE)
#' kthNNdist2cl(ipd,3,1,2,clab,within = TRUE)
#'
#' #three class case
#' clab<-sample(1:3,n,replace=TRUE) #class labels
#' table(clab)
#' kthNNdist2cl(ipd,3,2,3,clab)
#'
#' #1D data points
#' n<-15
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' table(clab)
#' kthNNdist2cl(ipd,3,1,2,clab) # here kthNNdist2cl(ipd,3,1,12,clab) #gives an error message
#'
#' kthNNdist2cl(ipd,3,"1",2,clab)
#'
#' @export
kthNNdist2cl <- function(x,k,i,j,lab,within.class.ind=FALSE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
if (any(lab==i)*any(lab==j)==0)
{stop('given labels i and j are not among the class labels')}
if (within.class.ind==FALSE)
{
ns<-length(lab) #sample size of data for which IPDM is computed
i.ind<-(1:ns)[lab==i]
j.ind<-(1:ns)[lab==j]
ni<-length(i.ind)
} else
{
ni<-sum(lab==i)
nj<-sum(lab==j)
i.ind<-1:ni
j.ind<-1:nj
}
ipdij<-matrix(ipd[lab==i,lab==j],nrow=ni)
kth.dist<-kth.nnj.ind<-vector()
for (ind in 1:ni)
{
dist.row <- ipdij[ind,]
ord<-order(dist.row)
kth.nnj.ind<-c(kth.nnj.ind,j.ind[ord[k]])
kth.dist <- c(kth.dist,dist.row[ord[k]] )
}
res<-list(
kth.nndist=cbind(i.ind,kth.nnj.ind,kth.dist),
base.class=i,
nn.class=j,
k=k)
res
} #end for the function
#'
#' @rdname funs.kNNdist2cl
#'
#' @examples
#' #Examples for kNNdist2cl
#' #3D data points
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' table(clab)
#' kNNdist2cl(ipd,3,1,2,clab)
#' kNNdist2cl(Y,3,1,2,clab,is.ipd = FALSE)
#'
#' kNNdist2cl(ipd,3,1,2,clab,within = TRUE)
#'
#' #three class case
#' clab<-sample(1:3,n,replace=TRUE) #class labels
#' table(clab)
#' kNNdist2cl(ipd,3,1,2,clab)
#'
#' #1D data points
#' n<-15
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' table(clab)
#'
#' kNNdist2cl(ipd,3,1,2,clab)
#' kNNdist2cl(ipd,3,"1",2,clab) #here kNNdist2cl(ipd,3,"a",2,clab) #gives an error message
#'
#' @export
kNNdist2cl <- function(x,k,i,j,lab,within.class.ind=FALSE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
if (any(lab==i)*any(lab==j)==0)
{stop('given labels for i and j are not among the class labels')}
if (within.class.ind==FALSE)
{
ns<-length(lab) #sample size of data for which IPDM is computed
i.ind<-(1:ns)[lab==i]
j.ind<-(1:ns)[lab==j]
ni<-length(i.ind)
} else
{
ni<-sum(lab==i)
nj<-sum(lab==j)
i.ind<-1:ni
j.ind<-1:nj
}
ipdij<-matrix(ipd[lab==i,lab==j],nrow=ni)
n<-nrow(ipdij)
kdist<-nnj.ind<-vector()
for (ind in 1:n)
{
dist.row <- ipdij[ind,]
ord<-order(dist.row)
nnj.ind<-rbind(nnj.ind,j.ind[ord[1:k]])
kdist <- rbind(kdist,dist.row[ord[1:k]] )
}
res<-list(
ind.knndist=cbind(i.ind,nnj.ind),
knndist=kdist,
base.class=i,
nn.class=j,
k=k)
res
} #end for the function
#'
#################################################################
#' @title Finding the index of the NN of a given point among a subset of points
#'
#' @description
#' Returns the index (indices) of the nearest neighbor(s) of subject \eqn{i} (other than subject \eqn{i}) among the indices of points
#' provided in the subsample \code{ss} using the given data set or IPD matrix \code{x}. The indices in \code{ss} determine the
#' columns of the IPD matrix to be used in this function.
#' It will yield a \code{vector} if there are ties, and subject indices correspond to rows (i.e. rows \code{1:n} ) if \code{x}
#' is the data set and to rows or columns if \code{x} is the IPD matrix.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param ss indices of subjects (i.e., row indices in the data set) among with the NN of subject is to be found
#' @inheritParams NN
#'
#' @return Returns a \code{list} with the elements
#' \item{base.ind}{index of the base subject}
#' \item{ss.ind}{the index (indices) i.e. row number(s) of the NN of subject \eqn{i} among the subjects with indices
#' provided in \code{ss}}
#' \item{ss.dis}{distance from subject \eqn{i} to its NN among the subjects in \code{ss}}
#'
#' @seealso \code{\link{NN}} and \code{\link{kNN}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' #3D data points bura
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' #indices of the subsample ss
#' ss<-sample(1:n,floor(n/2),replace=FALSE)
#' NNsub(ss,ipd,2)
#' NNsub(ss,Y,2,is.ipd = FALSE)
#' NNsub(ss,ipd,5)
#'
#' #1D data points
#' n<-15
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' #two class case
#' clab<-sample(1:2,n,replace=TRUE) #class labels
#' #indices of the subsample ss
#' ss<-sample(1:n,floor(n/2),replace=FALSE)
#' NNsub(ss,ipd,2)
#' NNsub(ss,ipd,5)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(60)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' ss<-sample(1:20,10,replace=FALSE) #class labels
#' NNsub(ss,ipd,2)
#' NNsub(ss,ipd,5)
#'
#' @export
NNsub <- function(ss,x,i,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
if (n<=1 || i>n)
{
res<-list(base.ind=NA,
ss.ind=NA,
ss.dis=NA)
return(res)
}
if (sum(ss==i)==1)
{
ssi<-ss[ss!=i]
dis.vec<-ipd[i,ssi] #distances from i to points whose indices are in \code{ss}
min.dis<-min(dis.vec)
dlen<-sum(dis.vec==min.dis)
ord<-order(dis.vec)
ss.ind<-ss[ord[1:dlen]]
D<-dis.vec[ord[1:dlen]]
} else
{
dis.vec<-ipd[i,ss] #distances from i to points whose indices are in \code{ss}
min.dis<-min(dis.vec)
dlen<-sum(dis.vec==min.dis)
ord<-order(dis.vec)
ss.ind<-ss[ord[1:dlen]]
D<-dis.vec[ord[1:dlen]]
}
list(base.ind=i,
ss.ind=ss.ind,
ss.dis=D)
} #end for the function
#'
#################################################################
#' @title Nearest Neighbor Contingency Table (NNCT)
#'
#' @description
#' Returns the \eqn{k \times k} NNCT given the IPD matrix or data set \code{x} where \eqn{k} is
#' the number of classes in the data set.
#' Rows and columns of the NNCT are labeled with the corresponding class labels.
#'
#' The argument \code{ties} is a logical argument (default=\code{FALSE}) to take ties into account or not.
#' If \code{TRUE} a NN
#' contributes \eqn{1/m} to the NN count if it is one of the \eqn{m} tied NNs of a subject.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' See also (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:jkss-posthoc-2017;textual}{nnspat})
#' and the references therein.
#'
#' @param x The IPD matrix (if \code{is.ipd=TRUE}) or a data set of points in matrix or data frame form where points
#' correspond to the rows (if \code{is.ipd = FALSEALSE}).
#' @param lab The \code{vector} of class labels (numerical or categorical)
#' @param ties A logical argument (default=\code{FALSE}) to take ties into account or not. If \code{TRUE} a NN
#' contributes \eqn{1/m} to the NN count if it is one of the \eqn{m} tied NNs of a subject.
#' @param is.ipd A logical parameter (default=\code{TRUE}). If \code{TRUE}, \code{x} is taken as the inter-point distance
#' matrix, otherwise, \code{x} is taken as the data set with rows representing the data points.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#'
#' @return Returns the \eqn{k \times k} NNCT where \eqn{k} is the number of classes in the data set.
#'
#' @seealso \code{\link{nnct.sub}}, \code{\link{scct}}, \code{\link{rct}}, and \code{\link{tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct(ipd,cls)
#' nnct(ipd,cls,ties = TRUE)
#'
#' nnct(Y,cls,is.ipd = FALSE)
#' nnct(Y,cls,is.ipd = FALSE,method="max")
#' nnct(Y,cls,is.ipd = FALSE,method="mink",p=6)
#'
#' #with one class, it works but really uninformative
#' cls<-rep(1,n)
#' nnct(ipd,cls)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' nnct(ipd,fcls)
#'
#' #cls as an unsorted factor
#' fcls1<-sample(c("a","b"),n,replace = TRUE)
#' nnct(ipd,fcls1)
#'
#' fcls2<-sort(fcls1)
#' nnct(ipd,fcls2) #ipd needs to be sorted as well, otherwise this result will not agree with fcls1
#'
#' nnct(Y,fcls1,ties = TRUE,is.ipd = FALSE)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct(ipd,cls)
#' nnct(Y,cls,is.ipd = FALSE)
#'
#' #cls as a factor
#' fcls<-rep(letters[1:4],rep(10,4))
#' nnct(ipd,fcls)
#'
#' #1D data points
#' n<-20 #or try sample(1:20,1)
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct(ipd,cls)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' nnct(ipd,fcls)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(3*n)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct(ipd,cls)
#' nnct(ipd,cls,ties = TRUE)
#'
#' @export
nnct <- function(x,lab,ties=FALSE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
ord<-order(lab)#ordering the class label lab first
#(to be consistent with row and column labeling in the NNCT)
lab<-sort(lab)
ipd<-ipd[ord,ord]
flab<-as.factor(lab) #converting class labels to factors
clnames<-levels(flab)
k<-length(clnames)
ct<-matrix(0,k,k)
rownames(ct)<-colnames(ct)<-clnames #row and column names for the NNCT
if (n<=1)
{return(ct)}
nlab<-as.numeric(flab) #converting class labels to numbers
for(i in 1:n)
{
ind <- NN(ipd,i);
lind<-length(ind)
for (j in 1:lind)
{
addend<-ifelse(ties==FALSE,1,1/lind)
ct[nlab[i],nlab[ind[j]]]<- ct[nlab[i],nlab[ind[j]]] + addend
}
}
ct
} #end for the function
#'
#################################################################
#' @title Nearest Neighbor Contingency Table (NNCT) with (only) base points restricted to a subsample
#'
#' @description
#' Returns the \eqn{k \times k} NNCT with (only) base points are restricted to be in the subset of indices \code{ss} using
#' the IPD matrix or data set \code{x} where \eqn{k} is the number of classes in the data set. That is, the base points
#' are the points with indices in \code{ss} but for the NNs the function checks all the points in the data set
#' (including the points in \code{ss}).
#' Row and columns of the NNCT are labeled with the corresponding class labels.
#'
#' The argument \code{ties} is a logical argument (default=\code{FALSE}) to take ties into account or not. If \code{TRUE} a NN
#' contributes \eqn{1/m} to the NN count if it is one of the \eqn{m} tied NNs of a subject.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @param ss indices of subjects (i.e., row indices in the data set) chosen to be the base points
#' @inheritParams nnct
#'
#' @return Returns the \eqn{k \times k} NNCT where \eqn{k} is the number of classes in the data set with (only) base points
#' restricted to a subsample \code{ss}.
#'
#' @seealso \code{\link{nnct}} and \code{\link{nnct.boot.dis}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct(ipd,cls)
#'
#' #subsampling indices
#' ss<-sample(1:n,floor(n/2))
#' nnct.sub(ss,ipd,cls)
#' nnct.sub(ss,Y,cls,is.ipd = FALSE)
#' nnct.sub(ss,ipd,cls,ties = TRUE)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' nnct.sub(ss,ipd,fcls)
#'
#' #cls as an unsorted factor
#' fcls<-sample(c("a","b"),n,replace = TRUE)
#' nnct(ipd,fcls)
#' nnct.sub(ss,ipd,fcls)
#'
#' fcls<-sort(fcls)
#' nnct.sub(ss,ipd,fcls)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ss<-sample(1:40,30)
#' nnct.sub(ss,ipd,cls)
#'
#' #cls as a factor
#' fcls<-rep(letters[1:4],rep(10,4))
#' nnct.sub(ss,ipd,cls)
#'
#' #1D data points
#' n<-20 #or try sample(1:20,1)
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct(ipd,cls)
#'
#' #subsampling indices
#' ss<-sample(1:n,floor(n/2))
#' nnct.sub(ss,ipd,cls)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(120)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ss<-sample(1:40,30)
#' nnct.sub(ss,ipd,cls)
#' nnct.sub(ss,ipd,cls,ties = TRUE)
#'
#' @export
nnct.sub <- function(ss,x,lab,ties=FALSE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
N<-nrow(ipd) #sample size
ssvec<-rep(0,N)
ssvec[ss]<-ss
ord<-order(lab)
lab<-sort(lab) #ordering the class label lab first
#(to be consistent with row and column labeling in the NNCT)
ipd<-ipd[ord,ord]
ssvec<-ssvec[ord]
ss<-ssvec[ssvec>0]
flab<-as.factor(lab) #converting class labels to factors
clnames<-levels(flab)
k<-length(clnames)
nss<-length(ss) #length of the indices to select for NNCT construction
ct<-matrix(0,k,k)
rownames(ct)<-colnames(ct)<-clnames #row and column names for the NNCT
if (nss<=1)
{return(ct)}
nlab<-as.numeric(flab) #converting class labels to numbers
for(i in 1:nss)
{
ind <- NN(ipd,ss[i]);
lind<-length(ind)
for (j in 1:lind)
{
addend<-ifelse(ties==FALSE,1,1/lind)
ct[nlab[ss[i]],nlab[ind[j]]]<- ct[nlab[ss[i]],nlab[ind[j]]] + addend
}
}
ct
} #end for the function
#'
#################################################################
#' @title Bootstrap Nearest Neighbor Contingency Table (NNCT)
#'
#' @description
#' Returns the \eqn{k \times k} NNCT with sampling replacement of the points for each base point. That is, for each base
#' point, the rows in the IPD matrix are sampled with replacement and the NN counts are updated accordingly.
#' Row and columns of the NNCT are labeled with the corresponding class labels.
#'
#' The argument self is a logical argument (default=\code{TRUE}) for including the base point in the resampling or not.
#' If \code{TRUE}, for each base point all entries in the row are sampled (with replacement) so the point itself can
#' also be sampled multiple times and if \code{FALSE} the point is excluded from the resampling (i.e. other points
#' are sampled with replacement).
#'
#' The argument \code{ties} is a logical argument (default=\code{FALSE}) to take ties into account or not. If \code{TRUE} a NN
#' contributes \eqn{1/m} to the NN count if it is one of the \eqn{m} tied NNs of a subject.
#'
#' The argument \code{is.ipd} is a logical argument (default=\code{TRUE}) to determine the structure of the argument \code{x}.
#' If \code{TRUE}, \code{x} is taken to be the inter-point distance (IPD) matrix, and if \code{FALSE}, \code{x} is taken to be the data set
#' with rows representing the data points.
#'
#' @inheritParams nnct
#' @param self A logical argument (default=\code{TRUE}). If \code{TRUE}, for each base point, all entries in the
#' row are sampled (with replacement) and if \code{FALSE} the point is excluded from the resampling (i.e. other points
#' are sampled with replacement).
#'
#' @return Returns the \eqn{k \times k} NNCT where \eqn{k} is the number of classes in the data set with sampling replacement
#' of the rows of the IPD matrix.
#'
#' @seealso \code{\link{nnct}} and \code{\link{nnct.sub}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct.boot.dis(ipd,cls)
#' nnct.boot.dis(Y,cls,is.ipd = FALSE) #may give different result from above due to random sub-sampling
#' nnct.boot.dis(ipd,cls,self = FALSE)
#' nnct.boot.dis(ipd,cls,ties = FALSE) #differences are due to ties and resampling of distances
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' nnct.boot.dis(ipd,fcls)
#'
#' #cls as an unsorted factor
#' fcls<-sample(c("a","b"),n,replace = TRUE)
#' nnct.boot.dis(ipd,fcls)
#'
#' fcls<-sort(fcls)
#' nnct.boot.dis(ipd,fcls)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct.boot.dis(ipd,cls)
#'
#' #cls as a factor
#' fcls<-rep(letters[1:4],rep(10,4))
#' nnct.boot.dis(ipd,fcls)
#'
#' #1D data points
#' n<-20 #or try sample(1:20,1)
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct.boot.dis(ipd,cls)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' nnct.boot.dis(ipd,fcls)
#'
#' #with possible ties in the data
#' Y<-matrix(round(runif(3*n)*10),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' nnct.boot.dis(ipd,cls)
#' nnct.boot.dis(ipd,cls,self = FALSE)
#' nnct.boot.dis(ipd,cls,ties = FALSE) #differences are due to ties and resampling of distances
#'
#' @export
nnct.boot.dis <- function(x,lab,self=TRUE,ties=TRUE,is.ipd=TRUE,...)
{
ifelse(is.ipd,ipd<-x,ipd<-ipd.mat(x,...))
n<-nrow(ipd)
ord<-order(lab)#ordering the class label lab first
#(to be consistent with row and column labeling in the NNCT)
lab<-sort(lab)
ipd<-ipd[ord,ord]
flab<-as.factor(lab) #converting class labels to factors
clnames<-levels(flab)
k<-length(clnames)
ct<-matrix(0,k,k)
rownames(ct)<-colnames(ct)<-clnames #row and column names for the NNCT
if (n<=1)
{return(ct)}
# pr<-rep(1,n)
nlab<-as.numeric(flab) #converting class labels to numbers
for(i in 1:n)
{
pr<-rep(1,n)
if (self==FALSE) { pr[i]<-0 }
ss<-sample(1:n,replace=TRUE,prob=pr) # sampling with replacement of column indices of the ipd
ind <- NNsub(ss,ipd,i)$ss.ind;
lind<-length(ind)
for (j in 1:lind)
{
addend<-ifelse(ties==FALSE,1,1/lind)
ct[nlab[i],nlab[ind[j]]]<- ct[nlab[i],nlab[ind[j]]] + addend
}
}
ct
} #end for the function
#'
#################################################################
# funsOnevsRest
#'
#' @title Functions for one versus rest type labeling
#'
#' @description
#' Two functions: \code{lab.onevsrest} and \code{classirest}.
#'
#' Both functions relabel the points, keeping class \eqn{i} label as is and relabeling the other classes as "rest".
#' Used in the one-vs-rest type comparisons after the overall segregation test is found to be significant.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param i label of the class that is to be retained in the post-hoc comparison.
#' @param lab The \code{vector} of class labels (numerical or categorical)
#'
#' @return Both functions return the data relabeled as class \eqn{i} label is retained and the remaining is
#' relabeled as "rest".
#'
#' @seealso \code{\link{pairwise.lab}}
#'
#' @name funsOnevsRest
NULL
#'
#' @rdname funsOnevsRest
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' lab.onevsrest(1,cls)
#' classirest(2,cls)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' lab.onevsrest("a",fcls)
#' lab.onevsrest("b",fcls)
#' classirest("b",fcls)
#'
#' #cls as a factor
#' fcls<-rep(letters[1:4],rep(10,4))
#' lab.onevsrest("b",fcls)
#' classirest("b",fcls)
#'
#' @export
lab.onevsrest <- function(i,lab)
{
if (any(lab==i)==FALSE)
{stop('given label i is not among the class labels')}
lab[lab!=i]<-"rest"
lab
} #end for the function
#'
#' @rdname funsOnevsRest
#'
#' @export
classirest <- function(i,lab)
{
if (any(lab==i)==FALSE)
{stop('given label i is not among the class labels')}
lab[lab!=i]<-"rest"
lab
} #end for the function
#'
#################################################################
#' @title Keeping the pair of the specified labels in the data
#'
#' @description
#' Keeps only the specified labels \eqn{i} and \eqn{j} and returns the data from classes with these labes and also
#' the corresponding label vector having class labels \eqn{i} and \eqn{j} only.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point.
#' @param lab The \code{vector} of class labels (numerical or categorical)
#' @param i,j Label of the classes that are to be retained in the post-hoc comparison.
#'
#' @return A \code{list} with two elements
#' \item{data.pair}{The type of the pattern from which points are to be generated}
#' \item{lab.pair}{The \code{"main"} title for the plot of the point pattern}
#'
#' @seealso \code{\link{lab.onevsrest}} and \code{\link{classirest}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' pairwise.lab(Y,cls,1,2)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' pairwise.lab(Y,cls,2,3)
#'
#' #cls as a factor
#' fcls<-rep(letters[1:4],rep(10,4))
#' pairwise.lab(Y,fcls,"b","c")
#'
#' @export
pairwise.lab <- function(dat,lab,i,j)
{
if (any(lab==i)*any(lab==j)==0)
{stop('at least one of the given labels for i and j are not among the class labels')}
ind<-(lab==i | lab == j)
pair.dat <-dat[ind,];
cl<-lab[ind]
list(data.pair=pair.dat,
lab.pair=cl)
} #end for the function
#'
#################################################################
# funsRowColSums
#'
#' @title Functions for row and column sums of a matrix
#'
#' @description
#' Two functions: \code{row.sum} and \code{col.sum}.
#'
#' \code{row.sum} returns the row sums of a given matrix (in particular a contingency table) as a vector and
#' \code{col.sum} returns the column sums of a given matrix as a vector. \code{row.sum} is equivalent to
#' \code{\link[base]{rowSums}} function and \code{col.sum} is equivalent to \code{\link[base]{colSums}}
#' function in the \code{base} package.
#'
#' @param ct A matrix, in particular a contingency table
#'
#' @return
#' \code{row.sum} returns the row sums of \code{ct} as a vector
#' \code{col.sum} returns the column sums of \code{ct} as a vector
#'
#' @seealso \code{\link[base]{rowSums}} and \code{\link[base]{colSums}}
#'
#' @name funsRowColSums
NULL
#'
#' @rdname funsRowColSums
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' row.sum(ct)
#' rowSums(ct)
#'
#' col.sum(ct)
#' colSums(ct)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' row.sum(ct)
#' rowSums(ct)
#'
#' col.sum(ct)
#' colSums(ct)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' row.sum(ct)
#' rowSums(ct)
#'
#' col.sum(ct)
#' colSums(ct)
#'
#' @export
row.sum <- function(ct)
{
apply(ct,1,sum)
} #end for the function
#'
#' @rdname funsRowColSums
#'
#' @export
col.sum <- function(ct)
{
apply(ct,2,sum)
} #end for the function
#'
#################################################################
#' @title Dixon's Segregation Indices for NNCTs
#'
#' @description
#' Returns Dixon's segregation indices in matrix form based on entries of the NNCT, \code{ct}.
#' Segregation index for cell \eqn{i,j} is defined as \eqn{log(N_{ii}(n-n_i)/((n_i-N_{ii})(n_i-1))} if \eqn{i=j}
#' and
#' as \eqn{log(N_{ij}(n-n_j-1)/((n_i-N_{ij})(n_j))} if \eqn{i \ne j}.
#' See (\insertCite{dixon:NNCTEco2002,ceyhan:SiM-seg-ind2014;textual}{nnspat}).
#'
#' The argument \code{inf.corr} is a logical argument (default=\code{FALSE}) to avoid \eqn{\pm \infty} for the segregation
#' indices. If \code{TRUE} indices are modified so that they are finite and if \code{FALSE} the above definition is used.
#' (See \insertCite{ceyhan:SiM-seg-ind2014;textual}{nnspat} for more detail).
#'
#' @param ct A contingency table, in particular an NNCT
#' @param inf.corr A logical argument (default=\code{FALSE}). If \code{TRUE}, indices are modified so that
#' they are finite and if \code{FALSE} the above definition in the description is used.
#'
#' @return Returns a \code{matrix} of segregation indices which is of the same dimension as \code{ct}.
#'
#' @seealso \code{\link{Pseg.coeff}}, \code{\link{seg.coeff}}, \code{\link{Zseg.ind}}
#' and \code{\link{Zseg.ind.ct}}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#' seg.ind(ct)
#' seg.ind(ct,inf.corr = TRUE)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' seg.ind(ct)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' seg.ind(ct)
#' seg.ind(ct,inf.corr = TRUE)
#'
#' ct<-matrix(c(0,10,5,5),ncol=2)
#' seg.ind(ct)
#'
#' seg.ind(ct,inf.corr = TRUE)
#'
#' @export
seg.ind <- function(ct,inf.corr=FALSE)
{
rs <- row.sum(ct);
k<-nrow(ct);
n<-sum(ct)
Sdix<- matrix(0,k,k);
if (inf.corr==FALSE) #without infinity index correction
{
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
Sdix[i,i]<- log((ct[i,i]/(rs[i]-ct[i,i]))/((rs[i]-1)/(n-rs[i])))
else
Sdix[i,j]<- log((ct[i,j]/(rs[i]-ct[i,j]))/(rs[j]/(n-rs[j]-1)));
}
} else #with infinity index correction
{
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
Sdix[i,i]<- log(((ct[i,i]+1)/(rs[i]-ct[i,i]+1))/( (rs[i]*(rs[i]-1)+n-1)/(rs[i]*(n-rs[i])+n-1) ))
else
Sdix[i,j]<- log(((ct[i,j]+1)/(rs[i]-ct[i,j]+1))/( (rs[i]*rs[j]+n-1)/(rs[i]*(n-rs[j]-1)+n-1) ));
}
}
clnames<-rownames(ct) #row and column names for the NNCT, \code{ct}
rownames(Sdix)<-colnames(Sdix)<-clnames #row and column names for the matrix
if (any(is.na(Sdix)))
{ warning("some of the segregation indices are NaN,
so use inf.corr=TRUE in the function call!")
return(Sdix)}
if( (any(abs(Sdix)==Inf)))
{warning("some of the segregation indices are +- infinity,
so use inf.corr=TRUE in the function call!")}
Sdix
} #end for the function
#'
#################################################################
# funsZsegind
#'
#' @title Z Tests for Segregation Indices
#'
#' @description
#' Two functions: \code{Zseg.ind.ct} and \code{Zseg.ind}.
#'
#' Both functions are objects of class \code{"cellhtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' segregation indices from their expected values under RL or CSR for each segregation index in the NNCT.
#' The test for each cell \eqn{i,j} is based on the normal approximation of the corresponding segregation index.
#'
#' Each function yields a contingency table of the test statistics, \eqn{p}-values for the corresponding
#' alternative, lower and upper confidence levels, sample estimates (i.e. observed values) and null value(s) (i.e. expected values) for the segregation indices
#' and also names of the test statistics, estimates, null value and the method and the data set used.
#'
#' The null hypothesis for each cell \eqn{i,j} is that the corresponding segregation index equal to the expected value
#' (which is 0) under RL or CSR.
#'
#' See also (\insertCite{ceyhan:SiM-seg-ind2014;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table, used in \code{Zseg.ind.ct} only
#' @param varN The variance matrix for cell counts in the NNCT, \code{ct} ; used in \code{Zseg.ind.ct} only
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{Zseg.ind} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{Zseg.ind} only
#' @param inf.corr A logical argument (default=\code{FALSE}). If \code{TRUE}, indices are modified so that
#' they are finite and if \code{FALSE} the above definition in the description is used.
#' @param alternative Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}.
#' @param conf.level Level of the upper and lower confidence limits, default is \code{0.95}, for the segregation indices
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function, used in \code{Zseg.ind} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The \code{matrix} of test statistics for the segregation indices}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{matrix} of \eqn{p}-values for the hypothesis test for the corresponding alternative}
#' \item{LCL,UCL}{Matrix of Lower and Upper Confidence Levels for the segregation indices at the given confidence
#' level \code{conf.level} and depends on the type of \code{alternative}.}
#' \item{cnf.lvl}{Level of the upper and lower confidence limits of the segregation indices,
#' provided in \code{conf.level}.}
#' \item{estimate}{Estimate of the parameter, i.e., matrix of the observed segregation indices}
#' \item{est.name,est.name2}{Names of the estimates, both are same in this function}
#' \item{null.value}{Hypothesized values for the parameters, i.e. the null values of the segregation indices,
#' which are all 0 under RL or CSR.}
#' \item{null.name}{Name of the null value}
#' \item{alternative}{Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{Zseg.ind.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{Zseg.ind} only}
#'
#' @seealso \code{\link{seg.ind}} and \code{\link{Zseg.coeff}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsZsegind
NULL
#'
#' @rdname funsZsegind
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#'
#' seg.ind(ct)
#' seg.ind(ct,inf.corr=TRUE)
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' varN
#'
#' Zseg.ind(Y,cls)
#' Zseg.ind(Y,cls,inf.corr=TRUE)
#' Zseg.ind.ct(ct,varN)
#'
#' Zseg.ind(Y,cls,alt="g")
#' Zseg.ind.ct(ct,varN,alt="g")
#'
#' Zseg.ind(Y,cls,method="max")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' Zseg.ind(Y,cls)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' varN
#'
#' Zseg.ind(Y,cls)
#' Zseg.ind(Y,cls,inf.corr = TRUE)
#'
#' Zseg.ind.ct(ct,varN)
#' Zseg.ind.ct(ct,varN,inf.corr = TRUE)
#'
#' #1D data points
#' n<-20 #or try sample(1:20,1)
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' Zseg.ind(X,cls)
#' Zseg.ind.ct(ct,varN)
#' Zseg.ind.ct(ct,varN,inf.corr=TRUE)
#'
#' @export
Zseg.ind.ct <- function(ct,varN,inf.corr=FALSE,
alternative=c("two.sided", "less", "greater"),conf.level = 0.95)
{
alternative <-match.arg(alternative)
if (length(alternative) > 1 || is.na(alternative))
stop("alternative must be one \"greater\", \"less\", \"two.sided\"")
if (!missing(conf.level))
if (length(conf.level) != 1 || is.na(conf.level) || conf.level < 0 || conf.level > 1)
stop("conf.level must be a number between 0 and 1")
estimate<-dsi<-seg.ind(ct,inf.corr=inf.corr) #inf.corr takes care of infinity correction in the indices
estimate.name <-"Dixon's segregation indices"
if (all(is.na(dsi)) || all(is.na(varN)))
{stop('All of the segregation indices or all entries of the variance, varN, are NaN, so the tests are not defined')}
null.seg.ind<-0
names.null <-"(expected value of each of the) Dixon's segregation indices"
rs <- row.sum(ct);
k<-nrow(ct);
n<-sum(rs)
ts<-stderrS<- matrix(0,k,k);
if (inf.corr==FALSE) #without infinity index correction
{
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
{ stderrS[i,i]<-(sqrt(varN[i,i])*(n-1)^2)/(rs[i]*(n-rs[i])*(rs[i]-1))
ts[i,i]<- dsi[i,i]/stderrS[i,i]
} else
{
stderrS[i,j]<-(sqrt(varN[i,j])*(n-1)^2)/(rs[i]*rs[j]*(n-rs[j]-1))
ts[i,j]<- dsi[i,j]/stderrS[i,j]
}
}
#for +infinity correction
for (i in 1:k)
for (j in 1:k)
{
if (!is.na(dsi[i,j]) & dsi[i,j]==Inf)
ts[i,j]<- 10^10
}
#for -infinity correction
for (i in 1:k)
for (j in 1:k)
{
if (!is.na(dsi[i,j]) & dsi[i,j]==-Inf)
ts[i,j]<- -10^10
}
} else #with infinity index correction
{
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
{ stderrS[i,i]<-(sqrt(varN[i,i])*(rs[i]+2)*(n-1)^2)/((rs[i]*(rs[i]-1)+n-1)*(rs[i]*(n-rs[i])+n-1))
ts[i,i]<- dsi[i,i]/stderrS[i,i]
} else
{
stderrS[i,j]<-(sqrt(varN[i,j])*(rs[i]+2)*(n-1)^2)/((rs[i]*rs[j]+n-1)*(rs[i]*(n-rs[j]-1)+n-1))
ts[i,j]<- dsi[i,j]/stderrS[i,j]
}
}
}
if (all(is.na(ts)))
{stop('All of the test stat statistics are NaN, the test are not well defined')}
if (alternative == "less") {
pval <-pnorm(ts)
lcl <-NULL
ucl <-estimate+qnorm(conf.level)*stderrS
}
else if (alternative == "greater") {
pval <-pnorm(ts, lower.tail = FALSE)
ucl <-NULL
lcl <-estimate-qnorm(conf.level)*stderrS
}
else {
pval <-2 * pnorm(-abs(ts))
alpha <-1 - conf.level
crit.val <-qnorm(1-alpha/2)
lcl <-estimate-crit.val*stderrS
ucl <-estimate+crit.val*stderrS
}
cnf.lvl<-conf.level
ifelse(inf.corr==FALSE,
method <-c("Large Sample z-Test for Dixon's Segregation Indices"),
method <-c("Large Sample z-Test for Dixon's Segregation Indices\n
with correction for zero cell counts"))
clnames<-rownames(ct) #row and column names for the NNCT, \code{ct}
rownames(ts)<-colnames(ts)<-clnames #row and column names for the test stat matrix
rownames(pval)<-colnames(pval)<-clnames
if (!is.null(lcl)) {rownames(lcl)<-colnames(lcl)<-clnames}
if (!is.null(ucl)) {rownames(ucl)<-colnames(ucl)<-clnames}
ts.names <-"standardized segregation indices (i.e., z-scores)"
dname <-deparse(substitute(ct))
rval <-list(
statistic=ts,
stat.names=ts.names,
p.value=pval,
LCL = lcl,UCL = ucl,
conf.int = NULL,
cnf.lvl=conf.level,
estimate = estimate,
est.name = estimate.name,
est.name2 = estimate.name, #this is for other functions to have a different description for the sample estimates
null.value = null.seg.ind,
null.name=names.null,
alternative = alternative,
method = method,
ct.name = dname
)
attr(rval, "class") <-"cellhtest"
return(rval)
} #end for the function
#'
#' @rdname funsZsegind
#'
#' @export
Zseg.ind <- function(dat,lab,inf.corr=FALSE,
alternative=c("two.sided", "less", "greater"),conf.level = 0.95,...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q
Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
res<- Zseg.ind.ct(ct,varN,inf.corr=inf.corr,alternative=alternative,conf.level=conf.level)
dname <-deparse(substitute(dat))
res$data.name<-dname
return(res)
} #end for the function
#'
#################################################################
#' @title Expected Values of the Cell Counts in NNCT
#'
#' @description Returns a \code{matrix} of same dimension as, \code{ct}, whose entries are the expected cell counts of
#' the NNCT under RL or CSR. The class sizes given as the row sums of \code{ct} and the row and column names are
#' inherited from \code{ct}.
#'
#' See also (\insertCite{dixon:1994,ceyhan:eest-2010;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table
#'
#' @return A \code{matrix} of the expected values of cell counts in the NNCT.
#'
#' @seealso \code{\link{nnct}} and \code{\link{EV.tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' EV.nnct(ct)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#' EV.nnct(ct)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' EV.nnct(ct)
#'
#' ct<-matrix(c(0,10,5,5),ncol=2)
#' EV.nnct(ct)
#'
#' @export
EV.nnct <- function(ct)
{
rs <- row.sum(ct)
k<-length(rs);
n<-sum(rs)
EN<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
EN[i,j]<- rs[i]*(rs[i]-1)/(n-1)
else
EN[i,j]<- rs[i]*rs[j]/(n-1)
}
clnames<-colnames(ct)
rownames(EN)<-colnames(EN)<-clnames #row and column names from the NNCT
EN
} #end for the function
#'
#################################################################
#' @title Expected Values of the Type I cell-specific tests
#'
#' @description Returns a \code{matrix} of same dimension as, \code{ct}, whose entries are the expected values
#' of the Type I cell-specific test statistics, \eqn{T^I_{ij}}.
#' The row and column names are inherited from \code{ct}.
#' These expected values are valid under RL or CSR.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}) and the references therein.
#'
#' @param ct A nearest neighbor contingency table
#'
#' @return A \code{matrix} of the expected values of Type I cell-specific tests.
#'
#' @seealso \code{\link{EV.tct}}, \code{\link{tct}} and \code{\link{EV.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' EV.tctI(ct)
#'
#' @export
EV.tctI <- function(ct)
{
rs <- row.sum(ct);
k<-length(rs);
n<-sum(rs)
expT<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
expT[i,j]<- rs[i]*(rs[i]-n)/(n*(n-1))
else
expT[i,j]<- rs[i]*rs[j]/(n*(n-1))
}
clnames<-colnames(ct)
rownames(expT)<-colnames(expT)<-clnames #row and column names from the NNCT
expT
} #end for the function
#'
#################################################################
#' @title Expected Values of the Types I-IV cell-specific tests
#'
#' @description Returns a \code{matrix} of same dimension as, \code{ct}, whose entries are the expected values
#' of the \eqn{T_{ij}} values which are the Types I-IV cell-specific test statistics (i.e., \eqn{T^I_{ij}-T^{IV}_{ij}})
#' under RL or CSR.
#' The row and column names are inherited from \code{ct}. The type argument specifies the type
#' of the cell-specific test among the types I-IV tests.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}) and the references therein.
#'
#' @param ct A nearest neighbor contingency table
#' @param type The type of the cell-specific test, default=\code{"III"}. Takes on values \code{"I"}-\code{"IV"} (or
#' equivalently \code{1-4}, respectively.
#'
#' @return A \code{matrix} of the expected values of Type I-IV cell-specific tests.
#'
#' @seealso \code{\link{EV.tctI}}, \code{\link{tct}} and \code{\link{EV.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' EV.tct(ct,2)
#' EV.tct(ct,"II")
#' EV.tctI(ct)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#' EV.tct(ct,2)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' EV.tct(ct,2)
#'
#' ct<-matrix(c(0,10,5,5),ncol=2)
#' EV.tct(ct,2)
#'
#' @export
EV.tct <- function(ct,type="III")
{
ET<- switch(type,
I = { ET<- EV.tctI(ct) },
II = { ET<- EV.tctI(ct) },
III = { k<-nrow(ct)
ET<- matrix(0,k,k) },
IV = { k<-nrow(ct)
ET<- matrix(0,k,k) }
)
if (is.null(ET)) stop("Enter numbers 1-4 or I-IV in quotes for type")
clnames<-colnames(ct)
rownames(ET)<-colnames(ET)<-clnames #row and column names from the nnct
ET
} #end for the function
#'
#################################################################
#' @title \eqn{T} Contingency Table (TCT)
#'
#' @description Returns the \code{T} contingency table (TCT), which is a matrix of same dimension as, \code{ct},
#' whose entries are the values of the Types I-IV cell-specific test statistics, \eqn{T^I_{ij}-T^{IV}_{ij}}.
#' The row and column names are inherited from \code{ct}. The type argument specifies the type
#' of the cell-specific test among the types I-IV tests.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}) and the references therein.
#'
#' @inheritParams EV.tct
#'
#' @return A \code{matrix} of the values of Type I-IV cell-specific tests
#'
#' @seealso \code{\link{cellsTij}} and \code{\link{nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#'
#' type.lab<-c("I","II","III","IV")
#' for (i in 1:4)
#' { print(paste("T_ij values for cell specific tests for type",type.lab[i]))
#' print(tct(ct,i))
#' }
#'
#' tct(ct,"II")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#' tct(ct,2)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' tct(ct,2)
#'
#' ct<-matrix(c(0,10,5,5),ncol=2)
#' tct(ct,2)
#'
#' @export
tct <- function(ct,type="III")
{
rs <- row.sum(ct);
cs <- col.sum(ct);
k<-nrow(ct);
n<-sum(ct)
cells<- matrix(0,k,k);
cells <- switch(type,
I = {
for (i in 1:k)
for (j in 1:k)
{
cells[i,j]<- ct[i,j]-(rs[i]*cs[j])/n
}
cells},
II = {
for (i in 1:k)
for (j in 1:k)
{
cells[i,j]<- ct[i,j]-(rs[i]*rs[j])/n
}
cells},
III = {
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
cells[i,j]<- ct[i,j]-(rs[i]-1)*cs[j]/(n-1)
else
cells[i,j]<- ct[i,j]-rs[i]*cs[j]/(n-1)
}
cells},
IV = {
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
cells[i,j]<- (rs[i]/n)* ((n-1)*ct[i,j]/(rs[i]-1)-cs[j])
else
cells[i,j]<- (1/n)*((n-1)*ct[i,j]-rs[i]*cs[j])
}
cells}
)
if (is.null(cells)) stop("Enter numbers 1-4 or I-IV in quotes for type")
clnames<-colnames(ct)
rownames(cells)<-colnames(cells)<-clnames #row and column names from the NNCT
cells
} #end for the function
#'
#################################################################
#' @title Entries for the Types I-IV cell-specific tests
#'
#' @description Returns a \code{matrix} of same dimension as, \code{ct}, whose entries are the values
#' of the Types I-IV cell-specific test statistics, \eqn{T^I_{ij}-T^{IV}_{ij}}.
#' The row and column names are inherited from \code{ct}. The type argument specifies the type
#' of the cell-specific test among the types I-IV tests.
#' Equivalent to the function \code{\link{tct}} in this package.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}) and the references therein.
#'
#' @inheritParams EV.tct
#'
#' @return A \code{matrix} of the values of Type I-IV cell-specific tests
#'
#' @seealso \code{\link{tct}} and \code{\link{nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#'
#' type.lab<-c("I","II","III","IV")
#' for (i in 1:4)
#' { print(paste("T_ij values for cell specific tests for type",type.lab[i]))
#' print(cellsTij(ct,i))
#' }
#'
#' cellsTij(ct,"II")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#' cellsTij(ct,2)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' cellsTij(ct,2)
#'
#' ct<-matrix(c(0,10,5,5),ncol=2)
#' cellsTij(ct,2)
#'
#' @export
cellsTij <- function(ct,type="III")
{
k<-nrow(ct);
rs<-row.sum(ct)
cs<- col.sum(ct)
n<-sum(ct)
cells<- matrix(0,k,k);
cells <- switch(type,
I = {
for (i in 1:k)
for (j in 1:k)
{
cells[i,j]<- ct[i,j]-(rs[i]*cs[j])/n
}
cells},
II = {
for (i in 1:k)
for (j in 1:k)
{
cells[i,j]<- ct[i,j]-(rs[i]*rs[j])/n
}
cells},
III = {
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
cells[i,j]<- ct[i,j]-(rs[i]-1)*cs[j]/(n-1)
else
cells[i,j]<- ct[i,j]-rs[i]*cs[j]/(n-1)
}
cells},
IV = {
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
cells[i,j]<- (rs[i]/n)* ((n-1)*ct[i,j]/(rs[i]-1)-cs[j])
else
cells[i,j]<- (1/n)*((n-1)*ct[i,j]-rs[i]*cs[j])
}
cells}
)
if (is.null(cells)) stop("Enter numbers 1-4 or I-IV in quotes for type")
clnames<-colnames(ct)
rownames(cells)<-colnames(cells)<-clnames #row and column names from the NNCT
cells
} #end for the function
#'
#################################################################
# funs.pijPij
#'
#' @title The functions for probability of selecting a number of points from respective classes
#'
#' @description
#' The ancillary probability functions used in computation of the variance-covariance matrices
#' of various NN spatial tests such as NNCT tests and tests based on other contingency tables.
#' These functions can be classified as \code{pij} and \code{Pij} type functions. The \code{pij} functions are for individual
#' probabilities and the corresponding \code{Pij} functions are the summed \code{pij} values. For example \eqn{p_{iijk}}
#' is the probability of any 4 points with 2 from class \eqn{i}, and others are from classes \eqn{j} and \eqn{k}.
#' These probabilities are for data from RL or CSR.
#'
#' @param n A positive integer representing the size of the data set (i.e., number of observations in the data
#' set).
#' @param k,l,m,p Positive integers, usually representing the class sizes, used in \code{pij} type functions only.
#' Number of these arguments required depends on the number of distinct indices of \eqn{p}, e.g. \eqn{p_{ij}} requires
#' \code{k,l,n} and \eqn{p_{iijk}} requires \code{k,l,m,n} as input.
#' @param nvec A \code{vector} ofpositive integers representing the sizes of classes in the data set, used in
#' \code{Pij} type functions only.
#'
#' @return Probability values for the selected points being from the indicated classes.
#'
#' @name funs.pijPij
NULL
#'
#' @seealso \code{\link{pk}}
#'
#' @rdname funs.pijPij
#'
p11 <- function(k,n)
{
k*(k-1)/(n*(n-1))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P11 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp11<-0
for (i in 1:k)
{
sp11<-sp11+p11(nvec[i],n)
}
sp11
} #end for the function
#'
#' @rdname funs.pijPij
#'
p12 <- function(k,l,n)
{
k*l/(n*(n-1))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P12 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp12<-0
for (i in 1:(k-1))
for (j in (i+1):k)
{
sp12<-sp12+p12(nvec[i],nvec[j],n)
}
2*sp12
} #end for the function
#'
#' @rdname funs.pijPij
#'
p111 <- function(k,n)
{
k*(k-1)*(k-2)/(n*(n-1)*(n-2))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P111 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp111<-0
for (i in 1:k)
{
sp111<-sp111+p111(nvec[i],n)
}
sp111
} #end for the function
#'
#' @rdname funs.pijPij
#'
p1111 <- function(k,n)
{
k*(k-1)*(k-2)*(k-3)/(n*(n-1)*(n-2)*(n-3))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P1111 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp1111<-0
for (i in 1:k)
{
sp1111<-sp1111+p1111(nvec[i],n)
}
sp1111
} #end for the function
#'
#' @rdname funs.pijPij
#'
p112 <- function(k,l,n)
{
k*(k-1)*l/(n*(n-1)*(n-2))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P112 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp112<-0
for (i in 1:k)
{
jind<-(1:k)[-i]
for (j in jind)
{
sp112<-sp112+p112(nvec[i],nvec[j],n)
}
}
sp112
} #end for the function
#'
#' @rdname funs.pijPij
#'
p122 <- function(k,l,n)
{
k*l*(l-1)/(n*(n-1)*(n-2))
} #end for the function
#'
#' @rdname funs.pijPij
#'
p123 <- function(k,l,m,n)
{
k*l*m/(n*(n-1)*(n-2))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P123 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp123<-0
for (i in 1:(k-2))
{
for (j in (i+1):(k-1))
{
for (l in (j+1):k)
{
sp123<-sp123+p123(nvec[i],nvec[j],nvec[l],n)
}
}
}
6*sp123
} #end for the function
#'
#' @rdname funs.pijPij
#'
p1234 <- function(k,l,m,p,n)
{
k*l*m*p/(n*(n-1)*(n-2)*(n-3))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P1234 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp1234<-0
for (i in 1:(k-3))
{
for (j in (i+1):(k-2))
{
for (l in (j+1):(k-1))
{
for (m in (l+1):k)
{
sp1234<-sp1234+p1234(nvec[i],nvec[j],nvec[l],nvec[m],n)
}
}
}
}
24*sp1234
} #end for the function
#'
#' @rdname funs.pijPij
#'
p1223 <- function(k,l,m,n)
{
k*l*(l-1)*m/(n*(n-1)*(n-2)*(n-3))
} #end for the function
#'
#' @rdname funs.pijPij
#'
p1123 <- function(k,l,m,n)
{
k*(k-1)*l*m/(n*(n-1)*(n-2)*(n-3))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P1123 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp1123<-0
for (i in 1:k)
{
jind<-(1:k)[-i]
for (j in jind)
{
lind<-(1:k)[-c(i,j)]
for (l in lind)
{
sp1123<-sp1123+p1123(nvec[i],nvec[j],nvec[l],n)
}
}
}
sp1123
} #end for the function
#'
#' @rdname funs.pijPij
#'
p1122 <- function(k,l,n)
{
k*(k-1)*l*(l-1)/(n*(n-1)*(n-2)*(n-3))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P1122 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp1122<-0
for (i in 1:(k-1))
for (j in (i+1):k)
{
sp1122<-sp1122+p1122(nvec[i],nvec[j],n)
}
2*sp1122
} #end for the function
#'
#' @rdname funs.pijPij
#'
p1112 <- function(k,l,n)
{
k*(k-1)*(k-2)*l/(n*(n-1)*(n-2)*(n-3))
} #end for the function
#'
#' @rdname funs.pijPij
#'
P1112 <- function(nvec)
{
k<-length(nvec)
n<-sum(nvec)
sp1112<-0
for (i in 1:k)
{
jind<-(1:k)[-i]
for (j in jind)
{
sp1112<-sp1112+p1112(nvec[i],nvec[j],n)
}
}
sp1112
} #end for the function
#'
#################################################################
#' @title Probability of \code{k} items selected from the class with size \eqn{n_1}
#'
#' @description
#' Returns the ratio \eqn{n_1(n_1-1) \cdots (n_1-(k-1))/(n(n-1) \cdots (n-(k-1))},
#' which is the probability that the \code{k} selected
#' objects are from class 1 with size \eqn{n_1} (denoted as \code{n1} as an argument)
#' and the total data size is \code{n}.
#' This probability is valid under RL or CSR.
#'
#' @param n A positive integer representing the size of the data set
#' (i.e., number of observations in the data set).
#' @param n1 A positive integer, representing the class size of interest which has size \eqn{n_1}
#' @param k Number of items selected from the data set
#'
#' @return Returns the probability of \code{k} items selected from \code{n} items are from the class of interest
#'(i.e., from the class whose size is \eqn{n_1})
#'
#' @seealso \code{\link{p11}} and \code{\link{p12}} etc.
#'
pk <- function(n,n1,k)
{
r<-prod(n1:(n1-(k-1)))/prod(n:(n-(k-1)))
r
}
#'
#################################################################
#' @title Variances of Cell Counts in an NNCT
#'
#' @description Returns the variances of cell counts \eqn{N_{ij}} for \eqn{i,j=1,\ldots,k} in the NNCT, \code{ct} in matrix form which
#' is of the same dimension as \code{ct}. These variances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table
#' @param Q The number of shared NNs
#' @param R The number of reflexive NNs (i.e., twice the number of reflexive NN pairs)
#'
#' @return A \code{matrix} of same dimension as, \code{ct}, whose entries are the variances of the cell counts
#' in the NNCT with class sizes given as the row sums of \code{ct}. The row and column names are inherited from \code{ct}.
#'
#' @seealso \code{\link{var.tct}}, \code{\link{var.nnsym}} and \code{\link{cov.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' var.nnct(ct,Qv,Rv)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#' var.nnct(ct,Qv,Rv)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' var.nnct(ct,Qv,Rv)
#'
#' @export
var.nnct <- function(ct,Q,R)
{
rs <- row.sum(ct);
k<-nrow(ct);
n<-sum(ct)
var<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{
if (i == j)
{
Paa <- p11(rs[i],n); Paaa <- p111(rs[i],n); Paaaa <- p1111(rs[i],n)
var[i,j]<- (n+R)*Paa+(2*n-2*R+Q)*Paaa+(n^2-3*n-Q+R)*Paaaa-(n*Paa)^2
}
else
{
Pab <- p12(rs[i],rs[j],n); Paab <- p112(rs[i],rs[j],n); Paabb <- p1122(rs[i],rs[j],n)
var[i,j]<- n*Pab+Q*Paab+(n^2-3*n-Q+R)*Paabb-(n*Pab)^2
}
}
clnames<-colnames(ct)
rownames(var)<-colnames(var)<-clnames #row and column names from the NNCT
var
} #end for the function
#'
#################################################################
# funsZcell.nnct
#'
#' @title Dixon's Cell-specific Z Tests of Segregation for NNCT
#'
#' @description
#' Two functions: \code{Zcell.nnct.ct} and \code{Zcell.nnct}.
#'
#' Both functions are objects of class \code{"cellhtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' cell counts from the expected values under RL or CSR for each cell (i.e., entry) in the NNCT.
#' The test for each cell \eqn{i,j} is based on the normal approximation of the corresponding cell count, \eqn{N_{ij}}
#' and are due to \insertCite{dixon:1994,dixon:NNCTEco2002;textual}{nnspat}.
#'
#' Each function yields a contingency table of the test statistics, \eqn{p}-values for the corresponding
#' alternative, expected values (i.e., null value(s)), lower and upper confidence levels, sample estimates (i.e. observed values)
#' for the cell counts and also names of the test statistics, estimates, null values and the method and
#' the data set used.
#'
#' The null hypothesis for each cell \eqn{i,j} is that the corresponding cell count is equal to the expected value
#' under RL or CSR, that is \eqn{E[N_{ii}] = n_i(n_i - 1)/(n - 1)} and \eqn{E[N_{ij}] = n_i n_j/(n - 1)}
#' where \eqn{n_i} is the size of
#' class \eqn{i} and \eqn{n} is the size of the data set.
#'
#' See also (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table, used in \code{\link{Zcell.nnct.ct}} only
#' @param varN The variance matrix for cell counts in the NNCT, \code{ct} ; used in \code{\link{Zcell.nnct.ct}} only
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{\link{Zcell.nnct}} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{\link{Zcell.nnct}} only
#' @param alternative Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}.
#' @param conf.level Level of the upper and lower confidence limits, default is \code{0.95}, for the cell counts, i.e.
#' \eqn{N_{ij}} values
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function, used in \code{\link{Zcell.nnct}} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The \code{matrix} of Dixon's cell-specific test statistics}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{matrix} of \eqn{p}-values for the hypothesis test for the corresponding alternative}
#' \item{LCL,UCL}{Matrix of Lower and Upper Confidence Levels for the cell counts at the given confidence
#' level \code{conf.level} and depends on the type of \code{alternative}.}
#' \item{conf.int}{The confidence interval for the estimates, it is \code{NULL} here, since we provide the \code{UCL} and \code{LCL}
#' in \code{matrix} form.}
#' \item{cnf.lvl}{Level of the upper and lower confidence limits of the cell counts,
#' provided in \code{conf.level}.}
#' \item{estimate}{Estimates of the parameters, i.e., matrix of the observed cell counts which is the NNCT}
#' \item{est.name,est.name2}{Names of the estimates, both are same in this function}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of
#' the cell counts.}
#' \item{null.name}{Name of the null values}
#' \item{alternative}{Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{\link{Zcell.nnct.ct}} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{\link{Zcell.nnct}} only }
#'
#' @seealso \code{\link{Zcell.nnct.2s}}, \code{\link{Zcell.nnct.rs}}, \code{\link{Zcell.nnct.ls}},
#' \code{\link{Zcell.nnct.pval}} and \code{\link{Zcell.tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsZcell.nnct
NULL
#'
#' @rdname funsZcell.nnct
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' varN
#'
#' Zcell.nnct(Y,cls)
#' Zcell.nnct(Y,cls,alt="g")
#'
#' Zcell.nnct.ct(ct,varN)
#' Zcell.nnct.ct(ct,varN,alt="g")
#'
#' Zcell.nnct(Y,cls,method="max")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' Zcell.nnct(Y,cls)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' Zcell.nnct(Y,cls)
#' Zcell.nnct.ct(ct,varN)
#'
#' @export
Zcell.nnct.ct <- function(ct,varN,alternative=c("two.sided", "less", "greater"),conf.level = 0.95)
{
if (all(is.na(varN)))
{stop('All entries of the variance, varN, are NaN (due to data size <=3), so the tests are not defined')}
alternative <-match.arg(alternative)
if (length(alternative) > 1 || is.na(alternative))
stop("alternative must be one \"greater\", \"less\", \"two.sided\"")
if (!missing(conf.level))
if (length(conf.level) != 1 || is.na(conf.level) || conf.level < 0 || conf.level > 1)
stop("conf.level must be a number between 0 and 1")
estimate<-ct
estimate.name <-c("NNCT entries")
EV.<-EV.nnct(ct)
nullNij<-EV.
names.null <-"NNCT entries"
k<-nrow(ct);
ts<-stderrN<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{ stderrN[i,j]<-sqrt(varN[i,j])
ts[i,j] <- (ct[i,j]-EV.[i,j])/stderrN[i,j]
}
if (all(is.na(ts)))
{stop('All of the test stat statistics are NaN, these cell-specific tests are not defined')}
alt<- switch(alternative,
less = {
pval <-pnorm(ts)
lcl <-NULL
ucl <-estimate+qnorm(conf.level)*stderrN
},
greater = {
pval <-pnorm(ts, lower.tail = FALSE)
ucl <-NULL
lcl <-estimate-qnorm(conf.level)*stderrN
},
two.sided = {
pval <-2 * pnorm(-abs(ts))
alpha <-1 - conf.level
crit.val <-qnorm(1-alpha/2)
lcl <-estimate-crit.val*stderrN
ucl <-estimate+crit.val*stderrN
}
)
if (is.null(alt)) stop("Alternative must be one of less, greater, or two.sided in quotes")
cnf.lvl<-conf.level
method <-c("Alternative must be one of less, greater, or two.sided")
clnames<-rownames(ct) #row and column names for the NNCT, \code{ct}
rownames(ts)<-colnames(ts)<-clnames #row and column names for the test stat matrix
rownames(pval)<-colnames(pval)<-clnames
rownames(nullNij)<-colnames(nullNij)<-clnames
if (!is.null(lcl)) {rownames(lcl)<-colnames(lcl)<-clnames}
if (!is.null(ucl)) {rownames(ucl)<-colnames(ucl)<-clnames}
ts.names <-"Dixon's NNCT cell-specific tests of segregation, Z"
dname <-deparse(substitute(ct))
rval <-list(
statistic=ts,
stat.names=ts.names,
p.value=pval,
LCL = lcl,UCL = ucl,
conf.int = NULL,
cnf.lvl=conf.level,
estimate = estimate,
est.name = estimate.name,
est.name2 = estimate.name, #this is for other functions to have a different description for the sample estimates
null.value = nullNij,
null.name=names.null,
alternative = alternative,
method = method,
ct.name = dname
)
attr(rval, "class") <-"cellhtest"
return(rval)
} #end for the function
#'
#' @rdname funsZcell.nnct
#'
#' @export
Zcell.nnct <- function(dat,lab,alternative=c("two.sided", "less", "greater"),conf.level = 0.95,...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
EV<-EV.nnct(ct)
W<-Wmat(ipd)
Qv<-Qvec(W)$q
Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
res<- Zcell.nnct.ct(ct,varN,alternative=alternative,conf.level=conf.level)
dname <-deparse(substitute(dat))
res$data.name<-dname
return(res)
} #end for the function
#'
#################################################################
# funsZcell.nnct.pval
#'
#' @title \eqn{p}-values for Cell-specific Z Test Statistics for NNCT
#'
#' @description
#' Four functions: \code{Zcell.nnct.2s}, \code{Zcell.nnct.rs}, \code{Zcell.nnct.ls} and \code{Zcell.nnct.pval}.
#'
#' These functions yield a contingency table (i.e., a matrix) of the \eqn{p}-values for the cell-specific Z
#' test statistics for the NNCT and take the cell-specific \eqn{Z} test statistics in matrix form as their argument.
#' \code{Zcell.nnct.pval} yields an array of size \eqn{k \times k \times 3} where 1st entry of the array is the matrix of \eqn{p}-values for the
#' two-sided alternative, 2nd entry of the array is the matrix of \eqn{p}-values for the left-sided alternative
#' and 3rd entry of the array is the matrix of \eqn{p}-values for the right-sided alternative.
#' And each of \code{Zcell.nnct.2s}, \code{Zcell.nnct.rs} and \code{Zcell.nnct.ls} yield a \eqn{k \times k} matrix of \eqn{p}-values
#' for the two-sided, right-sided and left-sided alternative, respectively.
#'
#' The functions \code{Zcell.nnct.2s}, \code{Zcell.nnct.rs} and \code{Zcell.nnct.ls} are equivalent to
#' \code{\link{Zcell.nnct}(...,alt)$p.val} where \code{alt="two-sided"}, \code{"greater"} and \code{"less"}, respectively, with the appropriate
#' arguments for the function \code{\link{Zcell.nnct}} (see the examples below).
#'
#' See also (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010;textual}{nnspat}).
#'
#' @param zt A \eqn{k \times k} matrix of the cell-specific \eqn{Z} test statistics
#'
#' @return
#' \code{Zcell.nnct.pval} returns a \eqn{k \times k \times 3} array whose 1st entry is the matrix of \eqn{p}-values for the
#' two-sided alternative, 2nd entry is the matrix of \eqn{p}-values for the left-sided alternative
#' and 3rd entry is the matrix of \eqn{p}-values for the right-sided alternative
#' \code{Zcell.nnct.2s} returns a \eqn{k \times k} matrix of \eqn{p}-values for the two-sided alternative
#' \code{Zcell.nnct.rs} returns a \eqn{k \times k} matrix of \eqn{p}-values for the right-sided alternative
#' \code{Zcell.nnct.ls} returns a \eqn{k \times k} matrix of \eqn{p}-values for the left-sided alternative
#'
#' @seealso \code{\link{Zcell.nnct}} and \code{\link{Zcell.nnct.ct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsZcell.nnct.pval
NULL
#'
#' @rdname funsZcell.nnct.pval
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' TS<-Zcell.nnct(Y,cls)$statistic
#' TS
#' pv<-Zcell.nnct.pval(TS)
#' pv
#'
#' Zcell.nnct(Y,cls,alt="t")$p.val
#' Zcell.nnct(Y,cls,alt="l")$p.val
#' Zcell.nnct(Y,cls,alt="g")$p.val
#'
#' Zcell.nnct.2s(TS)
#'
#' Zcell.nnct.ls(TS)
#'
#' Zcell.nnct.rs(TS)
#'
#' @export
Zcell.nnct.pval <- function(zt)
{
k<-length(zt[1,]);
pval<- array(0,dim=c(k,k,3));
for (i in 1:k)
for (j in 1:k)
{ pls<- pnorm(zt[i,j]); prs<-1-pls; p2s<-2*min(pls,prs)
pval[i,j,] <- c(p2s,pls,prs)
}
clnames<-rownames(zt)
ct.names <- c("p-values for two-sided tests","p-values for left-sided tests","p-values for right-sided tests")
dimnames(pval)<-list(clnames,clnames,ct.names)
pval
} #end for the function
#'
#' @rdname funsZcell.nnct.pval
#'
#' @export
Zcell.nnct.2s <- function(zt)
{
k<-nrow(zt);
pval<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{ pls<-pnorm(zt[i,j])
pval[i,j] <- 2*min(pls,1-pls)
}
clnames<-rownames(zt)
rownames(pval)<-colnames(pval)<-clnames #row and column names from the NNCT
pval
} #end for the function
#'
#' @rdname funsZcell.nnct.pval
#'
#' @export
Zcell.nnct.ls <- function(zt)
{
k<-nrow(zt);
pval<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{
pval[i,j] <- pnorm(zt[i,j])
}
clnames<-rownames(zt)
rownames(pval)<-colnames(pval)<-clnames #row and column names from the NNCT
pval
} #end for the function
#'
#' @rdname funsZcell.nnct.pval
#'
#' @export
Zcell.nnct.rs <- function(zt)
{
k<-nrow(zt);
pval<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{
pval[i,j] <- 1-pnorm(zt[i,j])
}
clnames<-rownames(zt)
rownames(pval)<-colnames(pval)<-clnames #row and column names from the NNCT
pval
} #end for the function
#'
#################################################################
#' @title Conversion of a Matrix to a Vector
#'
#' @description
#' Converts the contingency table (or any matrix) \code{ct} to a \code{vector} by default row-wise (i.e., by appending
#' each row one after the other) or column-wise, and also returns the entry indices (in the original matrix \code{ct})
#' in a \eqn{k^2 \times 2} matrix
#'
#' @param ct A matrix, in particular a contingency table
#' @param byrow A logical argument (default=\code{TRUE}). If \code{TRUE}, rows of \code{ct} are appended to obtain the vector
#' and if \code{FALSE} columns of \code{ct} are appended to obtain the vector.
#'
#' @return A \code{list} with two elements
#' \item{vec}{The \code{vector}ized form the matrix \code{ct}, by default appending the rows of \code{ct}}
#' \item{ind}{The \eqn{k^2 \times 2} matrix of entry indices (in the original matrix \code{ct}) whose i-th row corresponds
#' to the i-th entry in \code{vec}.}
#'
#' @seealso \code{\link{ind.nnsym}} and \code{\link{ind.seg.coeff}},
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#' mat2vec(ct)
#' mat2vec(ct,byrow=FALSE)
#'
#' #an arbitrary 3x3 matrix
#' M<-matrix(sample(10:20,9),ncol=3)
#' M
#' mat2vec(M)
#' mat2vec(M,byrow=FALSE)
#'
#' @export
mat2vec <- function(ct,byrow=TRUE)
{
k<-nrow(ct)
ifelse(byrow==TRUE,vec<-as.vector(t(ct)),vec<-as.vector(ct))
if (k==1)
{lt<-rbind(c(1,1))
return(list(vec=vec,
ind=lt))}
if (byrow==TRUE)
{lt<-cbind(rep(1,k),c(1:k))
for (j in 2:k)
lt<-rbind(lt,cbind(rep(j,k),c(1:k)));
} else
{
lt<-cbind(c(1:k),rep(1,k))
for (j in 2:k)
lt<-rbind(lt,cbind(c(1:k),rep(j,k)));
}
list(vec=vec,
ind=lt)
} #end for the function
#'
#################################################################
#' @title Covariance Matrix of the Cell Counts in an NNCT
#'
#' @description Returns the covariance matrix of cell counts \eqn{N_{ij}} for \eqn{i,j=1,\ldots,k} in the NNCT, \code{ct}.
#' The covariance matrix is of dimension \eqn{k^2 \times k^2} and its entries are \eqn{cov(N_{ij},N_{kl})} when \eqn{N_{ij}} values are
#' by default corresponding to the row-wise vectorization of \code{ct}. If \code{byrow=FALSE}, the column-wise
#' vectorization of \code{ct} is used.
#' These covariances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table
#' @param varN The \eqn{k \times k} variance matrix of cell counts of NNCT, \code{ct}.
#' @param Q The number of shared NNs
#' @param R The number of reflexive NNs (i.e., twice the number of reflexive NN pairs)
#' @param byrow A logical argument (default=\code{TRUE}). If \code{TRUE}, rows of \code{ct} are appended to obtain the vector
#' and if \code{FALSE} columns of \code{ct} are appended to obtain the vector.
#'
#' @return The \eqn{k^2 \times k^2} covariance matrix of cell counts \eqn{N_{ij}} for \eqn{i,j=1,\ldots,k} in the NNCT, \code{ct}
#'
#' @seealso \code{\link{covNrow2col}}, \code{\link{cov.tct}} and \code{\link{cov.nnsym}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' cov.nnct(ct,varN,Qv,Rv)
#' cov.nnct(ct,varN,Qv,Rv,byrow=FALSE)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' cov.nnct(ct,varN,Qv,Rv)
#' cov.nnct(ct,varN,Qv,Rv,byrow=FALSE)
#'
#' #1D data points
#' n<-20 #or try sample(1:20,1)
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' cov.nnct(ct,varN,Qv,Rv)
#'
#' @export
cov.nnct <- function(ct,varN,Q,R,byrow=TRUE)
{
varvec<-mat2vec(varN)
Vsq<-varvec$vec
ind<-varvec$ind
rs <- row.sum(ct);
n<-sum(ct)
m<-nrow(ct);
cov<-matrix(0,m^2,m^2);
for (i in 1:(m^2))
for (j in i:(m^2))
if (i==j)
cov[i,j] <- Vsq[i]
else
{if (ind[i,1]==ind[i,2] & ind[j,1]==ind[j,2])
{
k <- ind[i,1]; l<-ind[j,1]
Paa<- p11(rs[k],n); Pbb<- p11(rs[l],n); Paabb<- p1122(rs[k],rs[l],n)
cov[i,j] <- (n^2-3*n-Q+R)*Paabb-n^2*Paa*Pbb
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,1]==ind[i,2] & ind[i,1]==ind[j,1])
{
k <- ind[i,1]; l<-ind[j,2]
Paa<- p11(rs[k],n); Pbb<- p11(rs[l],n); Paaab<- p1112(rs[k],rs[l],n);
Pab<- p12(rs[k],rs[l],n); Paab<- p112(rs[k],rs[l],n);
cov[i,j] <- (n-R)*Paab+(n^2-3*n-Q+R)*Paaab-n^2*Paa*Pab
cov[j,i] <- cov[i,j]
}
else
{if (ind[j,1]==ind[j,2] & ind[j,1]==ind[i,1])
{
k <- ind[j,1]; l<-ind[i,2]
Paa<- p11(rs[k],n); Pbb<- p11(rs[l],n); Paaab<- p1112(rs[k],rs[l],n);
Pab<- p12(rs[k],rs[l],n); Paab<- p112(rs[k],rs[l],n);
cov[i,j] <- (n-R)*Paab+(n^2-3*n-Q+R)*Paaab-n^2*Paa*Pab
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,1]==ind[i,2] & ind[i,1]==ind[j,2])
{
k <- ind[i,1]; l<-ind[j,1]
Paa<- p11(rs[k],n); Paaab<- p1112(rs[k],rs[l],n);
Pab<- p12(rs[k],rs[l],n); Paab<- p112(rs[k],rs[l],n);
cov[i,j] <- (n-R+Q)*Paab+(n^2-3*n-Q+R)*Paaab-n^2*Paa*Pab
cov[j,i] <- cov[i,j]
}
else
{if (ind[j,1]==ind[j,2] & ind[j,1]==ind[i,2])
{
k <- ind[j,1]; l<-ind[i,1]
Paa<- p11(rs[k],n); Paaab<- p1112(rs[k],rs[l],n);
Pab<- p12(rs[k],rs[l],n); Paab<- p112(rs[k],rs[l],n);
cov[i,j] <- (n-R+Q)*Paab+(n^2-3*n-Q+R)*Paaab-n^2*Paa*Pab
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,1]==ind[i,2])
{
k <- ind[i,1]; l<-ind[j,1]; q<-ind[j,2]
Paa<- p11(rs[k],n); Paabc<- p1123(rs[k],rs[l],rs[q],n); Pbc<- p12(rs[l],rs[q],n);
cov[i,j] <- (n^2-3*n-Q+R)*Paabc-n^2*Paa*Pbc
cov[j,i] <- cov[i,j]
}
else
{if (ind[j,1]==ind[j,2])
{
k <- ind[j,1]; l<-ind[i,1]; q<-ind[i,2]
Paa<- p11(rs[k],n); Paabc<- p1123(rs[k],rs[l],rs[q],n); Pbc<- p12(rs[l],rs[q],n);
cov[i,j] <- (n^2-3*n-Q+R)*Paabc-n^2*Paa*Pbc
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,1]==ind[j,1])
{
k <- ind[i,1]; l<-ind[i,2]; q<-ind[j,2]
Pab<- p12(rs[k],rs[l],n); Paabc<- p1123(rs[k],rs[l],rs[q],n); Pac<- p12(rs[k],rs[q],n);
cov[i,j] <- (n^2-3*n-Q+R)*Paabc-n^2*Pab*Pac
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,1]==ind[j,2] & ind[i,2]==ind[j,1] )
{
k <- ind[i,1]; l<-ind[j,1];
Pba<-Pab<- p12(rs[k],rs[l],n); Paabb<- p1122(rs[k],rs[l],n);
Paab<- p112(rs[k],rs[l],n); Pabb<- p122(rs[k],rs[l],n)
cov[i,j] <- R*Pab+(n-R)*(Paab+Pabb)+(n^2-3*n-Q+R)*Paabb-n^2*Pab*Pba
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,2]==ind[j,1])
{
k <- ind[i,1]; l<-ind[i,2]; q<-ind[j,2]
Pab<- p12(rs[k],rs[l],n); Pbc<- p12(rs[l],rs[q],n); Pabc<- p123(rs[k],rs[l],rs[q],n);
Pabbc<- p1223(rs[k],rs[l],rs[q],n)
cov[i,j] <- (n-R)*Pabc+(n^2-3*n-Q+R)*Pabbc-n^2*Pab*Pbc
cov[j,i] <- cov[i,j]
}
else
{if (ind[j,2]==ind[i,1])
{
k <- ind[j,1]; l<-ind[j,2]; q<-ind[i,2]
Pab<- p12(rs[k],rs[l],n); Pbc<- p12(rs[l],rs[q],n); Pabc<- p123(rs[k],rs[l],rs[q],n);
Pabbc<- p1223(rs[k],rs[l],rs[q],n)
cov[i,j] <- (n-R)*Pabc+(n^2-3*n-Q+R)*Pabbc-n^2*Pab*Pbc
cov[j,i] <- cov[i,j]
}
else
{if (ind[i,2]==ind[j,2])
{
k <- ind[i,1]; l<-ind[i,2]; q<-ind[j,1]
Pab<- p12(rs[k],rs[l],n); Pbc<- p12(rs[l],rs[q],n); Pabc<- p123(rs[k],rs[l],rs[q],n);
Pabbc<- p1223(rs[k],rs[l],rs[q],n)
cov[i,j] <- Q*Pabc+(n^2-3*n-Q+R)*Pabbc-n^2*Pab*Pbc
cov[j,i] <- cov[i,j]
}
else
{
k <- ind[i,1]; l<-ind[i,2]; q<-ind[j,1]; p<-ind[j,2]
Pab<- p12(rs[k],rs[l],n); Pcd<- p12(rs[q],rs[p],n);
Pabcd<- p1234(rs[k],rs[l],rs[q],rs[p],n)
cov[i,j] <- (n^2-3*n-Q+R)*Pabcd-n^2*Pab*Pcd
cov[j,i] <- cov[i,j]
}
}}}}}}}}}}}}
if (byrow==TRUE)
{
rcov<-cov #rcov is the result for covariance
} else # if byrow==FALSE
{
#the part for the covariance of the colum-wise vectorization of N vector
q<-nrow(ct)
ind<-vector()
for (j in 1:q)
{
ind<- c(ind,j+seq(0,q-1)*q)
}
rcov<-cov[ind,ind]
}
rcov
} #end for the function
#'
#################################################################
#' @title Conversion of the Covariance Matrix of the Row-wise Vectorized Cell Counts to Column-wise
#' Vectorized Cell Counts in an NNCT
#'
#' @description Converts the \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts \eqn{N_{ij}} for \eqn{i,j=1,\ldots,k}
#' in the NNCT, \code{ct} to the covariance matrix of column-wise vectorized cell counts.
#' In the output, the covariance matrix entries are \eqn{cov(N_{ij},N_{kl})} when \eqn{N_{ij}} values are
#' corresponding to the column-wise vectorization of \code{ct}.
#' These covariances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also
#' (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts of NNCT, \code{ct}.
#'
#' @return The \eqn{k^2 \times k^2} covariance matrix of column-wise vectorized cell counts \eqn{N_{ij}} for
#' \eqn{i,j=1,\ldots,k} in the NNCT, \code{ct}.
#'
#' @seealso \code{\link{cov.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' covNrow<-cov.nnct(ct,varN,Qv,Rv)
#' covNcol1<-cov.nnct(ct,varN,Qv,Rv,byrow=FALSE)
#' covNcol2<-covNrow2col(covNrow)
#'
#' covNrow
#' covNcol1
#' covNcol2
#'
#' all.equal(covNcol1,covNcol2)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' covNrow<-cov.nnct(ct,varN,Qv,Rv)
#' covNcol1<-cov.nnct(ct,varN,Qv,Rv,byrow=FALSE)
#' covNcol2<-covNrow2col(covNrow)
#'
#' covNrow
#' covNcol1
#' covNcol2
#'
#' all.equal(covNcol1,covNcol2)
#'
#' #1D data points
#' n<-20 #or try sample(1:20,1)
#' X<-as.matrix(runif(n))# need to be entered as a matrix with one column
#' #(i.e., a column vector), hence X<-runif(n) would not work
#' ipd<-ipd.mat(X)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' cov.nnct(ct,varN,Qv,Rv)
#'
#' @export
covNrow2col <- function(covN)
{
k<-sqrt(nrow(covN))
ind<-vector()
for (j in 1:k)
{
ind<- c(ind,j+seq(0,k-1)*k)
}
rcov<-covN[ind,ind]
rcov
} #end for the function
#'
#################################################################
# funs.vartct
#'
#' @title Functions for Variances of Cell Counts in the Types I, III and IV TCTs
#'
#' @description
#' Three functions: \code{var.tctI}, \code{var.tctIII} and \code{var.tctIV}.
#'
#' These functions return the variances of \eqn{T_{ij}} values for \eqn{i,j=1,\ldots,k} in the TCT in matrix form which
#' is of the same dimension as TCT for types I, III and IV tests.
#' The argument \code{covN} must be the covariance between \eqn{N_{ij}} values which are obtained from the NNCT by row-wise
#' vectorization.
#' These variances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts of NNCT, \code{ct}.
#'
#' @return Each of these functions returns a \code{matrix} of same dimension as, \code{ct}, whose entries are the variances of
#' the entries in the TCT for the corresponding type of cell-specific test.
#' The row and column names are inherited from \code{ct}.
#'
#' @seealso \code{\link{var.tct}} and \code{\link{var.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.vartct
NULL
#'
#' @rdname funs.vartct
#'
var.tctI <- function(ct,covN)
{
rs <- row.sum(ct);
k<-nrow(ct);
n<-sum(ct)
var<- matrix(0,k,k);
for (j in 1:k)
{
ind<-j+seq(0,k-1)*k
VarCj<-sum(covN[ind,ind])
for (i in 1:k)
{
covNijCj<-sum(covN[(i-1)*k+j,ind])
var[i,j]<- covN[(i-1)*k+j,(i-1)*k+j]+(rs[i]/n)^2*VarCj-2*(rs[i]/n)*covNijCj
}
}
clnames<-rownames(ct)
rownames(var)<-colnames(var)<-clnames #row and column names from the NNCT
var
} #end for the function
#'
#' @rdname funs.vartct
#'
var.tctIII <- function(ct,covN)
{
rs <- row.sum(ct);
k<-nrow(ct);
n<-sum(ct)
var<- matrix(0,k,k);
for (j in 1:k)
{
ind<-j+seq(0,k-1)*k
VarCj<-sum(covN[ind,ind])
for (i in 1:k)
{
covNijCj<-sum(covN[(i-1)*k+j,ind])
if (i == j)
{
var[i,j]<- covN[(i-1)*k+j,(i-1)*k+j]+(rs[i]-1)^2/(n-1)^2*VarCj-2*(rs[i]-1)/(n-1)*covNijCj
}
else
{
var[i,j]<- covN[(i-1)*k+j,(i-1)*k+j]+rs[i]^2/(n-1)^2*VarCj-2*rs[i]/(n-1)*covNijCj
}
}
}
clnames<-rownames(ct)
rownames(var)<-colnames(var)<-clnames #row and column names from the NNCT
var
} #end for the function
#'
#' @rdname funs.vartct
#'
var.tctIV <- function(ct,covN)
{
rs <- row.sum(ct);
k<-nrow(ct);
n<-sum(ct)
var<- matrix(0,k,k);
for (j in 1:k)
{
ind<-j+seq(0,k-1)*k
VarCj<-sum(covN[ind,ind])
for (i in 1:k)
{
covNijCj<-sum(covN[(i-1)*k+j,ind])
if (i == j)
{
var[i,j]<- (rs[i]/n)^2*((n-1)^2/(rs[i]-1)^2*covN[(i-1)*k+j,(i-1)*k+j]+VarCj-2*(n-1)/(rs[i]-1)*covNijCj)
}
else
{
var[i,j]<- (1/n)^2*((n-1)^2*covN[(i-1)*k+j,(i-1)*k+j]+rs[i]^2*VarCj-2*rs[i]*(n-1)*covNijCj)
}
}
}
clnames<-rownames(ct)
rownames(var)<-colnames(var)<-clnames #row and column names from the NNCT
var
} #end for the function
#'
#################################################################
#' @title Variances of Entries in a TCT
#'
#' @description Returns the variances of \eqn{T_{ij}} values for \eqn{i,j=1,\ldots,k} in the TCT in matrix form which
#' is of the same dimension as TCT for types I-IV tests.
#' The argument \code{covN} must be the covariance between \eqn{N_{ij}} values which are obtained from the NNCT by row-wise
#' vectorization. type determines the type of the test for which variances are to be computed, with default=\code{"III"}.
#' These variances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also (\insertCite{ceyhan:SJScorrected2010,ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts of NNCT, \code{ct}.
#' @param type The type of the cell-specific test, default=\code{"III"}. Takes on values \code{"I"}-\code{"IV"} (or
#' equivalently \code{1-4}, respectively.
#'
#' @return A \code{matrix} of same dimension as, \code{ct}, whose entries are the variances of
#' the entries in the TCT for the corresponding type of cell-specific test.
#' The row and column names are inherited from \code{ct}.
#'
#' @seealso \code{\link{var.nnct}}, \code{\link{var.tctI}}, \code{\link{var.tctIII}}, \code{\link{var.tctIV}}
#' and \code{\link{cov.tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' var.tct(ct,covN,"I")
#' var.tct(ct,covN,2)
#' var.tct(ct,covN,"III")
#' var.tct(ct,covN,"IV")
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' var.tct(ct,covN,"I")
#' var.tct(ct,covN,2)
#'
#' @export
var.tct <- function(ct,covN,type="III")
{
var <- switch(type,
I = { var <- var.tctI(ct,covN) },
II = { k<-nrow(ct)
var <- matrix(diag(covN),nrow=k,byrow = TRUE) },
III = { var <- var.tctIII(ct,covN) },
IV = { var <- var.tctIII(ct,covN) }
)
if (is.null(var)) stop("Enter numbers 1-4 or I-IV in quotes for type")
clnames<-rownames(ct)
rownames(var)<-colnames(var)<-clnames #row and column names from the NNCT
var
} #end for the function
#'
#################################################################
# funs.auxcovtct
#'
#' @title Auxiliary Functions for Computing Covariances Between Cell Counts in the TCT
#'
#' @description
#' Five functions: \code{cov.2cells}, \code{cov.cell.col}, \code{covNijCk}, \code{cov2cols}
#' and \code{covCiCj}
#'
#' These are auxiliary functions for computing covariances between entries in the TCT for the types I-IV
#' cell-specific tests. The covariances between \eqn{T_{ij}} values for \eqn{i,j=1,\ldots,k} in the TCT require covariances
#' between two cells in the NNCT, between a cell and column sum, and between two column sums in the NNCT.
#' \code{cov.2cells} computes the covariance between two cell counts \eqn{N_{ij}} and \eqn{N_{kl}} in an NNCT,
#' \code{cov.cell.col} and \code{covNijCk} are equivalent and they compute the covariance between cell count \eqn{N_{ij}}
#' and sum of column \eqn{k}, \eqn{C_k},
#' \code{cov2cols} and \code{covCiCj} are equivalent and they compute the covariance between sums of two columns,
#' \eqn{C_i} and \eqn{C_j}.
#' The index arguments refer to which entry or column sum is intended in the NNCT.
#' The argument \code{covN} must be the covariance between \eqn{N_{ij}} values which are obtained from NNCT by row-wise vectorization.
#' These covariances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' @param i,j,k,l Indices of the cell counts or column sums whose covariance is to be computed. All four are
#' needed for \code{cov.2cells} referring to cells \eqn{(i,j)} and \eqn{(k,l)}; only three indices \eqn{i,j,k} are needed for
#' \code{cov.cell.col} and \code{covNijCk} referring to cell \eqn{(i,j)} and column \eqn{k};
#' only two indices \eqn{i,j} are needed for \code{cov2cols} and \code{covCiCj} referring to columns \eqn{i} and \eqn{j}.
#' @param ct A nearest neighbor contingency table
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts of NNCT, \code{ct}.
#'
#' @return
#' \code{cov.2cells} returns the covariance between two cell counts \eqn{N_{ij}} and \eqn{N_{kl}} in an NNCT,
#' \code{cov.cell.col} and \code{covNijCk} return the covariance between cell count \eqn{N_{ij}}
#' and sum of column \eqn{k}, \eqn{C_k},
#' \code{cov2cols} and \code{covCiCj} return the covariance between sums of two columns,
#' \eqn{C_i} and \eqn{C_j}.
#'
#' @seealso \code{\link{cov.tct}} and \code{\link{cov.nnct}}
#'
#' @name funs.auxcovtct
NULL
#'
#' @rdname funs.auxcovtct
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' cov.2cells(1,1,1,2,ct,covN)
#'
#' cov.cell.col(2,2,1,ct,covN)
#' covNijCk(2,2,1,ct,covN)
#'
#' cov.2cols(2,1,ct,covN)
#' covCiCj(2,1,ct,covN)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' cov.2cells(2,3,1,2,ct,covN)
#'
#' cov.cell.col(1,1,2,ct,covN)
#' covNijCk(1,1,2,ct,covN)
#'
#' cov.2cols(3,4,ct,covN)
#' covCiCj(3,4,ct,covN)
#'
#' @export
cov.2cells <- function(i,j,k,l,ct,covN)
{
nr<-nrow(ct);
covN[(i-1)*nr+j,(k-1)*nr+l]
} #end for the function
#'
#' @rdname funs.auxcovtct
#'
#' @export
cov.cell.col <- function(i,j,k,ct,covN)
{
nr<-nrow(ct);
covNijCk<-0
for (r in 1:nr)
{
covNijCk<-covNijCk+covN[(i-1)*nr+j,(r-1)*nr+k]
}
covNijCk
} #end for the function
#'
#' @rdname funs.auxcovtct
#'
#' @export
covNijCk <- function(i,j,k,ct,covN)
{
nr<-nrow(ct);
if (any(c(i,j,k)>nr))
{stop('Subscript(s) out of bounds of the ct')}
ind<-cbind(rep(1:nr^2,rep(nr^2,nr^2)),rep(1:nr^2),rep(1:nr,rep(nr^3,nr)),rep(1:nr,rep(nr^2,nr)),
rep(rep(1:nr,rep(nr,nr)),nr),rep(rep(1:nr,nr),nr))
indlis<-which(ind[,3]==i & ind[,4]==j & ind[,6]==k)
crs<-ind[indlis,][,1:2]
sum(covN[crs])
} #end for the function
#'
#' @rdname funs.auxcovtct
#'
#' @export
cov.2cols <- function(i,j,ct,covN)
{
nr<-nrow(ct);
covCiCj<-0
for (s in 1:nr)
{
for (r in 1:nr)
{
covCiCj<-covCiCj+covN[(s-1)*nr+i,(r-1)*nr+j]
}
}
covCiCj
} #end for the function
#'
#' @rdname funs.auxcovtct
#'
#' @export
covCiCj <- function(i,j,ct,covN)
{
nr<-nrow(ct);
if (any(c(i,j)>nr))
{stop('Subscript(s) out of bounds of the ct')}
ind<-cbind(rep(1:nr^2,rep(nr^2,nr^2)),rep(1:nr^2),rep(1:nr,rep(nr^3,nr)),rep(1:nr,rep(nr^2,nr)),
rep(rep(1:nr,rep(nr,nr)),nr),rep(rep(1:nr,nr),nr))
indlis<-which(ind[,4]==i & ind[,6]==j)
crs<-ind[indlis,][,1:2]
sum(covN[crs])
} #end for the function
#'
#################################################################
# funs.covtct
#'
#' @title Functions for Covariances of the Entries of the Types I, III and IV TCTs
#'
#' @description
#' Four functions: \code{cov.tctI}, \code{cov.tctIII}, \code{cov.tct3} and \code{cov.tctIV}.
#'
#' These functions return the covariances between between entries in the TCT for the types I, III, and IV
#' cell-specific tests in matrix form which is of dimension \eqn{k^2 \times k^2}.
#' The covariance matrix entries are \eqn{cov(T_{ij},T_{kl})} when \eqn{T_{ij}} values are by default corresponding to
#' the row-wise vectorization of TCT.
#' The argument \code{CovN} must be the covariance between \eqn{N_{ij}} values which are obtained from the NNCT by row-wise
#' vectorization.
#' The functions \code{cov.tctIII} and \code{cov.tct3} are equivalent.
#' These covariances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @param ct A nearest neighbor contingency table
#' @param CovN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts of NNCT, \code{ct}.
#'
#' @return Each of these functions returns a \eqn{k^2 \times k^2} covariance matrix, whose entries are the covariances of
#' the entries in the TCTs for the corresponding type I-IV cell-specific test.
#' The row and column names are inherited from \code{ct}.
#'
#' @seealso \code{\link{cov.tct}} and \code{\link{cov.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.covtct
NULL
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' cov.tctI(ct,covN)
#' cov.tctIII(ct,covN)
#' cov.tctIV(ct,covN)
#'
#' @rdname funs.covtct
#'
#' @export
cov.tctI <- function(ct,CovN)
{
rs <- row.sum(ct);
q<-nrow(ct);
n<-sum(ct)
cov<- matrix(0,q^2,q^2);
for (r in 1:(q^2))
for (s in 1:(q^2))
{
i<-ceiling(r/q)
j<-r-q*floor(r/q-10^(-q))
k<-ceiling(s/q)
l<-s-q*floor(s/q-10^(-q))
indl<-l+seq(0,q-1)*q
indj<-j+seq(0,q-1)*q
covNijCl<-sum(CovN[r,indl])
covNklCj<-sum(CovN[s,indj])
covCjCl<-sum(CovN[indj,indl])
cov[r,s]<-CovN[r,s]-(rs[k]/n)*covNijCl-(rs[i]/n)*covNklCj+(rs[i]*rs[k]/(n^2))*covCjCl
}
cov
} #end for the function
#'
#' @rdname funs.covtct
#'
#' @export
cov.tctIII <- function(ct,CovN)
{
rs <- row.sum(ct);
q<-nrow(ct);
qsq<-q^2
n<-sum(ct)
cov<- matrix(0,qsq,qsq);
for (r in 1:(qsq))
for (s in 1:(qsq))
{
i<-ceiling(r/q)
j<-r-q*floor(r/q-10^(-q))
k<-ceiling(s/q)
l<-s-q*floor(s/q-10^(-q))
indl<-l+seq(0,q-1)*q
indj<-j+seq(0,q-1)*q
covNijCl<-sum(CovN[r,indl])
covNklCj<-sum(CovN[s,indj])
covCjCl<-sum(CovN[indj,indl])
if (i==j & k==l)
{cov[r,s]<-CovN[r,s]-(rs[k]-1)/(n-1)*covNijCl-(rs[i]-1)/(n-1)*covNklCj+(rs[i]-1)*(rs[k]-1)/(n-1)^2*covCjCl}
else
{if (i==j & k!=l)
{cov[r,s]<-CovN[r,s]-rs[k]/(n-1)*covNijCl-(rs[i]-1)/(n-1)*covNklCj+(rs[i]-1)*rs[k]/(n-1)^2*covCjCl}
else
{if (i!=j & k==l)
{cov[r,s]<-CovN[r,s]-(rs[k]-1)/(n-1)*covNijCl-rs[i]/(n-1)*covNklCj+rs[i]*(rs[k]-1)/(n-1)^2*covCjCl}
else
{cov[r,s]<-CovN[r,s]-rs[k]/(n-1)*covNijCl-rs[i]/(n-1)*covNklCj+rs[i]*rs[k]/(n-1)^2*covCjCl}
}
}
}
cov
} #end for the function
#'
#' @rdname funs.covtct
#'
#' @export
cov.tct3 <- function(ct,CovN)
{
q<-nrow(ct)
qsq<-q^2;
rs <- row.sum(ct);
n <- sum(ct)
CM<-matrix(0,qsq,qsq)
for (t in 1:qsq)
{
for (u in 1:qsq)
{
j<- ifelse(t %% q==0,q,t %% q) #%% is the modulus or mod operator, like 5%%2 is 1
i<- (t-j)/q+1
l<- ifelse(u %% q==0,q,u %% q)
k<- (u-l)/q+1
if (i==j & k==l)
{
CM[t,u]<-cov.2cells(i,j,k,l,ct,CovN)-(rs[i]-1)/(n-1)*cov.cell.col(k,l,i,ct,CovN)-
(rs[k]-1)/(n-1)*cov.cell.col(i,j,k,ct,CovN)+(rs[i]-1)*(rs[k]-1)/(n-1)^2*cov.2cols(i,k,ct,CovN)
}
if (i==j & k!=l)
{
CM[t,u]<-cov.2cells(i,j,k,l,ct,CovN)-(rs[i]-1)/(n-1)*cov.cell.col(k,l,i,ct,CovN)-
rs[k]/(n-1)*cov.cell.col(i,j,l,ct,CovN)+(rs[i]-1)*rs[k]/(n-1)^2*cov.2cols(i,l,ct,CovN)
}
if (i!=j & k==l)
{
CM[t,u]<-cov.2cells(i,j,k,l,ct,CovN)-(rs[k]-1)/(n-1)*cov.cell.col(i,j,k,ct,CovN)-
rs[i]/(n-1)*cov.cell.col(k,l,j,ct,CovN)+rs[i]*(rs[k]-1)/(n-1)^2*cov.2cols(j,k,ct,CovN)
}
if (i!=j & k!=l)
{
CM[t,u]<-cov.2cells(i,j,k,l,ct,CovN)-rs[k]/(n-1)*cov.cell.col(i,j,l,ct,CovN)-
rs[i]/(n-1)*cov.cell.col(k,l,j,ct,CovN)+rs[i]*rs[k]/(n-1)^2*cov.2cols(j,l,ct,CovN)
}
}
}
CM
} #end for the function
#'
#' @rdname funs.covtct
#'
#' @export
cov.tctIV <- function(ct,CovN)
{
rs <- row.sum(ct);
q<-nrow(ct);
n<-sum(ct)
cov<- matrix(0,q^2,q^2);
for (r in 1:(q^2))
for (s in 1:(q^2))
{
i<-ceiling(r/q)
j<-r-q*floor(r/q-10^(-q))
k<-ceiling(s/q)
l<-s-q*floor(s/q-10^(-q))
indl<-l+seq(0,q-1)*q
indj<-j+seq(0,q-1)*q
covNijCl<-sum(CovN[r,indl])
covNklCj<-sum(CovN[s,indj])
covCjCl<-sum(CovN[indj,indl])
if (i==j & k==l)
{cov[r,s]<-rs[i]*rs[k]/(n^2)*((n-1)^2/((rs[i]-1)*(rs[k]-1))*CovN[r,s]-(n-1)/(rs[i]-1)*covNijCl-(n-1)/(rs[k]-1)*covNklCj+covCjCl)}
else
{if (i==j & k!=l)
{cov[r,s]<-rs[i]/(n^2)*((n-1)^2/(rs[i]-1)*CovN[r,s]-(n-1)*rs[k]/(rs[i]-1)*covNijCl-(n-1)*covNklCj+rs[k]*covCjCl)}
else
{if (i!=j & k==l)
{cov[r,s]<-rs[k]/(n^2)*((n-1)^2/(rs[k]-1)*CovN[r,s]-(n-1)*covNijCl-(n-1)*rs[i]/(rs[k]-1)*covNklCj+rs[i]*covCjCl)}
else
{cov[r,s]<-1/(n^2)*((n-1)^2*CovN[r,s]-(n-1)*rs[k]*covNijCl-(n-1)*rs[i]*covNklCj+rs[i]*rs[k]*covCjCl)}
}
}
}
cov
} #end for the function
#'
#################################################################
#' @title Covariance Matrix of the Entries of the Type I-IV TCTs
#'
#' @description Returns the covariance matrix of the entries \eqn{T_{ij}} for \eqn{i,j=1,\ldots,k} in the TCT for the types I, III,
#' and IV cell-specific tests. The covariance matrix is of dimension \eqn{k^2 \times k^2} and its entries are \eqn{cov(T_{ij},T_{kl})}
#' when \eqn{T_{ij}} values are by default corresponding to the row-wise vectorization of TCT.
#' The argument \code{covN} must be the covariance matrix of \eqn{N_{ij}} values which are obtained from the NNCT by row-wise
#' vectorization.
#' The functions \code{cov.tctIII} and \code{cov.tct3} are equivalent.
#' These covariances are valid under RL or conditional on \eqn{Q} and \eqn{R} under CSR.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}).
#'
#' @inheritParams var.tct
#'
#' @return The \eqn{k^2 \times k^2} covariance matrix of the entries \eqn{T_{ij}} for \eqn{i,j=1,\ldots,k} in the Type I-IV TCTs
#'
#' @seealso \code{\link{cov.nnct}} and \code{\link{cov.nnsym}}
#'
#' @references
#' \insertAllCited{}
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' cov.tct(ct,covN,type=1)
#' cov.tct(ct,covN,type="I")
#' cov.tct(ct,covN,type="II")
#' cov.tct(ct,covN,type="III")
#' cov.tct(ct,covN,type="IV")
#' cov.tctI(ct,covN)
#'
#' cov.tct(ct,covN)
#' cov.tctIII(ct,covN)
#' cov.tct3(ct,covN)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#'
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' cov.tct(ct,covN,type=3)
#' cov.tct(ct,covN,type="III")
#'
#' cov.tctIII(ct,covN)
#' cov.tct3(ct,covN)
#'
#' @export
cov.tct <- function(ct,covN,type="III")
{
cov <- switch(type,
I = { cov <- cov.tctI(ct,covN) },
II = { cov <- covN },
III = { cov <- cov.tctIII(ct,covN) },
IV = { cov <- cov.tctIII(ct,covN) }
)
if (is.null(cov)) stop("Enter numbers 1-4 or I-IV in quotes for type")
cov
} #end for the function
#'
#################################################################
# funsZcell.tct
#'
#' @title Types I-IV Cell-specific Z Tests of Segregation based on NNCTs
#'
#' @description
#' Two functions: \code{Zcell.tct.ct} and \code{Zcell.tct}.
#'
#' All functions are objects of class \code{"cellhtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' entries of types I-IV TCT, \eqn{T_{ij}}, from their expected values under RL or CSR for each entry.
#' The test for each entry \eqn{i,j} is based on the normal approximation of the corresponding \eqn{T_{ij}} value
#' and are due to \insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}.
#'
#' Each function yields a contingency table of the test statistics, \eqn{p}-values for the corresponding
#' alternative, expected values (i.e. null value(s)), lower and upper confidence levels and sample estimates (i.e. observed values)
#' for the \eqn{T_{ij}} values and also names of the test statistics, estimates, null values and the method and the data
#' set used.
#'
#' The null hypothesis for each entry \eqn{i,j} is that the corresponding value \eqn{T_{ij}} is equal to the expected value
#' under RL or CSR, see \insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat} for more detail.
#'
#' See also (\insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}) and references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{Zcell.tct.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized cell counts of NNCT, \code{ct} ;
#' used in \code{Zcell.tct.ct} only.
#' @param type The type of the cell-specific test, default=\code{"III"}. Takes on values \code{"I"}-\code{"IV"} (or
#' equivalently \code{1-4}, respectively.
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{Zcell.tct} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{Zcell.tct} only
#' @param alternative Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}.
#' @param conf.level Level of the upper and lower confidence limits, default is \code{0.95}, for the \eqn{T_{ij}} values
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function, used in \code{Zcell.tct} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The \code{matrix} of Types I-IV cell-specific test statistics}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{matrix} of \eqn{p}-values for the hypothesis test for the corresponding alternative}
#' \item{LCL,UCL}{Matrix of Lower and Upper Confidence Levels for the \eqn{T_{ij}} values at the given confidence
#' level \code{conf.level} and depends on the type of \code{alternative}.}
#' \item{conf.int}{The confidence interval for the estimates, it is \code{NULL} here, since we provide the \code{UCL} and \code{LCL}
#' in \code{matrix} form.}
#' \item{cnf.lvl}{Level of the upper and lower confidence limits of the entries, provided in \code{conf.level}.}
#' \item{estimate}{Estimates of the parameters, i.e., matrix of the observed \eqn{T_{ij}} values which is the TCT}
#' \item{est.name,est.name2}{Names of the estimates, both are same in this function}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of
#' \eqn{T_{ij}} values in the TCT.}
#' \item{null.name}{Name of the null values}
#' \item{alternative}{Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{Zcell.tct.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{Zcell.tct} only}
#'
#' @seealso \code{\link{Zcell.nnct.ct}} and \code{\link{Zcell.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsZcell.tct
NULL
#'
#' @rdname funsZcell.tct
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' type<-"I" #try also "II", "III", and "IV"
#' Zcell.tct(Y,cls,type)
#' Zcell.tct(Y,cls,type,alt="g")
#' Zcell.tct(Y,cls,type,method="max")
#'
#' Zcell.tct.ct(ct,covN)
#' Zcell.tct.ct(ct,covN,type)
#' Zcell.tct.ct(ct,covN,type,alt="g")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' Zcell.tct(Y,cls,type)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' Zcell.tct(Y,cls,type)
#' Zcell.tct.ct(ct,covN,type)
#'
#' @export
Zcell.tct.ct <- function(ct,covN,type="III",alternative=c("two.sided", "less", "greater"),conf.level = 0.95)
{
alternative <-match.arg(alternative)
if (length(alternative) > 1 || is.na(alternative))
stop("alternative must be one \"greater\", \"less\", \"two.sided\"")
if (!missing(conf.level))
if (length(conf.level) != 1 || is.na(conf.level) || conf.level < 0 || conf.level > 1)
stop("conf.level must be a number between 0 and 1")
Tct<-tct(ct,type=type)
estimate<-Tct
ET<-EV.tct(ct,type)
varT<-var.tct(ct,covN,type)
if (all(is.na(varT)))
{stop('All entries of the variance, varT, are NaN (due to data size <= 3), so the tests are not defined')}
nullTij<-ET
estimate.name <- names.null <-paste("TCT ",type," entries",sep="")
k<-nrow(ct);
ts<-stderrT<- matrix(0,k,k);
for (i in 1:k)
for (j in 1:k)
{ stderrT[i,j]<-sqrt(varT[i,j])
ts[i,j] <- (Tct[i,j]-ET[i,j])/stderrT[i,j]
}
if (all(is.na(ts)))
{stop('All of the test stat statistics are NaN, the test are not well defined')}
alt<- switch(alternative,
less = {
pval <-pnorm(ts)
lcl <-NULL
ucl <-estimate+qnorm(conf.level)*stderrT
},
greater = {
pval <-pnorm(ts, lower.tail = FALSE)
ucl <-NULL
lcl <-estimate-qnorm(conf.level)*stderrT
},
two.sided = {
pval <-2 * pnorm(-abs(ts))
alpha <-1 - conf.level
crit.val <-qnorm(1-alpha/2)
lcl <-estimate-crit.val*stderrT
ucl <-estimate+crit.val*stderrT
}
)
if (is.null(alt)) stop("Alternative must be one of less, greater, or two.sided in quotes")
cnf.lvl<-conf.level
method <-paste("Type ",type," Contingency Table (TCT ",type,") Cell-Specific Tests",sep="")
clnames<-rownames(ct) #row and column names for the NNCT, \code{ct}
rownames(ts)<-colnames(ts)<-clnames #row and column names for the test stat matrix
rownames(pval)<-colnames(pval)<-clnames
rownames(nullTij)<-colnames(nullTij)<-clnames
if (!is.null(lcl)) {rownames(lcl)<-colnames(lcl)<-clnames}
if (!is.null(ucl)) {rownames(ucl)<-colnames(ucl)<-clnames}
ts.names <-paste("Type ",type," cell-specific tests of segregation, Z",sep="")
dname <-deparse(substitute(ct))
rval <-list(
statistic=ts,
stat.names=ts.names,
p.value=pval,
LCL = lcl,UCL = ucl,
conf.int = NULL,
cnf.lvl=conf.level,
estimate = estimate,
est.name = estimate.name,
est.name2 = estimate.name, #this is for other functions to have a different description for the sample estimates
null.value = nullTij,
null.name=names.null,
alternative = alternative,
method = method,
ct.name = dname
)
attr(rval, "class") <-"cellhtest"
return(rval)
} #end for the function
#'
#' @rdname funsZcell.tct
#'
#' @export
Zcell.tct <- function(dat,lab,type="III",alternative=c("two.sided", "less", "greater"),conf.level = 0.95,...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q
Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
covN<-cov.nnct(ct,varN,Qv,Rv)
res<- Zcell.tct.ct(ct,covN,type,alternative=alternative,conf.level=conf.level)
dname <-deparse(substitute(dat))
res$data.name<-dname
return(res)
} #end for the function
#'
#################################################################
# funsZcell.spec
#'
#' @title Cell-specific Z Tests of Segregation for NNCTs
#'
#' @description
#' Two functions: \code{Zcell.spec.ct} and \code{Zcell.spec}.
#'
#' All functions are objects of class \code{"cellhtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' entries of NNCT or types I-IV TCTs from the expected values under RL or CSR for each entry.
#' The test for each entry \eqn{i,j} is based on the normal approximation of the corresponding \eqn{T_{ij}} value
#' and are due to \insertCite{dixon:NNCTEco2002;textual}{nnspat}
#' and \insertCite{ceyhan:jkss-posthoc-2017;textual}{nnspat}, respectively.
#'
#' The \code{type="dixon"} or \code{"nnct"} refers to Dixon's cell-specific test of segregation, and
#' \code{type="I"}-\code{"IV"} refers to types I-IV cell-specific tests, respectively.
#'
#' Each function yields a contingency table of the test statistics, \eqn{p}-values for the corresponding
#' alternative, expected values (i.e. null value(s)), lower and upper confidence levels and sample estimates (i.e. observed values)
#' for the \eqn{N_{ij}} or \eqn{T_{ij}} values and also names of the test statistics, estimates, null values and the method and
#' the data set used.
#'
#' The null hypothesis for each entry \eqn{i,j} is that the corresponding value \eqn{N_{ij}} or \eqn{T_{ij}} is equal to the
#' expected value under RL or CSR.
#'
#' See also
#' (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:jkss-posthoc-2017;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{Zcell.spec.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized entries of NNCT, \code{ct} ;
#' used in \code{Zcell.spec.ct} only.
#' @param type The type of the cell-specific test with no default.
#' Takes on values \code{"dixon"} or \code{"nnct"} for Dixon's cell-specific tests and \code{"I"}-\code{"IV"} for types I-IV cell-specific
#' tests (or equivalently \code{1-6}, respectively).
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{Zcell.spec} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{Zcell.spec} only
#' @param alternative Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}.
#' @param conf.level Level of the upper and lower confidence limits, default is \code{0.95}, for the \eqn{N_{ij}} or \eqn{T_{ij}} values
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function, used in \code{Zcell.spec} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The \code{matrix} of cell-specific test statistics}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{matrix} of \eqn{p}-values for the hypothesis test for the corresponding alternative}
#' \item{LCL,UCL}{Matrix of Lower and Upper Confidence Levels for the \eqn{N_{ij}} or \eqn{T_{ij}} values at the given confidence
#' level \code{conf.level} and depends on the type of \code{alternative}.}
#' \item{conf.int}{The confidence interval for the estimates, it is \code{NULL} here, since we provide the \code{UCL} and \code{LCL}
#' in \code{matrix} form.}
#' \item{cnf.lvl}{Level of the upper and lower confidence limits of the entries, provided in \code{conf.level}.}
#' \item{estimate}{Estimates of the parameters, NNCT or TCT, i.e., matrix of the observed \eqn{N_{ij}} or \eqn{T_{ij}} values
#' which is NNCT or TCT, respectively.}
#' \item{est.name,est.name2}{Names of the estimates, both are same in this function}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of the
#' the null \eqn{N_{ij}} values in an NNCT or \eqn{T_{ij}} values in an TCT.}
#' \item{null.name}{Name of the null values}
#' \item{alternative}{Type of the alternative hypothesis in the test, one of \code{"two.sided"}, \code{"less"} or \code{"greater"}}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{Zcell.spec.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{Zcell.spec} only}
#'
#' @seealso \code{\link{Zcell.nnct.ct}}, \code{\link{Zcell.nnct}}, \code{\link{Zcell.tct.ct}}
#' and \code{\link{Zcell.tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsZcell.spec
NULL
#'
#' @rdname funsZcell.spec
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' type<-"IV" #"dixon" #try also "nnct", "I", "II", "III", and "IV"
#' cell.spec(Y,cls,type)
#' cell.spec(Y,cls,type,alt="g")
#'
#' cell.spec.ct(ct,covN,type)
#' cell.spec.ct(ct,covN,type="II",alt="g")
#'
#' cell.spec(Y,cls,type,method="max")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' cell.spec(Y,cls,type="I")
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' cell.spec(Y,cls,type)
#' cell.spec.ct(ct,covN,type)
#'
#' @export
cell.spec.ct <- function(ct,covN,type,alternative=c("two.sided", "less", "greater"),conf.level = 0.95)
{
k<-nrow(ct)
if ((type %in% c("dixon","nnct","I","II","III","IV"))==FALSE)
{stop("type is misspecified, should be one of dixon, nnct, I, II, III or IV (in quotes)")}
if (type %in% c("dixon","nnct"))
{
varN<-matrix(diag(covN),byrow=T,ncol=k)
res<- Zcell.nnct.ct(ct,varN,alternative=alternative,conf.level=conf.level)
} else
{
res<- Zcell.tct.ct(ct,covN,type=type,alternative=alternative,conf.level=conf.level)
}
return(res)
} #end for the function
#'
#' @rdname funsZcell.spec
#'
#' @export
cell.spec <- function(dat,lab,type,alternative=c("two.sided", "less", "greater"),conf.level = 0.95,...)
{
if ((type %in% c("dixon","nnct","I","II","III","IV"))==FALSE)
{stop("type is misspecified, should be one of dixon, nnct, I, II, III or IV (in quotes)")}
if (type %in% c("dixon","nnct"))
{
res<- Zcell.nnct(dat,lab,alternative=alternative,conf.level=conf.level,...)
} else
{
res<- Zcell.tct(dat,lab,type=type,alternative=alternative,conf.level=conf.level,...)
}
dname <-deparse(substitute(dat))
res$data.name<-dname
return(res)
} #end for the function
#'
#################################################################
# funsC_MI_II
#'
#' @title Correction Matrices for the Covariance Matrix of NNCT entries
#'
#' @description
#' Two functions: \code{correct.cf1} and \code{correct.cf1}.
#'
#' Each function yields matrices which are used in obtaining covariance matrices of \eqn{T_{ij}} values for
#' types I and II tests from the usual Chi-Square test of contingency tables (i.e. Pielou's test) applied
#' on NNCTs.
#' The output matrices are to be term-by-term multiplied with the covariance matrix of
#' the entries of NNCT. See Sections 3.1 and 3.2 in
#' (\insertCite{ceyhan:SJScorrected2010;textual}{nnspat})
#' or
#' Sections 3.5.1 and 3.5.2 in
#' (\insertCite{ECarXivCorrected:2008;textual}{nnspat}) for more details.
#'
#' @param ct A nearest neighbor contingency table
#'
#' @return
#' Both functions return a correction matrix which is to be multiplied with the covariance matrix of
#' entries of the NNCT so as to obtain types I and II overall tests from Pielou's test of segregation.
#' See the description above for further detail.
#'
#' @seealso \code{\link{nnct.cr1}} and \code{\link{nnct.cr2}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsC_MI_II
NULL
#'
#' @rdname funsC_MI_II
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' #correction type 1
#' CM1<-correct.cf1(ct)
#' CovN.cf1<-covN*CM1
#'
#' #correction type 2
#' CM2<-correct.cf2(ct)
#' CovN.cf2<-covN*CM2
#'
#' covN
#' CovN.cf1
#' CovN.cf2
#'
#' @export
correct.cf1 <- function(ct)
{
rs <- row.sum(ct);
cs <- col.sum(ct);
n<-sum(rs)
m<-nrow(ct);
ctvec<-mat2vec(ct)
lst<-ctvec$ind
cm<-matrix(0,m^2,m^2);
for (i in 1:(m^2))
for (j in i:(m^2))
{
cm[i,j]<-rs[lst[i,1]]*cs[lst[i,2]]*rs[lst[j,1]]*cs[lst[j,2]]
cm[j,i]<-cm[i,j]
}
n/sqrt(cm)
} #end for the function
#'
#' @rdname funsC_MI_II
#'
#' @export
correct.cf2 <- function(ct)
{
rs <- row.sum(ct);
n<-sum(rs)
m<-nrow(ct);
ctvec<-mat2vec(ct)
lst<-ctvec$ind
cm<-matrix(0,m^2,m^2);
for (i in 1:(m^2))
for (j in i:(m^2))
{
cm[i,j]<-rs[lst[i,1]]*rs[lst[i,2]]*rs[lst[j,1]]*rs[lst[j,2]]
cm[j,i]<-cm[i,j]
}
n/sqrt(cm)
} #end for the function
#'
#################################################################
# funsN_I_II
#'
#' @title Correction Matrices for the NNCT entries
#'
#' @description
#' Two functions: \code{nnct.cr1} and \code{nnct.cr1}.
#'
#' Each function yields matrices which are used in obtaining the correction term to be added to
#' the usual Chi-Square test of contingency tables (i.e. Pielou's test) applied
#' on NNCTs to obtain types I and II overall tests.
#' The output contingency tables are to be row-wise vectorized to obtain \eqn{N_I} and \eqn{N_{II}} vectors.
#' See Sections 3.1 and 3.2 in
#' (\insertCite{ceyhan:SJScorrected2010;textual}{nnspat})
#' or
#' Sections 3.5.1 and 3.5.2 in
#' (\insertCite{ECarXivCorrected:2008;textual}{nnspat}) for more details.
#'
#' @param ct A nearest neighbor contingency table
#'
#' @return
#' Both functions return a \eqn{k \times k} contingency table which is to be row-wise vectorized to obtain \eqn{N_I} and \eqn{N_{II}}
#' vectors which are used in the correction summands to obtain types I and II overall tests from Pielou's
#' test of segregation.
#' See the description above for further detail.
#'
#' @seealso \code{\link{correct.cf1}} and \code{\link{correct.cf2}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsN_I_II
NULL
#'
#' @rdname funsN_I_II
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' #correction type 1
#' ct1<-nnct.cr1(ct)
#'
#' #correction type 2
#' ct2<-nnct.cr2(ct)
#'
#' ct
#' ct1
#' ct2
#'
#' @export
nnct.cr1 <- function(ct)
{
rs <- row.sum(ct);
cs <- col.sum(ct);
n<-sum(rs)
m<-nrow(ct);
til<-matrix(0,m,m);
for (i in 1:m)
for (j in 1:m)
{
til[i,j]<-(ct[i,j]-rs[i]*cs[j]/n)/sqrt(rs[i]*cs[j]/n)
}
til
} #end for the function
#'
#' @rdname funsN_I_II
#'
#' @export
nnct.cr2 <- function(ct)
{
rs <- row.sum(ct);
n<-sum(rs)
m<-nrow(ct);
til<-matrix(0,m,m);
for (i in 1:m)
for (j in 1:m)
{
til[i,j]<-(ct[i,j]-rs[i]*rs[j]/n)/sqrt(rs[i]*rs[j]/n)
}
til
} #end for the function
#'
#################################################################
# funs.base.class.spec
#'
#' @title Base Class-specific Chi-square Tests based on NNCTs
#'
#' @description
#' Two functions: \code{base.class.spec.ct} and \code{base.class.spec}.
#'
#' Both functions are objects of class \code{"classhtest"} but with different arguments (see the parameter list below).
#' Each one performs class specific segregation tests due to Dixon for \eqn{k \ge 2} classes. That is,
#' each one performs hypothesis tests of deviations of
#' entries in each row of NNCT from the expected values under RL or CSR for each row.
#' Recall that row labels in the NNCT are base class labels.
#' The test for each row \eqn{i} is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{dixon:NNCTEco2002;textual}{nnspat}.
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} for each base class \eqn{i}, description of the
#' alternative with the corresponding null values (i.e. expected values) for the row \eqn{i}, estimates for the entries in row \eqn{i}
#' for \eqn{i=1,\ldots,k}. The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis for each row is that the corresponding \eqn{N_{ij}} entries in row \eqn{i} are equal to their
#' expected values under RL or CSR.
#'
#' See also (\insertCite{dixon:NNCTEco2002,ceyhan:stat-neer-class2009;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{base.class.spec.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized entries of NNCT, \code{ct} ;
#' used in \code{base.class.spec.ct} only.
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{base.class.spec} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{base.class.spec} only
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{base.class.spec} only
#'
#' @return A \code{list} with the elements
#' \item{type}{Type of the class-specific test, which is \code{"base"} for this function}
#' \item{statistic}{The \code{vector} of base class-specific test statistics}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{vector} of \eqn{p}-values for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k-1} for this function.}
#' \item{estimate}{Estimates of the parameters, NNCT, i.e., matrix of the observed \eqn{N_{ij}} values
#' which is the NNCT.}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of
#' the \eqn{N_{ij}} values in the NNCT.}
#' \item{null.name}{Name of the null values}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{base.class.spec.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{base.class.spec} only}
#'
#' @seealso \code{\link{NN.class.spec.ct}}, \code{\link{NN.class.spec}}, \code{\link{class.spec.ct}}
#' and \code{\link{class.spec}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.base.class.spec
NULL
#'
#' @rdname funs.base.class.spec
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' base.class.spec(Y,cls)
#' base.class.spec.ct(ct,covN)
#' base.class.spec(Y,cls,method="max")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' base.class.spec(Y,fcls)
#' base.class.spec.ct(ct,covN)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' base.class.spec(Y,cls)
#' base.class.spec.ct(ct,covN)
#'
#' @export
base.class.spec.ct <- function(ct,covN)
{
typ="base" #type of the class-specific test, it is base here (for the columns) in the NNCT
k<-nrow(ct);
nsq<-mat2vec(ct)$vec #row-wise vectorization
EN<-EV.nnct(ct)
Ensq <- mat2vec(EN)$vec
ts<-vector()
for (i in 1:k)
{
Ns <- nsq[(k*(i-1)+1):(k*i)];
ENs<-Ensq[(k*(i-1)+1):(k*i)];
CovN<-covN[(k*(i-1)+1):(k*i),(k*(i-1)+1):(k*i)];
ts<- c(ts,t(Ns-ENs) %*% ginv(CovN) %*% (Ns-ENs))
}
pval<- 1-pchisq(ts,df=k-1)
method <-c("Dixon's Base Class-Specific Tests")
ts.names <-"Dixon's base class-specific tests of segregation"
dname <-deparse(substitute(ct))
rval <-list(
type=typ,
statistic=ts,
stat.names=ts.names,
p.value=pval,
df=k-1,
estimate = ct,
null.value = EN,
method = method,
ct.name = dname
)
attr(rval, "class") <-"classhtest"
return(rval)
} #end for the function
#'
#' @rdname funs.base.class.spec
#'
#' @export
base.class.spec <- function(dat,lab,...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q; Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
covN<-cov.nnct(ct,varN,Qv,Rv)
rval<-base.class.spec.ct(ct,covN)
dname <-deparse(substitute(dat))
rval$data.name<-dname
return(rval)
} #end for the function
#'
#################################################################
# funsNNclass.spec
#'
#' @title NN Class-specific Chi-square Tests based on NNCTs
#'
#' @description
#' Two functions: \code{NN.class.spec.ct} and \code{NN.class.spec}.
#'
#' Both functions are objects of class \code{"classhtest"} but with different arguments (see the parameter list below).
#' Each one performs class specific segregation tests for the columns, i.e., NN categories for \eqn{k \ge 2} classes.
#' That is,
#' each one performs hypothesis tests of deviations of
#' entries in each column of NNCT from the expected values under RL or CSR for each column.
#' Recall that column labels in the NNCT are NN class labels.
#' The test for each column \eqn{i} is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{ceyhan:stat-neer-class2009;textual}{nnspat}.
#'
#' The argument \code{covN} must be covariance of column-wise vectorization of NNCT if the logical argument \code{byrow=FALSE}
#' otherwise the function converts \code{covN} (which is done row-wise) to columnwise version with \code{\link{covNrow2col}}
#' function.
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} for each base class \eqn{i}, description of the
#' alternative with the corresponding null values (i.e. expected values) for the column \eqn{i}, estimates for the entries in column \eqn{i}
#' for \eqn{i=1,\ldots,k}. The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis for each column is that the corresponding \eqn{N_{ij}} entries in column \eqn{i} are equal to their
#' expected values under RL or CSR.
#'
#' See also (\insertCite{dixon:NNCTEco2002,ceyhan:stat-neer-class2009;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{NN.class.spec.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of column-wise vectorized entries of NNCT, \code{ct} ;
#' used in \code{NN.class.spec.ct} only.
#' @param byrow A logical argument (default=\code{TRUE}). If \code{TRUE}, rows of \code{ct} are appended to obtain the vector
#' of \eqn{N_{ij}} values and \code{covN} is the covariance matrix for this vector and if \code{FALSE} columns of \code{ct} are appended
#' to obtain the \eqn{N_{ij}} vector and \code{covN} is converted to the row-wise version by covNrow2col function;used in \code{NN.class.spec.ct} only.
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{NN.class.spec} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{NN.class.spec} only
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{NN.class.spec} only
#'
#' @return A \code{list} with the elements
#' \item{type}{Type of the class-specific test, which is \code{"NN"} for this function}
#' \item{statistic}{The \code{vector} of NN class-specific test statistics}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{vector} of \eqn{p}-values for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k} for this function.}
#' \item{estimate}{Estimates of the parameters, transpose of the NNCT, i.e., transpose of the matrix of the
#' observed \eqn{N_{ij}} values which is the transpose of NNCT.}
#' \item{null.value}{Transpose of the matrix of hypothesized null values for the parameters which are expected
#' values of the \eqn{N_{ij}} values in the NNCT.}
#' \item{null.name}{Name of the null values}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{NN.class.spec.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{NN.class.spec} only}
#'
#' @seealso \code{\link{base.class.spec.ct}}, \code{\link{base.class.spec}}, \code{\link{class.spec.ct}}
#' and \code{\link{class.spec}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsNNclass.spec
NULL
#'
#' @rdname funsNNclass.spec
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covNrow<-cov.nnct(ct,varN,Qv,Rv)
#' covNcol<-covNrow2col(covNrow)
#'
#' NN.class.spec(Y,cls)
#' NN.class.spec(Y,cls,method="max")
#'
#' NN.class.spec.ct(ct,covNrow)
#' NN.class.spec.ct(ct,covNcol,byrow = FALSE)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' NN.class.spec(Y,fcls)
#' NN.class.spec.ct(ct,covNrow)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covNrow<-cov.nnct(ct,varN,Qv,Rv)
#' covNcol<-covNrow2col(covNrow)
#'
#' NN.class.spec(Y,cls)
#'
#' NN.class.spec.ct(ct,covNrow)
#' NN.class.spec.ct(ct,covNcol,byrow = FALSE)
#'
#' @export
NN.class.spec.ct <- function(ct,covN,byrow=TRUE)
{
typ="NN" #type of the class-specific test, it is NN here (for the columns) in the NNCT
k<-nrow(ct);
nsq<-mat2vec(ct,byrow=FALSE)$vec #row-wise vectorization
EN<-EV.nnct(ct)
Ensq <- mat2vec(EN,byrow=FALSE)$vec
ifelse(byrow==TRUE,covN<-covNrow2col(covN),covN<-covN)
if (all(is.na(covN)))
{stop('All entries of the covariance matrix, covN, are NaN, so the tests are not defined')}
ts<-vector()
for (i in 1:k)
{
Ns <- nsq[(k*(i-1)+1):(k*i)];
ENs<-Ensq[(k*(i-1)+1):(k*i)];
CovN<-covN[(k*(i-1)+1):(k*i),(k*(i-1)+1):(k*i)];
ts<- c(ts,t(Ns-ENs) %*% solve(CovN) %*% (Ns-ENs))
}
pval<- 1-pchisq(ts,df=k)
method <-c("NN Class-Specific Tests")
ts.names <-"NN class-specific tests of segregation"
dname <-deparse(substitute(ct))
rval <-list(
type=typ,
statistic=ts,
stat.names=ts.names,
p.value=pval,
df=k,
estimate = t(ct), # to have columns as rows in the print function
null.value = t(EN), # to have columns as rows in the print function
method = method,
ct.name = dname
)
attr(rval, "class") <-"classhtest"
return(rval)
} #end for the function
#'
#' @rdname funsNNclass.spec
#'
#' @export
NN.class.spec <- function(dat,lab,...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q; Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
covN<-cov.nnct(ct,varN,Qv,Rv) #row-wise vectorization
rval<-NN.class.spec.ct(ct,covN,byrow=TRUE)
dname <-deparse(substitute(dat))
rval$data.name<-dname
return(rval)
} #end for the function
#'
#################################################################
# funs.class.spec
#'
#' @title Class-specific Chi-square Tests based on NNCTs
#'
#' @description
#' Two functions: \code{class.spec.ct} and \code{class.spec}.
#'
#' Both functions are objects of class \code{"classhtest"} but with different arguments (see the parameter list below).
#' Each one performs class specific segregation tests for the rows if \code{type="base"} and
#' columns if \code{type="NN"} for \eqn{k \ge 2} classes.
#' That is,
#' each one performs hypothesis tests of deviations of
#' entries in each row (column) of NNCT from the expected values under RL or CSR for each row (column)
#' if \code{type="base"} (\code{"NN"}).
#' Recall that row labels of the NNCT are base class labels and
#' column labels in the NNCT are NN class labels.
#' The test for each row (column) \eqn{i} is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{dixon:NNCTEco2002;textual}{nnspat}
#' (\insertCite{ceyhan:stat-neer-class2009;textual}{nnspat}).
#'
#' The argument \code{covN} must be covariance of row-wise (column-wise) vectorization of NNCT if \code{type="base"}
#' (\code{type="NN"}).
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} for each base class \eqn{i}, description of the
#' alternative with the corresponding null values (i.e. expected values) for the row (column) \eqn{i}, estimates for the entries in
#' row (column) \eqn{i} for \eqn{i=1,\ldots,k} if \code{type="base"} (\code{type="NN"}).
#' The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis for each row (column) is that the corresponding \eqn{N_{ij}} entries in row (column) \eqn{i} are
#' equal to their expected values under RL or CSR.
#'
#' See also (\insertCite{dixon:NNCTEco2002,ceyhan:stat-neer-class2009;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{class.spec.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized entries of NNCT, \code{ct} ;
#' used in \code{class.spec.ct} only.
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{class.spec} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{class.spec} only
#' @param type The type of the class-specific tests with default=\code{"base"}.
#' Takes on values\code{"base"} for (Dixon's) base class-specific test
#' and\code{"NN"} for NN class-specific test.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{class.spec} only
#'
#' @return A \code{list} with the elements
#' \item{type}{Type of the class-specific test, which is \code{"base"} or \code{"NN"} for this function}
#' \item{statistic}{The \code{vector} of class-specific test statistics}
#' \item{stat.names}{Name of the test statistics}
#' \item{p.value}{The \code{vector} of \eqn{p}-values for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k-1} for base class-specific test
#' and \eqn{k} for NN class-specific test.}
#' \item{estimate}{Estimates of the parameters, NNCT, i.e., the matrix of the
#' observed \eqn{N_{ij}} values for base class-specific test and transpose of the NNCT for
#' the NN class-specific test.}
#' \item{null.value}{The \code{matrix} of hypothesized null values for the parameters which are expected values
#' of the \eqn{N_{ij}} values for the base class-specific test and transpose of this
#' matrix for the NN-class specific test.}
#' \item{null.name}{Name of the null values}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{class.spec.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{class.spec} only}
#'
#' @seealso \code{\link{base.class.spec.ct}}, \code{\link{base.class.spec}}, \code{\link{NN.class.spec.ct}}
#' and \code{\link{NN.class.spec}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.class.spec
NULL
#'
#' @rdname funs.class.spec
#'
#' @examples
#' n<-20
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv) #default is byrow
#'
#' class.spec(Y,cls)
#' class.spec(Y,cls,type="NN")
#'
#' class.spec.ct(ct,covN)
#' class.spec.ct(ct,covN,type="NN")
#'
#' class.spec(Y,cls,method="max")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' class.spec(Y,fcls)
#' class.spec(Y,fcls,type="NN")
#'
#' class.spec.ct(ct,covN)
#' class.spec.ct(ct,covN,type="NN")
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' class.spec(Y,cls)
#' class.spec(Y,cls,type="NN")
#'
#' class.spec.ct(ct,covN)
#' class.spec.ct(ct,covN,type="NN")
#'
#' @export
class.spec.ct <- function(ct,covN,type="base")
{
if ((type %in% c("NN","base"))==FALSE)
{stop("type is misspecified, should be one of base or NN (in quotes)")}
if (type == "base")
{
res<- base.class.spec.ct(ct,covN)
} else
{
res<- NN.class.spec.ct(ct,covN)
}
return(res)
} #end for the function
#'
#' @rdname funs.class.spec
#'
#' @export
class.spec <- function(dat,lab,type="base",...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q; Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
covN<-cov.nnct(ct,varN,Qv,Rv)
res<-class.spec.ct(ct,covN,type)
dname <-deparse(substitute(dat))
res$data.name<-dname
return(res)
} #end for the function
#'
#################################################################
# funs.overall.nnct
#'
#' @title Dixon's Overall Test of Segregation for NNCT
#'
#' @description
#' Two functions: \code{overall.nnct.ct} and \code{overall.nnct}.
#'
#' Both functions are objects of class \code{"Chisqtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' cell counts from the expected values under RL or CSR for all cells (i.e., entries) combined in the NNCT.
#' That is, each test is Dixon's overall test of segregation based on NNCTs for \eqn{k \ge 2} classes.
#' This overall test is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{dixon:1994,dixon:NNCTEco2002;textual}{nnspat}.
#' Both functions exclude the last column of the NNCT (in fact any column will do and last column
#' is chosen without loss of generality), to avoid ill-conditioning of the covariance matrix (for its inversion
#' in the quadratic form).
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} which is \eqn{k(k-1)}, description of the
#' alternative with the corresponding null values (i.e. expected values) of NNCT entries, sample estimates (i.e. observed values) of the entries in NNCT.
#' The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis is that all \eqn{N_{ij}} entries are equal to their expected values under RL or CSR.
#'
#' See also
#' (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:jkss-posthoc-2017;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{overall.nnct.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized entries of NNCT, \code{ct} ;
#' used in \code{overall.nnct.ct} only.
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{overall.nnct} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{overall.nnct} only
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{overall.nnct} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The overall chi-squared statistic}
#' \item{stat.names}{Name of the test statistic}
#' \item{p.value}{The \eqn{p}-value for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k(k-1)} for this function.}
#' \item{estimate}{Estimates of the parameters, NNCT, i.e., matrix of the observed \eqn{N_{ij}} values
#' which is the NNCT.}
#' \item{est.name,est.name2}{Names of the estimates, former is a longer description of the estimates
#' than the latter.}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of the
#' the \eqn{N_{ij}} values in the NNCT.}
#' \item{null.name}{Name of the null values}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{overall.nnct.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{overall.nnct} only}
#'
#' @seealso \code{\link{overall.seg.ct}}, \code{\link{overall.seg}}, \code{\link{overall.tct.ct}}
#' and \code{\link{overall.tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.overall.nnct
NULL
#'
#' @rdname funs.overall.nnct
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv) #default is byrow
#'
#' overall.nnct(Y,cls)
#' overall.nnct.ct(ct,covN)
#'
#' overall.nnct(Y,cls,method="max")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' overall.nnct(Y,fcls)
#' overall.nnct.ct(ct,covN)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' overall.nnct(Y,cls)
#' overall.nnct.ct(ct,covN)
#'
#' @export
overall.nnct.ct <- function(ct,covN)
{
k<-nrow(ct);
nsq<-mat2vec(ct)$vec #row-wise vectorization
EN<-EV.nnct(ct)
Ensq <- mat2vec(EN)$vec
ind<-(1:k)*k #removes the last column in the NNCT, to avoid the rank deficiency
Nsq<-nsq[-ind]
ENsq<-Ensq[-ind]
Covar<-covN[-ind,-ind]
ts<- t(Nsq-ENsq) %*% ginv(Covar,tol=1.490116e-20) %*% (Nsq-ENsq)
nu<-k*(k-1)
pval<- 1-pchisq(ts,df=nu)
method <-"Dixon's Overall Test of Segregation"
dname <-deparse(substitute(ct))
estimate<-ct
estimate.name <-"Nearest Neighbor Contingency Table (NNCT) entries"
estimate.name2 <-"NNCT entries"
rval <-list(
statistic=ts,
p.value=pval,
df=nu,
estimate = estimate,
est.name = estimate.name,
est.name2 = estimate.name2,
null.value = EN,
method = method,
ct.name = dname
)
attr(rval, "class") <-"Chisqtest"
return(rval)
} #end for the function
#'
#' @rdname funs.overall.nnct
#'
#' @export
overall.nnct <- function(dat,lab,...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q; Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
covN<-cov.nnct(ct,varN,Qv,Rv)
rval<-overall.nnct.ct(ct,covN)
dname <-deparse(substitute(dat))
rval$data.name<-dname
return(rval)
} #end for the function
#'
#################################################################
# funs.overall.tct
#'
#' @title Types I-IV Overall Tests of Segregation for NNCT
#'
#' @description
#' Two functions: \code{overall.tct.ct} and \code{overall.tct}.
#'
#' All functions are objects of class \code{"Chisqtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' cell counts from the expected values under RL or CSR for all cells (i.e., entries) combined in the TCT.
#' That is, each test is one of Types I-IV overall test of segregation based on TCTs for \eqn{k \ge 2} classes.
#' This overall test is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{ceyhan:SJScorrected2010,ceyhan:jkss-posthoc-2017;textual}{nnspat}.
#' Both functions exclude some row and/or column of the TCT, to avoid ill-conditioning of the covariance matrix
#' of the NNCT (for its inversion in the quadratic form).
#' In particular, type-II removes the last column, and all other types remove the last row and column.
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} which is \eqn{k(k-1)} for type II test and \eqn{(k-1)^2}
#' for the other types, description of the
#' alternative with the corresponding null values (i.e. expected values) of TCT entries, sample estimates (i.e. observed values) of the entries in TCT.
#' The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis is that all Tij entries for the specified type are equal to their expected values
#' under RL or CSR.
#'
#' See also
#' (\insertCite{ceyhan:SJScorrected2010,ceyhan:jkss-posthoc-2017;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{overall.tct.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized entries of NNCT, \code{ct} ;
#' used in \code{overall.tct.ct} only.
#' @param type The type of the overall segregation test, default=\code{"III"}.
#' Takes on values \code{"I"}-\code{"IV"} (or equivalently \code{1-4}, respectively.
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{overall.tct} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{overall.tct} only
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{overall.tct} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The overall chi-squared statistic for the specified type}
#' \item{stat.names}{Name of the test statistic}
#' \item{p.value}{The \eqn{p}-value for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k(k-1)} for type=\code{"II"} and \eqn{(k-1)^2} for others.}
#' \item{estimate}{Estimates of the parameters, TCT, i.e., matrix of the observed \eqn{T_{ij}} values
#' which is the TCT.}
#' \item{est.name,est.name2}{Names of the estimates, former is a longer description of the estimates
#' than the latter.}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of the
#' the \eqn{T_{ij}} values in the TCT.}
#' \item{null.name}{Name of the null values}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{overall.tct.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{overall.tct} only}
#'
#' @seealso \code{\link{overall.seg.ct}}, \code{\link{overall.seg}}, \code{\link{overall.nnct.ct}}
#' and \code{\link{overall.nnct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.overall.tct
NULL
#'
#' @rdname funs.overall.tct
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv) #default is byrow
#'
#' overall.tct(Y,cls)
#' overall.tct(Y,cls,type="I")
#' overall.tct(Y,cls,type="II")
#' overall.tct(Y,cls,type="III")
#' overall.tct(Y,cls,type="IV")
#'
#' overall.tct(Y,cls,method="max")
#'
#' overall.tct.ct(ct,covN)
#' overall.tct.ct(ct,covN,type="I")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' overall.tct(Y,fcls)
#' overall.tct.ct(ct,covN)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' overall.tct(Y,cls)
#' overall.tct.ct(ct,covN)
#'
#' @export
overall.tct.ct <- function(ct,covN,type="III")
{
Tct<-tct(ct,type)
estimate<-Tct
estimate.name <-paste("Type ",type," Contingency Table (TCT ",type,") entries",sep="")
estimate.name2 <-paste("TCT ",type," entries",sep="")
k<-nrow(ct);
tsq<-mat2vec(Tct)$vec #row-wise vectorization
ET<-EV.tct(ct,type)
Etsq <- mat2vec(ET)$vec
covT<-cov.tct(ct,covN,type)
if (type == "II")
{nu<-k*(k-1)
ind<-(1:k)*k #removes the last column in the ct
} else
{
nu<-(k-1)^2
ind<-c((1:(k-1))*k,(k^2-k)+(1:k))#removes the last row and column in the ct
#this makes type IV equivalent to type III
#ind<-(k^2-k)+(1:k)#removes last row (makes type IV equivalent to Dixon's)
}
Tsq<-tsq[-ind]
ETsq<-Etsq[-ind]
Covar<-covT[-ind,-ind]
ts<- t(Tsq-ETsq) %*% ginv(Covar,tol=1.490116e-20) %*% (Tsq-ETsq)
pval<- 1-pchisq(ts,df=nu)
method <-paste("Type ",type," Overall Segregation Test",sep="")
dname <-deparse(substitute(ct))
rval <-list(
statistic=ts,
p.value=pval,
df=nu,
estimate = estimate,
est.name = estimate.name,
est.name2 = estimate.name2,
null.value = ET,
method = method,
ct.name = dname
)
attr(rval, "class") <-"Chisqtest"
return(rval)
} #end for the function
#'
#' @rdname funs.overall.tct
#'
#' @export
overall.tct <- function(dat,lab,type="III",...)
{
ipd<-ipd.mat(dat,...)
ct<-nnct(ipd,lab)
W<-Wmat(ipd)
Qv<-Qvec(W)$q; Rv<-Rval(W)
varN<-var.nnct(ct,Qv,Rv)
covN<-cov.nnct(ct,varN,Qv,Rv)
rval<-overall.tct.ct(ct,covN,type)
dname <-deparse(substitute(dat))
rval$data.name<-dname
return(rval)
} #end for the function
#'
#################################################################
# funs.overall.seg
#'
#' @title Overall Segregation Tests for NNCTs
#'
#' @description
#' Two functions: \code{overall.seg.ct} and \code{overall.seg}.
#'
#' All functions are objects of class \code{"Chisqtest"} but with different arguments (see the parameter list below).
#' Each one performs hypothesis tests of deviations of
#' cell counts from the expected values under RL or CSR for all cells (i.e., entries) combined in the NNCT or TCT.
#' That is, each test is one of Dixon's or Types I-IV overall test of segregation based on NNCTs or TCTs
#' for \eqn{k \ge 2} classes.
#' Each overall test is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{dixon:1994,dixon:NNCTEco2002;textual}{nnspat}
#' and to \insertCite{ceyhan:SJScorrected2010,ceyhan:jkss-posthoc-2017;textual}{nnspat}, respectively.
#' All functions exclude some row and/or column of the TCT, to avoid ill-conditioning of the covariance matrix
#' of the NNCT (for its inversion in the quadratic form), see the relevant functions under See also section below.
#'
#' The \code{type="dixon"} or \code{"nnct"} refers to Dixon's overall test of segregation, and
#' \code{type="I"}-\code{"IV"} refers to types I-IV overall tests, respectively.
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} which is \eqn{k(k-1)} for type II and Dixon's test
#' and \eqn{(k-1)^2} for the other types, description of the
#' alternative with the corresponding null values (i.e. expected values) of TCT entries, sample estimates (i.e. observed values) of the entries in TCT.
#' The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis is that all \eqn{N_{ij}} or \eqn{T_{ij}} entries for the specified type are equal to their expected values
#' under RL or CSR, respectively.
#'
#' See also
#' (\insertCite{dixon:1994,dixon:NNCTEco2002,ceyhan:eest-2010,ceyhan:SJScorrected2010,ceyhan:jkss-posthoc-2017;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{overall.seg.ct} only
#' @param covN The \eqn{k^2 \times k^2} covariance matrix of row-wise vectorized entries of NNCT, \code{ct};
#' used in \code{overall.seg.ct} only.
#' @param type The type of the overall test with no default.
#' Takes on values \code{"dixon"} or \code{"nnct"} for Dixon's overall test and \code{"I"}-\code{"IV"} for types I-IV cell-specific
#' test (or equivalently \code{1-6}, respectively).
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{overall.seg} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{overall.seg} only
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{overall.seg} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The overall chi-squared statistic for the specified type}
#' \item{stat.names}{Name of the test statistic}
#' \item{p.value}{The \eqn{p}-value for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k(k-1)} for type II and Dixon's tests
#' and \eqn{(k-1)^2} for others.}
#' \item{estimate}{Estimates of the parameters, NNCT for Dixon's test and type I-IV TCT for others.}
#' \item{est.name,est.name2}{Names of the estimates, former is a longer description of the estimates
#' than the latter.}
#' \item{null.value}{Matrix of hypothesized null values for the parameters which are expected values of the
#' the \eqn{N_{ij}} values in the NNCT or \eqn{T_{ij}} values in the TCT.}
#' \item{null.name}{Name of the null values}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{overall.seg.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{overall.seg} only}
#'
#' @seealso \code{\link{overall.nnct.ct}}, \code{\link{overall.nnct}}, \code{\link{overall.tct.ct}}
#' and \code{\link{overall.tct}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funs.overall.seg
NULL
#'
#' @rdname funs.overall.seg
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv) #default is byrow
#'
#' type<-"dixon" #try also "nnct", I", "II", "III", and "IV"
#' overall.seg(Y,cls,type)
#' overall.seg(Y,cls,type,method="max")
#' overall.seg(Y,cls,type="I")
#'
#' overall.seg.ct(ct,covN,type)
#' overall.seg.ct(ct,covN,type="I")
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' ct<-nnct(ipd,fcls)
#'
#' overall.seg(Y,fcls,type="I")
#' overall.seg.ct(ct,covN,type)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:4,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#'
#' W<-Wmat(ipd)
#' Qv<-Qvec(W)$q
#' Rv<-Rval(W)
#' varN<-var.nnct(ct,Qv,Rv)
#' covN<-cov.nnct(ct,varN,Qv,Rv)
#'
#' overall.seg(Y,cls,type="I")
#' overall.seg.ct(ct,covN,type)
#'
#' @export
overall.seg.ct <- function(ct,covN,type)
{
if ((type %in% c("dixon","nnct","I","II","III","IV"))==FALSE)
{stop("type is misspecified, should be one of dixon, nnct, I, II, III or IV (in quotes)")}
if (type %in% c("dixon","nnct"))
{
res<- overall.nnct.ct(ct,covN)
} else
{
res<- overall.tct.ct(ct,covN,type)
}
return(res)
} #end for the function
#'
#' @rdname funs.overall.seg
#'
#' @export
overall.seg <- function(dat,lab,type,...)
{
if ((type %in% c("dixon","nnct","I","II","III","IV"))==FALSE)
{stop("type is misspecified, should be one of dixon, nnct, I, II, III or IV (in quotes)")}
if (type %in% c("dixon","nnct"))
{
res<- overall.nnct(dat,lab,...)
} else
{
res<- overall.tct(dat,lab,type,...)
}
return(res)
} #end for the function
#'
################################################
#functions for symmetry, reflexivity, and niche specificity
################################################
# funsXsq.nnsym.ss
#'
#' @title Pielou's First Type of NN Symmetry Test with Chi-square Approximation for multiple classes
#' (for Sparse Sampling)
#'
#' @description
#' Two functions: \code{Xsq.nnsym.ss.ct} and \code{Xsq.nnsym.ss}.
#'
#' Both functions are objects of class \code{"Chisqtest"} but with different arguments (see the parameter list below).
#' Each one performs the hypothesis test of equality of the expected value of the off-diagonal
#' cell counts (i.e., entries) under RL or CSR in the NNCT for \eqn{k \ge 2} classes.
#' That is, each performs Pielou's first type of NN symmetry test which is also equivalent to McNemar's
#' test on the NNCT. The test is appropriate (i.e. have the appropriate asymptotic sampling distribution)
#' provided that data is obtained by sparse sampling.
#' (See \insertCite{ceyhan:SWJ-spat-sym2014;textual}{nnspat} for more detail).
#'
#' Each symmetry test is based on the chi-squared approximation of the corresponding quadratic form
#' and are due to \insertCite{pielou:1961;textual}{nnspat}.
#'
#' The argument cont.corr is a logical argument (default=\code{TRUE}) for continuity correction to this test.
#' If \code{TRUE} the continuity correction to McNemar's test is implemented,
#' and if \code{FALSE} such a correction is not implemented.
#'
#' Each function yields the test statistic, \eqn{p}-value and \code{df} which is \eqn{k(k-1)/2}, description of the
#' alternative with the corresponding null values (i.e. expected values) of differences of the off-diagonal entries,(which is
#' 0 for this function) and also the sample estimates (i.e. observed values) of absolute differences of th
#' off-diagonal entries of NNCT (in the upper-triangular form).
#' The functions also provide names of the test statistics, the method and the data set used.
#'
#' The null hypothesis is that \eqn{E(N_{ij})=E(N_{ji})} for all entries for \eqn{i \ne j} (i.e., symmetry in the
#' mixed NN structure).
#' In the output, the test statistic, \eqn{p}-value and \code{df} are valid only for (properly) sparsely sampled data.
#'
#' See also
#' (\insertCite{pielou:1961,ceyhan:SWJ-spat-sym2014;textual}{nnspat})
#' and the references therein.
#'
#' @param ct A nearest neighbor contingency table, used in \code{Xsq.nnsym.ss.ct} only
#' @param dat The data set in one or higher dimensions, each row corresponds to a data point,
#' used in \code{Xsq.nnsym.ss} only
#' @param lab The \code{vector} of class labels (numerical or categorical), used in \code{Xsq.nnsym.ss} only
#' @param cont.corr A logical argument (default=\code{TRUE}).
#' If \code{TRUE} the continuity correction to McNemar's test is implemented,
#' and if \code{FALSE} such a correction is not implemented.
#' @param \dots are for further arguments, such as \code{method} and \code{p}, passed to the \code{\link[stats]{dist}} function.
#' used in \code{Xsq.nnsym.ss} only
#'
#' @return A \code{list} with the elements
#' \item{statistic}{The chi-squared test statistic for Pielou's first type of NN symmetry test}
#' \item{stat.names}{Name of the test statistic}
#' \item{p.value}{The \eqn{p}-value for the hypothesis test}
#' \item{df}{Degrees of freedom for the chi-squared test, which is \eqn{k(k-1)/2} for this function.}
#' \item{estimate}{Estimates, i.e., absolute differences of the off-diagonal entries of
#' NNCT (in the upper-triangular form).}
#' \item{est.name,est.name2}{Names of the estimates, former is a shorter description of the estimates
#' than the latter.}
#' \item{null.value}{Hypothesized null values for the differences between the expected values of the off-diagonal
#' entries, which is 0 for this function.}
#' \item{method}{Description of the hypothesis test}
#' \item{ct.name}{Name of the contingency table, \code{ct}, returned by \code{Xsq.nnsym.ss.ct} only}
#' \item{data.name}{Name of the data set, \code{dat}, returned by \code{Xsq.nnsym.ss} only}
#'
#' @seealso \code{\link{Znnsym2cl.ss.ct}}, \code{\link{Znnsym2cl.ss}}, \code{\link{Znnsym.ss.ct}},
#' \code{\link{Znnsym.ss}}, \code{\link{Xsq.nnsym.dx.ct}}, \code{\link{Xsq.nnsym.dx}}
#' and \code{\link{Qsym.test}}
#'
#' @references
#' \insertAllCited{}
#'
#' @name funsXsq.nnsym.ss
NULL
#'
#' @rdname funsXsq.nnsym.ss
#'
#' @author Elvan Ceyhan
#'
#' @examples
#' n<-20 #or try sample(1:20,1)
#' Y<-matrix(runif(3*n),ncol=3)
#' ipd<-ipd.mat(Y)
#' cls<-sample(1:2,n,replace = TRUE) #or try cls<-rep(1:2,c(10,10))
#' ct<-nnct(ipd,cls)
#' ct
#'
#' Xsq.nnsym.ss(Y,cls)
#' Xsq.nnsym.ss.ct(ct)
#'
#' Xsq.nnsym.ss(Y,cls,method="max")
#'
#' Xsq.nnsym.ss(Y,cls,cont.corr=FALSE)
#' Xsq.nnsym.ss.ct(ct,cont.corr=FALSE)
#'
#' #cls as a factor
#' na<-floor(n/2); nb<-n-na
#' fcls<-rep(c("a","b"),c(na,nb))
#' Xsq.nnsym.ss(Y,fcls)
#'
#' #############
#' n<-40
#' Y<-matrix(runif(3*n),ncol=3)