R/RcppExports.R
In WaveSampling: Weakly Associated Vectors (WAVE) Sampling

Documented in distUnitk IB sb_vk wave wpik wpikInv

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @encoding UTF-8
#' @title Spreading measure based on Moran's \eqn{I} index
#'
#' @description
#'
#' This function implements the spreading measure based on Moran's \eqn{I} index.
#'
#' @param W a stratification matrix inheriting from \code{\link[Matrix]{sparseMatrix}} that represents the spatial weights. See \code{\link{wpik}}.
#' @param s a vector of size \eqn{N} with elements equal to 0 or 1. The value 1 indicates that the unit is selected while the value 0 is for non-chosen units.
#'
#'
#' @details
#' 
#' This index is developped by Tillé et al. (2018) and measure the spreading of a sample drawn from a population.
#' It uses a corrected version of the traditional Moran's \eqn{I} index. Each row of the matrix \eqn{\bf W} should represents a stratum. Each 
#' stratum is defined by a particular unit and its neighbouring units. See \code{\link{wpik}}.
#' The spatial balance measure is equal to
#' 
#' \deqn{I_B =\frac{( \bf s- \bar{s}_w)^\top  W ( s- \bar{s}_w)}{\bf \sqrt{( s- \bar{s}_w)^\top  D ( s- \bar{s}_w) ( s- \bar{s}_w)^\top  B ( s- \bar{s}_w)}},}
#' 
#' where \eqn{\bf D} is the diagonal matrix containing the \eqn{w_i}, 
#' 
#' \deqn{ \bf \bar{s}_w =  1 \frac{ s^\top  W  1}{ 1^\top  W  1}}
#' 
#' and 
#' 
#' \deqn{ \bf B =  W^\top  D^{-1}  W - \frac{ W^\top  1 1^\top  W}{1^\top  W  1}.}
#' 
#' To specify the spatial weights uses the argument \code{W}.
#'
#' @return A numeric value that represents the spatial balance. It could be any real value between -1 (spread) and 1 (clustered). 
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references
#' Tillé, Y., Dickson, M.M., Espa, G., and Guiliani, D. (2018). Measuring the spatial balance of a sample: A new measure based on Moran's I index.
#' \emph{Spatial Statistics}, 23, 182-192.
#' 
#' 
#' 
#' @seealso
#' \code{\link{wpik}}
#' 
#' @examples
#'   N <- 36
#'   n <- 12
#'   x <- seq(1,sqrt(N),1)
#'   X <- expand.grid(x,x)
#'   pik <- rep(n/N,N)
#'   W <- wpik(as.matrix(X),pik,bound = 1,tore = TRUE,shift = FALSE,toreBound = sqrt(N))
#'   W <- W - diag(diag(W))
#'   s <- wave(as.matrix(X),pik,tore = TRUE,shift = TRUE,comment = TRUE)
#'   IB(W,s)
#' 
#' @export
IB <- function(W, s) {
    .Call('_WaveSampling_IB', PACKAGE = 'WaveSampling', W, s)
}

#' @title Squared Euclidean distances of the unit k.
#'
#' @description
#' Calculate the squared Euclidean distance from unit \eqn{k} to the other units.
#' 
#'
#' @param X matrix representing the spatial coordinates. 
#' @param k the unit index to be used.
#' @param tore an optional logical value, if we are considering the distance on a tore. See Details.
#' @param toreBound an optional numeric value that specify the length of the tore.
#'
#'
#' @details
#' 
#' Let \eqn{\mathbf{x}_k,\mathbf{x}_l} be the spatial coordinates of the unit \eqn{k,l \in U}. The classical euclidean distance is given by
#' 
#' \deqn{d^2(k,l) = (\mathbf{x}_k - \mathbf{x}_l)^\top (\mathbf{x}_k - \mathbf{x}_l). }
#' 
#' When the points are distributed on a \eqn{N_1 \times N_2} regular grid of \eqn{R^2}.
#' It is possible to consider the units like they were placed on a tore. It can be illustrated by Pac-Man passing through the wall to get away from ghosts. Specifically,
#' we could consider two units on the same column (resp. row) that are on the opposite have a small distance,
#' 
#' \deqn{ d^2_T(k,l) = min( (x_{k_1} - x_{l_1})^2,
#'                       (x_{k_1} + N_1 - x_{l_1})^2,
#'                       (x_{k_1} - N_1 - x_{l_1})^2) +}
#' \deqn{ min( (x_{k_2} - x_{l_2})^2,
#'                       (x_{k_2} + N_2 - x_{l_2})^2,
#'                       (x_{k_2} - N_2 - x_{l_2})^2).}
#'
#' The option \code{toreBound} specify the length of the tore in the case of \eqn{N_1 = N_2 = N}. 
#' It is omitted if the \code{tore} option is equal to \code{FALSE}.
#'
#' @return a vector of length \eqn{N} that contains the distances from the unit \eqn{k} to all other units.
#'
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' 
#' @seealso
#' \code{\link{wpik}}, \code{\link{wave}} and \code{\link[stats]{dist}}.
#'
#' @examples
#'   N <- 5
#'   x <- seq(1,N,1)
#'   X <- as.matrix(expand.grid(x,x))
#'   distUnitk(X,k = 2,tore = TRUE,toreBound = 5)
#'   distUnitk(X,k = 2,tore = FALSE,toreBound = -1)
#' @export
distUnitk <- function(X, k, tore, toreBound) {
    .Call('_WaveSampling_distUnitk', PACKAGE = 'WaveSampling', X, k, tore, toreBound)
}

#' @encoding UTF-8
#' @title Projection operator
#'
#'
#' @description
#'
#' This operator projects the vector v orthogonally onto the line spanned by vector u.
#'
#' @param v vector projected.
#' @param u vector that define the line on which we project.
#' 
#' @details
#' 
#' The projection operator is defined by :
#' 
#' \deqn{proj_u(v) = \frac{\langle u , v \rangle}{\langle u, u \rangle} u}
#'  where \eqn{\langle . , . \rangle} is the inner product also written \eqn{u^\top v}.
#' 
#' @return The projection of the vector v onto the line spanned by the vector u.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' \url{https://en.wikipedia.org/wiki/Projection_(linear_algebra)}s
#' 
NULL

#' @title Column sums for sparseMatrix
#'
#' @description
#' Form column sums for sparseMatrix.
#'
#' @param x A sparse matrix, i.e., inheriting from \code{\link[Matrix]{sparseMatrix}}.
#'
#' @details
#' This function is designed to be used for internal \code{RcppArmadillo} functions. Nevertheless it could be applied in R.
#' It loops on the non-zero entries of the \code{\link[Matrix]{sparseMatrix}}. For general uses, the function
#' \code{\link[Matrix]{colSums}} should be prefered.
#'
#' @return column sums of x.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @seealso
#' \code{\link[Matrix]{colSums}}, \code{\link[Matrix]{rowSums}}.
#'
NULL

#' @title Row sums on sparse matrix.
#'
#' @description
#' Form row sums for sparseMatrix.
#'
#' @param x A sparse matrix, i.e., inheriting from \code{\link[Matrix]{sparseMatrix}}.
#'
#' @details
#' This function is designed to be used for internal \code{RcppArmadillo} functions. Nevertheless it could be applied in R.
#' It loops on the non-zero entries of the \code{\link[Matrix]{sparseMatrix}}. For general uses, the function \code{\link[Matrix]{rowSums}} should
#' be prefered.
#'
#' @return row sums of x. 
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @seealso
#' \code{\link[Matrix]{colSums}}, \code{\link[Matrix]{rowSums}}.
#' 
#' 
NULL

#' @encoding UTF-8
#' @title Values \eqn{v_k} to compute the Spatial balance
#' 
#' @description
#' 
#' Calculates the \eqn{v_k} values of the spatial balance developped by Stevens and Olsen (2004) and suggested by Grafström et al. (2012).
#' 
#' @param pik vector of the inclusion probabilities. The length should be equal to \eqn{N}.
#' @param X matrix representing the spatial coordinates.
#' @param s A vector of size \eqn{N} with elements equal 0 or 1. The value 1 indicates that the unit is selected while the value 0 is for non-chosen unit.
#' 
#' @details
#' 
#' The spatial balance measure based on the Voronoï polygons is defined by 
#' 
#' \deqn{B(S) = \frac{1}{n}\sum_{k\in S} (v_k -1)^2 .}
#' 
#' The function return the \eqn{v_k} values and is mainly based on the function \code{\link[BalancedSampling:sb]{sb}} of the package \code{BalancedSampling} Grafström and Lisic (2019).
#' 
#' @return A vector of size \eqn{N} with elements equal to the \eqn{v_k} values. If the unit is not selected then the value is equal to 0.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' 
#' Grafström, A., Lundström, N.L.P. and Schelin, L. (2012). Spatially balanced sampling through the Pivotal method. 
#' \emph{Biometrics}, 68(2), 514-520
#' 
#' Grafström, A., Lisic J. (2019). BalancedSampling: Balanced and Spatially Balanced Sampling. R package version 1.5.5.
#' https://CRAN.R-project.org/package=BalancedSampling
#' 
#' Stevens, D. L. Jr. and Olsen, A. R. (2004). Spatially balanced sampling of natural resources.
#' \emph{Journal of the American Statistical Association 99, 262-278}
#' 
#' @seealso
#' \code{\link[BalancedSampling:sb]{BalancedSampling::sb}}
#' 
#' 
#' @examples
#' N <- 50
#' n <- 10
#' X <- as.matrix(cbind(runif(N),runif(N)))
#' pik <- rep(n/N,N)
#' s <- wave(X,pik)
#' v <- sb_vk(pik,X,s)
#' 
#' @export
sb_vk <- function(pik, X, s) {
    .Call('_WaveSampling_sb_vk', PACKAGE = 'WaveSampling', pik, X, s)
}

#' @encoding UTF-8
#' @title Weakly associated vectors sampling
#'
#' @description
#'
#' Select a spread sample from inclusion probabilities using the weakly associated vectors sampling method.  
#'
#' @param X matrix representing the spatial coordinates. 
#' @param pik vector of the inclusion probabilities. The length should be equal to N.
#' @param bound a scalar representing the bound to reach. See Details. Default is 1.
#' @param tore an optional logical value, if we are considering the distance on a tore. See Details. Default is \code{TRUE}.
#' @param shift an optional logical value, if you would use a shift perturbation. See Details. Default is \code{FALSE}.
#' @param toreBound a numeric value that specify the size of the grid. Default is -1.
#' @param comment an optional logical value, indicating some informations during the execution. Default is \code{FALSE}.
#' @param fixedSize an optional logical value, if you would impose a fixed sample size. Default is \code{TRUE}
#'
#' @details
#' 
#' The main idea is derived from the cube method (Devill and Tillé, 2004). At each step, the inclusion probabilities vector \code{pik}
#' is randomly modified. This modification is carried out in a direction that best preserves the spreading of the sample.
#' 
#' A stratification matrix \eqn{\bf A} is computed from the spatial weights matrix calculated from the function \code{\link{wpik}}.
#' Depending if \eqn{\bf A} is full rank or not, the vector giving the direction is not selected in the same way.
#' 
#' If matrix \eqn{\bf A} is not full rank, a vector that is contained in the right null space is selected:
#' \deqn{ Null(\bf A) = \{ \bf x \in R^N | \bf A\bf x = \bf 0  \}. }
#' 
#' If matrix \eqn{\bf A} is full rank, we find \eqn{\bf v}, \eqn{\bf u} the singular vectors associated to the 
#' smallest singular value \eqn{\sigma } of \eqn{\bf A} such that
#' 
#' \deqn{ \bf A\bf v = \sigma \bf u,~~~~ \bf A^\top \bf u = \sigma \bf v.}
#' 
#' Vector \eqn{ \bf v } is then centered to ensure fixed sample size. At each step, inclusion probabilities is modified and at least on component is set to 0 or 1. Matrix \eqn{\bf A } is updated 
#' from the new inclusion probabilities. The whole procedure it repeated until it remains only one component that is not equal to 0 or 1.
#' 
#' For more informations on the options \code{tore} and \code{toreBound}, see \code{\link{distUnitk}}. If \code{tore} is set up \code{TRUE} and \code{toreBound} not specified the \code{toreBound} is equal to 
#' \deqn{N^{1/p}}
#' where \eqn{p} is equal to the number of column of the matrix \code{X}.
#' 
#' For more informations on the option \code{shift}, see \code{\link{wpik}}.
#' 
#' If \code{fixedSize} is equal \code{TRUE}, the weakest associated vector is centered at each step of the algorithm. This ensures that the size of the selected sample is equal to the sum of the inclusion probabilities.
#' 
#' @return A vector of size \eqn{N} with elements equal 0 or 1. The value 1 indicates that the unit is selected while the value 0 is for non-chosen unit.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Deville, J. C. and Tillé, Y. (2004). Efficient balanced sampling: the cube method. Biometrika, 91(4), 893-912
#' 
#' 
#' @seealso
#' \code{\link{wpik}}, \code{\link{distUnitk}}.
#' 
#' 
#' @examples
#' 
#' #------------
#' # Example 2D
#' #------------
#' 
#' N <- 50
#' n <- 15
#' pik <- rep(n/N,N)
#' X <- as.matrix(cbind(runif(N),runif(N)))
#' s <- wave(X,pik)
#' 
#' #------------
#' # Example 2D grid 
#' #------------
#' 
#' N <- 36 # 6 x 6 grid
#' n <- 12 # number of unit selected
#' x <- seq(1,sqrt(N),1)
#' X <- as.matrix(cbind(rep(x,times = sqrt(N)),rep(x,each = sqrt(N))))
#' pik <- rep(n/N,N)
#' s <- wave(X,pik, tore = TRUE,shift = FALSE)
#' 
#' #------------
#' # Example 1D
#' #------------
#' 
#' N <- 100
#' n <- 10
#' X <- as.matrix(seq(1,N,1))
#' pik <- rep(n/N,N)
#' s <- wave(X,pik,tore = TRUE,shift =FALSE,comment = TRUE)
#' 
#' 
#' @export
wave <- function(X, pik, bound = 1.0, tore = FALSE, shift = FALSE, toreBound = -1, comment = FALSE, fixedSize = TRUE) {
    .Call('_WaveSampling_wave', PACKAGE = 'WaveSampling', X, pik, bound, tore, shift, toreBound, comment, fixedSize)
}

#' @encoding UTF-8
#' @title Stratification matrix from inclusion probabilities
#'
#' @description
#'
#' The stratification matrix is calculated from the inclusion probabilities. It takes the distances between units into account. See Details.
#'  
#'
#' @param X matrix representing the spatial coordinates. 
#' @param pik vector of the inclusion probabilities. The length should be equal to \eqn{N}.
#' @param bound a scalar representing the bound to reach. Default is 1.
#' @param tore an optional logical value, if we are considering the distance on a tore. Default is \code{FALSE}.
#' @param shift an optional logical value, if you would use a shift perturbation. See Details for more informations. Default is \code{FALSE}.
#' @param toreBound a numeric value that specify the size of the grid. Default is -1.
#' 
#' @details
#' 
#' Entries of the stratification matrix indicates how the units are close from each others. Hence a large value \eqn{w_{kl}} means that the unit \eqn{k} 
#' is close to the unit \eqn{l}. This function considers that a unit represents its neighbor till their inclusion probabilities
#' sum up to \code{bound}.
#' 
#' We define \eqn{G_k} the set of the nearest neighbor of the unit \eqn{k} including \eqn{k} such that the sum of their inclusion
#' probabilities is just greater than \code{bound}. Moreover, let \eqn{g_k = \#{G_k}}, the number of elements in \eqn{G_k}.
#' The matrix \eqn{\bf W} is then defined as follows,
#' \itemize{
#'   \item \eqn{ w_{kl} = \pi_l} if unit \eqn{l} is in the set of the  \eqn{g_k - 1} nearest neighbor of \eqn{k}.
#'   \item \eqn{ w_{kl} = \pi_l + 1 - (\sum_{t \in G_k} \pi_t)} if unit \eqn{l} is the \eqn{g_k} nearest neighbour of \eqn{k}.
#'   \item \eqn{w_{kl} = 0} otherwise.
#' }
#' 
#' 
#' Hence, the \eqn{k}th row of the matrix represents
#' neighborhood or stratum of the unit such that the inclusion probabilities sum up to 1 and
#' the \eqn{k}th column the weights that unit \eqn{k} takes for each stratum. 
#' 
#' The option \code{shift} add a small normally distributed perturbation \code{rnorm(0,0.01)} to the coordinates
#' of the centroid of the stratum considered. This could be useful if there are many unit that have the same distances.
#' Indeed, if two units have the same distance and are the last unit before that the bound is reached, then the weights
#' of the both units is updated. If a shift perturbation is used then all the distances are differents and only one unit
#' weight is update such that the bound is reached. 
#' 
#' The shift perturbation is generated at the beginning of the procedure such that each stratum is shifted by the same perturbation.
#' 
#' @return A sparse matrix representing the spatial weights.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @seealso
#' \code{\link{wpikInv}}, \code{\link{distUnitk}}, \code{\link{wave}}.
#' 
#' @examples
#' 
#' N <- 25
#' n <- 5
#' X <- as.matrix(cbind(runif(N),runif(N)))
#' pik <- rep(n/N,N)
#' W <- wpik(X,pik)
#' 
#' @export
wpik <- function(X, pik, bound = 1.0, tore = FALSE, shift = FALSE, toreBound = -1.0) {
    .Call('_WaveSampling_wpik', PACKAGE = 'WaveSampling', X, pik, bound, tore, shift, toreBound)
}

#' @encoding UTF-8
#' @title Stratification matrix from inverse inclusion probabilities
#'
#' @description
#'
#' The stratification matrix is calculated from the inverse inclusion probabilities. It is a direct
#' implementation of the spatial weights specified in Tillé et al., (2018).
#'
#' @param X matrix representing the spatial coordinates. 
#' @param pik vector of the inclusion probabilities. The length should be equal to N.
#' @param tore an optional logical value, if we are considering the distance on a tore. Default is \code{FALSE}.
#' @param shift an optional logical value, if you would use a shift perturbation. See Details for more informations. Default is \code{FALSE}.
#' @param toreBound a numeric value that specify the size of the grid. Default is -1.
#' 
#' @details
#' 
#' Entries of the stratification matrix indicates how the units are close from each others. Hence a large value \eqn{w_{kl}} means that the unit \eqn{k} 
#' is close to the unit \eqn{l}. This function considers that if unit \eqn{k} were selected in the sample drawn from the population then
#' \eqn{k} would represent \eqn{1/\pi_k} units in the population and, as a consequence, it would be natural to consider that
#' \eqn{k} has \eqn{n_k =  (1/\pi_k - 1)} neighbours in the population. The \eqn{n_k} neighbours are the nearest neighbours of \eqn{k} according to distances.
#' The weights are so calculated as follows :
#' 
#' \itemize{
#'   \item \eqn{ w_{kl} = 1} if unit \eqn{l \in N_{\lfloor n_k \rfloor }}
#'   \item \eqn{ w_{kl} = n_k - \lfloor n_k \rfloor} if unit \eqn{l} is the \eqn{\lceil n_k \rceil} nearest neighbour of \eqn{k}.
#'   \item \eqn{w_{kl} = 0} otherwise.
#' }
#' 
#' 
#' \eqn{\lfloor n_k \rfloor } and \eqn{\lceil n_k \rceil} are the inferior and the superior integers of \eqn{n_k}.
#' 
#' The option \code{shift} add a small normally distributed perturbation \code{rnorm(0,0.01)} to the coordinates
#' of the centroid of the stratum considered. This could be useful if there are many unit that have the same distances.
#' Indeed, if two units have the same distance and are the last unit before that the bound is reached, then the weights
#' of the both units is updated. If a shift perturbation is used then all the distances are differents and only one unit
#' weight is update such that the bound is reached. 
#' 
#' The shift perturbation is generated at the beginning of the procedure such that each stratum is shifted by the same perturbation.
#'
#' 
#' @return A sparse matrix representing the spatial weights.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tillé, Y., Dickson, M.M., Espa, G., and Guiliani, D. (2018). Measuring the spatial balance of a sample: A new measure based on Moran's I index.
#' \emph{Spatial Statistics}, 23, 182-192.
#' 
#' @seealso
#' \code{\link{wpik}}, \code{\link{distUnitk}}, \code{\link{wave}}.
#' @examples
#' 
#' N <- 25
#' n <- 5
#' X <- as.matrix(cbind(runif(N),runif(N)))
#' pik <- rep(n/N,N)
#' W <- wpikInv(X,pik)
#' 
#' @export
wpikInv <- function(X, pik, tore = FALSE, shift = FALSE, toreBound = -1) {
    .Call('_WaveSampling_wpikInv', PACKAGE = 'WaveSampling', X, pik, tore, shift, toreBound)
}