R/RcppExports.R

Defines functions vApp vDBS vEst osod maxentpi2 cps piktfrompik pikfromq sfromq qfromw inclprob distUnitk disjMatrix ncat disj gencalibRaking calibRaking c_bound2 c_bound

Documented in calibRaking c_bound c_bound2 cps disj disjMatrix distUnitk gencalibRaking inclprob maxentpi2 ncat osod pikfromq piktfrompik qfromw sfromq vApp vDBS vEst

# Generated by using Rcpp::compileAttributes() -> do not edit by hand
# Generator token: 10BE3573-1514-4C36-9D1C-5A225CD40393

#' @title C bound
#' @name c_bound
#' @description This function is returning the number of unit that we need such that some conditions are fulfilled. See Details
#' 
#' @param pik vector of the inclusion probabilities.
#' 
#' @details
#' The function is computing the number of unit \eqn{K} that we need to add such that the following conditions are fulfilled :
#'
#' \itemize{
#' \item \eqn{\sum_{k = 1}^K \pi_k \geq 1}
#' \item \eqn{\sum_{k = 1}^K 1 - \pi_k \geq 1}
#' \item Let \eqn{c} be the constant such that \eqn{\sum_{k = 2}^K min(c\pi_k,1) = n }, we must have that \eqn{ \pi_1 \geq 1- 1/c}
#' }
#' 
#' @return An integer value, the number of units that we need to respect the constraints.
#'
#' @seealso \code{\link{osod}}
#'
#' @author Raphael Jauslin \email{raphael.jauslin@@unine.ch}
#'
#' 
#' @export
c_bound <- function(pik) {
    .Call(`_StratifiedSampling_c_bound`, pik)
}

#' @title C bound
#' @name c_bound2
#' @description This function is returning the number of unit that we need such that some conditions are fulfilled. See Details
#' 
#' @param pik vector of the inclusion probabilities.
#' 
#' @details
#' The function is computing the number of unit \eqn{K} that we need to add such that the following conditions are fulfilled :
#'
#' \itemize{
#' \item \eqn{\sum_{k = 1}^K \pi_k \geq 1}
#' \item \eqn{\sum_{k = 1}^K 1 - \pi_k \geq 1}
#' \item Let \eqn{c} be the constant such that \eqn{\sum_{k = 2}^K min(c\pi_k,1) = n }, we must have that \eqn{ \pi_1 \geq 1- 1/c}
#' }
#' 
#' @return An integer value, the number of units that we need to respect the constraints.
#'
#' @seealso \code{\link{osod}}
#'
#' @author Raphael Jauslin \email{raphael.jauslin@@unine.ch}
#'
#' 
#' @export
c_bound2 <- function(pik) {
    .Call(`_StratifiedSampling_c_bound2`, pik)
}

#' @title Calibration using raking ratio  
#' @name calibRaking
#' @description This function is inspired by the function \code{\link[sampling:calib]{calib}} of the package sampling. It computes the g-weights of the calibration estimator.
#' 
#' @param Xs A matrix of calibration variables.
#' @param d A vector, the initial weights.
#' @param total A vector that represents the initial weights.
#' @param q A vector of positive value that account for heteroscedasticity.
#' @param max_iter An integer, the maximum number of iterations. Default = 500.
#' @param tol A scalar that represents the tolerance value for the algorithm. Default = 1e-9.
#'
#' @details
#' More details on the different calibration methods can be read in Tillé Y. (2020).
#'
#' @return A vector, the value of the g-weights.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#'
#'
#' @references Tillé, Y. (2020). \emph{Sampling and estimation from finite populations}. Wiley, New York
#' 
#' @export
calibRaking <- function(Xs, d, total, q, max_iter = 500L, tol = 1e-9) {
    .Call(`_StratifiedSampling_calibRaking`, Xs, d, total, q, max_iter, tol)
}

#' @title Generalized calibration using raking ratio  
#' @name gencalibRaking
#' @description This function is inspired by the function \code{\link[sampling:calib]{calib}} of the package sampling. It computes the g-weights of the calibration estimator.
#' 
#' @param Xs A matrix of calibration variables.
#' @param Zs A matrix of instrumental variables with same dimension as Xs. 
#' @param d A vector, the initial weights.
#' @param total A vector that represents the initial weights.
#' @param q A vector of positive value that account for heteroscedasticity.
#' @param max_iter An integer, the maximum number of iterations. Default = 500.
#' @param tol A scalar that represents the tolerance value for the algorithm. Default = 1e-9.
#'
#' @details
#' More details on the different calibration methods can be read in Tillé Y. (2020).
#'
#' @return A vector, the value of the g-weights.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#'
#'
#' @references Tillé, Y. (2020). \emph{Sampling and estimation from finite populations}. Wiley, New York
#' 
#' @export
gencalibRaking <- function(Xs, Zs, d, total, q, max_iter = 500L, tol = 1e-9) {
    .Call(`_StratifiedSampling_gencalibRaking`, Xs, Zs, d, total, q, max_iter, tol)
}

#' @title Disjunctive
#' @name disj
#' @description
#' This function transforms a categorical vector into a matrix of indicators.
#'
#' @param strata A vector of integers that represents the categories.
#'
#' @return A matrix of indicators.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#'
#' @examples
#' strata <- rep(c(1,2,3),each = 4)
#' disj(strata)
#'
#' @export
disj <- function(strata) {
    .Call(`_StratifiedSampling_disj`, strata)
}

#' @title Number of categories
#' @name ncat
#' @description
#' This function returns the number of factor in each column of a categorical matrix.
#'
#' @param Xcat A matrix of integers that contains categorical vector in each column.
#'
#' @return A row vector that contains the number of categories in each column.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#'
#' @export
#' 
#' @examples
#' Xcat <-  matrix(c(sample(x = 1:6, size = 100, replace = TRUE),
#'             sample(x = 1:6, size = 100, replace = TRUE),
#'             sample(x = 1:6, size = 100, replace = TRUE)),ncol = 3)
#' ncat(Xcat)
ncat <- function(Xcat) {
    .Call(`_StratifiedSampling_ncat`, Xcat)
}

#' @title Disjunctive for matrix  
#' @name disjMatrix
#' @description
#' This function transforms a categorical matrix into a matrix of indicators variables.
#'
#' @param strata A matrix of integers that contains categorical vector in each column.
#'
#' @return A matrix of indicators.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @examples
#' Xcat <-  matrix(c(sample(x = 1:6, size = 100, replace = TRUE),
#'             sample(x = 1:6, size = 100, replace = TRUE),
#'             sample(x = 1:6, size = 100, replace = TRUE)),ncol = 3)
#' disjMatrix(Xcat)
#'
#' @export
disjMatrix <- function(strata) {
    .Call(`_StratifiedSampling_disjMatrix`, strata)
}

#' @title Squared Euclidean distances of the unit k.
#' @name distUnitk
#' @description
#' Calculate the squared Euclidean distance from unit \eqn{k} to the other units.
#' 
#'
#' @param X matrix representing the spatial coordinates. 
#' @param k the unit index to be used.
#' @param tore an optional logical value, if we are considering the distance on a tore. See Details.
#' @param toreBound an optional numeric value that specify the length of the tore.
#'
#'
#' @details
#' 
#' 
#' Let \eqn{\mathbf{x}_k,\mathbf{x}_l} be the spatial coordinates of the unit \eqn{k,l \in U}. The classical euclidean distance is given by
#' 
#' \deqn{d^2(k,l) = (\mathbf{x}_k - \mathbf{x}_l)^\top (\mathbf{x}_k - \mathbf{x}_l). }
#' 
#' When the points are distributed on a \eqn{N_1 \times N_2} regular grid of \eqn{R^2}.
#' It is possible to consider the units like they were placed on a tore. It can be illustrated by Pac-Man passing through the wall to get away from ghosts. Specifically,
#' we could consider two units on the same column (resp. row) that are on the opposite have a small distance,
#' 
#' \deqn{ d^2_T(k,l) = min( (x_{k_1} - x_{l_1})^2,
#'                       (x_{k_1} + N_1 - x_{l_1})^2,
#'                       (x_{k_1} - N_1 - x_{l_1})^2) +}
#' \deqn{ min( (x_{k_2} - x_{l_2})^2,
#'                       (x_{k_2} + N_2 - x_{l_2})^2,
#'                       (x_{k_2} - N_2 - x_{l_2})^2).}
#'
#' The option \code{toreBound} specify the length of the tore in the case of \eqn{N_1 = N_2 = N}. 
#' It is omitted if the \code{tore} option is equal to \code{FALSE}.
#'
#' @return a vector of length \eqn{N} that contains the distances from the unit \eqn{k} to all other units.
#'
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' 
#' @seealso
#' \code{\link[stats]{dist}}.
#'
#' @examples
#'   N <- 5
#'   x <- seq(1,N,1)
#'   X <- as.matrix(expand.grid(x,x))
#'   distUnitk(X,k = 2,tore = TRUE,toreBound = 5)
#'   distUnitk(X,k = 2,tore = FALSE,toreBound = -1)
#' @export
distUnitk <- function(X, k, tore, toreBound) {
    .Call(`_StratifiedSampling_distUnitk`, X, k, tore, toreBound)
}

#' @title Inclusion Probabilities
#' @name inclprob
#' @description Computes first-order inclusion probabilities from a vector of positive numbers.
#' 
#' @param x vector of positive numbers.
#' @param n sample size (could be a positive real value).
#' 
#' @details
#' The function is implemented in C++ so that it can be used in the code of other C++ functions. The implementation is based on the function  \code{\link[sampling]{inclusionprobabilities}} of the package sampling.
#'
#' @return A vector of inclusion probabilities proportional to \code{x} and such that the sum is equal to the value \code{n}.
#'
#' @author Raphael Jauslin \email{raphael.jauslin@@unine.ch}
#'
#' @seealso \code{\link[sampling]{inclusionprobabilities}}
#' 
#' @examples
#' 
#' x <- runif(100)
#' pik <- inclprob(x,70)
#' sum(pik)
#' 
#' @export
inclprob <- function(x, n) {
    .Call(`_StratifiedSampling_inclprob`, x, n)
}

#' @title q from w
#' @name qfromw
#' @description This function finds the matrix \code{q} form a particular \code{w}.
#'  
#' @param w A vector of weights.
#' @param n An integer that is equal to the sum of the inclusion probabilities.
#' 
#' @details
#' 
#' \code{w} is generally computed by the formula \code{pik/(1-pik)}, where \code{n} is equal to the sum of the vector \code{pik}.
#' More details could be find in Tille (2006).
#' 
#' @return A matrix of size \code{N} x \code{n}, where \code{N} is equal to the length of the vector \code{w}.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tille, Y. (2006), Sampling Algorithms, springer
#' 
#' @export
qfromw <- function(w, n) {
    .Call(`_StratifiedSampling_qfromw`, w, n)
}

#' @title s from q
#' @name sfromq
#' @description This function finds sample \code{s} form the matrix \code{q}.
#'  
#' @param q A matrix that is computed from the function \code{\link{qfromw}}. 
#' 
#' @details
#' 
#' More details could be find in Tille (2006).
#' 
#' @return A vector with elements equal to 0 or 1. The value 1 indicates that the unit is selected while the value 0 is for rejected units.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tille, Y. (2006), Sampling Algorithms, springer
#' 
#' @export
sfromq <- function(q) {
    .Call(`_StratifiedSampling_sfromq`, q)
}

#' @title pik from q
#' @name pikfromq
#' @description This function finds the \code{pik} from an initial \code{q}.
#'  
#' @param q A matrix that is computed from the function \code{\link{qfromw}}. 
#' 
#' @details
#' 
#' More details could be find in Tille (2006).
#' 
#' @return A vector of inclusion probability computed from the matrix \code{q}.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tille, Y. (2006), Sampling Algorithms, springer
#' 
#' @export
pikfromq <- function(q) {
    .Call(`_StratifiedSampling_pikfromq`, q)
}

#' @title pikt from pik
#' @name piktfrompik
#' @description This function finds the \code{pikt} from an initial \code{pik}.
#'  
#' @param pik A vector of inclusion probabilities. The vector must contains only value that are not integer.
#' @param max_iter An integer that specify the maximum iteration in the Newton-Raphson algorithm. Default \code{500}.
#' @param tol A scalar that specify the tolerance convergence for the Newton-Raphson algorithm. Default \code{1e-8}.
#' 
#' @details
#' 
#' The management of probabilities equal to 0 or 1 is done in the cps function.
#' 
#' \code{pikt} is the vector of inclusion probabilities of a Poisson sampling with the right parameter. The vector is found by Newtwon-Raphson algorithm.
#' 
#' More details could be find in Tille (2006).
#' 
#' @return An updated vector of inclusion probability.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tille, Y. (2006), Sampling Algorithms, springer
#' 
#' @export
piktfrompik <- function(pik, max_iter = 500L, tol = 1e-8) {
    .Call(`_StratifiedSampling_piktfrompik`, pik, max_iter, tol)
}

#' @title Conditional Poisson sampling design
#' @name cps
#' @description Maximum entropy sampling with fixed sample size. It select a sample with fixed sample size with unequal inclusion probabilities.
#'  
#' @param pik A vector of inclusion probabilities. 
#' @param eps A scalar that specify the tolerance to transform a small value to the value 0.
#' 
#' @details
#' Conditional Poisson sampling, the sampling design maximizes the entropy:
#' \deqn{I(p) = - \sum s p(s) log[p(s)].}
#' where s is of fixed sample size. Indeed, Poisson sampling is known for maximizing the entropy but has no fixed sample size. The function selects a sample of fixed sample that maximizes entropy.
#' 
#' This function is a C++ implementation of \code{\link[sampling:UPmaxentropy]{UPmaxentropy}} of the package \code{sampling}. More details could be find in Tille (2006).
#' 
#' @return A vector with elements equal to 0 or 1. The value 1 indicates that the unit is selected while the value 0 is for rejected units.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tille, Y. (2006), Sampling Algorithms, springer
#' 
#' @examples
#' 
#' pik <- inclprob(seq(100,1,length.out = 100),10)
#' s <-  cps(pik)
#' 
#' 
#' 
#' # simulation with piktfrompik MUCH MORE FASTER
#' s <- rep(0,length(pik))
#' SIM <- 100
#' pikt <- piktfrompik(pik)
#' w <- pikt/(1-pikt)
#' q <- qfromw(w,sum(pik))
#' for(i in 1 :SIM){
#'   s <- s + sfromq(q)
#' }
#' p <- s/SIM # estimated inclusion probabilities
#' t <- (p-pik)/sqrt(pik*(1-pik)/SIM)
#' 1 - sum(t > 1.6449)/length(pik) # should be approximately equal to 0.95 
#' 
#' @export
cps <- function(pik, eps = 1e-6) {
    .Call(`_StratifiedSampling_cps`, pik, eps)
}

#' @title Joint inclusion probabilities of maximum entropy.
#' @name maxentpi2
#' @description This function computes the matrix of the joint inclusion of the maximum entropy sampling with fixed sample size. It can handle unequal inclusion probabilities.
#' 
#' @param pikr A vector of inclusion probabilities.
#' 
#' @details
#' The sampling design maximizes the entropy design:
#' \deqn{I(p) = - \sum s p(s) log[p(s)].}
#' 
#' This function is a C++ implementation of \code{\link[sampling:UPMEpik2frompikw]{UPMEpik2frompikw}}.
#' More details could be find in Tille (2006).
#' @return A matrix, the joint inclusion probabilities.
#'
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @references 
#' Tille, Y. (2006), Sampling Algorithms, springer
#' 
#' @export
maxentpi2 <- function(pikr) {
    .Call(`_StratifiedSampling_maxentpi2`, pikr)
}

#' @title One-step One Decision sampling method
#' @name osod
#' @description This function implements the One-step One Decision method. It can be used using equal or unequal inclusion probabilities. The method is particularly useful for selecting a sample from a stream. 
#' 
#' @param pikr A vector of inclusion probabilities.
#' @param full An optional boolean value, to specify whether the full population (the entire vector) is used to update inclusion probabilities. Default: FALSE 
#' 
#' @details
#' 
#' The method sequentially transforms the vector of inclusion probabilities into a sample whose values are equal to 0 or 1. The method respects the inclusion probabilities and can
#' handle equal or unequal inclusion probabilities.
#' 
#' The method does not take into account the whole vector of inclusion probabilities by having a sequential implementation. This means that the method is fast and can be implemented in a flow.
#' 
#' @return A vector with elements equal to 0 or 1. The value 1 indicates that the unit is selected while the value 0 is for rejected units.
#'
#' @author Raphael Jauslin \email{raphael.jauslin@@unine.ch}
#'
#' @seealso \code{\link{c_bound}}
#' 
#' @examples
#' 
#' N <- 1000
#' n <- 100
#' pik <- inclprob(runif(N),n)
#' s <- osod(pik)
#' 
#' @export
osod <- function(pikr, full = FALSE) {
    .Call(`_StratifiedSampling_osod`, pikr, full)
}

#' @encoding UTF-8
#' @title Variance Estimation for balanced sample
#' @name vEst
#' @description
#' Estimated variance approximation calculated as the conditional variance with respect to the balancing equations of a particular Poisson design. See Tillé (2020)
#' 
#' @param Xauxs A matrix of size (\eqn{n} x \eqn{p}) of auxiliary variables on which the sample must be balanced.
#' @param piks A vector of inclusion probabilities. The vector has the size of the sample \eqn{s}.
#' @param ys A variable of interest.The vector has the size \eqn{n} of the sample \eqn{s}.
#' 
#' @references 
#' Tillé, Y. (2020), Sampling and Estimation from finite populations, Wiley,
#'
#' @return Estimated variance of the horvitz-thompson estimator.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @seealso \code{\link{vDBS}} \code{\link{vApp}} 
#' 
#' @export
#' 
#' @examples
#' 
#' N <- 100 
#' n <- 40
#' x1 <- rgamma(N,4,25)
#' x2 <- rgamma(N,4,25)
#' 
#' pik <- rep(n/N,N)
#' Xaux <- cbind(pik,as.matrix(matrix(c(x1,x2),ncol = 2)))
#' Xspread <- cbind(runif(N),runif(N))
#'   
#' 
#' s <- balseq(pik,Xaux,Xspread)
#'   
#' y <- Xaux%*%c(1,1,3) + rnorm(N,120) # variable of interest
#'   
#' vEst(Xaux[s,],pik[s],y[s])
#' vDBS(Xaux[s,],Xspread[s,],pik[s],y[s])
#' vApp(Xaux,pik,y)
#' 
vEst <- function(Xauxs, piks, ys) {
    .Call(`_StratifiedSampling_vEst`, Xauxs, piks, ys)
}

#' @encoding UTF-8
#' @title Variance Estimation for Doubly Balanced Sample.
#' @name vDBS
#' @description
#' Variance estimator for sample that are at the same time well spread and balanced on auxiliary variables. See Grafstr\"om and Till\'e (2013)
#' 
#' @param Xauxs A matrix of size (\eqn{n} x \eqn{p}) of auxiliary variables on which the sample must be balanced.
#' @param Xspreads Matrix of spatial coordinates.
#' @param piks A vector of inclusion probabilities. The vector has the size \eqn{n} of the sample \eqn{s}.
#' @param ys A variable of interest. The vector has the size \eqn{n} of the sample \eqn{s}.
#' 
#' @references 
#' Grafstr\"om, A. and Till\'e, Y. (2013), Doubly balanced spatial sampling with spreading and restitution of auxiliary totals, Environmetrics, 14(2):120-131
#'
#' @return Estimated variance of the horvitz-thompson estimator.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @seealso \code{\link{vDBS}} \code{\link{vApp}} 
#' 
#' @export
#' 
#' @examples
#' 
#' N <- 100 
#' n <- 40
#' x1 <- rgamma(N,4,25)
#' x2 <- rgamma(N,4,25)
#' 
#' pik <- rep(n/N,N)
#' Xaux <- cbind(pik,as.matrix(matrix(c(x1,x2),ncol = 2)))
#' Xspread <- cbind(runif(N),runif(N))
#'   
#' 
#' s <- balseq(pik,Xaux,Xspread)
#'   
#' y <- Xaux%*%c(1,1,3) + rnorm(N,120) # variable of interest
#'   
#' vEst(Xaux[s,],pik[s],y[s])
#' vDBS(Xaux[s,],Xspread[s,],pik[s],y[s])
#' vApp(Xaux,pik,y)
#' 
vDBS <- function(Xauxs, Xspreads, piks, ys) {
    .Call(`_StratifiedSampling_vDBS`, Xauxs, Xspreads, piks, ys)
}

#' @encoding UTF-8
#' @title Approximated variance for balanced sample
#' @name vApp
#' @description
#' 
#' Variance approximation calculated as the conditional variance with respect to the balancing equations of a particular Poisson design. See Tillé (2020)
#' 
#' @param Xaux A matrix of size (\eqn{N} x \eqn{p}) of auxiliary variables on which the sample must be balanced.
#' @param pik A vector of inclusion probabilities. The vector has the size \eqn{N} of the population \eqn{U}.
#' @param y A variable of interest.
#' 
#' @references 
#' Tillé, Y. (2020), Sampling and Estimation from finite populations, Wiley,
#' 
#' @return Approximated variance of the Horvitz-Thompson estimator.
#' 
#' @author Raphaël Jauslin \email{raphael.jauslin@@unine.ch}
#' 
#' @seealso \code{\link{vDBS}} \code{\link{vApp}} 
#' 
#' @export
#' 
#' @examples
#' 
#' N <- 100 
#' n <- 40
#' x1 <- rgamma(N,4,25)
#' x2 <- rgamma(N,4,25)
#' 
#' pik <- rep(n/N,N)
#' Xaux <- cbind(pik,as.matrix(matrix(c(x1,x2),ncol = 2)))
#' Xspread <- cbind(runif(N),runif(N))
#'   
#' 
#' s <- balseq(pik,Xaux,Xspread)
#'   
#' y <- Xaux%*%c(1,1,3) + rnorm(N,120) # variable of interest
#'   
#' vEst(Xaux[s,],pik[s],y[s])
#' vDBS(Xaux[s,],Xspread[s,],pik[s],y[s])
#' vApp(Xaux,pik,y)
#' 
vApp <- function(Xaux, pik, y) {
    .Call(`_StratifiedSampling_vApp`, Xaux, pik, y)
}

Try the StratifiedSampling package in your browser

Any scripts or data that you put into this service are public.

StratifiedSampling documentation built on Oct. 26, 2022, 5:09 p.m.