# R/limDis.R In otinference: Inference for Optimal Transport

```#' Sample from the limiting distribution under the null.
#'
#' @param B number of samples to generate. Defaults to 500.
#' @param r vector of probabilities in the original problem.
#' @param distMat distance matrix in the original problem.
#' @param p cost exponent. Defaults to 1.
#' @return A vector of samples.
#' @export
limDisNull <- function(B = 500, r, distMat, p = 1){

N <- nrow(distMat)

# Matrix for constraints.
A <- kronecker(rep(1, N), diag(1, N, N)) -
kronecker(diag(1, N, N), rep(1, N))

rhs <- as.vector(distMat)^p

dir <- rep("<=", N^2)

# Define the derivative. If Rcplex is available use it to solve the LP,
# else use Rglpk.
if(requireNamespace("Rcplex", quietly = TRUE)){
deriv <- function(g){
Rcplex::Rcplex(cvec = g, Amat = A, bvec = rhs, lb = -Inf, objsense = "max",
sense = "L")\$obj
}
} else{
deriv <- function(g){
Rglpk::Rglpk_solve_LP(obj = g, mat = as.matrix(A),
bounds = list(
lower = list(ind = 1:N, val = rep(-Inf, N))),
dir = dir, rhs = rhs, max = T)\$optimum
}
}

# Covariance matrix for limiting G.
Sigma <- - r %o% r + diag(r)

# Create sample from limiting distribution.
apply(MASS::mvrnorm(n = B, mu = rep(0, N), Sigma = Sigma), 1, deriv)^(1/p)
}

#' Sample from the limiting distribution under the null when the underlying
#' space is a grid.
#'
#' @param B Number of bootstrap samples to generate. Defaults to 500.
#' @param r vector of probabilities in the original problem. Is interpreted as
#'          a square matrix.
#' @param p cost exponent.
#' @return A vector of samples.
#' @export
limDisNullGrid <- function(B = 500, r, p=1){
L <- sqrt(length(r))
coord <- seq(from = 0, to = 1, length.out = L)
distMat <- as.matrix(stats::dist(expand.grid(coord, coord)))
limDisNull(B = B, r = r, distMat = distMat, p = p)
}

#' Sample from the limit distribution under the alternative.
#'
#' @param B Number of samples to generate.
#' @param r,s Number of counts giving the two samples.
#' @param distMat Distance matrix.
#' @param p Cost exponent. Defaults to 1.
#' @return A vector of samples.
#' @export
limDisAlt <- function(B = 1000, r, s, distMat, p = 1){

N <- length(r)
n <- sum(r)
m <- sum(s)
lambda <- m / (n + m)
r <- r / n
s <- s / m
wd <- wassDist(a = r, b = s, distMat = distMat, p = p)

# Define target function.
foo <- function(z, h1, h2){
if(!all(c(h1 - z * r, h2 - z * s) >= 0)){
return(Inf)
} else{
(wassDist(a = h1 - z * r,
b = h2 - z * s, distMat = distMat, p = p)^p + z * wd^p)
}
}

# Define the covariance functions.
Sigma1 <- - r %o% r + diag(r)
Sigma2 <- - s %o% s + diag(s)

sample <- c()
# Minimize the target function.
for(k in 1:B){
G <- MASS::mvrnorm(n = 1, mu = rep(0, N), Sigma = Sigma1) * (r > 0)
H <- MASS::mvrnorm(n = 1, mu = rep(0, N), Sigma = Sigma2) * (s > 0)
sample[k] <- stats::optimize(foo, lower = -1e8, upper = 1e8,
h1 = sqrt(lambda) * G, h2 = sqrt(1 - lambda) * H)\$objective
}

return(wd^(1 - p) * sample / p)
}

#' m-out-of-n Bootstrap for the limiting distribution.
#'
#' @param r,s Vectors of counts giving the two samples.
#' @param distMat Distance matrix.
#' @param B The number of samples to generate. Defaults to 1000.
#' @param p Cost exponent. Defaults to 1.
#' @param gamma m = n^gamma. Defaults to 0.9.
#' @return A sample from the limiting distribution.
#' @export
limDisAltBoot <- function(r, s, distMat, B = 1000, p = 1, gamma = 0.9){
n <- sum(r)
m <- sum(s)
nB <- n^gamma
mB <- m^gamma

ev <- function(){
rB <- stats::rmultinom(n = 1, size = nB, prob = r / sum(r))
sB <- stats::rmultinom(n = 1, size = mB, prob = s / sum(s))

wassDist(rB / nB, sB / mB, distMat = distMat, p = p)
}

sample <- replicate(n = B, ev())

sample <- sqrt(((nB * mB) / (nB + mB))) *
(sample - wassDist(r / sum(r), s / sum(s), distMat, p = p))
}
```

## Try the otinference package in your browser

Any scripts or data that you put into this service are public.

otinference documentation built on May 2, 2019, 3:36 p.m.