################# The asymptotic tests #######################################################
#################### gives the asymptotic test statistics for log-concave tests based on distance #######
logcon_a<- function(x,y, pr1, pr2)
{
m <- length(x)
n <- length(y)
N <- m+n
a <- ceiling(pr1*N) #Truncation
b <- ceiling((1-pr2)*N) #Truncation
pool1 <- sort(c(x,y))
mlef1 <- logcondens::logConDens(x,smoothed = FALSE)
mlef2 <- logcondens::logConDens(y,smoothed = FALSE)
pool <- sort(c(mlef1$knots,mlef2$knots))
ph1 <- logcondens::evaluateLogConDens(pool,mlef1,1)[,2]
ph2 <- logcondens::evaluateLogConDens(pool,mlef2,1)[,2]
php1 <- diff(ph1)/diff(pool)
php2 <- diff(ph2)/diff(pool)
# checking the knots between where F2-F1 attains maximum in some intermediate points, not at the kbnots
ind <- which(php1*php2<0)
# Combining the points where to have checked
poolt1 <- pool[pool>=pool[a] & pool<=pool[b]]
poolt <- c(poolt1,seq(pool1[a],pool1[b], by=0.05))
poolt <- sort(poolt)
#T1
F2 <- logcondens::evaluateLogConDens(poolt,mlef2,3)[,4]
F1 <- logcondens::evaluateLogConDens(poolt,mlef1,3)[,4]
diff.lc <- F2 - F1
t1 <- (F2-F1)/sqrt(F2*(1-F2)/n+F1*(1-F1)/m)
t1 <- min(t1)
z <- seq(pr1,1-pr2,0.05)
z1 <- c(1:N)/N
z1 <- z1[z1>pr1 & z1 < 1-pr2]
z <- sort(unique(c(z,z1)))
#Calculating T4
pool <- quantile(sort(c(x,y)),z)
F1 <- logcondens::evaluateLogConDens(pool,mlef1,3)[,4]
F2 <- logcondens::evaluateLogConDens(pool,mlef2,3)[,4]
t4 <- sqrt(m*n/(m+n))*(F2-F1)/sqrt(z*(1-z))
t4 <- min(t4)
# t2 and t4 stand for the truncated ones
c(t1,t4)
c(t1,t4)
}
############# gives statistic of the unimodal tests based on the difference #######
unimod_a <- function(x,y,p, pr1, pr2)
{
m <- length(x)
n <- length(y)
N <- m+n
eta <- 1/N^p
a <- ceiling(pr1*N) #truncation
b <- ceiling((1-pr2)*N) #truncation
s1 <- calc_mode(sort(x),eta)
s2 <- calc_mode(sort(y),eta)
poolin <- sort(c(x,y))
pool <- c(poolin[a:b], seq(poolin[a], poolin[b], 0.05))
pool <- sort(pool)
F1 <- grenander.inter(pool,s1$x.knots, s1$F.knots, s1$f.knots)
F2 <- grenander.inter(pool,s2$x.knots, s2$F.knots, s2$f.knots)
tmp1 <- (F2-F1)/sqrt(F2*(1-F2)/n+F1*(1-F1)/m)
t1 <- min(tmp1)
z <- seq(pr1,1-pr2,0.05)
z1 <- c(1:N)/N
z1 <- z1[a:b]
z <- sort(unique(c(z,z1)))
pool <- quantile(poolin,z)
F1 <- grenander.inter(pool,s1$x.knots, s1$F.knots, s1$f.knots)
F2 <- grenander.inter(pool,s2$x.knots, s2$F.knots, s2$f.knots)
t4 <- sqrt(m*n/(m+n))*(F2-F1)/sqrt(z*(1-z))
t4 <- min(t4)
# t2 and t4 stand for the truncated ones
c(t1,t4)
}
############# gives statistic of the nonparametric tests based on the difference #######
nonparam_a <- function(x,y, pr1, pr2)
{
m <- length(x)
n <- length(y)
N <- m+n
a <- ceiling(pr1*N) #truncation
b <- ceiling((1-pr2)*N) #truncation
pool <- sort(c(x,y))
F1 <- ecdf(x)(pool)
F2 <- ecdf(y)(pool)
tmp1 <-(F2-F1)/sqrt(F2*(1-F2)/n+F1*(1-F1)/m)
t1 <- min(tmp1[a:b])
z <- seq(pr1, 1-pr2, 0.01)
z1 <- c(1:N)/N
z1 <- z1[a:b]
z <- sort(unique(c(z,z1)))
pool <- quantile(pool,z)
F1 <- ecdf(x)(pool)
F2 <- ecdf(y)(pool)
t4 <- sqrt(m*n/(m+n))*(F2-F1)/sqrt(z*(1-z))
t4 <- min(t4)
# t2 and t4 stand for the truncated ones
c(t1,t4)
}
############## combining the two shape constrained tests ###################################
boot_test_a <- function(y1,y2, p, pr1, pr2, Method)
{
if(Method=="NP") temp <- nonparam_a(y1, y2, pr1, pr2)
if(Method=="UM") temp <- unimod_a(y1, y2, p, pr1, pr2)
if(Method=="LC") temp <- logcon_a(y1, y2, pr1, pr2)
temp
}
############## SDNN ################################
#' One-sided test of stochastic dominance against the null of non-dominance
#'
#' Calculates the p-values of one-sided
#' tests of restricted stochastic dominance against the null of non-dominance.
#' The concerned tests are the minimum t-statistic test
#' and the two sample empirical process (TSEP)
#' test of \emph{Laha et al. (2021)}. Each test can be either nonparametric,
#' or semiparametric, ie. using unimodality or log-concavity assumption on the underlying
#' densities.
#'
#'
#' @details Suppose \eqn{X_1,\ldots, X_m} and \eqn{Y_1,\ldots, y_n} are independent random
#' variables with distribution \eqn{F} and \eqn{G}, respectively. Denote by
#' \eqn{D_{p,m,n}} the set \deqn{[H_n^{-1}(p), H_n^{-1}(1-p)]} where
#' \eqn{p\in[0,0.5)} and \eqn{H_n} is the empirical distribution
#' function of the combined sample \deqn{\{X_1,\ldots, x_m,Y_1,\ldots,Y_n\}.} The function SDNN tests
#' \eqn{H_0: F(x)\geq G(x)} for some \eqn{x\in D_{p,m,n}} vs
#' \eqn{H_1: F(x)<G(x)} for all \eqn{x\in D_{p,m,n}}.
#' For more details, see Laha et al., 2021.
#'
#' @details \code{Method}:
#' "NP" corresponds to the nonparametric tests. "UM" corresponds
#' to tests which use the function \code{\link{calc_mode}}
#' to estimate the densities of \eqn{X_i}'s and \eqn{Y_i}'s. This function
#' estimates the unimodal density estimator of Birge (1997).
#' "LC" corresponds to tests which use the log-concave MLE (given by \link[logcondens]{logConDens} of R package ``logcondens")
#' of Dumbgen and Rufibatch (2009) to estimate the latter densities. For more detail, see Laha et al. (2021).
#' @details \code{t}: The parameter \eqn{t}
#' corresponds to the parameter \eqn{\tau}
#' in Birge (1997). Higher values of \eqn{t} leads to more accurate
#' estimation of the unimodal densities. This value ontrols the accuracy in
#' unimodal density estimation upto the term \eqn{(m+n)^{-t}}. A value greater
#' than or equal to one is recommended. See Birge (1997)
#' for more details.
#' @details \code{p}: p corresponds to the set \eqn{D_{p,m,n}} in \eqn{H_0} and \eqn{H_1}.
#' To overcome the difficulties arising from the tail region,
#' \eqn{100p} percent of data is trimmed from both sides of the
#' combined sample.
#'
#' @param x Vector of m independent and identically distributed random variables;
#' corresponds to the first sample.
#' @param y Vector of n independent and identically distributed random variables;
#' corresponds to the second sample.
#' @param Method Must be one among "NP" (nonparametric), "UM" (unimodal), and "LC" (log-concave). See 'Details'.
#' @param t A positive real number. Only required when Method={"UM"}, default value is 1.
#' See Details.
#' @param p1 The proportion of combined data to be trimmed from the left prior to testing,
#' should take value in \eqn{[0,0.50)},
#' the default is set to 0.05.
#' @param p2 The proportion of combined data to be trimmed from the right prior to testing,
#' should take value in \eqn{[0,0.50)},
#' the default is set to 0.05.
#'@return A list of two numbers.
#'\itemize{
#'\item T1 - The p-value of the test based on minimum T-statistic of
#' Laha \emph{et al.}, (2021)
#'\item T2 - The p-value of the TSEP test of Laha \emph{et al.}, (2021).
#' }
#'@references Laha, N., Moodie, Z., Huang, Y., and Luedtke, A. (2021).
#' \emph{ Improved inference for vaccine-induced immune responses
#' via shape-constrained methods}. Submitted.
#'@references Dumbgen, L. and Rufibatch, K. (2009). \emph{Maximum likelihood estimation of a logconcave density and
#' its distribution function: Basic properties and uniform
#' consistency}, Bernoulli, 15, 40–68.
#'@references Birge, L. (1997). \emph{Estimation of unimodal densities
#' without smoothness assumptions}, Ann. Statist., 25, 970–981.
#'@author \href{https://connects.catalyst.harvard.edu/Profiles/display/Person/184207}{Nilanjana Laha}
#' (maintainer), \email{nlaha@@hsph.harvard.edu},
#' Alex Luedtke, \email{aluedtke@@uw.edu}.
#' @seealso \code{\link{calc_mode}}, \link[logcondens]{logConDens}
#' @examples
#' x <- rnorm(100); y <- rgamma(50, shape=1);
#' SDNN(x, y, Method="UM", t=1, p1=0.01, p2=0.05)
#' @export
SDNN <- function(x, y, Method, t=1, p1=0.05, p2=0.05)
{
#setting default values of pr and p
if(missing(p1)) p1 <- 0.05
if(missing(p1)) p2 <- 0.05
if(missing(t)) t <- 1
x <- sort(x)
y <- sort(y)
test_stat <- boot_test_a(x, y, t, p1, p2, Method)
#Calculating the p-value
pval <- 1-pnorm(test_stat)
list(T1=pval[1],T2=pval[2])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.