#' Moving average data smoother.
#'
#' Computation of a moving average data smoother.
#'
#' @param x A numeric vector containing the data to analyze.
#' @param p A length-one numeric vector.
#' @return A \code{matrix} object.
#' @author Sergio Venturini \email{sergio.venturini@unito.it}
#' @seealso \code{\link{betaKS3_R}} for computing the sensitivity measures.
#' @references
#' Venturini, S., Borgonovo, E. (2020), "Sensitivity Analysis Using
#' \code{R}: the \pkg{SAuR} Package", Technical report.
#' @examples
#' \dontrun{
#' data(simdata_sub, package = "SAuR")
#'
#' }
#'
#' @export
mollify_R <- function(x, p) {
x_mat <- as.matrix(x)
n <- nrow(x_mat)
k <- ncol(x_mat)
p <- as.integer(min(c(p, floor(n/2 - 1))))
if (p > 1) {
X <- cumsum(as.vector(t(x_mat)))
X_idx_1 <- numeric(n)
count <- 1
for (i in p:(n + p - 1)) {
X_idx_1[count] <- ifelse(i < n, i + 1, n)
count <- count + 1
}
X_idx_2 <- numeric(n)
count <- 1
for (i in (-p):(n - p - 1)) {
X_idx_2[count] <- ifelse(i > 0, i + 1, 1)
count <- count + 1
}
s <- X[X_idx_1] - X[X_idx_2]
s_den <- c((2*p - 1):(2*p), rep(2*p, n - 2 - 2), (2*p):(2*p - 1))
s <- s/s_den
}
return(s)
}
#' Kolmogorov-Smirnov Sensitivity Using Smoothing CDF.
#'
#' Computation of the following sensitivity measures: Kolmogorov-Smirnov
#' distance beta, Borgonovo's delta, Kuiper's discrepancy kappa, Pearson's
#' correlation ratio eta2 and Wald-Wolfowitz number of runs (normalized),
#' as well as the corresponding conditional shift/separation measurements.
#'
#' @param data An object of class \code{SAuR_data} containing the model's inputs
#' @param MP A numeric vector indicating the number of partition to use.
#' @param gfx A length-one character vector providing the plot main title
#' @param outcol A length-one numeric vector indicating the output column
#' to focus on
#' @return A \code{list} object containing the sensitivity measures described above.
#' @author Sergio Venturini \email{sergio.venturini@unito.it}
#' @references
#' Venturini, S., Borgonovo, E. (2020), "Sensitivity Analysis Using
#' \code{R}: the \pkg{SAuR} Package", Technical report.
#' @examples
#' \dontrun{
#' data(simdata_sub, package = "SAuR")
#'
#' }
#'
#' @export
betaKS3_R <- function(data, MP = NULL, gfx = NULL, outcol = NULL) {
x <- data@X
y <- data@Y
if (!is.null(outcol))
y <- y[, outcol, drop = FALSE]
n <- nrow(x)
k <- ncol(x)
if (is.matrix(y)) {
if (nrow(y) != n)
stop("number of observations of y must match those of x.")
} else {
if (length(y) != n)
stop("number of observations of y must match those of x.")
}
if (!is.null(dim(y)) & ncol(y) > 1)
stop("the betaKS3_R() function computes the sensitivity measures for a single output.")
if (!is.null(dim(y)))
y <- as.numeric(y)
check_rstudio <- try(rstudioapi::isAvailable(), silent = TRUE)
plotdiff <- 1
gfxrows <- ceiling(sqrt(k))
nclrs <- 100 # [[TODO]]
colors <- rainbow(nclrs)
if (is.null(MP)) {
MP <- c(32, 3)
} else {
MP <- as.matrix(MP)
MP <- as.numeric(MP[nrow(MP), ])
}
if (length(MP) > 1) {
M <- MP[1]
P <- MP[length(MP)]
} else {
M <- MP[1]
P <- 3
}
if (M == 0) {
# test debiasing: predict value at partition size M = 0
res1 <- betaKS3_R(data, MP = 10, outcol = outcol)
res2 <- betaKS3_R(data, MP = 20, outcol = outcol) # double partition
res <- list()
res[["b"]] <- 2*(res1[["b"]] - .5*res2[["b"]])
res[["d"]] <- 2*(res1[["d"]] - .5*res2[["d"]])
res[["t"]] <- 2*(res1[["t"]] - .5*res2[["t"]])
res[["e"]] <- 2*(res1[["e"]] - .5*res2[["e"]])
res[["w"]] <- res[["bm"]] <- res[["dm"]] <- res[["km"]] <- res[["em"]] <- res[["wm"]] <- NULL
return(res)
}
# cumulative distribution: with ties
ys <- sort(unique(y))
indx <- match(ys, y)
rindx <- match(y, ys)
nn <- length(ys) # get the number of unique sample values
if (nn == n) {
cdf <- seq(1, n)/n # if all samples unique, cdf is easy
} else {
rle <- numeric(nn) # otherwise, build run length encoder
for (i in rindx) {
rle[i] <- rle[i] + 1 # count of each value
}
cdf <- cumsum(rle)/n
cdf <- cdf[sort(rindx)] # compute probability at each location
y_tmp <- sort.int(y, index.return = TRUE)
ys <- y_tmp$x
indx <- y_tmp$ix
}
xs <- x[indx, ]
n <- nrow(x)
k <- ncol(x)
VY <- var(y)*(length(y) - 1)/length(y) # [[SERGIO: shouldn't we use the sample variance?]]
m <- seq(from = 0, to = 1, length.out = (M + 1))
bm <- matrix(0, nrow = k, ncol = M) # Kolmogorov
dm <- matrix(0, nrow = k, ncol = M) # L1 Borgonovo
tm <- matrix(0, nrow = k, ncol = M) # Kuiper
wm <- matrix(0, nrow = k, ncol = M) # Wald-Wolfowitz
em <- matrix(VY/n, nrow = k, ncol = M) # conditional expectation
nm <- matrix(0, nrow = k, ncol = M)
# keep only values from the partition
indxx <- apply(xs, 2, order)
xr <- apply(xs, 2, sort)
# test for constant input
if (any((xr[1, ] == xr[nrow(xr), ]))) {
stop("constant input factors detected.")
}
factorlist <- (1:ncol(xr))[xr[1, ] != xr[nrow(xr), ]]
for (i in factorlist) {
xr[indxx[, i], i] <- cumsum(c(1, (diff(xr[, i]) != 0))) # cheap tied ranks
}
xr <- xr/max(xr) # scale to 1
# prepare for plotting
if (!is.null(gfx)) {
if ((class(check_rstudio) != "try-error") & check_rstudio) {
# do nothing --> the graph is automatically provided in a new plot window
} else {
dev.new()
}
split.screen(figs = c(gfxrows, ceiling(k/gfxrows)))
for (i in factorlist) {
screen(i)
plot(range(y), c(-.75, .75), type = "n", main = paste0(gfx, " conditioning on x_", i),
xlab = "y", ylab = ifelse(plotdiff, "Delta CDF", "CDF"),
cex.lab = .7, cex.main = .7, cex.axis = .7)
}
clrs <- hcl(h = seq(120, 0, length = M) + 150)
}
for (j in 1:M) {
print(paste0("step ", j, " of ", M))
indx <- (m[j] < xr) & (xr <= m[j + 1])
xtremew <- xor(indx[-1, ], indx[-n, ])
xtreme <- indx
cdfc <- apply(indx, 2, cumsum)
nm[, j] <- cdfc[nrow(cdfc), ] # if no ties: always same number of realizations
xtreme[1, ] <- xtreme[n, ] <- TRUE
for (i in factorlist) { # sum of conditionals
scc <- nm[i, j]
if (!scc) next
# difference of conditionals
smoothdcdf <- mollify_rcpp(cdf - cdfc[, i]/scc, ceiling(P*M))
xxtrem <- xtreme[, i] | c(xtreme[-1, i], FALSE)
critdcdf <- smoothdcdf[xxtrem]
mx <- max(critdcdf)
mn <- min(critdcdf)
# Kolmogorov-Smirnov distance
bm[i, j] <- max(c(0, mx, -mn))
# Kuiper dicrepancy
tm[i, j] <- max(c(0, mx - mn))
# find max in each positive run, min in each negative
delta <- sgn <- exval <- 0
if (!is.null(gfx) & plotdiff) {
screen(n = i, new = FALSE)
if (j == 1) erase.screen(n = i)
plot(ys[xxtrem], critdcdf, col = clrs[j], cex = .3, axes = FALSE, xlab = "", ylab = "",
xlim = range(y), ylim = c(-.75, .75), pch = 21)
}
for (val in critdcdf) {
sg <- sign(val)
if (sg == sgn) {
exval <- max(c(exval, sg*val))
} else {
if (exval > 0.5*sqrt(1/n + 1/scc)) {
delta <- delta + exval
}
sgn <- sg
exval <- sg*val
}
}
dm[i, j] <- delta + exval
# and more indicators:
# - Wald-Wolfowitz number of runs
mju <- 2*scc*(n - scc)/n - 1
# - +-1
wm[i, j] = (mju - sum(xtremew[, i]) + 1)/sqrt(mju*(mju + 1)/(n - 1))
# - correlation ratio
em[i, j] = var(ys[indx[, i]])*(length(ys[indx[, i]]) - 1)/length(ys[indx[, i]]) # [[SERGIO: shouldn't we use the
# sample variance?]]
}
}
if (!is.null(gfx)) close.screen(all.screens = TRUE)
b <- rowSums(bm*nm)/n
d <- rowSums(dm*nm)/n
t <- rowSums(tm*nm)/n
w <- rowSums(wm*nm)/n
f <- 1 - (rowSums(em*nm)/n)/VY
e <- numeric(k)
e[factorlist] <- f[factorlist]
res <- list(b = b, d = d, t = t, e = e, w = w, bm = bm, dm = dm, tm = tm, em = em, wm = wm)
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.