# Tests to see if vectorised versions give same result as slower for-loops
context("Vectorisation")
eps <- sqrt(.Machine$double.eps)
data(secura)
X <- sort(secura$size)
n <- length(X)
censored <- sample(c(0,1),length(secura$size),replace=TRUE)
##############################################
# Censoring.R
test_that("Censored estimators for-loops", {
###################################################
# cHill
delta <- !censored
# Slow for-loop
Hill2 <- numeric(n-1)
for (k in 1:(n-1)) {
if (sum(delta[n-(1:k)+1])!=0) {
Hill2[k] <- (1/sum(delta[n-(1:k)+1]))*sum(log(X[n-(1:k)+1])-log(X[n-k]))
} else {
Hill2[k] <- NA
}
}
expect_true(max(abs(cHill(X,censored=censored)$gamma-Hill2)/Hill2,
na.rm=TRUE)<eps)
######################
# cgenHill
UH.scores2 <- numeric(n)
Hill2 <- numeric(max((n-2),1))
gamma <- Hill(X)$gamma
# Slow for-loops
UH.scores2 <- numeric(n)
Hill2 <- numeric(max((n-2),1))
for (i in 1:(n-1)) {
UH.scores2[i] <- X[n-i]*gamma[i]
}
for (k in 1:max((n-2),1)) {
Hill2[k] <- sum(log(UH.scores2[1:k])-log(UH.scores2[k+1]))/sum(delta[n-(1:k)+1])
}
expect_true(max(abs(cgenHill(X,censored=censored)$gamma-Hill2)/Hill2,
na.rm=TRUE)<eps)
######################
# cParetoQQ
pqq.the3 <- numeric(n-1)
for (j in 1:(n-1)) {
pqq.the3[j] <- -log(KaplanMeier(X[n-j+1],X,censored)$surv)
}
expect_true(max(abs(cParetoQQ(X,censored=censored,plot=FALSE)$pqq.the- pqq.the3 )/ pqq.the3 ,
na.rm=TRUE)<eps)
################################
# p_hat_fun
delta.n <- delta
p.hat <- numeric(n-1)
for (k in 1:(n-1)) {
p.hat[k] <- mean(delta.n[n:(n-k+1)])
}
expect_true(max(abs(p.hat - .p_hat_fun(delta.n))/ p.hat,
na.rm=TRUE)<eps)
})
##############################################
# Hill.R
test_that("Hill for-loop", {
# Hill
Hill2 <- numeric(n-1)
for (k in 1:(n-1)) {
Hill2[k] <- (1/k)*sum(log(X[n:(n-k+1)])) - log(X[n-k])
}
expect_true(max(abs(Hill(X)$gamma-Hill2)/Hill2,na.rm=TRUE)<eps)
######################
# genHill
UH.scores2 <- numeric(n)
Hill2 <- numeric(max((n-2),1))
gamma = Hill(X)$gamma
for (i in 1:(n-1)) {
UH.scores2[i] <- X[n-i]*gamma[i]
}
for (k in 1:max((n-2),1)) {
Hill2[k] <- sum(log(UH.scores2[1:k])-log(UH.scores2[k+1]))/k
}
expect_true(max(abs(genHill(X,gamma=gamma)$gamma-Hill2)/Hill2,na.rm=TRUE)<eps)
})
##############################################
# LStail.R
test_that("LStail for-loop", {
rho <- -1
par2 <- matrix(nrow=n-1,ncol=3)
par2[,3] <- rho
UH.scores <- numeric(n)
K <- 1:(n-1)
UH.scores[K] <- X[n-K] * Hill(X)$gamma[K]
# Obtain LS estimators for gamma and b using full model Z_j = gamma + b*(j/k)^(-rho) + eps_j
for (k in (n-1):1) {
Z <- ((1:k)+1)*log(UH.scores[(1:k)]/UH.scores[(1:k)+1])
par2[k,2] <- ((1-par2[k,3])^2*(1-2*par2[k,3])/par2[k,3]^2)*sum((((1:k)/k)^(-par2[k,3])-1/(1-par2[k,3]))*Z)/k
par2[k,1] <- sum(Z)/k-par2[k,2]/(1-par2[k,3])
}
res <- LStail(X,rho=rho)
expect_true(max(abs(res$gamma-par2[,1])/par2[,1],na.rm=TRUE)<eps)
expect_true(max(abs(res$b-par2[,2])/par2[,2],na.rm=TRUE)<eps)
###################################################################
# Test with rho a vector
res <- LStail(X,rho=NULL)
par2 <- matrix(nrow=n-1,ncol=3)
par2[,3] <- res$rho
UH.scores <- numeric(n)
K <- 1:(n-1)
UH.scores[K] <- X[n-K] * Hill(X)$gamma[K]
# Obtain LS estimators for gamma and b using full model Z_j = gamma + b*(j/k)^(-rho) + eps_j
for (k in (n-1):1) {
Z <- ((1:k)+1)*log(UH.scores[(1:k)]/UH.scores[(1:k)+1])
par2[k,2] <- ((1-par2[k,3])^2*(1-2*par2[k,3])/par2[k,3]^2)*sum((((1:k)/k)^(-par2[k,3])-1/(1-par2[k,3]))*Z)/k
par2[k,1] <- sum(Z)/k-par2[k,2]/(1-par2[k,3])
}
expect_true(max(abs(res$gamma-par2[,1])/par2[,1],na.rm=TRUE)<eps)
expect_true(max(abs(res$b-par2[,2])/par2[,2],na.rm=TRUE)<eps)
})
##############################################
# Moment.R
test_that("Moment for-loop", {
M12 <- numeric(n-1)
M22 <- numeric(n-1)
for (k in (n-1):1) {
Z.values <- log(X[n:(n-k+1)]/X[n-k])
M12[k] <- (1/k)*sum(Z.values)
M22[k] <- (1/k)*sum(Z.values^2)
}
Mom2 <- M12 + 1 - (1-M12^2/M22)^(-1)/2
expect_true(max(abs(Moment(X)$gamma[-1]-Mom2[-1])/Mom2[-1],na.rm=TRUE)<eps)
})
##############################################
# QQplots.R
test_that("MeanExcess for-loop", {
# Slow for-loop
e2 <- numeric(n-1)
for (kk in 1:(n-1)) {
e2[kk] <- sum(X[n-(1:kk)+1])/kk-X[n-kk]
}
e <- MeanExcess(X,plot=FALSE)$e
expect_true(max(abs(e-e2)/e2,na.rm=TRUE)<eps)
})
##############################################
# Truncated estimators
test_that("Truncation (gamma pos) for-loops", {
r <- 4
# tr.gamma.pos
H_for <- H1_for <- numeric(n-1)
for (k in (n-1):r) {
Z.values <- log(X[(n-r+1):(n-k+1)]/X[n-k])
Z1.values <- log(X[n:(n-k+1)]/X[n-k])
H_for[k] <- (1/(k-r+1))*sum(Z.values)
}
expect_true(max(abs(trHill(X,r=r)$H-H_for[r:(n-1)])/H_for[r:(n-1)],na.rm=TRUE)<eps)
# tr.DT.pos
K <- r:(n-1)
DT <- numeric(n)
R <- numeric(n)
A <- numeric(n)
gamma <- trHill(X,r=r)$gamma
gamma <- abs(gamma)
A[K] <- 1/gamma
R[K] <- X[n-K]/X[n-r+1]
for (k in (n-1):r) {
DT[k] <- max(((k+1)/(n+1))*(R[k]^A[k]-r/(k+1))/(1-R[k]^A[k]),0)
}
expect_true(max(abs(trDT(X,gamma=gamma,r=r)$DT-DT[K]),na.rm=TRUE)<eps)
# tr.Tk.pos
K <- r:(n-1)
Tk <- numeric(n)
R <- numeric(n)
G <- numeric(n)
gamma <- trHill(X,r=r)$gamma
G[K] <- abs(gamma)
R[K] <- X[n-K] / X[n]
for (k in (n-1):r) {
Tk[k] <- max(X[n-k] * ((R[k]^(1/G[k])-1/(k+1)) / (1-1/(k+1)))^(-G[k]), X[n])
}
expect_true(max(abs(trEndpoint(X,gamma=gamma,r=r)$Tk-Tk[K]),na.rm=TRUE)<eps)
# tr.quant.pos
p <- 0.01
K <- r:(n-1)
quant <- numeric(n)
R <- numeric(n)
A <- numeric(n)
Dt <- numeric(n)
gamma <- trHill(X,r=r)$gamma
gamma <- abs(gamma)
DT <- trDT(X,gamma=gamma,r=r)$DT
Dt[K] <- DT
A[K] <- 1/gamma
for (k in (n-1):r) {
quant[k] <- X[n-k]*((Dt[k]+(k+1)/(n+1))/(Dt[k]+p))^(1/A[k])
}
expect_true(max(abs(trQuant(X,p=p,gamma=gamma,DT=DT,r=r)$Q-quant[K]),na.rm=TRUE)<eps)
##############
# Test for truncation
K <- 1:(n-1)
H <- Hill(X)$gamma
# Compute L(H_{k,n})
E <- numeric(n)
for (k in K) {
E[k] <- 1/k * sum((X[n-k]/X[n-(1:k)+1])^(1/H[k]))
}
L <- (E[K]-0.5)/(1-E[K])
#Test value
tv <- sqrt(12*K)*L
expect_true(max(abs(trTest(X,plot=FALSE)$testVal-tv[K]),na.rm=TRUE)<eps)
})
########################################
# crHill
# Set seed
set.seed(29072016)
# Pareto random sample
Y <- rpareto(100, shape=2)
# Censoring variable
C <- rpareto(100, shape=1)
# Observed (censored) sample of variable Y
Ytilde <- pmin(Y, C)
# Censoring indicator
censored <- (Y>C)
# Conditioning variable
X <- seq(1, 10, length.out=length(Y))
# Observed (censored) sample of conditioning variable
Xtilde <- X
Xtilde[censored] <- X[censored] - runif (sum(censored), 0, 1)
# Bandwidth
h <- 5
# Kernel
Kernel = "biweight"
# Value of covariate
x <- 2
test_that("crHill for-loop", {
###
# Slower old function
n <- length(Xtilde)
kernel <- .kernel_aux(Kernel, h=1)
# Convert censored to Delta
Delta <- !as.logical(censored)
# Sort data according to Ytilde values
s <- sort(Ytilde, index.return=TRUE)
Ytilde_sort <- s$x
Xtilde_sort <- Xtilde[s$ix]
Delta_sort <- Delta[s$ix]
# Nadaraya weights, only non-zero when uncensored
# Sorted with Ytilde!
weights_sort <- numeric(n)
weights_sort[Delta_sort] <- kernel((x-Xtilde_sort[Delta_sort])/h)
weights_sort <- weights_sort / sum(weights_sort)
K <- 1:(n-1)
H2 <- numeric(n-1)
# Compute cumulative sum of weights,
# add 0 to avoid problems when ind is empty
wsum <- c(0, cumsum(weights_sort))
# Avoid numerical problems
wsum[wsum==1] <- wsum[wsum==1] - 10^(-16)
for (k in 1:(n-1)) {
prods <- numeric(k)
# Compute products for i=1,...,k
for (i in 1:k) {
ind <- 1:(n-i)
# Note that wsum[1]=0 !
prods[i] <- prod((1 - weights_sort[ind] / (1-wsum[ind]))^Delta_sort[ind])
}
H2[k] <- sum(prods * (log(Ytilde_sort[n-(1:k)+1])-log(Ytilde_sort[n-(1:k)]))) / prods[k]
}
###
# Faster new version
H <- crHill(x=x, Xtilde=Xtilde, Ytilde=Ytilde, censored=censored, h=h, kernel=Kernel)$gamma
expect_true(max(abs(H-H2)/H2,na.rm=TRUE)<eps)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.