Nothing
##########################################################################
########### 2 STATES
##########################################################################
######################################################################
########### DISTRIBUTION CHOICE
######################################################################
######## RETURNS THE DENSITY OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
##### implemented in R
##### value: the chosen density function
######################################################################
dens <- function(dist){
fun <- get(paste0("d", dist))
return(fun)
}
######################################################################
######## RETURNS THE QUANTILES OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
##### implemented in R
##### value: the chosen quantile function
######################################################################
quant <- function(dist){
fun <- get(paste0("q", dist))
return(fun)
}
######################################################################
######## RETURNS THE DISTRIBUTION FUNCTION OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
##### implemented in R
##### value: the chosen distribution function
######################################################################
p <- function(dist){
fun <- get(paste0("p", dist))
return(fun)
}
######################################################################
######## GENERATES RANDOM NUMBERS OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
##### implemented in R
##### value: the chosen r function
######################################################################
rand <- function(dist){
fun <- get(paste0("r", dist))
return(fun)
}
############################################################################
######## BETA OR R FUNCTION (COST-MINIMISING SLOPE)
##### arguments: rho=disease prevalence
##### costs=cost matrix
##### value: cost-minimising slope
############################################################################
slope <- function(rho, costs=matrix(c(0,0,1,(1-rho)/rho), 2, 2, byrow=TRUE)){
c.t.pos <- costs[1, 1]
c.t.neg <- costs[1, 2]
c.f.pos <- costs[2, 1]
c.f.neg <- costs[2, 2]
beta <- ((1 - rho)/rho) * ((c.t.neg - c.f.pos)/(c.t.pos - c.f.neg))
return(beta)
}
############################################################################
######## CHECKING POSITIVE SQUARE ROOT
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: discriminant of the expression for the optimum threshold
############################################################################
sqroot <- function(k1, k2, rho, costs){
ctrl <- (mean(k2) - mean(k1))^2 + 2*log((sd(k2)/sd(k1))*slope(rho,costs))*(var(k2) - var(k1))
return(ctrl)
}
############################################################################
##### THRESHOLD DEPENDING ON THE UNDERLYING DISTRIBUTION OF THE POPULATIONS
##### COST-MINIMISING THRESHOLD ONE-VARIABLE EQUATION
##### DENSITY RATIO FORMULA
##### arguments: p=vector containing the interval extremes at which the 'uniroot' function will look for
##### the one-variable equation solution
##### dist1=choose the healthy distribution
##### dist2=choose the diseased distribution
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
##### value: the threshold one-variable equation evaluated at p
############################################################################
DensRatio2 <- function(p, dist1, dist2, par1.1, par1.2, par2.1, par2.2, rho, costs) {
ratio <- (dens(dist2)(p,par2.1,par2.2)/dens(dist1)(p,par1.1,par1.2)) - slope(rho,costs)
return(ratio)
}
############################################################################
##### "uniroot" function looks for the one-variable equation solution
##### arguments: q1=probability of the "left" distribution in order to determine a "low" quantile.
##### q2=probability of the "right" distribution in order to determine a "high" quantile.
##### dist1, dist2=choose the healthy and the diseased distributions, respectively,
##### between the following 2-parameter distributions: "beta", "cauchy",
##### "chisq" (chi-squared), "gamma", "lnorm" (lognormal), "logis" (logistic), "norm" (normal)
##### and "weibull".
##### par1.1=first parameter of the distribution chosen for the healthy population
##### par1.2=second parameter of the distribution chosen for the healthy population
##### par2.1=first parameter of the distribution chosen for the diseased population
##### par2.2=second parameter of the distribution chosen for the diseased population
##### rho=disease prevalence
##### costs=cost matrix
##### tol=tolerance to be used in uniroot. Default, 10^(-8).
##### value: an object of class "thresTH2" containing the threshold one-variable equation solution,
##### the prevalence, the cost matrix and the slope.
############################################################################
thresTH2 <- function(dist1, dist2, par1.1, par1.2, par2.1, par2.2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), R=NULL, q1=0.05, q2=0.95, tol=10^(-8)){
# error handling
if (!(rho > 0 & rho < 1)){
stop("The disease prevalence rho must be a number in (0,1)")
}
# if (!is.matrix(costs)){
# stop("'costs' must be a matrix")
# }
# if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
# stop("'costs' must be a 2x2 matrix")
# }
# costs or R
if (is.null(costs) & is.null(R)){
stop("Both 'costs' and 'R' are NULL. Please specify one of them.")
}else if (!is.null(costs) & !is.null(R)){
stop("Either 'costs' or 'R' must be NULL")
}else if (is.null(costs) & !is.null(R)){
if (!is.numeric(R) | length(R)!=1){
stop("R must be a single number")
}
costs <- matrix(c(0, 0, 1, (1-rho)/(R*rho)), 2, 2, byrow=TRUE)
}else if (!is.null(costs) & is.null(R)){
if (!is.matrix(costs)){
stop("'costs' must be a matrix")
}
if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
stop("'costs' must be a 2x2 matrix")
}
}
# function
costs.origin <- costs
rho.origin <- rho
median1 <- quant(dist1)(0.5, par1.1, par1.2)
median2 <- quant(dist2)(0.5, par2.1, par2.2)
if(median1 > median2){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- par2.1; par2.1 <- par1.1; par1.1 <- g
f <- par2.2; par2.2 <- par1.2; par1.2 <- f
auxdist <- dist2; dist2 <- dist1; dist1 <- auxdist
}
p1 <- quant(dist1)(q1, par1.1, par1.2)
p2 <- quant(dist2)(q2, par2.1, par2.2)
# threshold estimate
cut.t <- uniroot(DensRatio2,c(p1,p2),tol=tol,dist1,dist2,par1.1,par1.2,par2.1,par2.2,rho,costs,extendInt="yes")$root
# add slope
beta <- slope(rho, costs)
# check interval for R
if (dist1=="norm" & dist2=="norm" & par1.2!=par2.2){
LL <- par1.2/par2.2*exp(-(par2.1-par1.1)^2/(2*par2.2^2))
UL <- par1.2/par2.2*exp((par2.1-par1.1)^2/(2*par1.2^2))
if (!(LL<=beta & beta<=UL)) warning("The choice of costs/R leads to a cut-off that is not between the means of the two distributions")
}
# results
re <- list(thres=cut.t, prev=rho.origin, costs=costs.origin, R=beta, method="theoretical")
# return
class(re) <- "thresTH2"
return(re)
}
############################################################################
# Print function for class "thresTH2"
############################################################################
print.thresTH2 <- function(x, ...){
cat("\nThreshold:", x$thres)
cat("\n")
cat("\nParameters used")
cat("\n Disease prevalence:", x$prev)
cat("\n Costs (Ctp, Cfp, Ctn, Cfn):", x$costs)
cat("\n R:", x$R)
cat("\n")
}
##########################################################################
##### Second derivative of the cost function (2 states): just a check
##### arguments: x = thres2 object
##### value: the second derivative function
##########################################################################
secondDer2 <- function(x){
# error handling
if (!inherits(x, "thres2")){
stop("'x' must be of class 'thres2'")
}
# function
if (x$T$method=="empirical"){
stop("'x' has been computed with method='empirical', cannot compute the second derivative of the cost function")
}else if (x$T$method=="equal" | x$T$method=="unequal"){
k1 <- x$T$k1
k2 <- x$T$k2
rho <- x$T$prev
costs <- x$T$costs
Thr <- x$T$thres
par1.1 <- mean(k1)
par2.1 <- mean(k2)
par1.2 <- sd(k1)
par2.2 <- sd(k2)
n1 <- length(k1)
n2 <- length(k2)
beta <- slope(rho,costs)
de <- (n1+n2)*rho*(costs[1,1]-costs[2,2])
der <- de/sqrt(2*pi)*(((Thr-par2.1)/par2.2^3)*exp(-(Thr-par2.1)^2/(2*par2.2^2))-beta*((Thr-par1.1)/par1.2^3)*exp(-(Thr-par1.1)^2/(2*par1.2^2)))
}else if (x$T$method=="parametric"){
dist1 <- x$T$dist1
dist2 <- x$T$dist2
pars1 <- x$T$pars1
pars2 <- x$T$pars2
densratio <- function(y){
DR <- dens(dist2)(y, pars2[1], pars2[2])/dens(dist1)(y, pars1[1], pars1[2])-x$T$R
return(DR)
}
der <- grad(densratio, x$T$thres)
}
return(der)
}
###########################################################################
############ ASSUMING EQUAL VARIANCES
##########################################################################
############ FUNCTION OF POOLED VARIANCE
##########################################################################
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### value: the pooled variance of the two samples
##########################################################################
varPooled <- function(k1, k2){
n1 <- length(k1)
n2 <- length(k2)
pool <- ((n1 - 1)*var(k1) + (n2 - 1)*var(k2))/(n1 + n2 - 2)
return(pool)
}
##########################################################################
############ 1) FUNCTION OF THRESHOLD - JUND (EQUAL VARIANCES)
##########################################################################
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: the threshold estimated by the two samples considering the population variances equal
##########################################################################
thresEq2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE)){
costs.origin <- costs
k1.origin <- k1
k2.origin <- k2
rho.origin <- rho
if(mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
beta <- slope(rho, costs)
# threshold estimate
cut <- (2*varPooled(k1,k2)*log(beta) - (mean(k1)^2 - mean(k2)^2))/(2*(mean(k2) - mean(k1)))
# returning results
re <- list(thres=cut, prev=rho.origin, costs=costs.origin, R=beta, method="equal", k1=k1.origin, k2=k2.origin)
return(re)
}
##########################################################################
########### 2) FUNCTION OF THRESHOLD - JUND (UNEQUAL VARIANCES)
##########################################################################
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: the threshold estimated by the two samples considering the population variances unequal
##########################################################################
thresUn2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE)){
costs.origin <- costs
k1.origin <- k1
k2.origin <- k2
rho.origin <- rho
if(mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
ctrl <- sqroot(k1, k2, rho, costs)
if (ctrl>=0){
# threshold estimate
cut <- (var(k2)*mean(k1) - var(k1)*mean(k2) + sd(k1)*sd(k2)*sqrt(ctrl))/(var(k2) - var(k1))}
else{
cut <- NA
warning("Negative discriminant; cannot solve the second-degree equation")
}
beta <- slope(rho, costs)
# check interval for R
LL <- sd(k1)/sd(k2)*exp(-(mean(k2)-mean(k1))^2/(2*sd(k2)^2))
UL <- sd(k1)/sd(k2)*exp((mean(k2)-mean(k1))^2/(2*sd(k1)^2))
if (!(LL<=beta & beta<=UL)) warning("The choice of costs/R leads to a cut-off that is not between the means of the two distributions")
# returning results
re <- list(thres=cut, prev=rho.origin, costs=costs.origin, R=beta, method="unequal", k1=k1.origin, k2=k2.origin)
return(re)
}
##########################################################################
######### EMPIRICAL METHOD
##########################################################################
######### 3) COST-MINIMISING THRESHOLD (EMPIRICAL) FUNCTION
##########################################################################
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### extra.info=should extra information be added to the output? Default, FALSE
##### value: the threshold estimated by the two samples through the empirical estimator
##########################################################################
thresEmp2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), extra.info=FALSE){
k1.origin <- k1
k2.origin <- k2
rho.origin <- rho
costs.origin <- costs
if (mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
# cost matrix
c.t.pos <- costs[1,1]
c.t.neg <- costs[1,2]
c.f.pos <- costs[2,1]
c.f.neg <- costs[2,2]
# empirical estimation
n1 <- length(k1)
n2 <- length(k2)
n <- n1+n2
v <- c(k1, k2)
ind.origin <- c(rep(0, n1), rep(1, n2))
vi <- cbind(v, ind.origin)
ord.v <- vi[order(vi[, 1], vi[, 2]), ]
sens <- rep(NA, n)
spec <- rep(NA, n)
# sensitivity and specificity for each observation
for (j in 1:n){
cut <- ord.v[j, 1]
sens[j] <- sum(ord.v[, 1]>=cut & ord.v[, 2]==1)/n2
spec[j] <- sum(ord.v[, 1]<cut & ord.v[, 2]==0)/n1
}
# cost for each observation
D <- n*rho*(c.t.pos-c.f.neg)
R <- ((1-rho)/rho)*((c.t.neg-c.f.pos)/(c.t.pos-c.f.neg))
G <- (rho*c.f.neg+(1-rho)*c.f.pos)/(rho*(c.t.pos-c.f.neg))
cost.non.par <- D*(sens+spec*R+G)
# together
total <- data.frame(ord.v, cost.non.par, sens, spec)
# minimum cost
ind.min.cost <- which(total[, "cost.non.par"]==min(total[, "cost.non.par"]))
sens.min <- total[ind.min.cost, "sens"]
spec.min <- total[ind.min.cost, "spec"]
cut.min <- total[ind.min.cost, "v"]
cost.min <- total[ind.min.cost, "cost.non.par"]
# if there are two observations that lead to the same cost...
howmany <- length(cut.min)
if (howmany>1){
interval <- subset(total, v >= cut.min[1] & v <= cut.min[length(cut.min)])
cut.min <- mean(interval$v)
# sens, spec
sens.min <- sum(ord.v[, 1]>=cut.min & ord.v[, 2]==1)/n2
spec.min <- sum(ord.v[, 1]<cut.min & ord.v[, 2]==0)/n1
# cost
cost.min <- D*(sens.min+spec.min*R+G)
warning(paste(howmany, "observations lead to the minimum cost. The mean of the values between them is returned as threshold."), call.=FALSE)
}
# slope
beta <- slope(rho, costs)
# results
re <- list(thres=cut.min, prev=rho.origin, costs=costs.origin, R=beta, method="empirical", k1=k1.origin, k2=k2.origin, sens=sens.min, spec=spec.min, cost=cost.min)
# for plotCostROC if extra.info is TRUE
if (extra.info){
re$tot.thres <- total[, "v"]
re$tot.cost <- total[, "cost.non.par"]
re$tot.spec <- spec
re$tot.sens <- sens
}
return(re)
}
##########################################################################
######### SMOOTH METHOD
##########################################################################
##########################################################################
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: the threshold estimated by the two samples through the smooth estimator
##########################################################################
thresSmooth2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), extra.info=FALSE){
k1.origin <- k1
k2.origin <- k2
rho.origin <- rho
costs.origin <- costs
if (mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
# cost matrix
c.t.pos <- costs[1,1]
c.t.neg <- costs[1,2]
c.f.pos <- costs[2,1]
c.f.neg <- costs[2,2]
# smooth estimation
dk1 <- kde(k1)
dk2 <- kde(k2)
lmin <- mean(k1)
lmax <- mean(k2)
beta <- slope(rho, costs)
fx <- function(x) predict(dk2, x=x)-beta*predict(dk1, x=x)
out <- uniroot(fx, c(lmin, lmax))
# results
re <- list(thres=out$root, prev=rho.origin, costs=costs.origin, R=beta, method="smooth", k1=k1.origin, k2=k2.origin)
return(re)
}
#############################################################
## Print function for class "thres2"
#############################################################
print.thres2 <- function(x, ...){
if (x$T$method == "parametric"){
cat("\nEstimate:")
cat("\n Threshold: ", x$T$thres)
if (!is.null(x$CI)){
cat("\n")
cat("\nConfidence intervals (parametric bootstrap):")
cat("\n CI based on normal distribution:", x$CI$low.norm, " - ", x$CI$up.norm)
cat("\n CI based on percentiles:", x$CI$low.perc, " - ", x$CI$up.perc)
cat("\n Bootstrap resamples:", x$CI$B)
}
cat("\n")
cat("\nParameters used:")
cat("\n Disease prevalence:", x$T$prev)
cat("\n Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
cat("\n R:", x$T$R)
if (!is.null(x$CI)){
cat("\n Significance Level: ", x$CI$alpha)
}
cat("\n Method:", x$T$method)
cat("\n Distribution assumed for the healthy sample: ", x$T$dist1, "(", round(x$T$pars1[1], 2), ", ", round(x$T$pars1[2], 2), ")", sep="")
cat("\n Distribution assumed for the diseased sample: ", x$T$dist2, "(", round(x$T$pars2[1], 2), ", ", round(x$T$pars2[2], 2), ")", sep="")
cat("\n")
}
if (x$T$method == "empirical"){
cat("\nEstimate:")
cat("\n Threshold: ", x$T$thres)
cat("\n Minimum Cost: ", x$T$cost)
if (!is.null(x$CI)){
cat("\n")
cat("\nConfidence intervals (bootstrap):")
cat("\n CI based on normal distribution:", x$CI$low.norm, " - ", x$CI$up.norm)
cat("\n CI based on percentiles:", x$CI$low.perc, " - ", x$CI$up.perc)
cat("\n Bootstrap resamples:", x$CI$B)
}
cat("\n")
cat("\nParameters used:")
cat("\n Disease prevalence:", x$T$prev)
cat("\n Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
cat("\n R:", x$T$R)
cat("\n Method:", x$T$method)
if (!is.null(x$CI)){
cat("\n Significance Level:", x$CI$alpha)
}
cat("\n")
}
if (x$T$method == "smooth"){
cat("\nEstimate:")
cat("\n Threshold: ", x$T$thres)
if (!is.null(x$CI)){
cat("\n")
cat("\nConfidence intervals (bootstrap):")
cat("\n CI based on normal distribution:", x$CI$low.norm, " - ", x$CI$up.norm)
cat("\n CI based on percentiles:", x$CI$low.perc, " - ", x$CI$up.perc)
cat("\n Bootstrap resamples:", x$CI$B)
}
cat("\n")
cat("\nParameters used:")
cat("\n Disease prevalence:", x$T$prev)
cat("\n Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
cat("\n R:", x$T$R)
cat("\n Method:", x$T$method)
if (!is.null(x$CI)){
cat("\n Significance Level:", x$CI$alpha)
}
cat("\n")
}
if (x$T$method == "equal" | x$T$method == "unequal"){
cat("\nEstimate:")
cat("\n Threshold: ", x$T$thres)
cat("\n")
if (!is.null(x$CI)){
if(x$CI$ci.method == "delta"){
cat("\nConfidence interval (delta method):")
cat("\n Lower Limit:", x$CI$lower)
cat("\n Upper Limit:", x$CI$upper)
cat("\n")
}
if(x$CI$ci.method == "boot"){
cat("\nConfidence intervals (bootstrap):")
cat("\n CI based on normal distribution: ", x$CI$low.norm, " - ", x$CI$up.norm)
cat("\n CI based on percentiles: ", x$CI$low.perc, " - ", x$CI$up.perc)
cat("\n Bootstrap resamples: ", x$CI$B)
cat("\n")
}
}
cat("\nParameters used:")
cat("\n Disease prevalence:", x$T$prev)
cat("\n Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
cat("\n R:", x$T$R)
cat("\n Method:", x$T$method)
if (!is.null(x$CI)){
cat("\n Significance Level: ", x$CI$alpha)
}
cat("\n")
}
}
##########################################################################
######### GETTING PARAMETERS OF A DISTRIBUTION THROUGH fitdistr() [library MASS]
##########################################################################
getParams <- function(k, dist){
if (dist %in% c("cauchy", "gamma", "weibull")){
pars <- fitdistr(k, dist)$estimate
}else if(dist=="beta"){
# needs initial values
sigma2 <- var(k)
mu <- mean(k)
shape1.start <- ((1-mu)/sigma2-1/mu)*mu^2
shape2.start <- shape1.start*(1/mu-1)
pars <- fitdistr(k, "beta", start=list(shape1=shape1.start, shape2=shape2.start))$estimate
}else if(dist=="chisq"){
# needs initial values
sigma2 <- var(k)
mu <- mean(k)
ncp.start <- sigma2/2-mu
df.start <- mu-ncp.start
pars <- fitdistr(k, "chi-squared", start=list(df=df.start, ncp=ncp.start))$estimate
}else if(dist=="lnorm"){
pars <- fitdistr(k, "lognormal")$estimate
}else if(dist=="logis"){
pars <- fitdistr(k, "logistic")$estimate
}else if(dist=="norm"){
pars <- fitdistr(k, "normal")$estimate
}
out <- pars
return(out)
}
##########################################################################
######### THRESHOLD COMPUTATION
##########################################################################
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### method=method used in the estimation. The user can choose between:
##### "equal": assumes binormality and equal variances
##### "unequal": assumes binormality and unequal variances
##### "empirical": leaves out any distributional assumption
##### "parametric": based on the distribution assumed for the two populations.
##### The user must specify these distributions through parameters 'dist1'
##### (for the healthy population) and 'dist2' (for the healthy population).
##### They can be chosen between the following 2-parameter distributions:
##### "beta", "cauchy", "chisq" (chi-squared), "gamma", "lnorm" (lognormal),
##### "logis" (logistic), "norm" (normal) and "weibull".
##### Its parameters are estimated from the samples k1 and k2. Uses 'thresTH2()'.
##### dist1, dist2: distribution to be assumed for healthy and disease populations,
##### respectively. Ignored when method!="parametric".
##### Default, "equal". The user can specify just the initial letters.
##### ci=compute confidence interval? default, TRUE.
##### ci.method=method to be used for the confidence intervals computation. The user can
##### choose between:
##### "delta": delta method is used to estimate the threshold standard error
##### assuming a binormal underlying model.
##### "boot": the confidence interval is computed by bootstrap.
##### Default, "delta". The user can specify just just the initial letters.
##### B=number of bootstrap resamples when ci.method="boot". Otherwise, ignored.
##### Default, 1000.
##### alpha=significance level for the confidence interval. Default, 0.05.
##### extra.info=when using method="empirical", if set to T it returns
##### extra information about the computation of the threshold. Ignored when method!="empirical".
##### na.rm=a logical value indicating whether NA values in k1 and k2 should
##### be stripped before the computation proceeds. Default, FALSE.
##### value: the threshold estimated
##########################################################################
thres2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), R=NULL, method=c("equal", "unequal", "empirical", "smooth", "parametric"), dist1=NULL, dist2=NULL, ci=TRUE, ci.method=c("delta", "boot"), B=1000, alpha=0.05, extra.info=FALSE, na.rm=FALSE, q1=0.05, q2=0.95){
# error handling
if (!(rho > 0 & rho < 1)){
stop("The disease prevalence 'rho' must be a number in (0,1)")
}
# if (!is.matrix(costs)){
# stop("'costs' must be a matrix")
# }
# if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
# stop("'costs' must be a 2x2 matrix")
# }
if (!is.numeric(k1) | !is.numeric(k2)){
stop("'k1' and 'k2' must be numeric vectors")
}
if (!is.logical(ci) | is.na(ci)){
stop("'ci' must be TRUE or FALSE")
}
# costs or R
if (is.null(costs) & is.null(R)){
stop("Both 'costs' and 'R' are NULL. Please specify one of them.")
}else if (!is.null(costs) & !is.null(R)){
stop("Either 'costs' or 'R' must be NULL")
}else if (is.null(costs) & !is.null(R)){
if (!is.numeric(R) | length(R)!=1){
stop("R must be a single number")
}
costs <- matrix(c(0, 0, 1, (1-rho)/(R*rho)), 2, 2, byrow=TRUE)
}else if (!is.null(costs) & is.null(R)){
if (!is.matrix(costs)){
stop("'costs' must be a matrix")
}
if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
stop("'costs' must be a 2x2 matrix")
}
}
# NAs handling
if (na.rm){
k1 <- k1[!is.na(k1)]
k2 <- k2[!is.na(k2)]
}
# function
method <- match.arg(method)
ci.method <- match.arg(ci.method)
if (method=="equal"){
T <- thresEq2(k1, k2, rho, costs)
if (ci){
if (ci.method=="delta"){
CI <- icDeltaEq2(k1, k2, rho, costs, T$thres, a=alpha)
}
if (ci.method=="boot"){
CI <- icBootEq2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
}
}else{
CI <- NULL
}
}
if (method=="unequal"){
T <- thresUn2(k1, k2, rho, costs)
if (ci){
if (ci.method=="delta"){
CI <- icDeltaUn2(k1, k2, rho, costs, T$thres, a=alpha)
}
if (ci.method=="boot"){
CI <- icBootUn2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
}
}else{
CI <- NULL
}
}
if (method=="empirical"){
if (ci.method=="delta" & ci){
stop("When method='empirical', CIs cannot be computed based on delta method (choose ci.method='boot')")
}
T <- thresEmp2(k1, k2, rho, costs, extra.info)
if (ci){
CI <- icEmp2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
}else{
CI <- NULL
}
}
if (method=="smooth"){
if (ci.method=="delta" & ci){
stop("When method='smooth', CIs cannot be computed based on delta method (choose ci.method='boot')")
}
T <- thresSmooth2(k1, k2, rho, costs)
if (ci){
CI <- icSmooth2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
}else{
CI <- NULL
}
}
if (method=="parametric"){
# error handling
if (ci.method=="delta" & ci){
stop("When method='parametric', CIs cannot be computed based on delta method (choose ci.method='boot')")
}
if (is.null(dist1) | is.null(dist2)){
stop("When method='parametric', 'dist1' and 'dist2' must be specified")
}
if (dist1=="norm" & dist2=="norm"){
stop("When assuming a binormal distribution, choose method='equal' or 'unequal'")
}
if (!(dist1 %in% c("beta", "cauchy", "chisq", "gamma", "lnorm", "logis", "nbinom", "norm", "weibull"))){
stop("Unsupported distribution for 'dist1'")
}
if (!(dist2 %in% c("beta", "cauchy", "chisq", "gamma", "lnorm", "logis", "nbinom", "norm", "weibull"))){
stop("Unsupported distribution for 'dist2'")
}
# parameter estimation through 'fitdistr()'
# dist1
pars1 <- getParams(k1, dist1)
# dist2
pars2 <- getParams(k2, dist2)
# threshold+CI estimation
T <- thresTH2(dist1, dist2, pars1[1], pars1[2], pars2[1], pars2[2], rho, costs, q1=q1, q2=q2)
T <- unclass(T)
T$method <- "parametric"
# adding some information to the output
T$k1 <- k1
T$k2 <- k2
T$dist1 <- dist1
T$dist2 <- dist2
T$pars1 <- pars1
T$pars2 <- pars2
# confidence interval by parametric bootstrap
if (ci){
CI <- icBootTH(dist1, dist2, pars1[1], pars1[2], pars2[1], pars2[2], length(k1), length(k2), rho, costs, T$thres, B=B, a=alpha)
}else{
CI <- NULL
}
}
out <- list(T=T, CI=CI)
class(out) <- "thres2"
return(out)
}
##########################################################################
############ VARIANCE ESTIMATORS
############ EQUAL VARIANCES
##########################################################################
############ FUNCTION OF ML VARIANCE OF VARIANCE ESTIMATOR
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### value: Maximum-Likelihood variance of variance estimator (equal variances assumed)
##########################################################################
varVarEq <- function(k1, k2){
n1 <- length(k1)
n2 <- length(k2)
est <- 2*(varPooled(k1,k2))^2/(n1 + n2 - 1)
return(est)
}
##########################################################################
############ FUNCTION OF Maximum Likelihood VARIANCE OF MEAN ESTIMATOR (DIS+NON.DIS)
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### t=size of the sample whose mean estimator variance we wish to estimate
##### value: Maximum-Likelihood variance of mean estimator (equal variances assumed)
##########################################################################
varMeanEq <- function(k1, k2, t){
est <- varPooled(k1, k2)/t
return(est)
}
##########################################################################
##########################################################################
############ PARTIAL DERIVATIVES... (Skaltsa et al 2010)
##########################################################################
############ ...OF COMMON VARIANCE
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased or healthy population
##### variance
##########################################################################
derVarEq <- function(k1, k2, rho, costs){
est <- log(slope(rho,costs))/(mean(k2) - mean(k1))
return(est)
}
##########################################################################
############ ...OF DISEASED MEAN
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population mean
##########################################################################
derMeanDisEq <- function(k1, k2, rho, costs){
est <- 1/2 - (varPooled(k1,k2)*log(slope(rho,costs)))/((mean(k2) - mean(k1))^2)
return(est)
}
###################################################################################
############ ...OF NON-DISEASED MEAN
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population variance
##################################################################################
derMeanNDisEq <- function(k1, k2, rho, costs){
est <- 1/2 + (varPooled(k1,k2)*log(slope(rho,costs)))/((mean(k2) - mean(k1))^2)
return(est)
}
##################################################################################
############ 1) FUNCTION OF VARIANCE OF THRESHOLD (EQUAL VARIANCES)
############ DELTA METHOD
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=diseased sample size
##### costs=cost matrix
##### value: delta method threshold variance (assuming equal variances)
##################################################################################
varDeltaEq2 <- function(k1, k2, rho, costs){
if(mean(k1) > mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
# n's
n1 <- length(k1)
n2 <- length(k2)
# matrices
d <- matrix(c(derMeanNDisEq(k1, k2, rho, costs), derMeanDisEq(k1, k2, rho, costs), derVarEq(k1, k2, rho, costs)), byrow=T, ncol=3, nrow=1)
sigma <- matrix(c(varMeanEq(k1, k2, n2), 0, 0, 0, varMeanEq(k1, k2, n1), 0, 0, 0, varVarEq(k1, k2)), byrow=T, ncol=3, nrow=3)
# Var(T)=d*sigma*d^T, Skaltsa et al 2010
est <- d%*%sigma%*%t(d)
return(est)
}
##################################################################################
############ CONFIDENCE INTERVAL FUNCTION 1 - EQUAL VARIANCES
############ DELTA METHOD-NORMAL APPROXIMATION
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=diseased sample size
##### costs=cost matrix
##### Thres=threshold point estimate
##### a=significance level
##### value: confidence interval limits
##################################################################################
icDeltaEq2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, a=0.05){
stdev <- sqrt(varDeltaEq2(k1, k2, rho, costs))
ic1 <- Thres + qnorm(a/2)*stdev
ic2 <- Thres + qnorm(1-a/2)*stdev
ic <- list(lower=ic1, upper=ic2, se=stdev, alpha=a, ci.method="delta")
return(ic)
}
##################################################################################
############ VARIANCES OF ESTIMATORS
############ UNEQUAL VARIANCES
##################################################################################
############ FUNCTION OF ML VARIANCE OF VARIANCE ESTIMATOR
##### arguments: k=vector containing the sample values
##### t=sample size
##### value: Maximum-Likelihood variance of variance estimator (unequal variances assumed)
##################################################################################
varVarUn <- function(k, t){
est <- 2*(var(k))^2/(t - 1)
return(est)
}
##################################################################################
############ FUNCTION OF ML VARIANCE OF MEAN ESTIMATOR (DIS+NON.DIS)
##### arguments: k=vector containing the sample values
##### t=sample size
##### value: Maximum-Likelihood variance of mean estimator (unequal variances assumed)
##################################################################################
varMeanUn <- function(k, t){
est <- var(k)/t
return(est)
}
#################################################################################
############ PARTIAL DERIVATIVES...
#################################################################################
############ ...OF DISEASED MEAN
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=diseased sample size
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population mean
##### (assuming unequal variances)
#################################################################################
derMeanDisUn <- function(k1, k2, rho, costs){
est <- (sd(k2)*sd(k1)*(mean(k2) - mean(k1))/sqrt((mean(k2) - mean(k1))^2+2*(var(k2) - var(k1))*log(sd(k2)*slope(rho,costs)/sd(k1))) - var(k1))/(var(k2) - var(k1))
return(est)
}
#################################################################################
############ ...OF NON-DISEASED MEAN
##### arguments: k1=vector containing the healthy sample values
##### k2= vector containing the diseased sample values
##### rho=diseased sample size
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the healthy population mean
##### (assuming unequal variances)
#################################################################################
derMeanNDisUn <- function(k1, k2, rho, costs){
est <- (var(k2) - sd(k2)*sd(k1)*(mean(k2) - mean(k1))/sqrt((mean(k2) - mean(k1))^2+2*(var(k2) - var(k1))*log(sd(k2)*slope(rho,costs)/sd(k1))))/(var(k2) - var(k1))
return(est)
}
#################################################################################
############ ...OF DISEASED VARIANCE
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population variance
##### (assuming unequal variances)
#################################################################################
derVarDisUn <- function(k1, k2, rho, costs){
beta <- slope(rho,costs)
est <- (sd(k1)*sd(k2)*((var(k2) - var(k1))/var(k2) + 2*log(beta*sd(k2)/sd(k1)))/(2*sqrt(2*log(beta*sd(k2)/sd(k1))*(var(k2) - var(k1))+(mean(k2) - mean(k1))^2)) + sd(k1)*sqrt(2*log(beta*sd(k2)/sd(k1))*(var(k2) - var(k1)) + (mean(k2) - mean(k1))^2)/(2*sd(k2)) + mean(k1)) /(var(k2) - var(k1)) - (sd(k1)*sd(k2)*sqrt(2*log(beta*sd(k2)/sd(k1))*(var(k2) - var(k1))+(mean(k2)-mean(k1))^2)+mean(k1)*var(k2)-mean(k2)*var(k1))/(var(k2)-var(k1))^2
return(est)
}
#################################################################################
############ ...OF NON-DISEASED VARIANCE
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: threshold estimator partial derivative with respect to the healthy population variance
##### (assuming unequal variances)
#################################################################################
derVarNDisUn <- function(k1, k2, rho, costs){
beta <- slope(rho, costs)
est <- (-mean(k2)*var(k1)+sd(k2)*sqrt(2*log(sd(k2)*beta/sd(k1))*(var(k2)-var(k1))+(mean(k2)-mean(k1))^2)*sd(k1)+mean(k1)*var(k2)) /(var(k2)-var(k1))^2 +(sd(k2)*(-(var(k2)-var(k1))/var(k1)-2*log(sd(k2)*beta/sd(k1)))*sd(k1) /(2*sqrt(2*log(sd(k2)*beta/sd(k1))*(var(k2)-var(k1))+(mean(k2)-mean(k1))^2)) +sd(k2)*sqrt(2*log(sd(k2)*beta/sd(k1))*(var(k2)-var(k1))+(mean(k2)-mean(k1))^2)/(2*sd(k1))-mean(k2)) /(var(k2)-var(k1))
return(est)
}
#################################################################################
############ 2) FUNCTION OF VARIANCE OF THRESHOLD (UNEQUAL VARIANCES)
############ DELTA METHOD
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=disease prevalence
##### costs=cost matrix
##### value: delta method threshold variance (assuming unequal variances)
#################################################################################
varDeltaUn2 <- function(k1, k2, rho, costs){
if(mean(k1) > mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
# n's
n1 <- length(k1)
n2 <- length(k2)
ctrl <- sqroot(k1,k2,rho,costs)
if(ctrl < 0){
est <- NA
warning("Negative discriminant; cannot solve the second-degree equation")
}else{
# matrices
d <- matrix(c(derMeanNDisUn(k1, k2, rho, costs), derMeanDisUn(k1, k2, rho, costs), derVarNDisUn(k1, k2, rho, costs), derVarDisUn(k1, k2, rho, costs)), byrow=T, ncol=4, nrow=1)
sigma <- matrix(c(varMeanUn(k1, n1), 0, 0, 0, 0, varMeanUn(k2, n2), 0, 0, 0, 0, varVarUn(k1, n1), 0, 0, 0, 0, varVarUn(k2, n2)), byrow=T, ncol=4, nrow=4)
# Var(T) following Skaltsa et al 2010
est <- d%*%sigma%*%t(d)
}
return(est)
}
#################################################################################
############ FUNCTION OF CONFIDENCE INTERVAL 1 - UNEQUAL
############ DELTA METHOD-NORMAL APPROXIMATION
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### rho=diseased sample size
##### costs=cost matrix
##### a=significance level
##### value: confidence interval limits
##################################################################################
icDeltaUn2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, a=0.05){
stdev <- sqrt(varDeltaUn2(k1, k2, rho, costs))
ic1 <- Thres + qnorm(a/2)*stdev
ic2 <- Thres + qnorm(1-a/2)*stdev
ic <- list(lower=ic1, upper=ic2, se=stdev, alpha=a, ci.method="delta")
return(ic)
}
################################################################################
############ DATA GENERATION
############ NON-PARAMETRIC RESAMPLING
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### B=number of bootstrap resamples
##### value: two-object list [[1]]: healthy resample matrix, [[2]]: diseased resample matrix
################################################################################
resample2 <- function(k1, k2, B){
n1 <- length(k1)
n2 <- length(k2)
t0 <- matrix(sample(k1, n1*B, replace=TRUE), nrow=n1) # B resamples of k1 with replacement
t1 <- matrix(sample(k2, n2*B, replace=TRUE), nrow=n2) # B resamples of k2 with replacement
t <- list(t0, t1)
return(t)
}
###################################################################################
###################################################################################
############ CONFIDENCE INTERVALS
###################################################################################
############ BOOTSTRAP
###################################################################################
###################################################################################
############### FUNCTION OF BOOTSTRAP CONFIDENCE INTERVALS
############### EQUAL VARIANCES
########## 1) BOOTSTRAP VERSION OF S.E.- NORMAL
########## 2) PERCENTILE BIAS-CORRECTED
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### B=number of bootstrap resamples
##### a=significance level
##### rho=disease prevalence
##### costs=cost matrix
##### returns: the 2 interval limits
##################################################################################
icBootEq2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, B, a=0.05){
if(mean(k1) > mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
# resamples
t <- resample2(k1, k2, B)
t0 <- t[[1]]
t1 <- t[[2]]
# bootstrap
cut <- sapply(1:B,function(i){thresEq2(t0[,i],t1[,i],rho,costs)[[1]]})
est.se <- sd(cut)
###### 1) NORMAL-BOOTSTRAP SE
norm.bootSE <- c(Thres + qnorm(a/2)*est.se, Thres + qnorm(1-a/2)*est.se)
###### 2) PERCENTILE
percentil <- (c(quantile(cut,a/2), quantile(cut,1-a/2)))
# results
re <- list(low.norm=norm.bootSE[1], up.norm=norm.bootSE[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
return(re)
}
###################################################################################
###################################################################################
############### BOOTSTRAP CONFIDENCE INTERVAL FUNCTION
############### UNEQUAL VARIANCES
########## 1) BOOTSTRAP VERSION OF S.E.- NORMAL
########## 2) PERCENTILE
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### B=number of bootstrap resamples
##### a=significance level
##### rho=disease prevalence
##### costs=cost matrix
##### value: the 2 interval limits
##################################################################################
icBootUn2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, B, a=0.05){
if(mean(k1) > mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
t <- resample2(k1, k2, B)
t0 <- t[[1]]
t1 <- t[[2]]
cut <- sapply(1:B,function(i){thresUn2(t0[,i],t1[,i],rho,costs)[[1]]})
est.se <- sd(na.omit(cut))
###### 1) NORMAL-BOOTSTRAP SE
norm.bootSE <- c(Thres + qnorm(a/2)*est.se, Thres + qnorm(1-a/2)*est.se)
###### 2) PERCENTILE
percentil <- (c(quantile(na.omit(cut),a/2), quantile(na.omit(cut),1-a/2)))
# results
re <- list(low.norm=norm.bootSE[1], up.norm=norm.bootSE[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
return(re)
}
##################################################################################
##################################################################################
################ BOOTSTRAP CONFIDENCE INTERVAL
################ EMPIRICAL
##################################################################################
############ 1) BOOTSTRAP VERSION OF S.E.- NORMAL
############ 2) BOOTSTRAP PERCENTILE
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### B=number of bootstrap resamples
##### a=significance level
##### rho=disease prevalence
##### costs=cost matrix
##### value: the 2 interval limits
##################################################################################
icEmp2 <- function(k1,k2,rho,costs=matrix(c(0,0,1,(1-rho)/rho),2,2, byrow=TRUE),Thres,B=500,a=0.05){
if(mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
t <- resample2(k1, k2, B)
t0 <- t[[1]]
t1 <- t[[2]]
cut <- rep(NA,B)
cut <- suppressWarnings(sapply(1:B,function(j){thresEmp2(t0[,j],t1[,j],rho,costs)[[1]]}))
est.se <- sd(cut)
###### 1) NORMAL-BOOTSTRAP SE
norm <- c(Thres+qnorm(a/2)*est.se, Thres+qnorm(1-a/2)*est.se)
###### 2) PERCENTILE
percentil <- (c(quantile(cut,a/2), quantile(cut,1-a/2)))
# results
re <- list(low.norm=norm[1], up.norm=norm[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
return(re)
}
##################################################################################
##################################################################################
################ BOOTSTRAP CONFIDENCE INTERVAL
################ SMOOTH
##################################################################################
############ 1) BOOTSTRAP VERSION OF S.E.- NORMAL
############ 2) BOOTSTRAP PERCENTILE
##### arguments: k1=vector containing the healthy sample values
##### k2=vector containing the diseased sample values
##### B=number of bootstrap resamples
##### a=significance level
##### rho=disease prevalence
##### costs=cost matrix
##### value: the 2 interval limits
##################################################################################
icSmooth2 <- function(k1,k2,rho,costs=matrix(c(0,0,1,(1-rho)/rho),2,2, byrow=TRUE),Thres,B=500,a=0.05){
if(mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
}
t <- resample2(k1, k2, B)
t0 <- t[[1]]
t1 <- t[[2]]
cut <- rep(NA,B)
cut <- suppressWarnings(sapply(1:B,function(j){thresSmooth2(t0[,j],t1[,j],rho,costs)[[1]]}))
est.se <- sd(cut)
###### 1) NORMAL-BOOTSTRAP SE
norm <- c(Thres+qnorm(a/2)*est.se, Thres+qnorm(1-a/2)*est.se)
###### 2) PERCENTILE
percentil <- (c(quantile(cut,a/2), quantile(cut,1-a/2)))
# results
re <- list(low.norm=norm[1], up.norm=norm[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
return(re)
}
################################################################################
############ DATA GENERATION
############ PARAMETRIC RESAMPLING
##### arguments: dist1, dist2=distributions assumed for the healthy and diseased population, respectively.
##### B=number of bootstrap resamples
##### value: two-object list [[1]]:healthy resample matrix, [[2]]:diseased resample matrix
################################################################################
aux.par.boot <- function(dist1, dist2, par1.1, par1.2, par2.1, par2.2, n1, n2, B){
t0 <- matrix(rand(dist1)(n1*B, par1.1, par1.2), nrow=n1) # B resamples of dist1(par1.1, par1.2) with replacement
t1 <- matrix(rand(dist2)(n2*B, par2.1, par2.2), nrow=n2) # B resamples of dist2(par2.1, par2.2) with replacement
t <- list(t0,t1)
return(t)
}
###################################################################################
###################################################################################
############### FUNCTION OF PARAMETRIC BOOTSTRAP FOR THEORETICAL METHOD
########## 1) BOOTSTRAP VERSION OF S.E.- NORMAL
########## 2) PERCENTILE BIAS-CORRECTED
##### arguments: dist1, dist2=distributions assumed for the healthy and diseased population, respectively.
##### par1.1, par1.2, par2.1, par2.2, n1, n2=parameters and sample sizes
##### B=number of bootstrap resamples
##### a=significance level
##### rho=disease prevalence
##### costs=cost matrix
##### returns: the 2 interval limits
##################################################################################
icBootTH <- function(dist1, dist2, par1.1, par1.2, par2.1, par2.2, n1, n2, rho, costs=matrix(c(0,0,1,(1-rho)/rho), 2, 2, byrow=TRUE), Thres, B=500, a=0.05){
median1 <- quant(dist1)(0.5, par1.1, par1.2)
median2 <- quant(dist2)(0.5, par2.1, par2.2)
if(median1 > median2){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- par2.1; par2.1 <- par1.1; par1.1 <- g
f <- par2.2; par2.2 <- par1.2; par1.2 <- f
auxdist <- dist2; dist2 <- dist1; dist1 <- auxdist
}
# bootstrap resamples
t <- aux.par.boot(dist1, dist2, par1.1, par1.2, par2.1, par2.2, n1, n2, B)
t0 <- t[[1]]
t1 <- t[[2]]
# computing threshold of the resamples...
# 1) parameter estimation for each resample
pars1 <- sapply(1:B, function(i){getParams(t0[, i], dist1)})
pars2 <- sapply(1:B, function(i){getParams(t1[, i], dist2)})
# 2) TH cut
cut <- sapply(1:B,function(i){thresTH2(dist1, dist2, pars1[1, i], pars1[2, i], pars2[1, i], pars2[2, i], rho, costs)[[1]]})
# sd
est.se <- sd(na.omit(cut))
###### 1) NORMAL-BOOTSTRAP SE
norm.bootSE <- c(Thres + qnorm(a/2)*est.se, Thres + qnorm(1-a/2)*est.se)
###### 2) PERCENTILE
percentil <- (c(quantile(na.omit(cut),a/2), quantile(na.omit(cut),1-a/2)))
# results
re <- list(low.norm=norm.bootSE[1], up.norm=norm.bootSE[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
return(re)
}
##################################################################################
###### PLOTCOSTROC ("thres2" and "thres3" classes)
##################################################################################
###### COST FUNCTION PLOT + ROC CURVE, OPTIMAL THRESHOLD, SENSITIVITY, SPECIFICITY (2-state)
###### 2-states: plotCost function provides two graphics:
###### 1) the cost function with the cost minimising threshold in red
###### 2) the ROC curve with the sensitivity and specificity achieved in red
###### 3-states: plotCost function provides two graphics:
###### The contribution of both thresholds to the cost function, with the minimising thresholds in res
###### arguments:
###### x: 'thres2' or 'thres3' object
###### type: type of pline for plots
##################################################################################
plotCostROC <- function(x, type="l", which.plot=c("both", "cost", "roc"), ...){
which.plot <- match.arg(which.plot)
# error handling
if (!inherits(x, c("thres2", "thres3"))){
stop("'x' must be a 'thres2' or 'thres3' object")
}
oldpar <- par(no.readonly=TRUE)
on.exit(par(oldpar))
if (inherits(x, "thres2")){ # 2 states
if (x$T$method == "smooth"){
k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
# changes
changed <- FALSE
if (mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
changed <- TRUE
}
# plot limits
k <- c(x$T$k1, x$T$k2)
rangex <- range(k)
xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
if (which.plot %in% c("both", "cost")){
# plot 1: cost function
y <- suppressWarnings(thres2(x$T$k1, x$T$k2, x$T$prev, x$T$costs, method="empirical", extra.info=TRUE, ci=FALSE))
xx <- y$T$tot.thres
yy <- y$T$tot.cost
lo <- loess(yy~xx)
pred <- predict(lo)
plot(xx, yy, xlim=xlim, ylim=c(min(pred), max(pred)), type="n", xlab="t", ylab="cost(t)")
lines(xx, pred, ...)
points(x$T$thres, predict(lo, x$T$thres), col=2, pch=19)
}
if (which.plot %in% c("both", "roc")){
if (which.plot=="both") par(ask=T)
# plot 2: ROC curve
CUT <- x$T$thres
if (!changed){
resp.CUT <- ifelse(k<CUT, 0, 1)
}else{
resp.CUT <- ifelse(k>CUT, 0, 1)
}
resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
resp.CUT <- factor(resp.CUT, c("0", "1"))
roc <- smooth(roc(response=resp, predictor=k, quiet=TRUE))
plot(roc)
if (which.plot=="both") par(ask=F)
}
}
if (x$T$method == "empirical"){
if (length(x$T) != 14){
stop("use argument 'extra.info = TRUE' in 'thres2()'")
}
# plots
if (which.plot %in% c("both", "cost")){
# plot 1: empirical cost function
plot(x$T$tot.thres, x$T$tot.cost, xlab="Threshold", ylab="Cost", main="Empirical Cost function", type=type, ...)
points(x$T$thres, x$T$cost, col=2, pch=19)
}
if (which.plot %in% c("both", "roc")){
if (which.plot=="both") par(ask=T)
# plot 2: empirical ROC curve
plot(1-x$T$tot.spec,x$T$tot.sens, main="Empirical ROC curve", xlab="1-Specificity", ylab="Sensitivity", type=type, ...)
points(1-x$T$spec, x$T$sens, col=2, pch=19)
if (which.plot=="both") par(ask=F)
}
}
if (x$T$method =="equal"){
k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
# changes
changed <- FALSE
if (mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
changed <- TRUE
}
# plot limits
k <- c(x$T$k1, x$T$k2)
rangex <- range(k)
xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
# cost function
cost <- function(t){
sd <- sqrt(varPooled(k1, k2))
TP <- (1-pnorm(t, mean(k2), sd))*rho
FN <- (pnorm(t, mean(k2), sd))*rho
FP <- (1-pnorm(t, mean(k1), sd))*(1-rho)
TN <- (pnorm(t, mean(k1), sd))*(1-rho)
n <- length(k1)+length(k2)
C <- n*(TP*costs[1,1]+FN*costs[2, 2]+FP*costs[2, 1]+TN*costs[1, 2])
return(as.numeric(C))
}
if (which.plot %in% c("both", "cost")){
# plot 1: cost function
plot(cost, xlim=xlim, type=type, xlab="t", ylab="cost(t)", ...)
points(x$T$thres, cost(x$T$thres), col=2, pch=19)
}
if (which.plot %in% c("both", "roc")){
if (which.plot=="both") par(ask=T)
# plot 2: ROC curve
CUT <- x$T$thres
if (!changed){
resp.CUT <- ifelse(k<CUT, 0, 1)
}else{
resp.CUT <- ifelse(k>CUT, 0, 1)
}
resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
resp.CUT <- factor(resp.CUT, c("0", "1"))
roc <- roc(response=resp, predictor=k, quiet=TRUE)
plot(roc)
# add sens and spec given by the threshold
tab <- table(resp.CUT, resp)[2:1, 2:1]
SENS <- tab[1, 1]/(tab[1, 1]+tab[2, 1])
SPEC <- tab[2, 2]/(tab[2, 2]+tab[1, 2])
points(SPEC, SENS, col=2, pch=19)
if (which.plot=="both") par(ask=F)
}
}
if (x$T$method=="unequal"){
k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
# changes
changed <- FALSE
if (mean(k1)>mean(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
changed <- TRUE
}
# plot limits
k <- c(x$T$k1, x$T$k2)
rangex <- range(k)
xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
# cost function
cost <- function(t){
sd1 <- sd(k1)
sd2 <- sd(k2)
TP <- (1-pnorm(t, mean(k2), sd2))*rho
FN <- (pnorm(t, mean(k2), sd2))*rho
FP <- (1-pnorm(t, mean(k1), sd1))*(1-rho)
TN <- (pnorm(t, mean(k1), sd1))*(1-rho)
n <- length(k1)+length(k2)
C <- n*(TP*costs[1,1]+FN*costs[2, 2]+FP*costs[2, 1]+TN*costs[1, 2])
return(as.numeric(C))
}
if (which.plot %in% c("both", "cost")){
# plot 1: cost function
plot(cost, xlim=xlim, type=type, xlab="t", ylab="cost(t)", ...)
points(x$T$thres, cost(x$T$thres), col=2, pch=19)
}
if (which.plot %in% c("both", "roc")){
if (which.plot=="both") par(ask=T)
# plot 2: ROC curve
CUT <- x$T$thres
if (!changed){
resp.CUT <- ifelse(k<CUT, 0, 1)
}else{
resp.CUT <- ifelse(k>CUT, 0, 1)
}
resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
resp.CUT <- factor(resp.CUT, c("0", "1"))
roc <- roc(response=resp, predictor=k, quiet=TRUE)
plot(roc)
# add sens and spec given by the threshold
tab <- table(resp.CUT, resp)[2:1, 2:1]
SENS <- tab[1, 1]/(tab[1, 1]+tab[2, 1])
SPEC <- tab[2, 2]/(tab[2, 2]+tab[1, 2])
points(SPEC, SENS, col=2, pch=19)
if (which.plot=="both") par(ask=F)
}
}
if (x$T$method=="parametric"){
k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
dist1 <- x$T$dist1; dist2 <- x$T$dist2
par1.1 <- x$T$pars1[1]; par1.2 <- x$T$pars1[2]; par2.1 <- x$T$pars2[1]; par2.2 <- x$T$pars2[2]
# changes
changed <- FALSE
if (median(k1)>median(k2)){
rho <- 1-rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- k1; k1 <- k2; k2 <- g
changed <- TRUE
g <- par2.1; par2.1 <- par1.1; par1.1 <- g
f <- par2.2; par2.2 <- par1.2; par1.2 <- f
auxdist <- dist2; dist2 <- dist1; dist1 <- auxdist
}
# plot limits
k <- c(x$T$k1, x$T$k2)
rangex <- range(k)
xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
# cost function
cost <- function(t){
TP <- (1-p(dist2)(t, par2.1, par2.2))*rho
FN <- (p(dist2)(t, par2.1, par2.2))*rho
FP <- (1-p(dist1)(t, par1.1, par1.2))*(1-rho)
TN <- (p(dist1)(t, par1.1, par1.2))*(1-rho)
n <- length(k1)+length(k2)
C <- n*(TP*costs[1,1]+FN*costs[2, 2]+FP*costs[2, 1]+TN*costs[1, 2])
return(as.numeric(C))
}
if (which.plot %in% c("both", "cost")){
# plot 1: cost function
plot(cost, xlim=xlim, type=type, xlab="t", ylab="cost(t)", ...)
points(x$T$thres, cost(x$T$thres), col=2, pch=19)
}
if (which.plot %in% c("both", "roc")){
if (which.plot=="both") par(ask=T)
# plot 2: ROC curve
CUT <- x$T$thres
if (!changed){
resp.CUT <- ifelse(k<CUT, 0, 1)
}else{
resp.CUT <- ifelse(k>CUT, 0, 1)
}
resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
resp.CUT <- factor(resp.CUT, c("0", "1"))
roc <- roc(response=resp, predictor=k, quiet=TRUE)
plot(roc)
# add sens and spec given by the threshold
tab <- table(resp.CUT, resp)[2:1, 2:1]
SENS <- tab[1, 1]/(tab[1, 1]+tab[2, 1])
SPEC <- tab[2, 2]/(tab[2, 2]+tab[1, 2])
points(SPEC, SENS, col=2, pch=19)
if (which.plot=="both") par(ask=F)
}
}
}else{ # 3-states
k <- c(x$T$k1, x$T$k2, x$T$k3)
rangex <- range(k)
xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
# cost function
dist1 <- x$T$dist1; dist2 <- x$T$dist2; dist3 <- x$T$dist3
rho <- x$T$prev; costs <- x$T$costs
if (dist1 =="norm" & dist2=="norm" & dist3=="norm"){
par1.1 <- mean(x$T$k1); par1.2 <- sd(x$T$k1)
par2.1 <- mean(x$T$k2); par2.2 <- sd(x$T$k2)
par3.1 <- mean(x$T$k3); par3.2 <- sd(x$T$k3)
}else{
par1.1 <- x$T$pars1[1]; par1.2 <- x$T$pars1[2]
par2.1 <- x$T$pars2[1]; par2.2 <- x$T$pars2[2]
par3.1 <- x$T$pars3[1]; par3.2 <- x$T$pars3[2]
}
cost.t1 <- function(t){
aux1 <- rho[1]*p(dist1)(t, par1.1, par1.2)*(costs[1, 1]-costs[1, 2])
aux2 <- rho[2]*p(dist2)(t, par2.1, par2.2)*(costs[2, 1]-costs[2, 2])
aux3 <- rho[3]*p(dist3)(t, par3.1, par3.2)*(costs[3, 1]-costs[3, 2])
C <- aux1+aux2+aux3
return(as.numeric(C))
}
cost.t2 <- function(t){
aux1 <- rho[1]*p(dist1)(t, par1.1, par1.2)*(costs[1, 2]-costs[1, 3])
aux2 <- rho[2]*p(dist2)(t, par2.1, par2.2)*(costs[2, 2]-costs[2, 3])
aux3 <- rho[3]*p(dist3)(t, par3.1, par3.2)*(costs[3, 2]-costs[3, 3])
C <- aux1+aux2+aux3
return(as.numeric(C))
}
# plot 1: C(T1)
plot(cost.t1, xlim=xlim, type=type, ylab="Cost(thres1)", xlab="thres1", ...)
points(x$T$thres1, cost.t1(x$T$thres1), col=2, pch=19)
par(ask=T)
# plot 2: C(T2)
plot(cost.t2, xlim=xlim, type=type, ylab="Cost(thres2)", xlab="thres2", ...)
points(x$T$thres2, cost.t2(x$T$thres2), col=2, pch=19)
par(ask=F)
}
}
##################################################################################
###### DENSITY PLOT
##################################################################################
###### plot.thres2 function provides a graphic including the sample densities (diseased
###### and non-diseased populations), the threshold and its confidence interval
###### arguments:
###### x: 'thres2' object
###### bw: vector containing the bandwith for the non-diseased sample in the 1st
###### position and the bandwith for the diseased sample in the 2nd position
###### (to be passed to 'density()'). Default, c("nrd0", "nrd0").
###### ci: should the confidence interval be plotted? Default, T.
###### which.boot: in case 'x' contains CI computed by bootstrapping, which one should be printed?
###### the user can choose between "norm" (based on normal distribution)
###### or "perc" (based on percentiles). Default, "norm". This argument
###### is ignored if the CI were computed by the delta method.
###### col: 3-dimensional vector:
###### col[1]: color for the density of the non-diseased sample
###### col[2]: color for the density for the diseased sample
###### col[3]: color for the threshold and its corresponding CI
###### Default, c(1, 2, 3).
###### lty: 4-dimensional vector:
###### lty[1]: line type for the density of the non-diseased sample
###### lty[2]: line type for the density of the diseased sample
###### lty[3]: line type for the threshold
###### lty[4]: line type for the CI
###### Default, c(1, 1, 1, 2).
###### lwd: 3-dimensional vector:
###### lwd[1]: line width for the density of the non-diseased sample
###### lwd[2]: line width for the density for the diseased sample
###### lwd[3]: line width for the threshold and its corresponding CI
###### Default, c(1, 1, 1).
###### legend: logical asking if an automatic legend should be added to the graph. Default, TRUE.
###### leg.pos: position of the legend. Default, "topleft". Ignored if legend=FALSE.
###### leg.cex: a number that reescales the size of the legend. Ignored if legend=FALSE. Default, 1.
###### xlim, ylim: 2-dimensional vectors indicating the limits for the plot
###### main, xlab...: further arguments to be passed to 'plot()'.
##################################################################################
plot.thres2 <- function(x, bw=c("nrd0", "nrd0"), ci=TRUE, which.boot=c("norm", "perc"), col=c(1, 2, 3), lty=c(1, 1, 1, 2), lwd=c(1, 1, 1), legend=TRUE, leg.pos="topleft", leg.cex=1, xlim=NULL, ylim=NULL, main=paste0("Threshold estimate ", ifelse(ci, "and CI ", ""), "(method ", x$T$method, ")"), xlab="", ...){
# error handling
if (!is.logical(ci) | is.na(ci)){
stop("'ci' must be TRUE or FALSE")
}
if (!is.logical(legend) | is.na(legend)){
stop("'legend' must be TRUE or FALSE")
}
if (!is.null(xlim)){
if (any(is.na(xlim)) | length(xlim)!=2){
stop("'xlim' must be NULL or a 2-dimensional vector containing no NAs")
}
}
if (!is.null(ylim)){
if (any(is.na(ylim)) | length(ylim)!=2){
stop("'ylim' must be NULL or a 2-dimensional vector containing no NAs")
}
}
# recycle col, lty, lwd
if (length(col)!=3){
col <- rep_len(col, 3)
}
if (length(lty)!=4){
lty <- rep_len(lty, 4)
}
if (length(lwd)!=3){
lwd <- rep_len(lwd, 3)
}
which.boot <- match.arg(which.boot)
k1 <- x$T$k1
k2 <- x$T$k2
dens.k1 <- density(k1, bw=bw[1])
dens.k2 <- density(k2, bw=bw[2])
if (is.null(xlim)){
min.x <- min(min(dens.k1$x), min(dens.k2$x))
max.x <- max(max(dens.k1$x), max(dens.k2$x))
}else{
min.x <- xlim[1]
max.x <- xlim[2]
}
if (is.null(ylim)){
min.y <- 0
max.y <- max(max(dens.k1$y), max(dens.k2$y))
}else{
min.y <- ylim[1]
max.y <- ylim[2]
}
plot(dens.k1, xlim=c(min.x, max.x), ylim=c(min.y, max.y), col=col[1], lty=lty[1], lwd=lwd[1], main=main, xlab=xlab, ...)
lines(dens.k2, col=col[2], lty=lty[2], lwd=lwd[2])
# thres
abline(v=x$T$thres, col=col[3], lty=lty[3], lwd=lwd[3])
# CI
if(ci & !is.null(x$CI)){
if (x$CI$ci.method != "boot"){
abline(v=c(x$CI$lower, x$CI$upper), col=col[3], lty=lty[4], lwd=lwd[3])
}else{
abline(v=c(x$CI[paste0("low.", which.boot)], x$CI[paste0("up.", which.boot)]), col=col[3], lty=lty[4], lwd=lwd[3])
}
}
# legend
if (legend){
legend(leg.pos, c(expression(bar(D)), "D", ifelse(ci & !is.null(x$CI), "Thres+CI", "Thres")), col=col, lty=lty, lwd=lwd, cex=leg.cex)
}
}
##################################################################################
###### LINES DENSITY PLOT
##################################################################################
###### lines.thres2 function includes vertical lines for the thresholds and CIs in
###### a plot.thres2.
###### arguments:
###### x: 'thres2' object
###### ci: should the confidence interval be plotted? Default, TRUE.
###### which.boot: in case 'x' contains CI computed by bootstrapping, which one should be printed?
###### the user can choose between "norm" (based on normal distribution)
###### or "perc" (based on percentiles). Default, "norm". This argument
###### is ignored if the CI were computed by the delta method.
###### col: color for the threshold and its corresponding CI. Default, 1.
###### lty: 2-dimensional vector:
###### lty[1]: line type for the threshold
###### lty[2]: line type for the CI
###### Default, c(1, 2).
###### lwd: line width for the threshold and its corresponding CI. Default, 1.
###### ...: further arguments to be passed to 'abline()'.
##################################################################################
lines.thres2 <- function(x, ci=TRUE, which.boot=c("norm", "perc"), col=1, lty=c(1, 2), lwd=1, ...){
# error handling
if (!is.logical(ci) | is.na(ci)){
stop("'ci' must be TRUE or FALSE")
}
# recicle lty
if (length(lty)!=2){
lty <- rep_len(lty, 2)
}
which.boot <- match.arg(which.boot)
# thres
abline(v=x$T$thres, col=col, lty=lty[1], lwd=lwd)
# CI
if(ci & !is.null(x$CI)){
if (x$CI$ci.method != "boot"){
abline(v=c(x$CI$lower, x$CI$upper), col=col, lty=lty[2], lwd=lwd, ...)
}else{
abline(v=c(x$CI[paste0("low.", which.boot)], x$CI[paste0("up.", which.boot)]), col=col, lty=lty[2], lwd=lwd, ...)
}
}
}
############################################################################
#### SAMPLE SIZE ESTIMATION
############################################################################
############################################################################
############################################################################
######### PARAMETRIC
############################################################################
######### LIMITING BETA
######### CHECKING POSITIVE SQUARE ROOT
##### arguments:
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
############################################################################
control <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
ctrl <- (par2.1-par1.1)^2+2*log((par2.2/par1.2)*slope(rho,costs))*(par2.2^2-par1.2^2)
return(ctrl)
}
###########################################################################
############ STANDARD ERROR
###########################################################################
##########################################################################
############ VARIANCES OF ESTIMATORS
############ UNEQUAL VARIANCES
##########################################################################
############ FUNCTION OF ML VARIANCE OF VARIANCE ESTIMATOR
##########################################################################
parVarUn <- function(sdev, t){
est <- 2*sdev^4/(t-1)
return(est)
}
##########################################################################
############ FUNCTION OF ML VARIANCE OF MEAN ESTIMATOR (DIS+NON.DIS)
##########################################################################
parMeanUn <- function(sdev,t){
est <- sdev^2/t
return(est)
}
##########################################################################
############ PARTIAL DERIVATIVES.............
##########################################################################
############ OF DISEASED MEAN
##### arguments:
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
##########################################################################
parDerMeanDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
est <- (par2.2*par1.2*(par2.1-par1.1)/sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs) )-par1.2^2)/(par2.2^2-par1.2^2)
return(est)
}
##########################################################################
############ OF NON-DISEASED MEAN
##### arguments:
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
##########################################################################
parDerMeanNonDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
est <- (par2.2^2-par2.2*par2.1*(par1.2-par1.1)/sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs)))/(par2.2^2-par2.1^2)
return(est)
}
##########################################################################
############ OF DISEASED VARIANCE
##### arguments:
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
##########################################################################
parDerVarDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
est <- ((par1.2*par2.2*((par2.2^2-par1.2^2)/par2.2^2+2*log(slope(rho,costs)*par2.2/par1.2)))/(2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs)))+par1.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))/(2*par2.2)+par1.1)/(par2.2^2-par1.2^2)-(par1.2*par2.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))+par1.1*par2.2^2-par2.1*par1.2^2)/(par2.2^2-par1.2^2)^2
return(est)
}
##########################################################################
############ OF NON-DISEASED VARIANCE
##### arguments:
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
##########################################################################
parDerVarNonDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
est <- ((par1.2*par2.2*((par1.2^2-par2.2^2)/par2.2^2-2*log(slope(rho,costs)*par2.2/par1.2)))/(2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs)))+par2.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))/(2*par1.2)-par2.1)/(par2.2^2-par1.2^2)+(par1.2*par2.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))+par1.1*par2.2^2-par2.1*par1.2^2)/(par2.2^2-par1.2^2)^2
return(est)
}
##########################################################################
############ SAMPLE SIZE RATIO AND MINIMUM REQUIRED SAMPLE SIZE
##### arguments:
##### par1.1=healthy population mean
##### par1.2=healthy population standard deviation
##### par2.1=diseased population mean
##### par2.2=diseased population standard deviation
##### rho=disease prevalence
##### costs=cost matrix
##########################################################################
SS <- function(par1.1, par1.2, par2.1, par2.2=NULL, rho, width, costs=matrix(c(0,0,1,(1-rho)/rho),2,2, byrow=TRUE), R=NULL, var.equal=FALSE, alpha=0.05){
# error handling
if (!(rho > 0 & rho < 1)){
stop("The disease prevalence rho must be a number in (0,1)")
}
# if (!is.matrix(costs)){
# stop("'costs' must be a matrix")
# }
# if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
# stop("'costs' must be a 2x2 matrix")
# }
if (width<=0){
stop("'width' must be a positive number")
}
if (!is.logical(var.equal) | is.na(var.equal)){
stop("'var.equal' must be TRUE or FALSE")
}
if (var.equal){
if (!is.null(par2.2)){
if (par1.2 != par2.2){
stop("'var.equal' is set to TRUE, but 'par1.2' and 'par2.2' are different")
}
}else{
par2.2 <- par1.2
}
}else{
if (is.null(par2.2)){
stop("When 'var.equal' is set to FALSE, a value for 'par2.2' must be given")
}
if (par1.2==par2.2){
stop("'var.equal' is set to FALSE, but par1.2==par2.2")
}
}
# costs or R
if (is.null(costs) & is.null(R)){
stop("Both 'costs' and 'R' are NULL. Please specify one of them.")
}else if (!is.null(costs) & !is.null(R)){
stop("Either 'costs' or 'R' must be NULL")
}else if (is.null(costs) & !is.null(R)){
if (!is.numeric(R) | length(R)!=1){
stop("R must be a single number")
}
costs <- matrix(c(0, 0, 1, (1-rho)/(R*rho)), 2, 2, byrow=TRUE)
}else if (!is.null(costs) & is.null(R)){
if (!is.matrix(costs)){
stop("'costs' must be a matrix")
}
if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
stop("'costs' must be a 2x2 matrix")
}
}
costs.origin <- costs
rho.origin <- rho
par1.1.origin <- par1.1
par2.1.origin <- par2.1
L <- width/2
if (par1.1 > par2.1){
rho <- 1 - rho
costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
g <- par1.1; par1.1 <- par2.1; par2.1 <- g
f <- par1.2; par1.2 <- par2.2; par2.2 <- f
}
# slope
R <- slope(rho, costs)
if(var.equal){
num <- 1/2-par1.2^2*log(R)/(par2.1-par1.1)^2
den <- 1/2+par1.2^2*log(R)/(par2.1-par1.1)^2
epsilon <- abs(num/den)
est.non.dis <- (qnorm(1-alpha/2)/L)^2*((log(R)/(par2.1-par1.1))^2*2*par1.2^4/(1+epsilon)
+(1/2-par1.2^2*log(R)/(par2.1-par1.1)^2)^2*par1.2^2/epsilon
+(1/2+par1.2^2*log(R)/(par2.1-par1.1)^2)^2*par1.2^2)
}else{
threshold <- expression((sigma2D*muND-sigma2ND*muD+sqrt(sigma2ND*sigma2D)*sqrt((muD-muND)^2+2*log(R*sqrt(sigma2D/sigma2ND))*(sigma2D-sigma2ND)))/(sigma2D-sigma2ND))
where <- list(muND=par1.1, sigma2ND=par1.2^2, muD=par2.1, sigma2D=par2.2^2, R=R)
a <- (as.numeric(attributes(eval(deriv(threshold, "muD"), where))$gradient))^2
b <- (as.numeric(attributes(eval(deriv(threshold, "muND"), where))$gradient))^2
c <- (as.numeric(attributes(eval(deriv(threshold, "sigma2D"), where))$gradient))^2
d <- (as.numeric(attributes(eval(deriv(threshold, "sigma2ND"), where))$gradient))^2
epsilon <- sqrt((a*par2.2^2+2*c*par2.2^4)/(b*par1.2^2+2*d*par1.2^4))
est.non.dis <- (qnorm(1-alpha/2)/L)^2*(a*par2.2^2/epsilon+b*par1.2^2+2*c*par2.2^4/epsilon+2*d*par1.2^4)
}
est.dis <- epsilon*est.non.dis
if (par1.1.origin > par2.1.origin){
auxiliar <- est.non.dis
est.non.dis <- est.dis
est.dis <- auxiliar
epsilon <- 1/epsilon
}
re <- list(ss2=est.dis, ss1=est.non.dis, epsilon=epsilon, width=width, alpha=alpha, costs=costs.origin, R=R, prev=rho.origin)
class(re) <- "SS"
return(re)
}
#############################################################
## Print function for class "SS"
#############################################################
print.SS <- function(x, ...){
cat("Optimum SS Ratio: ", x$epsilon)
cat("\n\nSample size for")
cat("\n Diseased: ", x$ss2)
cat("\n Non-diseased: ", x$ss1)
cat("\n")
cat("\nParameters used")
cat("\n Significance Level: ", x$alpha)
cat("\n CI width: ", x$width)
cat("\n Disease prevalence:", x$prev)
cat("\n Costs (Ctp, Cfp, Ctn, Cfn):", x$costs)
cat("\n R:", x$R)
cat("\n")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.