R/ThresholdROC-2states.R

Defines functions print.SS SS parDerVarNonDisUn parDerVarDisUn parDerMeanNonDisUn parDerMeanDisUn parMeanUn parVarUn control lines.thres2 plot.thres2 plotCostROC icBootTH aux.par.boot icSmooth2 icEmp2 icBootUn2 icBootEq2 resample2 icDeltaUn2 varDeltaUn2 derVarNDisUn derVarDisUn derMeanNDisUn derMeanDisUn varMeanUn varVarUn icDeltaEq2 varDeltaEq2 derMeanNDisEq derMeanDisEq derVarEq varMeanEq varVarEq thres2 getParams print.thres2 thresSmooth2 thresEmp2 thresUn2 thresEq2 varPooled secondDer2 print.thresTH2 thresTH2 DensRatio2 sqroot slope rand p quant dens

Documented in lines.thres2 plotCostROC plot.thres2 secondDer2 SS thres2 thresTH2

##########################################################################
###########  2 STATES
##########################################################################


######################################################################
###########       DISTRIBUTION CHOICE
######################################################################
########          RETURNS THE DENSITY OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
#####            implemented in R
##### value: the chosen density function
######################################################################
dens <- function(dist){
  fun <- get(paste0("d", dist))
  return(fun)
}


######################################################################
########          RETURNS THE QUANTILES OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
#####            implemented in R
##### value: the chosen quantile function
######################################################################
quant <- function(dist){
  fun <- get(paste0("q", dist))
  return(fun)
}


######################################################################
########          RETURNS THE DISTRIBUTION FUNCTION OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
#####            implemented in R
##### value: the chosen distribution function
######################################################################
p <- function(dist){
  fun <- get(paste0("p", dist))
  return(fun)
}

######################################################################
########          GENERATES RANDOM NUMBERS OF THE CHOSEN DISTRIBUTION
##### arguments: dist=character indicating any 2-parameter distribution
#####            implemented in R
##### value: the chosen r function
######################################################################
rand <- function(dist){
  fun <- get(paste0("r", dist))
  return(fun)
}


############################################################################
########          BETA OR R FUNCTION (COST-MINIMISING SLOPE)
##### arguments:  rho=disease prevalence
#####		  	      costs=cost matrix
##### value: cost-minimising slope
############################################################################
slope <- function(rho, costs=matrix(c(0,0,1,(1-rho)/rho), 2, 2, byrow=TRUE)){
  c.t.pos <- costs[1, 1]
  c.t.neg <- costs[1, 2]
  c.f.pos <- costs[2, 1]
  c.f.neg <- costs[2, 2]
  beta <- ((1 - rho)/rho) * ((c.t.neg - c.f.pos)/(c.t.pos - c.f.neg))
  return(beta)
}


############################################################################
########          CHECKING POSITIVE SQUARE ROOT
##### arguments: k1=vector containing the healthy sample values 
#####		         k2=vector containing the diseased sample values
##### 	 	       rho=disease prevalence
#####		  	     costs=cost matrix
##### value: discriminant of the expression for the optimum threshold
############################################################################
sqroot <- function(k1, k2, rho, costs){
  ctrl <- (mean(k2) - mean(k1))^2 + 2*log((sd(k2)/sd(k1))*slope(rho,costs))*(var(k2) - var(k1))
  return(ctrl)
}


############################################################################
##### THRESHOLD DEPENDING ON THE UNDERLYING DISTRIBUTION OF THE POPULATIONS
##### COST-MINIMISING THRESHOLD ONE-VARIABLE EQUATION
##### DENSITY RATIO FORMULA
##### arguments:  p=vector containing the interval extremes at which the 'uniroot' function will look for 
#####                         the one-variable equation solution  
#####		          dist1=choose the healthy distribution
#####		   	      dist2=choose the diseased distribution			
#####             par1.1=healthy population mean
#####		   	      par1.2=healthy population standard deviation
#####             par2.1=diseased population mean
#####		          par2.2=diseased population standard deviation
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: the threshold one-variable equation evaluated at p
############################################################################
DensRatio2 <- function(p, dist1, dist2, par1.1, par1.2, par2.1, par2.2, rho, costs) {
  ratio <- (dens(dist2)(p,par2.1,par2.2)/dens(dist1)(p,par1.1,par1.2)) - slope(rho,costs)
  return(ratio)
}


############################################################################
##### "uniroot" function looks for the one-variable equation solution  
##### arguments: 	q1=probability of the "left" distribution in order to determine a "low" quantile.
##### 	 	        q2=probability of the "right" distribution in order to determine a "high" quantile.
#####			        dist1, dist2=choose the healthy and the diseased distributions, respectively,
#####             between the following 2-parameter distributions: "beta", "cauchy",
#####             "chisq" (chi-squared), "gamma", "lnorm" (lognormal), "logis" (logistic), "norm" (normal)
#####             and "weibull".
#####			        par1.1=first parameter of the distribution chosen for the healthy population
#####		   	      par1.2=second parameter of the distribution chosen for the healthy population
#####             par2.1=first parameter of the distribution chosen for the diseased population
#####		          par2.2=second parameter of the distribution chosen for the diseased population
#####		          rho=disease prevalence
#####		  	      costs=cost matrix
#####             tol=tolerance to be used in uniroot. Default, 10^(-8).
##### value: an object of class "thresTH2" containing the threshold one-variable equation solution,
#####        the prevalence, the cost matrix and the slope.      
############################################################################
thresTH2 <- function(dist1, dist2, par1.1, par1.2, par2.1, par2.2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), R=NULL, q1=0.05, q2=0.95, tol=10^(-8)){
  # error handling
  if (!(rho > 0 & rho < 1)){
    stop("The disease prevalence rho must be a number in (0,1)")
  }
  # if (!is.matrix(costs)){
  #   stop("'costs' must be a matrix")
  # }
  # if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
  #   stop("'costs' must be a 2x2 matrix")
  # }
  
  # costs or R
  if (is.null(costs) & is.null(R)){
    stop("Both 'costs' and 'R' are NULL. Please specify one of them.")
  }else if (!is.null(costs) & !is.null(R)){
    stop("Either 'costs' or 'R' must be NULL")
  }else if (is.null(costs) & !is.null(R)){
    if (!is.numeric(R) | length(R)!=1){
      stop("R must be a single number")
    }
    costs <- matrix(c(0, 0, 1, (1-rho)/(R*rho)), 2, 2, byrow=TRUE)
  }else if (!is.null(costs) & is.null(R)){
    if (!is.matrix(costs)){
      stop("'costs' must be a matrix")
    }
    if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
      stop("'costs' must be a 2x2 matrix")
    }
  }
  
  # function
  costs.origin <- costs
  rho.origin <- rho
  median1 <- quant(dist1)(0.5, par1.1, par1.2)
  median2 <- quant(dist2)(0.5, par2.1, par2.2)
  if(median1 > median2){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- par2.1; par2.1 <- par1.1; par1.1 <- g
    f <- par2.2; par2.2 <- par1.2; par1.2 <- f
    auxdist <- dist2; dist2 <- dist1; dist1 <- auxdist
  }
  p1 <- quant(dist1)(q1, par1.1, par1.2)
  p2 <- quant(dist2)(q2, par2.1, par2.2)
  # threshold estimate
  cut.t <- uniroot(DensRatio2,c(p1,p2),tol=tol,dist1,dist2,par1.1,par1.2,par2.1,par2.2,rho,costs,extendInt="yes")$root
  # add slope
  beta <- slope(rho, costs)
  # check interval for R
  if (dist1=="norm" & dist2=="norm" & par1.2!=par2.2){
   LL <- par1.2/par2.2*exp(-(par2.1-par1.1)^2/(2*par2.2^2))
   UL <- par1.2/par2.2*exp((par2.1-par1.1)^2/(2*par1.2^2))
   if (!(LL<=beta & beta<=UL)) warning("The choice of costs/R leads to a cut-off that is not between the means of the two distributions")
  }
  # results
  re <- list(thres=cut.t, prev=rho.origin, costs=costs.origin, R=beta, method="theoretical")
  # return
  class(re) <- "thresTH2"
  return(re)
}


############################################################################
# Print function for class "thresTH2"
############################################################################

print.thresTH2 <- function(x, ...){
    cat("\nThreshold:", x$thres)
    cat("\n")
    cat("\nParameters used")
    cat("\n  Disease prevalence:", x$prev)
    cat("\n  Costs (Ctp, Cfp, Ctn, Cfn):", x$costs)
    cat("\n  R:", x$R)
    cat("\n")
}


##########################################################################
##### Second derivative of the cost function (2 states): just a check
##### arguments:  x = thres2 object
##### value:      the second derivative function
##########################################################################
secondDer2 <- function(x){
  # error handling
  if (!inherits(x, "thres2")){
    stop("'x' must be of class 'thres2'")
  }
  # function
  if (x$T$method=="empirical"){
    stop("'x' has been computed with method='empirical', cannot compute the second derivative of the cost function")
  }else if (x$T$method=="equal" | x$T$method=="unequal"){
    k1 <- x$T$k1
    k2 <- x$T$k2
    rho <- x$T$prev
    costs <- x$T$costs
    Thr <- x$T$thres
    par1.1 <- mean(k1)
    par2.1 <- mean(k2)
    par1.2 <- sd(k1)
    par2.2 <- sd(k2)
    n1 <- length(k1)
    n2 <- length(k2)
    beta <- slope(rho,costs)
    de <- (n1+n2)*rho*(costs[1,1]-costs[2,2])
    der <- de/sqrt(2*pi)*(((Thr-par2.1)/par2.2^3)*exp(-(Thr-par2.1)^2/(2*par2.2^2))-beta*((Thr-par1.1)/par1.2^3)*exp(-(Thr-par1.1)^2/(2*par1.2^2)))
  }else if (x$T$method=="parametric"){
    dist1 <- x$T$dist1
    dist2 <- x$T$dist2
    pars1 <- x$T$pars1
    pars2 <- x$T$pars2
    densratio <- function(y){
      DR <- dens(dist2)(y, pars2[1], pars2[2])/dens(dist1)(y, pars1[1], pars1[2])-x$T$R
      return(DR)
    }
    der <- grad(densratio, x$T$thres)
  }
  return(der)
}


###########################################################################
############      ASSUMING EQUAL VARIANCES
##########################################################################
############      FUNCTION OF POOLED VARIANCE
##########################################################################
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
##### value:      the pooled variance of the two samples
##########################################################################
varPooled <- function(k1, k2){
  n1 <- length(k1)
  n2 <- length(k2)
  pool <- ((n1 - 1)*var(k1) + (n2 - 1)*var(k2))/(n1 + n2 - 2)
  return(pool)
}

##########################################################################
############   1) FUNCTION OF THRESHOLD - JUND (EQUAL VARIANCES)
##########################################################################
##### arguments:  k1=vector containing the healthy sample values 
#####		         	k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: the threshold estimated by the two samples considering the population variances equal
##########################################################################
thresEq2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE)){
  costs.origin <- costs
  k1.origin <- k1
  k2.origin <- k2
  rho.origin <- rho
  if(mean(k1)>mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  beta <- slope(rho, costs)
  # threshold estimate
  cut <- (2*varPooled(k1,k2)*log(beta) - (mean(k1)^2 - mean(k2)^2))/(2*(mean(k2) - mean(k1)))
  # returning results
  re <- list(thres=cut, prev=rho.origin, costs=costs.origin, R=beta, method="equal", k1=k1.origin, k2=k2.origin)
  return(re)
}



##########################################################################
###########    2) FUNCTION OF THRESHOLD - JUND (UNEQUAL VARIANCES)
##########################################################################
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		        	rho=disease prevalence
#####		        	costs=cost matrix
##### value: the threshold estimated by the two samples considering the population variances unequal
##########################################################################
thresUn2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE)){
  costs.origin <- costs
  k1.origin <- k1
  k2.origin <- k2
  rho.origin <- rho
  if(mean(k1)>mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  ctrl <- sqroot(k1, k2, rho, costs) 	
  if (ctrl>=0){
    # threshold estimate
    cut <- (var(k2)*mean(k1) - var(k1)*mean(k2) + sd(k1)*sd(k2)*sqrt(ctrl))/(var(k2) - var(k1))}
  else{
    cut <- NA
    warning("Negative discriminant; cannot solve the second-degree equation")
  }
  beta <- slope(rho, costs)
  # check interval for R
  LL <- sd(k1)/sd(k2)*exp(-(mean(k2)-mean(k1))^2/(2*sd(k2)^2))
  UL <- sd(k1)/sd(k2)*exp((mean(k2)-mean(k1))^2/(2*sd(k1)^2))
  if (!(LL<=beta & beta<=UL)) warning("The choice of costs/R leads to a cut-off that is not between the means of the two distributions")
  # returning results
  re <- list(thres=cut, prev=rho.origin, costs=costs.origin, R=beta, method="unequal", k1=k1.origin, k2=k2.origin)
  return(re)
}



##########################################################################
#########         EMPIRICAL METHOD
##########################################################################
#########      3) COST-MINIMISING THRESHOLD (EMPIRICAL) FUNCTION
##########################################################################
##### arguments: k1=vector containing the healthy sample values 
#####		   	     k2=vector containing the diseased sample values
#####  	   	     rho=disease prevalence
#####		   	     costs=cost matrix
#####            extra.info=should extra information be added to the output? Default, FALSE
##### value: the threshold estimated by the two samples through the empirical estimator
##########################################################################
thresEmp2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), extra.info=FALSE){
  k1.origin <- k1
  k2.origin <- k2
  rho.origin <- rho
  costs.origin <- costs
  if (mean(k1)>mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  # cost matrix
  c.t.pos <- costs[1,1]
  c.t.neg <- costs[1,2]
  c.f.pos <- costs[2,1]
  c.f.neg <- costs[2,2]
  
  # empirical estimation
  n1 <- length(k1)
  n2 <- length(k2)
  n <- n1+n2
  v <- c(k1, k2)
  ind.origin <- c(rep(0, n1), rep(1, n2))
  vi <- cbind(v, ind.origin)
  ord.v <- vi[order(vi[, 1], vi[, 2]), ]
  sens <- rep(NA, n)
  spec <- rep(NA, n)
  
  # sensitivity and specificity for each observation
  for (j in 1:n){
    cut <- ord.v[j, 1]
    sens[j] <- sum(ord.v[, 1]>=cut & ord.v[, 2]==1)/n2
    spec[j] <- sum(ord.v[, 1]<cut & ord.v[, 2]==0)/n1
  }
  
  # cost for each observation
  D <- n*rho*(c.t.pos-c.f.neg)
  R <- ((1-rho)/rho)*((c.t.neg-c.f.pos)/(c.t.pos-c.f.neg))
  G <- (rho*c.f.neg+(1-rho)*c.f.pos)/(rho*(c.t.pos-c.f.neg))
  cost.non.par <- D*(sens+spec*R+G)
  
  # together
  total <- data.frame(ord.v, cost.non.par, sens, spec)

  # minimum cost
  ind.min.cost <- which(total[, "cost.non.par"]==min(total[, "cost.non.par"]))
  sens.min <- total[ind.min.cost, "sens"]
  spec.min <- total[ind.min.cost, "spec"]
  cut.min <- total[ind.min.cost, "v"]
  cost.min <- total[ind.min.cost, "cost.non.par"]
  # if there are two observations that lead to the same cost...
  howmany <- length(cut.min)
  if (howmany>1){
    interval <- subset(total, v >= cut.min[1] & v <= cut.min[length(cut.min)])
    cut.min <- mean(interval$v)
    # sens, spec
    sens.min <- sum(ord.v[, 1]>=cut.min & ord.v[, 2]==1)/n2
    spec.min <- sum(ord.v[, 1]<cut.min & ord.v[, 2]==0)/n1
    # cost
    cost.min <- D*(sens.min+spec.min*R+G)
    warning(paste(howmany, "observations lead to the minimum cost. The mean of the values between them is returned as threshold."), call.=FALSE)
  }
  
  # slope
  beta <- slope(rho, costs)
  
  # results
  re <- list(thres=cut.min, prev=rho.origin, costs=costs.origin, R=beta, method="empirical", k1=k1.origin, k2=k2.origin, sens=sens.min, spec=spec.min, cost=cost.min)
  
  # for plotCostROC if extra.info is TRUE
  if (extra.info){
    re$tot.thres <- total[, "v"]
    re$tot.cost <- total[, "cost.non.par"]
    re$tot.spec <- spec
    re$tot.sens <- sens
  }
  return(re)
}


##########################################################################
#########         SMOOTH METHOD
##########################################################################
##########################################################################
##### arguments: k1=vector containing the healthy sample values 
#####		   	     k2=vector containing the diseased sample values
#####  	   	     rho=disease prevalence
#####		   	     costs=cost matrix
##### value: the threshold estimated by the two samples through the smooth estimator
##########################################################################
thresSmooth2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), extra.info=FALSE){
  k1.origin <- k1
  k2.origin <- k2
  rho.origin <- rho
  costs.origin <- costs
  if (mean(k1)>mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  # cost matrix
  c.t.pos <- costs[1,1]
  c.t.neg <- costs[1,2]
  c.f.pos <- costs[2,1]
  c.f.neg <- costs[2,2]
  
  # smooth estimation
  dk1 <- kde(k1)
  dk2 <- kde(k2)
  lmin <- mean(k1)
  lmax <- mean(k2)
  beta <- slope(rho, costs)
  fx <- function(x) predict(dk2, x=x)-beta*predict(dk1, x=x)
  out <- uniroot(fx, c(lmin, lmax))
  
  # results
  re <- list(thres=out$root, prev=rho.origin, costs=costs.origin, R=beta, method="smooth", k1=k1.origin, k2=k2.origin)

  return(re)
}















#############################################################
## Print function for class "thres2"
#############################################################
print.thres2 <- function(x, ...){
  if (x$T$method == "parametric"){
    cat("\nEstimate:")
    cat("\n  Threshold: ", x$T$thres)
    if (!is.null(x$CI)){
      cat("\n")
      cat("\nConfidence intervals (parametric bootstrap):")
      cat("\n  CI based on normal distribution:", x$CI$low.norm, " - ", x$CI$up.norm)
      cat("\n  CI based on percentiles:", x$CI$low.perc, " - ", x$CI$up.perc)
      cat("\n  Bootstrap resamples:", x$CI$B)
    }
    cat("\n")
    cat("\nParameters used:")
    cat("\n  Disease prevalence:", x$T$prev)
    cat("\n  Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
    cat("\n  R:", x$T$R)
    if (!is.null(x$CI)){
      cat("\n  Significance Level: ", x$CI$alpha)
    }
    cat("\n  Method:", x$T$method)
    cat("\n  Distribution assumed for the healthy sample: ", x$T$dist1, "(", round(x$T$pars1[1], 2), ", ", round(x$T$pars1[2], 2), ")", sep="")
    cat("\n  Distribution assumed for the diseased sample: ", x$T$dist2, "(", round(x$T$pars2[1], 2), ", ", round(x$T$pars2[2], 2), ")", sep="")
    cat("\n")
  }
  if (x$T$method == "empirical"){
    cat("\nEstimate:")
    cat("\n  Threshold: ", x$T$thres)
    cat("\n  Minimum Cost: ", x$T$cost)
    if (!is.null(x$CI)){
      cat("\n")
      cat("\nConfidence intervals (bootstrap):")
      cat("\n  CI based on normal distribution:", x$CI$low.norm, " - ", x$CI$up.norm)
      cat("\n  CI based on percentiles:", x$CI$low.perc, " - ", x$CI$up.perc)
      cat("\n  Bootstrap resamples:", x$CI$B)
    }
    cat("\n")
    cat("\nParameters used:")
    cat("\n  Disease prevalence:", x$T$prev)
    cat("\n  Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
    cat("\n  R:", x$T$R)
    cat("\n  Method:", x$T$method)
    if (!is.null(x$CI)){
      cat("\n  Significance Level:", x$CI$alpha)
    }
    cat("\n")
  }
  if (x$T$method == "smooth"){
    cat("\nEstimate:")
    cat("\n  Threshold: ", x$T$thres)
    if (!is.null(x$CI)){
      cat("\n")
      cat("\nConfidence intervals (bootstrap):")
      cat("\n  CI based on normal distribution:", x$CI$low.norm, " - ", x$CI$up.norm)
      cat("\n  CI based on percentiles:", x$CI$low.perc, " - ", x$CI$up.perc)
      cat("\n  Bootstrap resamples:", x$CI$B)
    }
    cat("\n")
    cat("\nParameters used:")
    cat("\n  Disease prevalence:", x$T$prev)
    cat("\n  Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
    cat("\n  R:", x$T$R)
    cat("\n  Method:", x$T$method)
    if (!is.null(x$CI)){
      cat("\n  Significance Level:", x$CI$alpha)
    }
    cat("\n")
  }
  if (x$T$method == "equal" | x$T$method == "unequal"){
    cat("\nEstimate:")
    cat("\n  Threshold: ", x$T$thres)
    cat("\n")
    if (!is.null(x$CI)){
      if(x$CI$ci.method == "delta"){
        cat("\nConfidence interval (delta method):")
        cat("\n  Lower Limit:", x$CI$lower)
        cat("\n  Upper Limit:", x$CI$upper)
        cat("\n")
      }
      if(x$CI$ci.method == "boot"){
        cat("\nConfidence intervals (bootstrap):")
        cat("\n  CI based on normal distribution: ", x$CI$low.norm, " - ", x$CI$up.norm)
        cat("\n  CI based on percentiles: ", x$CI$low.perc, " - ", x$CI$up.perc)
        cat("\n  Bootstrap resamples: ", x$CI$B)
        cat("\n")
      }
    }
    cat("\nParameters used:")
    cat("\n  Disease prevalence:", x$T$prev)
    cat("\n  Costs (Ctp, Cfp, Ctn, Cfn):", x$T$costs)
    cat("\n  R:", x$T$R)
    cat("\n  Method:", x$T$method)
    if (!is.null(x$CI)){
      cat("\n  Significance Level: ", x$CI$alpha)
    }
    cat("\n")
  }
}


##########################################################################
#########         GETTING PARAMETERS OF A DISTRIBUTION THROUGH fitdistr() [library MASS]
##########################################################################
getParams <- function(k, dist){
  if (dist %in% c("cauchy", "gamma", "weibull")){
    pars <- fitdistr(k, dist)$estimate
  }else if(dist=="beta"){
    # needs initial values
    sigma2 <- var(k)
    mu <- mean(k)
    shape1.start <- ((1-mu)/sigma2-1/mu)*mu^2
    shape2.start <- shape1.start*(1/mu-1)
    pars <- fitdistr(k, "beta", start=list(shape1=shape1.start, shape2=shape2.start))$estimate
  }else if(dist=="chisq"){
    # needs initial values
    sigma2 <- var(k)
    mu <- mean(k)
    ncp.start <- sigma2/2-mu
    df.start <- mu-ncp.start
    pars <- fitdistr(k, "chi-squared", start=list(df=df.start, ncp=ncp.start))$estimate
  }else if(dist=="lnorm"){
    pars <- fitdistr(k, "lognormal")$estimate
  }else if(dist=="logis"){
    pars <- fitdistr(k, "logistic")$estimate
  }else if(dist=="norm"){
    pars <- fitdistr(k, "normal")$estimate
  }
  out <- pars
  return(out)
}


##########################################################################
#########         THRESHOLD COMPUTATION
##########################################################################
##### arguments: k1=vector containing the healthy sample values 
#####  	   	     k2=vector containing the diseased sample values
#####  	   	     rho=disease prevalence
#####		   	     costs=cost matrix
#####            method=method used in the estimation. The user can choose between:
#####              "equal": assumes binormality and equal variances
#####              "unequal": assumes binormality and unequal variances
#####              "empirical": leaves out any distributional assumption
#####              "parametric": based on the distribution assumed for the two populations.
#####              The user must specify these distributions through parameters 'dist1'
#####              (for the healthy population) and 'dist2' (for the healthy population).
#####              They can be chosen between the following 2-parameter distributions:
#####              "beta", "cauchy", "chisq" (chi-squared), "gamma", "lnorm" (lognormal),
#####              "logis" (logistic), "norm" (normal) and "weibull".
#####              Its parameters are estimated from the samples k1 and k2. Uses 'thresTH2()'.
#####            dist1, dist2: distribution to be assumed for healthy and disease populations,
#####            respectively. Ignored when method!="parametric".
#####            Default, "equal". The user can specify just the initial letters.
#####            ci=compute confidence interval? default, TRUE.
#####            ci.method=method to be used for the confidence intervals computation. The user can
#####            choose between:
#####              "delta": delta method is used to estimate the threshold standard error
#####              assuming a binormal underlying model.
#####              "boot": the confidence interval is computed by bootstrap.
#####            Default, "delta". The user can specify just just the initial letters.
#####            B=number of bootstrap resamples when ci.method="boot". Otherwise, ignored.
#####            Default, 1000.
#####  	   	     alpha=significance level for the confidence interval. Default, 0.05.
#####            extra.info=when using method="empirical", if set to T it returns
#####            extra information about the computation of the threshold. Ignored when method!="empirical".
#####            na.rm=a logical value indicating whether NA values in k1 and k2 should
#####            be stripped before the computation proceeds. Default, FALSE.
##### value: the threshold estimated
##########################################################################
thres2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), R=NULL, method=c("equal", "unequal", "empirical", "smooth", "parametric"), dist1=NULL, dist2=NULL, ci=TRUE, ci.method=c("delta", "boot"), B=1000, alpha=0.05, extra.info=FALSE, na.rm=FALSE, q1=0.05, q2=0.95){
  # error handling
  if (!(rho > 0 & rho < 1)){
    stop("The disease prevalence 'rho' must be a number in (0,1)")
  }
  # if (!is.matrix(costs)){
  #   stop("'costs' must be a matrix")
  # }
  # if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
  #   stop("'costs' must be a 2x2 matrix")
  # }
  if (!is.numeric(k1) | !is.numeric(k2)){
    stop("'k1' and 'k2' must be numeric vectors")
  }
  if (!is.logical(ci) | is.na(ci)){
    stop("'ci' must be TRUE or FALSE")
  }
  # costs or R
  if (is.null(costs) & is.null(R)){
    stop("Both 'costs' and 'R' are NULL. Please specify one of them.")
  }else if (!is.null(costs) & !is.null(R)){
    stop("Either 'costs' or 'R' must be NULL")
  }else if (is.null(costs) & !is.null(R)){
    if (!is.numeric(R) | length(R)!=1){
      stop("R must be a single number")
    }
    costs <- matrix(c(0, 0, 1, (1-rho)/(R*rho)), 2, 2, byrow=TRUE)
  }else if (!is.null(costs) & is.null(R)){
    if (!is.matrix(costs)){
      stop("'costs' must be a matrix")
    }
    if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
      stop("'costs' must be a 2x2 matrix")
    }
  }
  
  # NAs handling
  if (na.rm){
    k1 <- k1[!is.na(k1)]
    k2 <- k2[!is.na(k2)]
  }
  # function
  method <- match.arg(method)
  ci.method <- match.arg(ci.method)
  if (method=="equal"){
    T <- thresEq2(k1, k2, rho, costs)
    if (ci){
      if (ci.method=="delta"){
        CI <- icDeltaEq2(k1, k2, rho, costs, T$thres, a=alpha)
      }
      if (ci.method=="boot"){
        CI <- icBootEq2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
      }
    }else{
      CI <- NULL
    }
  }
  if (method=="unequal"){
    T <- thresUn2(k1, k2, rho, costs)
    if (ci){
      if (ci.method=="delta"){
        CI <- icDeltaUn2(k1, k2, rho, costs, T$thres, a=alpha)
      }
      if (ci.method=="boot"){
        CI <- icBootUn2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
      }
    }else{
      CI <- NULL
    }
  }
  if (method=="empirical"){
    if (ci.method=="delta" & ci){
      stop("When method='empirical', CIs cannot be computed based on delta method (choose ci.method='boot')")
    }
    T <- thresEmp2(k1, k2, rho, costs, extra.info)
    if (ci){
      CI <- icEmp2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
    }else{
      CI <- NULL
    }
  }
  if (method=="smooth"){
    if (ci.method=="delta" & ci){
      stop("When method='smooth', CIs cannot be computed based on delta method (choose ci.method='boot')")
    }
    T <- thresSmooth2(k1, k2, rho, costs)
    if (ci){
      CI <- icSmooth2(k1, k2, rho, costs, T$thres, B=B, a=alpha)
    }else{
      CI <- NULL
    }
  }
  if (method=="parametric"){
    # error handling
    if (ci.method=="delta" & ci){
      stop("When method='parametric', CIs cannot be computed based on delta method (choose ci.method='boot')")
    }
    if (is.null(dist1) | is.null(dist2)){
      stop("When method='parametric', 'dist1' and 'dist2' must be specified")
    }
    if (dist1=="norm" & dist2=="norm"){
      stop("When assuming a binormal distribution, choose method='equal' or 'unequal'")
    }
    if (!(dist1 %in% c("beta", "cauchy", "chisq", "gamma", "lnorm", "logis", "nbinom", "norm", "weibull"))){
      stop("Unsupported distribution for 'dist1'")
    }
    if (!(dist2 %in% c("beta", "cauchy", "chisq", "gamma", "lnorm", "logis", "nbinom", "norm", "weibull"))){
      stop("Unsupported distribution for 'dist2'")
    }
    # parameter estimation through 'fitdistr()'
    # dist1
    pars1 <- getParams(k1, dist1)
    # dist2
    pars2 <- getParams(k2, dist2)
    # threshold+CI estimation
    T <- thresTH2(dist1, dist2, pars1[1], pars1[2], pars2[1], pars2[2], rho, costs, q1=q1, q2=q2)
    T <- unclass(T)
    T$method <- "parametric"
    # adding some information to the output
    T$k1 <- k1
    T$k2 <- k2
    T$dist1 <- dist1
    T$dist2 <- dist2
    T$pars1 <- pars1
    T$pars2 <- pars2
    # confidence interval by parametric bootstrap
    if (ci){
      CI <- icBootTH(dist1, dist2, pars1[1], pars1[2], pars2[1], pars2[2], length(k1), length(k2), rho, costs, T$thres, B=B, a=alpha)
    }else{
      CI <- NULL
    }
  }
  out <- list(T=T, CI=CI)
  class(out) <- "thres2"
  return(out)
}


##########################################################################
############      VARIANCE ESTIMATORS
############	    EQUAL VARIANCES
##########################################################################
############       FUNCTION OF ML VARIANCE OF VARIANCE ESTIMATOR
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
##### value: Maximum-Likelihood variance of variance estimator (equal variances assumed)
##########################################################################
varVarEq <- function(k1, k2){
  n1 <- length(k1)
  n2 <- length(k2)
  est <- 2*(varPooled(k1,k2))^2/(n1 + n2 - 1)
  return(est)
}
##########################################################################
############       FUNCTION OF Maximum Likelihood VARIANCE OF MEAN ESTIMATOR  (DIS+NON.DIS)
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      t=size of the sample whose mean estimator variance we wish to estimate 
##### value: Maximum-Likelihood variance of mean estimator (equal variances assumed)
##########################################################################
varMeanEq <- function(k1, k2, t){
  est <- varPooled(k1, k2)/t
  return(est)
}

##########################################################################
##########################################################################
############       PARTIAL DERIVATIVES... (Skaltsa et al 2010)
##########################################################################
############       ...OF COMMON VARIANCE
##### arguments:  k1=vector containing the healthy sample values 
#####		          k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased or healthy population
#####              variance
##########################################################################
derVarEq <- function(k1, k2, rho, costs){
  est <- log(slope(rho,costs))/(mean(k2) - mean(k1))
  return(est)
}

##########################################################################
############       ...OF DISEASED MEAN
##### arguments:  k1=vector containing the healthy sample values 
#####		        	k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population mean
##########################################################################
derMeanDisEq <- function(k1, k2, rho, costs){
  est <- 1/2 - (varPooled(k1,k2)*log(slope(rho,costs)))/((mean(k2) - mean(k1))^2)
  return(est)
}

###################################################################################
############      ...OF NON-DISEASED MEAN
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population variance
##################################################################################
derMeanNDisEq <- function(k1, k2, rho, costs){
  est <- 1/2 + (varPooled(k1,k2)*log(slope(rho,costs)))/((mean(k2) - mean(k1))^2)
  return(est)
}

##################################################################################
############ 1) FUNCTION OF VARIANCE OF THRESHOLD (EQUAL VARIANCES)
############       DELTA METHOD
##### arguments:  k1=vector containing the healthy sample values 
#####		        	k2=vector containing the diseased sample values 
#####		         	rho=diseased sample size
#####		  	      costs=cost matrix
##### value: delta method threshold variance (assuming equal variances)
##################################################################################
varDeltaEq2 <- function(k1, k2, rho, costs){
  if(mean(k1) > mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  # n's
  n1 <- length(k1)
  n2 <- length(k2)
  # matrices
  d <- matrix(c(derMeanNDisEq(k1, k2, rho, costs), derMeanDisEq(k1, k2, rho, costs), derVarEq(k1, k2, rho, costs)), byrow=T, ncol=3, nrow=1)
  sigma <- matrix(c(varMeanEq(k1, k2, n2), 0, 0, 0, varMeanEq(k1, k2, n1), 0, 0, 0, varVarEq(k1, k2)), byrow=T, ncol=3, nrow=3)
  # Var(T)=d*sigma*d^T, Skaltsa et al 2010
  est <- d%*%sigma%*%t(d)
  return(est)
}

##################################################################################
############	 CONFIDENCE INTERVAL FUNCTION 1 - EQUAL VARIANCES
############       DELTA METHOD-NORMAL APPROXIMATION
##### arguments:  	k1=vector containing the healthy sample values 
#####		   	        k2=vector containing the diseased sample values 
#####		   	        rho=diseased sample size
#####		  	        costs=cost matrix
#####               Thres=threshold point estimate
#####		   	        a=significance level
##### value: confidence interval limits
##################################################################################
icDeltaEq2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, a=0.05){
  stdev <- sqrt(varDeltaEq2(k1, k2, rho, costs))  
  ic1 <- Thres + qnorm(a/2)*stdev
  ic2 <- Thres + qnorm(1-a/2)*stdev  
  ic <- list(lower=ic1, upper=ic2, se=stdev, alpha=a, ci.method="delta")
  return(ic)
}





##################################################################################
############       VARIANCES OF ESTIMATORS
############       UNEQUAL VARIANCES
##################################################################################
############       FUNCTION OF ML VARIANCE OF VARIANCE ESTIMATOR
##### arguments:  k=vector containing the sample values
##### 		        t=sample size
##### value: Maximum-Likelihood variance of variance estimator (unequal variances assumed)
##################################################################################
varVarUn <- function(k, t){
  est <- 2*(var(k))^2/(t - 1)
  return(est)
} 
##################################################################################
############       FUNCTION OF ML VARIANCE OF MEAN ESTIMATOR  (DIS+NON.DIS)
##### arguments:  k=vector containing the sample values
##### 		        t=sample size
##### value: Maximum-Likelihood variance of mean estimator (unequal variances assumed)
##################################################################################
varMeanUn <- function(k, t){
  est <- var(k)/t
  return(est)
}

#################################################################################
############       PARTIAL DERIVATIVES...
#################################################################################
############       ...OF DISEASED MEAN
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      rho=diseased sample size
#####		  	      costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population mean
#####              (assuming unequal variances)
#################################################################################
derMeanDisUn <- function(k1, k2, rho, costs){
  est <- (sd(k2)*sd(k1)*(mean(k2) - mean(k1))/sqrt((mean(k2) - mean(k1))^2+2*(var(k2) - var(k1))*log(sd(k2)*slope(rho,costs)/sd(k1))) - var(k1))/(var(k2) - var(k1)) 
  return(est)
}

#################################################################################
############       ...OF NON-DISEASED MEAN
##### arguments:  k1=vector containing the healthy sample values 
#####		   	k2= vector containing the diseased sample values 
#####		   	rho=diseased sample size
#####		  	costs=cost matrix
##### value: threshold estimator partial derivative with respect to the healthy population mean
#####              (assuming unequal variances)
#################################################################################
derMeanNDisUn <- function(k1, k2, rho, costs){
  est <- (var(k2) - sd(k2)*sd(k1)*(mean(k2) - mean(k1))/sqrt((mean(k2) - mean(k1))^2+2*(var(k2) - var(k1))*log(sd(k2)*slope(rho,costs)/sd(k1))))/(var(k2) - var(k1))
  return(est)
}

#################################################################################
############       ...OF DISEASED VARIANCE
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: threshold estimator partial derivative with respect to the diseased population variance
#####              (assuming unequal variances)
#################################################################################
derVarDisUn <- function(k1, k2, rho, costs){
  beta <- slope(rho,costs)
  est <- (sd(k1)*sd(k2)*((var(k2) - var(k1))/var(k2) + 2*log(beta*sd(k2)/sd(k1)))/(2*sqrt(2*log(beta*sd(k2)/sd(k1))*(var(k2) - var(k1))+(mean(k2) - mean(k1))^2)) + sd(k1)*sqrt(2*log(beta*sd(k2)/sd(k1))*(var(k2) - var(k1)) + (mean(k2) - mean(k1))^2)/(2*sd(k2)) + mean(k1)) /(var(k2) - var(k1)) - (sd(k1)*sd(k2)*sqrt(2*log(beta*sd(k2)/sd(k1))*(var(k2) - var(k1))+(mean(k2)-mean(k1))^2)+mean(k1)*var(k2)-mean(k2)*var(k1))/(var(k2)-var(k1))^2 
  return(est)
}

#################################################################################
############       ...OF NON-DISEASED VARIANCE
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: threshold estimator partial derivative with respect to the healthy population variance
#####              (assuming unequal variances)
#################################################################################
derVarNDisUn <- function(k1, k2, rho, costs){
  beta <- slope(rho, costs)
  est <- (-mean(k2)*var(k1)+sd(k2)*sqrt(2*log(sd(k2)*beta/sd(k1))*(var(k2)-var(k1))+(mean(k2)-mean(k1))^2)*sd(k1)+mean(k1)*var(k2)) /(var(k2)-var(k1))^2 +(sd(k2)*(-(var(k2)-var(k1))/var(k1)-2*log(sd(k2)*beta/sd(k1)))*sd(k1) /(2*sqrt(2*log(sd(k2)*beta/sd(k1))*(var(k2)-var(k1))+(mean(k2)-mean(k1))^2)) +sd(k2)*sqrt(2*log(sd(k2)*beta/sd(k1))*(var(k2)-var(k1))+(mean(k2)-mean(k1))^2)/(2*sd(k1))-mean(k2)) /(var(k2)-var(k1)) 
  return(est)
}

#################################################################################
############ 2) FUNCTION OF VARIANCE OF THRESHOLD (UNEQUAL VARIANCES)
############       DELTA METHOD
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: delta method threshold variance (assuming unequal variances)
#################################################################################
varDeltaUn2 <- function(k1, k2, rho, costs){
  if(mean(k1) > mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  # n's
  n1 <- length(k1)
  n2 <- length(k2)
  ctrl <- sqroot(k1,k2,rho,costs) 	
  if(ctrl < 0){
    est <- NA
    warning("Negative discriminant; cannot solve the second-degree equation")
  }else{
    # matrices
    d <- matrix(c(derMeanNDisUn(k1, k2, rho, costs), derMeanDisUn(k1, k2, rho, costs), derVarNDisUn(k1, k2, rho, costs), derVarDisUn(k1, k2, rho, costs)), byrow=T, ncol=4, nrow=1)
    sigma <- matrix(c(varMeanUn(k1, n1), 0, 0, 0, 0, varMeanUn(k2, n2), 0, 0, 0, 0, varVarUn(k1, n1), 0, 0, 0, 0, varVarUn(k2, n2)), byrow=T, ncol=4, nrow=4)
    # Var(T) following Skaltsa et al 2010
    est <- d%*%sigma%*%t(d)
  }
  return(est)
}

#################################################################################
############ 	 FUNCTION OF CONFIDENCE INTERVAL 1 - UNEQUAL
############       DELTA METHOD-NORMAL APPROXIMATION
##### arguments:  k1=vector containing the healthy sample values 
#####		         	k2=vector containing the diseased sample values 
#####		   	      rho=diseased sample size
#####		  	      costs=cost matrix
#####		   	      a=significance level
##### value: confidence interval limits
##################################################################################
icDeltaUn2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, a=0.05){
  stdev <- sqrt(varDeltaUn2(k1, k2, rho, costs))	
  ic1 <- Thres + qnorm(a/2)*stdev
  ic2 <- Thres + qnorm(1-a/2)*stdev
  ic <- list(lower=ic1, upper=ic2, se=stdev, alpha=a, ci.method="delta")  
  return(ic)
}





################################################################################
############       DATA GENERATION
############ 	 NON-PARAMETRIC RESAMPLING
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      B=number of bootstrap resamples
##### value: two-object list [[1]]: healthy resample matrix, [[2]]: diseased resample matrix
################################################################################
resample2 <- function(k1, k2, B){
  n1 <- length(k1)
  n2 <- length(k2)
  t0 <- matrix(sample(k1, n1*B, replace=TRUE), nrow=n1) # B resamples of k1 with replacement
  t1 <- matrix(sample(k2, n2*B, replace=TRUE), nrow=n2) # B resamples of k2 with replacement
  t <- list(t0, t1)
  return(t)
}

###################################################################################
###################################################################################
############          CONFIDENCE INTERVALS
###################################################################################
############               BOOTSTRAP
###################################################################################
###################################################################################
###############    FUNCTION OF BOOTSTRAP CONFIDENCE INTERVALS 
###############    EQUAL VARIANCES
##########              1) BOOTSTRAP VERSION OF S.E.- NORMAL
##########              2) PERCENTILE BIAS-CORRECTED
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      B=number of bootstrap resamples
#####		   	      a=significance level
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### returns: the 2 interval limits
##################################################################################
icBootEq2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, B, a=0.05){
  if(mean(k1) > mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  # resamples
  t <- resample2(k1, k2, B)
  t0 <- t[[1]]
  t1 <- t[[2]]
  
  # bootstrap
  cut <- sapply(1:B,function(i){thresEq2(t0[,i],t1[,i],rho,costs)[[1]]})
  est.se <- sd(cut)
  
  ###### 1) NORMAL-BOOTSTRAP SE
  norm.bootSE <- c(Thres + qnorm(a/2)*est.se, Thres + qnorm(1-a/2)*est.se)
  
  ###### 2) PERCENTILE
  percentil <- (c(quantile(cut,a/2), quantile(cut,1-a/2)))
  
  # results
  re <- list(low.norm=norm.bootSE[1], up.norm=norm.bootSE[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
  return(re)
}


###################################################################################
###################################################################################
###############     BOOTSTRAP CONFIDENCE INTERVAL FUNCTION
###############     UNEQUAL VARIANCES
##########               1) BOOTSTRAP VERSION OF S.E.- NORMAL
##########               2) PERCENTILE
##### arguments:   k1=vector containing the healthy sample values 
#####		   	       k2=vector containing the diseased sample values 
#####		   	       B=number of bootstrap resamples
#####		   	       a=significance level
#####		   	       rho=disease prevalence
#####		  	       costs=cost matrix
##### value: the 2 interval limits
##################################################################################
icBootUn2 <- function(k1, k2, rho, costs=matrix(c(0, 0, 1, (1-rho)/rho), 2, 2, byrow=TRUE), Thres, B, a=0.05){
  if(mean(k1) > mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  
  t <- resample2(k1, k2, B)
  t0 <- t[[1]]
  t1 <- t[[2]]
  
  cut <- sapply(1:B,function(i){thresUn2(t0[,i],t1[,i],rho,costs)[[1]]})
  
  est.se <- sd(na.omit(cut))
  
  ###### 1) NORMAL-BOOTSTRAP SE
  norm.bootSE <- c(Thres + qnorm(a/2)*est.se, Thres + qnorm(1-a/2)*est.se)
  
  ###### 2) PERCENTILE
  percentil <- (c(quantile(na.omit(cut),a/2), quantile(na.omit(cut),1-a/2)))
  
  # results
  re <- list(low.norm=norm.bootSE[1], up.norm=norm.bootSE[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
  return(re)
}





##################################################################################
##################################################################################
################    BOOTSTRAP CONFIDENCE INTERVAL
################    EMPIRICAL
##################################################################################
############           1) BOOTSTRAP VERSION OF S.E.- NORMAL
############           2) BOOTSTRAP PERCENTILE
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      B=number of bootstrap resamples
#####		   	      a=significance level
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: the 2 interval limits
##################################################################################
icEmp2 <- function(k1,k2,rho,costs=matrix(c(0,0,1,(1-rho)/rho),2,2, byrow=TRUE),Thres,B=500,a=0.05){
  if(mean(k1)>mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  
  t <- resample2(k1, k2, B)
  t0 <- t[[1]]
  t1 <- t[[2]]
  
  cut <- rep(NA,B)	
  cut <- suppressWarnings(sapply(1:B,function(j){thresEmp2(t0[,j],t1[,j],rho,costs)[[1]]}))

  est.se <- sd(cut)
  
  ###### 1) NORMAL-BOOTSTRAP SE
  norm <- c(Thres+qnorm(a/2)*est.se, Thres+qnorm(1-a/2)*est.se)
  
  ###### 2) PERCENTILE
  percentil <- (c(quantile(cut,a/2), quantile(cut,1-a/2)))
  
  # results
  re <- list(low.norm=norm[1], up.norm=norm[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
  return(re)
}

##################################################################################
##################################################################################
################    BOOTSTRAP CONFIDENCE INTERVAL
################    SMOOTH
##################################################################################
############           1) BOOTSTRAP VERSION OF S.E.- NORMAL
############           2) BOOTSTRAP PERCENTILE
##### arguments:  k1=vector containing the healthy sample values 
#####		   	      k2=vector containing the diseased sample values 
#####		   	      B=number of bootstrap resamples
#####		   	      a=significance level
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### value: the 2 interval limits
##################################################################################
icSmooth2 <- function(k1,k2,rho,costs=matrix(c(0,0,1,(1-rho)/rho),2,2, byrow=TRUE),Thres,B=500,a=0.05){
  if(mean(k1)>mean(k2)){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- k1; k1 <- k2; k2 <- g
  }
  
  t <- resample2(k1, k2, B)
  t0 <- t[[1]]
  t1 <- t[[2]]
  
  cut <- rep(NA,B)	
  cut <- suppressWarnings(sapply(1:B,function(j){thresSmooth2(t0[,j],t1[,j],rho,costs)[[1]]}))
  
  est.se <- sd(cut)
  
  ###### 1) NORMAL-BOOTSTRAP SE
  norm <- c(Thres+qnorm(a/2)*est.se, Thres+qnorm(1-a/2)*est.se)
  
  ###### 2) PERCENTILE
  percentil <- (c(quantile(cut,a/2), quantile(cut,1-a/2)))
  
  # results
  re <- list(low.norm=norm[1], up.norm=norm[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
  return(re)
}





################################################################################
############       DATA GENERATION
############    PARAMETRIC RESAMPLING
##### arguments:  dist1, dist2=distributions assumed for the healthy and diseased population, respectively. 
#####  	   	      B=number of bootstrap resamples
##### value: two-object list [[1]]:healthy resample matrix, [[2]]:diseased resample matrix
################################################################################
aux.par.boot <- function(dist1, dist2, par1.1, par1.2, par2.1, par2.2, n1, n2, B){
  t0 <- matrix(rand(dist1)(n1*B, par1.1, par1.2), nrow=n1) # B resamples of dist1(par1.1, par1.2) with replacement
  t1 <- matrix(rand(dist2)(n2*B, par2.1, par2.2), nrow=n2) # B resamples of dist2(par2.1, par2.2) with replacement
  t <- list(t0,t1)
  return(t)
}

###################################################################################
###################################################################################
###############    FUNCTION OF PARAMETRIC BOOTSTRAP FOR THEORETICAL METHOD
##########              1) BOOTSTRAP VERSION OF S.E.- NORMAL
##########              2) PERCENTILE BIAS-CORRECTED
##### arguments:  dist1, dist2=distributions assumed for the healthy and diseased population, respectively. 
#####             par1.1, par1.2, par2.1, par2.2, n1, n2=parameters and sample sizes
#####		   	      B=number of bootstrap resamples
#####		   	      a=significance level
#####		   	      rho=disease prevalence
#####		  	      costs=cost matrix
##### returns: the 2 interval limits
##################################################################################
icBootTH <- function(dist1, dist2, par1.1, par1.2, par2.1, par2.2, n1, n2, rho, costs=matrix(c(0,0,1,(1-rho)/rho), 2, 2, byrow=TRUE), Thres, B=500, a=0.05){
  median1 <- quant(dist1)(0.5, par1.1, par1.2)
  median2 <- quant(dist2)(0.5, par2.1, par2.2)
  if(median1 > median2){
    rho <- 1-rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- par2.1; par2.1 <- par1.1; par1.1 <- g
    f <- par2.2; par2.2 <- par1.2; par1.2 <- f
    auxdist <- dist2; dist2 <- dist1; dist1 <- auxdist
  }
  # bootstrap resamples
  t <- aux.par.boot(dist1, dist2, par1.1, par1.2, par2.1, par2.2, n1, n2, B)
  t0 <- t[[1]]
  t1 <- t[[2]]
  # computing threshold of the resamples...
  # 1) parameter estimation for each resample
  pars1 <- sapply(1:B, function(i){getParams(t0[, i], dist1)})
  pars2 <- sapply(1:B, function(i){getParams(t1[, i], dist2)})
  # 2) TH cut
  cut <- sapply(1:B,function(i){thresTH2(dist1, dist2, pars1[1, i], pars1[2, i], pars2[1, i], pars2[2, i], rho, costs)[[1]]})
  # sd
  est.se <- sd(na.omit(cut))
  
  ###### 1) NORMAL-BOOTSTRAP SE
  norm.bootSE <- c(Thres + qnorm(a/2)*est.se, Thres + qnorm(1-a/2)*est.se)
  
  ###### 2) PERCENTILE
  percentil <- (c(quantile(na.omit(cut),a/2), quantile(na.omit(cut),1-a/2)))
  
  # results
  re <- list(low.norm=norm.bootSE[1], up.norm=norm.bootSE[2], se=est.se, low.perc=percentil[1], up.perc=percentil[2], alpha=a, B=B, ci.method="boot")
  return(re)
}



##################################################################################
###### 	PLOTCOSTROC ("thres2" and "thres3" classes)
##################################################################################
######		COST FUNCTION PLOT + ROC CURVE, OPTIMAL THRESHOLD, SENSITIVITY, SPECIFICITY (2-state)
######  2-states: plotCost function provides two graphics:
######  1) the cost function with the cost minimising threshold in red
######  2) the ROC curve with the sensitivity and specificity achieved in red
######  3-states: plotCost function provides two graphics:
######  The contribution of both thresholds to the cost function, with the minimising thresholds in res
######  arguments:
######        x: 'thres2' or 'thres3' object 
######        type: type of pline for plots
##################################################################################
plotCostROC <- function(x, type="l", which.plot=c("both", "cost", "roc"), ...){
  which.plot <- match.arg(which.plot)
  # error handling
  if (!inherits(x, c("thres2", "thres3"))){
    stop("'x' must be a 'thres2' or 'thres3' object")
  }
  oldpar <- par(no.readonly=TRUE)
  on.exit(par(oldpar))
  if (inherits(x, "thres2")){ # 2 states
    if (x$T$method == "smooth"){
      k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
      # changes
      changed <- FALSE
      if (mean(k1)>mean(k2)){
        rho <- 1-rho
        costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
        g <- k1; k1 <- k2; k2 <- g
        changed <- TRUE
      }
      # plot limits
      k <- c(x$T$k1, x$T$k2)
      rangex <- range(k)
      xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
      if (which.plot %in% c("both", "cost")){
        # plot 1: cost function
        y <- suppressWarnings(thres2(x$T$k1, x$T$k2, x$T$prev, x$T$costs, method="empirical", extra.info=TRUE, ci=FALSE))
        xx <- y$T$tot.thres
        yy <- y$T$tot.cost
        lo <- loess(yy~xx)
        pred <- predict(lo)
        plot(xx, yy, xlim=xlim, ylim=c(min(pred), max(pred)), type="n", xlab="t", ylab="cost(t)")
        lines(xx, pred, ...)
        points(x$T$thres, predict(lo, x$T$thres), col=2, pch=19)
      }
      if (which.plot %in% c("both", "roc")){
        if (which.plot=="both") par(ask=T)
        # plot 2: ROC curve
        CUT <- x$T$thres
        if (!changed){
          resp.CUT <- ifelse(k<CUT, 0, 1)
        }else{
          resp.CUT <- ifelse(k>CUT, 0, 1)
        }   
        resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
        resp.CUT <- factor(resp.CUT, c("0", "1"))
        roc <- smooth(roc(response=resp, predictor=k, quiet=TRUE))
        plot(roc)
        if (which.plot=="both") par(ask=F)
      }
    }
    if (x$T$method == "empirical"){
      if (length(x$T) != 14){
        stop("use argument 'extra.info = TRUE' in 'thres2()'")
      }
      # plots
      if (which.plot %in% c("both", "cost")){
        # plot 1: empirical cost function
        plot(x$T$tot.thres, x$T$tot.cost, xlab="Threshold", ylab="Cost", main="Empirical Cost function", type=type, ...)
        points(x$T$thres, x$T$cost, col=2, pch=19)
      }
      if (which.plot %in% c("both", "roc")){
        if (which.plot=="both") par(ask=T)
        # plot 2: empirical ROC curve
        plot(1-x$T$tot.spec,x$T$tot.sens, main="Empirical ROC curve", xlab="1-Specificity", ylab="Sensitivity", type=type, ...)
        points(1-x$T$spec, x$T$sens, col=2, pch=19)
        if (which.plot=="both") par(ask=F)
      }
    }
    if (x$T$method =="equal"){
      k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
      # changes
      changed <- FALSE
      if (mean(k1)>mean(k2)){
        rho <- 1-rho
        costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
        g <- k1; k1 <- k2; k2 <- g
        changed <- TRUE
      }
      # plot limits
      k <- c(x$T$k1, x$T$k2)
      rangex <- range(k)
      xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
      # cost function
      cost <- function(t){
        sd <- sqrt(varPooled(k1, k2))
        TP <- (1-pnorm(t, mean(k2), sd))*rho
        FN <- (pnorm(t, mean(k2), sd))*rho
        FP <- (1-pnorm(t, mean(k1), sd))*(1-rho)
        TN <- (pnorm(t, mean(k1), sd))*(1-rho)
        n <- length(k1)+length(k2)
        C <- n*(TP*costs[1,1]+FN*costs[2, 2]+FP*costs[2, 1]+TN*costs[1, 2])
        return(as.numeric(C))
      }
      if (which.plot %in% c("both", "cost")){
        # plot 1: cost function
        plot(cost, xlim=xlim, type=type, xlab="t", ylab="cost(t)", ...)
        points(x$T$thres, cost(x$T$thres), col=2, pch=19)
      }
      if (which.plot %in% c("both", "roc")){
        if (which.plot=="both") par(ask=T)
        # plot 2: ROC curve
        CUT <- x$T$thres
        if (!changed){
          resp.CUT <- ifelse(k<CUT, 0, 1)
        }else{
          resp.CUT <- ifelse(k>CUT, 0, 1)
        }   
        resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
        resp.CUT <- factor(resp.CUT, c("0", "1"))
        roc <- roc(response=resp, predictor=k, quiet=TRUE)
        plot(roc)
        # add sens and spec given by the threshold
        tab <- table(resp.CUT, resp)[2:1, 2:1]
        SENS <- tab[1, 1]/(tab[1, 1]+tab[2, 1])
        SPEC <- tab[2, 2]/(tab[2, 2]+tab[1, 2])
        points(SPEC, SENS, col=2, pch=19)       
        if (which.plot=="both") par(ask=F)
      }
    }
    if (x$T$method=="unequal"){
      k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
      # changes
      changed <- FALSE
      if (mean(k1)>mean(k2)){
        rho <- 1-rho
        costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
        g <- k1; k1 <- k2; k2 <- g
        changed <- TRUE
      }
      # plot limits
      k <- c(x$T$k1, x$T$k2)
      rangex <- range(k)
      xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
      # cost function
      cost <- function(t){
        sd1 <- sd(k1)
        sd2 <- sd(k2)
        TP <- (1-pnorm(t, mean(k2), sd2))*rho
        FN <- (pnorm(t, mean(k2), sd2))*rho
        FP <- (1-pnorm(t, mean(k1), sd1))*(1-rho)
        TN <- (pnorm(t, mean(k1), sd1))*(1-rho)
        n <- length(k1)+length(k2)
        C <- n*(TP*costs[1,1]+FN*costs[2, 2]+FP*costs[2, 1]+TN*costs[1, 2])
        return(as.numeric(C))
      }
      if (which.plot %in% c("both", "cost")){
        # plot 1: cost function
        plot(cost, xlim=xlim, type=type, xlab="t", ylab="cost(t)", ...)
        points(x$T$thres, cost(x$T$thres), col=2, pch=19)
      }
      if (which.plot %in% c("both", "roc")){
        if (which.plot=="both") par(ask=T)
          # plot 2: ROC curve
        CUT <- x$T$thres
        if (!changed){
          resp.CUT <- ifelse(k<CUT, 0, 1)
        }else{
          resp.CUT <- ifelse(k>CUT, 0, 1)
        }   
        resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
        resp.CUT <- factor(resp.CUT, c("0", "1"))
        roc <- roc(response=resp, predictor=k, quiet=TRUE)
        plot(roc)
        # add sens and spec given by the threshold
        tab <- table(resp.CUT, resp)[2:1, 2:1]
        SENS <- tab[1, 1]/(tab[1, 1]+tab[2, 1])
        SPEC <- tab[2, 2]/(tab[2, 2]+tab[1, 2])
        points(SPEC, SENS, col=2, pch=19)
        if (which.plot=="both") par(ask=F)
      }
    }
    if (x$T$method=="parametric"){
       k1 <- x$T$k1; k2 <- x$T$k2; rho <- x$T$prev; costs <- x$T$costs
       dist1 <- x$T$dist1; dist2 <- x$T$dist2
       par1.1 <- x$T$pars1[1]; par1.2 <- x$T$pars1[2]; par2.1 <- x$T$pars2[1]; par2.2 <- x$T$pars2[2]
       # changes
       changed <- FALSE
       if (median(k1)>median(k2)){
         rho <- 1-rho
         costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
         g <- k1; k1 <- k2; k2 <- g
         changed <- TRUE
         g <- par2.1; par2.1 <- par1.1; par1.1 <- g
         f <- par2.2; par2.2 <- par1.2; par1.2 <- f
         auxdist <- dist2; dist2 <- dist1; dist1 <- auxdist        
       }
       # plot limits
       k <- c(x$T$k1, x$T$k2)
       rangex <- range(k)
       xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
       # cost function
       cost <- function(t){
         TP <- (1-p(dist2)(t, par2.1, par2.2))*rho
         FN <- (p(dist2)(t, par2.1, par2.2))*rho
         FP <- (1-p(dist1)(t, par1.1, par1.2))*(1-rho)
         TN <- (p(dist1)(t, par1.1, par1.2))*(1-rho)
         n <- length(k1)+length(k2)
         C <- n*(TP*costs[1,1]+FN*costs[2, 2]+FP*costs[2, 1]+TN*costs[1, 2])
         return(as.numeric(C))
       }
       if (which.plot %in% c("both", "cost")){
         # plot 1: cost function
         plot(cost, xlim=xlim, type=type, xlab="t", ylab="cost(t)", ...)
         points(x$T$thres, cost(x$T$thres), col=2, pch=19)
       }
       if (which.plot %in% c("both", "roc")){
         if (which.plot=="both") par(ask=T)
         # plot 2: ROC curve
         CUT <- x$T$thres
         if (!changed){
           resp.CUT <- ifelse(k<CUT, 0, 1)
         }else{
           resp.CUT <- ifelse(k>CUT, 0, 1)
         }   
         resp <- c(rep(0, length(x$T$k1)), rep(1, length(x$T$k2)))
         resp.CUT <- factor(resp.CUT, c("0", "1"))
         roc <- roc(response=resp, predictor=k, quiet=TRUE)
         plot(roc)
         # add sens and spec given by the threshold
         tab <- table(resp.CUT, resp)[2:1, 2:1]
         SENS <- tab[1, 1]/(tab[1, 1]+tab[2, 1])
         SPEC <- tab[2, 2]/(tab[2, 2]+tab[1, 2])
         points(SPEC, SENS, col=2, pch=19)
         if (which.plot=="both") par(ask=F)
       }
     }
  }else{ # 3-states
    k <- c(x$T$k1, x$T$k2, x$T$k3)
    rangex <- range(k)
    xlim <- c(floor(rangex[1]), ceiling(rangex[2]))
    # cost function
    dist1 <- x$T$dist1; dist2 <- x$T$dist2; dist3 <- x$T$dist3
    rho <- x$T$prev; costs <- x$T$costs
    if (dist1 =="norm" & dist2=="norm" & dist3=="norm"){
      par1.1 <- mean(x$T$k1); par1.2 <- sd(x$T$k1)
      par2.1 <- mean(x$T$k2); par2.2 <- sd(x$T$k2)
      par3.1 <- mean(x$T$k3); par3.2 <- sd(x$T$k3)
    }else{
      par1.1 <- x$T$pars1[1]; par1.2 <- x$T$pars1[2]
      par2.1 <- x$T$pars2[1]; par2.2 <- x$T$pars2[2]
      par3.1 <- x$T$pars3[1]; par3.2 <- x$T$pars3[2]
    }
    cost.t1 <- function(t){
      aux1 <- rho[1]*p(dist1)(t, par1.1, par1.2)*(costs[1, 1]-costs[1, 2])
      aux2 <- rho[2]*p(dist2)(t, par2.1, par2.2)*(costs[2, 1]-costs[2, 2])
      aux3 <- rho[3]*p(dist3)(t, par3.1, par3.2)*(costs[3, 1]-costs[3, 2])
      C <- aux1+aux2+aux3
      return(as.numeric(C))
    }
    cost.t2 <- function(t){
      aux1 <- rho[1]*p(dist1)(t, par1.1, par1.2)*(costs[1, 2]-costs[1, 3])
      aux2 <- rho[2]*p(dist2)(t, par2.1, par2.2)*(costs[2, 2]-costs[2, 3])
      aux3 <- rho[3]*p(dist3)(t, par3.1, par3.2)*(costs[3, 2]-costs[3, 3])
      C <- aux1+aux2+aux3
      return(as.numeric(C))
    }
    # plot 1: C(T1)
    plot(cost.t1, xlim=xlim, type=type, ylab="Cost(thres1)", xlab="thres1", ...)
    points(x$T$thres1, cost.t1(x$T$thres1), col=2, pch=19)
    par(ask=T)
    # plot 2: C(T2)
    plot(cost.t2, xlim=xlim, type=type, ylab="Cost(thres2)", xlab="thres2", ...)
    points(x$T$thres2, cost.t2(x$T$thres2), col=2, pch=19)
    par(ask=F)
  }
}

##################################################################################
######	DENSITY PLOT
##################################################################################
###### plot.thres2 function provides a graphic including the sample densities (diseased
###### and non-diseased populations), the threshold and its confidence interval
###### arguments:
######  x: 'thres2' object
######  bw: vector containing the bandwith for the non-diseased sample in the 1st
######      position and the bandwith for the diseased sample in the 2nd position
######      (to be passed to 'density()'). Default, c("nrd0", "nrd0").
######  ci: should the confidence interval be plotted? Default, T.
######  which.boot: in case 'x' contains CI computed by bootstrapping, which one should be printed?
######              the user can choose between "norm" (based on normal distribution)
######              or "perc" (based on percentiles). Default, "norm". This argument
######              is ignored if the CI were computed by the delta method.
######  col: 3-dimensional vector:
######       col[1]: color for the density of the non-diseased sample
######       col[2]: color for the density for the diseased sample
######       col[3]: color for the threshold and its corresponding CI
######       Default, c(1, 2, 3).
######  lty: 4-dimensional vector:
######       lty[1]: line type for the density of the non-diseased sample
######       lty[2]: line type for the density of the diseased sample
######       lty[3]: line type for the threshold
######       lty[4]: line type for the CI
######       Default, c(1, 1, 1, 2).
######  lwd: 3-dimensional vector:
######       lwd[1]: line width for the density of the non-diseased sample
######       lwd[2]: line width for the density for the diseased sample
######       lwd[3]: line width for the threshold and its corresponding CI
######       Default, c(1, 1, 1).
######  legend: logical asking if an automatic legend should be added to the graph. Default, TRUE.
######  leg.pos: position of the legend. Default, "topleft". Ignored if legend=FALSE.
######  leg.cex: a number that reescales the size of the legend. Ignored if legend=FALSE. Default, 1.
######  xlim, ylim: 2-dimensional vectors indicating the limits for the plot
######  main, xlab...: further arguments to be passed to 'plot()'.
##################################################################################
plot.thres2 <- function(x, bw=c("nrd0", "nrd0"), ci=TRUE, which.boot=c("norm", "perc"), col=c(1, 2, 3), lty=c(1, 1, 1, 2), lwd=c(1, 1, 1), legend=TRUE, leg.pos="topleft", leg.cex=1, xlim=NULL, ylim=NULL, main=paste0("Threshold estimate ", ifelse(ci, "and CI ", ""), "(method ", x$T$method, ")"), xlab="", ...){
  # error handling
  if (!is.logical(ci) | is.na(ci)){
    stop("'ci' must be TRUE or FALSE")
  }
  if (!is.logical(legend) | is.na(legend)){
    stop("'legend' must be TRUE or FALSE")
  }
  if (!is.null(xlim)){
    if (any(is.na(xlim)) | length(xlim)!=2){
      stop("'xlim' must be NULL or a 2-dimensional vector containing no NAs")
    }
  }
  if (!is.null(ylim)){
    if (any(is.na(ylim)) | length(ylim)!=2){
      stop("'ylim' must be NULL or a 2-dimensional vector containing no NAs")
    }
  }
  # recycle col, lty, lwd
  if (length(col)!=3){
    col <- rep_len(col, 3)
  }
  if (length(lty)!=4){
    lty <- rep_len(lty, 4)
  }
  if (length(lwd)!=3){
    lwd <- rep_len(lwd, 3)
  }
  which.boot <- match.arg(which.boot)
  k1 <- x$T$k1
  k2 <- x$T$k2
  dens.k1 <- density(k1, bw=bw[1])
  dens.k2 <- density(k2, bw=bw[2])
  if (is.null(xlim)){
    min.x <- min(min(dens.k1$x), min(dens.k2$x))
    max.x <- max(max(dens.k1$x), max(dens.k2$x))
  }else{
    min.x <- xlim[1]
    max.x <- xlim[2]
  }
  if (is.null(ylim)){
    min.y <- 0
    max.y <- max(max(dens.k1$y), max(dens.k2$y))
  }else{
    min.y <- ylim[1]
    max.y <- ylim[2]
  }
  plot(dens.k1, xlim=c(min.x, max.x), ylim=c(min.y, max.y), col=col[1], lty=lty[1], lwd=lwd[1], main=main, xlab=xlab, ...)
  lines(dens.k2, col=col[2], lty=lty[2], lwd=lwd[2])
  # thres
  abline(v=x$T$thres, col=col[3], lty=lty[3], lwd=lwd[3])
  # CI
  if(ci & !is.null(x$CI)){
    if (x$CI$ci.method != "boot"){
      abline(v=c(x$CI$lower, x$CI$upper), col=col[3], lty=lty[4], lwd=lwd[3])
    }else{
      abline(v=c(x$CI[paste0("low.", which.boot)], x$CI[paste0("up.", which.boot)]), col=col[3], lty=lty[4], lwd=lwd[3])
    }
  }  
  # legend
  if (legend){
    legend(leg.pos, c(expression(bar(D)), "D", ifelse(ci & !is.null(x$CI), "Thres+CI", "Thres")), col=col, lty=lty, lwd=lwd, cex=leg.cex)
  }
}



##################################################################################
######  LINES DENSITY PLOT
##################################################################################
###### lines.thres2 function includes vertical lines for the thresholds and CIs in
###### a plot.thres2.
###### arguments:
######  x: 'thres2' object
######  ci: should the confidence interval be plotted? Default, TRUE.
######  which.boot: in case 'x' contains CI computed by bootstrapping, which one should be printed?
######              the user can choose between "norm" (based on normal distribution)
######              or "perc" (based on percentiles). Default, "norm". This argument
######              is ignored if the CI were computed by the delta method.
######  col: color for the threshold and its corresponding CI. Default, 1.
######  lty: 2-dimensional vector:
######       lty[1]: line type for the threshold
######       lty[2]: line type for the CI
######       Default, c(1, 2).
######  lwd: line width for the threshold and its corresponding CI. Default, 1.
######  ...: further arguments to be passed to 'abline()'.
##################################################################################
lines.thres2 <- function(x, ci=TRUE, which.boot=c("norm", "perc"), col=1, lty=c(1, 2), lwd=1, ...){
  # error handling
  if (!is.logical(ci) | is.na(ci)){
    stop("'ci' must be TRUE or FALSE")
  }
  # recicle lty
  if (length(lty)!=2){
    lty <- rep_len(lty, 2)
  }
  which.boot <- match.arg(which.boot)
  # thres
  abline(v=x$T$thres, col=col, lty=lty[1], lwd=lwd)
  # CI
  if(ci & !is.null(x$CI)){
    if (x$CI$ci.method != "boot"){
      abline(v=c(x$CI$lower, x$CI$upper), col=col, lty=lty[2], lwd=lwd, ...)
    }else{
      abline(v=c(x$CI[paste0("low.", which.boot)], x$CI[paste0("up.", which.boot)]), col=col, lty=lty[2], lwd=lwd, ...)
    }
  }
}
   
                


############################################################################
####	    SAMPLE SIZE ESTIMATION
############################################################################

############################################################################
############################################################################
######### PARAMETRIC
############################################################################
######### LIMITING BETA
######### CHECKING POSITIVE SQUARE ROOT
##### arguments:
#####  par1.1=healthy population mean
#####  par1.2=healthy population standard deviation
#####  par2.1=diseased population mean
#####	 par2.2=diseased population standard deviation
#####  rho=disease prevalence
#####	 costs=cost matrix
############################################################################
control <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
  ctrl <- (par2.1-par1.1)^2+2*log((par2.2/par1.2)*slope(rho,costs))*(par2.2^2-par1.2^2)
  return(ctrl)
}

###########################################################################
############       STANDARD ERROR
###########################################################################
##########################################################################
############       VARIANCES OF ESTIMATORS
############       UNEQUAL VARIANCES
##########################################################################
############ FUNCTION OF ML VARIANCE OF VARIANCE ESTIMATOR
##########################################################################
parVarUn <- function(sdev, t){
  est <- 2*sdev^4/(t-1)
  return(est)
} 

##########################################################################
############ FUNCTION OF ML VARIANCE OF MEAN ESTIMATOR  (DIS+NON.DIS)
##########################################################################
parMeanUn <- function(sdev,t){
  est <- sdev^2/t
  return(est)
}

##########################################################################
############        PARTIAL DERIVATIVES.............
##########################################################################
############         OF DISEASED MEAN
##### arguments:
#####  par1.1=healthy population mean
#####  par1.2=healthy population standard deviation
#####  par2.1=diseased population mean
#####  par2.2=diseased population standard deviation
#####  rho=disease prevalence
#####	 costs=cost matrix
##########################################################################
parDerMeanDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
  est <- (par2.2*par1.2*(par2.1-par1.1)/sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs) )-par1.2^2)/(par2.2^2-par1.2^2) 
  return(est)
}

##########################################################################
############         OF NON-DISEASED MEAN
##### arguments:
#####  par1.1=healthy population mean
#####  par1.2=healthy population standard deviation
#####  par2.1=diseased population mean
#####  par2.2=diseased population standard deviation
#####  rho=disease prevalence
#####  costs=cost matrix
##########################################################################
parDerMeanNonDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
  est <- (par2.2^2-par2.2*par2.1*(par1.2-par1.1)/sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs)))/(par2.2^2-par2.1^2)
  return(est)
}
##########################################################################
############         OF DISEASED VARIANCE
##### arguments:
#####  par1.1=healthy population mean
#####  par1.2=healthy population standard deviation
#####  par2.1=diseased population mean
#####  par2.2=diseased population standard deviation
#####  rho=disease prevalence
#####  costs=cost matrix
##########################################################################
parDerVarDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
  est <- ((par1.2*par2.2*((par2.2^2-par1.2^2)/par2.2^2+2*log(slope(rho,costs)*par2.2/par1.2)))/(2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs)))+par1.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))/(2*par2.2)+par1.1)/(par2.2^2-par1.2^2)-(par1.2*par2.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))+par1.1*par2.2^2-par2.1*par1.2^2)/(par2.2^2-par1.2^2)^2 
  return(est)
}

##########################################################################
############         OF NON-DISEASED VARIANCE
##### arguments:
#####  par1.1=healthy population mean
#####  par1.2=healthy population standard deviation
#####  par2.1=diseased population mean
#####  par2.2=diseased population standard deviation
#####  rho=disease prevalence
#####  costs=cost matrix
##########################################################################
parDerVarNonDisUn <- function(par1.1,par1.2,par2.1,par2.2,rho,costs){
  est <- ((par1.2*par2.2*((par1.2^2-par2.2^2)/par2.2^2-2*log(slope(rho,costs)*par2.2/par1.2)))/(2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs)))+par2.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))/(2*par1.2)-par2.1)/(par2.2^2-par1.2^2)+(par1.2*par2.2*sqrt(control(par1.1,par1.2,par2.1,par2.2,rho,costs))+par1.1*par2.2^2-par2.1*par1.2^2)/(par2.2^2-par1.2^2)^2 
  return(est)
}

##########################################################################
############ SAMPLE SIZE RATIO AND MINIMUM REQUIRED SAMPLE SIZE
##### arguments:
#####  par1.1=healthy population mean
#####  par1.2=healthy population standard deviation
#####  par2.1=diseased population mean
#####  par2.2=diseased population standard deviation
#####  rho=disease prevalence
#####  costs=cost matrix
##########################################################################
SS <- function(par1.1, par1.2, par2.1, par2.2=NULL, rho, width, costs=matrix(c(0,0,1,(1-rho)/rho),2,2, byrow=TRUE), R=NULL, var.equal=FALSE, alpha=0.05){
  # error handling
  if (!(rho > 0 & rho < 1)){
    stop("The disease prevalence rho must be a number in (0,1)")
  }
  # if (!is.matrix(costs)){
  #   stop("'costs' must be a matrix")
  # }
  # if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
  #   stop("'costs' must be a 2x2 matrix")
  # }
  if (width<=0){
    stop("'width' must be a positive number")
  }
  if (!is.logical(var.equal) | is.na(var.equal)){
    stop("'var.equal' must be TRUE or FALSE")
  }
  if (var.equal){
    if (!is.null(par2.2)){
      if (par1.2 != par2.2){
        stop("'var.equal' is set to TRUE, but 'par1.2' and 'par2.2' are different")
      }
    }else{
      par2.2 <- par1.2
    }
  }else{
    if (is.null(par2.2)){
      stop("When 'var.equal' is set to FALSE, a value for 'par2.2' must be given")
    }
    if (par1.2==par2.2){
      stop("'var.equal' is set to FALSE, but par1.2==par2.2")
    }
  }
  # costs or R
  if (is.null(costs) & is.null(R)){
    stop("Both 'costs' and 'R' are NULL. Please specify one of them.")
  }else if (!is.null(costs) & !is.null(R)){
    stop("Either 'costs' or 'R' must be NULL")
  }else if (is.null(costs) & !is.null(R)){
    if (!is.numeric(R) | length(R)!=1){
      stop("R must be a single number")
    }
    costs <- matrix(c(0, 0, 1, (1-rho)/(R*rho)), 2, 2, byrow=TRUE)
  }else if (!is.null(costs) & is.null(R)){
    if (!is.matrix(costs)){
      stop("'costs' must be a matrix")
    }
    if (dim(costs)[1] != 2 | dim(costs)[2] != 2){
      stop("'costs' must be a 2x2 matrix")
    }
  }
  
  costs.origin <- costs
  rho.origin <- rho
  par1.1.origin <- par1.1
  par2.1.origin <- par2.1
  L <- width/2
  if (par1.1 > par2.1){
    rho <- 1 - rho
    costs <- costs[, 2:1] # change c.t.pos <-> c.t.neg and c.f.pos <-> c.f.neg
    g <- par1.1; par1.1 <- par2.1; par2.1 <- g
    f <- par1.2; par1.2 <- par2.2; par2.2 <- f
  }
  # slope
  R <- slope(rho, costs)
  if(var.equal){
    num <- 1/2-par1.2^2*log(R)/(par2.1-par1.1)^2
    den <- 1/2+par1.2^2*log(R)/(par2.1-par1.1)^2
    epsilon <- abs(num/den)
    est.non.dis <- (qnorm(1-alpha/2)/L)^2*((log(R)/(par2.1-par1.1))^2*2*par1.2^4/(1+epsilon)
                                           +(1/2-par1.2^2*log(R)/(par2.1-par1.1)^2)^2*par1.2^2/epsilon
                                           +(1/2+par1.2^2*log(R)/(par2.1-par1.1)^2)^2*par1.2^2)
  }else{
    threshold <- expression((sigma2D*muND-sigma2ND*muD+sqrt(sigma2ND*sigma2D)*sqrt((muD-muND)^2+2*log(R*sqrt(sigma2D/sigma2ND))*(sigma2D-sigma2ND)))/(sigma2D-sigma2ND))
    where <- list(muND=par1.1, sigma2ND=par1.2^2, muD=par2.1, sigma2D=par2.2^2, R=R)
    a <- (as.numeric(attributes(eval(deriv(threshold, "muD"), where))$gradient))^2
    b <- (as.numeric(attributes(eval(deriv(threshold, "muND"), where))$gradient))^2
    c <- (as.numeric(attributes(eval(deriv(threshold, "sigma2D"), where))$gradient))^2
    d <- (as.numeric(attributes(eval(deriv(threshold, "sigma2ND"), where))$gradient))^2
    epsilon <- sqrt((a*par2.2^2+2*c*par2.2^4)/(b*par1.2^2+2*d*par1.2^4))
    est.non.dis <- (qnorm(1-alpha/2)/L)^2*(a*par2.2^2/epsilon+b*par1.2^2+2*c*par2.2^4/epsilon+2*d*par1.2^4)
  }
  est.dis <- epsilon*est.non.dis
  
  if (par1.1.origin > par2.1.origin){
    auxiliar <- est.non.dis
    est.non.dis <- est.dis
    est.dis <- auxiliar
    epsilon <- 1/epsilon
  }
  
  re <- list(ss2=est.dis, ss1=est.non.dis, epsilon=epsilon, width=width, alpha=alpha, costs=costs.origin, R=R, prev=rho.origin)
  class(re) <- "SS"
  return(re)
} 

#############################################################
## Print function for class "SS"
#############################################################
print.SS <- function(x, ...){
    cat("Optimum SS Ratio: ", x$epsilon)
    cat("\n\nSample size for")
    cat("\n  Diseased: ", x$ss2)
    cat("\n  Non-diseased: ", x$ss1)
    cat("\n")
    cat("\nParameters used")
    cat("\n  Significance Level: ", x$alpha)
    cat("\n  CI width: ", x$width)
    cat("\n  Disease prevalence:", x$prev)
    cat("\n  Costs (Ctp, Cfp, Ctn, Cfn):", x$costs)
    cat("\n  R:", x$R)
    cat("\n")
}

Try the ThresholdROC package in your browser

Any scripts or data that you put into this service are public.

ThresholdROC documentation built on Aug. 30, 2023, 1:08 a.m.