Nothing
###############################################################################
##
## mcBootstrap.R
##
## Function for computing point estimations and standard errors
## for regression coefficients with bootstrap or jackknife method.
##
## Copyright (C) 2011 Roche Diagnostics GmbH
##
## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## any later version.
##
## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
## GNU General Public License for more details.
##
## You should have received a copy of the GNU General Public License
## along with this program. If not, see <http://www.gnu.org/licenses/>.
##
###############################################################################
#' Resampling estimation of regression parameters and standard errors.
#'
#' Generate jackknife or (nested-) bootstrap replicates of a statistic applied to data.
#' Only a nonparametric ballanced design is possible. For each sample calculate
#' point estimations and standard errors for regression coefficients.
#'
#' @param X Measurement values of reference method
#' @param Y Measurement values of test method
#' @param error.ratio Ratio between squared measurement errors of reference- and test method,
#' necessary for Deming regression. Default 1.
#' @param method.reg Regression method. It is possible to choose between five regression types:
#' \code{"LinReg"} - ordinary least square regression,
#' \code{"WLinReg"} - weighted ordinary least square regression,\code{"Deming"} - Deming regression,
#' \code{"WDeming"} - weighted Deming regression, \code{"PaBa"} - Passing-Bablok regression.
#' @param bootstrap Bootstrap based confidence interval estimation method.
#' @param jackknife Logical value. If TRUE - Jackknife based confidence interval estimation method.
#' @param nsamples Number of bootstrap samples.
#' @param nnested Number of nested bootstrap samples.
#' @param iter.max maximum number of iterations for weighted Deming iterative algorithm.
#' @param threshold Numerical tolerance for weighted Deming iterative algorithm convergence.
#' @param NBins number of bins used when 'reg.method="PaBaLarge"' to classify each slope in one of 'NBins' bins of constant slope angle covering the range of all slopes.
#' @param slope.measure angular measure of pairwise slopes used for exact PaBa regression (see \code{\link{mcreg}} for details).\cr
#' \code{"radian"} - for data sets with even sample numbers median slope is calculated as average of two central slope angles.\cr
#' \code{"tangent"} - for data sets with even sample numbers median slope is calculated as average of two central slopes (tan(angle)).\cr
#' @return a list consisting of
#' \item{glob.coef}{Numeric vector of length two with global point estimations of intercept and slope.}
#' \item{glob.sigma}{Numeric vector of length two with global estimations of standard errors of intercept and slope.}
#' \item{xmean}{Global (weighted-)average of reference method values.}
#' \item{B0jack}{Numeric vector with point estimations of intercept for jackknife samples.
#' The i-th element contains point estimation for data set without i-th observation}
#' \item{B1jack}{Numeric vector with point estimations of slope for jackknife samples.
#' The i-th element contains point estimation for data set without i-th observation}
#' \item{B0}{Numeric vector with point estimations of intercept for each bootstrap sample.
#' The i-th element contains point estimation for i-th bootstrap sample.}
#' \item{B1}{Numeric vector with point estimations of slope for each bootstrap sample.
#' The i-th element contains point estimation for i-th bootstrap sample.}
#' \item{MX}{Numeric vector with point estimations of (weighted-)average of reference method values for each bootstrap sample.
#' The i-th element contains point estimation for i-th bootstrap sample.}
#' \item{sigmaB0}{Numeric vector with estimation of standard error of intercept for each bootstrap sample.
#' The i-th element contains point estimation for i-th bootstrap sample.}
#' \item{sigmaB1}{Numeric vector with estimation of standard error of slope for each bootstrap sample.
#' The i-th element contains point estimation for i-th bootstrap sample.}
#' \item{nsamples}{Number of bootstrap samples.}
#' \item{nnested}{Number of nested bootstrap samples.}
#' \item{cimeth}{Method of confidence interval calculation (bootstrap).}
#' \item{npoints}{Number of observations.}
#' @references Efron, B., Tibshirani, R.J. (1993)
#' \emph{An Introduction to the Bootstrap}. Chapman and Hall.
#' Carpenter, J., Bithell, J. (2000)
#' Bootstrap confidence intervals: when, which, what? A practical guide for medical statisticians.
#' \emph{Stat Med}, \bold{19 (9)}, 1141--1164.
#' @author Ekaterina Manuilova \email{ekaterina.manuilova@@roche.com}, Fabian Model \email{fabian.model@@roche.com}, Sergej Potapov \email{sergej.potapov@@roche.com}
mc.bootstrap <- function(method.reg=c("LinReg","WLinReg","Deming","WDeming","PaBa", "PaBaLarge","TS","PBequi"),
jackknife=TRUE, bootstrap=c("none","bootstrap", "nestedbootstrap"),
X, Y, error.ratio, nsamples=1000,
nnested=25, iter.max=30, threshold=0.00000001, NBins=1000000, slope.measure=c("radian","tangent"))
{
method.reg <- match.arg(method.reg)
bootstrap <- match.arg(bootstrap)
slope.measure <- match.arg(slope.measure)
# Check validity of parameters
stopifnot(is.numeric(X))
stopifnot(is.numeric(Y))
stopifnot(length(X) == length(Y))
stopifnot(!is.na(X) & !is.na(Y))
stopifnot(length(X) > 0)
if(method.reg %in% c("Deming", "WDeming"))
{
stopifnot(!is.na(error.ratio))
stopifnot(is.numeric(error.ratio))
stopifnot(error.ratio >= 0)
stopifnot(length(error.ratio) > 0)
if( method.reg == "WDeming")
{
stopifnot(!is.na(threshold))
stopifnot(is.numeric(threshold))
stopifnot(threshold >= 0)
stopifnot(length(threshold) > 0)
stopifnot(!is.na(iter.max))
stopifnot(is.numeric(iter.max))
stopifnot(length(iter.max) > 0)
stopifnot(round(iter.max) == iter.max)
stopifnot(iter.max > 0)
}
}
if( bootstrap %in% c("bootstrap", "nestedbootstrap") )
{
stopifnot(!is.na(nsamples))
stopifnot(is.numeric(nsamples))
stopifnot(round(nsamples)==nsamples)
stopifnot(nsamples>0)
stopifnot(length(nsamples) > 0)
if( bootstrap == "nestedbootstrap")
{
stopifnot(!is.na(nnested))
stopifnot(is.numeric(nnested))
stopifnot(round(nnested)==nnested)
stopifnot(nnested>0)
stopifnot(length(nnested) > 0)
}
}
stopifnot(!is.na(jackknife))
stopifnot(is.logical(jackknife))
stopifnot(length(jackknife) > 0)
if(method.reg %in% c("WLinReg","WDeming") & (min(X)<=0 | min(Y)<=0))
stop("Weighted regression for non-positive values is not available.")
if(method.reg %in% c("PaBa", "PaBaLarge") & (min(X)<0 | min(Y)<0))
stop("Passing-Bablok regression for negative values is not available.")
# Number of data points
npoints <- length(X)
# Choice of arguments for regfun
if(method.reg == "LinReg"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if(sd(X[idx]) <= 0){
## All identical points, no estimation possible
warning("Resampling LinReg regression: all x coordinates in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA), b1=as.numeric(NA),
se.b0=as.numeric(NA), se.b1=as.numeric(NA),
xw=as.numeric(NA))
}
mc.linreg(X[idx], Y[idx])
}
}else if(method.reg == "WLinReg"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if(sd(X[idx]) <= 0){
## All identical points, no estimation possible
warning("Resampling WLinReg regression: all x coordinates in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA), b1=as.numeric(NA),
se.b0=as.numeric(NA), se.b1=as.numeric(NA),
xw=as.numeric(NA))
}
mc.wlinreg(X[idx], Y[idx])
}
}else if(method.reg == "PBequi"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if(sd(X[idx]) <= 0){
## All identical points, no estimation possible
warning("Resampling equivariant PaBa regression: all x coordinates in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA), b1=as.numeric(NA),
se.b0=as.numeric(NA), se.b1=as.numeric(NA),
xw=as.numeric(NA))
}
mc.PBequi(X[idx], Y[idx],method.reg="PBequi",slope.measure=slope.measure,calcCI=TRUE)
}
}else if(method.reg == "TS"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if(sd(X[idx]) <= 0){
## All identical points, no estimation possible
warning("Resampling Theil-Sen regression: all x coordinates in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA), b1=as.numeric(NA),
se.b0=as.numeric(NA), se.b1=as.numeric(NA),
xw=as.numeric(NA))
}
mc.PBequi(X[idx], Y[idx],method.reg="TS",slope.measure=slope.measure,calcCI=TRUE)
}
}else if(method.reg == "Deming"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if(sd(X[idx]) <= 0){
## All identical points, no estimation possible
warning("Resampling Deming regression: all x coordinates in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA),b1=as.numeric(NA),se.b0=as.numeric(NA),se.b1=as.numeric(NA),xw=as.numeric(NA))
}
mc.deming(X[idx], Y[idx], error.ratio=error.ratio)
}
}else if(method.reg == "WDeming"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if(sd(X[idx]) <= 0){
## All identical points, no estimation possible
warning("Resampling WDeming regression: all x coordinates in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA), b1=as.numeric(NA), iter=0, xw=as.numeric(NA))
}
mc.wdemingConstCV(X[idx], Y[idx], error.ratio=error.ratio,
iter.max=iter.max, threshold=threshold)
}
}else if(method.reg == "PaBaLarge"){
callfun.reg <- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if((sd(X[idx]) <= 0) | (sd(Y[idx]) <= 0)){
## All identical points on one axis, no estimation possible
warning("Resampling PaBaLarge regression: x or y coordinates of all data points in subsample identical - regression coefficients undetermined!")
list(b0=as.numeric(NA), b1=as.numeric(NA), xw=as.numeric(NA))
}
paba.posCor <- cor(X[idx], Y[idx], method="kendall") >= 0
tmpRes <- mc.paba.LargeData(X[idx], Y[idx], NBins=NBins, posCor=paba.posCor, slope.measure=slope.measure)
list( b0=tmpRes["Intercept", "EST"], b1=tmpRes["Slope", "EST"], xw=as.numeric(NA) )
}
}else if(method.reg == "PaBa"){
## For slope matrix determine if slope 1 or -1 is expected based on full data set
paba.posCor.global <- cor(X, Y, method="kendall") >= 0
## Compute slope matrix once for all further computations,
## global estimation of posCor means global +/-Inf assignment - needs to be corrected if correlation changes due to resampling
## Regression function
callfun.reg<- function(idx, X, Y, error.ratio, iter.max, NBins, slope.measure){
if((sd(X[idx]) <= 0) | (sd(Y[idx]) <= 0)){
## All identical points on one axis, no estimation possible
warning("Resampling PaBa regression: x or y coordinates of all data points in subsample identical - regression coefficients undetermined!")
return(list(b0=as.numeric(NA), b1=as.numeric(NA), xw=as.numeric(NA)))
}
## For actual regression determine if slope 1 or -1 is expected based on resampled data set!
paba.posCor <- cor(X[idx], Y[idx], method="kendall") >= 0
## Run Passing-Bablok
mc.res <- mc.paba(angM = NULL, X[idx], Y[idx],
posCor=paba.posCor, calcCI=FALSE,
slope.measure=slope.measure)
list(b0=mc.res["Intercept","EST"], b1=mc.res["Slope","EST"], xw=as.numeric(NA))
}
}else stop("Unknown regression function!")
# Calculation of regression parameters for whole data set (global coefficients)
d <- callfun.reg(1:npoints, X, Y, error.ratio, iter.max, NBins, slope.measure)
glob.b0 <- d$b0
glob.b1 <- d$b1
xmean <- d$xw
if (method.reg %in% c("PaBa", "PaBaLarge")){
weight <- rep(1, npoints)
}else{
weight <- d$weight
}
if(method.reg %in% c("WDeming","PaBa", "PaBaLarge")){
## Analytical standard errors not available for weighted Deming and Passing-Bablok
glob.seb0 <- as.numeric(NA)
glob.seb1 <- as.numeric(NA)
}else{
glob.seb0 <- d$se.b0
glob.seb1 <- d$se.b1
}
rm(d)
# ----------------------------------------------------------------------------
## Calculation of jackknife coefficients
cal.parallel <- options()$parallel
cores.parallel <- options()$cores
if(jackknife == TRUE){
if(is.null(cal.parallel) || !cal.parallel){
results <- lapply(as.list(1:npoints),
function(z){
d <- callfun.reg((1:length(X))[-z], X, Y, error.ratio, iter.max, NBins, slope.measure)
d[c("b0", "b1")]
})
}else{
if(is.null(cores.parallel)){
cl <- makeCluster(detectCores()/2)
}else{
cl <- makeCluster(cores.parallel)
}
out <- clusterEvalQ(cl, library(mcr))
clusterExport(cl,
varlist = c("callfun.reg", "X", "Y", "error.ratio", "iter.max", "NBins", "slope.measure"),
envir = environment())
results <- clusterApplyLB(cl, as.list(1:npoints),
function(z){
d <- callfun.reg((1:length(X))[-z], X, Y, error.ratio, iter.max, NBins, slope.measure)
d[c("b0", "b1")]
})
stopCluster(cl)
}
B0jack <- sapply(results, function(x) x$b0)
B1jack <- sapply(results, function(x) x$b1)
if(method.reg == "WDeming"){
## Use Linnet's algorithm to estimate parameter SEs
jackLinnetB0 <- mc.calcLinnetCI(B0jack, glob.b0, 0.05)
jackLinnetB1 <- mc.calcLinnetCI(B1jack, glob.b1, 0.05)
glob.seb0 <- jackLinnetB0$se
glob.seb1 <- jackLinnetB1$se
}
if(bootstrap == "none"){
return(list(glob.coef = c(glob.b0,glob.b1),
glob.sigma = c(glob.seb0,glob.seb1),
xmean = xmean,
B0jack = B0jack, B1jack = B1jack,
npoints = npoints, cimeth = "jackknife",
weight = weight))
}
}
#-----------------------------------------------------------------------------
if(bootstrap %in% c("bootstrap", "nestedbootstrap")){
##
## Confidence intervals with Bootstrap or nested Bootstrap
##
## Bootstrap
calc.bootstrap <- function(j, callfun.reg, method.reg, X, Y, error.ratio, iter.max, NBins, slope.measure, npoints, nnested, bootstrap){
## Draw bootstrap sample
index <- sample(1:npoints, size=npoints, replace=TRUE)
## Regression on bootstrap sample
d <- callfun.reg(index, X, Y, error.ratio, iter.max, NBins, slope.measure)
if(method.reg != "PaBa"){
MX <- d$xw
}else{
MX <- mean(X[index])
}
## Parameter SE estimation
if(bootstrap == "nestedbootstrap"){ # Use nested bootstrap, works for all regression methods
B0n <- B1n <- vector("numeric", length = nnested)
## Bootstrap
for(ii in 1:nnested){
## Draw nested bootstrap sample
nindex <- sample(1:npoints,size=npoints,replace=TRUE)
## Regression on nested bootstrap sample
d.nest <- callfun.reg(index[nindex], X, Y, error.ratio, iter.max, NBins, slope.measure)
B0n[ii] <- d.nest$b0
B1n[ii] <- d.nest$b1
} #end nest
## Estimate SD from nested bootstrap results
sigmaB0 <- sd(B0n, na.rm=TRUE)
sigmaB1 <- sd(B1n, na.rm=TRUE)
}else{
# Use analytical SD estimates
if(method.reg %in% c("LinReg", "WLinReg", "Deming","TS","PBequi")){
sigmaB0 <- d$se.b0
sigmaB1 <- d$se.b1
}else if(method.reg == "WDeming"){
## Use global SE estimates according to Linnet's method
sigmaB0 <- glob.seb0
sigmaB1 <- glob.seb1
}else{
sigmaB0 <- sigmaB1 <- as.numeric(NA) # No analytical SE estimates available
}
}
list(B0 = d$b0, B1 = d$b1, MX = MX, sigmaB0 = sigmaB0, sigmaB1 = sigmaB1)
} # end of bootstrap loop
if(is.null(cal.parallel) || !cal.parallel){
results <- lapply(as.list(1:nsamples),
calc.bootstrap, callfun.reg, method.reg, X, Y,
error.ratio, iter.max, NBins, slope.measure,
npoints, nnested, bootstrap)
}else{
if(is.null(cores.parallel)){
cl <- makeCluster(detectCores()/2)
}else{
cl <- makeCluster(cores.parallel)
}
out <- clusterEvalQ(cl, library(mcr))
clusterExport(cl,
varlist = c("calc.bootstrap","callfun.reg", "method.reg", "X", "Y",
"error.ratio", "iter.max", "NBins", "slope.measure",
"npoints", "nnested", "mc.deming", "bootstrap"),
envir = environment())
out <- clusterSetRNGStream(cl)
results <- clusterApplyLB(cl, as.list(1:nsamples),
calc.bootstrap, callfun.reg, method.reg, X, Y, error.ratio,
iter.max, NBins, slope.measure, npoints, nnested, bootstrap)
stopCluster(cl)
}
B0 <- sapply(results, function(x) x$B0)
B1 <- sapply(results, function(x) x$B1)
MX <- sapply(results, function(x) x$MX)
sigmaB0 <- sapply(results, function(x) x$sigmaB0)
sigmaB1 <- sapply(results, function(x) x$sigmaB1)
## Check whether there were any invalid regressions due to drawing all identical samples
na.mask <- !(is.na(B0) | is.na(B1))
## If more than 5% of bootstrap runs had invalid regressions => Error
if(sum(!na.mask)/length(na.mask) > 0.05) stop("There were too many resamples with undetermined regression coefficients!")
if (!jackknife){
## Return non-NA bootstrap results
return(list(glob.coef=c(glob.b0,glob.b1), glob.sigma=c(glob.seb0, glob.seb1), xmean=xmean,
B0=B0[na.mask], B1=B1[na.mask], MX=MX[na.mask], sigmaB0=sigmaB0[na.mask], sigmaB1=sigmaB1[na.mask],
nsamples=sum(na.mask), nnested=nnested, npoints=npoints, cimeth=bootstrap, weight=weight))
}else{
if(any(is.na(B0jack) | is.na(B1jack))) stop("Too many identical data points!")
## Return non-NA bootstrap results
return(list(glob.coef=c(glob.b0, glob.b1), glob.sigma=c(glob.seb0, glob.seb1), xmean=xmean,
B0jack=B0jack, B1jack=B1jack, B0=B0[na.mask], B1=B1[na.mask], MX=MX[na.mask], sigmaB0=sigmaB0[na.mask], sigmaB1=sigmaB1[na.mask],
nsamples=sum(na.mask), nnested=nnested, npoints=npoints, cimeth=bootstrap, weight=weight))
}
} # end of bootstrap
} # end of mc.bootstrap
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.