R/functions.R

Defines functions find_type ts.return scramble tsboot make.ends ts.array print.saddle.distn saddle.distn saddle print.simplex simplex1 simplex zero iden inv.logit logit cum3 k3.linear var.linear control tilt.boot smooth.f imp.prob imp.quantile imp.reg imp.moments const imp.weights exp.tilt glm.diag envelope linear.approx positive.jack usual.jack empinf.reg inf.jack empinf cens.resamp cens.weird cens.case cens.return censboot abc.ci bca.ci perc.ci stud.ci basic.ci norm.inter norm.ci print.bootci boot.ci cv.glm permutation.array ordinary.array index.array importance.array.bal importance.array freq.array extra.array corr print.boot boot.array boot.return normalize boot balanced.array anti.arr antithetic.array rperm isMatrix bsample sample0 get_stat prop_stronger prop_stronger_sign pct_pval calib_ests tau_CI r_to_d format_CI round2 scrape_meta parse_CI_string

Documented in calib_ests format_CI parse_CI_string pct_pval prop_stronger prop_stronger_sign round2 r_to_d scrape_meta tau_CI

################################ FNs: SCRAPING PUBLISHED META-ANALYSIS ################################


#' Parse a string with point estimate and confidence interval
#'
#' Given a vector of strings such as "0.65 (0.6, 0.70)", for example obtained by running optical character recognition (OCR)
#' software on a screenshot of a published forest plot, parses the strings into a dataframe of
#' point estimates and upper confidence interval limits. Assumes that the point estimate occurs before
#' an opening bracket of the form "(" or "[" and that the confidence interval upper limit follows
#' the character \code{sep} (by default a comma, but might be a hyphen, for example). To further parse this dataframe
#' into point estimates and variances, see \code{MetaUtility::scrape_meta}.
#' @param string A vector of strings to be parsed.
#' @param sep The character (not including whitespaces) separating the lower from the upper limits.
#' @export
#' @importFrom
#' stringr str_replace_all str_remove_all
#' @examples
#' # messy string of confidence intervals
#' mystring = c( "0.65 [0.6, 0.7]", "0.8(0.5, 0.9]", "1.2  [0.3, 1.5)")
#' parse_CI_string(mystring)
#'
#' # now with a hyphen separator
#' mystring = c( "0.65 [0.6- 0.7]", "0.8(0.5 - 0.9]", "1.2  [0.3 -1.5)")
#' parse_CI_string(mystring, sep="-")

# assumes a comma before the upper limit
# everything before first bracket treated as point estimate
# assumes the point estimate precedes the first bracket and the CI
# upper limit is the last entry after the separator `sep`
parse_CI_string = function( string, sep = "," ) {

  # standardize the CI brackets so "(" always opens and ")" always closes
  string = gsub( "\\[", "(", string )
  string = gsub( "\\]", ")", string )

  # remove all spaces to simplify parsing
  string = gsub( " ", "", string )

  # point estimate: everything before the first opening bracket
  yi = as.numeric( unlist( lapply( strsplit( string, "\\(" ), function(x) x[1] ) ) )

  # upper limit: the last entry after the separator, with any leftover
  # closing bracket, spaces, or commas stripped before conversion
  # (single cleanup pass; previously the string was converted to numeric
  # twice, the second time on an already-numeric vector)
  hi = unlist( lapply( strsplit( string, sep ), function(x) x[ length(x) ] ) )
  hi = as.numeric( gsub( "[) ,]", "", hi ) )

  return( data.frame( yi, hi ) )
}



#' Convert forest plot or summary table to meta-analytic dataset
#'
#' Given relative risks (RR) and upper bounds of 95\% confidence intervals (CI)
#' from a forest plot or summary table, returns a dataframe ready for meta-analysis
#' (e.g., via the \code{metafor} package) with the log-RRs and their variances.
#' Optionally, the user may indicate studies for which the point estimate is to be
#' interpreted as an odds ratios of a common outcome rather than a relative risk;
#' for such studies, the function applies VanderWeele (2017)'s square-root transformation to convert
#' the odds ratio to an approximate risk ratio.
#' @param type \code{RR} if point estimates are RRs or ORs (to be handled on log scale); \code{raw} if point estimates are raw differences, standardized mean differences, etc. (such that they can be handled with no transformations)
#' @param est Vector of study point estimates on RR or OR scale
#' @param hi Vector of upper bounds of 95\% CIs on RRs
#' @param sqrt Vector of booleans (TRUE/FALSE) for whether each study measured an odds ratio of a common outcome that should be approximated as a risk ratio via the square-root transformation
#' @export
#' @references
#' 1. VanderWeele TJ (2017). On a square-root transformation of the odds ratio for a common outcome. Epidemiology.
#' @import stats

scrape_meta = function( type="RR", est, hi, sqrt=FALSE ){

  # fail fast on an unrecognized type instead of silently returning NULL
  if ( !type %in% c("RR", "raw") ) stop("type must be either 'RR' or 'raw'")

  if ( type == "RR" ) {
    # apply VanderWeele (2017)'s square-root transformation to studies
    # flagged as ORs of a common outcome
    # note: the argument `sqrt` masks only the value, so the call sqrt()
    # below still resolves to base::sqrt
    RR = est
    RR[sqrt] = sqrt( RR[sqrt] )

    # same transformation for the upper CI limit
    hi.RR = hi
    hi.RR[sqrt] = sqrt( hi.RR[sqrt] )

    # back out the standard error on the log scale from the 95% CI
    sei = ( log(hi.RR) - log(RR) ) / qnorm(.975)

    return( data.frame( yi = log(RR), vyi = sei^2 ) )

  } else {
    # raw scale (mean differences, etc.): no transformation needed
    sei = ( hi - est ) / qnorm(.975)
    return( data.frame( yi = est, vyi = sei^2 ) )
  }
}



################################ FNs: FORMATTING ################################

#' Round while keeping trailing zeroes
#'
#' Rounds a numeric value and formats it as a string, keeping trailing zeroes.
#' @param x Numeric value to round
#' @param digits Digits for rounding
#' @export
#' @examples
#' round2(0.03000, digits = 4)
#'
#' # compare to base round, which drops trailing zeroes and returns a numeric
#' round(0.03000, digits = 4)
round2 = function(x, digits = 2) {
  # round first, then format as a fixed-decimal string so that
  # trailing zeroes are retained (unlike base round)
  rounded = round( x, digits )
  formatC( rounded, format = "f", digits = digits )
}


#' Manuscript-friendly confidence interval formatting
#'
#' Formats confidence interval lower and upper bounds into a rounded string.
#' @param lo Confidence interval lower limit (numeric)
#' @param hi Confidence interval upper limit (numeric)
#' @param digits Digits for rounding
#' @export
#' @examples
#' format_CI(0.36, 0.72, 3)
format_CI = function( lo, hi, digits = 2 ) {
  # round each limit (keeping trailing zeroes) and wrap in brackets
  lo.str = round2( lo, digits )
  hi.str = round2( hi, digits )
  paste0( "[", lo.str, ", ", hi.str, "]" )
}


#' Manuscript-friendly number formatting
#'
#' Formats a numeric result (e.g., p-value) as a manuscript-friendly string in which values below a minimum cutoff (e.g., 10^-5) are
#' reported for example as "< 10^-5", values between the minimum cutoff and a maximum cutoff (e.g., 0.01)
#' are reported in scientific notation, and p-values above the maximum cutoff are reported simply as, for example, 0.72.
#' @param x Numeric value to format
#' @param digits Digits for rounding
#' @param cutoffs A vector containing the two cutoffs
#' @export
#' @examples
#' format_stat(0.735253)
#' format_stat(0.735253, digits = 4)
#' format_stat(0.0123)
#' format_stat(0.0001626)
#' format_stat(0.0001626, cutoffs = c(0.01, 10^-3))

format_stat = Vectorize( function(x,
                                  digits = 2,
                                  cutoffs = c(0.01, 10^-5) ) {
  # format a statistic (e.g., p-value) for a manuscript:
  #  x >= max(cutoffs): plain rounding
  #  min(cutoffs) <= x < max(cutoffs): scientific notation
  #  x < min(cutoffs): reported as "< min(cutoffs)"
  if( length(cutoffs) != 2) stop("Only 2 cutoffs are supported")

  if ( x >= max(cutoffs) ) return( round2( x, digits ) )
  # inclusive of min(cutoffs): previously a value exactly equal to the
  # minimum cutoff matched no branch and was silently returned as NULL
  if ( x >= min(cutoffs) ) return( formatC( x, format = "e", digits = 0 ) )
  return( paste("<", min(cutoffs), sep=" ") )
}, vectorize.args = "x" )





################################ FNs: EFFECT SIZE CONVERSIONS ################################


#' Convert Pearson's r to Cohen's d
#'
#' Converts Pearson's r (computed with a continuous X and Y) to Cohen's d
#' for use in meta-analysis. The resulting Cohen's d represents the
#' estimated increase in standardized Y that is associated with a delta-unit
#' increase in X.
#' @param r Pearson's correlation
#' @param sx Sample standard deviation of X
#' @param delta Contrast in X for which to compute Cohen's d, specified in raw units of X (not standard deviations).
#' @param N Sample size used to estimate \code{r}
#' @param Ns Sample size used to estimate \code{sx}, if different from \code{N}
#' @param sx.known Is \code{sx} known rather than estimated? (By default, assumes \code{sx} is estimated, which will almost always be the case.)
#' @details To preserve the sign of the effect size, the code takes the absolute value of \code{delta}. The standard error
#' estimate assumes that X is approximately normal and that \code{N} is large.
#' @references
#' 1. Mathur MB & VanderWeele TJ (2019). A simple, interpretable conversion from Pearson's correlation to Cohen's d for meta-analysis. Epidemiology.
#' @export
#' @examples
#' # d for a 1-unit vs. a 2-unit increase in X
#' r_to_d( r = 0.5,
#'         sx = 2,
#'         delta = 1,
#'         N = 100 )
#' r_to_d( r = 0.5,
#'         sx = 2,
#'         delta = 2,
#'         N = 100 )
#'
#' # d when sx is estimated in the same vs. a smaller sample
#' # point estimate will be the same, but inference will be a little
#' # less precise in second case
#' r_to_d( r = -0.3,
#'          sx = 2,
#'          delta = 2,
#'          N = 300,
#'          Ns = 300 )
#'
#' r_to_d( r = -0.3,
#'         sx = 2,
#'         delta = 2,
#'         N = 300,
#'         Ns = 30 )

r_to_d =
  function(r, sx, delta, N = NA, Ns = N, sx.known = FALSE )  {

    # the sign of d comes from r, so only the magnitude of delta is used
    delta = abs(delta)

    # point estimate of Cohen's d for a delta-unit contrast in X
    d = delta * r / ( sx * sqrt( 1 - r^2 ) )

    # without a sample size there is no inference to do
    if ( is.na(N) ) {
      return( data.frame( d = d, se = NA, lo = NA, hi = NA ) )
    }

    if ( r == 0 ) {
      # standard error via the limiting value as r -> 0
      # (the general formula below is 0 * Inf = NaN at r = 0)
      se = (1/sx) * delta * sqrt( 1 / (N - 3) )
    } else {
      # delta-method SE: contribution from estimating r, plus (unless
      # sx is known) a contribution from estimating sx
      r.part = 1 / ( r^2 * ( N - 3 ) )
      sx.part = if ( sx.known == TRUE ) 0 else 1 / ( 2 * ( Ns - 1 ) )
      se = abs(d) * sqrt( r.part + sx.part )
    }

    # 95% Wald confidence interval
    z.crit = qnorm(.975)
    data.frame( d = d,
                se = se,
                lo = d - z.crit * se,
                hi = d + z.crit * se )
  }





#' Convert Pearson's r to Fisher's z
#'
#' Converts Pearson's r to Fisher's z for use in meta-analysis.
#' @param r Pearson's correlation
#' @export
#' @examples
#' # convert a Pearson correlation of -0.8 to Fisher's z
#' r_to_z(-0.8)
r_to_z = Vectorize( function(r) {

  if ( abs(r) > 1 ) stop("Pearson's r cannot be greater than 1 in absolute value")

  # Fisher's z transformation, i.e., arctanh(r)
  log( ( 1 + r ) / ( 1 - r ) ) / 2
}, vectorize.args = "r" )


#' Convert Fisher's z to Pearson's r
#'
#' Converts Fisher's z to Pearson's r for use in meta-analysis.
#' @param z Fisher's z
#' @export
#' @examples
#' # convert Fisher's z of 1.1 to Pearson's r
#' z_to_r(1.1)
z_to_r = Vectorize( function(z) {
  # inverse Fisher transformation, i.e., tanh(z), written in terms of exp
  e2z = exp( 2 * z )
  ( e2z - 1 ) / ( e2z + 1 )
}, vectorize.args = "z" )




################################ FNs: MISC ################################


#' Return confidence interval for tau for a meta-analysis
#'
#' Returns confidence interval lower and upper limits for tau (the estimated standard deviation of
#' the true effects) for a meta-analysis fit in \code{metafor::rma}.
#' @param meta A meta-analysis object fit in \code{metafor::rma}.
#' @param CI.level Confidence interval level as a proportion (e.g., 0.95)
#' @export
#' @import
#' metafor
#' stats
#' @examples
#' # calculate effect sizes for example dataset
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                    ci=cpos, di=cneg, data=metafor::dat.bcg)
#'
#' # fit random-effects model
#' # note that metafor package returns on the log scale
#' m = metafor::rma.uni(yi= d$yi, vi=d$vi, knha=TRUE,
#' measure="RR", method="REML" )
#'
#' tau_CI(m)
#'
#' # for nicer formatting
#' format_CI( tau_CI(m)[1], tau_CI(m)[2] )
tau_CI = function( meta,
                   CI.level = 0.95 ) {

  alpha = 1 - CI.level

  # Wald-type CI for tau^2 using its point estimate and standard error
  # from the fitted rma object
  t2.lb = meta$tau2 - qnorm(1 - alpha/2) * meta$se.tau2
  t2.ub = meta$tau2 + qnorm(1 - alpha/2) * meta$se.tau2

  # truncate the lower limit at 0 since tau^2 cannot be negative
  # (removed a redundant second non-negativity check: tau.lb here is
  # either sqrt of a positive number or exactly 0, never negative)
  tau.lb = if ( t2.lb > 0 ) sqrt(t2.lb) else 0
  tau.ub = sqrt(t2.ub)

  return( c(tau.lb, tau.ub) )
}


################################ FN: CALIBRATED ESTIMATES ################################

#' Return calibrated estimates of studies' true effect sizes
#'
#' Returns estimates of the true effect in each study based on the methods of Wang & Lee (2019).
#' Unlike the point estimates themselves, these "calibrated" estimates have been
#' appropriately shrunk to correct the overdispersion that arises due to the studies' finite sample sizes.
#' By default, this function uses Dersimonian-Laird moments-based estimates of the mean and variance of the true
#' effects, as Wang & Lee (2019) recommended.
#' @param yi Vector of study-level point estimates
#' @param sei Vector of study-level standard errors
#' @param method Estimation method for mean and variance of true effects (passed to \code{metafor::rma.uni})
#' @export
#' @import
#' metafor
#' stats
#' @references
#' 1. Wang C-C & Lee W-C (2019). A simple method to estimate prediction intervals and
#' predictive distributions: Summarizing meta-analyses
#' beyond means and confidence intervals. Research Synthesis Methods.
#' @examples
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                      ci=cpos, di=cneg, data=metafor::dat.bcg)
#'
#' # calculate calibrated estimates
#' d$calib = calib_ests( yi = d$yi,
#'                       sei = sqrt(d$vi) )
#'
#' # look at 5 studies with the largest calibrated estimates
#' d = d[ order(d$calib, decreasing = TRUE), ]
#' d$trial[1:5]
#'
#' # look at kernel density estimate of calibrated estimates
#' plot(density(d$calib))

calib_ests = function(yi,
                      sei,
                      method = "DL") {

  # fit the random-effects model to estimate the mean and variance
  # of the true effects (DL moments estimator by default)
  fit = rma.uni( yi = yi,
                 sei = sei,
                 method = method )

  mu.hat = as.numeric( fit$b )
  tau2.hat = as.numeric( fit$tau2 )

  # shrink each point estimate toward the pooled mean by a factor that
  # grows with the study's sampling variance (Wang & Lee 2019)
  shrink = ( tau2.hat / ( tau2.hat + sei^2 ) )^(1/2)
  as.numeric( mu.hat + shrink * ( yi - mu.hat ) )
}


################################ INTERNAL FNs FOR SIGN TEST METHOD ################################

# documentation from Rui Wang paper
##########################################################################
##########################################################################
### theta: treatment effect estimates ###
### (usually a vector of length K, K is the number of studies) ###
### theta.sd: estimated standard error of theta ###
### (usually a vector of same length as theta) ###
### mu: the specified value in the null hypothesis. ###
### pct: the percentile of interest ###
### 0.5=median, ###
### 0.25=25th percentile, ###
### 0.75=75th percentile. ###
### nperm: number of realizations in the conditional test ###

##########################################################################
### Output a 2-sided p-value. ###
##########################################################################


#' Return sign test p-value for meta-analysis percentile
#'
#' Returns a p-value for testing the hypothesis that \code{mu} is the \code{pct}^th
#' percentile of the true effect distribution based on the nonparametric sign test
#' method of Wang et al. (2010). This function is also called by \code{prop_stronger}
#' when using the sign test method.
#' @param yi Vector of study-level point estimates
#' @param sei Vector of study-level standard errors
#' @param mu The effect size to test as the \code{pct}^th percentile
#' @param pct The percentile of interest (e.g., 0.50 for the median)
#' @param R Number of simulation iterates to use when estimating null distribution of the test statistic.
#' @export
#' @references
#' 1. Wang R, Tian L, Cai T, & Wei LJ (2010). Nonparametric inference procedure for percentiles
#' of the random effects distribution in meta-analysis. Annals of Applied Statistics.
#' @examples
#' # calculate effect sizes for example dataset
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                    ci=cpos, di=cneg, data=metafor::dat.bcg)
#'
#' # test H0: the median is -0.3
#' # using only R = 100 for speed, but should be much larger (e.g., 2000) in practice
#' pct_pval( yi = d$yi,
#'           sei = sqrt(d$vi),
#'           mu = -0.3,
#'           pct = 0.5,
#'           R = 100 )

pct_pval <- function(yi,
                sei,
                mu,
                pct,
                R=2000) {

  n.studies <- length(yi)

  # per-study contribution to the sign-test statistic: each study's
  # estimated probability that its true effect falls below mu, centered
  # at 0 (first eq. on p. 4 of Wang et al. 2010)
  score <- pnorm( (mu-yi)/sei ) - 0.5

  # observed test statistic
  obs.stat <- sum(score)

  # simulate the null distribution of the statistic; under H0 each
  # study's |score| gets a random sign with P(+) = pct
  # (0/1 indicators are used instead of +/-1 signs for convenience)
  null.stats <- rep(0, R)
  for ( i in seq_len(R) ) {
    up <- rbinom(n.studies, 1, pct)
    down <- 1 - up
    # second eq. on p. 4 of the paper
    null.stats[i] <- sum( (abs(score)) * up - (abs(score)) * down )
  }

  # two-sided p-value comparing the observed statistic to the null
  # distribution, with a correction for ties
  p.left <- mean(as.numeric(null.stats <= obs.stat))
  p.right <- mean(as.numeric(null.stats >= obs.stat))
  p.tie <- mean(as.numeric(null.stats == obs.stat))
  2 * min(p.left, p.right) - p.tie
}


#' Return sign test point estimate of proportion of effects above or below threshold.
#'
#' Internal function not intended for user to call. Uses an extension of the sign test method of Wang et al. (2010) to estimate the proportion of true (i.e., population parameter) effect sizes in a meta-analysis
#' that are above or below a specified threshold of scientific importance. See important caveats in the Details section of the documentation for
#' the function \code{prop_stronger}.
#' @param q True effect size that is the threshold for "scientific importance"
#' @param yi Study-level point estimates
#' @param vi study-level variances
#' @param CI.level Confidence level as a proportion
#' @param tail \code{above} for the proportion of effects above \code{q}; \code{below} for
#' the proportion of effects below \code{q}.
#' @param R Number of simulation iterates to estimate null distribution of sign test statistic
#' @param return.vectors Should all percents and p-values from the grid search be returned?
#' @import
#' purrr
#' @importFrom
#' dplyr "%>%"
#' @references
#' 1. Wang R, Tian L, Cai T, & Wei LJ (2010). Nonparametric inference procedure for percentiles
#' of the random effects distribution in meta-analysis. Annals of Applied Statistics.
prop_stronger_sign = function(q,
                            yi,
                            vi,
                            CI.level = 0.95,
                            tail = NA,
                            R = 2000,
                            return.vectors = FALSE ) {

  # fail fast with a clear message: previously tail = NA (the default)
  # triggered an obscure "missing value where TRUE/FALSE needed" error
  # only AFTER the expensive grid search, and an invalid tail left `res`
  # undefined
  if ( is.na(tail) || !tail %in% c("above", "below") ) {
    stop("tail must be either 'above' or 'below'")
  }

  # grid of candidate values for Phat.below
  pct.vec = seq( 0, 1, 0.001 )

  # sign-test p-value for each candidate percentile
  pvals = pct.vec %>% map( function(x) pct_pval( yi = yi,
                                            sei = sqrt(vi),
                                            mu = q,
                                            pct = x,
                                            R = R ) ) %>%
    unlist # return a double-type vector instead of list

  # point estimate: the candidate value of Phat.below with the largest p-value
  Phat.below.NP = pct.vec[ which.max( pvals ) ]

  # CI limits: among candidates on either side of the point estimate,
  # pick the one whose p-value is closest to alpha
  alpha = 1 - CI.level
  # in case the point estimate is already 1 or 0, avoid null objects
  if ( Phat.below.NP == 1 ) CI.hi.NP = 1
  else CI.hi.NP = pct.vec[ pct.vec > Phat.below.NP ][ which.min( abs( pvals[ pct.vec > Phat.below.NP ] - alpha ) ) ]

  if ( Phat.below.NP == 0 ) CI.lo.NP = 0
  else CI.lo.NP = pct.vec[ pct.vec < Phat.below.NP ][ which.min( abs( pvals[ pct.vec < Phat.below.NP ] - alpha ) ) ]

  if ( tail == "below" ) {
    res = data.frame(Est = Phat.below.NP,
                     lo = CI.lo.NP,
                     hi = CI.hi.NP )
    pcts = pct.vec
  } else {
    # upper tail: reflect the estimate and interval around 1
    res = data.frame( Est = 1 - Phat.below.NP,
                      lo = 1 - CI.hi.NP,
                      hi = 1 - CI.lo.NP )
    pcts = 1 - pct.vec
  }

  if ( return.vectors == FALSE ) return(res)
  invisible( list(res = res,
                  pcts = pcts,
                  pvals = pvals) )
}


################################ FN: COMPUTE PROPORTION OF EFFECTS STRONGER THAN THRESHOLD ################################

#' Estimate proportion of true effect sizes above or below a threshold
#'
#' Estimates the proportion of true (i.e., population parameter) effect sizes in a meta-analysis
#' that are above or below a specified threshold of scientific importance based on the methods of Mathur & VanderWeele (2018) and Mathur & VanderWeele (under review).
#' @param q True effect size that is the threshold for "scientific importance"
#' @param M Pooled point estimate from meta-analysis (required only for parametric estimation/inference and for Shapiro p-value)
#' @param t2 Estimated heterogeneity (tau^2) from meta-analysis (required only for parametric estimation/inference and for Shapiro p-value)
#' @param se.M Estimated standard error of pooled point estimate from meta-analysis (required only for parametric inference)
#' @param se.t2 Estimated standard error of tau^2 from meta-analysis (required only for parametric inference)
#' @param CI.level Confidence level as a proportion (e.g., 0.95 for a 95\% confidence interval)
#' @param tail \code{"above"} for the proportion of effects above \code{q}; \code{"below"} for
#' the proportion of effects below \code{q}.
#' @param estimate.method Method for point estimation of the proportion (\code{"parametric"} or \code{"calibrated"}). See Details.
#' @param ci.method Method for confidence interval estimation (\code{"parametric"}, \code{"calibrated"}, or \code{"sign.test"}). See Details.
#' @param calib.est.method Method for estimating the mean and variance of the true effects when computing calibrated estimates. See Details.
#' @param dat Dataset of point estimates (with names equal to the passed \code{yi.name}) and their variances
#' (with names equal to the passed \code{vi.name}). Not required if using \code{ci.method = "parametric"} and bootstrapping is not needed.
#' @param R Number of bootstrap or simulation iterates (depending on the methods chosen). Not required if using \code{ci.method = "parametric"} and bootstrapping is not needed.
#' @param bootstrap Only used when \code{ci.method = "parametric"}. In that case, if \code{bootstrap = "ifneeded"}, bootstraps if estimated proportion is less than 0.15 or more than
#' 0.85. If equal to \code{never}, instead does not return inference in the above edge cases.
#' @param yi.name Name of the variable in \code{dat} containing the study-level point estimates. Used for bootstrapping and conducting Shapiro test.
#' @param vi.name Name of the variable in \code{dat} containing the study-level variances. Used for bootstrapping and conducting Shapiro test.
#' @export
#' @import
#' metafor
#' stats
#' @return
#' Returns a dataframe containing the point estimate for the proportion, its estimated standard error, and confidence
#' interval limits.
#' @details
#' These methods perform well only in meta-analyses with at least 10 studies; we do not recommend reporting them in smaller
#' meta-analyses. By default, \code{prop_stronger} uses parametric estimation for the proportion of effects above or below the chosen threshold.
#' However, it is usually preferable to use the calibrated method for both point estimation and confidence interval estimation.
#' The parametric method is maintained as the default for reverse-compatibility.
#'
#' The parametric method assumes that the true effects are approximately normal and that the number of studies is large.
#' When using the parametric method and the estimated proportion is less than 0.15 or more than 0.85, it is best to bootstrap the confidence interval
#' using the bias-corrected and accelerated (BCa) method (Mathur & VanderWeele, 2018); this is the default behavior of \code{prop_stronger}.
#' Sometimes BCa confidence interval estimation fails, in which case \code{prop_stronger} instead uses the percentile method,
#' issuing a warning if this is the case. We use a modified "safe" version of the \code{boot} package code for bootstrapping
#' such that if any bootstrap iterates fail (usually because of model estimation problems), the error message is printed but the
#' bootstrap iterate is simply discarded so that confidence interval estimation can proceed.
#'
#' The preferred calibrated method is an extension of work by Wang et al. (2019). This method makes no assumptions about the distribution of true effects and performs well in meta-analyses with
#' as few as 10 studies. Calculating the calibrated estimates involves first estimating the meta-analytic mean and variance,
#' which is by default done using the moments-based Dersimonian-Laird estimator as in Wang et al. (2019). To use a different method, which will be passed
#' change the argument \code{calib.est.method} based on the documentation for to \code{metafor::rma.uni}'s \code{method} argument. For inference, the calibrated method may
#' fail to converge especially for small meta-analyses for which the threshold is distant from the mean of the true effects.
#'
#' The sign test method is an extension of work by Wang et al. (2010). This method was included in Mathur & VanderWeele's (under review) simulation study;
#' it performed adequately when there was high heterogeneity, but did not perform well with lower heterogeneity. However, in the absence of a clear criterion
#' for how much heterogeneity is enough for the method to perform well, we do not in general recommend its use. Additionally, this method requires effects that are reasonably symmetric and unimodal.
#'
#' @references
#' 1. Mathur MB & VanderWeele TJ (2018). New metrics for meta-analyses of heterogeneous effects. Statistics in Medicine.
#'
#' 2. Mathur MB & VanderWeele TJ (under review). Robust metrics for meta-analyses of heterogeneous effects: methods and software.
#'
#' 3. Wang R, Tian L, Cai T, & Wei LJ (2010). Nonparametric inference procedure for percentiles
#' of the random effects distribution in meta-analysis. Annals of Applied Statistics.
#'
#' 4. Wang C-C & Lee W-C (2019). A simple method to estimate prediction intervals and
#' predictive distributions: Summarizing meta-analyses
#' beyond means and confidence intervals. Research Synthesis Methods.
#'
#' 5. Mathur MB & VanderWeele TJ (under review). New metrics for multisite replication projects.
#' @examples
#' ##### Example 1: BCG Vaccine and Tuberculosis Meta-Analysis #####
#'
#' # calculate effect sizes for example dataset
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                     ci=cpos, di=cneg, data=metafor::dat.bcg)
#'
#' # fit random-effects model
#' # note that metafor package returns on the log scale
#' m = metafor::rma.uni(yi= d$yi, vi=d$vi, knha=TRUE,
#'                      measure="RR", method="REML" )
#'
#' # pooled point estimate (RR scale)
#' exp(m$b)
#'
#' # estimate the proportion of effects stronger than RR = 0.70
#' # as recommended, use the calibrated approach for both point estimation and CI
#' # bootstrap reps should be higher in practice (e.g., 1000)
#' # here using fewer for speed
#' prop_stronger( q = log(0.7),
#'                tail = "below",
#'                estimate.method = "calibrated",
#'                ci.method = "calibrated",
#'                dat = d,
#'                yi.name = "yi",
#'                vi.name = "vi",
#'                R = 100)
#' # warning goes away with more bootstrap iterates
#' # no Shapiro p-value because we haven't provided the dataset and its variable names
#'
#' # now use the parametric approach (Mathur & VanderWeele 2018)
#' # no bootstrapping will be needed for this choice of q
#' prop_stronger( q = log(0.7),
#'                M = as.numeric(m$b),
#'                t2 = m$tau2,
#'                se.M = as.numeric(m$vb),
#'                se.t2 = m$se.tau2,
#'                CI.level = 0.95,
#'                tail = "below",
#'                bootstrap = "ifneeded")
#'
#'
#' ##### Example 2: Meta-Analysis of Multisite Replication Studies #####
#'
#' # replication estimates (Fisher's z scale) and SEs
#' # from moral credential example in reference #2
#' r.fis = c(0.303, 0.078, 0.113, -0.055, 0.056, 0.073,
#'           0.263, 0.056, 0.002, -0.106, 0.09, 0.024, 0.069, 0.074,
#'           0.107, 0.01, -0.089, -0.187, 0.265, 0.076, 0.082)
#'
#' r.SE = c(0.111, 0.092, 0.156, 0.106, 0.105, 0.057,
#'          0.091, 0.089, 0.081, 0.1, 0.093, 0.086, 0.076,
#'          0.094, 0.065, 0.087, 0.108, 0.114, 0.073, 0.105, 0.04)
#'
#' d = data.frame( yi = r.fis,
#'                 vi = r.SE^2 )
#'
#' # meta-analyze the replications
#' m = metafor::rma.uni( yi = r.fis, vi = r.SE^2, measure = "ZCOR" )
#'
#' # probability of true effect above r = 0.10 = 28%
#' # convert threshold on r scale to Fisher's z
#' q = r_to_z(0.10)
#'
#' # bootstrap reps should be higher in practice (e.g., 1000)
#' # here using only 100 for speed
#' prop_stronger( q = q,
#'                tail = "above",
#'                estimate.method = "calibrated",
#'                ci.method = "calibrated",
#'                dat = d,
#'                yi.name = "yi",
#'                vi.name = "vi",
#'                R = 100 )
#'
#'
#' # probability of true effect equally strong in opposite direction
#' q.star = r_to_z(-0.10)
#' prop_stronger( q = q.star,
#'                tail = "below",
#'                estimate.method = "calibrated",
#'                ci.method = "calibrated",
#'                dat = d,
#'                yi.name = "yi",
#'                vi.name = "vi",
#'                R = 100 )
#' # BCa fails to converge here


prop_stronger = function( q,
                          M = NA,
                          t2 = NA,
                          se.M = NA,
                          se.t2 = NA,
                          CI.level = 0.95,
                          tail = NA,

                          estimate.method = "parametric",  # "parametric" or "calibrated"
                          ci.method = "parametric", # "parametric", "calibrated", or "sign.test"
                          calib.est.method = "DL",  # will be passed to rma.uni

                          # below arguments only needed for bootstrapping with parametric method
                          dat = NULL,
                          R = 2000,
                          bootstrap = "ifneeded",  # "ifneeded" or "never"
                          yi.name = "yi",
                          vi.name = "vi" ) {


  ##### Check for Bad Input #####

  if ( ( estimate.method == "parametric" | ci.method == "parametric" ) &
       ( is.na(t2) | is.na(M) ) ) {
    stop("Must provide M and t2 if using estimate.method = 'parametric' or ci.method = 'parametric'.")
  }

  if ( !is.na(t2) & t2 < 0 ) stop("Heterogeneity cannot be negative")

  if ( ! is.na(se.M) ) {
    if (se.M < 0) stop("se.M cannot be negative")
  }

  if ( ! is.na(se.t2) ) {
    if (se.t2 < 0) stop("se.t2 cannot be negative")
  }

  if ( !estimate.method %in% c( "parametric", "calibrated" ) ) {
    stop("Invalid estimate.method argument")
  }

  if ( !ci.method %in% c( "parametric", "calibrated", "sign.test" ) ) {
    stop("Invalid ci.method argument")
  }

  if ( !bootstrap %in% c( "ifneeded", "never" ) ) {
    stop("Invalid bootstrap argument")
  }

  if ( ci.method == "parametric" & estimate.method != "parametric"){
    stop("\nError: You chose ci.method = 'parametric' (or you used the default),\nin which case you must use estimate.method = 'parametric' as well.\n")
  }

  ##### Messages When Not All Output Can Be Computed #####

  if ( ci.method == "parametric" & ( is.na(se.M) | is.na(se.t2) ) ) message("Cannot compute parametric inference without se.M and \nse.t2.\n Returning only point estimates.\n")
  if ( is.null(dat) | is.na(M) | is.na(t2) ) message("Cannot report Shapiro normality test without dat, M, and t2.\n Returning NA for Shapiro p-value.\n")

  if ( ( estimate.method == "calibrated" | ci.method == "calibrated" ) &
       is.null(dat) ) stop("Cannot use calibrated methods without providing dat.")

  ##### Get Point Estimate #####

  if ( estimate.method == "parametric" ) {

    # same regardless of tail
    Z = (q - M) / sqrt(t2)

    if ( tail == "above" ) phat = 1 - pnorm(Z)
    else if ( tail == "below" ) phat = pnorm(Z)
  }

  if ( estimate.method == "calibrated" ) {
    # use DL method by default
    calib = calib_ests( yi = dat[[yi.name]],
                      sei = sqrt(dat[[vi.name]]),
                      calib.est.method )

    if ( tail == "above" ) phat = sum(calib > c(q)) / length(calib)
    if ( tail == "below" ) phat = sum(calib < c(q)) / length(calib)
  }

  ##### Inference #####
  if ( ci.method == "parametric") {

    warning("Warning: You chose ci.method = 'parametric' (or you used the default).\nIt is almost always better to use ci.method = 'calibrated'.\n")

    # is point estimate extreme enough to require bootstrap?
    extreme = (phat < 0.15 | phat > 0.85)

    if ( extreme ) {
      if ( bootstrap == "ifneeded" ) {
        message("The estimated proportion is close to 0 or 1,\n so the theoretical CI may perform poorly. Using \n bootstrapping instead.\n")

        # more sanity checks
        if ( is.null(dat) ) stop("Must provide dat in order to bootstrap.")
        if ( !yi.name %in% names(dat) ) stop("dat must contain variable called yi.name.")
        if ( !vi.name %in% names(dat) ) stop("dat must contain variable called vi.name.")

        bootCIs = lo = hi = SE = NULL

        boot.res = suppressWarnings( safe_boot( data = dat,
                                                parallel = "multicore",
                                                R = R,
                                                statistic = get_stat,
                                                # below arguments are being passed to get_stat
                                                q = q,
                                                tail = tail,
                                                yi.name = yi.name,
                                                vi.name = vi.name ) )

        # if BCa fails (due to infinite w adjustment issue), use percentile instead
        boot.values = tryCatch( {

          bootCIs = boot.ci(boot.res,
                            type="bca",
                            conf = CI.level )
          lo = round( bootCIs$bca[4], 2 )
          hi = round( bootCIs$bca[5], 2 )
          SE = sd(boot.res$t)

          c(lo, hi, SE)

        }, error = function(err) {
          warning("Had problems computing BCa CI. Using percentile method instead.")

          bootCIs = boot.ci(boot.res,
                            type="perc",
                            conf = CI.level )

          lo = round( bootCIs$perc[4], 2 )

          hi = round( bootCIs$perc[5], 2 )

          SE = sd(boot.res$t)
          c(lo, hi, SE)
        }
        )

        lo = boot.values[1]
        hi = boot.values[2]
        SE = boot.values[3]

      } # end loop for boot == "ifneeded"

      if ( bootstrap == "never" ) {
        warning("The estimated proportion is close to 0 or 1,\n so the theoretical CI may perform poorly. Should use \nBCa bootstrapping instead.")
        SE = lo = hi = NA
      }
    }


    if ( !extreme ) {
      # do inference only if given needed SEs
      if ( !is.na(se.M) & !is.na(se.t2) ){

        ##### Delta Method Inference on Original Scale #####
        term1.1 = se.M^2 / t2
        term1.2 = ( se.t2^2 * ( q - M )^2 ) / ( 4 * t2^3 )
        term1 = sqrt( term1.1 + term1.2 )

        SE = term1 * dnorm(Z)

        # confidence interval
        tail.prob = ( 1 - CI.level ) / 2
        lo = max( 0, phat + qnorm( tail.prob )*SE )
        hi = min( 1, phat - qnorm( tail.prob )*SE )
      } else {
        SE = lo = hi = NA
      }
    }

  } # end ci.method == "parametric"

  if ( ci.method == "calibrated") {

        # more sanity checks
        if ( is.null(dat) ) stop("Must provide dat in order to use ci.method = 'calibrated'.")
        if ( !yi.name %in% names(dat) ) stop("dat must contain variable called yi.name.")
        if ( !vi.name %in% names(dat) ) stop("dat must contain variable called vi.name.")

        bootCIs = lo = hi = SE = NULL

        boot.res = suppressWarnings( safe_boot( data = dat,
                                                parallel = "multicore",
                                                R = R,
                                                statistic = function(original, indices) {

                                                  b = original[indices,]

                                                  calib.b = calib_ests( yi = b[[yi.name]],
                                                                        sei = sqrt(b[[vi.name]]),
                                                                        calib.est.method)

                                                  if ( tail == "above" ) phatb = sum(calib.b > c(q)) / length(calib.b)
                                                  if ( tail == "below" ) phatb = sum(calib.b < c(q)) / length(calib.b)
                                                  return(phatb)
                                                } ) )

        # catch BCa failures (usually due to infinite w adjustment issue)
        boot.values = tryCatch( {

          bootCIs = boot.ci(boot.res,
                            type="bca",
                            conf = CI.level )
          lo = round( bootCIs$bca[4], 2 )
          hi = round( bootCIs$bca[5], 2 )
          SE = sd(boot.res$t)

          c(lo, hi, SE)

        }, error = function(err) {
          warning("\nHad problems computing BCa CI. \nThis typically happens when the estimated proportion is close to 0 or 1 \nand the number of studies is small.")
          boot.values = c(NA, NA, NA)
        }
        )

        lo = boot.values[1]
        hi = boot.values[2]
        SE = boot.values[3]

      } # end ci.method == "calibrated"

  if (ci.method == "sign.test") {

    warning("\n\nWarning: You are estimating a CI using the sign test method.\nThis method only works well with high heterogeneity and point estimates that are\n relatively symmetric and unimodal.\nThis method does not provide a standard error estimate, only CI limits.")

    Phat.np = prop_stronger_sign(q = q,
                               yi = dat[[yi.name]],
                               vi = dat[[vi.name]],
                               CI.level = CI.level,
                               tail = tail,
                               R = R,
                               return.vectors = FALSE )

    lo = Phat.np$lo
    hi = Phat.np$hi
    SE = NA  # this method doesn't give an SE
  }

  ##### Normality Check #####
  if ( !is.null(dat) & !is.na(M) & !is.na(t2) ) {
    if ( !yi.name %in% names(dat) ) stop("dat must contain variable called yi.name.")
    if ( !vi.name %in% names(dat) ) stop("dat must contain variable called vi.name.")

    # normalized point estimates
    yi.norm = ( dat[[yi.name]] - c(M) ) / sqrt( dat[[vi.name]] + c(t2) )

    # Shapiro-Wilk test
    shapiro.pval = shapiro.test( yi.norm )$p.value
  } else {
    shapiro.pval = NA
  }

  # return results
  res = data.frame( Est = phat,
                    SE,
                    lo,
                    hi,
                    shapiro.pval )
  rownames(res) = NULL
  res
}




# ~~~~~~~~~~~~~~~~~~~~~~ INTERNAL FUNCTIONS FOR BOOTSTRAPPING ~~~~~~~~~~~~~~~~~~~~~~ #

################################### GET P-HAT STAT FOR USE WITH BOOT PACKAGE ###################################

# Internal function not intended for the user. Meta-analyzes a bootstrap sample and estimates the proportion of effects stronger than q.
# This is a "safe" function that does not return an error if estimation fails so that bootstrapping
# can still proceed.

# original = The original dataset
# indices = indices for bootstrap resample
# method = Method of meta-analysis model-fitting passed to \code{metafor::rma.uni}

get_stat = function( original,
                     indices,
                     q,
                     tail,
                     method = "REML",
                     yi.name = "yi",
                     vi.name = "vi",
                     CI.level ) {

  # the bootstrap resample of the original dataset
  boot.dat = original[indices,]

  # guard against Fisher convergence failures in the meta-analysis:
  # a failed replicate yields NA rather than an error, so bootstrapping
  # can proceed
  stat.val = tryCatch( {

    # meta-analyze the resampled data
    meta = rma.uni( yi = boot.dat[[yi.name]],
                    vi = boot.dat[[vi.name]],
                    knha = TRUE,
                    method = method,
                    # step-size adjustment helps prevent convergence problems
                    control = list(stepadj = 0.5) )

    pooled.est = meta$b
    het.var = meta$tau2

    # standardized distance of the threshold q from the pooled estimate
    z.score = (q - pooled.est) / sqrt(het.var)

    # estimated proportion of true effects beyond q
    if ( tail == "above" ) 1 - pnorm(z.score)
    else if ( tail == "below" ) pnorm(z.score)

  }, error = function(err) {
    message(err)
    NA
  } )

  # returns NULL invisibly if stat.val is NULL (e.g. unrecognized tail);
  # safe_boot discards NULL replicates
  if ( !is.null(stat.val) ) return(stat.val)

}



################################### MODIFIED FROM BOOT PACKAGE ###################################

# see section called "our additions"
# we minimally modified the function so that it can proceed even if some of the bootstrap iterates run into errors
# (in this case, Fisher convergence issues) because the boot package version gets confused about dimension mismatches

# Minimally modified copy of boot::boot().  The only changes are in the
# section marked "our additions" below: bootstrap replicates for which
# statistic() returned NULL (e.g. the meta-analysis failed to converge
# for that resample) are silently dropped and R is reduced to match,
# instead of the stock boot() failing on a dimension mismatch when
# assembling t.star.  All other code is from the boot package
# (Canty & Ripley).
safe_boot = function (data, statistic, R, sim = "ordinary", stype = c("i",
                                                                      "f", "w"), strata = rep(1, n), L = NULL, m = 0, weights = NULL,
                      ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...,
                      parallel = c("no", "multicore", "snow"), ncpus = getOption("boot.ncpus",
                                                                                 1L), cl = NULL)
{


  call <- match.call()
  stype <- match.arg(stype)
  if (missing(parallel))
    parallel <- getOption("boot.parallel", "no")
  parallel <- match.arg(parallel)
  # determine which (if any) parallel backend is actually usable
  have_mc <- have_snow <- FALSE
  if (parallel != "no" && ncpus > 1L) {
    if (parallel == "multicore")
      have_mc <- .Platform$OS.type != "windows"
    else if (parallel == "snow")
      have_snow <- TRUE
    if (!have_mc && !have_snow)
      ncpus <- 1L
    loadNamespace("parallel")
  }
  if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
    warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0', so ignored")
    simple <- FALSE
  }
  # record the RNG state so the resampling indices can be reconstructed
  # later (e.g. by boot.array)
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  n <- NROW(data)
  if ((n == 0) || is.null(n))
    stop("no data in call to 'boot'")
  temp.str <- strata
  strata <- tapply(seq_len(n), as.numeric(strata))
  # compute the observed statistic t0 and, for nonparametric simulation,
  # build the index array for the resamples
  t0 <- if (sim != "parametric") {
    if ((sim == "antithetic") && is.null(L))
      L <- empinf(data = data, statistic = statistic, stype = stype,
                  strata = strata, ...)
    if (sim != "ordinary")
      m <- 0
    else if (any(m < 0))
      stop("negative value of 'm' supplied")
    if ((length(m) != 1L) && (length(m) != length(table(strata))))
      stop("length of 'm' incompatible with 'strata'")
    if ((sim == "ordinary") || (sim == "balanced")) {
      if (isMatrix(weights) && (nrow(weights) != length(R)))
        stop("dimensions of 'R' and 'weights' do not match")
    }
    else weights <- NULL
    if (!is.null(weights))
      weights <- t(apply(matrix(weights, n, length(R),
                                byrow = TRUE), 2L, normalize, strata))
    if (!simple)
      i <- index.array(n, R, sim, strata, m, L, weights)
    # representation of the original sample: all-ones frequencies,
    # per-stratum weights, or plain indices, depending on stype
    original <- if (stype == "f")
      rep(1, n)
    else if (stype == "w") {
      ns <- tabulate(strata)[strata]
      1/ns
    }
    else seq_len(n)
    t0 <- if (sum(m) > 0L)
      statistic(data, original, rep(1, sum(m)), ...)
    else statistic(data, original, ...)
    rm(original)
    t0
  }
  else statistic(data, ...)
  pred.i <- NULL
  # fn(r) evaluates the statistic on the r-th replicate
  fn <- if (sim == "parametric") {
    ran.gen
    data
    mle
    function(r) {
      dd <- ran.gen(data, mle)
      statistic(dd, ...)
    }
  }
  else {
    # split off the extra prediction columns, if any, of the index array
    if (!simple && ncol(i) > n) {
      pred.i <- as.matrix(i[, (n + 1L):ncol(i)])
      i <- i[, seq_len(n)]
    }
    if (stype %in% c("f", "w")) {
      f <- freq.array(i)
      rm(i)
      if (stype == "w")
        f <- f/ns
      if (sum(m) == 0L)
        function(r) statistic(data, f[r, ], ...)
      else function(r) statistic(data, f[r, ], pred.i[r,
                                                      ], ...)
    }
    else if (sum(m) > 0L)
      function(r) statistic(data, i[r, ], pred.i[r, ],
                            ...)
    else if (simple)
      function(r) statistic(data, index.array(n, 1, sim,
                                              strata, m, L, weights), ...)
    else function(r) statistic(data, i[r, ], ...)
  }
  RR <- sum(R)
  # run the R replicates, serially or in parallel
  res <- if (ncpus > 1L && (have_mc || have_snow)) {
    if (have_mc) {
      parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
    }
    else if (have_snow) {
      list(...)
      if (is.null(cl)) {
        cl <- parallel::makePSOCKcluster(rep("localhost",
                                             ncpus))
        if (RNGkind()[1L] == "L'Ecuyer-CMRG")
          parallel::clusterSetRNGStream(cl)
        res <- parallel::parLapply(cl, seq_len(RR), fn)
        parallel::stopCluster(cl)
        res
      }
      else parallel::parLapply(cl, seq_len(RR), fn)
    }
  }
  else lapply(seq_len(RR), fn)
  #t.star <- matrix(, RR, length(t0))  # ~~~ MM commented out


  # ~~~~~ beginning of our additions
  # number of non-NULL elements of the results vector
  # NOTE(review): this count assumes statistic() returns a scalar per
  # replicate; for a vector-valued statistic, unlist() would overcount
  # and sizes would be wrong — TODO confirm scalar-statistic assumption
  RR = length(unlist(res))
  nulls = sapply( res, is.null)
  res = res[ !nulls ]
  t.star <- matrix(, RR, length(t0))

  # if some reps failed, warn user
  # (RR is a scalar here, so sum(RR) is just RR)
  if ( sum(RR) < R ) warning( "Only ", sum(RR), " of ", R, " bootstrap reps managed to fit the meta-analysis successfully. Discarding all those that did not.")

  # without this, boot.CI gets confused about number of replicates
  R = RR
  # ~~~~~ end of our additions


  for (r in seq_len(RR)) t.star[r, ] <- res[[r]]
  if (is.null(weights))
    weights <- 1/tabulate(strata)[strata]
  boot.return(sim, t0, t.star, temp.str, R, data, statistic,
              stype, call, seed, L, m, pred.i, weights, ran.gen, mle)
}


################################### UNMODIFIED INTERNAL FUNCTIONS FROM BOOT PACKAGE ###################################

# these functions need to be loaded directly in order for safe_boot to work

# part of R package boot
# copyright (C) 1997-2001 Angelo J. Canty
# corrections (C) 1997-2014 B. D. Ripley
#
# Unlimited distribution is permitted

# safe version of sample
# needs R >= 2.9.0
# only works if size is not specified in R >= 2.11.0, but it always is in boot
sample0 <- function(x, ...) {
  # Safe variant of sample(): always samples from the elements of x,
  # never treating a length-one numeric x as 1:x.
  x[sample.int(length(x), ...)]
}
bsample <- function(x, ...) {
  # Resample the elements of x with replacement (safe for length-one x).
  x[sample.int(length(x), replace = TRUE, ...)]
}

isMatrix <- function(x) {
  # TRUE when x has exactly two dimensions (matrix-like object).
  length(dim(x)) == 2L
}

## Random permutation of x: draw all length(x) elements without
## replacement (safe sampler inlined, so a length-one x is returned
## as-is rather than being treated as 1:x).
rperm <- function(x) {
  x[sample.int(length(x), length(x))]
}



antithetic.array <- function(n, R, L, strata)
  #
  #  Build an R x n array of bootstrap indices by antithetic resampling,
  #  using the empirical influence values in L.  The actual sampling is
  #  delegated to anti.arr, applied separately within each stratum.
  #
{
  stratum.labels <- as.integer(names(table(strata)))
  indices <- matrix(0L, R, n)
  for (lab in stratum.labels) {
    members <- seq_len(n)[strata == lab]
    indices[, members] <- anti.arr(length(members), R, L[members], members)
  }
  indices
}

anti.arr <- function(n, R, L, inds=seq_len(n))
{
  #  R x n array of bootstrap indices, generated antithetically
  #  according to the empirical influence values in L.
  unique.rank <- function(x) {
    # Assign unique ranks to a numeric vector.  Ties are broken by
    # randomly permuting the candidate ranks among the tied positions,
    # so every observation ends up with a distinct rank.
    ranks <- rank(x)
    if (any(duplicated(ranks))) {
      inds <- seq_along(x)
      uniq <- sort(unique(ranks))
      tab <- table(ranks)
      for (i in seq_along(uniq))
        if (tab[i] > 1L) {
          gp <- inds[ranks == uniq[i]]
          ranks[gp] <- rperm(inds[sort(ranks) == uniq[i]])
        }
    }
    ranks
  }
  # first half of the rows: ordinary with-replacement resamples
  R1 <- floor(R/2)
  mat1 <- matrix(bsample(inds, R1*n), R1, n)
  ranks <- unique.rank(L)
  # rev maps each observation to its antithetic partner: the observation
  # whose influence rank mirrors its own (rank k pairs with rank n+1-k)
  rev <- inds
  for (i in seq_len(n)) rev[i] <- inds[ranks == (n+1-ranks[i])]
  # second half of the rows: the antithetic images of the first half
  mat1 <- rbind(mat1, matrix(rev[mat1], R1, n))
  # if R is odd, top up with one extra ordinary resample row
  if (R != 2*R1) mat1 <- rbind(mat1, bsample(inds, n))
  mat1
}




balanced.array <- function(n, R, strata)
{
  #
  # R x n array of bootstrap indices, sampled hypergeometrically
  # within strata: start from R copies of 1..n laid out as columns,
  # then jointly permute each stratum's rows across all R columns so
  # every index appears exactly R times overall.
  #
  idx <- matrix(rep(seq_len(n), R), n, R)
  stratum.labels <- as.integer(names(table(strata)))
  for (lab in stratum.labels) {
    members <- seq_len(n)[strata == lab]
    if (length(members) > 1L) {
      shuffled <- matrix(rperm(idx[members, ]), length(members), R)
      idx[members, ] <- shuffled
    }
  }
  t(idx)
}

# Local copy of boot::boot() (Canty & Ripley), unchanged except for the
# "boot_type" attribute attached to the return value at the end, which
# find_type() uses to dispatch printing/array reconstruction.
boot <- function(data, statistic, R, sim = "ordinary",
                 stype = c("i", "f", "w"),
                 strata  =  rep(1, n), L = NULL, m = 0, weights = NULL,
                 ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...,
                 parallel = c("no", "multicore", "snow"),
                 ncpus = getOption("boot.ncpus", 1L), cl = NULL)
{
  #
  # R replicates of bootstrap applied to  statistic(data)
  # Possible sim values are: "ordinary", "balanced", "antithetic",
  #                     "permutation", "parametric"
  # Various auxiliary functions find the indices to be used for the
  # bootstrap replicates and then this function loops over those replicates.
  #
  call <- match.call()
  stype <- match.arg(stype)
  if (missing(parallel)) parallel <- getOption("boot.parallel", "no")
  parallel <- match.arg(parallel)
  # determine which (if any) parallel backend is actually usable
  have_mc <- have_snow <- FALSE
  if (parallel != "no" && ncpus > 1L) {
    if (parallel == "multicore") have_mc <- .Platform$OS.type != "windows"
    else if (parallel == "snow") have_snow <- TRUE
    if (!have_mc && !have_snow) ncpus <- 1L
    loadNamespace("parallel") # get this out of the way before recording seed
  }
  if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
    warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0', so ignored")
    simple <- FALSE
  }
  # record the RNG state so resampling can be reconstructed later
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
  seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  n <- NROW(data)
  if ((n == 0) || is.null(n))
    stop("no data in call to 'boot'")
  temp.str <- strata
  strata <- tapply(seq_len(n),as.numeric(strata))
  # compute the observed statistic t0 and, for nonparametric simulation,
  # build the index array for the resamples
  t0 <- if (sim != "parametric") {
    if ((sim == "antithetic") && is.null(L))
      L <- empinf(data = data, statistic = statistic,
                  stype = stype, strata = strata, ...)
    if (sim != "ordinary") m <- 0
    else if (any(m < 0)) stop("negative value of 'm' supplied")
    if ((length(m) != 1L) && (length(m) != length(table(strata))))
      stop("length of 'm' incompatible with 'strata'")
    if ((sim == "ordinary") || (sim == "balanced")) {
      if (isMatrix(weights) && (nrow(weights) != length(R)))
        stop("dimensions of 'R' and 'weights' do not match")}
    else weights <- NULL
    if (!is.null(weights))
      weights <- t(apply(matrix(weights, n, length(R), byrow = TRUE),
                         2L, normalize, strata))
    if (!simple) i <- index.array(n, R, sim, strata, m, L, weights)

    # representation of the original sample: all-ones frequencies,
    # per-stratum weights, or plain indices, depending on stype
    original <- if (stype == "f") rep(1, n)
    else if (stype == "w") {
      ns <- tabulate(strata)[strata]
      1/ns
    } else seq_len(n)

    t0 <- if (sum(m) > 0L) statistic(data, original, rep(1, sum(m)), ...)
    else statistic(data, original, ...)
    rm(original)
    t0
  } else # "parametric"
    statistic(data, ...)

  pred.i <- NULL
  # fn(r) evaluates the statistic on the r-th replicate
  fn <- if (sim == "parametric") {
    ## force promises, so values get sent by parallel
    ran.gen; data; mle
    function(r) {
      dd <- ran.gen(data, mle)
      statistic(dd, ...)
    }
  } else {
    # split off the extra prediction columns, if any, of the index array
    if (!simple && ncol(i) > n) {
      pred.i <- as.matrix(i[ , (n+1L):ncol(i)])
      i <- i[, seq_len(n)]
    }
    if (stype %in% c("f", "w")) {
      f <- freq.array(i)
      rm(i)
      # ns was created above in the stype == "w" branch
      if (stype == "w") f <- f/ns
      if (sum(m) == 0L) function(r) statistic(data, f[r,  ], ...)
      else function(r) statistic(data, f[r, ], pred.i[r, ], ...)
    } else if (sum(m) > 0L)
      function(r) statistic(data, i[r, ], pred.i[r,], ...)
    else if (simple)
      function(r)
        statistic(data,
                  index.array(n, 1, sim, strata, m, L, weights), ...)
    else function(r) statistic(data, i[r, ], ...)
  }
  RR <- sum(R)
  # run the R replicates, serially or in parallel
  res <- if (ncpus > 1L && (have_mc || have_snow)) {
    if (have_mc) {
      parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
    } else if (have_snow) {
      list(...) # evaluate any promises
      if (is.null(cl)) {
        cl <- parallel::makePSOCKcluster(rep("localhost", ncpus))
        if(RNGkind()[1L] == "L'Ecuyer-CMRG")
          parallel::clusterSetRNGStream(cl)
        res <- parallel::parLapply(cl, seq_len(RR), fn)
        parallel::stopCluster(cl)
        res
      } else parallel::parLapply(cl, seq_len(RR), fn)
    }
  } else lapply(seq_len(RR), fn)
  # collect replicate values into the RR x length(t0) matrix t.star
  t.star <- matrix(, RR, length(t0))
  for(r in seq_len(RR)) t.star[r, ] <- res[[r]]

  if (is.null(weights)) weights <- 1/tabulate(strata)[strata]
  boot0 <- boot.return(sim, t0, t.star, temp.str, R, data, statistic,
                       stype, call,
                       seed, L, m, pred.i, weights, ran.gen, mle)
  # tag the object so find_type() can identify it as a plain boot result
  attr(boot0, "boot_type") <- "boot"
  boot0
}

normalize <- function(wts, strata)
{
  #
  # Rescale a vector of weights so that they total 1 within each
  # stratum; weights outside all strata are returned unchanged.
  #
  n <- length(strata)
  scaled <- wts
  stratum.labels <- as.integer(names(table(strata)))
  for (lab in stratum.labels) {
    members <- seq_len(n)[strata == lab]
    scaled[members] <- wts[members]/sum(wts[members])
  }
  scaled
}

boot.return <- function(sim, t0, t, strata, R, data, stat, stype, call,
                        seed, L, m, pred.i, weights, ran.gen, mle)
  #
  # Assemble the results of a bootstrap into an object of class "boot".
  # The common components are stored first; the extra components
  # attached depend on the simulation type used.
  #
{
  out <- list(t0=t0, t=t, R=R, data=data, seed=seed,
              statistic=stat, sim=sim, call=call)
  extra <- if (sim == "parametric") {
    list(ran.gen=ran.gen, mle=mle)
  } else if (sim == "antithetic") {
    list(stype=stype, strata=strata, L=L)
  } else if (sim == "ordinary") {
    if (sum(m) > 0)
      list(stype=stype, strata=strata, weights=weights, pred.i=pred.i)
    else
      list(stype=stype, strata=strata, weights=weights)
  } else if (sim == "balanced") {
    list(stype=stype, strata=strata, weights=weights)
  } else {
    list(stype=stype, strata=strata)
  }
  out <- c(out, extra)
  class(out) <- "boot"
  out
}

c.boot <- function (..., recursive = TRUE)
{
  # Concatenate several compatible "boot" objects: the combined object
  # keeps the first object's metadata, pools the replicate matrices by
  # rows, and sums the replicate counts.
  pieces <- list(...)
  name.sets <- lapply(pieces, names)
  same <- sapply(name.sets, function(x) identical(x, name.sets[[1]]))
  if (!all(same))
    stop("arguments are not all the same type of \"boot\" object")
  combined <- pieces[[1]]
  combined$R <- sum(sapply(pieces, "[[", "R"))
  combined$t <- do.call(rbind, lapply(pieces, "[[", "t"))
  combined
}

boot.array <- function(boot.out, indices=FALSE) {
  #
  #  Return the frequency or index array for the bootstrap resamples
  #  used in boot.out
  #  This function recreates such arrays from the information in boot.out
  #  by restoring the RNG seed saved in the object, replaying the
  #  resampling, and then restoring the caller's RNG state on exit.
  #
  # save the caller's RNG state (if any) so it can be restored below
  if (exists(".Random.seed", envir=.GlobalEnv, inherits = FALSE))
    temp <- get(".Random.seed", envir=.GlobalEnv, inherits = FALSE)
  else temp<- NULL
  # replay from the seed recorded at bootstrap time
  assign(".Random.seed",  boot.out$seed, envir=.GlobalEnv)
  n <- NROW(boot.out$data)
  R <- boot.out$R
  sim <- boot.out$sim
  type <- find_type(boot.out)
  if (type == "tsboot") {
    #  Recreate the array for an object created by tsboot, The default for
    #  such objects is to return the index array unless index is specifically
    #  passed as F
    if (missing(indices)) indices <- TRUE
    if (sim == "model")
      stop("index array not defined for model-based resampling")
    n.sim <- boot.out$n.sim
    # regenerate the block starts/lengths, then expand them into indices
    i.a <- ts.array(n, n.sim, R, boot.out$l,
                    sim, boot.out$endcorr)
    out <- matrix(NA,R,n.sim)
    for(r in seq_len(R)) {
      if (sim == "geom")
        ends <- cbind(i.a$starts[r,  ],
                      i.a$lengths[r,  ])
      else
        ends <- cbind(i.a$starts[r,], i.a$lengths)
      inds <- apply(ends, 1L, make.ends, n)
      if (is.list(inds))
        inds <- unlist(inds)[seq_len(n.sim)]
      out[r,] <- inds
    }
  }
  else if (type == "censboot") {
    #  Recreate the array for an object created by censboot as long
    #  as censboot was called with sim = "ordinary"
    if (sim == "ordinary") {
      strata <- tapply(seq_len(n), as.numeric(boot.out$strata))
      out <- cens.case(n,strata,R)
    }
    else	stop("boot.array not implemented for this object")
  }
  else {
    #  Recreate the array for objects created by boot or tilt.boot
    if (sim == "parametric")
      stop("array cannot be found for parametric bootstrap")
    strata <- tapply(seq_len(n),as.numeric(boot.out$strata))
    if (find_type(boot.out) == "tilt.boot")
      weights <- boot.out$weights
    else {
      # for plain boot objects, weights are only present if the
      # original call supplied them
      weights <- boot.out$call$weights
      if (!is.null(weights))
        weights <- boot.out$weights
    }
    out <- index.array(n, R, sim, strata, 0, boot.out$L, weights)
  }
  # convert indices to frequencies unless the index array was requested
  if (!indices) out <- freq.array(out)
  # restore (or remove) the caller's RNG state
  if (!is.null(temp)) assign(".Random.seed", temp, envir=.GlobalEnv)
  else rm(.Random.seed, pos=1)
  out
}

# plot.boot <- function(x,index=1, t0=NULL, t=NULL, jack=FALSE,
#                       qdist="norm",nclass=NULL,df, ...) {
#   #
#   #  A plot method for bootstrap output objects.  It produces a histogram
#   #  of the bootstrap replicates and a QQ plot of them.  Optionally it can
#   #  also produce a jackknife-after-bootstrap plot.
#   #
#   boot.out <- x
#   t.o <- t
#   if (is.null(t)) {
#     t <- boot.out$t[,index]
#     if (is.null(t0)) t0 <- boot.out$t0[index]
#   }
#   t <- t[is.finite(t)]
#   if (const(t, min(1e-8,mean(t, na.rm=TRUE)/1e6))) {
#     print(paste("All values of t* are equal to ", mean(t, na.rm=TRUE)))
#     return(invisible(boot.out))
#   }
#   if (is.null(nclass)) nclass <- min(max(ceiling(length(t)/25),10),100)
#   if (!is.null(t0)) {
#     #  Calculate the breakpoints for the histogram so that one of them is
#     #  exactly t0.
#     rg <- range(t)
#     if (t0<rg[1L]) rg[1L] <- t0
#     else if (t0 >rg[2L]) rg[2L] <- t0
#     rg <- rg+0.05*c(-1,1)*diff(rg)
#     lc <- diff(rg)/(nclass-2)
#     n1 <- ceiling((t0-rg[1L])/lc)
#     n2 <- ceiling((rg[2L]-t0)/lc)
#     bks <- t0+(-n1:n2)*lc
#   }
#   R <- boot.out$R
#   if (qdist == "chisq") {
#     qq <- qchisq((seq_len(R))/(R+1),df=df)
#     qlab <- paste("Quantiles of Chi-squared(",df,")",sep="")
#   }
#   else {
#     if (qdist!="norm")
#       warning(gettextf("%s distribution not supported: using normal instead", sQuote(qdist)), domain = NA)
#     qq <- qnorm((seq_len(R))/(R+1))
#     qlab <-"Quantiles of Standard Normal"
#   }
#   if (jack) {
#     layout(mat = matrix(c(1,2,3,3), 2L, 2L, byrow=TRUE))
#     if (is.null(t0))
#       hist(t,nclass=nclass,probability=TRUE,xlab="t*")
#     else	hist(t,breaks=bks,probability=TRUE,xlab="t*")
#     if (!is.null(t0)) abline(v=t0,lty=2)
#     qqplot(qq,t,xlab=qlab,ylab="t*")
#     if (qdist == "norm") abline(mean(t),sqrt(var(t)),lty=2)
#     else abline(0,1,lty=2)
#     jack.after.boot(boot.out,index=index,t=t.o, ...)
#   }
#   else {
#     par(mfrow=c(1,2))
#     if (is.null(t0))
#       hist(t,nclass=nclass,probability=TRUE,xlab="t*")
#     else	hist(t,breaks=bks,probability=TRUE,xlab="t*")
#     if (!is.null(t0)) abline(v=t0,lty=2)
#     qqplot(qq,t,xlab=qlab,ylab="t*")
#     if (qdist == "norm") abline(mean(t),sqrt(var(t)),lty=2)
#     else abline(0,1,lty=2)
#   }
#   par(mfrow=c(1,1))
#   invisible(boot.out)
# }

# Print method for "boot" objects: prints a header describing the kind
# of resampling performed (dispatched on find_type() and the sim
# component), the original call, and a table of bootstrap statistics
# (original value, bias, std. error; importance-weighted moments when
# the call supplied weights).  Columns of t that are entirely NA are
# dropped from the table and reported with a warning at the end.
print.boot <- function(x, digits = getOption("digits"),
                       index = 1L:ncol(boot.out$t), ...)
{
  #
  # Print the output of a bootstrap
  #
  boot.out <- x
  sim <- boot.out$sim
  cl <- boot.out$call
  # keep only the requested columns; drop (but remember) all-NA ones
  t <- matrix(boot.out$t[, index], nrow = nrow(boot.out$t))
  allNA <- apply(t,2L,function(t) all(is.na(t)))
  ind1 <- index[allNA]
  index <- index[!allNA]
  t <- matrix(t[, !allNA], nrow = nrow(t))
  rn <- paste("t",index,"*",sep="")
  # assemble the statistics table op: layout depends on whether t0 is
  # available and whether importance weights were used
  if (length(index) == 0L)
    op <- NULL
  else if (is.null(t0 <- boot.out$t0)) {
    if (is.null(boot.out$call$weights))
      op <- cbind(apply(t,2L,mean,na.rm=TRUE),
                  sqrt(apply(t,2L,function(t.st) var(t.st[!is.na(t.st)]))))
    else {
      # weighted bootstrap: use importance-sampling moment estimates
      op <- NULL
      for (i in index)
        op <- rbind(op, imp.moments(boot.out,index=i)$rat)
      op[,2L] <- sqrt(op[,2])
    }
    dimnames(op) <- list(rn,c("mean", "std. error"))
  }
  else {
    t0 <- boot.out$t0[index]
    if (is.null(boot.out$call$weights)) {
      op <- cbind(t0,apply(t,2L,mean,na.rm=TRUE)-t0,
                  sqrt(apply(t,2L,function(t.st) var(t.st[!is.na(t.st)]))))
      dimnames(op) <- list(rn, c("original"," bias  "," std. error"))
    }
    else {
      # weighted bootstrap: use importance-sampling moment estimates
      op <- NULL
      for (i in index)
        op <- rbind(op, imp.moments(boot.out,index=i)$rat)
      op <- cbind(t0,op[,1L]-t0,sqrt(op[,2L]),
                  apply(t,2L,mean,na.rm=TRUE))
      dimnames(op) <- list(rn,c("original", " bias  ",
                                " std. error", " mean(t*)"))
    }
  }
  # print a header describing the resampling scheme
  type <- find_type(boot.out)
  if (type == "boot") {
    if (sim == "parametric")
      cat("\nPARAMETRIC BOOTSTRAP\n\n")
    else if (sim == "antithetic") {
      if (is.null(cl$strata))
        cat("\nANTITHETIC BOOTSTRAP\n\n")
      else
        cat("\nSTRATIFIED ANTITHETIC BOOTSTRAP\n\n")
    }
    else if (sim == "permutation") {
      if (is.null(cl$strata))
        cat("\nDATA PERMUTATION\n\n")
      else
        cat("\nSTRATIFIED DATA PERMUTATION\n\n")
    }
    else if (sim == "balanced") {
      if (is.null(cl$strata) && is.null(cl$weights))
        cat("\nBALANCED BOOTSTRAP\n\n")
      else if (is.null(cl$strata))
        cat("\nBALANCED WEIGHTED BOOTSTRAP\n\n")
      else if (is.null(cl$weights))
        cat("\nSTRATIFIED BALANCED BOOTSTRAP\n\n")
      else
        cat("\nSTRATIFIED WEIGHTED BALANCED BOOTSTRAP\n\n")
    }
    else {
      if (is.null(cl$strata) && is.null(cl$weights))
        cat("\nORDINARY NONPARAMETRIC BOOTSTRAP\n\n")
      else if (is.null(cl$strata))
        cat("\nWEIGHTED BOOTSTRAP\n\n")
      else if (is.null(cl$weights))
        cat("\nSTRATIFIED BOOTSTRAP\n\n")
      else
        cat("\nSTRATIFIED WEIGHTED BOOTSTRAP\n\n")
    }
  }
  else if (type == "tilt.boot") {
    # tilted bootstraps also report how the replicates were tilted
    R <- boot.out$R
    th <- boot.out$theta
    if (sim == "balanced")
      cat("\nBALANCED TILTED BOOTSTRAP\n\n")
    else	cat("\nTILTED BOOTSTRAP\n\n")
    if ((R[1L] == 0) || is.null(cl$tilt) || eval(cl$tilt))
      cat("Exponential tilting used\n")
    else	cat("Frequency Smoothing used\n")
    i1 <- 1
    if (boot.out$R[1L]>0)
      cat(paste("First",R[1L],"replicates untilted,\n"))
    else {
      cat(paste("First ",R[2L]," replicates tilted to ",
                signif(th[1L],4),",\n",sep=""))
      i1 <- 2
    }
    if (i1 <= length(th)) {
      for (j in i1:length(th))
        cat(paste("Next ",R[j+1L]," replicates tilted to ",
                  signif(th[j],4L),
                  ifelse(j!=length(th),",\n",".\n"),sep=""))
    }
    op <- op[, 1L:3L]
  }
  else if (type == "tsboot") {
    if (!is.null(cl$indices))
      cat("\nTIME SERIES BOOTSTRAP USING SUPPLIED INDICES\n\n")
    else if (sim == "model")
      cat("\nMODEL BASED BOOTSTRAP FOR TIME SERIES\n\n")
    else if (sim == "scramble") {
      cat("\nPHASE SCRAMBLED BOOTSTRAP FOR TIME SERIES\n\n")
      if (boot.out$norm)
        cat("Normal margins used.\n")
      else
        cat("Observed margins used.\n")
    }
    else if (sim == "geom") {
      if (is.null(cl$ran.gen))
        cat("\nSTATIONARY BOOTSTRAP FOR TIME SERIES\n\n")
      else
        cat(paste("\nPOST-BLACKENED STATIONARY",
                  "BOOTSTRAP FOR TIME SERIES\n\n"))
      cat(paste("Average Block Length of",boot.out$l,"\n"))
    }
    else {
      if (is.null(cl$ran.gen))
        cat("\nBLOCK BOOTSTRAP FOR TIME SERIES\n\n")
      else
        cat(paste("\nPOST-BLACKENED BLOCK",
                  "BOOTSTRAP FOR TIME SERIES\n\n"))
      cat(paste("Fixed Block Length of",boot.out$l,"\n"))
    }
  }
  else if (type == "censboot") {
    cat("\n")
    if (sim == "weird") {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("WEIRD BOOTSTRAP FOR CENSORED DATA\n\n")
    }
    else if ((sim == "ordinary") ||
             ((sim == "model") && is.null(boot.out$cox))) {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("CASE RESAMPLING BOOTSTRAP FOR CENSORED DATA\n\n")
    }
    else if (sim == "model") {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("MODEL BASED BOOTSTRAP FOR COX REGRESSION MODEL\n\n")
    }
    else if (sim == "cond") {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("CONDITIONAL BOOTSTRAP ")
      if (is.null(boot.out$cox))
        cat("FOR CENSORED DATA\n\n")
      else
        cat("FOR COX REGRESSION MODEL\n\n")
    }
  } else warning('unknown type of "boot" object')
  # print the call and the statistics table, then warn about all-NA columns
  cat("\nCall:\n")
  dput(cl, control=NULL)
  cat("\n\nBootstrap Statistics :\n")
  if (!is.null(op)) print(op,digits=digits)
  if (length(ind1) > 0L)
    for (j in ind1)
      cat(paste("WARNING: All values of t", j, "* are NA\n", sep=""))
  invisible(boot.out)
}




corr <- function(d, w=rep(1,nrow(d))/nrow(d))
{
  # Weighted Pearson correlation between the two columns of d.
  # w gives (possibly unnormalized) observation weights; with the default
  # equal weights this is the ordinary sample correlation (the common
  # divisor in the moment estimates cancels).
  wsum <- sum(w)
  x <- d[, 1L]
  y <- d[, 2L]
  mx <- sum(x * w) / wsum
  my <- sum(y * w) / wsum
  cov.xy <- sum(x * y * w) / wsum - mx * my
  var.x <- sum(x^2 * w) / wsum - mx^2
  var.y <- sum(y^2 * w) / wsum - my^2
  cov.xy / sqrt(var.x * var.y)
}


extra.array <- function(n, R, m, strata=rep(1,n))
{
  # Extra bootstrap indices used for prediction.  Only valid with the
  # "ordinary" and "stratified" sampling types.  When m is a scalar,
  # returns an R x m matrix of indices drawn uniformly (with replacement)
  # from 1:n.  Otherwise m gives per-stratum counts (indexed by stratum
  # value) and the columns of the R x sum(m) result are grouped by stratum.
  if (length(m) == 1L)
    return(matrix(sample.int(n, m * R, replace = TRUE), R, m))
  strata.ids <- as.integer(names(table(strata)))
  out <- matrix(NA, R, sum(m))
  filled <- 0
  for (s in strata.ids) {
    if (m[s] > 0) {
      members <- seq_len(n)[strata == s]
      cols <- (filled + 1):(filled + m[s])
      out[, cols] <- matrix(bsample(members, R * m[s]), R, m[s])
      filled <- filled + m[s]
    }
  }
  out
}

freq.array <- function(i.array)
{
  # Convert an R x n matrix of bootstrap indices into the corresponding
  # R x n matrix of frequencies: entry (r, j) counts how often
  # observation j appears in replicate r.
  n.obs <- ncol(i.array)
  t(apply(i.array, 1, tabulate, n.obs))
}



importance.array <- function(n, R, weights, strata){
  # Importance resampling within strata using the supplied weights.
  # weights is either a matrix with n columns (R must then be a vector
  # with one entry per row of weights) or a vector of length n (R a
  # scalar).  Rows generated for the successive rows of weights are
  # stacked into a single index matrix.
  draw <- function(size, reps, wts, pool = seq_len(size))
    matrix(bsample(pool, size * reps, prob = wts), reps, size)
  if (!isMatrix(weights))
    weights <- matrix(weights, nrow = 1)
  strata.ids <- as.integer(names(table(strata)))
  res <- NULL
  for (r in seq_along(R)) {
    block <- matrix(rep(seq_len(n), R[r]), R[r], n, byrow = TRUE)
    for (s in strata.ids) {
      members <- seq_len(n)[strata == s]
      block[, members] <- draw(length(members), R[r],
                               weights[r, members], members)
    }
    res <- rbind(res, block)
  }
  res
}

importance.array.bal <- function(n, R, weights, strata) {
  # Balanced importance resampling within strata.  Each index appears in
  # the output approximately in proportion to its weight: the integer
  # part of n*R*w fixes guaranteed counts and the fractional parts act
  # as probabilities for the remaining draws; the pooled indices are
  # then randomly permuted into an R x n matrix.
  bal.draw <- function(size, reps, wts, pool = seq_len(size)) {
    if (sum(wts) != 1) wts <- wts / sum(wts)
    guaranteed <- floor(size * reps * wts)
    leftover <- size * reps * wts - guaranteed
    picks <- rep(pool, guaranteed)
    if (any(leftover != 0))
      picks <- c(picks,
                 sample0(pool, round(sum(leftover)), prob = leftover))
    matrix(rperm(picks), reps, size)
  }
  if (!isMatrix(weights))
    weights <- matrix(weights, nrow = 1L)
  strata.ids <- as.integer(names(table(strata)))
  res <- NULL
  for (r in seq_along(R)) {
    block <- matrix(rep(seq_len(n), R[r]), R[r], n, byrow = TRUE)
    for (s in strata.ids) {
      members <- seq_len(n)[strata == s]
      block[, members] <- bal.draw(length(members), R[r],
                                   weights[r, members], members)
    }
    res <- rbind(res, block)
  }
  res
}



index.array <- function(n, R, sim, strata=rep(1,n), m=0, L=NULL, weights=NULL)
{
  # Dispatcher for bootstrap index-array generation: picks the routine
  # matching the requested simulation type.  Unweighted sampling supports
  # "ordinary", "balanced", "antithetic" and "permutation"; importance
  # (weighted) sampling supports "ordinary" and "balanced".  An explicit
  # NULL default preserves the original behavior (a visible NULL) for
  # unrecognized combinations.
  if (is.null(weights)) {
    switch(sim,
           ordinary = {
             idx <- ordinary.array(n, R, strata)
             if (sum(m) > 0)
               idx <- cbind(idx, extra.array(n, R, m, strata))
             idx
           },
           balanced = balanced.array(n, R, strata),
           antithetic = antithetic.array(n, R, L, strata),
           permutation = permutation.array(n, R, strata),
           NULL)
  } else {
    switch(sim,
           ordinary = importance.array(n, R, weights, strata),
           balanced = importance.array.bal(n, R, weights, strata),
           NULL)
  }
}

# jack.after.boot <- function(boot.out, index=1, t=NULL, L=NULL,
#                             useJ=TRUE, stinf = TRUE, alpha=NULL, main = "", ylab=NULL, ...)
# {
#   # jackknife after bootstrap plot
#   t.o <- t
#   if (is.null(t)) {
#     if (length(index) > 1L) {
#       index <- index[1L]
#       warning("only first element of 'index' used")
#     }
#     t <- boot.out$t[, index]
#   }
#   fins <- seq_along(t)[is.finite(t)]
#   t <- t[fins]
#   if (is.null(alpha)) {
#     alpha <- c(0.05, 0.1, 0.16, 0.5, 0.84, 0.9, 0.95)
#     if (is.null(ylab))
#       ylab <- "5, 10, 16, 50, 84, 90, 95 %-iles of (T*-t)"
#   }
#   if (is.null(ylab)) ylab <- "Percentiles of (T*-t)"
#   data <- boot.out$data
#   n <- NROW(data)
#   f <- boot.array(boot.out)[fins, , drop=TRUE]
#   percentiles <- matrix(data = NA, length(alpha), n)
#   J <- numeric(n)
#   for(j in seq_len(n)) {
#     # Find the quantiles of the bootstrap distribution on omitting each point.
#     values <- t[f[, j] == 0]
#     J[j] <- mean(values)
#     percentiles[, j] <- quantile(values, alpha) - J[j]
#   }
#   # Now find the jackknife values to be plotted, and standardize them,
#   # if required.
#   if (!useJ) {
#     if (is.null(L))
#       J <- empinf(boot.out, index=index, t=t.o, ...)
#     else 	J <- L
#   }
#   else	J <- (n - 1) * (mean(J) - J)
#   xtext <- "jackknife value"
#   if (!useJ) {
#     if (!is.null(L) || (is.null(t.o) && (boot.out$stype == "w")))
#       xtext <- paste("infinitesimal", xtext)
#     else	xtext <- paste("regression", xtext)
#   }
#   if (stinf) {
#     J <- J/sqrt(var(J))
#     xtext <- paste("standardized", xtext)
#   }
#   top <- max(percentiles)
#   bot <- min(percentiles)
#   ylts <- c(bot - 0.35 * (top - bot), top + 0.1 * (top - bot))
#   percentiles <- percentiles[,order(J)]#
#   # Plot the overall quantiles and the delete-1 quantiles against the
#   # jackknife values.
#   plot(sort(J), percentiles[1,  ], ylim = ylts, type = "n", xlab = xtext,
#        ylab = ylab, main=main)
#   for(j in seq_along(alpha))
#     lines(sort(J), percentiles[j,  ], type = "b", pch = "*")
#   percentiles <- quantile(t, alpha) - mean(t)
#   for(j in seq_along(alpha))
#     abline(h=percentiles[j], lty=2)
#   # Now print the observation numbers below the plotted lines.  They are printed
#   # in five rows so that all numbers can be read easily.
#   text(sort(J), rep(c(bot - 0.08 * (top - bot), NA, NA, NA, NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, bot - 0.14 * (top - bot), NA, NA, NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, NA, bot - 0.2 * (top - bot), NA, NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, NA, NA, bot - 0.26 * (top - bot), NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, NA, NA, NA, bot - 0.32 * (top - bot)), n, n),
#        order(J), cex = 0.5)
#   invisible()
# }


ordinary.array <- function(n, R, strata)
{
  # R x n matrix of bootstrap indices, resampled with replacement and
  # equal weights within each stratum.  The single-stratum case is
  # special-cased so the whole matrix is filled by one sample.int call.
  strata.ids <- as.integer(names(table(strata)))
  if (length(strata.ids) == 1L) {
    out <- sample.int(n, n * R, replace = TRUE)
    dim(out) <- c(R, n)
    return(out)
  }
  out <- matrix(as.integer(0L), R, n)
  for (s in strata.ids) {
    members <- seq_len(n)[strata == s]
    # A singleton stratum can only resample itself (presumably this also
    # sidesteps R's sample()-style scalar-first-argument behavior in
    # bsample -- confirm against bsample's definition).
    out[, members] <- if (length(members) == 1) rep(members, R)
                      else bsample(members, R * length(members))
  }
  out
}

permutation.array <- function(n, R, strata)
{
  # R x n matrix of indices in which every row is a permutation of 1:n,
  # permuted independently within each stratum (i.e. sampling without
  # replacement, unlike ordinary.array).
  work <- matrix(rep(seq_len(n), R), n, R)   # one replicate per column
  strata.ids <- as.integer(names(table(strata)))
  for (s in strata.ids) {
    members <- seq_len(n)[strata == s]
    if (length(members) > 1L)
      work[members, ] <- apply(work[members, ], 2L, rperm)
  }
  t(work)
}


cv.glm <- function(data, glmfit, cost=function(y,yhat) mean((y-yhat)^2),
                   K=n)
{
  # K-fold cross-validation estimate of prediction error for a fitted
  # generalized linear model.
  #
  # data   : data frame used to fit glmfit.
  # glmfit : fitted "glm" object; its stored call is re-evaluated on each
  #          training subset, so the formula must be resolvable from here.
  # cost   : function of two arguments (observed values, predicted
  #          values) returning the error measure; default is average
  #          squared error.
  # K      : number of groups; the default n (leave-one-out) works via
  #          lazy evaluation against the n defined in the body below.
  #
  # Returns a list with the call, the K actually used, delta = c(raw CV
  # estimate, bias-adjusted CV estimate), and the RNG seed in force.
  call <- match.call()
  # Ensure .Random.seed exists, then record it so the random group
  # assignment is reproducible.
  if (!exists(".Random.seed", envir=.GlobalEnv, inherits = FALSE)) runif(1)
  seed <- get(".Random.seed", envir=.GlobalEnv, inherits = FALSE)
  n <- nrow(data)
  if ((K > n) || (K <= 1))
    stop("'K' outside allowable range")
  K.o <- K
  K <- round(K)
  # K must give reasonably even groups: snap to the nearest achievable
  # group count and warn if that changed the requested value.
  kvals <- unique(round(n/(1L:floor(n/2))))
  temp <- abs(kvals-K)
  if (!any(temp == 0))
    K <- kvals[temp == min(temp)][1L]
  if (K!=K.o) warning(gettextf("'K' has been set to %f", K), domain = NA)
  f <- ceiling(n/K)
  s <- sample0(rep(1L:K, f), n)  # random fold label for each row
  n.s <- table(s)
  #    glm.f <- formula(glmfit)
  glm.y <- glmfit$y
  cost.0 <- cost(glm.y, fitted(glmfit))  # apparent (in-sample) error
  ms <- max(s)
  CV <- 0
  Call <- glmfit$call
  for(i in seq_len(ms)) {
    j.out <- seq_len(n)[(s == i)]  # validation rows for this fold
    j.in <- seq_len(n)[(s != i)]   # training rows
    ## we want data from here but formula from the parent.
    Call$data <- data[j.in, , drop=FALSE]
    d.glm <- eval.parent(Call)
    p.alpha <- n.s[i]/n            # weight each fold by its share of rows
    cost.i <- cost(glm.y[j.out],
                   predict(d.glm, data[j.out, , drop=FALSE],
                           type = "response"))
    CV <- CV + p.alpha * cost.i
    # Accumulate the bias adjustment: full-data apparent error minus the
    # weighted error of each fold's model evaluated on the full data.
    cost.0 <- cost.0 - p.alpha *
      cost(glm.y, predict(d.glm, data, type = "response"))
  }
  list(call = call, K = K,
       delta = as.numeric(c(CV, CV + cost.0)),  # drop any names
       seed = seed)
}


boot.ci <- function(boot.out,conf = 0.95,type = "all",
                    index = 1L:min(2L, length(boot.out$t0)),
                    var.t0 = NULL ,var.t = NULL, t0 = NULL, t = NULL,
                    L = NULL, h = function(t) t,
                    hdot = function(t) rep(1, length(t)),
                    hinv = function(t) t, ...)
  #
  #  Main function to calculate bootstrap confidence intervals.
  #  This function calls a number of auxilliary functions to do
  #  the actual calculations depending on the type of interval(s)
  #  requested.
  #
  #  boot.out : object from a bootstrap run (uses $t0, $t, $R, $L).
  #  conf     : confidence level(s).
  #  type     : "all" or any subset of c("norm","basic","stud","perc","bca").
  #  index    : position of the statistic in boot.out$t0 / columns of
  #             boot.out$t; a second element, if present, is taken to be
  #             the position of its variance estimate.
  #  t0, t    : statistic and replicates supplied directly (must be
  #             supplied together); var.t0/var.t are matching variances.
  #  h, hdot, hinv : transformation, its derivative and its inverse --
  #             intervals are computed on the h scale, reported via hinv.
  #
{
  call <- match.call()
  #  Get and transform the statistic values and their variances,
  if ((is.null(t) && !is.null(t0)) ||
      (!is.null(t) && is.null(t0)))
    stop("'t' and 't0' must be supplied together")
  t.o <- t; t0.o <- t0  # untransformed copies, forwarded to norm.ci/bca.ci
  #    vt.o <- var.t
  vt0.o <- var.t0
  if (is.null(t)) {
    if (length(index) == 1L) {
      t0 <- boot.out$t0[index]
      t <- boot.out$t[,index]
    }
    else if (ncol(boot.out$t)<max(index)) {
      warning("index out of bounds; minimum index only used.")
      index <- min(index)
      t0 <- boot.out$t0[index]
      t <- boot.out$t[,index]
    }
    else {
      # two indices: the second names the variance-estimate column
      t0 <- boot.out$t0[index[1L]]
      t <- boot.out$t[,index[1L]]
      if (is.null(var.t0)) var.t0 <- boot.out$t0[index[2L]]
      if (is.null(var.t)) var.t <- boot.out$t[,index[2L]]
    }
  }
  # A (near-)constant statistic gives degenerate intervals -- bail out.
  if (const(t, min(1e-8, mean(t, na.rm=TRUE)/1e6))) {
    print(paste("All values of t are equal to ", mean(t, na.rm=TRUE),
                "\n Cannot calculate confidence intervals"))
    return(NULL)
  }
  if (length(t) != boot.out$R)
    stop(gettextf("'t' must of length %d", boot.out$R), domain = NA)
  # Drop non-finite replicates (and their variances, when present).
  if (is.null(var.t))
    fins <- seq_along(t)[is.finite(t)]
  else {
    fins <- seq_along(t)[is.finite(t) & is.finite(var.t)]
    var.t <- var.t[fins]
  }
  t <- t[fins]
  R <- length(t)
  # Delta-method scaling of the variances, then transform the statistics.
  if (!is.null(var.t0)) var.t0 <- var.t0*hdot(t0)^2
  if (!is.null(var.t))  var.t <- var.t*hdot(t)^2
  t0 <- h(t0); t <- h(t)
  if (missing(L)) L <- boot.out$L
  output <- list(R = R, t0 = hinv(t0), call = call)
  #  Now find the actual intervals using the methods listed in type
  if (any(type == "all" | type == "norm"))
    output <- c(output,
                list(normal = norm.ci(boot.out, conf,
                                      index[1L], var.t0=vt0.o, t0=t0.o, t=t.o,
                                      L=L, h=h, hdot=hdot, hinv=hinv)))
  if (any(type == "all" | type == "basic"))
    output <- c(output, list(basic=basic.ci(t0,t,conf,hinv=hinv)))
  if (any(type == "all" | type == "stud")) {
    # Studentized intervals need the variance replicates (second index).
    if (length(index)==1L)
      warning("bootstrap variances needed for studentized intervals")
    else
      output <- c(output, list(student=stud.ci(c(t0,var.t0),
                                               cbind(t ,var.t), conf, hinv=hinv)))
  }
  if (any(type == "all" | type == "perc"))
    output <- c(output, list(percent=perc.ci(t,conf,hinv=hinv)))
  if (any(type == "all" | type == "bca")) {
    # BCa needs empirical influence values, which are not defined for
    # dependent (time-series) resampling.
    if (find_type(boot.out) == "tsboot")
      warning("BCa intervals not defined for time series bootstraps")
    else
      output <- c(output, list(bca=bca.ci(boot.out,conf,
                                          index[1L],L=L,t=t.o, t0=t0.o,
                                          h=h,hdot=hdot, hinv=hinv, ...)))
  }
  class(output) <- "bootci"
  output
}

print.bootci <- function(x, hinv = NULL, ...) {
  #
  #  Print the output from boot.ci
  #
  #  x    : "bootci" object produced by boot.ci.
  #  hinv : optional inverse transformation applied to t0 and all
  #         interval endpoints before printing.
  #
  ci.out <- x
  cl <- ci.out$call
  ntypes <- length(ci.out)-3L  # components beyond R, t0, call are intervals
  nints <- nrow(ci.out[[4L]])  # one row per confidence level
  t0 <- ci.out$t0
  if (!is.null(hinv)) t0 <- hinv(t0)  #
  #  Find the number of decimal places which should be used
  digs <- ceiling(log10(abs(t0)))
  if (digs <= 0) digs <- 4
  else if (digs >= 4) digs <- 0
  else digs <- 4-digs
  intlabs <- NULL
  basrg <- strg <- perg <- bcarg <- NULL
  # Build column labels and record, for each method, the range of order
  # statistics used (columns 2:3 of each interval matrix); these ranges
  # drive the extreme-quantile warnings printed at the end.
  if (!is.null(ci.out$normal))
    intlabs <- c(intlabs,"     Normal        ")
  if (!is.null(ci.out$basic)) {
    intlabs <- c(intlabs,"     Basic         ")
    basrg <- range(ci.out$basic[,2:3]) }
  if (!is.null(ci.out$student)) {
    intlabs <- c(intlabs,"   Studentized     ")
    strg <- range(ci.out$student[,2:3]) }
  if (!is.null(ci.out$percent)) {
    intlabs <- c(intlabs,"    Percentile     ")
    perg <- range(ci.out$percent[,2:3]) }
  if (!is.null(ci.out$bca)) {
    intlabs <- c(intlabs,"      BCa          ")
    bcarg <- range(ci.out$bca[,2:3]) }
  level <- 100*ci.out[[4L]][, 1L]
  # Lay out at most three interval types per table: n1 types in the first
  # table, n2 in a second one underneath.
  if (ntypes == 4L) n1 <- n2 <- 2L
  else if (ntypes == 5L) {n1 <- 3L; n2 <- 2L}
  else {n1 <- ntypes; n2 <- 0L}
  ints1 <- matrix(NA,nints,2L*n1+1L)
  ints1[,1L] <- level
  n0 <- 4L
  #  Re-organize the intervals and coerce them into character data
  for (i in n0:(n0+n1-1)) {
    j <- c(2L*i-6L,2L*i-5L)
    nc <- ncol(ci.out[[i]])
    nc <- c(nc-1L,nc)  # endpoints are always the last two columns
    if (is.null(hinv))
      ints1[,j] <- ci.out[[i]][,nc]
    else	ints1[,j] <- hinv(ci.out[[i]][,nc])
  }
  n0 <- 4L+n1
  ints1 <- format(round(ints1,digs))
  ints1[,1L] <- paste("\n",level,"%  ",sep="")
  ints1[,2*(1L:n1)] <- paste("(",ints1[,2*(1L:n1)],",",sep="")
  ints1[,2*(1L:n1)+1L] <- paste(ints1[,2*(1L:n1)+1L],")  ")
  if (n2 > 0) {
    # Second table for the remaining interval types.
    ints2 <- matrix(NA,nints,2L*n2+1L)
    ints2[,1L] <- level
    j <- c(2L,3L)
    for (i in n0:(n0+n2-1L)) {
      if (is.null(hinv))
        ints2[,j] <- ci.out[[i]][,c(4L,5L)]
      else	ints2[,j] <- hinv(ci.out[[i]][,c(4L,5L)])
      j <- j+2L
    }
    ints2 <- format(round(ints2,digs))
    ints2[,1L] <- paste("\n",level,"%  ",sep="")
    ints2[,2*(1L:n2)] <- paste("(",ints2[,2*(1L:n2)],",",sep="")
    ints2[,2*(1L:n2)+1L] <- paste(ints2[,2*(1L:n2)+1L],")  ")
  }
  R <- ci.out$R                       #
  #  Print the intervals
  cat("BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS\n")
  cat(paste("Based on",R,"bootstrap replicates\n\n"))
  cat("CALL : \n")
  dput(cl, control=NULL)
  cat("\nIntervals : ")
  cat("\nLevel",intlabs[1L:n1])
  cat(t(ints1))
  if (n2 > 0) {
    cat("\n\nLevel",intlabs[(n1+1):(n1+n2)])
    cat(t(ints2))
  }
  # Note whether a transformation was involved in the calculations;
  # inferred from the h/hinv arguments recorded in the boot.ci call.
  if (!is.null(cl$h)) {
    if (is.null(cl$hinv) && is.null(hinv))
      cat("\nCalculations and Intervals on ",
          "Transformed Scale\n")
    else	cat("\nCalculations on Transformed Scale;",
             " Intervals on Original Scale\n")
  }
  else if (is.null(cl$hinv) && is.null(hinv))
    cat("\nCalculations and Intervals on Original Scale\n")
  else 	cat("\nCalculations on Original Scale",
            " but Intervals Transformed\n")#
  #  Print any warnings about extreme values.
  if (!is.null(basrg)) {
    if ((basrg[1L] <= 1) || (basrg[2L] >= R))
      cat("Warning : Basic Intervals used Extreme Quantiles\n")
    if ((basrg[1L] <= 10) || (basrg[2L] >= R-9))
      cat("Some basic intervals may be unstable\n")
  }
  if (!is.null(strg)) {
    if ((strg[1L] <= 1) || (strg[2L] >= R))
      cat("Warning : Studentized Intervals used Extreme Quantiles\n")
    if ((strg[1L] <= 10) || (strg[2L] >= R-9))
      cat("Some studentized intervals may be unstable\n")
  }
  if (!is.null(perg)) {
    if ((perg[1L] <= 1) || (perg[2L] >= R))
      cat("Warning : Percentile Intervals used Extreme Quantiles\n")
    if ((perg[1L] <= 10) || (perg[2L] >= R-9))
      cat("Some percentile intervals may be unstable\n")
  }
  if (!is.null(bcarg)) {
    if ((bcarg[1L] <= 1) || (bcarg[2L] >= R))
      cat("Warning : BCa Intervals used Extreme Quantiles\n")
    if ((bcarg[1L] <= 10) || (bcarg[2L] >= R-9))
      cat("Some BCa intervals may be unstable\n")
  }
  invisible(ci.out)
}

norm.ci <-
  function(boot.out = NULL,conf = 0.95,index = 1,var.t0 = NULL, t0 = NULL,
           t = NULL, L = NULL, h = function(t) t, hdot = function(t) 1,
           hinv = function(t) t)
    #
    #  Normal-approximation interval: t0 - bias +/- z_{(1+conf)/2} * sd,
    #  computed on the h-transformed scale and mapped back with hinv.
    #  Works with or without a bootstrap object: the variance comes from
    #  var.t0 if supplied, otherwise from the replicates t, otherwise
    #  from the influence values L; the bias correction mean(t) - t0 is
    #  applied only when replicates are available.
    #
  {
    if (is.null(t0)) {
      if (is.null(boot.out))
        stop("bootstrap output object or 't0' required")
      t0 <- boot.out$t0[index]
    }
    if (is.null(t) && !is.null(boot.out))
      t <- boot.out$t[,index]
    if (!is.null(t)) {
      ok <- seq_along(t)[is.finite(t)]
      t <- h(t[ok])
    }
    if (!is.null(var.t0)) {
      # delta-method adjustment of a supplied variance
      var.t0 <- var.t0*hdot(t0)^2
    } else if (!is.null(t)) {
      var.t0 <- var(t)
    } else if (!is.null(L)) {
      var.t0 <- sum((hdot(t0)*L/length(L))^2)
    } else {
      stop("unable to calculate 'var.t0'")
    }
    t0 <- h(t0)
    bias <- if (is.null(t)) 0 else mean(t)-t0
    merr <- sqrt(var.t0)*qnorm((1+conf)/2)
    cbind(conf,hinv(t0-bias-merr),hinv(t0-bias+merr))
  }

norm.inter <- function(t,alpha)
  #
  #  Quantile estimation with interpolation on the normal scale.  For a
  #  non-integer target order statistic (R+1)*alpha the value is
  #  interpolated between the two neighbouring order statistics using
  #  normal quantiles -- equation 5.8 of Davison and Hinkley (1997).
  #  Returns a two-column matrix: the (rounded) target ranks and the
  #  interpolated values.
  #
{
  t <- t[is.finite(t)]
  R <- length(t)
  rk <- (R+1)*alpha
  if (!all(rk>1 & rk<R))
    warning("extreme order statistics used as endpoints")
  k <- trunc(rk)
  out <- seq_along(k)
  kvs <- k[k>0 & k<R]
  # partial sort: only the ranks we actually read need to be in place
  tstar <- sort(t, partial = sort(union(c(1, R), c(kvs, kvs+1))))
  exact <- (k == rk)
  if (any(exact)) out[exact] <- tstar[k[exact]]
  # clamp targets that fall outside the sample to the extreme values
  out[k == 0] <- tstar[1L]
  out[k == R] <- tstar[R]
  interp <- !exact & k != 0 & k != R
  if (any(interp)) {
    lam <- (qnorm(alpha[interp]) - qnorm(k[interp]/(R+1))) /
      (qnorm((k[interp]+1)/(R+1)) - qnorm(k[interp]/(R+1)))
    lo <- tstar[k[interp]]
    hi <- tstar[k[interp]+1L]
    out[interp] <- lo + lam*(hi - lo)
  }
  cbind(round(rk, 2), out)
}

basic.ci <- function(t0, t, conf = 0.95, hinv = function(t) t)
  #
  #  Basic (reflected-percentile) bootstrap interval: 2*t0 minus the
  #  upper/lower quantiles of the replicates, transformed through hinv.
  #  The first columns record conf and the interpolated ranks used.
  #
{
  qq <- norm.inter(t, (1+c(conf,-conf))/2)
  ranks <- matrix(qq[,1L], ncol=2L)
  lims <- matrix(hinv(2*t0 - qq[,2L]), ncol=2L)
  cbind(conf, ranks, lims)
}

stud.ci <- function(tv0, tv, conf = 0.95, hinv=function(t) t)
  #
  #  Studentized bootstrap interval.  tv0 = c(t0, var.t0); tv holds the
  #  replicates in column 1 and their variances in column 2.  Returns NA
  #  (with a warning) when no variances are available.
  #
{
  if ((length(tv0) < 2) || (ncol(tv) < 2)) {
    warning("variance required for studentized intervals")
    return(NA)
  }
  z <- (tv[,1L]-tv0[1L])/sqrt(tv[,2L])   # studentized replicates
  qq <- norm.inter(z, (1+c(conf,-conf))/2)
  cbind(conf, matrix(qq[,1L],ncol=2L),
        matrix(hinv(tv0[1L]-sqrt(tv0[2L])*qq[,2L]),ncol=2L))
}

perc.ci <- function(t, conf = 0.95, hinv = function(t) t)
  #
  #  Percentile bootstrap interval: the (1-conf)/2 and (1+conf)/2
  #  quantiles of the replicates themselves (interpolated on the normal
  #  scale by norm.inter), transformed through hinv.
  #
{
  probs <- (1+c(-conf,conf))/2
  qq <- norm.inter(t, probs)
  cbind(conf, matrix(qq[,1L],ncol=2L), matrix(hinv(qq[,2]),ncol=2L))
}

bca.ci <-
  function(boot.out,conf = 0.95,index = 1,t0 = NULL,t = NULL, L = NULL,
           h = function(t) t, hdot = function(t) 1, hinv = function(t) t,
           ...)
    #
    #  Adjusted percentile (BCa) confidence interval.  The bias
    #  correction w is the normal quantile of the proportion of
    #  replicates below t0; the acceleration a comes from the empirical
    #  influence values L.  The adjusted quantile levels are then looked
    #  up with norm.inter, since the required order statistics are
    #  usually non-integer.
    #
  {
    t.o <- t
    if (is.null(t) || is.null(t0)) {
      t <- boot.out$t[,index]
      t0 <- boot.out$t0[index]
    }
    t <- t[is.finite(t)]
    w <- qnorm(sum(t < t0)/length(t))   # bias-correction constant
    if (!is.finite(w)) stop("estimated adjustment 'w' is infinite")
    zalpha <- qnorm((1+c(-conf,conf))/2)
    if (is.null(L))
      L <- empinf(boot.out, index=index, t=t.o, ...)
    a <- sum(L^3)/(6*sum(L^2)^1.5)      # acceleration constant
    if (!is.finite(a)) stop("estimated adjustment 'a' is NA")
    adj.alpha <- pnorm(w + (w+zalpha)/(1-a*(w+zalpha)))
    qq <- norm.inter(t, adj.alpha)
    cbind(conf, matrix(qq[,1L],ncol=2L), matrix(hinv(h(qq[,2L])),ncol=2L))
  }



abc.ci <- function(data, statistic, index = 1, strata = rep(1, n), conf = 0.95,
                   eps = 0.001/n, ...)
  #
  #  Non-parametric ABC (approximate bootstrap confidence) intervals.
  #  statistic(data, weights, ...) must be a weighted version of the
  #  statistic of interest; its derivatives with respect to the weights
  #  are taken numerically with step eps.  Returns c(conf, lower, upper)
  #  for a single conf, otherwise a matrix with one row per level.
  #
{
  y <- data
  n <- NROW(y)
  strata1 <- tapply(strata,as.numeric(strata))  # recode strata as 1..S
  if (length(index) != 1L) {
    warning("only first element of 'index' used in 'abc.ci'")
    index <- index[1L]
  }
  S <- length(table(strata1))
  # indicator matrix used to renormalize weights within each stratum
  mat <- matrix(0,n,S)
  for (s in 1L:S) {
    mat[seq_len(n)[strata1 == s], s] <- 1
  }
  wt <- function(w) w/(w%*%mat)[strata1]  # stratum-normalized weights
  #  Observed value of the statistic
  w.orig <- rep(1/n,n)
  t0 <- statistic(y, wt(w.orig), ...)[index]
  #  Linear and quadratic empirical influence values by numerical
  #  differentiation
  L <- L2 <- numeric(n)
  for (i in seq_len(n)) {
    w1 <- (1-eps)*w.orig
    w1[i] <- w1[i]+eps
    w2 <- (1+eps)*w.orig
    w2[i] <- w2[i] - eps
    t1 <- statistic(y, wt(w1), ...)[index]
    t2 <- statistic(y, wt(w2), ...)[index]
    L[i] <- (t1-t2)/(2*eps)        # central first difference
    L2[i] <- (t1-2*t0+t2)/eps^2    # second difference
  }
  #  Constants required for the interval
  ssL <- sum(L*L)
  sigmahat <- sqrt(ssL)/n             # standard error estimate
  ahat <- sum(L^3)/(6*ssL^1.5)        # called a in the text
  bhat <- sum(L2)/(2*n*n)             # called b in the text
  dhat <- L/(n*n*sigmahat)            # called k in the text
  w3 <- w.orig+eps*dhat
  w4 <- w.orig-eps*dhat
  chat <- (statistic(y, wt(w3), ...)[index]-2*t0 +
             statistic(y, wt(w4), ...)[index]) /
    (2*eps*eps*sigmahat)              # called c in the text
  bprime <- ahat-(bhat/sigmahat-chat) # called w in the text
  alpha <- (1+as.vector(rbind(-conf,conf)))/2
  zalpha <- qnorm(alpha)
  lalpha <- (bprime+zalpha)/(1-ahat*(bprime+zalpha))^2
  #  Endpoints come from evaluating the statistic at tilted weights
  ends <- numeric(length(alpha))
  for (i in seq_along(alpha)) {
    w.fin <- w.orig+lalpha[i]*dhat
    ends[i] <- statistic(y, wt(w.fin), ...)[index]
  }
  out <- cbind(conf,matrix(ends,ncol=2L,byrow=TRUE))
  if (length(conf) == 1L) out <- as.vector(out)
  out
}

censboot <-
  function(data, statistic, R, F.surv, G.surv, strata = matrix(1, n, 2),
           sim = "ordinary", cox = NULL, index = c(1, 2), ...,
           parallel = c("no", "multicore", "snow"),
           ncpus = getOption("boot.ncpus", 1L), cl = NULL)
  {
    #
    #  Bootstrap replication for survival data.  Possible resampling
    #  schemes are case, model-based, conditional bootstrap (with or without
    #  a model) and the weird bootstrap.
    #
    #  data      : matrix (or data frame); columns index[1]/index[2] hold
    #              the times and censoring indicators.
    #  statistic : function applied to each resampled data set.
    #  R         : number of bootstrap replicates.
    #  F.surv    : failure-time distribution object (required for every
    #              sim except "ordinary").
    #  G.surv    : censoring-distribution object (required for sim =
    #              "cond" or "model").
    #  strata    : one- or two-column strata specification (failure
    #              strata, censoring strata).
    #  cox       : optional Cox model fit used by "model"/"cond" sampling.
    #  parallel, ncpus, cl : optional parallel evaluation of replicates.
    #
    mstrata <- missing(strata)
    if (any(is.na(data)))
      stop("missing values not allowed in 'data'")
    if ((sim != "ordinary") && (sim != "model") && (sim != "cond")
        && (sim != "weird")) stop("unknown value of 'sim'")
    # model-based sampling without a Cox model reduces to case resampling
    if ((sim == "model") && (is.null(cox))) sim <- "ordinary"
    if (missing(parallel)) parallel <- getOption("boot.parallel", "no")
    parallel <- match.arg(parallel)
    have_mc <- have_snow <- FALSE
    if (parallel != "no" && ncpus > 1L) {
      if (parallel == "multicore") have_mc <- .Platform$OS.type != "windows"
      else if (parallel == "snow") have_snow <- TRUE
      if (!have_mc && !have_snow) ncpus <- 1L
      loadNamespace("parallel") # get this out of the way before recording seed
    }
    # ensure a seed exists, then record it so the run is reproducible
    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
    seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
    call <- match.call()
    if (isMatrix(data)) n <- nrow(data)
    else stop("'data' must be a matrix with at least 2 columns")
    if (ncol(data) < 2L)
      stop("'data' must be a matrix with at least 2 columns")
    if (length(index) < 2L)
      stop("'index' must contain 2 elements")
    if (length(index) > 2L) {
      warning("only first 2 elements of 'index' used")
      index <- index[1L:2L]
    }
    if (ncol(data) < max(index))
      stop("indices are incompatible with 'ncol(data)'")
    if (sim == "weird") {
      # the weird bootstrap uses only the time and status columns
      if (!is.null(cox))
        stop("sim = \"weird\" cannot be used with a \"coxph\" object")
      if (ncol(data) > 2L)
        warning(gettextf("only columns %s and %s of 'data' used",
                         index[1L], index[2L]), domain = NA)
      data <- data[,index]
    }
    if (!is.null(cox) && is.null(cox$coefficients) &&
        ((sim == "cond") || (sim == "model"))) {
      warning("no coefficients in Cox model -- model ignored")
      cox <- NULL
    }
    if ((sim != "ordinary")  && missing(F.surv))
      stop("'F.surv' is required but missing")
    if (missing(G.surv) && ((sim == "cond") || (sim == "model")))
      stop("'G.surv' is required but missing")
    # normalize strata to one column ("weird"/"ordinary") or two columns
    # (failure strata, censoring strata) for the remaining schemes
    if (NROW(strata) != n) stop("'strata' of wrong length")
    if (!isMatrix(strata)) {
      if (!((sim == "weird") || (sim == "ordinary")))
        strata <- cbind(strata, 1)
    } else {
      if ((sim == "weird") || (sim == "ordinary")) strata <- strata[, 1L]
      else  strata <- strata[, 1L:2L]
    }
    temp.str <- strata
    # recode stratum values as consecutive integers 1, 2, ...
    strata <- if (isMatrix(strata))
      apply(strata, 2L, function(s, n) tapply(seq_len(n), as.numeric(s)), n)
    else  tapply(seq_len(n), as.numeric(strata))
    t0 <- if ((sim == "weird") && !mstrata) statistic(data, temp.str, ...)
    else  statistic(data, ...)
    ## Calculate the resampled data sets.  For ordinary resampling this
    ## involves finding the matrix of indices of the case to be resampled.
    ## For the conditional bootstrap or model-based we must find an array
    ## consisting of R matrices containing the resampled times and their
    ## censoring indicators.  The data sets for the weird bootstrap must be
    ## calculated individually.
    fn <- if (sim == "ordinary") {
      bt <- cens.case(n, strata, R)
      function(r) statistic(data[sort(bt[r, ]), ], ...)
    } else if (sim == "weird") {
      ## force promises
      data; F.surv
      if (!mstrata) {
        function(r) {
          bootdata <- cens.weird(data, F.surv, strata)
          statistic(bootdata[, 1:2], bootdata[, 3L], ...)
        }
      } else  {
        function(r) {
          bootdata <- cens.weird(data, F.surv, strata)
          statistic(bootdata[, 1:2], ...)
        }
      }
    } else {
      # model-based or conditional: bt[r, , ] holds the resampled times
      # and censoring indicators for replicate r
      bt <- cens.resamp(data, R, F.surv, G.surv, strata, index, cox, sim)
      function(r) {
        bootdata <- data
        bootdata[, index] <- bt[r, , ]
        # order by time, with failures before censorings at tied times
        oi <- order(bt[r, , 1L], 1-bt[r, , 2L])
        statistic(bootdata[oi, ], ...)
      }
    }
    rm(mstrata)

    res <- if (ncpus > 1L && (have_mc || have_snow)) {
      if (have_mc) {
        parallel::mclapply(seq_len(R), fn, ..., mc.cores = ncpus)
      } else if (have_snow) {
        list(...) # evaluate any promises
        if (is.null(cl)) {
          cl <- parallel::makePSOCKcluster(rep("localhost", ncpus))
          if(RNGkind()[1L] == "L'Ecuyer-CMRG")
            parallel::clusterSetRNGStream(cl)
          parallel::clusterEvalQ(cl, library(survival))
          res <- parallel::parLapply(cl, seq_len(R), fn)
          parallel::stopCluster(cl)
          res
        } else {
          parallel::clusterEvalQ(cl, library(survival))
          parallel::parLapply(cl, seq_len(R), fn)
        }
      }
    } else lapply(seq_len(R), fn)

    # assemble the R x length(t0) matrix of replicates
    t <- matrix(, R, length(t0))
    for(r in seq_len(R)) t[r, ] <- res[[r]]

    cens.return(sim, t0, t, temp.str, R, data, statistic, call, seed)
  }

cens.return <- function(sim, t0, t, strata, R, data, statistic, call, seed) {
  #
  #  Create an object of class "boot" from the output of a censored
  #  bootstrap, tagging it so find_type() can recognize its origin.
  #
  #  t0 / t : observed statistic and matrix of bootstrap replicates.
  #  Other arguments record the inputs needed to reproduce the run.
  #
  out <- list(t0 = t0, t = t, R = R, sim = sim, data = data, seed = seed,
              statistic = statistic, strata = strata, call = call)
  class(out) <- "boot"
  # Tag the RETURNED object.  The previous code did
  # attr(boot, "boot_type") <- "censboot", which modified a throwaway
  # local copy of the `boot` function and left the result untagged.
  attr(out, "boot_type") <- "censboot"
  out
}

cens.case <- function(n, strata, R) {
  #
  #  Simple case resampling for censored data: an R x n matrix of
  #  indices drawn with replacement within each stratum (strata are
  #  assumed to be coded 1, 2, ...).
  #
  idx <- matrix(NA, nrow = R, ncol = n)
  for (s in seq_along(table(strata))) {
    members <- seq_len(n)[strata == s]
    idx[, members] <- bsample(members, length(members) * R)
  }
  idx
}


cens.weird <- function(data, surv, strata) {
  #
  #  The weird bootstrap.  Censoring times are fixed and the number of
  #  failures at each failure time are sampled from a binomial
  #  distribution.  See Chapter 3 of Davison and Hinkley (1997).
  #
  #  data is a two column matrix containing the times and censoring
  #    indicator.
  #  surv is a survival object giving the failure time distribution.
  #  strata is the strata vector used in surv, or a vector of 1's if no
  #    strata were used.
  #
  #  Returns a matrix (or data frame, matching 'data') with columns
  #  time, status and stratum, sorted by time within stratum.
  #
  m <- length(surv$time)
  # recover the stratum of each row of the survival object
  if (is.null(surv$strata)) {
    nstr <- 1
    str <- rep(1, m)
  } else {
    nstr <- length(surv$strata)
    str <- rep(1L:nstr, surv$strata)
  }
  # resample the number of events at each failure time; redraw until
  # every stratum has at least one event
  n.ev <- rbinom(m, surv$n.risk, surv$n.event/surv$n.risk)
  while (any(tapply(n.ev, str, sum) == 0))
    n.ev <- rbinom(m, surv$n.risk, surv$n.event/surv$n.risk)
  times <- rep(surv$time, n.ev)
  str <- rep(str, n.ev)
  out <- NULL
  for (s in 1L:nstr) {
    # resampled failure times (status 1) plus the original censored
    # observations (status 0) belonging to this stratum
    temp <- cbind(times[str == s], 1)
    temp <- rbind(temp,
                  as.matrix(data[(strata == s&data[, 2L] == 0), , drop=FALSE]))
    temp <- cbind(temp, s)
    # order by time, with failures before censorings at tied times
    oi <- order(temp[, 1L], 1-temp[, 2L])
    out <- rbind(out, temp[oi, ])
  }
  if (is.data.frame(data)) out <- as.data.frame(out)
  out
}



cens.resamp <- function(data, R, F.surv, G.surv, strata, index = c(1,2),
                        cox = NULL, sim = "model")
{
  #
  #  Other types of resampling for the censored bootstrap.  This function
  #  uses some local functions to implement the conditional bootstrap for
  #  censored data and resampling based on a Cox regression model.  This
  #  latter method of sampling can also use conditional sampling to get the
  #  censoring times.
  #
  #  data is the data set
  #  R is the number of replicates
  #  F.surv is a survfit object for the failure time distribution
  #  G.surv is a survfit object for the censoring time distribution
  #  strata is a two column matrix, the first column gives the strata
  #     gives the strata for the failure times and the second for the
  #     censoring times.
  #  index is a vector with two integer components giving the position
  #     of the times and censoring indicators in data
  #  cox is an object returned by the coxph function to give the Cox
  #     regression model for the failure times.
  #  sim is the simulation type which will always be "model" or "cond"
  #
  #  Returns an R x n x 2 array: [, , 1] holds the resampled observed
  #  times pmin(y0, c0) and [, , 2] the event indicators 1*(y0 <= c0).
  #
  gety1 <- function(n, R, surv, inds) {
    # Sample failure times from the product limit estimate of the failure
    # time distribution.
    survival <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    # If the estimate does not reach zero, pad with a point mass at Inf so
    # the sampling probabilities below sum to one.
    if (survival[n1] > 0L) {
      survival <- c(survival, 0)
      time <- c(time, Inf)
    }
    probs <- diff(-c(1, survival))
    matrix(bsample(time, n*R, prob = probs), R, n)
  }
  gety2 <- function(n, R, surv, eta, inds) {
    # Sample failure times from the Cox regression model.
    F0 <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    # Pad with a point mass at Inf if the baseline survivor curve does not
    # reach zero.
    if (F0[n1] > 0) {
      F0 <- c(F0, 0)
      time <- c(time, Inf)
    }
    # Subject-specific distribution functions 1 - S0(t)^exp(eta).
    ex <- exp(eta)
    Fh <- 1 - outer(F0, ex, "^")
    apply(rbind(0, Fh), 2L,
          function(p, y, R) bsample(y, R, prob = diff(p)), time, R)
  }
  getc1 <- function(n, R, surv, inds) {
    # Sample censoring times from the product-limit estimate of the
    # censoring distribution.
    cens <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    # Pad with Inf as above so probabilities sum to one.
    if (cens[n1] > 0) {
      cens <- c(cens, 0)
      time <- c(time, Inf)
    }
    probs <- diff(-c(1, cens))
    matrix(bsample(time, n*R, prob = probs), nrow = R)
  }
  getc2 <- function(n, R, surv, inds, data, index) {
    # Sample censoring times from the conditional distribution.  If a failure
    # was observed then sample from the product-limit estimate of the censoring
    # distribution conditional on the time being greater than the observed
    # failure time.  If the observation is censored then resampled time is the
    # observed censoring time.
    cens <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    if (cens[n1] > 0) {
      cens <- c(cens, 0)
      time <- c(time, Inf)
    }
    probs <- diff(-c(1, cens))
    cout <- matrix(NA, R, n)
    for (i in seq_len(n)) {
      # Censored observations keep their observed censoring time.
      if (data[i, 2] == 0) cout[, i] <- data[i, 1L]
      else {
        # Restrict the support to times beyond the observed failure time.
        pri <- probs[time > data[i, 1L]]
        ti <- time[time > data[i, 1L]]
        if (length(ti) == 1L) cout[, i] <- ti
        else cout[, i] <- bsample(ti, R, prob = pri)
      }
    }
    cout
  }
  n <- nrow(data)
  # Fstr/Gstr give the number of distinct times per stratum in the two
  # survfit objects (or the total count when unstratified); Fstart/Gstart
  # are running offsets into their concatenated time vectors.
  Fstart <- 1
  Fstr <- F.surv$strata
  if (is.null(Fstr)) Fstr <- length(F.surv$time)
  Gstart <- 1
  Gstr <- G.surv$strata
  if (is.null(Gstr)) Gstr <- length(G.surv$time)
  y0 <- matrix(NA, R, n)
  for (s in seq_along(table(strata[, 1L]))) {
    # Find the resampled failure times within strata for failures
    ns <- sum(strata[, 1L] == s)
    inds <- Fstart:(Fstr[s]+Fstart-1)
    y0[, strata[, 1L] == s] <- if (is.null(cox)) gety1(ns, R, F.surv, inds)
    else  gety2(ns, R, F.surv, cox$linear.predictors[strata[, 1L] == s], inds)
    Fstart <- Fstr[s]+Fstart
  }
  c0 <- matrix(NA, R, n)
  for (s in seq_along(table(strata[, 2L]))) {
    # Find the resampled censoring times within strata for censoring times
    ns <- sum(strata[, 2] == s)
    inds <- Gstart:(Gstr[s]+Gstart-1)
    c0[, strata[, 2] == s] <- if (sim != "cond") getc1(ns, R, G.surv, inds)
    else  getc2(ns, R, G.surv, inds, data[strata[,2] == s, index])
    Gstart <- Gstr[s]+Gstart
  }
  infs <- (is.infinite(y0) & is.infinite(c0))
  if (sum(infs) > 0) {
    # If both the resampled failure time and the resampled censoring time
    # are infinite then set the resampled time to be a failure at the largest
    # failure time in the failure time stratum containing the observation.
    evs <- seq_len(n)[data[, index[2L]] == 1]
    maxf <- tapply(data[evs, index[1L]], strata[evs, 1L], max)
    maxf <- matrix(maxf[strata[, 1L]], nrow = R, ncol = n, byrow = TRUE)
    y0[infs] <- maxf[infs]
  }
  # Observed time is the earlier of failure and censoring time; the second
  # slice of the array holds the corresponding event indicator.
  array(c(pmin(y0, c0), 1*(y0 <= c0)), c(dim(y0), 2))
}

empinf <- function(boot.out = NULL, data = NULL, statistic = NULL,
                   type = NULL, stype = NULL ,index = 1, t = NULL,
                   strata = rep(1, n), eps = 0.001, ...)
{
  #
  #   Empirical influence values for a statistic, by one of four methods:
  #   "inf"  = infinitesimal jackknife (numerical differentiation),
  #   "reg"  = regression on the bootstrap frequency array,
  #   "jack" = usual delete-one jackknife,
  #   "pos"  = positive (include-one-twice) jackknife.
  #   Either a bootstrap object or both 'data' and 'statistic' must be
  #   supplied; unspecified arguments are filled in from the bootstrap
  #   object when one is given.
  #
  single.index <- function(idx) {
    # Collapse a multi-element index to its first element, with a warning.
    if (length(idx) != 1L) {
      warning("only first element of 'index' used")
      idx <- idx[1L]
    }
    idx
  }
  if (!is.null(boot.out)) {
    # Influence values are undefined for a parametric resampling scheme.
    if (boot.out$sim == "parametric")
      stop("influence values cannot be found from a parametric bootstrap")
    data <- boot.out$data
    if (is.null(statistic)) statistic <- boot.out$statistic
    if (is.null(stype)) stype <- boot.out$stype
    if (!is.null(boot.out$strata)) strata <- boot.out$strata
  } else {
    if (is.null(data))
      stop("neither 'data' nor bootstrap object specified")
    if (is.null(statistic))
      stop("neither 'statistic' nor bootstrap object specified")
    if (is.null(stype)) stype <- "w"
  }
  n <- NROW(data)
  if (is.null(type)) {
    # Default method: regression when replicates are available, otherwise
    # whichever jackknife matches the statistic's calling convention.
    if (!is.null(t)) type <- "reg"
    else if (stype == "w") type <- "inf"
    else if (!is.null(boot.out) &&
             (boot.out$sim != "parametric") &&
             (boot.out$sim != "permutation")) type <- "reg"
    else type <- "jack"
  }
  if (type == "inf") {
    # Infinitesimal jackknife via numerical differentiation.
    if (stype != "w") stop("'stype' must be \"w\" for type=\"inf\"")
    index <- single.index(index)
    if (!is.null(t))
      warning("input 't' ignored; type=\"inf\"")
    L <- inf.jack(data, statistic, index, strata, eps, ...)
  } else if (type == "reg") {
    # Regression of the replicates on the bootstrap frequencies.
    if (is.null(boot.out))
      stop("bootstrap object needed for type=\"reg\"")
    if (is.null(t)) {
      index <- single.index(index)
      t <- boot.out$t[, index]
    }
    L <- empinf.reg(boot.out, t)
  } else if (type == "jack") {
    if (!is.null(t))
      warning("input 't' ignored; type=\"jack\"")
    index <- single.index(index)
    L <- usual.jack(data, statistic, stype, index, strata, ...)
  } else if (type == "pos") {
    if (!is.null(t))
      warning("input 't' ignored; type=\"pos\"")
    index <- single.index(index)
    L <- positive.jack(data, statistic, stype, index, strata, ...)
  }
  L
}

inf.jack <-
  function(data, stat, index = 1, strata  =  rep(1, n), eps  =  0.001, ...)
  {
    #
    #   Infinitesimal jackknife estimates of the empirical influence
    #   values, by numerical differentiation: each observation's weight is
    #   nudged up by a small step (eps/n) while its stratum is rescaled to
    #   keep the total stratum weight fixed, and the induced change in the
    #   statistic is divided by the step.
    #
    n <- NROW(data)
    step <- eps/n
    strata <- tapply(strata, as.numeric(strata))   # recode strata as 1, 2, ...
    w0 <- 1/table(strata)[strata]                  # equal weights within stratum
    t0 <- stat(data, w0, ...)[index]
    infl <- numeric(n)
    for (obs in seq_len(n)) {
      same <- strata == strata[obs]
      w <- w0
      # Shrink the whole stratum, then add the step back onto this case.
      w[same] <- (1 - step)*w[same]
      w[obs] <- w[obs] + step
      infl[obs] <- (stat(data, w, ...)[index] - t0)/step
    }
    infl
  }

empinf.reg <- function(boot.out, t = boot.out$t[,1L])
  #
  #  Regression estimates of the empirical influence values: the observed
  #  bootstrap replicates are regressed on the (scaled) resampling
  #  frequencies, dropping one reference observation per stratum to avoid
  #  collinearity, and the fitted coefficients are centred within strata.
  #
{
  keep <- which(is.finite(t))      # use only finite replicates
  t <- t[keep]
  R <- length(t)
  n <- NROW(boot.out$data)
  strata <- boot.out$strata
  strata <- if (is.null(strata)) rep(1, n)
            else tapply(strata, as.numeric(strata))
  ns <- table(strata)
  freq <- boot.array(boot.out)[keep, ]
  # Scale frequencies by stratum size to form the design matrix.
  X <- freq/matrix(ns[strata], R, n, byrow = TRUE)
  # Drop the lowest-numbered observation of each stratum from the design.
  dropped <- tapply(seq_len(n), strata, min)
  kept.cols <- seq_len(n)[-dropped]
  fit <- glm(t ~ X[, kept.cols])
  l <- rep(0, n)
  l[kept.cols] <- coefficients(fit)[-1L]
  # Centre the influence values within each stratum.
  l - tapply(l, strata, mean)[strata]
}

usual.jack <- function(data, stat, stype = "w", index = 1,
                       strata = rep(1, n), ...)
  #
  #  Delete-one jackknife estimates of the empirical influence values.
  #  The statistic is recomputed with each observation removed in turn and
  #  the change from the full-sample value, scaled by (stratum size - 1),
  #  is recorded.  How an observation is "removed" depends on stype:
  #  "w" zeroes its weight and renormalises the stratum, "f" zeroes its
  #  frequency, and otherwise it is dropped from the index vector.
  #
{
  n <- NROW(data)
  infl <- rep(0, n)
  strata <- tapply(strata, as.numeric(strata))   # recode strata as 1, 2, ...
  if (stype == "w") {
    w0 <- rep(1, n)/table(strata)[strata]
    t.full <- stat(data, w0, ...)[index]
    for (i in seq_len(n)) {
      w <- w0
      w[i] <- 0
      in.str <- strata == strata[i]
      # Renormalise so the stratum's weights still sum to their original total.
      w[in.str] <- w[in.str]/sum(w[in.str])
      infl[i] <- (sum(in.str) - 1)*(t.full - stat(data, w, ...)[index])
    }
  } else if (stype == "f") {
    f0 <- rep(1, n)
    t.full <- stat(data, f0, ...)[index]
    for (i in seq_len(n)) {
      f <- f0
      f[i] <- 0
      in.str <- strata == strata[i]
      infl[i] <- (sum(in.str) - 1)*(t.full - stat(data, f, ...)[index])
    }
  } else {
    idx0 <- seq_len(n)
    t.full <- stat(data, idx0, ...)[index]
    for (i in seq_len(n)) {
      in.str <- strata == strata[i]
      infl[i] <- (sum(in.str) - 1)*(t.full - stat(data, idx0[-i], ...)[index])
    }
  }
  infl
}

positive.jack <- function(data, stat, stype = "w", index = 1,
                          strata = rep(1 ,n), ...)
{
  #
  #  Use the positive jackknife to estimate the empirical influence values.
  #  The positive jackknife includes one observation twice to find its
  #  influence.  stype determines how the statistic receives the data:
  #  "w" = weights, "f" = frequencies, "i" = indices.
  #
  #  Bug fix: n must be assigned before 'strata' is used, because the
  #  default value of 'strata' is rep(1, n) and the promise was previously
  #  forced (by tapply) before n existed, so calling positive.jack without
  #  an explicit 'strata' failed with "object 'n' not found".
  n <- NROW(data)
  strata <- tapply(strata,as.numeric(strata))   # recode strata as 1, 2, ...
  L <- rep(0, n)
  if (stype == "w") {
    # Weighted form: duplicate observation i by doubling its weight in an
    # augmented stratum, then renormalise within the stratum.
    w0 <- rep(1, n)/table(strata)[strata]
    tobs <- stat(data, w0, ...)[index]
    for (i in seq_len(n)) {
      st1 <- c(strata,strata[i])
      w1 <- 1/table(st1)[strata]
      w1[i] <- 2*w1[i]
      gp <- strata == strata[i]
      w1[gp] <- w1[gp]/sum(w1[gp])
      L[i] <- (sum(gp)+1)*(stat(data, w1, ...)[index] - tobs)
    }
  } else if (stype == "f") {
    # Frequency form: observation i gets frequency 2, all others 1.
    f0 <- rep(1,n)
    tobs <- stat(data, f0, ...)[index]
    for (i in seq_len(n)) {
      f1 <- f0
      f1[i] <- 2
      gp <- strata == strata[i]
      L[i] <- (sum(gp)+1)*(stat(data, f1, ...)[index] - tobs)
    }
  } else if (stype == "i") {
    # Index form: observation i appears twice in the index vector.
    i0 <- seq_len(n)
    tobs <- stat(data, i0, ...)[index]
    for (i in seq_len(n)) {
      i1 <- c(i0, i)
      gp <- strata == strata[i]
      L[i] <- (sum(gp)+1)*(stat(data, i1, ...)[index] - tobs)
    }
  }
  L
}

linear.approx <- function(boot.out, L = NULL, index = 1, type = NULL,
                          t0 = NULL, t = NULL, ...)
  #
  #  Find the linear approximation to the bootstrap replicates of a
  #  statistic.  L should be the linear influence values which will
  #  be found by empinf if they are not supplied.
  #
  #  Returns a vector of length boot.out$R: t0 plus, stratum by stratum,
  #  the bootstrap frequencies times the influence values divided by the
  #  stratum size.
  #
{
  f <- boot.array(boot.out)
  n <- length(f[1,  ])
  # A vector index is only usable when both t0 and t were supplied.
  if ((length(index) > 1L) && (is.null(t0) || is.null(t))) {
    warning("only first element of 'index' used")
    index <- index[1L]
  }
  if (is.null(t0)) {
    # Take the observed value from the bootstrap object and, if needed,
    # estimate the influence values from it too.
    t0 <- boot.out$t0[index]
    if (is.null(L))
      L <- empinf(boot.out, index=index, type=type, ...)
  } else if (is.null(t) && is.null(L)) {
    # t0 was supplied without t or L: warn and estimate L from the
    # bootstrap object instead.
    warning("input 't0' ignored: neither 't' nor 'L' supplied")
    t0 <- t0[index]
    L <- empinf(boot.out, index=index, type=type, ...)
  }
  else if (is.null(L))
    # t0 and t supplied: estimate L by regression on the supplied t.
    L <- empinf(boot.out, type=type, t=t, ...)
  tL <- rep(t0, boot.out$R)
  strata <- boot.out$strata
  if (is.null(strata))
    strata <- rep(1, n)
  else 	strata <- tapply(strata,as.numeric(strata))
  S <- length(table(strata))
  # Accumulate each stratum's contribution to the linear approximation.
  for(s in 1L:S) {
    i.s <- seq_len(n)[strata == s]
    tL <- tL + f[, i.s] %*% L[i.s]/length(i.s)
  }
  as.vector(tL)
}

envelope <-
  function(boot.out = NULL, mat = NULL, level = 0.95, index = 1L:ncol(mat))
    #
    #  Function to estimate pointwise and overall confidence envelopes for
    #  a function.
    #
    #  mat is a matrix of bootstrap values of the function at a number of
    #     points.  The points at which they are evaluated are assumed to
    #     be constant over the rows.
    #
    #  level may be a single level for both envelopes or a vector with
    #     the pointwise level first and the overall level second.
    #
  {
    emperr <- function(rmat, p = 0.05, k = NULL)
      #  Local function to estimate the overall error rate of an envelope.
      #  Returns c(k, nominal two-sided level p, empirical proportion of
      #  rows escaping the envelope formed by the k-th extreme ranks).
    {
      R <- nrow(rmat)
      if (is.null(k)) k <- p*(R+1)/2 else p <- 2*k/(R+1)
      # A row escapes when any of its ranks reaches either tail.
      kf <- function(x, k, R) 1*((min(x) <= k)|(max(x) >= R+1L-k))
      c(k, p, sum(apply(rmat, 1L, kf, k, R))/(R+1))
    }
    kfun <- function(x, k1, k2)
      # Local function to find the cut-off points in each column of the matrix.
      sort(x ,partial = sort(c(k1, k2)))[c(k1, k2)]
    if (!is.null(boot.out) && isMatrix(boot.out$t)) mat <- boot.out$t
    if (!isMatrix(mat)) stop("bootstrap output matrix missing")
    if (length(index) < 2L) stop("use 'boot.ci' for scalar parameters")
    mat <- mat[,index]
    rmat <- apply(mat,2L,rank)
    R <- nrow(mat)
    if (length(level) == 1L) level <- rep(level,2L)
    # Pointwise envelope: k-th smallest and k-th largest order statistics.
    k.pt <- floor((R+1)*(1-level[1L])/2+1e-10)
    k.pt <- c(k.pt, R+1-k.pt)
    err.pt <- emperr(rmat,k = k.pt[1L])
    # The widest possible envelope (k = 1) bounds what is achievable.
    ov <- emperr(rmat,k = 1)
    ee <- err.pt
    al <- 1-level[2L]
    if (ov[3L] > al)
      warning("unable to achieve requested overall error rate")
    else {
      continue <- !(ee[3L] < al)
      while(continue) {
        #  If the observed error is greater than the level required for the overall
        #  envelope then try another envelope.  This loop uses linear interpolation
        #  on the integers between 1 and k.pt[1L] to find the required value.
        kk <- ov[1L]+round((ee[1L]-ov[1L])*(al-ov[3L])/ (ee[3L]-ov[3L]))
        # Keep the trial k strictly inside the current bracket [ov, ee].
        if (kk == ov[1L]) kk <- kk+1
        else if (kk == ee[1L]) kk <- kk-1
        temp <- emperr(rmat, k = kk)
        # Shrink the bracket: failing envelopes tighten ee, passing ones ov.
        if (temp[3L] > al) ee <- temp
        else ov <- temp
        continue <- !(ee[1L] == ov[1L]+1)
      }
    }
    k.ov <- c(ov[1L], R+1-ov[1L])
    err.ov <- ov[-1L]
    out <- apply(mat, 2L, kfun, k.pt, k.ov)
    # Rows of 'out' are reordered so each envelope's upper limit comes first.
    list(point = out[2:1,], overall = out[4:3,], k.pt = k.pt,
         err.pt = err.pt[-1L], k.ov = k.ov, err.ov = err.ov, err.nom = 1-level)
  }


glm.diag <- function(glmfit)
{
  #
  #  Diagnostics for a fitted "glm" object: standardized deviance and
  #  Pearson residuals, jackknife-style combined residuals, approximate
  #  Cook statistics and leverages, together with the scale estimate used.
  #
  prior.w <- if (is.null(glmfit$prior.weights))
    rep(1, length(glmfit$residuals)) else glmfit$prior.weights
  # The residuals are standardized by an estimated dispersion for the
  # gaussian and Gamma families, and by 1 for all other families.
  fam <- family(glmfit)$family[1L]
  sd <- switch(fam,
               "gaussian" = sqrt(glmfit$deviance/glmfit$df.residual),
               "Gamma" = sqrt(sum(prior.w*(glmfit$y/fitted(glmfit) - 1)^2)/
                                glmfit$df.residual),
               1)
  dev <- residuals(glmfit, type = "deviance")/sd
  pear <- residuals(glmfit, type = "pearson")/sd
  # lm.influence drops zero-weight cases, so pad the hat values back out
  # to the full length with zeros.
  h <- rep(0, length(prior.w))
  h[prior.w != 0] <- lm.influence(glmfit)$hat
  p <- glmfit$rank
  rp <- pear/sqrt(1 - h)                     # studentized Pearson residuals
  rd <- dev/sqrt(1 - h)                      # studentized deviance residuals
  cook <- (h*rp^2)/((1 - h)*p)               # approximate Cook statistics
  res <- sign(dev)*sqrt(dev^2 + h*rp^2)      # combined (likelihood) residuals
  list(res = res, rd = rd, rp = rp, cook = cook, h = h, sd = sd)
}


# glm.diag.plots <-
#   function(glmfit, glmdiag = glm.diag(glmfit), subset  =  NULL,
#            iden = FALSE, labels = NULL, ret = FALSE)
#   {
#     #  Diagnostic plots for objects of class "glm"
#     if (is.null(glmdiag))
#       glmdiag <- glm.diag(glmfit)
#     if (is.null(subset))
#       subset <- seq_along(glmdiag$h)
#     else if (is.logical(subset))
#       subset <- seq_along(subset)[subset]
#     else if (is.numeric(subset) && all(subset<0))
#       subset <- (1L:(length(subset)+length(glmdiag$h)))[subset]
#     else if (is.character(subset)) {
#       if (is.null(labels)) labels <- subset
#       subset <- seq_along(subset)
#     }
#     #	close.screen(all = T)
#     #	split.screen(c(2, 2))
#     #	screen(1) #
#     par(mfrow = c(2,2))
#     #  Plot the deviance residuals against the fitted values
#     x1 <- predict(glmfit)
#     plot(x1, glmdiag$res, xlab = "Linear predictor", ylab = "Residuals")
#     pars <- vector(4L, mode="list")
#     pars[[1L]] <- par("usr")
#     #	screen(2) #
#     #  Plot a normal QQ plot of the standardized deviance residuals
#     y2 <- glmdiag$rd
#     x2 <- qnorm(ppoints(length(y2)))[rank(y2)]
#     plot(x2, y2, ylab = "Quantiles of standard normal",
#          xlab = "Ordered deviance residuals")
#     abline(0, 1, lty = 2)
#     pars[[2L]] <- par("usr")
#     #	screen(3) #
#     #  Plot the Cook statistics against h/(1-h) and draw line to highlight
#     #  possible influential and high leverage points.
#     hh <- glmdiag$h/(1 - glmdiag$h)
#     plot(hh, glmdiag$cook, xlab = "h/(1-h)", ylab = "Cook statistic")
#     rx <- range(hh)
#     ry <- range(glmdiag$cook)
#     rank.fit <- glmfit$rank
#     nobs <- rank.fit + glmfit$df.residual
#     cooky <- 8/(nobs - 2 * rank.fit)
#     hy <- (2 * rank.fit)/(nobs - 2 * rank.fit)
#     if ((cooky >= ry[1L]) && (cooky <= ry[2L])) abline(h = cooky, lty = 2)
#     if ((hy >= rx[1L]) && (hy <= rx[2L])) abline(v = hy, lty = 2)
#     pars[[3L]] <- par("usr")
#     #	screen(4) #
#     #  Plot the Cook statistics against the observation number in the original
#     #  data set.
#     plot(subset, glmdiag$cook, xlab = "Case", ylab = "Cook statistic")
#     if ((cooky >= ry[1L]) && (cooky <= ry[2L])) abline(h = cooky, lty = 2)
#     xx <- list(x1,x2,hh,subset)
#     yy <- list(glmdiag$res, y2, glmdiag$cook, glmdiag$cook)
#     pars[[4L]] <- par("usr")
#
#     if (is.null(labels)) labels <- names(x1)
#     while (iden) {
#       #  If interaction with the plots is required then ask the user which plot
#       #  they wish to interact with and then run identify() on that plot.
#       #  When the user terminates identify(), reprompt until no further interaction
#       #  is required and the user inputs a 0.
#       cat("****************************************************\n")
#       cat("Please Input a screen number (1,2,3 or 4)\n")
#       cat("0 will terminate the function \n")
#       #		num <- scan(nmax=1)
#       num <- as.numeric(readline())
#       if ((length(num) > 0L) &&
#           ((num == 1)||(num == 2)||(num == 3)||(num == 4))) {
#         cat(paste("Interactive Identification for screen",
#                   num,"\n"))
#         cat("left button = Identify, center button = Exit\n")
#         #			screen(num, new=F)
#         nm <- num+1
#         par(mfg = c(trunc(nm/2),1 +nm%%2, 2, 2))
#         par(usr = pars[[num]])
#         identify(xx[[num]], yy[[num]], labels)
#       }
#       else 	iden <- FALSE
#     }
#     #	close.screen(all=T)
#     par(mfrow = c(1, 1))
#     if (ret) glmdiag else invisible()
#   }

exp.tilt <- function(L, theta = NULL, t0 = 0, lambda = NULL,
                     strata = rep(1, length(L)) )
{
  #
  #  Exponential tilting of the linear approximation L of a statistic so
  #  that the tilted distribution has mean theta (measured relative to
  #  t0).  Either the target means theta or the tilting multipliers
  #  lambda must be supplied; when theta is given, lambda is found by
  #  numerical minimisation.
  #
  #  Returns a list with the tilted probabilities p (a vector, or an
  #  m x length(L) matrix when m > 1 tilting values are used), the
  #  achieved theta and the multipliers lambda.
  #
  tilt.probs <- function(lam, L, strata, ns) {
    # Tilted probabilities for one multiplier: within each stratum the
    # weights are proportional to exp(lam * L / stratum size).
    p <- rep(NA, length(L))
    for (s in seq_along(ns)) {
      e <- exp(lam*L[strata == s]/ns[s])
      p[strata == s] <- e/sum(e)
    }
    p
  }
  sq.err <- function(lam, target, L, strata, ns) {
    # Squared discrepancy between the tilted mean and its target; this is
    # minimised over lam and should attain 0 when the target is feasible.
    p <- tilt.probs(lam, L, strata, ns)
    (sum(L*p) - target)^2
  }
  strata <- tapply(strata, as.numeric(strata))
  ns <- table(strata)
  if (!is.null(theta)) {
    # Solve for one multiplier per requested (centred) mean.
    theta <- theta - t0
    m <- length(theta)
    lambda <- rep(NA, m)
    for (i in 1L:m) {
      lambda[i] <- optim(0, sq.err, method = "BFGS", target = theta[i],
                         L = L, strata = strata, ns = ns)$par
      msd <- sq.err(lambda[i], theta[i], L, strata, ns)
      if (is.na(msd) || (abs(msd) > 1e-6))
        stop(gettextf("unable to find multiplier for %f", theta[i]),
             domain = NA)
    }
  }
  else if (is.null(lambda))
    stop("'theta' or 'lambda' required")
  # One row of probabilities per multiplier; collapse to a vector for m = 1.
  probs <- t(vapply(lambda, tilt.probs, numeric(length(L)),
                    L = L, strata = strata, ns = ns))
  if (length(lambda) == 1L) probs <- as.vector(probs)
  if (is.null(theta)) theta <- t0 + sum(probs * L)
  else theta <- theta + t0
  list(p = probs, theta = theta, lambda = lambda)
}


imp.weights <- function(boot.out, def = TRUE, q = NULL)
{
  #
  # Takes boot.out object and calculates importance weights
  # for each element of boot.out$t, as if sampling from multinomial
  # distribution with probabilities q.
  # If q is NULL the weights are calculated as if
  # sampling from a distribution with equal probabilities.
  # If def=T calculates weights using defensive mixture
  # distribution, if F uses weights knowing from which element of
  # the mixture they come.
  #
  R <- boot.out$R
  # With a single bootstrap (scalar R) there is no mixture, so the
  # defensive calculation is unnecessary.
  if (length(R) == 1L)
    def <- FALSE
  f <- boot.array(boot.out)
  n <- ncol(f)
  strata <- tapply(boot.out$strata,as.numeric(boot.out$strata))
  #    ns <- table(strata)
  if (is.null(q))  q <- rep(1,ncol(f))
  if (any(q == 0)) stop("0 elements not allowed in 'q'")
  p <- boot.out$weights
  # If the target distribution coincides with the (single) sampling
  # distribution, every weight is 1.
  if ((length(R) == 1L) && all(abs(p - q)/p < 1e-10))
    return(rep(1, R))
  np <- length(R)
  q <- normalize(q, strata)
  # Multinomial log-likelihood of each replicate's frequency vector under
  # the target distribution q ...
  lw.q <- as.vector(f %*% log(q))
  if (!isMatrix(p))
    p <- as.matrix(t(p))
  p <- t(apply(p, 1L, normalize, strata))
  # ... and under each of the np sampling distributions p[i, ].
  lw.p <- matrix(NA, sum(R), np)
  for(i in 1L:np) {
    # Restrict to the support of p[i, ] to avoid log(0).
    zz <- seq_len(n)[p[i,  ] > 0]
    lw.p[, i] <- f[, zz] %*% log(p[i, zz])
  }
  if (def)
    # Defensive mixture: compare q with the R-weighted mixture of the
    # sampling densities.
    w <- 1/(exp(lw.p - lw.q) %*% R/sum(R))
  else {
    # Otherwise compare each replicate only with the mixture component it
    # was generated from (rows grouped by component, R[i] rows each).
    i <- cbind(seq_len(sum(R)), rep(seq_along(R), R))
    w <- exp(lw.q - lw.p[i])
  }
  as.vector(w)
}

const <- function(w, eps=1e-8) {
  # TRUE when every value of w agrees with the mean to within tolerance eps.
  deviation <- abs(w - mean(w, na.rm = TRUE))
  all(deviation < eps)
}

imp.moments <- function(boot.out=NULL, index=1, t=boot.out$t[,index],
                        w=NULL, def=TRUE, q=NULL )
{
  #
  #  Raw, ratio and regression estimates of the mean and variance of t
  #  under importance resampling with weights w.  The weights are derived
  #  from boot.out via imp.weights unless supplied directly.
  #
  if (missing(t) && is.null(boot.out$t))
    stop("bootstrap replicates must be supplied")
  if (is.null(w)) {
    if (!is.null(boot.out)) w <- imp.weights(boot.out, def, q)
    else stop("either 'boot.out' or 'w' must be specified.")
  }
  if ((length(index) > 1L) && missing(t)) {
    warning("only first element of 'index' used")
    t <- boot.out$t[,index[1L]]
  }
  # Restrict to finite replicates (and their weights).
  keep <- which(is.finite(t))
  t <- t[keep]
  w <- w[keep]
  if (const(w)) {
    # Equal weights: all three estimators reduce to the sample moments.
    m.raw <- m.rat <- m.reg <- mean(t)
    v.raw <- v.rat <- v.reg <- var(t)
  } else {
    y <- t*w
    m.raw <- mean(y)
    m.rat <- sum(y)/sum(w)
    # Regression estimator: adjust the raw mean using the slope of y on w.
    m.reg <- mean(y) - coefficients(lm(y ~ w))[2L]*(mean(w) - 1)
    v.raw <- mean(w*(t - m.raw)^2)
    v.rat <- sum(w/sum(w)*(t - m.rat)^2)
    x <- w*(t - m.reg)^2
    v.reg <- mean(x) - coefficients(lm(x ~ w))[2L]*(mean(w) - 1)
  }
  list(raw = c(m.raw, v.raw), rat = c(m.rat, v.rat),
       reg = as.vector(c(m.reg, v.reg)))
}


imp.reg <- function(w)
{
  #
  #  Turn raw importance sampling weights into cumulative regression
  #  importance sampling weights.  Called by imp.prob and imp.quantile so
  #  those functions can form regression estimates of tail probabilities
  #  and quantiles.  Returns the normalized cumulative sum.
  #
  if (!const(w)) {
    R <- length(w)
    w.bar <- mean(w)
    w.var <- (R - 1)/R*var(w)          # divisor-R variance of the weights
    slope <- (1 - w.bar)/w.var
    w <- w*(1 + slope*(w - w.bar))/R   # regression-adjusted weights
  }
  cumsum(w)/sum(w)
}


imp.quantile <- function(boot.out=NULL, alpha=NULL, index=1,
                         t=boot.out$t[,index], w=NULL, def=TRUE, q=NULL )
{
  # Calculates raw, ratio, and regression estimates of alpha quantiles
  #  of t using importance sampling weights in w.  Weights are derived
  #  from boot.out via imp.weights unless supplied directly.
  if (missing(t) && is.null(boot.out$t))
    stop("bootstrap replicates must be supplied")
  if (is.null(alpha)) alpha <- c(0.01,0.025,0.05,0.95,0.975,0.99)
  if (is.null(w))
    if (!is.null(boot.out))
      w <- imp.weights(boot.out, def, q)
    else	stop("either 'boot.out' or 'w' must be specified.")
    if ((length(index) > 1L) && missing(t)){
      warning("only first element of 'index' used")
      t <- boot.out$t[,index[1L]]
    }
    # Restrict to finite replicates and sort them, carrying the weights.
    fins <- seq_along(t)[is.finite(t)]
    t <- t[fins]
    w <- w[fins]
    o <- order(t)
    t <- t[o]
    w <- w[o]
    cum <- cumsum(w)
    # Mirrored copies (-t in reverse order) are used so that upper-tail
    # quantiles can be computed as lower-tail quantiles of -t.
    # NOTE(review): w was already reordered by o above, so w[rev(o)] is
    # not simply rev(w) here; confirm this matches the intended mirrored
    # weights.
    o <- rev(o)
    w.m <- w[o]
    t.m <- -rev(t)
    cum.m <- cumsum(w.m)
    # Ratio and regression versions of the cumulative weights.
    cum.rat <- cum/mean(w)
    cum.reg <- imp.reg(w)
    R <- length(w)
    raw <- rat <- reg <- rep(NA,length(alpha))
    # Each quantile is the largest t whose cumulative weight does not
    # exceed (R+1)*alpha.
    for (i in seq_along(alpha)) {
      if (alpha[i]<=0.5) raw[i] <-  max(t[cum<=(R+1)*alpha[i]])
      else raw[i] <- -max(t.m[cum.m<=(R+1)*(1-alpha[i])])
      rat[i] <- max(t[cum.rat <= (R+1)*alpha[i]])
      reg[i] <- max(t[cum.reg <= (R+1)*alpha[i]])
    }
    list(alpha=alpha, raw=raw, rat=rat, reg=reg)
}

imp.prob <- function(boot.out=NULL, index=1, t0=boot.out$t0[index],
                     t=boot.out$t[,index], w=NULL,  def=TRUE, q=NULL)
{
  # Calculates raw, ratio, and regression estimates of tail probability
  #  pr( t <= t0 ) using importance sampling weights in w.  Weights are
  #  derived from boot.out via imp.weights unless supplied directly.
  # Local helper: TRUE when x is absent (zero length) or NA.
  is.missing <- function(x) length(x) == 0L || is.na(x)

  if (missing(t) && is.null(boot.out$t))
    stop("bootstrap replicates must be supplied")
  if (is.null(w))
    if (!is.null(boot.out))
      w <- imp.weights(boot.out, def, q)
    else	stop("either 'boot.out' or 'w' must be specified.")
    if ((length(index) > 1L) && (missing(t) || missing(t0))) {
      warning("only first element of 'index' used")
      index <- index[1L]
      if (is.missing(t)) t <- boot.out$t[,index]
      if (is.missing(t0)) t0 <- boot.out$t0[index]
    }
    # Restrict to finite replicates and sort them, carrying the weights.
    fins <- seq_along(t)[is.finite(t)]
    t <- t[fins]
    w <- w[fins]
    o <- order(t)
    t <- t[o]
    w <- w[o]
    raw <- rat <- reg <- rep(NA,length(t0))
    # cum is the normalized cumulative weight; cum.r its regression version.
    cum <- cumsum(w)/sum(w)
    cum.r <- imp.reg(w)
    for (i in seq_along(t0)) {
      # raw: average weight below t0; rat/reg: cumulative weight at the
      # largest replicate not exceeding t0.
      raw[i] <-sum(w[t<=t0[i]])/length(w)
      rat[i] <- max(cum[t<=t0[i]])
      reg[i] <- max(cum.r[t<=t0[i]])
    }
    list(t0=t0, raw=raw, rat=rat, reg=reg )
}

smooth.f <- function(theta, boot.out, index=1, t=boot.out$t[,index],
                     width=0.5 )
{
  # Does frequency smoothing of the frequency array for boot.out with
  # bandwidth A to give frequencies for 'typical' distribution at theta.
  # Returns a probability vector (or an m x n matrix for m values of
  # theta) normalized within strata.
  if ((length(index) > 1L) && missing(t)) {
    warning("only first element of 'index' used")
    t <- boot.out$t[,index[1L]]
  }
  if (isMatrix(t)) {
    warning("only first column of 't' used")
    t <- t[,1L]
  }
  # Use only finite replicates.
  fins <- seq_along(t)[is.finite(t)]
  t <- t[fins]
  m <- length(theta)
  # Bandwidth: width times the (regression) standard deviation of t.
  v <- imp.moments(boot.out, t=t)$reg[2L]
  eps <- width*sqrt(v)
  # Gaussian kernel weights of each replicate about each theta value.
  if (m  == 1)
    w <- dnorm((theta-t)/eps )/eps
  else {
    w <- matrix(0,length(t),m)
    for (i in 1L:m)
      w[,i] <- dnorm((theta[i]-t)/eps )/eps
  }
  # Kernel-weighted column sums of the bootstrap frequency array.
  f <- crossprod(boot.array(boot.out)[fins,] , w)
  strata <- boot.out$strata
  strata <- tapply(strata, as.numeric(strata))
  ns <- table(strata)
  out <- matrix(NA,ncol(f),nrow(f))
  # Renormalize so the smoothed frequencies sum to 1 within each stratum.
  for (s in seq_along(ns)) {
    ts <- matrix(f[strata == s,],m,ns[s],byrow=TRUE)
    ss <- apply(ts,1L,sum)
    out[,strata == s] <-  ts/matrix(ss,m,ns[s])
  }
  if (m == 1) out <- as.vector(out)
  out
}

tilt.boot <- function(data, statistic, R, sim="ordinary",
                      stype="i", strata = rep(1, n), L = NULL, theta=NULL,
                      alpha=c(0.025,0.975), tilt=TRUE, width=0.5, index=1, ... )
{
  #  Does tilted bootstrap sampling of stat applied to data with strata strata
  #  and simulation type sim.
  #  The levels of R give the number of simulations at each level.  For example,
  #  R=c(199,100,50) will give three separate bootstraps with 199, 100, 50
  #  simulations.  If R[1L]>0 the first simulation is assumed to be untilted
  #  and L can be estimated from it by regression, or it can be frequency
  #  smoothed to give probabilities p.
  #  If tilt=T use exponential tilting with empirical influence value L
  #  given explicitly or estimated from boot0, but if tilt=F
  #  (in which case R[1L] should be large) frequency smoothing of boot0 is used
  #  with bandwidth A.
  #  Tilting/frequency smoothing is to theta (so length(theta)=length(R)-1).
  #  The function assumes at present that q=0 is the median of the distribution
  #  of t*.
  if ((sim != "ordinary") && (sim != "balanced"))
    stop("invalid value of 'sim' supplied")
  if (!is.null(theta) && (length(R) != length(theta)+1))
    stop("'R' and 'theta' have incompatible lengths")
  if (!tilt && (R[1L] == 0))
    stop("R[1L] must be positive for frequency smoothing")
  call <- match.call()
  n <- NROW(data)
  if (R[1L]>0) {
    # If required run an initial bootstrap with equal weights.
    if (is.null(theta) && (length(R) != length(alpha)+1))
      stop("'R' and 'alpha' have incompatible lengths")
    boot0 <- boot(data, statistic, R = R[1L], sim=sim, stype=stype,
                  strata = strata, ... )
    # Default tilting targets: the alpha quantiles of the initial run.
    if (is.null(theta)) {
      if (any(c(alpha,1-alpha)*(R[1L]+1) <= 5))
        warning("extreme values used for quantiles")
      theta <- quantile(boot0$t[,index],alpha)
    }
  }
  else {
    # If no initial bootstrap is run then exponential tilting must be
    # used.  Also set up a dummy bootstrap object to hold the output.
    tilt <- TRUE
    if (is.null(theta))
      stop("'theta' must be supplied if R[1L] = 0")
    if (!missing(alpha))
      warning("'alpha' ignored; R[1L] = 0")
    # Identity argument of the appropriate kind for the observed statistic.
    if (stype == "i") orig <- seq_len(n)
    else if (stype == "f") orig <- rep(1,n)
    else orig <- rep(1,n)/n
    boot0 <- boot.return(sim=sim,t0=statistic(data,orig, ...),
                         t=NULL, strata=strata, R=0, data=data,
                         stat=statistic, stype=stype,call=NULL,
                         seed=get(".Random.seed", envir=.GlobalEnv, inherits = FALSE),
                         m=0,weights=NULL)
  }
  # Calculate the weights for the subsequent bootstraps
  # NOTE(review): the 'else' below appears to bind to the inner
  # 'if (R[1L] > 0)' (dangling-else), so when L is needed it is estimated
  # from boot0 when an initial bootstrap was run, and directly from the
  # data otherwise — confirm this is the intended reading.
  if (is.null(L) & tilt)
    if (R[1L] > 0) L <- empinf(boot0, index, ...)
  else L <- empinf(data=data, statistic=statistic, stype=stype,
                   index=index, ...)
  if (tilt) probs <- exp.tilt(L, theta, strata=strata, t0=boot0$t0[index])$p
  else probs <- smooth.f(theta, boot0, index, width=width)#
  # Run the weighted bootstraps and collect the output.
  boot1 <- boot(data, statistic, R[-1L], sim=sim, stype=stype,
                strata=strata, weights=probs, ...)
  # Merge the tilted replicates into the initial bootstrap object.
  boot0$t <- rbind(boot0$t, boot1$t)
  boot0$weights <- rbind(boot0$weights, boot1$weights)
  boot0$R <- c(boot0$R, boot1$R)
  boot0$call <- call
  boot0$theta <- theta
  attr(boot0, "boot_type") <- "tilt.boot"
  boot0
}


control <- function(boot.out, L=NULL, distn=NULL, index=1, t0=NULL, t=NULL,
                    bias.adj=FALSE, alpha=NULL,