# R/functions.R


################################ FNs: SCRAPING PUBLISHED META-ANALYSIS ################################


#' Parse a string with point estimate and confidence interval
#'
#' Given a vector of strings such as "0.65 (0.6, 0.70)", for example obtained by running optical character recognition (OCR)
#' software on a screenshot of a published forest plot, parses the strings into a dataframe of
#' point estimates and upper confidence interval limits. Assumes that the point estimate occurs before
#' an opening bracket of the form "(" or "[" and that the confidence interval upper limit follows the
#' character \code{sep} (by default a comma, but it might be a hyphen, for example). To further parse this dataframe
#' into point estimates and variances, see \code{MetaUtility::scrape_meta}.
#' @param string A vector of strings to be parsed.
#' @param sep The character (not including whitespaces) separating the lower from the upper limits.
#' @export
#' @importFrom
#' stringr str_replace_all str_remove_all
#' @examples
#' # messy string of confidence intervals
#' mystring = c( "0.65 [0.6, 0.7]", "0.8(0.5, 0.9]", "1.2  [0.3, 1.5)")
#' parse_CI_string(mystring)
#'
#' # now with a hyphen separator
#' mystring = c( "0.65 [0.6- 0.7]", "0.8(0.5 - 0.9]", "1.2  [0.3 -1.5)")
#' parse_CI_string(mystring, sep="-")

# assumes the separator character (comma by default) precedes the upper limit
# everything before the first bracket is treated as the point estimate
parse_CI_string = function( string, sep = "," ) {

  # standardize the CI brackets
  string = str_replace_all(string, "\\[", "\\(")
  string = str_replace_all(string, "\\]", "\\)")

  # remove all spaces
  string = str_replace_all(string, " ", "")

  # parse CI string into est and upper limit
  yi = as.numeric( unlist( lapply( strsplit( string, "\\("), function(x) x[1] ) ) )

  # the upper limit is the last entry after the separator
  hi = unlist( lapply( strsplit( string, sep), function(x) x[length(x)] ) )

  # strip the closing bracket and any stray characters, then convert to numeric
  hi = as.numeric( str_remove_all( hi, "[) ,]" ) )

  return( data.frame( yi, hi) )
}
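
# Illustration only (assumed input strings): parse_CI_string can be chained with
# scrape_meta (defined below) to go from OCR'd strings of RRs and CI limits to a
# meta-analysis-ready dataframe of log-RRs and variances, e.g.:
# strings = c( "0.65 [0.6, 0.7]", "0.8 (0.5, 0.9)", "1.2 [0.3, 1.5)" )
# parsed = parse_CI_string(strings)
# scrape_meta( type = "RR", est = parsed$yi, hi = parsed$hi )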



#' Convert forest plot or summary table to meta-analytic dataset
#'
#' Given relative risks (RR) and upper bounds of 95\% confidence intervals (CI)
#' from a forest plot or summary table, returns a dataframe ready for meta-analysis
#' (e.g., via the \code{metafor} package) with the log-RRs and their variances.
#' Optionally, the user may indicate studies for which the point estimate is to be
#' interpreted as an odds ratio of a common outcome rather than a relative risk;
#' for such studies, the function applies VanderWeele (2017)'s square-root transformation to convert
#' the odds ratio to an approximate risk ratio.
#' @param type \code{RR} if point estimates are RRs or ORs (to be handled on log scale); \code{raw} if point estimates are raw differences, standardized mean differences, etc. (such that they can be handled with no transformations)
#' @param est Vector of study point estimates on RR or OR scale
#' @param hi Vector of upper bounds of 95\% CIs on RRs
#' @param sqrt Vector of booleans (TRUE/FALSE) for whether each study measured an odds ratio of a common outcome that should be approximated as a risk ratio via the square-root transformation
#' @export
#' @references
#' VanderWeele TJ (2017). On a square-root transformation of the odds ratio for a common outcome. \emph{Epidemiology}.
#' @import stats
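#' @examples
#' # a small illustration with made-up RRs and upper CI limits; the third study
#' # reports an OR for a common outcome, so the square-root transformation is applied
#' scrape_meta( type = "RR",
#'              est = c(1.54, 0.89, 2.10),
#'              hi = c(2.10, 1.20, 3.00),
#'              sqrt = c(FALSE, FALSE, TRUE) )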

scrape_meta = function( type="RR", est, hi, sqrt=FALSE ){

  if ( type == "RR" ) {
    # take square root for certain elements
    RR = est
    RR[sqrt] = sqrt( RR[sqrt] )

    # same for upper CI limit
    hi.RR = hi
    hi.RR[sqrt] = sqrt( hi.RR[sqrt] )

    sei = ( log(hi.RR) - log(RR) ) / qnorm(.975)

    return( data.frame( yi = log(RR), vyi = sei^2 ) )

  } else if ( type == "raw" ) {

    sei = ( hi - est ) / qnorm(.975)
    return( data.frame( yi = est, vyi = sei^2 ) )
  }
}



################################ FNs: FORMATTING ################################

#' Round while keeping trailing zeroes
#'
#' Rounds a numeric value and formats it as a string, keeping trailing zeroes.
#' @param x Numeric value to round
#' @param digits Digits for rounding
#' @export
#' @examples
#' round2(0.03000, digits = 4)
#'
#' # compare to base round, which drops trailing zeroes and returns a numeric
#' round(0.03000, digits = 4)
round2 = function(x, digits = 2) {
  formatC( round( x, digits ), format='f', digits=digits )
}


#' Manuscript-friendly confidence interval formatting
#'
#' Formats confidence interval lower and upper bounds into a rounded string.
#' @param lo Confidence interval lower limit (numeric)
#' @param hi Confidence interval upper limit (numeric)
#' @param digits Digits for rounding
#' @export
#' @examples
#' format_CI(0.36, 0.72, 3)
format_CI = function( lo, hi, digits = 2 ) {
  paste( "[", round2( lo, digits ), ", ", round2( hi, digits ), "]", sep="" )
}


#' Manuscript-friendly number formatting
#'
#' Formats a numeric result (e.g., a p-value) as a manuscript-friendly string. Values below a minimum cutoff (e.g., 10^-5) are
#' reported as, for example, "< 10^-5"; values between the minimum cutoff and a maximum cutoff (e.g., 0.01)
#' are reported in scientific notation; and values above the maximum cutoff are reported simply as rounded numbers (e.g., "0.72").
#' @param x Numeric value to format
#' @param digits Digits for rounding
#' @param cutoffs A vector containing the two cutoffs
#' @export
#' @examples
#' format_stat(0.735253)
#' format_stat(0.735253, digits = 4)
#' format_stat(0.0123)
#' format_stat(0.0001626)
#' format_stat(0.0001626, cutoffs = c(0.01, 10^-3))

format_stat = Vectorize( function(x,
                                  digits = 2,
                                  cutoffs = c(0.01, 10^-5) ) {
  if( length(cutoffs) != 2) stop("Only 2 cutoffs are supported")

  if ( x >= max(cutoffs) ) return( round2( x, digits ) )
  if ( x < max(cutoffs) & x >= min(cutoffs) ) return( formatC( x, format = "e", digits = 0 ) )
  if ( x < min(cutoffs) ) return( paste("<", min(cutoffs), sep=" ") )
}, vectorize.args = "x" )





################################ FNs: EFFECT SIZE CONVERSIONS ################################





# convert d to log-RR
# used for sensitivity analyses for unmeasured confounding
# uses square-root transformation (assumes common outcome; otherwise conservative)
# and Hasselblad/Chinn conversion
# gets variance by delta method

#' Convert Cohen's d to log risk ratio
#'
#' Converts Cohen's d (computed with a binary X) to a log risk ratio
#' for use in meta-analysis. Under the assumption that Y is approximately normal, the resulting log risk ratio represents a dichotomization of Y near its median; otherwise, the conversion will tend to be conservative.
#' @param d Cohen's d
#' @param se Standard error of d
#' @details  Internally, this function first converts d to a log odds ratio using Chinn's (2000) conversion. The resulting log odds ratio approximates the value that would be obtained if Y were dichotomized; if Y is approximately normal, the log odds ratio is approximately invariant to the choice of threshold at which Y is dichotomized (Chinn, 2000). Then, the function converts the log odds ratio to a log risk ratio using VanderWeele's (2020) square-root conversion. That conversion is conservative in that it allows for the possibility that the dichotomized Y is not rare (i.e., the point of dichotomization is not at an extreme value of Y).
#' @references
#' VanderWeele, TJ (2020). Optimal approximate conversions of odds ratios and hazard ratios to risk ratios. \emph{Biometrics}.
#'
#' Chinn S (2000). A simple method for converting an odds ratio to effect size for use in meta-analysis. \emph{Statistics in Medicine}.

#' @export
#' @examples
#' d_to_logRR( d = c(0.5, -0.2, .1),
#'         se = c(0.21, NA, 0.3) )
d_to_logRR = function( d, se = NA) {

  # simplified the math
  # Hasselblad conversion to log-OR followed by TVW's square-root transformation
  #  to RR so that we can imagine dichotomizing near median
  logRR = log( sqrt( exp( d * pi / sqrt(3) ) ) )

  # delta method:
  # derivative of log( sqrt( exp(c*d) ) ) wrt d = pi/(2*sqrt(3))
  # so squared derivative is pi^2 / 12
  varlogRR = ( pi^2 / 12 ) * se^2

  return( data.frame(logRR, varlogRR) )

}
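
# Illustration only (assumed values): the closed form above, log( sqrt( exp( d * pi / sqrt(3) ) ) ),
# simplifies to d * pi / (2 * sqrt(3)), so for d = 0.5:
# d_to_logRR( d = 0.5, se = 0.2 )$logRR   # equals 0.5 * pi / (2 * sqrt(3))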


#' Convert Pearson's r to Cohen's d
#'
#' Converts Pearson's r (computed with a continuous X and Y) to Cohen's d
#' for use in meta-analysis. The resulting Cohen's d represents the
#' estimated increase in standardized Y that is associated with a delta-unit
#' increase in X.
#' @param r Pearson's correlation
#' @param sx Sample standard deviation of X
#' @param delta Contrast in X for which to compute Cohen's d, specified in raw units of X (not standard deviations).
#' @param N Sample size used to estimate \code{r}
#' @param Ns Sample size used to estimate \code{sx}, if different from \code{N}
#' @param sx.known Is \code{sx} known rather than estimated? (By default, assumes \code{sx} is estimated, which will almost always be the case.)
#' @details To preserve the sign of the effect size, the code takes the absolute value of \code{delta}. The standard error
#' estimate assumes that X is approximately normal and that \code{N} is large.
#' @references
#' Mathur MB & VanderWeele TJ (2019). A simple, interpretable conversion from Pearson's correlation to Cohen's d for meta-analysis. \emph{Epidemiology}.
#' @export
#' @examples
#' # d for a 1-unit vs. a 2-unit increase in X
#' r_to_d( r = 0.5,
#'         sx = 2,
#'         delta = 1,
#'         N = 100 )
#' r_to_d( r = 0.5,
#'         sx = 2,
#'         delta = 2,
#'         N = 100 )
#'
#' # d when sx is estimated in the same vs. a smaller sample
#' # point estimate will be the same, but inference will be a little
#' # less precise in second case
#' r_to_d( r = -0.3,
#'          sx = 2,
#'          delta = 2,
#'          N = 300,
#'          Ns = 300 )
#'
#' r_to_d( r = -0.3,
#'         sx = 2,
#'         delta = 2,
#'         N = 300,
#'         Ns = 30 )
r_to_d = function(r, sx, delta, N = NA, Ns = N, sx.known = FALSE )  {

    # delta should always be positive regardless of sign of d
    delta = abs(delta)

    # point estimate
    d = (delta * r) / ( sx * sqrt(1-r^2) )

    # standard error and CI
    if ( !is.na(N) ) {
      term1 = 1 / ( r^2 * ( N - 3) )
      term2 = 1 / ( 2 * (Ns - 1) )
      if (sx.known == TRUE) term2 = 0
      se = abs(d) * sqrt( term1 + term2 )

      # handle the case where r = 0 exactly, using the limit of the SE as r -> 0
      if ( r == 0 ) {
        se = (1/sx) * delta * sqrt(1 / (N-3))
      }

      d.lo = d - qnorm(.975) * se
      d.hi = d + qnorm(.975) * se
    } else {
      se = NA
      d.lo = NA
      d.hi = NA
    }

    return( data.frame( d=d, se=se, lo=d.lo, hi=d.hi ) )
  }





#' Convert Pearson's r to Fisher's z
#'
#' Converts Pearson's r to Fisher's z for use in meta-analysis.
#' @param r Pearson's correlation
#' @export
#' @examples
#' r_to_z( c(.22, -.9, NA) )
r_to_z = function(r) {
  r.not.NA = r[ !is.na(r) ]

  if ( any( abs(r.not.NA) > 1 ) ) stop("Pearson's r cannot be greater than 1 in absolute value")

  # handle possible NAs in r vector
  z = rep(NA, length(r))
  z[ !is.na(r) ] = .5 * ( log(1 + r.not.NA) - log(1 - r.not.NA) )

  return(z)
}


#' Convert Fisher's z to Pearson's r
#'
#' Converts Fisher's z to Pearson's r for use in meta-analysis.
#' @param z Fisher's z
#' @export
#' @examples
#' z_to_r( c(1.1, NA, -0.2) )
z_to_r = function(z) {
  z.not.NA = z[ !is.na(z) ]

  # handle possible NAs in z vector
  r = rep(NA, length(z))

  r[ !is.na(z) ] = ( exp( 2 * z.not.NA ) - 1 ) / ( exp( 2 * z.not.NA ) + 1 )

  return(r)
}
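
# Illustration only: r_to_z and z_to_r are inverses, so a round trip recovers r
# (NAs are propagated), e.g.:
# r = c(0.22, -0.9, NA)
# z_to_r( r_to_z(r) )   # 0.22, -0.9, NA up to floating-point error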




################################ FNs: MISC ################################


#' Return confidence interval for tau for a meta-analysis
#'
#' Returns confidence interval lower and upper limits for tau (the estimated standard deviation of
#' the population effects) for a meta-analysis fit in \code{metafor::rma}.
#' @param meta A meta-analysis object fit in \code{metafor::rma}.
#' @param ci.level Confidence interval level as a proportion (e.g., 0.95)
#' @export
#' @import
#' metafor
#' stats
#' @examples
#' # calculate effect sizes for example dataset
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                    ci=cpos, di=cneg, data=metadat::dat.bcg)
#'
#' # fit random-effects model
#' # note that metafor package returns on the log scale
#' m = metafor::rma.uni(yi= d$yi, vi=d$vi, knha=TRUE,
#' measure="RR", method="REML" )
#'
#' tau_CI(m)
#'
#' # for nicer formatting
#' format_CI( tau_CI(m)[1], tau_CI(m)[2] )
tau_CI = function( meta,
                   ci.level = 0.95 ) {

  alpha = 1 - ci.level

  t2.lb = meta$tau2 - qnorm(1 - alpha/2) * meta$se.tau2
  t2.ub = meta$tau2 + qnorm(1 - alpha/2) * meta$se.tau2

  # if the lower limit for tau^2 is negative, truncate tau's lower limit at 0
  if ( t2.lb > 0 ) tau.lb = sqrt(t2.lb) else tau.lb = 0
  tau.ub = sqrt(t2.ub)

  return( c(tau.lb, tau.ub))
}



#' Calculate variance given confidence interval limit and point estimate
#'
#' Returns approximate variance (i.e., squared standard error) of point estimate given either the upper or lower limit (\code{ci.lim}) of a Wald-type confidence interval. If degrees of freedom (\code{df}) are not provided, it is assumed that the confidence interval uses a z-distribution approximation. If degrees of freedom are provided, it is assumed that the confidence interval uses a t-distribution approximation.
#' @param est Point estimate
#' @param ci.lim Lower or upper confidence interval limit
#' @param ci.level Confidence interval level as a proportion (e.g., 0.95)
#' @param df Degrees of freedom
#' @export
#' @details The estimate and confidence interval must be provided on a scale such that the confidence interval is symmetric. For example, if the point estimate is a relative risk, then the estimate and confidence interval should be provided on the log-relative risk scale.
#' @import
#' stats
#' @examples
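#' # a minimal illustration with assumed values: variance implied by a point
#' # estimate of log(1.5) with a 95% CI upper limit of log(2.2)
#' ci_to_var( est = log(1.5), ci.lim = log(2.2) )
#'
#' # same, but with a t-based CI on 20 degrees of freedom
#' ci_to_var( est = log(1.5), ci.lim = log(2.2), df = 20 )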
ci_to_var = Vectorize( function(est,
                                ci.lim,
                                ci.level = 0.95,
                                df = NA){

  if ( is.na(est) | is.na(ci.lim) ) return(NA)
  # Z or t-stat
  stat = abs(est - ci.lim)

  alpha = 1 - ci.level
  if ( is.na(df) ) crit = qnorm(1 - alpha/2)
  if ( !is.na(df) ) crit = qt(p = 1 - alpha/2, df = df)

  se = abs(est - ci.lim) / crit

  return(se^2)
} )




################################ FN: CALIBRATED ESTIMATES ################################

#' Return calibrated estimates of studies' population effect sizes
#'
#' Returns estimates of the population effect in each study based on the methods of Wang & Lee (2019).
#' Unlike the point estimates themselves, these "calibrated" estimates have been
#' appropriately shrunk to correct the overdispersion that arises due to the studies' finite sample sizes.
#' By default, this function uses DerSimonian-Laird moments-based estimates of the mean and variance of the true
#' effects, as Wang & Lee (2019) recommended.
#' @param yi Vector of study-level point estimates
#' @param sei Vector of study-level standard errors
#' @param method Estimation method for mean and variance of population effects (passed to \code{metafor::rma.uni})
#' @export
#' @import
#' metafor
#' stats
#' @references
#' Wang C-C & Lee W-C (2019). A simple method to estimate prediction intervals and
#' predictive distributions: Summarizing meta-analyses
#' beyond means and confidence intervals. \emph{Research Synthesis Methods}.
#' @examples
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                      ci=cpos, di=cneg, data=metadat::dat.bcg)
#'
#' # calculate calibrated estimates
#' d$calib = calib_ests( yi = d$yi,
#'                       sei = sqrt(d$vi) )
#'
#' # look at 5 studies with the largest calibrated estimates
#' d = d[ order(d$calib, decreasing = TRUE), ]
#' d$trial[1:5]
#'
#' # look at kernel density estimate of calibrated estimates
#' plot(density(d$calib))

calib_ests = function(yi,
                      sei,
                      method = "DL") {

  meta = rma.uni( yi = yi,
                  sei = sei,
                  method = method )

  muhat = meta$b
  t2 = meta$tau2

  # return ensemble estimates
  as.numeric( c(muhat) + ( c(t2) / ( c(t2) + sei^2 ) )^(1/2) * ( yi - c(muhat) ) )
}
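
# Illustration only (assumed toy data): calibrated estimates shrink each study's
# point estimate toward the pooled mean, with more shrinkage for less precise studies:
# yi  = c(0.8, 0.1, -0.4, 0.5, 0.2, 0.6, -0.1, 0.3, 0.4, 0.0)
# sei = c(0.35, 0.10, 0.30, 0.15, 0.10, 0.25, 0.20, 0.10, 0.30, 0.15)
# cbind( yi, calib = calib_ests(yi = yi, sei = sei) )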


################################ INTERNAL FNs FOR SIGN TEST METHOD ################################

# documentation from Rui Wang paper
##########################################################################
##########################################################################
### theta: treatment effect estimates ###
### (usually a vector of length K, K is the number of studies) ###
### theta.sd: estimated standard error of theta ###
### (usually a vector of same length as theta) ###
### mu: the specified value in the null hypothesis. ###
### pct: the percentile of interest ###
### 0.5=median, ###
### 0.25=25th percentile, ###
### 0.75=75th percentile. ###
### nperm: number of realizations in the conditional test ###

##########################################################################
### Output a 2-sided p-value. ###
##########################################################################


#' Return sign test p-value for meta-analysis percentile
#'
#' Returns a p-value for testing the hypothesis that \code{mu} is the \code{pct}^th
#' percentile of the population effect distribution based on the nonparametric sign test
#' method of Wang et al. (2010). This function is also called by \code{prop_stronger}
#' when using the sign test method.
#' @param yi Vector of study-level point estimates
#' @param sei Vector of study-level standard errors
#' @param mu The effect size to test as the \code{pct}^th percentile
#' @param pct The percentile of interest (e.g., 0.50 for the median)
#' @param R Number of simulation iterates to use when estimating null distribution of the test statistic.
#' @export
#' @references
#' Wang R, Tian L, Cai T, & Wei LJ (2010). Nonparametric inference procedure for percentiles
#' of the random effects distribution in meta-analysis. \emph{Annals of Applied Statistics}.
#' @examples
#' # calculate effect sizes for example dataset
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                    ci=cpos, di=cneg, data=metadat::dat.bcg)
#'
#' # test H0: the median is -0.3
#' # using only R = 100 for speed, but should be much larger (e.g., 2000) in practice
#' pct_pval( yi = d$yi,
#'           sei = sqrt(d$vi),
#'           mu = -0.3,
#'           pct = 0.5,
#'           R = 100 )

pct_pval <- function(yi,
                     sei,
                     mu,
                     pct,
                     R=2000) {

  K<-length(yi)

  # "score" is the kth contribution to the sum in
  #  the first equation on page 4 of Wang et al. (2010)
  score <- pnorm( (mu-yi)/sei ) - 0.5
  # OBSERVED test stat
  stat<-sum(score)

  # initialize what will be the test stat vector UNDER H0
  # i.e., Tstar in paper
  test.stat<-rep(0,R)

  # draw Deltas for R iterations
  # this is the H0 distribution
  i<-1
  while (i<=R) {
    # here they use "ref1" and "ref2" (0/1) instead of Delta (1/-1)
    #  for computational convenience
    ref1 <- rbinom(K,1,pct)
    ref2 <- 1-ref1
    # this is the second eq. on page 4 of paper
    test.stat[i] <- sum( (abs(score)) * ref1 - (abs(score))*ref2 )
    i<-i+1
  }

  # compare test.stat (which is under H0) to observed one
  p1 <- mean(as.numeric(test.stat<=stat))
  p2 <- mean(as.numeric(test.stat>=stat))
  p3 <- mean(as.numeric(test.stat==stat))
  pval <- 2*min(p1,p2) - p3
  return(pval)
}


#' Return sign test point estimate of proportion of effects above or below threshold
#'
#' Internal function not intended for user to call. Uses an extension of the sign test method of Wang et al. (2010) to estimate the proportion of true (i.e., population parameter) effect sizes in a meta-analysis
#' that are above or below a specified threshold of scientific importance. See important caveats in the Details section of the documentation for
#' the function \code{prop_stronger}.
#' @param q Population effect size that is the threshold for "scientific importance"
#' @param yi Study-level point estimates
#' @param vi Study-level variances
#' @param ci.level Confidence level as a proportion
#' @param tail \code{above} for the proportion of effects above \code{q}; \code{below} for
#' the proportion of effects below \code{q}.
#' @param R Number of simulation iterates to estimate null distribution of sign test statistic
#' @param return.vectors Should all percents and p-values from the grid search be returned?
#' @import
#' purrr
#' @importFrom
#' dplyr "%>%"
#' @references
#' Wang R, Tian L, Cai T, & Wei LJ (2010). Nonparametric inference procedure for percentiles
#' of the random effects distribution in meta-analysis. \emph{Annals of Applied Statistics}.
prop_stronger_sign = function(q,
                              yi,
                              vi,
                              ci.level = 0.95,
                              tail = NA,
                              R = 2000,
                              return.vectors = FALSE ) {


  # Phat values to try
  pct.vec = seq( 0, 1, 0.001 )

  pvals = pct.vec %>% map( function(x) pct_pval( yi = yi,
                                                 sei = sqrt(vi),
                                                 mu = q,
                                                 pct = x,
                                                 R = R ) ) %>%
    unlist # return a double-type vector instead of list

  # point estimate: the value of Phat.below with the largest p-value
  Phat.below.NP = pct.vec[ which.max( pvals ) ]

  # get CI limits
  alpha = 1 - ci.level
  # in case the point estimate is already 1 or 0, avoid null objects
  if ( Phat.below.NP == 1 ) CI.hi.NP = 1
  # of the "candidate" Phat values for the *upper* CI limit (i.e., those *above* the one with the largest p-value),
  #  which has a p-value closest to alpha?
  else CI.hi.NP = pct.vec[ pct.vec > Phat.below.NP ][ which.min( abs( pvals[ pct.vec > Phat.below.NP ] - alpha ) ) ]

  if ( Phat.below.NP == 0 ) CI.lo.NP = 0
  else CI.lo.NP = pct.vec[ pct.vec < Phat.below.NP ][ which.min( abs( pvals[ pct.vec < Phat.below.NP ] - alpha ) ) ]

  # if user wanted the lower tail
  if ( tail == "below" ) {
    res = data.frame(Est = Phat.below.NP,
                     lo = CI.lo.NP,
                     hi = CI.hi.NP )
    pcts = pct.vec
  }

  # if user wanted the upper tail, reverse everything
  if ( tail == "above" ) {
    res = data.frame( Est = 1 - Phat.below.NP,
                      lo = 1 - CI.hi.NP,
                      hi = 1 - CI.lo.NP )
    pcts = 1 - pct.vec
  }

  if ( return.vectors == FALSE ) return(res)
  if ( return.vectors == TRUE ) invisible( list(res = res,
                                                pcts = pcts,
                                                pvals = pvals) )
}
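
# Illustration only (internal function, normally called via prop_stronger):
# using the BCG data from the prop_stronger examples, one could run, e.g.,
# d = metafor::escalc(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=metadat::dat.bcg)
# prop_stronger_sign( q = log(0.7), yi = d$yi, vi = d$vi, tail = "below", R = 100 )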


################################ FN: COMPUTE PROPORTION OF EFFECTS STRONGER THAN THRESHOLD ################################

#' Estimate proportion of population effect sizes above or below a threshold
#'
#' Estimates the proportion of true (i.e., population parameter) effect sizes in a meta-analysis
#' that are above or below a specified threshold of scientific importance based on the parametric methods described in Mathur & VanderWeele (2018), the nonparametric calibrated methods described in Mathur & VanderWeele (2020b), and the cluster-bootstrapping methods described in Mathur & VanderWeele (2020c).
#' @param q Population effect size that is the threshold for "scientific importance"
#' @param M Pooled point estimate from meta-analysis (required only for parametric estimation/inference and for Shapiro p-value)
#' @param t2 Estimated heterogeneity (tau^2) from meta-analysis (required only for parametric estimation/inference and for Shapiro p-value)
#' @param se.M Estimated standard error of pooled point estimate from meta-analysis (required only for parametric inference)
#' @param se.t2 Estimated standard error of tau^2 from meta-analysis (required only for parametric inference)
#' @param ci.level Confidence level as a proportion (e.g., 0.95 for a 95\% confidence interval)
#' @param tail \code{"above"} for the proportion of effects above \code{q}; \code{"below"} for
#' the proportion of effects below \code{q}.
#' @param estimate.method Method for point estimation of the proportion (\code{"calibrated"} or \code{"parametric"}). See Details.
#' @param ci.method Method for confidence interval estimation (\code{"calibrated"}, \code{"parametric"}, or \code{"sign.test"}). See Details.
#' @param calib.est.method Method for estimating the mean and variance of the population effects when computing calibrated estimates. See Details.
#' @param dat Dataset of point estimates (with names equal to the passed \code{yi.name}) and their variances
#' (with names equal to the passed \code{vi.name}). Not required if using \code{ci.method = "parametric"} and bootstrapping is not needed.
#' @param R Number of bootstrap or simulation iterates (depending on the methods chosen). Not required if using \code{ci.method = "parametric"} and bootstrapping is not needed.
#' @param bootstrap Argument only used when \code{ci.method = "parametric"} (because otherwise the bootstrap is always used). In that case, if \code{bootstrap = "ifneeded"}, the function bootstraps if the estimated proportion is less than 0.15 or more than
#' 0.85. If equal to \code{"never"}, no inference is returned in these edge cases.
#' @param yi.name Name of the variable in \code{dat} containing the study-level point estimates. Used for bootstrapping and conducting Shapiro test.
#' @param vi.name Name of the variable in \code{dat} containing the study-level variances. Used for bootstrapping and conducting Shapiro test.
#' @param cluster.name Name of the variable in \code{dat} identifying clusters of studies. If left \code{NA}, assumes studies are independent (i.e., each study is its own cluster).
#' @export
#' @import
#' metafor
#' stats
#' tidyr
#' @importFrom
#' dplyr "%>%" "group_nest"
#' @importFrom rlang .data
#' @return
#' Returns a dataframe containing the point estimate for the proportion (\code{est}), its estimated standard error (\code{se}), lower and upper confidence
#' interval limits (\code{lo} and \code{hi}), and, depending on the user's specifications, the mean of the bootstrap estimates of the proportion (\code{bt.mn})
#' and the p-value for a Shapiro test for normality conducted on the standardized point estimates (\code{shapiro.pval}).
#' @details
#' These methods perform well only in meta-analyses with at least 10 studies; we do not recommend reporting them in smaller
#' meta-analyses. By default, \code{prop_stronger} performs estimation using a "calibrated" method (Mathur & VanderWeele, 2020b; 2020c) that extends work by Wang & Lee (2019).
#' This method makes no assumptions about the distribution of population effects, performs well in meta-analyses with
#' as few as 10 studies, and can accommodate clustering of the studies (e.g., when articles contributed multiple studies on similar populations). Calculating the calibrated estimates involves first estimating the meta-analytic mean and variance,
#' which, by default, is done using the moments-based DerSimonian-Laird estimator as in Wang & Lee (2019). To use a different method, which will be passed
#' to \code{metafor::rma.uni}, change the argument \code{calib.est.method} based on the documentation for \code{metafor::rma.uni}. For inference, the calibrated method uses bias-corrected
#' and accelerated bootstrapping that will account for clustered point estimates if the argument \code{cluster.name} is specified (Mathur & VanderWeele, 2020c). The bootstrapping may fail to converge for some small meta-analyses for which the threshold is distant from the mean of the population effects.
#' In these cases, you can try choosing a threshold closer to the pooled point estimate of your meta-analysis.
#' The mean of the bootstrap estimates of the proportion is returned as a diagnostic for potential bias in the estimated proportion.
#'
#' The parametric method assumes that the population effects are approximately normal, that the number of studies is large, and that the studies are independent. When these conditions hold
#' and the proportion being estimated is not extreme (between 0.15 and 0.85), the parametric method may be more precise than the calibrated method.
#' When using the parametric method and the estimated proportion is less than 0.15 or more than 0.85, it is best to bootstrap the confidence interval
#' using the bias-corrected and accelerated (BCa) method (Mathur & VanderWeele, 2018); this is the default behavior of \code{prop_stronger}.
#' Sometimes BCa confidence interval estimation fails, in which case \code{prop_stronger} instead uses the percentile method,
#' issuing a warning if this is the case (but note that the percentile method should \emph{not} be used when bootstrapping the calibrated estimates
#' rather than the parametric estimates). We use a modified "safe" version of the \code{boot} package code for bootstrapping
#' such that if any bootstrap iterates fail (usually because of model estimation problems), the error message is printed but the
#' bootstrap iterate is simply discarded so that confidence interval estimation can proceed. As above, the mean of the bootstrapped
#' estimates of the proportion is returned as a diagnostic for potential bias in the estimated proportion.
#'
#' The sign test method (Mathur & VanderWeele, 2020b) is an extension of work by Wang et al. (2010). This method was included in Mathur & VanderWeele's (2020b) simulation study;
#' it performed adequately when there was high heterogeneity, but did not perform well with lower heterogeneity. However, in the absence of a clear criterion
#' for how much heterogeneity is enough for the method to perform well, we do not in general recommend its use. Additionally, this method requires effects that are reasonably symmetric and unimodal.
#'
#' @references
#' Mathur MB & VanderWeele TJ (2018). New metrics for meta-analyses of heterogeneous effects. \emph{Statistics in Medicine}.
#'
#' Mathur MB & VanderWeele TJ (2020a). New statistical metrics for multisite replication projects. \emph{Journal of the Royal Statistical Society: Series A}.
#'
#' Mathur MB & VanderWeele TJ (2020b). Robust metrics and sensitivity analyses for meta-analyses of heterogeneous effects. \emph{Epidemiology}.
#'
#' Mathur MB & VanderWeele TJ (2020c). Meta-regression methods to characterize evidence strength using meaningful-effect percentages conditional on study characteristics. Preprint available: \url{https://osf.io/bmtdq}.
#'
#' Wang R, Tian L, Cai T, & Wei LJ (2010). Nonparametric inference procedure for percentiles
#' of the random effects distribution in meta-analysis. \emph{Annals of Applied Statistics}.
#'
#' Wang C-C & Lee W-C (2019). A simple method to estimate prediction intervals and
#' predictive distributions: Summarizing meta-analyses
#' beyond means and confidence intervals. \emph{Research Synthesis Methods}.
#'
#' @examples
#' ##### Example 1: BCG Vaccine and Tuberculosis Meta-Analysis #####
#'
#' # calculate effect sizes for example dataset
#' d = metafor::escalc(measure="RR", ai=tpos, bi=tneg,
#'                     ci=cpos, di=cneg, data=metadat::dat.bcg)
#'
#' # fit random-effects model
#' # note that metafor package returns on the log scale
#' m = metafor::rma.uni(yi= d$yi, vi=d$vi, knha=TRUE,
#'                      measure="RR", method="REML" )
#'
#' # pooled point estimate (RR scale)
#' exp(m$b)
#'
#' # estimate the proportion of effects stronger than RR = 0.70
#' # as recommended, use the calibrated approach for both point estimation and CI
#' # bootstrap reps should be higher in practice (e.g., 1000)
#' # here using fewer for speed
#' prop_stronger( q = log(0.7),
#'                tail = "below",
#'                estimate.method = "calibrated",
#'                ci.method = "calibrated",
#'                dat = d,
#'                yi.name = "yi",
#'                vi.name = "vi",
#'                R = 100)
#' # warning goes away with more bootstrap iterates
#' # no Shapiro p-value because we haven't provided M and t2
#'
#' # now use the parametric approach (Mathur & VanderWeele 2018)
#' # no bootstrapping will be needed for this choice of q
#' prop_stronger( q = log(0.7),
#'                M = as.numeric(m$b),
#'                t2 = m$tau2,
#'                se.M = as.numeric(m$vb),
#'                se.t2 = m$se.tau2,
#'                tail = "below",
#'                estimate.method = "parametric",
#'                ci.method = "parametric",
#'                bootstrap = "ifneeded")
#'
#'
#' ##### Example 2: Meta-Analysis of Multisite Replication Studies #####
#'
#' # replication estimates (Fisher's z scale) and SEs
#' # from moral credential example in reference #2
#' r.fis = c(0.303, 0.078, 0.113, -0.055, 0.056, 0.073,
#'           0.263, 0.056, 0.002, -0.106, 0.09, 0.024, 0.069, 0.074,
#'           0.107, 0.01, -0.089, -0.187, 0.265, 0.076, 0.082)
#'
#' r.SE = c(0.111, 0.092, 0.156, 0.106, 0.105, 0.057,
#'          0.091, 0.089, 0.081, 0.1, 0.093, 0.086, 0.076,
#'          0.094, 0.065, 0.087, 0.108, 0.114, 0.073, 0.105, 0.04)
#'
#' d = data.frame( yi = r.fis,
#'                 vi = r.SE^2 )
#'
#' # meta-analyze the replications
#' m = metafor::rma.uni( yi = r.fis, vi = r.SE^2, measure = "ZCOR" )
#'
#' # probability of population effect above r = 0.10 = 28%
#' # convert threshold on r scale to Fisher's z
#' q = r_to_z(0.10)
#'
#' # bootstrap reps should be higher in practice (e.g., 1000)
#' # here using only 100 for speed
#' prop_stronger( q = q,
#'                tail = "above",
#'                estimate.method = "calibrated",
#'                ci.method = "calibrated",
#'                dat = d,
#'                yi.name = "yi",
#'                vi.name = "vi",
#'                R = 100 )
#'
#'
#' # probability of population effect equally strong in opposite direction
#' q.star = r_to_z(-0.10)
#' prop_stronger( q = q.star,
#'                tail = "below",
#'                estimate.method = "calibrated",
#'                ci.method = "calibrated",
#'                dat = d,
#'                yi.name = "yi",
#'                vi.name = "vi",
#'                R = 100 )
#' # BCa fails to converge here


prop_stronger = function( q,
                          M = NA,
                          t2 = NA,
                          se.M = NA,
                          se.t2 = NA,
                          ci.level = 0.95,
                          tail = NA,

                          estimate.method = "calibrated",  # "parametric" or "calibrated"
                          ci.method = "calibrated", # "parametric", "calibrated", or "sign.test"
                          calib.est.method = "DL",  # will be passed to rma.uni

                          # below arguments only needed for bootstrapping with parametric method
                          dat = NULL,
                          R = 2000,
                          bootstrap = "ifneeded",  # "ifneeded" or "never"
                          yi.name = "yi",
                          vi.name = "vi",
                          cluster.name = NA) {


  ##### Check for Bad Input #####

  if ( ( estimate.method == "parametric" | ci.method == "parametric" ) &
       ( is.na(t2) | is.na(M) ) ) {
    stop("Must provide M and t2 if using estimate.method = 'parametric' or ci.method = 'parametric'.")
  }

  if ( !is.na(t2) & t2 < 0 ) stop("Heterogeneity cannot be negative")

  if ( ! is.na(se.M) ) {
    if (se.M < 0) stop("se.M cannot be negative")
  }

  if ( ! is.na(se.t2) ) {
    if (se.t2 < 0) stop("se.t2 cannot be negative")
  }

  if ( !estimate.method %in% c( "parametric", "calibrated" ) ) {
    stop("Invalid estimate.method argument")
  }

  if ( !ci.method %in% c( "parametric", "calibrated", "sign.test" ) ) {
    stop("Invalid ci.method argument")
  }

  if ( !bootstrap %in% c( "ifneeded", "never" ) ) {
    stop("Invalid bootstrap argument")
  }

  if ( ci.method == "parametric" & estimate.method != "parametric"){
    stop("\nYou chose ci.method = 'parametric',\nin which case you must use estimate.method = 'parametric' as well.\n")
  }

  # if no cluster variable, assume each study is in its own cluster
  if ( !is.null(dat) & is.na(cluster.name) ) dat$cluster = 1:nrow(dat)
  # otherwise standardize the name of the cluster variable
  if ( !is.null(dat) & !is.na(cluster.name) ) dat$cluster = dat[[cluster.name]]

  ##### Messages When Not All Output Can Be Computed #####

  if ( ci.method == "parametric" & ( is.na(se.M) | is.na(se.t2) ) ) message("Cannot compute parametric inference without se.M and \nse.t2.\n Returning only point estimates.\n")
  if ( is.null(dat) | is.na(M) | is.na(t2) ) message("Cannot report Shapiro normality test without dat, M, and t2.\n Returning NA for Shapiro p-value.\n")

  if ( ( estimate.method == "calibrated" | ci.method == "calibrated" ) &
       is.null(dat) ) stop("Cannot use calibrated methods without providing dat.")

  ##### Get Point Estimate #####

  if ( estimate.method == "parametric" ) {

    # same regardless of tail
    Z = (q - M) / sqrt(t2)

    if ( tail == "above" ) phat = 1 - pnorm(Z)
    else if ( tail == "below" ) phat = pnorm(Z)
  }

  if ( estimate.method == "calibrated" ) {
    # use DL method by default
    calib = calib_ests( yi = dat[[yi.name]],
                        sei = sqrt(dat[[vi.name]]),
                        method = calib.est.method )

    if ( tail == "above" ) phat = sum(calib > c(q)) / length(calib)
    if ( tail == "below" ) phat = sum(calib < c(q)) / length(calib)
  }

  ##### Inference #####

  # in case we don't bootstrap
  bt.mn = NA

  if ( ci.method == "parametric") {

    warning("You chose ci.method = 'parametric'.\nIt is almost always better to use ci.method = 'calibrated'.\n")

    # is point estimate extreme enough to require bootstrap?
    extreme = (phat < 0.15 | phat > 0.85)

    if ( extreme ) {
      if ( bootstrap == "ifneeded" ) {
        message("The estimated proportion is close to 0 or 1,\n so the theoretical CI may perform poorly.\nUsing bootstrapping instead.\n")

        # more sanity checks
        if ( is.null(dat) ) stop("Must provide dat in order to bootstrap.")
        if ( !yi.name %in% names(dat) ) stop("dat must contain variable called yi.name.")
        if ( !vi.name %in% names(dat) ) stop("dat must contain variable called vi.name.")

        bootCIs = lo = hi = SE = NULL

        boot.res = suppressWarnings( safe_boot( data = dat,
                                                parallel = "multicore",
                                                R = R,
                                                statistic = get_stat,  # get_stat internally handles clustering
                                                # below arguments are being passed to get_stat
                                                q = q,
                                                tail = tail,
                                                yi.name = yi.name,
                                                vi.name = vi.name ) )

        # if BCa fails (due to infinite w adjustment issue), use percentile instead
        boot.values = tryCatch( {

          bootCIs = boot.ci(boot.res,
                            type="bca",
                            conf = ci.level )
          lo = round( bootCIs$bca[4], 2 )
          hi = round( bootCIs$bca[5], 2 )
          SE = sd(boot.res$t)
          bt.mn = mean(boot.res$t)

          c(lo, hi, SE, bt.mn)  # these will be in the vector boot.values

        }, error = function(err) {
          warning("Had problems computing BCa CI. Using percentile method instead.")

          bootCIs = boot.ci(boot.res,
                            type="perc",
                            conf = ci.level )

          lo = round( bootCIs$perc[4], 2 )

          hi = round( bootCIs$perc[5], 2 )

          SE = sd(boot.res$t)
          c(lo, hi, SE, bt.mn)
        }
        )

        lo = boot.values[1]
        hi = boot.values[2]
        SE = boot.values[3]
        bt.mn = boot.values[4]

      } # end bootstrap == "ifneeded"

      if ( bootstrap == "never" ) {
        warning("The estimated proportion is close to 0 or 1,\n so the theoretical CI may perform poorly. Should use \nBCa bootstrapping instead.")
        SE = lo = hi = NA
      }
    }


    if ( !extreme ) {
      # do inference only if given needed SEs
      if ( !is.na(se.M) & !is.na(se.t2) ){

        ##### Delta Method Inference on Original Scale #####
        term1.1 = se.M^2 / t2
        term1.2 = ( se.t2^2 * ( q - M )^2 ) / ( 4 * t2^3 )
        term1 = sqrt( term1.1 + term1.2 )

        SE = term1 * dnorm(Z)

        # confidence interval
        tail.prob = ( 1 - ci.level ) / 2
        lo = max( 0, phat + qnorm( tail.prob )*SE )
        hi = min( 1, phat - qnorm( tail.prob )*SE )
      } else {
        SE = lo = hi = NA
      }
    }

  } # end ci.method == "parametric"

  if ( ci.method == "calibrated") {

    # more sanity checks
    if ( is.null(dat) ) stop("Must provide dat in order to use ci.method = 'calibrated'.")
    if ( !yi.name %in% names(dat) ) stop("dat must contain variable called yi.name.")
    if ( !vi.name %in% names(dat) ) stop("dat must contain variable called vi.name.")

    bootCIs = lo = hi = SE = NULL

    # to easily allow for cluster-bootstrapping, nest the data
    # now has one row per cluster
    # works whether there is clustering or not
    # because without clustering, the clusters are 1:nrow(data)
    # prop_stronger creates a variable that is always called "cluster"
    #bm: utils::globalVariables(c("cluster", "data"))  # doesn't work


    datNest = dat %>% group_nest(.data$cluster)

    boot.res = suppressWarnings( safe_boot( data = datNest,
                                            parallel = "multicore",
                                            R = R,
                                            statistic = function(original, indices) {

                                              # resample clusters with replacement
                                              bNest = original[indices,]
                                              b = bNest %>% unnest(.data$data)

                                              calib.b = calib_ests( yi = b[[yi.name]],
                                                                    sei = sqrt(b[[vi.name]]),
                                                                    method = calib.est.method )

                                              if ( tail == "above" ) phatb = sum(calib.b > c(q)) / length(calib.b)
                                              if ( tail == "below" ) phatb = sum(calib.b < c(q)) / length(calib.b)
                                              return(phatb)
                                            } ) )

    # catch BCa failures (usually due to infinite w adjustment issue)
    boot.values = tryCatch( {

      bootCIs = boot.ci(boot.res,
                        type="bca",
                        conf = ci.level )
      lo = round( bootCIs$bca[4], 2 )
      hi = round( bootCIs$bca[5], 2 )
      SE = sd(boot.res$t)
      bt.mn = mean(boot.res$t)

      c(lo, hi, SE, bt.mn)

    }, error = function(err) {
      warning("\nHad problems computing BCa CI. \nThis typically happens when the estimated proportion is close to 0 or 1 and the number of studies is small.\nYou could try choosing q closer to the pooled point estimate.")
      c(NA, NA, NA, NA)
    }
    )

    lo = boot.values[1]
    hi = boot.values[2]
    SE = boot.values[3]
    bt.mn = boot.values[4]

  } # end ci.method == "calibrated"

  if (ci.method == "sign.test") {

    warning("\n\nYou are estimating a CI using the sign test method.\nThis method only works well with high heterogeneity and point estimates that are\n relatively symmetric and unimodal.\nThis method does not provide a standard error estimate, only CI limits.")

    Phat.np = prop_stronger_sign(q = q,
                                 yi = dat[[yi.name]],
                                 vi = dat[[vi.name]],
                                 ci.level = ci.level,
                                 tail = tail,
                                 R = R,
                                 return.vectors = FALSE )

    lo = Phat.np$lo
    hi = Phat.np$hi
    SE = NA  # this method doesn't give an SE
  }

  ##### Normality Check #####
  if ( !is.null(dat) & !is.na(M) & !is.na(t2) ) {
    if ( !yi.name %in% names(dat) ) stop("dat must contain variable called yi.name.")
    if ( !vi.name %in% names(dat) ) stop("dat must contain variable called vi.name.")

    # normalized point estimates
    yi.norm = ( dat[[yi.name]] - c(M) ) / sqrt( dat[[vi.name]] + c(t2) )

    # Shapiro-Wilk test
    shapiro.pval = shapiro.test( yi.norm )$p.value
  } else {
    shapiro.pval = NA
  }

  # return results
  res = data.frame( est = phat,
                    se = SE,
                    lo,
                    hi,
                    bt.mn,
                    shapiro.pval )
  rownames(res) = NULL
  res
}




# ~~~~~~~~~~~~~~~~~~~~~~ INTERNAL FUNCTIONS FOR BOOTSTRAPPING ~~~~~~~~~~~~~~~~~~~~~~ #

################################### GET P-HAT STAT FOR USE WITH BOOT PACKAGE ###################################

# Internal function not intended for the user. Meta-analyzes a bootstrap sample and estimates the proportion of effects stronger than q.
# This is a "safe" function that does not return an error if estimation fails so that bootstrapping
# can still proceed.

# original = The original dataset
# indices = indices for bootstrap resample
# method = Method of meta-analysis model-fitting passed to metafor::rma.uni
get_stat = function( original,
                     indices,
                     q,
                     tail,
                     method = "REML",
                     yi.name = "yi",
                     vi.name = "vi",
                     ci.level ) {

  b = original[indices,]

  # tryCatch in case meta-analysis runs into Fisher convergence problems
  phatb = tryCatch( {

    # meta-analyze the bootstrapped data
    mb = rma.uni( yi = b[[yi.name]],
                  vi = b[[vi.name]],
                  knha = TRUE,
                  method = method,
                  # last argument helps prevent convergence problems
                  control=list(stepadj=0.5) )

    Mb = mb$b
    t2b = mb$tau2

    Z = (q - Mb) / sqrt(t2b)

    if ( tail == "above" ) phatb = 1 - pnorm(Z)
    else if ( tail == "below" ) phatb = pnorm(Z)

  }, error = function(err) {
    message(err)
    return(NA)
  } )

  if (!is.null(phatb)) return(phatb)
  #if(got.error == FALSE) return(phatb)

}
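
# Illustration only: get_stat is written as a "statistic" function for the boot package,
# which calls statistic(data, indices, ...); extra named arguments are passed through.
# For example (not run; assumes the BCG data as in the prop_stronger examples):
# d = metafor::escalc(measure="RR", ai=tpos, bi=tneg, ci=cpos, di=cneg, data=metadat::dat.bcg)
# boot.res = boot::boot( data = d, statistic = get_stat, R = 50,
#                        q = log(0.7), tail = "below",
#                        yi.name = "yi", vi.name = "vi", ci.level = 0.95 )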



################################### MODIFIED FROM BOOT PACKAGE ###################################

# see section called "our additions"
# we minimally modified the function so that it can proceed even if some of the bootstrap iterates run into errors
# (in this case, Fisher convergence issues) because the boot package version gets confused about dimension mismatches

safe_boot = function (data, statistic, R, sim = "ordinary",
                      stype = c("i", "f", "w"),
                      strata = rep(1, n), L = NULL, m = 0, weights = NULL,
                      ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...,
                      parallel = c("no", "multicore", "snow"),
                      ncpus = getOption("boot.ncpus", 1L), cl = NULL)
{


  call <- match.call()
  stype <- match.arg(stype)
  if (missing(parallel))
    parallel <- getOption("boot.parallel", "no")
  parallel <- match.arg(parallel)
  have_mc <- have_snow <- FALSE
  if (parallel != "no" && ncpus > 1L) {
    if (parallel == "multicore")
      have_mc <- .Platform$OS.type != "windows"
    else if (parallel == "snow")
      have_snow <- TRUE
    if (!have_mc && !have_snow)
      ncpus <- 1L
    loadNamespace("parallel")
  }
  if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
    warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", n=0', so ignored")
    simple <- FALSE
  }
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE))
    runif(1)
  seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  n <- NROW(data)
  if ((n == 0) || is.null(n))
    stop("no data in call to 'boot'")
  temp.str <- strata
  strata <- tapply(seq_len(n), as.numeric(strata))
  t0 <- if (sim != "parametric") {
    if ((sim == "antithetic") && is.null(L))
      L <- empinf(data = data, statistic = statistic, stype = stype,
                  strata = strata, ...)
    if (sim != "ordinary")
      m <- 0
    else if (any(m < 0))
      stop("negative value of 'm' supplied")
    if ((length(m) != 1L) && (length(m) != length(table(strata))))
      stop("length of 'm' incompatible with 'strata'")
    if ((sim == "ordinary") || (sim == "balanced")) {
      if (isMatrix(weights) && (nrow(weights) != length(R)))
        stop("dimensions of 'R' and 'weights' do not match")
    }
    else weights <- NULL
    if (!is.null(weights))
      weights <- t(apply(matrix(weights, n, length(R),
                                byrow = TRUE), 2L, normalize, strata))
    if (!simple)
      i <- index.array(n, R, sim, strata, m, L, weights)
    original <- if (stype == "f")
      rep(1, n)
    else if (stype == "w") {
      ns <- tabulate(strata)[strata]
      1/ns
    }
    else seq_len(n)
    t0 <- if (sum(m) > 0L)
      statistic(data, original, rep(1, sum(m)), ...)
    else statistic(data, original, ...)
    rm(original)
    t0
  }
  else statistic(data, ...)
  pred.i <- NULL
  fn <- if (sim == "parametric") {
    ran.gen
    data
    mle
    function(r) {
      dd <- ran.gen(data, mle)
      statistic(dd, ...)
    }
  }
  else {
    if (!simple && ncol(i) > n) {
      pred.i <- as.matrix(i[, (n + 1L):ncol(i)])
      i <- i[, seq_len(n)]
    }
    if (stype %in% c("f", "w")) {
      f <- freq.array(i)
      rm(i)
      if (stype == "w")
        f <- f/ns
      if (sum(m) == 0L)
        function(r) statistic(data, f[r, ], ...)
      else function(r) statistic(data, f[r, ], pred.i[r,
      ], ...)
    }
    else if (sum(m) > 0L)
      function(r) statistic(data, i[r, ], pred.i[r, ],
                            ...)
    else if (simple)
      function(r) statistic(data, index.array(n, 1, sim,
                                              strata, m, L, weights), ...)
    else function(r) statistic(data, i[r, ], ...)
  }
  RR <- sum(R)
  res <- if (ncpus > 1L && (have_mc || have_snow)) {
    if (have_mc) {
      parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
    }
    else if (have_snow) {
      list(...)
      if (is.null(cl)) {
        cl <- parallel::makePSOCKcluster(rep("localhost",
                                             ncpus))
        if (RNGkind()[1L] == "L'Ecuyer-CMRG")
          parallel::clusterSetRNGStream(cl)
        res <- parallel::parLapply(cl, seq_len(RR), fn)
        parallel::stopCluster(cl)
        res
      }
      else parallel::parLapply(cl, seq_len(RR), fn)
    }
  }
  else lapply(seq_len(RR), fn)
  #t.star <- matrix(, RR, length(t0))  # ~~~ MM commented out


  # ~~~~~ beginning of our additions
  # number of non-NULL elements of the results vector
  RR = length(unlist(res))
  nulls = sapply( res, is.null)
  res = res[ !nulls ]
  t.star <- matrix(, RR, length(t0))

  # if some reps failed, warn user
  if ( sum(RR) < R ) warning( "Only ", sum(RR), " of ", R, " bootstrap reps managed to fit the meta-analysis successfully. Discarding all those that did not.")

  # without this, boot.CI gets confused about number of replicates
  R = RR
  # ~~~~~ end of our additions


  for (r in seq_len(RR)) t.star[r, ] <- res[[r]]
  if (is.null(weights))
    weights <- 1/tabulate(strata)[strata]
  boot.return(sim, t0, t.star, temp.str, R, data, statistic,
              stype, call, seed, L, m, pred.i, weights, ran.gen, mle)
}


################################### UNMODIFIED INTERNAL FUNCTIONS FROM BOOT PACKAGE ###################################

# these functions need to be loaded directly in order for safe_boot to work

# part of R package boot
# copyright (C) 1997-2001 Angelo J. Canty
# corrections (C) 1997-2014 B. D. Ripley
#
# Unlimited distribution is permitted

# safe version of sample
# needs R >= 2.9.0
# only works if size is not specified in R >= 2.11.0, but it always is in boot
sample0 <- function(x, ...) x[sample.int(length(x), ...)]
bsample <- function(x, ...) x[sample.int(length(x), replace = TRUE, ...)]

isMatrix <- function(x) length(dim(x)) == 2L

## random permutation of x.
rperm <- function(x) sample0(x, length(x))



antithetic.array <- function(n, R, L, strata)
  #
  #  Create an array of indices by antithetic resampling using the
  #  empirical influence values in L.  This function just calls anti.arr
  #  to do the sampling within strata.
  #
{
  inds <- as.integer(names(table(strata)))
  out <- matrix(0L, R, n)
  for (s in inds) {
    gp <- seq_len(n)[strata == s]
    out[, gp] <- anti.arr(length(gp), R, L[gp], gp)
  }
  out
}

anti.arr <- function(n, R, L, inds=seq_len(n))
{
  #  R x n array of bootstrap indices, generated antithetically
  #  according to the empirical influence values in L.
  unique.rank <- function(x) {
    # Assign unique ranks to a numeric vector
    ranks <- rank(x)
    if (any(duplicated(ranks))) {
      inds <- seq_along(x)
      uniq <- sort(unique(ranks))
      tab <- table(ranks)
      for (i in seq_along(uniq))
        if (tab[i] > 1L) {
          gp <- inds[ranks == uniq[i]]
          ranks[gp] <- rperm(inds[sort(ranks) == uniq[i]])
        }
    }
    ranks
  }
  R1 <- floor(R/2)
  mat1 <- matrix(bsample(inds, R1*n), R1, n)
  ranks <- unique.rank(L)
  rev <- inds
  for (i in seq_len(n)) rev[i] <- inds[ranks == (n+1-ranks[i])]
  mat1 <- rbind(mat1, matrix(rev[mat1], R1, n))
  if (R != 2*R1) mat1 <- rbind(mat1, bsample(inds, n))
  mat1
}




balanced.array <- function(n, R, strata)
{
  #
  # R x n array of bootstrap indices, sampled hypergeometrically
  # within strata.
  #
  output <- matrix(rep(seq_len(n), R), n, R)
  inds <- as.integer(names(table(strata)))
  for(is in inds) {
    group <- seq_len(n)[strata == is]
    if(length(group) > 1L) {
      g <- matrix(rperm(output[group,  ]), length(group), R)
      output[group,  ] <- g
    }
  }
  t(output)
}

boot <- function(data, statistic, R, sim = "ordinary",
                 stype = c("i", "f", "w"),
                 strata  =  rep(1, n), L = NULL, m = 0, weights = NULL,
                 ran.gen = function(d, p) d, mle = NULL, simple = FALSE, ...,
                 parallel = c("no", "multicore", "snow"),
                 ncpus = getOption("boot.ncpus", 1L), cl = NULL)
{
  #
  # R replicates of bootstrap applied to  statistic(data)
  # Possible sim values are: "ordinary", "balanced", "antithetic",
  #                     "permutation", "parametric"
  # Various auxilliary functions find the indices to be used for the
  # bootstrap replicates and then this function loops over those replicates.
  #
  call <- match.call()
  stype <- match.arg(stype)
  if (missing(parallel)) parallel <- getOption("boot.parallel", "no")
  parallel <- match.arg(parallel)
  have_mc <- have_snow <- FALSE
  if (parallel != "no" && ncpus > 1L) {
    if (parallel == "multicore") have_mc <- .Platform$OS.type != "windows"
    else if (parallel == "snow") have_snow <- TRUE
    if (!have_mc && !have_snow) ncpus <- 1L
    loadNamespace("parallel") # get this out of the way before recording seed
  }
  if (simple && (sim != "ordinary" || stype != "i" || sum(m))) {
    warning("'simple=TRUE' is only valid for 'sim=\"ordinary\", stype=\"i\", m=0', so ignored")
    simple <- FALSE
  }
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
  seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  n <- NROW(data)
  if ((n == 0) || is.null(n))
    stop("no data in call to 'boot'")
  temp.str <- strata
  strata <- tapply(seq_len(n),as.numeric(strata))
  t0 <- if (sim != "parametric") {
    if ((sim == "antithetic") && is.null(L))
      L <- empinf(data = data, statistic = statistic,
                  stype = stype, strata = strata, ...)
    if (sim != "ordinary") m <- 0
    else if (any(m < 0)) stop("negative value of 'm' supplied")
    if ((length(m) != 1L) && (length(m) != length(table(strata))))
      stop("length of 'm' incompatible with 'strata'")
    if ((sim == "ordinary") || (sim == "balanced")) {
      if (isMatrix(weights) && (nrow(weights) != length(R)))
        stop("dimensions of 'R' and 'weights' do not match")}
    else weights <- NULL
    if (!is.null(weights))
      weights <- t(apply(matrix(weights, n, length(R), byrow = TRUE),
                         2L, normalize, strata))
    if (!simple) i <- index.array(n, R, sim, strata, m, L, weights)

    original <- if (stype == "f") rep(1, n)
    else if (stype == "w") {
      ns <- tabulate(strata)[strata]
      1/ns
    } else seq_len(n)

    t0 <- if (sum(m) > 0L) statistic(data, original, rep(1, sum(m)), ...)
    else statistic(data, original, ...)
    rm(original)
    t0
  } else # "parametric"
    statistic(data, ...)

  pred.i <- NULL
  fn <- if (sim == "parametric") {
    ## force promises, so values get sent by parallel
    ran.gen; data; mle
    function(r) {
      dd <- ran.gen(data, mle)
      statistic(dd, ...)
    }
  } else {
    if (!simple && ncol(i) > n) {
      pred.i <- as.matrix(i[ , (n+1L):ncol(i)])
      i <- i[, seq_len(n)]
    }
    if (stype %in% c("f", "w")) {
      f <- freq.array(i)
      rm(i)
      if (stype == "w") f <- f/ns
      if (sum(m) == 0L) function(r) statistic(data, f[r,  ], ...)
      else function(r) statistic(data, f[r, ], pred.i[r, ], ...)
    } else if (sum(m) > 0L)
      function(r) statistic(data, i[r, ], pred.i[r,], ...)
    else if (simple)
      function(r)
        statistic(data,
                  index.array(n, 1, sim, strata, m, L, weights), ...)
    else function(r) statistic(data, i[r, ], ...)
  }
  RR <- sum(R)
  res <- if (ncpus > 1L && (have_mc || have_snow)) {
    if (have_mc) {
      parallel::mclapply(seq_len(RR), fn, mc.cores = ncpus)
    } else if (have_snow) {
      list(...) # evaluate any promises
      if (is.null(cl)) {
        cl <- parallel::makePSOCKcluster(rep("localhost", ncpus))
        if(RNGkind()[1L] == "L'Ecuyer-CMRG")
          parallel::clusterSetRNGStream(cl)
        res <- parallel::parLapply(cl, seq_len(RR), fn)
        parallel::stopCluster(cl)
        res
      } else parallel::parLapply(cl, seq_len(RR), fn)
    }
  } else lapply(seq_len(RR), fn)
  t.star <- matrix(, RR, length(t0))
  for(r in seq_len(RR)) t.star[r, ] <- res[[r]]

  if (is.null(weights)) weights <- 1/tabulate(strata)[strata]
  boot0 <- boot.return(sim, t0, t.star, temp.str, R, data, statistic,
                       stype, call,
                       seed, L, m, pred.i, weights, ran.gen, mle)
  attr(boot0, "boot_type") <- "boot"
  boot0
}
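
# Minimal usage sketch (assumes the built-in 'rivers' data; kept as comments so
# it is not evaluated at package load):
#   set.seed(123)
#   b <- boot(data = rivers, statistic = function(d, i) mean(d[i]), R = 999)
#   b$t0       # observed statistic
#   head(b$t)  # bootstrap replicates
#   b          # printed via print.boot below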

normalize <- function(wts, strata)
{
  #
  # Normalize a vector of weights to sum to 1 within each strata.
  #
  n <- length(strata)
  out <- wts
  inds <- as.integer(names(table(strata)))
  for (is in inds) {
    gp <- seq_len(n)[strata == is]
    out[gp] <- wts[gp]/sum(wts[gp]) }
  out
}
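
# Sketch of the normalization (comments only, not evaluated): weights are
# rescaled to sum to 1 within each stratum.
#   normalize(c(1, 3, 2, 2), strata = c(1, 1, 2, 2))
#   # -> 0.25 0.75 0.50 0.50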

boot.return <- function(sim, t0, t, strata, R, data, stat, stype, call,
                        seed, L, m, pred.i, weights, ran.gen, mle)
  #
  # Return the results of a bootstrap in the form of an object of class
  # "boot".
  #
{
  out <- list(t0=t0, t=t, R=R, data=data, seed=seed,
              statistic=stat, sim=sim, call=call)
  if (sim == "parametric")
    out <- c(out, list(ran.gen=ran.gen, mle=mle))
  else if (sim == "antithetic")
    out <- c(out, list(stype=stype, strata=strata, L=L))
  else if (sim == "ordinary") {
    if (sum(m) > 0)
      out <- c(out, list(stype=stype, strata=strata,
                         weights=weights, pred.i=pred.i))
    else 	out <- c(out, list(stype=stype, strata=strata,
                             weights=weights))
  } else if (sim == "balanced")
    out <- c(out, list(stype=stype, strata=strata,
                       weights=weights ))
  else
    out <- c(out, list(stype=stype, strata=strata))
  class(out) <- "boot"
  out
}

c.boot <- function (..., recursive = TRUE)
{
  args <- list(...)
  nm <- lapply(args, names)
  if (!all(sapply(nm, function(x) identical(x, nm[[1]]))))
    stop("arguments are not all the same type of \"boot\" object")
  res <- args[[1]]
  res$R <- sum(sapply(args, "[[", "R"))
  res$t <- do.call(rbind, lapply(args, "[[", "t"))
  res
}
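
# Sketch of combining runs (assumes the built-in 'rivers' data; comments only):
# two independent runs of the same bootstrap are concatenated into one object
# whose R is the total number of replicates.
#   b1 <- boot(rivers, function(d, i) mean(d[i]), R = 500)
#   b2 <- boot(rivers, function(d, i) mean(d[i]), R = 500)
#   b  <- c(b1, b2)   # dispatches to c.boot; b$R is 1000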

boot.array <- function(boot.out, indices=FALSE) {
  #
  #  Return the frequency or index array for the bootstrap resamples
  #  used in boot.out
  #  This function recreates such arrays from the information in boot.out
  #
  if (exists(".Random.seed", envir=.GlobalEnv, inherits = FALSE))
    temp <- get(".Random.seed", envir=.GlobalEnv, inherits = FALSE)
  else temp<- NULL
  assign(".Random.seed",  boot.out$seed, envir=.GlobalEnv)
  n <- NROW(boot.out$data)
  R <- boot.out$R
  sim <- boot.out$sim
  type <- find_type(boot.out)
  if (type == "tsboot") {
    #  Recreate the array for an object created by tsboot, The default for
    #  such objects is to return the index array unless index is specifically
    #  passed as F
    if (missing(indices)) indices <- TRUE
    if (sim == "model")
      stop("index array not defined for model-based resampling")
    n.sim <- boot.out$n.sim
    i.a <- ts.array(n, n.sim, R, boot.out$l,
                    sim, boot.out$endcorr)
    out <- matrix(NA,R,n.sim)
    for(r in seq_len(R)) {
      if (sim == "geom")
        ends <- cbind(i.a$starts[r,  ],
                      i.a$lengths[r,  ])
      else
        ends <- cbind(i.a$starts[r,], i.a$lengths)
      inds <- apply(ends, 1L, make.ends, n)
      if (is.list(inds))
        inds <- unlist(inds)[seq_len(n.sim)]
      out[r,] <- inds
    }
  }
  else if (type == "censboot") {
    #  Recreate the array for an object created by censboot as long
    #  as censboot was called with sim = "ordinary"
    if (sim == "ordinary") {
      strata <- tapply(seq_len(n), as.numeric(boot.out$strata))
      out <- cens.case(n,strata,R)
    }
    else	stop("boot.array not implemented for this object")
  }
  else {
    #  Recreate the array for objects created by boot or tilt.boot
    if (sim == "parametric")
      stop("array cannot be found for parametric bootstrap")
    strata <- tapply(seq_len(n),as.numeric(boot.out$strata))
    if (find_type(boot.out) == "tilt.boot")
      weights <- boot.out$weights
    else {
      weights <- boot.out$call$weights
      if (!is.null(weights))
        weights <- boot.out$weights
    }
    out <- index.array(n, R, sim, strata, 0, boot.out$L, weights)
  }
  if (!indices) out <- freq.array(out)
  if (!is.null(temp)) assign(".Random.seed", temp, envir=.GlobalEnv)
  else rm(.Random.seed, pos=1)
  out
}
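
# Usage sketch (assumes the built-in 'rivers' data; comments only): recover the
# resampling arrays of an existing run; each row of the frequency array sums to n.
#   b <- boot(rivers, function(d, i) mean(d[i]), R = 10)
#   f <- boot.array(b)                   # R x n frequency array
#   i <- boot.array(b, indices = TRUE)   # R x n index array
#   all(rowSums(f) == length(rivers))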

# plot.boot <- function(x,index=1, t0=NULL, t=NULL, jack=FALSE,
#                       qdist="norm",nclass=NULL,df, ...) {
#   #
#   #  A plot method for bootstrap output objects.  It produces a histogram
#   #  of the bootstrap replicates and a QQ plot of them.  Optionally it can
#   #  also produce a jackknife-after-bootstrap plot.
#   #
#   boot.out <- x
#   t.o <- t
#   if (is.null(t)) {
#     t <- boot.out$t[,index]
#     if (is.null(t0)) t0 <- boot.out$t0[index]
#   }
#   t <- t[is.finite(t)]
#   if (const(t, min(1e-8,mean(t, na.rm=TRUE)/1e6))) {
#     print(paste("All values of t* are equal to ", mean(t, na.rm=TRUE)))
#     return(invisible(boot.out))
#   }
#   if (is.null(nclass)) nclass <- min(max(ceiling(length(t)/25),10),100)
#   if (!is.null(t0)) {
#     #  Calculate the breakpoints for the histogram so that one of them is
#     #  exactly t0.
#     rg <- range(t)
#     if (t0<rg[1L]) rg[1L] <- t0
#     else if (t0 >rg[2L]) rg[2L] <- t0
#     rg <- rg+0.05*c(-1,1)*diff(rg)
#     lc <- diff(rg)/(nclass-2)
#     n1 <- ceiling((t0-rg[1L])/lc)
#     n2 <- ceiling((rg[2L]-t0)/lc)
#     bks <- t0+(-n1:n2)*lc
#   }
#   R <- boot.out$R
#   if (qdist == "chisq") {
#     qq <- qchisq((seq_len(R))/(R+1),df=df)
#     qlab <- paste("Quantiles of Chi-squared(",df,")",sep="")
#   }
#   else {
#     if (qdist!="norm")
#       warning(gettextf("%s distribution not supported: using normal instead", sQuote(qdist)), domain = NA)
#     qq <- qnorm((seq_len(R))/(R+1))
#     qlab <-"Quantiles of Standard Normal"
#   }
#   if (jack) {
#     layout(mat = matrix(c(1,2,3,3), 2L, 2L, byrow=TRUE))
#     if (is.null(t0))
#       hist(t,nclass=nclass,probability=TRUE,xlab="t*")
#     else	hist(t,breaks=bks,probability=TRUE,xlab="t*")
#     if (!is.null(t0)) abline(v=t0,lty=2)
#     qqplot(qq,t,xlab=qlab,ylab="t*")
#     if (qdist == "norm") abline(mean(t),sqrt(var(t)),lty=2)
#     else abline(0,1,lty=2)
#     jack.after.boot(boot.out,index=index,t=t.o, ...)
#   }
#   else {
#     par(mfrow=c(1,2))
#     if (is.null(t0))
#       hist(t,nclass=nclass,probability=TRUE,xlab="t*")
#     else	hist(t,breaks=bks,probability=TRUE,xlab="t*")
#     if (!is.null(t0)) abline(v=t0,lty=2)
#     qqplot(qq,t,xlab=qlab,ylab="t*")
#     if (qdist == "norm") abline(mean(t),sqrt(var(t)),lty=2)
#     else abline(0,1,lty=2)
#   }
#   par(mfrow=c(1,1))
#   invisible(boot.out)
# }

print.boot <- function(x, digits = getOption("digits"),
                       index = 1L:ncol(boot.out$t), ...)
{
  #
  # Print the output of a bootstrap
  #
  boot.out <- x
  sim <- boot.out$sim
  cl <- boot.out$call
  t <- matrix(boot.out$t[, index], nrow = nrow(boot.out$t))
  allNA <- apply(t,2L,function(t) all(is.na(t)))
  ind1 <- index[allNA]
  index <- index[!allNA]
  t <- matrix(t[, !allNA], nrow = nrow(t))
  rn <- paste("t",index,"*",sep="")
  if (length(index) == 0L)
    op <- NULL
  else if (is.null(t0 <- boot.out$t0)) {
    if (is.null(boot.out$call$weights))
      op <- cbind(apply(t,2L,mean,na.rm=TRUE),
                  sqrt(apply(t,2L,function(t.st) var(t.st[!is.na(t.st)]))))
    else {
      op <- NULL
      for (i in index)
        op <- rbind(op, imp.moments(boot.out,index=i)$rat)
      op[,2L] <- sqrt(op[,2])
    }
    dimnames(op) <- list(rn,c("mean", "std. error"))
  }
  else {
    t0 <- boot.out$t0[index]
    if (is.null(boot.out$call$weights)) {
      op <- cbind(t0,apply(t,2L,mean,na.rm=TRUE)-t0,
                  sqrt(apply(t,2L,function(t.st) var(t.st[!is.na(t.st)]))))
      dimnames(op) <- list(rn, c("original"," bias  "," std. error"))
    }
    else {
      op <- NULL
      for (i in index)
        op <- rbind(op, imp.moments(boot.out,index=i)$rat)
      op <- cbind(t0,op[,1L]-t0,sqrt(op[,2L]),
                  apply(t,2L,mean,na.rm=TRUE))
      dimnames(op) <- list(rn,c("original", " bias  ",
                                " std. error", " mean(t*)"))
    }
  }
  type <- find_type(boot.out)
  if (type == "boot") {
    if (sim == "parametric")
      cat("\nPARAMETRIC BOOTSTRAP\n\n")
    else if (sim == "antithetic") {
      if (is.null(cl$strata))
        cat("\nANTITHETIC BOOTSTRAP\n\n")
      else
        cat("\nSTRATIFIED ANTITHETIC BOOTSTRAP\n\n")
    }
    else if (sim == "permutation") {
      if (is.null(cl$strata))
        cat("\nDATA PERMUTATION\n\n")
      else
        cat("\nSTRATIFIED DATA PERMUTATION\n\n")
    }
    else if (sim == "balanced") {
      if (is.null(cl$strata) && is.null(cl$weights))
        cat("\nBALANCED BOOTSTRAP\n\n")
      else if (is.null(cl$strata))
        cat("\nBALANCED WEIGHTED BOOTSTRAP\n\n")
      else if (is.null(cl$weights))
        cat("\nSTRATIFIED BALANCED BOOTSTRAP\n\n")
      else
        cat("\nSTRATIFIED WEIGHTED BALANCED BOOTSTRAP\n\n")
    }
    else {
      if (is.null(cl$strata) && is.null(cl$weights))
        cat("\nORDINARY NONPARAMETRIC BOOTSTRAP\n\n")
      else if (is.null(cl$strata))
        cat("\nWEIGHTED BOOTSTRAP\n\n")
      else if (is.null(cl$weights))
        cat("\nSTRATIFIED BOOTSTRAP\n\n")
      else
        cat("\nSTRATIFIED WEIGHTED BOOTSTRAP\n\n")
    }
  }
  else if (type == "tilt.boot") {
    R <- boot.out$R
    th <- boot.out$theta
    if (sim == "balanced")
      cat("\nBALANCED TILTED BOOTSTRAP\n\n")
    else	cat("\nTILTED BOOTSTRAP\n\n")
    if ((R[1L] == 0) || is.null(cl$tilt) || eval(cl$tilt))
      cat("Exponential tilting used\n")
    else	cat("Frequency Smoothing used\n")
    i1 <- 1
    if (boot.out$R[1L]>0)
      cat(paste("First",R[1L],"replicates untilted,\n"))
    else {
      cat(paste("First ",R[2L]," replicates tilted to ",
                signif(th[1L],4),",\n",sep=""))
      i1 <- 2
    }
    if (i1 <= length(th)) {
      for (j in i1:length(th))
        cat(paste("Next ",R[j+1L]," replicates tilted to ",
                  signif(th[j],4L),
                  ifelse(j!=length(th),",\n",".\n"),sep=""))
    }
    op <- op[, 1L:3L]
  }
  else if (type == "tsboot") {
    if (!is.null(cl$indices))
      cat("\nTIME SERIES BOOTSTRAP USING SUPPLIED INDICES\n\n")
    else if (sim == "model")
      cat("\nMODEL BASED BOOTSTRAP FOR TIME SERIES\n\n")
    else if (sim == "scramble") {
      cat("\nPHASE SCRAMBLED BOOTSTRAP FOR TIME SERIES\n\n")
      if (boot.out$norm)
        cat("Normal margins used.\n")
      else
        cat("Observed margins used.\n")
    }
    else if (sim == "geom") {
      if (is.null(cl$ran.gen))
        cat("\nSTATIONARY BOOTSTRAP FOR TIME SERIES\n\n")
      else
        cat(paste("\nPOST-BLACKENED STATIONARY",
                  "BOOTSTRAP FOR TIME SERIES\n\n"))
      cat(paste("Average Block Length of",boot.out$l,"\n"))
    }
    else {
      if (is.null(cl$ran.gen))
        cat("\nBLOCK BOOTSTRAP FOR TIME SERIES\n\n")
      else
        cat(paste("\nPOST-BLACKENED BLOCK",
                  "BOOTSTRAP FOR TIME SERIES\n\n"))
      cat(paste("Fixed Block Length of",boot.out$l,"\n"))
    }
  }
  else if (type == "censboot") {
    cat("\n")
    if (sim == "weird") {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("WEIRD BOOTSTRAP FOR CENSORED DATA\n\n")
    }
    else if ((sim == "ordinary") ||
             ((sim == "model") && is.null(boot.out$cox))) {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("CASE RESAMPLING BOOTSTRAP FOR CENSORED DATA\n\n")
    }
    else if (sim == "model") {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("MODEL BASED BOOTSTRAP FOR COX REGRESSION MODEL\n\n")
    }
    else if (sim == "cond") {
      if (!is.null(cl$strata)) cat("STRATIFIED ")
      cat("CONDITIONAL BOOTSTRAP ")
      if (is.null(boot.out$cox))
        cat("FOR CENSORED DATA\n\n")
      else
        cat("FOR COX REGRESSION MODEL\n\n")
    }
  } else warning('unknown type of "boot" object')
  cat("\nCall:\n")
  dput(cl, control=NULL)
  cat("\n\nBootstrap Statistics :\n")
  if (!is.null(op)) print(op,digits=digits)
  if (length(ind1) > 0L)
    for (j in ind1)
      cat(paste("WARNING: All values of t", j, "* are NA\n", sep=""))
  invisible(boot.out)
}




corr <- function(d, w=rep(1,nrow(d))/nrow(d))
{
  #  The correlation coefficient in weighted form.
  s <- sum(w)
  m1 <- sum(d[, 1L] * w)/s
  m2 <- sum(d[, 2L] * w)/s
  (sum(d[, 1L] * d[, 2L] * w)/s - m1 * m2)/sqrt((sum(d[, 1L]^2 * w)/s - m1^2) * (sum(d[, 2L]^2 * w)/s - m2^2))
}


extra.array <- function(n, R, m, strata=rep(1,n))
{
  #
  # Extra indices for predictions.  Can only be used with
  # types "ordinary" and "stratified".  For type "ordinary"
  # m is a positive integer.  For type "stratified" m can
  # be a positive integer or a vector of the same length as
  # strata.
  #
  if (length(m) == 1L)
    output <- matrix(sample.int(n, m*R, replace=TRUE), R, m)
  else {
    inds <- as.integer(names(table(strata)))
    output <- matrix(NA, R, sum(m))
    st <- 0
    for (i in inds) {
      if (m[i] > 0) {
        gp <- seq_len(n)[strata == i]
        inds1 <- (st+1):(st+m[i])
        output[,inds1] <- matrix(bsample(gp, R*m[i]), R, m[i])
        st <- st+m[i]
      }
    }
  }
  output
}

freq.array <- function(i.array)
{
  #
  # converts R x n array of bootstrap indices into
  # R X n array of bootstrap frequencies
  #
  result <- NULL
  n <- ncol(i.array)
  result <- t(apply(i.array, 1, tabulate, n))
  result
}
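
# Worked sketch (comments only): a 2 x 4 index array and its frequency form.
#   i <- rbind(c(1, 1, 3, 4),
#              c(2, 2, 2, 4))
#   freq.array(i)
#   # row 1: 2 0 1 1  (obs 1 drawn twice, obs 3 and 4 once each, obs 2 never)
#   # row 2: 0 3 0 1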



importance.array <- function(n, R, weights, strata){
  #
  #  Function to do importance resampling within strata based
  #  on the weights supplied.  If weights is a matrix with n columns,
  #  R must be a vector of length nrow(weights); otherwise weights
  #  must be a vector of length n and R must be a scalar.
  #
  imp.arr <- function(n, R, wts, inds=seq_len(n))
    matrix(bsample(inds, n*R, prob=wts), R, n)
  output <- NULL
  if (!isMatrix(weights))
    weights <- matrix(weights, nrow=1)
  inds <- as.integer(names(table(strata)))
  for (ir in seq_along(R)) {
    out <- matrix(rep(seq_len(n), R[ir]), R[ir], n, byrow=TRUE)
    for (is in inds) {
      gp <- seq_len(n)[strata == is]
      out[, gp] <- imp.arr(length(gp), R[ir],
                           weights[ir,gp], gp)
    }
    output <- rbind(output, out)
  }
  output
}
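
# Sketch of the weighting (comments only): with weights proportional to
# c(1, 1, 1, 5), observation 4 should be drawn about five times as often as
# each of the others across the whole index array.
#   a <- importance.array(4, R = 1000, weights = c(1, 1, 1, 5), strata = rep(1, 4))
#   table(a) / length(a)   # roughly c(1, 1, 1, 5) / 8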

importance.array.bal <- function(n, R, weights, strata) {
  #
  #  Function to do balanced importance resampling within strata
  #  based on the supplied weights.  Balancing is achieved in such
  #  a way that each index appears in the array approximately in
  #  proportion to its weight.
  #
  imp.arr.bal <- function(n, R, wts, inds=seq_len(n)) {
    if (sum (wts) != 1) wts <- wts / sum(wts)
    nRw1 <- floor(n*R*wts)
    nRw2 <- n*R*wts - nRw1
    output <- rep(inds, nRw1)
    if (any (nRw2 != 0))
      output <- c(output,
                  sample0(inds, round(sum(nRw2)), prob=nRw2))
    matrix(rperm(output), R, n)
  }
  output <- NULL
  if (!isMatrix(weights))
    weights <- matrix(weights, nrow = 1L)
  inds <- as.integer(names(table(strata)))
  for (ir in seq_along(R)) {
    out <- matrix(rep(seq_len(n), R[ir]), R[ir], n, byrow=TRUE)
    for (is in inds) {
      gp <- seq_len(n)[strata == is]
      out[,gp] <- imp.arr.bal(length(gp), R[ir], weights[ir,gp], gp)
    }
    output <- rbind(output, out)
  }
  output
}



index.array <- function(n, R, sim, strata=rep(1,n), m=0, L=NULL, weights=NULL)
{
  #
  #  Driver function for generating a bootstrap index array.  This function
  #  simply determines the type of sampling required and calls the appropriate
  #  function.
  #
  indices <- NULL
  if (is.null (weights)) {
    if (sim == "ordinary") {
      indices <- ordinary.array(n, R, strata)
      if (sum(m) > 0)
        indices <- cbind(indices, extra.array(n, R, m, strata))
    }
    else if (sim == "balanced")
      indices <- balanced.array(n, R, strata)
    else if (sim == "antithetic")
      indices <- antithetic.array(n, R, L, strata)
    else if (sim == "permutation")
      indices <- permutation.array(n, R, strata)
  } else {
    if (sim == "ordinary")
      indices <- importance.array(n, R, weights, strata)
    else if (sim == "balanced")
      indices <- importance.array.bal(n, R, weights, strata)
  }
  indices
}

# jack.after.boot <- function(boot.out, index=1, t=NULL, L=NULL,
#                             useJ=TRUE, stinf = TRUE, alpha=NULL, main = "", ylab=NULL, ...)
# {
#   # jackknife after bootstrap plot
#   t.o <- t
#   if (is.null(t)) {
#     if (length(index) > 1L) {
#       index <- index[1L]
#       warning("only first element of 'index' used")
#     }
#     t <- boot.out$t[, index]
#   }
#   fins <- seq_along(t)[is.finite(t)]
#   t <- t[fins]
#   if (is.null(alpha)) {
#     alpha <- c(0.05, 0.1, 0.16, 0.5, 0.84, 0.9, 0.95)
#     if (is.null(ylab))
#       ylab <- "5, 10, 16, 50, 84, 90, 95 %-iles of (T*-t)"
#   }
#   if (is.null(ylab)) ylab <- "Percentiles of (T*-t)"
#   data <- boot.out$data
#   n <- NROW(data)
#   f <- boot.array(boot.out)[fins, , drop=TRUE]
#   percentiles <- matrix(data = NA, length(alpha), n)
#   J <- numeric(n)
#   for(j in seq_len(n)) {
#     # Find the quantiles of the bootstrap distribution on omitting each point.
#     values <- t[f[, j] == 0]
#     J[j] <- mean(values)
#     percentiles[, j] <- quantile(values, alpha) - J[j]
#   }
#   # Now find the jackknife values to be plotted, and standardize them,
#   # if required.
#   if (!useJ) {
#     if (is.null(L))
#       J <- empinf(boot.out, index=index, t=t.o, ...)
#     else 	J <- L
#   }
#   else	J <- (n - 1) * (mean(J) - J)
#   xtext <- "jackknife value"
#   if (!useJ) {
#     if (!is.null(L) || (is.null(t.o) && (boot.out$stype == "w")))
#       xtext <- paste("infinitesimal", xtext)
#     else	xtext <- paste("regression", xtext)
#   }
#   if (stinf) {
#     J <- J/sqrt(var(J))
#     xtext <- paste("standardized", xtext)
#   }
#   top <- max(percentiles)
#   bot <- min(percentiles)
#   ylts <- c(bot - 0.35 * (top - bot), top + 0.1 * (top - bot))
#   percentiles <- percentiles[,order(J)]#
#   # Plot the overall quantiles and the delete-1 quantiles against the
#   # jackknife values.
#   plot(sort(J), percentiles[1,  ], ylim = ylts, type = "n", xlab = xtext,
#        ylab = ylab, main=main)
#   for(j in seq_along(alpha))
#     lines(sort(J), percentiles[j,  ], type = "b", pch = "*")
#   percentiles <- quantile(t, alpha) - mean(t)
#   for(j in seq_along(alpha))
#     abline(h=percentiles[j], lty=2)
#   # Now print the observation numbers below the plotted lines.  They are printed
#   # in five rows so that all numbers can be read easily.
#   text(sort(J), rep(c(bot - 0.08 * (top - bot), NA, NA, NA, NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, bot - 0.14 * (top - bot), NA, NA, NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, NA, bot - 0.2 * (top - bot), NA, NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, NA, NA, bot - 0.26 * (top - bot), NA), n, n),
#        order(J), cex = 0.5)
#   text(sort(J), rep(c(NA, NA, NA, NA, bot - 0.32 * (top - bot)), n, n),
#        order(J), cex = 0.5)
#   invisible()
# }


ordinary.array <- function(n, R, strata)
{
  #
  # R x n array of bootstrap indices, resampled within strata.
  # This is the function which generates a regular bootstrap array
  # using equal weights within each stratum.
  #
  inds <- as.integer(names(table(strata)))
  if (length(inds) == 1L) {
    output <- sample.int(n, n*R, replace=TRUE)
    dim(output) <- c(R, n)
  } else {
    output <- matrix(as.integer(0L), R, n)
    for(is in inds) {
      gp <- seq_len(n)[strata == is]
      output[, gp] <- if (length(gp) == 1) rep(gp, R) else bsample(gp, R*length(gp))
    }
  }
  output
}
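
# Sketch of stratified resampling (comments only): indices are drawn within
# strata, so columns belonging to a stratum only ever contain that stratum's
# observation numbers.
#   a <- ordinary.array(6, R = 3, strata = c(1, 1, 1, 2, 2, 2))
#   a   # columns 1-3 hold values in 1:3 only; columns 4-6 hold values in 4:6 only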

permutation.array <- function(n, R, strata)
{
  #
  # R x n array of bootstrap indices, permuted within strata.
  # This is similar to ordinary array except that resampling is
  # done without replacement in each row.
  #
  output <- matrix(rep(seq_len(n), R), n, R)
  inds <- as.integer(names(table(strata)))
  for(is in inds) {
    group <- seq_len(n)[strata == is]
    if (length(group) > 1L) {
      g <- apply(output[group,  ], 2L, rperm)
      output[group,  ] <- g
    }
  }
  t(output)
}


cv.glm <- function(data, glmfit, cost=function(y,yhat) mean((y-yhat)^2),
                   K=n)
{
  # cross-validation estimate of error for glm prediction with K groups.
  # cost is a function of two arguments: the observed values and
  # the predicted values.
  call <- match.call()
  if (!exists(".Random.seed", envir=.GlobalEnv, inherits = FALSE)) runif(1)
  seed <- get(".Random.seed", envir=.GlobalEnv, inherits = FALSE)
  n <- nrow(data)
  if ((K > n) || (K <= 1))
    stop("'K' outside allowable range")
  K.o <- K
  K <- round(K)
  kvals <- unique(round(n/(1L:floor(n/2))))
  temp <- abs(kvals-K)
  if (!any(temp == 0))
    K <- kvals[temp == min(temp)][1L]
  if (K!=K.o) warning(gettextf("'K' has been set to %f", K), domain = NA)
  f <- ceiling(n/K)
  s <- sample0(rep(1L:K, f), n)
  n.s <- table(s)
  #    glm.f <- formula(glmfit)
  glm.y <- glmfit$y
  cost.0 <- cost(glm.y, fitted(glmfit))
  ms <- max(s)
  CV <- 0
  Call <- glmfit$call
  for(i in seq_len(ms)) {
    j.out <- seq_len(n)[(s == i)]
    j.in <- seq_len(n)[(s != i)]
    ## we want data from here but formula from the parent.
    Call$data <- data[j.in, , drop=FALSE]
    d.glm <- eval.parent(Call)
    p.alpha <- n.s[i]/n
    cost.i <- cost(glm.y[j.out],
                   predict(d.glm, data[j.out, , drop=FALSE],
                           type = "response"))
    CV <- CV + p.alpha * cost.i
    cost.0 <- cost.0 - p.alpha *
      cost(glm.y, predict(d.glm, data, type = "response"))
  }
  list(call = call, K = K,
       delta = as.numeric(c(CV, CV + cost.0)),  # drop any names
       seed = seed)
}
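
# Usage sketch (assumes the built-in 'mtcars' data; comments only): 5-fold
# cross-validation of a Gaussian GLM with the default squared-error cost.
#   fit <- glm(mpg ~ wt + hp, data = mtcars)
#   cv.glm(mtcars, fit, K = 5)$delta   # raw and adjusted CV prediction error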


boot.ci <- function(boot.out,conf = 0.95,type = "all",
                    index = 1L:min(2L, length(boot.out$t0)),
                    var.t0 = NULL ,var.t = NULL, t0 = NULL, t = NULL,
                    L = NULL, h = function(t) t,
                    hdot = function(t) rep(1, length(t)),
                    hinv = function(t) t, ...)
  #
  #  Main function to calculate bootstrap confidence intervals.
  #  This function calls a number of auxilliary functions to do
  #  the actual calculations depending on the type of interval(s)
  #  requested.
  #
{
  call <- match.call()
  #  Get and transform the statistic values and their variances,
  if ((is.null(t) && !is.null(t0)) ||
      (!is.null(t) && is.null(t0)))
    stop("'t' and 't0' must be supplied together")
  t.o <- t; t0.o <- t0
  #    vt.o <- var.t
  vt0.o <- var.t0
  if (is.null(t)) {
    if (length(index) == 1L) {
      t0 <- boot.out$t0[index]
      t <- boot.out$t[,index]
    }
    else if (ncol(boot.out$t)<max(index)) {
      warning("index out of bounds; minimum index only used.")
      index <- min(index)
      t0 <- boot.out$t0[index]
      t <- boot.out$t[,index]
    }
    else {
      t0 <- boot.out$t0[index[1L]]
      t <- boot.out$t[,index[1L]]
      if (is.null(var.t0)) var.t0 <- boot.out$t0[index[2L]]
      if (is.null(var.t)) var.t <- boot.out$t[,index[2L]]
    }
  }
  if (const(t, min(1e-8, mean(t, na.rm=TRUE)/1e6))) {
    print(paste("All values of t are equal to ", mean(t, na.rm=TRUE),
                "\n Cannot calculate confidence intervals"))
    return(NULL)
  }
  if (length(t) != boot.out$R)
    stop(gettextf("'t' must be of length %d", boot.out$R), domain = NA)
  if (is.null(var.t))
    fins <- seq_along(t)[is.finite(t)]
  else {
    fins <- seq_along(t)[is.finite(t) & is.finite(var.t)]
    var.t <- var.t[fins]
  }
  t <- t[fins]
  R <- length(t)
  if (!is.null(var.t0)) var.t0 <- var.t0*hdot(t0)^2
  if (!is.null(var.t))  var.t <- var.t*hdot(t)^2
  t0 <- h(t0); t <- h(t)
  if (missing(L)) L <- boot.out$L
  output <- list(R = R, t0 = hinv(t0), call = call)
  #  Now find the actual intervals using the methods listed in type
  if (any(type == "all" | type == "norm"))
    output <- c(output,
                list(normal = norm.ci(boot.out, conf,
                                      index[1L], var.t0=vt0.o, t0=t0.o, t=t.o,
                                      L=L, h=h, hdot=hdot, hinv=hinv)))
  if (any(type == "all" | type == "basic"))
    output <- c(output, list(basic=basic.ci(t0,t,conf,hinv=hinv)))
  if (any(type == "all" | type == "stud")) {
    if (length(index)==1L)
      warning("bootstrap variances needed for studentized intervals")
    else
      output <- c(output, list(student=stud.ci(c(t0,var.t0),
                                               cbind(t ,var.t), conf, hinv=hinv)))
  }
  if (any(type == "all" | type == "perc"))
    output <- c(output, list(percent=perc.ci(t,conf,hinv=hinv)))
  if (any(type == "all" | type == "bca")) {
    if (find_type(boot.out) == "tsboot")
      warning("BCa intervals not defined for time series bootstraps")
    else
      output <- c(output, list(bca=bca.ci(boot.out,conf,
                                          index[1L],L=L,t=t.o, t0=t0.o,
                                          h=h,hdot=hdot, hinv=hinv, ...)))
  }
  class(output) <- "bootci"
  output
}
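
# Usage sketch (assumes the built-in 'rivers' data; comments only): several
# interval types from a single nonparametric bootstrap of the mean.
#   set.seed(1)
#   b <- boot(rivers, function(d, i) mean(d[i]), R = 1999)
#   boot.ci(b, type = c("norm", "basic", "perc", "bca"))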

print.bootci <- function(x, hinv = NULL, ...) {
  #
  #  Print the output from boot.ci
  #
  ci.out <- x
  cl <- ci.out$call
  ntypes <- length(ci.out)-3L
  nints <- nrow(ci.out[[4L]])
  t0 <- ci.out$t0
  if (!is.null(hinv)) t0 <- hinv(t0)  #
  #  Find the number of decimal places which should be used
  digs <- ceiling(log10(abs(t0)))
  if (digs <= 0) digs <- 4
  else if (digs >= 4) digs <- 0
  else digs <- 4-digs
  intlabs <- NULL
  basrg <- strg <- perg <- bcarg <- NULL
  if (!is.null(ci.out$normal))
    intlabs <- c(intlabs,"     Normal        ")
  if (!is.null(ci.out$basic)) {
    intlabs <- c(intlabs,"     Basic         ")
    basrg <- range(ci.out$basic[,2:3]) }
  if (!is.null(ci.out$student)) {
    intlabs <- c(intlabs,"   Studentized     ")
    strg <- range(ci.out$student[,2:3]) }
  if (!is.null(ci.out$percent)) {
    intlabs <- c(intlabs,"    Percentile     ")
    perg <- range(ci.out$percent[,2:3]) }
  if (!is.null(ci.out$bca)) {
    intlabs <- c(intlabs,"      BCa          ")
    bcarg <- range(ci.out$bca[,2:3]) }
  level <- 100*ci.out[[4L]][, 1L]
  if (ntypes == 4L) n1 <- n2 <- 2L
  else if (ntypes == 5L) {n1 <- 3L; n2 <- 2L}
  else {n1 <- ntypes; n2 <- 0L}
  ints1 <- matrix(NA,nints,2L*n1+1L)
  ints1[,1L] <- level
  n0 <- 4L
  #  Re-organize the intervals and coerce them into character data
  for (i in n0:(n0+n1-1)) {
    j <- c(2L*i-6L,2L*i-5L)
    nc <- ncol(ci.out[[i]])
    nc <- c(nc-1L,nc)
    if (is.null(hinv))
      ints1[,j] <- ci.out[[i]][,nc]
    else	ints1[,j] <- hinv(ci.out[[i]][,nc])
  }
  n0 <- 4L+n1
  ints1 <- format(round(ints1,digs))
  ints1[,1L] <- paste("\n",level,"%  ",sep="")
  ints1[,2*(1L:n1)] <- paste("(",ints1[,2*(1L:n1)],",",sep="")
  ints1[,2*(1L:n1)+1L] <- paste(ints1[,2*(1L:n1)+1L],")  ")
  if (n2 > 0) {
    ints2 <- matrix(NA,nints,2L*n2+1L)
    ints2[,1L] <- level
    j <- c(2L,3L)
    for (i in n0:(n0+n2-1L)) {
      if (is.null(hinv))
        ints2[,j] <- ci.out[[i]][,c(4L,5L)]
      else	ints2[,j] <- hinv(ci.out[[i]][,c(4L,5L)])
      j <- j+2L
    }
    ints2 <- format(round(ints2,digs))
    ints2[,1L] <- paste("\n",level,"%  ",sep="")
    ints2[,2*(1L:n2)] <- paste("(",ints2[,2*(1L:n2)],",",sep="")
    ints2[,2*(1L:n2)+1L] <- paste(ints2[,2*(1L:n2)+1L],")  ")
  }
  R <- ci.out$R                       #
  #  Print the intervals
  cat("BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS\n")
  cat(paste("Based on",R,"bootstrap replicates\n\n"))
  cat("CALL : \n")
  dput(cl, control=NULL)
  cat("\nIntervals : ")
  cat("\nLevel",intlabs[1L:n1])
  cat(t(ints1))
  if (n2 > 0) {
    cat("\n\nLevel",intlabs[(n1+1):(n1+n2)])
    cat(t(ints2))
  }
  if (!is.null(cl$h)) {
    if (is.null(cl$hinv) && is.null(hinv))
      cat("\nCalculations and Intervals on ",
          "Transformed Scale\n")
    else	cat("\nCalculations on Transformed Scale;",
             " Intervals on Original Scale\n")
  }
  else if (is.null(cl$hinv) && is.null(hinv))
    cat("\nCalculations and Intervals on Original Scale\n")
  else 	cat("\nCalculations on Original Scale",
            " but Intervals Transformed\n")#
  #  Print any warnings about extreme values.
  if (!is.null(basrg)) {
    if ((basrg[1L] <= 1) || (basrg[2L] >= R))
      cat("Warning : Basic Intervals used Extreme Quantiles\n")
    if ((basrg[1L] <= 10) || (basrg[2L] >= R-9))
      cat("Some basic intervals may be unstable\n")
  }
  if (!is.null(strg)) {
    if ((strg[1L] <= 1) || (strg[2L] >= R))
      cat("Warning : Studentized Intervals used Extreme Quantiles\n")
    if ((strg[1L] <= 10) || (strg[2L] >= R-9))
      cat("Some studentized intervals may be unstable\n")
  }
  if (!is.null(perg)) {
    if ((perg[1L] <= 1) || (perg[2L] >= R))
      cat("Warning : Percentile Intervals used Extreme Quantiles\n")
    if ((perg[1L] <= 10) || (perg[2L] >= R-9))
      cat("Some percentile intervals may be unstable\n")
  }
  if (!is.null(bcarg)) {
    if ((bcarg[1L] <= 1) || (bcarg[2L] >= R))
      cat("Warning : BCa Intervals used Extreme Quantiles\n")
    if ((bcarg[1L] <= 10) || (bcarg[2L] >= R-9))
      cat("Some BCa intervals may be unstable\n")
  }
  invisible(ci.out)
}

norm.ci <-
  function(boot.out = NULL,conf = 0.95,index = 1,var.t0 = NULL, t0 = NULL,
           t = NULL, L = NULL, h = function(t) t, hdot = function(t) 1,
           hinv = function(t) t)
    #
    #  Normal approximation method for confidence intervals.  This can be
    #  used with or without a bootstrap object.  If a bootstrap object is
    #  given then the intervals are bias corrected and the bootstrap variance
    #  estimate can be used if none is supplied.
    #
  {
    if (is.null(t0))  {
      if (!is.null(boot.out)) t0 <-boot.out$t0[index]
      else stop("bootstrap output object or 't0' required")
    }
    if (!is.null(boot.out) && is.null(t))
      t <- boot.out$t[,index]
    if (!is.null(t)) {
      fins <- seq_along(t)[is.finite(t)]
      t <- h(t[fins])
    }
    if (is.null(var.t0)) {
      if (is.null(t)) {
        if (is.null(L))
          stop("unable to calculate 'var.t0'")
        else	var.t0 <- sum((hdot(t0)*L/length(L))^2)
      }
      else	var.t0 <- var(t)
    }
    else	var.t0 <- var.t0*hdot(t0)^2
    t0 <- h(t0)
    if (!is.null(t))
      bias <- mean(t)-t0
    else	bias <- 0
    merr <- sqrt(var.t0)*qnorm((1+conf)/2)
    out <- cbind(conf,hinv(t0-bias-merr),hinv(t0-bias+merr))
    out
  }

norm.inter <- function(t,alpha)
  #
  #  Interpolation on the normal quantile scale.  For a non-integer
  #  order statistic this function interpolates between the surrounding
  #  order statistics using the normal quantile scale.  See equation
  #  5.8 of Davison and Hinkley (1997)
  #
{
  t <- t[is.finite(t)]
  R <- length(t)
  rk <- (R+1)*alpha
  if (!all(rk>1 & rk<R))
    warning("extreme order statistics used as endpoints")
  k <- trunc(rk)
  inds <- seq_along(k)
  out <- inds
  kvs <- k[k>0 & k<R]
  tstar <- sort(t, partial = sort(union(c(1, R), c(kvs, kvs+1))))
  ints <- (k == rk)
  if (any(ints)) out[inds[ints]] <- tstar[k[inds[ints]]]
  out[k == 0] <- tstar[1L]
  out[k == R] <- tstar[R]
  not <- function(v) xor(rep(TRUE,length(v)),v)
  temp <- inds[not(ints) & k != 0 & k != R]
  temp1 <- qnorm(alpha[temp])
  temp2 <- qnorm(k[temp]/(R+1))
  temp3 <- qnorm((k[temp]+1)/(R+1))
  tk <- tstar[k[temp]]
  tk1 <- tstar[k[temp]+1L]
  out[temp] <- tk + (temp1-temp2)/(temp3-temp2)*(tk1 - tk)
  cbind(round(rk, 2), out)
}
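
# Worked sketch (comments only): with t = 1:9 and R = 9, alpha = 0.5 gives
# rk = (R+1)*alpha = 5, an integer, so the 5th order statistic (5) is returned
# exactly; alpha = 0.25 gives rk = 2.5, so the result interpolates between the
# 2nd and 3rd order statistics on the normal quantile scale (roughly 2.53).
#   norm.inter(1:9, c(0.25, 0.5))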

basic.ci <- function(t0, t, conf = 0.95, hinv = function(t) t)
  #
  #  Basic bootstrap confidence method
  #
{
  qq <- norm.inter(t,(1+c(conf,-conf))/2)
  cbind(conf, matrix(qq[,1L],ncol=2L), matrix(hinv(2*t0-qq[,2L]),ncol=2L))
}

stud.ci <- function(tv0, tv, conf = 0.95, hinv=function(t) t)
  #
  #  Studentized version of the basic bootstrap confidence interval
  #
{
  if ((length(tv0) < 2) || (ncol(tv) < 2)) {
    warning("variance required for studentized intervals")
    NA
  } else {
    z <- (tv[,1L]-tv0[1L])/sqrt(tv[,2L])
    qq <- norm.inter(z, (1+c(conf,-conf))/2)
    cbind(conf, matrix(qq[,1L],ncol=2L),
          matrix(hinv(tv0[1L]-sqrt(tv0[2L])*qq[,2L]),ncol=2L))
  }
}

perc.ci <- function(t, conf = 0.95, hinv = function(t) t)
  #
  #  Bootstrap Percentile Confidence Interval Method
  #
{
  alpha <- (1+c(-conf,conf))/2
  qq <- norm.inter(t,alpha)
  cbind(conf,matrix(qq[,1L],ncol=2L),matrix(hinv(qq[,2]),ncol=2L))
}
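
# Relationship sketch (comments only): for the same replicates t and observed
# value t0, the basic interval is the percentile interval reflected about t0.
#   set.seed(1); t <- rnorm(999); t0 <- 0
#   perc  <- perc.ci(t)[4:5]          # percentile endpoints (lower, upper)
#   basic <- basic.ci(t0, t)[4:5]     # basic endpoints (lower, upper)
#   all.equal(basic, 2 * t0 - rev(perc))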

bca.ci <-
  function(boot.out,conf = 0.95,index = 1,t0 = NULL,t = NULL, L = NULL,
           h = function(t) t, hdot = function(t) 1, hinv = function(t) t,
           ...)
    #
    #  Adjusted Percentile (BCa) Confidence interval method.  This method
    #  uses quantities calculated from the empirical influence values to
    #  improve on the percentile interval.  Usually the required order
    #  statistics for this method will not be integers and so norm.inter
    #  is used to find them.
    #
  {
    t.o <- t
    if (is.null(t) || is.null(t0)) {
      t <- boot.out$t[,index]
      t0 <- boot.out$t0[index]
    }
    t <- t[is.finite(t)]
    w <- qnorm(sum(t < t0)/length(t))
    if (!is.finite(w)) stop("estimated adjustment 'w' is infinite")
    alpha <- (1+c(-conf,conf))/2
    zalpha <- qnorm(alpha)
    if (is.null(L))
      L <- empinf(boot.out, index=index, t=t.o, ...)
    a <- sum(L^3)/(6*sum(L^2)^1.5)
    if (!is.finite(a)) stop("estimated adjustment 'a' is NA")
    adj.alpha <- pnorm(w + (w+zalpha)/(1-a*(w+zalpha)))
    qq <- norm.inter(t,adj.alpha)
    cbind(conf, matrix(qq[,1L],ncol=2L), matrix(hinv(h(qq[,2L])),ncol=2L))
  }



abc.ci <- function(data, statistic, index = 1, strata = rep(1, n), conf = 0.95,
                   eps = 0.001/n, ...)
  #
  #   Non-parametric ABC method for constructing confidence intervals.
  #
{
  y <- data
  n <- NROW(y)
  strata1 <- tapply(strata,as.numeric(strata))
  if (length(index) != 1L) {
    warning("only first element of 'index' used in 'abc.ci'")
    index <- index[1L]
  }
  S <- length(table(strata1))
  mat <- matrix(0,n,S)
  for (s in 1L:S) {
    gp <- seq_len(n)[strata1 == s]
    mat[gp,s] <- 1
  }
  #  Calculate the observed value of the statistic
  w.orig <- rep(1/n,n)
  t0 <- statistic(y,w.orig/(w.orig%*%mat)[strata1], ...)[index]#
  #  Now find the linear and quadratic empirical influence values through
  #  numerical differentiation
  L <- L2 <- numeric(n)
  for (i in seq_len(n)) {
    w1 <- (1-eps)*w.orig
    w1[i] <- w1[i]+eps
    w2 <- (1+eps)*w.orig
    w2[i] <- w2[i] - eps
    t1 <- statistic(y,w1/(w1%*%mat)[strata1], ...)[index]
    t2 <- statistic(y,w2/(w2%*%mat)[strata1], ...)[index]
    L[i] <- (t1-t2)/(2*eps)
    L2[i] <- (t1-2*t0+t2)/eps^2
  }
  #  Calculate the required quantities for the intervals
  temp1 <- sum(L*L)
  sigmahat <- sqrt(temp1)/n
  ahat <- sum(L^3)/(6*temp1^1.5)      # called a in the text
  bhat <- sum(L2)/(2*n*n)             # called b in the text
  dhat <- L/(n*n*sigmahat)            # called k in the text
  w3 <- w.orig+eps*dhat
  w4 <- w.orig-eps*dhat
  chat <- (statistic(y,w3/(w3%*%mat)[strata1], ...)[index]-2*t0 +
             statistic(y,w4/(w4%*%mat)[strata1], ...)[index]) /
    (2*eps*eps*sigmahat)   # called c in the text
  bprime <- ahat-(bhat/sigmahat-chat) # called w in the text
  alpha <- (1+as.vector(rbind(-conf,conf)))/2
  zalpha <- qnorm(alpha)
  lalpha <- (bprime+zalpha)/(1-ahat*(bprime+zalpha))^2#
  #  Finally calculate the interval endpoints by calling the statistic with
  #  various weight vectors.
  out <- seq(alpha)
  for (i in seq_along(alpha)) {
    w.fin <- w.orig+lalpha[i]*dhat
    out[i] <- statistic(y,w.fin/(w.fin%*%mat)[strata1], ...)[index]
  }
  out <- cbind(conf,matrix(out,ncol=2L,byrow=TRUE))
  if (length(conf) == 1L) out <- as.vector(out)
  out
}
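
# Usage sketch (assumes the built-in 'rivers' data; comments only): ABC interval
# for the mean; the statistic must accept a weight vector as its second argument,
# and the weights passed in sum to 1, so sum(d * w) is the weighted mean.
#   abc.ci(rivers, function(d, w) sum(d * w), conf = 0.95)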

censboot <-
  function(data, statistic, R, F.surv, G.surv, strata = matrix(1, n, 2),
           sim = "ordinary", cox = NULL, index = c(1, 2), ...,
           parallel = c("no", "multicore", "snow"),
           ncpus = getOption("boot.ncpus", 1L), cl = NULL)
  {
    #
    #  Bootstrap replication for survival data.  Possible resampling
    #  schemes are case, model-based, conditional bootstrap (with or without
    #  a model) and the weird bootstrap.
    #
    mstrata <- missing(strata)
    if (any(is.na(data)))
      stop("missing values not allowed in 'data'")
    if ((sim != "ordinary") && (sim != "model") && (sim != "cond")
        && (sim != "weird")) stop("unknown value of 'sim'")
    if ((sim == "model") && (is.null(cox))) sim <- "ordinary"
    if (missing(parallel)) parallel <- getOption("boot.parallel", "no")
    parallel <- match.arg(parallel)
    have_mc <- have_snow <- FALSE
    if (parallel != "no" && ncpus > 1L) {
      if (parallel == "multicore") have_mc <- .Platform$OS.type != "windows"
      else if (parallel == "snow") have_snow <- TRUE
      if (!have_mc && !have_snow) ncpus <- 1L
      loadNamespace("parallel") # get this out of the way before recording seed
    }
    if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
    seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
    call <- match.call()
    if (isMatrix(data)) n <- nrow(data)
    else stop("'data' must be a matrix with at least 2 columns")
    if (ncol(data) < 2L)
      stop("'data' must be a matrix with at least 2 columns")
    if (length(index) < 2L)
      stop("'index' must contain 2 elements")
    if (length(index) > 2L) {
      warning("only first 2 elements of 'index' used")
      index <- index[1L:2L]
    }
    if (ncol(data) < max(index))
      stop("indices are incompatible with 'ncol(data)'")
    if (sim == "weird") {
      if (!is.null(cox))
        stop("sim = \"weird\" cannot be used with a \"coxph\" object")
      if (ncol(data) > 2L)
        warning(gettextf("only columns %s and %s of 'data' used",
                         index[1L], index[2L]), domain = NA)
      data <- data[,index]
    }
    if (!is.null(cox) && is.null(cox$coefficients) &&
        ((sim == "cond") || (sim == "model"))) {
      warning("no coefficients in Cox model -- model ignored")
      cox <- NULL
    }
    if ((sim != "ordinary")  && missing(F.surv))
      stop("'F.surv' is required but missing")
    if (missing(G.surv) && ((sim == "cond") || (sim == "model")))
      stop("'G.surv' is required but missing")
    if (NROW(strata) != n) stop("'strata' of wrong length")
    if (!isMatrix(strata)) {
      if (!((sim == "weird") || (sim == "ordinary")))
        strata <- cbind(strata, 1)
    } else {
      if ((sim == "weird") || (sim == "ordinary")) strata <- strata[, 1L]
      else  strata <- strata[, 1L:2L]
    }
    temp.str <- strata
    strata <- if (isMatrix(strata))
      apply(strata, 2L, function(s, n) tapply(seq_len(n), as.numeric(s)), n)
    else  tapply(seq_len(n), as.numeric(strata))
    t0 <- if ((sim == "weird") && !mstrata) statistic(data, temp.str, ...)
    else  statistic(data, ...)
    ## Calculate the resampled data sets.  For ordinary resampling this
    ## involves finding the matrix of indices of the cases to be resampled.
    ## For the conditional or model-based bootstrap we must find an array
    ## consisting of R matrices containing the resampled times and their
    ## censoring indicators.  The data sets for the weird bootstrap must be
    ## calculated individually.
    fn <- if (sim == "ordinary") {
      bt <- cens.case(n, strata, R)
      function(r) statistic(data[sort(bt[r, ]), ], ...)
    } else if (sim == "weird") {
      ## force promises
      data; F.surv
      if (!mstrata) {
        function(r) {
          bootdata <- cens.weird(data, F.surv, strata)
          statistic(bootdata[, 1:2], bootdata[, 3L], ...)
        }
      } else  {
        function(r) {
          bootdata <- cens.weird(data, F.surv, strata)
          statistic(bootdata[, 1:2], ...)
        }
      }
    } else {
      bt <- cens.resamp(data, R, F.surv, G.surv, strata, index, cox, sim)
      function(r) {
        bootdata <- data
        bootdata[, index] <- bt[r, , ]
        oi <- order(bt[r, , 1L], 1-bt[r, , 2L])
        statistic(bootdata[oi, ], ...)
      }
    }
    rm(mstrata)

    res <- if (ncpus > 1L && (have_mc || have_snow)) {
      if (have_mc) {
        parallel::mclapply(seq_len(R), fn, ..., mc.cores = ncpus)
      } else if (have_snow) {
        list(...) # evaluate any promises
        if (is.null(cl)) {
          cl <- parallel::makePSOCKcluster(rep("localhost", ncpus))
          if(RNGkind()[1L] == "L'Ecuyer-CMRG")
            parallel::clusterSetRNGStream(cl)
          parallel::clusterEvalQ(cl, library(survival))
          res <- parallel::parLapply(cl, seq_len(R), fn)
          parallel::stopCluster(cl)
          res
        } else {
          parallel::clusterEvalQ(cl, library(survival))
          parallel::parLapply(cl, seq_len(R), fn)
        }
      }
    } else lapply(seq_len(R), fn)

    t <- matrix(, R, length(t0))
    for(r in seq_len(R)) t[r, ] <- res[[r]]

    cens.return(sim, t0, t, temp.str, R, data, statistic, call, seed)
  }

cens.return <- function(sim, t0, t, strata, R, data, statistic, call, seed) {
  #
  #  Create an object of class "boot" from the output of a censored bootstrap.
  #
  out <- list(t0 = t0, t = t, R = R, sim = sim, data = data, seed = seed,
              statistic = statistic, strata = strata, call = call)
  class(out) <- "boot"
  attr(out, "boot_type") <- "censboot"
  out
}

cens.case <- function(n, strata, R) {
  #
  #  Simple case resampling.
  #
  out <- matrix(NA, nrow = R, ncol = n)
  for (s in seq_along(table(strata))) {
    inds <- seq_len(n)[strata == s]
    ns <- length(inds)
    out[, inds] <- bsample(inds,  ns*R)
  }
  out
}


cens.weird <- function(data, surv, strata) {
  #
  #  The weird bootstrap.  Censoring times are fixed and the number of
  #  failures at each failure time are sampled from a binomial
  #  distribution.  See Chapter 3 of Davison and Hinkley (1997).
  #
  #  data is a two column matrix containing the times and censoring
  #    indicator.
  #  surv is a survival object giving the failure time distribution.
  #  strata is the strata vector used in surv or a vector of 1's if no
  #    strata were used.
  #
  m <- length(surv$time)
  if (is.null(surv$strata)) {
    nstr <- 1
    str <- rep(1, m)
  } else {
    nstr <- length(surv$strata)
    str <- rep(1L:nstr, surv$strata)
  }
  n.ev <- rbinom(m, surv$n.risk, surv$n.event/surv$n.risk)
  while (any(tapply(n.ev, str, sum) == 0))
    n.ev <- rbinom(m, surv$n.risk, surv$n.event/surv$n.risk)
  times <- rep(surv$time, n.ev)
  str <- rep(str, n.ev)
  out <- NULL
  for (s in 1L:nstr) {
    temp <- cbind(times[str == s], 1)
    temp <- rbind(temp,
                  as.matrix(data[(strata == s&data[, 2L] == 0), , drop=FALSE]))
    temp <- cbind(temp, s)
    oi <- order(temp[, 1L], 1-temp[, 2L])
    out <- rbind(out, temp[oi, ])
  }
  if (is.data.frame(data)) out <- as.data.frame(out)
  out
}



cens.resamp <- function(data, R, F.surv, G.surv, strata, index = c(1,2),
                        cox = NULL, sim = "model")
{
  #
  #  Other types of resampling for the censored bootstrap.  This function
  #  uses some local functions to implement the conditional bootstrap for
  #  censored data and resampling based on a Cox regression model.  This
  #  latter method of sampling can also use conditional sampling to get the
  #  censoring times.
  #
  #  data is the data set
  #  R is the number of replicates
  #  F.surv is a survfit object for the failure time distribution
  #  G.surv is a survfit object for the censoring time distribution
  #  strata is a two column matrix; the first column gives the strata
  #     for the failure times and the second for the censoring times.
  #  index is a vector with two integer components giving the position
  #     of the times and censoring indicators in data
  #  cox is an object returned by the coxph function to give the Cox
  #     regression model for the failure times.
  #  sim is the simulation type which will always be "model" or "cond"
  #
  gety1 <- function(n, R, surv, inds) {
    # Sample failure times from the product limit estimate of the failure
    # time distribution.
    survival <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    if (survival[n1] > 0L) {
      survival <- c(survival, 0)
      time <- c(time, Inf)
    }
    probs <- diff(-c(1, survival))
    matrix(bsample(time, n*R, prob = probs), R, n)
  }
  gety2 <- function(n, R, surv, eta, inds) {
    # Sample failure times from the Cox regression model.
    F0 <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    if (F0[n1] > 0) {
      F0 <- c(F0, 0)
      time <- c(time, Inf)
    }
    ex <- exp(eta)
    Fh <- 1 - outer(F0, ex, "^")
    apply(rbind(0, Fh), 2L,
          function(p, y, R) bsample(y, R, prob = diff(p)), time, R)
  }
  getc1 <- function(n, R, surv, inds) {
    # Sample censoring times from the product-limit estimate of the
    # censoring distribution.
    cens <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    if (cens[n1] > 0) {
      cens <- c(cens, 0)
      time <- c(time, Inf)
    }
    probs <- diff(-c(1, cens))
    matrix(bsample(time, n*R, prob = probs), nrow = R)
  }
  getc2 <- function(n, R, surv, inds, data, index) {
    # Sample censoring times from the conditional distribution.  If a failure
    # was observed then sample from the product-limit estimate of the censoring
    # distribution conditional on the time being greater than the observed
    # failure time.  If the observation is censored then the resampled time is
    # the observed censoring time.
    cens <- surv$surv[inds]
    time <- surv$time[inds]
    n1 <- length(time)
    if (cens[n1] > 0) {
      cens <- c(cens, 0)
      time <- c(time, Inf)
    }
    probs <- diff(-c(1, cens))
    cout <- matrix(NA, R, n)
    for (i in seq_len(n)) {
      if (data[i, 2] == 0) cout[, i] <- data[i, 1L]
      else {
        pri <- probs[time > data[i, 1L]]
        ti <- time[time > data[i, 1L]]
        if (length(ti) == 1L) cout[, i] <- ti
        else cout[, i] <- bsample(ti, R, prob = pri)
      }
    }
    cout
  }
  n <- nrow(data)
  Fstart <- 1
  Fstr <- F.surv$strata
  if (is.null(Fstr)) Fstr <- length(F.surv$time)
  Gstart <- 1
  Gstr <- G.surv$strata
  if (is.null(Gstr)) Gstr <- length(G.surv$time)
  y0 <- matrix(NA, R, n)
  for (s in seq_along(table(strata[, 1L]))) {
    # Find the resampled failure times within strata for failures
    ns <- sum(strata[, 1L] == s)
    inds <- Fstart:(Fstr[s]+Fstart-1)
    y0[, strata[, 1L] == s] <- if (is.null(cox)) gety1(ns, R, F.surv, inds)
    else  gety2(ns, R, F.surv, cox$linear.predictors[strata[, 1L] == s], inds)
    Fstart <- Fstr[s]+Fstart
  }
  c0 <- matrix(NA, R, n)
  for (s in seq_along(table(strata[, 2L]))) {
    # Find the resampled censoring times within strata for censoring times
    ns <- sum(strata[, 2] == s)
    inds <- Gstart:(Gstr[s]+Gstart-1)
    c0[, strata[, 2] == s] <- if (sim != "cond") getc1(ns, R, G.surv, inds)
    else  getc2(ns, R, G.surv, inds, data[strata[,2] == s, index])
    Gstart <- Gstr[s]+Gstart
  }
  infs <- (is.infinite(y0) & is.infinite(c0))
  if (sum(infs) > 0) {
    # If both the resampled failure time and the resampled censoring time
    # are infinite then set the resampled time to be a failure at the largest
    # failure time in the failure time stratum containing the observation.
    evs <- seq_len(n)[data[, index[2L]] == 1]
    maxf <- tapply(data[evs, index[1L]], strata[evs, 1L], max)
    maxf <- matrix(maxf[strata[, 1L]], nrow = R, ncol = n, byrow = TRUE)
    y0[infs] <- maxf[infs]
  }
  array(c(pmin(y0, c0), 1*(y0 <= c0)), c(dim(y0), 2))
}

empinf <- function(boot.out = NULL, data = NULL, statistic = NULL,
                   type = NULL, stype = NULL ,index = 1, t = NULL,
                   strata = rep(1, n), eps = 0.001, ...)
{
  #
  #   Calculation of empirical influence values.  Possible types are
  #   "inf" = infinitesimal jackknife (numerical differentiation)
  #   "reg" = regression based estimation
  #   "jack" = usual jackknife estimates
  #   "pos" = positive jackknife estimates
  #
  if (!is.null(boot.out))
  {
    if (boot.out$sim == "parametric")
      stop("influence values cannot be found from a parametric bootstrap")
    data <- boot.out$data
    if (is.null(statistic))
      statistic <- boot.out$statistic
    if (is.null(stype))
      stype <- boot.out$stype
    if (!is.null(boot.out$strata))
      strata <- boot.out$strata
  }
  else
  {
    if (is.null(data))
      stop("neither 'data' nor bootstrap object specified")
    if (is.null(statistic))
      stop("neither 'statistic' nor bootstrap object specified")
    if (is.null(stype)) stype <- "w"
  }
  n <- NROW(data)
  if (is.null(type)) {
    if (!is.null(t)) type <- "reg"
    else if (stype == "w") type <- "inf"
    else if (!is.null(boot.out) &&
             (boot.out$sim != "parametric") &&
             (boot.out$sim != "permutation")) type <- "reg"
    else type <- "jack"
  }

  if (type == "inf") {
    # calculate the infinitesimal jackknife values by numerical differentiation
    if (stype !="w") stop("'stype' must be \"w\" for type=\"inf\"")
    if (length(index) != 1L) {
      warning("only first element of 'index' used")
      index <- index[1L]
    }
    if (!is.null(t))
      warning("input 't' ignored; type=\"inf\"")
    L <- inf.jack(data, statistic, index, strata, eps, ...)
  } else if (type == "reg") {
    # calculate the regression estimates of the influence values
    if (is.null(boot.out))
      stop("bootstrap object needed for type=\"reg\"")
    if (is.null(t)) {
      if (length(index) != 1L) {
        warning("only first element of 'index' used")
        index <- index[1L]
      }
      t <- boot.out$t[,index]
    }
    L <- empinf.reg(boot.out, t)
  } else if (type == "jack") {
    if (!is.null(t))
      warning("input 't' ignored; type=\"jack\"")
    if (length(index) != 1L) {
      warning("only first element of 'index' used")
      index <- index[1L]
    }
    L <- usual.jack(data, statistic, stype, index, strata, ...)
  } else if (type == "pos") {
    if (!is.null(t))
      warning("input 't' ignored; type=\"pos\"")
    if (length(index) != 1L) {
      warning("only first element of 'index' used")
      index <- index[1L]
    }
    L <- positive.jack(data, statistic, stype, index, strata, ...)
  }
  L
}
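
# Usage sketch (assumes the built-in 'rivers' data; comments only): regression
# estimates of the influence values for the sample mean, which should be close
# to the centred observations rivers - mean(rivers).
#   b <- boot(rivers, function(d, i) mean(d[i]), R = 999)
#   L <- empinf(b, type = "reg")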

inf.jack <-
  function(data, stat, index = 1, strata  =  rep(1, n), eps  =  0.001, ...)
  {
    #
    #   Numerical differentiation to get infinitesimal jackknife estimates
    #   of the empirical influence values.
    #
    n <- NROW(data)
    L <- seq_len(n)
    eps <- eps/n
    strata <- tapply(strata, as.numeric(strata))
    w.orig <- 1/table(strata)[strata]
    tobs <- stat(data, w.orig, ...)[index]
    for(i in seq_len(n)) {
      group <- seq_len(n)[strata == strata[i]]
      w <- w.orig
      w[group] <- (1 - eps)*w[group]
      w[i] <- w[i] + eps
      L[i] <- (stat(data, w, ...)[index] - tobs)/eps
    }
    L
  }

empinf.reg <- function(boot.out, t = boot.out$t[,1L])
  #
  #  Function to estimate empirical influence values using regression.
  #  This method regresses the observed bootstrap values on the bootstrap
  #  frequencies to estimate the empirical influence values
  #
{
  fins <- seq_along(t)[is.finite(t)]
  t <- t[fins]
  R <- length(t)
  n <- NROW(boot.out$data)
  strata <- boot.out$strata
  if (is.null(strata))
    strata <- rep(1,n)
  else 	strata <- tapply(strata,as.numeric(strata))
  ns <- table(strata)
  #    S <- length(ns)
  f <- boot.array(boot.out)[fins,]
  X <- f/matrix(ns[strata], R, n ,byrow=TRUE)
  out <- tapply(seq_len(n), strata, min)
  inc <- seq_len(n)[-out]
  X <- X[,inc]
  beta <- coefficients(glm(t ~ X))[-1L]
  l <- rep(0, n)
  l[inc] <- beta
  l <- l - tapply(l,strata,mean)[strata]
  l
}

usual.jack <- function(data, stat, stype = "w", index = 1,
                       strata = rep(1, n), ...)
  #
  #  Function to use the usual (delete-1) jackknife method to estimate the
  #  empirical influence values
  #
{
  n <- NROW(data)
  l <- rep(0,n)
  strata <- tapply(strata,as.numeric(strata))
  if (stype == "w") {
    w0 <- rep(1, n)/table(strata)[strata]
    tobs <- stat(data, w0, ...)[index]
    for (i in seq_len(n)) {
      w1 <- w0
      w1[i] <- 0
      gp <- strata == strata[i]
      w1[gp] <- w1[gp]/sum(w1[gp])
      l[i] <- (sum(gp)-1)*(tobs - stat(data,w1, ...)[index])
    }
  } else if (stype == "f") {
    f0 <- rep(1,n)
    tobs <- stat(data, f0, ...)[index]
    for (i in seq_len(n)) {
      f1 <- f0
      f1[i] <- 0
      gp <- strata == strata[i]
      l[i] <- (sum(gp)-1)*(tobs - stat(data, f1, ...)[index])
    }
  } else {
    i0 <- seq_len(n)
    tobs <- stat(data, i0, ...)[index]
    for (i in seq_len(n)) {
      i1 <- i0[-i]
      gp <- strata == strata[i]
      l[i] <- (sum(gp)-1)*(tobs - stat(data, i1, ...)[index])
    }
  }
  l
}
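
# Identity sketch (assumes the built-in 'rivers' data; comments only): for the
# sample mean the delete-1 jackknife influence values reduce exactly to the
# centred observations.
#   l <- usual.jack(rivers, function(d, i) mean(d[i]), stype = "i")
#   all.equal(l, rivers - mean(rivers))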

positive.jack <- function(data, stat, stype = "w", index = 1,
                          strata = rep(1 ,n), ...)
{
  #
  #  Use the positive jackknife to estimate the empirical influence values.
  #  The positive jackknife includes one observation twice to find its
  #  influence.
  #
  strata <- tapply(strata,as.numeric(strata))
  n <- NROW(data)
  L <- rep(0, n)
  if (stype == "w") {
    w0 <- rep(1, n)/table(strata)[strata]
    tobs <- stat(data, w0, ...)[index]
    for (i in seq_len(n)) {
      st1 <- c(strata,strata[i])
      w1 <- 1/table(st1)[strata]
      w1[i] <- 2*w1[i]
      gp <- strata == strata[i]
      w1[gp] <- w1[gp]/sum(w1[gp])
      L[i] <- (sum(gp)+1)*(stat(data, w1, ...)[index] - tobs)
    }
  } else if (stype == "f") {
    f0 <- rep(1,n)
    tobs <- stat(data, f0, ...)[index]
    for (i in seq_len(n)) {
      f1 <- f0
      f1[i] <- 2
      gp <- strata == strata[i]
      L[i] <- (sum(gp)+1)*(stat(data, f1, ...)[index] - tobs)
    }
  } else if (stype == "i") {
    i0 <- seq_len(n)
    tobs <- stat(data, i0, ...)[index]
    for (i in seq_len(n)) {
      i1 <- c(i0, i)
      gp <- strata == strata[i]
      L[i] <- (sum(gp)+1)*(stat(data, i1, ...)[index] - tobs)
    }
  }
  L
}
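
# Example sketch (not run): delete-1 and positive jackknife influence values for
# an index-based statistic (stype = "i").
#   x <- rexp(15)
#   st <- function(d, i) mean(d[i])
#   usual.jack(x, st, stype = "i")
#   positive.jack(x, st, stype = "i")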

linear.approx <- function(boot.out, L = NULL, index = 1, type = NULL,
                          t0 = NULL, t = NULL, ...)
  #
  #  Find the linear approximation to the bootstrap replicates of a
  #  statistic.  L should be the linear influence values which will
  #  be found by empinf if they are not supplied.
  #
{
  f <- boot.array(boot.out)
  n <- length(f[1,  ])
  if ((length(index) > 1L) && (is.null(t0) || is.null(t))) {
    warning("only first element of 'index' used")
    index <- index[1L]
  }
  if (is.null(t0)) {
    t0 <- boot.out$t0[index]
    if (is.null(L))
      L <- empinf(boot.out, index=index, type=type, ...)
  } else if (is.null(t) && is.null(L)) {
    warning("input 't0' ignored: neither 't' nor 'L' supplied")
    t0 <- t0[index]
    L <- empinf(boot.out, index=index, type=type, ...)
  }
  else if (is.null(L))
    L <- empinf(boot.out, type=type, t=t, ...)
  tL <- rep(t0, boot.out$R)
  strata <- boot.out$strata
  if (is.null(strata))
    strata <- rep(1, n)
  else 	strata <- tapply(strata,as.numeric(strata))
  S <- length(table(strata))
  for(s in 1L:S) {
    i.s <- seq_len(n)[strata == s]
    tL <- tL + f[, i.s] %*% L[i.s]/length(i.s)
  }
  as.vector(tL)
}
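
# Example sketch (not run; assumes the boot() defined elsewhere in this file):
#   b <- boot(rgamma(25, 2), function(d, i) mean(d[i]), R = 199)
#   tL <- linear.approx(b)
#   cor(b$t[, 1], tL)   # close to 1 for a statistic that is nearly linear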

envelope <-
  function(boot.out = NULL, mat = NULL, level = 0.95, index = 1L:ncol(mat))
    #
    #  Function to estimate pointwise and overall confidence envelopes for
    #  a function.
    #
    #  mat is a matrix of bootstrap values of the function at a number of
    #     points.  The points at which they are evaluated are assumed to
    #     be constant over the rows.
    #
  {
    emperr <- function(rmat, p = 0.05, k = NULL)
      #  Local function to estimate the overall error rate of an envelope.
    {
      R <- nrow(rmat)
      if (is.null(k)) k <- p*(R+1)/2 else p <- 2*k/(R+1)
      kf <- function(x, k, R) 1*((min(x) <= k)|(max(x) >= R+1L-k))
      c(k, p, sum(apply(rmat, 1L, kf, k, R))/(R+1))
    }
    kfun <- function(x, k1, k2)
      # Local function to find the cut-off points in each column of the matrix.
      sort(x ,partial = sort(c(k1, k2)))[c(k1, k2)]
    if (!is.null(boot.out) && isMatrix(boot.out$t)) mat <- boot.out$t
    if (!isMatrix(mat)) stop("bootstrap output matrix missing")
    if (length(index) < 2L) stop("use 'boot.ci' for scalar parameters")
    mat <- mat[,index]
    rmat <- apply(mat,2L,rank)
    R <- nrow(mat)
    if (length(level) == 1L) level <- rep(level,2L)
    k.pt <- floor((R+1)*(1-level[1L])/2+1e-10)
    k.pt <- c(k.pt, R+1-k.pt)
    err.pt <- emperr(rmat,k = k.pt[1L])
    ov <- emperr(rmat,k = 1)
    ee <- err.pt
    al <- 1-level[2L]
    if (ov[3L] > al)
      warning("unable to achieve requested overall error rate")
    else {
      continue <- !(ee[3L] < al)
      while(continue) {
        #  If the observed error is greater than the level required for the overall
        #  envelope then try another envelope.  This loop uses linear interpolation
        #  on the integers between 1 and k.pt[1L] to find the required value.
        kk <- ov[1L]+round((ee[1L]-ov[1L])*(al-ov[3L])/ (ee[3L]-ov[3L]))
        if (kk == ov[1L]) kk <- kk+1
        else if (kk == ee[1L]) kk <- kk-1
        temp <- emperr(rmat, k = kk)
        if (temp[3L] > al) ee <- temp
        else ov <- temp
        continue <- !(ee[1L] == ov[1L]+1)
      }
    }
    k.ov <- c(ov[1L], R+1-ov[1L])
    err.ov <- ov[-1L]
    out <- apply(mat, 2L, kfun, k.pt, k.ov)
    list(point = out[2:1,], overall = out[4:3,], k.pt = k.pt,
         err.pt = err.pt[-1L], k.ov = k.ov, err.ov = err.ov, err.nom = 1-level)
  }
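
# Example sketch (not run; simulated curves): each row of 'mat' is one bootstrap
# replicate of a curve evaluated at nine points.
#   mat <- t(replicate(199, quantile(rnorm(50), probs = 1:9/10)))
#   env <- envelope(mat = mat, level = 0.95)
#   env$point     # pointwise 95% limits; env$overall gives the overall envelope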


glm.diag <- function(glmfit)
{
  #
  #  Calculate diagnostics for objects of class "glm".  The diagnostics
  #  calculated are various types of residuals as well as the Cook statistics
  #  and the leverages.
  #
  w <- if (is.null(glmfit$prior.weights)) rep(1,length(glmfit$residuals))
  else glmfit$prior.weights
  sd <- switch(family(glmfit)$family[1L],
               "gaussian" = sqrt(glmfit$deviance/glmfit$df.residual),
               "Gamma" = sqrt(sum(w*(glmfit$y/fitted(glmfit) - 1)^2)/
                                glmfit$df.residual),
               1)
  ##     sd <- ifelse(family(glmfit)$family[1L] == "gaussian",
  ##                  sqrt(glmfit$deviance/glmfit$df.residual), 1)
  ##     sd <- ifelse(family(glmfit)$family[1L] == "Gamma",
  ##                  sqrt(sum(w*(glmfit$y/fitted(glmfit) - 1)^2)/glmfit$df.residual), sd)
  dev <- residuals(glmfit, type = "deviance")/sd
  pear <- residuals(glmfit, type = "pearson")/sd
  ## R change: lm.influence drops 0-wt cases.
  h <- rep(0, length(w))
  h[w != 0] <- lm.influence(glmfit)$hat
  p <- glmfit$rank
  rp <- pear/sqrt(1 - h)
  rd <- dev/sqrt(1 - h)
  cook <- (h * rp^2)/((1 - h) * p)
  res <- sign(dev) * sqrt(dev^2 + h * rp^2)
  list(res = res, rd = rd, rp = rp, cook = cook, h = h, sd = sd)
}
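
# Example sketch (not run): residuals, Cook statistics and leverages for a GLM.
#   fit <- glm(am ~ wt, family = binomial, data = mtcars)
#   d <- glm.diag(fit)
#   which(d$cook > 8/(nrow(mtcars) - 2*fit$rank))   # possibly influential cases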


# glm.diag.plots <-
#   function(glmfit, glmdiag = glm.diag(glmfit), subset  =  NULL,
#            iden = FALSE, labels = NULL, ret = FALSE)
#   {
#     #  Diagnostic plots for objects of class "glm"
#     if (is.null(glmdiag))
#       glmdiag <- glm.diag(glmfit)
#     if (is.null(subset))
#       subset <- seq_along(glmdiag$h)
#     else if (is.logical(subset))
#       subset <- seq_along(subset)[subset]
#     else if (is.numeric(subset) && all(subset<0))
#       subset <- (1L:(length(subset)+length(glmdiag$h)))[subset]
#     else if (is.character(subset)) {
#       if (is.null(labels)) labels <- subset
#       subset <- seq_along(subset)
#     }
#     #	close.screen(all = T)
#     #	split.screen(c(2, 2))
#     #	screen(1) #
#     par(mfrow = c(2,2))
#     #  Plot the deviance residuals against the fitted values
#     x1 <- predict(glmfit)
#     plot(x1, glmdiag$res, xlab = "Linear predictor", ylab = "Residuals")
#     pars <- vector(4L, mode="list")
#     pars[[1L]] <- par("usr")
#     #	screen(2) #
#     #  Plot a normal QQ plot of the standardized deviance residuals
#     y2 <- glmdiag$rd
#     x2 <- qnorm(ppoints(length(y2)))[rank(y2)]
#     plot(x2, y2, ylab = "Quantiles of standard normal",
#          xlab = "Ordered deviance residuals")
#     abline(0, 1, lty = 2)
#     pars[[2L]] <- par("usr")
#     #	screen(3) #
#     #  Plot the Cook statistics against h/(1-h) and draw line to highlight
#     #  possible influential and high leverage points.
#     hh <- glmdiag$h/(1 - glmdiag$h)
#     plot(hh, glmdiag$cook, xlab = "h/(1-h)", ylab = "Cook statistic")
#     rx <- range(hh)
#     ry <- range(glmdiag$cook)
#     rank.fit <- glmfit$rank
#     nobs <- rank.fit + glmfit$df.residual
#     cooky <- 8/(nobs - 2 * rank.fit)
#     hy <- (2 * rank.fit)/(nobs - 2 * rank.fit)
#     if ((cooky >= ry[1L]) && (cooky <= ry[2L])) abline(h = cooky, lty = 2)
#     if ((hy >= rx[1L]) && (hy <= rx[2L])) abline(v = hy, lty = 2)
#     pars[[3L]] <- par("usr")
#     #	screen(4) #
#     #  Plot the Cook statistics against the observation number in the original
#     #  data set.
#     plot(subset, glmdiag$cook, xlab = "Case", ylab = "Cook statistic")
#     if ((cooky >= ry[1L]) && (cooky <= ry[2L])) abline(h = cooky, lty = 2)
#     xx <- list(x1,x2,hh,subset)
#     yy <- list(glmdiag$res, y2, glmdiag$cook, glmdiag$cook)
#     pars[[4L]] <- par("usr")
#
#     if (is.null(labels)) labels <- names(x1)
#     while (iden) {
#       #  If interaction with the plots is required then ask the user which plot
#       #  they wish to interact with and then run identify() on that plot.
#       #  When the user terminates identify(), reprompt until no further interaction
#       #  is required and the user inputs a 0.
#       cat("****************************************************\n")
#       cat("Please Input a screen number (1,2,3 or 4)\n")
#       cat("0 will terminate the function \n")
#       #		num <- scan(nmax=1)
#       num <- as.numeric(readline())
#       if ((length(num) > 0L) &&
#           ((num == 1)||(num == 2)||(num == 3)||(num == 4))) {
#         cat(paste("Interactive Identification for screen",
#                   num,"\n"))
#         cat("left button = Identify, center button = Exit\n")
#         #			screen(num, new=F)
#         nm <- num+1
#         par(mfg = c(trunc(nm/2),1 +nm%%2, 2, 2))
#         par(usr = pars[[num]])
#         identify(xx[[num]], yy[[num]], labels)
#       }
#       else 	iden <- FALSE
#     }
#     #	close.screen(all=T)
#     par(mfrow = c(1, 1))
#     if (ret) glmdiag else invisible()
#   }

exp.tilt <- function(L, theta = NULL, t0 = 0, lambda = NULL,
                     strata = rep(1, length(L)) )
{
  # exponential tilting of linear approximation to statistic
  # to give mean theta.
  #
  tilt.dis <- function(lambda)  {
    #  Find the squared error in the mean using the multiplier lambda
    #  This is then minimized to find the correct value of lambda
    #  Note that the function should have minimum 0.
    L <- para[[2L]]
    theta <- para[[1L]]
    strata <- para[[3L]]
    ns <- table(strata)
    tilt <- rep(NA, length(L) )
    for (s in seq_along(ns)) {
      p <- exp(lambda*L[strata == s]/ns[s])
      tilt[strata == s] <- p/sum(p)
    }
    (sum(L*tilt) - theta)^2
  }
  tilted.prob <- function(lambda, L, strata)  {
    #  Find the tilted probabilities for a given value of lambda
    ns <- table(strata)
    m <- length(lambda)
    tilt <- matrix(NA, m, length(L))
    for (i in 1L:m)
      for (s in seq_along(ns)) {
        p <- exp(lambda[i]*L[strata == s]/ns[s])
        tilt[i,strata == s] <- p/sum(p)
      }
    if (m == 1) tilt <- as.vector(tilt)
    tilt
  }
  strata <- tapply(strata, as.numeric(strata))
  if (!is.null(theta)) {
    theta <- theta-t0
    m <- length(theta)
    lambda <- rep(NA,m)
    for (i in 1L:m) {
      para <- list(theta[i],L,strata)
      #			assign("para",para,frame=1)
      #			lambda[i] <- nlmin(tilt.dis, 0 )$x
      lambda[i] <- optim(0, tilt.dis, method = "BFGS")$par
      msd <- tilt.dis(lambda[i])
      if (is.na(msd) || (abs(msd) > 1e-6))
        stop(gettextf("unable to find multiplier for %f", theta[i]),
             domain = NA)
    }
  }
  else if (is.null(lambda))
    stop("'theta' or 'lambda' required")
  probs <- tilted.prob( lambda, L, strata )
  if (is.null(theta)) theta <- t0 + sum(probs * L)
  else theta <- theta+t0
  list(p = probs, theta = theta, lambda = lambda)
}
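
# Example sketch (not run): tilt equal weights so that the linear approximation
# to the sample mean is shifted one standard error upwards.
#   x <- rnorm(20)
#   L <- x - mean(x)                        # influence values for the mean
#   et <- exp.tilt(L, theta = sd(x)/sqrt(20), t0 = 0)
#   sum(et$p * x)                           # roughly mean(x) + sd(x)/sqrt(20)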


imp.weights <- function(boot.out, def = TRUE, q = NULL)
{
  #
  # Takes a boot.out object and calculates importance weights
  # for each element of boot.out$t, as if sampling from a multinomial
  # distribution with probabilities q.
  # If q is NULL the weights are calculated as if
  # sampling from a distribution with equal probabilities.
  # If def=TRUE the weights are calculated using the defensive mixture
  # distribution; if def=FALSE the weights use knowledge of which element
  # of the mixture each replicate came from.
  #
  R <- boot.out$R
  if (length(R) == 1L)
    def <- FALSE
  f <- boot.array(boot.out)
  n <- ncol(f)
  strata <- tapply(boot.out$strata,as.numeric(boot.out$strata))
  #    ns <- table(strata)
  if (is.null(q))  q <- rep(1,ncol(f))
  if (any(q == 0)) stop("0 elements not allowed in 'q'")
  p <- boot.out$weights
  if ((length(R) == 1L) && all(abs(p - q)/p < 1e-10))
    return(rep(1, R))
  np <- length(R)
  q <- normalize(q, strata)
  lw.q <- as.vector(f %*% log(q))
  if (!isMatrix(p))
    p <- as.matrix(t(p))
  p <- t(apply(p, 1L, normalize, strata))
  lw.p <- matrix(NA, sum(R), np)
  for(i in 1L:np) {
    zz <- seq_len(n)[p[i,  ] > 0]
    lw.p[, i] <- f[, zz] %*% log(p[i, zz])
  }
  if (def)
    w <- 1/(exp(lw.p - lw.q) %*% R/sum(R))
  else {
    i <- cbind(seq_len(sum(R)), rep(seq_along(R), R))
    w <- exp(lw.q - lw.p[i])
  }
  as.vector(w)
}
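
# Example sketch (not run): importance re-weighting after resampling with tilted
# probabilities (assumes exp.tilt() above and the boot() defined elsewhere in
# this file).
#   x <- rnorm(30)
#   p <- exp.tilt(x - mean(x), theta = 0.25)$p
#   b <- boot(x, function(d, i) mean(d[i]), R = 499, weights = p)
#   w <- imp.weights(b)   # weights for estimation under equal probabilities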

const <- function(w, eps=1e-8) {
  # Check whether all of the values of w are equal to within the tolerance eps.
  all(abs(w-mean(w, na.rm=TRUE)) < eps)
}

imp.moments <- function(boot.out=NULL, index=1, t=boot.out$t[,index],
                        w=NULL, def=TRUE, q=NULL )
{
  # Calculates raw, ratio, and regression estimates of mean and
  # variance of t using importance sampling weights in w.
  if (missing(t) && is.null(boot.out$t))
    stop("bootstrap replicates must be supplied")
  if (is.null(w)) {
    if (!is.null(boot.out))
      w <- imp.weights(boot.out, def, q)
    else
      stop("either 'boot.out' or 'w' must be specified.")
  }
  if ((length(index) > 1L) && missing(t)) {
    warning("only first element of 'index' used")
    t <- boot.out$t[, index[1L]]
  }
  fins <- seq_along(t)[is.finite(t)]
  t <- t[fins]
  w <- w[fins]
  if (!const(w)) {
    y <- t*w
    m.raw <- mean(y)
    m.rat <- sum(y)/sum(w)
    t.lm <- lm(y ~ w)
    m.reg <- mean(y) - coefficients(t.lm)[2L]*(mean(w)-1)
    v.raw <- mean(w*(t-m.raw)^2)
    v.rat <- sum(w/sum(w)*(t-m.rat)^2)
    x <- w*(t-m.reg)^2
    t.lm2 <- lm(x ~ w)
    v.reg <- mean(x) - coefficients(t.lm2)[2L]*(mean(w)-1)
  } else {
    m.raw <- m.rat <- m.reg <- mean(t)
    v.raw <- v.rat <- v.reg <- var(t)
  }
  list(raw = c(m.raw, v.raw), rat = c(m.rat, v.rat),
       reg = as.vector(c(m.reg, v.reg)))
}


imp.reg <- function(w)
{
  #  This function takes a vector of importance sampling weights and
  #  returns the regression importance sampling weights.  The function
  #  is called by imp.prob and imp.quantile to enable those functions
  #  to find regression estimates of tail probabilities and quantiles.
  if (!const(w)) {
    R <- length(w)
    mw <- mean(w)
    s2w <- (R-1)/R*var(w)
    b <- (1-mw)/s2w
    w <- w*(1+b*(w-mw))/R
  }
  cumsum(w)/sum(w)
}


imp.quantile <- function(boot.out=NULL, alpha=NULL, index=1,
                         t=boot.out$t[,index], w=NULL, def=TRUE, q=NULL )
{
  # Calculates raw, ratio, and regression estimates of alpha quantiles
  #  of t using importance sampling weights in w.
  if (missing(t) && is.null(boot.out$t))
    stop("bootstrap replicates must be supplied")
  if (is.null(alpha)) alpha <- c(0.01,0.025,0.05,0.95,0.975,0.99)
  if (is.null(w)) {
    if (!is.null(boot.out))
      w <- imp.weights(boot.out, def, q)
    else
      stop("either 'boot.out' or 'w' must be specified.")
  }
  if ((length(index) > 1L) && missing(t)) {
    warning("only first element of 'index' used")
    t <- boot.out$t[, index[1L]]
  }
  fins <- seq_along(t)[is.finite(t)]
  t <- t[fins]
  w <- w[fins]
  o <- order(t)
  t <- t[o]
  w <- w[o]
  cum <- cumsum(w)
  o <- rev(o)
  w.m <- w[o]
  t.m <- -rev(t)
  cum.m <- cumsum(w.m)
  cum.rat <- cum/mean(w)
  cum.reg <- imp.reg(w)
  R <- length(w)
  raw <- rat <- reg <- rep(NA, length(alpha))
  for (i in seq_along(alpha)) {
    if (alpha[i] <= 0.5) raw[i] <- max(t[cum <= (R+1)*alpha[i]])
    else raw[i] <- -max(t.m[cum.m <= (R+1)*(1-alpha[i])])
    rat[i] <- max(t[cum.rat <= (R+1)*alpha[i]])
    reg[i] <- max(t[cum.reg <= (R+1)*alpha[i]])
  }
  list(alpha = alpha, raw = raw, rat = rat, reg = reg)
}

imp.prob <- function(boot.out=NULL, index=1, t0=boot.out$t0[index],
                     t=boot.out$t[,index], w=NULL,  def=TRUE, q=NULL)
{
  # Calculates raw, ratio, and regression estimates of tail probability
  #  pr( t <= t0 ) using importance sampling weights in w.
  is.missing <- function(x) length(x) == 0L || is.na(x)

  if (missing(t) && is.null(boot.out$t))
    stop("bootstrap replicates must be supplied")
  if (is.null(w)) {
    if (!is.null(boot.out))
      w <- imp.weights(boot.out, def, q)
    else
      stop("either 'boot.out' or 'w' must be specified.")
  }
  if ((length(index) > 1L) && (missing(t) || missing(t0))) {
    warning("only first element of 'index' used")
    index <- index[1L]
    if (is.missing(t)) t <- boot.out$t[,index]
    if (is.missing(t0)) t0 <- boot.out$t0[index]
  }
  fins <- seq_along(t)[is.finite(t)]
  t <- t[fins]
  w <- w[fins]
  o <- order(t)
  t <- t[o]
  w <- w[o]
  raw <- rat <- reg <- rep(NA, length(t0))
  cum <- cumsum(w)/sum(w)
  cum.r <- imp.reg(w)
  for (i in seq_along(t0)) {
    raw[i] <- sum(w[t <= t0[i]])/length(w)
    rat[i] <- max(cum[t <= t0[i]])
    reg[i] <- max(cum.r[t <= t0[i]])
  }
  list(t0 = t0, raw = raw, rat = rat, reg = reg)
}
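
# Example sketch (not run): continuing the imp.weights() sketch above, the raw,
# ratio and regression importance sampling estimates.
#   imp.moments(b, w = w)
#   imp.quantile(b, alpha = c(0.05, 0.95), w = w)
#   imp.prob(b, t0 = 0.25, w = w)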

smooth.f <- function(theta, boot.out, index=1, t=boot.out$t[,index],
                     width=0.5 )
{
  # Does frequency smoothing of the frequency array for boot.out with
  # bandwidth width (times the bootstrap standard deviation) to give
  # frequencies for the 'typical' distribution at theta
  if ((length(index) > 1L) && missing(t)) {
    warning("only first element of 'index' used")
    t <- boot.out$t[,index[1L]]
  }
  if (isMatrix(t)) {
    warning("only first column of 't' used")
    t <- t[,1L]
  }
  fins <- seq_along(t)[is.finite(t)]
  t <- t[fins]
  m <- length(theta)
  v <- imp.moments(boot.out, t=t)$reg[2L]
  eps <- width*sqrt(v)
  if (m  == 1)
    w <- dnorm((theta-t)/eps )/eps
  else {
    w <- matrix(0,length(t),m)
    for (i in 1L:m)
      w[,i] <- dnorm((theta[i]-t)/eps )/eps
  }
  f <- crossprod(boot.array(boot.out)[fins,] , w)
  strata <- boot.out$strata
  strata <- tapply(strata, as.numeric(strata))
  ns <- table(strata)
  out <- matrix(NA,ncol(f),nrow(f))
  for (s in seq_along(ns)) {
    ts <- matrix(f[strata == s,],m,ns[s],byrow=TRUE)
    ss <- apply(ts,1L,sum)
    out[,strata == s] <-  ts/matrix(ss,m,ns[s])
  }
  if (m == 1) out <- as.vector(out)
  out
}
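
# Example sketch (not run): frequency-smoothed resampling probabilities
# concentrated near theta (assumes the boot() defined elsewhere in this file).
#   b <- boot(rnorm(30), function(d, i) mean(d[i]), R = 499)
#   p <- smooth.f(theta = 0.3, b)
#   sum(p)   # 1; p could be passed to boot(..., weights = p)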

tilt.boot <- function(data, statistic, R, sim="ordinary",
                      stype="i", strata = rep(1, n), L = NULL, theta=NULL,
                      alpha=c(0.025,0.975), tilt=TRUE, width=0.5, index=1, ... )
{
  #  Does tilted bootstrap sampling of statistic applied to data, with strata
  #  given by strata and simulation type sim.
  #  The elements of R give the number of simulations at each level.  For example,
  #  R=c(199,100,50) will give three separate bootstraps with 199, 100, 50
  #  simulations.  If R[1L]>0 the first simulation is assumed to be untilted
  #  and L can be estimated from it by regression, or it can be frequency
  #  smoothed to give probabilities p.
  #  If tilt=TRUE use exponential tilting with empirical influence values L
  #  given explicitly or estimated from boot0, but if tilt=FALSE
  #  (in which case R[1L] should be large) frequency smoothing of boot0 is used
  #  with bandwidth width.
  #  Tilting/frequency smoothing is to theta (so length(theta)=length(R)-1).
  #  The function assumes at present that q=0 is the median of the distribution
  #  of t*.
  if ((sim != "ordinary") && (sim != "balanced"))
    stop("invalid value of 'sim' supplied")
  if (!is.null(theta) && (length(R) != length(theta)+1))
    stop("'R' and 'theta' have incompatible lengths")
  if (!tilt && (R[1L] == 0))
    stop("R[1L] must be positive for frequency smoothing")
  call <- match.call()
  n <- NROW(data)
  if (R[1L]>0) {
    # If required run an initial bootstrap with equal weights.
    if (is.null(theta) && (length(R) != length(alpha)+1))
      stop("'R' and 'alpha' have incompatible lengths")
    boot0 <- boot(data, statistic, R = R[1L], sim=sim, stype=stype,
                  strata = strata, ... )
    if (is.null(theta)) {
      if (any(c(alpha,1-alpha)*(R[1L]+1) <= 5))
        warning("extreme values used for quantiles")
      theta <- quantile(boot0$t[,index],alpha)
    }
  }
  else {
    # If no initial bootstrap is run then exponential tilting must be
    # used.  Also set up a dummy bootstrap object to hold the output.
    tilt <- TRUE
    if (is.null(theta))
      stop("'theta' must be supplied if R[1L] = 0")
    if (!missing(alpha))
      warning("'alpha' ignored; R[1L] = 0")
    if (stype == "i") orig <- seq_len(n)
    else if (stype == "f") orig <- rep(1,n)
    else orig <- rep(1,n)/n
    boot0 <- boot.return(sim=sim,t0=statistic(data,orig, ...),
                         t=NULL, strata=strata, R=0, data=data,
                         stat=statistic, stype=stype,call=NULL,
                         seed=get(".Random.seed", envir=.GlobalEnv, inherits = FALSE),
                         m=0,weights=NULL)
  }
  # Calculate the weights for the subsequent bootstraps
  if (is.null(L) & tilt)
    if (R[1L] > 0) L <- empinf(boot0, index, ...)
  else L <- empinf(data=data, statistic=statistic, stype=stype,
                   index=index, ...)
  if (tilt) probs <- exp.tilt(L, theta, strata=strata, t0=boot0$t0[index])$p
  else probs <- smooth.f(theta, boot0, index, width=width)#
  # Run the weighted bootstraps and collect the output.
  boot1 <- boot(data, statistic, R[-1L], sim=sim, stype=stype,
                strata=strata, weights=probs, ...)
  boot0$t <- rbind(boot0$t, boot1$t)
  boot0$weights <- rbind(boot0$weights, boot1$weights)
  boot0$R <- c(boot0$R, boot1$R)
  boot0$call <- call
  boot0$theta <- theta
  attr(boot0, "boot_type") <- "tilt.boot"
  boot0
}
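
# Example sketch (not run): an initial untilted bootstrap of 499 replicates
# followed by two tilted bootstraps aimed at its 2.5% and 97.5% quantiles.
#   x <- rgamma(30, 2)
#   tb <- tilt.boot(x, function(d, i) mean(d[i]), R = c(499, 250, 250),
#                   alpha = c(0.025, 0.975))
#   tb$theta   # the two tilting targets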


control <- function(boot.out, L=NULL, distn=NULL, index=1, t0=NULL, t=NULL,
                    bias.adj=FALSE, alpha=NULL, ... )
{
  #
  #  Control variate estimation.  Post-simulation balance can be used to
  #  find the adjusted bias estimate.  Alternatively the linear approximation
  #  to the statistic of interest can be used as a control variate and hence
  #  moments and quantiles can be estimated.
  #
  if (!is.null(boot.out$call$weights))
    stop("control methods undefined when 'boot.out' has weights")
  if (is.null(alpha))
    alpha <- c(1,2.5,5,10,20,50,80,90,95,97.5,99)/100
  tL <- dL <- bias <- bias.L <- var.L <- NULL
  k3.L <- q.out <- distn.L <- NULL
  stat <- boot.out$statistic
  data <- boot.out$data
  R <- boot.out$R
  f <- boot.array(boot.out)
  if (bias.adj) {
    # Find the adjusted bias estimate using post-simulation balance.
    if (length(index) > 1L) {
      warning("only first element of 'index' used")
      index <- index[1L]
    }
    f.big <- apply(f, 2L, sum)
    if (boot.out$stype == "i") {
      n <- ncol(f)
      i.big <- rep(seq_len(n), f.big)
      t.big <- stat(data, i.big, ...)[index]
    }
    else if (boot.out$stype == "f")
      t.big <- stat(data, f.big, ...)[index]
    else if (boot.out$stype == "w")
      t.big <- stat(data, f.big/R, ...)[index]
    bias <- mean(boot.out$t[, index]) - t.big
    out <- bias
  }
  else {
    # Using the linear approximation as a control variable, find estimates
    # of the moments and quantiles of the statistic of interest.
    if (is.null(t) || is.null(t0)) {
      if (length(index) > 1L) {
        warning("only first element of 'index' used")
        index <- index[1L]
      }
      if (is.null(L))
        L <- empinf(boot.out, index=index, ...)
      tL <- linear.approx(boot.out, L, index, ...)
      t <- boot.out$t[,index]
      t0 <- boot.out$t0[index]
    }
    else {
      if (is.null(L))
        L <- empinf(boot.out, t=t, ...)
      tL <- linear.approx(boot.out, L, t0=t0, ...)
    }
    fins <- seq_along(t)[is.finite(t)]
    t <- t[fins]
    tL <- tL[fins]
    R <- length(t)
    dL <- t - tL                    #
    # Find the moments of the statistic of interest.
    bias.L <- mean(dL)
    strata <- tapply(boot.out$strata, as.numeric(boot.out$strata))
    var.L <- var.linear(L, strata) + 2*var(tL, dL) + var(dL)
    k3.L <- k3.linear(L, strata) + 3 * cum3(tL, dL) +
      3 * cum3(dL, tL) + cum3(dL)
    if (is.null(distn)) {
      # If distn is not supplied then calculate the saddlepoint approximation to
      # the distribution of the linear approximation.
      distn <- saddle.distn((t0+L)/length(L),
                            alpha = (1L:R)/(R + 1),
                            t0=c(t0,sqrt(var.L)), strata=strata)
      dist.q <- distn$quantiles[,2]
      distn <- distn$distn
    }
    else	dist.q <- predict(distn, x=qnorm((1L:R)/(R+1)))$y#
    # Use the quantiles of the distribution of the linear approximation and
    # the control variates to estimate the quantiles of the statistic of interest.
    distn.L <- sort(dL[order(tL)] + dist.q)
    q.out <- distn.L[(R + 1) * alpha]
    out <- list(L=L, tL=tL, bias=bias.L, var=var.L, k3=k3.L,
                quantiles=cbind(alpha,q.out), distn=distn)
  }
  out
}
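
# Example sketch (not run): control-variate estimates of the bias, variance and
# quantiles of a bootstrapped mean (assumes the boot() defined elsewhere in this
# file).
#   b <- boot(rnorm(40), function(d, i) mean(d[i]), R = 499)
#   ctl <- control(b)
#   ctl$bias; ctl$var; ctl$quantiles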

var.linear <- function(L, strata = NULL)
{
  #  estimate the variance of a statistic using its linear approximation
  vL <- 0
  n <- length(L)
  if (is.null(strata))
    strata <- rep(1, n)
  else 	strata <- tapply(seq_len(n),as.numeric(strata))
  S <- length(table(strata))
  for(s in 1L:S) {
    i.s <- seq_len(n)[strata == s]
    vL <- vL + sum(L[i.s]^2/length(i.s)^2)
  }
  vL
}

k3.linear <- function(L, strata = NULL)
{
  #  estimate the skewness of a statistic using its linear approximation
  k3L <- 0
  n <- length(L)
  if (is.null(strata))
    strata <- rep(1, n)
  else	strata <- tapply(seq_len(n),as.numeric(strata))
  S <- length(table(strata))
  for(s in 1L:S) {
    i.s <- seq_len(n)[strata == s]
    k3L <- k3L + sum(L[i.s]^3/length(i.s)^3)
  }
  k3L
}

cum3 <- function(a, b=a, c=a, unbiased=TRUE)
  # calculate third order cumulants.
{
  n <- length(a)
  if (unbiased) mult <- n/((n-1)*(n-2))
  else mult <- 1/n
  mult*sum((a - mean(a)) * (b - mean(b)) * (c - mean(c)))
}
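
# Example sketch (not run): variance and skewness of a sample mean estimated
# from its empirical influence values.
#   x <- rexp(25)
#   L <- x - mean(x)
#   var.linear(L)   # roughly var(x)/25
#   k3.linear(L)    # third cumulant of the linear approximation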

logit <- function(p) qlogis(p)
#
#  Calculate the logit of a proportion in the range [0,1]
#
## {
##     out <- p
##     inds <- seq_along(p)[!is.na(p)]
##     if (any((p[inds] < 0) | (p[inds] > 1)))
##         stop("invalid proportions input")
##     out[inds] <- log(p[inds]/(1-p[inds]))
##     out[inds][p[inds] == 0] <- -Inf
##     out[inds][p[inds] == 1] <- Inf
##     out
## }

inv.logit <- function(x)
  #
  #  Calculate the inverse logit of a number
  #
  # {
  #     out <- exp(x)/(1+exp(x))
  #     out[x==-Inf] <- 0
  #     out[x==Inf] <- 1
  #     out
  # }
  plogis(x)

iden <- function(n)
  #
  #  Return the identity matrix of size n
  #
  if (n > 0) diag(rep(1,n)) else NULL

zero <- function(n,m)
  #
  #  Return an n x m matrix of 0's
  #
  if ((n > 0) & (m > 0)) matrix(0,n,m) else NULL


simplex <- function(a,A1=NULL,b1=NULL,A2=NULL,b2=NULL,A3=NULL,b3=NULL,
                    maxi=FALSE, n.iter=n+2*m, eps=1e-10)
  #
  #   This function calculates the solution to a linear programming
  #   problem using the tableau simplex method.  The constraints are
  #   given by the matrices A1, A2, A3 and the vectors b1, b2 and b3
  #   such that A1%*%x <= b1, A2%*%x >= b2 and A3%*%x = b3.  The 2-phase
  #   Simplex method is used.
  #
{
  call <- match.call()
  m1 <- if (is.null(A1)) 0 else if (is.matrix(A1)) nrow(A1) else 1
  m2 <- if (is.null(A2)) 0 else if (is.matrix(A2)) nrow(A2) else 1
  m3 <- if (is.null(A3)) 0 else if (is.matrix(A3)) nrow(A3) else 1
  m <- m1+m2+m3
  n <- length(a)
  a.o <- a
  if (maxi) a <- -a
  if (m2+m3 == 0) {
    # If there are no >= or = constraints then the origin is a feasible
    # solution, and so only the second phase is required.
    out <- simplex1(c(a,rep(0,m1)), cbind(A1,iden(m1)), b1,
                    c(rep(0,m1),b1), n+(1L:m1), eps=eps)
  } else {
    if (m2 > 0) {
      out1 <- simplex1(c(a,rep(0,m1+2*m2+m3)),
                       cbind(rbind(A1,A2,A3),
                             rbind(iden(m1),zero(m2+m3,m1)),
                             rbind(zero(m1,m2),-iden(m2),
                                   zero(m3,m2)),
                             rbind(zero(m1,m2+m3),
                                   iden(m2+m3))),
                       c(b1,b2,b3),
                       c(rep(0,n),b1,rep(0,m2),b2,b3),
                       c(n+(1L:m1),(n+m1+m2)+(1L:(m2+m3))),
                       stage=1, n1=n+m1+m2,
                       n.iter=n.iter, eps=eps)
    } else {
      out1 <- simplex1(c(a,rep(0,m1+m3)),
                       cbind(rbind(A1,A3),
                             iden(m1+m3)),
                       c(b1,b3),
                       c(rep(0,n),b1,b3),
                       n+(1L:(m1+m3)), stage=1, n1=n+m1,
                       n.iter=n.iter, eps=eps)
    }
    #  In phase 1 use 1 artificial variable for each constraint and
    #  minimize the sum of the artificial variables.  This gives a
    #  feasible solution to the original problem as long as all
    #  artificial variables are non-basic (and hence the value of the
    #  new objective function is 0).  If this is true then optimize the
    #  original problem using the result as the original feasible solution.
    if (out1$val.aux > eps)
      out <- out1
    else
      out <- simplex1(out1$a[1L:(n+m1+m2)],
                      out1$A[,1L:(n+m1+m2)],
                      out1$soln[out1$basic],
                      out1$soln[1L:(n+m1+m2)],
                      out1$basic,
                      val=out1$value, n.iter=n.iter, eps=eps)
  }
  if (maxi)
    out$value <- -out$value
  out$maxi <- maxi
  if (m1 > 0L)
    out$slack <- out$soln[n+(1L:m1)]
  if (m2 > 0L)
    out$surplus <- out$soln[n+m1+(1L:m2)]
  if (out$solved == -1)
    out$artificial <- out$soln[-(1L:n+m1+m2)]
  out$obj <- a.o
  names(out$obj) <- paste("x",seq_len(n),sep="")
  out$soln <- out$soln[seq_len(n)]
  names(out$soln) <- paste("x",seq_len(n),sep="")
  out$call <- call
  class(out) <- "simplex"
  out
}
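
# Example sketch (not run): maximize 2*x1 + 3*x2 subject to x1 + x2 <= 4 and
# x1 <= 3 (with x1, x2 >= 0).
#   sol <- simplex(a = c(2, 3), A1 = rbind(c(1, 1), c(1, 0)), b1 = c(4, 3),
#                  maxi = TRUE)
#   sol$soln    # optimum at x1 = 0, x2 = 4
#   sol$value   # 12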




simplex1 <- function(a,A,b,init,basic,val=0,stage=2, n1=N, eps=1e-10,
                     n.iter=n1)
  #
  #  Tableau simplex function called by the simplex routine.  This does
  #  the actual calculations required in each phase of the simplex method.
  #
{
  pivot <- function(tab, pr, pc) {
    #  Given the position of the pivot and the tableau, complete
    #  the matrix operations to swap the variables.
    pv <- tab[pr,pc]
    pcv <- tab[,pc]
    tab[-pr,]<- tab[-pr,] - (tab[-pr,pc]/pv)%o%tab[pr,]
    tab[pr,] <- tab[pr,]/(-pv)
    tab[pr,pc] <- 1/pv
    tab[-pr,pc] <- pcv[-pr]/pv
    tab
  }
  N <- ncol(A)
  M <- nrow(A)
  nonbasic <- (1L:N)[-basic]
  tableau <- cbind(b,-A[,nonbasic,drop=FALSE])
  #  If in the first stage then find the artificial objective function,
  #  otherwise use the original objective function.
  if (stage == 2) {
    tableau <- rbind(tableau,c(val,a[nonbasic]))
    obfun <- a[nonbasic]
  }
  else {
    obfun <- apply(tableau[(M+n1-N+1):M, , drop=FALSE], 2L, sum)
    tableau <- rbind(c(val, a[nonbasic]), tableau, obfun)
    obfun <- obfun[-1L]
  }
  it <- 1
  while (!all(obfun> -eps) && (it <= n.iter))
    #  While the objective function can be reduced
    #	Find a pivot
    #	complete the matrix operations required
    #	update the lists of basic and non-basic variables
  {
    pcol <- 1+order(obfun)[1L]
    if (stage == 2)
      neg <- (1L:M)[tableau[1L:M,pcol]< -eps]
    else 	neg <- 1+ (1L:M)[tableau[2:(M+1),pcol] < -eps]
    ratios <- -tableau[neg,1L]/tableau[neg,pcol]
    prow <- neg[order(ratios)[1L]]
    tableau <- pivot(tableau,prow,pcol)
    if (stage == 1) {
      temp <- basic[prow-1L]
      basic[prow-1L] <- nonbasic[pcol-1L]
      nonbasic[pcol-1L] <- temp
      obfun <- tableau[M+2L,-1L]
    }
    else {
      temp <- basic[prow]
      basic[prow] <- nonbasic[pcol-1L]
      nonbasic[pcol-1L] <- temp
      obfun <- tableau[M+1L, -1L]
    }
    it <- it+1
  }
  #  END of while loop
  if (stage == 1) {
    val.aux <- tableau[M+2,1L]
    # If the value of the auxiliary objective function is zero but some
    # of the artificial variables are basic (with value 0) then switch
    # them with some nonbasic variables (which are not artificial).
    if ((val.aux < eps) && any(basic>n1)) {
      ar <- (1L:M)[basic>n1]
      for (j in seq_along(ar)) {
        prow <- 1+ar[j]
        pcol <- 1 + order(
          nonbasic[abs(tableau[prow,-1L])>eps])[1L]
        tableau <- pivot(tableau,prow,pcol)
        temp1 <- basic[prow-1L]
        basic[prow-1L] <- nonbasic[pcol-1L]
        nonbasic[pcol-1L] <- temp1
      }
    }
    soln <- rep(0,N)
    soln[basic] <- tableau[2:(M+1L),1L]
    val.orig <- tableau[1L,1L]
    A.out <- matrix(0,M,N)
    A.out[,basic] <- iden(M)
    A.out[,nonbasic] <- -tableau[2L:(M+1L),-1L]
    a.orig <- rep(0,N)
    a.orig[nonbasic] <- tableau[1L,-1L]
    a.aux <- rep(0,N)
    a.aux[nonbasic] <- tableau[M+2,-1L]
    list(soln=soln, solved=-1, value=val.orig, val.aux=val.aux,
         A=A.out, a=a.orig, a.aux=a.aux, basic=basic)
  }
  else {
    soln <- rep(0,N)
    soln[basic] <- tableau[1L:M,1L]
    val <- tableau[(M+1L),1L]
    A.out <- matrix(0,M,N)
    A.out[,basic] <- iden(M)
    A.out[,nonbasic] <- tableau[1L:M,-1L]
    a.out <- rep(0,N)
    a.out[nonbasic] <- tableau[M+1L,-1L]
    if (it <= n.iter) solved <- 1L
    else solved <- 0L
    list(soln=soln, solved=solved, value=val,  A=A.out,
         a=a.out, basic=basic)
  }
}

print.simplex <- function(x, ...) {
  #
  #  Print the output of a simplex solution to a linear programming problem.
  #
  simp.out <- x
  cat("\nLinear Programming Results\n\n")
  cl <- simp.out$call
  cat("Call : ")
  dput(cl, control=NULL)
  if (simp.out$maxi) cat("\nMaximization ")
  else cat("\nMinimization ")
  cat("Problem with Objective Function Coefficients\n")
  print(simp.out$obj)
  if (simp.out$solved == 1) {
    cat("\n\nOptimal solution has the following values\n")
    print(simp.out$soln)
    cat(paste("The optimal value of the objective ",
              " function is ",simp.out$value,".\n",sep=""))
  }
  else if (simp.out$solved == 0) {
    cat("\n\nIteration limit exceeded without finding solution\n")
    cat("The coefficient values at termination were\n")
    print(simp.out$soln)
    cat(paste("The objective function value was ",simp.out$value,
              ".\n",sep=""))
  }
  else cat("\nNo feasible solution could be found\n")
  invisible(x)
}


saddle <-
  function(A = NULL, u = NULL, wdist = "m", type = "simp", d = NULL, d1 = 1,
           init = rep(0.1, d), mu = rep(0.5, n), LR = FALSE, strata = NULL,
           K.adj = NULL, K2 = NULL)
    #
    #  Saddle point function.  Standard multinomial saddlepoints are
    #  computed using optim whereas the more complicated conditional
    #  saddlepoints for Poisson and Binary cases are done by fitting
    #  a GLM to a set of responses which, in turn, are derived from a
    #  linear programming problem.
    #
  {
    det <- function(mat) {
      #  absolute value of the determinant of a matrix.
      if (any(is.na(mat))) NA
      else if (!all(is.finite(mat))) Inf
      else  abs(prod(eigen(mat,only.values = TRUE)$values))
    }
    sgn <- function(x, eps = 1e-10)
      #  sign of a real number.
      if (abs(x) < eps) 0 else 2*(x > 0) - 1

    if (!is.null(A)) {
      A <- as.matrix(A)
      d <- ncol(A)
      if (length(u) != d)
        stop(gettextf("number of columns of 'A' (%d) not equal to length of 'u' (%d)",
                      d, length(u)), domain = NA)
      n <- nrow(A)
    } else if (is.null(K.adj))
      stop("either 'A' and 'u' or 'K.adj' and 'K2' must be supplied")
    if (!is.null(K.adj)) {
      #  If K.adj and K2 are supplied then calculate the simple saddlepoint.
      if (is.null(d)) d <- 1
      type <- "simp"
      wdist <- "o"
      speq <- suppressWarnings(optim(init, K.adj))
      if (speq$convergence == 0) {
        ahat <- speq$par
        Khat <- K.adj(ahat)
        K2hat <- det(K2(ahat))
        gs <- 1/sqrt((2*pi)^d*K2hat)*exp(Khat)
        if (d == 1) {
          r <- sgn(ahat)*sqrt(-2*Khat)
          v <- ahat*sqrt(K2hat)
          if (LR)	Gs <- pnorm(r)+dnorm(r)*(1/r + 1/v)
          else	Gs <- pnorm(r+log(v/r)/r)
        }
        else	Gs <- NA
      }
      else gs <- Gs <- ahat <- NA
    }
    else if (wdist == "m") {
      #  Calculate the standard simple saddlepoint for the multinomial case.
      type <- "simp"
      if (is.null(strata)) {
        p <- mu/sum(mu)
        para <- list(p,A,u,n)
        K <- function(al) {
          w <- para[[1L]]*exp(al%*%t(para[[2L]]))
          para[[4L]]*log(sum(w))-sum(al*para[[3L]])
        }
        speq <- suppressWarnings(optim(init, K))
        ahat <- speq$par
        w <- as.vector(p*exp(ahat%*%t(A)))
        Khat <- n*log(sum(w))-sum(ahat*u)
        sw <- sum(w)
        if (d == 1)
          K2hat <- n*(sum(w*A*A)/sw-(sum(w*A)/sw)^2)
        else {
          saw <- w %*% A
          sa2w <- t(matrix(w,n,d)*A) %*% A
          K2hat <- det(n/sw*(sa2w-(saw%*%t(saw))/sw))
        }
      }
      else {
        sm <- as.vector(tapply(mu,strata,sum)[strata])
        p <- mu/sm
        ns <- table(strata)
        para <- list(p,A,u,strata,ns)
        K <- function(al) {
          w <- para[[1L]]*exp(al%*%t(para[[2L]]))
          ## avoid matrix methods
          sum(para[[5]]*log(tapply(c(w), para[[4L]], sum))) -
            sum(al*para[[3L]])
        }
        speq <- suppressWarnings(optim(init, K))
        ahat <- speq$par
        w <- p*exp(ahat%*%t(A))
        Khat <- sum(ns*log(tapply(w,strata,sum))) - sum(ahat*u)
        temp <- matrix(0,d,d)
        for (s in seq_along(ns)) {
          gp <- seq_len(n)[strata == s]
          sw <- sum(w[gp])
          saw <- w[gp]%*%A[gp,]
          sa2w <- t(matrix(w[gp],ns[s],d)*A[gp,])%*%A[gp,]
          temp <- temp+ns[s]/sw*(sa2w-(saw%*%t(saw))/sw)
        }
        K2hat <- det(temp)
      }
      if (speq$convergence == 0) {
        gs <- 1/sqrt(2*pi*K2hat)^d*exp(Khat)
        if (d == 1) {
          r <- sgn(ahat)*sqrt(-2*Khat)
          v <- ahat*sqrt(K2hat)
          if (LR)	Gs <- pnorm(r)+dnorm(r)*(1/r - 1/v)
          else	Gs <- pnorm(r+log(v/r)/r)
        }
        else	Gs <- NA
      }
      else	gs <- Gs <- ahat <- NA
    } else if (wdist == "p") {
      if (type == "cond") {
        #  Conditional Poisson and Binary saddlepoints are calculated by first
        #  solving a linear programming problem and then fitting a generalized
        #  linear model to find the solution to the saddlepoint equations.
        smp <- simplex(rep(0, n), A3 = t(A), b3 = u)
        if (smp$solved == 1) {
          y <- smp$soln
          A1 <- A[,1L:d1]
          A2 <- A[,-(1L:d1)]
          mod1 <- summary(glm(y ~ A1 + A2 + offset(log(mu)) - 1,
                              poisson, control = glm.control(maxit=100)))
          mod2 <- summary(glm(y ~ A2 + offset(log(mu)) - 1,
                              poisson, control = glm.control(maxit=100)))
          ahat <- mod1$coefficients[,1L]
          ahat2 <- mod2$coefficients[,1L]
          temp1 <- mod2$deviance - mod1$deviance
          temp2 <- det(mod2$cov.unscaled)/det(mod1$cov.unscaled)
          gs <- 1/sqrt((2*pi)^d1*temp2)*exp(-temp1/2)
          if (d1 == 1) {
            r <- sgn(ahat[1L])*sqrt(temp1)
            v <- ahat[1L]*sqrt(temp2)
            if (LR)	Gs<-pnorm(r)+dnorm(r)*(1/r-1/v)
            else	Gs <- pnorm(r+log(v/r)/r)
          }
          else	Gs <- NA
        }
        else {
          ahat <- ahat2 <- NA
          gs <- Gs <- NA
        }
      }
      else stop("this type not implemented for Poisson")
    }
    else if (wdist == "b") {
      if (type == "cond") {
        smp <- simplex(rep(0, n), A1 = iden(n), b1 = rep(1-2e-6, n),
                       A3 = t(A), b3 = u - 1e-6*apply(A, 2L, sum))
        #  For the binary case we require that the values are in the interval (0,1)
        #  since glm code seems to have problems when there are too many 0's or 1's.
        if (smp$solved == 1) {
          y <- smp$soln+1e-6
          A1 <- A[, 1L:d1]
          A2 <- A[, -(1L:d1)]
          mod1 <- summary(glm(cbind(y, 1-y) ~ A1+A2+offset(qlogis(mu))-1,
                              binomial, control = glm.control(maxit=100)))
          mod2 <- summary(glm(cbind(y, 1-y) ~ A2+offset(qlogis(mu))-1,
                              binomial, control = glm.control(maxit=100)))
          ahat <- mod1$coefficients[,1L]
          ahat2 <- mod2$coefficients[,1L]
          temp1 <- mod2$deviance-mod1$deviance
          temp2 <- det(mod2$cov.unscaled)/det(mod1$cov.unscaled)
          gs <- 1/sqrt((2*pi)^d1*temp2)*exp(-temp1/2)
          if (d1 == 1) {
            r <- sgn(ahat[1L])*sqrt(temp1)
            v <- ahat[1L]*sqrt(temp2)
            if (LR)	Gs<-pnorm(r)+dnorm(r)*(1/r-1/v)
            else	Gs <- pnorm(r+log(v/r)/r)
          }
          else	Gs <- NA
        }
        else {
          ahat <- ahat2 <- NA
          gs <- Gs <- NA
        }
      }
      else stop("this type not implemented for Binary")
    }
    if (type == "simp")
      out <- list(spa = c(gs, Gs), zeta.hat = ahat)
    else #if (type == "cond")
      out <- list(spa = c(gs, Gs), zeta.hat = ahat, zeta2.hat = ahat2)
    names(out$spa) <- c("pdf", "cdf")
    out
  }
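
# Example sketch (not run): simple multinomial saddlepoint approximation for a
# resampled sum, evaluated 10% above its observed value.
#   x <- rgamma(10, 3)
#   saddle(A = cbind(x), u = 1.1 * sum(x))$spa   # approximate pdf and cdf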


saddle.distn <-
  function(A, u = NULL, alpha = NULL, wdist = "m",
           type = "simp", npts = 20, t = NULL, t0 = NULL, init = rep(0.1, d),
           mu = rep(0.5, n), LR = FALSE, strata = NULL, ...)
    #
    #  This function calculates the entire saddlepoint distribution by
    #  finding the saddlepoint approximations at npts values and then
    #  fitting a spline to the results (on the normal quantile scale).
    #  A may be a matrix or a function of t.  If A is a matrix with 1 column
    #  u is not used (u = t), if A is a matrix with more than 1 column u must
    #  be a vector with ncol(A)-1 elements, if A is a function of t then u
    #  must also be a function returning a vector of ncol(A(t, ...)) elements.
  {
    call <- match.call()
    if (is.null(alpha)) alpha <- c(0.001,0.005,0.01,0.025,0.05,0.1,0.2,0.5,
                                   0.8,0.9,0.95,0.975,0.99,0.995,0.999)
    if (is.null(t) && is.null(t0))
      stop("one of 't' or 't0' required")
    ep1 <- min(c(alpha,0.01))/10
    ep2 <- (1-max(c(alpha,0.99)))/10
    d <- if (type == "simp") 1
    else if (is.function(u)) {
      if (is.null(t)) length(u(t0[1L], ...)) else length(u(t[1L], ...))
    } else  1L+length(u)
    i <- nsads <- 0
    if (!is.null(t)) npts <- length(t)
    zeta <- matrix(NA,npts,2L*d-1L)
    spa <- matrix(NA,npts,2L)
    pts <- NULL
    if (is.function(A)) {
      n <- nrow(as.matrix(A(t0[1L], ...)))
      if (is.null(u)) stop("function 'u' missing")
      if (!is.function(u)) stop("'u' must be a function")
      if (is.null(t)) {
        t1 <- t0[1L]-2*t0[2L]
        sad <- saddle(A = A(t1, ...), u = u(t1, ...),
                      wdist = wdist, type = type, d1 = 1,
                      init = init, mu = mu, LR = LR, strata = strata)
        bdu <- bdl <- NULL
        while (is.na(sad$spa[2L]) || (sad$spa[2L] > ep1) ||
               (sad$spa[2L] < ep1/100)) {
          nsads <- nsads+1
          #  Find a lower bound on the effective range of the saddlepoint distribution
          if (!is.na(sad$spa[2L]) && (sad$spa[2L] > ep1)) {
            i <- i+1
            zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
            spa[i,] <- sad$spa
            pts <- c(pts,t1)
            bdu <- t1
          }
          else	bdl <- t1
          if (nsads == npts)
            stop("unable to find range")
          if (is.null(bdl)) {
            t1 <- 2*t1-t0[1L]
            sad <- saddle(A = A(t1, ...),
                          u = u(t1, ...), wdist = wdist,
                          type = type, d1 = 1, init = init,
                          mu = mu, LR = LR, strata = strata)
          }
          else if (is.null(bdu)) {
            t1 <- (t0[1L]+bdl)/2
            sad <- saddle(A = A(t1, ...),
                          u = u(t1, ...), wdist = wdist,
                          type = type, d1 = 1, init = init,
                          mu = mu, LR = LR, strata = strata)
          }
          else {
            t1 <- (bdu+bdl)/2
            sad <- saddle(A = A(t1, ...),
                          u = u(t1, ...), wdist = wdist,
                          type = type, d1 = 1, init = init,
                          mu = mu, LR = LR, strata = strata)
          }
        }
        i1 <- i <- i+1
        nsads <- 0
        zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
        spa[i,] <- sad$spa
        pts <- c(pts,t1)
        t2 <- t0[1L]+2*t0[2L]
        sad <- saddle(A = A(t2, ...), u = u(t2, ...),
                      wdist = wdist, type = type, d1 = 1, init = init,
                      mu = mu, LR = LR, strata = strata)
        bdu <- bdl <- NULL
        while (is.na(sad$spa[2L]) || (1-sad$spa[2L] > ep2) ||
               (1-sad$spa[2L] < ep2/100)){
          #  Find an upper bound on the effective range of the saddlepoint distribution
          nsads <- nsads+1
          if (!is.na(sad$spa[2L])&&(1-sad$spa[2L] > ep2)) {
            i <- i+1
            zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
            spa[i,] <- sad$spa
            pts <- c(pts,t2)
            bdl <- t2
          }
          else	bdu <- t2
          if (nsads  == npts)
            stop("unable to find range")
          if (is.null(bdu)) {
            t2 <- 2*t2-t0[1L]
            sad <- saddle(A = A(t2, ...),
                          u = u(t2, ...), wdist = wdist,
                          type = type, d1 = 1, init = init,
                          mu = mu, LR = LR, strata = strata)
          } else if (is.null(bdl)) {
            t2 <- (t0[1L]+bdu)/2
            sad <- saddle(A = A(t2, ...),
                          u = u(t2, ...), wdist = wdist,
                          type = type, d1 = 1, init = init,
                          mu = mu, LR = LR, strata = strata)
          } else {
            t2 <- (bdu+bdl)/2
            sad <- saddle(A = A(t2, ...),
                          u = u(t2, ...), wdist = wdist,
                          type = type, d1 = 1, init = init,
                          mu = mu, LR = LR, strata = strata)
          }
        }
        i <- i+1
        zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
        spa[i,] <- sad$spa
        pts <- c(pts,t2)
        #  Now divide the rest of the npts points so that about half are at
        #  either side of t0[1L].
        if ((npts %% 2) ==  0) {
          tt1<- seq.int(t1, t0[1L], length.out = npts/2-i1+2)[-1L]
          tt2 <- seq.int(t0[1L], t2, length.out = npts/2+i1-i+2)[-1L]
          t <- c(tt1[-length(tt1)], tt2[-length(tt2)])
        } else {
          ex <- 1*(t1+t2 > 2*t0[1L])
          ll <- floor(npts/2)+2
          tt1 <- seq.int(t1, t0[1L], length.out = ll-i1+1-ex)[-1L]
          tt2 <- seq.int(t0[1L], t2, length.out = ll+i1-i+ex)[-1L]
          t <- c(tt1[-length(tt1)], tt2[-length(tt2)])
        }
      }
      init1 <- init
      for (j in (i+1):npts) {
        #  Calculate the saddlepoint approximations at the extra points.
        sad <- saddle(A = A(t[j-i], ...), u = u(t[j-i], ...),
                      wdist = wdist, type = type, d1 = 1,
                      init = init1, mu = mu, LR = LR,
                      strata = strata)
        zeta[j,] <- c(sad$zeta.hat, sad$zeta2.hat)
        init1 <- sad$zeta.hat
        spa[j,] <- sad$spa
      }
    }
    else {
      A <- as.matrix(A)
      n <- nrow(A)
      if (is.null(t)) {
        #  Find a lower bound on the effective range of the saddlepoint distribution
        t1 <- t0[1L]-2*t0[2L]
        sad <- saddle(A = A, u = c(t1,u), wdist = wdist, type = type,
                      d = d, d1 = 1, init = init, mu = mu, LR = LR,
                      strata = strata)
        bdu <- bdl <- NULL
        while (is.na(sad$spa[2L]) || (sad$spa[2L] > ep1) ||
               (sad$spa[2L] < ep1/100)) {
          if (!is.na(sad$spa[2L]) && (sad$spa[2L] > ep1)) {
            i <- i+1
            zeta[i,] <- c(sad$zeta.hat,
                          sad$zeta2.hat)
            spa[i,] <- sad$spa
            pts <- c(pts,t1)
            bdu <- t1
          }
          else	bdl <- t1
          if (i == floor(npts/2))
            stop("unable to find range")
          if (is.null(bdl)) {
            t1 <- 2*t1-t0[1L]
            sad <- saddle(A = A, u = c(t1,u),
                          wdist = wdist, type = type, d = d,
                          d1 = 1, init = init, mu = mu, LR = LR,
                          strata = strata)
          } else if (is.null(bdu)) {
            t1 <- (t0[1L]+bdl)/2
            sad <- saddle(A = A, u = c(t1,u),
                          wdist = wdist, type = type, d = d,
                          d1 = 1, init = init, mu = mu, LR = LR,
                          strata = strata)
          } else {
            t1 <- (bdu+bdl)/2
            sad <- saddle(A = A, u = c(t1,u),
                          wdist = wdist, type = type, d = d,
                          d1 = 1, init = init, mu = mu, LR = LR,
                          strata = strata)
          }
        }
        i1 <- i <- i+1
        zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
        spa[i,] <- sad$spa
        pts <- c(pts,t1)
        #  Find an upper bound on the effective range of the saddlepoint distribution
        t2 <- t0[1L]+2*t0[2L]
        sad <- saddle(A = A, u = c(t2,u), wdist = wdist, type = type,
                      d = d, d1 = 1, init = init, mu = mu, LR = LR,
                      strata = strata)
        bdu <- bdl <- NULL
        while (is.na(sad$spa[2L]) || (1-sad$spa[2L] > ep2) ||
               (1-sad$spa[2L] < ep2/100)) {
          if (!is.na(sad$spa[2L])&&(1-sad$spa[2L] > ep2)) {
            i <- i+1
            zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
            spa[i,] <- sad$spa
            pts <- c(pts, t2)
            bdl <- t2
          }
          else	bdu <- t2
          if ((i-i1) == floor(npts/2))
            stop("unable to find range")
          if (is.null(bdu)) {
            t2 <- 2*t2-t0[1L]
            sad <- saddle(A = A, u = c(t2, u),
                          wdist = wdist, type = type, d = d,
                          d1 = 1, init = init, mu = mu, LR = LR,
                          strata = strata)
          }
          else if (is.null(bdl)) {
            t2 <- (t0[1L]+bdu)/2
            sad <- saddle(A = A, u = c(t2, u),
                          wdist = wdist, type = type, d = d,
                          d1 = 1, init = init, mu = mu, LR = LR,
                          strata = strata)
          }
          else {
            t2 <- (bdu+bdl)/2
            sad <- saddle(A = A, u = c(t2, u),
                          wdist = wdist, type = type, d = d,
                          d1 = 1, init = init, mu = mu, LR = LR,
                          strata = strata)
          }
        }
        i <- i+1
        zeta[i,] <- c(sad$zeta.hat, sad$zeta2.hat)
        spa[i,] <- sad$spa
        pts <- c(pts, t2)
        #  Now divide the rest of the npts points so that about half are at
        #  either side of t0[1L].
        if ((npts %% 2) == 0) {
          tt1 <- seq.int(t1, t0[1L], length.out=npts/2-i1+2)[-1L]
          tt2 <- seq.int(t0[1L], t2, length.out=npts/2+i1-i+2)[-1L]
          t <- c(tt1[-length(tt1)], tt2[-length(tt2)])
        }
        else {
          ex <- 1*(t1+t2 > 2*t0[1L])
          ll <- floor(npts/2)+2
          tt1 <- seq.int(t1, t0[1L], length.out=ll-i1+1-ex)[-1L]
          tt2 <- seq.int(t0[1L], t2, length.out=ll+i1-i+ex)[-1L]
          t <- c(tt1[-length(tt1)], tt2[-length(tt2)])
        }
      }
      init1 <- init
      for (j in (i+1):npts) {
        #  Calculate the saddlepoint approximations at the extra points.
        sad <- saddle(A=A, u=c(t[j-i],u), wdist=wdist,
                      type=type, d=d, d1=1, init=init,
                      mu=mu, LR=LR, strata=strata)
        zeta[j,] <- c(sad$zeta.hat, sad$zeta2.hat)
        init1 <- sad$zeta.hat
        spa[j,] <- sad$spa
      }
    }
    #  Omit points too close to the center as the distribution approximation is
    #  not good at those points.
    pts.in <- (1L:npts)[(abs(zeta[,1L]) > 1e-6) &
                          (abs(spa[, 2L] - 0.5) < 0.5 - 1e-10)]
    pts <- c(pts,t)[pts.in]
    zeta <- as.matrix(zeta[pts.in, ])
    spa <- spa[pts.in, ]
    #  Fit a spline to the approximations and predict at the required quantile
    #  values.
    distn <- smooth.spline(qnorm(spa[,2]), pts)
    quantiles <- predict(distn, qnorm(alpha))$y
    quans <- cbind(alpha, quantiles)
    colnames(quans) <- c("alpha", "quantile")
    inds <- order(pts)
    psa <- cbind(pts[inds], spa[inds,], zeta[inds,])
    if (d == 1) anames <- "zeta"
    else {
      anames <- rep("", 2*d-1)
      for (j in 1L:d) anames[j] <- paste("zeta1.", j, sep = "")
      for (j in (d+1):(2*d-1)) anames[j] <- paste("zeta2.", j-d, sep = "")
    }
    dimnames(psa) <- list(NULL,c("t", "gs", "Gs", anames))
    out <- list(quantiles = quans, points = psa, distn = distn,
                call = call, LR = LR)
    class(out) <- "saddle.distn"
    out
  }
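
# Example sketch (not run): saddlepoint approximation to the whole bootstrap
# distribution of a sample mean, without any resampling.
#   x <- rgamma(25, 2)
#   sdist <- saddle.distn(A = cbind(x/25), t0 = c(mean(x), sqrt(var(x)/25)))
#   sdist$quantiles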

print.saddle.distn <- function(x, ...) {
  #
  #  Print the output from saddle.distn
  #
  sad.d <- x
  cl <- sad.d$call
  rg <- range(sad.d$points[,1L])
  mid <- mean(rg)
  digs <- ceiling(log10(abs(mid)))
  if (digs <= 0) digs <- 4
  else if (digs >= 4) digs <- 0
  else digs <- 4-digs
  rg <- round(rg,digs)
  level <- 100*sad.d$quantiles[,1L]
  quans <- format(round(sad.d$quantiles,digs))
  quans[,1L] <- paste("\n",format(level),"%     ",sep="")
  cat("\nSaddlepoint Distribution Approximations\n\n")
  cat("Call : \n")
  dput(cl, control=NULL)
  cat("\nQuantiles of the Distribution\n")
  cat(t(quans))
  cat(paste("\n\nSmoothing spline used ", nrow(sad.d$points),
            " points in the range ", rg[1L]," to ", rg[2L], ".\n", sep=""))
  if (sad.d$LR)
    cat("Lugananni-Rice approximations used\n")
  invisible(sad.d)
}

# lines.saddle.distn <-
#   function(x, dens = TRUE, h = function(u) u, J = function(u) 1,
#            npts = 50, lty = 1, ...)
#   {
#     #
#     #  Add lines corresponding to a saddlepoint approximation to a plot
#     #
#     sad.d <- x
#     tt <- sad.d$points[,1L]
#     rg <- range(h(tt, ...))
#     tt1 <- seq.int(from = rg[1L], to = rg[2L], length.out = npts)
#     if (dens) {
#       gs <- sad.d$points[,2]
#       spl <- smooth.spline(h(tt, ...),log(gs*J(tt, ...)))
#       lines(tt1,exp(predict(spl, tt1)$y), lty = lty)
#     } else {
#       Gs <- sad.d$points[,3]
#       spl <- smooth.spline(h(tt, ...),qnorm(Gs))
#       lines(tt1,pnorm(predict(spl ,tt1)$y))
#     }
#     invisible(sad.d)
#   }
#
ts.array <- function(n, n.sim, R, l, sim, endcorr)
{
  #
  #  This function finds the starting positions and lengths for the
  #  block bootstrap.
  #
  #  n is the number of data points in the original time series
  #  n.sim is the number of data points required in the simulated time series
  #  R is the number of simulated series required
  #  l is the block length
  #  sim is the simulation type "fixed" or "geom".  For "fixed" l is taken
  #	to be the fixed block length, for "geom" l is the average block
  #	length, the actual lengths having a geometric distribution.
  #  endcorr is a logical specifying whether end-correction is required.
  #
  #  It returns a list of two components:
  #  starts is a matrix of starting positions, with R rows
  #  lengths is a vector of lengths if sim="fixed" or a matrix of lengths
  #	corresponding to the starting points in starts if sim="geom"
  endpt <- if (endcorr) n else n-l+1
  cont <- TRUE
  if (sim == "geom") {
    len.tot <- rep(0,R)
    lens <- NULL
    while (cont) {
      #            inds <- (1L:R)[len.tot < n.sim]
      temp <- 1+rgeom(R, 1/l)
      temp <- pmin(temp, n.sim - len.tot)
      lens <- cbind(lens, temp)
      len.tot <- len.tot + temp
      cont <- any(len.tot < n.sim)
    }
    dimnames(lens) <- NULL
    nn <- ncol(lens)
    st <- matrix(sample.int(endpt, nn*R, replace = TRUE), R)
  } else {
    nn <- ceiling(n.sim/l)
    lens <- c(rep(l,nn-1), 1+(n.sim-1)%%l)
    st <- matrix(sample.int(endpt, nn*R, replace = TRUE), R)
  }
  list(starts = st, lengths = lens)
}
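
# Example sketch (not run): block starts and lengths for a fixed-block bootstrap
# of a series of length 100 using blocks of length 10.
#   ia <- ts.array(n = 100, n.sim = 100, R = 5, l = 10, sim = "fixed",
#                  endcorr = FALSE)
#   ia$starts    # 5 x 10 matrix of starting positions
#   ia$lengths   # ten blocks of length 10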

make.ends <- function(a, n)
{
  #  Function which takes a matrix of starts and lengths and returns the
  #  indices for a time series simulation. (Viewing the series as circular.)
  mod <- function(i, n) 1 + (i - 1) %% n
  if (a[2L] == 0) numeric()
  else  mod(seq.int(a[1L], a[1L] + a[2L] - 1, length.out = a[2L]), n)
}
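
# Example sketch (not run): indices for a block of length 4 starting at position
# 9 in a series of length 10, wrapping around the end of the series.
#   make.ends(c(9, 4), 10)   # 9 10 1 2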


tsboot <- function(tseries, statistic, R, l = NULL, sim = "model",
                   endcorr = TRUE, n.sim = NROW(tseries), orig.t = TRUE,
                   ran.gen = function(tser, n.sim, args) tser,
                   ran.args = NULL, norm = TRUE, ...,
                   parallel = c("no", "multicore", "snow"),
                   ncpus = getOption("boot.ncpus", 1L), cl = NULL)
{
  #
  #  Bootstrap function for time series data.  Possible resampling methods are
  #  the block bootstrap, the stationary bootstrap (these two can also be
  #  post-blackened), model-based resampling and phase scrambling.
  #
  if (missing(parallel)) parallel <- getOption("boot.parallel", "no")
  parallel <- match.arg(parallel)
  have_mc <- have_snow <- FALSE
  if (parallel != "no" && ncpus > 1L) {
    if (parallel == "multicore") have_mc <- .Platform$OS.type != "windows"
    else if (parallel == "snow") have_snow <- TRUE
    if (!have_mc && !have_snow) ncpus <- 1L
    loadNamespace("parallel") # get this out of the way before recording seed
  }

  ## This does not necessarily call statistic, so we force a promise.
  statistic

  tscl <- class(tseries)
  R <- floor(R)
  if (R <= 0) stop("'R' must be positive")
  call <- match.call()
  if (!exists(".Random.seed", envir = .GlobalEnv, inherits = FALSE)) runif(1)
  seed <- get(".Random.seed", envir = .GlobalEnv, inherits = FALSE)
  t0 <- if (orig.t) statistic(tseries, ...) else NULL
  ts.orig <- if (!isMatrix(tseries)) as.matrix(tseries) else tseries
  n <- nrow(ts.orig)
  if (missing(n.sim)) n.sim <- n
  class(ts.orig) <- tscl
  if ((sim == "model") || (sim == "scramble"))
    l <- NULL
  else if ((is.null(l) || (l <= 0) || (l > n)))
    stop("invalid value of 'l'")
  fn <- if (sim == "scramble") {
    rm(ts.orig)
    ## Phase scrambling
    function(r) statistic(scramble(tseries, norm), ...)
  } else if (sim == "model") {
    rm(ts.orig)
    ## Model-based resampling
    ## force promises
    ran.gen; ran.args
    function(r) statistic(ran.gen(tseries, n.sim, ran.args), ...)
  } else if (sim %in% c("fixed", "geom")) {
    ## Otherwise generate an R x n matrix of starts and lengths for blocks.
    ## The actual indices of the blocks can then easily be found and these
    ## indices used for the resampling.  If ran.gen is present then
    ## post-blackening is required when the blocks have been formed.
    if (sim == "geom") endcorr <- TRUE
    i.a <- ts.array(n, n.sim, R, l, sim, endcorr)
    ## force promises
    ran.gen; ran.args
    function(r) {
      ends <- if (sim == "geom")
        cbind(i.a$starts[r,  ], i.a$lengths[r,  ])
      else  cbind(i.a$starts[r, ], i.a$lengths)
      inds <- apply(ends, 1L, make.ends, n)
      inds <- if (is.list(inds)) matrix(unlist(inds)[1L:n.sim], n.sim, 1L)
      else matrix(inds, n.sim, 1L)
      statistic(ran.gen(ts.orig[inds, ], n.sim, ran.args), ...)
    }
  } else
    stop("unrecognized value of 'sim'")

  res <- if (ncpus > 1L && (have_mc || have_snow)) {
    if (have_mc) {
      parallel::mclapply(seq_len(R), fn, mc.cores = ncpus)
    } else if (have_snow) {
      list(...) # evaluate any promises
      if (is.null(cl)) {
        cl <- parallel::makePSOCKcluster(rep("localhost", ncpus))
        if(RNGkind()[1L] == "L'Ecuyer-CMRG")
          parallel::clusterSetRNGStream(cl)
        res <- parallel::parLapply(cl, seq_len(R), fn)
        parallel::stopCluster(cl)
        res
      } else parallel::parLapply(cl, seq_len(R), fn)
    }
  } else lapply(seq_len(R), fn)

  # collect the replicates into a matrix with one row per bootstrap series
  t <- matrix(NA, R, length(res[[1L]]))
  for(r in seq_len(R)) t[r, ] <- res[[r]]

  ts.return(t0 = t0, t = t, R = R, tseries = tseries, seed = seed,
            stat = statistic, sim = sim, endcorr = endcorr, n.sim = n.sim,
            l = l, ran.gen = ran.gen, ran.args = ran.args, call = call,
            norm = norm)
}
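
## Example usage (a sketch, not run, assuming the lynx data shipped with R):
## stationary-bootstrap the lag-1 autocorrelation of the log-lynx series with
## mean block length 20.
# lynx.fun <- function(tsb) acf(tsb, lag.max = 1, plot = FALSE)$acf[2]
# tsboot(log(lynx), lynx.fun, R = 99, l = 20, sim = "geom")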

scramble <- function(ts, norm = TRUE)
  #
  #  Phase scramble a time series.  If norm = TRUE then normal margins are
  #  used otherwise exact empirical margins are used.
  #
{
  if (isMatrix(ts)) stop("multivariate time series not allowed")
  st <- start(ts)
  dt <- deltat(ts)
  frq <- frequency(ts)
  y <- as.vector(ts)
  e <- y - mean(y)
  n <- length(e)
  if (!norm) e <- qnorm( rank(e)/(n+1) )
  f <- fft(e) * complex(n, argument = runif(n) * 2 * pi)
  C.f <- Conj(c(0, f[seq(from = n, to = 2L, by = -1L)])) # or n:2
  e <- Re(mean(y) + fft((f + C.f)/sqrt(2), inverse = TRUE)/n)
  if (!norm) e <- sort(y)[rank(e)]
  ts(e, start = st, freq = frq, deltat = dt)
}
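
## Example (a sketch, not run): phase-scramble an AR(1) series; the surrogate
## series has (approximately) the same second-order structure as the original
## but randomised phases, with normal margins when norm = TRUE.
# set.seed(1)
# x <- arima.sim(list(ar = 0.5), n = 128)
# xs <- scramble(x, norm = TRUE)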

ts.return <- function(t0, t, R, tseries, seed, stat, sim, endcorr,
                      n.sim, l, ran.gen, ran.args, call, norm) {
  #
  #  Return the results of a time series bootstrap as an object of
  #  class "boot".
  #
  out <- list(t0 = t0,t = t, R = R, data = tseries, seed = seed,
              statistic = stat, sim = sim, n.sim = n.sim, call = call)
  if (sim ==  "scramble")
    out <- c(out, list(norm = norm))
  else if (sim == "model")
    out <- c(out, list(ran.gen = ran.gen, ran.args = ran.args))
  else {
    out <- c(out, list(l = l, endcorr = endcorr))
    if (!is.null(call$ran.gen))
      out <- c(out,list(ran.gen = ran.gen, ran.args = ran.args))
  }
  class(out) <- "boot"
  attr(out, "boot_type") <- "tsboot"
  out
}

## unexported helper

find_type <- function(boot.out)
{
  if(is.null(type <- attr(boot.out, "boot_type")))
    type <- sub("^boot::", "", deparse(boot.out$call[[1L]]))
  type
}
