Nothing
#' TDI estimation and inference
#'
#' @description
#' This function implements the estimation of the TDI and its corresponding \eqn{100(1-\alpha)\%} upper bound (UB),
#' where \eqn{\alpha} is the significance level, using the methods proposed by Choudhary (2007),
#' Escaramis \emph{et al.} (2010), Choudhary (2010) and Perez-Jaume and Carrasco (2015) in the case of two raters.
#' See \strong{Details} and \strong{References} for further information about these methods.
#'
#' @details
#' The methods of Choudhary (2007) and Escaramis \emph{et al.} (2010) are parametric methods based on linear mixed models
#' that assume normality of the data and linearity between the response and the effects (subjects, raters and
#' random errors). The linear mixed models are fitted using the function \code{\link[nlme]{lme}} from the \code{nlme} package.
#' The methods of Choudhary (2010) and Perez-Jaume and Carrasco (2015) are non-parametric methods
#' based on the estimation of quantiles of the absolute value of the differences between raters. Non-parametric
#' methods are recommended when dealing with skewed data or other non-normally distributed data, such as count data.
#' In situations of normality, parametric methods are recommended. See \strong{References} for further details.
#'
#' @usage TDI(data, y, id, met, rep = NA,
#' method = c("Choudhary P", "Escaramis et al.",
#' "Choudhary NP", "Perez-Jaume and Carrasco"),
#' p = 0.9, ub = TRUE, boot.type = c("differences", "cluster"),
#' type = 8, R = 10000, dec.p = 2, dec.est = 3,
#' choose.model.ch.p = TRUE, var.equal = TRUE,
#' choose.model.es = TRUE, int = FALSE, tol = 10^(-8), add.es = NULL,
#' alpha = 0.05)
#'
#' @param data name of the dataset, of class \code{data.frame}, containing at least 3 columns (quantitative measurement, subject effect, rater effect).
#' @param y quantitative measurement column name.
#' @param id subject effect column name. The corresponding column of \code{data} must be a factor.
#' @param met rater effect column name. The corresponding column of \code{data} must be a factor.
#' @param rep replicate effect column name. When there are no replicates the user should use \code{rep = NA}. When there are replicates, the corresponding column of data must be a factor. \cr
#' The default value is \code{NA}.
#' @param method name of the method(s) to estimate the TDI and UB. The options are:
#' \code{"Choudhary P"} (Choudhary, 2007), \code{"Escaramis et al."} (Escaramis \emph{et al.}, 2010),
#' \code{"Choudhary NP"} (Choudhary, 2010) and \code{"Perez-Jaume and Carrasco"} (Perez-Jaume and Carrasco, 2015).
#' This argument is not case-sensitive and is passed to \code{\link[base]{match.arg}}. \cr
#' The default value is \code{c("Choudhary P", "Escaramis et al.", "Choudhary NP", "Perez-Jaume and Carrasco")}, so all approaches are executed by default.
#' @param p a value or vector of the proportion(s) for estimation of the TDI, where \eqn{0<p<1}. Commonly, \eqn{p\geq 0.80}. \cr
#' The default value is 0.90.
#' @param ub logical asking whether the UBs should be computed. \cr
#' The default value is \code{TRUE}.
#' @param boot.type name of the bootstrap approach(es) to be used in the method of Perez-Jaume and Carrasco (2015). There are two
#' different options when there are replicates: to bootstrap the vector of the within-subject differences (\code{"differences"})
#' or to bootstrap at subject level (\code{"cluster"}). This is, not all the differences coming from the same subject
#' need to be bootstrapped together in the first one but all the measurements from the same subjects have to be bootstrapped
#' together in the second one. This argument is passed to \code{\link[base]{match.arg}} \cr
#' The default value is \code{c("differences", "cluster")}, so all approaches are executed by default.
#' @param type in the method of Perez-Jaume and Carrasco (2015), a quantile is calculated to obtain the estimation of the TDI. This argument is an integer
#' between 1 and 9 selecting one of the nine quantile algorithms (to be passed to \code{\link[stats]{quantile}}).
#' We recommend 8 for continuous data and 3 for discrete data. \cr
#' The default value is 8.
#' @param R in the method of Perez-Jaume and Carrasco (2015), bootstrap is used for the estimation of the UB.
#' This argument chooses the number of bootstrap replicates (to be passed to \code{\link[boot]{boot}}). \cr
#' The default value is 10000.
#' @param dec.p number of decimals to display for \code{p} in the method \code{\link[TDIagree]{print.tdi}}. \cr
#' The default value is 2.
#' @param dec.est number of decimals to display for the estimates in the method \code{\link[TDIagree]{print.tdi}}. Up to 4 decimals. \cr
#' The default value is 3.
#' @param choose.model.ch.p in the parametric method of Choudhary (2007) two methods can be fit, one with equal residual homoscedasticity
#' between raters and one with unequal residual homoscedasticity. This argument, if \code{TRUE}, chooses the
#' model with the minimum AIC. If \code{FALSE}, the argument \code{var.equal} must be specified. \cr
#' The default value is \code{TRUE}.
#' @param var.equal logical asking if there is residual homoscedasticity between raters to choose the model in the parametric method of Choudhary (2007).
#' If \code{choose.model.ch.p} is set to \code{TRUE}, this argument is ignored. \cr
#' The default value is \code{TRUE}.
#' @param choose.model.es in the method of Escaramis \emph{et al.} (2010) two methods can be fit, one including the subject--rater interaction
#' and one that does not. The model with interaction only applies to data with replicates. This argument, if \code{TRUE}, chooses the
#' model with the minimum AIC. If \code{FALSE}, the argument \code{int} must be specified. \cr
#' The default value is \code{TRUE}.
#' @param int logical asking if there is interaction between subjects and methods to choose the model in the method of Escaramis \emph{et al.} (2010).
#' The model with interaction only applies to data with replicates. If \code{choose.model.es} is set to \code{TRUE}, this argument is ignored. \cr
#' The default value is \code{FALSE}.
#' @param tol tolerance to be used in the method of Escaramis \emph{et al.} (2010). \cr
#' The default value is 10^(-8).
#' @param add.es name of the columns in \code{data} that will be added to the model (as fixed effects) of the method of Escaramis \emph{et al.} (2010). It must be passed as a column
#' name or vector of column names. \cr
#' The default value, \code{NULL}, indicates that no extra variables are to be added in the model.
#' @param alpha significance level for inference on the TDI. \cr
#' The default value is 0.05.
#'
#' @returns An object of class \code{tdi}, which is a list with five components:
#' \describe{
#' \item{\code{result}}{an object of class \code{data.frame} with the TDI estimates and UBs of the methods specified for every proportion.}
#' \item{\code{fitted.models}}{a list with the fitted models of the parametric methods of Choudhary (2007) and Escaramis \emph{et al.} (2010).}
#' \item{\code{params}}{a list with the values \code{dec.est}, \code{dec.p}, \code{ub}, \code{method} and \code{alpha} to be used in the method \code{\link[TDIagree]{print.tdi}}
#' and in the method \code{\link[TDIagree]{plot.tdi}}.}
#' \item{\code{data.long}}{an object of class \code{data.frame} with columns y, id, met (and rep if it applies) with the values of the measurement, subject identifiers,
#' rater (and replicate number if it applies) from the original data frame provided.}
#' \item{\code{data.wide}}{an object of class \code{data.frame} with either:
#' \itemize{
#' \item{columns id, y_met1, y_met2 (in the case of no replicates) with the measurements of each method.}
#' \item{columns id, y_met1rep1,..., y_met1rep\eqn{m}, y_met2rep1,..., y_met2rep\eqn{m}, with the measurements of each method and each replicate, where \eqn{m} is the number of replicates.}}
#' Numbers 1 and 2 after met correspond to the first and second level of the column met in data, respectively.
#' Numbers 1,..., \eqn{m} after rep correspond to the first,..., \eqn{m}-th level of the column rep in data, respectively.}
#' }
#'
#'
#' @examples
#' # normal data, parametric methods more suitable
#'
#' set.seed(2025)
#'
#' n <- 100
#'
#' mu.ind <- rnorm(n, 0, 7)
#'
#' epsA1 <- rnorm(n, 0, 3)
#' epsA2 <- rnorm(n, 0, 3)
#' epsB1 <- rnorm(n, 0, 3)
#' epsB2 <- rnorm(n, 0, 3)
#'
#' y_A1 <- 50 + mu.ind + epsA1 # rater A, replicate 1
#' y_A2 <- 50 + mu.ind + epsA2 # rater A, replicate 2
#' y_B1 <- 40 + mu.ind + epsB1 # rater B, replicate 1
#' y_B2 <- 40 + mu.ind + epsB2 # rater B, replicate 2
#'
#' ex_data <- data.frame(y = c(y_A1, y_A2, y_B1, y_B2),
#' rater = factor(rep(c("A", "B"), each = 2*n)),
#' replicate = factor(rep(rep(1:2, each = n), 2)),
#' subj = factor(rep(1:n, 4)))
#'
#' tdi <- TDI(ex_data, y, subj, rater, replicate, p = c(0.8, 0.9),
#' method = c("Choudhary P", "Escaramis et al.",
#' "Choudhary NP", "Perez-Jaume and Carrasco"),
#' boot.type = "cluster", R = 1000)
#'
#' tdi$result
#' tdi$fitted.models
#' tdi$data.long
#' tdi$data.wide
#'
#'
#' # non-normal data, non-parametric methods more suitable
#'
#' tdi.aml <- TDI(AMLad, mrd, id, met, rep, p = c(0.85, 0.95), boot.type = "cluster",
#' dec.est = 4, R = 1000)
#' tdi.aml$result
#' tdi.aml$fitted.models
#' tdi.aml$data.long
#' tdi.aml$data.wide
#'
#' @seealso \code{\link[TDIagree]{print.tdi}}, \code{\link[TDIagree]{plot.tdi}}
#'
#' @references Efron, B., & Tibshirani, R. (1993). An Introduction to the Bootstrap; Chapman and Hall. Inc.: New York, NY, USA, 914.
#'
#' Lin, L. I. K. (2000). Total deviation index for measuring individual agreement with applications in laboratory performance and bioequivalence. Statistics in Medicine, 19(2):255-270.
#'
#' Choudhary, P. K. (2007). A tolerance interval approach for assessment of agreement with left censored data. Journal of Biopharmaceutical Statistics, 17(4), 583-594.
#'
#' Escaramis, G., Ascaso, C., & Carrasco, J. L. (2010). The total deviation index estimated by tolerance intervals to evaluate the concordance of measurement devices. BMC Medical Research Methodology, 10, 1-12.
#'
#' Choudhary, P. K. (2010). A unified approach for nonparametric evaluation of agreement in method comparison studies. The International Journal of Biostatistics, 6(1).
#'
#' Perez‐Jaume, S., & Carrasco, J. L. (2015). A non‐parametric approach to estimate the total deviation index for non‐normal data. Statistics in Medicine, 34(25), 3318-3335.
#'
#' @export
TDI <- function(data, y, id, met, rep = NA,
method = c("Choudhary P", "Escaramis et al.",
"Choudhary NP", "Perez-Jaume and Carrasco"),
p = 0.9, ub = TRUE, boot.type = c("differences", "cluster"),
type = 8, R = 10000, dec.p = 2, dec.est = 3,
choose.model.ch.p = TRUE, var.equal = TRUE,
choose.model.es = TRUE, int = FALSE, tol = 10^(-8), add.es = NULL,
alpha = 0.05) {
# columns
y <- paste(deparse(substitute(y)), collapse = "")
y <- gsub(" ", "", y)
id <- paste(deparse(substitute(id)), collapse = "")
id <- gsub(" ", "", id)
met <- paste(deparse(substitute(met)), collapse = "")
met <- gsub(" ", "", met)
rep <- paste(deparse(substitute(rep)), collapse = "")
rep <- gsub(" ", "", rep)
add.es <- paste(deparse(substitute(add.es)), collapse = "")
add.es <- gsub(" ", "", add.es)
add.es <- unlist(strsplit(gsub("\\)", "", gsub("c\\(", "", add.es)), ","))
if ("NULL" %in% add.es) {
add.es <- NULL
}
# argument type checks
if (!is.data.frame(data)) {
stop("'data' must be a data.frame")
}
if (rep != "NA") {
if (!all(c(y, id, met, rep) %in% names(data))) {
stop("'y', 'id', 'met' and 'rep' must be columns in 'data'")
}
if (any(duplicated(c(y, id, met, rep)))) {
stop("two of the column identifiers are the same")
}
if (!is.factor(data[, id]) | !is.factor(data[, met]) | !is.factor(data[, rep])) {
stop("'id', 'met' and 'rep' columns must be factors")
}
} else{
rep <- NA
if (!all(c(y, id, met) %in% names(data))) {
stop("'y', 'id' and 'met' must be columns in 'data'")
}
if (any(duplicated(c(y, id, met)))) {
stop("two of the column identifiers are the same")
}
if (!is.factor(data[, id]) | !is.factor(data[, met])) {
stop("'id' and 'met' columns must be factors")
}
}
choices.method <- tolower(c("Choudhary P", "Escaramis et al.", "Choudhary NP", "Perez-Jaume and Carrasco"))
method.pre <- tolower(method)
method <- match.arg(method.pre, choices.method, several.ok = TRUE)
if (length(method) != length(method.pre)) {
warning("one of the methods called does not exist and will not be estimated")
}
p <- unique(p)
if (!is.numeric(p)) {
stop("'p' must be a numeric value or vector")
}
if (!is.logical(ub)) {
stop("'ub' must be logical")
}
boot.type <- match.arg(boot.type, several.ok = TRUE)
if (!is.numeric(type)) {
stop("'type' must be numeric")
} else if (type != 1 & type != 2 & type != 3 & type != 4 & type != 5 & type != 6 & type != 7 & type != 8 & type != 9) {
stop("'type' must be an integer between 1 and 9")
}
if (!is.numeric(R)) {
stop("'R' must be numeric")
}
if (!is.numeric(dec.p)) {
stop("'dec.p' must be numeric")
} else if (dec.p > 4) {
stop("'dec.p' must be less or equal than 4")
}
if (!is.numeric(dec.est)) {
stop("'dec.est' must be numeric")
} else if (dec.est > 4) {
stop("'dec.est' must be less or equal than 4")
}
if (!is.logical(choose.model.ch.p)) {
stop("'choose.model.ch.p' must be logical")
}
if (!is.logical(var.equal)) {
stop("'var.equal' must be logical")
}
if (!is.logical(choose.model.es)) {
stop("'choose.model.es' must be logical")
}
if (!is.logical(int)) {
stop("'int' must be logical")
}
if (!is.numeric(tol)) {
stop("'tol' must be numeric")
}
if (!is.null(add.es)) {
if (!all(add.es %in% names(data))) {
stop("'add.es' must be NULL or columns in 'data'")
}
}
if (!is.numeric(alpha)) {
stop("'alpha' must be numeric")
} else if (alpha <= 0 | alpha >= 1) {
stop("'alpha' must be between 0 and 1, both not included")
}
# warning checks
if (any(table(data[, id]) == 0)) {
warning("'id' column has (at least) one empty level. Calculation proceeds ignoring the empty level(s)")
data[, id] <- factor(data[, id])
}
if (any(table(data[, met]) == 0)) {
warning("'met' column has (at least) one empty level. Calculation proceeds ignoring the empty level(s)")
data[, met] <- factor(data[, met])
}
if (nlevels(data[, met]) != 2) {
stop("this package only supports two levels in 'met' column")
}
if (!is.na(rep)) {
if (any(table(data[, rep]) == 0)) {
warning("'rep' column has (at least) one empty level. Calculation proceeds ignoring the empty level(s)")
data[, rep] <- factor(data[, rep])
}
if (nlevels(data[, rep]) == 1) {
warning("'rep' column only has one level. Consider omitting 'rep' call. Calculation proceeds assuming rep = NA")
rep <- NA
}
}
if (nlevels(data[, id]) < 6){
warning("number of subjects less than 6. Proceed with caution")
}
if (int & is.na(rep)){
warning("no replicates specified in 'rep', unable to fit model with interaction. Calculation proceeds assuming int = FALSE")
int <- FALSE
}
if (ub) {
if (is.na(rep) & "cluster" %in% boot.type){
warning("no replicates specified in 'rep', cluster bootstrap and differences bootstrap are the same. Consider omitting 'cluster' in boot.type call. Calculation proceeds assuming boot.type = 'differences'")
boot.type <- "differences"
}
}
# stop checks
if (!all(table(data[, met]) == table(data[, met])[1])) {
stop("the design must be balanced, there must be the same number of measurements per method")
}
if (is.na(rep)){
if (sum(table(data[, id], data[, met]) != 1) > 0) {
stop("the design must be balanced, each subject should have one measurement per method")
}
} else{
if (sum(table(data[, id], data[, met], data[, rep]) != 1) > 0) {
stop("the design must be balanced, each subject should have one measurement per method and replicate")
}
}
# extracting number of replicate
if (!is.na(rep)) {
m <- nlevels(data[, rep])
} else{
m <- 1
}
# extracting necessary columns of data
if (!is.na(rep)) {
data.long <- data.frame(y = data[, y], id = data[, id],
met = data[, met], rep = data[, rep])
if (!is.null(add.es)) {
for(i in 1:length(add.es)){
data.long[, add.es[i]] <- data[, add.es[i]]
}
}
data.long$z <- interaction(data.long$met, data.long$rep)
data.wide <- data.frame(id = unique(data.long$id))
for (j in 1:(nlevels(data.long$z)/2)){
data.wide[, paste0("y_met1rep", j)] <- data.long[data.long$z == levels(data.long$z)[2*j-1], "y"]
data.wide[, paste0("y_met2rep", j)] <- data.long[data.long$z == levels(data.long$z)[2*j], "y"]
}
} else{
data.long <- data.frame(y = data[, y], id = data[, id], met = data[, met])
if (!is.null(add.es)) {
for(i in 1:length(add.es)){
data.long[, add.es[i]] <- data[, add.es[i]]
}
}
y1 <- subset(data.long, met==levels(data.long[, "met"])[1])$y
y2 <- subset(data.long, met==levels(data.long[, "met"])[2])$y
data.wide <- data.frame(id=unique(data.long$id), y_met1 = y1, y_met2 = y2)
}
# stop check NAs
if (any(is.na(data.long))) {
stop("NAs not supported for TDI estimation. Rerun without NAs")
}
# defining outputs
result <- data.frame(p = p,
tdi.ch.p = NA, ub.ch.p = NA,
tdi.es.p = NA, ub.es.p = NA,
tdi.ch.np = NA, ub.ch.np = NA,
tdi.pc.np = NA,
ub_p_db.pc.np = NA, ub_n_db.pc.np = NA, ub_e_db.pc.np = NA, ub_b_db.pc.np = NA,
ub_p_cb.pc.np = NA, ub_n_cb.pc.np = NA, ub_e_cb.pc.np = NA, ub_b_cb.pc.np = NA)
output <- list()
model.ch <- "Parametric method of Choudhary not called"
model.es <- "Method of Escaramis et al. not called"
for (i in 1:length(p)) {
# parametric method of Choudhary
if ("choudhary p" %in% method) {
choud.p <- try(TDI.Choudhary.P(data.long, p = p[i], ub = ub,
choose.model = choose.model.ch.p, var.equal = var.equal, alpha = alpha), silent = FALSE)
if (inherits(choud.p, "try-error") & length(method) == 1) {
stop("Parametric method of Choudhary did not converge")
} else if (inherits(choud.p, "try-error")) {
warning("Parametric method of Choudhary did not converge")
result$tdi.ch.p[i] <- NA
result$ub.ch.p[i] <- NA
model.ch <- "Parametric method of Choudhary did not converge"
} else{
result$tdi.ch.p[i] <- choud.p$result[1]
result$ub.ch.p[i] <- choud.p$result[2]
model.ch <- choud.p$fitted.model
}
}
# method of Escaramis et al.
if ("escaramis et al." %in% method) {
escaramis <- TDI.Escaramis(data.long, rep, p = p[i], ub = ub,
choose.model = choose.model.es, tol = tol, int = int,
add.es = add.es, alpha = alpha)
result$tdi.es.p[i] <- escaramis$result[1]
result$ub.es.p[i] <- escaramis$result[2]
model.es <- escaramis$fitted.model
}
# non-parametric method of Choudhary
if ("choudhary np" %in% method) {
choud.np <- TDI.Choudhary.NP(data.long, rep, p = p[i], ub = ub, alpha = alpha)
result$tdi.ch.np[i] <- choud.np[1]
result$ub.ch.np[i] <- choud.np[2]
}
# method of Perez-Jaume and Carrasco
if ("perez-jaume and carrasco" %in% method) {
pjc <- TDI.Perez_Jaume_and_Carrasco(data.wide, p = p[i], ub = ub, boot.type = boot.type,
alpha = alpha, type = type, R = R)
result$tdi.pc.np[i] <- pjc[1]
result$ub_p_db.pc.np[i] <- pjc[2]
result$ub_n_db.pc.np[i] <- pjc[3]
result$ub_e_db.pc.np[i] <- pjc[4]
result$ub_b_db.pc.np[i] <- pjc[5]
result$ub_p_cb.pc.np[i] <- pjc[6]
result$ub_n_cb.pc.np[i] <- pjc[7]
result$ub_e_cb.pc.np[i] <- pjc[8]
result$ub_b_cb.pc.np[i] <- pjc[9]
}
}
result <- result[ , colSums(is.na(result))==0]
output$result <- result
output$fitted.models$choudhary <- model.ch
output$fitted.models$escaramis <- model.es
output$params$dec.est <- dec.est
output$params$dec.p <- dec.p
output$params$ub <- ub
output$params$method <- method
output$params$alpha <- alpha
if (is.na(rep)) {
output$data.long <- data.long[, 1:3]
} else{
output$data.long <- data.long[, 1:4]
}
output$data.wide <- data.wide
class(output) <- "tdi"
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.