Nothing
#' @title Prepare data for z-curve
#'
#' @description \code{zcurve_data} is used to prepare data for the
#' [zcurve()] function. The function transform strings containing
#' reported test statistics \code{"z", "t", "f", "chi", "p"} into two-sided
#' p-values. Test statistics reported as inequalities are as considered
#' to be censored as well as test statistics reported with low accuracy
#' (i.e., rounded to too few decimals). See details for more information.
#'
#' @param data a vector strings containing the test statistics.
#' @param id a vector identifying observations from the same cluster.
#' @param rounded an optional argument specifying whether de-rounding should be applied.
#' Defaults to \code{FALSE} to treat all input as exact values or a numeric
#' vector with values specifying precision of the input. The other option,
#' \code{FALSE}, automatically extracts the number of decimals from input
#' and treats the input as censored if it does not surpass the \code{stat_precise} and
#' the \code{p_precise} thresholds.
#' @param stat_precise an integer specifying the numerical precision of
#' \code{"z", "t", "f"} statistics treated as exact values.
#' @param p_precise an integer specifying the numerical precision of
#' p-values treated as exact values.
#'
#' @details By default, the function extract the type of test statistic:
#' \describe{
#' \item{\code{"F(df1, df2)=x"}}{F-statistic with df1 and df2 degrees of freedom,}
#' \item{\code{"chi(df)=x"}}{Chi-square statistic with df degrees of freedom,}
#' \item{\code{"t(df)=x"}}{for t-statistic with df degrees of freedom,}
#' \item{\code{"z=x"}}{for z-statistic,}
#' \item{\code{"p=x"}}{for p-value.}
#' }
#' The input is not case sensitive and automatically removes empty spaces. Furthermore,
#' inequalities (\code{"<"} and \code{">"}) can be used to denote censoring. I.e., that
#' the p-value is lower than \code{"x"} or that the test statistic is larger than \code{"x"}
#' respectively. The automatic de-rounding procedure (if \code{rounded = TRUE}) treats
#' p-values with less decimal places than specified in \code{p_precise} or test statistics
#' with less decimal places than specified in \code{stat_precise} as censored on an interval
#' that could result in a given rounded value. I.e., a \code{"p = 0.03"} input would be
#' de-rounded as a p-value lower than 0.035 but larger than 0.025.
#'
#'
#' @return An object of type \code{"zcurve_data"}.
#' @export zcurve_data
#'
#' @examples
#' # Specify a character vector containing the test statistics
#' data <- c("z = 2.1", "t(34) = 2.21", "p < 0.03", "F(2,23) > 10", "p = 0.003")
#'
#' # Obtain the z-curve data object
#' data <- zcurve_data(data)
#'
#' # inspect the resulting object
#' data
#' @seealso [zcurve()], [print.zcurve_data()], [head.zcurve_data()]
zcurve_data <- function(data, id = NULL, rounded = TRUE, stat_precise = 2, p_precise = 3){
if(!is.character(data)){
stop("'data' must be a character vector")
}
if(is.null(id)){
id <- 1:length(data)
}else if(is.vector(id) && length(data) == length(id)){
id <- as.numeric(as.factor(as.character(id)))
}else{
stop("'id' must be a vector of the same length as the data")
}
data <- tolower(data)
data <- gsub(" ", "", data)
# deal with chi^2
data <- gsub("chi2", "c", data)
data <- gsub("chi", "c", data)
# extract the values
stat_type <- substr(data, 1, 1)
stat_val <- substr(data, regexpr("[=]|[<]|[>]", data) + 1, nchar(data))
stat_df1 <- ifelse(stat_type %in% c("t", "f", "c"), substr(data, regexpr("\\(", data) + 1, regexpr("[,]|[\\)]", data) - 1), NA)
stat_df2 <- ifelse(stat_type == "f", substr(data, regexpr(",", data) + 1, regexpr("[\\)]", data) - 1), NA)
censored <- grepl("<", data) | grepl(">", data)
digits <- ifelse(regexpr("\\.", data) == -1, 0, nchar(data) - regexpr("\\.", data))
# check the input
if(any(!stat_type %in% c("t", "z", "p", "f", "c")))
stop(paste0("Unknown test statistic: ", paste0("'", unique(stat_type[!stat_type %in% c("t", "z", "p", "f", "c")]),"'", collapse = ", "), "."))
# check that all matches are numeric
stat_val <- tryCatch(
as.numeric(stat_val),
warning = function(w) stop(paste0("The following input could not be decoded: ", paste0("'", data[which(is.na(suppressWarnings(as.numeric(stat_val))))], "'", collapse = ", "), "."), call. = FALSE)
)
stat_df1 <- tryCatch(
as.numeric(stat_df1),
warning = function(w) stop(paste0("The following input could not be decoded: ", paste0("'", data[which(is.na(suppressWarnings(as.numeric(stat_df1))))], "'", collapse = ", "), "."), call. = FALSE)
)
stat_df2 <- tryCatch(
as.numeric(stat_df2),
warning = function(w) stop(paste0("The following input could not be decoded: ", paste0("'", data[which(is.na(suppressWarnings(as.numeric(stat_df2))))], "'", collapse = ", "), "."), call. = FALSE)
)
# set rounding (0 = un-rounded due to automatic conversion)
if(length(rounded) == 1 && !rounded){
# deal with the values as precise values
rounded <- rep(-1, length(data))
}else if(length(rounded) == 1 && rounded){
# specify automatic rounding
rounded <- rep(-1, length(data))
rounded[stat_type == "p" & digits < p_precise] <- digits[stat_type == "p" & digits < p_precise]
rounded[stat_type != "p" & digits < stat_precise] <- digits[stat_type != "p" & digits < stat_precise]
}else{
# use user specify rounding
if(length(rounded) != length(data))
stop("The rounding indicator does not match the lenght of data input.")
if(!is.numeric(rounded))
stop("The rounding indicator is not numeric.")
if(any(rounded < 0))
stop("The rounding indicator must be non-negative.")
}
# prepare empty containers
p_vals <- rep(NA, length(data))
p_vals.rep <- rep(NA, length(data))
p_vals.lb <- rep(NA, length(data))
p_vals.ub <- rep(NA, length(data))
# compute and allocate the p-values accordingly
for(i in seq_along(data)){
if(rounded[i] == -1 && !censored[i]){
# precise non-censored values
p_vals[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(stat_val[i], df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(stat_val[i], df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(abs(stat_val[i]), df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(abs(stat_val[i]), lower.tail = FALSE) * 2,
"p" = stat_val[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
}else if(rounded[i] == -1 && censored[i]){
# precise censored values
p_vals.ub[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(stat_val[i], df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(stat_val[i], df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(abs(stat_val[i]), df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(abs(stat_val[i]), lower.tail = FALSE) * 2,
"p" = stat_val[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
p_vals.lb[i] <- 0
p_vals.rep[i] <- p_vals.ub[i]
}else if(rounded[i] != -1 && !censored[i]){
# rounded non-censored values
temp_stat_val <- abs(stat_val[i])
temp_stat_val.lb <- abs(stat_val[i]) - 0.5 * 10^-digits[i]
temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i]
temp_stat_val.lb <- ifelse(temp_stat_val.lb < 0, 0, temp_stat_val.lb)
p_vals.ub[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2,
"p" = stat_val[i] + 0.5 * 10^-digits[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
p_vals.lb[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(temp_stat_val.ub, df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.ub, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.ub, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.ub, lower.tail = FALSE) * 2,
"p" = stat_val[i] - 0.5 * 10^-digits[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
p_vals.rep[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(temp_stat_val, df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(abs(temp_stat_val), df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(abs(temp_stat_val), lower.tail = FALSE) * 2,
"p" = temp_stat_val
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
}else if(rounded[i] != -1 && censored[i]){
# rounded censored values
temp_stat_val <- abs(stat_val[i])
temp_stat_val.ub <- abs(stat_val[i]) + 0.5 * 10^-digits[i]
p_vals.ub[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2,
"p" = stat_val[i] + 0.5 * 10^-digits[i]
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
p_vals.lb[i] <- 0
p_vals.rep[i] <- tryCatch(
switch(
stat_type[i],
"f" = stats::pf(temp_stat_val.lb , df1 = stat_df1[i], df2 = stat_df2[i], lower.tail = FALSE),
"c" = stats::pchisq(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE),
"t" = stats::pt(temp_stat_val.lb, df = stat_df1[i], lower.tail = FALSE) * 2,
"z" = stats::pnorm(temp_stat_val.lb, lower.tail = FALSE) * 2,
"p" = temp_stat_val
),
warning = function(w) stop(paste0("The following input could not be decoded: '", data[i], "'."))
)
}
}
output <- list(
precise = data.frame(
"input" = data[!is.na(p_vals)],
"p" = p_vals[!is.na(p_vals)],
"id" = id[!is.na(p_vals)]
),
censored = data.frame(
"input" = data[!is.na(p_vals.lb)],
"p.lb" = p_vals.lb[!is.na(p_vals.lb)],
"p.ub" = p_vals.ub[!is.na(p_vals.ub)],
"p.rep" = p_vals.rep[!is.na(p_vals.rep)],
"id" = id[!is.na(p_vals.lb)]
)
)
class(output) <- "zcurve_data"
return(output)
}
### methods
#' Prints a z-curve data object
#' @param x z-curve data object
#' @param ... Additional arguments
#' @export print.zcurve_data
#' @rawNamespace S3method(print, zcurve_data)
#' @seealso [zcurve_data()]
print.zcurve_data <- function(x, ...){
cat(paste0("Object of class z-curve data with ", nrow(x$precise), " precise and ", nrow(x$censored), " censored p-values.\n\n"))
cat("Precise p-values:\n")
print(x$precise, ...)
cat("\n")
cat("Censored p-values:\n")
print(x$censored, ...)
}
#' Prints first few rows of a z-curve data object
#' @param x z-curve data object
#' @param ... Additional arguments
#' @export head.zcurve_data
#' @rawNamespace S3method(head, zcurve_data)
#' @seealso [zcurve_data()]
#' @importFrom utils head
head.zcurve_data <- function(x, ...){
cat(paste0("Object of class z-curve data with ", nrow(x$precise), " precise and ", nrow(x$censored), " censored p-values.\n\n"))
cat("Precise p-values:\n")
print(head(x$precise, ...))
cat("\n")
cat("Censored p-values:\n")
print(head(x$censored, ...))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.