#' Extract F tests from text
#'
#' Searches text for reported F tests, such as "F(1, 23) = 4.56, p < .001",
#' and returns one row per detected test.
#'
#' @param input A character vector of text to search. Every element is
#'   searched and all matches are pooled; missing (NA) elements yield no rows.
#' @return A tibble containing all detected F tests, with the columns
#'   statistic (always "F"), reported (the test as it appears in the text),
#'   df1 and df2 (numerator and denominator degrees of freedom, NA when not
#'   reported), p (the reported p value as text, NA when not reported), and
#'   value (the reported F value).
#'
#' @examples
#' extractFTests("F(1, 23) = .01, p = < .001")
#'
#'
extractFTests <- function(input) {
  # Pattern building blocks ----
  f_regex <- "(\\bF)"
  # A number with optional decimals, or a bare decimal such as ".05".
  number_regex <- "((\\d{1,99}(\\.\\d{1,99})?)|(\\.\\d{1,99}))"
  # Two degrees of freedom, optionally parenthesised, separated by a comma,
  # a semicolon or whitespace. The separator class must not contain a
  # quantifier: inside `[...]` the characters of `{1,5}` are literals, which
  # made the digits 1 and 5 count as separators.
  df_regex <- paste0(
    "(\\s{0,5}?\\(?\\s{0,5}?",
    number_regex,
    "\\s{0,5}?[,;\\s]\\s{0,5}?",
    number_regex,
    "\\s{0,5}\\)?)"
  )
  of_or_equals_regex <- "((of)|=|:)"
  # NOTE(review): `[<>=(ns):]` is a character class, so besides the comparison
  # operators it also accepts a literal "(", "n", "s" or ")" after "p". It is
  # kept unchanged so the set of detected p values stays the same.
  p_value_regex <- "((?i)((\\s{0,5},?\\s{0,5})(ns))|(p\\s{0,5}[<>=(ns):]\\s{0,5}[<>]?\\s{0,5}((ns)|(\\d?\\.?\\d{1,99}e?-?\\d{0,99})|(\\.\\d{1,99})))(?-i))"

  # Everything that precedes the test statistic value:
  # "F", optional degrees of freedom, then "of", "=" or ":".
  prefix_regex <- paste0(
    f_regex,
    "\\s{0,5}",
    df_regex,
    "?",
    "\\s{0,5}",
    of_or_equals_regex,
    "\\s{0,5}"
  )
  # Full pattern: prefix, optionally negative value, optional p value.
  f_test_regex <- paste0(
    prefix_regex,
    "\\-?",
    "\\s{0,5}",
    number_regex,
    "\\s{0,5}",
    "\\;?",
    ",?",
    "\\s{0,5}",
    p_value_regex,
    "?"
  )

  # Detection ----
  detected <- as.character(
    unlist(stringr::str_extract_all(input, f_test_regex))
  )
  # Missing input elements come back as NA; they are not detected tests.
  detected <- detected[!is.na(detected)]

  # Test statistic value ----
  # Strip the prefix, then read the (possibly negative) number at the start.
  value_with_p <- stringr::str_remove(detected, prefix_regex)
  value <- stringr::str_remove_all(
    stringr::str_extract(
      value_with_p,
      paste0("^", "(\\s{0,5}", "\\-?", "\\s{0,5}", number_regex, ")")
    ),
    "\\s*"
  )

  # p value ----
  p_values <- stringr::str_trim(
    stringr::str_extract(detected, p_value_regex)
  )

  # Degrees of freedom ----
  dfs <- stringr::str_extract(
    detected,
    paste0("(?<=", f_regex, ")", "\\s{0,5}", df_regex)
  )
  # The first number is df1; after removing it, the next number is df2.
  # Using the full number pattern keeps decimal (corrected) degrees of freedom.
  df1 <- stringr::str_extract(dfs, number_regex)
  df2 <- stringr::str_extract(
    stringr::str_remove(dfs, number_regex),
    number_regex
  )

  tibble::tibble(
    statistic = rep("F", length(detected)),
    reported = stringr::str_trim(detected),
    df1 = as.numeric(df1),
    df2 = as.numeric(df2),
    p = as.character(p_values),
    value = as.numeric(value)
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.