R/final_digit.R
In reappraised: Statistical Tools for Assessing Publication Integrity of Groups of Trials

#' Compares proportions of final digits from summary statistics
#'
#' Creates graph of proportion of final digits for summary statistics of specified variables\cr
#'
#' This approach is still in development and needs validation and discussion about its place in integrity assessment.\cr
#'
#' Requires data frame containing columns for study, variable (named var), summary statistic(s) (named with single letter eg m or s),
#'  and optional columns for decimal places for each statistic (named dp_* eg dp_m, dp_s). Data can be imported using the generic option of load_clean function\cr
#'
#'
#' Returns a list containing 5 objects and prints the plot digit_graph
#'
#' @param df data frame generated from load_clean function
#' @param vars vector of the summary statistics to be used
#' @param dec.pl "yes" or "no" indicator whether columns for decimal places are included (yes) or should be calculated (no)
#' @param dec.pl.vars vector of the names of the columns for decimal places for each statistics
#' @param title title name for plots (optional)
#' @param verbose TRUE or FALSE indicates whether print plot
#'
#'
#' @return list containing 5 objects as described
#'
#'\itemize{
#'   \item digit_graph = plot of proportions of final digits
#'   \item digit_ft = flextable of results
#'   \item digit_table = data frame of results
#'   \item digit_dataset = data frame of data set used to generate results data
#'   \item digit_data = results of analyses used to generate results data
#'   }
#'
#'
#' @import ggplot2
#' @importFrom magrittr %>%
#' @importFrom dplyr mutate select arrange full_join rename
#' @importFrom purrr map_df
#' @importFrom flextable align border_remove hline_bottom font fontsize set_table_properties bold
#' @export final_digit_fn
#'
#'
#' @examples
#' # load example data
#' generic_data <- load_clean(import= "no", file.cont = "SI_pvals_cont", generic= "yes",
#' gen.vars.del = c("p"), format.cont = "wide")$generic_data
#'
#' \donttest{
#' # run function (takes only a few seconds)
#' final_digit_fn(vars = c("m","s"), dec.pl = "n")$digit_graph
#'
#' # to import an excel spreadsheet (modify using local path,
#' # file and sheet name, range, and format):
#'
#' # get path for example files
#' path <- system.file("extdata", "reappraised_examples.xlsx", package = "reappraised",
#'                     mustWork = TRUE)
#' # delete file name from path
#' path <- sub("/[^/]+$", "", path)
#'
#' # load data
#' generic_data <- load_clean(import= "yes", generic = "yes", dir = path,
#'     file.name.cont = "reappraised_examples.xlsx", sheet.name.cont = "SI_pvals_cont",
#'     range.name.cont = "A1:O51", gen.vars.del = c("p"),
#'     format.cont = "wide")$generic_data}
#' @md


#function for distribution of final digits ---------------------------------
final_digit_fn <- function (df= generic_data, vars = "", dec.pl = "no", dec.pl.vars ="", title="", verbose = TRUE) {

  a <- df

  #install_libs ()

  if(substr(tolower(dec.pl),1,1) == "y") {
    if (!all.equal(dec.pl.vars, paste0("dp_", vars)) | sum (dec.pl.vars %in% colnames(a)) != length(dec.pl.vars)) {
      stop_mb("The supplied dec.pl.vars does not match variable vars. Format should be 'dp_m', 'dp_s' etc in same order")}
    }

  names <- colnames(a) [substr(colnames(a),1,1) %in% vars & !colnames(a) %in% c("study", "var")]

  #select variables
  if(substr(tolower(dec.pl),1,1) == "y") {a <- a %>% dplyr::select(study, var, all_of(names), all_of(dec.pl.vars))
  } else {a <- a %>% dplyr::select(study, var, all_of(names))}

  if(substr(tolower(dec.pl),1,1) == "n") {dec.pl.vars =""}

  names <- setdiff(colnames (a), c("study", "var", dec.pl.vars))
  prefix <- gsub("\\d", "", names)

  #if no decimal places, calculate them
  if (substr(tolower(dec.pl),1,1) != "y") {

    #add digits
    dec <- function (x) {
      ifelse(!is.na(x),
             ifelse(abs(x - round(x)) > .Machine$double.eps^0.5,
                    nchar(sub('^\\d+\\.', '', sub('0+$', '', as.character(abs(x))))), 0)
             , NA)}

    dp <- purrr::map_df(a[, names], dec) %>% as.data.frame()
    a [, paste0("dp_",vars)] <- NA

    for (i in 1:length(vars)) {
      a [ , paste0("dp_",vars [i])] <- apply(dp [, substr(colnames(dp),1,1) == vars [i]], 1,
                                             function(x) ifelse(!all(is.na(x)), max(x, na.rm=TRUE), NA))}
  }

  #convert to long and check final digits match decimal places
  a [, vars] <- a [, paste0("dp_",vars)]

  rm(dp, i, dec)

  b <- a %>% dplyr::select (- all_of(paste0("dp_",vars))) %>%  gather(k, val, -study, -var, -all_of(vars), na.rm = TRUE) %>%
    dplyr::mutate(k1 = gsub("\\d", "", k))
  b$match = match(b$k1, names(b))
  b$dp <- as.numeric(b [cbind(seq_along(b$match), b$match)])
  b$digits = ifelse(b$dp == 0, b$val, sub(".*\\.","", b$val))
  b$digits0 <- ifelse(b$dp ==0, b$digits, ifelse(nchar(b$digits) != b$dp, paste0(b$digits,"0"), b$digits))
  b$final <- as.numeric(substr(b$digits0, nchar(b$digits0), nchar(b$digits0)))
  b$diff <- ifelse(b$dp ==0, "", ifelse(nchar(b$digits)!= b$dp, "Zero", ""))

  p <- as.data.frame(table (b$final))
  p$n <- as.numeric(as.character(p$Var1))
  p <- dplyr::full_join(p, data.frame (n = 0:9), by = "n") %>% dplyr::arrange(n) %>% replace(is.na(.), 0) %>% as.data.frame()
  p$Var1 <- factor(p$n)

  #graph for calculated p-values
  labels <- list(paste(length(unique(b$study)), "trials"),
                 paste(length(b$val), "values"))

  p.g <- digit_g (x = p, l= labels, t= title)

  pval <- p.g [[2]]
  p.g <- p.g [[1]]

  #create a flextable
  p$Percent = round(p$Freq/sum(p$Freq)*100,1)
  colnames(p) [2:3] <- c("Number", "Final digit")

  p1 <- p [, c(3,2,4)]

  #make flextables
  f <- flextable::flextable(p1)
  f <- flextable::font(f, fontname = "Arial")
  f <- flextable::fontsize(f, size = 8)
  f <- flextable::fontsize(f, part = "header", size = 10)
  f <- flextable::align(f, align = "center", part = "all")
  f <- flextable::bold(f, j=1, part= "body")
  f <- flextable::bold(f, part= "header")
  f <- flextable::border_remove(f)
  f <- flextable::hline_bottom(f, border= fp_border(color="black", width = 2), part = "header")
  f <- flextable::set_table_properties(f, width = 1, layout = "autofit")

  b <-  b %>% dplyr::rename(vars_grp = k, final_digit = final) %>% dplyr::select("study", "var", "vars_grp", "val", "dp", "final_digit")

  results <- list( digit_graph= p.g, digit_ft = f, digit_table = p1, digit_dataset = a, digit_data = b)

  if (isTRUE(verbose) | tolower(substr(verbose,1,1)) == "t") {print (p.g)}

  return (results)

}

Any scripts or data that you put into this service are public.

reappraised documentation built on Oct. 6, 2023, 9:08 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

reappraised
Statistical Tools for Assessing Publication Integrity of Groups of Trials

R/final_digit.R
In reappraised: Statistical Tools for Assessing Publication Integrity of Groups of Trials

Try the reappraised package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

reappraised Statistical Tools for Assessing Publication Integrity of Groups of Trials

R/final_digit.R In reappraised: Statistical Tools for Assessing Publication Integrity of Groups of Trials

Try the reappraised package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

reappraised
Statistical Tools for Assessing Publication Integrity of Groups of Trials

R/final_digit.R
In reappraised: Statistical Tools for Assessing Publication Integrity of Groups of Trials