R/CRtools.R

Defines functions CRmodelSummary.logistic CRprojectBuilder simpleCap pct CRnormalizePath pRound round2

# same as round() function, but rounds .5 up to higher number, rather than rounding even
round2 <- function( vec , digits=0 ){
  vec0 <- vec
  eps <- 10^(-10)
  vec <- abs(vec)
  vec <- vec*10^digits
  vec2 <- vec-floor(vec)
  #    vec <- floor( vec ) + ifelse( vec2 < .5 , 0 , 1 )
  vec <- floor( vec ) + ifelse( ( vec2 - .5 ) < - eps, 0 , 1 )
  vec.round <- sign(vec0) * vec / 10^digits
  return(vec.round)
}
# 2020-04-06 rounds p values below 0.0001 to '<0.0001'; converts to character
# 2021-04-06 updated: to use original method, use deprecated = T
pRound <- function( vec , digits=4 ,deprecated = F){

  if(deprecated %in% T){
    if(is.na(vec)){
      return(NA)
    } else{
      vec0 <- vec
      eps <- 10^(-10)
      vec <- abs(vec)
      vec <- vec*10^digits
      vec2 <- vec-floor(vec)
      #    vec <- floor( vec ) + ifelse( vec2 < .5 , 0 , 1 )
      vec <- floor( vec ) + ifelse( ( vec2 - .5 ) < - eps, 0 , 1 )
      vec.round <- sign(vec0) * vec / 10^digits
      if(vec.round<0.0001){return('<0.0001')} else{return(as.character(vec.round))}
    }
  } else if(deprecated %in% F){
    pRoundFxn <- function(vec,digits=4){
      if (is.na(vec)) {
        return(NA)
      }
      else {
        vec0 <- vec
        eps <- 10^(-10)
        vec <- abs(vec)
        vec <- vec * 10^digits
        vec2 <- vec - floor(vec)
        vec <- floor(vec) + ifelse((vec2 - 0.5) < -eps, 0, 1)
        vec.round <- sign(vec0) * vec/10^digits
        if (vec.round < 1e-04) {
          return("<0.0001")
        }
        else {
          return(as.character(vec.round))
        }
      }
    }
    sapply(vec,pRoundFxn)
  }

}

# 2022-07-06 new approach to normalizing paths by OS
CRnormalizePath <- function(myPath){
  if(Sys.info()['sysname'] %in% "Windows"){
    gsub(x = myPath,pattern = "/",replacement = "\\\\")
  } else {
    gsub(x = myPath,pattern = "\\\\",replacement = "/")
  }
}

# convert number to percent as character variable
# input:
# numbertobeconvertedtocharacter = numeric value
# numberofdecimals = number of decimal places in percentage (default: 0)
# output: character with that number written as a percent
pct <- function(numbertobeconvertedtocharacter, numberofdecimals = 0){
  return(paste0(round2(numbertobeconvertedtocharacter*100,numberofdecimals),"%"))
}

# capitalizes first letters of strings
# 2021-04-06 updated: to use original method, use deprecated = T
simpleCap <- function(x,deprecated = F) {
  if(deprecated %in% T){
    s <- strsplit(x, " ")[[1]]
    paste(toupper(substring(s, 1,1)), substring(s, 2),
          sep="", collapse=" ")
  } else if (deprecated %in% F){
    simpleCapFxn <- function (x) {
      s <- strsplit(x, " ")[[1]]
      paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "",
            collapse = " ")
    }

    sapply(x,simpleCapFxn)
  }
}

# creates subfolder directories and basic files for project
# works in conjunction with custom shortcuts and custom pipe (not included in this R code)
CRprojectBuilder <- function(){

  # identify mac vs windows
  cr.pathSetup <- if(Sys.info()['sysname'] %in% "Windows"){
    "\\"
  } else {
    "/"
  }

  if(!dir.exists(CRnormalizePath("data"))){dir.create(CRnormalizePath("data"))}
  if(!dir.exists(CRnormalizePath("code"))){dir.create(CRnormalizePath("code"))}
  if(!dir.exists(CRnormalizePath("figures"))){dir.create(CRnormalizePath("figures"))}
  if(!dir.exists(CRnormalizePath("output"))){dir.create(CRnormalizePath("output"))}
  if(!dir.exists(CRnormalizePath("docs"))){dir.create(CRnormalizePath("docs"))}

  if(!file.exists(CRnormalizePath("code\\_import.R"))){
    file.create(CRnormalizePath("code\\_import.R"))
    fileConn.import <- file(CRnormalizePath("code\\_import.R"))
    writeLines(c("library(tidyverse)","library(CRtools)","library(cowplot)","library(ggthemes)","library(readxl)","library(scales)","library(skimr)","library(car)","library(ggfortify)","library(lubridate)","library(data.table)","library(kableExtra)","library(tidytable)","",
                 "# import ----",
                 "",
                 "",
                 "",
                 "",
                 "# data cleaning ----",
                 "",
                 "",
                 "",
                 "",
                 "# data filtering ----",
                 "",
                 "",
                 "",
                 "",
                 "# final datasets"
    ),fileConn.import)
    close(fileConn.import)
  }

  if(!file.exists(CRnormalizePath("code\\_libs.R"))){
    file.create(CRnormalizePath("code\\_libs.R"))
    fileConn.libs <- file(CRnormalizePath("code\\_libs.R"))
    writeLines(c("library(tidyverse)","library(CRtools)","library(cowplot)","library(ggthemes)","library(readxl)","library(scales)","library(skimr)","library(car)","library(ggfortify)","library(lubridate)","library(data.table)","library(kableExtra)","library(tidytable)"),fileConn.libs)
    close(fileConn.libs)
  }

  if(!file.exists(CRnormalizePath("code\\_documentation.R"))){
    file.create(CRnormalizePath("code\\_documentation.R"))
    fileConn.documentation <- file(CRnormalizePath("code\\_documentation.R"))
    writeLines(c("dat.doc <- vector('list')","dat.doc[['Notes']] <- 'Enter Notes Here'"),fileConn.documentation)
    close(fileConn.documentation)
  }

  # removing this file since library(renv) can replace it
  # if(!file.exists("code\\_sessionInfo.R")){
  #   file.create("code\\_sessionInfo.R")
  #   fileConn.sessionInfo <- file("code\\_sessionInfo.R")
  #   writeLines(c("# write date, then paste results of sessionInfo()","# assuming packrat and renv are still not working correctly with shortcuts","sessionInfo()"),fileConn.sessionInfo)
  #   close(fileConn.sessionInfo)
  # }

  if(!file.exists(CRnormalizePath("code\\_Analysis Markdown.rmd"))){
    file.create(CRnormalizePath("code\\_Analysis Markdown.rmd"))
    fileConn.analysisMarkdown <- file(CRnormalizePath("code\\_Analysis Markdown.rmd"))
    writeLines(c(
      "---",
      "title: \"Analysis\"",
      "output:",
      "  rmdformats::material:",
      "    highlight: kate",
      "    self_contained: true",
      "    thumbnails: false",
      "    gallery: true",
      "    df_print: kable",
      "pkgdown:",
      "  as_is: true",
      "knit: (function(inputFile, encoding) { rmarkdown::render(inputFile, encoding = encoding, output_file = file.path(dirname(gsub(x = inputFile,pattern = \"/code\",replacement = \"\")), paste0(\"output/Analysis_\",format(Sys.time(), \"%Y-%m-%d-%H-%M-%S\"),\".html\"))) })",
      "---",
      "",
      "```{r setup, echo=F,include=F}",
      "cr.pathSetup <- if(Sys.info()['sysname'] %in% \"Windows\"){",
      "  \"\\\\\"",
      "} else {",
      "  \"/\"",
      "}",
      "setwd(gsub(x = getwd(),pattern = \"/code\",replacement = \"\"))",
      "source(CRnormalizePath(\"code/_functions.R\"))",
      "source(CRnormalizePath(\"code/_import.R\"))",
      "source(CRnormalizePath(\"code/_analysis.R\"))",
      "source(CRnormalizePath(\"code/_figures.R\"))",
      "```",
      "",
      "# Descriptive Statistics",
      "",
      "## Table 1",
      "",
      "```{r table 1, echo = F,fig.width=10}",
      "func.categoryCount(categoryVariableInQuotes = \"cyl\",columnGroupingInQuotes = \"gear\",mydata = mtcars %>% mutate(gear = as.character(gear)),categoryVariableLabel = \"Gear\",formattedOutput = T,rowOrColumnPercent = \"row\") %>%",
        "  kableExtra::kbl(",
          "    booktabs = T,",
          "    align = \"llcccc\"",
        "  ) %>% ",
        "  kable_classic(full_width = T, html_font = \"Arial\") %>% ",
        "  kable_styling(latex_options = c(\"striped\")) %>%",
        "  column_spec(2,italic = T)",
      "```"
    ),fileConn.analysisMarkdown)
    close(fileConn.analysisMarkdown)
  }


  if(!file.exists(CRnormalizePath("code\\_analysis.R"))){
    file.create(CRnormalizePath("code\\_analysis.R"))
  }

  if(!file.exists(CRnormalizePath("code\\_functions.R"))){
    file.create(CRnormalizePath("code\\_functions.R"))
  }

  if(!file.exists(CRnormalizePath("code\\_diagnostics.R"))){
    file.create(CRnormalizePath("code\\_diagnostics.R"))
    fileConn.import <- file(CRnormalizePath("code\\_diagnostics.R"))
    writeLines(c(
      "# Variable check",
      "skim(mtcars) %>% View()",
      "# Checking if different pairs of variables have similar missing values",
      "CRtools::CRnullCorr(mydataset)",
      "# Graphing null values among two variables",
      "CRtools::CRnullGraph(mydata = mydataset,x = \"var1inquotes\",y=\"var2inquotes\")",
      "# which variables predict nulls in an explanatory variable",
      "CRtools::CRnullModel({mydataset %>% select(var1,var2,var3)},\"var1inquotes\")",
      "# table of two variable missingness",
      "CRtools::CRnullTable(mydataset,\"var1inquotes\",\"var2inquotes\")",
      "",
      "# outlier check",
      "CRoutlierCheck(mtcars)",
      "# quick analyses",
      "library(GGally)",
      "iris %>% ggpairs(mapping = aes(color=factor(Species)))",
      "",
      "# quick table one",
      "library(gtsummary)"
    ),fileConn.import)
    close(fileConn.import)
  }


  if(!file.exists(CRnormalizePath("code\\_old code.R"))){
    file.create(CRnormalizePath("code\\_old code.R"))
  }

  if(!file.exists(CRnormalizePath("code\\_figures.R"))){
    file.create(CRnormalizePath("code\\_figures.R"))
  }

  file.edit(CRnormalizePath("code\\_import.R"))
  # file.edit(CRnormalizePath("code\\_libs.R"))
}

# summarise mathematical models made using things like glm() or glmer() for easier understanding
# requires tidyverse (dplyr and tibble specifically)
CRmodelSummary.logistic <- function(myModel,rounding = 4,simpleResultOutput = T){
  myModelSummary <- myModel %>%
    summary %>%
    coefficients %>%
    as.data.frame %>%
    rownames_to_column() %>%
    rename("p value" = `Pr(>|z|)`,
           Coefficient = Estimate) %>%
    mutate_at(vars(Coefficient,`Std. Error`,`z value`),~round(.,rounding))

  myModelOddsRatios <- exp(myModel %>%
                             summary %>%
                             coefficients %>%
                             .[,1]) %>%
    as.data.frame %>%
    rownames_to_column() %>%
    rename("Odds Ratio" = ".") %>%
    mutate("Odds Ratio" = round2(`Odds Ratio`,rounding))

  myModelOddsRatioConfidenceInterval <- exp(confint(myModel)) %>%
    as.data.frame %>%
    rownames_to_column() %>%
    rename("Lower OR Confidence Limit" = "2.5 %") %>%
    rename("Upper OR Confidence Limit" = "97.5 %") %>%
    mutate_at(vars(contains("Confidence Limit")),~round2(.,rounding))

  myModelResults <- left_join(myModelSummary,myModelOddsRatios,by = "rowname") %>%
    left_join(myModelOddsRatioConfidenceInterval, by = "rowname") %>%
    rename(variableName = "rowname") %>%
    rowwise() %>%
    mutate(`p value.numeric` = `p value`) %>%
    mutate(`p value` = ifelse(`p value`<0.0001,"<0.0001",as.character(round2(`p value`,rounding)))) %>%
    ungroup()

  if(simpleResultOutput %in% T){
    myModelResults <- myModelResults %>%
      select(variableName,`p value`,`Odds Ratio`,`Lower OR Confidence Limit`,`Upper OR Confidence Limit`)
  }

  return(myModelResults)
}

# 2020-02-21 epic color palette

epic_color_palette <- c(
  rgb(0,133,242,maxColorValue=255),
  rgb(221,41,157,maxColorValue=255),
  rgb(105,195,0,maxColorValue=255),
  rgb(180,41,204,maxColorValue=255),
  rgb(0,191,212,maxColorValue=255),
  rgb(255,146,0,maxColorValue=255),
  rgb(106,76,224,maxColorValue=255),
  rgb(24,194,149,maxColorValue=255),
  rgb(217,78,111,maxColorValue=255),
  rgb(36,164,238,maxColorValue=255)
)

# CRpaletteLightBackground <- c(
#  "#1192e8",
#  "#6929c4",
#  "#005d5d",
#  "#9f1853",
#  "#fa4d56",
#  "#570408",
#  "#198038",
#  "#002d9c",
#  "#ee538b",
#  "#b28600",
#  "#009d9a",
#  "#012749",
#  "#8a3800",
#  "#a56eff"
# )
CRcolorPalette <- c(
  "#33b1ff",
  "#8a3ffc",
  "#007d79",
  "#ff7eb6",
  "#fa4d56",
  "#fff1f1",
  "#6fdc8c",
  "#4589ff",
  "#d12771",
  "#d2a106",
  "#08bdba",
  "#bae6ff",
  "#ba4e00",
  "#d4bbff"
)
CRcolorPalette2 <- c(
  rgb(239,45,47,maxColorValue = 255),
  rgb(75,139,191,maxColorValue = 255),
  rgb(95,183,92,maxColorValue = 255),
  rgb(161,94,171,maxColorValue = 255),
  rgb(255,140,25,maxColorValue = 255),
  rgb(254,254,71,maxColorValue = 255),
  rgb(175,103,61,maxColorValue = 255),
  rgb(247,150,196,maxColorValue = 255),
  rgb(163,163,163,maxColorValue = 255)
)

# 2020-06-09 adding cut2 from Hmisc so I don't have to load a package that conflicts with dplyr
cut2 <- function (x, cuts, m = 150, g, levels.mean = FALSE, digits, minmax = TRUE,
                  oneval = TRUE, onlycuts = FALSE, formatfun = format, ...)
{
  if (inherits(formatfun, "formula")) {
    if (!requireNamespace("rlang"))
      stop("Package 'rlang' must be installed to use formula notation")
    formatfun <- getFromNamespace("as_function", "rlang")(formatfun)
  }
  method <- 1
  x.unique <- sort(unique(c(x[!is.na(x)], if (!missing(cuts)) cuts)))
  min.dif <- min(diff(x.unique))/2
  min.dif.factor <- 1
  if (missing(digits))
    digits <- if (levels.mean)
      5
  else 3
  format.args <- if (any(c("...", "digits") %in%
                         names(formals(args(formatfun))))) {
    c(digits = digits, list(...))
  }
  else {
    list(...)
  }
  oldopt <- options("digits")
  options(digits = digits)
  on.exit(options(oldopt))
  xlab <- attr(x, "label")
  if (missing(cuts)) {
    nnm <- sum(!is.na(x))
    if (missing(g))
      g <- max(1, floor(nnm/m))
    if (g < 1)
      stop("g must be >=1, m must be positive")
    options(digits = 15)
    n <- table(x)
    xx <- as.double(names(n))
    options(digits = digits)
    cum <- cumsum(n)
    m <- length(xx)
    y <- as.integer(ifelse(is.na(x), NA, 1))
    labs <- character(g)
    cuts <- approx(cum, xx, xout = (1:g) * nnm/g, method = "constant",
                   rule = 2, f = 1)$y
    cuts[length(cuts)] <- max(xx)
    lower <- xx[1]
    upper <- 1e+45
    up <- low <- double(g)
    i <- 0
    for (j in 1:g) {
      cj <- if (method == 1 || j == 1)
        cuts[j]
      else {
        if (i == 0)
          stop("program logic error")
        s <- if (is.na(lower))
          FALSE
        else xx >= lower
        cum.used <- if (all(s))
          0
        else max(cum[!s])
        if (j == m)
          max(xx)
        else if (sum(s) < 2)
          max(xx)
        else approx(cum[s] - cum.used, xx[s], xout = (nnm -
                                                        cum.used)/(g - j + 1), method = "constant",
                    rule = 2, f = 1)$y
      }
      if (cj == upper)
        next
      i <- i + 1
      upper <- cj
      y[x >= (lower - min.dif.factor * min.dif)] <- i
      low[i] <- lower
      lower <- if (j == g)
        upper
      else min(xx[xx > upper])
      if (is.na(lower))
        lower <- upper
      up[i] <- lower
    }
    low <- low[1:i]
    up <- up[1:i]
    variation <- logical(i)
    for (ii in 1:i) {
      r <- range(x[y == ii], na.rm = TRUE)
      variation[ii] <- diff(r) > 0
    }
    if (onlycuts)
      return(unique(c(low, max(xx))))
    flow <- do.call(formatfun, c(list(low), format.args))
    fup <- do.call(formatfun, c(list(up), format.args))
    bb <- c(rep(")", i - 1), "]")
    labs <- ifelse(low == up | (oneval & !variation), flow,
                   paste("[", flow, ",", fup, bb, sep = ""))
    ss <- y == 0 & !is.na(y)
    if (any(ss))
      stop(paste("categorization error in cut2.  Values of x not appearing in any interval:\n",
                 paste(format(x[ss], digits = 12), collapse = " "),
                 "\nLower endpoints:", paste(format(low,
                                                    digits = 12), collapse = " "), "\nUpper endpoints:",
                 paste(format(up, digits = 12), collapse = " ")))
    y <- structure(y, class = "factor", levels = labs)
  }
  else {
    if (minmax) {
      r <- range(x, na.rm = TRUE)
      if (r[1] < cuts[1])
        cuts <- c(r[1], cuts)
      if (r[2] > max(cuts))
        cuts <- c(cuts, r[2])
    }
    l <- length(cuts)
    k2 <- cuts - min.dif
    k2[l] <- cuts[l]
    y <- cut(x, k2)
    if (!levels.mean) {
      brack <- rep(")", l - 1)
      brack[l - 1] <- "]"
      fmt <- do.call(formatfun, c(list(cuts), format.args))
      labs <- paste("[", fmt[1:(l - 1)], ",",
                    fmt[2:l], brack, sep = "")
      if (oneval) {
        nu <- table(cut(x.unique, k2))
        if (length(nu) != length(levels(y)))
          stop("program logic error")
        levels(y) <- ifelse(nu == 1, c(fmt[1:(l - 2)],
                                       fmt[l]), labs)
      }
      else levels(y) <- labs
    }
  }
  if (levels.mean) {
    means <- tapply(x, y, function(w) mean(w, na.rm = TRUE))
    levels(y) <- do.call(formatfun, c(list(means), format.args))
  }
  attr(y, "class") <- "factor"
  if (length(xlab))
    label(y) <- xlab
  y
}


# new function: overwrite libs
CRlibUpdate <- function(){
  # identify mac vs windows
  cr.pathSetup <- if(Sys.info()['sysname'] %in% "Windows"){
    "\\"
  } else {
    "/"
  }

  if(!file.exists(CRnormalizePath("code\\_libsBackup.R"))){
    file.create(CRnormalizePath("code\\_libsBackup.R"))
  }
  fileConn.import <- file(CRnormalizePath("code\\_import.R"))
  fileConn.libs <- file(CRnormalizePath("code\\_libs.R"))
  fileConn.libsBackup <- file(CRnormalizePath("code\\_libsBackup.R"))

  fileLines.import <- fileConn.import %>% readLines
  fileLines.libs <- fileConn.libs %>% readLines
  fileLines.libsBackup <- fileConn.libsBackup %>% readLines

  listOfLibs <- fileLines.import %>%
    as.data.frame() %>%
    filter(grepl(x = .data[["."]],pattern = "^library(.*)")|grepl(x = .data[["."]],pattern = "^options(.*)")) %>%
    pull(.data[["."]])


  contentToWriteToLibsBackup <- c(
    as.character(fileLines.libsBackup),
    "",
    as.character(paste0("# ",Sys.time())),
    as.character(listOfLibs)
  )

  writeLines(
    contentToWriteToLibsBackup,
    fileConn.libsBackup
  )

  writeLines(
    as.character(listOfLibs),
    fileConn.libs
  )

  close(fileConn.import)
  close(fileConn.libs)
  close(fileConn.libsBackup)
}


# backups of original CRtools methods
# round2 <- function( vec , digits=0 ){
#   vec0 <- vec
#   eps <- 10^(-10)
#   vec <- abs(vec)
#   vec <- vec*10^digits
#   vec2 <- vec-floor(vec)
#   #    vec <- floor( vec ) + ifelse( vec2 < .5 , 0 , 1 )
#   vec <- floor( vec ) + ifelse( ( vec2 - .5 ) < - eps, 0 , 1 )
#   vec.round <- sign(vec0) * vec / 10^digits
#   return(vec.round)
# }
# # 2020-04-06 rounds p values below 0.0001 to '<0.0001'; converts to character
# pRound <- function( vec , digits=4 ){
#   if(is.na(vec)){
#     return(NA)
#   } else{
#     vec0 <- vec
#     eps <- 10^(-10)
#     vec <- abs(vec)
#     vec <- vec*10^digits
#     vec2 <- vec-floor(vec)
#     #    vec <- floor( vec ) + ifelse( vec2 < .5 , 0 , 1 )
#     vec <- floor( vec ) + ifelse( ( vec2 - .5 ) < - eps, 0 , 1 )
#     vec.round <- sign(vec0) * vec / 10^digits
#     if(vec.round<0.0001){return('<0.0001')} else{return(as.character(vec.round))}
#   }
# }




# installing packages to use shortcuts
CRshortcutSetupFile <- function(){
  # identify mac vs windows
  cr.pathSetup <- if(Sys.info()['sysname'] %in% "Windows"){
    "\\"
  } else {
    "/"
  }
  if(!file.exists(CRnormalizePath("code\\shortcutSetup.R"))){
    file.create(CRnormalizePath("code\\shortcutSetup.R"))
    fileConn.shortcut <- file(CRnormalizePath("code\\shortcutSetup.R"))

    path_to_open_directory <- ifelse(
      Sys.info()['sysname'] %in% "Windows",
      "options(\"addinexamplesWV.usrFn3\" = function() { rstudioapi::insertText(\"shell.exec(getwd())\") })",
      "options(\"addinexamplesWV.usrFn3\" = function() { rstudioapi::insertText(\"system2(\\\"open\\\",getwd())\") })"
    )

    writeLines(c(

      "install.packages(\"devtools\")",
      "library(devtools)",
      "library(magrittr)",
      "",
      "install_github(\"WinVector/addinexamplesWV\",force=T)",
      "install_github(\"rstudio/addinexamples\",force=T) # necessary to get the %in% shortcut added",
      "",
      "options(\"addinexamplesWV.usrFn1\" = function() { rstudioapi::insertText(\" %xTempVar% \") })",
      "options(\"addinexamplesWV.usrFn2\" = function() { rstudioapi::insertText(\"libs()\") })",
      # "options(\"addinexamplesWV.usrFn3\" = function() { rstudioapi::insertText(\"shell.exec(getwd())\") })",
      path_to_open_directory,
      "",
      "`%xTempVar%` <- function(lhs, rhs) {",
      "xTempVar <<- deparse(substitute(lhs))",
      "eval(substitute(`%>%`(lhs, rhs)))",
      "}",
      "",
      "# Shortcut setup:",
      "# 1) go to Tools > Modify keyboard shortcuts",
      "# 2) search usrFn",
      "# 3) for usrFn1, modify it to ctrl+alt+shift+M (for the new pipe)",
      "# 4) for usrFn2, modify it to ctrl+alt+shift+l (for libraries)",
      "# 5) for usrFn3, modify it to ctrl+alt+shift+; (open directory in explorer)"



    ),fileConn.shortcut)
    close(fileConn.shortcut)

    file.edit("code\\shortcutSetup.R")
  }








}

CRshortcuts <- function(){
  # identify mac vs windows
  cr.pathSetup <- if(Sys.info()['sysname'] %in% "Windows"){
    "\\"
  } else {
    "/"
  }

  install.packages("devtools")
  library(devtools)
  library(magrittr)

  path_to_open_directory <- ifelse(
    Sys.info()['sysname'] %in% "Windows",
    "options(\"addinexamplesWV.usrFn3\" = function() { rstudioapi::insertText(\"shell.exec(getwd())\") })",
    "options(\"addinexamplesWV.usrFn3\" = function() { rstudioapi::insertText(\"system2(\"open\",getwd())\") })"
  )

  # 2022-03-02 moved this function to CRshortcuts.xTempVarSetup
  # # creating modified pipe
  # `%xTempVar%` <- function(lhs, rhs) {
  #   xTempVar <<- deparse(substitute(lhs))
  #   eval(substitute(`%>%`(lhs, rhs)))
  # }

  install_github("WinVector/addinexamplesWV",force=T)
  install_github("rstudio/addinexamples",force=T) # necessary to get the %in% shortcut added

  options("addinexamplesWV.usrFn1" = function() { rstudioapi::insertText(" %xTempVar% ") })
  options("addinexamplesWV.usrFn2" = function() { rstudioapi::insertText("libs()") })
  if(Sys.info()['sysname'] %in% "Windows"){
    options("addinexamplesWV.usrFn3" = function() { rstudioapi::insertText("shell.exec(getwd())") })
  } else{
    options("addinexamplesWV.usrFn3" = function() { rstudioapi::insertText("system2(\"open\",getwd())") })
  }


}
# to back up details on loaded packages and versions if not using renv, use sessionInfo()


# 2022-03-23 do multiple variables have missing values?  if so, are missings correlated?
CRnullCorr <- function(mydata,corrShape = "pie"){
  print("use skim() function to show detailed info on each variable")
  library(corrplot)
  mydata %>%
    mutate_all(~is.na(.)) %>%
    cor %>%
   corrplot(method = corrShape,type = "lower")
}

# 2021-05-04 experimental package to check missing values on variables in a data frame
# use to identify significant associations, the check individual variables with geom_miss_point()


CRnullModel <- function(mydata,variableWithNulls){

  # column index of variable with nulls
  columnIndex <- mydata %>% names %>% as.data.frame %>% rownames_to_column() %>% filter(`.` %in% variableWithNulls) %>% pull(rowname) %>% as.numeric

  columnListWithoutColumnOfInterest <- data.frame(columnIndices = 1:ncol(mydata)) %>% filter(!columnIndices %in% columnIndex)

  myDataForModel <- mydata %>%
    mutate(missingValues  = case_when(
      is.na(.data[[variableWithNulls]]) ~ 1,
      T ~ 0
    ))

  modelResults <- data.frame(
    term = as.character()#,
    # estimate = as.numeric(),
    # `std.error` = as.numeric(),
    # statistic = as.numeric(),
    # p.value = as.numeric()
  )

  for (i in columnListWithoutColumnOfInterest$columnIndices){
    myModel <-
      glm(
        myDataForModel$missingValues ~
          {myDataForModel %>% select(all_of(i)) %>% pull},
        family = "binomial"
      ) %>%
      broom::tidy() %>%
      mutate(term = gsub(
        x = term,
        pattern = "myDataForModel\\[, i\\]",
        replacement = names(myDataForModel)[i]
      )) %>%
      filter(!term %in% "(Intercept)")
    modelResults <- bind_rows(modelResults,myModel)
  }
  modelResults <- modelResults %>%
    mutate(
      significant = case_when(
        p.value<0.05 ~ "*",
        T ~ ""
      ),
      `Odds Ratio (Odds of Missingness, reference category = non-missing)` = round2(exp(estimate),2),
      p.value = pRound(p.value)
    ) %>%
    select(term,`Odds Ratio (Odds of Missingness, reference category = non-missing)`,p.value,significant)

  print(modelResults)
}

CRnullGraph <- function(mydata,x,y){
  library(naniar)
  mydata %>%
    ggplot(aes_string(x = x,y = y))+
    geom_miss_point(size=3)+
    theme_bw()+
    scale_color_viridis_d()
}

CRnullTable <- function(mydata,var1,var2){
  mydata <- mydata %>%
    select(.data[[var1]],.data[[var2]]) %>%
    rename(
      var1 = .data[[var1]],
      var2 = .data[[var2]]
    ) %>%
    mutate(
      var1 =
        case_when(
          is.na(var1) ~ "Missing",
          T ~ "Non-Missing"
        ),
     var2 =
        case_when(
          is.na(var2) ~ "Missing",
          T ~ "Non-Missing"
        )
    ) %>%
    rename(
      !!var1 := var1,
      !!var2 := var2
    )


  print(paste0("Missings in variable: ",var1))
  print(mydata %>% select(.data[[var1]]) %>% table(useNA = 'ifany') %>% as_tibble() %>% pivot_wider(names_from = ".",values_from = "n") %>% mutate(missing_percent = pct(`Missing` / (`Missing` + `Non-Missing`),1)))
  print("--------------------------------")
  print(paste0("Missings in variable: ",var2))
  print(mydata %>% select(.data[[var2]]) %>% table(useNA = 'ifany') %>% as_tibble() %>% pivot_wider(names_from = ".",values_from = "n") %>% mutate(missing_percent = pct(`Missing` / (`Missing` + `Non-Missing`),1)))
  print("--------------------------------")
  print(
    mydata %>%
      table(useNA = 'ifany') %>%
      as_tibble() %>%
      mutate(pct = pct(n/sum(n),1))
  )

  print("--------------------------------")
  print(paste0(
    "Overall agreement on missing vs non missing values: ",
    mydata %>% mutate(agree = as.numeric(.data[[var1]] == .data[[var2]])) %>% summarise(agreement = pct(sum(agree)/nrow(mydata),1)) %>% pull(agreement)
  ))
}


# example of checking data with missing values: the following data replaces some of the vs variable with missings significantly assoc w/ the am variable
# mtcars2 <- mtcars
# mtcars2$vs <-
# c(0,0,1,NA,NA,NA,0,NA,NA,1,NA,0,0,0,NA,NA,NA,1,1,1,1,0,NA,NA,NA,1,0,1,0,0,0,1)
# CRnullHack(mtcars2)
# doublechecking potential problem variables
# library(naniar)
# mtcars2 %>%
#   ggplot(aes(x = vs,y = wt))+
#   geom_miss_point()



# reverse the use of split to combine data frames after using split() and map() functions
# reduce(full_join, by = names(.[[1]]))

# same as round() function, but rounds .5 up to higher number, rather than rounding even
# round2 <- function( vec , digits=0 ){
#   vec0 <- vec
#   eps <- 10^(-10)
#   vec <- abs(vec)
#   vec <- vec*10^digits
#   vec2 <- vec-floor(vec)
#   #    vec <- floor( vec ) + ifelse( vec2 < .5 , 0 , 1 )
#   vec <- floor( vec ) + ifelse( ( vec2 - .5 ) < - eps, 0 , 1 )
#   vec.round <- sign(vec0) * vec / 10^digits
#   return(vec.round)
# }
# 2020-04-06 rounds p values below 0.0001 to '<0.0001'; converts to character
# 2021-04-06 updated: to use original method, use deprecated = T
# pRound <- function( vec , digits=4 ,deprecated = F){
#
#   if(deprecated %in% T){
#     if(is.na(vec)){
#       return(NA)
#     } else{
#       vec0 <- vec
#       eps <- 10^(-10)
#       vec <- abs(vec)
#       vec <- vec*10^digits
#       vec2 <- vec-floor(vec)
#       #    vec <- floor( vec ) + ifelse( vec2 < .5 , 0 , 1 )
#       vec <- floor( vec ) + ifelse( ( vec2 - .5 ) < - eps, 0 , 1 )
#       vec.round <- sign(vec0) * vec / 10^digits
#       if(vec.round<0.0001){return('<0.0001')} else{return(as.character(vec.round))}
#     }
#   } else if(deprecated %in% F){
#     pRoundFxn <- function(vec,digits=4){
#       if (is.na(vec)) {
#         return(NA)
#       }
#       else {
#         vec0 <- vec
#         eps <- 10^(-10)
#         vec <- abs(vec)
#         vec <- vec * 10^digits
#         vec2 <- vec - floor(vec)
#         vec <- floor(vec) + ifelse((vec2 - 0.5) < -eps, 0, 1)
#         vec.round <- sign(vec0) * vec/10^digits
#         if (vec.round < 1e-04) {
#           return("<0.0001")
#         }
#         else {
#           return(as.character(vec.round))
#         }
#       }
#     }
#     sapply(vec,pRoundFxn)
#   }
#
# }

# convert number to percent as character variable
# input:
# numbertobeconvertedtocharacter = numeric value
# numberofdecimals = number of decimal places in percentage (default: 0)
# output: character with that number written as a percent
pct <- function(numbertobeconvertedtocharacter, numberofdecimals = 0){
  return(paste0(round2(numbertobeconvertedtocharacter*100,numberofdecimals),"%"))
}

# capitalizes first letters of strings
# 2021-04-06 updated: to use original method, use deprecated = T
simpleCap <- function(x,deprecated = F) {
  if(deprecated %in% T){
    s <- strsplit(x, " ")[[1]]
    paste(toupper(substring(s, 1,1)), substring(s, 2),
          sep="", collapse=" ")
  } else if (deprecated %in% F){
    simpleCapFxn <- function (x) {
      s <- strsplit(x, " ")[[1]]
      paste(toupper(substring(s, 1, 1)), substring(s, 2), sep = "",
            collapse = " ")
    }

    sapply(x,simpleCapFxn)
  }
}


# summarise mathematical models made using things like glm() or glmer() for easier understanding
# may be better to just use broom::tidy(.,exponentiate = T,conf.int = T)
# requires tidyverse (dplyr and tibble specifically)
CRmodelSummary.logistic <- function(myModel,rounding = 4,simpleResultOutput = T){
  myModelSummary <- myModel %>%
    summary %>%
    coefficients %>%
    as.data.frame %>%
    rownames_to_column() %>%
    rename("p value" = `Pr(>|z|)`,
           Coefficient = Estimate) %>%
    mutate_at(vars(Coefficient,`Std. Error`,`z value`),~round(.,rounding))

  myModelOddsRatios <- exp(myModel %>%
                             summary %>%
                             coefficients %>%
                             .[,1]) %>%
    as.data.frame %>%
    rownames_to_column() %>%
    rename("Odds Ratio" = ".") %>%
    mutate("Odds Ratio" = round2(`Odds Ratio`,rounding))

  myModelOddsRatioConfidenceInterval <- exp(confint(myModel)) %>%
    as.data.frame %>%
    rownames_to_column() %>%
    rename("Lower OR Confidence Limit" = "2.5 %") %>%
    rename("Upper OR Confidence Limit" = "97.5 %") %>%
    mutate_at(vars(contains("Confidence Limit")),~round2(.,rounding))

  myModelResults <- left_join(myModelSummary,myModelOddsRatios,by = "rowname") %>%
    left_join(myModelOddsRatioConfidenceInterval, by = "rowname") %>%
    rename(variableName = "rowname") %>%
    rowwise() %>%
    mutate(`p value.numeric` = `p value`) %>%
    mutate(`p value` = ifelse(`p value`<0.0001,"<0.0001",as.character(round2(`p value`,rounding)))) %>%
    ungroup()

  if(simpleResultOutput %in% T){
    myModelResults <- myModelResults %>%
      select(variableName,`p value`,`Odds Ratio`,`Lower OR Confidence Limit`,`Upper OR Confidence Limit`)
  }

  return(myModelResults)
}

# 2020-02-21 epic color palette

epic_color_palette <- c(
  rgb(0,133,242,maxColorValue=255),
  rgb(221,41,157,maxColorValue=255),
  rgb(105,195,0,maxColorValue=255),
  rgb(180,41,204,maxColorValue=255),
  rgb(0,191,212,maxColorValue=255),
  rgb(255,146,0,maxColorValue=255),
  rgb(106,76,224,maxColorValue=255),
  rgb(24,194,149,maxColorValue=255),
  rgb(217,78,111,maxColorValue=255),
  rgb(36,164,238,maxColorValue=255)
)

# 2020-06-09 adding cut2 from Hmisc so I don't have to load a package that conflicts with dplyr
cut2 <- function (x, cuts, m = 150, g, levels.mean = FALSE, digits, minmax = TRUE,
                  oneval = TRUE, onlycuts = FALSE, formatfun = format, ...)
{
  if (inherits(formatfun, "formula")) {
    if (!requireNamespace("rlang"))
      stop("Package 'rlang' must be installed to use formula notation")
    formatfun <- getFromNamespace("as_function", "rlang")(formatfun)
  }
  method <- 1
  x.unique <- sort(unique(c(x[!is.na(x)], if (!missing(cuts)) cuts)))
  min.dif <- min(diff(x.unique))/2
  min.dif.factor <- 1
  if (missing(digits))
    digits <- if (levels.mean)
      5
  else 3
  format.args <- if (any(c("...", "digits") %in%
                         names(formals(args(formatfun))))) {
    c(digits = digits, list(...))
  }
  else {
    list(...)
  }
  oldopt <- options("digits")
  options(digits = digits)
  on.exit(options(oldopt))
  xlab <- attr(x, "label")
  if (missing(cuts)) {
    nnm <- sum(!is.na(x))
    if (missing(g))
      g <- max(1, floor(nnm/m))
    if (g < 1)
      stop("g must be >=1, m must be positive")
    options(digits = 15)
    n <- table(x)
    xx <- as.double(names(n))
    options(digits = digits)
    cum <- cumsum(n)
    m <- length(xx)
    y <- as.integer(ifelse(is.na(x), NA, 1))
    labs <- character(g)
    cuts <- approx(cum, xx, xout = (1:g) * nnm/g, method = "constant",
                   rule = 2, f = 1)$y
    cuts[length(cuts)] <- max(xx)
    lower <- xx[1]
    upper <- 1e+45
    up <- low <- double(g)
    i <- 0
    for (j in 1:g) {
      cj <- if (method == 1 || j == 1)
        cuts[j]
      else {
        if (i == 0)
          stop("program logic error")
        s <- if (is.na(lower))
          FALSE
        else xx >= lower
        cum.used <- if (all(s))
          0
        else max(cum[!s])
        if (j == m)
          max(xx)
        else if (sum(s) < 2)
          max(xx)
        else approx(cum[s] - cum.used, xx[s], xout = (nnm -
                                                        cum.used)/(g - j + 1), method = "constant",
                    rule = 2, f = 1)$y
      }
      if (cj == upper)
        next
      i <- i + 1
      upper <- cj
      y[x >= (lower - min.dif.factor * min.dif)] <- i
      low[i] <- lower
      lower <- if (j == g)
        upper
      else min(xx[xx > upper])
      if (is.na(lower))
        lower <- upper
      up[i] <- lower
    }
    low <- low[1:i]
    up <- up[1:i]
    variation <- logical(i)
    for (ii in 1:i) {
      r <- range(x[y == ii], na.rm = TRUE)
      variation[ii] <- diff(r) > 0
    }
    if (onlycuts)
      return(unique(c(low, max(xx))))
    flow <- do.call(formatfun, c(list(low), format.args))
    fup <- do.call(formatfun, c(list(up), format.args))
    bb <- c(rep(")", i - 1), "]")
    labs <- ifelse(low == up | (oneval & !variation), flow,
                   paste("[", flow, ",", fup, bb, sep = ""))
    ss <- y == 0 & !is.na(y)
    if (any(ss))
      stop(paste("categorization error in cut2.  Values of x not appearing in any interval:\n",
                 paste(format(x[ss], digits = 12), collapse = " "),
                 "\nLower endpoints:", paste(format(low,
                                                    digits = 12), collapse = " "), "\nUpper endpoints:",
                 paste(format(up, digits = 12), collapse = " ")))
    y <- structure(y, class = "factor", levels = labs)
  }
  else {
    if (minmax) {
      r <- range(x, na.rm = TRUE)
      if (r[1] < cuts[1])
        cuts <- c(r[1], cuts)
      if (r[2] > max(cuts))
        cuts <- c(cuts, r[2])
    }
    l <- length(cuts)
    k2 <- cuts - min.dif
    k2[l] <- cuts[l]
    y <- cut(x, k2)
    if (!levels.mean) {
      brack <- rep(")", l - 1)
      brack[l - 1] <- "]"
      fmt <- do.call(formatfun, c(list(cuts), format.args))
      labs <- paste("[", fmt[1:(l - 1)], ",",
                    fmt[2:l], brack, sep = "")
      if (oneval) {
        nu <- table(cut(x.unique, k2))
        if (length(nu) != length(levels(y)))
          stop("program logic error")
        levels(y) <- ifelse(nu == 1, c(fmt[1:(l - 2)],
                                       fmt[l]), labs)
      }
      else levels(y) <- labs
    }
  }
  if (levels.mean) {
    means <- tapply(x, y, function(w) mean(w, na.rm = TRUE))
    levels(y) <- do.call(formatfun, c(list(means), format.args))
  }
  attr(y, "class") <- "factor"
  if (length(xlab))
    label(y) <- xlab
  y
}


# new function: overwrite libs (possibly duplicated fxn)
CRlibUpdate <- function(){
  # identify mac vs windows
  cr.pathSetup <- if(Sys.info()['sysname'] %in% "Windows"){
    "\\"
  } else {
    "/"
  }

  if(!file.exists(CRnormalizePath("code\\_libsBackup.R"))){
    file.create(CRnormalizePath("code\\_libsBackup.R"))
  }
  fileConn.import <- file(CRnormalizePath("code\\_import.R"))
  fileConn.libs <- file(CRnormalizePath("code\\_libs.R"))
  fileConn.libsBackup <- file(CRnormalizePath("code\\_libsBackup.R"))

  fileLines.import <- fileConn.import %>% readLines
  fileLines.libs <- fileConn.libs %>% readLines
  fileLines.libsBackup <- fileConn.libsBackup %>% readLines

  listOfLibs <- fileLines.import %>%
    as.data.frame() %>%
    filter(grepl(x = .data[["."]],pattern = "^library(.*)")) %>%
    pull(.data[["."]])


  contentToWriteToLibsBackup <- c(
    as.character(fileLines.libsBackup),
    "",
    as.character(paste0("# ",Sys.time())),
    as.character(listOfLibs)
  )

  writeLines(
    contentToWriteToLibsBackup,
    fileConn.libsBackup
  )

  writeLines(
    as.character(listOfLibs),
    fileConn.libs
  )

  close(fileConn.import)
  close(fileConn.libs)
  close(fileConn.libsBackup)
}



# 2021-04-21 glimpse for data.table
glimpse. <- function(data_table){
  dplyr::glimpse(tibble::as_tibble(data_table))
}


# backing up projects
  # 2022-07-06 this is likely deprecated; commenting out - if unneeded then delete
# 2021-05-03 this is to temporarily bypass issue where external libraries can't be added to .renviron for renv
# CRrenv <- function(){
#   install.packages("devtools")
#   library(devtools)
#   install_github("WinVector/addinexamplesWV",force=T)
#   install_github("rstudio/addinexamples",force=T) # necessary to get the %in% shortcut added
#
#   options("addinexamplesWV.usrFn1" = function() { rstudioapi::insertText(" %xTempVar% ") })
#   options("addinexamplesWV.usrFn2" = function() { rstudioapi::insertText("libs()") })
#   options("addinexamplesWV.usrFn3" = function() { rstudioapi::insertText("shell.exec(getwd())") })
#
# }
# to back up details on loaded packages and versions if not using renv, use sessionInfo()


# 2021-05-04 experimental package to check missing values on variables in a data frame
# use to identify significant associations, the check individual variables with geom_miss_point()

# 2022-03-23 removed this function, replaced with others

# example of checking data with missing values: the following data replaces some of the vs variable with missings significantly assoc w/ the am variable
# mtcars2 <- mtcars
# mtcars2$vs <-
# c(0,0,1,NA,NA,NA,0,NA,NA,1,NA,0,0,0,NA,NA,NA,1,1,1,1,0,NA,NA,NA,1,0,1,0,0,0,1)
# doublechecking potential problem variables
# library(naniar)
# mtcars2 %>%
#   ggplot(aes(x = vs,y = wt))+
#   geom_miss_point()



# from rprofile
# 2022-01-28 copied from old RProfile
cx <- function(x,rowNames = F){
  if(.Platform$OS.type == "windows"){
    write.table(x = x,"clipboard",sep = "\t",row.names = rowNames)
  }
  else{
    library(clipr)
    clipr::write_clip(x)
  }
} # this copies data for easy pasting to tables
libs <- function(){source(CRnormalizePath("code\\_libs.R"))}


# figures
gx <- function(myggplot, myDpi=300,figType = 'png', figHeight = 6, figWidth = 8){
  if(figType %in% "tiff"){
    if(.Platform$OS.type == "windows"){
      ggsave(plot = myggplot,filename = paste0("figures\\",gsub(xTempVar,pattern = "\\.",replacement = "-"),"_",gsub(gsub(substr(Sys.time(),start=1,stop=16),pattern = " |-",replacement = "_"),pattern = ":",replacement = ""),".",figType),height = figHeight,width=figWidth,units="in",dpi = myDpi,compression = "lzw")
    } else{
      ggsave(plot = myggplot,filename = paste0("figures/",gsub(xTempVar,pattern = "\\.",replacement = "-"),"_",gsub(gsub(substr(Sys.time(),start=1,stop=16),pattern = " |-",replacement = "_"),pattern = ":",replacement = ""),".",figType),height = figHeight,width=figWidth,units="in",dpi = myDpi,compression = "lzw")
    }

  }else{
    if(.Platform$OS.type == "windows"){
      ggsave(plot = myggplot,filename = paste0("figures\\",gsub(xTempVar,pattern = "\\.",replacement = "-"),"_",gsub(gsub(substr(Sys.time(),start=1,stop=16),pattern = " |-",replacement = "_"),pattern = ":",replacement = ""),".",figType),height = figHeight,width=figWidth,units="in",dpi = myDpi)
    } else{
      ggsave(plot = myggplot,filename = paste0("figures/",gsub(xTempVar,pattern = "\\.",replacement = "-"),"_",gsub(gsub(substr(Sys.time(),start=1,stop=16),pattern = " |-",replacement = "_"),pattern = ":",replacement = ""),".",figType),height = figHeight,width=figWidth,units="in",dpi = myDpi)
    }

  }
}
# tables (write_csv)
dx <- function(myTable){
  if(.Platform$OS.type == "windows"){
    write_csv(x = myTable,file = paste0("output\\",gsub(xTempVar,pattern = "\\.",replacement = "-"),"_",gsub(gsub(substr(Sys.time(),start=1,stop=16),pattern = " |-",replacement = "_"),pattern = ":",replacement = ""),".csv"))
  } else{
    write_csv(x = myTable,file = paste0("output/",gsub(xTempVar,pattern = "\\.",replacement = "-"),"_",gsub(gsub(substr(Sys.time(),start=1,stop=16),pattern = " |-",replacement = "_"),pattern = ":",replacement = ""),".csv"))
  }

}

# %PIPES % #############################

# this is experimental code to allow use of variable names create as text in pipes.
# this creates a GLOBAL variable called xTempVar using the input variable.  <<- does this.
# using addinexamplsWV involved github package 'WinVector/addinexamplesWV'
# for new RStudio installs, you may need to load library, then:
# 1) go to Tools > Addins > Browse Addins
# 2) under the 'Name' column, select usrFn1 then click Execute
# -do the same for usrFn2 to let libs work
# also, to get shortcuts working:
# 1) go to Tools > Modify keyboard shortcuts
# 2) search usrFn
# 3) for usrFn1, modify it to ctrl+alt+shift+M (for the new pipe)
# 4) for usrFn2, modify it to ctrl+alt+shift+l (for libraries)
# 5) for usrFn3, modify it to ctrl+alt+shift+; (open directory in explorer)

# 2022-01-28 commented out for version to be used on cluster
# options("addinexamplesWV.usrFn1" = function() { rstudioapi::insertText(" %xTempVar% ") })
# options("addinexamplesWV.usrFn2" = function() { rstudioapi::insertText("libs()") })
# options("addinexamplesWV.usrFn3" = function() { rstudioapi::insertText("shell.exec(getwd())") })




v <- function(mydata){View(mydata)}
vx <- function(mydata){View(mydata,title = xTempVar)}


# 2022-03-24 testing more efficient approach for descriptive table results
func.nPctWithinDplyr <-
  function(variable,value){
    paste0(sum(variable %in% value)," (",pct(sum(variable %in% value)/n(),1),")")
  }

func.meanSD <- function(myVariableNotInQuotes,round=2){
  paste0(
    round2(mean(myVariableNotInQuotes,na.rm=TRUE),round),
    " (",
    round2(sd(myVariableNotInQuotes,na.rm=TRUE),round),
    ")"
  )
}

func.medianIqr <- function(myVariableNotInQuotes,round=2){
  paste0(
    round2(median(myVariableNotInQuotes,na.rm=TRUE),round),
    " (",
    round2(quantile(myVariableNotInQuotes,0.25,na.rm=TRUE),round),
    ", ",
    round2(quantile(myVariableNotInQuotes,0.75,na.rm=TRUE),round),
    ")"
  )
}

# 2022-09-19 alternative versions of meanSD and medianIQR to be used in conjunction with func.categoryCount_gt
func.meanSD_gt <- function(variableOfInterestInQuotes,variableLabel,columnGroupingInQuotes,myData,tTest = F){

  if(is.null(variableLabel)){
    variableLabel <- variableOfInterestInQuotes
  }

  myOutput <- myData %>%
    transmute(
      columnGroupingVariable = as.character(.data[[columnGroupingInQuotes]]),
      measure = .data[[variableOfInterestInQuotes]]
    ) %>%
    bind_rows(
      myData %>%
        transmute(columnGroupingVariable = "Overall",
                  measure = .data[[variableOfInterestInQuotes]])
    ) %>%
      # select(columnGroupingVariable,measure)


    group_by(columnGroupingVariable) %>%
    summarise(
      Metric = func.meanSD(measure)
    ) %>%
    pivot_wider(names_from = columnGroupingVariable,values_from = Metric) %>%
    mutate(Metric = variableLabel) %>%
    relocate(Metric,.before = everything())

  if(tTest %in% T){
    myOutput <-
      myOutput %>%
      mutate(
        "P-Value" = {t.test(
          variableOfInterestInQuotes ~ columnGroupingInQuotes,
          data = {
            myData %>%
              rename(columnGroupingInQuotes = .data[[columnGroupingInQuotes]]) %>%
              rename(variableOfInterestInQuotes = .data[[variableOfInterestInQuotes]])
        }) %>%
          .$p.value %>%
          pRound(.)}
      )
  }

  return(myOutput)
}
# func.meanSD_gt(
#   myData = mtcars,
#   variableOfInterest = "wt",
#   variableLabel = "Weight, mean (SD)",
#   columnGroupingInQuotes = "vs"
# )

func.medianIqr_gt <- function(variableOfInterestInQuotes,variableLabel,columnGroupingInQuotes,myData){

  if(is.null(variableLabel)){
    variableLabel <- variableOfInterestInQuotes
  }

  myData %>%
    transmute(
      columnGroupingVariable = as.character(.data[[columnGroupingInQuotes]]),
      measure = .data[[variableOfInterestInQuotes]]
    ) %>%
    bind_rows(
      myData %>%
        transmute(columnGroupingVariable = "Overall",
                  measure = .data[[variableOfInterestInQuotes]])
    ) %>%
    # select(columnGroupingVariable,measure)


    group_by(columnGroupingVariable) %>%
    summarise(
      Metric = func.medianIqr(measure)
    ) %>%
    pivot_wider(names_from = columnGroupingVariable,values_from = Metric) %>%
    mutate(Metric = variableLabel) %>%
    relocate(Metric,.before = everything())
}
# func.medianIqr_gt(
#   myData = mtcars,
#   variableOfInterest = "wt",
#   variableLabel = "Weight, median (IQR)",
#   columnGroupingInQuotes = "vs"
# )

# 2022-09-21 simplified chi square test
func.chiSquareTest <- function(mydata,variableOfInterestInQuotes,interventionVariableInQuotes){
  mydata %>%
    rename(myVar = .data[[variableOfInterestInQuotes]]) %>%
    rename(intervention = .data[[interventionVariableInQuotes]]) %>%
    select(myVar,intervention) %>%
    table %>%
    prop.test %>%
    .$p.value %>%
    pRound()
}


# category count function optimized for gt package
func.categoryCount_gt <- function(
    mydata,
    categoryVariableInQuotes,
    columnGroupingInQuotes = NULL,
    categoryVariableLabel = NULL,
    formattedOutput = T,
    rowOrColumnPercent = "row",
    chiSquareTest = F
){

  # creating label for Metric column
  if(is.null(categoryVariableLabel)){
    categoryLabel <- categoryVariableInQuotes
  } else {
    categoryLabel <- categoryVariableLabel
  }

  # fixing factor issue
  if(is.factor(mydata %>% pull(.data[[categoryVariableInQuotes]]))){
    factorLevels <- levels(mydata %>% pull(.data[[categoryVariableInQuotes]]))
    mydata <- mydata %>%
      mutate(!!categoryVariableInQuotes := as.character(.data[[categoryVariableInQuotes]]))
  }

  if(is.null(columnGroupingInQuotes)){
    myOutput <-
      mydata %>%
      # mutate(!!categoryVariableInQuotes := as.character(.data[[categoryVariableInQuotes]])) %>%
      group_by(.data[[categoryVariableInQuotes]]) %>%
      summarise(Overall=n()) %>%
      mutate(Percent = pct(Overall/sum(Overall),2)) %>%
      add_column(Metric = categoryLabel,.before = 1) %>%
      rename("Category" = .data[[categoryVariableInQuotes]])
  } else {

    myOutput <-
      bind_rows(
        mydata,
        mydata %>% mutate(!!columnGroupingInQuotes := "Overall")
      ) %>%
      group_by(.data[[categoryVariableInQuotes]],.data[[columnGroupingInQuotes]]) %>%
      summarise(n=n())

    if(formattedOutput == T){

      if(tolower(rowOrColumnPercent) %in% "row"){
        myOutput <- myOutput %>%
          ungroup() %>%
          group_by(.data[[categoryVariableInQuotes]]) %>%
          mutate(n2 = case_when(
            !.data[[columnGroupingInQuotes]] %in% "Overall" ~ paste0(n," (",pct(n/sum(n[!.data[[columnGroupingInQuotes]] %in% "Overall"]),2),")"),
            T ~ as.character(n)
          )) %>%
          select(-n) %>%
          rename(n = n2) %>%
          ungroup()
      }else if(tolower(rowOrColumnPercent) %in% "column"){
        myOutput <- myOutput %>%
          ungroup() %>%
          group_by(.data[[columnGroupingInQuotes]]) %>%
          mutate(n = paste0(n," (",pct(n/sum(n),2),")")) %>%
          ungroup()
      }else {
        print("error: must use \"row\" or \"column\"")
      }

      myOutput <- myOutput %>%
        group_by(.data[[categoryVariableInQuotes]],.data[[columnGroupingInQuotes]]) %>%
        pivot_wider(names_from = .data[[columnGroupingInQuotes]],values_from = n) %>%
        ungroup() %>%
        add_column(Metric = categoryLabel,.before = 1) %>%
        mutate_at(vars(-Metric,-.data[[categoryVariableInQuotes]]),~ifelse(is.na(.) & !is.na(.data[[categoryVariableInQuotes]]),0,.)) %>%
        mutate_all(~ifelse(is.na(.),"",.)) %>%
        rename("Category" = .data[[categoryVariableInQuotes]])

      if(exists("factorLevels")){
        myOutput <- myOutput %>%
          mutate(Category = factor(
            Category,
            levels = c("",factorLevels),
            ordered = T
          )) %>%
          arrange(Category) %>%
          mutate(Category = factor(Category,ordered = F))
      }

      if(chiSquareTest %in% T){
        p.value <- func.chiSquareTest(
          mydata = mydata,
          variableOfInterestInQuotes = categoryVariableInQuotes,
          interventionVariableInQuotes = columnGroupingInQuotes
        )

        myOutput <-
          myOutput %>%
          mutate("P-Value" = c(p.value,rep("",nrow(myOutput)-1)))
      }

      return(myOutput)
    }else{
      return(myOutput)
    }

  }

}

func.categoryCount <- function(
  mydata,
  categoryVariableInQuotes,
  columnGroupingInQuotes = NULL,
  categoryVariableLabel = NULL,
  formattedOutput = T,
  rowOrColumnPercent = "row"
){

  # creating label for Metric column
  if(is.null(categoryVariableLabel)){
    categoryLabel <- categoryVariableInQuotes
  } else {
    categoryLabel <- categoryVariableLabel
  }

  # fixing factor issue
  if(is.factor(mydata %>% pull(.data[[categoryVariableInQuotes]]))){
    factorLevels <- levels(mydata %>% pull(.data[[categoryVariableInQuotes]]))
    mydata <- mydata %>%
      mutate(!!categoryVariableInQuotes := as.character(.data[[categoryVariableInQuotes]]))
  }

  if(is.null(columnGroupingInQuotes)){
    myOutput <-
      mydata %>%
        # mutate(!!categoryVariableInQuotes := as.character(.data[[categoryVariableInQuotes]])) %>%
        group_by(.data[[categoryVariableInQuotes]]) %>%
        summarise(Overall=n()) %>%
        mutate(Percent = pct(Overall/sum(Overall),2)) %>%
        add_column(Metric = NA,.before = 1) %>%
        add_row(Metric = categoryVariableLabel,.before = 1) %>%
        mutate_at(vars(Metric,.data[[categoryVariableInQuotes]]),~ifelse(is.na(.),"",.)) %>%
        rename("Category" = .data[[categoryVariableInQuotes]])
  } else {

    myOutput <-
      bind_rows(
        mydata,
        mydata %>% mutate(!!columnGroupingInQuotes := "Overall")
      ) %>%
        group_by(.data[[categoryVariableInQuotes]],.data[[columnGroupingInQuotes]]) %>%
        summarise(n=n())

    if(formattedOutput == T){

      if(tolower(rowOrColumnPercent) %in% "row"){
        myOutput <- myOutput %>%
          ungroup() %>%
          group_by(.data[[categoryVariableInQuotes]]) %>%
          mutate(n2 = case_when(
            !.data[[columnGroupingInQuotes]] %in% "Overall" ~ paste0(n," (",pct(n/sum(n[!.data[[columnGroupingInQuotes]] %in% "Overall"]),2),")"),
              T ~ as.character(n)
            )) %>%
          select(-n) %>%
          rename(n = n2) %>%
          ungroup()
      }else if(tolower(rowOrColumnPercent) %in% "column"){
        myOutput <- myOutput %>%
          ungroup() %>%
          group_by(.data[[columnGroupingInQuotes]]) %>%
          mutate(n = paste0(n," (",pct(n/sum(n),2),")")) %>%
          ungroup()
      }else {
        print("error: must use \"row\" or \"column\"")
      }

      myOutput <- myOutput %>%
        group_by(.data[[categoryVariableInQuotes]],.data[[columnGroupingInQuotes]]) %>%
        pivot_wider(names_from = .data[[columnGroupingInQuotes]],values_from = n) %>%
        ungroup() %>%
        add_column(Metric = NA,.before = 1) %>%
        add_row(Metric = categoryLabel,.before = 1) %>%
        mutate_at(vars(-Metric,-.data[[categoryVariableInQuotes]]),~ifelse(is.na(.) & !is.na(.data[[categoryVariableInQuotes]]),0,.)) %>%
        mutate_all(~ifelse(is.na(.),"",.)) %>%
        rename("Category" = .data[[categoryVariableInQuotes]])

      if(exists("factorLevels")){
        myOutput <- myOutput %>%
          mutate(Category = factor(
            Category,
            levels = c("",factorLevels),
            ordered = T
          )) %>%
          arrange(Category) %>%
          mutate(Category = factor(Category,ordered = F))
      }

      return(myOutput)
    }else{
      return(myOutput)
    }

  }

}

# example for func.categoryCount():
# func.categoryCount(categoryVariableInQuotes = "cyl",columnGroupingInQuotes = "gear",mydata = mtcars %>% mutate(gear = as.character(gear)),categoryVariableLabel = "Cylinders",formattedOutput = T,rowOrColumnPercent = "row")

# 2022-05-06 NHAMCS estimator
func.nhamcsEstimator <- function(
  mydata = dat.nh,
  mysurveydata = dat.nh.survey,
  groupingVariablesVectorInQuotes = c("YEAR","SEX","MSA")
){

  library(srvyr)
  options(survey.lonely.psu="adjust")

  for(i in 1:length(groupingVariablesVectorInQuotes)){
    mydata <- mydata %>%
      group_by(.data[[groupingVariablesVectorInQuotes[i]]],.add = T)

    mysurveydata <- mysurveydata %>%
      group_by(.data[[groupingVariablesVectorInQuotes[i]]],.add = T)
  }

  unreliableUnweightedRows <- mydata %>%
    summarise(
      n = n()
    ) %>%
    mutate(UNWEIGHTED = ifelse(n<30,"UNRELIABLE",""))

  unreliableWeightedRows <- mysurveydata %>%
    summarise(
      n_weighted = survey_total(vartype = c("se","ci"))
    ) %>%
    mutate(se_prop = n_weighted_se/n_weighted) %>%
    mutate(WEIGHTED = ifelse(se_prop>=0.30,"UNRELIABLE",""))

  allUnreliableRows <- full_join(
    unreliableWeightedRows,
    unreliableUnweightedRows,
    by = groupingVariablesVectorInQuotes
  ) %>%
    mutate(UNRELIABLE = ifelse(WEIGHTED %in% "UNRELIABLE" | UNWEIGHTED %in% "UNRELIABLE","UNRELIABLE","")) %>%
    relocate(WEIGHTED,.after = "UNWEIGHTED") %>%
    rename(n_weighted_lowerCI = n_weighted_low,
           n_weighted_upperCI = n_weighted_upp)

  # print("Unreliable Estimates Reported With Asterisks")
  return(allUnreliableRows)

}

func.nhamcsEstimator_experimental <- function(
    mydata = dat.nh,
    mysurveydata = dat.nh.survey,
    # note: always make sure 'year' or 'yearbin' is first
    groupingVariablesVectorInQuotes = c("YEAR","SEX","MSA"),
    wide_or_long = "long",
    # note: measureLabel only works for one secondary variable (besides year)
    measureLabel = NA,
    convertToPercent = F,
    yearbins = F
){

  # if(nrow(mydata)!=nrow(mysurveydata)){
  #   print("ERROR: MISMATCH BETWEEN REGULAR DATASET AND SURVEY DATASET SIZES")
  # } else{
  library(srvyr)
  options(survey.lonely.psu="adjust")

  for(i in 1:length(groupingVariablesVectorInQuotes)){
    mydata <- mydata %>%
      group_by(.data[[groupingVariablesVectorInQuotes[i]]],.add = T)

    mysurveydata <- mysurveydata %>%
      group_by(.data[[groupingVariablesVectorInQuotes[i]]],.add = T)
  }

  unreliableUnweightedRows <- mydata %>%
    summarise(
      n = n()
    ) %>%
    mutate(UNWEIGHTED = ifelse(n<30,"UNRELIABLE",""))

  unreliableWeightedRows <- mysurveydata %>%
    summarise(
      n_weighted = survey_total(vartype = c("se","ci"))
    ) %>%
    mutate(se_prop = n_weighted_se/n_weighted) %>%
    mutate(WEIGHTED = ifelse(se_prop>=0.30,"UNRELIABLE",""))

  allUnreliableRows <- full_join(
    unreliableWeightedRows,
    unreliableUnweightedRows,
    by = groupingVariablesVectorInQuotes
  ) %>%
    mutate(UNRELIABLE = ifelse(WEIGHTED %in% "UNRELIABLE" | UNWEIGHTED %in% "UNRELIABLE","UNRELIABLE","")) %>%
    relocate(WEIGHTED,.after = "UNWEIGHTED") %>%
    rename(n_weighted_lowerCI = n_weighted_low,
           n_weighted_upperCI = n_weighted_upp)



  # conversion to percent
  if(convertToPercent %in% T){
    allUnreliableRows <-
      allUnreliableRows %>%
      group_by(.data[[names(allUnreliableRows[1])]]) %>%
      mutate(n_weighted = pct(n_weighted/sum(n_weighted,na.rm=T),2)) %>%
      ungroup()
  }


  # print("Unreliable Estimates Reported With Asterisks")
  if(wide_or_long %in% "long"){
    return(allUnreliableRows)
  }

  else if(wide_or_long %in% "wide"){

    nhamcsEstimate <- allUnreliableRows
    # function(nhamcsEstimate,measureLabel = NA,pct = F,yearbins = F){
    nhamcsEstimate <-
      nhamcsEstimate %>%
      ungroup()

    if(convertToPercent %in% F){
      nhamcsEstimate <-
        nhamcsEstimate %>%
        mutate(across(c(n_weighted,n_weighted_se,n_weighted_lowerCI,n_weighted_upperCI,se_prop),function(x){round2(x,0)}))
    } else if(convertToPercent %in% T){
      nhamcsEstimate <-
        nhamcsEstimate %>%
        mutate(across(c(n_weighted_se,n_weighted_lowerCI,n_weighted_upperCI,se_prop),function(x){round2(x,0)}))
    }

    if(yearbins == F){
      nhamcsEstimate <-
        nhamcsEstimate %>%
        mutate(across(c(n_weighted:n),function(x){ifelse(!UNRELIABLE %in% "",paste0(x," (unreliable)"),as.character(x))})) %>%
        select(YEAR:n_weighted) %>%
        pivot_wider(names_from = YEAR,values_from = n_weighted)


    } else if(yearbins == T){
      nhamcsEstimate <-
        nhamcsEstimate %>%
        mutate(across(c(n_weighted:n),function(x){ifelse(!UNRELIABLE %in% "",paste0(x," (unreliable)"),as.character(x))})) %>%
        select(cr.yearBins:n_weighted) %>%
        pivot_wider(names_from = cr.yearBins,values_from = n_weighted)
    }
    minimumYearGroup <- nhamcsEstimate %>% names %>% sort %>% .[1]


    if(names(nhamcsEstimate)[1] == minimumYearGroup){
      return(nhamcsEstimate)
    } else{
      namesList <- nhamcsEstimate %>% select(1:all_of(minimumYearGroup))
      namesList <- namesList %>% select(-length(namesList)) %>% names

      if(length(namesList) >1){
        for(i in 1:length(namesList)){
          nhamcsEstimate <-
            nhamcsEstimate %>%
            mutate(across(i,function(x){paste0(namesList[i],": ",x)})) %>%
            rowwise() %>%
            mutate("Measure Categories" = paste0(across(namesList),collapse = ", "))
        }

        nhamcsEstimate <-
          nhamcsEstimate %>%
          ungroup() %>%
          select(-namesList) %>%
          relocate("Measure Categories",.before = everything())
      } else {
        if (is.na(namesList[1])){
          measureLabel <- namesList[1]
        }
        nhamcsEstimate <-
          bind_rows(
            data.frame("Measure Name" = as.character(measureLabel),check.names = F),
            nhamcsEstimate
          ) %>%
          rename("Measure Categories" = namesList[1]) %>%
          relocate(`Measure Categories`, .after = `Measure Name`)
      }


      return(nhamcsEstimate)
      # }

    }
  }
}
# }


# 2022-06-13 outlier detection
CRoutlierCheck <- function(mydata,printWholeRow = F){
  for(i in 1:ncol(mydata)){


    if(!length(which(mydata[,i]  %in% boxplot.stats(mydata[,i])$out)) == 0){
      # cat(paste0("Variable: ",paste0(names(mydata %>% select(i)),collapse = ", ","\n")))

      if(printWholeRow %in% T){
        print(
          mydata %>%
            mutate(row_number = 1:n()) %>%
            relocate(row_number,.before = everything()) %>%
            slice(which(mydata[,i]  %in% boxplot.stats(mydata[,i])$out))
        )

        cat('\n')
      } else{
        # print(
        #   mydata %>%
        #     mutate(row_number = 1:n()) %>%
        #     relocate(row_number,.before = everything()) %>%
        #     select(row_number,all_of(i+1)) %>%
        #     slice(which(mydata[,i]  %in% boxplot.stats(mydata[,i])$out))
        # )

        cat('\n')
      }

    }


  }
}


# reference code:

# not in a function for now, but code can be copied -
CRdesplit <- function(){print(
  "reduce(full_join, by = names(.[[1]]))"
)}
# reverse the use of split to combine data frames after using split() and map() functions
# reduce(full_join, by = names(.[[1]]))


CRgt_code <- function(){
  mtcars %>%
    rownames_to_column() %>%
    arrange(factor(cyl)) %>%
    # adding a currency column
    mutate(currency = 10000:(10000+n()-1)) %>%
    group_by(cyl) %>%
    gt(rowname_col = "rowname") %>%
    # title/ subtitle
    tab_header(
      title = md("Mtcars Heading Here"),
      subtitle = "Mtcars Subheading Here"
    ) %>%
    # heading over specific column/columns
    tab_spanner(
      label = "MPG, Disp, and HP",
      columns = c(mpg,disp,hp)
    ) %>%
    # align rowname
    tab_style(
      style = list(
        cell_text(align = "left",indent = pct(2.5))
      ),
      locations = cells_stub(rows = TRUE)
    ) %>%
    # align columns
    cols_align(
      align = "center",
      columns = c(mpg:carb)
    ) %>%
    # add footnote (footnote style 1)
    tab_source_note(
      source_note = md(
        "Source: Base R"
      )
    ) %>%
    # add footnotes (NUMBERED footnotes linking to column or cell)
    tab_footnote(
      footnote = md("this is the first numbered footnote, pointing to a cell"),
      # cells_body indicates a cell rather than column name
      locations = cells_body(
        columns = am,
        rows = "Datsun 710"
      )
    ) %>%
    tab_footnote(
      footnote = md("second numbered footnote, pointing to a column"),
      # cells_column_labels indicates a column name rather than cell
      locations = cells_column_labels(
        columns = vs
      )
    ) %>%
    # bold headers
    tab_style(
      style = list(
        cell_text(weight = "bold")
      ),
      locations = cells_column_labels(columns = c(mpg:currency))
    ) %>%
    tab_style(
      style = list(
        cell_text(weight = "bold")
      ),
      locations = cells_column_labels(columns = c(mpg:currency))
    ) %>%
    # color cell column
    data_color(
      columns = mpg,
      colors = scales::col_numeric(
        palette = c("red", "orange", "green", "blue"),
        domain = c(min(mpg),max(mpg))
      )
    ) %>%
    # color row
    tab_style(
      style = list(
        cell_fill(color = "#D9654B")
      ),
      locations = cells_body(
        # columns = c(mpg:currency), # not needed if coloring all columns
        rows = 3)
    ) %>%
    # converting a column to currency
    fmt_currency(
      columns = currency,
      currency = "USD",
      decimals = 0
    )
}
craigrothenberg/CRtools documentation built on Nov. 23, 2022, 11:58 p.m.