R/mccrr.R

Defines functions load_data_flatfile save_data_flatfile coinflips palprint copy_data paste_data down_name tidy_name diagnostics correlate is_numeric is_extant `%notin%` regress dossier vlookup

Documented in coinflips copy_data correlate diagnostics dossier is_extant is_numeric palprint paste_data regress vlookup

#' Vlookup a value
#'
#' from Jenny Bryan @JennyBryan
#' https://twitter.com/JennyBryan/status/980978609794895872
#'
#' @param this a list of values
#' @param data a dataset to match it to
#' @param key the variable in the data to match on
#' @param value  the value applied to "this"
#'
#' @export
vlookup<-function(this,data,key,value){
  m<-match(this, data[[key]])
  data[[value]][m]
}

#' Print a Dossier
#'
#' Takes an input of a specific id for a specific observation and returns the name and value for each variable in the dataset for that single observation. A different way of examining what your dataset looks like on a micro level.
#'
#' @param df a dataframe
#' @param id the id variable of your data frame
#' @param value the value of your individual dossier (must be in quotations)
#'
#' @export
dossier<-function(df, id, value, ...){
  id <- substitute(id)
  t(filter(df, !!id  == value))
}

#' Regress
#' 
#' Combines several of my common regression formula choices with my preferred output format (using sjPlot's tab_model() ). Be sure that when using the cluster parameter that you pipe the variable in using the . pronoun e.g. ".$classroom" to cluster at the classroom level. 
#'
#' @param df a dataframe
#' @param formula a regression formula
#' @param clusters variables for clustered standard erorrs (default is NULL)
#' @param robust whether to use robust standard errors (default is TRUE)
#' @param logistic whether to run logistic regression (default is FALSE; defaults to binomial)
#' 
#' @export 
regress <- function(dat,
                    formula,
                    clusters = NULL,
                    robust = TRUE,
                    logistic = FALSE) {
  if (logistic == FALSE) {
    if (robust) {
      mod <- estimatr::lm_robust(formula, clusters = clusters, data = dat) 
    } else {
      mod <- lm(formula, data = dat) 
    }
  } else if (logistic) {
    if (robust) {
      mod <- robust::glmRob(formula, data = dat, family = binomial(), method = "cubif")
    } else {
      mod <- glm(formula, data = dat, family = "binomial") 
    }
  }
  sjPlot::tab_model(mod)
}







#' Not In Pipe
#'
#" @export
`%notin%` <- function(x, y) {
  !(x %in% y)
}



#' Check if a variable has data extant
#'
#' Sometimes for longitudinal data, variables exist but have not been entered yet. To ensure forward compatibility, this function checks to see if any data have yet been entered for a given variable.
#'
#' works best with "select_if()" as in:
#' data %>% select_if(is_extant)
#'
#'
#' @param x a data vector
#' @return a logical: TRUE or FALSE
#'
#' @export
is_extant <-function(x) any(!is.na(x))


#' Check if a variable is Numeric
#'
#' Sometimes you have an unknown number of variables being piped into a function that only takes numeric vectors. This function exists to select only those which will work in the final function.
#'
#' works best with "select_if()" as in:
#' data %>% select_if(is_numeric)
#'
#'
#' @param x a data vector
#' @return a logical: TRUE or FALSE
#'
#' @export
is_numeric<-function(x) any(is.numeric(x))




#' Correlation
#'
#' A fixed correlation function; it should operate in a pipe and also defaults to
#' pairwise complete observations being true. Additional arguments to base `cor()`
#' are also available.
#'
#' @param df A data frame
#' @param x The x variable
#' @param y The y variable
#'
#' @export
correlate<-function(df, x, y, ...){
  x <- substitute(x)
  y <- substitute(y)
  cx<- df %>% select(!!x)
  cy<- df %>% select(!!y)
  return(cor(cx, cy, use = "pairwise.complete.obs", ...))
}



#' Model Diagnostics
#'
#' Of unknown provenance--been in my files for years now.
#' Returns a model sumamry plus three graphs of model fit statistics/diagnostics
#' in base R.
#'
#' @param model the linear model
#'
#' @export
diagnostics<-function(model){
  #run model and print specific output
  model1<-model
  stats<-round(c(summary(model1)$fstatistic[c(1,3)],
                 summary(model1)$sigma,
                 summary(model1)$r.squared,
                 summary(model1)$adj.r.squared),3)
  names(stats)<-c("F","DF", "Sigma","Rsq","AdjRsq")
  l1<-list(round(summary(model1)$coefficients,3), stats)
  names(l1)<-c("Coefficients","Stats")
  print(l1)

  #run specific diagnostic tests
  par(mfrow=c(1,3))
  hist(model1$residuals, main="Histogram of residuals", xlab="")
  plot(model1, 1)
  plot(model1, 2)
}


# joke package name generator from Yihui Xie @xieyihui
tidy_name <- function(x) {
  x = tolower(substr(abbreviate(x), 1, 4))
  paste(c(x, rep('r', 5 - nchar(x))), collapse = '')
}


down_name <- function(x) tolower(paste0(gsub('\\s+', '', x), 'down'))

#' Paste data from an Excel Spreadsheet
#'
#' from https://www.r-bloggers.com/copying-data-from-excel-to-r-and-back/
#' adapted for tibbles
#'
#' @return a tibble of the pasted data
#'
#' @export
paste_data <- function(header=TRUE,...) {
  x<-read.table("clipboard",sep="\t",header=header,stringsAsFactors=TRUE,...)
  tibble::as_tibble(x)
}

# paste_data <- function (...) {
#     readr::read_tsv(readr::clipboard(), ...)
#  }


#' copy a dataframe to paste outward
#'
#' does not currently work
copy_data <- function(x,row.names=FALSE,col.names=TRUE,...) {
  write.table(x,"clipboard",sep="\t",row.names=row.names,col.names=col.names,...)
}



#' Print a palette of colors
#'
#' Maybe stolen from Karthik Ram?
#'
#' @param name a color palette
#' @param n how many colors to select
#' @param type whether to print the colors as discrete or continuous
#'
#' @export
palprint<- function(name, n, type = c("discrete", "continuous")) {
  type <- match.arg(type)

  pal <- name
  if (is.null(pal))
    stop("Palette not found.")

  if (missing(n)) {
    n <- length(pal)
  }

  if (type == "discrete" && n > length(pal)) {
    stop("Number of requested colors greater than what palette can offer")
  }

  out <- switch(type,
                continuous = grDevices::colorRampPalette(pal)(n),
                discrete = pal[1:n]
  )
  structure(out, class = "palette", name = "")
}



#' Flip a coin a few times
#'
#' from drob's twitter
#' https://twitter.com/drob/status/1008409373423611904
#'
#' @param n how many trials to run
#' @param m how many coinflips per trial
#'
#' @export
coinflips<-function(n = 10000, m = 100){
  tidyr::crossing(trial = 1:n,
           flip = 1:m) %>%
    dplyr::mutate(heads = rbinom(n(),1,0.5)) %>%
    dplyr::group_by(trial) %>%
    dplyr::mutate(next_flip = lead(heads),
           hh = heads & next_flip,
           ht = heads & !next_flip) %>%
    dplyr::summarise(first_hh = which(hh)[1] + 1,
              first_ht = which(ht)[1] + 1) %>%
    dplyr::summarise(first_hh = mean(first_hh),
              first_ht = mean(first_ht))
}

#------------------------------------------------------------
# for persistent data files via shiny app:
save_data_flatfile <-function(data) {
  data <-t(data)
  file_name <- paste0(paste(get_time_human(), digest(data,
                                                     algo = "md5"), sep = "_"), ".csv")
  write.csv(x = data, file = file.path(results_dir, file_name),
            row.names = FALSE, quote = FALSE)
}

load_data_flatfile <- function(){
  files <-list.files(file.path(results_dir), full.names = TRUE)
  data >= lapply(files, read.csv, stringsAsFactors = FALSE) %>%
    do.call(rbind, .)
  data
}

#' Plot Factors by Frequency
#'
#' https://stackoverflow.com/questions/46862482/plot-a-descending-frequency-bar-chart-using-a-custom-function-with-ggplot2-dply
#'
#' @param data a data frame
#' @param group a variable of factors
#' @param n how many to print into the ggplot graph
#'
#' @return a ggplot graph object
#'
#' @export
plot_freq <- function(data, group,  n=10){
  group <- enquo(group)
  data %>%
    count(!!group) %>%
    dplyr::top_n(n) %>%
    dplyr::mutate(group := fct_reorder(!!group, n)) %>%
    ggplot2::ggplot(., aes_(y=quo(n))) +
    ggplot2::geom_bar(aes(group),stat = "identity") +
    ggplot2::coord_flip()
}

#' Solve a Quadratic Equation
#'
#' Just another practice function.
#' returns roots in complex notation regardless of root form
#'
#' @param a the first coefficient in standard form
#' @param b the second coefficient
#' @param c the third coefficient
#'
#' @return two roots either real or imaginary. They will be in complex form notation. Roots are real if the imaginary term is 0i. If one term is 0 + 0i then there is only one root.
#'
#' @export
solve_quadratic<-function(a,b,c){
  det<-sqrt(as.complex(b^2-4*a*c))
  numerator<-(-b+det)
  denom<-(2*a)
  pos <- numerator/denom
  numerator<-(-b-det)
  neg <- numerator/denom
  result<-c(pos,neg)
  return(result)
}

#' view correctly
#'
#' @export
view <- function(...){
  View(...)
}

#' Wrap text in ggplot2
#'
#' taken from twitter @geokaramanis but he attributes it to an anon stack overflow postl
#'
#' example: subtitle = wrapper("Really long text...", width = 80)
#'
#' @param x some text for a ggplot label.
#'
#' @return wrapped text for a ggplot label.
#'
#' @export
wrapper <- function(x, ...){
  paste(strwrap(x, ...), collapse = "\n")
}

e <- function(){
  fact<-function(n){
    if (n == 0){
      return(1)
    } else {
      return(n*fact(n - 1))
    }
  }
  calc_e<-function(i){
    if (i == 0) {
      return(1)
    } else {
      return( 1/ fact(i) + calc_e(i - 1))
    }
  }
  return(calc_e(50))
}



#' Description of All variables in a Dataset
#'
#' cleans the output of `psych::describe()` and passes it into
#' a `gt::gt()` object to make an HTML output
#'
#' @param data your data
#' @param group optional: your grouping variable
#'
#' @return an HTML object in the viewer pane (or in a shiny app or markdown document)
#'
#' @export
description<-function(data, group = NULL, fast = TRUE, ...) {
  grp<-paste0(deparse(substitute(group)))
  #print(grp)


  if(is.null(group)) {
    data %>%
      psych::describe(fast = fast, ...) %>%
      tibble::rownames_to_column() %>%
      dplyr::select(-c(vars)) %>%
      dplyr::mutate(dplyr::across(is.numeric, round, 2)) %>%
      gt::gt() %>%
      gt::tab_options(
        column_labels.font.size = "small",
        table.font.size = "small",
        row_group.font.size = "small",
        data_row.padding = px(3)
      ) %>%
      gt::tab_header(
        title = paste0("Data Description")
      )
  } else {
    data %>%
      select_if(is.numeric) %>%
      psych::describeBy(group = group, fast = fast, mat= TRUE, ...) %>%
      tibble::rownames_to_column() %>%
      dplyr::select(-c(item, vars)) %>%
      dplyr::mutate(dplyr::across(is.numeric, round, 2)) %>%
      dplyr::arrange(group1) %>%
      dplyr::group_by(group1) %>%
      gt::gt() %>%
      gt::tab_options(
        column_labels.font.size = "small",
        table.font.size = "small",
        row_group.font.size = "small",
        data_row.padding = px(3))
  } %>%
    gt::tab_header(
      title = paste0("Data Description") ,
      subtitle = paste0("Grouped by: ",  grp )
    )
}
McCartneyAC/mccrr documentation built on March 24, 2024, 5:12 p.m.