#' Cross Reference Tables for Missing Keys
#'
#' @param data
#' @param data_reference
#' @param id_variable
#' @param select_columns
#' @param missing_variable
#' @param reference_variable
#' @param resolve_variable
#' @param reference_resolve_variable
#' @param return_message
#'
#' @return
#' @export
#'
#' @examples
fix_entity_reference_variable <-
function(data,
data_reference = NULL,
id_variable = NULL,
select_columns = NULL,
missing_variable = NULL,
reference_variable = NULL,
resolve_variable = NULL,
reference_resolve_variable = NULL,
return_message = T) {
if (length(id_variable) == 0) {
"Enter id" %>% message()
return(data)
}
if (length(data_reference) == 0) {
"Enter reference data" %>% message()
return(data)
}
if (length(select_columns) > 0) {
data <- data %>% select(one_of(select_columns))
}
if (length(missing_variable) == 0) {
"Enter missing variable" %>% message()
return(data)
}
if (length(resolve_variable) == 0) {
"Enter resolve variable" %>% message()
return(data)
}
if (length(reference_variable) == 0) {
"Enter resolve variable" %>% message()
return(data)
}
if (length(reference_resolve_variable) == 0) {
"Enter refernce resolve variable" %>% message()
return(data)
}
if (return_message) {
glue::glue(
"Appending missing {missing_variable} using {resolve_variable} from reference using {reference_variable} using {reference_resolve_variable}"
) %>% message()
}
tbl_matches <-
data %>%
filter(is.na(!!sym(missing_variable))) %>%
filter(!is.na(!!sym(resolve_variable))) %>%
janitor::remove_empty(which = "cols")
if (nrow(tbl_matches) == 0) {
return(data)
}
tbl_reference_matches <-
data_reference %>%
filter(!is.na(!!sym(reference_resolve_variable))) %>%
filter(!!sym(reference_resolve_variable) %in% (tbl_matches %>% select(!!sym(resolve_variable)) %>% pull() %>% unique())) %>%
group_by(!!sym(reference_resolve_variable)) %>%
slice(1) %>%
ungroup()
if (nrow(tbl_reference_matches) == 0) {
return(data)
}
tbl_ids <-
tbl_reference_matches %>%
select(reference_variable, reference_resolve_variable) %>%
rename(
UQ(missing_variable) := reference_variable,
UQ(resolve_variable) := reference_resolve_variable
) %>%
left_join(tbl_matches %>% select(one_of(id_variable, resolve_variable)), by = resolve_variable) %>%
arrange(!!sym(id_variable))
if (nrow(tbl_ids) == 0) {
return(data)
}
data <-
data %>%
filter(!!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())) %>%
select(-one_of(missing_variable)) %>%
left_join(tbl_ids) %>%
select(one_of(names(data))) %>%
bind_rows(data %>%
filter(!(
!!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())
))) %>%
arrange(!!sym(id_variable))
data
}
#' Fix Missing Variables from Entities
#'
#' @param data
#' @param id_variable
#' @param select_columns
#' @param missing_variable
#' @param resolve_variable
#'
#' @return
#' @export
#'
#' @examples
fix_entity_variable <-
function(data,
id_variable = NULL,
select_columns = NULL,
missing_variable = NULL,
resolve_variable = NULL,
return_message = T) {
if (length(id_variable) == 0) {
"Enter id" %>% message()
return(data)
}
if (length(select_columns) == 0) {
"Enter select columns" %>% message()
return(data)
}
if (length(missing_variable) == 0) {
"Enter missing variable" %>% message()
return(data)
}
if (length(resolve_variable) == 0) {
"Enter resolve variable" %>% message()
return(data)
}
if (return_message) {
glue("Appending missing {missing_variable} using {resolve_variable}") %>% message()
}
tbl_matches <-
data %>%
filter(is.na(!!sym(missing_variable))) %>%
filter(!is.na(!!sym(resolve_variable))) %>%
select(one_of(select_columns)) %>%
janitor::remove_empty(which = "cols")
tbl_resolve_count <-
data %>%
filter(!!sym(resolve_variable) %in%
(tbl_matches %>% select(!!sym(resolve_variable)) %>% pull())) %>%
filter(!is.na(!!sym(resolve_variable))) %>%
count(!!sym(resolve_variable), sort = T, name = "count")
tbl_matches <- tbl_matches %>%
left_join(tbl_resolve_count, by = resolve_variable) %>%
filter(count > 1)
if (nrow(tbl_matches) == 0) {
return(data)
}
tbl_new <-
data %>%
filter(!!sym(resolve_variable) %in%
(tbl_matches %>% select(!!sym(resolve_variable)) %>% pull())) %>%
select(one_of(c(missing_variable, resolve_variable))) %>%
filter(!is.na(!!sym(missing_variable))) %>%
arrange(!!sym(resolve_variable)) %>%
group_by(!!sym(resolve_variable)) %>%
slice(1) %>%
ungroup()
if (nrow(tbl_new) == 0) {
return(data)
}
tbl_ids <-
tbl_matches %>%
select(one_of(id_variable, resolve_variable)) %>%
left_join(tbl_new, by = resolve_variable) %>%
filter(!is.na(!!sym(missing_variable)))
if (nrow(tbl_ids) == 0) {
return(data)
}
data <-
data %>%
filter(!!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())) %>%
select(-one_of(missing_variable)) %>%
left_join(tbl_ids) %>%
select(one_of(names(data))) %>%
bind_rows(data %>%
filter(!(
!!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())
))) %>%
arrange(!!sym(id_variable))
data
}
#' Turn off Rstudio Viewer
#'
#' @return
#' @export
#'
#' @examples
disable_rstudio_viewer <-
function() {
"Disabling rstudio viewer" %>% message()
options(viewer = NULL)
return(invisible())
}
#' Assign a set of names to a tibble
#'
#' Requires a `feature` and `actual` column or
#' a `tibble` with 2 columns and the `actual` as the second colun
#'
#' @param data
#' @param dictionary_names
#'
#' @return
#' @export
#'
#' @examples
#' library(tibble)
#' dict_iris <- tibble(feature = names(iris), actual = c("SL", "SW", "PL", "PW", "Name of Species"))
#'
#' tbl_assign_dictionary_names(iris, dict_iris)
#' tbl_assign_dictionary_names(iris, dict_iris, snake_names = T)
#'
tbl_assign_dictionary_names <-
function(data, dictionary_names = NULL,
snake_names = F) {
if (length(dictionary_names) == 0) {
"No name dictionary" %>% message()
return(data)
}
data_names <- names(data)
feature_col <- names(dictionary_names)[[1]]
actual_col <- names(dictionary_names)[[2]]
actual_names <-
data_names %>%
map_chr(function(x){
df_row <- dictionary_names %>%
filter(!!sym(feature_col) == x)
if (nrow(df_row) == 0) {
glue("Missing {x}") %>% message()
return(x)
}
df_row[,2] %>% pull()
})
data <-
data %>%
setNames(actual_names) %>%
as_tibble()
if (snake_names) {
data <- data %>%
janitor::clean_names()
}
data
}
#' Print a message using cat
#'
#' @param text vector text
#'
#' @return invisible
#' @export
#' @import glue purrr
#'
#' @examples
#' cat_message(text = "Hello World")
cat_message <-
function(text = NULL) {
if (length(text) == 0) {
return(invisible())
}
text <- glue("\n\n{text}\n\n") %>% as.character()
cat(text, fill = T)
}
#' Installed Packages
#'
#' @return
#' @export
#'
#' @examples
tbl_installed_packages <-
function() {
data <- installed.packages() %>% as_tibble() %>% janitor::clean_names()
all_data <- tibble()
data$package %>%
walk(function(x){
x %>% message()
d <- packageDescription(x) %>% flatten_df() %>% janitor::clean_names()
all_data <<- all_data %>% bind_rows(d)
})
all_data
}
#' Remove an Item or Folder
#'
#' @param path
#' @param recursive
#' @param force
#' @param return_message
#'
#' @return
#' @export
#'
#' @examples
delete_item <-
function(path = NULL,
recursive = T,
force = T,
return_message = T) {
oldwd <- getwd()
setwd("~")
if (length(path) == 0) {
return(invisible())
}
if (return_message) {
glue("Removing {path}") %>% message()
}
unlink(x = path,
recursive = recursive,
force = T)
if (getwd() != oldwd) {
setwd(oldwd)
}
return(invisible())
}
# r_packages --------------------------------------------------------------
#' Tibble of CRAN Packages
#'
#' @param normalize_text if `TRUE` normalizes title descriptoon to upper
#'
#' @return
#' @export
#'
#' @examples
#' library(asbtools)
#' library(tidyverse)
#'
#' cran <- tbl_cran_packages()
#'
#' cran %>% sheldon::regex_keyword_match(text_columns = "title", keywords = "markdown", id_columns = "package")
#' cran %>% sheldon::kwic_keyword_match(text_columns = "title", id_columns = "package")
#'
tbl_cran_packages <- function(normalize_text = T) {
page <- "https://cran.r-project.org/web/packages/available_packages_by_date.html" %>%
rvest::read_html()
data <-
page %>% rvest::html_table(header = T) %>% .[[1]] %>% janitor::clean_names() %>%
mutate(date = lubridate::ymd(date))
if (normalize_text) {
data <- data %>%
mutate(title = str_to_upper(title))
}
data <-
data %>%
mutate(
url_cran = glue::glue(
"https://cran.r-project.org/web/packages/{package}/index.html"
) %>% as.character(),
year_released = date %>% lubridate::year() %>% as.numeric(),
month_released = date %>% lubridate::month(label = T)
) %>%
select(year_released, month_released, everything())
data
}
# edit --------------------------------------------------------------------
#' Edit Data
#'
#' @param data data frame
#' @param file_path if not `NULL` a filepath to save the file
#' @param folder folder to save the file
#' @param file_name file name
#'
#' @return
#' @export
#'
#' @examples
#' tbl_edit(data = iris, file_path = "Desktop/test", file_name = "iris")
#' tbl_edit(data = iris, file_path = "Desktop", file_name = "iris")
tbl_edit <-
function(data,
override_common_group = F,
file_path = NULL, folder = NULL, file_name = NULL) {
if (override_common_group & data %>% hasName("group")) {
data %>%
mutate(group = case_when(
group == "common" ~ "",
TRUE ~ group
))
}
data <- edit(data)
data <- as_tibble(data)
if (length(file_path) > 0) {
"Saving data" %>% message()
oldwd <- getwd()
setwd("~")
if (length(file_name) == 0) {
file_name <- "data"
}
pq_write(data = data, file_path = file_path, folder = folder, file_name = file_name)
if (getwd() != oldwd) {
setwd(oldwd)
}
}
data
}
#' Data Editor
#'
#' Use data editor to edit data
#'
#' @param data
#' @param override_common_group
#' @param file_path
#' @param folder
#' @param file_name
#'
#' @return
#' @export
#'
#' @examples
#' tbl_data_edit(data = iris, file_path = "Desktop/test", file_name = "iris")
#' tbl_data_edit(data = mtcars, file_path = "Desktop/test", file_name = "mtcars")
tbl_data_edit <-
function(data,
override_common_group = F,
file_path = NULL, folder = NULL, file_name = NULL) {
if (override_common_group & data %>% hasName("group")) {
data %>%
mutate(group = case_when(
group == "common" ~ "",
TRUE ~ group
))
}
data <- DataEditR::data_edit(data)
data <- as_tibble(data)
if (length(file_path) > 0) {
"Saving data" %>% message()
oldwd <- getwd()
setwd("~")
if (length(file_name) == 0) {
file_name <- "data"
}
pq_write(data = data, file_path = file_path, folder = folder, file_name = file_name)
if (getwd() != oldwd) {
setwd(oldwd)
}
}
data
}
#' Update Python Packages
#'
#' @param is_python_3
#'
#' @return
#' @export
#'
#' @examples
update_python_packages <-
function(is_python_3 = TRUE) {
if (is_python_3) {
system("pip3 freeze --local | grep -v '^\\-e' | cut -d = -f 1 | xargs -n1 pip3 install -U")
} else {
system("pip2 freeze --local | grep -v '^\\-e' | cut -d = -f 1 | xargs -n1 pip2 install -U")
}
}
#' Reset Graphics
#'
#' @return
#' @export
#'
#' @examples
reset_graphics <-
function(){
message("Reseting graphics")
dev.off()
par(mar=c(1,1,1,1))
return(invisible())
}
#' Brew everything
#'
#' @return
#' @export
#'
#' @examples
brew_everything <- function() {
system("brew upgrade")
system("brew update")
system("brew cleanup")
gc(verbose = T, reset = T, full = T)
}
# other -------------------------------------------------------------------
#' Shuffle Data
#'
#' @param data
#' @param seed
#'
#' @return
#' @export
#'
#' @examples
tbl_shuffle <-
function(data, seed = NULL) {
if (length(seed) > 0) {
set.seed(seed)
}
rows <- sample(nrow(data))
data <- data[rows,] |> as_tibble()
data
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.