R/utils.R

Defines functions tbl_shuffle brew_everything reset_graphics update_python_packages tbl_data_edit tbl_edit tbl_cran_packages delete_item tbl_installed_packages cat_message tbl_assign_dictionary_names disable_rstudio_viewer fix_entity_variable fix_entity_reference_variable

Documented in brew_everything cat_message delete_item disable_rstudio_viewer fix_entity_reference_variable fix_entity_variable reset_graphics tbl_assign_dictionary_names tbl_cran_packages tbl_data_edit tbl_edit tbl_installed_packages tbl_shuffle update_python_packages

#' Cross Reference Tables for Missing Keys
#'
#' @param data
#' @param data_reference
#' @param id_variable
#' @param select_columns
#' @param missing_variable
#' @param reference_variable
#' @param resolve_variable
#' @param reference_resolve_variable
#' @param return_message
#'
#' @return
#' @export
#'
#' @examples
fix_entity_reference_variable <-
  function(data,
           data_reference = NULL,
           id_variable = NULL,
           select_columns =  NULL,
           missing_variable = NULL,
           reference_variable = NULL,
           resolve_variable = NULL,
           reference_resolve_variable = NULL,
           return_message = T) {
    if (length(id_variable) == 0) {
      "Enter id" %>% message()
      return(data)
    }


    if (length(data_reference) == 0) {
      "Enter reference data" %>% message()
      return(data)
    }

    if (length(select_columns) > 0) {
      data <- data %>% select(one_of(select_columns))
    }

    if (length(missing_variable) == 0) {
      "Enter missing variable" %>% message()
      return(data)
    }

    if (length(resolve_variable) == 0) {
      "Enter resolve variable" %>% message()
      return(data)
    }

    if (length(reference_variable) == 0) {
      "Enter resolve variable" %>% message()
      return(data)
    }
    if (length(reference_resolve_variable) == 0) {
      "Enter refernce resolve variable" %>% message()
      return(data)
    }

    if (return_message) {
      glue::glue(
        "Appending missing {missing_variable} using {resolve_variable} from reference using {reference_variable} using {reference_resolve_variable}"
      ) %>% message()
    }

    tbl_matches <-
      data %>%
      filter(is.na(!!sym(missing_variable))) %>%
      filter(!is.na(!!sym(resolve_variable))) %>%
      janitor::remove_empty(which = "cols")

    if (nrow(tbl_matches) == 0) {
      return(data)
    }

    tbl_reference_matches <-
      data_reference %>%
      filter(!is.na(!!sym(reference_resolve_variable))) %>%
      filter(!!sym(reference_resolve_variable) %in% (tbl_matches %>% select(!!sym(resolve_variable)) %>% pull() %>% unique())) %>%
      group_by(!!sym(reference_resolve_variable)) %>%
      slice(1) %>%
      ungroup()

    if (nrow(tbl_reference_matches) == 0) {
      return(data)
    }

    tbl_ids <-
      tbl_reference_matches %>%
      select(reference_variable, reference_resolve_variable) %>%
      rename(
        UQ(missing_variable) := reference_variable,
        UQ(resolve_variable) := reference_resolve_variable
      ) %>%
      left_join(tbl_matches %>% select(one_of(id_variable, resolve_variable)), by = resolve_variable) %>%
      arrange(!!sym(id_variable))


    if (nrow(tbl_ids) == 0) {
      return(data)
    }



    data <-
      data %>%
      filter(!!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())) %>%
      select(-one_of(missing_variable)) %>%
      left_join(tbl_ids) %>%
      select(one_of(names(data))) %>%
      bind_rows(data %>%
                  filter(!(
                    !!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())
                  ))) %>%
      arrange(!!sym(id_variable))

    data

  }

#' Fix Missing Variables from Entities
#'
#' @param data
#' @param id_variable
#' @param select_columns
#' @param missing_variable
#' @param resolve_variable
#'
#' @return
#' @export
#'
#' @examples
fix_entity_variable <-
  function(data,
           id_variable = NULL,
           select_columns =  NULL,
           missing_variable = NULL,
           resolve_variable = NULL,
           return_message = T) {
    if (length(id_variable) == 0) {
      "Enter id" %>% message()
      return(data)
    }

    if (length(select_columns) == 0) {
      "Enter select columns" %>% message()
      return(data)
    }

    if (length(missing_variable) == 0) {
      "Enter missing variable" %>% message()
      return(data)
    }

    if (length(resolve_variable) == 0) {
      "Enter resolve variable" %>% message()
      return(data)
    }

    if (return_message) {
      glue("Appending missing {missing_variable} using {resolve_variable}") %>% message()
    }

    tbl_matches <-
      data %>%
      filter(is.na(!!sym(missing_variable))) %>%
      filter(!is.na(!!sym(resolve_variable))) %>%
      select(one_of(select_columns)) %>%
      janitor::remove_empty(which = "cols")

    tbl_resolve_count <-
      data %>%
      filter(!!sym(resolve_variable) %in%
               (tbl_matches %>% select(!!sym(resolve_variable)) %>% pull())) %>%
      filter(!is.na(!!sym(resolve_variable))) %>%
      count(!!sym(resolve_variable), sort = T, name = "count")

    tbl_matches <- tbl_matches %>%
      left_join(tbl_resolve_count, by = resolve_variable) %>%
      filter(count > 1)

    if (nrow(tbl_matches) == 0) {
      return(data)
    }

    tbl_new <-
      data %>%
      filter(!!sym(resolve_variable) %in%
               (tbl_matches %>% select(!!sym(resolve_variable)) %>% pull())) %>%
      select(one_of(c(missing_variable, resolve_variable))) %>%
      filter(!is.na(!!sym(missing_variable))) %>%
      arrange(!!sym(resolve_variable)) %>%
      group_by(!!sym(resolve_variable)) %>%
      slice(1) %>%
      ungroup()

    if (nrow(tbl_new) == 0) {
      return(data)
    }

    tbl_ids <-
      tbl_matches %>%
      select(one_of(id_variable, resolve_variable)) %>%
      left_join(tbl_new, by = resolve_variable) %>%
      filter(!is.na(!!sym(missing_variable)))

    if (nrow(tbl_ids) == 0) {
      return(data)
    }


    data <-
      data %>%
      filter(!!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())) %>%
      select(-one_of(missing_variable)) %>%
      left_join(tbl_ids) %>%
      select(one_of(names(data))) %>%
      bind_rows(data %>%
                  filter(!(
                    !!sym(id_variable) %in% (tbl_ids %>% select(one_of(id_variable)) %>% pull())
                  ))) %>%
      arrange(!!sym(id_variable))

    data

  }

#' Turn off Rstudio Viewer
#'
#' @return
#' @export
#'
#' @examples
disable_rstudio_viewer <-
  function() {
    "Disabling rstudio viewer" %>% message()
    options(viewer = NULL)
    return(invisible())
  }
#' Assign a set of names to a tibble
#'
#' Requires a `feature` and `actual` column or
#' a `tibble` with 2 columns and the `actual` as the second colun
#'
#' @param data
#' @param dictionary_names
#'
#' @return
#' @export
#'
#' @examples
#' library(tibble)
#' dict_iris <- tibble(feature = names(iris), actual = c("SL", "SW", "PL", "PW", "Name of Species"))
#'
#' tbl_assign_dictionary_names(iris, dict_iris)
#' tbl_assign_dictionary_names(iris, dict_iris, snake_names = T)
#'
tbl_assign_dictionary_names <-
  function(data, dictionary_names = NULL,
           snake_names  = F) {

    if (length(dictionary_names) == 0) {
      "No name dictionary" %>% message()
      return(data)
    }

    data_names <- names(data)
    feature_col <- names(dictionary_names)[[1]]
    actual_col <- names(dictionary_names)[[2]]
    actual_names <-
      data_names %>%
      map_chr(function(x){

        df_row <- dictionary_names %>%
          filter(!!sym(feature_col) == x)

        if (nrow(df_row) == 0) {
          glue("Missing {x}") %>% message()
          return(x)
        }

        df_row[,2] %>% pull()


      })

    data <-
      data %>%
      setNames(actual_names) %>%
      as_tibble()

    if (snake_names) {
      data <- data %>%
        janitor::clean_names()
    }

    data
  }
#' Print a message using cat
#'
#' @param text vector text
#'
#' @return invisible
#' @export
#' @import glue purrr
#'
#' @examples
#' cat_message(text = "Hello World")
cat_message <-
  function(text = NULL) {
    if (length(text) == 0) {
      return(invisible())
    }

    text <- glue("\n\n{text}\n\n") %>% as.character()

    cat(text, fill = T)
  }


#' Installed Packages
#'
#' @return
#' @export
#'
#' @examples
tbl_installed_packages <-
  function() {
    data <- installed.packages() %>% as_tibble() %>% janitor::clean_names()
    all_data <- tibble()
    data$package %>%
      walk(function(x){
        x %>% message()
        d <- packageDescription(x) %>% flatten_df() %>% janitor::clean_names()
        all_data <<- all_data %>% bind_rows(d)
      })

    all_data
  }

#' Remove an Item or Folder
#'
#' @param path
#' @param recursive
#' @param force
#' @param return_message
#'
#' @return
#' @export
#'
#' @examples
delete_item <-
  function(path =  NULL,
           recursive = T,
           force = T,
           return_message = T) {
    oldwd <- getwd()
    setwd("~")
    if (length(path) == 0) {
      return(invisible())
    }
    if (return_message) {
      glue("Removing {path}") %>% message()
    }
    unlink(x = path,
           recursive = recursive,
           force = T)

    if (getwd() != oldwd) {
      setwd(oldwd)
    }
    return(invisible())
  }



# r_packages --------------------------------------------------------------

#' Tibble of CRAN Packages
#'
#' @param normalize_text if `TRUE` normalizes title descriptoon to upper
#'
#' @return
#' @export
#'
#' @examples
#' library(asbtools)
#' library(tidyverse)
#'
#' cran <- tbl_cran_packages()
#'
#' cran %>% sheldon::regex_keyword_match(text_columns = "title", keywords = "markdown", id_columns = "package")
#' cran %>% sheldon::kwic_keyword_match(text_columns = "title", id_columns = "package")
#'
tbl_cran_packages <- function(normalize_text  = T) {
  page <- "https://cran.r-project.org/web/packages/available_packages_by_date.html" %>%
    rvest::read_html()

  data <-
    page %>% rvest::html_table(header = T) %>% .[[1]] %>% janitor::clean_names() %>%
    mutate(date = lubridate::ymd(date))

  if (normalize_text) {
    data <- data %>%
      mutate(title = str_to_upper(title))
  }

  data <-
    data %>%
    mutate(
      url_cran = glue::glue(
        "https://cran.r-project.org/web/packages/{package}/index.html"
      ) %>% as.character(),
      year_released = date %>% lubridate::year() %>% as.numeric(),
      month_released = date %>% lubridate::month(label = T)
    ) %>%
    select(year_released, month_released, everything())

  data
}



# edit --------------------------------------------------------------------



#' Edit Data
#'
#' @param data data frame
#' @param file_path if not `NULL` a filepath to save the file
#' @param folder folder to save the file
#' @param file_name file name
#'
#' @return
#' @export
#'
#' @examples
#' tbl_edit(data = iris, file_path = "Desktop/test",  file_name = "iris")
#' tbl_edit(data = iris, file_path = "Desktop",  file_name = "iris")



tbl_edit <-
  function(data,
           override_common_group = F,
           file_path = NULL, folder = NULL, file_name = NULL) {

    if (override_common_group & data %>% hasName("group")) {
      data %>%
        mutate(group = case_when(
          group == "common" ~ "",
          TRUE ~ group
        ))
    }

    data <- edit(data)
    data <- as_tibble(data)

    if (length(file_path) > 0) {
      "Saving data" %>% message()
      oldwd <- getwd()
      setwd("~")
      if (length(file_name) == 0) {
        file_name <- "data"
      }

      pq_write(data = data, file_path = file_path, folder = folder, file_name = file_name)

      if (getwd() != oldwd) {
        setwd(oldwd)
      }
    }


    data

  }


#' Data Editor
#'
#' Use data editor to edit data
#'
#' @param data
#' @param override_common_group
#' @param file_path
#' @param folder
#' @param file_name
#'
#' @return
#' @export
#'
#' @examples
#' tbl_data_edit(data = iris, file_path = "Desktop/test",  file_name = "iris")
#' tbl_data_edit(data = mtcars, file_path = "Desktop/test",  file_name = "mtcars")
tbl_data_edit <-
  function(data,
           override_common_group = F,
           file_path = NULL, folder = NULL, file_name = NULL) {

    if (override_common_group & data %>% hasName("group")) {
      data %>%
        mutate(group = case_when(
          group == "common" ~ "",
          TRUE ~ group
        ))
    }


    data <- DataEditR::data_edit(data)
    data <- as_tibble(data)

    if (length(file_path) > 0) {
      "Saving data" %>% message()
      oldwd <- getwd()
      setwd("~")
      if (length(file_name) == 0) {
        file_name <- "data"
      }

      pq_write(data = data, file_path = file_path, folder = folder, file_name = file_name)

      if (getwd() != oldwd) {
        setwd(oldwd)
      }
    }


    data

  }


#' Update Python Packages
#'
#' @param is_python_3
#'
#' @return
#' @export
#'
#' @examples
update_python_packages <-
  function(is_python_3 = TRUE) {
    if (is_python_3) {
      system("pip3 freeze --local | grep -v '^\\-e' | cut -d = -f 1  | xargs -n1 pip3 install -U")
    } else {
      system("pip2 freeze --local | grep -v '^\\-e' | cut -d = -f 1  | xargs -n1 pip2 install -U")
    }
  }


#' Reset Graphics
#'
#' @return
#' @export
#'
#' @examples
reset_graphics <-
  function(){
    message("Reseting graphics")
    dev.off()
    par(mar=c(1,1,1,1))
    return(invisible())
  }

#' Brew everything
#'
#' @return
#' @export
#'
#' @examples
brew_everything <- function() {
  system("brew upgrade")
  system("brew update")
  system("brew cleanup")
  gc(verbose = T, reset = T, full = T)
}


# other -------------------------------------------------------------------

#' Shuffle Data
#'
#' @param data
#' @param seed
#'
#' @return
#' @export
#'
#' @examples
tbl_shuffle <-
  function(data, seed = NULL) {
    if (length(seed) > 0) {
      set.seed(seed)
    }

    rows <- sample(nrow(data))
    data <- data[rows,] |> as_tibble()

    data

  }
abresler/asbtools documentation built on July 28, 2022, 11:04 p.m.