data-raw/rds_checker.R

#' ----
#' title: PureHoney checker
#' description: utility to standardize data flowing into the HST Explorer front end
#' author: nathan.day@hemoshear.com
#' ----

setwd("~/Binfo/PureHoney/Comp_RDSs/HemoShine/")

library(magrittr)
library(tidyverse)
library(assayr2)

### Pattern to change the white_list ------------------------------------

## load into env
data(explorer_wl)

## make changes
# explorer_wl$curve_plot %<>% c(., "2-Methyl-3-OH-butyryl")

## save to package data and overwrite old
# devtools::use_data(explorer_wl, pkg = "~/Binfo/Packages/assayr2/", internal = FALSE, overwrite = TRUE)

## reload to take effect
# devtools::load_all("~/Binfo/Packages/assayr2/")

### Pattern to check against white-list -------------------------------------

# new RDSs
new <- dir("new", full.names = TRUE)

new_rds <- map(new, readRDS) %>% set_names(new)

# check names
walk(new_rds, check_names)

# check values
walk(new_rds, ~check_values(., explorer_wl))

# template for changing stuff
new_rds %<>% map(~mutate(.,
  donor = gsub("220,000/sample", "HUH7", donor),
  tx_challenge_abstract = gsub("_.*", "", tx_challenge_abstract),
  isotope = gsub("(\\d+)(C)", "\\2\\1", isotope)
))

walk(new_rds, ~check_values(., explorer_wl))

# re-build nate_db
new_data <- bind_rows(new_rds) %>%
  mutate(tx_conc = as.character(tx_conc) %>% as.numeric())


#' Upload or download a file from a remote server using scp
#'
#' @param local_path Path on your local machine
#' @param method 'GET' or 'PUT'
#' @param pemfile_key The absolute path to your pemfile should be
#'    a value in .Renviron, `pemfile_key` is the key for that value in .Renviron
#' @param user Remote user
#' @param ip Remote server address
#' @param remote_path Path on remote machine

remote_file <- function(method = "GET",
                        local_path = ".", remote_path = "/data/test/nate_db.RDS",
                        pemfile_key = "HEMOSHINE_KEY", user = "HEMOSHINE_USER", ip = "HEMOSHINE_IP") {
  stopifnot(method %in% c("GET", "PUT"))

  # build paths
  local_path %<>% paste0(getwd(), "/", .)
  remote_path %<>% paste0(Sys.getenv(user), "@", Sys.getenv(ip), ":", .)

  # build system expression
  exprs <- paste0("scp -i ", Sys.getenv(pemfile_key), " ")

  # arrange local/remote depending on operation
  if (method == "GET") {
    exprs2 <- paste(remote_path, local_path)
  } else {
    exprs2 <- paste(local_path, remote_path)
  }
  # run system command
  system(paste0(exprs, exprs2))
}

remote_file("GET", paste0("nate_db/nate_db_", format(Sys.time(), "%F"), ".RDS"))

current <- dir("nate_db/", full.names = TRUE) %>%
  sort() %>%
  .[-c(1, length(.))] %>%
  .[length(.)] %>%
  readRDS()

# filter out runs alread in nate_db
current %<>% filter(!run %in% unique(new_data$run))

# stack back together
current %<>% bind_rows(new_data)

# pair down to needed columns
current %<>% dplyr::select(
  tx_run, isotope, curve_plot, tx_conc, targ, plate_id,
  donor, plate_run, sample_id, conc, conc_corrected,
  conc_incell_uM, contents, run, recov, c_bool, tx_cmpd,
  nuc_well, media, tx_challenge, tx_challenge_abstract
)

# destined for 'AWS-HemoShine-Instance@/data/test'
saveRDS(current, "nate_db/nate_db.RDS")
lookup_table <- current %>%
  dplyr::select(run, tx_challenge_abstract) %>%
  unique()
saveRDS(lookup_table, "nate_db/lookup.RDS")

remote_file("PUT", "nate_db/lookup.RDS", "/data/test/lookup.RDS")
remote_file("PUT", "nate_db/nate_db.RDS")

# # find current nate_db
# current <- readRDS("nate_db/nate_db.RDS")
# today <- Sys.Date()
# saveRDS(current, paste0("nate_db/nate_db_", today, ".RDS"))
#
# # filter out runs alread in nate_db
# current %<>% filter(!run %in% unique(new_data$run))
#
# # stack back together
# current %<>% bind_rows(new_data)
#
# # pair down to needed columns
# current %<>% dplyr::select(tx_run, isotope, curve_plot, tx_conc, targ, plate_id,
#                            donor, plate_run, sample_id, conc, conc_corrected,
#                            conc_incell_uM, contents, run, recov, c_bool, tx_cmpd,
#                            nuc_well, media, tx_challenge, tx_challenge_abstract)
#
# # destined for 'AWS-HemoShine-Instance@/data/test'
# saveRDS(current, "nate_db/nate_db.RDS")
# lookup_table <- current %>% dplyr::select(run, tx_challenge_abstract) %>%
#     unique()
# saveRDS(lookup_table, "nate_db/lookup.RDS")
#
# remote_file("~/Binfo/PureHoney/Comp_RDSs/HemoShine/nate_db/lookup.RDS", method = "PUT", user = "day", remote_path = '/data/test/lookup.RDS')
# remote_file("~/Binfo/PureHoney/Comp_RDSs/HemoShine/nate_db/nate_db.RDS", method = "PUT", user = "day")
hemoshear/assayr2 documentation built on Nov. 8, 2019, 6:13 p.m.