library(tidyverse)
library(reticulate)
library(superheat)
# Handle to Python's numpy (through reticulate); used below to read a .npy file.
np <- import("numpy")

# Prepare the output location. `showWarnings = FALSE` keeps reruns quiet when
# the directory already exists, and `recursive = TRUE` allows nested paths.
dir.create(params$out_dir, showWarnings = FALSE, recursive = TRUE)
fdir <- file.path(params$out_dir, "features")
# Drop any "features" folder left over from a previous run before extracting.
unlink(fdir, force = TRUE, recursive = TRUE)

# Unpack the run archive into the output directory.
# NOTE(review): presumably the archive provides the "features" folder and the
# "Xy*" file read below -- confirm against the archive layout.
untar(params$run, exdir = params$out_dir)

# `list.files()` expects a regular expression, not a shell glob. As a regex,
# "Xy*" means "X followed by zero or more y" and would match any file name
# containing an "X"; glob2rx() converts each wildcard into the regex that was
# actually intended.
subset <- read_csv(
  list.files(fdir, glob2rx("subset*"), full.names = TRUE)
)
Xy <- read_csv(
  list.files(params$out_dir, glob2rx("Xy*"), full.names = TRUE)
) %>%
  left_join(subset)

# Load the first "*best.npy" array found in the features folder as a data frame.
H_ <- np$load(list.files(fdir, glob2rx("*best.npy"), full.names = TRUE)[1]) %>%
  as.data.frame()

# Record each path's row position within `subset`.
cur_paths <- subset %>%
  mutate(ix = row_number()) %>%
  select(ix, path)

# Keep the rows of `Xy` whose path appears in `subset`, sort them into the
# `subset` row order, and retain only the columns whose names start with "X".
# NOTE(review): this ordering presumably lines the rows up with `H_` -- confirm.
H <- Xy %>%
  filter(path %in% cur_paths$path) %>%
  left_join(cur_paths) %>%
  arrange(ix) %>%
  select(starts_with("X"))

# some RCF cols have 0 variance, so jitter
# (uniform noise in [-0.1, 0.1] is added to every entry of `H_` for "rcf" runs)
if (str_detect(params$run, "rcf")) {
  H_ <- H_ + matrix(runif(prod(dim(H_)), -0.1, 0.1), nrow(H_), ncol(H_))
}

# Write the correlation heatmap to a PNG named after the run archive.
# The pattern is escaped and anchored so that only a literal trailing ".tar.gz"
# is stripped; with unescaped dots, "." would match any character and the
# match could occur anywhere in the file name.
png_name <- str_c(str_remove(basename(params$run), "\\.tar\\.gz$"), ".png")
png(file.path(params$out_dir, png_name), width = 900, height = 400)

# Close the graphics device even when plotting fails, so that an error does not
# leave the PNG device open and capture later plots.
tryCatch(
  superheat(
    # cor(H_, H[, -1]) has one row per column of `H_`; after transposing, the
    # heatmap rows are the columns of `H` (minus its first column) and the
    # heatmap columns are the columns of `H_`.
    t(cor(H_, H[, -1])),
    pretty.order.rows = FALSE,
    pretty.order.cols = TRUE,
    bottom.label = "none",
    heat.pal = c("#822233", "#F7F7F7", "#497b95"),
    col.dendrogram = TRUE,
    heat.pal.values = c(0, .5, 1),
    left.label.text.size = 6,
    left.label.size = 0.15,
    grid.hline = FALSE,
    grid.vline = FALSE,
    legend = FALSE
  ),
  finally = invisible(dev.off())
)


# krisrs1128/MSLF documentation built on March 21, 2023, 10:21 a.m.