Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(pcpr)
## ----helper-------------------------------------------------------------------
library(magrittr) # for the pipe %>%
library(ggplot2) # for plotting
plot_matrix <- function(D, ..., lp = "none", title = NULL) {
D <- t(D)
if (is.null(colnames(D))) colnames(D) <- paste0("C", 1:ncol(D))
data.frame(D) %>%
dplyr::mutate(x = paste0("R", 1:nrow(.))) %>%
tidyr::pivot_longer(tidyselect::all_of(colnames(D)), names_to = "y", values_to = "value") %>%
dplyr::mutate(x = factor(x, levels = unique(x)), y = factor(y, levels = unique(y))) %>%
ggplot(aes(x = x, y = y, fill = value)) +
geom_raster() +
scale_y_discrete(limits = rev) +
coord_equal() +
scale_fill_viridis_c(na.value = "white", ...) +
theme_minimal() +
theme(
axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.text.y = element_blank(),
axis.ticks.y = element_blank(),
axis.title.x = element_blank(),
axis.title.y = element_blank(),
legend.position = lp,
plot.margin = margin(0, 0, 0, 0),
aspect.ratio = 1
) +
ggtitle(title)
}
## ----sim data-----------------------------------------------------------------
data <- sim_data()
D <- data$D
L_0 <- data$L
S_0 <- data$S
Z_0 <- data$Z
## ----mat viz------------------------------------------------------------------
plot_matrix(D)
plot_matrix(L_0)
plot_matrix(S_0)
plot_matrix(Z_0)
## ----matrix rank--------------------------------------------------------------
matrix_rank(L_0)
matrix_rank(D)
## ----lod----------------------------------------------------------------------
lod_info <- sim_lod(D, q = 0.1)
D_lod <- lod_info$D_tilde
lod <- lod_info$lod
lod
## ----corrupt mat randomly-----------------------------------------------------
corrupted_data <- sim_na(D_lod, perc = 0.05)
D_tilde <- corrupted_data$D_tilde
lod_root2 <- matrix(
lod / sqrt(2),
nrow = nrow(D_tilde),
ncol = ncol(D_tilde), byrow = TRUE
)
lod_idxs <- which(lod_info$tilde_mask == 1)
D_tilde[lod_idxs] <- lod_root2[lod_idxs]
plot_matrix(D_tilde)
## ----sing---------------------------------------------------------------------
D_imputed <- impute_matrix(D_tilde, apply(D_tilde, 2, mean, na.rm = TRUE))
singular_values <- sing(D_imputed)
plot(singular_values, type = "b")
## ----gs, eval = FALSE, echo = TRUE--------------------------------------------
# eta_0 <- get_pcp_defaults(D_tilde)$eta
# etas <- data.frame("eta" = sort(c(0.1 * eta_0, eta_0 * seq(1, 10, 2))))
# # to get progress bar, could wrap this
# # in a call to progressr::with_progress({ gs <- grid_search_cv(...) })
# gs <- grid_search_cv(
# D_tilde,
# pcp_fn = rrmc,
# grid = etas, r = 5, LOD = lod,
# parallel_strategy = "multisession",
# num_workers = 16,
# verbose = FALSE
# )
## ----real gs, eval = TRUE, echo = FALSE---------------------------------------
gs <- readRDS("rds_files/quickstart-gs.rds")
## ----gs results---------------------------------------------------------------
r_star <- gs$summary_stats$r[1]
eta_star <- round(gs$summary_stats$eta[1], 3)
gs$summary_stats
## ----rrmc---------------------------------------------------------------------
pcp_model <- rrmc(D_tilde, r = r_star, eta = eta_star, LOD = lod)
## ----obj----------------------------------------------------------------------
plot(pcp_model$objective, type = "l")
## ----output L-----------------------------------------------------------------
plot_matrix(pcp_model$L)
matrix_rank(pcp_model$L)
## ----sparse-------------------------------------------------------------------
hist(pcp_model$S)
pcp_model$S <- hard_threshold(pcp_model$S, thresh = 0.4)
plot_matrix(pcp_model$S)
sparsity(pcp_model$S)
## ----pca----------------------------------------------------------------------
L_pca <- proj_rank_r(D_imputed, r = r_star)
## ----performance metrics------------------------------------------------------
data.frame(
"Obs_rel_err" = norm(L_0 - D_imputed, "F") / norm(L_0, "F"),
"PCA_L_rel_err" = norm(L_0 - L_pca, "F") / norm(L_0, "F"),
"PCP_L_rel_err" = norm(L_0 - pcp_model$L, "F") / norm(L_0, "F"),
"PCP_S_rel_err" = norm(S_0 - pcp_model$S, "F") / norm(S_0, "F"),
"PCP_L_rank" = matrix_rank(pcp_model$L),
"PCP_S_sparsity" = sparsity(pcp_model$S)
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.