Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(slideimp)
set.seed(1234)
## -----------------------------------------------------------------------------
# 20 rows, 1000 columns, all columns have at least some NA
sim_obj <- sim_mat(n = 20, p = 1000, perc_col_na = 1)
obj <- sim_obj$input
obj[1:4, 1:4]
## -----------------------------------------------------------------------------
na_loc <- sample_na_loc(obj, n_cols = 200, n_rows = 5, n_reps = 5)
length(na_loc)
na_loc[[1]][1:6, ]
## -----------------------------------------------------------------------------
# This custom function imputes missing values with random normal values and takes
# `mean` and `sd` as params
rnorm_imp <- function(obj, mean, sd) {
na <- is.na(obj)
obj[na] <- rnorm(sum(na), mean = mean, sd = sd) # <- impute values with rnorm
return(obj) # <- return an imputed object with the same dim as obj
}
pca_tune <- tune_imp(
obj,
.f = "pca_imp",
na_loc = na_loc,
parameters = data.frame(ncp = 10)
)
knn_tune <- tune_imp(
obj,
.f = "knn_imp",
na_loc = na_loc,
parameters = data.frame(k = 10)
)
rnorm_tune <- tune_imp(
obj,
.f = rnorm_imp,
na_loc = na_loc,
parameters = data.frame(mean = 0, sd = 1) # must match with arguments of `rnorm_imp`
)
## -----------------------------------------------------------------------------
mean(compute_metrics(pca_tune, metrics = "rmse")$.estimate)
mean(compute_metrics(knn_tune, metrics = "rmse")$.estimate)
mean(compute_metrics(rnorm_tune, metrics = "rmse")$.estimate)
## -----------------------------------------------------------------------------
sim_obj <- sim_mat(n = 20, p = 50, n_col_groups = 2)
# Matrix to be imputed
obj <- sim_obj$input
obj[1:5, 1:4]
# Metadata, i.e., which features belong to which group
meta <- sim_obj$col_group
meta[1:5, ]
# We put feature 1 in `group3`
meta[1, 2] <- "group3"
meta[1:5, ]
## -----------------------------------------------------------------------------
set.seed(1234)
group_imp_df <- prep_groups(colnames(obj), group = meta, min_group_size = 10)
group_imp_df$parameters <- list(list(k = 3), list(k = 4), list(k = 5))
group_imp_df
## -----------------------------------------------------------------------------
knn_results <- group_imp(obj, group = group_imp_df, cores = 4, k = 10)
print(knn_results, p = 4)
## -----------------------------------------------------------------------------
set.seed(1234)
sample_names <- paste0("S", 1:10)
n_sites <- 1000
# Simulate positions with 50–500 bp between each site
distances_between <- sample(50:500, size = n_sites, replace = TRUE)
locations <- cumsum(distances_between) # <- important, location vector
methyl <- data.frame(
chr = "chr1",
start = locations,
end = locations,
strand = "+"
)
for (i in seq_along(sample_names)) {
methyl[[paste0("numCs", i)]] <- sample.int(100, size = n_sites, replace = TRUE)
methyl[[paste0("numTs", i)]] <- sample.int(100, size = n_sites, replace = TRUE)
methyl[[paste0("coverage", i)]] <- methyl[[paste0("numCs", i)]] + methyl[[paste0("numTs", i)]]
}
methyl[1:5, 1:10]
## -----------------------------------------------------------------------------
numCs_matrix <- as.matrix(methyl[, paste0("numCs", seq_along(sample_names))])
cov_matrix <- as.matrix(methyl[, paste0("coverage", seq_along(sample_names))])
beta_matrix <- numCs_matrix / cov_matrix
colnames(beta_matrix) <- sample_names
rownames(beta_matrix) <- methyl$start
beta_matrix <- t(beta_matrix)
# Set 10% of the data to missing
set.seed(1234)
beta_matrix[sample.int(length(beta_matrix), floor(length(beta_matrix) * 0.1))] <- NA
beta_matrix[1:4, 1:4]
## -----------------------------------------------------------------------------
params <- expand.grid(ncp = c(2, 4), window_size = c(5000, 10000))
params$overlap_size <- 1000
params$min_window_n <- 20 # windows with less than 20 columns are dropped
# Increase n_reps from 2 in actual analyses and use another chromosome (i.e., chr22)
tune_slide_pca <- tune_imp(
obj = beta_matrix,
parameters = params,
.f = "slide_imp",
n_reps = 2,
location = locations
)
metrics <- compute_metrics(tune_slide_pca)
aggregate(.estimate ~ .metric + ncp + window_size, data = metrics, FUN = mean)
## -----------------------------------------------------------------------------
slide_imp(
obj = beta_matrix,
location = locations,
window_size = 5000,
overlap_size = 1000,
ncp = 2,
min_window_n = 20,
dry_run = TRUE # <- dry_run to inspect the windows
)
## -----------------------------------------------------------------------------
slide_imp(
obj = beta_matrix,
location = locations,
window_size = 5000,
overlap_size = 1000,
ncp = 2,
min_window_n = 20,
dry_run = FALSE,
.progress = FALSE
)
## -----------------------------------------------------------------------------
slide_imp(
obj = beta_matrix,
location = locations,
window_size = 5000,
ncp = 2,
min_window_n = 20,
subset = c("1323", "33810"),
flank = TRUE,
dry_run = TRUE
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.