inst/examples/preprocessing/synthetic/encode_coverage.R

# ------------------------------------------
# Set working directory and load libraries
# ------------------------------------------
if (interactive()) { setwd(dirname(parent.frame(2)$ofile)) }
# Source the 'lib' directory
R.utils::sourceDirectory("../../lib", modifiedOnly = FALSE)
suppressPackageStartupMessages(require(BPRMeth))
suppressPackageStartupMessages(require(mvtnorm))
set.seed(1234)

##------------------------
# Initialize variables   #
##------------------------
io <- list(out_dir = "../../local-data/melissa/synthetic/imputation/coverage/raw/",
           coef_file = "../../local-data/melissa/synthetic/encode-coef.rds")
opts <- list()
opts$N <- 200               # Number of cells
opts$M <- 100               # Number of genomic regions
opts$K <- 4                 # Number of clusters
opts$pi_k <- c(.2,.4,.15,.25)  # Mixing proportions
opts$cluster_var  <- 0.5    # % of regions that are variable across clusters
opts$total_sims   <- 10     # Number of simulations
opts$basis_prof   <- create_rbf_object(M = 4) # Profile basis functions
opts$basis_mean   <- create_rbf_object(M = 0) # Rate basis function

##------------------------
# Create synthetic data
##------------------------
# Load ENCODE cluster profiles
encode_w <- readRDS(io$coef_file)

xs <- seq(-1,1, 0.01)
hs <- design_matrix(opts$basis_prof, xs)$H
for (k in 1:5) {
    ys <- pnorm(hs %*% encode_w[,k])
    if (k == 1) { plot(xs, ys, ylim = c(0, 1), type = "l", col = k) }
    else {lines(xs, ys, col = k) }
    print(mean(ys))
}


synth_data = params <- vector("list", length = opts$total_sims)
for (sim in 1:opts$total_sims) {
    synth_data[[sim]] <- generate_encode_synth_data(basis = opts$basis_prof, encode_w = encode_w,
        N = opts$N, M = opts$M, K = opts$K, pi_k = opts$pi_k, cluster_var = opts$cluster_var)
}

##-----------------------------
#   Save synthetic data
##-----------------------------
obj <- list(synth_data = synth_data, io = io, opts = opts)
saveRDS(obj, file = paste0(io$out_dir, "encode_data.rds"))

# Store each simulation independently due to memory issues when doing inference
for (sim in 1:opts$total_sims) {
    obj <- list(synth_data = synth_data[[sim]], io = io, opts = opts)
    saveRDS(obj, file = paste0(io$out_dir, "data-sims/encode_data_", sim, ".rds"))
}
andreaskapou/Melissa documentation built on June 12, 2020, 5:54 p.m.