#!/usr/bin/env Rscript
##-----------------------------------------------------------------------------
## Usage:
##
## Rscript overlap_score_summary_vs_nnn.R <options>
##
## Options:
## --chromosomes=<comma-separated integer names>
## --bin_sizes=<comma-separated basepair integers>
## --rhos=<comma-separated (0,0.5] values>
## --reference_rhos=(same|50%)
## --nsamples=<positive integer>
## --window_size=<basepair integer>
## --weights=(by_length|uniform)
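## --fig_format=(pdf|png)
## --domain_length=<optional; default NULL>
## --xlim_score=<optional; two non-negative numbers, min < max>
## --ylim_score=<two numbers in [0,1], min < max; default c(0,1)>
## --verbose=<logical; default TRUE>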
##
## Input:
## * human,HAP1,unique,chr*.rds files in R package 'TopDomStudy', i.e.
## in folder system.file("compiledData", package="TopDomStudy")
##
## Output:
## * figures/
## * overlapScoreData/
## * overlapScoreSummary/
##-----------------------------------------------------------------------------
## Attach the study package; the overlap_score_summary_vs_*() functions called
## further down are presumably provided by it (TODO confirm)
library(TopDomStudy)
library(future) ## sources ./.future.R, if it exists
## Local alias so that options can be parsed below without the R.utils:: prefix
cmdArg <- R.utils::cmdArg
## Allow for 3-GiB objects to be exported during parallelizing
## (3 * 1024^3 = 3 GiB)
options(future.globals.maxSize = 3 * 1024^3)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Parse command-line options
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Every option below is read with cmdArg(<name> = <default>), so the value
## shown is what is used when the option is not given on the command line.

## Chromosome(s) to process; the default is "22" (the other choices that
## this script has listed are "12" and "16")
chromosomes <- cmdArg(chromosomes = "22")

## --nsamples
nsamples <- cmdArg(nsamples = 100L)

## Simulation parameters: the rho values
## FIXME: chromosome="22", rho=0.01, bin_size=10000, nsamples=1L gives an error
## NOTE(review): an older remark here said "Skip rho=0.02 due to chr=12 memory
## constraints", yet 0.02 is part of the default below -- TODO confirm which
## one is intended.
rhos <- cmdArg(
  rhos = c(
    ## Disabled values: 0.001, 0.005, 0.010,
    0.02, 0.03, 0.04, 0.05, 0.06, 0.08,
    0.10, 0.12, 0.14, 0.16, 0.18, 0.20,
    0.25, 0.30, 0.40, 0.50
  )
)

## --reference_rhos must be one of "same" (default) or "50%"
reference_rhos <- match.arg(
  cmdArg(reference_rhos = "same"),
  c("same", "50%")
)

## TopDom parameters: bin sizes (given here in units of 1,000) and window size
bin_sizes <- cmdArg(
  bin_sizes = 1e3 * c(6, 8, 10, 12, 15, 20, 30, 40, 50, 60, 80, 100)
)
window_size <- cmdArg(window_size = 5L)

## Parameters for summarizing overlap scores
## --weights must be one of "by_length" (default) or "uniform"
weights <- match.arg(
  cmdArg(weights = "by_length"),
  c("by_length", "uniform")
)

## Alternative settings for 'domain_length' that have been used:
##   domain_length <- "ref_len_iqr"
##     WARNING: Requires that domain_length = NULL has been run before
##   domain_length <- c(300e3, 1000e3)
domain_length <- cmdArg(domain_length = NULL)

## Miscellaneous
## --fig_format must be one of "pdf" (default) or "png"
fig_format <- match.arg(
  cmdArg(fig_format = "pdf"),
  c("pdf", "png")
)

## --xlim_score is optional; when given, it is coerced to numeric and must be
## two non-missing, non-negative values in strictly increasing order
value <- cmdArg(xlim_score = NULL)
if (is.null(value)) {
  xlim <- NULL
} else {
  value <- as.numeric(value)
  stopifnot(
    is.numeric(value),
    length(value) == 2L,
    !anyNA(value),
    all(value >= 0),
    value[1] < value[2]
  )
  xlim <- value
}

## --ylim_score must be two non-missing values within [0,1] in strictly
## increasing order
value <- cmdArg(ylim_score = c(0, 1))
stopifnot(
  is.numeric(value),
  length(value) == 2L,
  !anyNA(value),
  all(value >= 0),
  all(value <= 1),
  value[1] < value[2]
)
ylim_score <- value

## --verbose
verbose <- cmdArg(verbose = TRUE)
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Process
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
## Produce the overlap-score summaries twice: first with the "bin_size" variant
## and then with the "fraction" variant of the summary function.  Both variants
## are called with the very same settings, and each result is printed.
for (vs in c("bin_size", "fraction")) {
  ## Pick the summary function that corresponds to this iteration
  if (vs == "bin_size") {
    FUN <- overlap_score_summary_vs_bin_size
  } else {
    FUN <- overlap_score_summary_vs_fraction
  }

  done <- FUN(
    ## Data selection
    dataset = "human,HAP1", chromosomes = chromosomes,
    ## Simulation and TopDom parameters
    bin_sizes = bin_sizes, rhos = rhos, reference_rhos = reference_rhos,
    window_size = window_size,
    ## Overlap-score summary parameters
    weights = weights, domain_length = domain_length, nsamples = nsamples,
    ## Figure settings
    fig_format = fig_format, xlim = xlim, ylim_score = ylim_score,
    verbose = verbose
  )

  print(done)
}
## NOTES:
## * 2019-01-08:
## Running the above with nsamples = 50L, bin_size = 100000 on chromosomes
## 12, 19, 22 (data available in TopDomStudy) takes ~30 minutes with 6 cores
## on a Thinkpad X1C6.
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.