Nothing
## Test functions present in the metagene.R file
### {{{ --- Test setup ---
if(FALSE) {
library( "RUnit" )
library( "metagene2" )
library( "data.table" )
library( "dplyr" )
}
### }}}
bam_files <- get_demo_bam_files()
named_bam_files <- bam_files
names(named_bam_files) <- letters[1:(length(named_bam_files))]
not_indexed_bam_file <- metagene2:::get_not_indexed_bam_file()
regions <- metagene2:::get_demo_regions()
design <- data.frame(Samples = c("align1_rep1.bam", "align1_rep2.bam",
"align2_rep1.bam", "align2_rep2.bam", "ctrl.bam"),
align1 = c(1,1,0,0,2), align2 = c(0,0,1,1,2))
design$Samples <- paste0(system.file("extdata", package = "metagene2"), "/",
design$Samples)
set.seed(1)
demo_mg <- metagene2$new(regions = get_demo_regions(),
bam_files = get_demo_bam_files())
full_mg = demo_mg$clone(deep=TRUE)
full_mg$produce_metagene()
util_test_invalid_param_constructor_value <- function(param_name, param_value, error_value) {
arg_list_new = list(bam_files=get_demo_bam_files(),
regions=get_demo_regions())
arg_list_new[[param_name]] = param_value
obs <- tryCatch(do.call(metagene2$new, arg_list_new),
error = conditionMessage)
checkIdentical(obs, error_value)
}
util_test_invalid_param_value <- function(param_name, param_value, step_function, error_value) {
arg_list_new = list(bam_files=get_demo_bam_files(),
regions=get_demo_regions())
arg_list_new[[param_name]] = param_value
obs <- tryCatch(do.call(metagene2$new, arg_list_new),
error = conditionMessage)
checkIdentical(obs, error_value)
mg <- demo_mg$clone(deep=TRUE)
obs <- tryCatch(do.call(mg[[step_function]], arg_list_new[param_name]),
error = conditionMessage)
checkIdentical(obs, error_value)
mg <- demo_mg$clone(deep=TRUE)
obs <- tryCatch(do.call(mg$produce_metagene, arg_list_new[param_name]),
error = conditionMessage)
checkIdentical(obs, error_value)
}
#region <- regions[1]
#bam_file <- bam_files[1]
#demo_mg_min <- metagene2$new(regions = region, bam_files = bam_file)
# Load fake SAMs for value tests.
fake_bam_files = file.path(system.file("extdata", package = "metagene2"),
c("fake_align1.bam", "fake_align2.bam", "fake_align3.bam"))
# Define a design that will test various combinations of bam files.
fake_bam_design = data.frame(BAM=fake_bam_files,
fake_align1=c(1, 0, 0),
fake_align2=c(0, 1, 0),
fake_align3=c(0, 0, 1),
fake_align12=c(1, 1, 0),
fake_align13=c(1, 0, 1),
fake_align23=c(0, 1, 1),
fake_align123=c(1, 1, 1),
stringsAsFactors=FALSE)
# Define the region over which single-region tests will be run.
# fake_align1 has 1 read over the whole region.
# fake_align2 has 2 reads over the whole region.
# fake_align3 has 4 reads over the first half of the region, 8 over the second half.
fake_bam_region_1_chr="chr1"
fake_bam_region_1_pos_start=1000000
fake_bam_region_1_pos_end = 1004999
fake_bam_region_1_pos_range=fake_bam_region_1_pos_start:fake_bam_region_1_pos_end
fake_bam_region_1_test_region_unique = GRanges(paste0(fake_bam_region_1_chr, ":", fake_bam_region_1_pos_start, "-", fake_bam_region_1_pos_end))
# Load the expected genomic coverages generated along with the bam files.
fake_bam_expected_coverages = list()
fake_bam_expected_rpm = list()
for(i in fake_bam_design$BAM) {
cov_file = gsub(".bam", ".sam", i)
load(paste0(cov_file, ".coverage.RData"))
fake_bam_expected_coverages[[i]] = cov_rle
load(paste0(cov_file, ".coverage_rpm.RData"))
fake_bam_expected_rpm[[i]] = rpm_rle
}
###################################################
## Test the metagene2$new() function (initialize)
###################################################
## Invalid verbose value
test.metagene_invalid_verbose <- function() {
util_test_invalid_param_constructor_value("verbose", "ZOMBIES", "verbose must be a logicial value (TRUE or FALSE)")
}
## Invalid force_seqlevels value
test.metagene_invalid_force_seqlevels <- function() {
util_test_invalid_param_constructor_value("force_seqlevels", "ZOMBIES", "force_seqlevels must be a logicial value (TRUE or FALSE)")
}
## Negative padding_size value
test.metagene_invalid_padding <- function() {
util_test_invalid_param_constructor_value("padding_size", "ZOMBIES", "padding_size must be a non-negative integer")
util_test_invalid_param_constructor_value("padding_size", -1, "padding_size must be a non-negative integer")
util_test_invalid_param_constructor_value("padding_size", 1.2, "padding_size must be a non-negative integer")
}
## Negative core value
test.metagene_invalid_core <- function() {
util_test_invalid_param_constructor_value("cores", "ZOMBIES", "cores must be a positive integer or a BiocParallelParam instance.")
util_test_invalid_param_constructor_value("cores", -1, "cores must be a positive integer or a BiocParallelParam instance.")
util_test_invalid_param_constructor_value("cores", 1.2, "cores must be a positive integer or a BiocParallelParam instance.")
util_test_invalid_param_constructor_value("cores", 0, "cores must be a positive integer or a BiocParallelParam instance.")
}
## Non-character vector bam_files value
util_test_invalid_bam_file <- function(value, error_value) {
obs <- tryCatch(metagene2:::metagene2$new(regions = get_demo_regions(),
bam_files = value),
error = conditionMessage)
checkIdentical(obs, error_value)
}
test.metagene_invalid_bam_files <- function() {
util_test_invalid_bam_file(c(2,4,3), "bam_files must be a vector of BAM filenames.")
util_test_invalid_bam_file(list(a = "a.txt", b = "b.txt"), "bam_files must be a vector of BAM filenames.")
util_test_invalid_bam_file(not_indexed_bam_file, "All BAM files must be indexed")
util_test_invalid_bam_file(c(bam_files, not_indexed_bam_file), "All BAM files must be indexed")
}
util_test_invalid_region <- function(invalid_region, error, ...) {
obs <- tryCatch(do.call(metagene2$new, c(list(...),
list(bam_files=get_demo_bam_files(),
regions = invalid_region))),
error = conditionMessage)
checkIdentical(obs, error)
}
util_test_valid_region <- function(valid_region, ...) {
obs <- do.call(metagene2$new, c(list(...),
list(bam_files=get_demo_bam_files(),
regions = valid_region)))
util_test_valid_metagene(obs)
}
util_test_valid_metagene <- function(mg) {
checkIdentical(class(mg), c("metagene2", "R6"))
}
# regions is the wrong class
test.metagene_initialize_invalid_region_value <- function() {
region <- array(data = NA, dim = c(2,2,2))
util_test_invalid_region(region, paste0("regions must be either a vector of BED filenames, a ",
"GRanges object or a GrangesList object"))
}
test.metagene_regions_seqlevels <- function() {
# GRanges have seqlevels (indicating all possible seqnames)
# and seqnames themselves. Extra seqlevels are no concerns.
region_with_extra_seq_level = get_demo_regions()[[1]]
GenomeInfoDb::seqlevels(region_with_extra_seq_level) <- c(GenomeInfoDb::seqlevels(region_with_extra_seq_level),
"extra_seqlevels")
util_test_invalid_region(region_with_extra_seq_level, metagene2:::SEQ_LEVEL_ERROR)
util_test_valid_region(region_with_extra_seq_level, force_seqlevels = TRUE)
# Extra seqnames means we must drop certain regions. We only do so
# if force_seqlevels=TRUE, otherwise we throw an error.
region_with_extra_seq = region_with_extra_seq_level
seqnames(region_with_extra_seq)[1] = "extra_seqlevels"
util_test_invalid_region(region_with_extra_seq, metagene2:::SEQ_LEVEL_ERROR)
util_test_valid_region(region_with_extra_seq, force_seqlevels = TRUE)
# Sometimes there can be no seqnames left after removing
# those with unknown levels. This happens often in chromosome names
# are mismatched ("chr1" vs "1")
region_no_common_seq = region_with_extra_seq_level
seqnames(region_no_common_seq) = factor(rep("extra_seqlevels", 50), levels=c("chr1", "extra_seqlevels"))
util_test_invalid_region(region_no_common_seq, "No seqlevels matching between regions and bam file", force_seqlevels=TRUE)
}
# Valid regions narrowPeak
test.metagene_initialize_valid_narrowpeak <- function() {
region <- metagene2:::get_narrowpeak_file()
mg <- metagene2$new(regions = region, bam_files = bam_files[1])
obs <- mg$get_regions()
extraCols <- c(signalValue = "numeric", pValue = "numeric",
qValue = "numeric", peak = "integer")
exp <- rtracklayer::import(region, format = "BED", extraCols = extraCols)
exp$region_name="list1"
checkIdentical(obs, exp)
}
# Valid regions broadPeak
test.metagene_initialize_valid_broadpeak <- function() {
region <- metagene2:::get_broadpeak_file()
mg <- metagene2$new(regions = region, bam_files = bam_files[1])
obs <- mg$get_regions()
extraCols <- c(signalValue = "numeric", pValue = "numeric",
qValue = "numeric")
exp <- rtracklayer::import(region, format = "BED", extraCols = extraCols)
exp$region_name="list1"
checkIdentical(obs, exp)
}
# Valid named bam files
test.metagene_initialize_valid_bam_files <- function() {
mg <- metagene2$new(regions = get_demo_regions(), bam_files = get_demo_bam_files())
# Make sure all bam_files were kept.
obs <- mg$get_params()[["bam_files"]]
exp <- get_demo_bam_files()
checkTrue(all(obs == exp))
# Make sure we have coverage for all bam files.
obs <- names(mg$get_raw_coverages())
exp <- get_demo_bam_files()
checkTrue(all(obs == exp))
# NAmed bam files should have no impact (Historically, they changed the coverage names.)
named_bam_files = get_demo_bam_files()
names(named_bam_files) = letters[seq_along(named_bam_files)]
mg <- metagene2$new(regions = get_demo_regions(), bam_files = named_bam_files)
# Make sure we have coverage for all bam files under the correct names.
obs <- names(mg$get_raw_coverages())
exp <- unname(named_bam_files)
checkIdentical(obs, exp)
}
##################################################
# Test the metagene2$get_params() function
##################################################
## Valid usage
test.metagene_get_params_valid_usage <- function() {
mg <- demo_mg$clone(deep=TRUE)
params <- mg$get_params()
checkIdentical(unname(params[["bam_files"]]), get_demo_bam_files())
checkIdentical(params[["padding_size"]], 0)
checkIdentical(params[["verbose"]], FALSE)
checkIdentical(params[["force_seqlevels"]], FALSE)
}
##################################################
# Test the metagene2$get_design() function
##################################################
## Valid usage
test.metagene_get_design_valid_usage <- function() {
mg <- demo_mg$clone(deep=TRUE)
mg$group_coverages(design=get_demo_design())
design <- mg$get_design()
checkIdentical(mg$get_design()[,-1], get_demo_design()[,-1])
checkTrue(all(mg$get_design()[,1] == mg$get_params()[["bam_files"]]))
}
##################################################
# Test the metagene2$get_regions() function
##################################################
## Valid usage default
test.metagene_get_regions_valid_usage_default <- function() {
mg <- demo_mg$clone(deep=TRUE)
regions <- mg$get_regions()
checkIdentical(length(regions), length(unlist(get_demo_regions())))
region_1_in_regions = length(get_demo_regions()[[1]])
region_1_in_metagene = sum(regions$region_name==names(get_demo_regions())[1])
checkIdentical(region_1_in_regions, region_1_in_metagene)
}
##################################################
# Test the metagene2$get_matrice() function
##################################################
# Tests to write:
# Test single region
# Test bin_size = 1
test.metagene_group_coverages_valid_usage_default = function(){
mg <- demo_mg$clone(deep=TRUE)
grouped_coverages = mg$group_coverages(design=get_demo_design())
checkIdentical(length(grouped_coverages), ncol(get_demo_design()) - 1L)
}
# Make sure bam_files order does not change results.
# See issue #7: https://github.com/ArnaudDroitLab/metagene2/issues/7
test.metagene_group_coverages_bam_files_order = function(){
mg = metagene2$new(regions=get_demo_regions(), bam_files=get_demo_bam_files())
mg2 = metagene2$new(regions=get_demo_regions(), bam_files=rev(get_demo_bam_files()))
checkIdentical(mg$bin_coverages()[["align1_rep1"]][1:10,1:10],
mg2$bin_coverages()[["align1_rep1"]][1:10,1:10])
}
test.metagene_bin_coverages_valid_usage_default_design = function(){
mg <- demo_mg$clone(deep=TRUE)
mg$group_coverages()
binned_coverages = mg$bin_coverages()
design = mg$get_design()
checkIdentical(length(binned_coverages), ncol(design) - 1L)
checkIdentical(names(binned_coverages), colnames(design)[-1])
for(i in 1:length(binned_coverages)) {
checkIdentical(nrow(binned_coverages[[i]]), length(mg$get_regions()))
checkTrue(ncol(binned_coverages[[i]]) == mg$get_params()[["bin_count"]])
}
}
test.metagene_bin_coverages_valid_usage_custom_design = function(){
mg <- demo_mg$clone(deep=TRUE)
mg$group_coverages(design=get_demo_design())
binned_coverages = mg$bin_coverages()
checkIdentical(length(binned_coverages), ncol(get_demo_design()) - 1L)
checkIdentical(names(binned_coverages), colnames(get_demo_design())[-1])
for(i in 1:length(binned_coverages)) {
checkIdentical(nrow(binned_coverages[[i]]), length(mg$get_regions()))
checkTrue(ncol(binned_coverages[[i]]) == mg$get_params()[["bin_count"]])
}
}
# Test for https://github.com/ArnaudDroitLab/metagene2/issues/24
test.metagene_bin_coverages_overlapping_regions = function(){
# Define two overlapping regions with known coverages in fake_align1
# Region 1 has 1 coverage everywhere.
# Region 2 has 1 coverage for its first 2500nt, 0 coverage for the last 2500nt.
overlapping_regions = GRanges(c("chr1:1000000-1004999", "chr1:1002500-1007499"))
mg_default <- metagene2$new(bam_files=fake_bam_design$BAM, regions=overlapping_regions)
checkTrue(all(mg_default$bin_coverages(bin_count=5000)[["fake_align1"]][1,]==1))
checkTrue(all(mg_default$bin_coverages(bin_count=5000)[["fake_align1"]][2,1:2500]==1))
checkTrue(all(mg_default$bin_coverages(bin_count=5000)[["fake_align1"]][2,2501:5000]==0))
}
# Invalid split_by
test.metagene_invalid_split_by <- function() {
util_test_invalid_param_value("split_by", -4, "split_coverages_by_regions", metagene2:::ERROR_SPLIT_BY)
util_test_invalid_param_value("split_by", c("region_name", NA), "split_coverages_by_regions", metagene2:::ERROR_SPLIT_BY)
# This should fail if region_metadata is brought back inside the parameter handler.
#util_test_invalid_param_value("split_by", "patate", "split_coverages_by_regions", metagene2:::ERROR_SPLIT_BY)
}
test.metagene_split_coverages_valid_usage_default = function(){
# We'll populate a list with split_coverages results which should all be identical,
# then run validations against those list elements.
split_coverages_list = list()
# Case 1: Use default split_by, which will default to region_name.
# So we'll pass a GRangesList upon initialization to get 3 region groups.
split_regions_default = GRangesList(
Just4=GRanges(data.frame(seqnames="chr1", start=c(1000001, 1000101), end=c(1000100, 1000200))),
Just8=GRanges(data.frame(seqnames="chr1", start=c(1002501, 1002601), end=c(1002600, 1002700))),
Mixed=GRanges(data.frame(seqnames="chr1", start=c(1000001, 1002501), end=c(1000100, 1002600))))
mg_default <- metagene2$new(bam_files=fake_bam_design$BAM, regions=split_regions_default)
split_coverages_list[["default"]] = mg_default$split_coverages_by_regions()
# Case 2: Use custom split_by. So we'll pass a GRanges where the regions have a "Custom" attribute,
# which we'll split on.
split_regions_custom = unlist(split_regions_default)
split_regions_custom$Custom = c("Just4", "Just4", "Just8", "Just8", "Mixed", "Mixed")
mg_custom <- metagene2$new(bam_files=fake_bam_design$BAM, regions=split_regions_custom, split_by="Custom")
split_coverages_list[["custom"]] = mg_custom$split_coverages_by_regions()
# Finally, validate both split_coverages_by_regions results are good.
for(split_coverages in split_coverages_list) {
# Make sure that we've got matrix list per bam file/design group.
checkTrue(length(split_coverages) == length(fake_bam_design$BAM))
# Make sure each matrix list has has many matrices as there are region groups.
for(i in 1:length(split_coverages)) {
for(j in 1:length(split_regions_default)) {
checkTrue(nrow(split_coverages[[i]][[j]]) == length(split_regions_default[[j]]))
}
}
# Make sure numeric values within the matrices are as expected.
checkTrue(all(split_coverages[["fake_align_3"]]$Just4 == 4))
checkTrue(all(split_coverages[["fake_align_3"]]$Just8 == 8))
checkTrue(all(split_coverages[["fake_align_3"]]$Mixed[,1] == 4))
checkTrue(all(split_coverages[["fake_align_3"]]$Mixed[,2] == 8))
}
}
test.metagene_raw_coverage_values_unique_region = function(){
# Create the metagene object.
mg <- metagene2$new(bam_files=fake_bam_design$BAM, regions=fake_bam_region_1_test_region_unique)
# Test genome-wide raw and normalized coverages for
# strand_mode=FALSE
raw_coverages = mg$get_raw_coverages()
norm_coverages = mg$get_normalized_coverages()
for(i in fake_bam_design$BAM) {
obs = raw_coverages[[i]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_coverages[[i]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkIdentical(obs, exp)
obs = norm_coverages[[i]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_rpm[[i]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkTrue(all(abs(obs - exp) < 10e-8))
}
}
test.metagene_group_coverage_values_unique_region = function(){
# Create the metagene object.
mg <- metagene2$new(bam_files=fake_bam_design$BAM, regions=fake_bam_region_1_test_region_unique)
# Test group coverages.
group_coverages = mg$group_coverages(design=fake_bam_design, simplify=FALSE)
# Grouped coverages for unit designs should be the same as the plain ones.
expected_names = file.path(system.file("extdata", package = "metagene2"),
paste0(c("fake_align1", "fake_align2", "fake_align3"), ".bam"))
names(expected_names) = c("fake_align1", "fake_align2", "fake_align3")
for(i in c("fake_align1", "fake_align2", "fake_align3")) {
obs = group_coverages[["*"]][[i]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_coverages[[expected_names[i]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkIdentical(obs, exp)
}
# Test combined group coverages.
obs = group_coverages[["*"]][["fake_align12"]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_coverages[[expected_names["fake_align1"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range] +
fake_bam_expected_coverages[[expected_names["fake_align2"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkIdentical(obs, exp)
obs = group_coverages[["*"]][["fake_align23"]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_coverages[[expected_names["fake_align2"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range] +
fake_bam_expected_coverages[[expected_names["fake_align3"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkIdentical(obs, exp)
obs = group_coverages[["*"]][["fake_align13"]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_coverages[[expected_names["fake_align1"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range] +
fake_bam_expected_coverages[[expected_names["fake_align3"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkIdentical(obs, exp)
obs = group_coverages[["*"]][["fake_align123"]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
exp = fake_bam_expected_coverages[[expected_names["fake_align1"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range] +
fake_bam_expected_coverages[[expected_names["fake_align2"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range] +
fake_bam_expected_coverages[[expected_names["fake_align3"]]][[fake_bam_region_1_chr]][fake_bam_region_1_pos_range]
checkIdentical(obs, exp)
}
test.metagene_bin_coverage_values_unique_region = function(){
# Create the metagene object.
mg <- metagene2$new(bam_files=fake_bam_design$BAM, regions=fake_bam_region_1_test_region_unique, design=fake_bam_design)
# Test binned coverages for bins
# where all values are the same.
bin_coverages = mg$bin_coverages()
checkTrue(all(bin_coverages[["fake_align1"]][1,]==1))
checkTrue(all(bin_coverages[["fake_align2"]][1,]==2))
checkTrue(all(bin_coverages[["fake_align3"]][1,1:50]==4))
checkTrue(all(bin_coverages[["fake_align3"]][1,51:100]==8))
checkTrue(all(bin_coverages[["fake_align123"]][1,1:50]==7))
checkTrue(all(bin_coverages[["fake_align123"]][1,51:100]==11))
# Test binned coverages where some values are means.
bin_coverages = mg$bin_coverages(bin_count=5)
checkTrue(all(bin_coverages[["fake_align1"]][1,]==1))
checkTrue(all(bin_coverages[["fake_align2"]][1,]==2))
checkTrue(bin_coverages[["fake_align3"]][1,1]==4)
checkTrue(bin_coverages[["fake_align3"]][1,2]==4)
checkTrue(bin_coverages[["fake_align3"]][1,3]==6)
checkTrue(bin_coverages[["fake_align3"]][1,4]==8)
checkTrue(bin_coverages[["fake_align3"]][1,5]==8)
}
test.metagene_calculate_ci_values_unique_region = function(){
# Create the metagene object.
mg <- metagene2$new(bam_files=fake_bam_design$BAM, regions=fake_bam_region_1_test_region_unique, design=fake_bam_design, bin_count=5)
full_df = mg$add_metadata()
# Only one region, all confidence intervals should be NA.
checkTrue(all(is.na(full_df$qinf) & is.na(full_df$qsup)))
# There should be five bins.
checkTrue(max(full_df$bin)==5 && length(unique(full_df$bin))==5)
# Bin values should match
checkTrue(full_df[full_df$bin==3 & full_df$design=="fake_align3",]$value == 6)
# All groups should be represented.
checkTrue(all(full_df$design %in% colnames(fake_bam_design)[-1]))
test_plot = mg$produce_metagene()
checkTrue("ggproto" %in% class(test_plot$scales))
}
test.metagene_calculate_ci_sample_count_zero = function(){
# Create the metagene object.
mg <- get_demo_metagene()
full_df = mg$calculate_ci(sample_count=0)
# Sample count is zero, all confidence intervals should be NA.
checkTrue(all(is.na(full_df$qinf) & is.na(full_df$qsup)))
# There should be n_design * n_regions * bin_count rows to the data.frame
n_regions = length(mg$split_coverages_by_regions()[[1]])
n_design = nrow(mg$get_params()$design)
exp_nrow = n_regions * n_design * mg$get_params()$bin_count
checkTrue(nrow(mg$calculate_ci()) == exp_nrow)
}
test.metagene_calculate_ci_resample_profile = function(){
# Create the metagene object.
mg <- get_demo_metagene()
mg$produce_metagene(resampling_strategy="profile")
full_df = mg$add_metadata()
# We should have ci values, not NAs.
checkTrue(all(!is.na(full_df$qinf) & !is.na(full_df$qsup)))
# There should be bin_count bins.
checkTrue(max(full_df$bin)==mg$get_params()$bin_count &&
length(unique(full_df$bin))==mg$get_params()$bin_count)
# There should be n_design * n_regions * bin_count rows to the data.frame
n_regions = length(mg$split_coverages_by_regions()[[1]])
n_design = nrow(mg$get_params()$design)
exp_nrow = n_regions * n_design * mg$get_params()$bin_count
checkTrue(nrow(mg$calculate_ci()) == exp_nrow)
}
##################################################
# Test the metagene2$get_data_frame() function
##################################################
## Valid usage default
test.metagene_get_data_frame_valid_usage_default <- function() {
df <- full_mg$get_data_frame()
regions <- get_demo_regions()
bam_files <- get_demo_bam_files()
checkTrue(is.data.frame(df))
checkTrue(ncol(df) == 8)
checkTrue(nrow(df) == length(regions) * length(bam_files) * full_mg$get_params()$bin_count)
}
### Valid usage subset
test.metagene_get_data_frame_valid_usage_subset <- function() {
single_region = names(get_demo_regions())[1]
three_designs = colnames(full_mg$get_design())[2:4]
df <- full_mg$get_data_frame(region_names = single_region, design_names =three_designs )
checkTrue(is.data.frame(df))
checkTrue(ncol(df) == 8)
checkTrue(nrow(df) == length(single_region) * length(three_designs) * full_mg$get_params()$bin_count)
}
## Valid usage get_data_frame return by copy of data_frame
test.metagene_get_data_frame_check_copy_of_data_frame <- function() {
mg <- full_mg$clone(deep=TRUE)
df1 <- mg$get_data_frame()
#modification of table by reference
df1$c <- 1:1000
#Is table copied and unchanged ?
df2 <- mg$get_data_frame()
checkIdentical(ncol(df1) == ncol(df2), FALSE)
}
## Valid usage no data_frame produced
test.metagene_get_data_frame_valid_usage_no_data_frame <- function() {
mg <- demo_mg$clone(deep=TRUE)
df <- mg$get_data_frame()
checkTrue(is.null(df))
df_subset <- mg$get_data_frame(get_demo_regions()[1],
get_demo_bam_files()[1:2])
checkTrue(is.null(df_subset))
}
##################################################
# Test the metagene2$get_plot() function
##################################################
## Valid case no graph
test.metagene_get_plot_valid_case_no_graph <- function() {
mg <- demo_mg$clone(deep=TRUE)
plot <- mg$get_plot()
checkTrue(is.null(plot))
}
## Valid case graph
test.metagene_get_plot_valid_case_graph <- function() {
plot <- full_mg$get_plot()
checkTrue(is(plot, c("gg", "ggplot")))
}
##################################################
# Test the metagene2$get_raw_coverages() function
##################################################
exp_raw <- GenomicAlignments::readGAlignments(bam_files[1])
exp_raw <- GenomicAlignments::coverage(exp_raw)
## Default filenames
test.metagene_get_raw_coverages <- function() {
obs <- full_mg$get_raw_coverages()[[1]]
checkTrue(all(vapply(1:length(obs),
function(i) all(obs[[i]]==exp_raw[[i]]),
logical(1))))
}
##################################################
# Test the metagene2$get_normalized_coverages() function
##################################################
count <- Rsamtools::countBam(bam_files[1])$records
weight <- 1 / (count / 1000000)
exp_norm <- exp_raw * weight
## Default filenames
test.metagene_get_normalized_coverages <- function() {
mg <- demo_mg$clone(deep=TRUE)
obs <- mg$get_normalized_coverages()[[1]]
checkTrue(all(vapply(1:length(obs),
function(i) all(obs[[i]]==exp_norm[[i]]),
logical(1))))
}
# Invalid bin_count class
test.metagene_invalid_bin_count <- function() {
util_test_invalid_param_value("bin_count", "a", "bin_coverages", "bin_count must be a positive integer")
util_test_invalid_param_value("bin_count", -1, "bin_coverages", "bin_count must be a positive integer")
util_test_invalid_param_value("bin_count", 1.2, "bin_coverages", "bin_count must be a positive integer")
}
# Invalid normalization class
test.metagene_invalid_normalization <- function() {
util_test_invalid_param_value("normalization", 1234, "group_coverages", 'normalization must be NULL, "RPM" or "log2_ratio".')
util_test_invalid_param_value("normalization", "CSI", "group_coverages", 'normalization must be NULL, "RPM" or "log2_ratio".')
# Invalid log2_ratio normalization without a valid design
util_test_invalid_param_value("normalization", "log2_ratio", "group_coverages",
"log2_ratio normalization requires all designs to have at least one control. Please update the design parameter.")
}
### Invalid alpha value
test.metagene_invalid_alpha <- function(){
util_test_invalid_param_value("alpha", 'test', "calculate_ci", "is.numeric(alpha) is not TRUE")
util_test_invalid_param_value("alpha", -0.8, "calculate_ci", "alpha >= 0 & alpha <= 1 is not TRUE")
}
### Invalid sample_count class
test.metagene_invalid_sample_count <- function(){
util_test_invalid_param_value("sample_count", 'test', "calculate_ci", "is.numeric(sample_count) is not TRUE")
util_test_invalid_param_value("sample_count", -10, "calculate_ci", "sample_count >= 0 is not TRUE")
}
test.metagene_replace_metadata <- function() {
regions_gr <- unlist(get_demo_regions())
demo_metadata = data.frame(BedName=names(regions_gr),
EvenStart=ifelse((start(regions_gr) %% 2) == 0, "Even", "Odd"),
Strand=strand(regions_gr))
mg <- metagene2$new(regions = get_demo_regions(),
region_metadata=demo_metadata,
bam_files = get_demo_bam_files(),
assay='chipseq')
mg$produce_metagene(split_by=c("EvenStart", "Strand"))
test_meta = mg$get_regions_metadata()
# Add column and replace. This should work fine.
test_meta$Foo = c("Foo", "Bar")
mg$replace_region_metadata(test_meta)
# Remove region_name column. It should be added back.
old_region_name = mg$get_regions_metadata()$region_name
test_meta = test_meta[setdiff(colnames(test_meta), "region_name")]
mg$replace_region_metadata(test_meta)
checkTrue(all(mg$get_regions_metadata()$region_name==old_region_name))
# Remove one of the split_by columns. Caches should be invalidated.
test_meta = test_meta[setdiff(colnames(test_meta), "EvenStart")]
mg$replace_region_metadata(test_meta)
checkIdentical(mg$get_params()[["split_by"]], "region_name")
# Try using a data-frame without enough rows. We should get an error message.
obs <- tryCatch(mg$replace_region_metadata(test_meta[1:4,]),
error = conditionMessage)
exp <- "region_metadata must have one row per region."
checkIdentical(obs, exp)
}
test.metagene_strand_specific <- function() {
plus_file = file.path(system.file("extdata", package = "metagene2"), "fake_align1.bam")
minus_file = file.path(system.file("extdata", package = "metagene2"), "fake_align1_minus.bam")
test_region_list=GRangesList(Plus=GRanges("chr1:1000001-1000100:+"),
Minus=GRanges("chr1:1001001-1001100:-"))
mg = metagene2$new(bam_files=c(Plus=plus_file, Minus=minus_file),
regions=test_region_list,
strand_specific=TRUE)
# Make sure raw coverages are strand-specific.
checkTrue(all(mg$get_raw_coverages()[["+"]][[plus_file]][test_region_list[["Plus"]]] == 1))
checkTrue(all(mg$get_raw_coverages()[["-"]][[plus_file]][test_region_list[["Plus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["+"]][[minus_file]][test_region_list[["Plus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["-"]][[minus_file]][test_region_list[["Plus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["+"]][[plus_file]][test_region_list[["Minus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["-"]][[plus_file]][test_region_list[["Minus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["+"]][[minus_file]][test_region_list[["Minus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["-"]][[minus_file]][test_region_list[["Minus"]]] == 1))
# Make sure split coverages are strand-specific
split_matrices = mg$split_coverages_by_regions()
checkTrue(all(split_matrices[["Plus"]][["Plus"]] == 1))
checkTrue(all(split_matrices[["Plus"]][["Minus"]] == 0))
checkTrue(all(split_matrices[["Minus"]][["Plus"]] == 0))
checkTrue(all(split_matrices[["Minus"]][["Minus"]] == 1))
# Now test with inverted strand.
mg = metagene2$new(bam_files=c(Plus=plus_file, Minus=minus_file),
regions=test_region_list,
strand_specific=TRUE, invert_strand=TRUE)
# Make sure raw coverages are strand-specific.
checkTrue(all(mg$get_raw_coverages()[["+"]][[plus_file]][test_region_list[["Plus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["-"]][[plus_file]][test_region_list[["Plus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["+"]][[minus_file]][test_region_list[["Plus"]]] == 1))
checkTrue(all(mg$get_raw_coverages()[["-"]][[minus_file]][test_region_list[["Plus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["+"]][[plus_file]][test_region_list[["Minus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["-"]][[plus_file]][test_region_list[["Minus"]]] == 1))
checkTrue(all(mg$get_raw_coverages()[["+"]][[minus_file]][test_region_list[["Minus"]]] == 0))
checkTrue(all(mg$get_raw_coverages()[["-"]][[minus_file]][test_region_list[["Minus"]]] == 0))
# Make sure split coverages are strand-specific
split_matrices = mg$split_coverages_by_regions()
checkTrue(all(split_matrices[["Plus"]][["Plus"]] == 0))
checkTrue(all(split_matrices[["Plus"]][["Minus"]] == 1))
checkTrue(all(split_matrices[["Minus"]][["Plus"]] == 1))
checkTrue(all(split_matrices[["Minus"]][["Minus"]] == 0))
}
test.metagene_extend_reads_basic <- function() {
mg <- metagene2$new(bam_files=fake_bam_design$BAM,
regions=fake_bam_region_1_test_region_unique,
design=fake_bam_design,
extend_reads=100)
# No reads overlap in fake_align1. The reads are all 50nt,
# on the + strand, and the coverage is 1 everywhere.
# So we should have a coverage of 1 for the first 50 nucleotides,
# of 2 for the rest. Coverage should extend on the + strand
checkTrue(all(mg$get_raw_coverages()[[fake_bam_design$BAM[1]]][["chr1"]][1000000:1000049] == 1))
checkTrue(all(mg$get_raw_coverages()[[fake_bam_design$BAM[1]]][["chr1"]][1000050:1002500] == 2))
}
test.metagene_extend_reads_over_edge <- function() {
# Some reads might not overlap the specified regions, yet extend into them.
# Those must be calculated into the coverage.
edge_region = flank(fake_bam_region_1_test_region_unique, width=50, start=FALSE)
mg <- metagene2$new(bam_files=fake_bam_design$BAM,
regions=edge_region,
design=fake_bam_design,
extend_reads=100, bin_count=25)
checkTrue(all(mg$get_raw_coverages()[[fake_bam_design$BAM[1]]][["chr1"]][start(edge_region):end(edge_region)] == 1))
}
test.metagene_extend_reads_directionality <- function() {
# Reads should be extended only in their specified direction.
# So if we go upstream from the first read in the bam, coverage should be 0.
edge_region = flank(fake_bam_region_1_test_region_unique, width=50, start=TRUE)
mg <- metagene2$new(bam_files=fake_bam_design$BAM,
regions=edge_region,
design=fake_bam_design,
extend_reads=100)
checkTrue(all(mg$get_raw_coverages()[[fake_bam_design$BAM[1]]][["chr1"]][start(edge_region):end(edge_region)] == 0))
}
test.metagene_extend_reads_no_shrink <- function() {
# Reads should not be shrunk when expand_reads is smaller than read size.
# mg <- metagene2$new(bam_files=fake_bam_design$BAM,
# regions=fake_bam_region_1_test_region_unique,
# design=fake_bam_design,
# extend_reads=35)
# checkTrue(all(mg$get_raw_coverages()[[fake_bam_design$BAM[1]]][["chr1"]][1000000:1002500] == 1))
TRUE
}
test.metagene_extend_reads_beyond_chr_end <- function() {
# Expanding past the end of chromosomes (Or past the start) should
# not raise any errors.
mg <- metagene2$new(bam_files=fake_bam_design$BAM,
regions=fake_bam_region_1_test_region_unique,
design=fake_bam_design,
extend_reads=195471971, bin_count=100)
mg$produce_metagene()
}
test.metagene_discontiguous <- function() {
fake_genes = GRangesList(
Single= GRanges(c("chr1:1000001-1000100:+")),
Multiple= GRanges(c("chr1:1000001-1000100:+",
"chr1:1000201-1000300:+")),
MultipleValues=GRanges(c("chr1:1000001-1000100:+",
"chr1:1002501-1002600:+")))
mg <- metagene2$new(bam_files=fake_bam_design$BAM,
regions=fake_genes,
design=fake_bam_design,
region_mode="stitch")
mg$produce_metagene()
# Make sure each design has binned coverages for each gene.
split_coverages = mg$split_coverages_by_regions()
for(design_coverage in split_coverages) {
checkTrue(length(design_coverage) == length(fake_genes))
}
# Check numerical values of binned coverages.
checkTrue(all(split_coverages[["fake_align1"]][["Single"]] == 1))
checkTrue(all(split_coverages[["fake_align1"]][["Multiple"]] == 1))
checkTrue(all(split_coverages[["fake_align1"]][["MultipleValues"]] == 1))
checkTrue(all(split_coverages[["fake_align3"]][["Single"]] == 4))
checkTrue(all(split_coverages[["fake_align3"]][["Multiple"]] == 4))
checkTrue(all(split_coverages[["fake_align3"]][["MultipleValues"]][1,1:50] == 4))
checkTrue(all(split_coverages[["fake_align3"]][["MultipleValues"]][1,51:100] == 8))
# Try grouping genes using region_metadata.
region_metadata = data.frame(CustomGroup=c("A", "A", "B"))
mg$replace_region_metadata(region_metadata)
mg$produce_metagene(split_by="CustomGroup")
split_coverages = mg$split_coverages_by_regions()
checkTrue(all(split_coverages[["fake_align1"]][["A"]] == 1))
checkTrue(all(split_coverages[["fake_align1"]][["B"]] == 1))
checkTrue(all(split_coverages[["fake_align3"]][["A"]] == 4))
checkTrue(all(split_coverages[["fake_align3"]][["B"]][1,1:50] == 4))
checkTrue(all(split_coverages[["fake_align3"]][["B"]][1,51:100] == 8))
}
test.metagene_bin_count_larger_than_regions <- function() {
mg <- metagene2$new(bam_files=get_demo_bam_files(),
regions=get_demo_regions(),
bin_count=1000000)
# Test for contiguous regions.
obs = tryCatch(mg$bin_coverages(), error = conditionMessage)
smallest_region = min(width(unlist(get_demo_regions())))
exp = sprintf(sprintf(metagene2:::BIN_COUNT_TOO_LARGE_ERROR, 1000000,
smallest_region, smallest_region))
checkTrue(obs==exp)
# Test for discontiguous regions.
region_test = GRangesList(
Test1=GRanges(c("chr1:1000001-1000100:+",
"chr1:1000301-1000400:+")),
Test2=GRanges(c("chr1:1000001-1000200:+",
"chr1:1000301-1000500:+")))
mg <- metagene2$new(bam_files=get_demo_bam_files(),
regions=region_test,
bin_count=200, region_mode="stitch")
# Individual region minimum is 100, but the sum of stitched regions is 200,
# so a 200 bin_count should not fail.
mg$bin_coverages()
# However, 1000000 should fail.
obs = tryCatch(mg$bin_coverages(bin_count=1000000), error = conditionMessage)
smallest_region = min(unlist(lapply(region_test, function(x) { sum(width(x)) })))
exp = sprintf(sprintf(metagene2:::BIN_COUNT_TOO_LARGE_ERROR, 1000000,
smallest_region, smallest_region))
checkTrue(obs==exp)
}
test.metagene_invalid_extend_paired_end <- function() {
obs = tryCatch(metagene2$new(bam_files=get_demo_bam_files(),
regions=get_demo_regions(),
paired_end=TRUE, extend_reads=200),
error=conditionMessage)
exp = "extend_reads and paired_end cannot both be set at the same time."
checkTrue(obs==exp)
}
test.metagene_log2_ratio <- function() {
mg = metagene2$new(bam_files=fake_bam_files,
regions=fake_bam_region_1_test_region_unique)
fake_design_ratio = data.frame(bam=fake_bam_files,
FB1vsFB2=c(1,2,0),
FB1vsFB3=c(1,0,2))
test_coverages = mg$group_coverages(normalization="log2_ratio",
design=fake_design_ratio)
# Fake BAM 1 and Fake BAM 2 have the same coverages, so the ratio
# should be 0
checkTrue(all(test_coverages[["FB1vsFB2"]][fake_bam_region_1_test_region_unique]==0))
# Fake BAM 1 has coverage of 1, with 100 reads, so an RPM of (1 / 100) * 10^6 = 10000.
# Fake BAM 3 has coverage of 4 (first 2500 nt)_and 8 (last 2500 nt), and a total of
# 600 reads, so RPMs of (4 / 600) * 10^6 = 6666.667 and 13333.33.
# The log2_ratio formula is log2(input RPM + 1 / control RPM + 1)
# So the expected values are log2(10000 + 1 / 6666.667 + 1)=0.5848903 and
# log2(10000 + 1 / 13333.33 + 1)=-0.4150011, plus accounting for rounding errors.
cov_run = test_coverages[["FB1vsFB3"]][fake_bam_region_1_test_region_unique]$chr1
cov_run_1 = cov_run[1:2500]
cov_run_2 = cov_run[2501:5000]
delta_val = 0.00001
cov_run_1_val = 0.5848903
checkTrue(all(cov_run_1 > (cov_run_1_val - delta_val) & cov_run_1 < (cov_run_1_val + delta_val)))
cov_run_2_val = -0.4150011
checkTrue(all(cov_run_2 > (cov_run_2_val - delta_val) & cov_run_2 < (cov_run_2_val + delta_val)))
}
# This test checks to see that strand_specific discontiguous mode works
test.metagene_strand_specific_discontiguous <- function() {
mg = metagene2$new(bam_files=get_demo_rna_bam_files(),
regions=get_demo_rna_regions(),
strand_specific=TRUE,
region_mode="stitch")
mg$produce_metagene()
}
# This test checks for an issue that occured if strand_specific is set to TRUE
# but one strand possesses no regions.
# See https://github.com/ArnaudDroitLab/metagene2/issues/9
test.metagene_strand_specific_no_region_on_strand <- function() {
mg = metagene2$new(bam_files=get_demo_bam_files(),
regions=get_demo_regions()[[1]],
strand_specific=TRUE)
mg$produce_metagene()
}
test.region_filter <- function() {
mg = get_demo_metagene()
mg$produce_metagene(region_filter=rlang::quo(region_name=="list1"))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.