Nothing
# Setup ----
# Create a minimal test dataset with options for small vs full size
create_test_data <- function(small = FALSE) {
data("example_data", package = "SomaDataIO")
if (small) {
# Ultra-small dataset for most tests: 3 samples, subset of analytes
sample_indices <- which(example_data$SampleType == "Sample")[1:2]
qc_indices <- which(example_data$SampleType == "QC")[1:1]
subset_indices <- c(sample_indices, qc_indices)
# Select subset of analytes from different dilution groups for speed
analytes <- getAnalytes(example_data)
analyte_info <- getAnalyteInfo(example_data)
# Get first 50 analytes from each dilution group for faster processing
selected_analytes <- c()
for (dil in unique(analyte_info$Dilution)) {
dil_analytes <- analytes[analyte_info$Dilution == dil]
selected_analytes <- c(selected_analytes, head(dil_analytes, 50))
}
test_adat <- example_data[subset_indices, c(getMeta(example_data), selected_analytes)]
} else {
# Standard test dataset: 16 samples, all analytes (for comprehensive tests)
sample_indices <- which(example_data$SampleType == "Sample")[1:12]
qc_indices <- which(example_data$SampleType == "QC")[1:4]
subset_indices <- c(sample_indices, qc_indices)
test_adat <- example_data[subset_indices, ]
}
# Modify header to simulate pre-ANML state (remove ANML steps)
header_meta <- attr(test_adat, "Header.Meta")
if (!is.null(header_meta) && !is.null(header_meta$HEADER)) {
# Remove ANML and median normalization steps to simulate pre-processed state
header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt, plateScale, Calibration"
attr(test_adat, "Header.Meta") <- header_meta
}
# Return subset with representative samples
test_adat
}
test_data <- create_test_data(small = TRUE)
test_data_full <- create_test_data(small = FALSE)
# Testing ----
test_that("`medianNormalize` Method 1: Internal reference (default)", {
# Test default internal reference method
expect_no_error(
result <- medianNormalize(test_data, verbose = FALSE)
)
# Check result structure
expect_true(is.soma_adat(result))
norm_cols <- grep("^NormScale_", names(result), value = TRUE)
expect_equal(length(norm_cols), 3) # Should have 3 dilution groups
expect_true(all(norm_cols %in% names(result)))
# Test scale factor output
expect_equal(result$NormScale_20[1:3], c(0.971895, 1.029779, 1.199944), tolerance = 0.000001)
expect_equal(result$NormScale_0_005[1:3], c(1.002922, 0.997176, 1.058289), tolerance = 0.000001)
expect_equal(result$NormScale_0_5[1:3], c(0.973205, 1.028323, 1.104745), tolerance = 0.000001)
# Test specific SeqId columns
expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 501.5))
expect_equal(result$seq.10001.7[1:3], c(301.4, 302.2, 207.4))
# Check header metadata
result_header <- attr(result, "Header.Meta")$HEADER
expect_true(grepl("MedNormSMP", result_header$ProcessSteps))
expect_equal(result_header$NormalizationAlgorithm, "MedNorm")
expect_true(grepl("intraplate, crossplate", result_header$MedNormReference))
})
test_that("`medianNormalize` Method 2: Reference from another ADAT", {
# Create a minimal reference ADAT from sample subset
ref_adat <- test_data[1:2, ] # Use first 2 samples as reference
expect_no_error(
result <- medianNormalize(test_data, reference = ref_adat, verbose = FALSE)
)
# Check result structure
expect_true(is.soma_adat(result))
norm_cols <- grep("^NormScale_", names(result), value = TRUE)
expect_equal(length(norm_cols), 3) # Should have 3 dilution groups
# Test scale factor output
expect_equal(result$NormScale_20[1:3], c(0.9719, 1.0298, 1.1999), tolerance = 0.0001)
expect_equal(result$NormScale_0_005[1:3], c(1.0029, 0.9972, 1.0583), tolerance = 0.0001)
expect_equal(result$NormScale_0_5[1:3], c(0.9732, 1.0283, 1.1047), tolerance = 0.0001)
# Test specific SeqId columns
expect_equal(result$seq.10000.28[1:3], c(463.1, 488.5, 501.5), tolerance = 0.1)
expect_equal(result$seq.10008.43[1:3], c(546, 558, 510.1), tolerance = 0.1)
# Check header metadata
result_header <- attr(result, "Header.Meta")$HEADER
expect_true(grepl("external_adat", result_header$MedNormReference))
expect_true(grepl("crossplate", result_header$MedNormReference))
})
test_that("`medianNormalize` External reference data.frame", {
# Create reference data.frame with SeqId-Reference format
analyte_info <- getAnalyteInfo(test_data)
ref_data <- data.frame(
SeqId = analyte_info$SeqId[1:15], # Use first 15 SeqIds
Reference = runif(15, 2000, 4000), # Random reference values
stringsAsFactors = FALSE
)
expect_no_error(
result <- medianNormalize(test_data, reference = ref_data, verbose = FALSE)
)
# Check result structure
expect_true(is.soma_adat(result))
norm_cols <- grep("^NormScale_", names(result), value = TRUE)
expect_equal(length(norm_cols), 3) # Should have 3 dilution groups
# Check header metadata
result_header <- attr(result, "Header.Meta")$HEADER
expect_true(grepl("external_data", result_header$MedNormReference))
expect_true(grepl("crossplate", result_header$MedNormReference))
# Check that medNormSMP_ReferenceRFU field was added with proper formatting
result_analyte_info <- getAnalyteInfo(result)
expect_true("medNormSMP_ReferenceRFU" %in% names(result_analyte_info))
# Check reference values are rounded to 2 decimal places
ref_values <- result_analyte_info$medNormSMP_ReferenceRFU
rounded_values <- round(ref_values, 2)
expect_equal(ref_values[!is.na(ref_values)], rounded_values[!is.na(rounded_values)])
})
test_that("`medianNormalize` validates input requirements", {
# Test with non-soma_adat object
expect_error(
medianNormalize(data.frame(a = 1:3, b = 4:6)),
"`adat` must be a class `soma_adat` object"
)
# Test with missing hybridization normalization
test_data_no_hyb <- test_data
header_meta <- list(HEADER = list(ProcessSteps = "SomeOtherStep"))
attr(test_data_no_hyb, "Header.Meta") <- header_meta
expect_error(
medianNormalize(test_data_no_hyb),
"Hybrid normalization step not detected"
)
# Test with missing normalization scale factors
test_data_no_norm <- test_data |>
dplyr::select(-c(NormScale_0_005, NormScale_0_5, NormScale_20))
header_meta <- list(HEADER = list(ProcessSteps = "Hyb Normalization"))
attr(test_data_no_norm, "Header.Meta") <- header_meta
expect_error(
medianNormalize(test_data_no_norm),
"No normalization scale factor columns found"
)
# Test with invalid adat object (not soma_adat)
expect_error(
medianNormalize(data.frame(wrong = 1:3), verbose = FALSE),
"`adat` must be a class `soma_adat` object"
)
# Test with invalid reference data.frame (missing required columns)
invalid_ref <- data.frame(Wrong = c("10000-28", "10001-7"), Column = c(1000, 2000))
expect_error(
medianNormalize(test_data, reference = invalid_ref, verbose = FALSE),
"Reference data must contain 'SeqId' and 'Reference' columns"
)
# Test with invalid reference type (non-data.frame)
expect_error(
medianNormalize(test_data, reference = "invalid_string", verbose = FALSE),
"Invalid reference type"
)
# Test with numeric input
expect_error(
medianNormalize(test_data, reference = 123, verbose = FALSE),
"Invalid reference type"
)
})
test_that("`medianNormalize` external reference data.frame validation", {
# Test with invalid SeqId format (should error when no matches found)
invalid_seqid_ref <- data.frame(
SeqId = c("invalid_format_1", "invalid_format_2"), # Invalid SeqId format
Reference = c(2500, 1800),
stringsAsFactors = FALSE
)
expect_error(
medianNormalize(test_data, reference = invalid_seqid_ref, verbose = FALSE),
"No matching SeqIds or dilution groups found in reference data"
)
# Test with empty reference data
empty_ref <- data.frame(
SeqId = character(0),
Reference = numeric(0),
stringsAsFactors = FALSE
)
expect_error(
medianNormalize(test_data, reference = empty_ref, verbose = FALSE),
"No matching SeqIds or dilution groups found in reference data"
)
})
test_that("`medianNormalize` handles grouping correctly", {
# Test grouping by Sex
expect_no_error(
result1 <- medianNormalize(test_data, by = "Sex", verbose = FALSE)
)
# Check result structure
expect_true(is.soma_adat(result1))
norm_cols <- grep("^NormScale_", names(result1), value = TRUE)
expect_equal(length(norm_cols), 3) # Should have 3 dilution groups
# Test scale factor output - will vary based on grouping by sex
expect_equal(result1$NormScale_20[1:3], c(0.971895, 1.029779, 1.199944), tolerance = 0.000001)
expect_equal(result1$NormScale_0_005[1:3], c(1.002922, 0.997176, 1.058289), tolerance = 0.000001)
expect_equal(result1$NormScale_0_5[1:3], c(0.973205, 1.028323, 1.104745), tolerance = 0.000001)
# Test specific SeqId columns
expect_equal(result1$seq.10000.28[1:3], c(463.1, 488.5, 501.5))
expect_equal(result1$seq.10008.43[1:3], c(546.0, 558.0, 510.1))
# Test grouping by multiple variables - use full example data
if ("PlateId" %in% names(test_data_full)) {
expect_no_error(
result2 <- medianNormalize(test_data_full, by = c("PlateId", "SampleType"), verbose = FALSE)
)
# Check result structure
expect_true(is.soma_adat(result2))
norm_cols <- grep("^NormScale_", names(result2), value = TRUE)
expect_equal(length(norm_cols), 3) # Should have 3 dilution groups
# Test scale factor output
expect_equal(result2$NormScale_20[1:3], c(1.0678, 1.0571, 1.0917), tolerance = 0.001)
expect_equal(result2$NormScale_0_005[1:3], c(1.0225, 1.0012, 1.0122), tolerance = 0.001)
expect_equal(result2$NormScale_0_5[1:3], c(1.1031, 1.0544, 1.1087), tolerance = 0.001)
# Test specific SeqId columns
expect_equal(result2$seq.10000.28[1:3], c(508.8, 501.5, 453.7))
expect_equal(result2$seq.10008.43[1:3], c(599.9, 572.8, 462.8))
}
# Test error with non-existent grouping column
expect_error(
medianNormalize(test_data, by = "NonExistentColumn", verbose = FALSE),
"Grouping column\\(s\\) not found"
)
})
test_that("`medianNormalize` produces expected verbose output", {
# specific verbose output messages
expect_snapshot(
result <- medianNormalize(test_data, verbose = TRUE)
)
# no output
expect_message(
result <- medianNormalize(test_data, verbose = FALSE),
NA # NA indicates no output expected
)
})
test_that("`medianNormalize` default behavior with by = NULL", {
# Test that the new default (by = NULL) works correctly
expect_no_error(
result_default <- medianNormalize(test_data, verbose = FALSE)
)
# Test that explicitly setting by = NULL gives the same result
expect_no_error(
result_explicit <- medianNormalize(test_data, by = NULL, verbose = FALSE)
)
# Both should produce identical results
expect_equal(result_default, result_explicit)
# Check that all samples are treated as one group (no grouping)
expect_true(is.soma_adat(result_default))
norm_cols <- grep("^NormScale_", names(result_default), value = TRUE)
expect_equal(length(norm_cols), 3) # Should have 3 dilution groups
})
test_that("`medianNormalize` errors on already normalized data", {
# Create ANML-like test data by modifying test_data
anml_test_data <- test_data
# Modify header to simulate ANML-processed data
header_meta <- attr(anml_test_data, "Header.Meta")
header_meta$HEADER$ProcessSteps <- "Raw RFU, Hyb Normalization, medNormInt (SampleId), plateScale, Calibration, anmlQC, qcCheck, anmlSMP"
header_meta$HEADER$NormalizationAlgorithm <- "ANML"
attr(anml_test_data, "Header.Meta") <- header_meta
# Add ANML-like normalization scale factors (non-1.0 values to simulate ANML)
anml_test_data$NormScale_20 <- c(1.15, 0.95, 1.08)
anml_test_data$NormScale_0_5 <- c(1.22, 0.88, 1.12)
anml_test_data$NormScale_0_005 <- c(0.93, 1.07, 0.98)
# Add ANMLFractionUsed columns to mimic ANML data
anml_test_data$ANMLFractionUsed_20 <- c(0.85, 0.92, 0.78)
anml_test_data$ANMLFractionUsed_0_5 <- c(0.88, 0.91, 0.83)
anml_test_data$ANMLFractionUsed_0_005 <- c(0.90, 0.87, 0.85)
# Test that medianNormalize errors on already normalized data
expect_error(
medianNormalize(anml_test_data, verbose = FALSE),
"Study samples appear to already be normalized"
)
# Test that the error message suggests reverseMedianNormalize
expect_error(
medianNormalize(anml_test_data, verbose = FALSE),
"reverseMedianNormalize"
)
})
test_that("`medianNormalize` handles SeqId reference format properly", {
# Test with SeqId-Reference format
analytes <- getAnalytes(test_data)
seqids <- getAnalyteInfo(test_data)$SeqId[1:10] # First 10 SeqIds
ref_data <- data.frame(
SeqId = seqids,
Reference = runif(10, 1000, 5000) # Random reference values
)
expect_no_error(
result <- medianNormalize(test_data, reference = ref_data, verbose = FALSE)
)
# Check that result is valid
expect_true(is.soma_adat(result))
# Check that medNormSMP_ReferenceRFU field was added with 2 decimal places
analyte_info <- getAnalyteInfo(result)
expect_true("medNormSMP_ReferenceRFU" %in% names(analyte_info))
# Check reference values are rounded to 2 decimal places
ref_values <- analyte_info$medNormSMP_ReferenceRFU
rounded_values <- round(ref_values, 2)
expect_equal(ref_values[!is.na(ref_values)], rounded_values[!is.na(rounded_values)])
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.