#------------------------------------------------------------------------------#
# Global vars
#------------------------------------------------------------------------------#
# Shared input fixture for the purity_filter tests below.
# The same 8 integration sites are listed once per cell marker (5 markers,
# 40 rows in total), so most columns are plain repetitions of an 8-element
# pattern. All rows share chromosome "1" and time point "01".
df <- tibble::tibble(
  chr = rep("1", 40),
  integration_locus = rep(
    c(121249, 251227, 645551, 732938, 775536, 846681, 1029785, 1036835),
    times = 5
  ),
  strand = rep(c("+", "+", "+", "+", "+", "+", "-", "+"), times = 5),
  GeneName = rep(
    c(
      "LOC729737", "LOC100132287", "LOC100133331", "LOC100288069",
      "LINC01128", "LOC100130417", "C1orf159", "C1orf159"
    ),
    times = 5
  ),
  GeneStrand = rep(c("-", "+", "-", "-", "+", "-", "-", "-"), times = 5),
  # One block of 8 consecutive rows per marker
  CellMarker = rep(c("CD13", "CD14", "CD19", "CD3", "CD34"), each = 8),
  # CD13 and CD34 rows are "BM", the 24 rows in between are "PB"
  Tissue = rep(c("BM", "PB", "BM"), times = c(8, 24, 8)),
  TimePoint = rep("01", 40),
  Value = c(
    1, 1, 1, 1000, 1, 10, 3, 3, # CD13 / BM
    1, 1000, 1000, 500, 1, 12, 30, 1000, # CD14 / PB
    1, 1, 500, 1, 10, 14, 30, 90, # CD19 / PB
    1, 1, 1, 1, 10, 9, 30, 90, # CD3 / PB
    1000, 1, 1, 300, 10, 8, 1000, 3 # CD34 / BM
  )
)
# Table the "sc" test expects purity_filter() to return for `df` when the raw
# `Value` column is used. Rows are ordered locus by locus; columns that are
# constant within a locus are written as run lengths, the remaining ones are
# spelled out with one locus per line.
expected_output_sc <- tibble::tibble(
  chr = rep("1", 19),
  integration_locus = rep(
    c(121249, 251227, 645551, 732938, 775536, 846681, 1029785, 1036835),
    times = c(1, 1, 2, 3, 5, 5, 1, 1)
  ),
  # Only the single row for locus 1029785 is on the "-" strand
  strand = rep(c("+", "-", "+"), times = c(17, 1, 1)),
  GeneName = rep(
    c(
      "LOC729737", "LOC100132287", "LOC100133331", "LOC100288069",
      "LINC01128", "LOC100130417", "C1orf159"
    ),
    times = c(1, 1, 2, 3, 5, 5, 2)
  ),
  GeneStrand = rep(c("-", "+", "-", "+", "-"), times = c(1, 1, 5, 5, 7)),
  TimePoint = rep("01", 19),
  CellMarker = c(
    "CD34", # 121249
    "CD14", # 251227
    "CD14", "CD19", # 645551
    "CD13", "CD14", "CD34", # 732938
    "CD13", "CD14", "CD19", "CD3", "CD34", # 775536
    "CD13", "CD14", "CD19", "CD3", "CD34", # 846681
    "CD34", # 1029785
    "CD14" # 1036835
  ),
  Tissue = c(
    "BM", # 121249
    "PB", # 251227
    "PB", "PB", # 645551
    "BM", "PB", "BM", # 732938
    "BM", "PB", "PB", "PB", "BM", # 775536
    "BM", "PB", "PB", "PB", "BM", # 846681
    "BM", # 1029785
    "PB" # 1036835
  ),
  Value = c(
    1000, # 121249
    1000, # 251227
    1000, 500, # 645551
    1000, 500, 300, # 732938
    1, 1, 10, 10, 10, # 775536
    10, 12, 14, 9, 8, # 846681
    1000, # 1029785
    1000 # 1036835
  )
)
# Table the "abundance" test expects purity_filter() to return when it is run
# on the output of compute_abundance() with value column
# "Value_PercAbundance". Rows are ordered locus by locus; columns that are
# constant within a locus are written as run lengths, the remaining ones are
# spelled out with one locus per line.
expected_output_ab <- tibble::tibble(
  chr = rep("1", 18),
  integration_locus = rep(
    c(121249, 251227, 645551, 732938, 775536, 846681, 1029785, 1036835),
    times = c(1, 1, 2, 3, 2, 3, 3, 3)
  ),
  # Only the three rows for locus 1029785 are on the "-" strand
  strand = rep(c("+", "-", "+"), times = c(12, 3, 3)),
  GeneName = rep(
    c(
      "LOC729737", "LOC100132287", "LOC100133331", "LOC100288069",
      "LINC01128", "LOC100130417", "C1orf159"
    ),
    times = c(1, 1, 2, 3, 2, 3, 6)
  ),
  GeneStrand = rep(c("-", "+", "-", "+", "-"), times = c(1, 1, 5, 2, 9)),
  TimePoint = rep("01", 18),
  CellMarker = c(
    "CD34", # 121249
    "CD14", # 251227
    "CD14", "CD19", # 645551
    "CD13", "CD14", "CD34", # 732938
    "CD19", "CD3", # 775536
    "CD13", "CD19", "CD3", # 846681
    "CD19", "CD3", "CD34", # 1029785
    "CD14", "CD19", "CD3" # 1036835
  ),
  Tissue = c(
    "BM", # 121249
    "PB", # 251227
    "PB", "PB", # 645551
    "BM", "PB", "BM", # 732938
    "PB", "PB", # 775536
    "BM", "PB", "PB", # 846681
    "PB", "PB", "BM", # 1029785
    "PB", "PB", "PB" # 1036835
  ),
  Value = c(
    # 121249
    43.0477830391735,
    # 251227
    28.2167042889391,
    # 645551
    28.2167042889391, 77.2797527047913,
    # 732938
    98.0392156862745, 14.1083521444695, 12.914334911752,
    # 775536
    1.54559505409583, 6.99300699300699,
    # 846681
    0.980392156862745, 2.16383307573416, 6.29370629370629,
    # 1029785
    4.63678516228748, 20.979020979021, 43.0477830391735,
    # 1036835
    28.2167042889391, 13.9103554868624, 62.9370629370629
  )
)
#------------------------------------------------------------------------------#
# Tests
#------------------------------------------------------------------------------#
# Runs purity_filter() directly on the raw `Value` column of the shared `df`
# fixture and compares the result with `expected_output_sc`.
test_that("purity_filter produces expected output - sc", {
  agg_key <- c("CellMarker", "Tissue", "TimePoint")
  grp_key <- c("CellMarker", "Tissue")

  result <- purity_filter(
    x = df,
    aggregation_key = agg_key,
    group_key = grp_key,
    min_value = 0,
    impurity_threshold = 10,
    by_timepoint = TRUE,
    value_column = "Value"
  )

  expect_equal(result, expected_output_sc)
})
# Computes abundance on `df` first, then runs purity_filter() on the
# "Value_PercAbundance" column of that result and compares it with
# `expected_output_ab`.
test_that("purity_filter produces expected output - abundance", {
  agg_key <- c("CellMarker", "Tissue", "TimePoint")

  # The same key is used both for the abundance computation and for the
  # aggregation step of the purity filter.
  with_abundance <- compute_abundance(
    x = df,
    columns = "Value",
    key = agg_key
  )

  result <- purity_filter(
    x = with_abundance,
    aggregation_key = agg_key,
    group_key = c("CellMarker", "Tissue"),
    min_value = 0,
    impurity_threshold = 10,
    by_timepoint = TRUE,
    value_column = "Value_PercAbundance"
  )

  expect_equal(result, expected_output_ab)
})
# Runs purity_filter() with a group key ("HematoLineage") that is not a column
# of `df`; the expected table carries a HematoLineage column in place of
# CellMarker/Tissue.
# NOTE(review): presumably purity_filter() joins `df` with a lineage lookup
# table to obtain that column (hence "join") - confirm against its docs.
test_that("purity_filter produces expected output - join", {
  result <- purity_filter(
    x = df,
    aggregation_key = c("CellMarker", "Tissue", "TimePoint"),
    group_key = c("HematoLineage"),
    min_value = 3,
    impurity_threshold = 10,
    by_timepoint = TRUE,
    value_column = "Value"
  )

  # Rows are ordered locus by locus; columns that are constant within a locus
  # are written as run lengths, the others with one locus per line.
  expected <- tibble::tibble(
    chr = rep("1", 14),
    integration_locus = rep(
      c(645551, 732938, 775536, 846681, 1029785, 1036835, 121249, 251227),
      times = c(2, 2, 2, 3, 1, 2, 1, 1)
    ),
    # Only the single row for locus 1029785 is on the "-" strand
    strand = rep(c("+", "-", "+"), times = c(9, 1, 4)),
    GeneName = rep(
      c(
        "LOC100133331", "LOC100288069", "LINC01128", "LOC100130417",
        "C1orf159", "LOC729737", "LOC100132287"
      ),
      times = c(2, 2, 2, 3, 3, 1, 1)
    ),
    GeneStrand = rep(c("-", "+", "-", "+"), times = c(4, 2, 7, 1)),
    TimePoint = rep("01", 14),
    HematoLineage = c(
      "Lymphoid", "Myeloid", # 645551
      "CD34", "Myeloid", # 732938
      "CD34", "Lymphoid", # 775536
      "CD34", "Lymphoid", "Myeloid", # 846681
      "CD34", # 1029785
      "Lymphoid", "Myeloid", # 1036835
      "CD34", # 121249
      "Myeloid" # 251227
    ),
    Value = c(
      501, 1001, # 645551
      300, 1500, # 732938
      10, 20, # 775536
      8, 23, 22, # 846681
      1000, # 1029785
      180, 1003, # 1036835
      1000, # 121249
      1001 # 251227
    )
  )

  expect_equal(result, expected)
})
# Runs purity_filter() with `selected_groups` given first as a character
# vector and then as a data frame of CellMarker/Tissue pairs; both calls are
# compared against the same expected table.
test_that("purity_filter produces expected output - group selection", {
  agg_key <- c("CellMarker", "Tissue", "TimePoint")
  grp_key <- c("CellMarker", "Tissue")

  # Per-site annotations, in the order the sites appear in the expected table
  sites <- c(121249, 251227, 645551, 732938, 775536, 846681, 1029785, 1036835)
  site_strand <- c("+", "+", "+", "+", "+", "+", "-", "+")
  site_gene <- c(
    "LOC729737", "LOC100132287", "LOC100133331", "LOC100288069",
    "LINC01128", "LOC100130417", "C1orf159", "C1orf159"
  )
  site_gene_strand <- c("-", "+", "-", "-", "+", "-", "-", "-")

  # The expected table has two parts: 14 "BM" rows (CD34 / CD13) followed by
  # 24 "PB" rows (CD14 / CD19 / CD3, three rows per site). Each part walks
  # through the 8 sites once; this vector holds the rows per site for the
  # first part followed by the second part.
  rows_per_site <- c(c(1, 2, 2, 2, 2, 2, 1, 2), rep(3, 8))

  expected <- tibble::tibble(
    chr = rep("1", 38),
    integration_locus = rep(rep(sites, 2), times = rows_per_site),
    strand = rep(rep(site_strand, 2), times = rows_per_site),
    GeneName = rep(rep(site_gene, 2), times = rows_per_site),
    GeneStrand = rep(rep(site_gene_strand, 2), times = rows_per_site),
    TimePoint = rep("01", 38),
    CellMarker = c(
      # BM part, one site per line
      "CD34",
      "CD13", "CD34",
      "CD13", "CD34",
      "CD13", "CD34",
      "CD13", "CD34",
      "CD13", "CD34",
      "CD34",
      "CD13", "CD34",
      # PB part: the same three markers for each of the 8 sites
      rep(c("CD14", "CD19", "CD3"), times = 8)
    ),
    Tissue = rep(c("BM", "PB"), times = c(14, 24)),
    Value = c(
      # BM part, one site per line
      1000,
      1, 1,
      1, 1,
      1000, 300,
      1, 10,
      10, 8,
      1000,
      3, 3,
      # PB part, one site per line
      1, 1, 1,
      1000, 1, 1,
      1000, 500, 1,
      500, 1, 1,
      1, 10, 10,
      12, 14, 9,
      30, 30, 30,
      1000, 90, 90
    )
  )

  ## Vector
  result_from_vector <- purity_filter(
    x = df,
    aggregation_key = agg_key,
    group_key = grp_key,
    selected_groups = c("CD34", "CD13"),
    min_value = 0,
    impurity_threshold = 10,
    by_timepoint = TRUE,
    value_column = "Value"
  )
  expect_equal(result_from_vector, expected)

  ## DF
  groups_df <- tibble::tribble(
    ~CellMarker, ~Tissue,
    "CD34", "BM",
    "CD13", "BM"
  )
  result_from_df <- purity_filter(
    x = df,
    aggregation_key = agg_key,
    group_key = grp_key,
    selected_groups = groups_df,
    min_value = 0,
    impurity_threshold = 10,
    by_timepoint = TRUE,
    value_column = "Value"
  )
  expect_equal(result_from_df, expected)
})
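# NOTE: the two lines below are leftover text from the web page this file was
# copied from; they are not part of the test suite.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.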