tests/testthat/test-collision-removal.R

library(ISAnalytics)

#------------------------------------------------------------------------------#
# Global vars
#------------------------------------------------------------------------------#
op <- withr::local_options(
    ISAnalytics.widgets = FALSE,
    ISAnalytics.verbose = FALSE
)

# Path to example association file
path_af <- system.file("extdata", "ex_association_file.tsv",
    package = "ISAnalytics"
)

# Path to correct file system example
path_root_correct <- system.file("extdata", "fs.zip",
    package = "ISAnalytics"
)
root_correct <- unzip_file_system(path_root_correct, "fs")

# Association file
af_missing_info <- function(af) {
    af %>%
        dplyr::filter(
            .data$CompleteAmplificationID != paste0(
                "CLOEXP_POOL6_LTR10LC10_VA2020-",
                "mix10_VA2020-mix10_lenti_737.",
                "pCCLsin.PPT.SFFV.eGFP.Wpre_",
                "PLATE1_NONE_1_NONE_SLiM_0000_",
                "NONE_NONE_OSR_NONE_NONE_NONE"
            )
        )
}

af_no_prob <- function(af) {
    af %>%
        dplyr::filter(
            .data$CompleteAmplificationID != paste0(
                "PROJECT1101_ABY-LR",
                "-PL4-POOL54_LTR51LC51_VA2020-",
                "mix14_VA2020-mix14_lenti_737.",
                "pCCLsin.PPT.SFFV.eGFP.Wpre_PL4_",
                "NONE_2_NONE_SLiM_0_NONE_NONE_",
                "OSR_NONE_NONE_NONE"
            )
        )
}

association_file_more <- import_association_file(path_af, root_correct,
    dates_format = "dmy"
)
association_file_np <- af_no_prob(association_file_more)
association_file_miss <- af_missing_info(association_file_more)

# Matrices
import_matr_silent <- function() {
    op <- options(ISAnalytics.verbose = FALSE, ISAnalytics.widgets = FALSE)
    on.exit(options(op))
    matrices <- import_parallel_Vispa2Matrices_auto(
        association_file = path_af, root = root_correct,
        quantification_type = c("fragmentEstimate", "seqCount"),
        matrix_type = "annotated", workers = 2, patterns = NULL,
        matching_opt = "ANY", dates_format = "dmy", multi_quant_matrix = FALSE
    )
    matrices
}

matrices <- import_matr_silent()

seq_count_m <- matrices$seqCount
fe_m <- matrices$fragmentEstimate

### Multi quant matrix
multi <- comparison_matrix(matrices)

#------------------------------------------------------------------------------#
# Tests .check_same_info
#------------------------------------------------------------------------------#
test_that(".check_same_info returns empty if no problems", {
    check <- .check_same_info(association_file_np, seq_count_m)
    expect_true(nrow(check) == 0)
})

test_that(".check_same_info returns non empty if more info", {
    check <- .check_same_info(association_file_more, seq_count_m)
    expect_true(nrow(check) == 1)
    expect_true(check$ProjectID == "PROJECT1101")
    expect_true(check$PoolID == "ABY-LR-PL4-POOL54")
})

#------------------------------------------------------------------------------#
# Tests .join_matrix_af
#------------------------------------------------------------------------------#
joined_df <- .join_matrix_af(
    seq_count_m, association_file_np,
    "SequencingDate"
)
joined_df_multi <- .join_matrix_af(
    multi, association_file_np,
    "SequencingDate"
)

test_that(".join_matrix_af produces the right table", {
    expect_true(!is.null(joined_df))
    expect_true(nrow(joined_df) == nrow(seq_count_m))
    expect_true(ncol(joined_df) == 12)

    expect_true(!is.null(joined_df_multi))
    expect_true(nrow(joined_df_multi) == nrow(seq_count_m))
    expect_true(ncol(joined_df_multi) == 13)
})

#------------------------------------------------------------------------------#
# Tests .identify_independent_samples
#------------------------------------------------------------------------------#
splitted <- .identify_independent_samples(joined_df)
splitted_multi <- .identify_independent_samples(joined_df_multi)

test_that(".identify_independent_samples splits joined_df", {
    expect_true(nrow(splitted$collisions) +
        nrow(splitted$non_collisions) == nrow(joined_df))
    expect_true(nrow(dplyr::inner_join(splitted[[1]], splitted[[2]],
        by = c(
            "chr",
            "integration_locus",
            "strand"
        )
    )) == 0)
    expect_true(nrow(splitted_multi$collisions) +
        nrow(splitted_multi$non_collisions) ==
        nrow(joined_df_multi))
    expect_true(nrow(dplyr::inner_join(splitted_multi[[1]], splitted_multi[[2]],
        by = c(
            "chr",
            "integration_locus",
            "strand"
        )
    )) == 0)
})

#------------------------------------------------------------------------------#
# Tests .obtain_nested
#------------------------------------------------------------------------------#
nested <- .obtain_nested(splitted$collisions)
nested_multi <- .obtain_nested(splitted_multi$collisions)
test_that(".obtain_nested correctly returns nested table", {
    expect_true(ncol(nested) == 4)
    expect_equal(colnames(nested), c(
        "chr", "integration_locus", "strand",
        "data"
    ))
    expect_true(ncol(nested_multi) == 4)
    expect_equal(colnames(nested_multi), c(
        "chr", "integration_locus", "strand",
        "data"
    ))
})

#------------------------------------------------------------------------------#
# Tests .discriminate_by_date
#------------------------------------------------------------------------------#
### OTHER VARS ###
example_df_nested <- function(ProjectID,
    Value,
    SequencingDate,
    PoolID,
    SubjectID,
    Replicate) {
    t <- tibble::tibble(
        Value = Value,
        SequencingDate = SequencingDate,
        ProjectID = ProjectID,
        PoolID = PoolID,
        SubjectID = SubjectID,
        ReplicateNumber = Replicate
    )
    t <- t %>%
        dplyr::mutate(
            CompleteAmplificationID = paste(.data$ProjectID,
                .data$PoolID,
                .data$SubjectID,
                .data$ReplicateNumber,
                sep = "_"
            ),
            .before = .data$Value
        )
}

example_df_nested_multi <- function(ProjectID,
    seqCount,
    fragmentEstimate,
    SequencingDate,
    PoolID,
    SubjectID,
    Replicate) {
    t <- tibble::tibble(
        seqCount = seqCount,
        fragmentEstimate = fragmentEstimate,
        SequencingDate = SequencingDate,
        ProjectID = ProjectID,
        PoolID = PoolID,
        SubjectID = SubjectID,
        ReplicateNumber = Replicate
    )
    t <- t %>%
        dplyr::mutate(
            CompleteAmplificationID = paste(.data$ProjectID,
                .data$PoolID,
                .data$SubjectID,
                .data$ReplicateNumber,
                sep = "_"
            ),
            .before = .data$seqCount
        )
}

test_that(".discriminate_by_date returns as expected for all equal dates", {
    dates <- lubridate::dmy("05/08/2020")
    df <- example_df_nested(
        ProjectID = "CLOEXP", Value = c(125, 25, 2),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c("subj1", "subj2", "subj3"),
        Replicate = c(1, 1, 2)
    )
    result <- .discriminate_by_date(df, "SequencingDate")
    expect_true(result$check == FALSE)
    expect_equal(result$data, df)
    df <- example_df_nested_multi(
        ProjectID = "CLOEXP", seqCount = c(125, 25, 2),
        fragmentEstimate = c(1354.54, 484.5, 386),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c("subj1", "subj2", "subj3"),
        Replicate = c(1, 1, 2)
    )
    result <- .discriminate_by_date(df, "SequencingDate")
    expect_true(result$check == FALSE)
    expect_equal(result$data, df)
})

test_that(".discriminate_by_date returns as expected for paired equal dates", {
    dates <- c(
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("03/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    result <- .discriminate_by_date(df, "SequencingDate")
    expect_true(result$check == FALSE)
    df <- df %>% dplyr::arrange(.data$SequencingDate)
    expect_equal(result$data, df)
    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    result <- .discriminate_by_date(df, "SequencingDate")
    expect_true(result$check == FALSE)
    df <- df %>% dplyr::arrange(.data$SequencingDate)
    expect_equal(result$data, df)
})

test_that(".discriminate_by_date returns as expected for single min date", {
    dates <- c(
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("03/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    result <- .discriminate_by_date(df, "SequencingDate")
    expect_true(result$check == TRUE)
    df <- df %>%
        dplyr::filter(.data$SequencingDate == lubridate::dmy("01/08/2020"))
    expect_equal(result$data, df)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    result <- .discriminate_by_date(df, "SequencingDate")
    expect_true(result$check == TRUE)
    df <- df %>%
        dplyr::filter(.data$SequencingDate == lubridate::dmy("01/08/2020"))
    expect_equal(result$data, df)
})

#------------------------------------------------------------------------------#
# Tests .discriminate_by_replicate
#------------------------------------------------------------------------------#
test_that(".discriminate_by_replicate returns as expected for single max", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj6", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    result <- .discriminate_by_replicate(df)
    expect_true(result$check == TRUE)
    df <- df %>% dplyr::filter(.data$SubjectID == "subj2")
    expect_equal(result$data, df)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj6", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    result <- .discriminate_by_replicate(df)
    expect_true(result$check == TRUE)
    df <- df %>% dplyr::filter(.data$SubjectID == "subj2")
    expect_equal(result$data, df)
})

test_that(".discriminate_by_replicate returns as expected for not max", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    result <- .discriminate_by_replicate(df)
    expect_true(result$check == FALSE)
    expect_equal(result$data, df)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    result <- .discriminate_by_replicate(df)
    expect_true(result$check == FALSE)
    expect_equal(result$data, df)
})

#------------------------------------------------------------------------------#
# Tests .discriminate_by_seqCount
#------------------------------------------------------------------------------#
test_that(".discriminate_by_seqCount returns as expected for ratio < 10", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    result <- .discriminate_by_seqCount(df, 10, "Value")
    expect_true(result$check == FALSE)
    expect_equal(result$data, df)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    result <- .discriminate_by_seqCount(df, 10, "seqCount")
    expect_true(result$check == FALSE)
    expect_equal(result$data, df)
})

test_that(".discriminate_by_seqCount returns as expected for ratio > 10", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(20, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    result <- .discriminate_by_seqCount(df, 10, "Value")
    expect_true(result$check == TRUE)
    df <- df %>% dplyr::filter(.data$SubjectID == "subj2")
    expect_equal(result$data, df)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(20, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    result <- .discriminate_by_seqCount(df, 10, "seqCount")
    expect_true(result$check == TRUE)
    df <- df %>% dplyr::filter(.data$SubjectID == "subj2")
    expect_equal(result$data, df)
})

#------------------------------------------------------------------------------#
# Tests .four_step_check
#------------------------------------------------------------------------------#
test_that(".four_step_check returns as expected for first step", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(20, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj6", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "Value"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_equal(rem_col$SequencingDate, c(lubridate::dmy("01/08/2020")))
    expect_equal(rem_col$SubjectID, c("subj1"))
    expect_true(nrow(rem_col) == 1)
    expect_true(reassigned == 1)
    expect_true(removed == 0)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(20, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj6", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "seqCount"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_equal(rem_col$SequencingDate, c(lubridate::dmy("01/08/2020")))
    expect_equal(rem_col$SubjectID, c("subj1"))
    expect_true(nrow(rem_col) == 1)
    expect_true(reassigned == 1)
    expect_true(removed == 0)
})

test_that(".four_step_check returns as expected for second step", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(20, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj4", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "Value"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_equal(rem_col$ReplicateNumber, c(1, 2))
    expect_equal(rem_col$SubjectID, c("subj1", "subj1"))
    expect_true(nrow(rem_col) == 2)
    expect_true(reassigned == 1)
    expect_true(removed == 0)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(20, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj4", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "seqCount"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_equal(rem_col$ReplicateNumber, c(1, 2))
    expect_equal(rem_col$SubjectID, c("subj1", "subj1"))
    expect_true(nrow(rem_col) == 2)
    expect_true(reassigned == 1)
    expect_true(removed == 0)
})

test_that(".four_step_check returns as expected for third step", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(20, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "Value"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_equal(rem_col$ReplicateNumber, c(1, 2))
    expect_equal(rem_col$Value, c(50, 800))
    expect_equal(rem_col$SubjectID, c("subj2", "subj2"))
    expect_true(nrow(rem_col) == 2)
    expect_true(reassigned == 1)
    expect_true(removed == 0)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(20, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "seqCount"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_equal(rem_col$ReplicateNumber, c(1, 2))
    expect_equal(rem_col$seqCount, c(50, 800))
    expect_equal(rem_col$SubjectID, c("subj2", "subj2"))
    expect_true(nrow(rem_col) == 2)
    expect_true(reassigned == 1)
    expect_true(removed == 0)
})

test_that(".four_step_check returns as expected for fourth step", {
    dates <- c(
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("03/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    df <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(20, 25, 2, 50, 80, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "Value"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_null(rem_col)
    expect_true(reassigned == 0)
    expect_true(removed == 1)

    df <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(20, 25, 2, 50, 80, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj1", "subj3", "subj2",
            "subj2", "subj5"
        ),
        Replicate = c(1, 2, 2, 1, 2, 3)
    )
    ex <- tibble::tibble(
        chr = 1, integration_locus = 10493, strand = "+",
        data = list(df)
    )
    res <- purrr::pmap(ex,
        .f = .four_step_check, date_col = "SequencingDate",
        reads_ratio = 10, seqCount_col = "seqCount"
    )
    rem_col <- purrr::map(res, function(x) {
        x$data
    })
    rem_col <- purrr::reduce(rem_col, dplyr::bind_rows)
    reassigned <- purrr::map(res, function(x) {
        x$reassigned
    })
    reassigned <- purrr::reduce(reassigned, sum)
    removed <- purrr::map(res, function(x) {
        x$removed
    })
    removed <- purrr::reduce(removed, sum)
    expect_null(rem_col)
    expect_true(reassigned == 0)
    expect_true(removed == 1)
})

#------------------------------------------------------------------------------#
# Tests .coll_mapping
#------------------------------------------------------------------------------#
### OTHER VARS ###
example_collisions <- function() {
    cols <- list(
        chr = c(1, 2, 3, 4),
        integration_locus = c(103948, 14390, 12453, 12353),
        strand = c("+", "+", "-", "-")
    )
    # Can discriminate by date
    dates <- c(
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("03/08/2020")
    )
    smpl1 <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    # Can discriminate by replicate
    dates <- c(
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    smpl2 <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    # Can discriminate by seqCount
    smpl3 <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(25, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj1"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    # Can't discriminate
    smpl4 <- example_df_nested(
        ProjectID = "CLOEXP",
        Value = c(125, 25, 2, 50, 800, 6),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj1"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    t <- tibble::as_tibble(cols)
    t <- t %>%
        tibble::add_column(tibble::as_tibble_col(list(
            smpl1, smpl2,
            smpl3, smpl4
        ),
        column_name = "data"
        ))
    t
}

example_collisions_multi <- function() {
    cols <- list(
        chr = c(1, 2, 3, 4),
        integration_locus = c(103948, 14390, 12453, 12353),
        strand = c("+", "+", "-", "-")
    )
    # Can discriminate by date
    dates <- c(
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("01/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("03/08/2020")
    )
    smpl1 <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    # Can discriminate by replicate
    dates <- c(
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020"),
        lubridate::dmy("05/08/2020")
    )
    smpl2 <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj5"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    # Can discriminate by seqCount
    smpl3 <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(25, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj1"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    # Can't discriminate
    smpl4 <- example_df_nested_multi(
        ProjectID = "CLOEXP",
        seqCount = c(125, 25, 2, 50, 800, 6),
        fragmentEstimate = c(456, 56, 786.87, 644.56, 4.857, 86.563),
        SequencingDate = dates, PoolID = "POOL6",
        SubjectID = c(
            "subj1", "subj2", "subj3", "subj4",
            "subj2", "subj1"
        ),
        Replicate = c(1, 1, 2, 1, 2, 3)
    )
    t <- tibble::as_tibble(cols)
    t <- t %>%
        tibble::add_column(tibble::as_tibble_col(list(
            smpl1, smpl2,
            smpl3, smpl4
        ),
        column_name = "data"
        ))
    t
}

ex_collisions <- example_collisions()
ex_collisions_multi <- example_collisions_multi()

test_that(".coll_mapping produces expected result for example collisions", {
    result <- .coll_mapping(ex_collisions, "SequencingDate", 10, "Value")
    coll <- result$coll
    removed <- result$removed
    reassigned <- result$reassigned
    expect_true(removed == 1)
    expect_true(reassigned == 3)
    # Removed integration should not be in the result
    rem_integration <- coll %>%
        dplyr::filter(
            .data$chr == 4, .data$integration_locus == 12353,
            .data$strand == "-"
        )
    expect_true(nrow(rem_integration) == 0)
    # First integration expected result
    first <- coll %>%
        dplyr::filter(
            .data$chr == 1, .data$integration_locus == 103948,
            .data$strand == "+"
        )
    expect_true(all(first$SubjectID == "subj3"))
    expect_true(nrow(first) == 1)
    # Second integration expected result
    second <- coll %>%
        dplyr::filter(
            .data$chr == 2, .data$integration_locus == 14390,
            .data$strand == "+"
        )
    expect_true(all(second$SubjectID == "subj2"))
    expect_true(nrow(second) == 2)
    # Third integration expected result
    third <- coll %>%
        dplyr::filter(
            .data$chr == 3, .data$integration_locus == 12453,
            .data$strand == "-"
        )
    expect_true(all(third$SubjectID == "subj2"))
    expect_true(nrow(third) == 2)

    result <- .coll_mapping(
        ex_collisions_multi, "SequencingDate", 10,
        "seqCount"
    )
    coll <- result$coll
    removed <- result$removed
    reassigned <- result$reassigned
    expect_true(removed == 1)
    expect_true(reassigned == 3)
    # Removed integration should not be in the result
    rem_integration <- coll %>%
        dplyr::filter(
            .data$chr == 4, .data$integration_locus == 12353,
            .data$strand == "-"
        )
    expect_true(nrow(rem_integration) == 0)
    # First integration expected result
    first <- coll %>%
        dplyr::filter(
            .data$chr == 1, .data$integration_locus == 103948,
            .data$strand == "+"
        )
    expect_true(all(first$SubjectID == "subj3"))
    expect_true(nrow(first) == 1)
    # Second integration expected result
    second <- coll %>%
        dplyr::filter(
            .data$chr == 2, .data$integration_locus == 14390,
            .data$strand == "+"
        )
    expect_true(all(second$SubjectID == "subj2"))
    expect_true(nrow(second) == 2)
    # Third integration expected result
    third <- coll %>%
        dplyr::filter(
            .data$chr == 3, .data$integration_locus == 12453,
            .data$strand == "-"
        )
    expect_true(all(third$SubjectID == "subj2"))
    expect_true(nrow(third) == 2)
})

#------------------------------------------------------------------------------#
# Tests .process_collisions
#------------------------------------------------------------------------------#
test_that(".process_collisions returns updated collisions", {
    result <- .process_collisions(
        ex_collisions %>%
            tidyr::unnest(.data$data),
        "SequencingDate", 10, "Value"
    )
    coll <- result$coll
    removed <- result$removed
    reassigned <- result$reassigned
    expect_true(removed == 1)
    expect_true(reassigned == 3)
    # Removed integration should not be in the result
    rem_integration <- coll %>%
        dplyr::filter(
            .data$chr == 4, .data$integration_locus == 12353,
            .data$strand == "-"
        )
    expect_true(nrow(rem_integration) == 0)
    # First integration expected result
    first <- coll %>%
        dplyr::filter(
            .data$chr == 1, .data$integration_locus == 103948,
            .data$strand == "+"
        )
    expect_true(all(first$SubjectID == "subj3"))
    expect_true(nrow(first) == 1)
    # Second integration expected result
    second <- coll %>%
        dplyr::filter(
            .data$chr == 2, .data$integration_locus == 14390,
            .data$strand == "+"
        )
    expect_true(all(second$SubjectID == "subj2"))
    expect_true(nrow(second) == 2)
    # Third integration expected result
    third <- coll %>%
        dplyr::filter(
            .data$chr == 3, .data$integration_locus == 12453,
            .data$strand == "-"
        )
    expect_true(all(third$SubjectID == "subj2"))
    expect_true(nrow(third) == 2)

    result <- .process_collisions(
        ex_collisions_multi %>%
            tidyr::unnest(.data$data),
        "SequencingDate", 10, "seqCount"
    )
    coll <- result$coll
    removed <- result$removed
    reassigned <- result$reassigned
    expect_true(removed == 1)
    expect_true(reassigned == 3)
    # Removed integration should not be in the result
    rem_integration <- coll %>%
        dplyr::filter(
            .data$chr == 4, .data$integration_locus == 12353,
            .data$strand == "-"
        )
    expect_true(nrow(rem_integration) == 0)
    # First integration expected result
    first <- coll %>%
        dplyr::filter(
            .data$chr == 1, .data$integration_locus == 103948,
            .data$strand == "+"
        )
    expect_true(all(first$SubjectID == "subj3"))
    expect_true(nrow(first) == 1)
    # Second integration expected result
    second <- coll %>%
        dplyr::filter(
            .data$chr == 2, .data$integration_locus == 14390,
            .data$strand == "+"
        )
    expect_true(all(second$SubjectID == "subj2"))
    expect_true(nrow(second) == 2)
    # Third integration expected result
    third <- coll %>%
        dplyr::filter(
            .data$chr == 3, .data$integration_locus == 12453,
            .data$strand == "-"
        )
    expect_true(all(third$SubjectID == "subj2"))
    expect_true(nrow(third) == 2)
})

#------------------------------------------------------------------------------#
# Tests remove_collisions
#------------------------------------------------------------------------------#
## TEST INPUT
test_that("remove_collisions stops if x is not a named list", {
    expect_error({
        rc <- remove_collisions(1, association_file_np)
    })
    expect_error({
        rc <- remove_collisions(list(1, 2, 3), association_file_np)
    })
})

test_that("remove_collisions stops if names of x, if list,
          are not in quant types", {
    # All
    expect_error({
        rc <- remove_collisions(list(a = "ab", b = "cd"), association_file_np)
    })
    # Only some
    expect_error({
        rc <- remove_collisions(
            list(seqCount = "ab", b = "cd"),
            association_file_np
        )
    })
})

test_that("remove_collisions stops if elements are not IS matrices", {
    non_ism <- tibble::tibble(a = c(1, 2, 3))
    miss_value <- seq_count_m %>% dplyr::select(-c(.data$Value))
    miss_camp <- seq_count_m %>%
        dplyr::select(-c(.data$CompleteAmplificationID))
    expect_error(
        {
            rc <- remove_collisions(x = non_ism, association_file_np)
        },
        regexp = .non_ISM_error()
    )
    expect_error(
        {
            rc <- remove_collisions(x = miss_value, association_file_np)
        },
        regexp = .missing_num_cols_error()
    )
    expect_error(
        {
            rc <- remove_collisions(x = miss_camp, association_file_np)
        },
        regexp = .missing_complAmpID_error()
    )
    expect_error(
        {
            rc <- remove_collisions(
                x = list(
                    seqCount = non_ism,
                    fragmentEstimate = seq_count_m
                ),
                association_file_np
            )
        },
        regexp = .non_ISM_error()
    )
    expect_error(
        {
            rc <- remove_collisions(
                x = list(
                    seqCount = miss_value,
                    fragmentEstimate = seq_count_m
                ),
                association_file_np
            )
        },
        regexp = .missing_value_col_error()
    )
    expect_error(
        {
            rc <- remove_collisions(
                x = list(
                    seqCount = miss_camp,
                    fragmentEstimate = seq_count_m
                ),
                association_file_np
            )
        },
        regexp = .missing_complAmpID_error()
    )
})

test_that("remove_collisions stops if association file is not a tibble", {
    expect_error({
        rc <- remove_collisions(
            list(seqCount = seq_count_m),
            path_af
        )
    })
})

test_that("remove_collisions stops if date_col is incorrect", {
    expect_error({
        rc <- remove_collisions(list(seqCount = seq_count_m),
            association_file_np,
            date_col = 2
        )
    })
    expect_error({
        rc <- remove_collisions(list(seqCount = seq_count_m),
            association_file_np,
            date_col = c(
                "SequencingDate", "FusionPrimerPCRDate"
            )
        )
    })
    expect_error({
        rc <- remove_collisions(list(seqCount = seq_count_m),
            association_file_np,
            date_col = "Date"
        )
    })
})

test_that("remove_collisions stops if reads_ratio is not a number", {
    expect_error({
        rc <- remove_collisions(list(seqCount = seq_count_m),
            association_file_np,
            date_col = "SequencingDate",
            reads_ratio = "20"
        )
    })
})

test_that("remove_collisions stops if reads_ratio is not a single number", {
    expect_error({
        rc <- remove_collisions(list(seqCount = seq_count_m),
            association_file_np,
            date_col = "SequencingDate",
            reads_ratio = c(10, 20)
        )
    })
})


test_that("remove_collisions stops if date_col contains NA", {
    expect_error(
        {
            rc <- remove_collisions(list(seqCount = seq_count_m),
                association_file_np,
                date_col = "FusionPrimerPCRDate"
            )
        },
        regexp = paste(
            "Selected date column contains NA values, please check",
            "and fix the association file"
        )
    )
})

test_that("remove_collisions stops if there's no seqCount matrix", {
    expect_error(
        {
            rc <- remove_collisions(
                list(fragmentEstimate = seq_count_m),
                association_file_np
            )
        },
        regexp = paste(
            "Sequence count data frame is required for collision",
            "removal but none was detected in x"
        )
    )
    expect_error(
        {
            sq <- seq_count_m %>% dplyr::filter(.data$chr == "zzz")
            rc <- remove_collisions(
                list(seqCount = sq),
                association_file_np
            )
        },
        regexp = paste(
            "Sequence count data frame is required for collision",
            "removal but none was detected in x"
        )
    )
})

test_that("remove_collisions throws warning info from af", {
    expect_warning(
        {
            invisible(capture.output({
                rc <- remove_collisions(
                    list(seqCount = seq_count_m),
                    association_file_miss
                )
            }))
        },
        regexp = paste(
            "The association file is missing needed info",
            "on some experiments. Missing samples will be removed",
            "from the matrices."
        )
    )
})

test_that("remove_collisions notifies additional data and succeeds", {
    op <- options(ISAnalytics.verbose = TRUE)
    on.exit(options(op))
    expect_message(
        {
            invisible(capture_output({
                coll_rem <- remove_collisions(seq_count_m, association_file_more,
                    date_col = "SequencingDate",
                    reads_ratio = 10
                )
            }))
        },
        regexp = paste("Found additional data relative to some projects",
            "that are not included in the imported matrices.",
            "Here is a summary",
            collapse = "\n"
        ),
        fixed = TRUE
    )
})

#------------------------------------------------------------------------------#
# Tests realign_after_collisions
#------------------------------------------------------------------------------#

### OTHER VARS ###
silent_coll <- function() {
    op <- options(ISAnalytics.verbose = FALSE)
    on.exit(options(op))
    invisible(capture_output({
        coll_rem <- remove_collisions(seq_count_m, association_file_more,
            date_col = "SequencingDate",
            reads_ratio = 10
        )
    }))
    coll_rem
}

coll_rem <- silent_coll()

test_that("realign_after_collisions fails if others is not a named list", {
    expect_error({
        realigned <- realign_after_collisions(coll_rem, 2)
    })
    expect_error({
        realigned <- realign_after_collisions(coll_rem, NULL)
    })
    expect_error({
        realigned <- realign_after_collisions(
            coll_rem,
            fe_m
        )
    })
})

test_that("realign_after_collisions fails if names not quant types", {
    expect_error({
        realigned <- realign_after_collisions(coll_rem, fe_m)
    })
    expect_error({
        realigned <- realign_after_collisions(coll_rem, list(a = fe_m))
    })
})

test_that("realign_after_collisions fails if list contains non-IS matrices", {
    fe1 <- fe_m %>% dplyr::select(-c(.data$CompleteAmplificationID))
    expect_error(
        {
            realigned <- realign_after_collisions(
                coll_rem$seqCount,
                list(
                    fragmentEstimate =
                        tibble::tibble(a = c(1, 2, 3))
                )
            )
        },
        regexp = .non_ISM_error()
    )
    expect_error(
        {
            realigned <- realign_after_collisions(
                coll_rem,
                list(
                    fragmentEstimate = fe_m,
                    barcodeCount = fe1
                )
            )
        },
        regexp = .missing_complAmpID_error()
    )
})

test_that("realign_after_collisions correctly re-aligns", {
    realigned <- realign_after_collisions(
        coll_rem,
        list(fragmentEstimate = fe_m)
    )
    expect_true(nrow(coll_rem) == nrow(realigned$fragmentEstimate))
    expect_true(all(realigned$fragmentEstimate$chr %in% coll_rem$chr))
    expect_true(all(realigned$fragmentEstimate$integration_locus
        %in% coll_rem$integration_locus))
    expect_true(all(realigned$fragmentEstimate$CompleteAmplificationID
        %in% coll_rem$CompleteAmplificationID))
})

Try the ISAnalytics package in your browser

Any scripts or data that you put into this service are public.

ISAnalytics documentation built on April 9, 2021, 6:01 p.m.