test_MSnSet.R
In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics

context("MSnSet class")

test_that("MSnSet validity", {
    expect_true(validObject(new("MSnSet")))
    e <- matrix(rnorm(12), ncol = 3)
    rownames(e) <- LETTERS[1:4]
    colnames(e) <- letters[1:3]
    expect_true(validObject(MSnSet(e)))
})

test_that("MSnSet acquisitionNum", {
    data(msnset)
    expect_equal(acquisitionNum(msnset),
                 setNames(featureData(msnset)$acquisition.number,
                          featureNames(msnset)))
    featureData(msnset)$acquisition.number <- NULL
    expect_error(acquisitionNum(msnset), "has no column")
})

test_that("MSnSet validity for empty feature names", {
    data(msnset)
    expect_true(validObject(msnset))
    featureNames(msnset)[1] <- ""
    expect_error(validObject(msnset),
                 "Empty string is not a valid feature name.")
})

test_that("MSnSet coersion", {
    data(dunkley2006, package = "pRolocdata")
    expect_true(validObject(dunkley2006))
    es <- as(dunkley2006, "ExpressionSet")
    expect_true(validObject(es))
    ## back to MSnSet
    ms <- as(es, "MSnSet")
    expect_true(validObject(ms))
    ## compare original dunkley2006 and ms
    ## we know that coercion ExpressionSet to MSnSet initialises a new
    ## experimentData
    ms@experimentData <- experimentData(dunkley2006)
    ## processingData is also list in MSnSet -> ExpressionSet
    ms@processingData <- processingData(dunkley2006)
    ## and classVersion will differ
    ms@.__classVersion__ <- classVersion(dunkley2006)
    expect_true(all.equal(ms, dunkley2006))
})

test_that("Combine MSnSet features: groupBy and fcol", {
    data(msnset)
    msnset <- msnset[11:15, ]
    grp <- as.factor(c(1, 1, 2, 2, 2))
    fData(msnset)$k <- grp
    x1 <- combineFeatures(msnset, groupBy = grp)
    x2 <- combineFeatures(msnset, fcol = "k")
    x2@processingData <- x1@processingData
    expect_equal(x1, x2)
})

test_that("Combine MSnSet features (V)", {
    aa <- new("MSnSet",
              exprs = matrix(
                  c(rnorm(10, 4, 0.0001),
                    rnorm(10, 10, 0.0001)),
                  nrow = 10, byrow = TRUE),
              featureData = new("AnnotatedDataFrame",
                  data = data.frame(
                      A = rep(c("A", "B"), each = 5),
                      B = paste(
                          rep(c("A", "B"), each = 5),
                          1:10,
                          sep = "."))))
    expect_true(validObject(aa))
    ## gb <- factor(rep(letters[1:2], each = 5))
    gb <- factor(rep(1:2, each = 5))
    bb <- MSnbase:::nologging(combineFeatures(aa, gb, "mean"), 1)
    bb2 <- MSnbase:::nologging(combineFeatures(aa, as.character(gb), "mean"), 1)
    gbn <- rep(1:2, each = 5)
    bb3 <- MSnbase:::nologging(combineFeatures(aa, gbn, "mean"), 1)
    expect_equal(bb, bb2)
    expect_equal(bb, bb3)
    cc <- combineFeatures(aa, gb, "sum")
    dd <- combineFeatures(aa, gb, "median")
    ee <- combineFeatures(aa, gb, "weighted.mean", w = rep(1, 10))
    ff <- combineFeatures(aa, gb, "medpolish", verbose = FALSE)
    expect_equal(exprs(bb),
                 matrix(c(4, 10, 4, 10), ncol = 2),
                 tolerance = .001,
                 check.attributes = FALSE)
    expect_equal(exprs(dd),
                 matrix(c(4, 10, 4, 10), ncol = 2),
                 tolerance = .001,
                 check.attributes = FALSE)
    expect_equal(exprs(ee),
                 matrix(c(4, 10, 4, 10), ncol = 2),
                 tolerance = .001,
                 check.attributes = FALSE)
    expect_equal(exprs(ff),
                 matrix(c(4, 10, 4, 10), ncol = 2),
                 tolerance = .001,
                 check.attributes = FALSE)

    expect_equal(exprs(cc),
                 matrix(c(5*4, 5*10, 5*4, 5*10), ncol = 2),
                 tolerance = .001,
                 check.attributes = FALSE)

    expect_true(all(fData(bb)[, 1] == c("A", "B")))
    expect_true(all(fData(bb)[, 2] == c("A.1", "B.6")))
    gb2 <- factor(c("a", "c", "z", "a", "z", "b", "b", "a", "c", "a"))
    gb3 <- factor(rev(c("a", "c", "z", "a", "z", "b", "b", "a", "c", "a")))
    fData(aa)$Z <- gb2
    zz <- combineFeatures(aa, gb2, method = "sum")
    zz3 <- combineFeatures(aa[10:1, ], gb3, method = "sum")
    expect_equal(exprs(zz), exprs(zz3))
    expect_equal(fData(zz)[, 3:5], fData(zz3)[, 3:5], tolerance=1e-5)
    expect_true(all.equal(as.numeric(exprs(zz["a", ])),
                          colSums(exprs(aa)[gb2 == "a", ]),
                          check.attributes = FALSE))
    expect_true(all.equal(as.numeric(exprs(zz["b", ])),
                          colSums(exprs(aa)[gb2 == "b", ]),
                          check.attributes = FALSE))
    expect_true(all.equal(as.numeric(exprs(zz["c", ])),
                          colSums(exprs(aa)[gb2 == "c", ]),
                          check.attributes = FALSE))
    expect_true(all.equal(as.numeric(exprs(zz["z", ])),
                          colSums(exprs(aa)[gb2 == "z", ]),
                          check.attributes = FALSE))
    expect_true(all.equal(featureNames(zz),
                          as.character(fData(zz)$Z),
                          check.attributes = FALSE))
})


test_that("Combine MSnSet features (L)", {
    e <- matrix(1:15, nrow = 5)
    colnames(e) <- paste0("X", 1:3)
    rownames(e) <- letters[1:5]
    ee <- new("MSnSet", exprs = e)
    expect_true(validObject(ee))

    ## different lengths -> error
    L <- list(c("A", "B"), "B", "C", c("A", "B", "C"), "A", "X")
    expect_error(combineFeatures(ee, L))

    ## wrong names
    L <- list(c("A", "B"), "B", "C", c("A", "B", "C"), "A")
    names(L) <- LETTERS[1:5]
    expect_error(combineFeatures(ee, L))

    ## unordered names -> warning
    names(L) <- featureNames(ee)
    L <- L[sample(featureNames(ee))]
    expect_warning(combineFeatures(ee, L,
                                   redundancy.handler = "unique",
                                   cv = FALSE))

    L <- list(c("A", "B"), "B", "C", c("A", "B", "C"), "A")
    names(L) <- featureNames(ee)
    ee2 <- combineFeatures(ee, L,
                           redundancy.handler = "unique",
                           cv = FALSE)
    ee2 <- MSnbase:::nologging(ee2, 2)
    ## peptide a -> protein(s) A, B  DISCARD
    ## peptide b -> protein(s) B       KEEP
    ## peptide c -> protein(s) C       KEEP
    ## peptide d -> protein(s) A, B, C DISCARD
    ## peptide e -> protein(s) A       KEEP
    ##
    ## Protein A is quantified by pep e only
    ## Protein B is quantified by pep b only
    ## Protein C is quantified by pep c only
    ee3 <- ee[c("e", "b", "c"), ]
    featureNames(ee3) <- LETTERS[1:3]
    ee3@processingData@merged <- TRUE
    ee3 <- MSnbase:::nologging(ee3, 1)
    expect_equal(ee2, ee3)

    ee4 <- combineFeatures(ee, L,
                           redundancy.handler = "multiple",
                           cv = FALSE)
    expect_equal(exprs(ee4)["A", ],
                 colMeans(exprs(ee)[sapply(L, function(l) any(grepl("A", l))), ]))
    expect_equal(exprs(ee4)["B", ],
                 colMeans(exprs(ee)[sapply(L, function(l) any(grepl("B", l))), ]))
    expect_equal(exprs(ee4)["C", ],
                 colMeans(exprs(ee)[sapply(L, function(l) any(grepl("C", l))), ]))
})

test_that("Combine MSnSet features repeatedly", {
    data(msnset)
    fv1 <- fvarLabels(msnset)
    pep <- combineFeatures(msnset, groupBy = fData(msnset)$PeptideSequence,
                           cv = TRUE)
    prot <- combineFeatures(pep, groupBy = fData(pep)$ProteinAccession,
                            cv = TRUE)
    expect_identical(length(fv1) + 4L, length(fvarLabels(pep)))
    expect_identical(length(fv1) + 8L, length(fvarLabels(prot)))
})

test_that("makeImpuritiesMatrix", {
    i4 <- dir(system.file("extdata", package = "MSnbase"),
              pattern = "iTRAQ4plexPurityCorrection",
              full.names = TRUE)
    m4 <- makeImpuritiesMatrix(filename = i4, edit = FALSE)
    a4 <- matrix(c(0.929, 0.059, 0.002, 0.000,
                   0.020, 0.923, 0.056, 0.001,
                   0.000, 0.030, 0.924, 0.045,
                   0.000, 0.001, 0.040, 0.923),
                 nrow = 4, byrow = TRUE)
    expect_equal(a4, m4, check.attributes = FALSE)
    t6 <- dir(system.file("extdata", package = "MSnbase"),
              pattern = "TMT6plexPurityCorrection",
              full.names = TRUE)
    m6 <- makeImpuritiesMatrix(filename = t6, edit = FALSE)
    a6 <- matrix(c(0.939, 0.061, 0.000, 0.000, 0.000, 0.000,
                   0.005, 0.928, 0.067, 0.000, 0.000, 0.000,
                   0.000, 0.011, 0.947, 0.042, 0.000, 0.000,
                   0.000, 0.000, 0.017, 0.942, 0.041, 0.000,
                   0.000, 0.000, 0.000, 0.016, 0.963, 0.021,
                   0.000, 0.000, 0.000, 0.002, 0.032, 0.938),
                 nrow = 6, byrow = TRUE)
    expect_equal(a6, m6, check.attributes = FALSE)
})

test_that("Normalisation and transpose", {
    data(msnset)
    msnset1 <- normalise(msnset, "sum")
    expect_equivalent(rowSums(exprs(msnset1), na.rm = TRUE),
                      rep(1, nrow(msnset1)))
    msnset2 <- normalise(msnset, "max")
    expect_true(all(apply(exprs(msnset2), 1, max, na.rm = TRUE) == 1))
    msnset3 <- normalise(msnset, "quantiles")
    msnset4 <- normalise(msnset, "quantiles.robust")
    msnset5 <- normalise(msnset, "vsn")
})


test_that("Transpose and subset", {
    data(msnset)
    ## transpose
    tmsnset <- t(msnset)
    expect_true(nrow(msnset) == ncol(tmsnset))
    expect_true(ncol(msnset) == nrow(tmsnset))
    expect_true(all.equal(pData(msnset), fData(tmsnset)))
    expect_true(all.equal(pData(tmsnset), fData(msnset)))
    ## subset
    bb <- msnset[1:2, c(2, 4)]
    expect_identical(dim(bb), c(2L, 2L))
})

test_that("nQuants", {
    m <- new("MSnSet",
             exprs=matrix(1:10, nrow=5, ncol=2),
             featureData=new("AnnotatedDataFrame",
                             data=data.frame(accession=
                                             factor(c("A", "A", "A", "B", "B")))))
    qu <- matrix(3:2, nrow=2, ncol=2, dimnames=list(c("A", "B"), 1:2))
    expect_equal(nQuants(m, group=fData(m)$accession), qu)

    ## more levels than items present in the factor
    m@featureData <- new("AnnotatedDataFrame",
                         data=data.frame(accession=
                                         factor(c("A", "A", "A", "B", "B"),
                                                    levels=LETTERS[1:10])))
    expect_equal(nQuants(m, group=fData(m)$accession), qu)

    ## real world example
    data(msnset)
    pa <- fData(msnset)$ProteinAccession
    upa <- unique(pa)
    qu <- matrix(1, nrow=length(upa), ncol=ncol(msnset),
                 dimnames=list(upa[order(upa)], sampleNames(msnset)))
    qu[c("ECA0435", "ECA0469", "ECA3349", "ECA3566", "ECA4026"),] <- 2
    qu["BSA",] <- 3
    qu["ENO",] <- c(4, 4, 3, 4)
    qu["ECA4514",] <- 6
    expect_equal(nQuants(msnset, pa), qu)

    ## more levels than items present in the factor
    qu <- matrix(1, nrow=10, ncol=4,
                 dimnames=list(as.character(pa[order(pa[1:10])]),
                                            sampleNames(msnset)))
    expect_equal(nQuants(msnset[1:10], pa[1:10]), qu)
})

test_that("featureCV", {
    m <- new("MSnSet",
             exprs=matrix(1:10, nrow=5, ncol=2),
             featureData=new("AnnotatedDataFrame",
                             data=data.frame(accession=
                                             factor(c("A", "A", "A", "B", "B")))))
    cv <- matrix(c(0.5, sqrt(0.5)/4.5, 1/7, sqrt(0.5)/9.5),
                 nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2")))
    expect_equal(featureCV(m, group=fData(m)$accession, norm="none"), cv)

    ## more levels than items present in the factor
    m@featureData <- new("AnnotatedDataFrame",
                         data=data.frame(accession=
                                         factor(c("A", "A", "A", "B", "B"),
                                                    levels=LETTERS[1:10])))
    expect_equal(featureCV(m, group=fData(m)$accession, norm="none"), cv)

    i <- list(1:3, 4:5, 6:8, 9:10)
    div <- rowSums(exprs(m))
    cv <- matrix(sapply(i, function(ii) {
                   sd((exprs(m)/div)[ii]/mean((exprs(m)/div)[ii])) }),
                 nrow=2, ncol=2, dimnames=list(c("A", "B"), c("CV.1", "CV.2")))
    expect_equal(featureCV(m, group=fData(m)$accession, norm="sum"), cv)

    cv1 <- featureCV(m, group=fData(m)$accession)
    cv2 <- featureCV(m, group=fData(m)$accession, suffix = "2")
    expect_identical(colnames(cv1), sub("\\.2", "", colnames(cv2)))
})

context("MSnSet identification data")

test_that("addIdentificationData", {
  quantFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
                   full.name = TRUE, pattern = "mzXML$")
  identFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
                   full.name = TRUE, pattern = "dummyiTRAQ.mzid")
  aa <- extdata_mzXML_in_mem_ms2
  msnset <- quantify(aa, method = "trap", reporters = iTRAQ4,
                     BPPARAM = SerialParam(),
                     verbose = FALSE)
  fd <- fData(addIdentificationData(msnset, identFile, verbose = FALSE))
  expect_equal(fd$spectrum, 1:5)
  expect_equal(fd$file, rep(1, 5))
  expect_equal(fd$acquisition.number, 1:5)
  expect_equal(fd$sequence,
               c("VESITARHGEVLQLRPK", "IDGQWVTHQWLKK",
                 NA, NA, "LVILLFR"))
  expect_equal(fd$DatabaseAccess,
               c("ECA0984", "ECA1028", NA, NA, "ECA0510"))
  expect_equal(fd$idFile, c("dummyiTRAQ.mzid", "dummyiTRAQ.mzid", NA, NA,
                            "dummyiTRAQ.mzid"))
  expect_equal(fd$npsm.prot, c(1, 1, NA, NA, 1))
  expect_equal(fd$npsm.pep, c(1, 1, NA, NA, 1))
  expect_equal(fd$npep.prot, c(1, 1, NA, NA, 1))
  expect_equal(fd$nprot, c(1, 1, NA, NA, 1))
})

test_that("idSummary", {
  quantFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
                   full.name = TRUE, pattern = "mzXML$")
  identFile <- dir(system.file(package = "MSnbase", dir = "extdata"),
                   full.name = TRUE, pattern = "dummyiTRAQ.mzid")
  ## aa <- readMSData(quantFile, centroided. = FALSE)
  aa <- extdata_mzXML_in_mem_ms2
  msnset <- quantify(aa, method = "trap", reporters = iTRAQ4,
                     BPPARAM = SerialParam())
  bb <- addIdentificationData(msnset, identFile)
  expect_error(idSummary(aa), "No quantification/identification data found")
  expect_equal(idSummary(bb),
               data.frame(spectrumFile = "dummyiTRAQ.mzXML",
                          idFile = "dummyiTRAQ.mzid",
                          coverage = 0.6,
                          stringsAsFactors = FALSE))
})


test_that("commonFeatureNames works with lists or MSnSetLists", {
    data(tan2009r1, package = "pRolocdata")
    data(tan2009r2, package = "pRolocdata")
    data(tan2009r3, package = "pRolocdata")
    ## both below show work exactly the same
    res1 <- commonFeatureNames(MSnSetList(list(tan2009r1, tan2009r2)))
    res2 <- commonFeatureNames(list(tan2009r1, tan2009r2))
    res1 <- lapply(res1, MSnbase:::nologging)
    res2 <- lapply(res2, MSnbase:::nologging)
    expect_equal(res1, res2)
    res3 <- commonFeatureNames(MSnSetList(list(tan2009r1 = tan2009r1,
                                               tan2009r2 = tan2009r2)))
    res4 <- commonFeatureNames(list(tan2009r1 = tan2009r1,
                                    tan2009r2 = tan2009r2))
    res3 <- lapply(res3, MSnbase:::nologging)
    res4 <- lapply(res4, MSnbase:::nologging)
    expect_equal(res3, res4)
})

test_that("keeping common features", {
    data(tan2009r1, package = "pRolocdata")
    data(tan2009r2, package = "pRolocdata")
    data(tan2009r3, package = "pRolocdata")
    res0 <- commonFeatureNames(tan2009r1, tan2009r1)
    ## @qual and @processingData@processing will be different
    ## expect_equal(res0[[1]], res0[[2]]) ## uncomment after release <2017-04-18 Tue>
    res01 <- res0[[1]]
    res01@qual <- tan2009r1@qual
    res01 <- MSnbase:::nologging(res01)
    expect_equal(res01, tan2009r1)
    res1 <- commonFeatureNames(tan2009r1, tan2009r2)
    ## both below show work exactly the same
    res2 <- commonFeatureNames(MSnSetList(list(tan2009r1, tan2009r2)))
    res2 <- commonFeatureNames(list(tan2009r1, tan2009r2))
    res3 <- commonFeatureNames(list(tan2009r1 = tan2009r1,
                                    tan2009r2 = tan2009r2))
    ## the only expected difference are
    ## names and .@processingData@processing
    res1 <- lapply(res1, MSnbase:::nologging)
    res2 <- lapply(res2, MSnbase:::nologging)
    expect_equal(msnsets(res1), msnsets(res2),
                 check.attributes = FALSE)
    expect_equal(names(res2), as.character(1:2))
    expect_equal(names(res1), names(res3))
})

test_that("Combine with fun or 'fun'", {
    aa <- new("MSnSet",
              exprs = matrix(
                  c(rnorm(10, 4, 0.0001),
                    rnorm(10, 10, 0.0001)),
                  nrow = 10, byrow = TRUE),
              featureData = new("AnnotatedDataFrame",
                                data = data.frame(
                                    A = rep(c("A", "B"), each = 5),
                                    B = paste(
                                        rep(c("A", "B"), each = 5),
                                        1:10,
                                        sep = "."))))
    expect_true(validObject(aa))
    gb <- factor(rep(1:2, each = 5))
    xchar <- combineFeatures(aa, gb, "sum")
    xfun <- combineFeatures(aa, gb, sum)
    expect_equal(exprs(xchar), exprs(xfun))
})


test_that("Robust summary and sample names order (bug PR# 349)", {
    ## This identified the bug
    data(msnset)
    msnset2 <- msnset <- log(filterNA(msnset), 2)
    ## Expected results
    res0 <- combineFeatures(msnset,
                            fcol = "ProteinAccession",
                            method = "robust")
    ## Identify the bug
    sampleNames(msnset2)[1] <- "zzz"
    res2 <- combineFeatures(msnset2,
                            fcol = "ProteinAccession",
                            method = "robust")
    ## Re-set sample name
    sampleNames(res2) <- sampleNames(res0)
    expect_equal(exprs(res0), exprs(res2))
})


test_that("aggvar, son of ragnar", {
    e <- matrix(1:9, nrow = 3)
    colnames(e) <- letters[1:3]
    rownames(e) <- 1:3
    f <- data.frame(gb = c("A", "A", "B"),
                    row.names = rownames(e))
    p <- data.frame(row.names = colnames(e))
    x <- MSnSet(exprs = e, fData = f, pData = p)
    res1 <- aggvar(x, "gb", max)
    expect_identical(max(dist(exprs(x)[1:2, ])), res1[1, 1])
    expect_true(is.na(res1[2, 1]))
    expect_identical(res1[, 2], c(A = 2, B = 1))
})

test_that("nFeatures are added correctly", {
    data("hyperLOPIT2015ms3r1psm", package = "pRolocdata")
    k0 <- k <- table(fData(hyperLOPIT2015ms3r1psm)$Protein.Group.Accessions)
    k <- k[as.character(fData(hyperLOPIT2015ms3r1psm)[, "Protein.Group.Accessions"])]
    res <- nFeatures(hyperLOPIT2015ms3r1psm, "Protein.Group.Accessions")
    expect_equivalent(k, fData(res)$Protein.Group.Accessions.nFeatures)
    expect_error(nFeatures(res, "Protein.Group.Accessions"),
                 "'Protein.Group.Accessions.nFeatures' already present.")
    expect_error(nFeatures(hyperLOPIT2015ms3r1psm, "foo"))
    tmp <- fData(res)$Protein.Group.Accessions.nFeatures
    names(tmp) <- fData(res)$Protein.Group.Accessions
    sel <- !duplicated(names(tmp))
    g <- tmp[sel]
    expect_equivalent(g[sort(names(g))], k0[sort(names(k0))])
})
Any scripts or data that you put into this service are public.
MSnbase documentation built on Jan. 23, 2021, 2 a.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
MSnbase
Base Functions and Classes for Mass Spectrometry and Proteomics

tests/testthat/test_MSnSet.R
In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics

Try the MSnbase package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MSnbase Base Functions and Classes for Mass Spectrometry and Proteomics

tests/testthat/test_MSnSet.R In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics

Try the MSnbase package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

MSnbase
Base Functions and Classes for Mass Spectrometry and Proteomics

tests/testthat/test_MSnSet.R
In MSnbase: Base Functions and Classes for Mass Spectrometry and Proteomics