megastudyDT <- data.table('study.id'=c('a','a','a','b','b','b'),
'study.author'=c('Cool Guy', 'Cool Guy', 'Cool Guy', 'Uncool Guy', 'Uncool Guy', 'Uncool Guy'),
'collection.id'=c(1,1,2,1,2,2),
'collection.attractant'=c('A','A','B','C','D','D'),
'sample.id'=c(1,2,3,4,5,6),
'sample.species'=c('species1','species2','species1','species1','species1','species2'),
'sample.sex'=c('male','male','female','male','female','male'),
'sample.specimen_count'=c(10,20,15,15,10,20),
'assay.id'=c(11,12,13,14,15,16),
'assay.pathogen_prevalence'=c(.1,.2,.3,.4,.5,.6),
'assay.pathogen_presence'=c('Yes','Yes','No','No','Yes','No'),
'assay.pathogen2_presence'=c('Yes','No','Yes','No','Yes','No'),
'assay.pathogen3_presence'=c('No','Yes','No','Yes','No','Yes'),
'assay.weighting_variable'=c(5,10,15,20,25,30))
## the collectionsDT needs to include all collections, unless explicitly filtered against
## it also needs to tell us values for any collection variables that are in the plot
## were trying to impute samples, based on collections that dont have samples.
## so the only way to get collection variables values for those samples, is to get them from the collectionsDT
collectionsDT <- data.table(
'study.id' = c('a', 'a', 'a','b', 'b', 'b', 'b'),
'collection.id' = c(1, 2, 3, 1, 2, 3, 4),
'collection.attractant' = c('A', 'B', 'C', 'C', 'D', 'D', 'E'))
sexVocabs.dt <- data.table::data.table(
'study.id' = c('a', 'a', 'a', 'a', 'a', 'b', 'b'),
'sample.sex' = c('female', 'male', 'non-binary', 'other', 'do not wish to specify', 'male', 'female'))
sexVocabs <- veupathUtils::StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='sex'),
studyVocab=sexVocabs.dt
)
sexVocabsSMALL.dt <- data.table::data.table(
'study.id' = c('a', 'a', 'b', 'b'),
'sample.sex' = c('female', 'male', 'male', 'female'))
sexVocabsSMALL <- veupathUtils::StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='sex'),
studyVocab=sexVocabsSMALL.dt
)
sexVocabsSingleStudy.dt <- data.table::data.table(
'study.id' = c('a', 'a', 'a', 'a', 'a'),
'sample.sex' = c('female', 'male', 'non-binary', 'other', 'do not wish to specify'))
sexVocabsSingleStudy <- veupathUtils::StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='sex'),
studyVocab=sexVocabsSingleStudy.dt
)
speciesVocabs.dt <- data.table::data.table(
'study.id' = c('a', 'a', 'a', 'b', 'b', 'b'),
'sample.species' = c('species1', 'species2', 'species3', 'species1', 'species2', 'species5'))
speciesVocabs <- veupathUtils::StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='species'),
studyVocab=speciesVocabs.dt
)
speciesVocabsSMALL.dt <- data.table::data.table(
'study.id' = c('a', 'a', 'b', 'b'),
'sample.species' = c('species1', 'species2', 'species1', 'species2'))
speciesVocabsSMALL <- veupathUtils::StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=veupathUtils::VariableSpec(entityId='sample',variableId='species'),
studyVocab=speciesVocabsSMALL.dt
)
pathogenVocabs.dt <- data.table::data.table(
'study.id' = c('a', 'a', 'b', 'b'),
'assay.pathogen_presence' = c('Yes', 'No', 'Yes', 'No'))
pathogenVocabs <- veupathUtils::StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=veupathUtils::VariableSpec(entityId='assay',variableId='pathogen_presence'),
studyVocab=pathogenVocabs.dt
)
test_that("Megastudy and associated validation works", {
# works at all
m <- Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs)))
expect_equal(slotNames(m), c('data','ancestorIdColumns','studySpecificVocabularies','collectionsDT'))
expect_equal(length(m@studySpecificVocabularies), 1)
expect_equal(data.table::uniqueN(m@studySpecificVocabularies[[1]]@studyVocab[,1]), 2)
expect_equal(slotNames(m@studySpecificVocabularies[[1]]), c("studyIdColumnName","variableSpec","studyVocab"))
# works w multiple vocab lists
m <- Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
expect_equal(slotNames(m), c('data','ancestorIdColumns','studySpecificVocabularies','collectionsDT'))
expect_equal(length(m@studySpecificVocabularies), 2)
expect_equal(data.table::uniqueN(m@studySpecificVocabularies[[2]]@studyVocab[,1]), 2)
expect_equal(slotNames(m@studySpecificVocabularies[[2]]), c("studyIdColumnName","variableSpec","studyVocab"))
# errs if no ancestors/ids?
expect_error(Megastudy(data=megastudyDT,
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=NA,
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=NULL,
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=c(),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=character(),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=NA_character_,
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
# errs if extra ancestor ids provided but not in dt
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id', 'imNOTan.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs))))
# errs if special vocabs on different entities
expect_error(Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, pathogenVocabs))))
# errs if studyIdColumnName and varSpec col name not in dt
expect_error(StudySpecificVocabulariesByVariable(
studyIdColumnName='imNOTan.id',
variableSpec=VariableSpec(entityId='sample',variableId='species'),
vocabulary=speciesVocabs.dt)
)
# errs if no study id provided for study vocab
expect_error(StudySpecificVocabulariesByVariable(variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=NA, variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=NULL, variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=c(), variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=character(), variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName=NA_character_, variableSpec=VariableSpec(entityId='sample',variableId='species'), studyVocab=speciesVocabs.dt))
# errs if no var spec provided for study vocab
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName='study.id', studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName='study.id', variableSpec=NA, studyVocab=speciesVocabs.dt))
expect_error(StudySpecificVocabulariesByVariable(studyIdColumnName='study.id', variableSpec=NULL, studyVocab=speciesVocabs.dt))
})
# TODO this could go in its own file maybe
test_that("imputeZeroes method is sane", {
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant', 'study.author'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
# case where neither study nor collection vars in the plot
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
# result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
# 5 sexes * 3 species in study A (15) + 2 sexes * 3 species in study B (6) * 2 collections per study = 42
expect_equal(nrow(imputedDT), 42)
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
# case where some collection ids are missing
# in real life, we have some collections where all samples are 0 and so not loaded
# in this case, the collection ids are missing from the data table R gets handed.
# we want to impute zeroes for their samples anyhow.
# in this version, some collection level variables are in both data and collectionsDT
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)),
collectionsDT=collectionsDT)
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
# result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
# 5 sexes * 3 species * 3 collections in study A (45) + 2 sexes * 3 species * 4 collections in study B (30) = 69
expect_equal(nrow(imputedDT), 69) ## TODO check these numbers
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 63)
# in this version, collection level variables are only in collectionsDT
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)),
collectionsDT=collectionsDT)
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
# result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
# 5 sexes * 3 species * 3 collections in study A (45) + 2 sexes * 3 species * 4 collections in study B (30) = 69
expect_equal(nrow(imputedDT), 69) ## TODO check these numbers
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 63)
#################################################################################################################
# case where one study vocab is missing a study
mDTSexSingleStudy <- megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant', 'study.author'), with=FALSE]
mDTSexSingleStudy$sample.sex[mDTSexSingleStudy$study.id == 'b'] <- NA_character_
mDTSexSingleStudy <- unique(mDTSexSingleStudy)
mSexSingleStudy <- Megastudy(data=mDTSexSingleStudy,
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabsSingleStudy)))
imputedDT <- getDTWithImputedZeroes(mSexSingleStudy, variables, FALSE)
# result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
expect_equal(all(c("sample.species","sample.specimen_count") %in% names(imputedDT)), TRUE)
# 5 sexes * 3 species in study A (15) + 3 species in study B * 2 collections per study = 42
expect_equal(nrow(imputedDT), 36)
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 30)
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.sex', 'sample.species', 'collection.attractant', 'study.author'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
# collection entity var is present
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(c("sample.species","sample.specimen_count","collection.attractant") %in% names(imputedDT)), TRUE)
expect_equal(nrow(imputedDT), 42)
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
# both collection and study entity vars are present
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(c("sample.species","sample.specimen_count","collection.attractant","study.author") %in% names(imputedDT)), TRUE)
expect_equal(nrow(imputedDT), 42)
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
# all values in vocab already present
megastudyDTSMALL <- rbind(megastudyDT,
data.table::data.table(study.id=c('a','b'),
study.author=c('Cool Guy','Uncool Guy'),
collection.id=c(2,1),
collection.attractant=c('B','C'),
sample.id=c(7,8),
sample.species=c('species2','species2'),
sample.sex=c('female','female'),
sample.specimen_count=c(5,5),
assay.id=c(17,18),
assay.pathogen_prevalence=c(.7,.8),
assay.pathogen_presence=c('No','Yes'),
assay.pathogen2_presence=c('Yes','No'),
assay.pathogen3_presence=c('No','Yes'),
assay.weighting_variable=c(35,40)))
mCOMPLETE <- Megastudy(data=megastudyDTSMALL[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.species', 'sample.sex', 'collection.attractant', 'study.author'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabsSMALL, sexVocabsSMALL)))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(mCOMPLETE, variables, FALSE)
expect_equal(all(names(imputedDT) %in% names(mCOMPLETE@data)), TRUE)
# 2 species * 2 sexes * 2 collections * 2 studies = 16
expect_equal(nrow(imputedDT), 16)
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 8)
# no weighting var in plot
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(mCOMPLETE, variables, FALSE)
expect_equal(all(names(imputedDT) %in% names(mCOMPLETE@data)), TRUE)
expect_equal(nrow(imputedDT), nrow(mCOMPLETE@data))
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
# an assay var is present
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id','assay.id', 'assay.pathogen_prevalence', 'sample.species', 'sample.sex', 'collection.attractant', 'study.author')],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs, sexVocabs)))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
expect_equal(nrow(imputedDT), nrow(m@data))
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'pathogen_presence', entityId = 'assay'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# empty plotReference means that it is not plotted
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE)
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
expect_equal(nrow(imputedDT), nrow(m@data))
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
# multiple special vocabs in same plot, w one shared weighting var
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'sample.specimen_count', 'sample.species', 'sample.sex', 'collection.attractant'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs,sexVocabs)))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary=TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'))
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(c("sample.species","sample.specimen_count","sample.sex","collection.attractant") %in% names(imputedDT)), TRUE)
expect_equal(nrow(imputedDT), 42)
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 36)
# special vocab on sample, regular weighting var on assay
# phase this in
# m <- Megastudy(data=megastudyDT,
# ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'),
# studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs)))
#
# variables <- new("VariableMetadataList", SimpleList(
# new("VariableMetadata",
# variableClass = new("VariableClass", value = 'native'),
# variableSpec = new("VariableSpec", variableId = 'weighting_variable', entityId = 'assay'),
# plotReference = new("PlotReference", value = 'xAxis'),
# dataType = new("DataType", value = 'NUMBER'),
# dataShape = new("DataShape", value = 'CONTINUOUS')),
# new("VariableMetadata",
# variableClass = new("VariableClass", value = 'native'),
# variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
# plotReference = new("PlotReference", value = 'yAxis'),
# dataType = new("DataType", value = 'NUMBER'),
# dataShape = new("DataShape", value = 'CONTINUOUS')),
# new("VariableMetadata",
# variableClass = new("VariableClass", value = 'native'),
# variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
# plotReference = new("PlotReference", value = 'overlay'),
# dataType = new("DataType", value = 'STRING'),
# dataShape = new("DataShape", value = 'CATEGORICAL'),
# weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
# hasStudyDependentVocabulary=TRUE),
# new("VariableMetadata",
# variableClass = new("VariableClass", value = 'native'),
# variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
# plotReference = new("PlotReference", value = 'facet1'),
# dataType = new("DataType", value = 'STRING'),
# dataShape = new("DataShape", value = 'CATEGORICAL'),
# weightingVariableSpec = VariableSpec(variableId='weighting_variable',entityId='assay'),
# hasStudyDependentVocabulary = FALSE)
# ))
#
# imputedDT <- getDTWithImputedZeroes(m, variables)
# expect_equal(names(imputedDT), c("sample.species","sample.specimen_count","collection.attractant","study.author","study.id","collection.id","sample.id"))
# expect_equal(nrow(imputedDT), 12)
# expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 6)
#
# no special vocab or weight present
m <- Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'))
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
expect_equal(nrow(imputedDT), nrow(m@data))
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
# only regular weight var present
m <- Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id','assay.id'))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'pathogen_prevalence', entityId = 'assay'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS'),
weightingVariableSpec = VariableSpec(variableId='weighting_variable',entityId='assay')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'author', entityId = 'study'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'))
))
imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
expect_equal(all(names(imputedDT) %in% names(m@data)), TRUE)
expect_equal(nrow(imputedDT), nrow(m@data))
expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 0)
# sample var without a study vocab present when another sample var has one
m <- Megastudy(data=megastudyDT,
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(speciesVocabs)))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'species', entityId = 'sample'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS')),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'sex', entityId = 'sample'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'attractant', entityId = 'collection'),
plotReference = new("PlotReference", value = 'facet1'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'))
))
expect_error(getDTWithImputedZeroes(m, variables, FALSE))
# variable collection exists in plot
pathogenVariableCollectionVocabs.dt <- data.table::data.table(
study.id = c('a','a','b','b'),
assay.pathogen_presence_variable_collection = c('Yes','No','Yes','No')
)
pathogenVariableCollectionVocabs <- StudySpecificVocabulariesByVariable(
studyIdColumnName='study.id',
variableSpec=VariableSpec(entityId='assay',variableId='pathogen_presence_variable_collection'),
studyVocab=pathogenVariableCollectionVocabs.dt
)
m <- Megastudy(data=megastudyDT[, c('study.id', 'collection.id', 'sample.id', 'assay.id', 'sample.specimen_count', 'collection.attractant', 'study.author', 'assay.pathogen_presence', 'assay.pathogen2_presence', 'assay.pathogen3_presence'), with=FALSE],
ancestorIdColumns=c('study.id', 'collection.id', 'sample.id', 'assay.id'),
studySpecificVocabularies=StudySpecificVocabulariesByVariableList(S4Vectors::SimpleList(pathogenVariableCollectionVocabs)))
variables <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'pathogen_presence_variable_collection', entityId = 'assay'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
isCollection = TRUE,
weightingVariableSpec = VariableSpec(variableId='specimen_count',entityId='sample'),
hasStudyDependentVocabulary = TRUE,
members = VariableSpecList(S4Vectors::SimpleList(VariableSpec(variableId='pathogen_presence', entityId='assay'),
VariableSpec(variableId='pathogen2_presence', entityId='assay'),
VariableSpec(variableId='pathogen3_presence', entityId='assay')))
),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'specimen_count', entityId = 'sample'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS'))
))
## Im inclined to commenting this for now. i dont think we have a case like this yet,
## and im not even completely sure yet what the api would be for when we do...
#imputedDT <- getDTWithImputedZeroes(m, variables, FALSE)
# result has the columns needed to build a plot, based on variables AND the correct number of rows/ zeroes
#expect_equal(all(c("assay.pathogen_presence","assay.pathogen2_presence","assay.pathogen3_presence","sample.specimen_count") %in% names(imputedDT)), TRUE)
# 2 studies * 2 collections per study * 2 values for each of 3 pathogen variables = 32?
# im not sure this test makes any sense, bc were imputing 0 on a sample for a collection on assay
# im going to comment until we see a real use case
#expect_equal(nrow(imputedDT), 32)
#expect_equal(nrow(imputedDT[imputedDT$sample.specimen_count == 0]), 26)
})
test_that("we have reasonable perf w a real-ish use case", {
megastudyVariablesReal <- new("VariableMetadataList", SimpleList(
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'POPBIO_8000017', entityId = 'EUPATH_0000609'),
plotReference = new("PlotReference", value = 'yAxis'),
dataType = new("DataType", value = 'NUMBER'),
dataShape = new("DataShape", value = 'CONTINUOUS'),
hasStudyDependentVocabulary = FALSE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'PATO_0000047', entityId = 'EUPATH_0000609'),
plotReference = new("PlotReference", value = 'xAxis'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'OBI_0001909', entityId = 'EUPATH_0000609'),
plotReference = new("PlotReference", value = 'overlay'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'UBERON_0000105', entityId = 'EUPATH_0000609'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
hasStudyDependentVocabulary = TRUE),
new("VariableMetadata",
variableClass = new("VariableClass", value = 'native'),
variableSpec = new("VariableSpec", variableId = 'EUPATH_0043227', entityId = 'EUPATH_0000609'),
dataType = new("DataType", value = 'STRING'),
dataShape = new("DataShape", value = 'CATEGORICAL'),
weightingVariableSpec = VariableSpec(variableId='POPBIO_8000017',entityId='EUPATH_0000609'),
hasStudyDependentVocabulary = TRUE)
))
## minimal use case, with real vocabularies. based on a heavily subsetted megastudy.
megastudyReal <- Megastudy(
data=megastudyDataReal,
ancestorIdColumns=c(
'EUPATH_0000605.Study_stable_id',
'GAZ_00000448.GeographicLocation_stable_id',
'OBI_0000659.ParentOfSample_stable_id',
'EUPATH_0000609.Sample_stable_id'
),
studySpecificVocabularies=studyVocabsReal
)
benchmark <- microbenchmark::microbenchmark(getDTWithImputedZeroes(megastudyReal, megastudyVariablesReal, verbose = FALSE))
print(benchmark)
expect_true(mean(benchmark$time)/1000000 < 55) ## this is in milliseconds
expect_true(median(benchmark$time)/1000000 < 55)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.