inst/tinytest/test-StoxAnalyticalBaselineFunctions.R

#
# Test adding length-group
#
StoxBioticWLengthGroup <- RstoxFDA:::AddLengthGroupStoxBiotic(RstoxFDA::CatchLotteryExample, LengthInterval=5, LengthGroupVariable="LengthGroup", LeftOpen=TRUE)
expect_true(all(c("(15,20]", "(20,25]", "(25,30]", "(30,35]") %in% StoxBioticWLengthGroup$Individual$LengthGroup))
StoxBioticWLengthGroup <- RstoxFDA:::AddLengthGroupStoxBiotic(RstoxFDA::CatchLotteryExample, LengthInterval=0.5, LengthGroupVariable="LengthGroup", LeftOpen=FALSE)
expect_true(all(c("[16,16.5)", "[16.5,17)", "[33.5,34)") %in% StoxBioticWLengthGroup$Individual$LengthGroup))

#
# Test sampling parameters
#

designParamsFile <- system.file("testresources", "lotteryParameters", "lotteryDesignNSH.txt", package="RstoxFDA")

#regular read:
designParams <- RstoxFDA:::ReadPSUSamplingParameters( designParamsFile)
expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParams))
expect_equal(nrow(designParams$SelectionTable), 64)
expect_equal(nrow(designParams$SampleTable), 1)
expect_equal(ncol(designParams$StratificationVariables), 1)
expect_equal(nrow(designParams$StratificationVariables), 1)
expect_equal(sum(designParams$SelectionTable$HTsamplingWeight), 1)
expect_equal(sum(designParams$SelectionTable$HHsamplingWeight), 1)

designParamsFileStratified <- system.file("testresources", "lotteryParameters", "lotteryDesignNSHstrata.txt", package="RstoxFDA")
designParamsStratified <- RstoxFDA:::ReadPSUSamplingParameters(designParamsFileStratified)
expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParamsStratified))
expect_equal(nrow(designParamsStratified$SelectionTable), 64)
expect_equal(nrow(designParamsStratified$SampleTable), 1)
expect_equal(ncol(designParamsStratified$StratificationVariables), 2)
expect_equal(nrow(designParamsStratified$StratificationVariables), 1)
expect_equal(sum(designParamsStratified$SelectionTable$HTsamplingWeight), 1)
expect_equal(sum(designParamsStratified$SelectionTable$HHsamplingWeight), 1)

# test assignment to data
expect_error(RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "MissingAtRandom"), "Argument \'DataRecordId\' must be provided.")
expect_error(RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "Haul", "Sample", "MissingAtRandom"), "The column provided for 'DataRecordId' ")
expect_error(RstoxFDA::AssignPSUSamplingParameters(designParams, RstoxFDA::CatchLotteryExample, "HaulKey", "Haul", "MissingAtRandom"), "The 'SamplingUnitId' ")
ex <- RstoxFDA::CatchLotteryExample
designParamsCorrected <- RstoxFDA::AssignPSUSamplingParameters(designParams, ex, "serialnumber", "Haul", "MissingAtRandom")
expect_equal(sum(designParamsCorrected$SelectionTable$HTsamplingWeight),1)
expect_equal(sum(designParamsCorrected$SelectionTable$HHsamplingWeight),1)
#HT should be approximately the same after non-response correction
expect_true(abs((sum(1/designParamsCorrected$SelectionTable$InclusionProbability)-sum(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1)
#HH should be approximately the same after non-response correction
expect_true(abs((mean(1/designParamsCorrected$SelectionTable$InclusionProbability)-mean(1/designParams$SelectionTable$InclusionProbability))/sum(1/designParamsCorrected$SelectionTable$InclusionProbability))<0.1)


#define pps-parameters from data
calculatedPps <- RstoxFDA::ComputePSUSamplingParameters(RstoxFDA::CatchLotteryExample, 
                                                        "ProportionalPoissonSampling", 
                                                        "serialnumber", StratumName = 
                                                          "Nordsjo", Quota = 124*1e6, 
                                                        ExpectedSampleSize = 110)
expect_true(RstoxFDA:::is.PSUSamplingParametersData(calculatedPps))
comp <- merge(calculatedPps$SelectionTable, 
              RstoxFDA::CatchLotterySamplingExample$SelectionTable
              , by=c("Stratum", "SamplingUnitId"))
comp <- comp[!(comp$SamplingUnitId %in% c("38408", "38409"))] #remove catches that has had weights corrected or something
expect_true(all(comp$SelectionProbability.x - comp$SelectionProbability.y < 1e-10))
expect_true(all(comp$InclusionProbability.x - comp$InclusionProbability.y < 1e-10))

# add stratification columns
expect_error(RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(VesselLengthGroup="o15m", VesselFlag="NOR")), "Invalid 'StratificationVariablesTable'")
expect_error(RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum=c("s1", "s2"), VesselLengthGroup=c("o15m","o15m"), VesselFlag=c("NOR","NOR"))), "Not all strata in 'StratificationVariablesTable' exist in 'PSUSamplingParametersData'.")
calculatedPps2 <- calculatedPps
calculatedPps2$StratificationVariables <- data.table::data.table(Stratum=c("s1","s2"))
expect_error(RstoxFDA:::AddPsuStratificationVariables(calculatedPps2, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum=c("s1", "s2"), VesselLengthGroup=c("o15m","o15m"), VesselFlag=c("NOR","NOR"))), "Stratification variables does not identify strata. Several strata overlap with")

calculatedPpsStrat <- RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum="Nordsjo", VesselFlag="NOR"))
expect_true(all(c("Stratum", "VesselFlag") %in% names(calculatedPpsStrat$StratificationVariables)))
expect_equal(nrow(calculatedPpsStrat$StratificationVariables), 1)

calculatedPpsStrat <- RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum="Nordsjo", VesselLengthGroup="o15m", VesselFlag="NOR"))
expect_true(all(c("Stratum", "VesselLengthGroup", "VesselFlag") %in% names(calculatedPpsStrat$StratificationVariables)))
expect_equal(nrow(calculatedPpsStrat$StratificationVariables), 1)

calculatedPpsStrat <- RstoxFDA:::AddPsuStratificationVariables(calculatedPps, StratificationVariables = c("VesselLengthGroup", "VesselFlag"), StratificationVariablesTable = data.table::data.table(Stratum=c("Nordsjo", "Nordsjo"), VesselLengthGroup=c("u15m","o15m"), VesselFlag=c("NOR")))
expect_true(all(c("Stratum", "VesselLengthGroup", "VesselFlag") %in% names(calculatedPpsStrat$StratificationVariables)))
expect_equal(nrow(calculatedPpsStrat$StratificationVariables), 2)

#sampling weights are different, because of the two removed hauls above, but should be proportional
expect_true(length(unique(round(comp$HHsamplingWeight.x / comp$HHsamplingWeight.y, digits=10)))==1)
expect_true(length(unique(round(comp$HTsamplingWeight.x / comp$HTsamplingWeight.y, digits=10)))==1)

#define equal prob fixed-size selection from data
suppressWarnings(StoxBioticData <- RstoxData::StoxBiotic(RstoxData::ReadBiotic(system.file("testresources", "biotic_v3_example.xml", package="RstoxFDA"))))
designParamsSB <- RstoxFDA:::ComputePSUSamplingParameters(StoxBioticData=StoxBioticData, SamplingUnitId = "Sample", StratificationColumns = c("SpeciesCategoryKey"))
expect_true(RstoxFDA:::is.PSUSamplingParametersData(designParamsSB))
#compare names of output with stratification variables to output without

expect_true(all(!is.na(designParamsSB$SelectionTable$HHsamplingWeight)))
expect_true(all(designParamsSB$SampleTable$SelectionMethod=="FSWR"))
#some check that method and incprob is reasonable.

expect_true(all(names(designParamsSB$SampleTable) == names(designParams$SampleTable)))
expect_true(all(names(designParamsSB$SelectionTable) == names(designParams$SelectionTable)))

expect_equal(nrow(designParamsSB$SelectionTable), 14)
expect_equal(nrow(designParamsSB$SampleTable), 9)
expect_equal(ncol(designParamsSB$StratificationVariables), 2)
expect_equal(nrow(designParamsSB$StratificationVariables), 9)

#
# Prepare dataset with sub-sampled parameters
#
ds <- RstoxFDA::StoxBioticDataExample
ds$Individual$IndividualAge[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] <- as.numeric(NA)
ds$Individual$IndividualRoundWeight[rep(c(TRUE,FALSE), nrow(ds$Individual)/2)] <- as.numeric(NA)
ds$Sample$CatchFractionNumber[is.na(ds$Sample$CatchFractionNumber)] <- 1000

#Define Individual design, SRS
expect_error(RstoxFDA:::ComputeIndividualSamplingParameters(ds, "SRS"))
srs <- RstoxFDA:::ComputeIndividualSamplingParameters(ds, "SRS", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"))
expect_true(RstoxFDA:::is.IndividualSamplingParametersData(srs))
weights <- srs$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))

#Define Individual design, Length stratified
expect_error(RstoxFDA:::ComputeIndividualSamplingParameters(ds, "LengthStratified", c("IndividualAge", "IndividualTotalLength", "IndividualRoundWeight"), LengthInterval = 5), "'IndividualTotalLength' may not be among the variables in 'Parameters' for length-stratified sampling.")
ls<-RstoxFDA:::ComputeIndividualSamplingParameters(ds, "LengthStratified", c("IndividualAge", "IndividualRoundWeight"), LengthInterval = 5)

expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ls))
weights <- ls$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))

expect_error(RstoxFDA:::collapseStrataIndividualDesignParamaters(ls, "LengthStratum"), "Cannot collapse unsampled strata. Unsampled strata exists for samples:")

#Define Individual design, stratified, setting strata by length as in Length stratified
bioStrat <- ds
bioStrat$Individual$LStrat <- as.character(cut(bioStrat$Individual$IndividualTotalLength, seq(0,max(bioStrat$Individual$IndividualTotalLength)+5,5), right = F))
ss<-RstoxFDA:::ComputeIndividualSamplingParameters(bioStrat, "Stratified", c("IndividualAge", "IndividualRoundWeight"), StratificationColumns = c("LStrat"))
expect_true(RstoxFDA:::is.IndividualSamplingParametersData(ss))
weights <- ss$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))

#check that length stratified and stratified is consistent.
expect_equal(ss$SelectionTable$InclusionProbability[[4]], ls$SelectionTable$InclusionProbability[[4]])
expect_true(srs$SelectionTable$InclusionProbability[[4]] != ls$SelectionTable$InclusionProbability[[4]])
expect_equal(nrow(ss$SampleTable), nrow(ls$SampleTable))

#
# Test collapse strata
#

ss <- ds
ss$Individual$IndividualSex[is.na(ss$Individual$IndividualSex)] <- "Unkown"
ls <- RstoxFDA:::ComputeIndividualSamplingParameters(ss, "Stratified", c("IndividualTotalLength"), StratificationColumns = "IndividualSex")
weights <- ls$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))

lsCol <- RstoxFDA:::CollapseStrata(ls, c("SpeciesCategory", "IndividualSex"))
expect_true(nrow(lsCol$SampleTable)==nrow(ls$SampleTable))
expect_true(nrow(lsCol$SelectionTable)==nrow(ls$SelectionTable))
expect_true(ncol(lsCol$StratificationVariables)==ncol(ls$StratificationVariables))
weights <- lsCol$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))

lsCol <- RstoxFDA:::CollapseStrata(ls, c())
expect_true(nrow(lsCol$SampleTable)<nrow(ls$SampleTable))
expect_true(nrow(lsCol$SelectionTable)==nrow(ls$SelectionTable))
expect_true(ncol(lsCol$StratificationVariables)<ncol(ls$StratificationVariables))
expect_true(ncol(lsCol$StratificationVariables)==2)
weights <- lsCol$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))
expect_true(all(lsCol$StratificationVariables$Stratum=="All"))

lsCol <- RstoxFDA:::CollapseStrata(ls, c("IndividualSex"))
expect_true(nrow(lsCol$SampleTable)==nrow(ls$SampleTable)) #original stratification columns are redundant
expect_true(nrow(lsCol$SelectionTable)==nrow(ls$SelectionTable))
expect_true(ncol(lsCol$StratificationVariables)<ncol(ls$StratificationVariables))
expect_true(ncol(lsCol$StratificationVariables)==3)
weights <- lsCol$SelectionTable[,list(meanN=sum(HTsamplingWeight)), by=c("Stratum", "SampleId")]
expect_true(all(abs(weights$meanN-1) < 1e-6))




#test estimate with HansenHurwitzDomainEstimate, read params
data <- RstoxFDA::CatchLotteryExample
indSampling <- RstoxFDA:::ComputeIndividualSamplingParameters(data, "SRS", c("IndividualAge"))

psuSampling <- RstoxFDA::CatchLotterySamplingExample

psuEst <- RstoxFDA:::AnalyticalPSUEstimate(data, indSampling, "IndividualRoundWeight")

expect_equal(sum(psuEst$SampleCount$nIndividuals), nrow(indSampling$SelectionTable))
diffs <- psuEst$Abundance$Abundance*psuEst$Variables$Mean - psuEst$Variables$Total
expect_true(all(diffs[!is.nan(diffs)] < 1e-6))
expect_true(sum(!is.nan(diffs)) > 0)
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(data, indSampling, "IndividualRoundWeight", c("IndividualAge", "IndividualSex"))
diffs <- psuEst$Abundance$Abundance*psuEst$Variables$Mean - psuEst$Variables$Total
expect_true(all(diffs[!is.nan(diffs)] < 1e-6))
expect_true(sum(!is.nan(diffs)) > 0)

#
# Test domain estimates
#

ss <- ds
ss$Individual$IndividualSex[is.na(ss$Individual$IndividualSex)] <- "Unkown"
ls <- RstoxFDA:::ComputeIndividualSamplingParameters(ss, "SRS", c("IndividualTotalLength"))

psuEstDom <- RstoxFDA:::AnalyticalPSUEstimate(ss, ls, "IndividualTotalLength", c("IndividualSex", "IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, ls, "IndividualTotalLength")
expect_true(nrow(psuEstDom$Abundance)>nrow(psuEst$Abundance))
expect_true(nrow(psuEstDom$Variables)>nrow(psuEst$Variables))
expect_true(ncol(psuEstDom$DomainVariables)==3)
expect_true(nrow(psuEstDom$Abundance)==nrow(psuEstDom$StratificationVariables)*nrow(psuEstDom$DomainVariables))

expect_true(all(psuEst$Abundance$Frequency-1<1e-6))
expect_true(mean(psuEstDom$Abundance$Frequency)>1e-2)

totalBySampleDom <- psuEstDom$Abundance[,list(total=sum(Abundance)), by="SampleId"]
totalBySampleTot <- psuEst$Abundance[,list(total=sum(Abundance)), by="SampleId"]
tot <- merge(totalBySampleDom, totalBySampleTot, by="SampleId")
expect_true(all(abs(tot$total.x-tot$total.y)/tot$total.x<1e-6))

totalLengthBySampleDom <- psuEstDom$Variables[Variable=="IndividualTotalLength",list(total=sum(Total)), by="SampleId"]
totalLengthBySampleTot <- psuEst$Variables[Variable=="IndividualTotalLength",list(total=sum(Total)), by="SampleId"]
tot <- merge(totalLengthBySampleDom, totalLengthBySampleTot, by="SampleId")
expect_true(all(abs(tot$total.x-tot$total.y)/tot$total.x<1e-6))

#
# Test stratified estimates
#

ls <- RstoxFDA:::ComputeIndividualSamplingParameters(ss, "Stratified", c("IndividualTotalLength"), StratificationColumns = "IndividualSex")
lsCol <- RstoxFDA:::collapseStrataIndividualDesignParamaters(ls, c("SpeciesCategory", "IndividualSex"))

psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, lsCol, "IndividualTotalLength", c("IndividualSex"))
expect_true(nrow(psuEst$DomainVariables)==3)
expect_true(ncol(psuEst$DomainVariables)==2)
expect_true(sum(is.na(psuEst$Abundance$Abundance))==0)
expect_true(sum(!is.na(psuEst$Abundance$Abundance))>0)
expect_true(sum(is.na(psuEst$Abundance$Frequency))==0)
expect_true(sum(!is.na(psuEst$Abundance$Frequency))>0)
expect_true(sum(is.na(psuEst$Variables$Total))==0)
expect_true(sum(!is.na(psuEst$Variables$Mean))>0)

#check that unsampled strata gives NAs
lengthStratMissingStrata <-  RstoxFDA:::ComputeIndividualSamplingParameters(ss, "LengthStratified", c("IndividualAge"), LengthInterval = 5)
expect_warning(psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, lengthStratMissingStrata, "IndividualRoundWeight", c("IndividualAge")), "Not all strata are sampled. Estimates will not be provided for some strata for SampleIds:")

expect_true(nrow(psuEst$DomainVariables)==length(unique(ss$Individual$IndividualAge[!is.na(ss$Individual$IndividualAge)])))
expect_true(ncol(psuEst$DomainVariables)==2)

expect_true(sum(is.na(psuEst$Abundance$Abundance))>0)
expect_true(sum(!is.na(psuEst$Abundance$Abundance))>0)
expect_true(sum(is.na(psuEst$Abundance$Frequency))>0)
expect_true(sum(!is.na(psuEst$Abundance$Frequency))>0)
expect_true(sum(is.na(psuEst$Variables$Total))>0)
expect_true(sum(!is.na(psuEst$Variables$Mean))>0)

naAbundance <- psuEst$Abundance[is.na(psuEst$Abundance$Abundance),]
unSampled <- lengthStratMissingStrata$SampleTable[N>0 & n==0]
expect_true(nrow(naAbundance)==nrow(unSampled)*nrow(psuEst$DomainVariables))

#
# Test LiftStrata
#
sexStrat <-  RstoxFDA:::ComputeIndividualSamplingParameters(ss, "Stratified", c("IndividualAge"), StratificationColumns = "IndividualSex")
expect_warning(psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, sexStrat, "IndividualRoundWeight", c("IndividualSex")), "Not all strata are sampled. Estimates will not be provided for some strata for SampleIds:")
expect_true(sum(is.na(psuEst$Variables$Domain))==0)
psuEstLifted <- RstoxFDA:::LiftStrata(psuEst)
expect_true(RstoxFDA:::is.AnalyticalPSUEstimateData(psuEstLifted))
ssnacheck <- merge(psuEstLifted$Abundance, psuEstLifted$SampleCount, by=c("SampleId", "Stratum", "Domain"), all.x=T)
expect_true(!any(is.na(ssnacheck$nIndividuals)))
expect_true(nrow(psuEstLifted$Abundance)==nrow(psuEstLifted$Variables))
expect_true(nrow(psuEstLifted$Abundance)==length(unique(sexStrat$StratificationVariables$Stratum))*length(unique(sexStrat$SampleTable$SampleId))*length(unique(ss$Individual$IndividualSex)))
expect_true(nrow(psuEstLifted$StratificationVariables)==length(unique(sexStrat$StratificationVariables$Stratum))*length(unique(sexStrat$SampleTable$SampleId)))

#
# Test AnalyticalPopulationEstimate
#

stationDesign <- RstoxFDA:::ComputePSUSamplingParameters(StoxBioticData = ss, SamplingUnitId = "Haul", StratificationColumns = "Gear")
sexStrat <-  RstoxFDA:::ComputeIndividualSamplingParameters(ss, "Stratified", c("IndividualAge"), StratificationColumns = "IndividualSex")
expect_warning(psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, sexStrat, "IndividualRoundWeight", c("IndividualSex")), "Not all strata are sampled. Estimates will not be provided for some strata for SampleIds:")
psuEst <- RstoxFDA:::LiftStrata(psuEst)
expect_error(popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst), "Cannot estimate. Abundance- or total-estimates are not provided for all samples in 'AnalyticalPSUEstimateData'. Missing for SamplingUnitIds")

#Test that Abundance and Frequency are NA for unsampled strata (Domain Sex is Unsampled for strata unkown sex)
unsampled <- merge(psuEst$Abundance, sexStrat$SampleTable[n==0], by=c("SampleId", "Stratum"))
expect_true(nrow(unsampled)>0)
expect_true(all(is.na(psuEst$Abundance[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Abundance)))
expect_true(all(is.na(psuEst$Abundance[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Frequency)))
expect_true(!any(is.nan(psuEst$Abundance[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Abundance)))
expect_true(!any(is.nan(psuEst$Abundance[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Frequency)))

#Test that Mean and Total NA for unsampled strata
unsampled <- merge(psuEst$Variables, sexStrat$SampleTable[n==0], by=c("SampleId", "Stratum"))
expect_true(nrow(unsampled)>0)
expect_true(all(is.na(psuEst$Variables[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Total)))
expect_true(all(is.na(psuEst$Variables[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Mean)))
expect_true(!any(is.nan(psuEst$Variables[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Total)))
expect_true(!any(is.nan(psuEst$Variables[paste(SampleId, Stratum) %in% paste(unsampled$SampleId, unsampled$Stratum)]$Mean)))

#Test that Mean is NaN and Total is 0 for zero-abundance domains
sexStrat <-  RstoxFDA:::ComputeIndividualSamplingParameters(ss, "Stratified", c("IndividualTotalLength"), StratificationColumns = "IndividualSex")
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, sexStrat, "IndividualTotalLength", c("IndividualSex"))
psuEst <- RstoxFDA:::LiftStrata(psuEst)
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)
popEstMeanOfMeans <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst, MeanOfMeans = T)

expect_equal(sum(popEst$SampleCount$nIndividuals), sum(psuEst$SampleCount$nIndividuals))

zeroAbund <- popEstMeanOfMeans$Abundance[popEstMeanOfMeans$Abundance$Frequency==0]
NaNMeans <- popEstMeanOfMeans$Variables[is.nan(popEstMeanOfMeans$Variables$Mean)]
zeroTotals <- popEstMeanOfMeans$Variables[popEstMeanOfMeans$Variables$Total==0]
expect_true(nrow(zeroAbund)>0)
expect_true(nrow(zeroAbund)==nrow(NaNMeans))


#
# Test unsampled PSU strata
#

stationDesign <- RstoxFDA:::ComputePSUSamplingParameters(StoxBioticData = ss, SamplingUnitId = "Haul", StratificationColumns = "Gear")
stationDesign$SampleTable$n[stationDesign$SampleTable$Stratum==40]<-0
stationDesign$SelectionTable <- stationDesign$SelectionTable[Stratum!="40",]
sexStrat <-  RstoxFDA:::ComputeIndividualSamplingParameters(ss, "Stratified", c("IndividualAge"), StratificationColumns = "IndividualSex")
expect_warning(psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ss, sexStrat, "IndividualRoundWeight", c("IndividualSex")), "Not all strata are sampled. Estimates will not be provided for some strata for SampleIds:")
psuEst <- RstoxFDA:::LiftStrata(psuEst)
psuEst$Abundance$Frequency[is.na(psuEst$Abundance$Frequency)] <- 0
psuEst$Variables$Mean[is.na(psuEst$Variables$Mean) & !is.nan(psuEst$Variables$Mean)] <- .1
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst, MeanOfMeans = T)

nastrataAbundance <- popEst$Abundance$Stratum[is.na(popEst$Abundance$Frequency)]
expect_true(length(grep("PSU-stratum:40", nastrataAbundance))==length(nastrataAbundance))
nastrataVariables <- popEst$Variables$Stratum[is.na(popEst$Variables$Frequency)]
expect_true(length(grep("PSU-stratum:40", nastrataVariables))==length(nastrataVariables))
nastrataAbundanceCovar <- popEst$AbundanceCovariance$Stratum[is.na(popEst$AbundanceCovariance$Frequency)]
expect_true(length(grep("PSU-stratum:40", nastrataAbundanceCovar))==length(nastrataAbundanceCovar))
nastrataVariableCovar <- popEst$VariablesCovariance$Stratum[is.na(popEst$VariablesCovariance$Frequency)]
expect_true(length(grep("PSU-stratum:40", nastrataVariableCovar))==length(nastrataVariableCovar))

#
# Test herring example here. 
#
stationDesign <- RstoxFDA::CatchLotterySamplingExample
ex <- RstoxFDA::CatchLotteryExample
ex$SpeciesCategory$SpeciesCategory <- "061104"
ex$Individual$IW <- ex$Individual$IndividualRoundWeight #for testing that covariances equal variances when appropriate
ex$Individual$one <- 1 #for testing that variable covariance equal abundance covariance when appropriate.
stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <-  RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"))
expect_true(abs(sum(psuEst$Abundance$Abundance) - sum(ex$Sample$CatchFractionNumber))/sum(ex$Sample$CatchFractionNumber) < 1e-3)
popEstAgeDomain <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)
popEstMeanOfMeans <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst, MeanOfMeans = T)
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)
expect_true(abs(popEst$Variables$Total-125183088936) < 1) #g, not checked for correctness, just there to detect if anything changes. Offical landings over 15m were 133137498 kg within 7% difference
expect_true(abs(sum(popEstAgeDomain$Abundance$Abundance)-popEst$Abundance$Abundance)<1e-6)

#mean of means should be different, but not too different. Set a reasonable range, re-check if test fails
maxDIffMeanOfMean <- max((popEstMeanOfMeans$Variables$Mean-popEstAgeDomain$Variables$Mean)/popEstMeanOfMeans$Variables$Mean)
expect_true(maxDIffMeanOfMean >.01)
expect_true(maxDIffMeanOfMean <.1)

#test PSU domains
psuEstPD <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"), c(), c("Gear"))
popEstPD <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEstPD)
expect_true(abs(popEst$Abundance$Abundance - sum(popEstPD$Abundance$Abundance))/popEst$Abundance$Abundance < 1e-2)
expect_true(abs(popEst$Variables$Total - sum(popEstPD$Variables$Total))/popEst$Variables$Total < 1e-2)

#test PSU domains, computed PSU sampling parameters
ex2 <- RstoxFDA::CatchLotteryExample
ex2$SpeciesCategory$SpeciesCategory <- "061104"
stationDesignComp <- RstoxFDA::ComputePSUSamplingParameters(StoxBioticData = ex2, "ProportionalPoissonSampling", "serialnumber", StratumName = "Nordsjo", Quota = 124*1e6, ExpectedSampleSize = 110)
stationDesignComp <- RstoxFDA::AssignPSUSamplingParameters(stationDesignComp, ex2, "serialnumber", "Haul", "MissingAtRandom")

psuEstCompPD <- RstoxFDA:::AnalyticalPSUEstimate(ex2, srs, c("IndividualRoundWeight"), c("IndividualAge"), c("Gear"))
popEstCompPD <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesignComp, psuEstCompPD)
expect_true(abs(sum(popEstCompPD$Abundance$Abundance) - sum(popEstPD$Abundance$Abundance))/sum(popEstCompPD$Abundance$Abundance) < 1e-2)
#
# Test ratio estimation
#
psuEstDomain <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"), "IndividualAge")
popEstDomain <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEstDomain)

CVs <- merge(popEstDomain$Abundance, popEstDomain$AbundanceCovariance[Domain1==Domain2], by.x=c("Stratum", "Domain"), by.y=c("Stratum", "Domain1"))
CVs$CV <- sqrt(CVs$AbundanceCovariance) / CVs$Abundance
expect_true(min(CVs$CV)<.2)

#some annotation and recoding for for testing purposes (does not correspond to actual gear mapping)
land <- RstoxFDA::CatchLotteryLandingExample
land$Landing$SpeciesCategory <- "061104"
ex$Haul$Gear[ex$Haul$Gear %in% c("3500", "3600")] <- "51"
ex$Haul$Gear[ex$Haul$Gear %in% c("3700")] <- "53"
ex$Haul$Gear[ex$Haul$Gear %in% c("3100")] <- "11"

# add tiny error to Mean to ensure that it is recalculated
popEstDomain$Variables$Mean <- popEstDomain$Variables$Mean + 1e-3

#
# Landings for ratio-estimation tests
#

psuBySex <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"), "IndividualSex")
popBySex <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuBySex)
ll <- land
ll$Landing$IndividualSex <- "M"

expect_error(RstoxFDA:::AnalyticalRatioEstimate(popBySex, ll, "IndividualRoundWeight", StratificationVariables = "Sex"), "Some Stratification Variables or Domain Variables could not be matched with landings")
ll <- land
ll$Landing$SpeciesCategory <- 1
expect_error(RstoxFDA:::AnalyticalRatioEstimate(popEstDomain, ll, "IndividualRoundWeight", StratificationVariables = "SpeciesCategory"), "None of the landing partitions")

ll <- land
ll$Landing$SpeciesCategory <- NULL
expect_error(RstoxFDA:::AnalyticalRatioEstimate(popEstDomain, ll, "IndividualRoundWeight", StratificationVariables = "SpeciesCategory"), "Some Stratification Variables or Domain Variables could not be matched with landings")

# Test year as stratification variable
popEstDomainYear <- popEstDomain
popEstDomainYear$StratificationVariables$Year <- "2022"

result <- RstoxFDA:::AnalyticalRatioEstimate(popEstDomainYear, ll, "IndividualRoundWeight",  StratificationVariables = "Year")
expect_true(all(result$StratificationVariables$Year == "2022"))

#
# Test total catch estimates (no ind domains)
#

psuEstNoD <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"))
popEstNoD <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEstNoD)

ratioEst <- RstoxFDA:::AnalyticalRatioEstimate(popEstNoD, land, "IndividualRoundWeight",  StratificationVariables = "SpeciesCategory")

#error should be close to zero, since domain coincides with strata
expect_true(abs(ratioEst$Variables$Total - sum(land$Landing$RoundWeight)*1000)/ratioEst$Variables$Total<1e-6)
expect_true(sqrt(ratioEst$VariablesCovariance$TotalCovariance) / ratioEst$Variables$Total < 5e-2)


#
# Test with MeanDomainWeights
#

#
# Input tests MeanDomainWeight
#

psuEstDomain <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"), c("IndividualAge", "Gear"))
popEstDomain <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEstDomain)

#total estimate, disregard matching domain in ratio estimate
expect_warning(ratioEstTotal <- RstoxFDA:::AnalyticalRatioEstimate(popEstDomain, land, "IndividualRoundWeight", StratificationVariables = "SpeciesCategory"))
comp <- merge(ratioEstTotal$Abundance, popEstDomain$Abundance, by=c("Stratum", "Domain"))

#check that cells with zero landings are inferred to have zero abundance
landWoG53 <- land
landWoG53$Landing <- landWoG53$Landing[get("Gear")!=53,]

ratioEst0Landings <- RstoxFDA:::AnalyticalRatioEstimate(popEstDomain, landWoG53, "IndividualRoundWeight", StratificationVariables = "SpeciesCategory", DomainVariables = "Gear")

tab <- merge(ratioEst0Landings$Abundance, ratioEst0Landings$DomainVariables, by = "Domain")
expect_equal(sum(tab$Abundance[tab$Gear==53]),0)
expect_equal(sum(tab$Frequency[tab$Gear==53]),0)

tab <- merge(ratioEst0Landings$Variables, ratioEst0Landings$DomainVariables, by = "Domain")
expect_equal(sum(tab$Total[tab$Gear==53]),0)
expect_true(mean(tab$Mean[tab$Gear==53])>0)

#everything should be scaled with same factor
expect_true(var((comp$Abundance.x-comp$Abundance.y)/comp$Abundance.x)<1e-10)

ratioEstMDW <- RstoxFDA:::AnalyticalRatioEstimate(popEstDomain, land, "IndividualRoundWeight", StratificationVariables = "SpeciesCategory", DomainVariables = "Gear")
comp <- merge(ratioEstMDW$Abundance, popEstDomain$Abundance, by=c("Stratum", "Domain"))

#everything should not be scaled with same factor
expect_true(var((comp$Abundance.x-comp$Abundance.y)/comp$Abundance.x)>10)

#check that total abundance estimates are in the same ballpark (catch rounding errors etc)
expect_true(abs(sum(ratioEstMDW$Abundance$Abundance) - sum(popEstDomain$Abundance$Abundance))/sum(ratioEstMDW$Abundance$Abundance) < .2)
#check that some CVs are in reasonable range
CVs <- merge(ratioEstMDW$Abundance, ratioEstMDW$AbundanceCovariance[Domain1==Domain2], by.x=c("Stratum", "Domain"), by.y=c("Stratum", "Domain1"))
CVs$CV <- sqrt(CVs$AbundanceCovariance) / CVs$Abundance
expect_true(min(CVs$CV)<.2)

#check that total and total are changed in accordance with difference between estimated total weight and landing total weight
expect_true((sum(ratioEstMDW$Variables$Total) - sum(popEstDomain$Variables$Total))/sum(ratioEstMDW$Variables$Total) - (sum(popEstDomain$Variables$Total) - sum(land$Landing$RoundWeight*1000)/sum(land$Landing$RoundWeight*1000)) < 1e-6)

#check that some CVs are in reasonable range
CVs <- merge(ratioEstMDW$Variables, ratioEstMDW$VariablesCovariance[Domain1==Domain2], by.x=c("Stratum", "Domain"), by.y=c("Stratum", "Domain1"))
CVs$CV <- sqrt(CVs$TotalCovariance) / CVs$Total
expect_true(min(CVs$CV)<.2)

#check that Means and Covariances are unchanged
expect_true(all(ratioEstMDW$Variables$Mean == popEstDomain$Variables$Mean))
expect_true(all(ratioEstMDW$VariablesCovariance$MeanCovariance[!is.nan(ratioEstMDW$VariablesCovariance$MeanCovariance)] == popEstDomain$VariablesCovariance$MeanCovariance[!is.nan(popEstDomain$VariablesCovariance$MeanCovariance)]))


#
# Correctness test for a minimal example
# Constructed from the example at example at: https://online.stat.psu.edu/stat506/node/15/
#
miniEx <- stationDesign
miniEx$SampleTable$N <- 15650
miniEx$SampleTable$n <- 3
miniEx$SampleTable$SelectionMethod <- "FSWR"

miniEx$SelectionTable <- miniEx$SelectionTable[1:3]
miniEx$SelectionTable$InclusionProbability <- as.numeric(NA)
miniEx$SelectionTable$HTsamplingWeight <- as.numeric(NA)
miniEx$SelectionTable$SelectionProbability[1] <- 650/15650
miniEx$SelectionTable$SelectionProbability[2] <- 2840/15650
miniEx$SelectionTable$SelectionProbability[3] <- 3200/15650
miniEx$SelectionTable$HHsamplingWeight <- 1/(miniEx$SelectionTable$SelectionProbability*sum(1/miniEx$SelectionTable$SelectionProbability))

miniExInd <- srs
miniExInd$SampleTable$N[miniExInd$SampleTable$SampleId %in% miniEx$SelectionTable$SamplingUnitId[1]] <- 420
miniExInd$SampleTable$N[miniExInd$SampleTable$SampleId %in% miniEx$SelectionTable$SamplingUnitId[2]] <- 1785
miniExInd$SampleTable$N[miniExInd$SampleTable$SampleId %in% miniEx$SelectionTable$SamplingUnitId[3]] <- 2198
miniExInd$SampleTable <- miniExInd$SampleTable[SampleId %in% miniEx$SelectionTable$SamplingUnitId,]
miniExInd$SelectionTable <- miniExInd$SelectionTable[SampleId %in% miniEx$SelectionTable$SamplingUnitId,]
miniExInd$StratificationVariables <- miniExInd$StratificationVariables[SampleId %in% miniEx$SelectionTable$SamplingUnitId,]

miniExInd$SelectionTable$InclusionProbability[miniExInd$SelectionTable$SampleId %in% miniEx$SelectionTable$SamplingUnitId[1]] <- 38/420
miniExInd$SelectionTable$InclusionProbability[miniExInd$SelectionTable$SampleId %in% miniEx$SelectionTable$SamplingUnitId[2]] <- 60/1785
miniExInd$SelectionTable$InclusionProbability[miniExInd$SelectionTable$SampleId %in% miniEx$SelectionTable$SamplingUnitId[3]] <- 25/2198

psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight", "IW", "IndividualTotalLength", "one"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(miniEx, psuEst)
popEstMeanOfMeans <- RstoxFDA:::AnalyticalPopulationEstimate(miniEx, psuEst, MeanOfMeans = T)

expect_true(abs(popEst$Abundance$Abundance - 10232.75)<1e-2)
expect_true(abs(popEst$Abundance$Abundance * popEst$Variables$Mean[popEst$Variables$Variable=="IndividualRoundWeight"] - popEst$Variables$Total[popEst$Variables$Variable=="IndividualRoundWeight"]) < 1e-6)

#check annotation of PSU domains
expect_error(RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight"), "IndividualSex" ,"Gears"), "All PSUDomainVariables must be columns in StoxBioticData. The following are not valid: Gears")
psuEstDomain <- RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight"), "IndividualSex" ,"Gear")
expect_equal(nrow(psuEstDomain$PSUDomainVariables),3)
expect_error(RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight"), "IndividualSex" ,"IndividualSex"), "PSUDomainVariables must be unique to each PSU. Duplicates found for IndividualSexfor PSUs:")
psuEstDomain <- RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight"), "IndividualSex" , c("Gear", "SpeciesCategory"))
psuEstDomain <- RstoxFDA:::LiftStrata(psuEstDomain)
expect_equal(ncol(psuEstDomain$PSUDomainVariables), 4)
psuEstDomain <- RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight"), "IndividualSex")
expect_true(all(psuEstDomain$PSUDomainVariables$PSUDomain=="All"))

#check that domainEst sums to total est
psuEstDomain <- RstoxFDA:::AnalyticalPSUEstimate(ex, miniExInd, c("IndividualRoundWeight"), "IndividualSex")
popEstDomain <- RstoxFDA:::AnalyticalPopulationEstimate(miniEx, psuEstDomain)

expect_true(length(unique(popEstDomain$Abundance$Domain))>1)
expect_true(abs(sum(popEstDomain$Abundance$Abundance)-sum(popEst$Abundance$Abundance))<1e-6)
expect_true(abs(sum(popEstDomain$Variables$Total)-sum(popEst$Variables$Total[popEst$Variables$Variable=="IndividualRoundWeight"]))<1e-6)

#check that frequencies add to one for unstratified estimate
expect_true(abs(sum(popEstDomain$Abundance$Frequency)-1) < 1e-6)
expect_true(abs(sum(popEst$Abundance$Frequency)-1) < 1e-6)

#check that means are consistent between domain estimate and total estimate
expect_true(abs(sum(popEstDomain$Variables$Mean*popEstDomain$Abundance$Abundance, na.rm=T)/sum(popEstDomain$Abundance$Abundance) - popEst$Variables$Mean[popEst$Variables$Variable=="IndividualRoundWeight"])<1e-6)

#check correctness univariate variance
expect_true((abs(popEst$AbundanceCovariance$AbundanceCovariance - 73125.74) / 73125.74) < 0.001)

#check that covariance is identical to variance when variables are completely aligned (IW vs IndividualRoundWeight)
filt1 <- popEst$VariablesCovariance$Variable1=="IW" & popEst$VariablesCovariance$Variable2=="IndividualRoundWeight"
filt2 <- popEst$VariablesCovariance$Variable1=="IW" & popEst$VariablesCovariance$Variable2=="IW"
#expect_true(abs(popEst$VariablesCovariance$TotalCovariance[filt1] - popEst$VariablesCovariance$TotalCovariance[filt2])<1e-6)

#check that covariance is not identical to variance when variables are not completely aligned (IW vs IndividualTotalLength)
#expect_true(abs(popEst$VariablesCovariance$TotalCovariance[popEst$VariablesCovariance$Variable1=="IW" & popEst$VariablesCovariance$Variable2=="IndividualTotalLength"] - popEst$VariablesCovariance$TotalCovariance[filt2])>1)
#check that variable covariance equal abundance covariance for a variable that is always set to 1.

#expect_true(abs(popEst$VariablesCovariance$TotalCovariance[popEst$VariablesCovariance$Variable1=="one" & popEst$VariablesCovariance$Variable2=="one"] - popEst$AbundanceCovariance$AbundanceCovariance)<1e-6)
#expect_true(abs(popEst$VariablesCovariance$MeanCovariance[popEst$VariablesCovariance$Variable1=="one" & popEst$VariablesCovariance$Variable2=="one"] - popEst$AbundanceCovariance$FrequencyCovariance)<1e-6)
#stop("Above fails in online checks. Expected TRUE, but got logical of length 0. Figure out what is going on.")

#check that Mean of Means estimates have higher variance than the other option.
#this is probably not generally guaranteed, but seem to work for this example
all(popEst$VariablesCovariance$MeanCovariance < popEstMeanOfMeans$VariablesCovariance$MeanCovariance)

#
# Test domain coverage expansion
#

land <- RstoxFDA::CatchLotteryLandingExample
stationDesign <- RstoxFDA::CatchLotterySamplingExample
ex <- RstoxFDA::CatchLotteryExample
ex$Haul$Gear[ex$Haul$Gear=="3600"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3700"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3100"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3500"] <- "53"
stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <-  RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"), c("Gear"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)

expandedPopEst <- RstoxFDA:::InterpolateAnalyticalDomainEstimates(popEst, land, "Strict", "Gear")
expect_equal(sum(expandedPopEst$SampleCount$nPSU), sum(popEst$SampleCount$nPSU))
expect_equal(sum(expandedPopEst$SampleCount$nIndividuals), sum(popEst$SampleCount$nIndividuals))
expect_equal(nrow(expandedPopEst$Variables),nrow(popEst$Variables)*3) #added domain for gear 11 and gear 51
expect_equal(nrow(expandedPopEst$Abundance),nrow(popEst$Abundance)*3)
expect_equal(nrow(expandedPopEst$AbundanceCovariance), (nrow(popEst$Abundance)*3)*(nrow(popEst$Abundance)*3-1)/2+nrow(popEst$Abundance)*3) #all domain combinations
expect_equal(nrow(expandedPopEst$VariablesCovariance), nrow(expandedPopEst$AbundanceCovariance)*3) #three variable combinations
expect_equal(nrow(expandedPopEst$DomainVariables), nrow(popEst$DomainVariables)*3)
expect_equal(nrow(expandedPopEst$StratificationVariables), nrow(popEst$StratificationVariables))
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))

eps <- 1e-3

expandedPopEst <- RstoxFDA:::InterpolateAnalyticalDomainEstimates(popEst, land, "StratumMean", "Gear", eps)
expect_equal(nrow(expandedPopEst$Variables),nrow(popEst$Variables)*3) #added domain for gear 11 and gear 51
expect_equal(nrow(expandedPopEst$Abundance),nrow(popEst$Abundance)*3)
expect_equal(nrow(expandedPopEst$AbundanceCovariance), (nrow(popEst$Abundance)*3)*(nrow(popEst$Abundance)*3-1)/2+nrow(popEst$Abundance)*3) #all domain combinations
expect_equal(nrow(expandedPopEst$VariablesCovariance), nrow(expandedPopEst$AbundanceCovariance)*3) #three variable combinations
expect_equal(nrow(expandedPopEst$DomainVariables), nrow(popEst$DomainVariables)*3)
expect_equal(nrow(expandedPopEst$StratificationVariables), nrow(popEst$StratificationVariables))
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))
totalFreqDiff <- sum(expandedPopEst$Abundance$Frequency,na.rm=T)- sum(popEst$Abundance$Frequency)
expect_true(totalFreqDiff < eps)
expect_true(sum(expandedPopEst$Variables$Mean) > sum(popEst$Variables$Mean))
expect_true(sum(is.na(expandedPopEst$Abundance$Abundance))>0)
expect_true(sum(is.na(popEst$Abundance$Abundance))==0)
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))

#
# Test sample frame expansion
#
land <- RstoxFDA::CatchLotteryLandingExample
land$Landing$Frame <- "Sampling frame"
land$Landing$Frame[1:300] <- "OutOfFrame"
stationDesign <- RstoxFDA::CatchLotterySamplingExample
stationDesign$StratificationVariables$Frame <- "Sampling frame"
ex <- RstoxFDA::CatchLotteryExample
stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <-  RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)

# Test expanding along a single stratification variable
expandedPopEst <- RstoxFDA:::ExtendAnalyticalSamplingFrameCoverage(popEst, land, "Frame", "Strict", "Unsampled")
expect_equal(sum(expandedPopEst$SampleCount$nPSU), sum(popEst$SampleCount$nPSU))
expect_equal(sum(expandedPopEst$SampleCount$nIndividuals), sum(popEst$SampleCount$nIndividuals))
expect_equal(nrow(expandedPopEst$Abundance), nrow(popEst$Abundance)*2)
expect_equal(nrow(expandedPopEst$Variables), nrow(popEst$Variables)*2)
expect_equal(nrow(expandedPopEst$AbundanceCovariance), nrow(popEst$AbundanceCovariance)*2)
expect_equal(nrow(expandedPopEst$VariablesCovariance), nrow(popEst$VariablesCovariance)*2)
expect_equal(nrow(expandedPopEst$DomainVariables), nrow(popEst$DomainVariables))
expect_equal(nrow(expandedPopEst$StratificationVariables), nrow(popEst$StratificationVariables)*2)
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))

# Test expanding along several variables for both domain and stratification

#add stratification columns to landing
land <- RstoxFDA::CatchLotteryLandingExample
land$Landing$FrameVar1 <- "Sampling frame 1"
land$Landing$FrameVar1[1:300] <- "OutOfFrame 1"
land$Landing$FrameVar2 <- "Sampling frame 2-1"
land$Landing$FrameVar2[500:1000] <- "Sampling frame 2-2"
land$Landing$FrameVar2[1:150] <- "OutOfFrame 2-1"
land$Landing$FrameVar2[150:300] <- "OutOfFrame 2-2"

#add stratification columns to sampling design
stationDesign <- RstoxFDA::CatchLotterySamplingExample
stationDesign$StratificationVariables$FrameVar1 <- "Sampling frame"
stationDesign$StratificationVariables$FrameVar2 <- "Sampling frame 2-1"
stationDesign$StratificationVariables <- rbind(stationDesign$StratificationVariables,
                                               data.table::data.table(Stratum="Nordsjo2", CountryVessel="NOR", FrameVar1="Sampling frame", FrameVar2="Sampling frame 2-2"))
stationDesign$SelectionTable$Stratum[1:10] <- "Nordsjo2"
stationDesign$SampleTable$N <- stationDesign$SampleTable$N - 200
stationDesign$SampleTable$n <- stationDesign$SampleTable$n - 10
stationDesign$SampleTable <- rbind(stationDesign$SampleTable, data.table::data.table(Stratum="Nordsjo2", N=200, n=10, SelectionMethod="Poisson", FrameDescription=""))

ex <- RstoxFDA::CatchLotteryExample
ex$Haul$Gear[ex$Haul$Gear=="3600"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3700"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3100"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3500"] <- "11"
ex$Haul$Usage <- "2"
ex$Haul$Usage[1:2] <- "1"

stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <-  RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"), c("Gear", "Usage"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)

expect_error(RstoxFDA::AnalyticalRatioEstimate(popEst, land, "IndividualRoundWeight", StratificationVariables = "FrameVar2", DomainVariables = c("Gear", "Usage")), "Estimates missing for some domains in landings. Consider filtering data with")

domainExpanded <- RstoxFDA:::InterpolateAnalyticalDomainEstimates(popEst, land, "Strict", c("Gear", "Usage"), eps)

ss<-merge(domainExpanded$Abundance, domainExpanded$SampleCount, by=c("Stratum", "Domain"), all.x=T)
expect_true(!any(is.na(ss$nPSU)))
expect_true(!any(is.na(ss$nIndividuals)))
expect_equal(sum(domainExpanded$SampleCount$nPSU), sum(popEst$SampleCount$nPSU))
expect_equal(sum(domainExpanded$SampleCount$nIndividuals), sum(popEst$SampleCount$nIndividuals))

expandedPopEst <- RstoxFDA:::ExtendAnalyticalSamplingFrameCoverage(domainExpanded, land, c("FrameVar1", "FrameVar2"), "Strict", "Unsampled")
expect_true(nrow(expandedPopEst$Variables)>nrow(popEst$Variables)) 
expect_true(nrow(expandedPopEst$Abundance)>nrow(popEst$Abundance))
expect_true(nrow(expandedPopEst$AbundanceCovariance)> nrow(popEst$Abundance))
expect_equal(nrow(expandedPopEst$VariablesCovariance), nrow(expandedPopEst$AbundanceCovariance)*3) #three variable combinations
expect_true(nrow(expandedPopEst$DomainVariables)>nrow(popEst$DomainVariables))
expect_true(nrow(expandedPopEst$StratificationVariables)>nrow(popEst$StratificationVariables))
expect_equal(length(unique(expandedPopEst$StratificationVariables$Stratum)), length(unique(popEst$StratificationVariables$Stratum))+1)
expect_equal(sum(expandedPopEst$Abundance$Abundance,na.rm=T), sum(popEst$Abundance$Abundance))
expect_equal(sum(expandedPopEst$Abundance$Frequency,na.rm=T), sum(popEst$Abundance$Frequency))

#
# Test Stratum mean option for frame expansion
#
#add stratification columns to landing
land <- RstoxFDA::CatchLotteryLandingExample
land$Landing$FrameVar1 <- "Sampling frame 1"
land$Landing$FrameVar1[1:300] <- "OutOfFrame 1"
land$Landing$FrameVar2 <- "Sampling frame 2-1"
land$Landing$FrameVar2[500:1000] <- "Sampling frame 2-2"
land$Landing$FrameVar2[1:150] <- "OutOfFrame 2-1"
land$Landing$FrameVar2[150:300] <- "OutOfFrame 2-2"

#add stratification columns to sampling design
stationDesign <- RstoxFDA::CatchLotterySamplingExample
stationDesign$StratificationVariables$FrameVar1 <- "Sampling frame"
stationDesign$StratificationVariables$FrameVar2 <- "Sampling frame 2-1"
stationDesign$StratificationVariables <- rbind(stationDesign$StratificationVariables,
                                               data.table::data.table(Stratum="Nordsjo2", CountryVessel="NOR", FrameVar1="Sampling frame", FrameVar2="Sampling frame 2-2"))
stationDesign$SelectionTable$Stratum[1:10] <- "Nordsjo2"
stationDesign$SampleTable$N <- stationDesign$SampleTable$N - 200
stationDesign$SampleTable$n <- stationDesign$SampleTable$n - 10
stationDesign$SampleTable <- rbind(stationDesign$SampleTable, data.table::data.table(Stratum="Nordsjo2", N=200, n=10, SelectionMethod="Poisson", FrameDescription=""))

ex <- RstoxFDA::CatchLotteryExample
ex$Haul$Gear[ex$Haul$Gear=="3600"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3700"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3100"] <- "53"
ex$Haul$Gear[ex$Haul$Gear=="3500"] <- "11"
ex$Haul$Usage <- "2"
ex$Haul$Usage[1:2] <- "1"

stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <-  RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"), c("Gear", "Usage"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)

domainExpanded <- RstoxFDA:::InterpolateAnalyticalDomainEstimates(popEst, land, "StratumMean", c("Gear", "Usage"), eps)
expandedPopEst <- RstoxFDA:::ExtendAnalyticalSamplingFrameCoverage(domainExpanded, land, c("FrameVar1", "FrameVar2"), "SetToStratum", "Unsampled", "PSU-stratum:Nordsjo Lower-stratum:sild'G05/161722.G05/126417/Clupea harengus")

expect_true(nrow(expandedPopEst$Variables)>nrow(popEst$Variables)) 
expect_true(nrow(expandedPopEst$Abundance)>nrow(popEst$Abundance))
expect_true(nrow(expandedPopEst$AbundanceCovariance)> nrow(popEst$Abundance))
expect_equal(nrow(expandedPopEst$VariablesCovariance), nrow(expandedPopEst$AbundanceCovariance)*3) #three variable combinations
expect_true(nrow(expandedPopEst$DomainVariables)>nrow(popEst$DomainVariables))
expect_true(nrow(expandedPopEst$StratificationVariables)>nrow(popEst$StratificationVariables))
expect_equal(length(unique(expandedPopEst$StratificationVariables$Stratum)), length(unique(popEst$StratificationVariables$Stratum))+1)
expect_true(sum(expandedPopEst$Abundance$Frequency,na.rm=T)>sum(popEst$Abundance$Frequency))

nvar <- length(unique(expandedPopEst$VariablesCovariance$Variable1))
ndomain <- length(unique(expandedPopEst$VariablesCovariance$Domain1))
ncombo <- (nvar*(nvar+1)/2)*(ndomain*(ndomain+1)/2)
rowsByStratum <- expandedPopEst$VariablesCovariance[,.N,by="Stratum"]
expect_true(all(rowsByStratum$N==ncombo))

#check that age-proportions in new domains are all equal
freqtab <- merge(expandedPopEst$Abundance, expandedPopEst$DomainVariables)
freqTabIndGear <- freqtab[,list(FrequencyInd=mean(Frequency)), by=c("IndividualAge", "Gear","Usage")]
freqTabGear <- freqTabIndGear[,list(totalFreqGear=sum(FrequencyInd)),by=c("Gear", "Usage")]
freqTabIndGear <- merge(freqTabIndGear, freqTabGear, by=c("Gear", "Usage"))
freqTabIndGear$prop <- freqTabIndGear$FrequencyInd/freqTabIndGear$totalFreqGear
freqTabIndGearwOdomains <- merge(freqTabIndGear, popEst$DomainVariables, by=c("IndividualAge", "Gear", "Usage"), all.x=T)
freqTabIndNewDomains <- freqTabIndGearwOdomains[is.na(freqTabIndGearwOdomains$Domain),]
propVars <- freqTabIndNewDomains[,list(propVar=var(prop)),by="IndividualAge"]
expect_true(all(propVars$propVar==0))

#check that new domains amount to practically zero proportion
propByNewDom <- freqTabIndGearwOdomains[,list(totalIndFreq=sum(FrequencyInd)), by=list(newDomain=!is.na(get("Domain")))]
expect_true(propByNewDom$totalIndFreq[!propByNewDom$newDomain] / sum(propByNewDom$totalIndFreq)<1e-2)

#check that imputed means are reasonable (should be within ranges of original domains)

meantab <- merge(expandedPopEst$Variables, expandedPopEst$DomainVariables)
meantabwOdomains <- merge(meantab, popEst$DomainVariables, by=c("Gear", "Usage", "IndividualAge"), all.x=T)
meantabwOdomains <- meantabwOdomains[order(meantabwOdomains$Mean, meantabwOdomains$Domain.y),]
maxDomains <- meantabwOdomains[,list(maxMeanNewDomain=max(Mean[is.na(Domain.y)],na.rm=T), maxMeanOldDomain=max(Mean[!is.na(Domain.y)],na.rm=T)), by="IndividualAge"]
maxDomains$diff <- maxDomains$maxMeanOldDomain - maxDomains$maxMeanNewDomain
#check that highest mean for each age group is not an imputed domain
expect_true(all(maxDomains$diff/maxDomains$maxMeanOldDomain > -1e-3))

minDomains <- meantabwOdomains[,list(minMeanNewDomain=min(Mean[is.na(Domain.y)],na.rm=T), minMeanOldDomain=min(Mean[!is.na(Domain.y)],na.rm=T)), by="IndividualAge"]
minDomains$diff <- minDomains$minMeanOldDomain - minDomains$minMeanNewDomain
#check that highest mean for each age group is not an imputed domain
expect_true(all(minDomains$diff/minDomains$minMeanOldDomain < 1e-3))
#check that frequency cvs are in reasonable range
cvtabFreq <- merge(expandedPopEst$AbundanceCovariance[Domain1==Domain2], expandedPopEst$Abundance, by.x=c("Stratum", "Domain1"), by.y=c("Stratum", "Domain"))
cvtabFreq$cv <- sqrt(cvtabFreq$FrequencyCovariance) / cvtabFreq$Frequency
#check that unsampled domain has gotten a cv
expect_true(!is.na(cvtabFreq$cv[cvtabFreq$Domain1=="All/Gear:11/Usage:1/IndividualAge:1" & cvtabFreq$Stratum=="Unsampled"]))
cvtabFreq <- merge(cvtabFreq, expandedPopEst$DomainVariables, by.x=c("Domain1"), by.y=c("Domain"))
expect_true(mean(cvtabFreq$cv[cvtabFreq$IndividualAge==2],na.rm=T)<.3)

#check that frequency cvs are in reasonable range
cvtabMean <- merge(expandedPopEst$VariablesCovariance[Domain1==Domain2 & Variable1==Variable2], expandedPopEst$Variables, by.x=c("Stratum", "Domain1", "Variable1"), by.y=c("Stratum", "Domain", "Variable"))
cvtabMean$cv <- sqrt(cvtabMean$MeanCovariance) / cvtabMean$Mean
#check that unsampled domain has gotten a cv
expect_true(!is.na(cvtabMean$cv[cvtabMean$Domain1=="All/Gear:11/Usage:1/IndividualAge:1" & cvtabMean$Stratum=="Unsampled" & cvtabMean$Variable1=="IndividualRoundWeight"]))
cvtabMean <- merge(cvtabMean, expandedPopEst$DomainVariables, by.x=c("Domain1"), by.y=c("Domain"))
expect_true(mean(cvtabMean$cv[cvtabMean$IndividualAge==2],na.rm=T)<.3)

#
# check aggregation
#

#check with ratio estimation with several strata (based on tests above)
aggPopEst <- RstoxFDA:::AggregateAnalyticalEstimate(expandedPopEst, AggregateStratumName = "sampled", RetainStrata = "Unsampled")
expect_true(RstoxFDA:::is.AnalyticalPopulationEstimateData(aggPopEst))
expect_equal(sum(!is.na(aggPopEst$Abundance$Frequency)), sum(aggPopEst$Abundance$Stratum=="sampled")) #frequency estimated from abundance. NAs if some abundance in stratum is missing (aggreagate before ratio-estimate)
aggPopEstRatioEst <- RstoxFDA::AnalyticalRatioEstimate(aggPopEst, land, "IndividualRoundWeight", 
                                                       StratificationVariables = c("FrameVar1", "FrameVar2"),
                                                       DomainVariables = c("Gear", "Usage"))
expect_equal(sum(is.na(aggPopEstRatioEst$Abundance$Frequency)),0)
expect_true(RstoxFDA:::is.AnalyticalPopulationEstimateData(aggPopEstRatioEst))
expect_true(abs(sum(aggPopEstRatioEst$Variables$Total[aggPopEstRatioEst$Variables$Variable=="IndividualRoundWeight"],na.rm=T) / sum(land$Landing$RoundWeight) - 1000) < 1e-6)

indPrKgOriginal<-sum(popEst$Abundance$Abundance) / sum(popEst$Variables$Total[popEst$Variables$Variable=="IndividualRoundWeight"])
indPrKgAggPop <- sum(aggPopEstRatioEst$Abundance$Abundance) / sum(aggPopEstRatioEst$Variables$Total[aggPopEstRatioEst$Variables$Variable=="IndividualRoundWeight"],na.rm=T)

#check that some approximate invariants are OK
expect_true(abs(indPrKgAggPop-indPrKgOriginal)/indPrKgOriginal < 5e-2)

#
#checks aggregation with herring example with added stratification
#
stationDesign <- RstoxFDA::CatchLotterySamplingExample
stationDesign$StratificationVariables <- rbind(stationDesign$StratificationVariables, data.table::data.table(Stratum="S2", CountryVessel="OUT"))
stationDesign$SampleTable <- rbind(stationDesign$SampleTable, data.table::data.table(Stratum="S2", N=100, n=2, SelectionMethod="Poisson", FrameDescription="OUT"))
stationDesign$SampleTable$n[1] <- stationDesign$SampleTable$n[1]-2
stationDesign$SelectionTable$Stratum[stationDesign$SelectionTable$SamplingUnitId %in% c("38401","38433")] <- "S2"

ex <- RstoxFDA::CatchLotteryExample
ex$SpeciesCategory$SpeciesCategory <- "061104"
ex$Individual$IW <- ex$Individual$IndividualRoundWeight #for testing that covariances equal variances when appropriate
ex$Individual$one <- 1 #for testing that variable covariance equal abundance covariance when appropriate.
stationDesign <- RstoxFDA::AssignPSUSamplingParameters(stationDesign, ex, "serialnumber", "Haul", "MissingAtRandom")
srs <-  RstoxFDA:::ComputeIndividualSamplingParameters(ex, "SRS", c("IndividualAge"))
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight", "IndividualTotalLength"), c("IndividualAge"))
expect_true(abs(sum(psuEst$Abundance$Abundance) - sum(ex$Sample$CatchFractionNumber))/sum(ex$Sample$CatchFractionNumber) < 1e-3)
popEstAgeDomain <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)
popEstMeanOfMeans <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst, MeanOfMeans = T)
psuEst <- RstoxFDA:::AnalyticalPSUEstimate(ex, srs, c("IndividualRoundWeight"))
popEst <- RstoxFDA:::AnalyticalPopulationEstimate(stationDesign, psuEst)

#
# check against some manual calculations
#

aggregate <- RstoxFDA::AggregateAnalyticalEstimate(popEst, AggregateStratumName = "aggregate")
expect_equal(aggregate$Abundance$Frequency, 1)
abund <- sum(popEst$Abundance$Abundance)
expect_equal(abund, aggregate$Abundance$Abundance)
freqCovar <- sum(popEst$AbundanceCovariance$FrequencyCovariance * popEst$Abundance$Abundance**2) / sum(popEst$Abundance$Abundance)**2
expect_true(abs(aggregate$AbundanceCovariance$FrequencyCovariance - freqCovar)/freqCovar < 1e-3)

mean <- sum(popEst$Variables$Mean * popEst$Abundance$Abundance) / sum(popEst$Abundance$Abundance)
expect_equal(mean, aggregate$Variables$Mean)
total <- sum(popEst$Variables$Total)
expect_true(abs(aggregate$Variables$Total - total)/total < 1e-3)
meanVar <- sum(popEst$VariablesCovariance$MeanCovariance * (popEst$Abundance$Abundance / sum(popEst$Abundance$Abundance))**2)
expect_true(abs(aggregate$VariablesCovariance$MeanCovariance - meanVar)/meanVar < 1e-3)
totalVar <- sum(popEst$VariablesCovariance$TotalCovariance)
expect_true(abs(aggregate$VariablesCovariance$TotalCovariance - totalVar)/totalVar < 1e-3)

expect_equal(sum(popEst$SampleCount$nPSU), sum(aggregate$SampleCount$nPSU))
expect_equal(sum(popEst$SampleCount$nIndividuals), sum(aggregate$SampleCount$nIndividuals))

#
# check retained
#

retained <- RstoxFDA::AggregateAnalyticalEstimate(popEst, RetainStrata = popEst$StratificationVariables$Stratum, AggregateStratumName = "aggregate")
expect_true(all(popEst$Abundance$Abundance==retained$Abundance$Abundance))
expect_equal(aggregate$Abundance$Abundance, sum(retained$Abundance$Abundance))
expect_true(all(retained$Abundance$Frequency==1))
expect_true(!any(aggregate$Abundance$Stratum %in% retained$Abundance$Stratum))
expect_equal(aggregate$VariablesCovariance$TotalCovariance, sum(retained$VariablesCovariance$TotalCovariance))
expect_true(!any(aggregate$VariablesCovariance$Stratum %in% retained$VariablesCovariance$Stratum))
StoXProject/RstoxFDA documentation built on June 14, 2025, 1:37 a.m.