tests/mefa4comparisons.R

## performance comparison
library(mefa)
library(mefa4)

data(abmibirds)
## NONE: no species was recorded (8)
## SNI: Species Not Identified (216)
## VNA: Variable Not Applicable (6)
## DNC: Did Not Collect (13)
## PNA: Protocol Not Available (0 in this case)

## S3 mefa processing
b3 <- abmibirds
b3 <- b3[!(b3$Scientific.Name %in% c("VNA", "DNC", "PNA")),]
levels(b3$Scientific.Name)[levels(b3$Scientific.Name) %in% c("NONE", "SNI")] <- "zero.pseudo"
b3$Counts <- ifelse(b3$Scientific.Name == "zero.pseudo", 0, 1)
b3$Label <- with(b3, paste(ABMI.Site, Year, Point.Count.Station, sep="_"))
x3 <- b3[!duplicated(b3$Label), c("Label", "ABMI.Site", "Year", "Field.Date", "Point.Count.Station",
    "Wind.Conditions", "Precipitation")]
rownames(x3) <- x3$Label
z3 <- b3[!duplicated(b3$Scientific.Name), c("Common.Name",
    "Scientific.Name", "Taxonomic.Resolution", "Unique.Taxonomic.Identification.Number")]
rownames(z3) <- z3$Scientific.Name
z3 <- z3[z3$Scientific.Name != "zero.pseudo",]
t31 <- system.time(s3 <- suppressWarnings(stcs(b3[,c("Label","Scientific.Name","Counts")])))
t32 <- system.time(m30 <- mefa(s3))
t33 <- system.time(m31 <- mefa(s3, x3, z3))
y30 <- m30$xtab
t34 <- system.time(m32 <- mefa(y30, x3, z3))

## S4 mefa4 processing
b4 <- abmibirds
b4$Label <- with(b4, paste(ABMI.Site, Year, Point.Count.Station, sep="_"))
x4 <- b4[!duplicated(b4$Label), c("Label", "ABMI.Site", "Year", "Field.Date", "Point.Count.Station",
    "Wind.Conditions", "Precipitation")]
rownames(x4) <- x4$Label
z4 <- b4[!duplicated(b4$Scientific.Name), c("Common.Name",
    "Scientific.Name", "Taxonomic.Resolution", "Unique.Taxonomic.Identification.Number")]
rownames(z4) <- z4$Scientific.Name
t41 <- system.time(s4 <- Xtab(~ Label + Scientific.Name, b4, cdrop = c("NONE", "SNI"), 
    subset = !(b4$Scientific.Name %in% c("VNA", "DNC", "PNA")), drop.unused.levels = TRUE))
t42 <- system.time(m40 <- Mefa(s4))
t43 <- system.time(m41 <- Mefa(s4, x4, z4))
y40 <- as.matrix(m40@xtab)
t44 <- system.time(m42 <- Mefa(y40, x4, z4))

res <- cbind("SIZE, *=3"=c("b*"=object.size(b3),
    "s*"=object.size(s3),
    "y*0"=object.size(y30),
    "m*0"=object.size(m30),
    "m*1"=object.size(m31),
    "m*2"=object.size(m32)),
"SIZE, *=4"=c("b*"=object.size(b4),
    "s*"=object.size(s4),
    "y*0"=object.size(y40),
    "m*0"=object.size(m40),
    "m*1"=object.size(m41),
    "m*2"=object.size(m42)),
"TIME, *=3"=c("b*"=NA,
    "s*"=t31[3],
    "y*0"=NA,
    "m*0"=t32[3],
    "m*1"=t33[3],
    "m*2"=t34[3]),
"TIME, *=4"=c("b*"=NA,
    "s*"=t41[3],
    "y*0"=NA,
    "m*0"=t42[3],
    "m*1"=t43[3],
    "m*2"=t44[3]))
(res <- cbind(res, "SIZE"=res[,2]/res[,1], "TIME"=res[,4]/res[,3]))

## debugging

dim(y30)
dim(y40)

setdiff(rownames(y30), rownames(y40))
setdiff(rownames(y40), rownames(y30))

setdiff(colnames(y30), colnames(y40))
setdiff(colnames(y40), colnames(y30))

system.time(xx3 <- aggregate(m31, "ABMI.Site"))
system.time(xx4 <- groupSums(m41, 1, m41@samp$ABMI.Site))
as.Mefa(xx3)
xx4

Try the mefa4 package in your browser

Any scripts or data that you put into this service are public.

mefa4 documentation built on Sept. 12, 2022, 5:05 p.m.