Sara Stoudt 12/2/2019
require(ggplot2)
require(dplyr)
require(tidyr)
require(parallel)
require(boral)
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data")
source("../metrics.R")
load(file="testMats_correctlySpecified.RData")
numSpecies <- c(5, 10, 15, 20, 25, 30)
numFactors <- c(1, 2, 5, 10, 20)
scenarios = expand.grid(numSpecies, numFactors)
kl <- c()
frob <- c()
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data")
for(i in 1:18){
load(file=paste0("correctlySpecifiedResults/r",i,".RData"))
estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1])
## put lambda lambda together
## kl divergence
kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]]))))
## frobenius
frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]]))
}
results = cbind.data.frame(scenarios[1:18,], kl, frob)
names(results)[1:2]= c("numSpecies", "numFactors")
### misspecified ####
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data")
kl <- c()
frob <- c()
for(i in 19:nrow(scenarios)){
load(file=paste0("correctlySpecifiedResults/r",i,".RData"))
estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1])
## put lambda lambda together
## kl divergence
kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]]))))
## frobenius
frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]]))
}
results2 = cbind.data.frame(scenarios[19:nrow(scenarios),], kl, frob)
names(results2)[1:2]= c("numSpecies", "numFactors")
30 sites
## correctly specified
ggplot(results, aes(numSpecies, kl,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("kl divergence")
ggplot(results, aes(numSpecies, frob,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("sqrt(sum(abs(obs-est)))")
## fit with 5 latent factors
ggplot(results2, aes(numSpecies, kl,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("kl divergence")
ggplot(results2, aes(numSpecies, frob,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("sqrt(sum(abs(obs-est)")
makeJaccardECDF = function(data, title){
truth1 <- data$yP[nrow(data$yP), ]
parsetemp <- unlist(lapply(strsplit(colnames(data$yStarP), "\\["), function(x) {
x[2]
}))
siteNum1 <- unlist(lapply(strsplit(parsetemp, ","), function(x) {
x[1]
}))
speciesNum <- unlist(lapply(strsplit(parsetemp, ","), function(x) {
x[2]
}))
speciesNum1 <- sub("\\]", "", speciesNum)
test <- cbind.data.frame(t(data$yStarP), speciesNum=speciesNum1, siteNum=siteNum1, truth=truth1)
test2 <- gather(test, -speciesNum, -siteNum, -truth, key = "iteration", value = "val")
jaccardCollapseOverSites <- function(s1, s2) {
sub1 <- subset(trueDist, speciesNum == s1)
sub2 <- subset(trueDist, speciesNum == s2)
num <- which(sub1$truth == 1 & sub2$truth == 1)
aa <- which(sub1$truth == 1)
bb <- which(sub2$truth == 1)
jaccard <- length(num) / (length(aa) + length(bb) - length(num))
coOccur <- length(num)
return(list(jaccard = jaccard, coOccur = coOccur))
}
trueDist <- subset(test2, iteration == 1)
trueDist$speciesNum <- as.numeric(as.character(trueDist$speciesNum))
pairs <- t(combn(unique(trueDist$speciesNum), 2))
trueJ <- mapply(jaccardCollapseOverSites, pairs[, 1], pairs[, 2], SIMPLIFY = F)
trueJJ <- lapply(trueJ, function(x) {
x$jaccard
})
trueCO <- lapply(trueJ, function(x) {
x$coOccur
})
jaccardCollapseOverSitesIteration <- function(s1, s2, Iteration) {
sub1 <- subset(trueDist, speciesNum == s1 & iteration == Iteration)
sub2 <- subset(trueDist, speciesNum == s2 & iteration == Iteration)
num <- which(sub1$val == 1 & sub2$val == 1)
aa <- which(sub1$val == 1)
bb <- which(sub2$val == 1)
jaccard <- length(num) / (length(aa) + length(bb) - length(num))
coOccur <- length(num)
return(list(jaccard = jaccard, coOccur = coOccur))
}
p1 <- rep(pairs[, 1], times = 100)
p2 <- rep(pairs[, 2], times = 100)
itN <- rep(901:1000, each = nrow(pairs))
test2$iteration <- as.numeric(as.character(test2$iteration))
trueDist <- subset(test2, iteration > 900)
#ptm <- proc.time()
itJ <- mcmapply(jaccardCollapseOverSitesIteration, p1, p2, itN, SIMPLIFY = F, mc.cores = 4)
#proc.time() - ptm ##
itN <- rep(901:1000, each = nrow(pairs))
itJJ <- lapply(itJ, function(x) {
x$jaccard
})
itCO <- lapply(itJ, function(x) {
x$coOccur
})
byIterationJ <- cbind.data.frame(jaccard = unlist(itJJ), coOccur = unlist(itCO), iteration = itN)
g1=ggplot(byIterationJ,aes(x=jaccard,group=iteration))+stat_ecdf(geom="line",alpha=0.2,col="red")+stat_ecdf(data=data.frame(true = unlist(trueJJ)),aes(x=true, group=1),col="blue",geom="line")+theme_minimal(base_size = 18)+ylab("ECDF") +xlab("jaccard - similarity between species")+ggtitle(title)
return(g1)
}
numSpecies <- c(5, 10, 15, 20, 25, 30)
numFactors <- c(1, 2, 5, 10, 20)
scenarios = expand.grid(numSpecies=numSpecies, numFactors=numFactors)
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r1.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[1,1]," numFactors: ", scenarios[1,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r2.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[2,1]," numFactors: ", scenarios[2,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r3.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[3,1]," numFactors: ", scenarios[3,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r4.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[4,1]," numFactors: ", scenarios[4,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r5.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[5,1]," numFactors: ", scenarios[5,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r6.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[6,1]," numFactors: ", scenarios[6,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r7.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[7,1]," numFactors: ", scenarios[7,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r8.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[8,1]," numFactors: ", scenarios[8,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r9.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[9,1]," numFactors: ", scenarios[9,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r10.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[10,1]," numFactors: ", scenarios[10,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r11.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[11,1]," numFactors: ", scenarios[11,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r12.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[12,1]," numFactors: ", scenarios[12,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r13.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[13,1]," numFactors: ", scenarios[13,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r14.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[14,1]," numFactors: ", scenarios[14,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r15.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[15,1]," numFactors: ", scenarios[15,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r16.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[16,1]," numFactors: ", scenarios[16,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r17.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[17,1]," numFactors: ", scenarios[17,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r18.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[18,1]," numFactors: ", scenarios[18,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r19.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[19,1]," numFactors: ", scenarios[19,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r20.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[20,1]," numFactors: ", scenarios[20,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r21.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[21,1]," numFactors: ", scenarios[21,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r22.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[22,1]," numFactors: ", scenarios[22,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r23.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[23,1]," numFactors: ", scenarios[23,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r24.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[24,1]," numFactors: ", scenarios[24,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r25.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[25,1]," numFactors: ", scenarios[25,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r26.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[26,1]," numFactors: ", scenarios[26,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r27.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[27,1]," numFactors: ", scenarios[27,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r28.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[28,1]," numFactors: ", scenarios[28,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r29.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[29,1]," numFactors: ", scenarios[29,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
load("r30.RData")
makeJaccardECDF(fit.boral, paste("species: ", scenarios[30,1]," numFactors: ", scenarios[30,2]))
#https://github.com/cran/jaccard/blob/master/R/jaccard.R
jaccard <- function(x, y, center=FALSE, px=NULL, py=NULL) {
if(length(x) != length(y)) {
stop("Two fingerprints (x and y) must be of the same length.")
}
if(is.null(px) | is.null(py)){
px <- mean(x)
py <- mean(y)
}
sumxy <- sum(x & y)
unionxy <- sum(x)+sum(y)-sumxy
if(unionxy == 0) {
j <- (px*py)/(px+py-px*py)
} else {
j <- sumxy/unionxy
}
if(center == FALSE) {
return(j)
} else {
return(j - (px*py)/(px+py-px*py))
}
}
process_jaccardSpecies <- function(fileName, numSpecies){
#browser()
load(file=fileName)
truth1 <- fit.boral$yP[nrow(fit.boral$yP), ]
parsetemp <- unlist(lapply(strsplit(colnames(fit.boral$yStarP), "\\["), function(x) {
x[2]
}))
siteNum1 <- unlist(lapply(strsplit(parsetemp, ","), function(x) {
x[1]
}))
speciesNum <- unlist(lapply(strsplit(parsetemp, ","), function(x) {
x[2]
}))
speciesNum1 <- sub("\\]", "", speciesNum)
test <- cbind.data.frame(t(fit.boral$yStarP), speciesNum=speciesNum1, siteNum=siteNum1, truth=truth1)
test2 <- gather(test, -speciesNum, -siteNum, -truth, key = "iteration", value = "val")
trueDist <- subset(test2, iteration == 1)
trueDist$speciesNum <- as.numeric(as.character(trueDist$speciesNum))
truthNF=matrix(truth1, nrow=numSpecies, ncol=numSpecies, byrow=F)
pairs <- t(combn(1:numSpecies, 2))
obsJ=mapply(function(x,y){jaccard(truthNF[,x], truthNF[,y])},pairs[,1], pairs[,2],SIMPLIFY = F)
test2$speciesNum=as.numeric(as.character(test2$speciesNum))
test2$siteNum=as.numeric(as.character(test2$siteNum))
toStore = vector("list", 100)
idx=1
for(i in 900:1000){
toUse = subset(test2, iteration==i)
ppcNF <- matrix(nrow=length(unique(test2$siteNum)), ncol=length(unique(test2$speciesNum)))
ppcNF[cbind(toUse$siteNum, toUse$speciesNum)] <- toUse$val
ppcJ=mapply(function(x,y){jaccard(ppcNF[,x], ppcNF[,y])},pairs[,1], pairs[,2],SIMPLIFY = F) ## want to do this for everything in test2 then collapse
toStore[[idx]] = unlist(ppcJ)
idx=idx+1
}
## take average afterwards not before
ksT = ks.test(unlist(obsJ), apply(do.call("rbind", toStore),2,mean))
return(ksT$p.value)
}
numSpecies <- c(5, 10, 15, 20, 25, 30)
numFactors <- c(1, 2, 5, 10, 20)
scenarios = expand.grid(numSpecies = numSpecies, numFactors = numFactors)
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults")
test = mapply(process_jaccardSpecies,list.files()[order(nchar(list.files()))], scenarios$numSpecies,SIMPLIFY = F)
scenarios$pval=unlist(test)
ggplot(scenarios, aes(numSpecies, pval ))+geom_point(cex=2)+geom_line()+facet_wrap(~numFactors, scales = "free_y")+theme_minimal()+ylab("p(ish)-value")
kl <- c()
frob <- c()
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data")
for(i in 1:18){
load(file=paste0("correctlySpecifiedResults/r",i,".RData"))
estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1])
## put lambda lambda together
## kl divergence
kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]]))))
## frobenius
frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]]))
}
numSpecies <- c(5, 10, 15, 20, 25, 30)
numFactors <- c(1, 2, 5, 10, 20)
scenarios = expand.grid(numSpecies = numSpecies, numFactors = numFactors)
results = cbind.data.frame(scenarios[1:18,], kl, frob, pval=unlist(test)[1:18])
ggplot(results, aes(kl,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y")
ggplot(results, aes(frob,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y")
### misspecified ####
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data")
kl <- c()
frob <- c()
for(i in 19:nrow(scenarios)){
load(file=paste0("correctlySpecifiedResults/r",i,".RData"))
estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1])
## put lambda lambda together
## kl divergence
kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]]))))
## frobenius
frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]]))
}
numSpecies <- c(5, 10, 15, 20, 25, 30)
numFactors <- c(1, 2, 5, 10, 20)
scenarios = expand.grid(numSpecies = numSpecies, numFactors = numFactors)
results2 = cbind.data.frame(scenarios[19:nrow(scenarios),], kl, frob, pval=unlist(test)[19:length(test)])
names(results2)[1:2]= c("numSpecies", "numFactors")
ggplot(results2, aes(kl,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y")
ggplot(results2, aes(frob,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.