knitr::opts_chunk$set(echo = TRUE)
require(ggplot2) require(dplyr) require(tidyr) require(parallel) require(boral)
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data") source("../metrics.R") load(file="testMats_correctlySpecified.RData") numSpecies <- c(5, 10, 15, 20, 25, 30) numFactors <- c(1, 2, 5, 10, 20) scenarios = expand.grid(numSpecies, numFactors)
kl <- c() frob <- c() setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data") for(i in 1:18){ load(file=paste0("correctlySpecifiedResults/r",i,".RData")) estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1]) ## put lambda lambda together ## kl divergence ## adding this diagonal is not good, still have to figure out the invertibility thing kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]])))) ## frobenius frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]])) } results = cbind.data.frame(scenarios[1:18,], kl, frob) names(results)[1:2]= c("numSpecies", "numFactors")
### misspecified #### setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data") kl <- c() frob <- c() for(i in 19:nrow(scenarios)){ load(file=paste0("correctlySpecifiedResults/r",i,".RData")) estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1]) ## put lambda lambda together ## kl divergence kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]])))) ## frobenius frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]])) } results2 = cbind.data.frame(scenarios[19:nrow(scenarios),], kl, frob) names(results2)[1:2]= c("numSpecies", "numFactors")
30 sites
## correctly specified ggplot(results, aes(numSpecies, kl,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("kl divergence") ggplot(results, aes(numSpecies, frob,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("sqrt(sum(abs(obs-est)))") ## fit with 5 latent factors ggplot(results2, aes(numSpecies, kl,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("kl divergence") ggplot(results2, aes(numSpecies, frob,col=as.factor(numFactors)))+geom_point()+theme_minimal()+ylab("sqrt(sum(abs(obs-est)")
makeJaccardECDF = function(data, title){ truth1 <- data$yP[nrow(data$yP), ] parsetemp <- unlist(lapply(strsplit(colnames(data$yStarP), "\\["), function(x) { x[2] })) siteNum1 <- unlist(lapply(strsplit(parsetemp, ","), function(x) { x[1] })) speciesNum <- unlist(lapply(strsplit(parsetemp, ","), function(x) { x[2] })) speciesNum1 <- sub("\\]", "", speciesNum) test <- cbind.data.frame(t(data$yStarP), speciesNum=speciesNum1, siteNum=siteNum1, truth=truth1) test2 <- gather(test, -speciesNum, -siteNum, -truth, key = "iteration", value = "val") jaccardCollapseOverSites <- function(s1, s2) { sub1 <- subset(trueDist, speciesNum == s1) sub2 <- subset(trueDist, speciesNum == s2) num <- which(sub1$truth == 1 & sub2$truth == 1) aa <- which(sub1$truth == 1) bb <- which(sub2$truth == 1) jaccard <- length(num) / (length(aa) + length(bb) - length(num)) coOccur <- length(num) return(list(jaccard = jaccard, coOccur = coOccur)) } trueDist <- subset(test2, iteration == 1) trueDist$speciesNum <- as.numeric(as.character(trueDist$speciesNum)) pairs <- t(combn(unique(trueDist$speciesNum), 2)) trueJ <- mapply(jaccardCollapseOverSites, pairs[, 1], pairs[, 2], SIMPLIFY = F) trueJJ <- lapply(trueJ, function(x) { x$jaccard }) trueCO <- lapply(trueJ, function(x) { x$coOccur }) jaccardCollapseOverSitesIteration <- function(s1, s2, Iteration) { sub1 <- subset(trueDist, speciesNum == s1 & iteration == Iteration) sub2 <- subset(trueDist, speciesNum == s2 & iteration == Iteration) num <- which(sub1$val == 1 & sub2$val == 1) aa <- which(sub1$val == 1) bb <- which(sub2$val == 1) jaccard <- length(num) / (length(aa) + length(bb) - length(num)) coOccur <- length(num) return(list(jaccard = jaccard, coOccur = coOccur)) } p1 <- rep(pairs[, 1], times = 100) p2 <- rep(pairs[, 2], times = 100) itN <- rep(901:1000, each = nrow(pairs)) test2$iteration <- as.numeric(as.character(test2$iteration)) trueDist <- subset(test2, iteration > 900) #ptm <- proc.time() itJ <- mcmapply(jaccardCollapseOverSitesIteration, p1, p2, itN, SIMPLIFY = F, mc.cores = 4) #proc.time() - ptm ## itN <- rep(901:1000, each = nrow(pairs)) itJJ <- lapply(itJ, function(x) { x$jaccard }) itCO <- lapply(itJ, function(x) { x$coOccur }) byIterationJ <- cbind.data.frame(jaccard = unlist(itJJ), coOccur = unlist(itCO), iteration = itN) g1=ggplot(byIterationJ,aes(x=jaccard,group=iteration))+stat_ecdf(geom="line",alpha=0.2,col="red")+stat_ecdf(data=data.frame(true = unlist(trueJJ)),aes(x=true, group=1),col="blue",geom="line")+theme_minimal(base_size = 18)+ylab("ECDF") +xlab("jaccard - similarity between species")+ggtitle(title) return(g1) }
numSpecies <- c(5, 10, 15, 20, 25, 30) numFactors <- c(1, 2, 5, 10, 20) scenarios = expand.grid(numSpecies=numSpecies, numFactors=numFactors)
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r1.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[1,1]," numFactors: ", scenarios[1,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r2.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[2,1]," numFactors: ", scenarios[2,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r3.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[3,1]," numFactors: ", scenarios[3,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r4.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[4,1]," numFactors: ", scenarios[4,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r5.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[5,1]," numFactors: ", scenarios[5,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r6.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[6,1]," numFactors: ", scenarios[6,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r7.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[7,1]," numFactors: ", scenarios[7,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r8.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[8,1]," numFactors: ", scenarios[8,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r9.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[9,1]," numFactors: ", scenarios[9,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r10.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[10,1]," numFactors: ", scenarios[10,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r11.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[11,1]," numFactors: ", scenarios[11,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r12.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[12,1]," numFactors: ", scenarios[12,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r13.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[13,1]," numFactors: ", scenarios[13,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r14.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[14,1]," numFactors: ", scenarios[14,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r15.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[15,1]," numFactors: ", scenarios[15,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r16.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[16,1]," numFactors: ", scenarios[16,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r17.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[17,1]," numFactors: ", scenarios[17,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r18.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[18,1]," numFactors: ", scenarios[18,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r19.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[19,1]," numFactors: ", scenarios[19,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r20.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[20,1]," numFactors: ", scenarios[20,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r21.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[21,1]," numFactors: ", scenarios[21,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r22.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[22,1]," numFactors: ", scenarios[22,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r23.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[23,1]," numFactors: ", scenarios[23,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r24.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[24,1]," numFactors: ", scenarios[24,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r25.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[25,1]," numFactors: ", scenarios[25,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r26.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[26,1]," numFactors: ", scenarios[26,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r27.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[27,1]," numFactors: ", scenarios[27,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r28.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[28,1]," numFactors: ", scenarios[28,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r29.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[29,1]," numFactors: ", scenarios[29,2]))
setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") load("r30.RData") makeJaccardECDF(fit.boral, paste("species: ", scenarios[30,1]," numFactors: ", scenarios[30,2]))
#https://github.com/cran/jaccard/blob/master/R/jaccard.R jaccard <- function(x, y, center=FALSE, px=NULL, py=NULL) { if(length(x) != length(y)) { stop("Two fingerprints (x and y) must be of the same length.") } if(is.null(px) | is.null(py)){ px <- mean(x) py <- mean(y) } sumxy <- sum(x & y) unionxy <- sum(x)+sum(y)-sumxy if(unionxy == 0) { j <- (px*py)/(px+py-px*py) } else { j <- sumxy/unionxy } if(center == FALSE) { return(j) } else { return(j - (px*py)/(px+py-px*py)) } } process_jaccardSpecies <- function(fileName, numSpecies){ #browser() load(file=fileName) truth1 <- fit.boral$yP[nrow(fit.boral$yP), ] parsetemp <- unlist(lapply(strsplit(colnames(fit.boral$yStarP), "\\["), function(x) { x[2] })) siteNum1 <- unlist(lapply(strsplit(parsetemp, ","), function(x) { x[1] })) speciesNum <- unlist(lapply(strsplit(parsetemp, ","), function(x) { x[2] })) speciesNum1 <- sub("\\]", "", speciesNum) test <- cbind.data.frame(t(fit.boral$yStarP), speciesNum=speciesNum1, siteNum=siteNum1, truth=truth1) test2 <- gather(test, -speciesNum, -siteNum, -truth, key = "iteration", value = "val") trueDist <- subset(test2, iteration == 1) trueDist$speciesNum <- as.numeric(as.character(trueDist$speciesNum)) truthNF=matrix(truth1, nrow=numSpecies, ncol=numSpecies, byrow=F) pairs <- t(combn(1:numSpecies, 2)) obsJ=mapply(function(x,y){jaccard(truthNF[,x], truthNF[,y])},pairs[,1], pairs[,2],SIMPLIFY = F) test2$speciesNum=as.numeric(as.character(test2$speciesNum)) test2$siteNum=as.numeric(as.character(test2$siteNum)) toStore = vector("list", 100) idx=1 for(i in 900:1000){ toUse = subset(test2, iteration==i) ppcNF <- matrix(nrow=length(unique(test2$siteNum)), ncol=length(unique(test2$speciesNum))) ppcNF[cbind(toUse$siteNum, toUse$speciesNum)] <- toUse$val ppcJ=mapply(function(x,y){jaccard(ppcNF[,x], ppcNF[,y])},pairs[,1], pairs[,2],SIMPLIFY = F) ## want to do this for everything in test2 then collapse toStore[[idx]] = unlist(ppcJ) idx=idx+1 } ## take average afterwards not before ksT = ks.test(unlist(obsJ), apply(do.call("rbind", toStore),2,mean)) return(ksT$p.value) }
numSpecies <- c(5, 10, 15, 20, 25, 30) numFactors <- c(1, 2, 5, 10, 20) scenarios = expand.grid(numSpecies = numSpecies, numFactors = numFactors) setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data/correctlySpecifiedResults") test = mapply(process_jaccardSpecies,list.files()[order(nchar(list.files()))], scenarios$numSpecies,SIMPLIFY = F) scenarios$pval=unlist(test)
ggplot(scenarios, aes(numSpecies, pval ))+geom_point(cex=2)+geom_line()+facet_wrap(~numFactors, scales = "free_y")+theme_minimal()+ylab("p(ish)-value")
kl <- c() frob <- c() setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data") for(i in 1:18){ load(file=paste0("correctlySpecifiedResults/r",i,".RData")) estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1]) ## put lambda lambda together ## kl divergence kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]])))) ## frobenius frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]])) } numSpecies <- c(5, 10, 15, 20, 25, 30) numFactors <- c(1, 2, 5, 10, 20) scenarios = expand.grid(numSpecies = numSpecies, numFactors = numFactors) results = cbind.data.frame(scenarios[1:18,], kl, frob, pval=unlist(test)[1:18])
ggplot(results, aes(kl,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y") ggplot(results, aes(frob,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y")
### misspecified #### setwd("~/Desktop/communityDimensions/R/simulation_setup/test_data") kl <- c() frob <- c() for(i in 19:nrow(scenarios)){ load(file=paste0("correctlySpecifiedResults/r",i,".RData")) estimatedMat = fit.boral$lv.coefs.mean[,-1]%*%t(fit.boral$lv.coefs.mean[,-1]) ## put lambda lambda together ## kl divergence kl <- c(kl,klDivergence(trueMats[[i]]+diag(nrow(trueMats[[i]])), estimatedMat+diag(nrow(trueMats[[i]])))) ## frobenius frob <- c(frob,frobeniusNorm(estimatedMat, trueMats[[i]])) } numSpecies <- c(5, 10, 15, 20, 25, 30) numFactors <- c(1, 2, 5, 10, 20) scenarios = expand.grid(numSpecies = numSpecies, numFactors = numFactors) results2 = cbind.data.frame(scenarios[19:nrow(scenarios),], kl, frob, pval=unlist(test)[19:length(test)]) names(results2)[1:2]= c("numSpecies", "numFactors")
ggplot(results2, aes(kl,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y") ggplot(results2, aes(frob,pval))+geom_point()+geom_line()+facet_wrap(~numFactors,scales = "free_y")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.