knitr::opts_chunk$set(echo = TRUE)
library(here)
dat <- readRDS("../benchmark-data/Pollen/Whitmore/whitmore_pollen.rds")
# str(dat)
y <- as.matrix(dat[[1]])
## drop site name
sitenames <- y[, 1]
y <- y[, -1]
names <- colnames(y)
y <- matrix(as.numeric(y), dim(y))
colnames(y) <- names
rownames(y) <- sitenames
# str(y)
## Set NAs to 0
y[is.na(y)] <- 0
clim <- read.csv("../benchmark-data/Pollen/Whitmore/whitmore_worldclim_10min.csv")
# str(clim)
X <- clim$Tann
names(X) <- as.character(clim$ID2)
# all.equal(rownames(y), names(X))
## get same values
X <- X[match(rownames(y), names(X))]
y <- y[match(names(X), rownames(y)), ]
# all.equal(rownames(y), names(X))

## remove NA climate 
na_idx <- which(is.na(X))
y <- y[-na_idx, ]
X <- X[-na_idx]
# ## check if any NAs
# sum(is.na(y))
# sum(is.na(X))
# ## make sure colnames are still ok
# all.equal(rownames(y), names(X))
library(Methods)
library(ggplot2)

# y=y[1:200,]
# X=X[1:200]

model_name="WA"
WA=make_CV(y=y, X=X, model_name=model_name, kfold=2)

model_name="MAT"
MAT=make_CV(y=y, X=X, model_name=model_name, kfold=2)

model_name="MLRC"
MLRC=make_CV(y=y, X=X, model_name=model_name, kfold=2)

model_name="RF"
RF=make_CV(y=y, X=X, model_name=model_name, kfold=2)

out = rbind(data.frame(WA, method=rep('WA', nrow(WA))), 
            data.frame(MAT, method=rep('MAT', nrow(MAT))),
            data.frame(MLRC, method=rep('MLRC', nrow(MLRC))),
             data.frame(RF, method=rep('RF', nrow(RF))))

ggplot(data=out, aes(x=observations/10, y=mu/10, color=method)) + 
  geom_abline(intercept=0, slope=1) + 
  geom_point() + 
  xlab('Observed') +
  ylab('Predicted') +
  theme_bw()
ggsave('figures/model_vs_data.pdf')

ggplot(data=out, aes(x=observations/10, y=mu/10, color=method)) + 
  geom_abline(intercept=0, slope=1) + 
  geom_point() + 
  geom_smooth(method='lm',formula=y~x) +
  xlab('Observed') +
  ylab('Predicted') +
  theme_bw()
ggsave('figures/model_vs_data_best_fit_line.pdf')


ggplot(data=out) + 
  geom_abline(intercept=0, slope=1) + 
  geom_point(aes(x=observations/10, y=mu/10, color=method)) + 
    geom_pointrange(aes(x=observations/10, y=mu/10, ymin=mu/10-sd, ymax=mu/10+sd, color=method)) +
  xlab('Observed') +
  ylab('Predicted') +
  theme_bw()
ggsave('figures/model_vs_data_best_uncertainty.pdf')
library(rioja)
nboot <- 100
N <- dim(y)[1]
boot_MA <- matrix(0, N, nboot)

## drop rare taxa
drop_idx <- which((apply(y, 2, sum) / sum(y)) < 0.01)
y <- y[, -drop_idx]
y_prop <- counts2proportions(y[1:500, ])
for (i in 1:nboot) {
  modMAT <- rioja::MAT(as.data.frame(y_prop), X[1:500], k=5, lean=TRUE)
  boot_MA[, i] <- predict(modMAT, as.data.frame(y_prop), k=5, sse=TRUE, n.boot=1)$fit[, 1]



}


PCMIP/Methods documentation built on May 30, 2019, 10:46 p.m.