R/Gaussian_process_nnotes.R

# Load libraries
library(rethinking)
library(plyr)
library(stringr)
library(sp)
library(lme4)
library(bbmle)
library(dplyr)

# Load data
dstan_female <-
  read.csv("/Users/denasmacbook/Geographic tarsiers/tarsier.female.march28.csv")

# Create distance matrix based on GPS points of recording locations
geo_distance <- spDists(cbind(dstan_female$lon,dstan_female$lat),longlat = T)
geo_matrix <- as.matrix(geo_distance)
Dmat <- geo_matrix
colnames(Dmat) <- dstan_female$site
row.names(Dmat) <- dstan_female$site

# Create distance matrix based on GPS points of recording locations
site.sub <- dstan_female %>%
  group_by(site) %>%
  summarize(lat=mean(lat),lon=mean(lon))


geo_distance <- spDists(cbind(site.sub$lon,site.sub$lat),longlat = T)
geo_matrix <- as.matrix(geo_distance)
Dmat.site <- geo_matrix
colnames(Dmat.site) <- site.sub$site
row.names(Dmat.site) <- site.sub$site


# Create distance matrix based on GPS points of recording locations
indiv.sub <- dstan_female %>%
  group_by(group) %>%
  summarize(lat=mean(lat),lon=mean(lon))


geo_distance <- spDists(cbind(indiv.sub$lon,indiv.sub$lat),longlat = T)
geo_matrix <- as.matrix(geo_distance)
Dmat.indiv <- geo_matrix
colnames(Dmat.indiv) <- indiv.sub$group
row.names(Dmat.indiv) <- indiv.sub$group

# Part 1 A. Model for female number of notes
# Create datalist to pass to STAN
dat_list_notes_female <- list(
  dim_J = length(unique(dstan_female$site)),
  dim_K = length(unique(dstan_female$group)),
  site = as.integer(as.factor(dstan_female$site)),
  individual=as.integer(as.factor(dstan_female$group)),
  cov1 = standardize(log(dstan_female$Prec)),
  cov2 = standardize(log(dstan_female$Temp)),
  cov3 = dstan_female$dist.main,
  outcome_var = as.integer(dstan_female$n.notes), # this is outcome
  Dmat_site = Dmat.site,
  Dmat_indiv = Dmat.indiv
)


# Create an intercept only model
n.notes.intercept.female <- ulam(
  alist(
    outcome_var ~ dpois( mu ),
    mu <- a,
    a ~ dnorm(0,0.5)
  ), data=dat_list_notes_female , chains=4 , iter=3000, log_lik=TRUE )


# Create a model that includes only the environmental predictors
n.notes.covs.female <- ulam(
  alist(
    outcome_var ~ dpois( mu ),
    mu <- exp(a +  bcov1*cov1+ bcov2*cov2 + bcov3*cov3),
    a ~ dnorm(0,0.5),
    c(bcov1,bcov2,bcov3) ~ normal(0,0.5) #
  ), data=dat_list_notes_female , chains=4 , iter=3000, cores=4, log_lik=TRUE )

# Create a model that includes Gaussian process and environmental predictors
n.notes.covs.gp.female.indiv <- ulam(
  alist(
    outcome_var ~ dpois( mu ),
    mu <-  exp((a +  bcov1*cov1+ bcov2*cov2 + bcov3*cov3) + k[individual]),
    # vector[dim_K]:k ~ multi_normal( 0 , SIGMA ),
    # matrix[dim_K,dim_K]:SIGMA <- cov_GPL2( Dmat_site , etasq , rhosq , extra_cov ),
    vector[dim_K]:k ~ multi_normal( 0 , SIGMA ),
    matrix[dim_K,dim_K]:SIGMA <- cov_GPL2( Dmat_indiv, etasq , rhosq , extra_cov ),
    a ~ dnorm(0,0.5),
    c(bcov1,bcov2,bcov3) ~ dnorm(0,1),
    # etasq ~ dexp( 2 ),
    # rhosq ~ dexp( 0.5 ),
    # extra_cov ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    rhosq ~ dexp( 0.5 ),
    extra_cov ~ dexp( 0.5 )
  ), data=dat_list_notes_female , chains=4 , iter=3000, cores=4 , log_lik=TRUE )

# Create a model that includes Gaussian process and environmental predictors
n.notes.gp.only.female.indiv <- ulam(
  alist(
    outcome_var ~ dpois( mu ),
    mu <-  exp((a) + k[individual]),
    # vector[dim_K]:k ~ multi_normal( 0 , SIGMA ),
    # matrix[dim_K,dim_K]:SIGMA <- cov_GPL2( Dmat_site , etasq , rhosq , extra_cov ),
    vector[dim_K]:k ~ multi_normal( 0 , SIGMA ),
    matrix[dim_K,dim_K]:SIGMA <- cov_GPL2( Dmat_indiv, etasq , rhosq , extra_cov ),
    a ~ dnorm(0,0.5),
    #c(bcov1,bcov2,bcov3) ~ dnorm(0,1),
    # etasq ~ dexp( 2 ),
    # rhosq ~ dexp( 0.5 ),
    # extra_cov ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    rhosq ~ dexp( 0.5 ),
    extra_cov ~ dexp( 0.5 )
  ), data=dat_list_notes_female , chains=4 , iter=3000, cores=4 , log_lik=TRUE )


# Create random effects model
n.notes.ranef.female <- ulam(
  alist(
    outcome_var ~ dpois( mu ),
    mu <- exp(a + j[individual] + k[site]),
    k[site] ~ dnorm(0,etasq),
    j[individual] ~ dnorm(0,etasq),
    a ~ dnorm(0,0.5),
    #c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    #sigmasq ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    betasq ~ dexp( 2 )
  ), data=dat_list_notes_female , chains=4 , iter=3000, cores=4 , log_lik=TRUE )


# Create random effects with covariates model
n.notes.ranef.cov.female <- ulam(
  alist(
    outcome_var ~ dpois( mu ),
    mu <- exp((a +  bcov1*cov1+ bcov2*cov2 + bcov3*cov3) + j[individual] + k[site]),
    k[site] ~ dnorm(0,etasq),
    j[individual] ~ dnorm(0,etasq),
    a ~ dnorm(0,0.5),
    c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    #sigmasq ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    betasq ~ dexp( 2 )
  ), data=dat_list_notes_female , chains=4 , iter=3000, cores=4 , log_lik=TRUE )


write.csv(compare(n.notes.intercept.female,n.notes.covs.female, n.notes.covs.gp.female.indiv,n.notes.gp.only.female.indiv,
        n.notes.ranef.female,n.notes.ranef.cov.female),"nnotes.csv")


precis(n.notes.ranef.cov.female)

post <- extract.samples(n.notes.covs.gp.female.individual)
# Calculate proportion of variance due to spatial
median(post$etasq/(post$etasq+ post$extra_cov))


# plot the posterior median Covariance function

# plot the posterior median Covariance function
plot( NULL , xlab="Distance ( km)" , ylab="Covariance" ,main="Female Number of Notes",
      xlim=c(0,50) , ylim=c(0,2))
# compute posterior median Covariance
x_seq <- seq( from=0 , to=50 , length.out=50 )
pmcov <- sapply( x_seq , function(x) post$etasq*exp(-post$rhosq*x^2) )
pmcov_mu <- apply( pmcov , 2 , median )
lines( x_seq , pmcov_mu , lwd=2 )
# plot 60 functions sampled from posterior
for ( i in 1:50 )
  curve( post$etasq[i]*exp(-post$rhosq[i]*x^2) , add=TRUE ,
         col=col.alpha("black",0.3) )
par(new=T)
curve( median(etasq.prior)*exp(-median(rhosq.prior)*x^2) , from=0 , to=250, col="red",
       lwd=2,xlim=c(0,50) , ylim=c(0,2), xlab="" , ylab="" )
DenaJGibbon/geographic_tarsiers documentation built on May 23, 2019, 12:50 a.m.