R/Gaussian_process_malenoterate.R

# Load libraries
library(rethinking)
library(plyr)
library(stringr)
library(sp)
library(lme4)
library(bbmle)
library(dplyr)

# Load data
dstan_male <- read.csv("/Users/denasmacbook/Geographic tarsiers/tarsier.male.data.csv")


# Create distance matrix based on GPS points of recording locations
geo_distance <- spDists(cbind(dstan_male$lon,dstan_male$lat),longlat = T)
geo_matrix <- as.matrix(geo_distance)
Dmat <- geo_matrix
colnames(Dmat) <- dstan_male$site
row.names(Dmat) <- dstan_male$site

# Create distance matrix based on GPS points of recording locations
site.sub <- dstan_male %>%
  group_by(site) %>%
  summarize(lat=mean(lat),lon=mean(lon))


geo_distance <- spDists(cbind(site.sub$lon,site.sub$lat),longlat = T)
geo_matrix <- as.matrix(geo_distance)
Dmat.site <- geo_matrix
colnames(Dmat.site) <- site.sub$site
row.names(Dmat.site) <- site.sub$site


# Create distance matrix based on GPS points of recording locations
indiv.sub <- dstan_male %>%
  group_by(group) %>%
  summarize(lat=mean(lat),lon=mean(lon))


geo_distance <- spDists(cbind(indiv.sub$lon,indiv.sub$lat),longlat = T)
geo_matrix <- as.matrix(geo_distance)
Dmat.indiv <- geo_matrix
colnames(Dmat.indiv) <- indiv.sub$group
row.names(Dmat.indiv) <- indiv.sub$group

# Part 1 A. Model for male number of notes
# Create datalist to pass to STAN
dat_list_noterate_male <- list(
  dim_J = length(unique(dstan_male$site)),
  dim_K = length(unique(dstan_male$group)),
  site = as.integer(as.factor(dstan_male$site)),
  individual=as.integer(as.factor(dstan_male$group)),
  cov1 = standardize(log(dstan_male$Prec)),
  cov2 = standardize(log(dstan_male$Temp)),
  cov3 = dstan_male$dist.main,
  outcome_var = standardize(log(dstan_male$note.rate.1)), # this is outcome
  Dmat_site = Dmat.site,
  Dmat_indiv = Dmat.indiv
)


# Create an intercept only model
noterate.intercept.male <- ulam(
  alist(
    outcome_var ~ normal( mu , sigma ),
    mu <- a,
    a ~ dnorm(0,0.5),
    sigma ~ exponential(1)
  ), data=dat_list_noterate_male , chains=4 , iter=3000, log_lik=TRUE )


# Create a model that includes only the environmental predictors
noterate.covs.male <- ulam(
  alist(
    outcome_var ~ normal( mu , sigma ),
    mu <- exp(a + bcov1*cov1 + bcov2*cov2+ bcov3*cov3),
    a ~ normal(0,1),
    c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    sigma ~ exponential(1)
  ), data=dat_list_noterate_male , chains=4 , iter=3000, cores=4, log_lik=TRUE )


# Create a model that includes Gaussian process and environmental predictors
noterate.covs.gp.male.indiv <- ulam(
  alist(
    outcome_var ~ normal( mu, sigmasq ),
    mu <-  exp((a +  bcov1*cov1+ bcov2*cov2 + bcov3*cov3) + k[individual]),
    vector[dim_K]:k ~ multi_normal( 0 , SIGMA ),
    matrix[dim_K,dim_K]:SIGMA <- cov_GPL2( Dmat_indiv , etasq , rhosq , extra_cov ),
    a ~ dnorm(0,0.5),
    c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    sigmasq ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    rhosq ~ dexp( 0.5 ),
    extra_cov ~ dexp( 0.5 )
  ), data=dat_list_noterate_male , chains=4 , iter=3000, cores=4 , log_lik=TRUE )

# Create a model that includes Gaussian process and environmental predictors
noterate.gp.only.male.indiv <- ulam(
  alist(
    outcome_var ~ normal( mu, sigmasq ),
    mu <-  exp((a) + k[individual]),
    vector[dim_K]:k ~ multi_normal( 0 , SIGMA ),
    matrix[dim_K,dim_K]:SIGMA <- cov_GPL2( Dmat_indiv , etasq , rhosq , extra_cov ),
    a ~ dnorm(0,0.5),
    #c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    sigmasq ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    rhosq ~ dexp( 0.5 ),
    extra_cov ~ dexp( 0.5 )
  ), data=dat_list_noterate_male , chains=4 , iter=3000, cores=4 , log_lik=TRUE )


# Create random effects model
noterate.ranef.male <- ulam(
  alist(
    outcome_var ~ normal( mu, sigmasq ),
    mu <-  exp(a + j[individual] +  k[site]),
    k[site] ~ dnorm(0,etasq),
    j[individual] ~ dnorm(0,betasq),
    a ~ dnorm(0,0.5),
    #c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    sigmasq ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    betasq ~ dexp( 2 )
  ), data=dat_list_noterate_male , chains=4 , iter=3000, cores=4 , log_lik=TRUE )


# Create random effects with covariates model
noterate.ranef.cov.male <- ulam(
  alist(
    outcome_var ~ normal( mu, sigmasq ),
    mu <-  exp((a +  bcov1*cov1+ bcov2*cov2 + bcov3*cov3) + j[individual] +  k[site]),
    k[site] ~ dnorm(0,etasq),
    j[individual] ~ dnorm(0,betasq),
    a ~ dnorm(0,0.5),
    c(bcov1,bcov2,bcov3) ~ normal(0,0.5),
    sigmasq ~ dexp( 0.5 ),
    etasq ~ dexp( 2 ),
    betasq ~ dexp( 2 )
  ), data=dat_list_noterate_male , chains=4 , iter=3000, cores=4 , log_lik=TRUE )


compare(noterate.intercept.male,noterate.covs.male, noterate.covs.gp.male.indiv,noterate.gp.only.male.indiv,
        noterate.ranef.male,noterate.ranef.cov.male)

precis(noterate.covs.gp.male.indiv)

post <- extract.samples(noterate.covs.gp.male.indiv)

# Calculate proportion of variance due to spatial
median(post$etasq/(post$etasq + post$sigmasq + post$extra_cov))

precis(noterate.covs.gp.male.indiv)
# plot the posterior median Covariance function

# plot the posterior median Covariance function
plot( NULL , xlab="Distance ( km)" , ylab="Covariance" ,main="Male Note Rate",
      xlim=c(0,50) , ylim=c(0,2))
# compute posterior median Covariance
x_seq <- seq( from=0 , to=50 , length.out=50 )
pmcov <- sapply( x_seq , function(x) post$etasq*exp(-post$rhosq*x^2) )
pmcov_mu <- apply( pmcov , 2 , median )
lines( x_seq , pmcov_mu , lwd=2 )
# plot 60 functions sampled from posterior
for ( i in 1:50 )
  curve( post$etasq[i]*exp(-post$rhosq[i]*x^2) , add=TRUE ,
         col=col.alpha("grey",0.3) )
par(new=T)
curve( median(etasq.prior)*exp(-median(rhosq.prior)*x^2) , from=0 , to=50, col="red",
       lwd=2,xlim=c(0,50) , ylim=c(0,2), xlab="" , ylab="" )
DenaJGibbon/geographic_tarsiers documentation built on May 23, 2019, 12:50 a.m.