R/06_vital-rates-GAM.R

######################################################################
#
# GRM -- vital rate regressions
#
#  Rob Smith, phytomosaic@gmail.com, 10 Apr 2020
#
##  CC-BY-SA 4.0 License (Creative Commons Attribution-ShareAlike 4.0)

### TODO: use GAMs not linear models (random effect = plot = unique popn)
### TODO: use mixed models (random effect = plot = unique popn)
### TODO: permit predictor interactions (x1 * x2 * x3 * ...)

### preamble
rm(list=ls())
require(ecole)
require(data.table)
require(mgcv)

### load data
getwd()
load('./data/pnw_tree.rda', verbose=T)
d <- pnw_tree  ;  rm(pnw_tree)
d$nffd <- rowSums(cbind(d$nffd_wt, d$nffd_sp, d$nffd_sm, d$nffd_at))
d$dia_y1  <- log10(1 + d$dia_y1)  # log10-transform
d$dia_y2  <- log10(1 + d$dia_y2)  # log10-transform
d$annincr <- d$dia_y2 - d$dia_y1  # annual increment
# d$rgr     <- d$annincr / d$dia_y1 # relative growth rate
j     <- grep('ppt', names(d))
d$map <- log10(1+rowSums(d[,..j], na.rm=T)) # mean ann precip
j     <- grep('tmax', names(d))
d$mat <- rowSums(d[,..j], na.rm=T) # SUM of mean seasonal temp
j     <- grep('cmd', names(d))
d$cmd <- rowSums(d[,..j], na.rm=T) # SUM of climatic moisture deficit
d$td  <- d[, tave_sm - tave_wt]    # continentality
# mean ann temp
# plot(d$annincr, d$rgr, cex=0.4, col='#00000030')

i <- sample(1:NROW(d), size=9999)
pairs(d[i,.(mat,td,cmd,map)], cex=0.3, upper.panel=NULL)


### vital rate regressions per species
`vital_rate_gam` <- function(d_i,
                             # covar=c('pc1','pc2'),
                             covar=c('mat','td','cmd','map'),
                             multiplicative=FALSE,
                             out_mult=1.5, ...) {

    # d_i <- d[d$spp %in% c('tsuga_heterophylla'),]

    t_start <- Sys.time()
    ### TODO: variable selection (GAM or NPMR)
    ### TODO: flexible number of covariates


    # if (length(covar) != 2)
    #     stop('requires exactly 2 covariates')


    # setup formulas
    if (multiplicative) {
        f <- paste0('te(',paste0(covar, sep='', collapse=','),')')
    } else {
        f <- paste0('s(',covar,', k=5)', sep='', collapse=' + ')
    }
    ### include starting size AND climate
    # f_s <- as.formula(paste('surv ~ s(dia_y1) +', f))
    # f_g <- as.formula(paste('dia_y2 ~ s(dia_y1) +', f))

    ### use only climate PCs (does not depend on starting size)
    f_s <- as.formula(paste('surv ~ ', f))
    f_g <- as.formula(paste('annincr ~ ', f))  # or annincr

    # coerce data.table to data.frame
    data.table::setDF(d_i)
    # outlier handling (for growth model only)
    if (is.numeric(out_mult)) {
        `outl` <- function (x, mult, ...) {
            qu  <- quantile(x, na.rm=T)
            iqr <-  IQR(x, na.rm=T)
            hi  <- as.numeric(iqr * mult + qu[4])
            which(x > hi)
        }
        i <- outl(d_i$dia_y2 - d_i$dia_y1, mult=out_mult)
        if (length(i) != 0) {
            cat(length(i),' outliers excluded from growth model (',
                round(length(i)/NROW(d_i)*100,3),
                '%)\n', sep='')
            d_i[i, 'annincr'] <- NA
        }
    }

    # survival regression - LOGIT link
    reg_surv <- mgcv::gam(f_s, data=d_i,  select=T,
                          family=binomial(link='logit'),
                          na.action='na.exclude', method = 'REML')
    # summary(reg_surv)
    # set_par(4) ; gam.check(reg_surv)
    # plot(reg_surv, pages=1)

    # growth regression - LINEAR link
    reg_growth <- mgcv::gam(f_g, data=d_i, select=T,
                            na.action='na.exclude', method = 'REML')
    # summary(reg_growth)
    # set_par(4) ; gam.check(reg_growth)
    # plot(reg_growth, pages=1)

    # diameter distribution of new recruits
    recruitsize <- data.frame(mean=NA, sd=NA)
    recruitsize$mean <- mean(d_i$dia_y2[!is.na(d_i$dia_y1)],
                             na.rm=T)
    recruitsize$sd   <-   sd(d_i$dia_y2[!is.na(d_i$dia_y1)],
                             na.rm=T)
    t_end <- Sys.time()
    cat('...elapsed', t_end - t_start,'\n')
    # output
    return(list(which_sp    = unique(d_i$spp),
                d_i         = d_i, ### only for visreg
                recruitsize = recruitsize,
                models = list(reg_surv   = reg_surv,
                              reg_growth = reg_growth),
                fits = data.frame(tre_cn     = d_i$tre_cn,
                                  fit_surv   = fitted(reg_surv),
                                  fit_growth = fitted(reg_growth)),
                preds = data.frame(tre_cn     = d_i$tre_cn,
                                   fit_surv   = predict(reg_surv, type='response'),
                                   fit_growth = predict(reg_growth, type='response'))
                ))
}
###################################################################
### vital rates for MULTIPLE species, from most to least frequent
nm <- names(rev(sort(table(d$spp))))[1:20]
nm <- nm[nm!='quercus_douglasii']
vr <- lapply(nm, function(x) {   # ! ! ! TIMEWARN ! ! !
    cat(x,'\n')
    keep <- d$spp %in% x
    vital_rate_gam(d_i = d[keep,], covar = c('pc1','pc2'))
})
###################################################################

### model fits
`get_rsq` <- function(x) {
    data.frame(s_rsq = summary(x$models$reg_surv)$r.sq,
               g_rsq = summary(x$models$reg_growth)$r.sq)
}
rsq <- t(sapply(vr, get_rsq))
dimnames(rsq)[[1]] <- sapply(vr, function(x) x$which_sp)
rsq

### mean of *predicted* values per SPECIES per PLOT
prs <- lapply(vr, function(x) { data.table::data.table(x$preds) })
prs <- do.call(rbind, prs)
d <- cbind(d, prs[match(d$tre_cn, prs$tre_cn)])  ;  rm(prs)
p <- d[, list(plot_surv   = mean(fit_surv, na.rm=T),
              plot_growth = mean(fit_growth, na.rm=T),
              obs_surv    = mean(surv, na.rm=T),
              obs_growth  = mean(annincr, na.rm=T),
              startdiam   = mean(dia_y1, na.rm=T)),
       by=list(plt_cn, spp)] ### <-- <-- <-- <--
j <- !names(d) %in% c('tre_cn','tre_cn.1','dist','x','y',
                      'is_seed','is_edge','dia_y1','dia_y2',
                      'surv','fit_surv','fit_growth')
p <- cbind(p, d[match(p$plt_cn, d$plt_cn), ..j]) ; rm(j) # plot match
is.na(p) <- is.na(p)
p

# ### mean of *fitted* values per SPECIES per PLOT
# fits <- lapply(vr, function(x) { data.table::data.table(x$fits) })
# fits <- do.call(rbind, fits)
# d <- cbind(d, fits[match(d$tre_cn, fits$tre_cn)])
# rm(p)
# p <- d[, list(plot_surv   = mean(fit_surv, na.rm=T),
#               plot_growth = mean(fit_growth, na.rm=T),
#               obs_surv    = mean(surv, na.rm=T),
#               obs_growth  = mean(annincr, na.rm=T),
#               startdiam   = mean(dia_y1, na.rm=T)),
#        by=list(plt_cn, spp)] ### <-- <-- <-- <--
# j <- !names(d) %in% c('tre_cn','tre_cn.1','dist','x','y',
#                       'is_seed','is_edge','dia_y1','dia_y2',
#                       'surv','fit_surv','fit_growth')
# p <- cbind(p, d[match(p$plt_cn, d$plt_cn), ..j]) ; rm(j) # plot match
# is.na(p) <- is.na(p)
# p

# png('./fig/fig_02_gamdiagnostics.png',9,3,'in',bg='transparent',res=1000)
set_par(4)
plo(p$obs_surv, p$plot_surv, cex=0.5, ylab='Fitted survival', xlab='Obsvd survival') ; abline(0,1)
plo(p$obs_growth, p$plot_growth, cex=0.5, ylab='Fitted growth', xlab='Obsvd growth') ; abline(0,1)
plot(p$obs_surv, p$plot_surv, pch=16, cex=0.5, col=colvec(as.factor(d$spp)),
    ylab='Fitted survival', xlab='Obsvd survival') ; abline(0,1)
plot(p$obs_growth, p$plot_growth, pch=16, cex=0.5, col=colvec(as.factor(d$spp)),
    ylab='Fitted growth', xlab='Obsvd growth') ; abline(0,1)
# dev.off()


# png('./fig/fig_03_gam-vs-climate.png',9,3,'in',bg='transparent',res=1000)
set_par(12)
plo(p$pc1, p$obs_surv, cex=0.5, xlab='PC1', ylab='Obsvd survival')
plo(p$pc1, p$plot_surv, cex=0.5, xlab='PC1', ylab='Fitted survival')
plo(p$pc1, p$obs_growth, cex=0.5, xlab='PC1', ylab='Obsvd growth')
plo(p$pc1, p$plot_growth, cex=0.5, xlab='PC1', ylab='Fitted growth')
# set_par(4)
plo(p$pc2, p$obs_surv, cex=0.5, xlab='PC2', ylab='Obsvd survival')
plo(p$pc2, p$plot_surv, cex=0.5, xlab='PC2', ylab='Fitted survival')
plo(p$pc2, p$obs_growth, cex=0.5, xlab='PC2', ylab='Obsvd growth')
plo(p$pc2, p$plot_growth, cex=0.5, xlab='PC2', ylab='Fitted growth')
# set_par(4)
plo(p$startdiam, p$obs_surv, cex=0.5, xlab='Diam Y1', ylab='Obsvd survival')
plo(p$startdiam, p$plot_surv, cex=0.5, xlab='Diam Y1', ylab='Fitted survival')
plo(p$startdiam, p$obs_growth, cex=0.5, xlab='Diam Y1', ylab='Obsvd growth')
plo(p$startdiam, p$plot_growth, cex=0.5, xlab='Diam Y1', ylab='Fitted growth')
# dev.off()












### smooth plots
`get_smooth` <- function(vr, dd,
                         pick=c('reg_surv',
                                'reg_growth'),
                         xvar='dia_y1') {
    visreg::visreg(vr$models[[pick]], xvar=xvar,
                   type='conditional', scale='response',
                   data=setDF(dd[spp %in% vr$which_sp,]),
                   plot=F)$fit[,c(xvar,'visregFit')]
    #     mod <- vr$models[[pick]]
    #     dat <- setDF(dd[spp %in% vr$which_sp,])
    #     visreg::visreg(mod, xvar=xvar,
    #                    type='conditional', scale='response',
    #                    data=dat,plot=F)$fit[,c(xvar,'visregFit')]
}
n <- 10
# n <- length(vr)
cc <- viridis::viridis(n)
sm_surv <- lapply(vr[c(1:n)], function(i) get_smooth(i,d,'reg_surv'))
sm_growth <- lapply(vr[c(1:n)], function(i) {
    cat(i$which_sp,'\n')
    get_smooth(i,d,'reg_growth')
})

par(mar=c(4,4,0.3,0.3), oma=c(0,0,0,0), mgp=c(2,0.5,0), tcl=-0.2,
    las=1, bty='l')
plot(sm_surv[[1]], type='l', xlim=c(0,130), ylim=c(0,1),
     col=cc[1], ylab='Survival', xlab='Dia Y1', lwd=2)
lapply(2:n, function(i) lines(sm_surv[[i]], col=cc[i], lwd=2))
legend('topright', legend=sapply(vr[c(1:n)], function(x) x$which_sp),
       fill=cc, bty='n', cex=0.7, pt.cex=0.9)


### compare PEAK of GAM along climate gradient among species
`h` <- function(x, ...) {
    hist(x, bre=22, col='grey', xaxs='i', yaxs='i', las=1, main='', ...)
}
set_par(12)
sapply(names(param), function(x) h(param[,x], xlab=x))




















### plot recruit sizes, by species
`plot_se` <- function(xval, yval, se, x_order=TRUE, ylim, ...) {
    o <- if (isTRUE(x_order)) order(yval) else 1:length(yval)
    if (is.character(xval)) {
        nm     <- xval[o]
        yval   <- yval[o]
        se     <- se[o]
        xval   <- 1:length(xval)
    }
    if(missing(ylim)) {
        rng  <- range(c(yval-se, yval+se), na.rm=T)
        buf  <- diff(rng) * 0.1
        ylim <- c(rng[1]-buf, rng[2]+buf)
    }
    par(mar=c(8,2.5,0.5,0.5), las=2, bty='L', yaxs='i', tcl=-0.2,
        mgp=c(1.5,0.4,0))#, pty='s')
    plot(xval, yval, pch=19, cex.axis=0.7, cex.lab=0.9, xaxt='n',
         ylim=ylim, xlab='', ...)
    axis(1, at=xval, labels=nm, cex.axis=0.7)
    arrows(as.numeric(xval), yval-se, as.numeric(xval), yval+se,
           len=0.02, ang=90, code=3)
}
recruitsize <- do.call(rbind, lapply(vr, function(x) {x$recruitsize}))
dimnames(recruitsize)[[1]] <- nm
recruitsize
set_par(1)
plot_se(xval = rownames(recruitsize), yval = recruitsize$mean,
        se   = recruitsize$sd, ylab = 'Recruit size (DBH)')

####    END    ####
phytomosaic/pnw documentation built on April 16, 2020, 7:29 p.m.