knitr::opts_chunk$set(fig.width = 12, fig.height = 9)
knitr::opts_knit$set(progress = TRUE, verbose = TRUE)

Import Data

library("ChickpeaAscoDispersal")
library("tidyverse")
library("broom")
library("ggpubr")
library("mgcv")
library("mgcViz")

theme_set(theme_pubclean(base_size = 14))

Create Data Set for GAMs

Join the lesion_counts data and the summary_weather data to create dat for creating GAMs.

dat <-
   left_join(lesion_counts, summary_weather, by = c("site", "rep"))
# run only if knitting on new computer/new R installation
# !!!! Run this in a regular interactive console not this markdown file !!!!
# this is necessary to embed fonts in .eps files for EJPP
# run only if knitting on new computer
# this is necessary to embed fonts in .eps files for EJPP
library("extrafont")

if (.Platform$OS.type == "windows") {
   font_import(pattern = "arial", prompt = FALSE)
   loadfonts(device = "postscript", quiet = TRUE)
} else {
   font_import(pattern = "Arial", prompt = FALSE)
   loadfonts(device = "postscript", quiet = TRUE)
}

Fit GAMs

For reproducibility purposes, use set.seed().

set.seed(27)

mod1 - s(Distance)

mod1 <-
   gam(
      m_lesions ~ s(distance, k = 5),
      data = dat
   )

summary(mod1)

print(p_gam(x = getViz(mod1)) +
         ggtitle("s(Distance)"),
      pages = 1)

mod2 - s(Distance) + Precipitation

mod2 <-
   gam(
      m_lesions ~ sum_rain+ s(distance, k = 5),
      data = dat
   )

summary(mod2)

print(p_gam(x = getViz(mod2)) +
         ggtitle("s(Distance) + Precipitation"),
      pages = 1)

mod3 - s(Distance) + Wind speed

mod3 <-
   gam(m_lesions ~ mws + s(distance, k = 5),
       data = dat)

summary(mod3)

print(p_gam(x = getViz(mod3)) +
         ggtitle("s(Distance) + Wind speed"),
      pages = 1)

mod4 - s(Distance) + Wind speed + Precipitation

mod4 <-
   gam(m_lesions ~ sum_rain + mws + s(distance, k = 5),
       data = dat)

summary(mod4)

print(p_gam(x = getViz(mod4)) +
         ggtitle("s(Distance) + Wind speed + Precipitation"),
      pages = 1)

mod5 - s(Distance + Wind Speed) + Precipitation

mod5 <-
   gam(
      m_lesions ~ sum_rain + s(distance + mws, k = 5),
      data = dat
   )

summary(mod5)


print(p_gam(x = getViz(mod5)) +
         ggtitle("s(Distance + Wind Speed) + Precipitation"),
      pages = 1)

mod6 - s(Distance) + s(Wind Speed) + Precipitation

mod6 <-
   gam(
      m_lesions ~ sum_rain + s(distance, k = 5) + s(mws, k = 5),
      data = dat
   )

summary(mod6)


print(p_gam(x = getViz(mod6)) +
         ggtitle("s(Distance) + s(Wind Speed) + Precipitation"),
      pages = 1)

mod7 - s(Distance) + s(Wind Speed)

mod7 <-
   gam(
      m_lesions ~ s(distance, k = 5) + s(mws, k = 5),
      data = dat
   )

summary(mod7)

print(p_gam(x = getViz(mod7)) +
         ggtitle("s(Distance) + s(Wind Speed)"),
      pages = 1)

mod8 - s(Distance) + s(Wind Speed) + s(Precipitation)

mod8 <-
   gam(
      m_lesions ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, k = 5),
      data = dat
   )

summary(mod8)


print(p_gam(x = getViz(mod8)) +
         ggtitle("s(Distance) + s(Wind Speed) + s(Precipitation)"),
      pages = 1)

mod9 - s(Distance) + s(Precipitation)

mod9 <-
   gam(
      m_lesions ~ s(distance, k = 5) + s(sum_rain, k = 5),
      data = dat
   )

summary(mod9)

print(p_gam(x = getViz(mod9)) +
         ggtitle("s(Distance) + s(Precipitation)"),
      pages = 1)

mod10 - s(Distance) +s(Precipitation) + Wind speed

mod10 <-
   gam(
      m_lesions ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws,
      data = dat
   )

summary(mod10)

print(p_gam(x = getViz(mod10)) +
         ggtitle("s(Distance) + s(Precipitation) + Wind speed"),
      pages = 1)

mod11 - s(Distance) + s(Wind Speed) + s(Precipitation), family = tw()

This is the same as mod8 but using family = tw(), see ?family.mgcv for more on the families. The Tweedie distribution is used where the distribution has a positive mass at zero, but is continuous unlike the Poisson distribution that requires count data. The data visualisation shows clearly that the mean pot count data have this shape.

mod11 <-
   gam(
      m_lesions ~ s(distance, k = 5) + 
         s(mws, k = 5) + 
         s(sum_rain, k = 5),
      data = dat,
      family = tw()
   )

summary(mod11)

print(p_gam(x = getViz(mod11)) +
   ggtitle("s(Distance) + s(Wind Speed) + s(Precipitation), family = tw()"),
   pages = 1)

mod12 - s(Distance, bs = "ts") + s(Precipitation, bs = "ts") Wind speed, family = tw()

Try using wind speed as a linear predictor only.

mod12 <-
   gam(
      m_lesions ~ s(distance, k = 5, bs = "ts") + 
         s(mws, k = 5, bs = "ts") + 
         s(sum_rain, k = 5, bs = "ts"),
      data = dat,
      family = tw()
   )

summary(mod12)

print(
   p_gam(x = getViz(mod12)) +
      ggtitle(
         "s(Distance, bs = 'ts') + s(Wind speed, bs = 'ts')\n+ s(Precipitation, bs = 'ts'), family = tw()"
      ),
   pages = 1
)

mod13 - s(Distance, bs = "ts") + s(Wind speed, bs = "ts") + s(Precipitation, bs = "ts"), family = tw()

mod13 <-
   gam(
      m_lesions ~ s(distance, k = 5, bs = "ts") + 
         s(mws, k = 5, bs = "ts") + 
         s(sum_rain, k = 5, bs = "ts"),
      data = dat,
      family = tw()
   )

summary(mod13)

print(
   p_gam(x = getViz(mod13)) +
      ggtitle(
         "s(Distance, bs = 'ts') + s(Wind speed, bs = 'ts')\n+ s(Precipitation, bs = 'ts'), family = tw()"
      ),
   pages = 1
)

This model, same structure as mod11, uses thin-plate splines to shrink the coefficients of the smooth to zero when possible.

Compare the Models

AIC, BIC

models <- list(mod1 = mod1,
               mod2 = mod2,
               mod3 = mod3,
               mod4 = mod4,
               mod5 = mod5,
               mod6 = mod6,
               mod7 = mod7,
               mod8 = mod8,
               mod9 = mod9,
               mod10 = mod10,
               mod11 = mod11,
               mod12 = mod12,
               mod13 = mod13
               )
map_df(models, glance, .id = "model") %>%
   arrange(AIC)

R^2^

enframe(c(
   mod1 = summary(mod1)$r.sq,
   mod2 = summary(mod2)$r.sq,
   mod3 = summary(mod3)$r.sq,
   mod4 = summary(mod4)$r.sq,
   mod5 = summary(mod5)$r.sq,
   mod6 = summary(mod6)$r.sq,
   mod7 = summary(mod7)$r.sq,
   mod8 = summary(mod8)$r.sq,
   mod9 = summary(mod9)$r.sq,
   mod10 = summary(mod10)$r.sq,
   mod11 = summary(mod11)$r.sq,
   mod12 = summary(mod12)$r.sq,
   mod13 = summary(mod13)$r.sq
)) %>%
   arrange(desc(value))

NOTE The original work had an anova.gam() to compare the models. Somehow, somewhere it seems that changes have been made and current versions of the R packages don't allow an ANOVA comparison for GAMs that use family = tw(), so this portion has been removed from this vignette. -ahs 2024-04-28

Check Best Model Fit

mod11 - s(Distance) + s(Wind Speed) + s(Precipitation), family = tw()

mod11_vis <- getViz(mod11)
check(mod11_vis,
      a.qq = list(method = "tnorm", 
                  a.cipoly = list(fill = "light blue")), 
      a.respoi = list(size = 0.5), 
      a.hist = list(bins = 10))
# generate a new plot.gam object just for the publication (no main title)
p11 <- p_gam(x = getViz(mod11))

# save png and eps files
png(
   file = here::here("man/figures", "Fig1.png"),
   width = 640,
   height = 640,
   units = "px",
   pointsize = 14
)
print(p11, pages = 1)
dev.off()

postscript(file = here::here("man/figures", "Fig1.eps"),
           family = "Arial")
par(mar = c(5, 3, 2, 2) + 0.1)
print(p11, pages = 1)
dev.off()

embed_fonts(
   file = here::here("man/figures", "Fig1.eps"),
   outfile = here::here("man/figures", "Fig1.eps"),
   options = "-dEPSCrop"
)

Thoughts

This model, mod11, m_lesions ~ s(Distance) + s(WindSpeed) + s(Precipitation) - family = tw(), is the best performing model. It cannot be used for predictions, but suitably describes the dispersal data we have on hand with the parameters used. More data would be desirable to increase the value of k as evidenced in the GAM checks.



adamhsparks/ChickpeaAscoDispersal documentation built on April 29, 2024, 12:32 p.m.