knitr::opts_chunk$set(fig.width = 12, fig.height = 9) knitr::opts_knit$set(progress = TRUE, verbose = TRUE)
library("ChickpeaAscoDispersal") library("tidyverse") library("broom") library("ggpubr") library("mgcv") library("mgcViz") theme_set(theme_pubclean(base_size = 14))
Join the lesion_counts
data and the summary_weather
data to create dat
for creating GAMs.
dat <- left_join(lesion_counts, summary_weather, by = c("site", "rep"))
# run only if knitting on new computer/new R installation # !!!! Run this in a regular interactive console not this markdown file !!!! # this is necessary to embed fonts in .eps files for EJPP # run only if knitting on new computer # this is necessary to embed fonts in .eps files for EJPP library("extrafont") if (.Platform$OS.type == "windows") { font_import(pattern = "arial", prompt = FALSE) loadfonts(device = "postscript", quiet = TRUE) } else { font_import(pattern = "Arial", prompt = FALSE) loadfonts(device = "postscript", quiet = TRUE) }
For reproducibility purposes, use set.seed()
.
set.seed(27)
mod1 <- gam( m_lesions ~ s(distance, k = 5), data = dat ) summary(mod1) print(p_gam(x = getViz(mod1)) + ggtitle("s(Distance)"), pages = 1)
mod2 <- gam( m_lesions ~ sum_rain+ s(distance, k = 5), data = dat ) summary(mod2) print(p_gam(x = getViz(mod2)) + ggtitle("s(Distance) + Precipitation"), pages = 1)
mod3 <- gam(m_lesions ~ mws + s(distance, k = 5), data = dat) summary(mod3) print(p_gam(x = getViz(mod3)) + ggtitle("s(Distance) + Wind speed"), pages = 1)
mod4 <- gam(m_lesions ~ sum_rain + mws + s(distance, k = 5), data = dat) summary(mod4) print(p_gam(x = getViz(mod4)) + ggtitle("s(Distance) + Wind speed + Precipitation"), pages = 1)
mod5 <- gam( m_lesions ~ sum_rain + s(distance + mws, k = 5), data = dat ) summary(mod5) print(p_gam(x = getViz(mod5)) + ggtitle("s(Distance + Wind Speed) + Precipitation"), pages = 1)
mod6 <- gam( m_lesions ~ sum_rain + s(distance, k = 5) + s(mws, k = 5), data = dat ) summary(mod6) print(p_gam(x = getViz(mod6)) + ggtitle("s(Distance) + s(Wind Speed) + Precipitation"), pages = 1)
mod7 <- gam( m_lesions ~ s(distance, k = 5) + s(mws, k = 5), data = dat ) summary(mod7) print(p_gam(x = getViz(mod7)) + ggtitle("s(Distance) + s(Wind Speed)"), pages = 1)
mod8 <- gam( m_lesions ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, k = 5), data = dat ) summary(mod8) print(p_gam(x = getViz(mod8)) + ggtitle("s(Distance) + s(Wind Speed) + s(Precipitation)"), pages = 1)
mod9 <- gam( m_lesions ~ s(distance, k = 5) + s(sum_rain, k = 5), data = dat ) summary(mod9) print(p_gam(x = getViz(mod9)) + ggtitle("s(Distance) + s(Precipitation)"), pages = 1)
mod10 <- gam( m_lesions ~ s(distance, k = 5) + s(sum_rain, k = 5) + mws, data = dat ) summary(mod10) print(p_gam(x = getViz(mod10)) + ggtitle("s(Distance) + s(Precipitation) + Wind speed"), pages = 1)
This is the same as mod8
but using family = tw()
, see ?family.mgcv
for more on the families.
The Tweedie distribution is used where the distribution has a positive mass at zero, but is continuous unlike the Poisson distribution that requires count data.
The data visualisation shows clearly that the mean pot count data have this shape.
mod11 <- gam( m_lesions ~ s(distance, k = 5) + s(mws, k = 5) + s(sum_rain, k = 5), data = dat, family = tw() ) summary(mod11) print(p_gam(x = getViz(mod11)) + ggtitle("s(Distance) + s(Wind Speed) + s(Precipitation), family = tw()"), pages = 1)
Try using wind speed as a linear predictor only.
mod12 <- gam( m_lesions ~ s(distance, k = 5, bs = "ts") + s(mws, k = 5, bs = "ts") + s(sum_rain, k = 5, bs = "ts"), data = dat, family = tw() ) summary(mod12) print( p_gam(x = getViz(mod12)) + ggtitle( "s(Distance, bs = 'ts') + s(Wind speed, bs = 'ts')\n+ s(Precipitation, bs = 'ts'), family = tw()" ), pages = 1 )
mod13 <- gam( m_lesions ~ s(distance, k = 5, bs = "ts") + s(mws, k = 5, bs = "ts") + s(sum_rain, k = 5, bs = "ts"), data = dat, family = tw() ) summary(mod13) print( p_gam(x = getViz(mod13)) + ggtitle( "s(Distance, bs = 'ts') + s(Wind speed, bs = 'ts')\n+ s(Precipitation, bs = 'ts'), family = tw()" ), pages = 1 )
This model, same structure as mod11
, uses thin-plate splines to shrink the coefficients of the smooth to zero when possible.
models <- list(mod1 = mod1, mod2 = mod2, mod3 = mod3, mod4 = mod4, mod5 = mod5, mod6 = mod6, mod7 = mod7, mod8 = mod8, mod9 = mod9, mod10 = mod10, mod11 = mod11, mod12 = mod12, mod13 = mod13 ) map_df(models, glance, .id = "model") %>% arrange(AIC)
enframe(c( mod1 = summary(mod1)$r.sq, mod2 = summary(mod2)$r.sq, mod3 = summary(mod3)$r.sq, mod4 = summary(mod4)$r.sq, mod5 = summary(mod5)$r.sq, mod6 = summary(mod6)$r.sq, mod7 = summary(mod7)$r.sq, mod8 = summary(mod8)$r.sq, mod9 = summary(mod9)$r.sq, mod10 = summary(mod10)$r.sq, mod11 = summary(mod11)$r.sq, mod12 = summary(mod12)$r.sq, mod13 = summary(mod13)$r.sq )) %>% arrange(desc(value))
NOTE The original work had an
anova.gam()
to compare the models. Somehow, somewhere it seems that changes have been made and current versions of the R packages don't allow an ANOVA comparison for GAMs that usefamily = tw()
, so this portion has been removed from this vignette. -ahs 2024-04-28
mod11_vis <- getViz(mod11) check(mod11_vis, a.qq = list(method = "tnorm", a.cipoly = list(fill = "light blue")), a.respoi = list(size = 0.5), a.hist = list(bins = 10))
# generate a new plot.gam object just for the publication (no main title) p11 <- p_gam(x = getViz(mod11)) # save png and eps files png( file = here::here("man/figures", "Fig1.png"), width = 640, height = 640, units = "px", pointsize = 14 ) print(p11, pages = 1) dev.off() postscript(file = here::here("man/figures", "Fig1.eps"), family = "Arial") par(mar = c(5, 3, 2, 2) + 0.1) print(p11, pages = 1) dev.off() embed_fonts( file = here::here("man/figures", "Fig1.eps"), outfile = here::here("man/figures", "Fig1.eps"), options = "-dEPSCrop" )
This model, mod11, m_lesions ~ s(Distance) + s(WindSpeed) + s(Precipitation) - family = tw()
, is the best performing model.
It cannot be used for predictions, but suitably describes the dispersal data we have on hand with the parameters used.
More data would be desirable to increase the value of k
as evidenced in the GAM checks.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.