knitr::opts_chunk$set(echo = TRUE) library(ggplot2) library(dplyr)
I am trying to develop a way of summarizing a timeseries (or any dataset, I guess) to extract the slope of a linear fit, and the fuzz around the slope, without leaning heavily on p-values.
ibd <- readRDS("C:\\Users\\diaz.renata\\Documents\\GitHub\\BBSsize\\analysis\\isd_data\\ibd_isd_bbs_rtrg_304_17.Rds") sv <- ibd %>% group_by(year) %>% summarize(richness = length(unique(id)), abundance = dplyr::n(), biomass = sum(ind_size), energy = sum(ind_b)) %>% ungroup() %>% mutate(mean_energy = energy / abundance, mean_mass = biomass/abundance) sv_long <- sv %>% tidyr::pivot_longer(-year, names_to = "currency", values_to = "value") ggplot(sv_long, aes(x = year, y = value, color = currency)) + geom_line() + theme_bw() + facet_wrap(vars(currency), scales = "free_y")
So let's focus here on the abundance and energy time series, and maybe create a third TS that we consider more stable:
set.seed(1977) sv$stable <- rnorm(nrow(sv), mean = mean(sv$abundance), sd = sd(sv$abundance) / 10) sv_long <- sv %>% select(year, abundance, energy, stable) %>% tidyr::pivot_longer(-year, names_to = "currency", values_to = "value") ggplot(sv, aes(year, abundance)) + geom_line()+ ylim(0, max(sv$abundance)) + theme_bw() ggplot(sv, aes(year, energy)) + geom_line()+ ylim(0, max(sv$energy)) + theme_bw() ggplot(sv, aes(year, stable)) + geom_line()+ ylim(0, max(sv$stable)) + theme_bw()
We can fit a linear model and pull out the R2, but I don't think that's quite going to work...
n_lm <- lm((abundance) ~ year, sv) summary(n_lm) e_lm <- lm((energy) ~ year, sv) summary(e_lm) s_lm <- lm((stable) ~ year, sv) summary(s_lm)
For Hartland:
I want something to tell me that stable is more tightly around a 0-slope line than is energy.
sv_residuals <- data.frame( year = sv$year, abundance = resid(n_lm), energy = resid(e_lm), stable = resid(s_lm) ) %>% tidyr::pivot_longer(cols = c(abundance, energy, stable), names_to = "currency", values_to = "residual") sv_ests <- data.frame( year = sv$year, abundance = predict(n_lm), energy = predict(e_lm), stable = predict(s_lm) )%>% tidyr::pivot_longer(cols = c(abundance, energy, stable), names_to = "currency", values_to = "estimate") sv_slopes <- data.frame( abundance = n_lm$coefficients["year"], energy = e_lm$coefficients["year"], stable = s_lm$coefficients["year"] )%>% tidyr::pivot_longer(cols = c(abundance, energy, stable), names_to = "currency", values_to = "slope") ggplot(sv_residuals, aes(year, residual, color =currency)) + geom_line() sv_err <- left_join(sv_residuals, sv_ests) %>% left_join(sv_slopes) %>% mutate(abs_residual = abs(residual)) %>% mutate(abs_resid_est = abs_residual / estimate) ggplot(sv_err, aes(year, abs_resid_est, color =currency)) + geom_line() sv_err %>% group_by(currency) %>% summarize(mean_abs_resid_est = mean(abs_resid_est), mean_est = mean(estimate), slope = mean(slope)) %>% mutate(scaled_slope = slope / mean_est) ggplot(sv_err, aes(year, estimate, color = currency)) + geom_line() ggplot(filter(sv_err, currency != "energy"), aes(year, estimate, color = currency)) + geom_line()
So what I have done here is:
This gives a measure of how large the residuals are relative to the estimates. Scale() and center() break this kind of approach...
Then I got the slopes from the lms(), but because the slopes depend on the magnitude of the variables, I rescaled them to slope / mean(estimated value).
The mean_abs_resid_est and scaled_slope values align with what I generally want them to, but it's all extremely rough. I may have reinvented some wheels.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.