knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(dplyr)

I am trying to develop a way of summarizing a timeseries (or any dataset, I guess) to extract the slope of a linear fit, and the fuzz around the slope, without leaning heavily on p-values.

ibd <- readRDS("C:\\Users\\diaz.renata\\Documents\\GitHub\\BBSsize\\analysis\\isd_data\\ibd_isd_bbs_rtrg_304_17.Rds")


sv <- ibd %>%
  group_by(year) %>%
  summarize(richness = length(unique(id)),
            abundance = dplyr::n(),
            biomass = sum(ind_size),
            energy = sum(ind_b)) %>%
  ungroup() %>%
  mutate(mean_energy = energy / abundance,
         mean_mass = biomass/abundance)


sv_long <- sv %>%
  tidyr::pivot_longer(-year, names_to = "currency", values_to = "value")

ggplot(sv_long, aes(x = year, y = value, color = currency)) +
  geom_line() +
  theme_bw() +
  facet_wrap(vars(currency), scales = "free_y")

So let's focus here on the abundance and energy time series, and maybe create a third TS that we consider more stable:

set.seed(1977)
sv$stable <- rnorm(nrow(sv), mean = mean(sv$abundance), sd = sd(sv$abundance) / 10)


sv_long <- sv %>%
  select(year, abundance, energy, stable) %>%
  tidyr::pivot_longer(-year, names_to = "currency", values_to = "value")

ggplot(sv, aes(year, abundance)) + 
  geom_line()+
  ylim(0, max(sv$abundance)) +
  theme_bw()


ggplot(sv, aes(year, energy)) + 
  geom_line()+
  ylim(0, max(sv$energy)) +
  theme_bw()


ggplot(sv, aes(year, stable)) + 
  geom_line()+
  ylim(0, max(sv$stable)) +
  theme_bw()

We can fit a linear model and pull out the R2, but I don't think that's quite going to work...

n_lm <- lm((abundance) ~ year, sv)

summary(n_lm)


e_lm <- lm((energy) ~ year, sv)

summary(e_lm)


s_lm <- lm((stable) ~ year, sv)

summary(s_lm)

For Hartland:

I want something to tell me that stable is more tightly around a 0-slope line than is energy.

sv_residuals <- data.frame(
  year = sv$year,
  abundance = resid(n_lm),
  energy = resid(e_lm),
  stable = resid(s_lm)
) %>%
  tidyr::pivot_longer(cols = c(abundance, energy, stable), names_to = "currency", values_to = "residual")

sv_ests <- data.frame(
  year = sv$year,
  abundance = predict(n_lm),
  energy = predict(e_lm),
  stable = predict(s_lm)
)%>%
  tidyr::pivot_longer(cols = c(abundance, energy, stable), names_to = "currency", values_to = "estimate")

sv_slopes <- data.frame(
  abundance = n_lm$coefficients["year"],
  energy = e_lm$coefficients["year"],
  stable = s_lm$coefficients["year"]
)%>%
  tidyr::pivot_longer(cols = c(abundance, energy, stable), names_to = "currency", values_to = "slope")

ggplot(sv_residuals, aes(year, residual, color =currency)) +
  geom_line()

sv_err <- left_join(sv_residuals, sv_ests) %>%
  left_join(sv_slopes) %>%
  mutate(abs_residual = abs(residual)) %>%
  mutate(abs_resid_est = abs_residual / estimate)


ggplot(sv_err, aes(year, abs_resid_est, color =currency)) +
  geom_line()

sv_err %>%
  group_by(currency) %>%
  summarize(mean_abs_resid_est = mean(abs_resid_est),
            mean_est = mean(estimate),
            slope = mean(slope)) %>%
  mutate(scaled_slope = slope / mean_est)

ggplot(sv_err, aes(year, estimate, color = currency)) +
  geom_line()

ggplot(filter(sv_err, currency != "energy"), aes(year, estimate, color = currency)) +
  geom_line()

So what I have done here is:

This gives a measure of how large the residuals are relative to the estimates. Scale() and center() break this kind of approach...

Then I got the slopes from the lms(), but because the slopes depend on the magnitude of the variables, I rescaled them to slope / mean(estimated value).

The mean_abs_resid_est and scaled_slope values align with what I generally want them to, but it's all extremely rough. I may have reinvented some wheels.



diazrenata/BBSstories documentation built on July 6, 2020, 11:59 a.m.