In brouwern/avesdemazan: What the Package Does (One Line, Title Case)

knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

library(avesdemazan)

# Data cleaning
library(dplyr)      
library(tidyr)   
library(reshape2)
library(stringi)  

library(knitr)

library(here)

Purpose of script

merge in species traits (e.g. diet, weight class, etc)
set up continuous time variable
save as .RData for use in main regression analysis
summarize data into total captures per sessions (pools all species into "community abundance")

Load data

Data is stored within the avesdemazan package

data("ecuador_5")

## Species list
data("Ecuador_Species_List")

# rename data object
## TODO() update object names / code so this isn't necessary
spp_trt <- Ecuador_Species_List

Add species traits to dataframe

Species traits stored in Ecuador_Species_List, which is re-saved as spp_trt upon loading (above).

# Species list (mist net)
spp_list <- ecuador_5[,c("Specie.Code")]
spp_list <- unique(spp_list)

# Convert species list to dataframe 
spp_list <- as.data.frame(unlist(spp_list))
colnames(spp_list) <- c("Specie.Code")

# Add indicator column (used for filtering merged dataframe)
spp_list$ind <- rep(1,dim(spp_list)[1])

# Prepare dataframes for merging
spp_list$Specie.Code <- as.character(spp_list$Specie.Code)
spp_trt$Specie.Code <- as.character(spp_trt$Specie.Code)

# Merge dataframes
spp_list <- full_join(spp_trt, spp_list, by = "Specie.Code")
spp_list <- spp_list[-which(is.na(spp_list$ind) == TRUE), ]

Merge trait data with species data

# 
ecuador_5$Specie.Code <- as.character(ecuador_5$Specie.Code)
ecuador_5 <- merge(ecuador_5, spp_list, by = "Specie.Code")
ecuador_5$Specie.Code <- as.factor(ecuador_5$Specie.Code)

Regression data setup

Inclusion criterion We included only those species captured in 4 or more years (out of 11 years).

Regression model We modeled capture rate as a function of time and habitat type with random intercepts and random slopes for each species nested within each habitat. We used an autoregressive(1) negative binomial model to account for correlation across time. Autoregressive(1), or AR(1), models include the response variable at the previous time point as a predictor for the current time point; that is, the observed capture rate at time $t$ is used to predict the capture rate at time $t+1$. We compared different distributions, including poisson, negative binomial, zero-inflated poisson, zero-inflated negative binomial, and the negative binomial distribution performed best.

Make continuous time variable

# Make continuous time variable
# Find all unique years
years <- unique(ecuador_5$year)

# Create new column for continuous time variable
ecuador_5$time_cts <- NA

# Create continuous time variable
for (i in 1:length(years))
{
  # Index current year
  i.year <- which(ecuador_5$year == years[i])

  # Calculate time variable
  ecuador_5$time_cts[i.year] <- ifelse(ecuador_5$session[i.year] == "PRIMERA", 3*(i-1) + 1,
                              ifelse(ecuador_5$session[i.year] == "SEGUNDA", 3*(i-1) + 2,
                              ifelse(ecuador_5$session[i.year] == "TERCERA", 3*(i-1) + 3,
                                                                           NA)))
}

# Change time unit = 1 year
ecuador_5$time_cts <- ecuador_5$time_cts/3

# Subset species caught 4 or more years
ecuador0 <- ecuador_5
ecuador_5 <- ecuador_5 %>% filter(tot.yrs > 3)
ecuador_5$i <- 1:dim(ecuador_5)[1]

# ?
ecuador_5 <- ecuador_5 %>% filter(time_cts < 7.6 | time_cts > 7.7)
ecuador_5 <- ecuador_5 %>% filter(time_cts < 10.3 | time_cts > 10.4)

Time organized in 1/3 of years.
year 1, session PRIMERA = 0.33 year year 1, session SEGUNDA = 0.667 year year 1, session TERCERA = 1.909 year year 2, session SEGUNDA = 1.33 year

summary(factor(ecuador_5$time_cts[1:10]))

This plot implies how it works

par(mfrow = c(1,2))
plot(time_cts ~ year_cent, data = ecuador_5)
plot(time_cts ~ year, data = ecuador_5)

Save individual-level dataframe

This is the dataframe used in analyses.

It is 1949 x 22

dim(ecuador) # 1949   22

# save .csv to data-raw folder
file. <- "ecuador.csv"
loc. <- here::here("data-raw")
file.full <- here::here(loc., file.)
#exists(file.)
write.csv(ecuador_5, file = file.full, row.names = F)


# save .RData to data folder
ecuador <- ecuador_5
usethis::use_data(ecuador, overwrite = TRUE)
make_dateset_helpfile(dataset = ecuador,
                      dataset_name = "ecuador")

Create and save total captures

This pools all captures to determine "community abundance"

# Aggregate across species
ecuador <- ecuador_5
ecuador_tot <- ecuador %>% group_by(year, 
                                    session, Location, 
                                    tot_net_hours, 
                                    time_cts) %>%
  summarize(N = sum(N))

dim(ecuador_tot) # 86  6

# save .csv to data-raw folder
file. <- "ecuador_tot.csv"
loc. <- here::here("data-raw")
file.full <- here::here(loc., file.)
#exists(file.)
write.csv(ecuador_tot, file = file.full, row.names = F)


# save .RData to data folder
usethis::use_data(ecuador_tot, overwrite = TRUE)
# make_dateset_helpfile(dataset = ecuador_tot,
#                       dataset_name = "ecuador_tot")