knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Introduction

Citation

Lloyd et al. 2016. Assessing conservation status of resident and migrant birds on Hispaniola with mist-netting. PeerJ. https://peerj.com/articles/1541/

Preliminaries

Load Libraries

library(reshape2)
library(here)
library(lubridate)

Load data

The here() function of the here package is used to construct the correct file path for the raw data

file. <- ""
path. <- 

Getting to know the data

Check to make sure it work

file.exists(path.)

Now load the actualy data


Check out data structure

Use basic R functdions to get to know the data

dim(dat)
names(dat)
summary(factor(dat$Date))

Check out data layout

Reshape Data For Regression Analyses

Look at months when sampling occured

table(as.numeric(dat$Mo),dat$Year)

Set up index for working data

NOTE: There is a random year when they also sampled in May - look into this (see above)

dat$Year <- as.numeric(dat$Year)
i.07.all <- which(dat$Year > 1996 & 
                #dat$Repeat.New == 1 &
                dat$Mo %in% c(1,2,3) &       #originally dat$Mo != 11 &
                dat$effort == "constant")

Check

summary(factor(dat$Year)[i.07.all])

"Cast by Location: PUVI vs. PALO"

This actually casts by ID... not sure why discrepencay in how I describe it and what I did, so I put the title in " "

with(dat, table(Session, Mo,Year))

summary(dat$Session)
summary(factor(dat$Mo))
summary(factor(dat$Year))
summary(factor(dat$Session))

with(dat, table(Year,Session))
cast.by.site <- dcast(data = dat[i.07.all,],
               formula = Species + Year + Location + Session  ~ ID,
               value.var = "Species",
               fun.aggregate = length)


dim(cast.by.site)

#df is very wide b/c there is a row for each individual!
summary(factor(cast.by.site$Year))
summary(factor(cast.by.site$Session))

with(cast.by.site, table(Year,Session,Location))

Recode 0/1

this allows total count of unique individuals to be determined

fx01 <- function(x){ifelse(x > 0,1,0)}
names(cast.by.site)[1:5]
cast.by.site[,-c(1:4)] <- apply(cast.by.site[,-c(1:4)],2,fx01)
summary(factor(cast.by.site$Year))
dim(cast.by.site)

Calcualte total captured for a given year w/in a site

spp.ann.tot <- apply(cast.by.site[,-c(1:4)],1,sum)
length(spp.ann.tot)
spp.ann.tot <- cbind(cast.by.site[,c(1:4)],
                     spp.ann.tot)
dim(spp.ann.tot)

summary(factor(spp.ann.tot$Year))

Check

length(unique(spp.ann.tot$Species))
unique(spp.ann.tot$Year)


with(spp.ann.tot, table(Year,Session,Location))

Add years when no birds were caught

Note that in its current format, there is a row of data ONLY if a species was captured at least once - there are no zeros for the year it wasn't captured

summary(spp.ann.tot$spp.ann.tot ) #min value == 1

Need to add zeros. First, make a dataframe that has a row for every species for every year for both sites. We'll use the handy expand.grid() functions will make all combinations of species-locations-years

all.spp <- unique(spp.ann.tot$Species)
length(all.spp)


Location <- unique(spp.ann.tot$Location)

#remove bad location
Location <- as.character(Location[-3])


Year <- unique(spp.ann.tot$Year)[order(unique(spp.ann.tot$Year))]


#session 1
empty.datframe.sess1 <- expand.grid(Species = all.spp,
                              Year = Year,
                              Location = Location,
                              Session = 1)

length(all.spp)*length(Year)*length(Location)
with(empty.datframe.sess1,
     table(Year, Location))

#session 2
## a 2nd sampling sessions occured at PUVI in March in just these 2 years
empty.datframe.sess2 <- expand.grid(Species = all.spp,
                              Year = c(2007,
                                       2008),
                              Location = "PUVI",
                              Session = 2)


dim(empty.datframe.sess1)  #884 4
dim(empty.datframe.sess2)  #68 4

empty.datframe <- rbind(empty.datframe.sess1,
                              empty.datframe.sess2)

dim(empty.datframe) #952 4

#952 = 884 + 68

There are 884 species-year-site combinations. There is extra stuff for sessions 2

length(all.spp)*length(Location)*length(Year)  #884

Merge the real data with the "empty dataframe. First, I'll show a problematic merge, then I'll correct it

    bad.merge <- merge(spp.ann.tot,empty.datframe)

The orignal and new dataframes are the same size; rows have not been inserted where there are currently zeros

    dim(bad.merge)
    dim(spp.ann.tot)

Add "all = T" to insert NAs into the dataframe where there currently are no entries (ie, years when a species wans't captured currently don't occur)

spp.ann.tot.b <- merge(spp.ann.tot,
                   empty.datframe,
                   all = T)

dim(spp.ann.tot.b)

Check the size; its now as big as the "empty.datafrme"

dim(spp.ann.tot.b)

summary(factor(spp.ann.tot.b$Session))

with(spp.ann.tot.b,table(Year, Session,Location))



summary(spp.ann.tot.b)

Change NAs to zeros

There now should be NAs inserted into the abundance column in years when a species wasn't observed at a particular site

summary(spp.ann.tot.b$spp.ann.tot)

Convert those NAs to zeros

i.NA <- which(is.na(spp.ann.tot.b$spp.ann.tot) == TRUE)
spp.ann.tot.b[i.NA,"spp.ann.tot"] <- 0

Check that zeros have been added

summary(spp.ann.tot.b$spp.ann.tot)
summary(factor(spp.ann.tot.b$Year))

with(spp.ann.tot.b,
     table(Year,Session))

with(spp.ann.tot.b,
     table(Year,Session,Location))

Load effort data

From Table 1 in Lloyd et al 2016. Note that for 2007 and 2009 there were additional sampling sessions in March for one Location

#data directory
d2 <- paste(d.working,"data/data_in", sep = "")

effort <- read.my.csv(file. = "Lloyd_table1_effort",
                      rel.dir. = d2)

Merge count data and effort

#remove stray NA

# i.NA <- which(spp.ann.tot.b$Location == "")
# spp.ann.tot.b <- spp.ann.tot.b[-i.NA,]

spp.ann.tot.b$Location <- factor(as.character(spp.ann.tot.b$Location))


#merge data
spp.ann.tot2<- merge(spp.ann.tot.b,
                     effort,
                     all = T)


dim(spp.ann.tot.b) == dim(spp.ann.tot2)

dim(spp.ann.tot.b) 
dim(spp.ann.tot2)
summary(spp.ann.tot2)

NOTE: things don't line up perfectly b/c of 2 years with zero effort and a few random NAs

Remove date column

i.date.col <- which(names(spp.ann.tot2) == "date")
spp.ann.tot2 <- spp.ann.tot2[,-i.date.col]
with(spp.ann.tot2, 
     table(Year, Location))
i.PALO <- which(spp.ann.tot2$Location == "PALO")

with(spp.ann.tot2[i.PALO,], 
     table(Year, Location))

Remove bad location

i.bad.loc <- which(spp.ann.tot2$Location == "")

spp.ann.tot2 <- spp.ann.tot2[-i.bad.loc,]

Set abundance of NA when net hours = 0

i.net.zero <- which(spp.ann.tot2$net.hours == 0)

spp.ann.tot2$spp.ann.tot[i.net.zero] <- NA

Save final cleaned data

d. <- paste(".",d.working,"data/data_out",sep = "")
f. <- paste("./",d.,"/Lloyd_data_cleaned.csv", sep = "")
write.csv(spp.ann.tot2, 
          row.names = F,
          file = f.)

spp.ann.tot2[which(spp.ann.tot2$Species == "GTGT" &
                     spp.ann.tot2$Location == "PUVI"), ]
spp.ann.tot2[which(spp.ann.tot2$Species == "WEWA" & spp.ann.tot2$spp.ann.tot > 0),]

Recaps per year

dat[which(dat$Species == "WEWA"),]

i.WEWA <- which(dat$Species == "WEWA" & dat$Year > 1996 & 
                dat$Repeat.New == 1 & #exclude ALL recaps
                dat$Mo %in% c(1,2,3) &  #previous was "!= 11"
                dat$effort == "constant")

cast.WEWA <- dcast(data = dat[i.WEWA,],
               formula = Species + Year  ~ ID,
               value.var = "Species",
               fun.aggregate = length)

Recode 0/1

Recode to pres/abs for

fx01 <- function(x){ifelse(x > 0,1,0)}

cast.WEWA[,-c(1:2)] <- apply(cast.WEWA[,-c(1:2)],2,fx01)

Calculate total caps

ann.tot.WEWA <- apply(cast.WEWA[,-c(1:2)],1,sum)
ann.tot.WEWA <- cbind(cast.WEWA[,c(1:2)],ann.tot.WEWA)

Cast to total across years

spp.tot.WEWA <- dcast(data  = ann.tot.WEWA,
      formula = Species ~ .,
      value.var = "ann.tot.WEWA",
      fun.aggregate = sum)

names(spp.tot.WEWA)[2] <- "spp.total"


brouwern/lloyd2016 documentation built on June 8, 2020, 12:36 a.m.