# Global knitr chunk option: echo = TRUE is set as the default for all chunks.
knitr::opts_chunk$set(echo = TRUE)
Data was received from Steve Latta as separate spreadsheets, which were merged into a single .csv file in the 0_proofing_merging....Rmd script
This present script tidies/cleans up the data.
The following script makes final changes to prepare for analysis
Note that the data worked up here produce slightly different results than appear in the original rejected MS; this is likely due to tabulation errors during the original work. In general, the rank order of all comparisons (e.g., when looking at differences between sites) remains the same
What follows is a brief overview of my general approach to cleaning data. This is mostly boilerplate. See 0_proofing_merging....Rmd for more details
The following steps aren't necessarily all distinct in practice or necessarily occur in this precise order
Taking data from the format it was entered in and putting it into the format that works best with R; usually this means reshaping from "wide" to "long" format, e.g., going from data with each subplot in a different column to all data in a single column, with the subplot number indicated in a separate column.
Converting into an analysis-ready format. For example, splitting columns that combine treatment and plot number into separate columns (a treatment column and a subplot column).
Taking reshaped data and fixing any issues such as typos, incorrect column names, data entered into the wrong field, incorrect species names, etc.
After cleaning and tidying, the data could be "reshaped" (or recast) back into its original format and there would still be an ~1:1 correspondence with the datasheet.
library(here) library(reshape2) library(stringr) library(stringi) library(lubridate)
Use here::here()
# Print the project root that `here` resolves, as a quick sanity check.
here::here()

# Path to the merged capture file, built relative to the project root.
filename <- here::here(
  "data-raw",
  "data-raw-mencia",
  "mencia_all_captures_by_year",
  "mencia_all_years.csv"
)
Load the .csv with all years of capture records stacked (single entry per bird; no within-season recaps for site persistence)
# Read the stacked capture records. BAND.SUF is forced to character so the
# band suffix is kept as text rather than parsed as a number.
mencia <- read.csv(
  file = filename,
  skip = 0,
  stringsAsFactors = FALSE,
  colClasses = c(BAND.SUF = "character")
)

# Quick look at what was loaded.
dim(mencia)
head(mencia)

# Work with lower-case column names from here on.
colnames(mencia) <- tolower(colnames(mencia))
Beak measurements (recorded in the notes field) are used for sexing. Put them into a separate column
# Strip every letter from the free-text notes and coerce what remains to a
# number; this becomes the beak column. Anything that is not a clean number
# after stripping turns into NA.
mencia$beak <- as.numeric(gsub("[a-zA-Z]", "", mencia$notes))
summary(mencia$beak)

# Treat the identifier-like and categorical columns as factors.
factor_cols <- c("band.pre", "band.suf", "colors", "age", "year")
mencia[factor_cols] <- lapply(mencia[factor_cols], factor)

# Overview of every column.
summary(mencia)

# Full tabulations of the categorical fields; maxsum is raised to 1000 so
# that levels are not lumped together in the printed summary.
summary(factor(mencia$fat), maxsum = 1000)
summary(factor(mencia$sex), maxsum = 1000)
summary(factor(mencia$notes), maxsum = 1000)

# Parse the day-month-year date strings into a proper Date column.
mencia$date2 <- lubridate::dmy(mencia$date)
Order by date
# Sort the capture records chronologically by the parsed date.
mencia <- mencia[order(mencia[["date2"]]), , drop = FALSE]
Some years sites have "A" label; remove
# Cross-tabulate captures by year and site to see which site labels occur.
table(year = mencia$year, site = mencia$site)

# Drop the trailing " A" that some years attach to the site label.
mencia$site <- factor(sub("[ ][A]$", "", mencia$site))

# Colour codes: standardise lower-case "x-" to upper-case "X-".
colour_codes <- as.character(mencia$colors)
mencia$colors <- factor(gsub("x-", "X-", colour_codes))

# Species codes: remove the stray hyphen in "AM-KE".
species_codes <- as.character(mencia$species)
mencia$species <- factor(gsub("AM-KE", "AMKE", species_codes))

# Sex: a recorded "U" (unknown) is stored as NA.
sex_unknown <- mencia$sex == "U"
mencia$sex <- factor(ifelse(sex_unknown, NA, mencia$sex))
Need to check species codes to make sure they are current and no typos.
Some of these are out of date. This will be fixed in the "Scrubbing" script. This current work is done to send the info to Steve Latta for review
# library(reshape2) # library(ggplot2) # library(ggpubr) # spp.count <- dcast(data = mencia, # formula = species ~ ., # fun.aggregate = length) # names(spp.count) <- c("species","tot.captures") # # i.order <- order(spp.count$tot.captures,decreasing = T) # spp.count$species <- factor(spp.count$species, # levels = spp.count$species[i.order]) # # spp.count <- spp.count[i.order,] # # #plot # ggbarplot(data = spp.count, # x = "species", y = "tot.captures") + # theme(axis.text.x = element_text(angle = 90, hjust = 1))
Save table of species names for sending to S Latta
#write.csv(spp.count, file = "temp_spp_codes.csv")
Because the study occurs over several years, the sites undergo succession, and their exact age each year is important to consider
# Initial age assigned to each site. The four values are matched
# positionally to the levels of mencia$site.
site.age.tab <- data.frame(
  site = levels(mencia$site),
  site.age.init = c(20, 5, 2, 10)
)

# Attach the initial age to every capture record (joined on the shared
# columns, which is expected to be `site`).
mencia <- merge(x = mencia, y = site.age.tab)

# Site age in the year of capture: initial age plus years elapsed since 2003.
# NOTE(review): year.num is not created in this script; it is presumably a
# numeric year column already present in the loaded data -- confirm.
mencia$site.age <- (mencia$site.age.init + mencia$year.num) - 2003

# Full band identifier: prefix and suffix joined with a hyphen.
mencia$band <- paste(mencia$band.pre, mencia$band.suf, sep = "-")
These are the migrant species focused on in the original rejected MS
# Focal species flagged as "mig" in the status.focals column.
focal.mig <- c(
  "OVEN", "BAWW", "COYE", "AMRE",
  "CMWA", "BTBW", "PAWA", "PRAW"
)

# Row indices of captures belonging to those species.
i <- which(mencia$species %in% focal.mig)

# Start with every record unclassified, then label the matching rows.
mencia$status.focals <- NA
mencia$status.focals[i] <- "mig"
These are the resident species focused on in the original rejected MS
# Focal species flagged as "res" in the status.focals column. The codes
# listed here are the ones in use at this point in the script.
focal.res <- c(
  "HLCU", # changes to HILC
  "STOF",
  "RLTH",
  "NOMO",
  "GRWA", # changes to GTGT
  "BANA",
  "BCPT",
  "YFGR",
  "BFGR",
  "GABU"
)

# Row indices of captures belonging to those species.
i <- which(mencia$species %in% focal.res)

# Label the matching rows; all other rows keep their current value.
mencia$status.focals[i] <- "res"
A bird could be aged into 2 different age classes; this did not appear to occur. This makes sense because most birds are not recaptured. Birds captured as HY often die or move to a new site
# x <- dcast(data= mencia[i.unique,], # formula = band + species + site ~ age, # value.var = "age", # fun.aggregate = length) # # # timescap <- apply(x[,c("AHY","ASY","HY","SY","NA")],1,sum) # max(timescap)
# Load the species metadata table, keeping text columns as character.
spp.meta <- read.csv(
  file = here::here("data", "spp_list.csv"),
  stringsAsFactors = FALSE
)
Subset columns I want to use
# Inspect the two status columns before choosing which one to keep.
spp.meta$status
spp.meta$status2

# Keep only the columns used downstream (status2 is retained, status is not).
spp.meta <- spp.meta[, c("spp.code", "spp", "status2", "hab1", "diet"),
                     drop = FALSE]

summary(spp.meta)
Green-tailed ground warbler and green-tailed ground tanager are the same species
Change column name
# Rename the species column to spp.code so it matches the metadata table.
colnames(mencia) <- gsub("species", "spp.code", colnames(mencia))

# Work on plain character codes so new code values can be assigned freely.
mencia$spp.code <- as.character(mencia$spp.code)

# Update outdated codes: GRWA -> GTGT and HLCU -> HILC.
mencia$spp.code[mencia$spp.code %in% "GRWA"] <- "GTGT"
mencia$spp.code[mencia$spp.code %in% "HLCU"] <- "HILC"
MYWA = myrtle warbler = YRWA (yellow-rumped warbler); NUMA -> SBMU (nutmeg mannikin = scaly-breasted munia); WHQD = WFQD
# Further code updates: MYWA -> YRWA, NUMA -> SBMU, WHQD -> WFQD.
mencia$spp.code[mencia$spp.code %in% "MYWA"] <- "YRWA"
mencia$spp.code[mencia$spp.code %in% "NUMA"] <- "SBMU"
mencia$spp.code[mencia$spp.code %in% "WHQD"] <- "WFQD"
Merge
# Compare column names before merging; the merge joins on whatever columns
# the two tables share (expected to be spp.code).
names(mencia)
names(spp.meta)

# Full outer join: keeps captures with no metadata match and metadata rows
# for species that were never captured.
mencia2 <- merge(mencia, spp.meta, all = TRUE)

# Inspect the merged result. This must come after the merge, because
# mencia2 does not exist before it.
names(mencia2)
dim(mencia)
dim(mencia2)

# Drop rows that came only from the metadata table (no capture, so site is
# NA). A logical mask is used instead of -which(...): when no site is NA,
# which() returns integer(0) and a negative empty index would select zero
# rows, silently discarding the whole data set.
mencia2 <- mencia2[!is.na(mencia2$site), ]

mencia <- mencia2
# Output path for the cleaned data, alongside the raw merged file.
filename <- here::here(
  "data-raw",
  "data-raw-mencia",
  "mencia_all_captures_by_year",
  "mencia_cleaned.csv"
)

# Save the cleaned table without the row-name column.
write.csv(mencia, file = filename, row.names = FALSE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.