Cleaning before entry into R:
Used text-to-columns (comma-delimited) to separate the coded notes into columns
Replaced "MORT" and other non-numeric mortality notes with 7
Made a 'dead' column composed of note codes 7 (mortality) and 10 (whole fish collected)
Replaced `'` and `"` with blank to avoid problems reading in text columns
Replaced `ND` (No Data) with `NA` for missing data
Changed `Species` to `Code` in the fish data headers to be able to merge with the species data (could do in R after import)
Added primary key
Removed `#` symbol from GameFish
# Session setup: clear the workspace, attach packages, point R at the project
# directory, and read the raw input tables.
# NOTE(review): rm(list = ls()) and an absolute, user-specific setwd() make
# this script non-portable; both are kept here only to preserve behavior.
rm(list = ls())

library(data.table) # fast, efficient reading in of big data
library(dplyr)
library(ggplot2)
library(lubridate)

setwd('/Users/Dan/Documents/Research/Stream_Climate_Change/Brook_Trout/singlePassDetection/')

# Raw inputs. For the game fish table, "ND", empty and single-space strings
# are read as NA in addition to "NA".
gameFish <- fread(
  "inst/extdata/W_FI_Gamefish.csv",
  sep = ",",
  na.strings = c("NA", "ND", "", " ")
)
siteVisits <- fread("inst/extdata/R_SiteVisits.csv", sep = ",")
species <- fread(
  "inst/extdata/W_FI_zdd_FISH_Species.csv",
  sep = ",",
  header = TRUE
)
upstream <- fread("inst/extdata/shenandoahUpstreamStats.csv", sep = ",")

# Recode numeric sentinel values to NA: 99 anywhere in the fish table,
# 999 anywhere in the site-visit table.
gameFish[gameFish == 99] <- NA
siteVisits[siteVisits == 999] <- NA

# Duplicate the site identifier under the name used as the join key against
# the upstream statistics later in the script.
siteVisits$SiteID <- siteVisits$SITEID
# Summarise the catch for every site visit x pass (RUN) x species code x age
# combination: mean total length and number of fish records. Then attach the
# species lookup and the site-visit metadata.
# NOTE(review): mean(TL) is called without na.rm, so any NA length in a group
# yields an NA meanTL for that group.
fish <- group_by(gameFish, SiteVisit_ID, RUN, Code, AGE)
fish <- summarise(fish, meanTL = mean(TL), count = n())
fish <- left_join(fish, species, by = 'Code')
fish <- left_join(fish, siteVisits, by = 'SiteVisit_ID')
This is not efficient at all, but I was having trouble getting the columns to fill out properly, so I just brute-forced it.
# Build a complete visit-pass x species x age grid so that combinations with no
# catch appear explicitly (and can be given a count of zero), then attach
# site-visit and upstream covariates.

# Composite key identifying one pass of one site visit.
fish$siteVisitPass <- paste0(fish$SiteVisit_ID, "--", fish$RUN)

# Every observed visit-pass crossed with every observed species code and both
# age classes (0 and 1).
fullGrid <- expand.grid(
  siteVisitPass = unique(fish$siteVisitPass),
  Code = unique(fish$Code),
  AGE = 0:1
)
# expand.grid() turns character inputs into factors by default; convert back
# so the join keys match the character columns in `fish`.
fullGrid$siteVisitPass <- as.character(fullGrid$siteVisitPass)
fullGrid$Code <- as.character(fullGrid$Code)

fullDF <- fullGrid %>%
  left_join(fish, by = c("siteVisitPass", "Code", "AGE"))

# Recover the visit ID and pass number from the composite key. The split is
# computed once and reused for both columns.
# NOTE(review): both recovered columns are character vectors; confirm that
# siteVisits$SiteVisit_ID has a compatible type for the join below.
visitPass <- matrix(
  unlist(strsplit(fullDF$siteVisitPass, split = "--", fixed = TRUE)),
  ncol = 2,
  byrow = TRUE
)
fullDF$SiteVisit_ID <- visitPass[, 1]
fullDF$RUN <- visitPass[, 2]

# Re-attach site ID and sample date for every grid row, including rows that
# had no matching catch record. Columns already present in fullDF get the
# ".y" suffix on the copies coming from `foo`.
foo <- select(siteVisits, SiteVisit_ID, SITEID, sDate)
fullDF2 <- left_join(fullDF, y = foo, by = c("SiteVisit_ID"))
fullDF2$SiteID <- fullDF2$SITEID.y

allDF <- left_join(fullDF2, upstream, by = 'SiteID')

# Replace count NA with 0 because now all the combinations should represent
# site visits and passes that were conducted for all species and ages.
# Element-wise assignment is used so this is a no-op (rather than an error)
# when no count is missing.
allDF$count[is.na(allDF$count)] <- 0

allDF$date <- ymd_hms(allDF$sDate.y, tz = "US/Eastern")
# Keep only the variables needed for modeling and give them consistent names.
# Renaming is done inline in select() (new_name = old_name); column order is
# the order listed here.
subDF <- select(
  allDF,
  site = SiteID,
  SiteVisit_ID,
  date,
  spp = Code,
  pass = RUN,
  age = AGE,
  count,
  voltage = FI_Shocker_Voltage,
  FEATUREID,
  lat = Latitude,
  lon = Longitude,
  drainageArea = TotDASqKM,
  slope = ReachSlope,
  elev = ReachElev,
  tmaxAnn = tmaxann_brkt,
  forest,
  developed
)
# Rows for species code "BKT" at sites that have a latitude.
bktDF <- filter(subDF, !is.na(lat), spp == "BKT")

# check
# Rows and total count per species in the expanded modeling table ...
summarise(group_by(subDF, spp), siteVisitsPasses = n(), sum(count))
# ... versus the number of raw records per species code in the fish table.
summarise(group_by(gameFish, Code), siteVisitsPasses = n())
Use the same flow and temperature for all sites
# NOTE(review): `masterData`, used further down, is presumably provided by
# this .RData file -- verify.
load("data/localShenDaymet.RData")

# Climate series: rename the columns, then build a date from day-of-year and
# year.
climate <- fread('inst/extdata/Shen_climate_data.csv', sep=',')
setnames(
  climate,
  old = names(climate),
  new = c("year", "yday", "tmax", "tmin", "precip", "siteID")
)
climate$day.year <- paste0(climate$yday, '-', climate$year)
climate$date <- parse_date_time(climate$day.year, "%j%Y", tz = "US/Eastern")

# TODO: move this function to functions in R/
#
# Classify dates into seasons.
#
# DATES: vector of dates / date-times that strftime() can format.
# Returns a character vector of "Winter", "Spring", "Summer" or "Fall"
# (NA where the date is NA).
#
# Season boundaries fall on the 15th of December, March, June and September.
# All inputs are mapped onto the year 2012 so that only month and day matter.
getSeason <- function(DATES) {
  winterStart <- as.Date("2012-12-15", format = "%Y-%m-%d") # Winter Solstice
  springStart <- as.Date("2012-3-15", format = "%Y-%m-%d") # Spring Equinox
  summerStart <- as.Date("2012-6-15", format = "%Y-%m-%d") # Summer Solstice
  fallStart <- as.Date("2012-9-15", format = "%Y-%m-%d") # Fall Equinox

  # Convert dates from any year to 2012 dates
  d <- as.Date(strftime(DATES, format="2012-%m-%d"))

  # Winter wraps around the year end, hence the OR.
  isWinter <- d >= winterStart | d < springStart
  isSpring <- d >= springStart & d < summerStart
  isSummer <- d >= summerStart & d < fallStart

  ifelse(
    isWinter,
    "Winter",
    ifelse(isSpring, "Spring", ifelse(isSummer, "Summer", "Fall"))
  )
}

# Seasonal means across the shared climate series, written out as CSV.
climate$season <- getSeason(climate$date)
annual.climate <- summarise(
  group_by(climate, year, season),
  mean.tmax = mean(tmax),
  mean.precip = mean(precip)
)
write.table(
  annual.climate,
  file = 'inst/extdata/annual-climate.csv',
  row.names = FALSE,
  sep = ","
)

# Seasonal means per site from masterData, saved as .RData.
masterData$season <- getSeason(masterData$date)
annual.site.climate <- summarise(
  group_by(masterData, site, year, season),
  mean.tmean = mean(airTemp),
  mean.precip = mean(prcp)
)
save(annual.site.climate, file = 'data/annual-site-climate.RData')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.