#' Prepare the Deepwater Electrofishing Data
#'
#' Read in the deepwater electofishing data (including information on the
#' lamprey catch, the lamprey lengths, and the identification of plots that
#' were treated) and prepare them for estimation.
#' @param Dir
#' A character scalar identifying the path where the data files are
#' stored. Use forward slashes, e.g., \code{Dir = "C:/temp/mydir"}.
#' @param CatchFile
#' A character scalar identifying the name of the *.xl* file with
#' catch data. The file should have at least the following 19 columns,
#' named in the header row:
#' SAMPID, LATITUDE, LONGITUDE, STIME, BOAT, SAMPLE, DEPTH, SUB_MAJOR,
#' SUB_MINOR1, SUB_MINOR2, GPSDATE, HAB_TYPE, SL_TOTAL, AB_TOTAL, I_TOTAL,
#' COMMENT, NEW_NUMB, INBPLOT, REGION
#' (these last 3 columns are added in using ArcInfo).
#' See details.
#' @param LengthsFile
#' A character vector identifying the names of the *.xl* files with
#' the lengths data. The files should have at least the following 2 columns,
#' named in the header row: SAMPID, LENGTH.
#' See details.
#' @param PlotsFile
#' A character vector identifying the name of the *.xl* file with
#' the treatment plots data. The files should have at least the following 3
#' columns, named in the header row:
#' AREA, Plot_09, Treat_YYYY, where YYYY is the current year.
#' Treat_YYYY is equal to 1 if the plot was treated that year, equal to 0
#' otherwise. If a plot was treated twice in one year, it will be listed
#' on two separate rows, each with Treat_YYYY=1.
#' See details.
#' @param TRTtiming
#' A character scalar identifying the timing of the assessment survey relative
#' to treatment. "AFTER" if all plots were surveyed AFTER they were treated
#' (the default), "BEFORE" if all plots were survey BEFORE they were treated,
#' "NONE" if no plots were treated, and "MIXED" if some plots were surveyed
#' before and some plots were surveyed after treatment.
#' @param CatchHist
#' A character scalar identifying the name of the *.csv file with
#' historic catch data. The file should have these 29 columns, named in the
#' header row: year, mm, dd, stime, period, sampid, transamp, transect, site,
#' boat, latitude, longitude, region, label, inbplot, plot.num, new.numb,
#' sample, cluster, depth, hab.type, sub.major, sub.minor1, sub.minor2,
#' ab.total, i.total, sl.larv.n, sl.larv.adj, comment.
#' @param LengthHist
#' A character scalar identifying the name of the *.csv file with
#' historic lengths data. The file should have these 4 columns, named in the
#' header row: year, sampid, length, sl.larv.adj.
#' @param PlotHist
#' A character scalar identifying the name of the *.csv file with
#' historic plot-specific estimates. The file should have these 15
#' columns, named in the header row: year, period, new.numb, meanlat,
#' meanlong, area.ha, n.samp, catch, meannperha, sd.dens, larvpe, ptran,
#' tranpe, pbig, bigpe.
#' @param PEHist
#' A character scalar identifying the name of the *.csv file with
#' historic whole-river population estimates. The file should have these 14
#' columns, named in the header row: Year, Design, Type, Period, Trt, Dates,
#' Samples, Catch, Area_ha, PE, SD, LO, HI, CV.
#' @param b4plots
#' A numeric vector identifying the plots that were surveyed BEFORE they
#' were treated. A value for this should only be provided if
#' \code{TRTtiming} is set to "MIXED" (default NULL).
#' @details
#' The order of the columns and the case of the column names in the
#' \code{CatchFile}, \code{LengthsFile}, \code{PlotsFile} files are
#' unimportant.
#' Additional columns may be also be part of theses files,
#' but they will be ignored.
#' @return
#' A list with DWEF catch, length, and plot data in three data frames (CAT,
#' LEN, PLT) and a character vector of the SOURCE directory and file names.
#' The plot data is reorganized to have only one row per plot, with the
#' trtd variable indicating the number of treatments each plot received
#' that year.
#' @import readxl
#' @export
DWEFprep <- function(Dir, CatchFile, LengthsFile, PlotsFile, TRTtiming="AFTER",
CatchHist, LengthHist, PlotHist, PEHist, b4plots=NULL) {
# treatment timing
tt <- casefold(substring(TRTtiming, 1, 1))
if(tt=="m" & is.null(b4plots)) stop('If some plots were surveyed',
' before and some plots were surveyed after treatment,',
' TRTtiming="MIXED", then b4plots should identify the plots that were',
' surveyed BEFORE they were treated, e.g., b4plots=c(18, 39, 112).')
if(tt!="m" & !is.null(b4plots)) stop('No plot numbers should be provided',
' for b4plots unless TRTtiming="MIXED".')
PERIOD <- GLFC::recode(tt, c("n", "a", "b", "m"), c(0, 1, -1, NA))
# bring in lengths data
lens <- vector("list", length(LengthsFile))
for(i in seq(length(lens))) {
filei <- LengthsFile[i]
if(length(grep("\\.xl", filei, ignore.case=TRUE))==1) {
lens[[i]] <- read_excel(paste(Dir, filei, sep="/"), sheet=1)
} else {
stop("LengthsFile file must be file type *.XLS*")
}
}
lens <- do.call(rbind, lens)
names(lens) <- casefold(names(lens))
lens <- lens[!is.na(lens$length), ]
lens$sampid <- as.numeric(lens$sampid)
lens$id <- as.numeric(lens$id)
lens$length <- as.numeric(lens$length)
lens$sl.larv.n <- ifelse(lens$class=="A", 1, 0)
lens$sl.meta.n <- ifelse(lens$class=="T", 1, 0)
# calculate catch adjusted for length-based gear efficiency
lens$sl.larv.adj = ifelse(lens$class=="A", DWEFgec(lens$length), 0)
lens$sl.meta.adj = ifelse(lens$class=="T", DWEFgec(lens$length), 0)
larvtots <- aggregate(cbind(sl.larv.n, sl.larv.adj, sl.meta.n, sl.meta.adj) ~
sampid, data=lens, sum)
lvarnamz <- c("sampid", "class", "length", "sl.larv.n", "sl.larv.adj",
"sl.meta.n", "sl.meta.adj")
# bring in catch data
if(length(grep("\\.xl", filei, ignore.case=TRUE))==1) {
fin <- read_excel(paste(Dir, CatchFile, sep="/"), sheet=1)
} else {
stop("CatchFile file must be file type *.XLS*")
}
names(fin) <- make.names(casefold(names(fin)), unique=TRUE, allow_=FALSE)
gps <- strsplit(fin$gpsdate, "/")
# sometimes date is entered as month first, other times day first,
# should be consistent within boat
d1 <- as.numeric(lapply(gps, "[", 1))
d2 <- as.numeric(lapply(gps, "[", 2))
# when a date field is greater than 12,
# that is an indication that the field is NOT the MONTH (nm)
nm1 <- d1>12
nm2 <- d2>12
boat <- fin$boat
bd <- aggregate(cbind(nm1, nm2) ~ boat, FUN=mean, na.rm=TRUE)
fin <- merge(fin, bd, all.x=TRUE)
fin$dd <- ifelse(fin$nm1 > fin$nm2, d1, d2)
fin$mm <- ifelse(fin$nm1 > fin$nm2, d2, d1)
fin$year <- as.numeric(lapply(gps, "[", 3))
miss <- rep(NA, dim(fin)[1])
fin$site <- fin$id
fin$transamp <- miss
fin$transect <- miss
fin$label <- miss
fin$cluster <- miss
fin$plot.num <- miss
fin$comment[is.na(fin$comment)] <- "No comment"
fin$commentwrap <- sapply(strwrap(fin$comment, width=30, simplify=FALSE),
paste, collapse="\n")
# if inbplot column is called inplot, change it
if("inplot" %in% names(fin)) fin$inbplot <- fin$inplot
fin2 <- merge(fin, larvtots, all=TRUE)
rm(gps, d1, d2, nm1, nm2, boat, bd, miss, larvtots)
# determine year of interest
if(var(fin2$year, na.rm=TRUE)>0) {
stop(paste("CatchFile data contains more than one year of data:",
sort(unique(fin2$year))))
}
# bring in plot information
if(length(grep("\\.xl", filei, ignore.case=TRUE))==1) {
plotinfo <- read_excel(paste(Dir, PlotsFile, sep="/"), sheet=1)
} else {
stop("Plot information file must be file type *.XLS*")
}
orig.names <- names(plotinfo)
names(plotinfo) <- make.names(casefold(names(plotinfo)), unique=TRUE,
allow_=FALSE)
plotinfo$new.numb <- plotinfo$plot.09
treatvarname <- names(plotinfo)[grep("treat", names(plotinfo))]
plotinfo$trtd <- plotinfo[[treatvarname]]
plotinfo$area.ha <- plotinfo$area
area.treated <- sum(plotinfo$area.ha[plotinfo$trtd==1])
plotinfo2 <- aggregate(trtd ~ area.ha + new.numb, data=plotinfo, FUN=sum)
varnamz <- c("year", "mm", "dd", "stime", "period", "sampid", "transamp",
"transect", "site", "boat", "latitude", "longitude", "region", "label",
"inbplot", "plot.num", "new.numb", "sample", "cluster", "depth", "hab.type",
"sub.major", "sub.minor1", "sub.minor2", "ab.total", "i.total", "sl.total",
"comment", "commentwrap")
rm(orig.names, treatvarname, filei)
# if plot numbers were entered, make sure they make sense
if(is.na(PERIOD)) {
check <- match(b4plots, fin2$new.numb)
if(sum(is.na(check))>0) stop(paste(
"No catch data corresponds to the hotspots numbers you provided:",
b4plots[is.na(check)]))
fin2$period <- ifelse(fin2$new.numb %in% b4plots, -1, 1)
} else {
fin2$period <- PERIOD
}
# any variable names missing?
missvar.c <- varnamz[is.na(match(varnamz, names(fin2)))]
if(length(missvar.c)>0) stop(paste(
"CatchFile is missing required variable(s):",
paste(missvar.c, collapse=", ")))
missvar.l <- lvarnamz[is.na(match(lvarnamz, names(lens)))]
if(length(missvar.l)>0) stop(paste(
"LengthsFile is missing required variable(s):",
paste(missvar.l, collapse=", ")))
# create data frames with just the specified variable names
smr <- fin2[, varnamz]
lens <- lens[, lvarnamz]
rm(fin, fin2, PERIOD, missvar.c, missvar.l, b4plots, varnamz, lvarnamz)
# prepare catch data
list.time <- strsplit(smr$stime, ":")
smr$hr <- as.numeric(sapply(list.time, "[", 1))
smr$mn <- as.numeric(sapply(list.time, "[", 2))
smr$dec.time <- smr$hr + smr$mn/60
smr$stime <- 100*smr$hr + smr$mn
smr$date <- rep(NA, dim(smr)[1])
sel <- !is.na(smr$year)
smr$date[sel] <- as.Date(paste(smr$year[sel], smr$mm[sel], smr$dd[sel],
sep="-"))
class(smr$date) <- "Date"
row.names(smr) <- 1+(1:dim(smr)[1])
smr <- smr[order(smr$boat, smr$date, smr$dec.time), ]
rm(list.time, sel, i)
# read in historic data
ch <- read.csv(paste(Dir, CatchHist, sep="/"))
lh <- read.csv(paste(Dir, LengthHist, sep="/"))
ph <- read.csv(paste(Dir, PlotHist, sep="/"))
rh <- read.csv(paste(Dir, PEHist, sep="/"))
list(
CAT=smr,
LEN=lens,
PLT=plotinfo2,
CAThist=ch,
LENhist=lh,
Plothist=ph,
PEhist=rh,
SOURCE=c(
Dir=Dir,
CatchFile=CatchFile,
LengthsFile=paste(LengthsFile, collapse=", "),
PlotsFile=PlotsFile,
CatchHist=CatchHist,
LengthHist=LengthHist,
PlotHist=PlotHist,
PEHist=PEHist
)
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.