# codeutils::listFunctions("C:/A_CSIRO/Rcode/r4cpue/R/cpuefuncs.r")
#' @import graphics
#' @importFrom grDevices png rgb dev.cur dev.new dev.off
#' @importFrom stats anova as.formula dnorm lm median qnorm qqline qqnorm
#' @importFrom stats quantile sd loess
#' @importFrom utils data read.csv str tail write.table
NULL
#' @title addcount - adds a count of years and avC to a data.frame
#'
#' @description addcount - given an input data.frame containing catch_kg,
#' Vessel, and Year as variables this function first sums the catch by
#' vessel, by year, then counts each vessels occurrence across the
#' years, and each vessels catch. Then stepping through each vessel it
#' populates two new colummns named 'count' and 'avC'.
#'
#' @param indat - a data.frame containing at least columns named 'catch_kg',
#' 'Vessel' (or in fact 'varid'), and 'Year'.
#' @param varid the variable in the data.frame for which the count and
#' avearge catch per year is to be estimated. Defaults to 'Vessel'
#' ready for the SESSF but could be diver_id for abalone, or any other
#' factor of interest.
#' @param group the first grouping variable, typically Vessel in the SESSF
#' @param catch the variable name for the catch, catch_kg in the SESSF
#'
#' @return a data.frame made of the input data.frame plus two new columns
#' @export addcount
addcount <- function(indat,varid="Vessel",group="Year",catch="catch_kg") {
outdat <- indat
outdat$count <- NA
outdat$avC <- NA
cbv <- tapply(outdat[,catch],list(outdat[,varid],outdat[,group]),sum,na.rm=TRUE)/1000
years <- as.numeric(colnames(cbv))
nyrs <- length(years)
count <- apply(cbv,1,function(x) length(which(x > 0)))
avC <- apply(cbv,1,mean,na.rm=TRUE)
vessels <- as.numeric(names(count))
numves <- length(vessels)
for (i in 1:numves) {
pickv <- which(outdat[,varid] == vessels[i])
outdat$count[pickv] <- count[i]
outdat$avC[pickv] <- avC[i]
}
return(outdat)
} # end of addcount
#' @title checkDF - checks a CPUE data.frame has all required fields
#'
#' @description checkDF - checks a CPUE data.frame contains all fields
#' expected for analysis. These are Year, Month, Vessel, catch_kg,
#' Long, Lat, Depth, DayNight, Zone, Effort, Method, Fishery, LnCE,
#' and DepCat. If any are missing it identifies them if not it confirms
#' all is ok. Although, except for their class checkDF does not consider
#' the contents of each field.
#'
#' @param x - a data.frame of CPUE data used in the SESSF
#'
#' @return only a text message to the console confirming all ok or
#' reporting which fields are missing
#' @export checkDF
#'
#' @examples
#' dataf <- as.data.frame(matrix(rnorm(100,mean=10,sd=1),nrow=20,ncol=5))
#' colnames(dataf) <- c("Year","catch_kg","Long","Zone","LnCE")
#' checkDF(dataf)
#' dataf <- as.data.frame(matrix(rnorm(42,mean=10,sd=1),nrow=1,ncol=14))
#' colnames(dataf) <- c("Year","Month","Vessel","catch_kg","Long","Lat",
#' "Depth","DepCat","DayNight","Zone","Effort",
#' "Method","Fishery","LnCE")
#' checkDF(dataf)
#' head(dataf)
checkDF <- function(x) { # x <- sps1
if (class(x) != "data.frame") stop("Input to checkDF must be a data.frame \n\n")
needed <- c("Year","Month","Vessel","catch_kg","Long","Lat","Depth","DepCat","Zone",
"Effort","LnCE","DayNight","Method","Fishery")
dflocation <- match(needed,colnames(x))
pick <- which(is.na(dflocation))
if (length(pick) == 0) {
cat("All fields needed for SESSF work are present \n")
whatclass <- sapply(x[dflocation],class)
numbers <- (whatclass[1:11] == "numeric")
nonnum <- (whatclass[12:14] == "factor")
if (length(numbers[numbers == FALSE]) == 0) cat("All numeric fields are numeric \n")
if (length(nonnum[nonnum == FALSE]) == 0) cat("All non-numeric fields are factors \n")
} else {
cat("The following fields are missing ",needed[pick],"\n\n")
}
if (dim(x)[1] < 100) cat("Very few records, ",dim(x)[1],", available for standardization \n\n")
} # end of checkDF
#' @title coef.CEout - S3 method to extract the parameter values from a CEout object
#'
#' @description Extract the parameter values from a CEout object from standLnCE
#' by extending the generic 'coef' function to apply to CEout objects, such
#' as generated by standLnCE
#' @param inout is a CEout object produced by standLnCE
#' @return a matrix containing the optimum model parameters, including the
#' log-space coefficients, the lower and upper 95% cofidence intervals,
#' the standard error of each parameter, and p the probability that the
#' parameter is significantly different from zero. These coefficients
#' would need to be back-transformed to return to the original scale. This
#' would use x = exp(coef + (sterr*sterr)/2); note the bias correction
#' @exportMethod coef.CEout
#' @examples
#' \dontrun{
#' data(sps)
#' splabel = "Species"
#' labelM <- c("Year","Vessel","Month")
#' mods <- makemodels(labelM)
#' sps1 <- makecategorical(labelM,sps)
#' out <- standLM(mods,sps1,splabel)
#' coef(out)
#' }
coef.CEout <- function(inout) { # S3 class development
test <- dim(inout$Parameters$mat)
if (length(test) > 0) {
warning("The input object is not a CEout object from standLnCE \n")
} else {
ans <- NA
cat("Output from standLnCE \n")
tmp <- inout$Parameters$coefficients
backTran <- exp((tmp[,"Estimate"] + (tmp[,"Std. Error"]^2)/2))
ans <- cbind(tmp,backTran)
colnames(ans)
}
return(ans)
} # end of coef.CEout
#' @title countgtzero used in apply to count the number of zeros in a vector
#'
#' @description countgtzero used in apply to count the number of zeros in a vector
#' @param invect vector of values
#' @return A single value of zero or the number of zeros
#' @export
#' @examples
#' x <- rnorm(30,mean=1,sd=1)
#' countgtzero(x)
#' cat(30 - countgtzero(x),"out of ",30, " are less than 0 \n")
#' y <- matrix(x,nrow=6,ncol=5)
#' apply(y,1,countgtzero)
countgtzero <- function(invect) {
pick <- which(invect > 0)
return(length(pick))
}
#' @title fishery - generates vectors of year, catch, effort, and cpue
#'
#' @description fishery - generates vectors of year, catch, effort, and cpue
#' This permits a summary of the basic fishery characteristics within a
#' data.frame or matrix. It requires the columns to be labelled.
#' @param indat the input data.frame or matrix
#' @param years the name of the year factor; defaults to "Year"
#' @param catch the name of the catch variable; defaults to "catch"
#' @param effort the name of effort variable; defaults to "effort"
#' @param cpue the name of the cpue variable; defaults to "CE"
#' @return a matrix with columns Year, Catch, Effort, Bias Corrected
#' geometric mean CPUE, the naive geometric means, the arithemtic mean
#' CPUE, and the number of records
#' @export
#' @examples
#' \dontrun{
#' data(sps)
#' fishery(sps,years="Year",catch="catch_kg",effort="Effort",cpue="CE")
#' }
fishery <- function(indat,years="Year",catch="catch",effort="effort",
cpue="CE") {
yrs <- sort(unique(indat[,years]))
catches <- tapply(indat[,catch],indat[,years],sum,na.rm=TRUE)
eff <- tapply(indat[,effort],indat[,years],sum,na.rm=TRUE)
geo <- tapply(indat[,cpue],indat[,years],geomean)
ngeo <- exp(tapply(log(indat[,cpue]),indat[,years],mean,na.rm=TRUE))
arith <- tapply(indat[,cpue],indat[,years],mean,na.rm=TRUE)
records <- table(indat[,years])
ans <- cbind(yrs,catches,eff,geo,ngeo,arith,records)
colnames(ans) <- c("Years","Catch","Effort","BCGeo","NaiveGeo","Arith","Records")
return(ans)
} # end of Fishery
#' @title geomean: the geometric mean of a vector corrected for log-normal bias
#'
#' @description Calculate the geometric mean of a vector corrected for log-normal
#' bias. NAs and zeros are removed from consideration.
#' @param invect is a vector of numbers in linear space.
#' @return The bias-corrected geometric mean of the vector
#' @export geomean
#' @examples
#' x <- c(1,2,3,4,5,6,7,8,9)
#' geomean(x)
## Calculate the geometric mean of a vector corected for log-normal bias
geomean <- function(invect) {
pick <- which((invect <= 0.0))
if (length(pick) == 0) {
avCE <- mean(log(invect),na.rm=TRUE)
stdev <- sd(log(invect),na.rm=TRUE)
} else {
avCE <- mean(log(invect[-pick]),na.rm=TRUE)
stdev <- sd(log(invect[-pick]),na.rm=TRUE)
}
gmean <- exp(avCE + (stdev^2)/2)
return(gmean)
} # end of geomean
#' @title getfact extracts parameter estimates for a given factor from a CEout object
#'
#' @description getfact extracts the parameter estimates for a given factor from a CEout object.
#' It does this by searching to rownames of the output parameters of the optimum model.
#' It also checks for interaction terms, which for categorical factors is the same as
#' determining a trend of the two factors relative to each other. e.g. for Zone:Month
#' the outcome is the monthly trend for each zone.
#'
#' @param inmat generally this will be a CEout object but it can be
#' a matrix of coefficients, an lm object, or a gam object
#' @param invar the model variable whose parameters are wanted
#'
#' @return a matrix containing the parameters for invar
#' @export getfact
getfact <- function(inmat,invar) { # inmat=model; invar="Year"
allowable <- c("matrix","CEout","lm","gam")
whatclass <- class(inmat)
if (length(whatclass) > 1) {
if ("gam" %in% whatclass) {
whatclass <- "gam"
} else {
if ("lm" %in% whatclass) {
whatclass <- "lm"
} else {
if ("matrix" %in% whatclass) {
whatclass <- "matrix"
} else {
whatclass <- "no"
}
}
}
}
if (whatclass %in% allowable) {
if (whatclass == "matrix") pardat <- inmat
if (whatclass == "CEout") pardat <- inmat$Parameters$coefficients
if (whatclass == "lm") pardat <- summary(inmat)$coefficients
if (whatclass == "gam") pardat <- summary(inmat)$p.table
} else {
stop("Input matrix is not, in fact, a matrix of coefficients")
}
pick <- grep(invar,rownames(pardat))
if (length(pick) == 0) stop("The selected factor is not in the selected model! \n")
startmat <- pardat[pick,] # isolate rows containing variable in question
pickI <- grep(":",rownames(startmat)) # check for interaction terms and split off
if (length(pickI) > 0) { # if present
intermat <- startmat[pickI,]
startmat <- startmat[-pickI,]
}
lnce <- startmat[,"Estimate"] # first do the non-interaction terms
se <- startmat[,"Std. Error"]
tval <- startmat[,"t value"]
Prob <- startmat[,"Pr(>|t|)"]
backtran <- exp(lnce + (se * se)/2)
ans <- cbind(c(1.0,backtran),c(0,se),c(0,lnce),
scaleCE(c(1.0,backtran),avCE=1.0),
c(NA,tval),c(NA,Prob)
)
colnames(ans) <- c("Coeff","SE","LogCE","Scaled","t value","Prob")
rownames(ans)[1] <- invar
norigvar <- dim(ans)[1]
if (length(pickI) > 0) { # do interaction terms if they exist
terms <- unlist(strsplit(rownames(intermat),":"))
nrow <- dim(intermat)[1]
firstvar <- grep(invar,terms)
nfirst <- length(unique(terms[firstvar]))
nsecond <- length(unique(terms[-firstvar]))
if ((nfirst * nsecond) != nrow)
stop(paste0("something wrong with the interactions terms for ",invar," \n",
"the first and second variables are not balanced"))
start <- 1
for (i in 1:(nrow/nfirst)) { # i <- 1
finish <- (start+nfirst-1)
tmp <- intermat[start:finish,]
lnce <- tmp[,"Estimate"] # first do the non-interaction terms
se <- tmp[,"Std. Error"]
backtran <- exp(lnce + (se * se)/2)
tmpans <- cbind(c(1.0,backtran),c(0,se),c(0,lnce),scaleCE(c(1.0,backtran),avCE=1.0))
ans <- rbind(ans,tmpans)
start <- finish + 1
}
}
return(ans)
} # end of getfact
#' @title getrmse calculates the rmse of the input 'invar' series
#'
#' @description getrmse calculates the rmse of the input invar series (defaults
#' to 'cpue') against an input 'year' time series. This is primarily
#' designed to generate a more feasible estimate of the intrinsic
#' variability of a cpue time-series that may be obtained from a cpue
#' standardization
#'
#' @param indat the matrix, spmdat, or data.frame containing both a 'year'
#' column and an invar column (default to 'cpue')
#' @param invar the column whose rmse is wanted; defaults to 'cpue'
#' @param inyr the column that points to the year name
#' @return a list of the rmse and the loess predicted values of the invar for
#' each year in the time-series
#' @export
#'
#' @examples
#' \dontrun{
#' year <- 1986:1995
#' cpue <- c(1.2006,1.3547,1.0585,1.0846,0.9738,1.0437,0.7759,1.0532,1.284,1.3327)
#' dat <- as.matrix(cbind(year,cpue))
#' getrmse(dat,invar="cpue") # should be 0.08856596
#' getrmse(dat,invar="cpue")$rmse
#' }
getrmse <- function(indat,invar="cpue",inyr="year"){ # indat=fish; invar="cpue"; inyr="year"
if(iscol(inyr,indat) & iscol(invar,indat)) {
nyr <- dim(indat)[1]
predictedCE <- rep(NA,nyr)
varloc <- grep(invar,colnames(indat))
nvar <- length(varloc)
if (nvar > 1) {
obsvar <- rep(NA,nyr)
for (i in 1:nvar) {
pick <- which(indat[,varloc[i]] > 0)
obsvar[pick] <- indat[pick,varloc[i]]
}
} else {
obsvar <- indat[,varloc]
}
picky <- which(obsvar > 0)
model <- loess(obsvar[picky] ~ indat[picky,inyr])
predictedCE[picky] <- model$fitted
rmse <- sqrt(sum(model$residuals^2)/model$n)
return(list(rmse=rmse,predictedCE=predictedCE))
} else {
cat("Input data should contain both 'year' and 'cpue' \n")
}
} # end of getrmseCE
#' @title getsps reads in the species catch and effort data
#'
#' @description getsps reads in the species catch and effort data that was
#' previously extracted from the Oracle data base.
#'
#' @param filename the names of the csv file to be read
#' @param datadir the path to the directory containing the csv files
#' @param depthclass the class width of the depth categories, defaults to 25
#' @param txtout report the filename to the screen? default = FALSE
#'
#' @return invisibly returns the sps data.frame ready for selection
#' @export
#'
#' @examples
#' \dontrun{
#' print("need an example that will work")
#' }
getsps <- function(filename,datadir,depthclass=25,txtout=FALSE) {
infile <- paste0(datadir,filename)
if (!file.exists(infile)) {
stop(paste0("Data file ",filename," does not exist in ",datadir))
} else {
if (txtout) cat("Read from ",filename,"\n")
sps <- read.csv(infile, header=TRUE)
}
sps$CE <- NA
pick <- which((sps$catch_kg > 0) & (sps$Effort > 0))
sps$CE[pick] <- sps$catch_kg[pick]/sps$Effort[pick]
sps$LnCE <- NA
sps$LnCE[pick] <- log(sps$CE[pick])
sps$DepCat <- NA
sps$DepCat <- trunc(sps$Depth/depthclass) * depthclass
return(invisible(sps))
} # end of getsps
#' @title getStand - extracts major results from an CEout object
#'
#' @description getStand - extracts major results from an CEout object. which
#' is generated using standLM. The main resutls include the year, the
#' geometric mean (the first column of the standardization), the optimum
#' model, the standard error of the optimum, and the lower and upper
#' confidence intervals
#'
#' @param x the CEout object derived from standLM
#' @param P the probability interval for the confidence intervals
#'
#' @return matrix containing the primary results
#' @export getStand
#'
#' @examples
#' \dontrun{
#' data(ab)
#' splabel = "Mollusc"
#' labelM <- c("year","diverID","month","block","boatID")
#' ab1 <- makecategorical(labelM,ab)
#' mods <- makemodels(labelM)
#' out <- standLM(mods,ab1,splabel)
#' round(out$Results,4)
#' round(out$WhichM,4)
#' getStand(out)
#' }
getStand <- function(x,P=0.95) {
if (class(x) != "CEout") stop("input to getStand is not a CEout object")
yrs <- as.numeric(rownames(x$Results))
geo <- x$Results[,1]
opt <- x$Results[,x$Optimum]
se <- x$StErr[,x$Optimum]
Zmult <- -qnorm((1-(P/100))/2.0)
lower <- opt * exp(-Zmult * se)
upper <- opt * exp(Zmult * se)
ans <- cbind(yrs,geo,opt,se,lower,upper)
colnames(ans) <- c("year","geom","optimum","sterr","LCI","UCI")
return(ans)
} # end of getStand
#' @title incol is a utility to determine is a column is present in a matrix
#'
#' @description incol is a utility to determine whether a names columns is
#' present in a given matrix or data.frame.
#'
#' @param incol the name of the column; defaults to "year" as an example
#' @param inmat the matrix or data.frame within which to search for incol
#'
#' @return TRUE or FALSE
#' @export
#'
#' @examples
#' \dontrun{
#' test <- matrix(c(1,2,3,4),nrow=2,ncol=2,dimnames=list(1:2,c("year","Catch")))
#' print(test)
#' iscol("year",test)
#' iscol("Catch",test)
#' iscol("catch",test)
#' iscol("ages",test)
#' }
iscol <- function(incol="year",inmat) { # incol="ages"; inmat=dat
if (length(grep(incol,colnames(inmat))) < 1) return(FALSE)
else return(TRUE)
}
#' @title makecategorical - converts given variables into categorical factors
#'
#' @description Given a list of variables, as character strings, this function
#' converts each variable into a factor after checking no mistake has been
#' made with the spelling of the variable name. A copy of the data is made
#' so that the non factored variables can be retained.
#' @param labelModel is the set of variables from the data.frame that are to
#' be converted into categorical factors.
#' @param indat is the data.frame that is to be analysed in the standardization
#' @return a copy of the database with the selected variables converted into
#' factors
#' @export makecategorical
#' @examples
#' \dontrun{
#' data(sps)
#' labelM <- c("Year","Vessel","Month")
#' sps1 <- makecategorical(labelM,sps)
#' }
makecategorical <- function(labelModel,indat) {
Interact <- grep(":",labelModel)
nInteract <- length(Interact)
numvars <- length(labelModel) - nInteract
for (fac in 1:numvars) {
if (length(indat[,labelModel[fac]]) > 0) {
indat[,labelModel[fac]] <- factor(indat[,labelModel[fac]])
} else { warning(paste0("Factor name ",labelModel[fac],
"does not appear in data.frame"))
}
}
return(indat)
} # end of makecategorical
#' @title makedatasum generates a table summarizing information about data
#'
#' @description makedatasum uses the two input data.frames to generates a
#' table summarizing information about data, this includes the Total
#' Catch of the species, number of records used, the catch used in the
#' analyses, the number of Vessels, the GeoMean, the optimum standardized
#' mean estimates, their StDev, the total catch <30Kg, and the
#' proportion of catch <30Kg.
#'
#' @param InData the unselected species data, generally sps
#' @param InData1 the data.frame after selection has been applied, sps1
#' @param out the output from standLM or dosingle, usually out
#'
#' @return a matrix of each summary variable by year
#' @export
#'
#' @examples
#' \dontrun{
#' data(sps)
#' sps$CE <- NA
#' sps$LnCE <- NA
#' pick <- which((sps$catch_kg > 0) & (sps$Effort > 0))
#' sps$CE[pick] <- sps$catch_kg[pick]/sps$Effort[pick]
#' sps$LnCE[pick] <- log(sps$CE[pick])
#' answer <- selectdata(sps,dependent="LnCE",Ldepth=0,Udepth=600,
#' startyear=1986,finalyear=2015,zones=c(10,20,30),method=c("TW","TDO"),
#' fishery="SET")
#' sps1 <- answer$sps1
#' makedatasum(sps,sps1)
#' }
makedatasum <- function(InData, InData1,out) { # InData=sps; InData1=sps1
years <- sort(unique(InData1$Year))
if (is.factor(InData1$Year)) years <- as.numeric(levels(years))
numrow <- length(years) # Number of years of observations in sps
firsty <- min(years)
lasty <- max(years)
pick <- which((InData$Year >= firsty) & (InData$Year <= lasty))
if (length(pick) > 0) { InData <- droplevels(InData[pick,]) }
# Set up matrix for data characterization
cols <- c("Total","N", "Catch", "Vess","GeoM","Opt",
"StDev","C<30Kg","P<30Kg")
DataC <- matrix(nrow=numrow, ncol=length(cols),dimnames = list(years, cols))
DataC[,1] <- tapply(InData$catch_kg,InData$Year,sum,na.rm=T)/1000.0 # Total Catch
DataC[,2] <- tapply(InData1$catch_kg,InData1$Year,length) # Number of records used
for (n in 1:numrow) { # Number of vessels in sps1
DataC[n,4] <- length(unique(InData1[InData1$Year==years[n],]$Vessel))
}
means <- tapply(InData1$LnCE,InData1$Year,mean,na.rm=T)
stdev <- tapply(InData1$LnCE,InData1$Year,sd,na.rm=T)
DataC[,5] <- exp(means + (stdev*stdev)/2)
DataC[,3] <-tapply(InData1$catch_kg,InData1$Year,sum,na.rm=T)/1000.0
pick <- which(InData1$catch_kg <= 30)
tmpdat <- InData1[pick,]
tmpdatout <- tapply(tmpdat$catch_kg,tmpdat$Year,sum,na.rm=T)/1000.0
pickrec <- match(as.numeric(names(tmpdatout)),as.numeric(rownames(DataC)))
DataC[pickrec,8] <- tmpdatout
DataC[,9] <- DataC[,8]/DataC[,3]
DataC[,6:7] <- cbind(out$Results[,out$Optimum],out$StErr[,out$Optimum])
return(DataC)
} # End of makedatasum
#' @title makeLabel Convert a vector of numbers or strings into a single label
#'
#' @description makeLabel Convert a vector of numbers of strings into a single
#' label
#' @param invect the vector of numbers or strings to be converted into a
#' single string.
#' @param insep defaults to '_' but can be any selected character used to
#' separate each value
#' @return a character string containing the invect as a single string.
#' @export makeLabel
#' @examples
#' x <- c(1,2,3,4,5)
#' makeLabel(x)
#' makeLabel(x,"-")
makeLabel <- function(invect,insep="_") {
invect <- as.character(invect)
invect <- invect[nchar(invect) > 0]
nlab <- length(invect)
ans <- invect[1]
if (nlab > 1) for (i in 2:nlab) ans <- paste(ans, invect[i], sep=insep)
return(ans)
} # end of makeLabel
#' @title makemodels - makes a list of models with their labels
#'
#' @description Makes a list of models with their labels from an input vector
#' of names. Can be used to prepare the input models ready for standLM.
#' @param labelModel is the set of variables from the data.frame that are to
#' be included as sequentially added models. Thus is labelModel is set equal
#' to c('Year',"Vessel') the first model will be LnCE ~ Year and the second
#' will be LnCE ~ Year + Vessel. labelModel will also provide the headings
#' to the tables in the output list from standLnCE
#' @param dependent - the name of the dependent variable in your dataset;
#' defaults to 'LnCE'
#' @return a list of formula representing the set of models to be fitted
#' along with the labels for each model
#' @export makemodels
#' @examples
#' labelM <- c("Year","Vessel","Month")
#' makemodels(labelM)
makemodels <- function(labelModel,dependent="LnCE") {
numvars <- length(labelModel)
interterms <- grep(":",labelModel)
ninter <- length(interterms)
mods <- vector("list",(numvars+1))
form <- paste0(dependent," ~ ",labelModel[1])
mods[[1]] <- assign(paste0("ff",1),as.formula(form))
if (numvars > 1){
if (ninter > 0) {
for (i in 2:(numvars - ninter)) {
form <- paste0(form," + ",labelModel[i])
mods[[i]] <- assign(paste0("ff",i),as.formula(form))
}
for (i in interterms) {
interform <- paste0(form," + ",labelModel[i])
mods[[i]] <- assign(paste0("ff",i),as.formula(interform))
}
} else {
for (i in 2:numvars) {
form <- paste0(form," + ",labelModel[i])
mods[[i]] <- assign(paste0("ff",i),as.formula(form))
}
}
}
mods[[(numvars+1)]] <- labelModel
return(mods)
} # end of makemodels
#' @title properties - used to check a data.frame before standardization
#'
#' @description properties - used to check a data.frame before
#' standardization
#' @param indat the data.frame containing the data fields to be used
#' in the subsequent standardization. It tabulates the number of
#' NAs and the number of unique values for each variable and finds
#' the minimum and maximum of the numeric variables
#' @param dimout determines whether or noth the dimensions of the data.frame
#' are printed to the screen or not; defaults to FALSE
#' @return a data.frame with the rows being each variable from the input
#' input data.frame and the columns being the number of NAs, the
#' number of unique values, and minimum and maximum (where possible).
#' @export properties
#' @examples
#' \dontrun{
#' data(ab)
#' properties(ab)
#' }
properties <- function(indat,dimout=FALSE) {
if(dimout) print(dim(indat))
isna <- sapply(indat,function(x) sum(is.na(x)))
uniques <- sapply(indat,function(x) length(unique(x)))
clas <- sapply(indat,class)
numbers <- c("integer","numeric")
pick <- which(clas %in% numbers)
minimum <- numeric(length(uniques))
maximum <- minimum
minimum[pick] <- sapply(indat[,pick],min,na.rm=TRUE)
maximum[pick] <- sapply(indat[,pick],max,na.rm=TRUE)
index <- 1:length(isna)
props <- as.data.frame(cbind(index,isna,uniques,clas,minimum,
maximum,t(indat[1,])))
colnames(props) <- c("Index","isNA","Unique","Class","Min",
"Max","Example")
return(props)
} # end of properties
#' @title quants used in apply to count the number > 1 in a vector
#'
#' @description quants used in apply to count the number > 1 in a vector
#' designed to be used in apply
#' @param invect vector of values
#' @return a vector of the c(0.025,0.05,0.5,0.95,0.975) quantiles
#' @export
#' @examples
#' x <- rnorm(100,mean=5,sd=1)
#' quants(x)
#' y <- matrix(x,nrow=10,ncol=10)
#' apply(y,2,quants)
quants <- function(invect) {
ans <- quantile(invect,probs = c(0.025,0.05,0.5,0.95,0.975),na.rm=T)
return(ans)
}
#' @title saveresults sends the tables generated to a text file in resultdir
#'
#' @description savereults saves all the tables generated to a text file
#' named after the species and fishery and puts it into resultdir. The
#' outputs include for all data relating to the species: catch-by-method,
#' catch-by-zone, catch-by-fishery, then only for the data selected for
#' the standardization, the: catch-by-method, catch-by-zone, and the
#' catch-by-DepCat, but also the number of observations modified by each
#' of the selction criteria, the data summary, and the raw output from the
#' standardizatio. Customized for use with SESSF data-sets
#'
#' @param sps the data.frame containing all data relating to the species
#' @param answer the list obtained from 'selectdata' containing the data.frame
#' of the selected data, and the nobs table containing how different
#' selection criteria affect the number of observations.
#' @param out the output from standLM or dosingle containing the
#' standardization
#' @param mods the mods defined for the standardization
#' @param splabel the name of the species and fishery
#' @param resdir the path to the results directory
#'
#' @return nothing, but it does write a text file to the results directory.
#' @export
#'
#' @examples
#' \dontrun{
#' print("this will take some doing")
#' }
saveresults <- function(sps,answer,out,mods,splabel,resdir) {
cmeth <- tapply(sps$catch_kg,list(sps$Year,sps$Method),sum,na.rm=T)/1000
czone <- tapply(sps$catch_kg,list(sps$Year,sps$Zone),sum,na.rm=T)/1000
cfishery <- tapply(sps$catch_kg,list(sps$Year,sps$Fishery),sum,na.rm=T)/1000
sps1 <- answer$sps1
nobs <- answer$nobs
LimCfishery <- tapply(sps1$catch_kg,list(sps1$Year,sps1$Fishery),sum,na.rm=T)/1000
LimCZone <- tapply(sps1$catch_kg,list(sps1$Year,sps1$Zone),sum,na.rm=T)/1000
LimCDep <- tapply(sps1$catch_kg,list(sps1$Year,sps1$DepCat),sum,na.rm=T)/1000
datasum <- makedatasum(sps,sps1,out)
fileout <- paste(resdir,splabel,".txt",sep="")
if (file.exists(fileout)) file.remove(fileout)
sink(fileout)
print(splabel,quote=F)
cat("\n")
print(nobs)
cat("\n")
print("Models Used",quote=F)
cat("\n")
print(mods)
cat("\n")
print("Fishery Summary",quote=F)
cat("\n")
print(datasum)
cat("\n")
print("Catch by Year and Method for All Data",quote=F)
cat("\n")
print(cmeth)
cat("\n")
print("Catch by Year and Zone All data",quote=F)
cat("\n")
print(czone)
cat("\n")
print("Catch by Year and by Fishery All data",quote=F)
cat("\n")
print(cfishery)
cat("\n")
print("Catch by Year and Fishery Selected data",quote=F)
cat("\n")
print(LimCfishery)
cat("\n")
print("Catch by Year and by Zone, selected data",quote=F)
cat("\n")
print(LimCZone)
cat("\n")
print("Catch by Year and by DepCat, selected data",quote=F)
cat("\n")
print(LimCDep)
cat("\n")
print("Standardization Output",quote=FALSE)
cat("\n")
for (i in 1:7){
print(names(out)[i],quote=FALSE)
cat("\n")
print(out[[i]])
cat("\n")
}
sink()
return(invisible(datasum))
} # end of saveresults
#' @title scaleCE - Function to scale a vector of CPUE to a mean of one of avCE
#'
#' @description Function to scale a vector of CPUE to a mean of one or avCE.
#' The use of a mean of one means that visual comparisons between different
#' time-series becomes visually simplified. The avCE option could be used
#' to scale the CPUE to the average geometric mean - so as to put it on
#' the nominal scale
#' @param invect a vector of linear scale CPUE
#' @param avCE defaults to one but can be set to a particular value
#' @return the time-series of CPUE re-scaled to a mean of one or avCE
#' @export scaleCE
#' @examples
#' ce <- c(0.4667187,1.2628564,0.8442146,0.9813531, 0.5554076,0.7426321)
#' scaleCE(ce)
#' scaleCE(ce,100.0)
scaleCE <- function(invect,avCE=1.0) {
average <- mean(invect,na.rm=TRUE)
ans <- avCE * (invect/average)
return(ans)
} # end of scaleCE
#' @title selectdata - select records from the sessf data
#'
#' @description selectdata - select records from the sessf data based upon
#' whether ther edependent variable is empty or not, depth range, years,
#' SESSF zones, fishing method, and fishery. If any are omitted then
#' all records will be used
#'
#' @param indat the data set to be filtered - usually sps
#' @param dependent - the dependent variable in the standardization,
#' defaults to LnCE
#' @param Ldepth the lower depth bound - defaults to 0
#' @param Udepth the upper depth bound, defaults to 1000
#' @param startyear the first year for inclusion, defaults to 0, which
#' implies the use of all years present
#' @param finalyear the final year for inclusion, defaults to 0
#' @param inzones the sessf zones to be considered, defaults to NA,
#' which implies either ORzone or sharkzone will be used
#' @param ORzone if the species uses the Orange Roughy zones rather than the
#' SESSF zones then use this input variable; defaults to NA
#' @param sharkzone if the species uses the Shark Regions rather than the
#' SESSF zones then use this input variable; defaults to NA
#' @param method the fishing method to be considered, defaults to NA,
#' which implies all methods to be included
#' @param fishery the three letter acronym for eahc fishery to be
#' considered, defaults to NA, which means include all.
#'
#' @return a list made up of the filtered data set named 'sps1', and
#' a matrix containing the effect of the filtering on the numbers of
#' observations; named 'nobs'.
#' @export selectdata
#'
#' @examples
#' \dontrun{
#' data(sps)
#' sort(unique(sps$Year))
#' print(range(sps$Depth))
#' sps$CE <- NA
#' sps$LnCE <- NA
#' pick <- which((sps$catch_kg > 0) & (sps$Effort > 0))
#' sps$CE[pick] <- sps$catch_kg[pick]/sps$Effort[pick]
#' sps$LnCE[pick] <- log(sps$CE[pick])
#' out <- selectdata(sps)
#' sps1 <- out$sps1
#' nobs <- out$nobs
#' nobs
#' }
selectdata <- function(indat,dependent="LnCE",Ldepth=0,Udepth=1000,
startyear=0,finalyear=3000,inzones=NA,ORzone=NA,
sharkzone=NA,method=NA,fishery=NA) {
nobs <- matrix(0,nrow=4,ncol=7)
colnames(nobs) <- c("Total","NoCE","Depth","Years","Zones","Method","Fishery")
rownames(nobs) <- c("Records","Difference","Catch","Difference")
nobs[1,"Total"] <- dim(indat)[1]
nobs[3,"Total"] <- sum(indat$catch_kg,na.rm=TRUE)/1000
if (!(dependent %in% colnames(indat)))
stop(paste0(dependent," not defined in the input data.frame"))
if (dependent == "LnCE") {
empty <- which((is.na(indat[,dependent])))
}
if (dependent == "catch_kg") {
empty <- which((is.na(indat[,dependent])) | (indat[,dependent] <= 0))
}
if (length(empty) > 0) {sps1 <- indat[-empty,] } else { sps1 <- indat }
nobs[1,"NoCE"] <- dim(sps1)[1]
nobs[3,"NoCE"] <- sum(sps1$catch_kg,na.rm=TRUE)/1000
pick <- which((sps1$Depth >= Ldepth) & (sps1$Depth <= Udepth))
if (length(pick) > 0) {sps1 <- sps1[pick,] }
nobs[1,"Depth"] <- dim(sps1)[1]
nobs[3,"Depth"] <- sum(sps1$catch_kg,na.rm=TRUE)/1000
if ((startyear > 0) & (finalyear > 0)) {
pick <- which((sps1$Year >= startyear) & (sps1$Year <= finalyear))
if (length(pick) > 0) sps1 <- sps1[pick,]
}
nobs[1,"Years"] <- dim(sps1)[1]
nobs[3,"Years"] <- sum(sps1$catch_kg,na.rm=TRUE)/1000
if (!is.na(inzones)[1]) {
pick <- which(sps1$Zone %in% inzones)
if (length(pick) > 0) sps1 <- sps1[pick,]
}
if (!is.na(ORzone)[1]) {
pick <- which(sps1$ORzone %in% ORzone)
if (length(pick) > 0) sps1 <- sps1[pick,]
}
if (!is.na(sharkzone)[1]) {
pick <- which(sps1$SharkRegion %in% sharkzone)
if (length(pick) > 0) sps1 <- sps1[pick,]
}
nobs[1,"Zones"] <- dim(sps1)[1]
nobs[3,"Zones"] <- sum(sps1$catch_kg,na.rm=TRUE)/1000
if (!is.na(method)[1]) {
pick <- which(sps1$Method %in% method)
if (length(pick) > 0) sps1 <- sps1[pick,]
}
nobs[1,"Method"] <- dim(sps1)[1]
nobs[3,"Method"] <- sum(sps1$catch_kg,na.rm=TRUE)/1000
if (!is.na(fishery)[1]) {
pick <- which(sps1$Fishery %in% fishery)
if (length(pick) > 0) sps1 <- sps1[pick,]
}
sps1 <- droplevels(sps1)
nobs[1,"Fishery"] <- dim(sps1)[1]
nobs[3,"Fishery"] <- sum(sps1$catch_kg,na.rm=TRUE)/1000
nobs[2,1] <- 0
nobs[2,2:7] <- nobs[1,1:6] - nobs[1,2:7]
nobs[4,1] <- 0
nobs[4,2:7] <- nobs[3,1:6] - nobs[3,2:7]
answer <- list(sps1=sps1,nobs=nobs)
return(answer)
} # end of selectdata
#' @title standLM Conduct a standarization on log-transformed CPUE using lm
#'
#' @description Conduct a standarization on log-transformed CPUE data using lm.
#' A number of predefined models are entered (use makemodels) with the raw
#' data and a label. The optimum model is taken to be the one with the
#' smallest AIC, though usually (not always) this is the same as with the
#' largest adj-r2.
#' @param inmods is the list of models to be analysed along with their
#' respective labels - can be generated using makemodels
#' @param indat is the data.frame containing the raw data, which must contain
#' columns with the same names as the factors being included in the models.
#' @param inlab is an optional label that is added to text and plotted outputs,
#' which defaults to the empty string
#' @param console defaults to TRUE; if TRUE write each model to the screen
#' as each model runs, which can mess up auto-documenting documents.
#' @return a list containing the standardization of class 'CEout'
#' @export standLM
#' @examples
#' \dontrun{
#' data(ab)
#' splabel = "Abalone"
#' print(tapply(ab$catch,list(ab$year,ab$block),sum,na.rm=TRUE)/1000)
#' labelM <- c("year","diverID","month","block","boatID")
#' ab1 <- makecategorical(labelM,ab)
#' mods <- makemodels(labelM)
#' out <- standLM(mods,ab1,splabel)
#' round(out$Results,4)
#' round(out$WhichM,4)
#' plotstand(out,bars=TRUE)
#' }
standLM <- function(inmods,indat,inlab="",console=TRUE){ #inmods=mods;indat=sps2;inlab=""; console=TRUE
NModels <- length(inmods)
labelM <- inmods[[NModels]]
NModels <- NModels - 1
ans <- vector("list",NModels)
names(ans) <- labelM
Yearnames <- levels(as.factor(indat[,labelM[1]]))
Nyrs <- length(Yearnames)
rows <- c("AIC","RSS", "MSS","Nobs", "Npars","adj_r2","%Change")
WhichM <- matrix(nrow=length(rows),ncol=NModels,dimnames=list(rows,labelM))
rows <- Yearnames
Results <- matrix(nrow=Nyrs,ncol=NModels,dimnames = list(rows,labelM))
ResStErr <- matrix(nrow=Nyrs,ncol=NModels,dimnames = list(rows,labelM))
modellist <- vector("list",NModels)
names(modellist) <- inlab
geomod <- inmods[[1]] # the Year term
model <- lm(geomod,data=indat) # used to estimate the total sum of squared
totalssq <- sum(anova(model)[2]) # used in the %variation estimates
for (index in 1:NModels) { # index <- 1
if (console) cat(as.character(inmods[[index]]),"\n")
model <- lm(inmods[[index]],data=indat)
modellist[[index]] <- model
modelsum <- summary(model)
mat <- modelsum$coefficients
ans <- getfact(mat,labelM[1])
Results[,index] <- scaleCE(ans[,"Coeff"])
ResStErr[,index] <- ans[,"SE"]
anv <- anova(model)
RSS <- tail(anv$"Sum Sq",1)
WhichM["RSS",index] <- RSS
WhichM["MSS",index] <- totalssq-RSS # Model SS
df <- unlist(anv[1])
ndf <- length(df)
nobs <- sum(df) + 1
npars <- sum(df[1:(ndf-1)]) + 1
WhichM["AIC",index] <- nobs * log(RSS/nobs) + (2.0 * npars)
WhichM["Nobs",index] <- nobs
WhichM["Npars",index] <- npars #npars
WhichM["adj_r2",index] <- 100.0 * modelsum$adj.r.squared # adj_r2
}
for (index in 2:NModels) { # calculate the %Change
WhichM["%Change",index] <- WhichM["adj_r2",index]-WhichM["adj_r2",index-1]
}
WhichM["%Change",1] <- 0.0
pickinter <- grep(":",labelM)
if (length(pickinter) > 0) {
lastsimple <- pickinter[1] - 1
WhichM["%Change",pickinter] <- WhichM["adj_r2",pickinter] - WhichM["adj_r2",lastsimple]
}
optimum <- which.max(WhichM["adj_r2",])
altopt <- which.min(WhichM["AIC",])
if (optimum != altopt) {
label <- "adj_r2 gives a different optimum to AIC! AIC being used as optimum."
warning(cat("\n",label,"\n\n"))
optimum <- altopt
}
msg <- paste("Optimum model ",inmods[optimum],sep="")
if (console) print(msg,quote=F)
if (NModels >= 3) {
count <- 0
for (i in 3:NModels) { # i <- NModels
if (WhichM["%Change",i] > WhichM["%Change",(i-1)]) count <- count + 1
}
if ((count > 0) & (console)) {
print("Models need re-ordering with the recommended order being:",quote=F)
msg <- c(labelM[1],labelM[order(WhichM["%Change",2:NModels],decreasing=TRUE)+1])
print(msg,quote=F)
}
}
out <- list(Results,ResStErr,WhichM,optimum,Nyrs,inlab,
summary(modellist[[optimum]]),modellist[[optimum]],modellist)
names(out) <- c("Results","StErr","WhichM","Optimum","Nyrs","Label",
"Parameters","optModel","Models")
class(out) <- "CEout"
return(out)
} # end of standLM
#' @title summary.CEout - S3 function to print standardization results
#'
#' @description summary.CEout An S3 function to print out the primary results
#' from a standardization. It includes the structure of the output soother
#' things can be obtained if wished.
#' @param x is the output from a standardization using either StandLM.
#' It must be of class: CEout - an S3 class
#' @return Summarizes the contents of the output from standardization, this
#' includes
#' \describe{
#' \item{Structure}{this lists the top-level structure of the object output
#' from the standardizations from standLM}
#' \item{Parameters by Model}{This tabulated the yearly parameters for each
#' of the models tested - the 'mods' input to standLM}
#' \item{Std Errors by Model}{The matching standard errors for each of the
#' models, one column for each column of paramters; large sample sizes
#' mean these are usually an significant under-estimate of the
#' true variaiton}
#' \item{Model Diagnostic Properties}{A table of the statistical performance
#' of each model, allowing the identification of the optimum model}
#' \item{Optimum model}{identifies which column relates to the optimum
#' statistical model}
#' \item{All parameters}{The un-back-transformed parameters for the complete
#' optimum model}
#' }
#' @exportMethod summary.CEout
#' @examples
#' \dontrun{
#' data(sps)
#' splabel = "Blue-Eye"
#' labelM <- c("Year","Vessel","Month")
#' sps1 <- makecategorical(labelM,sps)
#' mods <- makemodels(labelM)
#' out <- standLM(mods,sps1,splabel)
#' summary(out)
#' }
summary.CEout <- function(x) {
cat("Structure of Analytical Output \n")
str(x,max.level=1)
cat("\n")
cat("Parameters by Model \n")
print(x$Results)
cat("\n")
cat("Std Errors by Model \n")
print(x$StErr)
cat("\n")
cat("Model Diagnostic Properties \n")
print(round(x$WhichM,3))
cat("\n")
cat("Optimum Model \n")
print(x$Optimum)
cat("\n")
cat("All Un-backtransformed parameters from the optimum model \n")
print(x$Parameters$coefficients)
} # end of summary.CEout
#' @title toXL copies a data.frame or matrix to the clipboard
#'
#' @description toXL copies a data.frame or matrix to the clipboard
#' so one can then switch to Excel and just type <ctrl> + V to paste the
#' data in place
#'
#' @param x a vector or matrix
#' @param output a boolean determining whether to print the object to the
#' screen as well as the clipboard; defaults to TRUE
#' @return Places the object 'x' into the clipboard ready for pasting
#' @export
#' @examples
#' datamatrix <- matrix(data=rnorm(100),nrow=10,ncol=10)
#' colnames(datamatrix) <- paste0("A",1:10)
#' rownames(datamatrix) <- paste0("B",1:10)
#' toXL(datamatrix,output=TRUE)
toXL <- function(x,output=TRUE) {
write.table(x,"clipboard",sep="\t")
if(output) print(x)
}
#' @title turnover Estimate turnover of vessels from catch by vessel by year data
#'
#' @description Estimate turnover of vessels from catch by vessel by year data;
#' To specify the minimum number of years that a vessel needs stay in the fishery,
#' then give a value to the variable minyrs.
#' @param x A matrix of a continuous numeric property by year,
#' the original usage was to plot catch-by-vessel against year
#' @param minyrs limits the analysis to those vessels that remain in the
#' fishery for at least minyrs years - which would eliminate the occasional
#' opportunistic fisher who only fishes for one or two years, or whatever
#' minimum is selected. Vessels with zero catches are not included in case
#' zeros and NAs are counted as starting and leaving the fishery.
#' @return a matrix of years by Continue, Leave, Start, Total
#' @export turnover
#' @examples
#' \dontrun{
#' library(r4cpue)
#' data(sps)
#' cbv <- tapply(sps$catch_kg,list(sps$Vessel,sps$Year),sum,na.rm=TRUE)/1000
#' dim(cbv)
#' early <- rowSums(cbv[,1:6],na.rm=TRUE)
#' late <- rowSums(cbv[,7:14],na.rm=TRUE)
#' cbv1 <- cbv[order(late,-early),]
#' plotprep(width=7,height=6)
#' yearBubble(cbv1,ylabel="Catch by Trawl",vline=2006.5,diam=0.2)
#' turnover(cbv)
#' }
turnover <- function(x,minyrs=1) {
years <- as.numeric(colnames(x))
ny <- length(years)
count <- apply(x,1,countgtzero)
pick <- which(count == 0)
if (length(pick) > 0) {
warning("Some Rows only have zeros or NAs")
x <- x[-pick,]
}
pick <- which(count > (minyrs - 1))
if (length(pick) > 0) x <- x[pick,]
columns <- c("Continue","Leave","Start","Total")
turnover <- matrix(0,nrow=ny,ncol=length(columns),
dimnames=list(years,columns))
turnover[1,1] <- length(which(x[,1] > 0))
for (yr in 2:ny) {
pair <- x[,(yr-1):yr]
pickC <- which((pair[,1] > 0) & (pair[,2] > 0))
pickL <- which((pair[,1] > 0) & ((is.na(pair[,2])) |
(pair[,2] < 0.001)))
pickS <- which(((is.na(pair[,1])) | (pair[,2] < 0.001)) &
(pair[,2] > 0))
turnover[yr,1:3] <- c(length(pickC),length(pickL),length(pickS))
}
turnover[,4] <- turnover[,1] + turnover[,3]
return(turnover)
} # end of turnover
#' @title yearNA - counts NAs per year in each numeric field in a data.frame
#'
#' @description yearNA - counts the number of NAs in each year of each numeric
#' field in a data.frame and outputs the results as a matrix
#' @param indat the data.frame whose numeric fields are to be considered
#' @param years identifies the name of the "Year" field in the data.frame
#' @param empty logical default=FALSE, determines whether columns which have
#' no NA present are printed or not
#' @return a matrix of years x numeric fields with the number of records per
#' field per year as reference
#' @export yearNA
#' @examples
#' year <- sort(rep(1990:1994,5))
#' columns <- c("year","Var1","Var2","Var3")
#' dat <- matrix(runif(100),nrow=25,ncol=4,dimnames=list(year,columns))
#' dat[trunc(100*runif(20))] <- NA
#' dat[,1] <- year
#' print(dat)
#' yearNA(as.data.frame(dat),years="year")
yearNA <- function(indat,years="Year",empty=FALSE) {
records <- table(indat[,years])
nna <- function(x) sum(is.na(x))
columns <- colnames(indat)
clas <- sapply(indat, class)
numbers <- c("integer", "numeric")
pick <- which(clas %in% numbers)
num <- length(pick)
ans <- NULL
for (i in 1:num) {
ans <- cbind(ans,tapply(indat[,columns[pick[i]]],indat[,years],nna))
}
ans <- cbind(ans,records)
colnames(ans) <- c(columns[pick],"Records")
if (!empty) {
pick <- which(colSums(ans,na.rm=TRUE) > 0)
if (length(pick) > 0) ans <- ans[,pick]
}
return(ans)
} #end of yearNA
#' @title yearZero - examine listed variables for zeros and NAs
#'
#' @description yearZero - examines an input data.frame of matrix in the
#' variables listed in the parameter 'label' and counts the zeros,
#' NAs, and those greater than 0. It counts these relative to a 'Year' variable,
#' or any grouping variable identified using the 'years' parameter.
#' @param indat the data.frame or matrix to be examined
#' @param years the grouping variable for counts to be compared across,
#' defaults to 'Year'
#' @param label the collection of column names which are to be examined,
#' defaults to c('catch_kg','Effort'), which is suitable for SESSF
#' data sets.#'
#' @return A matrix of the variables in 'label' with a column for each of
#' Zeros, >0, and NAs, with a final column of the number of records
#' per grouping variable value.
#' @export yearZero
#'
#' @examples
#' insps <- matrix(runif(30),nrow=15,ncol=2)
#' insps[trunc(runif(7)*30)+1] <- 0
#' insps[trunc(runif(7)*30)+1] <- NA
#' insps <- cbind(sort(rep(2000:2004,3)),insps)
#' colnames(insps) <- c("Year","catch_kg","Effort")
#' print(round(insps,4))
#' yearZero(insps)
yearZero <- function(indat,years="Year",label=c("catch_kg","Effort")) { # indat <- sps
columns <- colnames(indat)
pick <- which(columns %in% label)
npick <- length(pick)
if (npick != length(label)) warning(paste0("Only ",columns[pick]," found"))
if (npick == 0) stop("None of the listed variable found; rename them")
records <- table(indat[,years])
nna <- function(x) sum(is.na(x)) # sums the logical outcomes
gtzero <- function(x) sum(x > 0,na.rm=TRUE)
ans <- NULL
for (i in 1:npick) {
gtz <- tapply(indat[,columns[pick[i]]],indat[,years],gtzero)
nas <- tapply(indat[,columns[pick[i]]],indat[,years],nna)
zeros <- records - gtz - nas
ans <- cbind(ans,zeros,gtz,nas)
}
ans <- cbind(ans,records)
tmplab <- sort(c(paste0(columns[pick],"NA"),paste0(columns[pick],"GT0"),paste0(columns[pick],"0")))
colnames(ans) <- c(tmplab,"Records")
return(ans)
} # end of yearZero
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.