R/mqmf_utils.r

Defines functions `%ni%` which.closest quants removeEmpty properties printV predfreq plotfishM penalty1 penalty0 outfit makelabel magnitude linter likeratio iscol halftable getvector gettime getsingle getseed getname getmax getmin freqMean facttonum countzeros countones countNAs countgtzero countgtone bracket aicbic

Documented in aicbic bracket countgtone countgtzero countNAs countones countzeros facttonum freqMean getmax getmin getname getseed getsingle gettime getvector halftable iscol likeratio linter magnitude makelabel outfit penalty0 penalty1 plotfishM predfreq printV properties quants removeEmpty which.closest

#' @title aicbic returns the AIC and BIC for a given model
#' 
#' @description aicbic calculates and returns the AIC and BIC using the
#'     standard definitions. It defaults to assuming that negative log-
#'     likelihoods have been used in the model fitting, but provides the
#'     option of having used SSQ (set nLL to FALSE). If using SSQ it 
#'     uses Burnham and Anderson's (2002) definition but sets BIC to NA. 
#'     aicbic can recognize the outputs from optim, nlm, and nlminb.
#'
#' @param model the optimum model fitted by either optim, nlm, or nlminb
#' @param dat the data set used in the modelling, or just n the number of
#'     observations; it can distinguish between them
#' @param nLL uses negative log-likelihood? default=TRUE
#'
#' @return a vector of four numbers, AIC first, then BIC, then negLL or 
#'     SSQ, depending on nLL, then number of parameters p
#' @export
#' 
#' @references Burnham, K.P. and D.R. Anderson (2002) \emph{Model Selection and 
#'     Inference. A Practical Information-Theoretic Approach.} Second Edition 
#'     Springer-Verlag, New York. 488 p.
#'
#' @examples
#' data(blackisland); bi <- blackisland
#' param <- c(Linf=170.0,K=0.3,sigma=4.0)
#' modelvb <- nlm(f=negNLL,p=param,funk=fabens,observed=bi$dl,indat=bi,
#'                initL="l1",delT="dt") # could have used the defaults
#' # Do not worry about the warning messages
#' aicbic(modelvb,blackisland)  # 588.3382 596.3846 291.1691   3
aicbic <- function(model,dat,nLL=TRUE) {  # model <- modelil; dat=bi
  if (inherits(dat,c("matrix","data.frame"))) {
    n <- nrow(dat)
  } else {
    n <- dat
    if (!inherits(n,"integer"))
      stop("input dat is neither the data used or the number of obs \n")
  }
    outputs <- c("value","minimum","objective")
  pars <- c("par","estimate","par")
  pick <- match(names(model),outputs)
  pickmeth <- pick[which(pick > 0)]
  LL <- model[[outputs[pickmeth]]]
  npar <- length(model[[pars[pickmeth]]])
  if (nLL) {
     aic <- -2.0*(-LL) + (2 * npar)
     bic <- -2.0*(-LL) + (log(n) * npar)
     out <- c(aic=aic,bic=bic,negLL=LL,p=npar)
  } else {
     aic <- n * (log(LL/n)) + 2.0 * npar
     bic <- NA
     out <- c(aic=aic,bic=bic,ssq=LL,p=npar)
  }
  return(out)
} # end of aicbic

#' @title bracket bounds a value on the x-axis and y-axis
#' 
#' @description bracket scans through a series of predicted values for
#'     the location of a target value of the y-axis and returns the two
#'     y values that bracket the target, perhaps finding the values 
#'     closest to 0.5 in a vector between 0 and 1. It also returns the
#'     x-axis values that gave rise to the two values bracketing the 
#'     target, and finally returns the target. For example, imagine 
#'     generating the proportion mature for a given length of fish using 
#'     an equation for which there was no analytical solution to what 
#'     the value of the L50 or the inter-quartile distance was. Bracket
#'     can find the two lengths that generate proportions just below 0.5
#'     and just above. It does not matter if, by chance, the target is
#'     one of those y-axis values. 
#'
#' @param x the target predicted value of interest
#' @param yaxis the predicted values reflecting the xaxis values 
#' @param xaxis the series of values used to generate the predicted 
#'     values
#'
#' @seealso linter
#'
#' @return a vector of 5 values, left, right, bottom, top and target
#' @export
#'
#' @examples
#'  L = seq(60,160,1)
#'  p=c(a=0.075,b=0.075,c=1.0,alpha=100)
#'  asym <- srug(p=p,sizeage=L) # Schnute and Richards unified growth curve
#'  L25 <- linter(bracket(0.25,asym,L)) 
#'  L50 <- linter(bracket(0.5,asym,L)) 
#'  L75 <- linter(bracket(0.75,asym,L)) 
#'  ans <- c(L25,L50,L75,L50-L25,L75-L50)
#'  {cat("   L25    L50      L75   L50-L25 L75-L50 \n")
#'  cat(round(ans,4),"\n")} 
bracket <- function(x,yaxis,xaxis) {
  pick <- which(yaxis < x) 
  bot <- max(pick)
  bottom<- yaxis[bot]
  top <- yaxis[bot+1]
  ans <- c(xaxis[bot], xaxis[bot+1],bottom, top,x )
  names(ans) <- c("left","right","bottom","top","target")
  return(ans)
} # end of bracket

#' @title countgtone used in apply to count the number > 1 in a vector
#'
#' @description countgtone used in the base function 'apply', or 'tapply' to 
#'     count the number > 1 in a vector
#' 
#' @param invect vector of values
#' 
#' @return the number of values in the vector > 1
#' @export
#' @examples
#' x <- matrix(trunc(runif(20)*10),nrow=4,ncol=5)
#' print(x)
#' apply(x,1,countgtone)
countgtone <- function(invect) {
  pick1 <- which(invect > 1.0)
  return(length(pick1))
} # end of countgtone

#' @title countgtzero used in apply to count numbers > zero in a vector
#'
#' @description countgtzero used in the base function 'apply', or 'tapply' to 
#'     count how many numbers are greater than zero in a vector
#'     
#' @param invect vector of values
#' @return An integer counting how many numbers are > 0
#' @export
#' @examples
#' x <- matrix(trunc(runif(20)*10),nrow=4,ncol=5)
#' print(x)
#' apply(x,1,countgtzero) # count by rows
#' apply(x,2,countgtzero) # count by columns
countgtzero <- function(invect) {
  pick <- which(invect > 0)
  return(length(pick))
}

#' @title countNAs used in apply to count the number of NAs in a vector
#'
#' @description countNAs used in the base function 'apply', or 'tapply' to count 
#'     the number of NAs in a vector
#' @param invect vector of values
#' @return A single value of zero or the number of NAs
#' @export
#' @examples
#' x <- matrix(trunc(runif(20)*10),nrow=4,ncol=5)
#' diag(x) <- NA
#' print(x)
#' apply(x,1,countNAs)
countNAs <- function(invect) {
  pick <- which(is.na(invect))
  return(length(pick))
}

#' @title countones used in apply to count the number of ones in a vector
#'
#' @description countones used in the base function 'apply', or 'tapply' to 
#'     count the number of ones in a vector
#' @param invect vector of values
#' @return A single value of zero or the number of ones
#' @export
#' @examples
#' x <- matrix(trunc(runif(20)*10),nrow=4,ncol=5)
#' print(x)
#' apply(x,1,countones)  # by rows
countones <- function(invect) {
   pick <- which(invect == 1)
   return(length(pick))
}

#' @title countzeros used in apply to count the number of zeros in a vector
#'
#' @description countzeros used in the base function 'apply', or 'tapply' to 
#'     count the number of zeros in a vector
#' @param invect vector of values
#' @return A single value of zero or the number of zeros
#' @export
#' @examples
#' x <- matrix(trunc(runif(20)*10),nrow=4,ncol=5)
#' print(x)
#' apply(x,1,countzeros) # count by rows
#' apply(x,2,countzeros) # count by columns
countzeros <- function(invect) {
   pick <- which(invect == 0.0)
   return(length(pick))
}
#' @title facttonum converts a vector of numeric factors into numbers
#'
#' @description facttonum converts a vector of numeric factors into numbers.
#'     If the factors are not numeric then the outcome will be a series of NA.
#'     It is up to you to apply this function only to numeric factors. A warning
#'     will be thrown if the resulting output vector contains NAs
#'
#' @param invect the vector of numeric factors to be converted back to numbers
#'
#' @return an output vector of numbers instead of the input factors
#' @export
#'
#' @examples
#'  DepCat <- as.factor(rep(seq(100,600,100),2))
#'  print(DepCat)
#'  5 * DepCat[3]    # throws an error, cannot multiply a factor
#'  as.numeric(DepCat) # ordinal values of the factors
#'  as.numeric(levels(DepCat)) #converts the levels not the replicates
#'  DepCat <- facttonum(DepCat)
#'  DepCat / 2.0     # now all values back to values
#'  x <- factor(letters) 
#'  facttonum(x)     # this would be silly, characters are not numbers
facttonum <- function(invect){
  if (inherits(invect,"factor")) {
    outvect <- suppressWarnings(as.numeric(levels(invect))[invect])
  }
  if (inherits(invect,"numeric")) outvect <- invect
  if (any(is.na(outvect)))
    warning("NAs produced, your input vector may have non-numbers present \n")
  return(outvect)
} # end of facttonum

#' @title freqMean calculates the mean and stdev of count data
#'
#' @description freqMean calculates the mean and stdev of count data
#'     it requires both the values and their associated counts and
#'     return a vector of two numbers.
#'
#' @param values the values for which there are counts
#' @param infreqs the counts for each of the values empty cells can be
#'     either 0 or NA
#'
#' @return a vector containing the mean and st.dev.
#' @export
#'
#' @examples
#' vals <- c(1,2,3,4,5)
#' counts <- c(3,NA,7,4,2)
#' freqMean(vals,counts)  # should give 3.125 and 1.258306
freqMean <- function(values,infreqs) {
   N <- length(values)
   if (N != length(infreqs)) {
      cat("vectors have different lengths \n")
      ans <- c(NA,NA)
      names(ans) <- c("mean","stdev")
   } else {
      nobs <- sum(infreqs,na.rm=TRUE)
      sumX <- sum(values * infreqs,na.rm=TRUE)
      av <- sumX/nobs
      if (nobs > 1) {
         sumX2 <- sum(values * values * infreqs,na.rm=TRUE)
         stdev <- sqrt((sumX2 - (sumX * sumX)/nobs)/(nobs-1))
      } else { stdev <- NA
      }
      ans <- c(av,stdev)
      names(ans) <- c("mean","stdev")
   }
   return(ans)
} # end of freq_Mean

#' @title getmin can be used to define the lower bound for a plot
#'
#' @description getmin generates a lower bound for a plot where it is unknown
#'     whether the minumum is less than zero of not. If less than 0 then
#'     multiplying by the default mult of 1.05 works well but if the outcome if
#'     > 0 then the multiplier is adjusted appropriately so that the minimum
#'     is slightly lower than the minimum of the data
#'
#' @param x the vector of data to be tested for its minimum
#' @param mult the multiplier for both ends, defaults to 1.05 (=0.95 if >0)
#'
#' @return a suitable lower bound for a plot if required
#' @export
#'
#' @examples
#' vect <- rnorm(10,mean=0,sd=2)
#' sort(vect)
#' getmin(vect)
#' getmin(vect,mult=1.0)  # finds actual minimum
getmin <- function(x,mult=1.05) {
   ymin <- min(x,na.rm=TRUE)
   if (ymin < 0) {
      ymin <- ymin * mult
   } else {
      ymin <- ymin * (2 - mult)
   }
   return(ymin)
} # end of getmin

#' @title getmax can be used to define the upper bound for a plot
#'
#' @description getmax generates an upper bound for a plot where it 
#'     is unknown whether the maximum is greater than zero of not. 
#'     If > 0 then multiplying by the default mult of 1.05 works well 
#'     but if the outcome if < 0 then the multiplier needs to be 
#'     adjusted appropriately so the maximum is slightly higher than 
#'     the maximum of the data
#'
#' @param x the vector of data to be tested for its maximum
#' @param mult the multiplier for both ends, defaults to 1.05 (=0.95 if < 0)
#'
#' @return a suitable upper bound for a plot if required
#' @export
#'
#' @examples
#'  vect <- rnorm(10,mean=0,sd=2)
#'  sort(vect,decreasing=TRUE)
#'  getmax(vect,mult=1.0)
#'  vect <- rnorm(10,mean = -5,sd = 1.5)
#'  sort(vect,decreasing=TRUE)
#'  getmax(vect,mult=1.0)
getmax <- function(x,mult=1.05) {
   ymax <- max(x,na.rm=TRUE)
   if (ymax > 0) {
      ymax <- ymax * mult
   } else {
      ymax <- ymax * (2 - mult)
   }
   return(ymax)
} # end of getmax

#' @title getname returns the name of a variable as character
#'
#' @description getname runs 'deparse(substitute(x))' to get the
#'     name of the input variable. Saves remembering the syntax
#'
#' @param x any variable whose name is wanted as a character string
#'
#' @return a character string with the name of input variable
#' @export
#'
#' @examples
#' a_variable <- c(1,2,3,4,5,6,7,8)
#' getname(a_variable)
getname <- function(x) {
   return((deparse(substitute(x))))
}

#' @title getseed generates a random number seed
#' 
#' @description getseed generates a seed for use within set.seed. 
#'     It produces up to a 7 digit integer from the Sys.time. 
#'     Initially, at the start of a session there is no seed; a new one 
#'     is created from the current time and the process ID when one is 
#'     first required. Here, in getseed, we do not use the process ID so 
#'     the process is not identical but this at least allows the 
#'     set.seed value to be stored should the need to repeat a set of 
#'     simulations arise. The process generates up to a seven digit number
#'     it then randomly reorders those digits and that becomes the seed.
#'     That way, if you were to call getseed in quick succession the
#'     seeds generated should differ even when they are generated close
#'     together in time.
#'
#' @return  an integer up to 7 digits long 
#' @export
#'
#' @examples
#' useseed <- getseed()
#' set.seed(useseed)
#' rnorm(5)
#' set.seed(12345)
#' rnorm(5)
#' set.seed(useseed)
#' rnorm(5)
getseed <- function() {
  pickseed <- as.character(as.integer(Sys.time()))
  nc <- nchar(pickseed)
  if (nc > 7) pickseed <- substr(pickseed,(nc-6),nc)
  nc <- nchar(pickseed)  
  pseed <- unlist(strsplit(pickseed,split=character(0)))
  pseed <- sample(pseed,nc)
  newseed <- paste(pseed,collapse="")
  newseed <- as.numeric(newseed)
  return(newseed)
} # end of getseed

#' @title getsingle extracts one number from an input line of characters
#'
#' @description getsingle splits up a text line and translates the 
#'     first non-empty character string into a number. This function can be
#'     useful when parsing an input data file.
#'
#' @param inline the line of text, usually taken after using readLines to read 
#'     in a text file
#' @param sep the separator used to divide the numbers from descriptive 
#'     text, defaults to a comma.
#'
#' @return a single number
#' @export
#'
#' @examples
#' x <- "12.3 , this is a number"
#' y <- "21.3 # 22.3 # here are two numbers"
#' getsingle(x)
#' getsingle(y,sep="#")
#' getsingle(y) # be sure to get the separator correct
getsingle <- function(inline,sep=",") {  # inline=dat[41]
  tmp <- unlist(strsplit(inline,sep))
  tmp <- gsub(" ","",tmp)
  tmp <- tmp[nchar(tmp) > 0]
  return(as.numeric(tmp[1]))
}

#' @title gettime calculates time in seconds passed each day
#' 
#' @description gettime is a function designed to facilitate the measurement
#'     of time between intervals within R software that are expected to
#'     take a maximum of hours. It calculates the time as seconds elapsed 
#'     from the start of each day. As long as the timing of events does not
#'     pass from one day to the next accurate results will be generated. To 
#'     measure the time taken one would store an initial value that would be 
#'     subtracted from a final value.
#'
#' @return the time in seconds from the start of a day
#' @export
#'
#' @examples
#'   begin <- gettime()
#'   for (i in 1:1e6) sqrt(i)
#'   finish <- gettime()
#'   print(finish - begin)
gettime <- function() {
  tim <- unlist(as.POSIXlt(Sys.time()))
  hr <- as.numeric(tim["hour"])*3600
  min <- as.numeric(tim["min"])*60
  sec <- as.numeric(tim["sec"])
  return(hr+min+sec)
} # end of gettime

#' @title getvector extracts a vector of numbers from a line of characters
#'
#' @description getvector, when reading in a csv file using readLines,
#'     getvector extracts a line of numbers from a specified line within
#'     the readLines object.This function works out how many numbers there
#'     are. If you wish to add a comment at the end of a vector of numbers
#'     it must be separated from them by the separator. e.g. a comma
#' @param indat the readLines object
#' @param locate the line number from which to extract the numbers
#' @param sep the separator between numbers, defaults to ","
#'
#' @return a vector of numbers
#' @export
#'
#' @examples
#' x <- "12.3, 15.1, 8.7,10.3,  # this is a vector of four numbers"
#' y <- "21.3 # 22.3 # 8.7 # 10.3 # here are another four numbers"
#' getvector(x)    # uses default separator
#' getvector(y,sep="#")
getvector <- function(indat,locate,sep=",") { 
  vect <- indat[locate]
  if (length(grep("\t",vect) > 0)) vect <- gsub("\t",sep,vect)
  vect <- unlist(strsplit(vect,sep))
  vect <- gsub(" ","",vect)
  vect1 <- vect
  vect <- suppressWarnings(as.numeric(vect))
  vect <- vect[nchar(vect) > 0]
  if (!any(vect1 == "NA")) {
    vect <- vect[!is.na(vect)]
  }
  return(vect)
}

#' @title halftable halves the height of a tall narrow data.frame for printing
#'
#' @description halftable would be used when printing a table using kable
#'     from knitr where one of the columns was Year. The objective would be to
#'     split the table in half taking the bottom half and attaching it on
#'     the right hand side of the top half. The year column would act as the
#'     index.
#'
#' @param inmat the data.frame to be subdivided
#' @param yearcol the column name of the year field default="year"
#' @param subdiv the number of times the data.frame should be subdivided;
#'     the default is 2 but the numbers can only be 2 or 3.
#'
#' @return a data.frame half the height and double the width of the original
#' @export
#'
#' @examples
#' x <- as.data.frame(matrix(runif(80),nrow=20,ncol=4))
#' x[,1] <- 1986:2005
#' x[,4] <- paste0("text",1:20)
#' halftable(x,yearcol="V1",subdiv=2)
#' halftable(x[,c(1,2,4)],yearcol="V1")
#' x1 <- rbind(x,x[1,])
#' x1[21,"V1"] <- 2006
#' halftable(x1,yearcol="V1",subdiv=3)
halftable <- function(inmat,yearcol="year",subdiv=2) {
   if (!(subdiv %in% c(2,3))) stop("\n subdiv must be 2 or 3 \n")
   numrow <- dim(inmat)[1]
   numcol <- dim(inmat)[2]
   extra <- rep(NA,numcol)
   if ((numrow %% subdiv) == 0) {
      newnr <- numrow/subdiv
      incomplete <- FALSE
   } else {
      newnr <- trunc(numrow/subdiv) + 1
      incomplete <- TRUE
   }
   # years <- inmat[,yearcol]
   first <- inmat[1:newnr,]
   if (subdiv == 2) {
      second <- inmat[-c(1:newnr),]
      diff <- (nrow(first) - nrow(second))
      if (diff > 0) {
         numcol <- ncol(inmat)
         third <- rbind(second,extra)
      } else {
         third <- second
      }
   } else {
      second <- inmat[c(newnr+1):c(2*newnr),]
      first <- cbind(first,second)
      third <- inmat[-c(1:(2*newnr)),]
      diff <- nrow(first) - nrow(third)
      if (diff > 0) third <- rbind(third,extra)
      if (diff > 1) third <- rbind(third,extra)
   }
   outmat <- cbind(first,third)
   rownames(outmat) <- 1:newnr
   return(outmat)
} # end of halftable

#' @title incol is a utility to determine if a column is present in a matrix
#'
#' @description incol is a utility to determine whether a named column is
#'     present in a given matrix or data.frame. Is case sensitive
#'
#' @param incol the name of the column; defaults to "year" as an example
#' @param inmat the matrix or data.frame within which to search for incol
#'
#' @return TRUE or FALSE
#' @export
#'
#' @examples
#' x <- 1:10
#' test <- matrix(x,nrow=5,ncol=2,dimnames=list(1:5,c("year","Catch")))
#' print(test)
#' iscol("year",test)
#' iscol("Catch",test)
#' iscol("catch",test)
#' iscol("ages",test)
iscol <- function(incol="year",inmat) {
  if (length(grep(incol,colnames(inmat))) < 1) return(FALSE)
  else return(TRUE)
}

#' @title likeratio conducts a likelihood ratio test
#' 
#' @description likeratio conducts a likelihood ratio test on two
#'     negative log-likelihoods. It produces the LR plus related 
#'     statistics detailing if a significant difference has been found.
#'     The order in which the log-likelihoods are entered does not 
#'     matter as that is checked for so a positive likelihood ratio
#'     is always generated.
#'
#' @param nLL1 the first -ve log-likelihood
#' @param nLL2 the second -ve log-likelihood 
#' @param df the number of degrees of freedom difference between the two
#'     model, a minimum of 1, which is the default.
#'
#' @return a vector of the likelihood ratio, the significance of any 
#'     difference, the minimum difference required for significance, and
#'     the degrees of freedom.
#' @export
#'
#' @examples
#'   one <- 291.1691
#'   two <- 277.0122
#'   dof <- 1
#'   round(likeratio(one,two,dof),8)  # LR = 28.3138
likeratio <- function(nLL1,nLL2,df=1) {
  if (!(nLL1 < nLL2)) {
    tmp <- nLL1
    nLL1 <- nLL2
    nLL2 <- tmp
  }
  dif <- -2.0 * (nLL1 - nLL2)
  signif <- dchisq(dif,df) 
  mindiff <- qchisq(0.95,df)
  ans <- c(LR=dif,P=signif,mindif=mindiff,df=df)
  return(ans)
} # end of likeratio

#' @title linter finds a value in a series using its location in another
#' 
#' @description linter is a tool for linearly interpolating in a 2-D
#'     cartesian space to search out an unknown value between two known
#'     points on the x-axis, based on a known value between two known 
#'     points on the y-axis. This might be answering the question of
#'     what would be the length at 50 percent maturity for a curve with no 
#'     analytical solution. We could find two points in a series of
#'     proportion values on the y-axis that bracketed the 50 percent value 
#'     using the function bracket. They would be associated with the two 
#'     length values on the x-axis used to generate the predicted 
#'     proportion values. If we assume the various points in the 2-D 
#'     space to be approximated by linear relations then the location 
#'     between the two known x-axis length values corresponding to the 
#'     L50 would have the same ratio as the 50 percent value has to the two
#'     points on the y-axis. See the example for details. The input
#'     arguments include five values, left, right, bottom, top, and 
#'     target. So, left and right are sequential values on the x-axis, 
#'     bottom and top are the corresponding sequential values on the 
#'     y-axis, and target is the value we are looking for on the y-axis.
#'
#' @param pars a vector of 5 values, left, right, bottom, top and target
#'     
#' @seealso bracket
#'     
#' @return a single value being the x-axis value between left and right
#'     corresponding to the target on the x-axis 
#' @export
#'
#' @examples
#'  L = seq(60,160,1)
#'  p=c(a=0.075,b=0.075,c=1.0,alpha=100)
#'  asym <- srug(p=p,sizeage=L)
#'  L25 <- linter(bracket(0.25,asym,L)) 
#'  L50 <- linter(bracket(0.5,asym,L)) 
#'  L75 <- linter(bracket(0.75,asym,L)) 
#'  ans <- c(L25,L50,L75,L50-L25,L75-L50)
#'  {cat("   L25    L50      L75   L50-L25 L75-L50 \n")
#'  cat(round(ans,4),"\n")} 
linter <- function(pars) {
  # pars= "left","right","bottom","top","target"
  rge <- pars[4] - pars[3]
  tarrge <- pars[5] - pars[3]
  ratio <- tarrge/rge  
  deprge <- pars[2] - pars[1]
  ans <- pars[1] + ratio * deprge  
  names(ans) <- "target"
  return(ans)
} # end of linter

#' @title magnitude returns the magnitude of numbers in base 10
#'
#' @description magnitude is useful when using an
#'     optimizer such as optim, which uses a parscale parameter.
#'     magnitude can determine the respective parscale value for each
#'     parameter value.
#'
#' @param x the vector of numbers (parameters) whose magnitudes are
#'     needed
#'
#' @return a vector of magnitudes
#' @export
#'
#' @examples
#'   x <- c(0,0.03,0.3,3,30,300,3000)
#'   magnitude(x)
magnitude <- function(x) {
   return(10^(floor(log10(abs(x)))))
}

#' @title makelabel generates a label from text and values
#'
#' @description It is common to want a label with text and a series of
#'     values. But paste and paste0 cycles the text and the values. To
#'     avoid this makelabel first combines the values as text and then
#'     adds the input text to the front of the values
#'
#' @param txt the input text for the label
#' @param vect the series of values to be included in the label
#' @param sep the separator for the components; defaults to  _
#' @param sigdig how many significant digits for the values; default = 3
#'
#' @return a character string made up of text and values
#' @export
#'
#' @examples
#' pars <- c(18.3319532,33.7935124,3.0378107,6.0194465,0.5815360,0.4270468)
#' makelabel("Cohort1",pars[c(1,3,5)],sep="__")
#' makelabel("Cohort1",pars[c(1,3,5)],sep="__",sigdig=4)
makelabel <- function(txt,vect,sep="_",sigdig=3) {
   tmp <- NULL
   nnum <- length(vect)
   for (i in 1:nnum) tmp <- paste(tmp,round(vect[i],sigdig),sep=sep)
   label <- paste0(txt,tmp)
   return(label)
} # end of makelabel

#' @title outfit tidy print of output from optim, nlminb, or nlm
#'
#' @description outfit takes in the output list from either optim,
#'     nlminb, or nlm and prints it more tidily to the console, In the
#'     case of nlm it also prints the conclusion regarding the
#'     solution. It might be more effective to implement an S3 method.
#'
#' @param inopt the list object output by nlm, nlminb, or optim
#' @param backtran a logical default = TRUE If TRUE it assumes
#'     that the parameters have been log-transformed for stability
#'     and need back-transforming
#' @param digits the number of digits to round the backtransformed 
#'     parameters. defaults to 5.
#' @param title character string used to label the output if desired,
#'     default = empty character string
#' @param parnames default="" which means the estimated parameters
#'     will merely be numbered. If a vector of names is given 
#'     then this will be used instead, at least, for nlm and optim.
#'
#' @return nothing but it does print the list to the console tidily
#' @export
#'
#' @examples
#'  x <- 1:10  # generate power function data from c(2,2) + random
#'  y <- c(2.07,8.2,19.28,40.4,37.8,64.68,100.2,129.11,151.77,218.94)
#'  alldat <- cbind(x=x,y=y)
#'  pow <- function(pars,x) return(pars[1] * x ^ pars[2])
#'  ssq <- function(pars,indat) {
#'     return(sum((indat[,"y"] - pow(pars,indat[,"x"]))^2))
#'  }  # fit a power curve using normal random errors
#'  pars <- c(2,2)
#'  best <- nlm(f=ssq,p=pars,typsize=magnitude(pars),indat=alldat)
#'  outfit(best,backtran=FALSE) #a=1.3134, b=2.2029 ssq=571.5804
outfit <- function(inopt,backtran=TRUE,digits=5,title="",
                   parnames=""){
#  inopt=bestvB; backtran = FALSE; digits=5; title=""; parnames=""
   nlmcode <- c("gradient close to 0, probably solution",
                ">1 iterates in tolerance, probably solution",
                "Either ~local min or steptol too small",
                "iteration limit exceeded",
                "stepmax exceeded ,5 times")
   if (length(grep("value",names(inopt))) > 0) { # optim
      cat("optim solution: ", title,"\n")
      cat("minimum     : ",inopt$value,"\n")
      cat("iterations  : ",inopt$counts," iterations, gradient\n")
      cat("code        : ",inopt$convergence,"\n")
      if (backtran) {
        ans <- cbind(par=inopt$par,transpar=round(exp(inopt$par),digits))
       } else {
        ans <- t(inopt$par)
      }
      if ((length(parnames) > 1) & (length(parnames) == length(inopt$par))) {
         rownames(ans) <- parnames
       } else {
         rownames(ans) <- 1:length(inopt$par)
      }
      print(ans)
      cat("message     : ",inopt$message,"\n")
   } # end of optim
   if (length(grep("minimum",names(inopt))) > 0) {  # nlm - preferred
      cat("nlm solution: ", title,"\n")
      cat("minimum     : ",inopt$minimum,"\n")
      cat("iterations  : ",inopt$iterations,"\n")
      cat("code        : ",inopt$code,nlmcode[inopt$code],"\n")
      if (backtran) {
         ans <- cbind(par=inopt$estimate,gradient=inopt$gradient,
                      transpar=round(exp(inopt$estimate),digits))
         } else {
         ans <- cbind(par=inopt$estimate,gradient=inopt$gradient)
      }
      if ((length(parnames) > 1) & 
          (length(parnames) == length(inopt$estimate))) {
        rownames(ans) <- parnames
      } else {
        rownames(ans) <- 1:length(inopt$estimate)
      }
      print(ans)
   } # end of nlm
   if (length(grep("objective",names(inopt))) > 0) {
      cat("nlminb solution: ", title,"\n")   # nlminb seems to be deprecated
      cat("par        : ",inopt$par,"\n")
      cat("minimum    : ",inopt$objective,"\n")
      cat("iterations : ",inopt$iterations,"\n")
      cat("code       : ",inopt$evaluations," iterations, gradient\n")
      cat("message    : ",inopt$message,"\n")
   }
   if (length(grep("hessian",names(inopt))) > 0) {
      cat("hessian     : \n")
      print(inopt$hessian)
   }
} # end of outfit

#' @title penalty0 enables the adding of a large penalty as one approaches 0.0
#'
#' @description penalty0 allows for the option of adding a large penalty as
#'     a parameter approaches 0.0 . See negLL1 for example code that 
#'     contains such a parameter. For example, when
#'     fitting an spm sometimes the optimal mathematical model fit can occur
#'     by depressing the r value to 0 or even go negative. Input values 
#'     < 0.006 begin to generate large values as one goes smaller. The
#'     examples below illustrate this.
#'
#' @param x the parameter value that potentially incurs a penalty
#'
#' @return a single value as a penalty to be added to a Log-Likelihood or SSQ
#' @export
#'
#' @examples
#'   penalty0(0.5)
#'   penalty0(0.1)
#'   penalty0(0.01)
#'   penalty0(0.005)
penalty0 <- function(x){
  ans <- 100*exp(-1000*x)
  return(ans)
} # end of penalty0

#' @title penalty1 adds an increasingly large penalty as a value approaches 1.0
#'
#' @description penalty1 allows for the option of adding a large penalty as
#'     a parameter approaches 1.0 and moves to become larger than 1. For 
#'     example, when fitting a surplus production model sometimes the 
#'     optimal mathematical model fit can occur by implying catches greater
#'     than available biomass, implying harvest rates > 1.0. By adding a 
#'     large penalty to such values and adding those to the likelihood 
#'     such strange outcomes can be avoided. This will accept a single
#'     value or a vector.
#'
#' @param x the parameter value that potentially incurs a penalty
#'
#' @return a single value as a penalty to be added to a Log-Likelihood or SSQ
#' @export
#'
#' @examples
#'  x <- c(0.5,0.8,0.88,0.9,0.98,0.99,1.01)
#'  round(cbind(x,penalty1(x)),4)
penalty1 <- function(x){
  nl <- length(x)
  ans <- numeric(nl)
  pick <- which(x > 0.5)
  ans[pick] <- 100.0*(abs((1-abs(x[pick])-0.5))/0.5)^50
  return(ans)
} # end of penalty1

#' @title plotfishM plots the catch and optionally the cpue
#'
#' @description plotfishM uses a matrix of fishery data. It requires 
#'     the matrix or data.frame to contain the columns 'year', 
#'     'catch', and optionally 'cpue'.
#'
#' @param fish the matrix or data.frame containing year, catch, and cpue.
#' @param spsname the name of the species concerned
#' @param ce a logical parameter determining whether to plot the cpue or not.
#'     the default = TRUE
#' @param title determines whether or not the spsname is printed at the top
#'     of the plot. Default = TRUE but for a more formal publication it
#'     might need to be set to FALSE, which also reallocates the room
#'     given to the title to the plot.
#' @param fnt the font used in the plot and axes.
#' @param both plot both the catches and the CPUE
#' @param maxy a vector of two zeros. These define the maximum y-axis value
#'     for each plot. If it remains zero then getmax will be used to find
#'     the maximum.
#' @param year the name of the column containing the years, default="year"
#' @param catch the name of the column containing catches, default="catch"
#' @param cpue the name of the column containing cpue, default="cpue"
#'
#' @return plots a graph but returns nothing to the console
#' @export
#'
#' @examples
#' \dontrun{
#'   data(dataspm)
#'   plotfishM(fish=dataspm,spsname="Pink Ling",ce=TRUE)
#' }
plotfishM <- function(fish,spsname="",ce=TRUE,title=TRUE,fnt=7,both=TRUE,
                      maxy=c(0,0),year="year",catch="catch",cpue="cpue") {
  colnames(fish) <- tolower(colnames(fish))
  rows <- 1
  if (ce) rows <- 2
  yrs <- fish[,year]
  oldpar <- par(no.readonly=TRUE)
  on.exit(par(oldpar))
  par(mfrow=c(rows,1),mai=c(0.5,0.45,0.025,0.05))
  if (title) par(oma=c(0.0,0,1.0,0.0))
      else   par(oma=c(0.0,0,0.0,0.0))
  par(cex=0.75, mgp=c(1.35,0.35,0), font.axis=fnt,font=fnt,font.lab=fnt)
  ymax <- maxy[1]
  if (maxy[1] == 0) ymax <- getmax(fish[,catch])
  plot(yrs,fish[,catch],type="l",lwd=2,ylab="Catch",xlab="Year",
       ylim=c(0,ymax),yaxs="i",panel.first = grid())
  if (title)
    mtext(spsname,side=3,cex=1.0,line=0,font=fnt,outer=TRUE)
  if (ce) {
    pickI <- grep(cpue,colnames(fish))
    nce <- length(pickI)
    ymax <- maxy[2]
    if (maxy[2] == 0) ymax <- getmax(fish[,pickI])
    plot(yrs,fish[,pickI[1]],type="l",lwd=2,ylab="CPUE",xlab="Year",
         ylim=c(0,ymax),yaxs="i",panel.first = grid())
    if (both) points(yrs,fish[,pickI[1]],cex=1.0,pch=16)
    if (nce > 1) {
      for (i in 2:nce) {
        lines(yrs,fish[,pickI[i]],lwd=2,col=i)
        if (both) points(yrs,fish[,pickI[i]],col=i,cex=1.0,pch=16)
      }
    }
  }
} # end of plotfishM

#' @title predfreq is used for modal analysis of count data
#' 
#' @description predfreq is used to calculate the expected 
#'     frequencies (counts) from a series of Normal distributions
#'     that are combined to describe observed counts from a 
#'     sample of the size distribution of a population,
#'     which may, or may not be made up of a number of cohorts.
#'     When used with a negative log-likelihood from a
#'     multinomial this can lead to estimates of the mean and
#'     standard deviation of the size of each cohort. Two 
#'     approaches are possible. An approximate method that 
#'     calculates the likelihood at the center of each size-class
#'     and a more precise analytical method that subtracts the
#'     cumulative probability of the bottom bound from each size-
#'     class from the upper bound. Usually the two are very 
#'     close.
#'
#' @param pars a vector of parameters, with the means first,
#'     followed by the standard deviations, followed by the
#'     proportion of total observations in each cohort expect
#'     the last, which is obtained through subtraction. 
#' @param n the sum of counts observed from nature
#' @param sizecl a representation of the size-classes used, these 
#'     can be the mid-points of each size-class or the lower and
#'     upper bounds of each class.
#' @param midval if TRUE, the default, the approximate analytical
#'     approach will be used.
#'
#' @return a vector of expected frequencies
#' @export
#'
#' @examples
#' \dontrun{
#' mids <- seq(6,56,2) #size classes = 2 mm as in 5-7, 7-9,...
#' av <- c(18.0,34.5)   # the means
#' stdev <- c(2.75,5.5)  # the standard deviations
#' prop1 <- 0.55  # the proportion of observations in cohort 1
#' pars <-c(av,stdev,prop1)  # combine parameters into a vector
#' predf <- predfreq(pars,n=262,sizecl=mids,midval=TRUE)
#' oldpar <- par(no.readonly=TRUE)
#' plot1(mids,predf,xlab="Sizes",ylab="Predicted Frequency",lwd=2)
#' par(oldpar)
#' }
predfreq <- function(pars,n,sizecl,midval=TRUE) { 
  cw <- sizecl[2] - sizecl[1]
  c <- (length(pars) + 1)/3 # number of normal cohorts
  props <- tail(pars,(c-1)) # input c-1 proportions, 
  props <- c(props,(1-sum(props))) #the last prop is difference
  nval <- length(sizecl)
  if (midval) { # use approximation
    freqs <- numeric(nval) #nval zero vector hold freqs
    for (i in 1:c) # step through the cohorts
      freqs <- freqs+(n*props[i]*cw)*dnorm(sizecl,pars[i],pars[i+c])
  } else { #use cumulative probablities
    freqs <- numeric(nval-1) #nval-1 zero vector to hold freqs
    for (i in 1:c) { # step through the cohorts  i=1
      tmpcump <- (n*props[i])*pnorm(sizecl,pars[i],pars[i+c])
      freqs <- freqs + (tmpcump[2:nval] - tmpcump[1:(nval-1)]) 
    } 
  } # end of if(midval) statement
  return(freqs)
} # end of predfreq

#' @title printV returns a vector cbinded to 1:length(invect)
#'
#' @description printV takes an input vector and generates another vector of
#'     numbers 1:length(invect) which it cbinds to itself. This is primarily
#'     useful when trying to print out a vector which can be clumsy to read when
#'     print across the screen. applying printV leads to a single vector being
#'     printed down the screen
#'
#' @param invect the input vector to be more easily visualized, this can be
#'     numbers, characters, or logical. If logical the TRUE and FALSE are
#'     converted to 1's and 0's
#' @param label the column labels for the vector, default is 'index' and 'value'
#'
#' @return a dataframe containing the vector 1:length(invect), and invect.
#' @export
#'
#' @examples
#' vec <- rnorm(10,mean=20,sd=2)
#' printV(vec)
#' vec <- letters
#' printV(vec)
#' vec <- c(TRUE,TRUE,TRUE,FALSE,FALSE,TRUE,TRUE,FALSE,FALSE,TRUE,TRUE)
#' printV(vec,label=c("index","logicstate"))
printV <- function(invect,label=c("index","value")) {
   n <- length(invect)
   outvect <- as.data.frame(cbind(1:n,invect))
   colnames(outvect) <- label
   return(outvect)
} # end of print_V

#' @title properties - used to examine the properties of a data.frame
#'
#' @description properties examines some of the properties of a data.frame 
#'     and outputs for each column an index for each variable, how many NAs 
#'     there are, how many unique values there are, the class of each variable,
#'     the minimum and maximum (of all numeric variables), and an example value.
#' @param indat the data.frame containing the columns to be examined.
#' @param dimout determines whether or not the dimensions of the data.frame
#'     are printed to the screen or not; defaults to FALSE
#'     
#' @return a data.frame with the rows being each variable from the 
#'     input data.frame and the columns being the number of NAs, the
#'     number of unique values, the class of variable, and minimum and maximum 
#'     (where the columns are numeric), and an example value.
#' @export
#' 
#' @examples
#' data(abdat)
#' properties(abdat)
properties <- function(indat,dimout=FALSE) {
  dominmax <- function(x) {
    if (length(which(x > 0)) == 0) return(c(NA,NA))
    mini <- min(x,na.rm=TRUE)
    maxi <- max(x,na.rm=TRUE)
    return(c(mini,maxi))
  }
  if(dimout) print(dim(indat))
  isna <- sapply(indat,function(x) sum(is.na(x)))
  uniques <- sapply(indat,function(x) length(unique(x)))
  columns <- length(indat)
  clas <- character(columns)
  for (i in 1:columns) {
    clas[i] <- class(indat[,i])[1]
  }
  numbers <- c("integer","numeric")
  pick <- which(clas %in% numbers)
  minimum <- numeric(length(uniques))
  maximum <- minimum
  for (i in 1:length(pick)) {
    minmax <- dominmax(indat[,pick[i]])
    minimum[pick[i]] <- minmax[1]
    maximum[pick[i]] <- minmax[2]
  }
  index <- 1:length(isna)
  props <- as.data.frame(cbind(index,isna,uniques,clas,round(minimum,4),
                               round(maximum,4),t(indat[1,])))
  colnames(props) <- c("Index","isNA","Unique","Class","Min",
                       "Max","Example")
  return(props)
} # end of properties

#' @title removeEmpty removes empty strings from a vector of strings
#'
#' @description removeEmpty removes empty strings from a vector of 
#'     strings. Such spaces can be created by spurious commas at the 
#'     end of parsed lines. It also removes strings made up only of spaces 
#'     and removes spaces from inside of individual chunks of text. 
#'     This should be useful when reading in data from a custom csv 
#'     file when parsing different formats
#'
#' @param invect a vector of input strings, possibly containing empty strings
#'
#' @return a possibly NULL vector of strings
#' @export
#'
#' @examples
#' x <- c("1","","2","","   ","3"," ","4","","a string","end")
#' x
#' length(x)
#' length(removeEmpty(x))
#' removeEmpty(x)
removeEmpty <- function(invect) {
  tmp <- gsub(" ","",invect) # remove all spaces within strings
  tmp <- tmp[nchar(tmp) > 0] # remove empty strings
  return(tmp)
}

#' @title quants used in apply to estimate quantiles across a vector
#'
#' @description quants used in 'apply' to estimate quantiles across a vector.
#'     Can also be used within tapply, etc.
#' 
#' @param invect vector of values
#' @param probs a vector of quantile, default=c(0.025,0.05,0.5,0.95,0.975)
#' 
#' @return a vector of the probs quantiles
#' @export quants
#' 
#' @examples
#' x <- matrix(rnorm(1000),ncol=10,nrow=100)
#' apply(x,2,quants)
#' apply(x,2,quants,probs=c(0.1,0.5,0.9))
quants <- function(invect,probs=c(0.025,0.05,0.5,0.95,0.975)) {
   ans <- quantile(invect,probs = probs,na.rm=T)
   return(ans)
}

#' @title which.closest find a number in a vector closest to a given value
#'
#' @description which.closest finds either the number in a vector which is
#'     closest to the input value or its index value
#'
#' @param x the value to lookup
#' @param invect the vector in which to lookup the value x
#' @param index should the index be returned or the closest value; default=TRUE
#'
#' @return by default it returns the index in the vector of the value closest to
#'     the input value x
#' @export
#'
#' @examples
#' vals <- rnorm(100,mean=5,sd=2)
#' pick <- which.closest(5.0,vals,index=TRUE) #closest to 5?
#' pick        # the index of the closest
#' vals[pick]  # its value
#' which.closest(5.0,vals,index=FALSE) # straight to the value
which.closest <- function(x,invect,index=TRUE) {
   pick <- which.min(abs(invect-x))
   if (index) {
      return(pick)
   } else {
      return(invect[pick])
   }
} # end of which_.closest

#' @title '\%ni\%' identifies which element in x is NOT in y
#'
#' @param x a vector of elements which can be numeric or character
#' @param y a vector of elements which can be numeric or character
#' 
#' @export
#' 
#' @examples 
#'    x <- 1:10
#'    y <- 6:15
#'    x
#'    y
#'    x[(x %ni% y)]   # are not in y
`%ni%` <- function(x,y) {  
  !(x %in% y)
}
haddonm/MQMF documentation built on Sept. 10, 2023, 10:49 a.m.