barrierALK: Age-Length Keys with physical barriers

Documented in ageFish anova.barrierALK barrierALK createALKs logLik.barrierALK predict.barrierALK print.barrierALK print.summary.barrierALK summary.barrierALK

#'Barrier ALK, fit a barrier ALK model to age-growth data to generate ALKs
#'
#' Fit smooth age-length keys with the possibility of incorporating spatial error terms represented by
#' approximations of Guassian Fields using SPDEs
#'
#' @param formula formula for the barrier alk fit
#' @param lengthVar the name of the length variable in the data
#' @param equalSlopes TRUE or FALSE for equal slopes assumption, or formla specifiying which are
#' @param data optionally data frame containing data
#' @param silent whether to run the trace from TMB
#' @param runSA Apply Symbolic Analysis from TMB package in effort to up model
#' @param control control options for optimr optimizer
#' @param optMethod the optimizer within optimr to use
#' @param lambda the value or vector for penalization factors, default is 0, no penalization
#' @param penalty.matrix manually set the penalty matrix to use
#' @param penalize.hyper penalize the hyper parameters for the gf approximations?
#' @param starting.param named list to replace default starting parameter values with
#'
#' @useDynLib spatialBarrier
#' @useDynLib barrierALK
#' @importFrom Rcpp sourceCpp
#' 
#' @export
barrierALK <- function(formula,minAge,maxAge,lengthVar="length",equalSlopes=FALSE,data=NULL,silent=TRUE,runSA=TRUE,control=NULL,...,optMethod="nlminb",cores=parallel::detectCores(),lambda=0,penalty.matrix=NULL,penalize.hyper=FALSE,starting.param=NULL,scale=FALSE,edf.calc=TRUE){
    
    call <- match.call()
    formula <- as.formula(formula)
    tmbD <- interpret.salk(formula,equalSlopes,data,minAge,maxAge,pred=FALSE,lambda=lambda,penalty.matrix=penalty.matrix,penrand=penalize.hyper,starting.param=starting.param,scale=scale)

    if(tmbD$data$model == 0){
        obj <- TMB::MakeADFun(tmbD$data,tmbD$parm,DLL="spatialBarrier",silent=silent,...)
    }else{
        if(!is.null(tmbD$map)){
            obj <- TMB::MakeADFun(tmbD$data,tmbD$parm,random="ranX",DLL="spatialBarrier",silent=silent,map=tmbD$map,...)
        }else{
            obj <- TMB::MakeADFun(tmbD$data,tmbD$parm,random="ranX",DLL="spatialBarrier",silent=silent,...)
        }
                if(runSA == TRUE){
            
            if(exists('runSymbolicAnalysis',where=asNamespace('TMB'),mode='function')){
                TMB::runSymbolicAnalysis(obj)
            }else{
                warning("runSymbolicAnalysis not installed, see TMB install documentation for details.")
            }
        }

    }

    if(optMethod == "parallel"){
        cl <- parallel::makeCluster(cores,type="FORK")
        parallel::setDefaultCluster(cl=cl)
        opt = optimParallel::optimParallel(obj$par,obj$fn,obj$gr,control=control)
        parallel::setDefaultCluster(cl=NULL)
        parallel::stopCluster(cl)
    }else{
        opt <- optimx::optimr(obj$par,obj$fn,obj$gr,control=control,method=optMethod)
    }

    if(edf.calc==TRUE){    
        EDFl <- edf(tmbD,opt$par,obj$env$last.par.best)
        EDF <- EDFl$edf
        LR <- tmbD$other$nullDev + EDFl$dev0
        mAIC <- LR - 2*EDF

        
    }

        
    ret <- list()
    ret$opt <- opt
    ret$obj <- obj
    ret$rep <- obj$report()
    ret$yrep <- tmbD$other$repped$y
    ret$repind <- tmbD$other$repped$subs
    ret$response <- tmbD$other$response
    ret$age <- tmbD$other$age
    class(ret) <- "barrierALK"
    attr(ret,"formula") <- formula
    attr(ret,"data") <- data
    attr(ret,"nobs") <- nrow(data) ##Right now doesn't support weights
    attr(ret,"ageV") <- attr(tmbD,"ageV")
    attr(ret,"equalSlopes") <- equalSlopes
    attr(ret,"lengthVar") <- lengthVar
    attr(ret,"call") <- call
    attr(ret,"Xnames") <- colnames(tmbD$data$X)
    attr(ret,"lambda") <- lambda
    attr(ret,"penalty.matrix") <- penalty.matrix
    attr(ret,"stdobj") <- tmbD$other$stdobj
    attr(ret,"scale") <- scale
    attr(ret,"tmbD") <- tmbD
    attr(ret,"edf") <- ifelse(edf.calc,EDF,length(opt$par))
    attr(ret,"nullDev") <- tmbD$other$nullDev
    attr(ret,"LR") <- LR
    attr(ret,"mAIC") <- mAIC
    print(opt$convergence)
    print(opt$message)
    ret
}

#'Get the log likelihood of a barrierALK fit
#'
#' @param object a fit barrierALK object
#'
#' @export
logLik.barrierALK <- function(object,...)
{
    val = -object$opt$value
    attr(val,"df") = attr(object,"edf")
    attr(val,"nobs") <- attr(object,"nobs")
    class(val) = "logLik"
    val

}


Rpred <- function(X,beta,ranX,A,cohort,ranXind){
    eta = X%*%beta
    AXs = apply(ranX,2,function(x) A%*%x)

    ind = ranXind[cohort+1]+1
    
    eta = sapply(AXs,function(x) as.matrix(eta + x))
    eta[cbind(seq_along(ind),ind)]

}

#'Function to predict from a barrierALK fit
#'
#' This will predict from a barrierALK object with three possible types,
#' the default response will return the unconditional probability of an
#' observation belonging to a class, link the etas and class the predicted
#' class of that observation.
#'
#' @param object the barrierALK fit object
#' @param newdata optionally the newdata to predict on
#' @param type the type of prediction
#'
#' @export
predict.barrierALK <- function(object,newdata=NULL,type="response",...)
{
    if(is.null(newdata)){
        newdata = attr(object,"data")
    }
    form <- attr(object,"formula")
    ages <- attr(object,"ageV")
    
    if(data.table::is.data.table(newdata)){
        newdata = as.data.frame(newdata)
    }

    scale = attr(object,"scale")
    stdobj = attr(object,"stdobj")

    ##Dealing with the the special
    tf <- terms.formula(form,specials="gf")
    terms <- attr(tf,"term.labels")
    gf <- attr(tf,"specials")$gf
    gfInd <- grep("gf\\(",terms)

    if(length(gf) != 0){
        gfFun <- terms[gfInd]
        gfFun <- parse(text=gfFun)
        mcall <- match.call(barrierALK::gf,gfFun[[1]])
        #mcall$loc_x[2] <- quote(newdata())
        #mcall$loc_y[2] <- quote(newdata())
        pgf = mcall
    }else{
        pgf = NULL
    }
    
    
    
    equalSlopes <- attr(object,"equalSlopes")
    predy <- rep(max(ages),nrow(newdata))
    predy <- factor(predy,levels=sort(ages))
    ls = formula.tools::lhs.vars(form)
    newdata[,ls] = predy

    MINAGE = min(ages)
    MAXAGE = max(ages)

    lambda = attr(object,"lambda")
    penalty.matrix = attr(object,"penalty.matrix")

    pred = interpret.salk(form,equalSlopes,newdata,minAge=MINAGE,maxAge=MAXAGE,pgf=pgf,pred=TRUE,lambda=lambda,
                          penalty.matrix=penalty.matrix,scale=scale,stdobj=stdobj)

    if(type == "response" | type == "class"){
        pred$data$p_type = 1
    }else if(type == "link"){
        pred$data$p_type = 0
    }

    pred$data$ages <- length(unique(ages))
    X = pred$data$X
    ages = pred$data$ages-1
    rep <- object$obj$report()
    beta <- rep$beta
    
    if(pred$data$model == 0){
        ##etas = X%*%beta
        ret = predict_fullB(X,beta,ages,pred$data$p_type)
    }else if(pred$data$model == 1 || pred$data$model == 2){
        ranX = rep$ranX
        A = pred$data$A
        cohort = as.integer(pred$data$cohort)-1L
        ranXind = as.integer(pred$data$ranXind)-1L
        ##etas = Rpred(X,beta,ranX,A,cohort,ranXind)
        ret = predict_full(X,beta,ranX,A,cohort,ranXind,ages,pred$data$p_type)
    }else if (pred$data$model == 3 | pred$data$model == 4){
        Xr = rep$Xr
        A = pred$data$A
        cohort = as.integer(pred$data$cohort)-1L
        year = as.integer(pred$data$year)-1L
        ageTimeKey = pred$data$ageTimeKey
        ret = predict_full_AR(X,beta,Xr,A,cohort,year,ageTimeKey,ages,pred$data$p_type)
    }
    

    ##ret = predict_probs(etas,ages,pred$data$p_type)
    

    if(type == "class"){
        ret = apply(ret,1,which.max)
    }

    ret
    
}




#'Print a barrierALK object
#'
#'Print a barrierALK object
#'
#' @param object the barrierALK object to print
#' @param digits the number of digits to round to
#'
#' @export
print.barrierALK <- function(object,digits = 3L,...){
    call <- attr(object,"call")
    cat("\nCall:  ",
        paste(deparse(call), sep = "\n", collapse = "\n"), "\n\n",sep="")
    if(length(object$opt$par)) {
        cat("Coefficients:\n")
        coeff <- object$opt$par
        betN <- attr(object,"Xnames")
        betL <- grep("beta",names(coeff))
        rhoL <- grep("rhoT",names(coeff))
        if(!is.null(rhoL)){
            names(coeff)[rhoL] <- "rho"
            coeff[rhoL] <- object$rep$rho
        }
        names(coeff)[betL] <- betN
        print.default(format(coeff,digits=digits),
                      print.gap = 2, quote=FALSE)
    }else cat("No coefficients\n\n")
    lik <- logLik(object)
    df <- attr(lik,"df")
    nobs <- attr(lik,"nobs")
    aic <- AIC(object)
    bic <- BIC(object)
    cat("\nDegrees of Freedom:",df,"\nNumber of Observations:",nobs,"\nAIC:",format(signif(aic,digits)),"\nBIC:",format(signif(bic,digits)))
    cat("\n")
    invisible(object)
}


#'Summary of a barrierALK object fit
#'
#'Generate a summary of a barrierALK object. This uses and stores a copy of running
#' TMB::sdreport() on the fit object. This can be a little slow for for some models on the first run.
#' Gives a summary only of the fixed effects.
#'
#'@param object the barrierALK object to generate a summary of
#'
#' @export
#'
summary.barrierALK <- function(object){
    sdr <- attr(object,"sdreport")
    if(is.null(sdr)){
        sdr <- TMB::sdreport(object$obj)
        attr(object,"sdreport") <- sdr
    }
    summ <- summary(sdr,select="fixed",p.value=TRUE)
    betL <- grep("beta",rownames(summ))
    betN <- attr(object,"Xnames")
    rownames(summ)[betL] <- betN

    ret <- list()
    ret$coeff <- summ
    ret$AIC <- AIC(object)
    ret$BIC <- BIC(object)
    ret$lik <- logLik(object)
    ret$df <- attr(ret$lik,"df")
    ret$nobs <- attr(ret$lik,"nobs")
    ret$call <- attr(object,"call")
    class(ret) <- "summary.barrierALK"
    ret

}


#'Print a summary of a barrierALK object fit
#'
#' Print a summary of a barrierALK object fit
#'
#' @param object the summary.barrierALK object to print
#'
#' @export
#'
print.summary.barrierALK <- function(object,digits=3L){
    call <- object$call
    cat("\nCall:\n",
        paste(deparse(call),sep="\n",collapse="\n"),"\n\n",sep="")
    if(nrow(object$coeff)){
        cat("Coefficients:\n")
        printCoefmat(object$coeff,digits)
    }else{
        cat("No Coefficients\n")
    }
    cat("\nDegrees of Freedom:",object$df,"\nNumber of Observations:",object$nobs,"\nAIC:",format(signif(object$AIC,digits)),"\nBIC:",format(signif(object$BIC,digits)),"\nLog Likelihood:",format(signif(object$lik,digits)))
    cat("\n")
    invisible(object)
}



#'LRT testing for barrierALK objects
#'
#' @param object barrierALK object to test
#' @param ... other barrierALK objects to test
#'
#' @export
#'
anova.barrierALK <- function(object,...,dispersion=NULL){
    dotargs <- list(...)
    named <- if(is.null(names(dotargs)))
                 rep(FALSE,length(dotargs))
             else(names(dotargs) != "")
    if(any(named))
        warning("The following arguments are invalid and dropped: ",
                paste(dparse(dotargs[named]),collapse=","))
    dotargs <- dotargs[!named]
    is.barrierALK <- unlist(lapply(dotargs,function(x) inherits(x,"barrierALK")))
    dotargs <- dotargs[is.barrierALK]

    ##Dispersion is assumed to be 1 since this is a binomial model
    if(is.null(dispersion))
        dispersion = 1

    if(length(dotargs) > 0){
        n <- attr(logLik(object),"nobs")
        objs <- c(list(object),dotargs)
        ns <- sapply(objs,function(x) attr(logLik(x),"nobs"))
        if(any(ns != n))
            stop("Models do not have the same number of observations")
        resdf <- sapply(objs,function(x) attr(logLik(x),"nobs")-attr(logLik(x),"df"))
        resdev <- sapply(objs,function(x) -2*as.numeric(logLik(x)))

        table <- data.frame(resdf,resdev,c(NA,-diff(resdf)),
                            c(NA,-diff(resdev)))
        variables <- lapply(objs,function(x)
            paste(deparse(attr(x,"formula")),collapse="\n"))
        dimnames(table) <- list(1:length(objs),c("Resid. Df", "Resid. Dev", "Df",
                                                 "Deviance"))
        title <- "Analysis of Deviance Table\n"
        topnote <- paste0("Model ", format(1:length(objs)),": ",
                          variables,collapse="\n")
        df.dispersion <- ifelse(dispersion==1,Inf,min(resdf))
        table <- stat.anova(table=table,test="Chisq",scale=dispersion,df.scale=df.dispersion,n=NULL)

        structure(table,heading = c(title,topnote),
                  class = c("anova","data.frame"))

    }
    else{
        print(summary(object))
    }
}


#'Age fish from long length frequencies
#'
#' Age fish into a long set of age frequencies instead
#'
#' @param object the fit barrierALK object
#' @param longLF the long length frquencies with covariates
#' @param lfname name of the length frequency column
#'
#' @export
#'
ageFish <- function(object,longLF,lfname="length_freq"){
    probs = predict(object,newdata=longLF)

    longLFdf = as.data.frame(longLF)
    
    aged = longLFdf[,lfname]*probs
    aged = as.data.frame(aged)
    names(aged) = paste0("age",sort(attr(object,"ageV")))
    wide = cbind(longLFdf,aged)

    long = tidyr::gather(wide,"age","n",paste0("age",sort(attr(object,"ageV"))))
    long$age = as.numeric(gsub("age","",long$age))
    long
}

        
#'Function to create a smooth ALK from a barrierALK object
#'
#' @param object The barrierALK fit object
#' @param lengthBins vector containing the length bins you want to generate an age-length key for
#' @param setData data you want to apply an ALK to (must contain covariates with same name as in in barrierALK fit)
#'
#' @export
createALKs <- function(object,lengthBins,setData){
    newDat <- reshape::expand.grid.df(as.data.frame(lengthBins),setData)
    lvar <- attr(object,"lengthVar")
    names(newDat)[1] <- lvar
    ALK <- predict(object,newdata=newDat)
    ALK <- split.data.frame(ALK,rep(1:nrow(setData),each=length(lengthBins)))

    ALKs <- list()

    for(i in 1:nrow(setData)){
        ALKs[[i]] = ALK[[i]]
        rownames(ALKs[[i]]) <- lengthBins
        colnames(ALKs[[i]]) <- sort(attr(object,"ageV"))
        class(ALKs[[i]]) <- "predALK"
    }
    ALKs
}

#'Perform k-fold cross validation to try and determine the best value of lambda
#'
#' @param formula the barrierALK formula to use
#' @param k the number of cross validation folds to use, at least 10 recommended
#' @param minAge the minimum age to be included in the model
#' @param maxAge the maximum age to be included in the model as a plus group
#' @param data the data frame containing the data
#' @param lengthVar the name of the length variable in the data
#' @param equalSlopes TRUE or FALSE for equal slopes assumption, or formla specifiying which are
#' @param silent display trace from TMB?
#' @param runSA Apply Symbolic Analysis from TMB package in effort to up model
#' @param control control options for optimr optimizer
#' @param optMethod the optimizer within optimr to use
#' @param lambda vector of lambda values to try and find the optimal value of
#' @param penalty.matrix manually set the penalty matrix to use
#'
#' @export
#'
barrierALKCV <- function(formula,k=10,minAge,maxAge,data,equalSlopes=FALSE,lengthVar="length",silent=TRUE,runSA=FALSE,control,optMethod="nlminb",lambda,penalty.matrix=NULL,scale=FALSE){
    resp = formula.tools::lhs.vars(as.formula(formula))
 
    folds = data
    folds$.folds = balanced.cv.fold(data[,resp],num.cv=k)

    acc <- list()
    olp <- list()
    for(i in 1:k){
        test = folds[folds$.folds == i,]
        train = folds[folds$.folds != i,]

        acc[[i]] <- list()
        for(j in 1:length(lambda)){
            if(i == 1){
                mod = barrierALK(formula,minAge,maxAge,lengthVar,equalSlopes,train,silent,runSA,control,optMethod=optMethod,lambda=lambda[j],penalty.matrix = penalty.matrix,scale=scale)
                pred = predict(mod,newdata=test,type="class")
                acc[[i]][[j]] = sum(test[,resp] == pred)/length(pred)
                olp[[j]] = list(beta=mod$rep$beta,log_ranges=mod$rep$log_ranges,log_sigma_us=mod$rep$log_sigma_us,log_kappas=mod$rep$log_kappas,log_taus=mod$rep$log_taus,rhoT=mod$rep$rhoT,log_range=mod$rep$log_range,log_sigma_u=mod$rep$log_sigma_u)
            }else{
                mod = barrierALK(formula,minAge,maxAge,lengthVar,equalSlopes,train,silent,runSA,control,optMethod=optMethod,lambda=lambda[j],penalty.matrix = penalty.matrix,starting.param=olp[[j]],scale=scale)
                pred = predict(mod,newdata=test,type="class")
                acc[[i]][[j]] = sum(test[,resp] == pred)/length(pred)
                }
                
        }
    }
    ret = data.frame(acc=unlist(acc),lamb=rep(lambda,k))
    ret = dplyr::group_by(ret,lamb)
    ret = dplyr::summarize(ret,acc=mean(acc))
    ret
    }

##Grabbed from bmrm R package
balanced.cv.fold <- function(y,num.cv=10) {
  fold <- factor(rep_len(sample(seq_len(num.cv)),length.out=length(y)))
  i <- sample(seq_along(y))
  o <- order(y[i])
  fold[i[o]] <- fold
  fold
}

#'Calculate the effective degrees of freedom 
edf <- function(tmbD,parP,fullPar){

    tmbD2 = tmbD
    tmbD2$data$lambda = rep(0,length(tmbD$data$lambda))
    
    if(tmbD$data$model == 0){
        objP <- TMB::MakeADFun(tmbD$data,tmbD$parm,DLL="spatialBarrier",silent=TRUE)
        obj0 <- TMB::MakeADFun(tmbD2$data,tmbD$parm,DLL="spatialBarrier",silent=TRUE)
        
        
    }else{
        if(!is.null(tmbD$map)){
            objP <- TMB::MakeADFun(tmbD$data,tmbD$parm,random="ranX",DLL="spatialBarrier",silent=TRUE,map=tmbD$map)
            obj0 <- TMB::MakeADFun(tmbD2$data,tmbD2$parm,random="ranX",DLL="spatialBarrier",silent=TRUE,map=tmbD$map)
            
        }else{
            objP <- TMB::MakeADFun(tmbD$data,tmbD$parm,random="ranX",DLL="spatialBarrier",silent=TRUE)
            obj0 <- TMB::MakeADFun(tmbD2$data,tmbD2$parm,random="ranX",DLL="spatialBarrier",silent=TRUE)
        }
  
    }

    ##Different way of thinking about it
    obj0$env$last.par.best = fullPar
    objP$env$last.par.best = fullPar
    sdr0 <- TMB::sdreport(obj0,parP)
    sdrP <- TMB::sdreport(objP,parP)

    h1 = solve(sdr0$cov.fixed)
    V = sdrP$cov.fixed
    
    
    ## h1 = obj0$he(parP)
    ## h2 = objP$he(parP)
    ## V = solve(h2)

    dev0 = -2*obj0$env$f(fullPar)


    
    edf = sum(diag(h1%*%V))

    ret = list(edf=edf,sdr=sdrP,dev0=dev0)
    
    ret

    
}

#'Find the optimal lambda penalty value using modified AIC
#'
#' @param model the fit barrierALK model to test
#' @param lambda vector of lambda values to test
#'
#' @export
#'
penaltyOptim <- function(model,lambda){
    call = attr(model,"call")
    call$edf.calc=TRUE
    ret = data.frame(lambda=lambda)
    ret$df = NA
    ret$mAIC = NA
    models = list()
    for(i in 1:length(lambda)){
        call$lambda = lambda[i]
        run = eval(call)
        ret$df[i] = attr(run,"edf")
        ret$mAIC[i] = attr(run,"mAIC")
        models[[i]] = run
    }
    bestModel = which.max(ret$mAIC)
    bmod = models[[bestModel]]
    rret = list(table=ret,best.model=bmod)
    class(rret) <- "penopt"
    rret
    
}

#'Print function to print the optimal value table and model
#'
#' @param object the object to print
print.penopt <- function(object,...){
    call <- attr(object$best.model,"call")
    cat("\nBest Model Penalty Factor:\n",
        object$table$lambda[which.max(object$table$lambda)])
    cat("\nBest Model Call:\n",
        paste(deparse(call),sep="\n",collapse="\n"),"\n\n",sep="")
    object$best.model
    cat("\nResults Table:\n")
    object$table
}
jgbabyn/barrierALK documentation built on Dec. 20, 2021, 11:09 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
jgbabyn/barrierALK
Age-Length Keys with physical barriers

R/barrierALK.R
In jgbabyn/barrierALK: Age-Length Keys with physical barriers

Defines functions print.penopt penaltyOptim edf balanced.cv.fold barrierALKCV createALKs ageFish anova.barrierALK print.summary.barrierALK summary.barrierALK print.barrierALK predict.barrierALK Rpred logLik.barrierALK barrierALK

Documented in ageFish anova.barrierALK barrierALK createALKs logLik.barrierALK predict.barrierALK print.barrierALK print.summary.barrierALK summary.barrierALK

R Package Documentation

Browse R Packages

We want your feedback!

jgbabyn/barrierALK Age-Length Keys with physical barriers

R/barrierALK.R In jgbabyn/barrierALK: Age-Length Keys with physical barriers

Defines functions print.penopt penaltyOptim edf balanced.cv.fold barrierALKCV createALKs ageFish anova.barrierALK print.summary.barrierALK summary.barrierALK print.barrierALK predict.barrierALK Rpred logLik.barrierALK barrierALK

Documented in ageFish anova.barrierALK barrierALK createALKs logLik.barrierALK predict.barrierALK print.barrierALK print.summary.barrierALK summary.barrierALK

R Package Documentation

Browse R Packages

We want your feedback!

jgbabyn/barrierALK
Age-Length Keys with physical barriers

R/barrierALK.R
In jgbabyn/barrierALK: Age-Length Keys with physical barriers