R/simSurveyCustom.R

Defines functions strat_data_pALK3 strat_data_pALK2 strat_error_plus compare_methods strat_data_perfect strat_data_pALK strat_data_bClassify strat_data_bALK run_stratC sim_distribution_same

Documented in run_stratC sim_distribution_same strat_data_bALK strat_data_bClassify

###Functions for use with simulation data generated by SimSurvey

#'Equally distribute fish among grid cells
#'
#' Equally distribute fish among grid grid cells based on the assumption that
#' there is no difference in spatial distribution among fish or age or year clustering,etc.
#'
#' @param sim A sim object with abundance
#' @param grid a grid object
#'
#'
#'
sim_distribution_same <- function(sim,grid=make_grid()){
    grid_dat <- data.table::data.table(raster::rasterToPoints(grid))
    data.table::setkeyv(grid_dat,"cell")
    xy <- grid_dat[,c("x","y")]

    na <- length(sim$ages)
    ny <- length(sim$years)
    nc <- nrow(grid_dat)

    prob <- array(rep(1,na*ny*nc),dim=c(na,ny,nc),
                  dimnames = list(age=sim$ages,year=sim$years,cell=grid_dat$cell))

    prob <- prob/nrow(grid_dat)

    N <- replicate(nrow(grid_dat),sim$N)
    dimnames(N) <- dimnames(prob)

    N <- N * prob
    df_N <- as.data.frame.table(prob,responseName="prob",
                                stringsAsFactors=FALSE)
    df_N <- data.table::data.table(df_N,N=c(N))
    df_N$year <- as.numeric(df_N$year)
    df_N$age <- as.numeric(df_N$age)
    df_N$cell <- as.numeric(df_N$cell)
    df_N$prob <- NULL
    data.table::setkeyv(df_N,"cell")
    c(sim,list(grid=grid,grid_xy=grid_dat,sp_N=df_N))

}


#'Version of SimSurvey run_strat to accept other strat_data function versionss
#'
#' @param sim Simulation from sim_survey
#' @param strat_data closure for calculating stratifed data
#'
#'
#'
run_stratC <- function(sim,strat_data=strat_data()){
    length_group <- get("length_group", envir = environment(sim$sim_length))
    data <- strat_data(sim, length_group = length_group)
    data$setdet <- data$setdet[, c("sim", "year", "division",
        "strat", "strat_area", "tow_area", "set", "n"), with = FALSE]
    data$lf <- merge(data$setdet[, setdiff(names(data$setdet),
        "n"), with = FALSE], data$lf, by = "set", all = TRUE)
    data$af <- merge(data$setdet[, setdiff(names(data$setdet),
        "n"), with = FALSE], data$af, by = "set", all = TRUE)
    strat_args <- list(data = data$setdet, metric = "n", strat_groups = c("sim",
        "year", "division", "strat", "strat_area", "tow_area"),
        survey_groups = c("sim", "year"))
    sim$total_strat <- do.call(strat_means, strat_args)
    strat_args$data <- data$lf
    strat_args$strat_groups <- c(strat_args$strat_groups, "length")
    strat_args$survey_groups <- c(strat_args$survey_groups, "length")
    sim$length_strat <- do.call(strat_means, strat_args)
    strat_args$data <- data$af
    strat_args$strat_groups[strat_args$strat_groups == "length"] <- "age"
    strat_args$survey_groups[strat_args$survey_groups == "length"] <- "age"
    sim$age_strat <- do.call(strat_means, strat_args)
    sim
}

#'Write one for GAM version?
#' Crusty

#'Alternative strat_data for barrierALK objects
#'
#' This is an alternative for strat_data utilitizing the results of barrierALK
#'
#' @param sim Simulation from sim_survey
#' @param length_group the length group
#' @param formula the barrierALK formula
#' @param bALKopts list of options for barrierALK
#' @param cores number of cores to use, on Windows ignored.
#' 
#'
#' @import data.table
#' @import foreach
#' @import doParallel
#'
#' @export
#'
strat_data_bALK <- function(formula,
                            b.opts=list(minAge=1,maxAge=13,equalSlopes=FALSE,
                                        silent=TRUE,runSA=TRUE,
                                        control=list(eval.max=5000,iter.max=5000)),
                            mesh=NULL,barrier.triangles=NULL,cores=parallel::detectCores()){
    function(sim,length_group=3){
        if(.Platform$OS.type == "windows"){
            cores = 1
        }
        setdet <- sim$setdet
        samp <- sim$samp
        samp <- merge(setdet[,list(set,sim,year,x,y)], samp,by =c("set"))
        lf <- samp
        lf[, `:=`(n, .N), by = "set"]
        lf <- lf[lf$measured, ]
        lf[, `:=`(n_lengths, .N), by = "set"]
        lf$length <- group_lengths(lf$length, length_group)
        lf[, `:=`(ratio, n_lengths/n)]
        lf <- lf[, list(length_freq = .N), by = c("set", "ratio",
                                                  "length","x","y")]
        lf[, `:=`(length_freq, length_freq/ratio)]
        lf[, `:=`(ratio, NULL)]
        data.table::setkeyv(lf, c("set", "length"))
        cj <- data.table::CJ(set = setdet$set, length = sort(unique(lf$length)),
                             unique = TRUE)
        data.table::setkeyv(cj, c("set", "length"))
        cj <- merge(setdet[, list(set, sim, year,x,y)], cj, by = "set",
                    all = TRUE)
        lf <- merge(cj, lf, by = c("set", "length","x","y"), all = TRUE)
        lf$length_freq[is.na(lf$length_freq)] <- 0
        ag <- samp
        ag <- ag[ag$aged, ]
        ag$len_cat <- group_lengths(ag$length, length_group)
        agS <- split(ag, ag$sim)
        models <- parallel::mclapply(agS, function(x) {
            x <- as.data.frame(x)
            barrierALK::barrierALK(formula, b.opts$minAge, b.opts$maxAge,
                                   "length", b.opts$equalSlopes, x, b.opts$silent, b.opts$runSA,
                                   b.opts$control,optMethod=b.opts$optMethod)
        },mc.cores=cores)
        converged <- lapply(models,function(x) x$opt$message) 
        lfS = split(lf,lf$sim)
        aged <- parallel::mcmapply(function(x,y){
            mat = ageFish(x,y)
            #mat = mat[mat$n !=0,]
            mat},models,lfS,SIMPLIFY=FALSE,mc.cores=cores)
        aged <- do.call(rbind,aged)
        aged <- as.data.table(aged)
        af <- aged[,list(n=sum(n)),by=c("set","age","x","y")]

        lf <- lf[, c("set", "length", "length_freq"), with = FALSE]
        setkeyv(lf, c("set", "length"))
        setkeyv(af, c("set", "age"))
        setnames(lf, "length_freq", "n")
        list(setdet = setdet, lf = lf, af = af,ager="barrierALK",models=models,converged=converged)
    }
}

#'Instead of making an ALK just classify fish
#'
#'
#' @param sim Simulation from sim_survey
#' @param length_group the length group
#' @param formula the barrierALK formula
#' @param bALKopts list of options for barrierALK
#'
#' @import data.table
#' @import foreach
#' @import doParallel
#'
#'
#'
strat_data_bClassify <- function(formula,
                            b.opts=list(minAge=1,maxAge=13,equalSlopes=FALSE,
                                        silent=TRUE,runSA=TRUE,
                                        control=list(eval.max=5000,iter.max=5000)),
                            mesh=NULL,barrier.triangles=NULL){
    function(sim,length_group=3){
        setdet <- sim$setdet
        samp <- sim$samp
        samp <- merge(setdet[,list(set,sim,year,x,y)], samp,by =c("set"))
        lf <- samp
        lf[, `:=`(n, .N), by = "set"]
        lf <- lf[lf$measured, ]
        lf[, `:=`(n_lengths, .N), by = "set"]
        lf$length <- group_lengths(lf$length, length_group)
        lf[, `:=`(ratio, n_lengths/n)]
        lf <- lf[, list(length_freq = .N), by = c("set", "ratio",
                                                  "length","x","y")]
        lf[, `:=`(length_freq, length_freq/ratio)]
        lf[, `:=`(ratio, NULL)]
        data.table::setkeyv(lf, c("set", "length"))
        cj <- data.table::CJ(set = setdet$set, length = sort(unique(lf$length)),
                             unique = TRUE)
        data.table::setkeyv(cj, c("set", "length"))
        cj <- merge(setdet[, list(set, sim, year,x,y)], cj, by = "set",
                    all = TRUE)
        lf <- merge(cj, lf, by = c("set", "length","x","y"), all = TRUE)
        lf$length_freq[is.na(lf$length_freq)] <- 0
        ag <- samp
        ag <- ag[ag$aged, ]
        ag$len_cat <- group_lengths(ag$length, length_group)
        agS <- split(ag, ag$sim)
        models <- parallel::mclapply(agS, function(x) {
            barrierALK::barrierALK(formula, b.opts$minAge, b.opts$maxAge,
                                   "length", b.opts$equalSlopes, x, b.opts$silent, b.opts$runSA,
                                   b.opts$control)
        },mc.cores=parallel::detectCores())
        lfS = split(lf,lf$sim)

        ##lf <- lf[, c("set", "length", "length_freq"), with = FALSE]
        setkeyv(lf, c("set", "length"))
        ##setkeyv(af, c("set", "age"))
        setnames(lf, "length_freq", "n")
        list(setdet = setdet, lf = lf, af = NULL,ager="barrierALK",models=models)
    }
}


#'Alternative strat_data for barrierALK objects with penalization
#'
#' This is an alternative for strat_data utilitizing the results of barrierALK
#'
#' @param sim Simulation from sim_survey
#' @param length_group the length group
#' @param formula the barrierALK formula
#' @param bALKopts list of options for barrierALK
#' @param cores number of cores to use, on Windows ignored.
#' 
#'
#' @import data.table
#' @import foreach
#' @import doParallel
#'
#' @export
#'
strat_data_pALK <- function(formula,
                            b.opts=list(minAge=1,maxAge=13,equalSlopes=FALSE,
                                        silent=TRUE,runSA=TRUE,
                                        control=list(eval.max=5000,iter.max=5000),lambda=0,penalty.matrix=NULL,penalize.hyper=FALSE,scale=TRUE),
                            mesh=NULL,barrier.triangles=NULL,cores=parallel::detectCores()){
    function(sim,length_group=3){
        if(.Platform$OS.type == "windows"){
            cores = 1
        }
        setdet <- sim$setdet
        samp <- sim$samp
        samp <- merge(setdet[,list(set,sim,year,x,y)], samp,by =c("set"))
        lf <- samp
        lf[, `:=`(n, .N), by = "set"]
        lf <- lf[lf$measured, ]
        lf[, `:=`(n_lengths, .N), by = "set"]
        lf$length <- group_lengths(lf$length, length_group)
        lf[, `:=`(ratio, n_lengths/n)]
        lf <- lf[, list(length_freq = .N), by = c("set", "ratio",
                                                  "length","x","y")]
        lf[, `:=`(length_freq, length_freq/ratio)]
        lf[, `:=`(ratio, NULL)]
        data.table::setkeyv(lf, c("set", "length"))
        cj <- data.table::CJ(set = setdet$set, length = sort(unique(lf$length)),
                             unique = TRUE)
        data.table::setkeyv(cj, c("set", "length"))
        cj <- merge(setdet[, list(set, sim, year,x,y)], cj, by = "set",
                    all = TRUE)
        lf <- merge(cj, lf, by = c("set", "length","x","y"), all = TRUE)
        lf$length_freq[is.na(lf$length_freq)] <- 0
        ag <- samp
        ag <- ag[ag$aged, ]
        ag$len_cat <- group_lengths(ag$length, length_group)
        agS <- split(ag, ag$sim)
        models <- parallel::mclapply(agS, function(x) {
            x <- as.data.frame(x)
            barrierALK::barrierALK(formula, b.opts$minAge, b.opts$maxAge,
                                   "length", b.opts$equalSlopes, x, b.opts$silent, b.opts$runSA,
                                   b.opts$control,optMethod=b.opts$optMethod,lambda=b.opts$lambda,penalty.matrix=b.opts$penalty.matrix,penalize.hyper = b.opts$penalize.hyper,scale=b.opts$scale)
        },mc.cores=cores)
        converged <- lapply(models,function(x) x$opt$message) 
        lfS = split(lf,lf$sim)
        aged <- parallel::mcmapply(function(x,y){
            mat = ageFish(x,y)
            #mat = mat[mat$n !=0,]
            mat},models,lfS,SIMPLIFY=FALSE,mc.cores=cores)
        aged <- do.call(rbind,aged)
        aged <- as.data.table(aged)
        af <- aged[,list(n=sum(n)),by=c("set","age","x","y")]

        lf <- lf[, c("set", "length", "length_freq"), with = FALSE]
        setkeyv(lf, c("set", "length"))
        setkeyv(af, c("set", "age"))
        setnames(lf, "length_freq", "n")
        list(setdet = setdet, lf = lf, af = af,ager="barrierALK",models=models,converged=converged)
    }
}

#'Perfect aging, perfect measurement
#'
#' This is for aging the fish perfectly. 100% success and seeing what exactly is the least attainable error
#' by an aging method is for comparison.
#'
#' Prepare simulated data for stratified analysis
#'
#' @description Generate set details (setdet), length-frequency (lf)
#' and age-frequency (af) data for stratified analysis
#'
#' @param sim           Simulation from \code{\link{sim_survey}}
#' @param length_group  Size of the length frequency bins
#'
#' @export
#'
strat_data_perfect <- function() {
    function(sim=sim,length_group=3){
        ## Extract setdet and samp objects, and add sim and year to samp data
        setdet <- sim$setdet
        samp <- sim$samp
        samp <- merge(setdet[, list(set, sim, year)], samp, by = "set")

        ## Construct length-frequency table
        lf <- samp
        lf[, n := .N, by = "set"]
        lf <- lf[lf$measured, ]
        lf[, n_lengths := .N, by = "set"]
        lf$length <- group_lengths(lf$length, length_group)
        lf[, ratio := n_lengths / n] # calculate ratio measured
        lf <- lf[, list(length_freq = .N),
                 by = c("set", "ratio", "length")]
        lf[, length_freq := length_freq / ratio] # scale frequencies
        lf[, ratio := NULL] # discard ratio column (no longer needed)
        setkeyv(lf, c("set", "length"))

        ## Add zeros to length-frequency table
        cj <- CJ(set = setdet$set, length = sort(unique(lf$length)), unique = TRUE)
        setkeyv(cj, c("set", "length"))
        cj <- merge(setdet[, list(set, sim, year)], cj, by = "set", all = TRUE)
        lf <- merge(cj, lf, by = c("set", "length"), all = TRUE)
        lf$length_freq[is.na(lf$length_freq)] <- 0 # replace NA's with 0's

        ## Construct age-length key by sim and year
        ag <- samp
        af <- ag[,.N,by=c("set","age")]

        
        ## Add zeros to age-frequency table (this step may be unnecessary)
        ages <- c(NA, seq(min(ag$age, na.rm = TRUE), max(ag$age, na.rm = TRUE)))
        gridd <- expand.grid(set=setdet$set,age=ages)
        af <- dplyr::left_join(gridd,af)
        af$N[is.na(af$N)] <- 0
        af <- as.data.table(af)

        ## Simplify/order and return data
        lf <- lf[, c("set", "length", "length_freq"), with = FALSE]
        setkeyv(lf, c("set", "length"))
        setkeyv(af, c("set", "age"))
        setnames(lf, "length_freq", "n")
        setnames(af, "N", "n")
        list(setdet = setdet, lf = lf, af = af)
    }
}
 
#' Compare to perfect, how well is the aging method performing relative to best possible performance?
#'
#' This function runs after strat_error() and will compare how well it improves on best possible survey performance with
#' perfect information about aging information
#'
#' @param sim the simulation object after strat_error()
#'
#' @export
#'
compare_methods <- function(sim){
    ##First, find the results of the perfect run
    
    perfect = SimSurvey::run_strat(sim,strat_fun = strat_data_perfect())
    perfect = SimSurvey::strat_error(perfect)

    age_strat_comp = dplyr::full_join(sim$age_strat_error,perfect$age_strat_error,suffix=c(".method",".perfect"),by=c("year","sim","age"))
    age_strat_comp$error_I_hat = age_strat_comp$I_hat.perfect=age_strat_comp$I_hat.method
                                      
    age_strat_comp_stats = SimSurvey::error_stats(age_strat_comp$error_I_hat)

    age_strat_comp_mult = sim$age_strat_error_stats/perfect$age_strat_error_stats

    sim$age_strat_comp = age_strat_comp
    sim$age_strat_comp_stats = age_strat_comp_stats
    sim$age_strat_comp_mult = age_strat_comp_mult

    
    sim

}


#'Custom version of SimSurvey's strat_error to support plus groups
#'
#' @export
#'
strat_error_plus <- function(sim,plus_group=10) {

  ## total_strat
  I_hat <- sim$total_strat[, list(sim, year, total)]
  names(I_hat) <- c("sim", "year", "I_hat")
  I <- data.frame(year = sim$years, I = colSums(sim$I))
  comp <- merge(I_hat, I, by = "year")
  comp$error <- comp$I_hat - comp$I
  means <- error_stats(comp$error)
  sim$total_strat_error <- comp
  sim$total_strat_error_stats <- means

  ## length_strat
  I_hat <- sim$length_strat[, list(sim, year, length, total)]
  names(I_hat) <- c("sim", "year", "length", "I_hat")
  sly <- expand.grid(sim = seq(max(sim$total_strat$sim)),
                     year = sim$years, length = sim$lengths)
  I_hat <- merge(sly, I_hat, by = c("sim", "year", "length"), all = TRUE) # expand to all lengths
  I_hat$I_hat[is.na(I_hat$I_hat)] <- 0                                    # fill missing lengths with zero
  I <- as.data.frame.table(sim$I_at_length, responseName = "I")
  I$year <- as.numeric(as.character(I$year))
  I$length <- as.numeric(as.character(I$length))
  comp <- merge(data.table(I_hat), data.table(I), by = c("year", "length"))
  comp$error <- comp$I_hat - comp$I
  means <- error_stats(comp$error)
  sim$length_strat_error <- comp
  sim$length_strat_error_stats <- means

  ## age_strat
  I_hat <- sim$age_strat[, list(sim, year, age, total)]
  names(I_hat) <- c("sim", "year", "age", "I_hat")
  say <- expand.grid(sim = seq(max(sim$total_strat$sim)),
                     year = sim$years, age = sim$ages)
  I_hat <- merge(say, I_hat, by = c("sim", "year", "age"), all = TRUE) # expand to all ages
  I_hat$I_hat[is.na(I_hat$I_hat)] <- 0                                 # fill missing ages with zero
  I <- as.data.frame.table(sim$I, responseName = "I")
  I$year <- as.numeric(as.character(I$year))
  I$age <- as.numeric(as.character(I$age))
  #I <- I[,lapply(.SD,sum,na.rm=TRUE),by=age]
  #I_hat <- I_hat[,lapply(.SD,sum,na.rm=TRUE),by=age]  
  comp <- merge(data.table(I_hat), data.table(I), by = c("year", "age"))
  comp$error <- comp$I_hat - comp$I
  comp$age[comp$age > plus_group] = plus_group
  comp <- dplyr::group_by(comp,year,age,sim)
  comp <- dplyr::summarise_all(comp,sum)
  comp <- data.table::as.data.table(comp)  
    
  means <- error_stats(comp$error)
  sim$age_strat_error <- comp
    sim$age_strat_error_stats <- means

    ##By strata and age
    I_by_strat = dplyr::left_join(sim$strat_I_hat,sim$strat_I)
    I_by_strat$error = I_by_strat$N - I_by_strat$total
    I_by_strat = I_by_strat[,c("year","age","strat","total","N","error")]
    names(I_by_strat) = c("year","age","strat","I_hat","I","error")
    I_by_strat$age[I_by_strat$age > plus_group] = plus_group
    I_by_strat = dplyr::group_by(I_by_strat,year,age,strat)
    I_by_strat = dplyr::summarise_all(I_by_strat,sum)
    sim$I_by_strat = I_by_strat
    

    sim$I_by_strat_stats = error_stats(I_by_strat$error[!is.na(I_by_strat$age)])
    

  sim

}


    
#'Alternative strat_data for barrierALK objects with penalization
#'
#' This is an alternative for strat_data utilitizing the results of barrierALK
#'
#' @param formula the barrierALK formula
#' @param b.opts list of options
#' @param mesh mesh to use
#' @param barrier.triangles list of barrier.triangles
#' @param cores the number of cores to use
#' @param k the number of cross validation folds to use
#' 
#'
#' @import data.table
#' @import foreach
#' @import doParallel
#'
#' @export
#'
strat_data_pALK2 <- function(formula,
                            b.opts=list(minAge=1,maxAge=13,equalSlopes=FALSE,
                                        silent=TRUE,runSA=TRUE,
                                        control=list(eval.max=5000,iter.max=5000),lambda=0,penalty.matrix=NULL,penalize.hyper=FALSE,scale=TRUE),
                            mesh=NULL,barrier.triangles=NULL,cores=parallel::detectCores(),k=10){
    function(sim,length_group=3){
        if(.Platform$OS.type == "windows"){
            cores = 1
        }
        setdet <- sim$setdet
        samp <- sim$samp
        samp <- merge(setdet[,list(set,sim,year,x,y)], samp,by =c("set"))
        lf <- samp
        lf[, `:=`(n, .N), by = "set"]
        lf <- lf[lf$measured, ]
        lf[, `:=`(n_lengths, .N), by = "set"]
        lf$length <- group_lengths(lf$length, length_group)
        lf[, `:=`(ratio, n_lengths/n)]
        lf <- lf[, list(length_freq = .N), by = c("set", "ratio",
                                                  "length","x","y")]
        lf[, `:=`(length_freq, length_freq/ratio)]
        lf[, `:=`(ratio, NULL)]
        data.table::setkeyv(lf, c("set", "length"))
        cj <- data.table::CJ(set = setdet$set, length = sort(unique(lf$length)),
                             unique = TRUE)
        data.table::setkeyv(cj, c("set", "length"))
        cj <- merge(setdet[, list(set, sim, year,x,y)], cj, by = "set",
                    all = TRUE)
        lf <- merge(cj, lf, by = c("set", "length","x","y"), all = TRUE)
        lf$length_freq[is.na(lf$length_freq)] <- 0
        ag <- samp
        ag <- ag[ag$aged, ]
        ag$len_cat <- group_lengths(ag$length, length_group)
        agS <- split(ag, ag$sim)

        
        models <- parallel::mclapply(agS, function(x) {
            x <- as.data.frame(x)
            lambs = barrierALKCV(formula,k,b.opts$minAge,b.opts$maxAge,x,b.opts$equalSlopes,"length",b.opts$silent,b.opts$runSA,b.opts$control,b.opts$optMethod,b.opts$lambda,b.opts$penalty.matrix)
            ulambda=lambs$lamb[which.max(lambs$acc)]
            barrierALK::barrierALK(formula, b.opts$minAge, b.opts$maxAge,
                                   "length", b.opts$equalSlopes, x, b.opts$silent, b.opts$runSA,
                                   b.opts$control,optMethod=b.opts$optMethod,lambda=ulambda,penalty.matrix=b.opts$penalty.matrix,penalize.hyper = b.opts$penalize.hyper,scale=b.opts$scale)
        },mc.cores=cores)
        converged <- lapply(models,function(x) x$opt$message) 
        lfS = split(lf,lf$sim)
        aged <- parallel::mcmapply(function(x,y){
            mat = ageFish(x,y)
            #mat = mat[mat$n !=0,]
            mat},models,lfS,SIMPLIFY=FALSE,mc.cores=cores)
        aged <- do.call(rbind,aged)
        aged <- as.data.table(aged)
        af <- aged[,list(n=sum(n)),by=c("set","age","x","y")]

        lf <- lf[, c("set", "length", "length_freq"), with = FALSE]
        setkeyv(lf, c("set", "length"))
        setkeyv(af, c("set", "age"))
        setnames(lf, "length_freq", "n")
        list(setdet = setdet, lf = lf, af = af,ager="barrierALK",models=models,converged=converged)
    }
}

#'Alternative strat_data for barrierALK objects with penalization using modified AIC
#'
#' This is an alternative for strat_data utilitizing the results of barrierALK
#'
#' @param formula the barrierALK formula
#' @param b.opts list of options
#' @param mesh mesh to use
#' @param barrier.triangles list of barrier.triangles
#' @param cores the number of cores to use
#' @param k the number of cross validation folds to use
#' 
#'
#' @import data.table
#' @import foreach
#' @import doParallel
#'
#' @export
#'
strat_data_pALK3 <- function(formula,
                            b.opts=list(minAge=1,maxAge=13,equalSlopes=FALSE,
                                        silent=TRUE,runSA=TRUE,
                                        control=list(eval.max=5000,iter.max=5000),lambda=0,penalty.matrix=NULL,penalize.hyper=FALSE,scale=TRUE),
                            mesh=NULL,barrier.triangles=NULL,cores=parallel::detectCores()){
    function(sim,length_group=3){
        if(.Platform$OS.type == "windows"){
            cores = 1
        }
        setdet <- sim$setdet
        samp <- sim$samp
        samp <- merge(setdet[,list(set,sim,year,x,y)], samp,by =c("set"))
        lf <- samp
        lf[, `:=`(n, .N), by = "set"]
        lf <- lf[lf$measured, ]
        lf[, `:=`(n_lengths, .N), by = "set"]
        lf$length <- group_lengths(lf$length, length_group)
        lf[, `:=`(ratio, n_lengths/n)]
        lf <- lf[, list(length_freq = .N), by = c("set", "ratio",
                                                  "length","x","y")]
        lf[, `:=`(length_freq, length_freq/ratio)]
        lf[, `:=`(ratio, NULL)]
        data.table::setkeyv(lf, c("set", "length"))
        cj <- data.table::CJ(set = setdet$set, length = sort(unique(lf$length)),
                             unique = TRUE)
        data.table::setkeyv(cj, c("set", "length"))
        cj <- merge(setdet[, list(set, sim, year,x,y)], cj, by = "set",
                    all = TRUE)
        lf <- merge(cj, lf, by = c("set", "length","x","y"), all = TRUE)
        lf$length_freq[is.na(lf$length_freq)] <- 0
        ag <- samp
        ag <- ag[ag$aged, ]
        ag$len_cat <- group_lengths(ag$length, length_group)
        agS <- split(ag, ag$sim)

        models <- parallel::mclapply(agS,function(x) {
            x <- as.data.frame(x)
            lambda = b.opts$lambda
            model = barrierALK(formula,b.opts$minAge,b.opts$maxAge,"length",equalSlopes=b.opts$equalSlopes,data=x,
                               silent=b.opts$silent,runSA=b.opts$runSA,control=b.opts$control,optMethod=b.opts$optMethod,
                               lambda=0,penalty.matrix = b.opts$penalty.matrix,scale=b.opts$scale)
            call = attr(model,"call")
            call$edf.calc=TRUE
            ret = data.frame(lambda=lambda)
            ret$df = NA
            ret$mAIC = NA
            models = list()
            for(i in 1:length(lambda)){
                call$lambda = lambda[i]
                run = eval(call)
                ret$df[i] = attr(run,"edf")
                ret$mAIC[i] = attr(run,"mAIC")
                models[[i]] = run
            }
            bestModel = which.max(ret$mAIC)
            bmod = models[[bestModel]]
            bmod

        },mc.cores=cores)
        
        converged <- lapply(models,function(x) x$opt$message) 
        lfS = split(lf,lf$sim)
        aged <- parallel::mcmapply(function(x,y){
            mat = ageFish(x,y)
            #mat = mat[mat$n !=0,]
            mat},models,lfS,SIMPLIFY=FALSE,mc.cores=cores)
        aged <- do.call(rbind,aged)
        aged <- as.data.table(aged)
        af <- aged[,list(n=sum(n)),by=c("set","age","x","y")]

        lf <- lf[, c("set", "length", "length_freq"), with = FALSE]
        setkeyv(lf, c("set", "length"))
        setkeyv(af, c("set", "age"))
        setnames(lf, "length_freq", "n")
        list(setdet = setdet, lf = lf, af = af,ager="barrierALK",models=models,converged=converged)
    }
}
jgbabyn/barrierALK documentation built on Dec. 20, 2021, 11:09 p.m.