library(tidyverse) library(DATRAS)
Lets read in some DATRAS data available in the package
raw <- readExchange(system.file("exchange","Exchange1.zip",package="DATRAS"), verbose = FALSE) d <- raw %>% subset(Species == "Microstomus kitt", haul.id %in% c("2009:1:DEN:DAN2:GOV:69:12", "2009:1:DEN:DAN2:GOV:202:35")) hh <- d[["HH"]] %>% as_tibble() hl <- d[["HL"]] %>% as_tibble() ca <- d[["CA"]] %>% as_tibble()
The addSpectrum
is one of the key function in DATRAS, sort of what a lot of follow up analysis depend on. The input has to be of class "DATRASraw" returning the same class. The addSpectrum does two things:
Here is a glimpse:
d <- addSpectrum(d) N <- d[["HH"]]$N class(N) N
In the tidyverse we would have in the haul-dataframe an object of class list rather than table containing the length frequency counts. An implementation emulating the addSpectrum could be:
grp <- 1 cm.breaks <- seq(min(hl$LngtCm, na.rm = TRUE), max(hl$LngtCm, na.rm=TRUE) + grp, grp) hl2 <- hl %>% select(haul.id, Count, LngtCm) %>% mutate(sizeGroup = cut(LngtCm, breaks = cm.breaks, right = FALSE)) hh2 <- hl2 %>% group_by(haul.id, sizeGroup) %>% summarise(Count = sum(Count)) %>% complete(sizeGroup, fill = list(Count = 0)) %>% ungroup() %>% right_join(hh %>% select(haul.id)) %>% group_by(haul.id) %>% nest() glimpse(hh2) hh2$data
As stated in the help file the addNage "... is just a short-cut for calling 'fitALK' to fit the model followed by 'predict.ALKmodel' and adding numbers-at-age per haul to the DATRASraw object in a variable called 'Nage'." The object Nage is of class matrix:
d <- addNage(d, 1:4) hh <- d[["HH"]] class(hh$Nage) hh %>% select(haul.id, Nage)
Again in the tidyverse we would have used a list, if we were to emulate this type of storing. But before emulating that lets look at this ALK stuff.
Start from scratch:
d <- raw %>% subset(Species == "Microstomus kitt", haul.id %in% c("2009:1:DEN:DAN2:GOV:69:12", "2009:1:DEN:DAN2:GOV:202:35")) %>% addSpectrum(by = 1)
Lets fit an ALK to the data:
fit <- d %>% fitALK(minAge = 1, maxAge = 4, model = "cra~LngtCm", gamma = NA, autoChooseK = FALSE, useBIC = FALSE, varCof = FALSE, maxK = 49) class(fit) pre <- fit %>% # uses predict.ALKmodel predict() class(pre) pre
So we get a matrix of dimension number of hauls x number of age groups where the value is the number at each age group.
Notes the data are contained in the attributes of the fit-object:
attributes(fit)$data[["HH"]] %>% glimpse()
Note: To get the parameters from the fit-object, could do:
ages <- attributes(fit)$ages x <- purrr::map(fit, "coefficients") n <- length(ages) names(x) <- ages cn <- names(x[[1]]) %>% str_extract('[A-Za-z]+') #only letters param <- x %>% purrr::map(as_tibble) %>% bind_rows(.id = "age") %>% mutate(parameter = rep(cn, n)) %>% spread(parameter, value) param
Deep down in the cascading set of functions in DATRAS the core stuff happens within the NageByHaul-function. The steps within the function are:
x <- fit #NageByHaul <- #function(row,x,returnALK=FALSE){ dat = attr(x, "data") # DATRASraw # EINAR: only ONE haul at a time dat <- dat %>% subset(haul.id == "2009:1:DEN:DAN2:GOV:69:12") models = x extraVars= unlist(lapply(lapply(attr(models,"ALKformula"),as.formula),xtraVars,x=dat)) if(length(extraVars)==0) extraVars=NULL; maxAge = length(models)+1; len=attr(dat,"cm.breaks")[1:ncol(dat$N)] N=length(len) cc = 1:N nd = data.frame(LngtCm=len, lat=dat[[2]]$lat, lon=dat[[2]]$lon) # add extra variables is used: #nd[,extraVars]=dat[[2]][row,extraVars] # Seed a probability matrix p = matrix(1,nrow=N,ncol=maxAge); W = getOption("warn") options(warn=-1); ## disable warnings temporarily for(i in 1:(maxAge-1)){ p[,i] = 1 - predict(models[[i]], newdata=nd, type="response", newdata.guaranteed=TRUE); } options(warn=W) ## restore warnings # sidestep --------------------------------------------------------------------- p2 <- p colnames(p2) <- c(attributes(x)$ages, maxAge) p2 %>% as_tibble() %>% mutate(length = len) %>% gather(age, p, -length, convert = FALSE) %>% ggplot(aes(length, p, colour = age)) + geom_line() + geom_point() # end of sidestep -------------------------------------------------------------- predProps=p punc <- function(k,a) { p[k,a]*prod(1-p[k,1:(a-1)]) } for(a in 2:maxAge) { ## unconditional prob of age_i = Pi_i * Prod(1-Pi_j , j=1..i-1) for i>1. [Rindorf,Lewy p.2] predProps[,a] = sapply(cc, punc, a = a) } # sidestep --------------------------------------------------------------------- p2 <- predProps colnames(p2) <- c(attributes(x)$ages, maxAge) p2 %>% as_tibble() %>% mutate(length = len) %>% gather(age, p, -length, convert = FALSE) %>% ggplot(aes(length, p, colour = age)) + geom_line() + geom_point() # end of sidestep -------------------------------------------------------------- #if(!returnALK) { # return(dat[[2]]$N[row,]%*%predProps) #} else { # return( predProps) #} #}
Lets try the tidyverse flow:
d <- raw %>% subset(Species == "Microstomus kitt", haul.id %in% c("2009:1:DEN:DAN2:GOV:69:12", "2009:1:DEN:DAN2:GOV:202:35")) %>% addSpectrum(by = 1) ca <- d[["CA"]] model <- "cra~LngtCm" %>% as.formula() a <- 3 myd <- ca %>% select(haul.id, Age, LngtCm, NoAtALK) %>% filter(Age >= a) %>% mutate(cra = as.factor(Age)) fit <- gam(cra ~ LngtCm, data = myd, family = "binomial", weights = NoAtALK, gamma = NA) class(fit) coefficients(fit) # the predictions nd <- data_frame(LngtCm = 1:100) predict(fit, newdata = nd, type = "response", newdata.guaranteed = TRUE)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.