#' simTrawlHaulsDatras
#' @description Simulates trawl hauls with only length information used in the bootstrap procedure.
#' Keep the number of trawl ahuls within each statistical rectangle fixed, and sample them with replacement from the whole RFA.
#' @param RFA Roundfish area number.
#' @param year The year of interest.
#' @param quarter The quarter of interest.
#' @export
#' @return Returns simulations of the dataras-data with length information on a similar format as the data used in the functions for calculating the CPUEs
#' @examples
simTrawlHaulsDatras = function(RFA,year, quarter,hl_hh,ca_hh,nSimHauls = NULL)
{
#TODO: Ideally we should clean this to use only hh-data when sampling.
#Extract the data of interest-------------------------
dataOfInterest = hl_hh[!is.na(hl_hh$Year) & hl_hh$Year == year&
!is.na(hl_hh$Quarter) & hl_hh$Quarter == quarter&
!is.na(hl_hh$Roundfish) & hl_hh$Roundfish == RFA ,]
#-----------------------------------------------------
#Simulate trawl hauls---------------------------------
haulsID = unique(dataOfInterest$haul.id)
if(is.null(nSimHauls)){
nSim = length(haulsID) #Simualte as many hauls as was taken
}else{
nSim = nSimHauls #Simulate a predefined given number of hauls
}
simHauls = sample(haulsID,nSim,replace = T)
#Define a help variable for selecting statistical rectangle to each simulated haul----
rectangleID = rep(NA,length(haulsID))
for(i in 1:length(haulsID))
{
rectangleID[i] = as.character(unique(dataOfInterest$StatRec[dataOfInterest$haul.id== haulsID[i]]))
}
if(!is.null(nSimHauls)){#Shuffel and expand the rectangleID. This is done to not oversample any statisical rectangle
rectangleID = sample(rectangleID,length(rectangleID),replace = FALSE)
rectangleID = c(rectangleID, sample(rectangleID,length(rectangleID),replace = FALSE))
rectangleID = c(rectangleID, sample(rectangleID,length(rectangleID),replace = FALSE))
}
#-----------------------------------------------------
simDataHL = list(NULL)
simDataCA = list(NULL)
for(i in 1:nSim)
{
simDataHL[[i]]= dataOfInterest[dataOfInterest$haul.id==simHauls[i],]
simDataHL[[i]]$haul.id = paste(simDataHL[[i]]$haul.id,i) #Needs unique haul.id, which is achived here.
simDataHL[[i]]$StatRec = rectangleID[i] #Overwrite the statstical rectangle.
if(sum(ca_hh$haul.id==simHauls[i])>0){
simDataCA[[i]] = ca_hh[ca_hh$haul.id==simHauls[i],]
simDataCA[[i]]$haul.id = paste(simDataCA[[i]]$haul.id,i) #Needs unique haul.id, which is achived here.
simDataCA[[i]]$StatRec = rectangleID[i] #Overwrite the statstical rectangle.
}
}
simCAToBeReturned = do.call(rbind.data.frame,simDataCA)
simHLToBeReturned = do.call(rbind.data.frame,simDataHL)
#----------------------------------------------------
toReturn = list()
toReturn$ca_hh = simCAToBeReturned
toReturn$hl_hh = simHLToBeReturned
return(toReturn)
}
#' simTrawlHaulsCAStratified
#' @description Simulates trawl hauls with both length and age information used in the bootstrap procedure.
#' @param RFA Roundfish area number.
#' @param year The year of interest.
#' @param quarter The quarter of interest.
#' @export
#' @return Returns simulations of the dataras-data with both length and age information on a similar format as the data used in the functions for calculating the CPUEs
#' Sample stratisfied with respect to the length observed. This gives meaning since it is assumed that the ALK is similar in the whole roundfish area.
#' @examples
simCAdatras = function(RFA,year, quarter,data,species = "Gadus morhua")
{
#Extract the data of interest-------------------------
dataOfInterest = data[!is.na(data$Year) & data$Year == year&
!is.na(data$Quarter) & data$Quarter == quarter&
!is.na(data$Roundfish) & data$Roundfish == RFA &
!is.na(data$Species) & data$Species == species &
!is.na(data$Age),]
#-----------------------------------------------------
if(dim(dataOfInterest)[1]==0){
return(dataOfInterest)
}
#Simulate trawl hauls---------------------------------
lengths = unique(sort(floor(dataOfInterest$LngtCm)))
simData = list(NULL)
for(i in 1:length(lengths)){
l = lengths[i]
n = sum(floor(dataOfInterest$LngtCm)==l) #If we are to reduce the number of otoliths we mush change this value.
if(n >1)
{
dTmp = dataOfInterest[floor(dataOfInterest$LngtCm)==l,]
dTmp = sample_n(dTmp, size = n,replace = TRUE)
}else{ #Only one observed age with this length, sample either that or one close with equal probability
if(l<max(lengths) & l>min(lengths))
{
distDown = l-lengths[i-1]
distUp = lengths[i+1] -l
if(distUp == distDown) distDown = distDown + 0.5 - runif(1) #Go up or down with equal probablity
if(distDown<distUp)
{
extra = which(floor(dataOfInterest$LngtCm)==lengths[i-1])
}else if(distDown>distUp){
extra = which(floor(dataOfInterest$LngtCm)==lengths[i+1])
}
}else if(l==min(lengths)){
extra = which(floor(dataOfInterest$LngtCm)==lengths[i+1])
}else if(l==max(lengths)){
extra = which(floor(dataOfInterest$LngtCm)==lengths[i-1])
}
whichToAdd = sample(length(extra),1) #Extract one which is sampled with 0.5 probability
dTmp = rbind(dataOfInterest[floor(dataOfInterest$LngtCm)==l,],
dataOfInterest[extra[whichToAdd],])
dTmp$LngtCm = l
dTmp = sample_n(dTmp, size = n,replace = TRUE)
}
simData[[l]]= dTmp
}
simDataToBeReturned = do.call(rbind.data.frame,simData)
#------------------------------------------------------
return(simDataToBeReturned)
}
#' simHaulsStratified
#' @description Simulates trawl hauls with only length information used in the bootstrap procedure.
#' @param RFA Roundfish area number.
#' @param year The year of interest.
#' @param quarter The quarter of interest.
#' @export
#' @return Returns simulations of the dataras-data with length information on a similar format as the data used in the functions for calculating the CPUEs.
#' The simulated trawl hauls are simulated stratisfied on the statistical rectangles. TODO: choose a procedure if there is only one observation in the statistical recangle, I suggest to include the closest trawl haul no matter which statistical recangles it is assosiated with..
#' @examples
simHaulsStratified = function(RFA,year, quarter,dataHH, loc = NULL)
{
#Extract the data of interest-------------------------
dataOfInterest = dataHH[!is.na(dataHH$Year) & dataHH$Year == year&
!is.na(dataHH$Quarter) & dataHH$Quarter == quarter&
!is.na(dataHH$Roundfish) & dataHH$Roundfish == RFA ,]
#-----------------------------------------------------
if(dim(dataOfInterest)[1] ==1){
warning(paste("Only one haul in the whole RFA: ", RFA, sep = ""))
dataOfInterest$originalIdAtThisLocation = toString(dataOfInterest$haul.id[1])
return(dataOfInterest)
}
#Simulate trawl hauls---------------------------------
statRec = unique(dataOfInterest$StatRec)
simData = list(NULL)
for(i in 1:length(statRec))
{
rec = statRec[i]
trawls = unique(dataOfInterest$haul.id[dataOfInterest$StatRec==rec])
lon = dataOfInterest$lon[is.element(dataOfInterest$haul.id,trawls)]
lat = dataOfInterest$lat[is.element(dataOfInterest$haul.id,trawls)]
if(length(trawls)==1)
{
idClosest = loc$shortesDist[which(loc$uniqueId == trawls)]
toSample = c(toString(idClosest),toString(trawls[1]))
if(length(idClosest)==0){
toSample = toString(trawls[1])
print("There was an haul with zero information in HL-data. This was strange, we should use the HH data when defining all hauls, this is TODO")
}
sampledTreawls = sample(toSample,1)
dTmp = dataOfInterest[dataOfInterest$haul.id==sampledTreawls,]
dTmp$StatRec = rec
dTmp$lon = lon
dTmp$lat = lat
dTmp$originalIdAtThisLocation = toString(trawls[1])
}else{
sampledTreawls = sample(trawls,length(trawls),replace = TRUE)
dTmp = dataOfInterest[dataOfInterest$haul.id==sampledTreawls[1],]
dTmp$lon = lon[1]
dTmp$lat = lat[1]
dTmp$originalIdAtThisLocation = toString(trawls[1])
for(j in 2:length(trawls))
{
add = dataOfInterest[dataOfInterest$haul.id==sampledTreawls[j],]
add$lon = lon[j]
add$lat = lat[j]
add$originalIdAtThisLocation = toString(trawls[j])
dTmp = rbind(dTmp,add)
}
}
simData[[i]]= dTmp
}
simDataToBeReturned = do.call(rbind.data.frame,simData)
#----------------------------------------------------
return(simDataToBeReturned)
}
#' sampleCA
#' @description .
#' @param ca_hh The quarter of interest.
#' @export
#' @return Returns a modified data set of the data used for calculating the CPUE. The data is modified by removing
#' observations in a certain procedure.
#' @examples
sampleCA = function(ca_hh,species,quarter,lengthDivision,samplesWithinEachIntervall,hl_hh){
toReturn = ca_hh[1,]
for(id in unique(ca_hh$haul.id)){
obsThisHaul = ca_hh[which(ca_hh$haul.id==id),]
obsReduced = sampleCAHaul(obsThisHaul,lengthDivision, samplesWithinEachIntervall,species = species,hl_hh = hl_hh)
toReturn = rbind(toReturn,obsReduced)
}
toReturn = toReturn[-1,]
return(toReturn)
}
#' sampleCAHaul
#' @description .
#' @param datDetailed The quarter of interest.
#' @param RFA The roundfish area of interest.
#' @export
#' @return Returns a modified data set of the data used for calculating the CPUE. The data is modified by removing
#' observations in a certain procedure.
#' @examples
sampleCAHaul = function(obsThisHaul,lengthDivision,samplesWithinEachIntervall,species,hl_hh){
toReturn = obsThisHaul[1,]
for(i in 2:(length(lengthDivision)+1)){
if(i <= length(lengthDivision)){
obsInside = which(obsThisHaul$LngtCm>=lengthDivision[i-1] & obsThisHaul$LngtCm<lengthDivision[i])
}else if(i>length(lengthDivision)){
obsInside = which(obsThisHaul$LngtCm>=lengthDivision[i-1])
}
if(length(obsInside)>1){
whichLengthsInside = sort(unique(floor(obsThisHaul$LngtCm[obsInside])))
psaudoPopulation = NULL
counter = 1
for(j in 1:length(whichLengthsInside)){
insideThisLength = round(obsInHL(species = species,hl_hh = hl_hh,id = unique(obsThisHaul$haul.id),length = whichLengthsInside[j]))
nTmp = sum(floor(obsThisHaul$LngtCm)==whichLengthsInside[j])#Number of observed ages in this sub length group
repeatedObs = rep(which(floor(obsThisHaul$LngtCm)==whichLengthsInside[j]),
floor(insideThisLength/nTmp))
extraObs = sample(which(floor(obsThisHaul$LngtCm)==whichLengthsInside[j]),(insideThisLength %% nTmp),replace = FALSE)
if(insideThisLength!=0){
psaudoPopulation[counter:(counter + insideThisLength-1)] = c(repeatedObs,extraObs)
counter = counter + insideThisLength
}else{
#This should not happen, a length was reported in CA data but not in HL data.
psaudoPopulation[counter:((counter + length(c(which(floor(obsThisHaul$LngtCm)==whichLengthsInside[j]),extraObs))-1))] = c(which(floor(obsThisHaul$LngtCm)==whichLengthsInside[j]),extraObs)
counter = counter + length(c(which(floor(obsThisHaul$LngtCm)==whichLengthsInside[j]),extraObs))
}
}
if(length(psaudoPopulation)<length(obsInside)){
warning(paste("Haul with ID ", obsThisHaul$haul.id[1], " in year ", obsThisHaul$Year[1], " had more age observations of length ", i," than lenght observations." ))
}
nSample = min(samplesWithinEachIntervall,length(obsInside),length(psaudoPopulation))
obsSelected = sample(psaudoPopulation,size = nSample, replace = FALSE) #Sample without replecement from the psaudo population
}else{
obsSelected = obsInside
}
toReturn = rbind(toReturn,obsThisHaul[obsSelected,])
}
toReturn = toReturn[-1,]
return(toReturn)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.