# R/StoxExport.R


#' Convert AcousticData to ICESAcousticData.
#'
#' Note that this function only supports \code{\link{AcousticData}} objects created by reading ICES acoustic XML files.
#'
#' @param AcousticData A \code{\link{AcousticData}} object from an ICES acoustic XML format file.
#'
#' @return An \code{\link{ICESAcousticData}} object.
#'
#' @export
ICESAcoustic <- function(AcousticData){
	
	ICESAcousticData <- lapply(
		AcousticData, 
		AcousticDataToICESAcousticOne
	)
	
	return(ICESAcousticData)
}
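
# A minimal usage sketch (not run). The input file name is hypothetical, and it is
# assumed that the ICES acoustic XML file is read with RstoxData::ReadAcoustic():
#   AcousticData <- RstoxData::ReadAcoustic(FileNames = "acoustic_ices.xml")
#   ICESAcousticData <- ICESAcoustic(AcousticData)
#   # One ICES CSV-format string matrix per input file:
#   csvMatrices <- WriteICESAcoustic(ICESAcousticData)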



AcousticDataToICESAcousticOne <- function(AcousticDataOne){
	
	if(AcousticDataOne$metadata$useXsd=='icesAcoustic'){
		
		ICESAcousticDataOne <- AcousticData_ICESToICESAcousticOne(AcousticDataOne)
	}
	else{
		stop('StoX: Only the ICES acoustic format is supported.')
	}
	
	return(ICESAcousticDataOne)
}


AcousticData_ICESToICESAcousticOne <- function(AcousticData_ICESOne){
	
	# Make a copy as we are modifying by reference:
	ICESAcousticDataOne <- data.table::copy(AcousticData_ICESOne)
	
	# Remove echo type, as this is not included in the CSV:
	ICESAcousticDataOne$Data$EchoType<-NULL
	
	
	independentTables <- c("Instrument", "Calibration", "DataAcquisition", "DataProcessing")
	hierarchicalTables <- c("Cruise", "Log", "Sample", "Data")
	tablesToKeep <- c(independentTables, hierarchicalTables)
	
	# Add the Survey to the Cruise table, with a hack to get the last line (until it has been fixed so that only the code and not the value is present):
	ICESAcousticDataOne$Cruise$Survey <- utils::tail(ICESAcousticDataOne$Survey$Code, 1)
	
	# Translate according to the vocabulary:
	if(length(ICESAcousticDataOne$vocabulary)) {
		vocabulary <- findVariablesMathcinigVocabulary(
			vocabulary = ICESAcousticDataOne$vocabulary, 
			data = ICESAcousticDataOne[tablesToKeep]
		)
		# Uniqueify since some columns (keys) are present in several tables:
		vocabulary <- unique(vocabulary)
		
		ICESAcousticDataOne[tablesToKeep] <- translateVariables(
			data = ICESAcousticDataOne[tablesToKeep], 
			Translation = vocabulary, 
			translate.keys = TRUE, 
			warnMissingTranslation = FALSE
		)
	}
	
	
	
	# Check metadata against ICES definitions:
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_TransducerLocation.xml',unique(ICESAcousticDataOne$Instrument$TransducerLocation))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_TransducerBeamType.xml',unique(ICESAcousticDataOne$Instrument$TransducerBeamType))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_AcquisitionMethod.xml',unique(ICESAcousticDataOne$Calibration$AcquisitionMethod))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_ProcessingMethod.xml',unique(ICESAcousticDataOne$Calibration$ProcessingMethod))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_DataAcquisitionSoftwareName.xml',unique(ICESAcousticDataOne$DataAcquisition$SoftwareName))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_StoredDataFormat.xml',unique(ICESAcousticDataOne$DataAcquisition$StoredDataFormat))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_DataAcquisitionSoftwareName.xml',unique(ICESAcousticDataOne$DataAcquisition$SoftwareName))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_DataProcessingSoftwareName.xml',unique(ICESAcousticDataOne$DataProcessing$SoftwareName))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_TriwaveCorrection.xml',unique(ICESAcousticDataOne$DataProcessing$TriwaveCorrection))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_OnAxisGainUnit.xml',unique(ICESAcousticDataOne$DataProcessing$OnAxisGainUnit))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/ISO_3166.xml',unique(ICESAcousticDataOne$Cruise$Country))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/SHIPC.xml',unique(ICESAcousticDataOne$Cruise$Platform))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/EDMO.xml',unique(ICESAcousticDataOne$Cruise$Organisation))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_Survey.xml',unique(ICESAcousticDataOne$Survey$Code))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_LogOrigin.xml',unique(ICESAcousticDataOne$Log$Origin))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_LogValidity.xml',unique(ICESAcousticDataOne$Log$Validity))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_PingAxisIntervalType.xml',unique(ICESAcousticDataOne$Sample$PingAxisIntervalType))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_PingAxisIntervalUnit.xml',unique(ICESAcousticDataOne$Sample$PingAxisIntervalUnit))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_PingAxisIntervalOrigin.xml',unique(ICESAcousticDataOne$Sample$PingAxisIntervalOrigin))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_SaCategory.xml',unique(ICESAcousticDataOne$Data$SaCategory))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_AcousticDataType.xml',unique(ICESAcousticDataOne$Data$Type))
	compareICES('https://acoustic.ices.dk/Services/Schema/XML/AC_DataUnit.xml',unique(ICESAcousticDataOne$Data$Unit))
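	
	# A minimal sketch (not run) of the kind of check compareICES is assumed to perform:
	# download the vocabulary XML and flag observed values that are not among the allowed
	# keys (the xml2 calls mirror the SpecWoRMS check in BioticData_NMDToICESBioticOne below):
	#   vocabXML <- xml2::read_xml('https://acoustic.ices.dk/Services/Schema/XML/AC_LogOrigin.xml')
	#   allowedKeys <- xml2::xml_text(xml2::xml_find_all(vocabXML, "//Code//Key"))
	#   observed <- unique(ICESAcousticDataOne$Log$Origin)
	#   observed[!observed %in% allowedKeys]   # values that would trigger a warning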
	
	#### Rename columns to start with the table name:
	# Rename the independent tables:
	independentTablesColumnNames <- lapply(ICESAcousticDataOne[independentTables], names)
	independentTablesNewColumnNames <- mapply(paste0, independentTables, independentTablesColumnNames)
	mapply(
		data.table::setnames, 
		ICESAcousticDataOne[independentTables], 
		old = independentTablesColumnNames, 
		new = independentTablesNewColumnNames
	)
	
	# Rename the hierarchical tables and take care of the keys:
	renameToTableNameFirst(
		ICESAcousticDataOne, 
		tableNames = hierarchicalTables, 
		setToID = independentTables, 
		formatType = "Acoustic"
	)
	
	# Merge the Log, Sample and Data to make the merged Data table:
	hierarchicalTablesSansCruise <- setdiff(hierarchicalTables, "Cruise")
	LogSampleData <- RstoxData::mergeDataTables(ICESAcousticDataOne[hierarchicalTablesSansCruise], output.only.last = TRUE, all = TRUE)
	LogSampleData <- unique(LogSampleData)
	
	# Create the output:
	ICESAcousticDataOne <- c(
		ICESAcousticDataOne[c("Instrument", "Calibration", "DataAcquisition", "DataProcessing", "Cruise")],
		list(Data = LogSampleData)
	)
	
	# Move ID columns last:
	ICESAcousticDataOne <- lapply(ICESAcousticDataOne, moveIDsLast)
	
	return(ICESAcousticDataOne)
}





#' Writes \code{\link{ICESAcousticData}} to a csv file for each input acoustic file used to create the \code{\link{ICESAcousticData}}
#'
#' @param ICESAcousticData An \code{\link{ICESAcousticData}} object obtained from an ICES acoustic XML format file.
#'
#' @return List of string matrices in the ICES acoustic CSV format.
#'
#' @export
#' 
WriteICESAcoustic <- function(ICESAcousticData){
	
	WriteICESAcousticData <- lapply(
		ICESAcousticData, 
		WriteICESAcousticOne
	)
	
	return(WriteICESAcousticData)
}


WriteICESAcousticOne <- function(ICESAcousticDataOne){
	
	# Convert all tables to string matrices with header and record rows, and rbind:
	ICESAcousticCSVDataOne <- convertToHeaderRecordMatrix(ICESAcousticDataOne)
	ICESAcousticCSVDataOne <- expandWidth(ICESAcousticCSVDataOne)
	
	# Stack all matrices:
	ICESAcousticCSVDataOne <- do.call(rbind, ICESAcousticCSVDataOne)
	
	return(ICESAcousticCSVDataOne)
}
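
# A minimal sketch (not run) of writing one of the returned matrices to a CSV file.
# The file name is hypothetical; in StoX the actual writing is handled by the framework:
#   m <- WriteICESAcousticOne(ICESAcousticData[[1]])
#   utils::write.table(m, file = "ICESAcoustic.csv", sep = ",", na = "",
#                      row.names = FALSE, col.names = FALSE, quote = FALSE)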




# Function to convert a list of ICESAcoustic data to a list of tables with Header and Record rows, fitting the ICES CSV formats:
convertToHeaderRecordMatrix <- function(ICESData) {
	# Run through the table names and convert to Header, Record, and stringify:
	lapply(names(ICESData), createHeaderRecordMatrix, ICESData = ICESData)
}

createHeaderRecordMatrix <- function(ICESDataTableName, ICESData) {
	
	thisTable <- ICESData[[ICESDataTableName]]
	# # Move IDs last:
	# endsWithID <- endsWith(names(thisTable), "ID")
	# data.table::setorderv(thisTable, c(names(thisTable)[!endsWithID], names(thisTable)[!endsWithID]))
	
	header <- c(
		ICESDataTableName, 
		"Header", 
		#paste0(ICESDataTableName, names(thisTable))
		names(thisTable)
	)
	
	record <- cbind(
		ICESDataTableName, 
		"Record", 
		# Convert all columns to string:
		as.matrix(thisTable)
	)
	
	unname(rbind(header, record))
}
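
# Illustration (not run) of the Header/Record layout produced above, using a tiny
# hypothetical one-row table:
#   ICESData <- list(Cruise = data.table::data.table(Country = "NO", Platform = "58AA"))
#   createHeaderRecordMatrix("Cruise", ICESData = ICESData)
#   #      [,1]     [,2]     [,3]      [,4]
#   # [1,] "Cruise" "Header" "Country" "Platform"
#   # [2,] "Cruise" "Record" "NO"      "58AA"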


# Function to convert a list of ICES data (e.g. ICESDatras) to a list of tables with a RecordType column, fitting the ICES CSV formats:
convertToRecordTypeMatrix <- function(ICESData) {
	# Run through the table names and convert to Header, Record, and stringify:
	lapply(names(ICESData), createRecordTypeMatrix, ICESData = ICESData)
}

createRecordTypeMatrix <- function(ICESDataTableName, ICESData) {
	
	thisTable <- ICESData[[ICESDataTableName]]
	
	header <- c(
		"RecordType", 
		names(thisTable)
	)
	
	record <- cbind(
		ICESDataTableName, 
		# Convert all columns to string, but use trim = FALSE to avoid left padding with spaces for integers:
		#as.matrix(thisTable, trim = TRUE)
		#as.matrix(format(thisTable, trim = TRUE))
		trimws(as.matrix(thisTable))
	)
	
	unname(rbind(header, record))
}


moveIDsLast <- function(x) {
	# Move IDs last:
	endsWithID <- endsWith(names(x), "ID")
	data.table::setcolorder(x, c(names(x)[!endsWithID], names(x)[endsWithID]))
}
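
# Illustration (not run) with a hypothetical table: columns ending in "ID" are moved
# (by reference) to the end:
#   x <- data.table::data.table(InstrumentID = 1, LogDistance = 100, CruiseLocalID = "c1")
#   moveIDsLast(x)
#   names(x)   # "LogDistance" "InstrumentID" "CruiseLocalID"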


expandWidth <- function(x, na = NA) {
	# Get the number of columns and rows:
	ncols <- sapply(x, ncol)
	nrows <- sapply(x, nrow)
	# Create matrices of NAs with dimension so that added to the original data we end up with identical number of rows:
	dims <- cbind(nrows, max(ncols) - ncols)
	dimsList <- split(dims, seq_along(x))
	NAArrays <- mapply(array, dim = dimsList, SIMPLIFY = FALSE, MoreArgs = list(data = na))
	mapply(cbind, x, NAArrays, SIMPLIFY = FALSE)
}
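
# Illustration (not run) with hypothetical toy matrices: all matrices are padded with
# NA columns up to the width of the widest one, so that they can later be stacked with rbind:
#   x <- list(matrix("a", nrow = 1, ncol = 2), matrix("b", nrow = 2, ncol = 4))
#   expandWidth(x)
#   # The first matrix gets two extra NA columns; the second is returned unchanged.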









#' Convert BioticData to ICESBiotic format
#'
#' Given a \code{\link{BioticData}} object, this function converts it to the ICESBiotic format. Note that this function only supports
#' \code{BioticData} generated from NMDBiotic version 3 and 3.1 XML files, or read directly from ICES biotic XML files.
#'
#' @param BioticData A \code{BioticData} object from an XML file in NMD biotic version 3 (or 3.1) format, or in ICES biotic format.
#' @param SurveyName A string naming the survey. Must be one of the names listed on \url{https://vocab.ices.dk/?ref=1453} or NONE (the default).
#' @param Country The ISO_3166 code of the country running the cruise. See \url{http://vocab.ices.dk/?ref=337}.
#' @param Organisation An integer code representing the organization running the cruise. See \url{https://vocab.ices.dk/?ref=1398} for a list of possible codes (e.g., Institute of Marine Research: 612).
#' @param AllowRemoveSpecies ICES submission will not allow the resulting CSV file to be uploaded if the file contains species not listed in
#' \url{https://acoustic.ices.dk/Services/Schema/XML/SpecWoRMS.xml}. Setting this parameter to TRUE will remove the unlisted species records.
#'        
#' @return An \code{\link{ICESBioticData}} object.
#'
#' @export
ICESBiotic <- function(
	BioticData, 
	SurveyName = character(), 
	Country = character(), 
	Organisation = integer(), 
	AllowRemoveSpecies = TRUE
) {

	# Convert to ICESBiotic:
	ICESBioticData <- lapply(
		BioticData, 
		BioticDataToICESBioticOne, 
		SurveyName = SurveyName,
		Country = Country,
		Organisation = Organisation,
		AllowRemoveSpecies = AllowRemoveSpecies
	)
	
	return(ICESBioticData)
} 
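
# A minimal usage sketch (not run). The file name is hypothetical, and it is assumed that
# the NMDBiotic v3 XML file is read with RstoxData::ReadBiotic(). The Organisation code 612
# (Institute of Marine Research) and Country "NO" follow the parameter documentation above:
#   BioticData <- RstoxData::ReadBiotic(FileNames = "biotic_v3.xml")
#   ICESBioticData <- ICESBiotic(
#       BioticData,
#       SurveyName = "NONE",
#       Country = "NO",
#       Organisation = 612,
#       AllowRemoveSpecies = TRUE
#   )
#   csvMatrices <- WriteICESBiotic(ICESBioticData)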






BioticDataToICESBioticOne <- function(
	BioticDataOne, 
	SurveyName = character(), 
	Country = character(), 
	Organisation = integer(),  
	AllowRemoveSpecies = TRUE
) {
	
	if(BioticDataOne$metadata$useXsd %in% "icesBiotic") {
		ICESBioticDataOne <- BioticData_ICESToICESBioticOne(BioticDataOne)
	}
	else if(BioticDataOne$metadata$useXsd %in% c("nmdbioticv3", "nmdbioticv3.1")) {
		ICESBioticDataOne <- BioticData_NMDToICESBioticOne(
			BioticDataOne, 
			SurveyName = SurveyName,
			Country = Country,
			Organisation = Organisation,
			AllowRemoveSpecies = AllowRemoveSpecies
		)
	}
	else {
		warning("StoX: Only NMD Biotic version 3 and 3.1 data can be converted to ICESBiotic (was ", BioticDataOne$metadata$useXsd, " for the file ", BioticDataOne$metadata$file, ". NA returned.")
		return(NULL)
	}
	
	
	# Copy the data to prepare for using data.tables by reference functions (set* and :=)
	hierarchicalTables <- c("Cruise", "Haul", "Catch", "Biology")
	ICESBioticDataOne <- ICESBioticDataOne[hierarchicalTables]
	
	# Create a table of the original and new column names, but remove keys:
	renameToTableNameFirst(
		ICESBioticDataOne, 
		tableNames = hierarchicalTables, 
		formatType = "Biotic"
	)
	
	# Move ID columns last:
	ICESBioticDataOne <- lapply(ICESBioticDataOne, moveIDsLast)
	
	return(ICESBioticDataOne)
}


BioticData_ICESToICESBioticOne <- function(BioticData_ICESOne) {
	
	# Make a copy for safety:
	BioticDataOne <- data.table::copy(BioticData_ICESOne)
	
	# Add the Survey to the Cruise table, with a hack to get the last line (until it has been fixed so that only the code and not the value is present):
	BioticDataOne$Cruise$Survey <- utils::tail(BioticDataOne$Survey$Code, 1)
	
	
	tablesToTranslate <- c("Cruise", "Haul", "Catch", "Biology")
	
	# Apply translations defined in the table 'vocabulary':
	if(length(BioticDataOne$vocabulary)) {
		vocabulary <- findVariablesMathcinigVocabulary(
			vocabulary = BioticDataOne$vocabulary, 
			data = BioticDataOne[tablesToTranslate]
		)
		# Uniqueify since some columns (keys) are present in several tables:
		vocabulary <- unique(vocabulary)
		
		BioticDataOne[tablesToTranslate] <- translateVariables(
			data = BioticDataOne[tablesToTranslate], 
			Translation = vocabulary, 
			translate.keys = TRUE, 
			warnMissingTranslation = FALSE
		)
	}
	
	
	return(BioticDataOne)
}

BioticData_NMDToICESBioticOne <- function(
	BioticData_NMDOne, 
	SurveyName = character(), 
	Country = character(), 
	Organisation = integer(), 
	AllowRemoveSpecies = TRUE
) {
	
	cruiseRaw <- BioticData_NMDOne$mission
	
	# Check that Survey, Country and Organisation are given:
	if(!all(length(SurveyName), length(Country), length(Organisation))) {
		stop("All of SurveyName, Country and Organisation must be given.")
	}
	
	Cruise <- cruiseRaw[, .(
		Survey = ..SurveyName,
		Country = ..Country,
		Organisation = ..Organisation,
		Platform = getICESShipCode(platformname),
		StartDate = gsub("Z", "", missionstartdate),
		EndDate = gsub("Z", "", missionstopdate),
		LocalID = cruise
	)]
	
	
	haulRaw <- merge(cruiseRaw, BioticData_NMDOne$fishstation)
	
	Haul <- haulRaw[, .(
		LocalID = cruise,
		Gear = gear, # Is gear assumed to be formatted correctly in the input file?
		Number = serialnumber,
		StationName = station,
		StartTime = ifelse(is.na(stationstartdate) | is.na(stationstarttime), NA, gsub("Z", " ", paste0(stationstartdate, substr(stationstarttime, 1, 5)))),
		Duration = getTimeDiff(stationstartdate, stationstarttime, stationstopdate, stationstoptime),
		Validity = getHaulValiditySimple(gearcondition, samplequality),
		StartLatitude = latitudestart,
		StartLongitude = longitudestart,
		StopLatitude = latitudeend,
		StopLongitude = longitudeend,
		StatisticalRectangle = getICESrect(latitudestart, longitudestart),
		MinTrawlDepth = ifelse(is.na(fishingdepthmin), fishingdepthmax, fishingdepthmin),
		MaxTrawlDepth = fishingdepthmax,
		BottomDepth = ifelse(bottomdepthstop > fishingdepthmax, bottomdepthstop, NA),
		Distance = getDistanceMeter(latitudestart, longitudestart, latitudeend, longitudeend),
		Netopening = verticaltrawlopening,
		CodendMesh = NA,
		#SweepLength = getGOVSweepByEquipment(gear),
		SweepLength = sweeplength,
		GearExceptions = NA, # Should this be set?
		DoorType = trawldoortype,
		# Warp length in integer meter: 
		WarpLength = round(wirelength),
		# Warp diameter in integer millimeter: 
		WarpDiameter = round(wirediameter),
		# Warp density in integer kg per linear meter of warp.: 
		WarpDensity = round(wiredensity),
		DoorSurface = trawldoorarea,
		# Door weight in integer kilograms.
		DoorWeight = round(trawldoorweight),
		DoorSpread = trawldoorspread,
		WingSpread = wingspread,
		Buoyancy = NA,
		KiteArea = NA,
		GroundRopeWeight = NA,
		Rigging = NA,
		Tickler = NA,
		HydrographicStationID = NA,
		# Direction is integer degrees: 
		TowDirection = round(direction),
		SpeedGround = NA,
		SpeedWater = gearflow,
		# Wind direction is integer degrees: 
		WindDirection = winddirection,
		# Wind speed is meter per second: 
		WindSpeed = round(windspeed),
		SwellDirection = NA,
		SwellHeight = NA,
		#LogDistance = NA,
		LogDistance = logstart,
		Stratum = NA
	)]
	
	catchRaw <- merge(BioticData_NMDOne$catchsample, haulRaw, by = intersect(names(BioticData_NMDOne$catchsample), names(haulRaw)))
	
	# We must filter records with aphia == NA
	catchRaw <- catchRaw[!is.na(aphia)]
	
	Catch <- catchRaw[, .(
		LocalID = cruise,
		Gear = gear,
		Number = serialnumber,
		SpeciesCode = aphia,
		SpeciesCategory = catchpartnumber,
		DataType = "R", # Always raw for NMDBiotic, and on 2021-04-21 only R is implemented by ICES at all
		SpeciesValidity = ifelse(is.na(catchproducttype), 0, catchproducttype), # What is meant here? SpeciesValidity can only be 0 or 1, but catchproducttype has a range of values. Should it be ifelse(is.na(catchproducttype), 0, 1)?
		SpeciesCategoryNumber = catchcount,
		WeightUnit = "kg", # Always kg in NMDBiotic (see http://www.imr.no/formats/nmdbiotic/)
		SpeciesCategoryWeight = catchweight,
		SpeciesSex = NA,
		SubsampledNumber = lengthsamplecount,
		SubsamplingFactor = catchcount / lengthsamplecount,
		SubsampleWeight = lengthsampleweight,
		LengthCode = NA, # NMDBiotic has no way of storing a length distribution.
		LengthClass = NA, # NMDBiotic has no way of storing a length distribution.
		#LengthType = "1", # Should not this be interpreted from the catchsample$lengthmeasurement ???
		LengthType = lengthmeasurement, 
		NumberAtLength = lengthsamplecount,
		WeightAtLength = NA # Not relevant for NMDBiotic. Used in the Baltic Acoustic Survey, as noted in the documentation of ICESBiotic.
	)]
	
	# Logic for missing important records
	Catch[is.na(SpeciesCategoryNumber) & is.na(SpeciesCategoryWeight) & !is.na(SubsampledNumber), SpeciesCategoryNumber := SubsampledNumber]
	Catch[is.na(SpeciesCategoryNumber) & is.na(SpeciesCategoryWeight) & !is.na(SubsampleWeight),  SpeciesCategoryWeight := SubsampleWeight]
	
	# NA means that nothing is subsampled
	Catch[!is.na(SpeciesCategoryWeight) & is.na(SubsampleWeight), SubsampleWeight := 0]
	
	
	# Combine required tables for the Biology level
	indRaw <- BioticData_NMDOne$individual
	indRaw[is.na(preferredagereading), preferredagereading := 1]
	
	baseAge <- intersect(names(indRaw), names(BioticData_NMDOne$agedetermination))
	indRaw <- merge(indRaw, BioticData_NMDOne$agedetermination, by.x=c(baseAge, "preferredagereading"), by.y= c(baseAge, "agedeterminationid"), all.x = TRUE)
	indRaw <- merge(catchRaw, indRaw, by = intersect(names(catchRaw), names(indRaw)))
	
	# Take special care of the agingstructure, which should only be given for individuals with age:
	indRaw[is.na(age), agingstructure := NA_character_]
	
	Biology <- indRaw[, .(
		LocalID = cruise,
		Gear = gear,
		Number = serialnumber,
		SpeciesCode = aphia,
		SpeciesCategory = catchpartnumber,
		StockCode = NA,
		FishID = specimenid,
		#LengthCode = "mm", 
		LengthCode = getLengthCodeICES(lengthresolution), 
		#LengthClass = length * 1000
		LengthClass = scaleLengthUsingLengthCode(length, getLengthCodeICES(lengthresolution), inputUnit = "m"), 
		#WeightUnit = 'gr',
		WeightUnit = 'kg', # Always kg in NMDBiotic (see http://www.imr.no/formats/nmdbiotic/)
		#IndividualWeight = individualweight * 1000, # Always kg in NMDBiotic (see http://www.imr.no/formats/nmdbiotic/)
		IndividualWeight = individualweight,
		#IndividualSex = ifelse(is.na(sex), NA, ifelse(sex == "1", "F", "M")),
		IndividualSex = sex,
		#IndividualMaturity = getDATRASMaturity(getQuarter(stationstartdate), aphia, specialstage, maturationstage),
		# Here specialstage is assumed to be have been translated to ICES "M6" codes as per http://vocab.ices.dk/?ref=1480:
		IndividualMaturity = specialstage,
		#MaturityScale = "M6",
		MaturityScale = NA,
		IndividualAge = age,
		AgePlusGroup = NA,
		#AgeSource = "Otolith",
		# Do not interpret agingstructure, as this should be the responsibility of the user:
		AgeSource = agingstructure, 
		GeneticSamplingFlag = NA,
		StomachSamplingFlag = NA,
		ParasiteSamplingFlag = NA,
		IndividualVertebraeCount = NA
	)]
	
	if(AllowRemoveSpecies) {
		# Check for valid aphias, mark other as invalid
		xmlRaw <- read_xml("https://acoustic.ices.dk/Services/Schema/XML/SpecWoRMS.xml")
		validCodes <- xml_text(xml_find_all(xmlRaw, "//Code//Key"))
		
		notPresentInCatch <- unique(setdiff(Catch$SpeciesCode, validCodes))
		notPresentInBiology <- unique(setdiff(Biology$SpeciesCode, validCodes))
		
		if(length(notPresentInCatch)) {
			warning("StoX: The following species are not listed in https://acoustic.ices.dk/Services/Schema/XML/SpecWoRMS.xml were automatically removed from table Catch (set AllowRemoveSpecies = FALSE to prevent this):\n", paste(notPresentInCatch, collapse = ", "))
			
		}
		if(length(notPresentInBiology)) {
			warning("StoX: The following species are not listed in https://acoustic.ices.dk/Services/Schema/XML/SpecWoRMS.xml were automatically removed from table Biology (set AllowRemoveSpecies = FALSE to prevent this):\n", paste(notPresentInBiology, collapse = ", "))
			
		}
		
		Catch <- Catch[SpeciesCode %in% validCodes, ]
		Biology <- Biology[SpeciesCode %in% validCodes, ]
	} else {
		message("AllowRemoveSpecies is set to FALSE. Will only give warning for records with species that is not accepted by the ICES system.")
		compareICES("https://acoustic.ices.dk/Services/Schema/XML/SpecWoRMS.xml", unique(Catch$SpeciesCode))
	}
	
	ICESBioticCSV <- list(
		Cruise = Cruise, 
		Haul = Haul, 
		Catch = Catch, 
		Biology = Biology
	)
	
	return(ICESBioticCSV)
}




#' Writes \code{\link{ICESBioticData}} to a CSV file for each input biotic file used to create the \code{\link{ICESBioticData}}
#'
#' @param ICESBioticData An \code{\link{ICESBioticData}} object as returned from \code{\link{ICESBiotic}}.
#'
#' @return List of string matrices in the ICES biotic CSV format.
#'
#' @export
WriteICESBiotic <- function(ICESBioticData){
	
	WriteICESBioticData <- lapply(
		ICESBioticData, 
		WriteICESBioticOne
	)
	
	return(WriteICESBioticData)
}


WriteICESBioticOne <- function(ICESBioticDataOne){
	
	# Convert all tables to string matrices with header and record rows, and rbind:
	ICESBioticCSVDataOne <- convertToHeaderRecordMatrix(ICESBioticDataOne)
	ICESBioticCSVDataOne <- expandWidth(ICESBioticCSVDataOne)
	
	# Stack all matrices:
	ICESBioticCSVDataOne <- do.call(rbind, ICESBioticCSVDataOne)
	
	return(ICESBioticCSVDataOne)
}






# Function to add the table name to the column names, but not to the keys.
renameToTableNameFirst <- function(data, tableNames, setToID = NULL, formatType = c("Biotic", "Acoustic")) {
	
	formatType <- match.arg(formatType)
	
	# Create a table of the original and new column names, but remove keys:
	columnName <- lapply(data[tableNames], names)
	tableName = rep(tableNames, lengths(columnName))
	columnName <- unlist(columnName)
	
	#areTableNames <- columnName  %in% names(data)
	areTableNames <- columnName  %in% setToID
	
	newColumnName <- character(length(columnName))
	newColumnName[areTableNames] <- paste0(columnName[areTableNames], "ID")
	newColumnName[!areTableNames] <- paste0(tableName[!areTableNames], columnName[!areTableNames])
	
	conversionTable <- data.table::data.table(
		tableName = tableName,
		columnName = columnName,
		newColumnName = newColumnName
	)
	
	# Remove the keys:
	ICESKeys <- getRstoxDataDefinitions(paste0("ICES", formatType, "Keys"))
	# Do not remove the key of the last table:
	ICESKeys <- unique(unlist(ICESKeys[!names(ICESKeys) %in% tail(tableNames, 1)]))
	
	conversionTable <- conversionTable[!(duplicated(columnName) & columnName %in% ICESKeys), ]
	
	# Remove rows where tableName and columnNames are identical:
	conversionTable <- subset(conversionTable, tableName != columnName)
	
	# Split the conversionTable by tableName to allow for the duplicately named columns between tables:
	conversionTableList <- split(conversionTable, by = "tableName")
	
	# Add again the keys:
	keys <- conversionTable[columnName %in% ICESKeys, ]
	conversionTableList <- lapply(conversionTableList, rbind, keys)
	conversionTableList <- lapply(conversionTableList, unique)
	
	# Rename by reference:
	mapply(
		data.table::setnames, 
		data[tableNames], 
		old = lapply(conversionTableList, "[[", "columnName"), 
		new = lapply(conversionTableList, "[[", "newColumnName"), 
		skip_absent = TRUE
	)
}











#' Convert BioticData to ICESDatras format
#'
#' Given a \code{\link{BioticData}} object, this function converts it to the ICESDatras format. Note that this function only supports
#' \code{\link{BioticData}} generated from NMDBiotic version 3 and 3.1 XML files.
#'
#' @param BioticData A \code{BioticData} object from an XML file in NMD biotic version 3 (or 3.1) format.
#'
#' @return An \code{\link{ICESDatrasData}} object.
#'
#' @export
ICESDatras <- function(
	BioticData
) {

	ICESDatrasData <- lapply(
  		BioticData, 
  		ICESDatrasOne
  	)

	# Remove empty data (from invalid files, i.e. files that are not NMDBiotic version 3 or 3.1)
	ICESDatrasData <- ICESDatrasData[lengths(ICESDatrasData) > 0]
	
	return(ICESDatrasData)
}


ICESDatrasOne <- function(
	BioticDataOne
) {
	
	# Check input is a NMD Biotic v3 data
	if(!(BioticDataOne$metadata$useXsd %in% c("nmdbioticv3", "nmdbioticv3.1"))) {
		warning("StoX: Currently, only NMD Biotic version 3 and 3.1 data can be written by ICESDatras")
		return(matrix(1, 0, 0))
	}

	## 1. HH ##
	'%ni%' <- Negate('%in%')

	finalHH <- merge(BioticDataOne$mission, BioticDataOne$fishstation)

	# Make HH records
	finalHH[, `:=`(
		"Quarter" = getQuarter(stationstartdate),
		"Country" = getTSCountryByIOC(nation),
		"Ship" = getICESShipCode(platformname),
		"Gear" = "GOV",
		"SweepLngt" = getGOVSweepByEquipment(gear),
		"GearEx" = getGearEx(getGOVSweepByEquipment(gear), startyear, serialnumber, bottomdepthstart),
		"DoorType" = "P",
		"StNo" = serialnumber,
		"HaulNo" = station,
		"Year" = getYear(stationstartdate),
		"Month" = getMonth(stationstartdate),
		"Day" = getDay(stationstartdate),
		"TimeShot" = getTimeShot(stationstarttime),
		"DepthStratum" = NA,
		"HaulDur" = as.numeric(getTimeDiff(stationstartdate, stationstarttime, stationstopdate, stationstoptime)),
		"DayNight" = getDayNight(stationstartdate, stationstarttime, latitudestart, longitudestart),
		"ShootLat" = roundDrop0(latitudestart, digits = 4), 
		"ShootLong" = roundDrop0(longitudestart, digits = 4), 
		"HaulLat" = roundDrop0(latitudeend, digits = 4),
		"HaulLong" = roundDrop0(longitudeend, digits = 4),
		"StatRec" = getICESrect(latitudestart, longitudestart),
		"Depth" = roundDrop0(bottomdepthstart),
		"HaulVal" = getHaulVal(gearcondition, samplequality),
		"HydroStNo" = NA,
		"StdSpecRecCode" = 1,
		"BycSpecRecCode" = 1,
		"DataType" = "R",
		"Netopening"= roundDrop0(verticaltrawlopening, digits = 1),
		"Rigging" = NA,
		"Tickler" = NA,
		"Distance" = roundDrop0(getDistanceMeter(latitudestart, longitudestart, latitudeend, longitudeend)),
		"Warplngt" = roundDrop0(wirelength),
		"Warpdia" = NA,
		"WarpDen" = NA,
		"DoorSurface" = 4.5,
		"DoorWgt" = 1075,
		"DoorSpread" = ifelse(!is.na(trawldoorspread), roundDrop0(trawldoorspread, digits = 1), NA),
		"WingSpread" = NA,
		"Buoyancy" = NA,
		"KiteDim" = 0.8,
		"WgtGroundRope" = NA,
		"TowDir" = ifelse(!is.na(direction), roundDrop0(direction), NA),
		"GroundSpeed" = roundDrop0(gearflow, digits = 1),
		"SpeedWater" = NA,
		"SurCurDir" = NA,
		"SurCurSpeed" = NA,
		"BotCurDir" = NA,
		"BotCurSpeed" = NA,
		"WindDir" = NA,
		"WindSpeed" = NA,
		"SwellDir" = NA,
		"SwellHeight" = NA,
		"SurTemp" = NA,
		"BotTemp" = NA,
		"SurSal" = NA,
		"BotSal" = NA,
		"ThermoCline" = NA,
		"ThClineDepth" = NA,
		"CodendMesh" = NA ,
		"SecchiDepth" = NA,
		"Turbidity" = NA,
		"TidePhase" = NA,
		"TideSpeed" = NA,
		"PelSampType" = NA,
		"MinTrawlDepth" = NA,
		"MaxTrawlDepth" = NA
	)]
	
	HHraw <- data.table::copy(finalHH[, c(
		"Quarter", "Country", "Ship", "Gear",
		"SweepLngt", "GearEx", "DoorType", "StNo", "HaulNo", "Year", "Month", "Day",
		"TimeShot", "DepthStratum", "HaulDur", "DayNight", "ShootLat", "ShootLong", "HaulLat", "HaulLong",
		"StatRec", "Depth", "HaulVal", "HydroStNo", "StdSpecRecCode", "BycSpecRecCode", "DataType", "Netopening",
		"Rigging", "Tickler", "Distance", "Warplngt", "Warpdia", "WarpDen", "DoorSurface", "DoorWgt",
		"DoorSpread", "WingSpread", "Buoyancy", "KiteDim", "WgtGroundRope", "TowDir", "GroundSpeed",
		"SpeedWater", "SurCurDir", "SurCurSpeed", "BotCurDir", "BotCurSpeed", "WindDir", "WindSpeed",
		"SwellDir", "SwellHeight", "SurTemp", "BotTemp", "SurSal", "BotSal", "ThermoCline", "ThClineDepth",
		"CodendMesh", "SecchiDepth", "Turbidity", "TidePhase", "TideSpeed", "PelSampType", "MinTrawlDepth", "MaxTrawlDepth")]
	)
	
	## 2. HL ##
	
	mergedHL <- merge(BioticDataOne$catchsample, finalHH, by=intersect(names(BioticDataOne$catchsample), names(finalHH)))
	
	groupCA <- c("missiontype", "startyear", "platform", "missionnumber", "serialnumber", "aphia", "sex")
	groupHL <- c(groupCA, "catchpartnumber")
	
	# Remove rows with empty aphia
	mergedHL <- mergedHL[!is.na(aphia)]
	
	getSpecVal <- function(HaulVal, catchcount, lengthsamplecount, catchweight){
		temp <-  as.data.table(cbind(hv=HaulVal, cc=catchcount, lsc=lengthsamplecount, cw=catchweight))
		
		# Default is invalid
		temp[, res := "0"]
		
		temp[!is.na(cc) & !is.na(lsc) & !is.na(cw), res:="1"]
		temp[!is.na(cc) &  is.na(lsc) &  is.na(cw), res:="4"]
		temp[ is.na(cc) &  is.na(lsc) & !is.na(cw), res:="6"]
		temp[!is.na(cc) &  is.na(lsc) & !is.na(cw), res:="7"]
		temp[ is.na(cc) &  is.na(lsc) &  is.na(cw), res:="5"]
		temp[!is.na(cc) & !is.na(lsc) &  is.na(cw), res:="0"]
		
		temp[hv == "I", res:="0"]
		
		return(temp$res)
	}
	
	
	mergedHL[, SpecVal := getSpecVal(HaulVal, catchcount, lengthsamplecount, catchweight)]
	
	# Get herring or sprat
	mergedHL[,`:=`( isHerringOrSprat = ifelse(aphia %in% c("126417", "126425"), TRUE, FALSE),
					isCrustacean = ifelse(aphia %in% c("107275", "107276", "107369", "107253", "107703", "107704", "107350", "107254", "107205", "140712", "140687", "140658"), TRUE, FALSE))]
	
	# Calculate lngtCode
	mergedHL[,lngtCode := "1"]
	mergedHL[is.na(sampletype), lngtCode := NA]
	mergedHL[isCrustacean == TRUE, lngtCode := "."]
	mergedHL[isHerringOrSprat == TRUE, lngtCode := "0"]
	
	# lenInterval, and reportInMM
	mergedHL[,`:=`(lenInterval = ifelse(lngtCode=="0", 5, 1), reportInMM = ifelse(lngtCode %ni% c("1", NA), TRUE, FALSE))]
	mergedHL[is.na(lenInterval), lenInterval := 1]
	
	# catCatchWgt & subWeight
	mergedHL[!is.na(catchweight), catCatchWgt := ceiling(catchweight * 1000)]
	mergedHL[!is.na(lengthsampleweight), subWeight := ceiling(lengthsampleweight * 1000)]
	
	# get sampleFac
	mergedHL[, sampleFac := catchweight / lengthsampleweight]
	
	# Merge with individual
	mergedHL <- merge(mergedHL, BioticDataOne$individual, by = intersect(names(mergedHL), names(BioticDataOne$individual)), all.x = TRUE)
	
	# Get count
	mergedHL[, N := sum(!is.na(specimenid)), by = groupHL]
	
	# For the record with empty individual data
	mergedHL[N == 0, `:=`(lngtClass = as.integer(NA), sex = as.character(NA))]
	
	# Get Individual length
	mergedHL[, length := length * 100]
	
	# Some species have very small length in cm, use mm instead
	mergedHL[length < 1, `:=`(lngtCode = ".", lenInterval = 1, reportInMM = TRUE)]
	
	# Process MM length
	mergedHL[reportInMM == TRUE, length := length * 10]
	
	# Get sex
	mergedHL[, sex := ifelse(is.na(sex), as.character(NA), ifelse(sex == "1", "F", "M"))]
	
	# Get lngtClass
	for(interval in unique(mergedHL$lenInterval)) {
		intVec <- seq(0, max(mergedHL$length, na.rm = T), by = interval)
		mergedHL[lenInterval == interval, lngtClass := intVec[findInterval(length, intVec)]]
	}
	
	# Count measured individual
	mergedHL[!is.na(length), lsCountTot := 1]
	
	# Aggregate hlNoAtLngth and lsCountTot
	finalHL <- mergedHL[, .(N, lsCountTot = sum(lsCountTot)), by = c(
		groupHL,  
		"lngtClass", "Quarter", "Country", "Ship", "Gear", "SweepLngt", "GearEx", "DoorType", "HaulNo", "SpecVal", "catCatchWgt", "sampleFac", "subWeight", "lngtCode", "stationtype", "lengthmeasurement"
		)
	]
	
	finalHL <- finalHL[!duplicated(finalHL)]
	finalHL[,`:=`(noMeas = sum(lsCountTot)), by = groupHL]
	finalHL[,`:=`(totalNo = noMeas * sampleFac, subFactor = sampleFac)]
	
	HLraw <- data.table::copy(finalHL[, .(
		"Quarter" = Quarter,
		"Country" = Country,
		"Ship" = Ship,
		"Gear" = Gear,
		"SweepLngt" = SweepLngt,
		"GearEx" = GearEx,
		"DoorType" = DoorType,
		"StNo" = serialnumber,
		"HaulNo" = HaulNo,
		"Year" = startyear,
		"SpecCodeType" = "W",
		"SpecCode" = aphia,
		"SpecVal" = SpecVal,
		"Sex" = sex,
		"TotalNo" = roundDrop0(totalNo, digits = 2),
		"CatIdentifier" = catchpartnumber,
		"NoMeas" = noMeas,
		"SubFactor" = roundDrop0(subFactor, 4),
		"SubWgt" = roundDrop0(subWeight),
		"CatCatchWgt" = roundDrop0(catCatchWgt),
		"LngtCode" = lngtCode,
		"LngtClass" = lngtClass,
		"HLNoAtLngt" = roundDrop0(lsCountTot, 2),
		"DevStage" = NA,
		"LenMeasType" = convLenMeasType(lengthmeasurement)
	)]
	)
	
	
	## 3. CA ##
	
	mergedHL[is.na(preferredagereading), preferredagereading := 1]
	baseAge <- intersect(names(mergedHL), names(BioticDataOne$agedetermination))
	mergedCA <- merge(mergedHL, BioticDataOne$agedetermination, by.x=c(baseAge, "preferredagereading"), by.y= c(baseAge, "agedeterminationid"), all.x = TRUE)
	
	# Remove empty individual
	mergedCA <- mergedCA[!is.na(specimenid)]
	
	# Get maturity
	mergedCA[, maturity:=getDATRASMaturity(Quarter, aphia, specialstage, maturationstage)]
	
	# Aggregate count
	mergedCA[!is.na(individualweight), `:=`(nWithWeight =.N, totWeight = sum(individualweight)), by = c(groupCA,  "lngtClass", "maturity", "age")]
	
	finalCA <- mergedCA[, .(nInd =.N), by = c(
		groupCA,  
		"lngtClass", "maturity", "age", "Quarter", "Country", "Ship", "Gear", "SweepLngt", "GearEx", "DoorType", "HaulNo", "SpecVal", "StatRec", "lngtCode", "stationtype", "nWithWeight", "totWeight", "specimenid", "tissuesample", "stomach", "agingstructure", "readability", "parasite")]
	finalCA[!is.na(nWithWeight),  meanW := totWeight / nWithWeight]
	
	CAraw <- data.table::copy(finalCA[,
		.(
			"Quarter" = Quarter,
			"Country" = Country,
			"Ship" = Ship,
			"Gear" = Gear,
			"SweepLngt" = SweepLngt,
			"GearEx" = GearEx,
			"DoorType" = DoorType,
			"StNo" = serialnumber,
			"HaulNo" = HaulNo,
			"Year" = startyear,
			"SpecCodeType" = "W",
			"SpecCode" = aphia,
			"AreaType" = "0",
			"AreaCode" = StatRec,
			"LngtCode" = lngtCode,
			"LngtClass" = lngtClass,
			"Sex" = sex,
			"Maturity" = maturity,
			"PlusGr" = as.character(NA),
			"AgeRings" = ifelse(!is.na(age), age, NA),
			"CANoAtLngt" = nInd,
			"IndWgt" = ifelse(!is.na(meanW), roundDrop0(meanW * 1000, 1), NA),
			"MaturityScale" = "M6",
			"FishID" = specimenid,
			"GenSamp" = ifelse(!is.na(tissuesample), "Y", "N"),
			"StomSamp" = ifelse(!is.na(stomach), "Y", "N"),
			"AgeSource" = convAgeSource(agingstructure),
			"AgePrepMet" = NA,
			"OtGrading" = ifelse(readability %in% as.character(c(1:4)), readability, NA),  # From http://tomcat7.imr.no:8080/apis/nmdapi/reference/v2/dataset/otolithreadability?version=2.0 and http://vocab.ices.dk/?ref=1395
			"ParSamp" = ifelse(!is.na(parasite), "Y", "N")
		)]
	)
	
	
	## Prepare for cleaning (mostly from the old Rstox. Will change as we improve) ##
	hh <- HHraw
	hl <- HLraw
	ca <- CAraw
	
	## WARN #0:
	# It is possible to have two catch samples with the same aphia (but different species codes, e.g. SILD05) in a haul.
	# We need to combine them if they have different TotalNo and CatCatchWgt.
	
	# Find duplicate species in a haul
	dupl <- stats::aggregate(catchcategory ~ aphia + serialnumber, BioticDataOne$catchsample, FUN = function(x) length(unique(x)))
	dupl <- dupl[dupl$catchcategory > 1, ]
	
	# Find the above in our DATRAS HL
	if(nrow(dupl)) {
		found <- stats::aggregate(CatCatchWgt ~ StNo + SpecCode + Sex + CatIdentifier, hl[(hl$SpecCode %in% dupl$aphia & hl$StNo %in% dupl$serialnumber),], FUN = function(x) length(unique(x)))
		found <- found[found$CatCatchWgt > 1, ]
		for(iz in seq_len(nrow(found))) {
			tmpHL <- hl[hl$StNo==found[iz, "StNo"] & hl$SpecCode==found[iz, "SpecCode"] & hl$Sex==found[iz, "Sex"] & hl$CatIdentifier==found[iz, "CatIdentifier"], ]
			combinedCatCatchWgt <- tmpHL
			# Fix CatCatchWgt
			hl[hl$StNo==found[iz, "StNo"] & hl$SpecCode==found[iz, "SpecCode"] & hl$Sex==found[iz, "Sex"] & hl$CatIdentifier==found[iz, "CatIdentifier"], "CatCatchWgt"] <- roundDrop0(mean(tmpHL$CatCatchWgt))
			# Fix CatCatchWgt
			hl[hl$StNo==found[iz, "StNo"] & hl$SpecCode==found[iz, "SpecCode"] & hl$Sex==found[iz, "Sex"] & hl$CatIdentifier==found[iz, "CatIdentifier"], "SubWgt"] <- roundDrop0(mean(tmpHL$SubWgt))
			# Fix totalNo
			hl[hl$StNo==found[iz, "StNo"] & hl$SpecCode==found[iz, "SpecCode"] & hl$Sex==found[iz, "Sex"] & hl$CatIdentifier==found[iz, "CatIdentifier"], "TotalNo"] <- sum(unique(tmpHL$TotalNo))
			# Fix noMeas
			hl[hl$StNo==found[iz, "StNo"] & hl$SpecCode==found[iz, "SpecCode"] & hl$Sex==found[iz, "Sex"] & hl$CatIdentifier==found[iz, "CatIdentifier"], "NoMeas"] <- sum(tmpHL$HLNoAtLngt)
			# Finally, fix SubFactor
			hl[hl$StNo==found[iz, "StNo"] & hl$SpecCode==found[iz, "SpecCode"] & hl$Sex==found[iz, "Sex"] & hl$CatIdentifier==found[iz, "CatIdentifier"], "SubFactor"] <- sum(unique(tmpHL$TotalNo))/sum(tmpHL$HLNoAtLngt)
		}
	}
	
	## WARN #1:
	# Find species with different SpecVal, if any of them have SpecVal == 1, delete any other records with different SpecVal
	# otherwise, use the lowest SpecVal value for all
	
	tmp <- stats::aggregate(SpecVal ~ SpecCode + StNo, hl, FUN = function(x) length(unique(x)))
	tmp <- tmp[tmp$SpecVal>1, ]
	
	for( rownum in seq_len(nrow(tmp)) ) {
		tmpSpecs <- hl[(hl$StNo==tmp$StNo[rownum] & hl$SpecCode==tmp$SpecCode[rownum]),]$SpecVal
		if(any(tmpSpecs == 1))
			hl <- hl[!(hl$StNo==tmp$StNo[rownum] & hl$SpecCode==tmp$SpecCode[rownum] & hl$SpecVal!=1),]
		else
			hl[(hl$StNo==tmp$StNo[rownum] & hl$SpecCode==tmp$SpecCode[rownum]), c("SpecVal")] <- min(tmpSpecs)
	}
	
	## SpecVal Conditionals
	hl[hl$SpecVal==0, c("Sex", "TotalNo", "CatIdentifier", "NoMeas", "SubFactor", "SubWgt", "CatCatchWgt", "LngtCode", "LngtClass", "HLNoAtLngt")] <- NA
	
	hl[hl$SpecVal==4, c("NoMeas", "SubWgt", "CatCatchWgt", "LngtCode", "LngtClass", "HLNoAtLngt")] <- NA
	hl[hl$SpecVal==4, c("SubFactor")] <- 1
	
	hl[hl$SpecVal==5, c("TotalNo", "NoMeas", "SubWgt", "CatCatchWgt", "LngtCode", "LngtClass", "HLNoAtLngt")] <- NA
	hl[hl$SpecVal==5, c("SubFactor")] <- 1
	
	hl[hl$SpecVal==6, c("TotalNo", "NoMeas", "LngtCode", "LngtClass", "HLNoAtLngt")] <- NA
	
	hl[hl$SpecVal==7, c("NoMeas", "LngtCode", "LngtClass", "HLNoAtLngt")] <- NA
	
	hl[hl$SpecVal==10, c("CatCatchWgt")] <- NA
	
	## WARN #2:
	## will now get errors in DATRAS upload for duplicate records
	hl <- hl[!duplicated(hl),]
	
	## hl and ca contain 0-tow info - must throw these out
	hl <- hl[hl$StNo %in% hh$StNo,]
	ca <- ca[ca$StNo %in% hh$StNo,]
	# throw out ca records for Invalid hauls
	ca <- ca[!ca$StNo %in% hh$StNo[hh$HaulVal=='I'],]
	
	##########################################
	## Removing some benthos - this won't be needed in the future
	## keep 11725 138139 138482 138483 140600 140621 140624 140625 141443 141444 141449 153083 153131 -- these are cephalopods
	## required benthos: 107205
	hl <- hl[!hl$SpecCode %in% c(230,558,830,883,1302,1839,100635,100706,100930,103929,106048,106087,106204,106733,106791,
								 106854,106928,107044,107218,107230,107240,107273,107292,107318,107330,107346,107397,107398,107551,
								 107616,107643,111374,111597,111604,116986,117302,117809,117815,117890,123117,123867,123920,123970,
								 123987,124319,124418,124913,124929,124934,125128,125131,125134,129196,129229,130464,130867,132072,
								 132480,135144,135302,137704,137732,138223,138239,138760,138899,139004,139488,140299,140627,141753,
								 144129,150642,178639,181228,239867,19494,21263,100817,100982,106738,107160,107232,107277,107322,
								 107323,107327,107387,107531,107552,107564,107649,107651,111367,123080,123083,123084,123776,123813,
								 124043,124154,124160,124287,124535,125166,125333,128517,129840,138802,138878,138920,140467,140717,
								 143755,145541,145546,145548,532031,589677,1762,123082,149),]
	
	ca <- ca[!ca$SpecCode %in% c(230,558,830,883,1302,1839,100635,100706,100930,103929,106048,106087,106204,106733,106791,
								 106854,106928,107044,107218,107230,107240,107273,107292,107318,107330,107346,107397,107398,107551,
								 107616,107643,111374,111597,111604,116986,117302,117809,117815,117890,123117,123867,123920,123970,
								 123987,124319,124418,124913,124929,124934,125128,125131,125134,129196,129229,130464,130867,132072,
								 132480,135144,135302,137704,137732,138223,138239,138760,138899,139004,139488,140299,140627,141753,
								 144129,150642,178639,181228,239867,19494,21263,100817,100982,106738,107160,107232,107277,107322,
								 107323,107327,107387,107531,107552,107564,107649,107651,111367,123080,123083,123084,123776,123813,
								 124043,124154,124160,124287,124535,125166,125333,128517,129840,138802,138878,138920,140467,140717,
								 143755,145541,145546,145548,532031,589677,1762,123082,149),]
	
	# More benthos; 10216 = skate egg case
	hl <- hl[!hl$SpecCode %in% c(443,938,1131,1292,1337,1360,19494,22988,100751,100757,100790,101054,103484,104062,
								 106122,106669,107011,107052,107148,107239,107388,107563,110690,110911,110956,111411,117136,
								 117258,123260,123276,123321,123335,123574,123593 ,123851,123922,123985,124085,125158,125269,
								 128506,130467,130987,131779,134591,137683,141872,146142 ,149864,445590,510534,105,175,927,1107,
								 1135,1267,100793),]
	hl <- hl[!hl$SpecCode %in% c(105,175,927,1107,1135,1267,100793,103443,103692,106057,106835,106903,107558,110908,111361,
								 117940,122348,123160,123426,124257,125027,125284,131495,135294,135301,135306,138992,140528,140687,
								 167882,178527,239867,291396,106763,137656,117225,100653,125125,100698,131774,134366,123386,117228,
								 117994,138923,123127,137701,123320,131629 ,152391,1363,214,103543,106994,103450,129400,140143,
								 146420,141905,22496,988,103717,107163,982,985,123622,102145,1082,10216,103483),]
	
	ca <- ca[!ca$SpecCode %in% c(443,938,1131,1292,1337,1360,19494,22988,100751,100757,100790,101054,103484,104062,
								 106122,106669,107011,107052,107148,107239,107388,107563,110690,110911,110956,111411,117136,
								 117258,123260,123276,123321,123335,123574,123593 ,123851,123922,123985,124085,125158,125269,
								 128506,130467,130987,131779,134591,137683,141872,146142 ,149864,445590,510534,105,175,927,1107,
								 1135,1267,100793),]
	ca <- ca[!ca$SpecCode %in% c(105,175,927,1107,1135,1267,100793,103443,103692,106057,106835,106903,107558,110908,111361,
								 117940,122348,123160,123426,124257,125027,125284,131495,135294,135301,135306,138992,140528,140687,
								 167882,178527,239867,291396,106763,137656,117225,100653,125125,100698,131774,134366,123386,117228,
								 117994,138923,123127,137701,123320,131629 ,152391,1363,214,103543,106994,103450,129400,140143,
								 146420,141905,22496,988,103717,107163,982,985,123622,102145,1082,10216,103483),]
	
	hl <- hl[!hl$SpecCode %in% c(NA, 101,106769,106782,107010,107726,122478,123506,12437,124951,128539,129402,196221,205077,124373, 123187, 124710),]
	ca <- ca[!ca$SpecCode %in% c(NA, 101,106769,106782,107010,107726,122478,123506,12437,124951,128539,129402,196221,205077,124373, 123187, 124710),]
	
	## IU: Filter out additional benthos:
	benthosSpecCodes <- c(104,956,966,1128,1296,1367,1608,11707,100782,100839,100854,103439,103732,104040,105865,106041,106673,106702,106789,106834,107152,
						  107205,107264,110749,110916,110993,111152,111355,111365,117093,117195,118445,122626,123204,123255,123613,124147,124151,124324,124670,
						  128490,128503,129563,130057,134691,136025,137710,138018,138068,138477,138631,138749,138938,140166,140173,140480,140625,141904,141929,
						  149854,152997,532035,816800)
	
	hl <- hl[!hl$SpecCode %in% benthosSpecCodes,]
	ca <- ca[!ca$SpecCode %in% benthosSpecCodes,]
	
	
	## WARN #3:
	## ca records with no HL records
	## these records are because there is no catch weight
	## DATRAS does not accept length info without catch weight
	## so create a line in the HL for each, but give SpecVal = 4 and delete the ca record
	
	#IU: Improved cleaning#
	# Use join to find missing value in HL
	if (nrow(ca) > 0) {
		#testca <- unique(data.frame(StNo=ca$StNo, SpecCode=ca$SpecCode, ca=TRUE))
		#testhl <- unique(data.frame(StNo=hl$StNo, SpecCode=hl$SpecCode, hl=TRUE))
		testca <- unique(data.table::data.table(StNo=ca$StNo, SpecCode=ca$SpecCode, ca=TRUE))
		testhl <- unique(data.table::data.table(StNo=hl$StNo, SpecCode=hl$SpecCode, hl=TRUE))
		tt <- merge(testca, testhl, by = c("StNo","SpecCode"), all=TRUE)
		missingHL <- tt[is.na(tt$hl),]
		
		# Populate missing value in HL
		for(idxHL in seq_len(nrow(missingHL))) {
			r <- missingHL[idxHL,]
			tmp <- hl[hl$StNo==r$StNo,][1,]
			tmp$SpecCode <- r$SpecCode
			tmp$SpecVal <- 4
			tmp$TotalNo <- c(hh$HaulDur[hh$StNo==r$StNo])
			tmp$CatCatchWgt <- NA
			hl <- rbind(hl,tmp)
		}
	}
	## WARN #4:
	
	# Use plus group for herring and mackerel individuals with age ring above 15
	ca[ which((ca$SpecCode==127023 | ca$SpecCode==126417) & ca$AgeRings >= 15), c("PlusGr", "AgeRings")] <- list("+", 15)
	
	# Order HL
	hl <- hl[order(hl$StNo),]
	
	#
	ICESDatrasData <- list(HH = hh, HL = hl, CA = ca)
	
	return(ICESDatrasData)
}



#' Writes \code{\link{ICESDatrasData}} to a CSV file for each input biotic file used to create the \code{\link{ICESDatrasData}}
#'
#' @param ICESDatrasData A \code{\link{ICESDatrasData}} object returned from \code{\link{ICESDatras}}.
#'
#' @return List of string matrices in the ICES Datras CSV format.
#'
#' @export
WriteICESDatras <- function(ICESDatrasData){
	
	WriteICESDatrasData <- lapply(
		ICESDatrasData, 
		WriteICESDatrasOne, 
		na = "-9"
	)
	
	return(WriteICESDatrasData)
}


WriteICESDatrasOne <- function(ICESDatrasDataOne, na = "-9"){
	
	# Convert all tables to string matrix with header and record, and rbind:
	ICESDatrasCSVDataOne <- convertToRecordTypeMatrix(ICESDatrasDataOne)
	
	# Replace NAs:
	if(length(na)) {
		ICESDatrasCSVDataOne <- lapply(ICESDatrasCSVDataOne, function(x) {x[is.na(x)] <- na; x})
	}
	
	#ICESDatrasCSVDataOne <- expandWidth(ICESDatrasCSVDataOne, na = na)
	
	# Stack all matrices:
	#ICESDatrasCSVDataOne <- do.call(rbind, ICESDatrasCSVDataOne)
	
	# Convert each line of each table to comma separated:
	ICESDatrasCSVDataOne <- lapply(ICESDatrasCSVDataOne, apply, 1, paste, collapse = ",")
	
	# Join to one vector, to be written to one file:
	ICESDatrasCSVDataOne <- unlist(ICESDatrasCSVDataOne)
	
	return(ICESDatrasCSVDataOne)
}
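
# A minimal sketch (not run) of writing the returned lines to a DATRAS exchange file.
# The file name is hypothetical; in StoX the actual writing is handled by the framework:
#   datrasLines <- WriteICESDatrasOne(ICESDatrasData[[1]], na = "-9")
#   writeLines(datrasLines, con = "ICESDatras.csv")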










getTSCountryByIOC <- function(nation) {
	cnvTbl <- c("58" = "NO")
	
	x <- cnvTbl[as.character(nation)]
	x[is.null(x)] <- NA
	return(x)
}

getGearEx <- function(sweep, year, serialnumber, depth) {
	
	temp <-  as.data.table(cbind(sweep, year, serialnumber, depth))
	temp[, res:= "S"]
	
	temp[year == 2011 & serialnumber > 24362 & depth >= 70 | year == 2012
		 | year == 2011 & serialnumber >= 24135 & depth >= 70, res:="ST"]
	
	return (temp$res)
}

getYear <- function(stationstartdate) {
	format(as.Date(stationstartdate, format="%Y-%m-%dZ"), "%Y")
}

getMonth <- function(stationstartdate) {
	format(as.Date(stationstartdate, format="%Y-%m-%dZ"), "%m")
}

getDay <- function(stationstartdate) {
	format(as.Date(stationstartdate, format="%Y-%m-%dZ"), "%d")
}

getTimeShot <- function(stationstarttime) {
	
	timeshot <- function(y) {
		if(length(y) == 3) {
			return(paste0(y[1], y[2]))
		} else {
			return(NA)
		}
	}
	
	x <- strsplit(stationstarttime, ":")
	
	return(unlist(lapply(x, timeshot)))
}
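
# Illustration (not run) with hypothetical start times: TimeShot is "HHMM" of the start
# time, and NA if the time does not have three ":"-separated fields:
#   getTimeShot(c("06:45:00", "0645"))   # "0645" NA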

getQuarter <- function(stationstartdate) {
	x <- format(as.Date(stationstartdate, format="%Y-%m-%dZ"), "%m")
	return(floor((as.numeric(x) - 1) / 3 + 1))
}
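
# Illustration (not run) with a hypothetical start date: "2020-05-10Z" falls in month 5,
# which gives quarter 2:
#   getQuarter("2020-05-10Z")   # 2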

# Adopted from: https://www.mathworks.com/matlabcentral/fileexchange/62180-sunriseset-lat-lng-utcoff-date-plot
getDayNight <- function(stationstartdate, stationstarttime, latitudestart, longitudestart, UTCoff = 0) {
	
	deg2rad <- function(val) {
		return(val * (pi / 180))
	}
	
	rad2deg <- function(val) {
		return(val * (180 / pi))
	}
	
	datetime0 <- as.POSIXct("1990-12-30", tz = "UTC")
	
	uniqueDates <- unique(stationstartdate)
	
	nDaysA = as.numeric(difftime(uniqueDates, datetime0, units = "days")) # Number of days since 01/01
	
	nTimes = 24*3600                       # Number of seconds in the day
	tArray = seq(0, 1, length = nTimes)
	
	ssTab <- list()
	
	for(idx in seq_len(length(nDaysA))) {
		
		nDays <- nDaysA[idx]
		lat <- latitudestart[idx]
		lng <- longitudestart[idx]
		localdate <- as.POSIXct(uniqueDates[idx], tz = "UTC")
		
		# Compute
		# Letters correspond to colums in the NOAA Excel
		E = tArray
		F = nDays + 2415018.5 + E - UTCoff / 24
		G = (F - 2451545) / 36525
		I = (280.46646 + G * (36000.76983 + G * 0.0003032)) %% 360
		J = 357.52911 + G * (35999.05029 - 0.0001537 * G)
		K = 0.016708634 - G * (0.000042037 + 0.0000001267 * G)
		L = sin(deg2rad(J)) * (1.914602 - G * (0.004817 + 0.000014 * G))+sin(deg2rad(2 * J)) * (0.019993 - 0.000101*G) + sin(deg2rad(3 * J)) * 0.000289
		M = I + L
		P = M - 0.00569 - 0.00478 * sin(deg2rad(125.04 - 1934.136 * G))
		Q = 23 + (26 + ((21.448 - G * (46.815 + G * (0.00059 - G * 0.001813)))) / 60) / 60
		R = Q + 0.00256 * cos(deg2rad(125.04 - 1934.136 * G))
		T = rad2deg(asin(sin(deg2rad(R)) * sin(deg2rad(P))))
		U = tan(deg2rad(R/2)) * tan(deg2rad(R/2))
		V = 4 * rad2deg(U * sin(2 * deg2rad(I))-2 * K * sin(deg2rad(J)) + 4 * K * U * sin(deg2rad(J)) *  cos(2*deg2rad(I)) - 0.5 * U * U * sin(4 * deg2rad(I)) - 1.25 * K * K * sin(2 * deg2rad(J)))
		AB = (E * 1440 + V + 4 *lng - 60 * UTCoff) %% 1440
		
		
		AC = ifelse (AB/4 < 0, AB/4 + 180, AB/4 - 180)
		
		AD = rad2deg(acos(sin(deg2rad(lat)) * sin(deg2rad(T)) + cos(deg2rad(lat)) * cos(deg2rad(T)) * cos(deg2rad(AC))))
		
		# Test whether we are in midnightsun: 
		WArg <- cos(deg2rad(90.833)) / (cos(deg2rad(lat)) * cos(deg2rad(T))) - tan(deg2rad(lat)) * tan(deg2rad(T))
		# Truncate WArg to [-1, 1], where below is midnightsun and above 1 is myrketid:
		if(any(WArg < -1)) {
			WArg[WArg < -1] <- -1
		}
		if(any(WArg > 1)) {
			WArg[WArg > 1] <- 1
		}
		
		W = rad2deg(acos(WArg))
		X = (720 - 4 * lng - V + UTCoff * 60) * 60
		
		sunrise = which.min(abs(X - round(W * 4 * 60) - nTimes * tArray))
		sunset = which.min(abs(X+round(W*4*60) - nTimes*tArray))
		
		sunrisetime = localdate + sunrise
		sunsettime = localdate + sunset
		
		ssTab[[uniqueDates[idx]]] <- list(sunrise = sunrisetime, sunset = sunsettime)
	}
	
	getDN <- function(x, ssTab) {
		
		y <- ssTab[[format(x, "%Y-%m-%dZ")]]
		
		if(x < y$sunrise || x >= y$sunset) {
			return("N")
		} else {
			return("D")
		}
	}
	
	datetime <- as.POSIXct(gsub("Z", " ", paste0(stationstartdate, stationstarttime)), tz = "UTC")
	
	return(unlist(lapply(datetime, getDN, ssTab)))
}
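
# A minimal usage sketch (not run) with hypothetical station values; noon at 60 degrees
# north in June is expected to be classified as day ("D"):
#   getDayNight("2020-06-01Z", "12:00:00Z", latitudestart = 60, longitudestart = 5)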

# Convert Length Measurement Type
# http://tomcat7.imr.no:8080/apis/nmdapi/reference/v2/dataset/lengthmeasurement?version=2.0
# http://vocab.ices.dk/?ref=1392
convLenMeasType <- function(LenMeasType) {
	# Convert table
	ct <- c(
		"B" = 5,
		"C" = 6,
		"E" = 1,
		"F" = 8,
		"G" = 4,
		"H" = 3,
		"J" = 2,
		"L" = 7,
		"S" = 9
	)
	return(ct[LenMeasType])
}

# Convert aging structure source
# http://tomcat7.imr.no:8080/apis/nmdapi/reference/v2/dataset/agingstructure?version=2.0
# http://vocab.ices.dk/?ref=1507
# This function seems to be incorrect, as the AgeSource documentation here only lists three values:
# http://vocab.ices.dk/?ref=1482
convAgeSource <- function(AgeSource) {
	# Convert table
	if(!all(AgeSource %in% c("1", "2", "7"))) {
		warning("The conversion from agingstructure to AgeSource may be wrong for other values than 1, 2 and 7. Please noify the developers of StoX.")
	}
	
	ct <- c("1" = "scale",
			"2" = "otolith",
			"4" = "df-spine",
			"6" = "spine",
			"7" = "vertebra",
			"8" = "caudal-thorn")
	return(ct[AgeSource])
}


#translateAgeSource <- function(agingstructure) {
#	# Convert table:
#	ct <- c(
#		"1" = "Scale",
#		"2" = "Otolith",
#		"7" = "Vertebra"
#	)
#	return(ct[agingstructure])
#}

roundDrop0 <- function(x, digits = 0) {
	notNA <- !is.na(x)
	x[notNA] <- formatC(x[notNA], digits = digits, format = "f", drop0trailing = TRUE)
	return(x)
}
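
# Illustration (not run): rounds to the given number of digits and drops trailing zeros,
# returning a character vector and leaving NAs untouched:
#   roundDrop0(c(2.50, 7.00, NA), digits = 1)   # "2.5" "7" NA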
