#' @title match_trips
#' @description This function takes the results from get_marfis() and get_isdb()
#' and attempts to match trips based on:
#' \itemize{
#' \item 1 = MARFIS confirmation numbers (Hail in and Hail out)
#' \item 2 = ISDB TRIP names (e.g. J18-0000)*
#' \item 3 = Correct combination of VRN and LICENCE and appropriate date range
#' }
#' * - only the alphanumeric characters of the trip names are used (e.g.
#' "J18-0000B" becomes "J180000B").
#' @param isdbTrips default is \code{NULL}. This is the list output by the
#' \code{Mar.fleets::get_isdb()} function - it contains dataframes of both the
#' trip and set information from the ISDB database.
#' @param marfMatch default is \code{NULL}. This is the MARF_MATCH output of the
#' \code{Mar.fleets::get_marfis()} function - it contains dataframes of both the
#' trip and set information from MARFIS
#' @param ... other arguments passed to methods
#' @family fleets
#' @return returns a list with 3 dataframes - MAP_ISDB_MARFIS_TRIPS, MATCH_ISSUES, UNMATCHABLE & MATCH_SUMMARY
#' \itemize{
#' \item "MAP_ISDB_MARFIS_TRIPS" - contains the TRIP_IDs from MARFIS and ISDB, and
#' an additional field that indicates all of the ways the match was attained.
#' \item "MATCH_ISSUES" - contains any trips that were matched, but had at least one invalid/
#' unmatchable entry due to something like a bad trip name.
#' \item "UNMATCHABLE" - contains any records from both Marfis and ISDB that could not be
#' matched to the other database using any method.
#' }
#' @noRd
#' @author Mike McMahon, \email{Mike.McMahon@@dfo-mpo.gc.ca}
match_trips <- function(isdbTrips = NULL, marfMatch = NULL, ...){
args <- list(...)$args
Obs_Trip_Name <- 0
Hail_In_Conf_Code <- 0
Hail_Out_Conf_Code <- 0
Date_Lic_VR_Combo <- 0
Date_Lic_Combo <- 0
Date_VR_Combo <- 0
Likely_Swapped_VR_LIC <- 0
Total_Matches <- 0
matchGood <- NA
dupRows <- NA
matchNone <- NA
Unmatchables <- 0
MultiMatches <- 0
colnames(isdbTrips)[colnames(isdbTrips)=="TRIP_ISDB"] <- "TRIP_ID_ISDB"
if (args$debug) t22 <- Mar.utils::where_now(returnTime = T)
k <- TRIP_ID_MARF <- CLOSEST <- TRIP_ID_ISDB <- TRIP_ID_MARF_VRLICDATE <- CLOSEST1 <- PRIOR1 <- PRIOR2 <- NA
if(is.null(marfMatch) || is.null(isdbTrips) || !is.data.frame(isdbTrips) ){
message(paste0("\n","No trips to try match against"))
if (args$debug) {
t22_ <- proc.time() - t22
message("\tExiting match_trips() - No trips: (", round(t22_[1],0),"s elapsed)")
}
return(NULL)
}
marfMatch <- unique(marfMatch[,c("TRIP_ID_MARF","MON_DOC_ID","VR_NUMBER_FISHING", "LICENCE_ID","GEAR_CODE","VR_NUMBER_LANDING", "ISDB_TRIP","OBS_ID","OBS_PRESENT","CONF_NUMBER_HI","CONF_NUMBER_HO","T_DATE1","T_DATE2")])
marfMatch <- clean_ISDB_Trip(df = marfMatch, field = "ISDB_TRIP", out_name = "ISDB_TRIP_M")
isdbTrips <- clean_ISDB_Trip(df = isdbTrips, field = "TRIP", out_name = "ISDB_TRIP_O")
isdbTrips$VR_LIC = paste0(isdbTrips$VR,"_",isdbTrips$LIC )
isdbTrips_tripcd_id <- isdbTrips
# FOR EACH MATCH -----------------------------------------------------
# 1 - identify fields which are in both marf and isdb
# 2 - tick the appropriate column in isdb trips (indicating if matched)
# 3 - add matches to df called matches
matchTripNames <- function(df = NULL){
if (args$debug) t23 <- Mar.utils::where_now(returnTime = T)
# MARFIS TRIP NAME ----------------------------------------------------------------------------
thisIsdbTrips <- unique(df[!is.na(df$ISDB_TRIP_O),c("TRIP_ID_ISDB", "ISDB_TRIP_O")])
thisMarfMatch <- unique(marfMatch[!is.na(marfMatch$ISDB_TRIP_M),c("TRIP_ID_MARF","ISDB_TRIP_M")])
if (nrow(thisMarfMatch)==0){
thisMarfMatch$TRIP_ID_ISDB <- numeric()
colnames(thisMarfMatch)[colnames(thisMarfMatch)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_TRIP"
thisMarfMatch$match_TripName <- logical()
if (args$debug) {
t23_ <- proc.time() - t23
message("\tExiting matchTripNames() - no tripName matches: (", round(t23_[1],0),"s elapsed)")
}
return(thisMarfMatch)
}
match_TRIP <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "ISDB_TRIP_O", by.y = "ISDB_TRIP_M"))
colnames(match_TRIP)[colnames(match_TRIP)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_TRIP"
if(nrow(match_TRIP)>0){
match_TRIP$match_TripName <- TRUE
}else{
match_TRIP$match_TripName <- logical()
}
if (!is.null(dbEnv$debugISDBTripNames)) dbEnv$debugISDBTripNames <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripNames, expected = dbEnv$debugISDBTripNames, expectedID = "debugISDBTripNames", known = match_TRIP$ISDB_TRIP_O, stepDesc = "matchTrips_TripName")
match_TRIP$ISDB_TRIP_O <- match_TRIP$ISDB_TRIP_M <- NULL
if (!is.null(dbEnv$debugISDBTripIDs)) dbEnv$debugISDBTripIDs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripIDs, expected = dbEnv$debugISDBTripIDs, expectedID = "debugISDBTripIDs", known = match_TRIP$TRIP_ID_ISDB, stepDesc = "matchTrips_TripName")
if (args$debug) {
t23_ <- proc.time() - t23
message("\tExiting matchTripNames() (",round(t23_[1],0),"s elapsed)")
}
return(match_TRIP)
}
matchHI <- function(df = NULL){
if (args$debug) t24 <- Mar.utils::where_now(returnTime = T)
# MARFIS HAILIN CONFIRMATION NUMBER -----------------------------------------------------------
thisIsdbTrips <- unique(df[!is.na(df$MARFIS_CONF_NUMBER),c("TRIP_ID_ISDB", "MARFIS_CONF_NUMBER")])
thisMarfMatch <- unique(marfMatch[!is.na(marfMatch$CONF_NUMBER_HI),c("TRIP_ID_MARF","CONF_NUMBER_HI")])
if (nrow(thisMarfMatch)==0){
thisMarfMatch$TRIP_ID_ISDB <- numeric()
colnames(thisMarfMatch)[colnames(thisMarfMatch)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_HI"
thisMarfMatch$match_CONF_HI <- logical()
if (args$debug) {
t24_ <- proc.time() - t24
message("\tExiting matchHI() - no HI matches: (", round(t24_[1],0),"s elapsed)")
}
return(thisMarfMatch)
}
s <- strsplit(as.character(thisMarfMatch$CONF_NUMBER_HI), ',')
thisMarfMatch <- data.frame(CONF_NUMBER_HI=unlist(s), TRIP_ID_MARF=rep(thisMarfMatch$TRIP_ID_MARF, sapply(s, FUN=length)))
match_CONF_HI <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "MARFIS_CONF_NUMBER", by.y = "CONF_NUMBER_HI"))
colnames(match_CONF_HI)[colnames(match_CONF_HI)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_HI"
colnames(match_CONF_HI)[colnames(match_CONF_HI)=="MARFIS_CONF_NUMBER"] <- "MARFIS_CONF_NUMBER_HI"
if(nrow(match_CONF_HI)>0){
match_CONF_HI$match_CONF_HI <- TRUE
}else{
match_CONF_HI$match_CONF_HI <- logical()
}
if (!is.null(dbEnv$debugISDBTripIDs)) dbEnv$debugISDBTripIDs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripIDs, expected = dbEnv$debugISDBTripIDs, expectedID = "debugISDBTripIDs", known = match_CONF_HI$TRIP_ID_ISDB, stepDesc = "matchTrips_CONF_HI")
if (args$debug) {
t24_ <- proc.time() - t24
message("\tExiting matchHI() (",round(t24_[1],0),"s elapsed)")
}
return(match_CONF_HI)
}
matchHO <- function(df = NULL){
if (args$debug) t25 <- Mar.utils::where_now(returnTime = T)
# MARFIS HAILOUT CONFIRMATION NUMBER -----------------------------------------------------------
thisIsdbTrips <- unique(df[!is.na(df$MARFIS_CONF_NUMBER),c("TRIP_ID_ISDB", "MARFIS_CONF_NUMBER")])
thisMarfMatch <- unique(marfMatch[!is.na(marfMatch$CONF_NUMBER_HO),c("TRIP_ID_MARF","CONF_NUMBER_HO")])
if (nrow(thisMarfMatch)==0){
thisMarfMatch$TRIP_ID_ISDB <- numeric()
colnames(thisMarfMatch)[colnames(thisMarfMatch)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_HO"
thisMarfMatch$match_CONF_HO <- logical()
if (args$debug) {
t25_ <- proc.time() - t25
message("\tExiting matchHO() - no HO matches: (", round(t25_[1],0),"s elapsed)")
}
return(thisMarfMatch)
}
p <- strsplit(as.character(thisMarfMatch$CONF_NUMBER_HO), ',')
thisMarfMatch <- data.frame(CONF_NUMBER_HO=unlist(p), TRIP_ID_MARF=rep(thisMarfMatch$TRIP_ID_MARF, sapply(p, FUN=length)))
match_CONF_HO <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "MARFIS_CONF_NUMBER", by.y = "CONF_NUMBER_HO"))
colnames(match_CONF_HO)[colnames(match_CONF_HO)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_HO"
colnames(match_CONF_HO)[colnames(match_CONF_HO)=="MARFIS_CONF_NUMBER"] <- "MARFIS_CONF_NUMBER_HO"
if(nrow(match_CONF_HO)>0){
match_CONF_HO$match_CONF_HO <- TRUE
}else{
match_CONF_HO$match_CONF_HO <- logical()
}
if (!is.null(dbEnv$debugISDBTripIDs)) dbEnv$debugISDBTripIDs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripIDs, expected = dbEnv$debugISDBTripIDs, expectedID = "debugISDBTripIDs", known = match_CONF_HO$TRIP_ID_ISDB, stepDesc = "matchTrips_CONF_HI")
if (args$debug) {
t25_ <- proc.time() - t25
message("\tExiting matchHO() (",round(t25_[1],0),"s elapsed)")
}
return(match_CONF_HO)
}
matchVR <- function(df = NULL){
if (args$debug) t26 <- Mar.utils::where_now(returnTime = T)
# MARFIS VR NUMBER -----------------------------------------------------------
thisIsdbTrips <- unique(df[!is.na(df$VR),c("TRIP_ID_ISDB", "VR", "LIC")])
#MARF - combine using both fishing and landing vessel vrs, then combine and get unique list
marf_TRIPS_F <- unique(marfMatch[, c("TRIP_ID_MARF","VR_NUMBER_FISHING")])
colnames(marf_TRIPS_F)[colnames(marf_TRIPS_F)=="VR_NUMBER_FISHING"] <- "VR_m"
marf_TRIPS_L <- unique(marfMatch[, c("TRIP_ID_MARF","VR_NUMBER_LANDING")])
colnames(marf_TRIPS_L)[colnames(marf_TRIPS_L)=="VR_NUMBER_LANDING"] <- "VR_m"
thisMarfMatch <- unique(rbind(marf_TRIPS_F,marf_TRIPS_L))
if (nrow(thisMarfMatch)==0){
thisMarfMatch$TRIP_ID_ISDB <- numeric()
colnames(thisMarfMatch)[colnames(thisMarfMatch)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_VR"
thisMarfMatch$match_VR <- logical()
if (args$debug) {
t26_ <- proc.time() - t26
message("\tExiting matchHO() - no VR matches: (", round(t26_[1],0),"s elapsed)")
}
return(thisMarfMatch)
}
match_VR <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "VR", by.y = "VR_m"))
if (nrow(match_VR)>0){
match_VR$swapVR <- F
}else{
match_VR$swapVR <- logical()
}
match_VR_swap <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "LIC", by.y = "VR_m"))
if (nrow(match_VR_swap)>0){
match_VR_swap$swapVR <- T
match_VR <- rbind.data.frame(match_VR, match_VR_swap)
}
colnames(match_VR)[colnames(match_VR)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_VR"
if (nrow(match_VR)>0){
match_VR$match_VR <- TRUE
}else{
match_VR$match_VR <- logical()
}
if (!is.null(dbEnv$debugISDBTripIDs)) dbEnv$debugISDBTripIDs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripIDs, expected = dbEnv$debugISDBTripIDs, expectedID = "debugISDBTripIDs", known = match_VR$TRIP_ID_ISDB, stepDesc = "matchTrips_VR")
if (!is.null(dbEnv$debugVRs)) dbEnv$debugVRs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugVRs, expected = dbEnv$debugVRs, expectedID = "debugVRs", known = match_VR$VR, stepDesc = "matchTrips_VR")
if (!is.null(dbEnv$debugLics)) dbEnv$debugLics <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugLics, expected = dbEnv$debugLics, expectedID = "debugLics", known = match_VR$LIC, stepDesc = "matchTrips_VR")
if (args$debug) {
t26_ <- proc.time() - t26
message("\tExiting matchVR() (",round(t26_[1],0),"s elapsed)")
}
return(match_VR)
}
matchLIC <- function(df = NULL){
if (args$debug) t27 <- Mar.utils::where_now(returnTime = T)
# MARFIS VR NUMBER -----------------------------------------------------------
thisIsdbTrips <- unique(df[!is.na(df$LIC),c("TRIP_ID_ISDB", "LIC", "VR")])
thisMarfMatch <- unique(marfMatch[, c("TRIP_ID_MARF","LICENCE_ID")])
if (nrow(thisMarfMatch)==0){
thisMarfMatch$TRIP_ID_ISDB <- numeric()
colnames(thisMarfMatch)[colnames(thisMarfMatch)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_LIC"
thisMarfMatch$match_LIC <- logical()
if (args$debug) {
t27_ <- proc.time() - t27
message("\tExiting matchLIC() - no LIC matches: (", round(t27_[1],0),"s elapsed)")
}
return(thisMarfMatch)
}
match_LIC <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "LIC", by.y = "LICENCE_ID"))
if(nrow(match_LIC)>0){
match_LIC$swapLIC <- F
}else{
match_LIC$swapLIC <- logical()
}
match_LIC_swap <- unique(merge(thisIsdbTrips, thisMarfMatch, by.x= "VR", by.y = "LICENCE_ID"))
if (nrow(match_LIC_swap)>0){
match_LIC_swap$swapLIC <- T
match_LIC <- rbind.data.frame(match_LIC, match_LIC_swap)
}
colnames(match_LIC)[colnames(match_LIC)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_LIC"
if(nrow(match_LIC)>0){
match_LIC$match_LIC <- TRUE
}else{
match_LIC$match_LIC <- logical()
}
if (!is.null(dbEnv$debugISDBTripIDs)) dbEnv$debugISDBTripIDs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripIDs, expected = dbEnv$debugISDBTripIDs, expectedID = "debugISDBTripIDs", known = match_LIC$TRIP_ID_ISDB, stepDesc = "matchTrips_LIC")
if (!is.null(dbEnv$debugVRs)) dbEnv$debugVRs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugVRs, expected = dbEnv$debugVRs, expectedID = "debugVRs", known = match_LIC$VR, stepDesc = "matchTrips_LIC")
if (!is.null(dbEnv$debugLics)) dbEnv$debugLics <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugLics, expected = dbEnv$debugLics, expectedID = "debugLics", known = match_LIC$LIC, stepDesc = "matchTrips_LIC")
if (args$debug) {
t27_ <- proc.time() - t27
message("\tExiting matchLIC() (",round(t27_[1],0),"s elapsed)")
}
return(match_LIC)
}
matchDate <- function(df = NULL){
if (args$debug) t28 <- Mar.utils::where_now(returnTime = T)
#DATE RANGE --------------------------------------------------------
thisIsdbTrips <- unique(df[!is.na(df$BOARD_DATE) & !is.na(df$LANDING_DATE), c("TRIP_ID_ISDB","BOARD_DATE","LANDING_DATE", "SRC", "VR", "LIC","TRIPCD_ID")])
thisMarfMatch_F <- unique(marfMatch[, c("TRIP_ID_MARF", "T_DATE1","T_DATE2", "VR_NUMBER_FISHING", "LICENCE_ID")])
colnames(thisMarfMatch_F)[colnames(thisMarfMatch_F)=="VR_NUMBER_FISHING"] <- "VR_NUMBER"
thisMarfMatch_L <- unique(marfMatch[, c("TRIP_ID_MARF", "T_DATE1","T_DATE2","VR_NUMBER_LANDING", "LICENCE_ID")])
colnames(thisMarfMatch_L)[colnames(thisMarfMatch_L)=="VR_NUMBER_LANDING"] <- "VR_NUMBER"
thisMarfMatch <- unique(rbind.data.frame(thisMarfMatch_F, thisMarfMatch_F))
if (nrow(thisMarfMatch)==0){
thisMarfMatch$TRIP_ID_ISDB <- numeric()
colnames(thisMarfMatch)[colnames(thisMarfMatch)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_DATE"
thisMarfMatch$match_Date <- logical()
if (args$debug) {
t28_ <- proc.time() - t28
message("\tExiting matchDate() - no date matches: (", round(t28_[1],0),"s elapsed)")
}
return(thisMarfMatch)
}
#cross join of isdb and marf trips
thisIsdbTripsDt<-data.table::setDT(thisIsdbTrips)
thisMarfMatchDt<-data.table::setDT(thisMarfMatch)
xData<-data.table::setkey(thisIsdbTripsDt[,c(k=1,.SD)],k)[thisMarfMatchDt[,c(k=1,.SD)],allow.cartesian=TRUE][,k:=NULL]
xData[,"T1"]<- as.numeric(abs(difftime(xData$BOARD_DATE,xData$T_DATE1, units="days")))
xData[,"T2"]<- as.numeric(abs(difftime(xData$LANDING_DATE,xData$T_DATE1, units="days")))
xData[,"T3"]<- as.numeric(abs(difftime(xData$LANDING_DATE,xData$T_DATE2, units="days")))
xData[,"T4"]<- as.numeric(abs(difftime(xData$BOARD_DATE,xData$T_DATE2, units="days")))
xData$CLOSEST1 <- with(xData, pmin(T1, T2, T3, T4))
#below we first find the closest trips in time using the smallest difference of all calculated times
match_DateMin<- xData[, {tmp <- CLOSEST1; .SD[tmp==min(tmp)] }, TRIP_ID_ISDB]
match_DateMin <- as.data.frame(match_DateMin)
#hard cutoff - anything more than maxTripDiff_Hr different is not a match, and is dropped here
maxTripDiff_Day <- args$maxTripDiff_Hr/24
match_DateMin <- match_DateMin[match_DateMin$CLOSEST1 <=maxTripDiff_Day,]
if (nrow(match_DateMin[match_DateMin$CLOSEST1 >= 8,])>0) match_DateMin[match_DateMin$CLOSEST1 >= 8, "match_DATE_DETS"] <- "ISDB/MARF activity more than a week different"
if (nrow(match_DateMin[match_DateMin$CLOSEST1 < 8,])>0) match_DateMin[match_DateMin$CLOSEST1 < 8, "match_DATE_DETS"] <- "ISDB/MARF activity within a week"
if (nrow(match_DateMin[match_DateMin$CLOSEST1 < 3,])>0) match_DateMin[match_DateMin$CLOSEST1 < 3, "match_DATE_DETS"] <- "ISDB/MARF activity within 2 days"
if (nrow(match_DateMin[match_DateMin$CLOSEST1 < 2,])>0) match_DateMin[match_DateMin$CLOSEST1 < 2, "match_DATE_DETS"] <- "ISDB/MARF activity within 1 day"
if (nrow(match_DateMin[match_DateMin$CLOSEST1 < 1,])>0) match_DateMin[match_DateMin$CLOSEST1 < 1, "match_DATE_DETS"] <- "ISDB/MARF activity overlap (i.e. ideal A)"
withinners <- match_DateMin[(match_DateMin$T_DATE1 >= match_DateMin$BOARD_DATE & match_DateMin$T_DATE1 <= match_DateMin$LANDING_DATE) |
(match_DateMin$T_DATE2 >= match_DateMin$BOARD_DATE & match_DateMin$T_DATE2 <= match_DateMin$LANDING_DATE) , ]
if (nrow(withinners)>0) {
match_DateMin <- match_DateMin[!(match_DateMin$TRIP_ID_ISDB %in% withinners$TRIP_ID_ISDB),]
withinners[,"match_DATE_DETS"] <- "ISDB/MARF activity overlap (i.e. ideal)"
}
if(nrow(match_DateMin)>0){
#a single marfis trip can get matched against multiple ISDB recs based on vr/lic/date - only retain the closest in time
match_DateMin <- data.table::as.data.table(match_DateMin)
match_DateMin <- as.data.frame(match_DateMin[match_DateMin[, .I[CLOSEST1 == min(CLOSEST1)], by=TRIP_ID_MARF]$V1])
}
if (nrow(withinners)>0 & nrow(match_DateMin)>0) {
match_DateMin <- rbind(withinners, match_DateMin)
}else if (nrow(withinners)>0){
match_DateMin <- withinners
}
match_DateMin$T1 <- match_DateMin$T2 <- match_DateMin$T3 <- match_DateMin$T4 <- NULL
#not enough to only match time span. Must also match on VR or licence
match_DateMin$mVR <- match_DateMin$mLIC <- match_DateMin$mMix1 <- match_DateMin$mMix2 <- match_DateMin$mTripcd_id <- F
if (nrow(match_DateMin[which(match_DateMin$VR == match_DateMin$VR_NUMBER),])>1){
match_DateMin[which(match_DateMin$VR == match_DateMin$VR_NUMBER),"mVR"] <- T
}
if (nrow(match_DateMin[which(match_DateMin$LIC == match_DateMin$LICENCE_ID),])>1){
match_DateMin[which(match_DateMin$LIC == match_DateMin$LICENCE_ID),"mLIC"] <- T
}
if (nrow(match_DateMin[which(match_DateMin$VR == match_DateMin$LICENCE_ID),])>1){
match_DateMin[which(match_DateMin$VR == match_DateMin$LICENCE_ID),"mMix1"] <- T
}
if (nrow(match_DateMin[which(match_DateMin$VR_NUMBER == match_DateMin$LIC),])>1){
match_DateMin[which(match_DateMin$VR_NUMBER == match_DateMin$LIC),"mMix2"] <- T
}
if (!is.null(args$tripcd_id)){
if (nrow(match_DateMin[which(match_DateMin$TRIPCD_ID %in% args$tripcd_id),])>1){
match_DateMin[which(match_DateMin$TRIPCD_ID %in% args$tripcd_id),"mTripcd_id"] <- T
}
}
match_DateMin$match_Date <- F
match_DateMin[which(match_DateMin$mVR|match_DateMin$mLIC|match_DateMin$mMix1|match_DateMin$mMix2|match_DateMin$mTripcd_id),"match_Date"] <- TRUE
match_DateMin <- match_DateMin[match_DateMin$match_Date == TRUE,]
# match_DateMin$VR <- match_DateMin$VR_NUMBER <- match_DateMin$LIC <- match_DateMin$LICENCE_ID <-
match_DateMin$CLOSEST1 <- match_DateMin$SRC <- NULL
colnames(match_DateMin)[colnames(match_DateMin)=="TRIP_ID_MARF"] <- "TRIP_ID_MARF_DATE"
if (!is.null(dbEnv$debugISDBTripIDs)) dbEnv$debugISDBTripIDs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripIDs, expected = dbEnv$debugISDBTripIDs, expectedID = "debugISDBTripIDs", known = match_DateMin$TRIP_ID_ISDB, stepDesc = "matchTrips_date")
if (!is.null(dbEnv$debugVRs)) dbEnv$debugVRs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugVRs, expected = dbEnv$debugVRs, expectedID = "debugVRs", known = match_DateMin$VR, stepDesc = "matchTrips_date")
if (!is.null(dbEnv$debugLics)) dbEnv$debugLics <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugLics, expected = dbEnv$debugLics, expectedID = "debugLics", known = match_DateMin$LIC, stepDesc = "matchTrips_date")
if (args$debug) {
t28_ <- proc.time() - t28
message("\tExiting matchDate() (",round(t28_[1],0),"s elapsed)")
}
return(match_DateMin)
}
match_TripName <- matchTripNames(df = isdbTrips)
match_HI <- matchHI(df = isdbTrips)
match_HO <- matchHO(df = isdbTrips)
match_VR <- matchVR(df = isdbTrips)
match_LIC <- matchLIC(df = isdbTrips)
match_Date <- matchDate(df = isdbTrips)
knowncombos_all <- as.data.frame(rbind(as.matrix(match_TripName[,c("TRIP_ID_ISDB", "TRIP_ID_MARF_TRIP")]),
as.matrix(match_HI[,c("TRIP_ID_ISDB","TRIP_ID_MARF_HI")]),
as.matrix(match_HO[,c("TRIP_ID_ISDB","TRIP_ID_MARF_HO")]),
as.matrix(match_VR[,c("TRIP_ID_ISDB","TRIP_ID_MARF_VR")]),
as.matrix(match_LIC[,c("TRIP_ID_ISDB","TRIP_ID_MARF_LIC")]),
as.matrix(unique(match_Date[,c("TRIP_ID_ISDB","TRIP_ID_MARF_DATE")]))))
colnames(knowncombos_all) <- c("TRIP_ID_ISDB", "TRIP_ID_MARF")
if (nrow(knowncombos_all)>0){
knowncombos_cnt <- stats::aggregate(
x = list(cnt = knowncombos_all$TRIP_ID_MARF),
by = list(TRIP_ID_ISDB = knowncombos_all$TRIP_ID_ISDB,
TRIP_ID_MARF = knowncombos_all$TRIP_ID_MARF
),
length
)
knowncombos_cnt <- merge(knowncombos_cnt, isdbTrips[, c("TRIP_ID_ISDB", "TRIP", "TRIPCD_ID", "SRC")], all.x=T)
#matchNone must have values for either ISDB_TRIP, OBS_ID of non-NA value for OB_PRESENT that is not "N"
matchNone <- marfMatch[!is.na(marfMatch$ISDB_TRIP) |!is.na(marfMatch$OBS_ID) | (!is.na(marfMatch$OBS_PRESENT) & marfMatch$OBS_PRESENT!="N") ,]
# these guys are matched on trip-specific values (ie trip name, confirmation codes). Likelihood of duplicates is low
match_CONF1 <- merge(knowncombos_cnt, match_TripName, by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y=c("TRIP_ID_ISDB", "TRIP_ID_MARF_TRIP"), all.x=T)
match_CONF1 <- merge(match_CONF1, match_HI, by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y=c("TRIP_ID_ISDB","TRIP_ID_MARF_HI"), all.x=T)
match_CONF1 <- merge(match_CONF1, match_HO, by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y=c("TRIP_ID_ISDB","TRIP_ID_MARF_HO"), all.x=T)
match_CONF1$SRC <- NULL
#these guys are generic - vrs, lics and dates. some combination must occur together to match
#match_tmp grabs all of the records that got matched by lic or vr
#match_CONF2B than attempts to merge these lic/vr records with any lic/vr records that occurred within an acceptable window of time
#the date matches will also work for cases where the licence and vr were reversed (nMix1 or nMix2 = T).
match_tmp <- merge(knowncombos_cnt[,c("TRIP_ID_ISDB", "TRIP_ID_MARF","SRC")],
match_VR[,c("TRIP_ID_ISDB", "TRIP_ID_MARF_VR", "swapVR", "match_VR" )],
by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y=c("TRIP_ID_ISDB","TRIP_ID_MARF_VR"), all.x=T)
match_tmp <- merge(match_tmp,
match_LIC[,c("TRIP_ID_ISDB", "TRIP_ID_MARF_LIC", "swapLIC", "match_LIC" )],
by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y=c("TRIP_ID_ISDB","TRIP_ID_MARF_LIC"), all.x=T)
match_tmp <- merge(match_tmp,
match_Date[which(match_Date$mVR|match_Date$mLIC|match_Date$mMix1|match_Date$mMix2),
c("TRIP_ID_ISDB", "TRIP_ID_MARF_DATE", "VR", "LIC","mTripcd_id", "mMix2", "mMix1", "mLIC", "mVR", "match_Date", "match_DATE_DETS" )],
by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y = c("TRIP_ID_ISDB","TRIP_ID_MARF_DATE"), all.x=T)
# replace NAs for unmatched with false so can do math
match_tmp[c("match_Date", "match_VR", "match_LIC", "mTripcd_id", "swapVR", "swapLIC", "mMix1", "mMix2", "mLIC", "mVR")][is.na(match_tmp[c("match_Date", "match_VR", "match_LIC", "mTripcd_id", "swapVR", "swapLIC", "mMix1", "mMix2", "mLIC", "mVR")])] <- FALSE
# mTripcd_id not used for matching, just for math to help break ties
match_tmp <- match_tmp[which(match_tmp$match_Date & (match_tmp$match_LIC + match_tmp$match_VR + match_tmp$mTripcd_id) >0),]
match_tmp$SRC <- NULL
if (nrow(match_tmp)>0){
match_tmp$cnt_dateMatch <- match_tmp$match_LIC + match_tmp$match_VR + match_tmp$mTripcd_id
match_tmp$swappedLIC_VR <- FALSE
match_tmp[which(match_tmp$swapLIC|match_tmp$swapVR|match_tmp$mMix1|match_tmp$mMix2),"swappedLIC_VR"]<-T
match_tmp$swapLIC <- match_tmp$swapVR <- match_tmp$mMix1 <- match_tmp$mMix2 <- NULL
matches = merge(match_CONF1, match_tmp, by.x = c("TRIP_ID_ISDB","TRIP_ID_MARF"), by.y=c("TRIP_ID_ISDB","TRIP_ID_MARF"), all.x=T)
}else{
matches <- match_CONF1
matches$match_VR <- matches$match_LIC <- matches$match_Date <- matches$mTripcd_id <- matches$swappedLIC_VR <- FALSE
matches$cnt_dateMatch <- NA
matches$LIC <- NA
matches$match_DATE_DETS <-NA
matches$mLIC <- NA
matches$mVR <- NA
matches$VR <- NA
}
matches[c("match_TripName", "match_CONF_HI", "match_CONF_HO", "match_LIC", "match_VR", "match_Date","mTripcd_id","swappedLIC_VR")][is.na(matches[c("match_TripName", "match_CONF_HI", "match_CONF_HO", "match_LIC", "match_VR", "match_Date", "mTripcd_id","swappedLIC_VR")])] <- FALSE
if (!is.null(dbEnv$debugLics)) dbEnv$debugISDBTripNames <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugISDBTripNames, expected = dbEnv$debugISDBTripNames, expectedID = "debugISDBTripNames", known = matches$ISDB_TRIP_O, stepDesc = "matchTrips_Initial")
if (!is.null(dbEnv$debugVRs)) dbEnv$debugVRs <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugVRs, expected = dbEnv$debugVRs, expectedID = "debugVRs", known = matches$VR, stepDesc = "matchTrips_Initial")
if (!is.null(dbEnv$debugLics)) dbEnv$debugLics <- Mar.utils::updateExpected(quietly = T, df=dbEnv$debugLics, expected = dbEnv$debugLics, expectedID = "debugLics", known = matches$LIC, stepDesc = "matchTrips_Initial")
matches <- matches[which((matches$match_TripName|matches$match_CONF_HI|matches$match_CONF_HO) |
(matches$match_Date & (matches$match_LIC + matches$match_VR + matches$mTripcd_id) >0)),]
if(nrow(matches)==0){
message("No matches found")
matchGood <- matches
matchNone <- marfMatch[!is.na(marfMatch$ISDB_TRIP) |!is.na(marfMatch$OBS_ID) | (!is.na(marfMatch$OBS_PRESENT) & marfMatch$OBS_PRESENT!="N") ,]
Unmatchables <- nrow(matchNone)
if (nrow(matchNone)==0)matchNone <- NA
}else{
matches$cnt <- NULL
matches$cnt_dateMatch <- NULL
#PRIOR1 counts matches by trip name, Hail in code and hail out confirmation codes - these are very isdb specific - most likely matches
matches$PRIOR1 <- matches$match_TripName+matches$match_CONF_HI+matches$match_CONF_HO
#PRIOR2 counts matches within acceptable date range with correct licence, vr and tripcd_id (if provided) - less specific
matches$PRIOR2 <- (matches$match_Date & matches$match_LIC) + (matches$match_Date & matches$match_VR) + (matches$match_Date & matches$mTripcd_id)
matches <- data.table::setDT(matches)
#Should multiple matches occur for a single ISDB trip, the following can break ties - favouring :
# 1) those matched on trip name, Hail in code and hail out confirmation codes; followed by
# 2) those within acceptable date range with correct licence, vr and tripcd_id (if provided)
matches <- matches[, .SD[PRIOR1 %in% max(PRIOR1)], by=TRIP_ID_ISDB]
matches <-matches[, .SD[PRIOR2 %in% max(PRIOR2)], by=TRIP_ID_ISDB]
if ('mLIC' %in% colnames(matches)) matches$mLIC <- NULL
if ('mVR' %in% colnames(matches)) matches$mVR <- NULL
matches$PRIOR1 <- matches$PRIOR2 <- NULL
matches <- unique(as.data.frame(matches))
if (!is.null(args$tripcd_id)) {
colnames(matches)[colnames(matches)=="mTripcd_id"] <- "match_TRIPCD_ID"
}else{
matches$mTripcd_id <- NULL
}
matchNone <- matchNone[!(matchNone$TRIP_ID_MARF %in% matches$TRIP_ID_MARF),]
if(nrow(matchNone)>0) {
Unmatchables <- nrow(matchNone)
matchNone$ISDB_TRIP_M <- NULL
}else{
Unmatchables <- 0
matchNone <- NA
}
dups <- unique(matches[duplicated(matches[,"TRIP_ID_ISDB"]),"TRIP_ID_ISDB"])
if (length(dups)>0){
dupRows <- unique(matches[matches$TRIP_ID_ISDB %in% dups,])
dupRows[is.na(dupRows)] <- 0
dupRows <- stats::aggregate(TRIP_ID_MARF ~ ., dupRows, function(x) paste0(unique(x), collapse = ", "))
colnames(dupRows)[colnames(dupRows)=="TRIP_ID_MARF"] <- "POTENTIAL_TRIP_ID_MARF"
matches <- matches[!(matches$TRIP_ID_ISDB %in% dups),]
MultiMatches = nrow(dupRows)
}else{
MultiMatches <- 0
dupRows <- NA
}
matchGood <- matches[!is.na(matches$TRIP_ID_MARF),]
#not counting timeOverlap results as "matchNone" because there was really nothing to cause suspicion of a match.
if (nrow(matchGood)>0){
Obs_Trip_Name = nrow(matchGood[matchGood$match_TripName,])
Hail_In_Conf_Code = nrow(matchGood[matchGood$match_CONF_HI,])
Hail_Out_Conf_Code = nrow(matchGood[matchGood$match_CONF_HO,])
Date_Lic_VR_Combo = nrow(matchGood[matchGood$match_Date & matchGood$match_LIC & matchGood$match_VR,])
Date_Lic_Combo = nrow(matchGood[matchGood$match_Date & matchGood$match_LIC & !matchGood$match_VR,])
Date_VR_Combo = nrow(matchGood[matchGood$match_Date & matchGood$match_VR & !matchGood$match_LIC,])
Likely_Swapped_VR_LIC = nrow(matchGood[matchGood$swappedLIC_VR,])
Total_Matches = nrow(matchGood)
}else{
message("No matches found")
matchNone <- marfMatch[!is.na(marfMatch$ISDB_TRIP) |!is.na(marfMatch$OBS_ID) | (!is.na(marfMatch$OBS_PRESENT) & marfMatch$OBS_PRESENT!="N") ,]
Unmatchables <- nrow(matchNone)
if (nrow(matchNone)==0)matchNone <- NA
}
}
}
summ_df = as.data.frame(rbind(Obs_Trip_Name,
Hail_In_Conf_Code,
Hail_Out_Conf_Code,
Date_Lic_VR_Combo,
Date_Lic_Combo,
Date_VR_Combo,
Likely_Swapped_VR_LIC,
Total_Matches,
MultiMatches,
Unmatchables))
names(summ_df)<-"MATCHES_N"
res <- list()
res[["ISDB_MARFIS_POST_MATCHED"]] <- matchGood
res[["MATCH_SUMMARY_TRIPS"]] <- summ_df
res[["ISDB_UNMATCHABLES"]] <- matchNone
res[["ISDB_MULTIMATCHES"]] <- dupRows
if (args$debug) {
t22_ <- proc.time() - t22
message("\tExiting match_trips() (",round(t22_[1],0),"s elapsed)")
}
return(res)
}
#' @title match_sets
#' @description This function takes the results from get_marfis(), get_isdb()
#' and match_trips(), and attempts to match the sets for each trip.
#' @param isdb_sets default is \code{NULL}. This is the list output by the
#' \code{Mar.fleets::get_isdb()} function - it contains dataframes of both the
#' trip and set information from the ISDB database.
#' @param matched_trips default is \code{NULL}. This is the updated df output by the
#' \code{Mar.fleets::match_trips()} function - it information related to how trips from
#' the two databases are matched.
#' @param marf_sets default is \code{NULL}. This is the MARF_SETS output of the
#' \code{Mar.fleets::get_marfis()} function - it contains information about the MARFIS sets.
#' @param ... other arguments passed to methods
#' @import data.table
#' @family fleets
#' @return a list containing a single dataframe - "MAP_ISDB_MARFIS_SETS"
#' @author Mike McMahon, \email{Mike.McMahon@@dfo-mpo.gc.ca}
#' @note as this was developed for the Maritimes region, internal position QC requires that
#' Latitudes outside of 35:52 and Longitudes outside of -75:-45 are flagged and not used to match
#' @noRd
match_sets <- function(isdb_sets = NULL, matched_trips = NULL, marf_sets = NULL, ...){
args <- list(...)$args
if (args$debug) t23 <- Mar.utils::where_now(returnTime = T)
.I <- timeO <- timeM <- DATE_TIME<- EF_FISHED_DATETIME <-FISHSET_ID<- LOG_EFRT_STD_INFO_ID <- .SD <- NA
`:=`<- function (x, value) value
isdb_sets_o <- isdb_sets
marf_sets_o <- marf_sets
matchdf = matched_trips[!is.na(matched_trips$TRIP_ID_ISDB) & !is.na(matched_trips$TRIP_ID_MARF),c("TRIP_ID_ISDB","TRIP_ID_MARF")]
#only retain the sets from trips that we've matched
#add marf trip id to isdb and isdb trip id to marf
isdb_sets <- unique(isdb_sets[isdb_sets$TRIP_ID %in% matchdf$TRIP_ID_ISDB,])
colnames(isdb_sets)[colnames(isdb_sets)=="TRIP_ID"] <- "TRIP_ID_ISDB"
colnames(isdb_sets)[colnames(isdb_sets)=="LATITUDE"] <- "LATITUDE_I"
colnames(isdb_sets)[colnames(isdb_sets)=="LONGITUDE"] <- "LONGITUDE_I"
marf_sets <- unique(marf_sets[marf_sets$TRIP_ID %in% matchdf$TRIP_ID_MARF,c("LOG_EFRT_STD_INFO_ID", "TRIP_ID_MARF","EF_FISHED_DATETIME", "LATITUDE", "LONGITUDE" )])
#make lats and longs identifiable as marfis and add pseudo-set numbers to marfis data
colnames(marf_sets)[colnames(marf_sets)=="LATITUDE"] <- "LATITUDE_M"
colnames(marf_sets)[colnames(marf_sets)=="LONGITUDE"] <- "LONGITUDE_M"
qcer <- function(df=NULL, tripField = NULL,lat.field = "LATITUDE", lon.field = "LONGITUDE", timeField = NULL ){
#this takes a df, and for each unique trip, it returns the number of unique values from the time field,
#lat.field and lon.field (i.e. CNT_TIME, CNT_LAT and CNT_LON)
#this is done to help assess whether or not times and/or positions are appropriate for differentiating
#sets. If they're all the same, no point trying to use them
if (args$debug) t24 <- Mar.utils::where_now(returnTime = T)
df$BADTIM<- FALSE
df$BADPOS<- FALSE
#qc positions
df[(is.na(df[,lat.field]) | df[,lat.field] > 52 | df[,lat.field] < 35 | is.na(df[,lon.field]) | df[,lon.field] < -75 | df[,lon.field] > -45),"BADPOS"]<-"TRUE"
#qc times would go here, and populate BADTIM if they're bad
df[is.na(df[,timeField]),"BADTIM"]<-TRUE
nsets <- df[,c(tripField, timeField, lat.field, lon.field,"BADPOS", "BADTIM")]
#cnt the nsets/trip with good time
if (nrow(nsets[nsets$BADTIM ==F,])>0){
nsets_tim <- stats::aggregate(data=nsets[nsets$BADTIM ==F,],
nsets[,timeField]~nsets[,tripField],
FUN = function(x) length(unique(x))
)
colnames(nsets_tim) <- c(tripField, "CNT_TIM")
}else{
nsets_tim <- unique(nsets[,c(tripField), drop = F])
nsets_tim$CNT_TIM <- 0
}
if (nrow(nsets[nsets$BADPOS ==F,])>0){
nsets_pos <- stats::aggregate(data=nsets[nsets$BADPOS ==F,],
nsets[,lat.field]+nsets[,lon.field]~nsets[,tripField],
FUN = function(x) length(unique(x))
)
colnames(nsets_pos) <- c(tripField, "CNT_POS")
}else{
nsets_pos <- unique(nsets[,c(tripField), drop = F])
nsets_pos$CNT_POS <- 0
}
dets=merge(nsets_tim, nsets_pos, all=T)
dets[is.na(dets)] <- 0
dets$MAXMATCH <- pmax(dets$CNT_TIM, dets$CNT_POS, na.rm = T)
dets$CNT_TIM <- dets$CNT_POS <- NULL
df<- merge(df, dets)
if (args$debug) {
t24_ <- proc.time() - t24
message("\tExiting qcer() (",round(t24_[1],0),"s elapsed)")
}
return(df)
}
isdb_sets =qcer(df=isdb_sets, tripField = "TRIP_ID_ISDB", timeField = "DATE_TIME", lat.field = "LATITUDE_I", lon.field = "LONGITUDE_I")
colnames(isdb_sets)[colnames(isdb_sets)=="MAXMATCH"] <- "MAXMATCH_I"
colnames(isdb_sets)[colnames(isdb_sets)=="BADTIM"] <- "BADTIM_I"
colnames(isdb_sets)[colnames(isdb_sets)=="BADPOS"] <- "BADPOS_I"
marf_sets =qcer(df=marf_sets, tripField = "TRIP_ID_MARF", timeField = "EF_FISHED_DATETIME", lat.field = "LATITUDE_M", lon.field = "LONGITUDE_M")
colnames(marf_sets)[colnames(marf_sets)=="MAXMATCH"] <- "MAXMATCH_M"
colnames(marf_sets)[colnames(marf_sets)=="BADTIM"] <- "BADTIM_M"
colnames(marf_sets)[colnames(marf_sets)=="BADPOS"] <- "BADPOS_M"
#megadf is merged by trip - not set - so there are many false positives at this stage
megadf <- merge(isdb_sets, matchdf, by.x="TRIP_ID_ISDB", by.y="TRIP_ID_ISDB", all = T)
megadf <- merge(megadf, marf_sets, by.x="TRIP_ID_MARF", by.y="TRIP_ID_MARF", all = T)
#if there are no marfis sets, drop the recs -- can't match
megadf <- megadf[!is.na(megadf$LOG_EFRT_STD_INFO_ID),]
megadf$MAXMATCH <- pmin(megadf$MAXMATCH_I, megadf$MAXMATCH_M) #not sure if we'll get NAs here?
megadf$MAXMATCH_I <- megadf$MAXMATCH_M <- NULL
# calc time between isdb and marfis and flag those that exceed tolerance (maxSetDiff_hr)
megadf$DUR_DIFF <- NA
megadf[,"DUR_DIFF"]<- as.numeric(abs(difftime(megadf$DATE_TIME,megadf$EF_FISHED_DATETIME, units="hours")))
megadf$BADTIM <- FALSE
megadf <- megadf[megadf$DUR_DIFF <= args$maxSetDiff_Hr,]
if (nrow(megadf)==0){
if (args$debug) {
t23_ <- proc.time() - t23
message("\tExiting match_sets() - empty megadf: (", round(t23_[1],0),"s elapsed)")
}
return(NA)
}
megadf[megadf$BADTIM_I==T | megadf$BADTIM_M==T |is.na(megadf$DUR_DIFF),"BADTIM"]<-TRUE
megadf$BADTIM_I <- megadf$BADTIM_M <- NULL
# calc dist between isdb and marfis
megadf$BADPOS <- FALSE
megadf[(megadf$BADPOS_I ==T | megadf$BADPOS_M ==T | is.na(megadf$LATITUDE_I)| is.na(megadf$LONGITUDE_I)| is.na(megadf$LATITUDE_M)| is.na(megadf$LONGITUDE_M)), "BADPOS"]<-TRUE
megadf$BADPOS_I <- megadf$BADPOS_M <- NULL
megadf[,"DIST_DIFF"]<- round(geosphere::distGeo(p1 = megadf[,c("LONGITUDE_I","LATITUDE_I")],
p2 = megadf[,c("LONGITUDE_M","LATITUDE_M")]),0)
megadf <- megadf[megadf$DIST_DIFF <= (args$maxSetDiff_Km*1000),]
if (nrow(megadf)==0){
if (args$debug) {
t23_ <- proc.time() - t23
message("\tExiting match_sets() - empty megadf2: (", round(t23_[1],0),"s elapsed)")
}
return(NA)
}
megadf$MATCH<- NA
matches_all<- data.frame(TRIP_ID_ISDB=numeric(),
FISHSET_ID=numeric(),
TRIP_ID_MARF=numeric(),
LOG_EFRT_STD_INFO_ID = numeric(),
SET_MATCH = character())
utrips = sort(unique(megadf$TRIP_ID_ISDB))
for (i in 1:length(utrips)){
thisTrip_MATCHED_ALL <- thisTrip_MATCHED <- matches_all[FALSE,]
thisTrip<- megadf[which(megadf$TRIP_ID_ISDB == utrips[i]),]
if (nrow(thisTrip)==0) next
while (nrow(thisTrip_MATCHED) < thisTrip[1,"MAXMATCH"] ){
trippos <- thisTrip[thisTrip$BADPOS ==F,]
triptim <- thisTrip[thisTrip$BADTIM ==F,]
bestPos <- trippos[which.min(trippos$DIST_DIFF),]
bestPos <- bestPos[bestPos$BADPOS ==F,]
bestTim <- triptim[which.min(triptim$DUR_DIFF),]
bestTim <- bestTim[bestTim$BADTIM ==F,]
if (nrow(bestPos)==1 && nrow(bestTim)==1){
#these sets are closest in time and space
thisTrip_MATCHED <- thisTrip[rownames(bestPos),c("TRIP_ID_ISDB", "FISHSET_ID", "TRIP_ID_MARF", "LOG_EFRT_STD_INFO_ID")]
thisTrip_MATCHED$SET_MATCH <- "POS AND TIME"
}else if(nrow(bestPos)==0 && nrow(bestTim)==1){
#these sets are closest in time (but not closest in space)
thisTrip_MATCHED <- thisTrip[rownames(bestTim),c("TRIP_ID_ISDB", "FISHSET_ID", "TRIP_ID_MARF", "LOG_EFRT_STD_INFO_ID")]
thisTrip_MATCHED$SET_MATCH <- "TIME"
}else if(nrow(bestPos)==1 && nrow(bestTim)==0){
#these sets are closest in space (but not closest in time)
thisTrip_MATCHED <- thisTrip[rownames(bestPos),c("TRIP_ID_ISDB", "FISHSET_ID", "TRIP_ID_MARF", "LOG_EFRT_STD_INFO_ID")]
thisTrip_MATCHED$SET_MATCH <- "POS"
}else{
#multiple best? A tie?
warning("Something weird happened while attempting to match sets. Please let Mike.McMahon@dfo-mpo.gc.ca know what you were just doing - maybe send him the script you just ran?")
}
thisTrip <- thisTrip[!(thisTrip$FISHSET_ID %in% thisTrip_MATCHED$FISHSET_ID | thisTrip$LOG_EFRT_STD_INFO_ID %in% thisTrip_MATCHED$LOG_EFRT_STD_INFO_ID),]
thisTrip_MATCHED_ALL <- rbind(thisTrip_MATCHED_ALL, thisTrip_MATCHED)
if(nrow(thisTrip)==0)break
}
matches_all<- rbind(matches_all, thisTrip_MATCHED_ALL)
}
res= list()
res[["MAP_ISDB_MARFIS_SETS"]] <- unique(as.data.frame(matches_all))
if (args$debug) {
t23_ <- proc.time() - t23
message("\tExiting match_sets() (",round(t23_[1],0),"s elapsed)")
}
return(res)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.