#' @title fishPaths: creates a data frame of all observed paths
#'
#' @description The function builds a data frame of all the observed fish paths in the
#' observation file based on the parent child table. The function was originally called
#' FishPath_truncateToLastLeg() and was built by Greg K. Ryan K. altered the function
#' to work solely in R and doesn't require the SQLite backend.
#'
#' @param valid_obs
#'
#' @param valid_paths
#'
#' @param tagCntLimit
#'
#' @author Greg Kliewer
#'
#' @examples fishPaths()
#'
#' @import dplyr
#' @import lubridate
#' @import stringr
#' @export
#' @return NULL
#'
fishPaths <- function(valid_obs, valid_paths){
# may need to add species
# Species <- 'Chinook'
# startDate <- '03/01/16'
# endDate <- '08/16/16'
#Requests:
#Pre-script data check--e.g. Array in config or obs use lower case whereas in parent-child use all upper
#DONE
#add final userStatus to obs output: if any auto)rocess = "no" for entire tag history, set userStatus = null, else userStatus = "process"
#Species and Date range as input parameter
#Debug: miscalc of finalObsLoc in csv output--cf SFG-SFG
#seeming miscalculations of main vs ext vs outside: see same tag 3D9.1C2DAC419A, which is maybe right but is good starting point
#ct <- Sys.time()
#print(str_c("start time: ", ct))
#assume ending dates need time (11:59:59PM) added for all equivalency calculations to be accurate
# if( length( grep( "/:", startDate)) == 0 ) startDate <- paste0(startDate, " 00:00:00")
# if( length( grep( "/:", endDate)) == 0 ) endDate <- paste0(endDate, " 23:59:59")
#
# seasonBegin <- substr(mdy( seasonBegin ),1,10)
# seasonEnd <- substr(mdy_hms( seasonEnd ),1,19)
# spawnBegin <- substr(mdy( spawnBegin ),1,10)
# spawnEnd <- substr(mdy_hms( spawnEnd ),1,19)
#
# dbConn <- dbConnect(RSQLite::SQLite(), "db_pitClean.sqlite")
#
# #load and validate higher order data
# loadData_HigherOrder(dbConn)
# if( TRUE ) #RK commented out down to #load observational data
# {
# dataIsValid <- validateData_HigherOrder(dbConn)
# if( !dataIsValid ){ stop("Invalid data in config and/or parent-child file/table. See console for details.") }
# }
#
# #load observational data
# loadData_Observational(dbConn)
#
# #populate array values
# #note: this query assigns nulls wherever there is no matching siteID, which is ok in this context
# #to prevent nulls add: Where exists (Select Array FROM Site_Config Where SiteID = Obs.SiteID AND AntennaID = Obs.AntennaID)
# qry <- "Update Obs Set Array = (Select Array FROM Site_Config Where SiteID = Obs.SiteID AND AntennaID = Obs.AntennaID)"
# dbSendStatement(dbConn, qry)
#
# #after above, records with null arrays must be outside study area
# qry <- "Update Obs Set OutsideStudyArea = 'x' where Array is null OR Array = 'OUTSIDE STUDY AREA'"
# dbSendStatement(dbConn, qry)
#
# #populate temporary observational table for current date ranges and do pre-processing
# if( TRUE )
# {
# qry = "Delete from ObsTemp"
# dbSendStatement(dbConn, qry)
#
# qry <- str_c("Insert into ObsTemp( SiteID, Array, AntennaID, TagID, ObsDateTime, Species )
# SELECT SiteID, Array, AntennaID, TagID, ObsDateTime, Species
# FROM Obs
# where Species = '", Species, "' and OutsideStudyArea is null ")
# qry <- str_c(qry, " And ObsDateTime >= '", seasonBegin, "' and ObsDateTime < '", seasonEnd, "'")
# dbSendStatement(dbConn, qry)
#
# #validate observational data
# if( TRUE )
# {
# dataIsValid <- validateData_Obs(dbConn)
# if( !dataIsValid ){ stop("Invalid data in observational data. See console for details.") }
# }
# }
#
# #list of all possible shortest paths-to-root (i.e. node straight to root) for current data set
# if( TRUE ) { shortestPathToRoot_logCurrPaths(dbConn) }
#
# qry <- "Select StartSite, StartArray, EndSite, EndArray, NodeNum, NodeSite, NodeArray from Site_PathsFromMainRoot "
# pathList <- dbGetQuery(dbConn, qry)
#
# qry <- "Select * from ObsTemp "
# dfTagObsContainer <- dbGetQuery(dbConn, qry)
# qry <- "Select distinct TagID from ObsTemp "
# #qry <- "Select distinct TagID from ObsTemp where and tagID = '3D9.1C2DB1F855'"
# dfTag <- dbGetQuery(dbConn, qry)
#Create tag-site matrix, with 1 row per tagID and 1 column for each study area Node at which at least 1 fish was observed
#Note: must be dataframe, which is recursive, to use $ for referencing elements. vectors and matrices are atomic, not recursive
# dfCol <- dbGetQuery(dbConn, "Select distinct SiteID || '-' || Array from ObsTemp
# Where SiteID || '-' || Array in
# (Select ChildSite || '-' || ChildArray From Site_ParentChild)
# Or
# SiteID || '-' || Array in
# (Select ParentSite || '-' || ParentArray From Site_ParentChild)")
# dfTagSiteMatrix <- matrix(, nrow = nrow(dfTag), ncol = nrow(dfCol) + 5)
# dfTagSiteMatrix <- as.data.frame(dfTagSiteMatrix)
#
# #Assign column names 1 - 5
# #So either works: dfTagSiteMatrix[2, "GRA_GRA"] or dfTagSiteMatrix$GRA_GRA[2]
# names(dfTagSiteMatrix)[1] <- "TagID"
# names(dfTagSiteMatrix)[2] <- "ObsCnt"
# names(dfTagSiteMatrix)[3] <- "Status"
# names(dfTagSiteMatrix)[4] <- "FinalObsLoc"
# names(dfTagSiteMatrix)[5] <- "FinalObsDate"
#
# #Assign column names 6 - n, using nodes (siteID-Array pairs) for column names
# for(i in 1:nrow(dfCol)){
# names(dfTagSiteMatrix)[i+5] <- dfCol[i,1]
# }
alltagObs <- valid_obs %>%
mutate(InMainPath = NA,
InExtendedPath = NA)
#AutoProcStatus = NA,
#UserProcStatus = NA,
#UserComment = NA)
#OutsideExtPath = NA)
#OutsideStudyArea = NA, # don't need
#CodeComment = NA, # don't need
#UserComment = NA,
#MainPath = NA, # don't need
#ExtendedPath = NA) # don't need
tags <- distinct(alltagObs, TagID)
# nodes <- distinct(valid_paths, NodeSite, .keep_all = TRUE) %>%
# full_join(distinct(alltagObs, Node), by = c('NodeSite' = 'Node')) %>%
# group_by(EndNode) %>%
# arrange(desc(NodeNum))
# tag_mat <- as.data.frame(matrix(,nrow = nrow(tags), ncol = nrow(nodes) + 5)) #would be nice to order columns at some point
# names(tag_mat) <- c('TagID', 'ObsCnt', 'Status', 'FinalObsNode', 'FinalObsDate', nodes$NodeSite)
#
# tag_mat <- tag_mat %>%
# mutate_at(vars(5:(nrow(nodes)+5)), funs(ymd_hms))
totalObs = 0
tocIndex = 0
print(paste0(Sys.time(), " ", 1, " of ", nrow(tags), " tags. Current tag: ",tags$TagID[1]))
#for each tag
for (i in 1:nrow(tags)) { # goes to line 369
if( i %% 1000 == 0){ print(paste0(Sys.time(), " ", i, " of ", nrow(tags), " tags. Current tag: ",tags$TagID[i])) }
#if( i %% tagCntLimit == 0) break
#observations for current tag excluding those not in study area
# qry = str_c("Select * from ObsTemp where tagID = '", dfTag$TagID[i], "' Order by ObsDateTime")
# dfTagObs <- dbGetQuery(dbConn, qry)
tagObs <- alltagObs %>%
filter(TagID == tags$TagID[i]) %>% #174
arrange(ObsDate)
obsColCnt = ncol(tagObs)
obsRowCnt = nrow(tagObs)
totalObs = totalObs + obsRowCnt
lastNode_currTag = tagObs$Node[obsRowCnt]
#lastArray_currTag = tagObs$Array[obsRowCnt]
#for( j in 1:obsRowCnt ) print(str_c(j, " ", dfTagObs$SiteID[j], "-", dfTagObs$Array[j] ))
#confirm path for current tagID from list of paths.
pathFound <- FALSE
for( pIndex in 1:nrow(valid_paths) ){
if( valid_paths$EndNode[pIndex] == lastNode_currTag)
{
pathFound <- TRUE
break
}
}
#current path not found: some kind of problem
if (!pathFound){
stop(str_c("Path to Root not found for Node <", lastNode_currTag, ">. Program terminating"))
} else {
# qry <- str_c("Select NodeSite, NodeArray, NodeNum, PathString
# From Site_PathsFromMainRoot
# Where EndSite = '", lastSite_currTag, "' AND EndArray = '", lastArray_currTag, "' Order By NodeNum")
# pathNodes <- dbGetQuery(dbConn, qry)
# pathMain <- pathNodes$PathString[1]
# pathExt <- ""
#print(str_c(tocIndex, " ", pathMain))
pathNodes <- valid_paths %>%
filter(EndNode == lastNode_currTag) %>%
arrange(NodeNum)
pathMain <- pathNodes$pathString[1]
pathExt <- ""
}
SomeNodesNotFound = FALSE
for( iObs in 1:obsRowCnt ){
#print(str_c("Tag: ",dfTagObs$TagID[iTag], " ", dfTagObs$SiteID[iTag], "-", dfTagObs$Array[iTag]))
currTagNodeFound = FALSE
for( iNode in 1:nrow(pathNodes) )
{
#print(str_c("Node: ", dfTagObs$SiteID[iTag], "-", dfTagObs$Array[iTag], " ", pathNodes$NodeSite[iNode], "-", pathNodes$NodeArray[iNode] ))
#use string search to determine if current observation <siteID, Array> is in current main path
if( tagObs$Node[iObs] == pathNodes$NodeSite[iNode])
{
#print("currTagNodeFound")
tagObs$InMainPath[[iObs]] = "x"
currTagNodeFound == TRUE
break
}
}
if( !currTagNodeFound ) SomeNodesNotFound = TRUE
} # ends iObs
if( SomeNodesNotFound ){
#print( str_c("NodeNotFound for: ", dfTag$TagID[i], " Last Main Path Node: ", lastSite_currTag, "-", lastArray_currTag ))
# print( str_c("pathMain: ", pathMain) )
pLen = nchar(pathMain) + 1
# qry <- str_c( "Select EndSite, EndArray, trim(substr(PathString, ", pLen, ", 500)) as candExt from Site_PathsFromMainRoot " )
# qry <- str_c(qry, "Where NodeNum = 1 And PathString like '%", pathMain, "%' Order by trim(substr(PathString, ", pLen, ", 500))")
# #print(qry)
# dfCandExt <- dbGetQuery(dbConn, qry)
dfCandExt <- valid_paths %>%
filter(NodeNum == 1,
grepl(pathMain, pathString)) %>%
mutate(candExt = substr(pathString,pLen,500)) %>%
arrange(candExt)
#From list of candidates, select the extension with latest ObsDateTime. This will be the "preferred" extension for the purpose of tag cleaning.
highestBidder = 0
localNodeNum = ""
latestObsDT = "1900-01-01 11:11:11"
# TO DO: this non-recursive routinge won't get proper extension to extended path
# so suppose lastObs = Root A, though fish spawned at C after following very circuitous path first (C D E) then (C F G) then (C H I)
# this will get A as mainPath and (B C) as extended path but miss (C H I) as extension of extended path
# so still neeed recursive work-around or recursion
# work-around: do several more iterations below of:
# dfCandExt <- dbGetQuery(dbConn, ""Select from from Site_PathsFromMainRoot where PathString like pathMain +
# " " +
# dfCandExt$EndSite[highestBidder] +
# "-"
# dfCandExt$EndSite[highestBidder])
# key is that C above, eg., must be the root for any extension of the extension
# so suppose fish went (I J) and then (I K). We'd want (C H I K) in later iteration
# maybe better to work backwards as follows:
# beginning with lastObs as currEndNode, for each obs
# if thisObs not on path and thisObs is descendent to currEndNode (check parent-child table)
# currEndNode = thisObs
# add node to path
# final result should be FULL proper extension to main path
# also, since working through obs in descending fashion, don't need to reiterate
# So, at one and same time we are managing "descension" and time: earlier than
#each time selecting extension if any with lastestObs
for (j in 1:nrow( dfCandExt )){
for (k in 1:nrow(tagObs)){
#print( str_c( k, " Checking ", str_c(dfTagObs$SiteID[k], "-", dfTagObs$Array[k]), " in ", dfCandExt$candExt[j] ))
#is current SiteID-Array string in string representation of the candidate extension?
#if( str_detect( dfCandExt$candExt[j], str_c( dfTagObs$SiteID[k],"-", dfTagObs$Array[k] )))
if(grepl(tagObs$Node[k],dfCandExt$candExt[j])){
#print("hello A")
#is the current date more recent than the stored date?
if( latestObsDT < tagObs$ObsDate[k] ){
#print("hello B")
latestObsDT <- tagObs$ObsDate[k]
highestBidder = j
#print(c(highestBidder, latestObsDT))
}
}
}
}# ends line 258
#print(c("highestBidder", highestBidder))
#get full list of nodes for selected extension
if( highestBidder > 0){
#print(str_c( "hello ", i))
# qry <- str_c("Select NodeSite, NodeArray, pathString from site_pathsFromMainRoot where ",
# "EndSite = '", dfCandExt$EndSite[highestBidder], "' AND EndArray = '", dfCandExt$EndArray[highestBidder], "' ",
# "Order by NodeNum")
# #print(qry)
# dfExtPath <- dbGetQuery(dbConn, qry)
dfExtPath <- valid_paths %>%
filter(EndNode == dfCandExt$EndNode[highestBidder]) %>%
arrange(NodeNum)
#pathExt <- dfExtPath$PathString[1]
pathExt <- trimws(substr(dfExtPath$pathString[1], nchar(pathMain) + 1, 250))
#trimws(substr(y, str_length(x)+1, 44))
#print(str_c("highest bid path extension: ", pathExt))
#flag observations in right segment of extended path, where right segment = proper extension of main path
for (j in 1:nrow(tagObs)){
#print(str_c(j, dfTagObs$SiteID[j], "-", dfTagObs$Array[j] ))
#was fish observation already assigned to main path?
if( is.na(tagObs$InMainPath[[j]])){ #this effectively reduces search to proper extension
for (k in 1:nrow(dfExtPath)){
#print(c(dfTagObs$SiteID[[j]], dfExtPath$NodeSite[[k]], dfTagObs$Array[[j]], dfExtPath$NodeArray[[k]]) )
if(tagObs$Node[[j]] == dfExtPath$NodeSite[[k]]) tagObs$InExtendedPath[[j]] = "x"
}
}
} # ends line 298
} # ends line 281
} # ends if line 215
#mainObs <- FALSE
#extObs <- FALSE
#outsideObs <- FALSE
#for each observation of current tag: dfTagObsContainer contains rows for all tag observations
# tocIndex is the current row in dfTagObsContainer
# SO current tag will populate the next obsRowCnt records in dfTagObsContainer
# once program termnates, each record should be populated
for( j in 1:obsRowCnt ){
tocIndex = tocIndex + 1
for( k in 14:15 ){
#write value for current cell of observation matrix
alltagObs[tocIndex, k] <- tagObs[j, k]
# #populate tag-site matrix
# if( is.na( tagObs$InMainPath[j] ) && is.na( tagObs$InExtendedPath[j] ))
# {
#tag_mat[i, tagObs$Node[j]] <- tagObs$ObsDate[j] #"Outside"
# alltagObs$OutsideExtPath[tocIndex] <- "x"
#outsideObs = TRUE
# }
# else if( is.na( tagObs$InExtendedPath[j] ))
# {
# #tag_mat[i, tagObs$Node[j]] <- tagObs$ObsDate[j]#"Main"
# #mainObs = TRUE
#
# #dfTagObsContainer$MainPath[tocIndex] <- pathMain
# }
# else
# {
# #tag_mat[i, tagObs$Node[j]] <- tagObs$ObsDate[j]#"Extended"
# #ObservedInExtPath = TRUE
# #extObs = TRUE
#
# #dfTagObsContainer$ExtendedPath[tocIndex] <- pathExt
# }
} # end k loop line 324
# if( j == 1 ){
# alltagObs$MainPath[tocIndex] <- pathMain
# if( length( pathExt ) > 0 ){
# alltagObs$ExtendedPath[tocIndex] <- pathExt
# }
# }
} #end j loop line 321
#tag_mat$FinalObsNode[i] <- tagObs$Node[obsRowCnt]
#tag_mat$FinalObsDate[i] <- tagObs$ObsDate[obsRowCnt]
#tag_mat$TagID[i] <- tagObs$TagID[1]
#tag_mat$ObsCnt[i] <- obsRowCnt
#obsLoc <- "Obs:"
#if( mainObs ) obsLoc <- paste0(obsLoc, " Main")
#if( extObs ) obsLoc <- paste0(obsLoc, " ,Ext")
#if( outsideObs ) obsLoc <- paste0(obsLoc, " ,Outside")
#tag_mat$Status[i] <- obsLoc
} # end i loop from line 141
proc_obs <- alltagObs %>%
mutate(AutoProcStatus = ifelse(!is.na(InMainPath), TRUE,
ifelse(!is.na(InExtendedPath), TRUE, FALSE)))
proc_obs <- proc_obs %>%
left_join(proc_obs %>%
filter(AutoProcStatus == FALSE) %>%
distinct(TagID) %>%
mutate(UserProcStatus = FALSE)) %>%
mutate(UserProcStatus = ifelse(is.na(UserProcStatus),TRUE, ''),
UserComment = '') %>%
select(AutoProcStatus, UserProcStatus, TagID, MinObsDate = ObsDate, Node, UserComment)
#dbSendStatement(dbConn, "Drop Table ObsTemp_Cleaned")
# dbWriteTable(dbConn, "ObsTemp_Cleaned", dfTagObsContainer, overwrite = TRUE)
#add observatons Outside Study Area to _cleaned table
# qry <- str_c("Insert into ObsTemp_Cleaned( SiteID, Array, AntennaID, TagID, ObsDateTime, Species, OutsideStudyArea )
# SELECT SiteID, Array, AntennaID, TagID, ObsDateTime, Species, OutsideStudyArea
# FROM Obs
# where Species = '", Species, "' and OutsideStudyArea = 'x' ")
# qry <- str_c(qry, " And ObsDateTime >= '", seasonBegin, "' and ObsDateTime < '", seasonEnd, "'")
# dbSendStatement(dbConn, qry)
# #default process status
# dbSendStatement(dbConn, "Update ObsTemp_Cleaned Set AutoProcStatus = 'no', UserProcStatus = 'no' ")
#
# #update Autoprocess where appropriate
# dbSendStatement(dbConn, "Update ObsTemp_Cleaned Set AutoProcStatus = 'yes', UserProcStatus = 'yes' where InMainPath = 'x' or InExtendedPath = 'x' ")
#
# #update Userprocess where appropriate: for each tag, if 1 autoProc = no all UserProc = null
# qry <- "Update ObsTemp_Cleaned Set UserProcStatus = null where tagID in (Select distinct tagID from ObsTemp_Cleaned where AutoProcStatus = 'no')"
# dbSendStatement(dbConn, qry)
#
# df <- dbGetQuery(dbConn, "Select AutoProcStatus, UserProcStatus, TagID, ObsDateTime, SiteID, Array, AntennaID, Species,
# InMainPath, InExtendedPath, OutsideExtPath, OutsideStudyArea, MainPath, ExtendedPath, CodeComment, UserComment
# from ObsTemp_Cleaned Order by TagID, ObsDateTime, SiteID, Array, InMainPath, InExtendedPath ")
#
# write.csv(df, file = str_c( getwd(), "/Data/Output/Output_Tag_Obs_Scrub.csv" ), na = "")
# print(str_c("Output_Tag_Obs_Scrub.csv written to ", getwd(), "/Data/Output/" ))
#
# dbWriteTable(dbConn, "Output_Tag_Validate", dfTagSiteMatrix, overwrite = TRUE)
#
# write.csv(dfTagSiteMatrix, file = str_c( getwd(), "/Data/Output/Output_Tag_Validate.csv" ), na = "")
#
# print(str_c("Output_Tag_Validate.csv written to ", getwd(), "/Data/Output/" ))
#
# qry <- "Select distinct tagID from ObsTemp_Cleaned where tagID not in (Select distinct tagID from ObsTemp_Cleaned where AutoProcStatus = 'no')"
# dfx <- dbGetQuery(dbConn, qry)
# dfx
#
# if( TRUE )
# {
# print(str_c("Number of tags processed: ", i))
# print(str_c("Number of pure tags with all yes's: ", nrow(dfx)))
# print(str_c("Number of impure tags with one or more no: ", i - nrow(dfx)))
# print(str_c("Number of Observations in study area: ", tocIndex))
# print(str_c("Number of Observations outside study area: ", nrow(df) - tocIndex))
# print(str_c("Number of Observations total: ", nrow(df)))
# print(str_c("Start Time: ", ct))
# print(str_c("End Time: ", Sys.time()))
# }
return(proc_obs)
} # ends function
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.