library(DBI)
library(odbc)
library(RPostgreSQL)
library(data.table)
library(stringr)
library(stringr)
# NOTE(review): `partdirstart` is a bare symbol that is neither assigned nor
# called here; evaluating it only looks the name up, which is an error if no
# such object exists. Presumably leftover marker text - confirm or remove.
partdirstart
# Copy a file into the Cortex project's BUILD directory under the name
# query_temp_annotations.R.
# NOTE(review): `..R.path()` is not defined in this file; it is assumed to
# return a list whose "full_path" element is the path of the running script -
# verify against wherever that helper lives.
# file.copy() keeps its default overwrite = FALSE here, so an existing
# destination file is left untouched and FALSE is returned for it.
file.copy(
# Source: the "full_path" element of whatever `..R.path()` returns.
from = ..R.path()[["full_path"]],
# Destination: hard-coded absolute Windows path (mixed "\\" and "/" separators).
# paste0() with a single argument returns the string unchanged.
to = paste0("C:\\Users\\bobbyf\\Dropbox\\WORK\\ISI\\Projects\\Cortex\\master\\model/BUILD/query_temp_annotations.R")
)
## f: for every annotated recording directory, report which expected data
## files are present on disk with a non-zero size.
##
## The table `cDT` is NOT built here: the query that used to create it is
## commented out below, so `cDT` is looked up in the enclosing environment.
## NOTE(review): `cDT` is modified by reference (key set, columns renamed), so
## calling f() twice on the same table will fail to find the original column
## names - confirm whether the query should be re-enabled or a copy() taken.
##
## @param istest  TRUE (default) checks only the first 100 directory/pattern
##                combinations; FALSE checks every combination.
## @return a data.table with columns index, dir, startIndex, stopIndex plus
##         one logical column per search pattern (TRUE = a matching file with
##         positive size exists in that directory).
f <- function(istest = TRUE){
  ## Pipeline skeleton. Placeholder only: every stage function is empty and
  ## the list is neither assigned nor returned.
  list(temporalannotations = list(
    ## pipeline
    ## -
    ##
    ## plsid =
    ## 1.Connect
    ## d-origin: NA (this is step 1 and triggered by user or scheduler)
    ##
    ## -> source connection object
    ##
    fq = function(){
    },
    ## query
    fc = function(){
    },
    ## query
    fs = function(){
    },
    ## transform
    ## d-origin:
    ## d-destin:
    ## task: Engineer features and necessary transformations before model exec
    ##
    ft = function(){
    }
  ))

  # setDT(tadf)
  ## EXTRACT COLUMNS OF INTEREST WITH REGEX, BUILD QUERY, GET ALL DATA FOR COLUMNS
  # cnames <- colnames(tadf)
  # pat <- "index|dataextracted(containerlocal$|avi$|evt$)|\\_extracted$"
  # qColumns <- cnames[str_detect(cnames, pat)]
  # query <- str_c("SELECT ", str_c(qColumns, collapse = ", "), " FROM temporalannotations")
  ## EXECUTE QUERY AND CLEAN DATA
  # cDT <- dbGetQuery(conObj, query)
  # setDT(cDT)
  setkeyv(cDT, "index")

  # change column name of dir path
  # (always pass both old and new: setnames() with a single name vector would
  # try to rename every column of the table)
  setnames(cDT, "dataextractedcontainerlocal", "dirPath")

  # clean the rest of the col names that have "extracted" in it, so each
  # column name corresponds to the file-name search term
  old <- colnames(cDT)[str_detect(colnames(cDT), "extracted")]
  new <- str_replace(str_replace_all(old, "(data)?\\_?extracted", ""), "0", "_")
  setnames(cDT, old, new)

  ## NOW SEARCH DIRECTORY FOR EACH RELEVANT COLUMN NAME AND UPDATE BOOLEAN
  # keep only rows whose path contains an upper-case letter followed by a
  # lower-case one
  checkDT <- cDT[str_detect(dirPath, "[A-Z][a-z]")]
  # every column after the first four is a search term; "mp4" is added by hand
  # NOTE(review): relies on the column order of cDT - confirm the first four
  # columns are the non-data ones.
  colNames <- c(colnames(checkDT)[-(1:4)], "mp4")
  # an underscore in a column name stands for "anything in between" on disk
  searchPats <- str_replace(colNames, "\\_", ".+")

  ##
  ## DEFINE FUNCTION TO CHECK ONE DIR FOR ONE PATTERN
  ##
  # TRUE when `dir` holds at least one file matching `spat` (case-insensitive)
  # whose size is positive. No matching files -> FALSE; sizes that cannot be
  # read (NA) are ignored instead of aborting the whole run.
  fnCheckData <- function(dir, spat){
    fpaths <- list.files(dir, pattern = spat, ignore.case = TRUE, full.names = TRUE)
    any(file.size(fpaths) > 0, na.rm = TRUE)
  }

  # every directory crossed with every search pattern
  argDT <- CJ(checkDT$dirPath, searchPats, unique = TRUE)
  setnames(argDT, c("dir", "spat"))
  setkeyv(argDT, c("dir", "spat"))

  # head() never pads with NA rows when fewer than 100 combinations exist
  todoDT <- if (istest) head(argDT, 100L) else argDT

  # vapply keeps the result a logical vector even for zero rows
  isValid <- vapply(seq_len(nrow(todoDT)), function(i){
    if (i %% 50 == 0) print(i)  # progress indicator
    fnCheckData(todoDT$dir[i], todoDT$spat[i])
  }, logical(1))
  resDT <- cbind(todoDT, isValid = isValid)

  # convert search pattern back to column name, transform data back to wide
  resDT[, spat := str_replace(spat, "\\.\\+", "_")]
  wresDT <- dcast(resDT, dir ~ spat, value.var = "isValid", fill = FALSE)

  # join back to table that has index column
  indexDT <- checkDT[, .(index, dir = dirPath, startIndex = startindexissi, stopIndex = stopindexissi)]
  setkeyv(wresDT, "dir")
  setkeyv(indexDT, "dir")
  indexDT[wresDT]
}
# ValidateData(istest = TRUE)
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.