# File: R/dbTemporalAnn.R

# Defines functions: f

library(DBI)
library(odbc)
library(RPostgreSQL)
library(data.table)
library(stringr)
library(stringr)


# Developer convenience: copy this script into the model BUILD directory
# every time the file is sourced.
# NOTE(review): `..R.path()` is a helper defined outside this file; the code
# only shows that its result is indexed by "full_path" -- presumably the
# path of the running script, confirm against the helper.
# NOTE(review): the destination is a user-specific absolute Windows path.
file.copy(
  from = ..R.path()[["full_path"]],
  to = "C:\\Users\\bobbyf\\Dropbox\\WORK\\ISI\\Projects\\Cortex\\master\\model/BUILD/query_temp_annotations.R"
)



## f: for every annotation row in `cDT`, check which kinds of extracted data
## files are present (and non-empty) in that row's directory.
##
## Planned pipeline stages for `temporalannotations` (not implemented yet):
##   fq - connect (step 1; triggered by user or scheduler)
##   fc - query
##   fs - query
##   ft - transform: engineer features and necessary transformations
##        before model exec
##
## Input taken from the enclosing scope (not an argument):
##   cDT - table of temporal annotations. The code below needs the columns
##         `index`, `dataextractedcontainerlocal`, `startindexissi` and
##         `stopindexissi`. It is copied first, so the caller's table is
##         not modified.
##
## Arguments:
##   istest - if TRUE (default) only the first `ntest` directory/pattern
##            combinations are checked; if FALSE all of them are.
##   ntest  - number of combinations checked when `istest` is TRUE.
##
## Returns: the join of the index columns (index, dir, startIndex,
##   stopIndex) with one logical column per checked file kind.
f <- function(istest = TRUE, ntest = 100L) {

  if (!exists("cDT")) {
    stop("`cDT` (temporal annotations table) was not found", call. = FALSE)
  }

  # Work on a private copy: the renaming/keying below happens by reference
  # and would otherwise alter the caller's table and break a second call.
  annDT <- copy(cDT)
  setDT(annDT)
  setkeyv(annDT, "index")

  # change column name of dir path
  setnames(annDT, "dataextractedcontainerlocal", "dirPath")

  # clean the rest of the col names that have "extracted" in it so that
  # they correspond to the search term used on the file system
  old <- colnames(annDT)[str_detect(colnames(annDT), "extracted")]
  new <- str_replace(
    str_replace_all(old, "(data)?\\_?extracted", ""),
    "0",
    "_"
  )
  setnames(annDT, old, new)

  ## NOW SEARCH DIRECTORY FOR EACH RELEVANT COLUMN NAME AND UPDATE BOOLEAN
  # keep rows whose path contains an upper-case letter followed by a
  # lower-case one (presumably filters out placeholder paths -- confirm)
  checkDT <- annDT[str_detect(dirPath, "[A-Z][a-z]")]

  # every column after the fourth is a file kind to look for, plus "mp4"
  # NOTE(review): assumes the first four columns are the index/path/start/
  # stop columns -- confirm against the table layout.
  colNames <- c(colnames(checkDT)[-(1:4)], "mp4")
  searchPats <- str_replace(colNames, "\\_", ".+")

  # TRUE when `dir` holds at least one file matching `spat` with size > 0.
  # Files whose size cannot be read (NA) are ignored instead of erroring.
  fnCheckData <- function(dir, spat) {
    fpaths <- list.files(
      dir,
      pattern = spat,
      ignore.case = TRUE,
      full.names = TRUE
    )
    any(file.size(fpaths) > 0, na.rm = TRUE)
  }

  argDT <- CJ(checkDT$dirPath, searchPats, unique = TRUE)
  setnames(argDT, c("dir", "spat"))
  setkeyv(argDT, c("dir", "spat"))

  if (isTRUE(istest)) {
    stopifnot(is.numeric(ntest), length(ntest) == 1L, !is.na(ntest))
    # head() never pads with NA rows when fewer than `ntest` rows exist
    argDT <- head(argDT, ntest)
  }

  nChecks <- nrow(argDT)
  isValid <- vapply(
    seq_len(nChecks),
    function(i) {
      if (i %% 50L == 0L) message("checked ", i, " of ", nChecks)
      fnCheckData(argDT$dir[i], argDT$spat[i])
    },
    logical(1)
  )
  resDT <- cbind(argDT, isValid = isValid)

  # convert endoscope pattern back to column name, transform data back to wide
  resDT[, spat := str_replace(spat, "\\.\\+", "_")]
  wresDT <- dcast(resDT, dir ~ spat, value.var = "isValid", fill = FALSE)

  # join back to table that has index column
  indexDT <- checkDT[, .(
    index,
    dir = dirPath,
    startIndex = startindexissi,
    stopIndex = stopindexissi
  )]
  setkeyv(wresDT, "dir")
  setkeyv(indexDT, "dir")

  indexDT[wresDT]
}
  

# ValidateData(istest = TRUE)
# Source: bfatemi/cortexapi documentation built on Oct. 5, 2019, 7:13 a.m.