# R/marfissci.batch.process.R

#' Batch-generate data products from locally saved marfis csv extractions
#'
#' The purpose of this batch process function is to facilitate the mass
#' generation of data products from marfis data.  It assumes that data has
#' been extracted via marfissci.get.data(), and the resultant csv file(s) are
#' saved locally.
#'
#' The csv data is aggregated once per species code and once per gear code.
#' For every code, marfissci.process.data() is called (saving RDS, CSV and SHP
#' output) and, when it returns something other than NULL,
#' marfissci.simple.map() is called to save a plot.
#'
#' An Oracle connection (dbname "PTRAN") is opened for each processed data set
#' to look up the English descriptions of the species and gear codes.  It uses
#' the variables `oracle.personal.username` and `oracle.personal.password`,
#' which must be visible from this function (e.g. defined globally).
#'
#' @param folder Directory that is searched for the extracted `.csv` files.
#' @param out.folder Passed on unchanged as `out.folder` to
#'   marfissci.process.data() and marfissci.simple.map().
#' @param combine If TRUE, all csv files are row-bound into one data set that
#'   is processed once; if FALSE, every csv file is processed on its own.
#' @return NULL.  The function is called for its side effects (files, plots
#'   and progress messages).  A warning is raised, and nothing is processed,
#'   when `folder` contains no csv files.
marfissci.batch.process <- function(folder = file.path(project.datadirectory("aegis", "data"), "marfissci", "raw_data"),
                                    out.folder = "marfissci",
                                    combine = TRUE) {
  start.time <- Sys.time()

  # Lookup query used to translate each aggregation field's codes to text.
  code.queries <- c(
    SPECIES_CODE = "SELECT SPECIES_CODE, DESC_ENG FROM MARFISSCI.SPECIES",
    GEAR_CODE = "SELECT GEAR_CODE, DESC_ENG FROM MARFISSCI.GEARS"
  )
  # Column used to colour the map for each aggregation field.
  colour.fields <- c(
    SPECIES_CODE = "SUM_RND_WEIGHT_KGS",
    GEAR_CODE = "CNT_RND_WEIGHT_KGS"
  )

  # Process one data set: aggregate by each field, save products and plots.
  do.it <- function(all.data) {
    # Kept as an attach (not lubridate::year) because the original attached the
    # package here -- NOTE(review): other project functions may rely on that.
    library(lubridate)
    all.data$YEAR_FISHED <- year(all.data$DATE_FISHED)
    if (all(is.na(all.data$YEAR_FISHED))) {
      stop("No usable DATE_FISHED values were found in the data", call. = FALSE)
    }
    # na.rm so that a few missing dates do not turn the comparison below
    # into `if (NA)`.
    year.range <- range(all.data$YEAR_FISHED, na.rm = TRUE)
    if (year.range[1] == year.range[2]) {
      years.file <- year.range[1]
    } else {
      years.file <- paste(year.range, collapse = "_")
    }

    agg.by <- c("SPECIES_CODE", "GEAR_CODE")

    channel <- ROracle::dbConnect(DBI::dbDriver("Oracle"),
                                  dbname = "PTRAN",
                                  username = oracle.personal.username,
                                  password = oracle.personal.password)
    # Close the connection in the scope that opened it, even on error.
    on.exit(ROracle::dbDisconnect(channel), add = TRUE)

    for (field in agg.by) {
      # All unique codes present in the data, joined to their descriptions;
      # codes missing from the lookup table are dropped by the merge.
      the.codes <- ROracle::dbGetQuery(channel, code.queries[[field]])
      combos <- merge(unique(all.data[field]), the.codes)

      # seq_len() so that an empty `combos` simply skips the loop.
      for (i in seq_len(nrow(combos))) {
        writeLines(paste0("Analysing: ", combos[i, 2]))
        this <- marfissci.process.data(all.data[which(all.data[field] == combos[i, 1]), ],
                                       agg.by = field,
                                       save.RDS = TRUE,
                                       save.CSV = TRUE,
                                       save.SHP = TRUE,
                                       agg.by.year = FALSE,
                                       name.det = paste0(years.file, "_"),
                                       out.folder = out.folder,
                                       output = "RDS")
        # NOTE(review): this writes into the caller's global environment and
        # looks like a debugging leftover; kept so observable behaviour does
        # not change -- confirm nothing reads `gearKeep` and remove.
        if (i == 1) gearKeep <<- this
        if (!is.null(this)) {
          writeLines("Generating a plot...")
          marfissci.simple.map(this,
                               agg.by = field,
                               colour.by = colour.fields[[field]],
                               save.plot = TRUE,
                               out.folder = out.folder,
                               name.det = years.file,
                               plot.title = paste0(combos[i, 2], " ", years.file))
        } else {
          writeLines(paste0("Insufficient data to plot a figure for ", combos[i, 2]))
        }
      }
    }
  }

  csv.files <- list.files(path = folder, pattern = "\\.csv$", full.names = TRUE)
  if (length(csv.files) == 0) {
    # Nothing to read: avoid rbind-ing an empty list / looping over 1:0.
    warning("No csv files found in ", folder, call. = FALSE)
  } else if (combine) {
    writeLines("Combining all of the csv files into a single one")
    all.data <- do.call(rbind, lapply(csv.files, read.csv, header = TRUE, sep = ","))
    do.it(all.data)
  } else {
    for (csv.file in csv.files) {
      do.it(read.csv(csv.file, header = TRUE, sep = ","))
    }
  }

  # Report wall-clock time as HH:MM:SS (wraps for runs of 24 hours or more).
  elapsed <- as.numeric(difftime(Sys.time(), start.time, units = "secs"))
  writeLines(paste0(format(.POSIXct(elapsed, tz = "GMT"), "%H:%M:%S"), " elapsed"))
  return(NULL)
}
# jae0/stmdat documentation built on May 28, 2019, 11 p.m.