R/mergeRDS.R

Defines functions mergeRDS

## go through all YEAR_PROJECT_SITE_alltags.rds files in the /SG/contrib/YEAR hierarchy,
## create a YEAR_PROJECT_SITE_alltags.sqlite file from each, then merge
## the latter into a single table database /SG/YEAR_alltags.sqlite

## This version accumulates a large SQL query to create the master database
## from the various files entirely within sqlite, as there is a mysterious
## intermittent bug in the RSQLite package when attempting to do the merge
## using that, in various ways.

## omit "2012" tags for now

mergeRDS = function(year) {
  library(RSQLite)
  library(lubridate)

  files = getAllSiteFiles(year, "_alltags.rds")

  ## drop files generated by running against 2012 tag database
  files = grep("201[23]", invert=TRUE, value=TRUE, files, perl=TRUE)

  ## read in tag deployment file for species code
  tagdeps = read.csv(sprintf("/SG/%d_tag_deployments.csv", year), as.is=TRUE)

  ## generate a uid, which uniquely identifies the physical tag
  ## (for now, we know that no project has more than one tag of the same ID and frequency)
  uid = sprintf("%s#%03d@%.3f", tagdeps$proj, tagdeps$tagID, tagdeps$tag.freq)
  
  ## spcodes is a list, whose names are the uid generated above
  ## each element of the list is a 
  dups = which(duplicated(uid))
  ii = 1
  suffixes = c('a', 'b', 'c', 'd', 'e', 'f', 'g')
  while(length(dups) > 0) {
    uid[dups] = sub("[a-z]?@", paste(suffixes[ii], '@', sep=""), uid[dups])
    dups = which(duplicated(uid))
    ii = ii + 1
  }
  rownames(tagdeps) = uid
  
  ## target column names

  colNames = c("ant", "ts", "fullID", "freq", "freqsd", "sig", "sigsd", "noise",  "runID", "posInRun", "slop", "burstSlop", "antFreq", "tsOrig",  "bootnum", "runLen", "id", "tagProj", "nomFreq", "lat", "lon",  "alt", "depYear", "proj", "site", "recv", "sp", "label", "gain", "dbm")
  
  cumRunID = 0
  mergeQuery = ""
  
  for (f in files) {
    siteSQLiteFile = sub(".rds$", ".sqlite", f)
    if (! file.exists(siteSQLiteFile) || diff(file.info(c(f, siteSQLiteFile))$mtime) < 0) {
        ## .rds file has changed more recently than .sqlite file, or the latter doesn't exist
        x = readRDS(f)

        if (is.null(nrow(x)))
            next
        
        ## force ID to be integer, with values as expected; there may be
        ## overlap among projects and frequencies, as this is just intended
        ## to be the manufacturer's ID code.
        
        if (is.factor(x$id))
            x$id = as.integer(as.character(x$id))
        
        ## make sure run IDs are unique across all sites
        x$runID = x$runID + cumRunID
        cumRunID = max(x$runID)

        x$id = x$id %% 1000

        ## generate nominal antenna frequency
        if (! "nomFreq" %in% names(x))
            x$nomFreq = ifelse(x$antFreq > 166.37, 166.380, 166.300)
                        
        ## add missing gain column
        if (! "gain" %in% names(x))
            x$gain = 0

        ## add missing dbm column (SG only)
        if (! "dbm" %in% names(x))
            x$dbm = sgPowerTodBm(x$sig, x$gain)

        ## add missing altitude column
        if (! "alt" %in% names(x))
            x$alt = NA
        
        ## convert bogus lat/lon to NA
        noLL = which(x$lat == 999)
        if (length(noLL) > 0) {
            x$lat[noLL] = NA
            x$lon[noLL] = NA
            x$alt[noLL] = NA
        }
        
        ## generate correct species code
        x$sp = tagdeps[sprintf("%s#%03d@%.3f", x$tagProj, x$id, x$nomFreq), "spcd"]

        ## generate clean tag label
        x$label = sprintf("%s %03d @ %.3f-%s", x$sp, x$id, x$nomFreq, x$tagProj)

        ## create / connect to database

        con = dbConnect(RSQLite::SQLite(), siteSQLiteFile)
        
        dbGetQuery(con, "drop table if exists tags")
        dbGetQuery(con, "CREATE TABLE tags \
( ant TEXT,
 ts REAL,
 fullID TEXT,
 freq REAL,
 freqsd REAL,
 sig REAL,
 sigsd REAL,
 noise REAL,
 runID REAL,
 posInRun INTEGER,
 slop REAL,
 burstSlop REAL,
 antFreq REAL,
 tsOrig REAL,
 bootnum INTEGER,
 runLen INTEGER,
 id REAL,
 tagProj TEXT,
 nomFreq REAL,
 lat REAL,
 lon REAL,
 alt REAL,
 depYear INTEGER,
 proj TEXT,
 site TEXT,
 recv TEXT,
 sp TEXT,
 label TEXT,
 gain REAL,
 dbm REAL
 )"
)

    ## write table with columns in correct order, since dbWriteTable makes
    ## no attempt to match column names between existing DB table and table
    ## being written

    dbWriteTable(con, "tags", x[, match(colNames, names(x))], row.names=FALSE, append=TRUE)
    dbDisconnect(con)
    cat (f, " ", "updated\n")
} else {
    cat (f, " ", "not changed\n")
}

    mergeQuery = c(mergeQuery, sprintf("attach database '%s' as site;\ninsert into tags select * from site.tags;\ndetach database site;\n", siteSQLiteFile))
  }


  mergeQuery = c( ## create new tags table in master db
"DROP TABLE IF EXISTS tags;  
CREATE TABLE tags 
( ant TEXT,
 ts REAL,
 fullID TEXT,
 freq REAL,
 freqsd REAL,
 sig REAL,
 sigsd REAL,
 noise REAL,
 runID REAL,
 posInRun INTEGER,
 slop REAL,
 burstSlop REAL,
 antFreq REAL,
 tsOrig REAL,
 bootnum INTEGER,
 runLen INTEGER,
 id REAL,
 tagProj TEXT,
 nomFreq REAL,
 lat REAL,
 lon REAL,
 alt REAL,
 depYear INTEGER,
 proj TEXT,
 site TEXT,
 recv TEXT,
 sp TEXT,
 label TEXT,
 gain REAL,
 dbm REAL
 );",

  mergeQuery,  ## append one merge query per site database

## create indexes
"create index tags_ts on tags ( ts );
create index tags_fullID on tags ( fullID );
create index tags_idproj on tags ( id, tagProj );
create index tags_tagProj on tags ( tagProj );
create index tags_id on tags ( id );
create index tags_site on tags ( site );
create index tags_proj on tags ( proj );
create index tags_sp on tags ( sp );
create index tags_recv on tags ( recv );
")

  mergeQueryFile = sprintf("/SG/%d_master_merge_query.sql", year)
  writeLines(mergeQuery, mergeQueryFile)
  masterDB = sprintf("/SG/new_%d_alltags.sqlite", year)
  system(sprintf("/usr/bin/sqlite3 %s < %s", masterDB, mergeQueryFile))
}


  
  
jbrzusto/sensorgnome-R-package documentation built on May 18, 2019, 9:19 p.m.