R/database.R

#' Decompose one or more OBPG filenames into the OBPG database
#'
#' @export
#' @param x character vector of filenames to decompose
#' @return a tibble of
#'  \itemize{
#'  \item{date Date}
#'  \item{id the item date identifier ala AYYYYjjj}
#'  \item{param charcater parameter name (chlor_a, sst, etc)}
#'  \item{per compositing period such as DAY, MO, 8D}
#'  \item{res numeric file resolution in km}
#'  \item{file basename of the input file less the extension}
#'  }
decompose_obpg <- function(
    x = file.path(obpg_path(), "gom", "DAY",
        c("chlor_a/A2016322.L3m_DAY_CHL_chlor_a_4km.grd",
        "poc/A2016145.L3m_DAY_POC_poc_4km.grd",
        "sst/A2016105.L3m_DAY_SST_sst_4km.grd",
        "poc/A2017118.L3m_DAY_POC_poc_4km.grd",
        "par/A2016144.L3m_DAY_PAR_par_4km.grd"))){

    f <- basename(x)
    ss <- strsplit(f, ".", fixed = TRUE)
    layer <- sapply(ss, '[[',1)
    #date <- as.POSIXct(paste(layer, '00:00:00'), format = 'A%Y%j %H:%M:%S',
    #    tz = 'UTC')
    date <- as.Date(layer, format = 'A%Y%j')
    pp <- strsplit(sapply(ss, '[[', 2), "_")
    len <- lengths(pp)
    period <- sapply(pp, '[[', 2)
    res <- as.numeric(gsub("km", "", sapply(pp, function(p) p[length(p)])))
    param <- sapply(1:length(pp),
        function(i){
            if (len[i] == 5) {
                r = pp[[i]][4]
            } else {
                r = paste(pp[[i]][4:(len[i]-1)], collapse = '_')
            }
            r
        })

    tibble::data_frame(date, id = layer, period , param, res,
        file = gsub(".grd", "", f, fixed = TRUE))
}

#' Given a database, build filenames
#'
#' @export
#' @param x a tibble database
#' @param path a path to raster files
#' @param ext character - the extension
#' @return character, filenames
compose_obpg <- function(x,
    path = ".",
    ext = '.grd'){

    file.path(path, x$param, paste0(x$file, ext[1]))
}



#' Decompose one or more full DAY filename into the OBPG database
#'
#' @export
#' @param x character vector of filenames to decompose
#' @return a tibble of
#'  \itemize{
#'  \item{date POSIXct UTC}
#'  \item{id the item date identifier ala AYYYYjjj}
#'  \item{param charcater parameter name (chlor_a, sst, etc)}
#'  \item{res numeric file resolution in km}
#'  \item{file basename of the input file less the extension}
#'  }
decompose_obpg_day <- function(
    x = file.path(obpg_path(), "gom", "DAY",
        c("chlor_a/A2016322.L3m_DAY_CHL_chlor_a_4km.grd",
        "poc/A2016145.L3m_DAY_POC_poc_4km.grd",
        "sst/A2016105.L3m_DAY_SST_sst_4km.grd",
        "poc/A2017118.L3m_DAY_POC_poc_4km.grd",
        "par/A2016144.L3m_DAY_PAR_par_4km.grd"))){

    f <- basename(x)
    ss <- strsplit(f, ".", fixed = TRUE)
    layer <- sapply(ss, '[[',1)
    #date <- as.POSIXct(paste(layer, '00:00:00'), format = 'A%Y%j %H:%M:%S',
    #    tz = 'UTC')
    date <- as.Date(layer, format = 'A%Y%j')
    pp <- strsplit(sapply(ss, '[[', 2), "_")
    len <- lengths(pp)
    res <- as.numeric(gsub("km", "", sapply(pp, function(p) p[length(p)])))
    param <- sapply(1:length(pp),
        function(i){
            if (len[i] == 5) {
                r = pp[[i]][4]
            } else {
                r = paste(pp[[i]][4:(len[i]-1)], collapse = '_')
            }
            r
        })

    tibble::data_frame(date, id = layer, param = param, res,
        file = gsub(".grd", "", f, fixed = TRUE))
}

#' Given a database, build filenames
#'
#' @export
#' @param x a tibble database
#' @param path a path to raster files
#' @param ext character - the extension
#' @return character, filenames
compose_obpg_day <- function(x,
    path = file.path(obpg_path(),'gom','DAY'),
    ext = '.grd'){

    file.path(path, x$param, paste0(x$file, ext[1]))
}

#' Given a directory, build a database
#'
#' @export
#' @param path a path to raster files
#' @param save_db logical, if TRUE then write the database
#' @param pattern regular expression of the file pattern to search for
#' @return tibble or NULL
build_database <- function(
    path = file.path(obpg_path(),'gom','DAY'),
    pattern = glob2rx("A*.grd"),
    save_db = FALSE){

    if (!dir.exists(path[1])) stop("path not found", path[1])
    ff <- list.files(path[1], pattern = pattern,
        recursive = TRUE, full.names = TRUE)

    #if (grepl("DAY", path, fixed = TRUE)){
    #    x <- decompose_obpg_day(x=ff)
    #}
    if (length(ff) > 0){
        x <- decompose_obpg(ff)
        if (save_db) x <- write_database(x, path)
    } else {
        warning("no files found to buid database")
        x <- NULL
    }

    x
}

#' Given a database with one or more parameters retrieve a listing of missing dates in the
#' sequence form first date to last date.
#'
#' @export
#' @param x a tibble database
#' @param last_date either NULL or a Date object to use as the last date of the
#'    date range to examine. If NULL then the last date in the database is used.
#'    By default we use 'yesterday'
#' @return named list of date vectors of mising days (possibly empty)
db_missing_days <- function(x = read_database(), last_date = Sys.Date()-1){

  xx = split(x, x$param)

  lapply(xx,
      function(x){
        d = as.Date(x$date)
        r = range(d)
        if (!is.null(last_date)) r[2] <- last_date[1]
        dd = seq(from = r[1], to = r[2], by = 'day')
        dd[!(dd %in% d)]
      })
}


#' Read the GOM database in the interval directory specified
#'
#' @export
#' @param interval character, currently on DAY and all others are ignored
#' @return a tibble
read_database_gom <- function(interval = 'DAY'){
    read_database(path = file.path(obpg_path(),'gom',interval[1]))
}


#' Read a file-list database
#'
#' @export
#' @param path character the directory with the database
#' @return a tibble
read_database <- function(
    path = file.path(obpg_path(),'gom','DAY')){
    if (!dir.exists(path[1])) stop("path not found:", path[1])
    filename <- file.path(path, "database")
    stopifnot(file.exists(filename))
    #readr::read_csv(filename)
    feather::read_feather(filename)
}

#' Write the file-list database
#'
#' @export
#' @param x the tibble or data.frame database
#' @param path character the directory to where the database should reside
#' @return a tibble
write_database <- function(x, path){
    if (!dir.exists(path[1])) stop("path not found:", path[1])
    filename <- file.path(path, "database")
    #readr::write_csv(x,filename)
    feather::write_feather(x, filename)
}
btupper/obpgtools documentation built on May 13, 2019, 8:42 a.m.