R/adcon_parse_data.R

Defines functions parse.adcon.data read.adcon.table

read.adcon.table <- function(file_csv){
    x <- readLines(file_csv, skipNul = TRUE, warn = FALSE)
    missval <- "\\*"

    row0 <- data.frame(matrix(NA, 0, 1))
    if(length(x) == 0) return(row0)
    h <- utils::read.table(text = x[1], header = TRUE, sep = "\t",
                           colClasses = "character", na.strings = "",
                           stringsAsFactors = FALSE, quote = "\"")
    lenh <- length(names(h))
    x <- x[-1]
    x <- strsplit(x, "\t")
    len <- sapply(x, length)
    il <- len == lenh
    if(!any(il)) return(row0)

    x <- lapply(x[il], function(v) gsub(missval, "", v))
    x <- lapply(x, function(v) gsub("\\\"", "", v))
    x <- do.call(rbind, x)
    x[x == ""] <- NA
    x <- as.data.frame(x, stringsAsFactors = FALSE)
    names(x) <- names(h)

    return(x)
}

parse.adcon.data <- function(qres, awsID, varTable){
    tz <- Sys.getenv("TZ")
    temps <- paste(qres$Date, qres$Time)
    temps <- strptime(temps, "%Y/%m/%d %H:%M:%S", tz = tz)
    ina <- is.na(temps)
    qres <- qres[!ina, , drop = FALSE]

    if(nrow(qres) == 0) return(NULL)

    temps <- temps[!ina]
    tmp <- qres[, names(qres) %in% varTable$parameter_code, drop = FALSE]
    nom_col <- names(tmp)

    tmp <- lapply(seq_along(nom_col), function(l){
        x <- data.frame(time = temps, par = nom_col[l], data = trimws(tmp[, l]))
        x$data <- suppressWarnings(as.numeric(as.character(x$data)))
        x[!is.na(x$data), , drop = FALSE]
    })
    tmp <- do.call(rbind, tmp)

    if(nrow(tmp) == 0) return(NULL)

    ix <- match(tmp$par, varTable$parameter_code)
    var_nm <- c("var_height", "var_code", "stat_code", "min_val", "max_val")
    var_dat <- varTable[ix, var_nm, drop = FALSE]
    tmp <- cbind(tmp, var_dat)

    tmp$min_val[is.na(tmp$min_val)] <- -Inf
    tmp$max_val[is.na(tmp$max_val)] <- Inf

    tmp$limit_check <- NA
    tmp$network <- 1
    tmp$id <- awsID
    tmp$raw_value <- tmp$data

    ## limit check
    tmp$limit_check[tmp$data < tmp$min_val] <- 1
    tmp$limit_check[tmp$data > tmp$max_val] <- 2

    tmp <- tmp[, c("network", "id", "var_height", "time", "var_code",
                   "stat_code", "data", "raw_value", "limit_check")]

    fun_format <- list(as.integer, as.character, as.numeric, as.integer,
                       as.integer, as.integer, as.numeric, as.numeric, as.integer)

    tmp <- lapply(seq_along(fun_format), function(j) fun_format[[j]](tmp[[j]]))
    tmp <- as.data.frame(tmp)

    names(tmp) <- c("network", "id", "height", "obs_time", "var_code",
                    "stat_code", "value", "raw_value", "limit_check")

    tmp$obs_id <- getObsId(tmp)

    tmp <- tmp[!duplicated(tmp$obs_id), , drop = FALSE]
    tmp <- tmp[order(tmp$obs_time), , drop = FALSE]

    return(tmp)
}
rijaf-iri/gmetawsParser documentation built on April 15, 2022, 12:08 a.m.