R/dB_readZRX.R

# AUTHOR: Johannes Brenner, Institute for Alpine Environments
# DATE.VERSION: 19.08.2014 V1.1

# PURPOSE
# read ZRX data file (province Bozen, WISKI, batch download)
# general quality check | min - max variable dependent
# & hourly aggregation (mean - sum)

# required libraries: zoo, chron

# ARGUMENTS
# file        ZRX file name (absolute path)
# do.hourly   logical, if TRUE data gets hourly aggregated
# do.quality  logical, if FALSE general quality check is performed (min - max)

dB_readZRX <- function(file, do.hourly=FALSE, do.quality=FALSE, chron=TRUE, multivar=FALSE)
{
  # load zoo library
  # require("zoo")
  
  # open connection
  dummy    <- readLines(con=file, n = -1)
  # get begining of meta data (file contains data for several stations)
  start_st <- c(grep(substr(dummy[1],1,22),dummy),length(dummy)+1)
  
  # dummies for data and metadata
  data_list <- list()
  meta_mat  <- c()
  
  # loop over start_st
  for (i in 1:(length(start_st)-1))
  {
    # get data for specific station
    data <- encodeString(dummy[ start_st[i]:(start_st[i+1]-1) ])
 
    #-----
    # get METADATA
    header <- data[ c(grep("#", data)) ]
    
    # get station info (name, id)
    st_name <- strsplit( x = header[ grep("#SNAME",header) ], split = ";")[[1]][1]
    st_name <- substr(st_name, 7, nchar(st_name))
    
    st_id <- strsplit( x = header[ grep("#SNAME",header) ], split = ";")[[1]][3]
    st_id <- substr(st_id, 5, nchar(st_id))
    
    rexchange <- strsplit( x = header[ grep("#REXCHANGE",header) ], split = ";")[[1]][1]
    #st_id <- substr(rexchange, 11, 14)
    #rexchange <- substr(rexchange, 11, nchar(rexchange))
    
    # get variable info
    #var_name <- substr(rexchange, 15, nchar(rexchange))
    
    var_name <- strsplit( x = header[ grep("#CNAME",header) ], split = ";")[[1]][1]
    var_name <- substr(var_name, 7, nchar(var_name))
    
    var_time <- strsplit( x = header[ grep("#TSNAME",header) ], split = ";")[[1]][1]
    var_time <- substr(var_time, 9, nchar(var_time))
    # Standardize name if var_time is a daily avg.
    if (length(grep("tagmittel", var_time, ignore.case = T))==1) var_time <- "TagMittel"
    
    # get time step in minutes
    if (length( grep("5", var_time ) )==1) time_scale <- 5
    if (length( grep("10", var_time) )==1) time_scale <- 10
    if (length( grep("30", var_time) )==1) time_scale <- 30
    if (length( grep("60", var_time) )==1) time_scale <- 60
    if (length( grep("TAG", var_time, ignore.case = T) )==1) time_scale <- 60*24
    # if (length( grep("5A", var_name ) )==1 | length( grep("5M", var_name ) )==1) time_scale <- 5
    # if (length( grep("10A", var_name) )==1) time_scale <- 10
    # if (length( grep("30A", var_name) )==1) time_scale <- 30
    # if (length( grep("60A", var_name) )==1) time_scale <- 60
    # if (length( grep("TAG", var_name) )==1 | length( grep("1440A", var_name) )==1) time_scale <- 60*24
    
    # meta data vector
    meta_mat <- rbind(meta_mat, c(st_id=st_id, st_name=st_name, var_name=var_name, 
                                  time_agg=as.character(time_scale),
                                  var_time=var_time) )
    #-----
    # get DATA
    
    # ommit metadata
    data_ts <- data[ -c(grep("#", data)) ]
    # split data character vector in colums
    data_ts <- strsplit(x = data_ts, split = " ")
    data_ts <- t(sapply(X = data_ts, FUN = function(x) x[c(1,2)]))
    
    # daily data
    if (time_scale==60*24) {
      
      # create date vector
      year <- substr(data_ts[,1],1,4); month <- substr(data_ts[,1],5,6); day <- substr(data_ts[,1],7,8)
      date_chr <- paste(year, "-", month, "-", day, sep="")
      date <- as.Date(x = date_chr, format = "%Y-%m-%d") 
      
      # create zoo object
      data_zoo <- zoo( x = as.numeric(data_ts[,2])[!is.na(date) & c(diff(date)!=0,T)], 
                       order.by = date[!is.na(date) & c(diff(date)!=0,T)] )
      
      # create dummy regular zoo object
      data_zooreg <- zooreg(data = NA, order.by = seq( from = time(data_zoo)[1],to = tail(x = time(data_zoo),n = 1), 
                                                       by = 1) )
      # combine both zoo objects 
      data_zooreg <- zoo(cbind(data_zoo, data_zooreg)$data_zoo)
      
    # no daily data  
    } else {
      # create datetime vector
      year <- substr(data_ts[,1],1,4); month <- substr(data_ts[,1],5,6); day <- substr(data_ts[,1],7,8)
      hour <- substr(data_ts[,1],9,10); min  <- substr(data_ts[,1],11,12); sec <- substr(data_ts[,1],13,14)
      
      if (chron) {
        date_chr <- paste(year, "-", month, "-", day, sep="")
        time_chr <- paste(hour, ":", min, ":", sec, sep="")
        date <- chron(dates. = date_chr, times. = time_chr, 
                      format = c(date="y-m-d", time="h:m:s"))
      } else {
        datetime <- paste(year, "-", month, "-", day, " ", hour, ":", min, ":", sec, sep="")
        date <- as.POSIXct( strptime(x = datetime, format = "%Y-%m-%d %H:%M:%S") )
      }
    
      # create zoo object
      data_zoo <- zoo(x = as.numeric(data_ts[,2][!is.na(date)]), order.by = date[!is.na(date)])
      
      # create dummy regular zoo object
      if (chron){
        if (time_scale==60) {
          data_zooreg <- zooreg(data = NA, order.by = seq( from = time(data_zoo)[1],to = tail(x = time(data_zoo),n = 1), 
                                                           by = times("01:00:00") ) )
                                                           
        } else {
          data_zooreg <- zooreg(data = NA, order.by = seq( from = time(data_zoo)[1],to = tail(x = time(data_zoo),n = 1), 
                                                           by = times( paste("00:",time_scale,":00",sep="") ) ) ) 
        }
      } else {
        data_zooreg <- zooreg(data = NA, order.by = seq( from = time(data_zoo)[1],to = tail(x = time(data_zoo),n = 1), 
                                                         by = time_scale*60) )
      }
      
      # combine both zoo objects
      data_zooreg <- zoo(cbind(data_zoo, data_zooreg)$data_zoo)
    }
    
    #-----
    # possible simple quality check
    if (do.quality)
    {
      # data quality check
      # relative air humidiy LF
      if (grepl("LF", var_name)) {
        data_zooreg <- ifelse(data_zooreg > 100, 100, data_zooreg)
        data_zooreg <- ifelse(data_zooreg < 0, 0, data_zooreg)
      }
      # air temperature LT
      if (grepl("LT", var_name)) {
        data_zooreg <- ifelse(data_zooreg > 50, NA, data_zooreg)
        data_zooreg <- ifelse(data_zooreg < -50, NA, data_zooreg)
      }
      # air temperature TD
      # could be around -70 in some extreme cases of really dry air (but maybe not in Bolzano Area)
      if (grepl("TD", var_name)) {
        data_zooreg <- ifelse(data_zooreg > 50, NA, data_zooreg)
        data_zooreg <- ifelse(data_zooreg < -50, NA, data_zooreg)
      }
      # for WG & WD quality check lock at 
        # Jimenez et al. (2010) - random, systematic, rough errors
        #                       - Manipulation errors - Limits consistency (0-30m/s, 0-360°)
        #                       - Temporal consistency (ABNORMALLY LOW VARIATIONS & ABNORMALLY HIGH VARIATIONS)
        # Chvez-Arroyo and Probst (2013)
      
      # wind velocity WG
      if (grepl("WG", var_name)) {
        data_zooreg <- ifelse(data_zooreg > 40, NA, data_zooreg)
        data_zooreg <- ifelse(data_zooreg < 0, NA, data_zooreg)
    
      }
      # wind direction WD
      if (grepl("WD", var_name)) {
        data_zooreg <- ifelse(data_zooreg > 360, NA, data_zooreg)
        data_zooreg <- ifelse(data_zooreg < 0, NA, data_zooreg)
      }
      # Precipitation NN
      if ( (grepl("N", var_name) && nchar(var_name)==1) ) { # grepl('NN',var_name)
        data_zooreg <- ifelse(data_zooreg < 0, NA, data_zooreg)
      }
    }
   
    #-----
    # possible hourly aggregation
    if (do.hourly)
    {
      if (chron) {
        #hour <- dB_trunc.minutes(x = time(data_zooreg), n.minutes = 60)
        hour <- floor(as.numeric(time(data_zooreg))*24)
      } else {
        hour <- as.POSIXct( strptime(format(time(data_zooreg), "%Y-%m-%d %H"), format= "%Y-%m-%d %H") )
      }
      if (time_scale<60)
      {
        # hourly aggregation for precipitation (sum)
        if ( (grepl("N", var_name) && nchar(var_name)==1) ) # grepl('NN',var_name)
        {
          data_zooreg <- aggregate(x = data_zooreg, by = hour, FUN = function (x) {if (any(is.na(x))) { 
          y <- NA } else { y <- sum(x) } 
          return(y)
          } )
        } else {
        # hourly aggregation for other variables (mean)
          data_zooreg <- aggregate(x = data_zooreg, by=hour, FUN=mean, na.rm=TRUE)
        }
        if (chron) data_zooreg <- zoo(coredata(data_zooreg),  chron(time(data_zooreg)/24))
      }
    }
   
    # save data in output list
    if (multivar) {
      data_list[[paste("st",st_id,"_",var_name,"_",var_time,sep="")]] <- data_zooreg
    } else {
      data_list[[paste("st",st_id,sep="")]] <- data_zooreg
    }

  }
  
  #return function output
  return(list(data=data_list, meta=meta_mat))
}
JBrenn/DataBaseAlpEnvEURAC documentation built on May 7, 2019, 6:48 a.m.