R/LWSlib.R

Defines functions LWquery LWhourlyquery LWcurvequery read.WF create.df wf.station.plot wf.plot stackr2c write.frame write_met_r2c get_plot_extents

Documented in create.df get_plot_extents LWcurvequery LWhourlyquery LWquery read.WF stackr2c wf.plot wf.station.plot write.frame write_met_r2c

# devtools::use_package("zoo")
# devtools::use_package("xts")
# devtools::use_package("RODBC")
# devtools::use_package("ggplot2")
# devtools::use_package("stringr")
# devtools::use_package("reshape2")
# devtools::use_package("raster")
# devtools::use_package("lubridate")
# devtools::use_package("scales")

#' Function to query all of the historical data in the database
#'
#' @param StationNumber Internal LWCB station number
#' @param Range Possible Range formats: 1) Range <- "2015-01-01/" 2) Range <- "/2005-05-01" 3) Range <- "2005-05-01/2009-05-01"  4) Range <- ""
#' @param DBSource Full path to local database
#' @param useSQL Is the database sql? (usually it is mdb)
#'
#' @return an xts object
#' @export


LWquery <-
  function(StationNumber = 75,
           Range = "2018-10-01/2019-05-01",
           DBSource = "Z:/LWS_Db/lwcb.mdb",
           useSQL = FALSE) {


    rangestring <- unlist(strsplit(Range, "/"))
    rangestring[rangestring == ""] <- "1900-01-01"

    # get start dates and end dates from Range
    if (Range == "") {
      start_date <- as.Date("1900-01-01")
      end_date <- Sys.Date()
    } else{
      if (length(rangestring) == 1) {
        start_date <- as.Date(rangestring)
        end_date <- Sys.Date()
      } else{
        start_date <-
          as.Date(rangestring[1])
        end_date <- as.Date(rangestring[2])
      }
    }


    suppressWarnings(rangestring <- as.Date(min(rangestring)))
    timediff <- Sys.Date() - rangestring

    #create empty time object for later
    dates <- seq(from = start_date, to = end_date, by = 1)
    empty <- zoo::zoo(, dates)
    emptyNA <- zoo::zoo(NA, dates)

    #Connect to Database
    if (useSQL == TRUE) {
      drv <- RODBC::dbDriver("SQLite")
      LWDB <- RODBC::dbConnect(drv, DBSource)

      #Query the 3 separate databases
      if (timediff > 730 || is.na(timediff)) {
        DataHis <-
          RODBC::dbGetQuery(
            LWDB,
            paste(
              "select Value, Date from DailyDataFullRec where StationNumber =",
              StationNumber
            )
          )
      }
      if (timediff > 59 || is.na(timediff)) {
        Data2year <-
          RODBC::dbGetQuery(
            LWDB,
            paste(
              "select Value, Date from DailyData2Years where StationNumber =",
              StationNumber
            )
          )
      }
      Data60Day <-
        RODBC::dbGetQuery(
          LWDB,
          paste(
            "select Value, Date from DailyData60Days where StationNumber =",
            StationNumber
          )
        )

      dbDisconnect(LWDB)
    } else{
      #Connect to Database
      LWDB <- RODBC::odbcConnectAccess(DBSource)

      #Query the 3 separate databases
      if (timediff > 730 || is.na(timediff)) {
        DataHis <-
          RODBC::sqlQuery(
            LWDB,
            paste(
              "select Value, Date from DailyDataFullRec where StationNumber =",
              StationNumber,
              "AND Date > ",
              format(start_date, "#%m/%d/%Y#"),
              "AND Date < ",
              format(end_date, "#%m/%d/%Y#")
            )
          )
      }
      if (timediff > 59 || is.na(timediff)) {
        Data2year <-
          RODBC::sqlQuery(
            LWDB,
            paste(
              "select Value, Date from DailyData2Years where StationNumber =",
              StationNumber,
              "AND Date > ",
              format(start_date, "#%m/%d/%Y#"),
              "AND Date < ",
              format(end_date, "#%m/%d/%Y#")
            )
          )
      }
      Data60Day <-
        RODBC::sqlQuery(
          LWDB,
          paste(
            "select Value, Date from DailyData60Days where StationNumber =",
            StationNumber,
            "AND Date > ",
            format(start_date, "#%m/%d/%Y#"),
            "AND Date < ",
            format(end_date, "#%m/%d/%Y#")
          )
        )

      RODBC::odbcClose(LWDB)

    }

    #Bind together
    if (exists("DataHis")) {
      DataAll <- rbind(DataHis, Data2year, Data60Day)
    } else if (exists("Data2year")) {
      DataAll <- rbind(Data2year, Data60Day)
    } else{
      DataAll <- Data60Day
    }

    #if the station is (-1), it means the user knows it doesn't exist in
    #the LWCB database and is meant to be a placeholder, create a dummy date variable
    if (StationNumber >= (9000)) {
      Dummy <- seq(as.Date("1986/05/01"), as.Date("1987/05/01"), by = 1)
      DataAll.ts <- xts::xts(rep(NA, length(Dummy)), Dummy)
    } else{
      #Create timeseries of actual data,if there is no data in the database, create empty dataset
      if (nrow(DataAll) > 0) {
        DataAll.ts <- xts::xts(DataAll$Value, as.Date(DataAll$Date))
        DataAll.ts <- DataAll.ts[Range]
      } else{
        DataAll.ts <- emptyNA
      }
    }
    #check if the Range filtering excluded all possible non-NA values
    if (length(DataAll.ts) == 0) {
      DataAll.ts <- emptyNA
    }

    #Pad timeseries with NAs
    filled.DataAll.ts <- merge(DataAll.ts, empty, all = TRUE)

    return(filled.DataAll.ts)
  }



#' Function to query all of the hourly data in the database (typically the last 2 years)
#'
#' @param StationNumber Internal LWCB station number
#' @param Range Possible Range formats: 1) Range <- "2015-01-01/" 2) Range <- "/2005-05-01" 3) Range <- "2005-05-01/2009-05-01"  4) Range <- ""
#' @param DBSource Full path to local database

#'
#' @return dfd

LWhourlyquery <-
  function(StationNumber = 155,
           Range = "2016-03-12/2016-03-13",
           DBSource = "Z:/LWS_Db/lwcb.mdb") {
    #Function to query all of the historical data in the database
    #User inputs the station number
    #Possible Range formats: 1) "2005-05-01/"
    #                        2) "/2005-05-01"
    #                        3) "2005-05-01/2009-05-01"
    #                        4) ""


    rangestring <- unlist(strsplit(Range, "/"))
    rangestring[rangestring == ""] <- "1900-01-01"
    suppressWarnings(rangestring <- as.Date(min(rangestring)))




    #Connect to Database
    LWDB <- RODBC::odbcConnectAccess(DBSource)

    #Query the database
    Data <-
      RODBC::sqlQuery(
        LWDB,
        paste(
          "SELECT HourlyData.Date",
          paste0(", HourlyData.[",seq(0,23, by = 1),":00] ", collapse = ""),
          "FROM HourlyData
          WHERE (((HourlyData.StationNumber)=",
          StationNumber,
          "));"
          )
          )
    names(Data) <- c("Date",stringr::str_pad(seq(0,23,by=1), 2, pad = "0"))

    #convert data frame in 'wide' format to 'long' format, then to a time series
    meltdata <- melt(Data, id.vars = c("Date"))

    meltdata$datetime <- strptime(paste(meltdata$Date, meltdata$variable),format = "%Y-%m-%d %H")
    meltdata <- meltdata[!is.na(meltdata$datetime),]
    data.ts <-
      xts(meltdata$value, order.by = meltdata$datetime)



    #Subset if required
    if (Range != "") {
      data.ts <- data.ts[Range]
    }

    RODBC::odbcClose(LWDB)

    return(data.ts)
  }

#' Queries the database CurveData table.
#'
#' @param CurveNumber This is the curvenumber found within the curvemaster table. The curvemaster table
#' lists the curves as sorted by date, the user must determine which specific curve is required
#' @param DBSource location of the lwcb database
#'
#' @return dataframe of the requested data
#' @export


LWcurvequery <- function(CurveNumber = 19,
                         DBSource = "Z:/LWS_Db/lwcb.mdb"){
  #Connect to Database
  LWDB <- RODBC::odbcConnectAccess(DBSource)


    CurveData <-
      RODBC::sqlQuery(
        LWDB,
        paste(
          "SELECT CurveDataHeaderID, elevation, discharge
          FROM CurveData
          WHERE CurveDataHeaderID =",
          CurveNumber)
      )

    return(CurveData)

    RODBC::odbcClose(LWDB)
}




#' Function to read an observed and modelled time series from a WATFLOOD csv (spl or resin)
#'
#' @param FileLocation path to the resin or csv file
#' @param Range Optional, if not included, will plot the whole timeseries
#' @param PlotFolder Path to where you would like the plots stored
#'
#' @return a list, the first item in the list is a vector of all the station names
#' the second item is a list of dataframes, with each df corresponding to the station names vector
#' each df has Date, Observed and Modelled columns
#' @export


read.WF <- function(FileLocation) {
  #load the result file
  dat <- read.csv(FileLocation)


  #first define the time series
  #get the date from the top left corner
  Start.Date <- as.Date(names(dat)[1], format = "X%Y..%m..%d")
  if(is.na(Start.Date)){
    Start.Date <- as.Date(names(dat)[1], format = "X%Y..%m.%d")
  }
  if(is.na(Start.Date)){
      Start.Date <- as.Date(names(dat)[1], format = "X%Y.%m..%d")
  }
  if(is.na(Start.Date)){
    Start.Date <- as.Date(names(dat)[1], format = "X%Y.%m.%d")
  }

  Date.Series <- Start.Date + seq(from = 0,
                                  to = nrow(dat) - 1,
                                  by = 1)

  #get observed and simulated columns
  header <- names(dat)[-1]
  obs.cols <-
    seq(1, length(header), 2) + 1 #the +1 acounts for the date column
  sim.cols <-
    seq(2, length(header), 2) + 1 #the +1 acounts for the date column

  #get the Station Names
  stn.names <- names(dat)[obs.cols]
  stn.names <- stringr::str_remove_all(stn.names, "\\.+_obs")
  stn.names <- stringr::str_remove_all(stn.names, "_+obs")
  stn.names <- stringr::str_replace_all(stn.names, "\\.", "_")


  #create a list of dataframes, one dataframe per station
  #each dataframe has a date, observed and simulated column
  wf.dfs <-
    lapply(
      1:length(stn.names),
      create.df,
      Dates = Date.Series,
      wf.dat = dat,
      obs.cols = obs.cols,
      sim.cols = sim.cols
    )

  #return data
  output <- list(stn.names, wf.dfs)
  return(output)

}


#' Function create a dataframe, used in read.WF()
#'
#' @param i an iterator
#' @param Dates a vector of Dates
#' @param wf.dat the original data from the WATFLOOD output file
#'
#' @return a dataframe

create.df <- function(i, Dates, wf.dat, obs.cols, sim.cols) {
  output.df <- data.frame(Dates,
                          Obs = wf.dat[obs.cols[i]],
                          Sim = wf.dat[sim.cols[i]])
  names(output.df) <- c("Dates", "Observed", "Simulated")

  #do some error checking
  output.df$Observed[output.df$Observed == (-999)] <- NA

  return(output.df)
}




#' Function to plot an observed and modelled time series from a WATFLOOD csv (spl or resin)
#'
#' @param stn.df a dataframe created from create.df(), contains a column for Date, Observed and Simulated
#' @param stn.name a string depicting the name of the station
#' @param date.range Optional, will subset the range
#' @param secondstation LWS station to add for comparison purposes (ex. upstream dam releases)
#'
#' @return a list, the first item in the list
#' @import ggplot2
#' @export




wf.station.plot <-
  function(stn.df,
           stn.name,
           date.start = NULL,
           date.end = NULL,
           exportfolder = getwd(),
           avg = 1,
           secondstation = NA) {

    #take a rolling average if called for
    stn.ts <- xts::xts(stn.df[, -1], order.by = stn.df$Dates)
    stn.ts <- zoo::rollmean(stn.ts, avg)

    if(!is.na(secondstation)){
      #append another stations data if called for, this is for comparison purposes
      secondstn.ts <- LWquery(secondstation,
                              Range = paste0(min(stn.df$Dates),"/",max(stn.df$Dates)))
      stn.ts <- merge(stn.ts,secondstn.ts)
      stn.df <- data.frame(
        Dates = zoo::index(stn.ts),
        Observed = stn.ts$Observed,
        Simulated = stn.ts$Simulated,
        Gauge = stn.ts[,3]
      )
      names(stn.df) <- c("Dates","Observed","Simulated","Secondary_Gauge")

    }else{
      stn.df <- data.frame(
        Dates = zoo::index(stn.ts),
        Observed = stn.ts$Observed,
        Simulated = stn.ts$Simulated
      )
    }



    #subset if called for
    if (!is.null(date.start)) {
      stn.df <- stn.df[stn.df$Dates >= date.start, ]
    }

    if (!is.null(date.end)) {
      stn.df <- stn.df[stn.df$Dates <= date.end, ]

    }

    #convert to long format
    dat.long <- reshape2::melt(stn.df, id = "Dates")


    p <-
      ggplot(data = dat.long, ggplot2::aes(x = Dates, y = value, colour = variable)) +
      geom_line() +
      scale_colour_manual(values = c("black", "red", "blue"), name = "") +
      theme_bw() +
      ggtitle(stn.name) +
      scale_y_continuous(expression(paste("Flow (", m ^ 3, "/s)", sep =
                                            ""))) +
      scale_x_date("Date")


    ggsave(
        file.path(getwd(), paste0("hyd_", stn.name, ".png")),
        width = 10,
        height = 5,
        dpi = 300,
        units = "in"
    )


    return(p)

  }




#' Function to plot an observed and modelled time series from a WATFLOOD csv (spl or resin)
#'
#' @param wf.list path to the resin or csv file
#' @param plotfolder Optional, if not included only return ggplot objects within R workspace
#' @param date.range Optional, will subset the range
#' @param stns A vector of numbers. Optional, will only plot selected stations
#' @param secondstation A vector of numbers corresponding to the plots. function will query and plot that stations number for comparison
#'
#' @return a list, the first item in the list
#' @export

wf.plot <- function(wf.list,
                    plotfolder = NULL,
                    date.start = as.Date("1800-01-01"),
                    date.end = as.Date("2100-01-01"),
                    stns = NULL,
                    secondstation = NA,
                    avg = 1) {
  #subset to user specified stations
  if (!is.null(stns)) {
    stn.names <- wf.list[[1]][[stns]]
    wf.df <- wf.list[[2]][[stns]]
  } else{
    stn.names <- wf.list[[1]]
    wf.df <- wf.list[[2]]
  }

  #loop through each station and plot
  if (!is.null(plotfolder)) {
    wf.plots <- mapply(wf.station.plot,
                       stn.df = wf.df,
                       stn.name = stn.names,
                       date.start = date.start,
                       date.end = date.end,
                       plotfolder = plotfolder,
                       avg = avg,
                       secondstation = secondstation,
                       SIMPLIFY = FALSE)
  } else{
    wf.plots <- mapply(wf.station.plot,
                       stn.df = wf.df,
                       stn.name = stn.names,
                       date.start = date.start,
                       date.end = date.end,
                       avg = avg,
                       secondstation = secondstation,
                       SIMPLIFY = FALSE)

  }

  return(wf.plots)
}




#' Function to import a WATFLOOD r2c file and convert to a raster stack for processing in R
#'
#' @param r2cfile.location the location of an r2c file
#'
#' @return raster stack of file
#' @export
#' @import raster

stackr2c <- function(r2cfile.location) {
  #Read the file into text
  r2cfile <- readLines(r2cfile.location)

  #get header data
  lines.count <- length(r2cfile)
  end.header <-
    grep(pattern = "EndHeader", ignore.case = T, r2cfile)
  header.lines <- r2cfile[1:end.header]
  frame.lines <- r2cfile[end.header + 1:lines.count]

  #remove r2c file to free up memory
  rm("r2cfile")


  #get extents
  xorigin <-
    grep(pattern = ":xOrigin", ignore.case = T, header.lines)
  xmn <- as.numeric(strsplit(header.lines[xorigin], " +")[[1]][2])

  xcount <- grep(pattern = ":xCount", ignore.case = T, header.lines)
  xcount <- as.numeric(strsplit(header.lines[xcount], " +")[[1]][2])

  xdelta <- grep(pattern = ":xDelta", ignore.case = T, header.lines)
  xdelta <- as.numeric(strsplit(header.lines[xdelta], " +")[[1]][2])

  xmx <- xmn + xcount * xdelta


  yorigin <-
    grep(pattern = ":yOrigin", ignore.case = T, header.lines)
  ymn <- as.numeric(strsplit(header.lines[yorigin], " +")[[1]][2])

  ycount <- grep(pattern = ":yCount", ignore.case = T, header.lines)
  ycount <- as.numeric(strsplit(header.lines[ycount], " +")[[1]][2])

  ydelta <- grep(pattern = ":yDelta", ignore.case = T, header.lines)
  ydelta <- as.numeric(strsplit(header.lines[ydelta], " +")[[1]][2])

  ymx <- ymn + ycount * ydelta



  #Get Attribute information
  AttributeName <- grep(pattern = "AttributeName", header.lines)
  AttributeTable <-
    as.data.frame(do.call(rbind, strsplit(header.lines[AttributeName], " +")), stringsAsFactors = FALSE)
  AttributeTable[, 2] <- as.numeric(AttributeTable[, 2])


  #Find whether a multi or single frame r2c
  frame.start <- grep(pattern = ":Frame", frame.lines)
  if (length(frame.start) == 0) {
    multiframe <- FALSE
  } else{
    multiframe <- TRUE
  }


  if (multiframe) {
    #find lines where new frames start and end
    frame.start <- grep(pattern = ":Frame", frame.lines)
    frame.end <- grep(pattern = ":EndFrame", frame.lines)
    frame.length <- frame.end[1] - frame.start[1] - 1

    #transfer r2c data into a multi-frame raster object
    rr <-
      do.call("stack", lapply(c(1:length(frame.start)), function(i) {
        #get frame header
        FrameData <- frame.lines[frame.start[i]]

        #get frame data and convert to new matrix
        jam <- frame.lines[(frame.start[i] + 1):(frame.end[i] - 1)]

        #remove leading whitespace
        jam <- trimws(jam, which = "both")
        #split the strings by whitespace (JMB: I found " +" to work for this case, may need to be changed as required

        tmpframe <-
          matrix(as.numeric(unlist(strsplit(jam, " +"))), frame.length, byrow = T)

        #Convert to a raster
        r <-
          raster(
            nrow = nrow(tmpframe),
            ncol = ncol(tmpframe),
            xmn = xmn,
            xmx = xmx,
            ymn = ymn,
            ymx = ymx
          )
        names(r) <- unlist(strsplit(FrameData, "\""))[2]
        r[] <- tmpframe

        return(r)
      }))
  }


  if (!multiframe) {
    rr <-
      do.call("stack", lapply(c(1:nrow(AttributeTable)), function(i) {
        #subset to appropriate lines
        if (i == 1) {
          start.line <- 1
        } else{
          start.line <- 1 + ycount * (i - 1)
        }
        end.line <- start.line + ycount - 1
        jam <- frame.lines[start.line:end.line]

        #remove leading whitespace if it is present
        jam <- trimws(jam, which = "left")

        #convert to matrix
        tmpframe <-
          matrix(as.numeric(unlist(strsplit(jam, " +"))), ycount, byrow = T) #(convert to mm)
        tmpframe[1, 1] <-
          0 #WATFLOOD has a '1' on the corner of each raster, remove this

        #convert to raster
        r <-
          raster(
            nrow = nrow(tmpframe),
            ncol = ncol(tmpframe),
            xmn = xmn,
            xmx = xmx,
            ymn = ymn,
            ymx = ymx
          )
        names(r) <- AttributeTable[i, 3]
        r[] <- tmpframe

        return(r)
      }))


  }

  rr <- flip(rr, 'y')
  return(list(
    rasterbrick = rr,
    header = header.lines,
    Attributes = AttributeTable
  ))
}




#' Title
#'
#' @param file.name jam
#' @param timestamp jham
#' @param raster jam
#' @param frame.counter jam
#' @param dec.format jam
#'
#' @return jam
#' @export
#'

write.frame <-
  function(file.name,
           timestamp,
           raster,
           frame.counter,
           dec.format = "%.2f") {
    #first flip the raster along the y-axis because GreenKenue/WATFLOOD requires this
    raster <- flip(raster, 'y')

    frameheader <-
      paste0(
        ":Frame  ",
        frame.counter,
        "   ",
        frame.counter,
        "   \"",
        format(timestamp, "%Y/%m/%d %H:%M"),
        "\""
      )
    framedata <-
      apply(as.matrix(raster), 2, function(x)
        sprintf(dec.format, x, quote = F))
    frameender <- ":EndFrame"


    #write frame header
    write(frameheader, file = file.name, append = T)

    #write table to the file, append=T,row.names=F,col.names=F,sep="\t" (tab deliminated)
    write.table(
      framedata,
      file.name,
      append = TRUE,
      row.names = FALSE,
      col.names = FALSE,
      sep = " ",
      quote = F
    )

    #write frame ender
    write(frameender, file = file.name, append = T)

    frame.counter <- frame.counter + 1

    return(frame.counter)
  }





#' Title
#'
#' @param r2cbrick dfd
#' @param FileName dfd
#'
#' @return dfd
#' @export
#'

write_met_r2c <- function(r2cbrick, FileName = "default.r2c") {
  #Write header to the file
  writeLines(r2cbrick$header, con = FileName)


  frame.counter <- 1
  for (i in 1:nlayers(r2cbrick$rasterbrick)) {
    current.raster <- r2cbrick$rasterbrick[[i]]
    maxprecip <- max(round(values(current.raster), 2))


    #if there was no rain and this isn't the first frame, don't write to file
    if (maxprecip == 0 & i != 1) {
      next
    }

    #if any cell has less than 1 mm in it, just write that frame and skip to the next frame
    if (maxprecip <= 1) {
      current.ts <-
        as.POSIXct(strptime(names(current.raster), "X%Y.%m.%d.%H.%M"), tz = "GMT")
      frame.counter <-
        write.frame(FileName, current.ts, current.raster, frame.counter)

      next

    }

    if (maxprecip > 1) {
      next.raster <- r2cbrick$rasterbrick[[i + 1]]
      current.ts <-
        as.POSIXct(strptime(names(current.raster), "X%Y.%m.%d.%H.%M"), tz = "GMT")
      next.ts <-
        as.POSIXct(strptime(names(next.raster), "X%Y.%m.%d.%H.%M"), tz = "GMT")

      timediff <- as.numeric(next.ts - current.ts)
      revised.raster <- current.raster / timediff

      for (j in 0:(timediff - 1)) {
        new.ts <- current.ts + lubridate::hours(j)

        frame.counter <-
          write.frame(FileName, new.ts, revised.raster, frame.counter)
      }
    }
  }
}


#' Function to calculate the ideal extents of a shapefile
#'
#' @param shp a shapefile of class sp
#' @param expand.ratio This is the how much the x and y limits will expand by. It defaults to 0.1 (or 10 percent)
#'
#' @return list of two vectors - x and y containing a min and max value

get_plot_extents <- function(shp, expand.ratio = 0.1){

  xbound <- c(extent(shp)@xmin, extent(shp)@xmax)
  expand <- diff(xbound) * expand.ratio
  xbound <- c(xbound[1] - expand, xbound[2] + expand)

  ybound <- c(extent(shp)@ymin, extent(shp)@ymax)
  expand <- diff(ybound) * expand.ratio
  ybound <- c(ybound[1] - expand, ybound[2] + expand)

  output <- list(x = xbound,
                 y = ybound)
  return(output)


}
jimmybom/LWSlib documentation built on Aug. 21, 2019, 3:55 a.m.