#' Import Data from Old WREG Format
#'
#' @description
#' The \code{importWREG} function reads the WREG inputs from a directory
#' set up for the old WREG program.
#'
#' @param wregPath A directory that contains all of the files needed to
#' implement the MatLab version of WREG.
#' @param sites (optional) A vetor of sites that should be return. Allows for
#' data subsetting.
#'
#' @details
#' This functions allows users to use the legacy format of WREG. This includes
#' an established directory that contains valid \dQuote{SiteInfo.txt},
#' \dQuote{FlowChar.txt}, \dQuote{LP3G.txt}, \dQuote{LP3K.txt},
#' \dQuote{LP3s.txt} and \dQuote{USGS##########.txt}, multiple files
#' containing time series for each site. The file \dQuote{UserWLS.txt} is
#' optional. For further information on the format of these files, see the
#' program manual (Techniques and Methods 4-A8). Files that are not valid
#' inputs files for the WREG version describe therein will not be accepted.
#'
#' @return All outputs are returned as part of a list. The list includes:
#' \item{sites}{A vector of site IDs.}
#' \item{Y}{A data frame whose comlumns represent unique frequency events,
#' while the row represent particular sites in the same order as \code{sites}.}
#' \item{X}{A data frame whose columns represent basin characteristics to be used
#' as dependent variables and whose rows represent sites corresponding
#' to \code{sites}.}
#' \item{LP3f}{A matrix containing the fitted LP# parameters that are fixed
#' across exceedence probability. These include the standard deviation,
#' skew and regional skew for each site.}
#' \item{LP3k}{A matrix of the fitted kappa parameters of the LP3 distribution
#' for each \code{AEP}.}
#' \item{BasChars}{A matrix containing the site IDs, latitudes and longitudes.}
#' \item{recLen}{A square matrix indicating the number of overlapping years
#' for each site pair.}
#' \item{recCor}{A matrix of the correlaiton between site paris.}
#' \item{UW}{A matrix of user weights, if included.}
#'
#'@examples
#'wregDir <- file.path(system.file("exampleDirectory", package = "WREG"),
#' "matlabImport")
#'importedData <- importWREG(wregPath = wregDir)
#'
#'@export
importWREG <- function(wregPath,sites='') {
# Developed by William Farmer, 10 February 2016
#
# In this format, the user must indicate the AEP and MSEGR manually.
# wregPath <- file.path('..','SampleInputFiles')
# sites <- c('5314900','5316900','5316920','5317845','5317850')
# Load and parse SiteInfo.txt
siteInfoFile <- file.path(wregPath,'SiteInfo.txt')
if (!file.exists(siteInfoFile)) {
stop(paste('Could not find',siteInfoFile))
}
siteInfo <- read.table(siteInfoFile, sep = '\t', header = TRUE, fill = TRUE,
colClasses = c("Station.ID" = "character"))
# Check to see if the file has standard column names,
# otherwise rename the first three columns to the apropriate name
if (sum(is.element(names(siteInfo), c('Station.ID', 'Lat', 'Long',
'No..Annual.Series', 'Zero.1.NonZero.2', 'FreqZero',
'Regional.Skew', 'Cont.1.PR.2'))) != 8) {
stop("Please check naming conventions of the Matlab file format")
}
#convert Station.ID to character class
siteInfo$Station.ID <- toupper(as(siteInfo$Station.ID, 'character'))
#check to see if any sites need a zero appended
zero_append <- which(nchar(siteInfo$Station.ID) != 8)
siteInfo$Station.ID[zero_append] <- paste0("0",siteInfo$Station.ID[zero_append])
BasChars <- siteInfo[,is.element(names(siteInfo),
c('Station.ID','Lat','Long'))]
X <- siteInfo[,c(1,9:ncol(siteInfo))]
# Screen for particular sites
n <- nrow(siteInfo)
ndx <- 1:nrow(siteInfo)
if (length(sites)<2) {
sites <- siteInfo$Station.ID
}
ndx <- which(is.element(siteInfo$Station.ID,sites))
n2 <- length(ndx)
if (n2!=length(sites)) {
stop('You have requested sites that are not in SiteInfo.txt.')
}
sitesOut <- siteInfo$Station.ID[ndx]
X <- X[ndx,]
BasChars <- BasChars[ndx,]
TestRecLen <- siteInfo[,is.element(names(siteInfo),
c('Station.ID','No..Annual.Series'))]
TestRecLen <- TestRecLen[ndx,]
site1 <- sitesOut
# Load and parse FlowChar.txt
flowCharFile <- file.path(wregPath,'FlowChar.txt')
if (!file.exists(flowCharFile)) {
stop(paste('Could not find',flowCharFile))
}
Y <- read.table(flowCharFile, sep = '\t', header = TRUE,
colClasses = c("Station.ID" = "character"))
if(!is.element('Station.ID', names(Y))){
names(Y)[1] <- 'Station.ID'
Y <- shiftColumns(Y,2)
}
Y$Station.ID <- toupper(as(Y$Station.ID,'character'))
# Y <- Y[sort.int(runif(nrow(Y)),index.return = TRUE)$ix,] # for testing
#check to see if any sites need a zero appended
zero_append <- which(nchar(Y$Station.ID) != 8)
Y$Station.ID[zero_append] <- paste0("0",Y$Station.ID[zero_append])
names(Y)[2:ncol(Y)] <- unlist(strsplit(names(Y)[2:ncol(Y)],split='[.]'))
ndx <- which(is.element(Y$Station.ID,sitesOut))
Y <- Y[ndx,]
site2 <- Y$Station.ID
Y <- Y[match(sitesOut,Y$Station.ID),]
# Load and parse LP3*.txt
lp3gFile <- file.path(wregPath,'LP3G.txt')
if (!file.exists(lp3gFile)) {
stop(paste('Could not find',lp3gFile))
}
lp3g <- read.table(lp3gFile, sep = '\t', header = TRUE,
colClasses = c("Station.ID" = "character"))
if(!is.element('Station.ID', names(lp3g))){
names(lp3g)[1] <- 'Station.ID'
lp3g <- shiftColumns(lp3g,2)
}
lp3g$Station.ID <- toupper(as(lp3g$Station.ID,'character'))
#check to see if any sites need a zero appended
zero_append <- which(nchar(lp3g$Station.ID) != 8)
lp3g$Station.ID[zero_append] <- paste0("0",lp3g$Station.ID[zero_append])
test <- sum(unlist(
lapply(lapply(t(lp3g[,2:ncol(lp3g)]),FUN=unique),FUN=length))>1)
if (test>0) {
stop(paste0('For a given set of tiem series, ',
'the columns in LP3G.txt must be identical.'))
}
ndx <- which(is.element(lp3g$Station.ID,sitesOut))
lp3g <- lp3g[ndx,]
site3a <- lp3g$Station.ID
lp3g <- lp3g[match(sitesOut,lp3g$Station.ID),]
lp3kFile <- file.path(wregPath,'LP3K.txt')
if (!file.exists(lp3kFile)) {
stop(paste('Could not find',lp3kFile))
}
lp3k <- read.table(lp3kFile, sep = '\t', header = TRUE,
colClasses = c("Station.ID" = "character"))
if(!is.element('Station.ID', names(lp3k))){
names(lp3k)[1] <- 'Station.ID'
lp3k <- shiftColumns(lp3k,2)
}
lp3k$Station.ID <- toupper(as(lp3k$Station.ID,'character'))
#check to see if any sites need a zero appended
zero_append <- which(nchar(lp3k$Station.ID) != 8)
lp3k$Station.ID[zero_append] <- paste0("0",lp3k$Station.ID[zero_append])
ndx <- which(is.element(lp3k$Station.ID,sitesOut))
lp3k <- lp3k[ndx,]
site3b <- lp3k$Station.ID
lp3k <- lp3k[match(sitesOut,lp3k$Station.ID),]
lp3sFile <- file.path(wregPath,'LP3S.txt')
if (!file.exists(lp3sFile)) {
stop(paste('Could not find',lp3sFile))
}
lp3s <- read.table(lp3sFile, sep = '\t', header = TRUE,
colClasses = c("Station.ID" = "character"))
if(!is.element('Station.ID', names(lp3s))){
names(lp3s)[1] <- 'Station.ID'
lp3s <- shiftColumns(lp3s,2)
}
lp3s$Station.ID <- toupper(as(lp3s$Station.ID,'character'))
#check to see if any sites need a zero appended
zero_append <- which(nchar(lp3s$Station.ID) != 8)
lp3s$Station.ID[zero_append] <- paste0("0",lp3s$Station.ID[zero_append])
test <- sum(unlist(
lapply(lapply(t(lp3s[,2:ncol(lp3s)]),FUN=unique),FUN=length))>1)
if (test>0) {
stop(paste0('For a given set of tiem series, ',
'the columns in LP3S.txt must be identical.'))
}
ndx <- which(is.element(lp3s$Station.ID,sitesOut))
lp3s <- lp3s[ndx,]
site3c <- lp3s$Station.ID
lp3s <- lp3s[match(sitesOut,lp3s$Station.ID),]
LP3f <- data.frame(Station.ID=sitesOut,S=lp3s[,2],G=lp3g[,2],
GR=siteInfo$Regional.Skew[ndx])
LP3k <- lp3k
# Load and parse UserWLS.txt (if available)
uwlsFile <- file.path(wregPath,'UserWLS.txt')
uwls <- NULL
if (file.exists(uwlsFile)) {
uwls <- read.table(uwlsFile, sep = '\t', header = FALSE)
if (nrow(uwls)!=ncol(uwls)) {
stop('UserWLS.txt must be a square matrix.')
}
if (!(identical(site1,site2)&identical(site1,site3a)&
identical(site1,site3b)&identical(site1,site3c))) {
uwls <- matrix(NA,ncol=length(site1),nrow=length(site2))
warning(paste0('Because the input files have a different order of sites,',
' the user-provided weighting matrix is invalid.',
' It has been replaced with NAs.'))
} else if (nrow(uwls)!=n2) {
uwls <- uwls[ndx,ndx]
warning(paste0('UserWLS.txt was ammended to match sites in arguments.',
' (Note: Be sure weights do not depend on sites that were removed.)'))
}
uwls <- as.matrix(uwls)
row.names(uwls) <- colnames(uwls) <- sitesOut
}
# Load and parse USGS*.txt files
siteTS <- list()
# See if a USGS Annual time series file exists, if so read it in
# Otherwise get data from each individual file
usgs_annual_file = file.path(wregPath,'USGSAnnualTimeSeries.txt')
if (file.exists(usgs_annual_file)) {
# split the files in to a list of tables based on site id
temp_table <- read.table(usgs_annual_file)
temp_table[,1] <- toupper(as(temp_table[,1], "character"))
sites = unique(temp_table[,1])
for (x in 1:length(sites)){
siteTS <- c(siteTS, split(temp_table,temp_table[,1] == sites[x])[2])
}
} else {
allFiles <- unique(apply(as.matrix(paste0('USGS',sitesOut,'*.txt')),
MARGIN=1,FUN=list.files,path=wregPath))
if (any(lapply(allFiles,length) == 0)) {
warning(paste("The following sites do not have timeseries data",
sitesOut[which(lapply(allFiles,length) == 0)]))
#Remove missing file from all files list
sitesOut <- sitesOut[-which(lapply(allFiles,length) == 0)]
allFiles <- allFiles[-which(lapply(allFiles,length) == 0)]
#Remove missing site from all files
Y <- Y[Y$Station.ID %in% sitesOut,]
X <- X[X$Station.ID %in% sitesOut,]
LP3f <- LP3f[LP3f$Station.ID %in% sitesOut,]
LP3k <- LP3k[LP3k$Station.ID %in% sitesOut,]
BasChars <- BasChars[BasChars$Station.ID %in% sitesOut,]
siteInfo <- siteInfo[siteInfo$Station.ID %in% sitesOut,]
TestRecLen <- TestRecLen[TestRecLen$Station.ID %in% sitesOut,]
}
if(length(allFiles) > length(sitesOut))
{
stop("Duplicate timeseries were found.")
}
allFiles <- file.path(wregPath,allFiles)
siteTS <- lapply(allFiles,read.table)
}
if (length(siteTS) == 0){
stop(paste("There are no timeseries files to process. Check ts naming convetions."))
}
recLen <- recCor <- matrix(NA,ncol=length(siteTS),nrow=length(siteTS))
for (i in 1:length(siteTS)) {
recLen[i,i] <- nrow(siteTS[[i]])
idata <- abs(siteTS[[i]][,2:3])
for (j in 1:i) {
jdata <- abs(siteTS[[j]][,2:3])
overlap <- intersect(idata[,1],jdata[,1])
recLen[i,j] <- recLen[j,i] <- length(overlap)
if (length(overlap)==0) {next}
ijdata <- idata[which(is.element(idata[,1],overlap)),2]
jidata <- jdata[which(is.element(jdata[,1],overlap)),2]
if (length(unique(ijdata))==1|length(unique(jidata))==1) {next}
#check to see if the data has the proper dimensions to be correlated
if (length(ijdata) != length(jidata)){
stop(paste('Overlap in data matrix caused by multiple entries for a year and site. Revisit data.'))
}
recCor[i,j] <- recCor[j,i] <- cor(ijdata,jidata)
}
}
tester <- diag(recLen)
names(tester) <- NULL
if (!identical(tester,TestRecLen[,2])) {
stop(paste('Reported record lengths in SiteInfo.txt do not match',
'time series files.'))
}
row.names(recLen) <- row.names(recCor) <- colnames(recLen) <-
colnames(recCor) <- sitesOut
names(LP3k) <- names(Y)
# Output result
result <- list(
sites=sitesOut,
Y=Y,
X=X,
LP3f=LP3f,
LP3k=LP3k,
BasChars=BasChars,
recLen=recLen,
recCor=recCor,
UW=uwls
)
return(result)
}
shiftColumns <- function(siteInfo, columnShift){
allNames = names(siteInfo)
ndx <- which(!is.na(siteInfo$Station.ID))
siteInfo <- siteInfo[ndx,]
blank_count <- 0
for (x in 1:length(siteInfo)){
if (Reduce('|',(siteInfo[,allNames[x]] == ""))){
blank_count <- blank_count + 1
}
}
if (blank_count > 0){
for (x in columnShift:length(siteInfo)-blank_count){
names(siteInfo)[x] <- names(siteInfo)[x+blank_count]
}
siteInfo <- siteInfo[,1:length(siteInfo)-blank_count]
}
return(siteInfo)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.