checkRawArgs <- function(variables, variableColNos){
# check raw arguments
if (length(variables) != length(variableColNos)){
stop('Number of variables and variableColNos doesnt match' )
}
}
getFileResFromMeta <- function(startDate, instrument){
# get the raw file time resolution from metadata system
print('Getting raw file time resolution from metadata site...')
# get Tinfo from start of time period
startTinfo <- lf_Tinfo(startDate)
# get sensor info
sensorInfo <- lf_sensorINFO(instrument$site, instrument$id, startTinfo)
#check sensor info and information given is consistent
checkSensorInfo(instrument, sensorInfo, startDate)
#if multiple serials at site use serial given
if (nrow(sensorInfo) > 1){
ftr <- sensorInfo$rawTimeResolution[sensorInfo$instSerial == instrument$serial][[1]]
# otherwise use only file res option
} else {ftr <- sensorInfo$rawTimeResolution[[1]]}
# get rid of space in string
ftr <- stringr::str_remove(ftr, ' ')
print(paste('Raw file res:', ftr))
# format like i want
fileResList <- getFileRes(ftr)
return(fileResList)
}
checkSensorInfo <- function(instrument, sensorInfo, theDate){
#check sensor info and information given is consistent
#if multiple sensors at site
if (nrow(sensorInfo) > 1){
# if the serial isn't specified
if (!('serial' %in% names(instrument))){
stop(paste('Multiple', instrument$id, 'at', instrument$site,
'. Specify serial in instrument list. Available serials:',
paste(sensorInfo$instSerial, collapse = ', ')))
# if serial not in available serials
} else if (!(instrument$serial %in% sensorInfo$instSerial)){
stop(paste('Serial', instrument$serial, 'not found at', instrument$site,
'on date', as.character(theDate),
'. Available serials:',
paste(sensorInfo$instSerial, collapse = ', ')))
}
# if one sensor at site but the serial specified doesn't match that on sensor info
} else if ('serial' %in% names(instrument) && instrument$serial != sensorInfo$instSerial){
stop(paste('Sensor info has found', instrument$id,
'at site', instrument$site, 'on date',
as.character(startDate), 'to have serial number',
sensorInfo$instSerial, 'not the specifed', instrument$serial))
}
}
readRawData <- function(x, separator, classCols, skipRows){
# read in data
if (file.exists(x)){
print(paste('Reading file:', x))
tryCatch({read.table(x, sep = separator, fill = NA,
skip = skipRows,
colClasses = classCols,
stringsAsFactors = F)},
error = function(e){
print(paste('Error reading file:', x))
print(e)
})
} else {
print(paste('File:', x, 'doesnt exist'))
return(NA)
}
}
getClassCols <- function(nTimeCols){
#time columns must be as characters to keep leading zeros
# this is used as an argument in read.table
timeCols <- paste0('V', seq(1:nTimeCols))
classCols <- rep('character', nTimeCols)
names(classCols) <- timeCols
return(classCols)
}
checkTimeColFormat <- function(dtObjsOne, dtFormat, Tinfo){
#try and format first time into timecol format and give error if doesnt work
firstTryDate <- paste(Tinfo['tD', 'YEAR'], dtObjsOne)
tryCatch({as.POSIXct(firstTryDate, format = dtFormat)}, error = function(e){
stop(paste('Error with timeColFormat. Attempting to convert', firstTryDate,
'to format', dtFormat))
})
if( is.na(as.POSIXct(firstTryDate, format = dtFormat))){
stop(paste('Error with timeColFormat. Attempting to convert', firstTryDate,
'to format', dtFormat))
}
}
formatTimeCol <- function(dayData, timeColFormat, variables, Tinfo, fileResList){
#format time column(s) of raw data to POSIXct
#the format to put into as.POSIXct
dtFormat <- paste('%Y', paste(timeColFormat, collapse = ' '))
# paste all time cols together
dtObjs <- apply(array(dayData[, timeColFormat]) , 1 , paste, collapse=" ")
#check that time columns will format
checkTimeColFormat(dtObjs[1], dtFormat, Tinfo)
#paste with year and convert to posixct
dayData <- dayData %>% dplyr::mutate(TIME = paste(Tinfo['tD', 'YEAR'], dtObjs)) %>%
dplyr::mutate(TIME = as.POSIXct(TIME, format = dtFormat)) %>%
dplyr::select(c('TIME', variables))
# round to the nearest fileTimeRes (so that padding doesn't make it go funny)
if (fileResList$Unit != 'Hz') {
dayData$TIME <- lubridate::ceiling_date(dayData$TIME, paste(fileResList, collapse = ' '))
}
return(dayData)
}
getAllTimes <- function(Tinfo, fileResList){
# get every interval in day
endDate <- as.POSIXct(Tinfo['tM', 'YD'], format = '%Y%j')
#allTimes depends on the file resolution
if (fileResList$Unit == 'Hz') {
startDate <- as.POSIXct(paste(Tinfo['tD', 'YD'], '00:00:00.0'), format = '%Y%j %H:%M:%OS') + (1/fileResList$Res)
allTimes <- seq(startDate, endDate, by = 1/fileResList$Res)
} else if (fileResList$Unit == 'sec'){
startDate <- as.POSIXct(Tinfo['tD', 'YD'], format = '%Y%j') + fileResList$Res
allTimes <- seq(startDate, endDate, by = fileResList$Res)
} else if (fileResList$Unit == 'min'){
startDate <- as.POSIXct(Tinfo['tD', 'YD'], format = '%Y%j') + (fileResList$Res *60)
allTimes <- seq(startDate, endDate, by = fileResList$Res * 60)
}
return(allTimes)
}
padData <- function(Tinfo, fileResList, dayData){
# all times must be present to make a decent timeseries so data must be padded
allTimes <- getAllTimes(Tinfo, fileResList)
allTimesDF <- tibble('TIME' = allTimes)
#merge the two, filling with NA
paddedData <- merge(allTimesDF, dayData, all = TRUE)
return(paddedData)
}
padWholeDate <- function(Tinfo, fileResList, vars){
# get all interval times in day
allTimes <- getAllTimes(Tinfo, fileResList)
# create dataframe with NAs for all times
wholeDay <- data.frame(matrix(NA, nrow = length(allTimes),
ncol = length(vars[, 'variables']) + 1))
names(wholeDay) <- c( 'TIME' , vars[, 'variables'])
wholeDay$TIME <- allTimes
return(wholeDay)
}
selectRawFiles <- function(sensorInfo, instrument, DRIVE){
#if there's multiple instruments at a site
if (nrow(sensorInfo) > 1){
# if more than 1 serial at site
rawFiles <- sensorInfo$rawFiles[sensorInfo$instSerial == instrument$serial][[1]]
# otherwise use only raw files option
} else {rawFiles <- sensorInfo$rawFiles[[1]]}
#Check if working on windows
if (!is.null(DRIVE)){
rawFilesSplit <- strsplit(rawFiles, '/|//')[[1]]
# find tier_raw index
tierRawInd <- which(rawFilesSplit == 'Tier_raw')
# alter path to include DRIVE
tierRawDir <- paste0(rawFilesSplit[tierRawInd:length(rawFilesSplit)], collapse = .Platform$file.sep)
rawFiles <- file.path(DRIVE, tierRawDir)
}
return( rawFiles )
}
getUnits <- function(variables, variableColNos){
# get the units for the variable from the metadata system
print('Getting variable units from metadata site...')
varInfo <- lf_getVariables()
#check all the varibles are on the metadata system
if (any(!(variables %in% varInfo$id))){
invalidVar <- variables[!(variables %in% varInfo$id)]
stop(paste('Variable:', invalidVar, 'not found on metadata system.'))
}
units <- unlist(lapply(variables, function(x){
return(varInfo[varInfo$id == x, ]$unit)
}))
vars <- as.data.frame(cbind(variables, units, variableColNos),
stringsAsFactors = FALSE)
return(vars)
}
readRawFiles <- function(dateList, instrument, sep, vars, timeColFormat,
fileResList, skipRows, DRIVE){
rawDataList <- list()
#find the number of time columns
nTimeCols <- length(timeColFormat)
# for every date
for (idate in 1:length(dateList)){
DATE <- dateList[idate]
# get Metadata for this date for this sensor
Tinfo <- lf_Tinfo(as.Date(DATE))
sensorInfo <- lf_sensorINFO(instrument$site, instrument$id, Tinfo)
#check sensor info and information given is consistent
checkSensorInfo(instrument, sensorInfo, DATE)
#select raw files for this serial
browser()
rawFiles <- selectRawFiles(sensorInfo, instrument, DRIVE)
#get columns that must be kept as characters (time columns)
classCols <- getClassCols(nTimeCols)
# read in all data in raw files and bind this into dataframe
dayRawData <- lapply(rawFiles, function(x){readRawData(x, separator = sep, classCols, skipRows)})
#### what do if only 1 list entry
dayData <- do.call(rbind, dayRawData)
if (!all(is.na(dayData))){
#only keep specified columns and time columns -> assuming time cols at the start
#davis data was pasting \032 in front so get rid of this
dayData <- dayData %>% dplyr::select(c(1:nTimeCols, as.numeric(vars[, 'variableColNos']))) %>%
dplyr::mutate(V1 = stringr::str_replace_all(V1, '\032', ''))
names(dayData) <- c(timeColFormat , vars[, 'variables'])
# reformat time to be as posixct
dayData <- formatTimeCol(dayData, timeColFormat, vars[, 'variables'],
Tinfo, fileResList)
#pad data
rawDataList[[idate]] <- padData(Tinfo, fileResList, dayData)
} else{
# what happens when no data for day
rawDataList[[idate]] <- padWholeDate(Tinfo, fileResList, vars)
}
}
return(rawDataList)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.