R/secondaryscripts.R

#' @title Aggregate Traffic Count Data over Time
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{DataAggregator} is designed to collect and summarise data the dataframe
#' generated by the \link[ScrapeMIDAS]{RoadData} function over a given number of minutes.
#'
#' @param data The data that the user wishes to aggregate.
#' @param n The number of minutes that the user wishes to aggregate over. Note that this number
#' must be a factor of the number of minutes in a day (1440 minutes). For example, 15 is a valid
#' number, whereas 23 is not.
#'
#' @examples fifteenMinuteDataSummary <- DataAggregator(trafficData, 15)
#' @examples hourlyData_M4 <- DataAggregator(M4_Road_Data, 60)
#'
#' @return A compressed version of the format of \code{data}, where the data has been summarised into
#' \code{n} minute intervals.
#'
DataAggregator <- function(data, n) {

  # Initialise list to return.
  dataList <- list()

  # Ensures that the n minute intervals will fit into a single day.
  if (1440 %% n != 0) {
    cat("\nMinutes selected do not fit within a single day. Please pick a value whose value will divide 1440.\n")
    return()
  }

  # For each site, collect n minute data.
  if (class(data) == "list") {
    dataLength <- length(data)
  } else {
    dataLength <- 1
  }

  for (i in 1:dataLength) {

    if (class(data) == "list") {
      siteData <- data[[i]]
    } else {
      siteData <- data
    }

    aggregatedSite <- c()

    numberOfRows <- dim(siteData)[1] / n

    for (j in 1:numberOfRows) {

      controlOfficeColumn   <- siteData[1 + ((j - 1)*n), 1]
      geographicAddress     <- siteData[1 + ((j - 1)*n), 2]
      year                  <- siteData[1 + ((j - 1)*n), 3]
      month                 <- siteData[1 + ((j - 1)*n), 4]
      day                   <- siteData[1 + ((j - 1)*n), 5]
      dayOfWeek             <- siteData[1 + ((j - 1)*n), 6]
      typeOfDay             <- siteData[1 + ((j - 1)*n), 7]
      daysAfterBankHoliday  <- siteData[1 + ((j - 1)*n), 8]
      time                  <- siteData[1 + ((j - 1)*n), 9]
      numberOfLanes         <- siteData[1 + ((j - 1)*n), 10]

      temporaryAggregatedSite <- cbind(controlOfficeColumn, geographicAddress, year, month, day, dayOfWeek, typeOfDay, daysAfterBankHoliday, time, numberOfLanes)

      flowCategory1 <- sum(as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 11]))
      flowCategory2 <- sum(as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 12]))
      flowCategory3 <- sum(as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 13]))
      flowCategory4 <- sum(as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 14]))

      temporaryAggregatedSite <- cbind(temporaryAggregatedSite, flowCategory1, flowCategory2, flowCategory3, flowCategory4)

      colnames(temporaryAggregatedSite)[01] <- "Control Office"
      colnames(temporaryAggregatedSite)[02] <- "Geographic Address"
      colnames(temporaryAggregatedSite)[03] <- "Year"
      colnames(temporaryAggregatedSite)[04] <- "Month"
      colnames(temporaryAggregatedSite)[05] <- "Day"
      colnames(temporaryAggregatedSite)[06] <- "Day of Week"
      colnames(temporaryAggregatedSite)[07] <- "Type of Day"
      colnames(temporaryAggregatedSite)[08] <- "Days After Bank Holiday"
      colnames(temporaryAggregatedSite)[09] <- "Time (GMT)"
      colnames(temporaryAggregatedSite)[10] <- "Number of Lanes"
      colnames(temporaryAggregatedSite)[11] <- "Flow (Category 1)"
      colnames(temporaryAggregatedSite)[12] <- "Flow (Category 2)"
      colnames(temporaryAggregatedSite)[13] <- "Flow (Category 3)"
      colnames(temporaryAggregatedSite)[14] <- "Flow (Category 4)"

      for (k in 1:numberOfLanes) {

        speedColumn <- as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 15 + (4*(k-1))])
        headwayColumn <- as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 18 + (4*(k-1))])
        occupancyColumn <- as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 17 + (4*(k-1))])

        if (all(speedColumn == 0)) {
          averageSpeed <- 0
          averageHeadway <- 0
          averageOccupancy <- 0
        } else {
          averageSpeed <- mean(speedColumn[speedColumn != 0])
          averageHeadway <- mean(headwayColumn[headwayColumn != 0])
          averageOccupancy <- mean(occupancyColumn[occupancyColumn != 0])
        }

        totalFlow       <- sum(as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 16 + (4*(k-1))]))
        #occupancy       <- sum(as.numeric(siteData[(1 + ((j - 1)*n)):(n + ((j - 1)*n)), 16 + (4*(k-1))]))

        temporaryAggregatedSite <- cbind(temporaryAggregatedSite, averageSpeed, totalFlow, averageOccupancy, averageHeadway)

        colnames(temporaryAggregatedSite)[dim(temporaryAggregatedSite)[2] - 3] <- paste("Average Speed (Lane ", k, ")", sep = "")
        colnames(temporaryAggregatedSite)[dim(temporaryAggregatedSite)[2] - 2] <- paste("Total Flow (Lane ", k, ")", sep = "")
        colnames(temporaryAggregatedSite)[dim(temporaryAggregatedSite)[2] - 1] <- paste("Occupancy (Lane ", k, ")", sep = "")
        colnames(temporaryAggregatedSite)[dim(temporaryAggregatedSite)[2]] <- paste("Average Headway (Lane ", k, ")", sep = "")
      }

      aggregatedSite <- rbind(aggregatedSite, temporaryAggregatedSite)
      rownames(aggregatedSite) <- NULL

    }


    dataList[[length(dataList) + 1]] <- aggregatedSite


  }

  if (length(dataList) == 1){
    return(dataList[[1]])
  } else {
    return(dataList)
  }

}



#' @title Download Traffic Count Data to .CSV File
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{DownloadCSV} is designed to download data found using the
#' \link[ScrapeMIDAS]{RoadData} into an appropriate .CSV format.
#'
#' @param data The data that the user wishes to aggregate.
#'
#' @examples DownloadCSV(trafficData)
#'
#' @return A CSV file for each site is downloaded, with the naming convention of
#' "X_DDMMMYYYY-DDMMMYYYY.csv", where "X" denotes the alphanumeric characters of the site's
#' geographic address, "DD" denotes the start and end date days respectively, "MMM" denotes the month
#' as three letters for the start and end date months respectively, and "YYYY" denotes the year
#' for the start and end date years respectively.
#'
DownloadCSV <- function(data) {

  if (class(data) == "list") {

    for (i in 1:length(data)) {

      site <- data[[i]]
      FileNameGenerator(site)
    }
  } else {

    FileNameGenerator(data)
  }
}


FileNameGenerator <- function(site) {

  siteName <- gsub("[^[:alnum:] ]", "", site[1,2])
  siteName <- gsub(" ", "", siteName)

  startDate <- paste(site[1,5], site[1,4], site[1,3], sep = "")
  endDate <- paste(site[nrow(site),5], site[nrow(site),4], site[nrow(site),3], sep = "")

  fileName <- paste(siteName, "_", startDate, "-", endDate, ".csv", sep = "")
  write.csv(site, file = fileName, row.names = FALSE)

}






#' @title Plotting Tools for Traffic Count Data
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{PlotData} is designed to make plotting data found using the
#' \link[ScrapeMIDAS]{RoadData} more accessible.
#'
#' @param data The data that the user wishes to examine.
#' @param timeRange The start and end minute of the data, formatted as "start minute:end minute".
#' These are measured from the start of the dataset. For example, "1:1440" denotes the whole of the first
#' day, whereas "5761:7200" denotes the whole of the fifth day.
#' @param n The plotting 'character' that is plotted on the graph (the same as "pch" in \link[graphics]{points}).
#' @param attributesXName The (quoted) name of the column found in the data that the user wishes to plot on the x-axis.
#' Note that to denote "Time" on the axis, use "" (2 quotation marks) as the value for this argument.
#' @param ... Any number of  (quoted) names of columns found in the data that the user wishes to plot on the y-axis.
#'
#' @examples PlotData(trafficData, 1:1440, ".", "", "Average Speed (Lane 1)")
#' @examples PlotData(roadData2016, 1380:1440, 20, "Average Speed (Lane 2)", "Occupancy (Lane 3)")
#'
#' @return A plot of the requested data.
#'
PlotData <- function(data, timeRange, n, attributesXName, ...) {

  attributesYName <- c(...)

  randomColours <- c('red','green','blue','yellow')#rgb(runif(5), runif(5), runif(5))

  if (attributesXName == "") {
    xPlot <- 1:length(timeRange)
    xLabel <- "Time (minutes from start of interval)"
  } else {
    attributesX <- match(attributesXName,colnames(data))
    xPlot <- as.numeric(data[timeRange, attributesX])
    xLabel <- attributesXName
  }
  attributesY <- match(attributesYName,colnames(data))


  if (length(attributesYName) == 1) {
    yLabel <- attributesYName[1]
  } else {
    yLabel <- ""
  }

  for (i in 1:length(attributesY)) {

    if (i == 1) {
      plot(xPlot, as.numeric(data[timeRange, attributesY[i]]), col= randomColours[i], pch = n, xlab = xLabel, ylab = yLabel)

    } else {
      points(xPlot, as.numeric(data[timeRange, attributesY[i]]), col = randomColours[i], pch = n)
    }

    if (length(attributesYName) > 1) {
      legend("topright", "groups", attributesYName, fill=randomColours, cex=0.75)
    }
  }
}





#' @title Incorporation Passenger Car Units to Traffic Count Data
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{PCUAddition} allows the user to add a "passenger car unit" column
#' to the data collected in the \link[ScrapeMIDAS]{RoadData}, with user defined values for
#' each of the four vehicle categories.
#'
#' @param data The data that the user wishes to aggregate.
#' @param category1 Numeric; the user-selected PCU value for category 1 of vehicles in MIDAS data.
#' @param category2 Numeric; the user-selected PCU value for category 2 of vehicles in MIDAS data.
#' @param category3 Numeric; the user-selected PCU value for category 3 of vehicles in MIDAS data.
#' @param category4 Numeric; the user-selected PCU value for category 4 of vehicles in MIDAS data.
#'
#' @examples JanuaryDataWithPCU <- PCUAddition(JanuaryData, 1, 1.5, 2, 2.5)
#'
#' @return A version of the format of \code{data}, where additional data has been added in the form of
#' a column at the end of the data, denoting the PCU value for each row of \code{data}.
#'
PCUAddition <- function(data, category1Unit, category2Unit, category3Unit, category4Unit) {

  if (class(data) == "list") {
    dataListToReturn <- list()

    for (i in 1:length(data)) {

      site <- data[[i]]
      appendedData <- PCUAdditionAppend(site, category1Unit, category2Unit, category3Unit, category4Unit)
      dataListToReturn[[length(dataListToReturn) + 1]] <- appendedData
    }

  } else {

    appendedData <- PCUAdditionAppend(data, category1Unit, category4Unit, category3Unit, category4Unit)
    dataListToReturn <- appendedData
  }

  return(dataListToReturn)

}

PCUAdditionAppend <- function(site, category1, category2, category3, category4) {

  pcuColumn <- c()

  for (i in 1:nrow(site)) {
    pcu <- (as.numeric(site[i, 10]) * category1) + (as.numeric(site[i, 11]) * category2) + (as.numeric(site[i, 12]) * category3) + (as.numeric(site[i, 13]) * category4)
    pcuColumn <- rbind(pcuColumn, pcu)
  }

  newSiteData <- cbind(site, pcuColumn)
  colnames(newSiteData)[ncol(newSiteData)] <- "PCU"
  row.names(newSiteData) <- NULL
  return(newSiteData)

}



#' @title Estimate Speed Percentiles for Traffic Count Data
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{GeneratePercentiles} provides the user with an estimated \eqn{n}th percentile speed,
#' (the speed that \eqn{n}\% of the drivers within a given time period drive at or below) based on the data
#' collected in the \link[ScrapeMIDAS]{RoadData}.
#'
#' Please note that this function only provides an estimate of the percentile speeds, due to the
#' nature of the MIDAS data. It should be acknowledged that the results from this function may not
#' be representative of the true percentile speeds.
#'
#' @param data The data that the user wishes to investigate.
#' @param percentile Numeric; the percentile of drivers that travel under a particular speed
#' (a value commonly investigated 85\%; that is, the function will find out the speed at which 85%
#' of drivers travel at or below).
#' @param timeInterval Numeric; the MIDAS data does not provide accurate details regarding the speed
#' of every vehicle travelling, only the average speed and the number of vehicles passing a MIDAS site
#' per minute. The function therefore assumes that all vehicles in a minute travel at the same average
#' speed, and so to calculate a percentile speed, a sample must be selected over a period of time. This
#' argument is that period of time, in minutes. This time interval must divide the number of rows in the
#' \code{data} dataframe.
#'
#' @examples dataWithHourly85Percentiles <- GeneratePercentiles(roadData, 85, 60)
#'
#' @return A version of the format of \code{data}, where additional data has been added in the form of
#' a column at the end of the data, denoting the percentile speed. Please note that all rows that are in
#' the same time interval share the same value in this column. For example, if 60 minute samples are taken,
#' then the first 60 rows will share the same value, followed by the second 60 rows, and so on.
#'
GeneratePercentiles <- function(data, percentile, timeInterval) {

  if (class(data) == "list") {
    dataListToReturn <- list()

    for (i in 1:length(data)) {

      site <- data[[i]]
      calculatedData <- PercentileCalculator(site, percentile, timeInterval)
      dataListToReturn[[length(dataListToReturn) + 1]] <- calculatedData
    }

  } else {

    calculatedData <- PercentileCalculator(data, percentile, timeInterval)
    dataListToReturn <- calculatedData
  }

  return(dataListToReturn)
}

PercentileCalculator <- function(site, percentile, timeInterval) {

  numberOfTimeIntervals   <- nrow(site) / timeInterval
  numberOfLanes           <- 1:((ncol(site) - 13) / 4)

  totalFlowColumns        <- 16 + ((numberOfLanes - 1) * 4)
  totalSpeedColumns       <- 15 + ((numberOfLanes - 1) * 4)

  percentileList <- c()

  for (i in 1:numberOfTimeIntervals) {

    startInterval <- 1 + ((i - 1) * timeInterval)
    endInterval   <- i * timeInterval

    totalFlowInTimeInterval <- sum(as.vector(as.numeric(site[startInterval:endInterval, totalFlowColumns])))

    percentileIndex <- ceiling(totalFlowInTimeInterval * (percentile / 100))

    collectedSpeeds <- sort(as.numeric(rep(site[startInterval:endInterval, totalSpeedColumns],site[startInterval:endInterval, totalFlowColumns])))
    percentileSpeed <- t(rep(collectedSpeeds[percentileIndex], timeInterval))
    percentileList  <- cbind(percentileList, percentileSpeed)
  }

  newSiteData <- cbind(site, t(percentileList))
  colnames(newSiteData)[ncol(newSiteData)] <- paste(as.character(percentile), "Percentile Speed Over", as.character(timeInterval), "Minutes")


  return(newSiteData)

}




#' @title Combine Logs with Traffic Count Data
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{LogDetails} allows the user to collect data from any relevant
#' locally-stored MIDAS logs (alerts, operations, faults and software exceptions) and
#' add additional columns on the provided dataframe that display the logs.
#'
#' @param data The data that the user wishes to investigate.
#' @param directory A string (without a "/" or "\\" character on the start or end of
#' the string) which points to the directory (from the current working directory) that
#' the user's log files (.mal, .mfl, .mol, .msl, .mal.bz2, .mfl.bz2, .mol.bz2, msl.bz2)
#' are stored. These files must be stored within this directory only and cannot be in
#' any subfolders of this directory. Their names must also follow the "XXDDMMYY" format
#' as downloaded from the MIDAS website, where "XX" denotes the control office, "DD"
#' denotes the day, "MM" denotes the month, and "YY" denotes the year. It is not necessary
#' to have all of these files, as the function will only scrape data out of what is provided.
#'
#'
#' @examples M25DataWithLogs <- LogDetails(M25Data, "LogsDirectory")
#'
#' @return A version of the format of \code{data}, where there are up to four additional columns,
#' depending on what relevant logs are found within the provided files. Alert, fault, operation and
#' software exception logs are put into their own columns, and in the row which corresponds to the
#' minute duration that the data was logged in. In the case of multiple logs of the same type and
#' minute duration, they are seperated with a semicolon (;).
#'
#' @importFrom R.utils toJSON bunzip2
#'
LogDetails <- function(data, directory) {

  library(R.utils)

  if (class(data) == "list") {
    dataListToReturn <- list()

    for (i in 1:length(data)) {

      site <- data[[i]]
      A <- LogHunter(site, directory)
      dataListToReturn[[length(dataListToReturn)+1]] <- A

    }

    return(dataListToReturn)

  } else {
    A <- LogHunter(data, directory)
    return(A)
  }


}


LogHunter <- function(site, directory) {

  controlOffice      <- site[1, 1]

  firstYear          <- site[1, 3]
  firstMonth         <- formatC(match(site[1,4], month.abb), width=2, flag="0")
  firstDay           <- site[1, 5]

  lastYear           <- site[dim(site)[1], 3]
  lastMonth          <- formatC(match(site[dim(site)[1],4], month.abb), width=2, flag="0")
  lastDay            <- site[dim(site)[1], 5]

  firstDate          <- as.Date(paste(firstYear, "/", firstMonth, "/", firstDay, sep=""))
  lastDate           <- as.Date(paste(lastYear, "/", lastMonth, "/", lastDay, sep=""))

  listOfDates        <- seq(as.Date(firstDate), as.Date(lastDate), by = "days")
  formattedDates     <- format(listOfDates, format = "%d%m%y")

  fileStems          <- paste(directory, "/", controlOffice, formattedDates, sep="")

  fileNameAlerts     <- paste(fileStems, ".mal", sep="")
  fileNameFaults     <- paste(fileStems, ".mfl", sep="")
  fileNameOperations <- paste(fileStems, ".mol", sep="")
  fileNameExceptions <- paste(fileStems, ".msl", sep="")

  zipNameAlerts      <- paste(fileNameAlerts, ".bz2", sep="")
  zipNameFaults      <- paste(fileNameFaults, ".bz2", sep="")
  zipNameOperations  <- paste(fileNameOperations, ".bz2", sep="")
  zipNameExceptions  <- paste(fileNameExceptions, ".bz2", sep="")

  site               <- AddLogs(fileNameAlerts, zipNameAlerts, site, "Alert")
  site               <- AddLogs(fileNameFaults, zipNameFaults, site, "Fault")
  site               <- AddLogs(fileNameOperations, zipNameOperations, site, "Operation")
  site               <- AddLogs(fileNameExceptions, zipNameExceptions, site, "Software Exception")


}

AddLogs <- function(regularFile, zipFile, site, fileType) {

  relevantLines <- c()

  for (i in 1:length(regularFile)) {



    if (file.exists(regularFile[i])) {

      openFile <- file(regularFile[i], "rt")
      relevantLines <- rbind(relevantLines, ExtractLines(openFile, site))
      close(openFile)

    } else if (file.exists(zipFile[i])) {

      bunzip2(zipFile[i], destname=regularFile[i], overwrite=TRUE, remove=FALSE)
      openFile <- file(regularFile[i], "rt")
      relevantLines <- rbind(relevantLines, ExtractLines(openFile, site))
      close(openFile)
    }


  }



  if (length(relevantLines) != 0) {

    newColumn                    <- rep(0, dim(site)[1])
    site                         <- cbind(site, newColumn)
    colnames(site)[dim(site)[2]] <- paste(fileType, "Logs")

    for (j in 1:length(relevantLines)) {

      logTime <- substr(strsplit(relevantLines[j], " ")[[1]][2], 1, 5)
      logDate <- strsplit(relevantLines[j], " ")[[1]][1]

      year          <- substr(site[,3], 3, 4)
      month         <- formatC(match(site[,4], month.abb), width=2, flag="0")
      day           <- site[,5]
      rowDate       <- paste(day, "/", month, "/", year, sep="")
      rowTime       <- site[, 9]

      index <- which(rowDate == logDate & rowTime == logTime)

      if (site[index, dim(site)[2]] == 0) {
        site[index, dim(site)[2]] <- relevantLines[j]
      } else {

        site[index, dim(site)[2]] <- paste(site[index, dim(site)[2]], relevantLines[j], sep="; ")
      }

    }

  }

  return(site)

}

ExtractLines <- function(file, site) {

  listOfRelevantLogs <- c()
  geographicAddress  <- gsub(" ", "", site[1,2])

  while (TRUE) {

    logLine <- readLines(file, n = 1)

    if (length(logLine) == 0) {
      break
    }

    if (grepl(geographicAddress, logLine)) {

      listOfRelevantLogs <- rbind(listOfRelevantLogs, logLine)
    }
  }

  return(listOfRelevantLogs)

}

#' @title Isolate or Remove Rush Hour Times with Traffic Count Data
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{RushHourFunction} allows the user to either remove or isolate the
#' rush hour periods, defined from 07:30 - 09:30 and 16:00 - 18:00 each day.
#'
#' @param data The data that the user wishes to isolate or remove the rush hour data from.
#' @param isolate A paramater, which if equal to TRUE, will return just the rush hour data.
#' If set to FALSE, it will return the data that is not during rush hour.
#'
#'
#' @examples M25DataRushHour <- SelectRushHour(M25Data, isolate = TRUE)
#'
#' @return A version of the format of \code{data}, either with just the rush hour data or
#' just the data that is not during rush hour, depending on if the \code{isolate} parameter
#' is set to TRUE or FALSE respectively.
#'
#'
SelectRushHour <- function(data, isolate) {

  if (class(data) == "list") {

    dataListToReturn <- list()

    for (i in 1:length(data)) {

      site <- data[[i]]
      rushHourData <- RushHourFunction(site, isolate)
      dataListToReturn[[length(dataListToReturn) + 1]] <- rushHourData
    }

    return(dataListToReturn)

  } else {
    rushHourData <- RushHourFunction(data, isolate)
    return(rushHourData)
  }



}

RushHourFunction <- function(data, isolate) {

  dataToReturnIsolated <- c()
  dataToReturnRemoved  <- c()

  for (i in 1:dim(data)[1]) {

    timeInRow <- data[i, "Time (GMT)"]
    hourInRow <- as.numeric(substr(timeInRow, 1, 2))
    minuteInRow <- as.numeric(substr(timeInRow, 4, 5))

    minutesFromMidnight <- (hourInRow * 60) + minuteInRow

    if ((minutesFromMidnight >= 450 && minutesFromMidnight < 570) || (minutesFromMidnight >= 960 && minutesFromMidnight < 1080)) {
      dataToReturnIsolated <- rbind(dataToReturnIsolated, data[i,])
    } else {
      dataToReturnRemoved <- rbind(dataToReturnRemoved, data[i,])
    }

  }

  if (isolate == TRUE) {
    return(dataToReturnIsolated)
  } else {
    return(dataToReturnRemoved)
  }

}




#' @title Add Variable Message Signs (VMS) Details with Traffic Count Data
#'
#' @author RAC Foundation, Luke Hudlass-Galley
#'
#' @description Motorway Incident Detection and Automatic Signalling (MIDAS) is a
#' sensor based network along UK motorways, and is designed to collect data regarding
#' traffic flows, average speeds and road occupancy, amongst other features, on the
#' road network. This data can be accessed via the MIDAS website
#' \url{https://www.midas-data.org.uk/} (login required) in the form of .tcd.bz2 files.
#'
#' The function \code{VMS} allows the user to view VMS details for every minute that the kit
#' is turned on, as found in the corresponding
#' alert logs, including the messages displayed on electronic traffic signs over each lane and
#' on the display signs on the side of roads.
#'
#' @param data The data that the user wishes to investigate.
#' @param directory A string (without a "/" or "\\" character on the start or end of
#' the string) which points to the directory (from the current working directory) that
#' the user's alert log files are stored. These files must be stored within this directory only and cannot be in
#' any subfolders of this directory. Their names must also follow the "XXDDMMYY.mal" or "XXDDMMYY.mal.bz2" format
#' as downloaded from the MIDAS website, where "XX" denotes the control office, "DD"
#' denotes the day, "MM" denotes the month, and "YY" denotes the year. It is not necessary
#' to have all of these files, as the function will only scrape data out of what is provided.
#'
#'
#' @examples M25DataRushHour <- VMSDetails(M25Data, "logs/alertLogs")
#'
#' @return A version of the format of \code{data}, with \eqn{n+1} additional columns (where \eqn{n} is the
#' number of lanes). These additional columns correspond to the traffic signs above each lane, with the \eqn{n+1}
#' column denoting the verbal messages displayed to the road users. In instances where there are no traffic signs,
#' "N/A" is used instead.
#'
#' @importFrom R.utils toJSON bunzip2
#'
VMSDetails <- function(data, directory) {


  if (class(data) == "list") {
    dataListToReturn <- list()

    for (i in 1:length(data)) {

      site <- data[[i]]
      vmsData <- VMSFunction(site, directory)
      dataListToReturn[[length(dataListToReturn) + 1]] <- vmsData
    }

    return(dataListToReturn)

  } else {
    vmsData <- VMSFunction(data, directory)
    return(vmsData)
  }

}

VMSFunction <- function(site, directory) {

  logDataFrame <- LogHunter(site, directory)
  alertLogData <- logDataFrame[which(logDataFrame[,"Alert Logs"] != "0"),"Alert Logs"]

  allRelevantLogsCombined <- paste(unlist(alertLogData), sep="; ", collapse="; ")
  allRelevantLogsSplit <- strsplit(allRelevantLogsCombined, "; ")



  subPropLogs <- c()
  subPropExists <- 0



  for (i in 1:length(allRelevantLogsSplit[[1]])) {

    row <- allRelevantLogsSplit[[1]][i]


    if (grepl("SUB-PROP", row)) {
      subPropExists <- 1
      subPropLogs <- rbind(subPropLogs, row)
    }

  }




  if (subPropExists == 1) {

    numberOfLanes <- as.numeric(site[1, "Number of Lanes"])

    newColumns    <- matrix("N/A", dim(site)[1], numberOfLanes + 1)
    newDataFrame  <- cbind(site, newColumns)

    colnames(newDataFrame)[dim(newDataFrame)[2]] <- "MSS Subsystem"

    for (k in 1:numberOfLanes) {
      colnames(newDataFrame)[dim(newDataFrame)[2] - k] <- paste("SIG Subsystem (Lane ", (numberOfLanes + 1 - k), ")", sep="")
    }

    year          <- substr(site[,3], 3, 4)
    month         <- formatC(match(site[,4], month.abb), width=2, flag="0")
    day           <- site[,5]
    rowDate       <- paste(day, "/", month, "/", year, sep="")
    rowTime       <- site[, 9]

    fileSite      <- gsub(" ", "",site[1, "Geographic Address"])

    for (i in 1:(dim(subPropLogs)[1])) {

      logSiteWithComma <- strsplit(subPropLogs[i], " ")[[1]][7]
      logSite <- gsub(",", "", logSiteWithComma)
      signalType        <- strsplit(subPropLogs[i], " ")[[1]][3]

      if (grepl(fileSite, logSite) && signalType == "MSS") {

        logTime           <- substr(strsplit(subPropLogs[i], " ")[[1]][2], 1, 5)
        logDate           <- strsplit(subPropLogs[i], " ")[[1]][1]
        index             <- which(rowDate == logDate & rowTime == logTime)

        subPropSplitComma <- strsplit(subPropLogs[i], ", ")[[1]]
        signalType        <- strsplit(subPropLogs[i], " ")[[1]][3]
        signalPresented   <- subPropSplitComma[length(subPropSplitComma) - 1]

        if (signalPresented == "OFF") {
          newDataFrame[index:dim(newDataFrame)[1], "MSS Subsystem"] <- "N/A"
        } else {
          newDataFrame[index:dim(newDataFrame)[1], "MSS Subsystem"] <- signalPresented
        }

      }

      # Adding SIG Subsystem messages for each lane
      if (numberOfLanes > 1) {

        subPropSplitComma <- strsplit(subPropLogs[i], ", ")[[1]]
        signalType        <- strsplit(subPropLogs[i], " ")[[1]][3]
        signalPresented   <- subPropSplitComma[length(subPropSplitComma) - 1]

        for (k in 1:numberOfLanes) {
          specificLane <- paste(fileSite, as.character(k), sep="")

          if (signalType == "SIG" && logSite == specificLane) {

            logTime           <- substr(strsplit(subPropLogs[i], " ")[[1]][2], 1, 5)
            logDate           <- strsplit(subPropLogs[i], " ")[[1]][1]
            index             <- which(rowDate == logDate & rowTime == logTime)

            subPropSplitComma <- strsplit(subPropLogs[i], ", ")[[1]]
            signalType        <- strsplit(subPropLogs[i], " ")[[1]][3]
            signalPresented   <- subPropSplitComma[length(subPropSplitComma) - 1]
            signalPresented   <- subPropSplitComma[length(subPropSplitComma) - 1]

            if (signalPresented == "OFF" || signalPresented == "RE/END") {
              newDataFrame[index:dim(newDataFrame)[1], paste("SIG Subsystem (Lane ", k, ")", sep="")] <- "N/A"
            } else {
              newDataFrame[index:dim(newDataFrame)[1], paste("SIG Subsystem (Lane ", k, ")", sep="")] <- signalPresented
            }

          }

        }



      }



    }
    return(newDataFrame)
  }


  return(site)



}
RACFoundation/oneminutetrafficdata documentation built on May 28, 2019, 2:26 p.m.