average.columns: Average Columns by Time Interval

Description Usage Arguments Value Author(s) Examples

View source: R/average.columns.R

Description

This function averages the columns of a matrix or a data frame by interval (e.g. seconds, minutes, days) beginning at start.time (must be in POSIXct format). The first column should contain the dates (in POSIXct format). If the data are not more finely discretized than the interval, some NaN values will emerge. NaN will also result if the start.time or end.time are outside the range of the data.

Usage

1
average.columns(data, interval = 3600, interval.type = "sec", start.time = NA, end.time = NA, use.snow = F, num.cpu = 1, weighting = FALSE)

Arguments

data

A matrix or data frame where the first column contains data in POSIXct.

interval

A scalar number for the quantify of units of interval.type to average over. E.g. if interval.type is "sec" and interval is 3600, then hourly averages will be computed (1 hour == 3600 seconds).

interval.type

A character string specifying the units for interval. E.g. whether inteval in seconds or minutes or hours, etc. Valid values for interval.type are "sec", "min", "hour", "day", "month", "year", "DSTday". Use "DSTday" instead of day to avoid daylight savings issues (it will assume all data are in standard time).

start.time

The beginning timestamp desired in the resulting matrix or data.frame.

end.time

The ending timestamp desired in the resulting matrix or data.frame.

use.snow

Logical specifying whether the R Package "snow" be employed to parallelize the computations.

num.cpu

If use.snow is TRUE, this argument is used to define the number of nodes in the snow cluster to create, it is most efficient if this number is no larger than the number of (virtual) CPUS on your machine.

weighting

Logical, if weighting is TRUE, the data will be weighted by the interval of time they represent (1/2 of the sum in the forward/backward looking directions with special cases for the end points). This is only relevant for irregularly spaced data. For example if the average interval is 1 hour and there are 30 regularly spaced samples from the first 30 mintutes of the hour and 1 sample in the second half of the hour (at minute 45), then the first sample would recieve a relative weighting of 1.5, the next 28 samples would each receive a weighting of 1, the 30th sample would receive a weighting of 8, and the last sample would receive a weighting of 22.5.

Value

A matrix or data frame containing the same columns as data, but with rows averaged over the specified time interval.

Author(s)

Colin Sheppard, Schatz Energy Research Center, colin@humboldt.edu

Examples

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
##---- Should be DIRECTLY executable !! ----
##-- ==>  Define data, use random,
##--	or do  help(data=index)  for the standard data sets.

## The function is currently defined as
function (data, interval = 3600, interval.type = "sec", start.time = NA, 
    end.time = NA, use.snow = F, num.cpu = 1, weighting = FALSE) 
{
    if (ncol(data) == 2) {
        data <- cbind(data, array(0, nrow(data)))
        dummy.column.added = T
    }
    else {
        dummy.column.added = F
    }
    time.part.list <- list(sec = 1, min = 2, hour = 3, day = 4, 
        month = 5, year = 6, DSTday = 4)
    if (use.snow) {
        if (interval.type != "sec") {
            logger("Error: use.snow only compatible with interval.type='sec', stopping....", 
                scenario.tag)
            stop()
        }
        library("snow")
        if (exists("cl")) 
            stopCluster(cl)
        rm("cl")
        cl <- makeSOCKcluster(rep("localhost", num.cpu))
        breaks <- seq(1, numrows, by = ceiling(numrows/length(cl)))
        break.pairs <- list()
        for (i in 1:(length(breaks) - 1)) {
            break.pairs[[i]] <- c(breaks[i], breaks[i + 1] - 
                1)
        }
        break.pairs[[i + 1]] <- c(breaks[i + 1], numrows)
        clusterEvalQ(cl, rm(inttimes, data, start.time, interval, 
            average.parallel.helper))
        clusterExport(cl, list("average.parallel.helper"))
        results <- clusterApply(cl, break.pairs, fun = "average.parallel.helper", 
            inttimes, data, start.time, interval)
        for (i in 1:length(results)) {
            avg[break.pairs[[i]][1]:break.pairs[[i]][2], ] <- results[[i]]
        }
        stopCluster(cl)
    }
    else {
        null.time <- c(rep(0, 3), 1, rep(0, 6))
        data.times <- as.POSIXct(data[, 1], origin = "1970-01-01", 
            tz = "PST")
        data.times.secs <- unclass(data[, 1])
        data.times.weights <- matrix(1, nrow = length(data.times.secs))
        if (weighting) {
            time.int <- (data.times.secs[2] - data.times.secs[1])/2
            data.times.weights[1] <- time.int
            time.int <- (data.times.secs[length(data.times.secs)] - 
                data.times.secs[length(data.times.secs) - 1])/2
            data.times.weights[length(data.times.secs)] <- time.int
            for (i in 2:(length(data.times.secs) - 1)) {
                time.int <- (data.times.secs[i] - data.times.secs[i - 
                  1])/2 + (data.times.secs[i + 1] - data.times.secs[i])/2
                data.times.weights[i] <- time.int
            }
        }
        if (is.na(start.time)) {
            start.time <- data[1, 1]
        }
        if (is.na(end.time)) {
            end.time <- data[nrow(data), 1]
        }
        start.avg <- as.POSIXlt(start.time, origin = "1970-01-01", 
            tz = "PST")
        end.avg <- as.POSIXlt(end.time, origin = "1970-01-01", 
            tz = "PST")
        if (interval.type != "sec") {
            time.part <- time.part.list[[interval.type]]
            for (i in 1:(time.part - 1)) {
                start.avg[[i]] <- null.time[i]
            }
        }
        avg.times <- seq.POSIXt(from = start.avg, to = end.avg, 
            by = paste(interval, interval.type, sep = " "))
        numrows <- length(avg.times)
        avg <- as.data.frame(array(NA, dim = c(numrows, ncol(data)), 
            dimnames = list(as.character(1:numrows), names(data))))
        avg[, 1] <- avg.times
        found.indices <- findInterval(data[, 1], avg.times)
        if (!weighting) {
            cat("col:")
            numcols <- ncol(data)
            if (dummy.column.added) 
                numcols <- ncol(data) - 1
            for (col in 2:numcols) {
                cat(paste(col, ",", sep = ""))
                tmp <- mapply(data[, col], found.indices, FUN = mean, 
                  na.rm = T)
                missing.i <- which(!1:numrows %in% found.indices)
                non.missing.begin <- 1
                if (length(missing.i) > 1) {
                  for (j in 2:(length(missing.i) - 1)) {
                    if (non.missing.begin < missing.i[j]) {
                      avg[non.missing.begin:(missing.i[j] - 1), 
                        col] <- tmp[non.missing.begin:(missing.i[j] - 
                        1)]
                    }
                    avg[missing.i[j], col] <- NA
                    non.missing.begin <- missing.i[j] + 1
                  }
                }
                if (non.missing.begin <= numrows) {
                  avg[non.missing.begin:numrows, col] <- tmp[non.missing.begin:numrows]
                }
            }
        }
        else {
            for (i in 1:numrows) {
                begin <- avg.times[i]
                end <- seq.POSIXt(begin, by = paste(interval, 
                  interval.type, sep = " "), length.out = 2)[2]
                active.rows <- which(data.times >= begin & data.times < 
                  end)
                avg[i, 2:ncol(data)] <- apply(data[active.rows, 
                  2:(ncol(data))], MARGIN = 2, FUN = weighted.mean, 
                  na.rm = T, w = data.times.weights[active.rows])
            }
        }
    }
    if (dummy.column.added) {
        return(avg[, 1:(ncol(avg) - 1)])
    }
    else {
        return(avg)
    }
  }

colinsheppard/colinmisc documentation built on July 10, 2020, 5:59 p.m.