
Defines the function impute.nas.by.surrounding

Documented in impute.nas.by.surrounding

# Impute isolated NAs in every column of "dataset" from the values around them.
#
# For each NA, a window of "window.size" consecutive rows containing the NA is
# taken from the same column. If the NA is the only missing value in that
# window it is replaced by the median of the remaining window values;
# otherwise it is left untouched. Rows that still contain an NA afterwards are
# dropped by na.omit() before returning.
#
# Arguments:
#   dataset      matrix or data.frame; rows are the ordered observations,
#                columns are processed independently.
#   window.size  number of rows in the window (default 5). Must be a single
#                number >= 1; a fractional value is rounded down.
#
# Returns: "dataset" with the imputable NAs filled in and every row that still
# holds an NA removed (the result of na.omit()).
#
# Side effect: prints "Processing column: <idx>" for each column with NAs.
impute.nas.by.surrounding <- function(dataset, window.size=5) {
    stopifnot(is.numeric(window.size), length(window.size) == 1,
              !is.na(window.size), window.size >= 1)
    window.size <- floor(window.size)

    # The following schematic shows the meaning of the variables used in the
    # rest of the function. A (part of a) vector is depicted. "start" indicates
    # the start of the window under scrutiny. "end" indicates the "end" of the
    # window. "offset" indicates the position of the NA being imputed.
    # |  X  |  X  |  X  |  X  |  X  |  NA  |  X  |  X  |  X  |  X  |
    # |     |     |     |start|     |offset|     | end |     |     |
    # start = offset - left.radius
    # end   = start + window.size - 1
    method <- median
    n.rows <- nrow(dataset)
    # Number of rows in the window that lie before "offset". Using ceiling()
    # keeps the indices whole for even window sizes (the extra row goes to the
    # left of the NA).
    left.radius <- ceiling((window.size - 1) / 2)

    # Iterate over the columns (samples) in the dataset.
    for (col.idx in seq_len(ncol(dataset))) {
        # Row positions of the NAs in this column, determined before any
        # imputation takes place.
        NAs <- which(is.na(dataset[, col.idx]))
        # If the current column contains NAs, process it ...
        if ( length(NAs) > 0 ) {
            cat("Processing column: ", col.idx, sep="", "\n")
            for (offset in NAs) {
                start <- offset - left.radius
                end <- start + window.size - 1
                # If we're at the beginning of the stream, shift the window
                # down so it starts at row 1 and still spans "window.size"
                # rows. R indexes from 1, so "start == 0" must be shifted too.
                if (start < 1) {
                    end <- end + (1 - start)
                    start <- 1
                }
                # If we're at the end of the stream, shift the window up so it
                # ends at the last row. max() guards against datasets shorter
                # than "window.size".
                if (end > n.rows) {
                    start <- max(1, start - (end - n.rows))
                    end <- n.rows
                }
                window <- dataset[start:end, col.idx]
                NAs.in.window <- is.na(window)
                # If there's only a single NA in the window, we can impute it.
                # If not, we'll let it be and remove the row later on (just
                # before we return the dataset).
                if ( sum(NAs.in.window) == 1 ) {
                    # Before computing anything, we need to remove the NA from
                    # the window. Otherwise, the median would be NA as well.
                    imputed.value <- method(window[!NAs.in.window])
                    # Written back immediately, so later windows in this
                    # column see the imputed value instead of the NA.
                    dataset[offset, col.idx] <- imputed.value
                }
            }
        }
    }
    return( na.omit(dataset) )
}

#impute.nas.by.surrounding <- function(x, window.size=5)

Try the SIM package in your browser

Any scripts or data that you put into this service are public.

SIM documentation built on Nov. 8, 2020, 4:58 p.m.