R/get-features-key-functions.R

#' Identify map scale
#'
#' Identify the scale marker on the image. Will either output a list containing the details of the scale identified, or a new feature set object with the raster rescaled as directed by the user. If automatic detection fails, the legend can be selected automatically by the user. (Can be run more than once to rescale map repeatedly if necessary)
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param l Length of horizontal strip of pixels to search for. If the image contains a lot of long horizontal objects, this may need to be increased (default is 17).
#' @return New feature set list containing a rescaled raster of numbered features and a matrix of corresponding feature types.
#' @details Uses a focal raster to find clusters containing large horizontal strips. Function will also rescale map to use life-size coordinate system, but doesn't have to; can be run again at any time to rescale further).
#' @export
#' @examples
#' get.scale(genlis)
#' # will output a new feature set object, with coordinates rescaled
get.scale <- function(site, l = 17) {
    
    ct <- site$feature.types
    cc <- site$features
    cs <- cbind(ct[,1],0)
    
    # recreate binary data from clumps
    r0 <- reclassify(cc, rcl = rbind(cbind(ct[,1], 1), c(NA, 0)))
    
    org.par <- par()
    par(mar = c(0,0,0,0))
    
    if (length(ct[ct[,2] == 2, 1]) == 1) {
        candidate <- ct[ct[,2] == 2, 1]
        cs[candidate, 2] <- 2
        
        # show it on the map
        plot(reclassify(cc, rcl = cs), col = c("Black", "Red"), asp = T, legend = F)
        par(mar = org.par$mar)
    } else {
        # filter to look for long horizontal lines of black cells
        scale.filter <- matrix(c(0,1,0), nrow = 3, ncol = l)
        
        # only picks up the highest-scoring regions
        cf <- table(cc[focal(r0, w = scale.filter, pad = TRUE, padValue = 0) == l])
        
        # find the largest of the highest-scoring regions, set to 2
        candidate <- as.numeric(names(cf)[which.max(cf)])
        reset <- ct[as.numeric(names(cf)[which.max(cf)]),2]
        ct[candidate,2] <- 2 
        cs[candidate,2] <- 2
        
        # show it on the map
        plot(reclassify(cc, rcl = cs), col = c("Black", "Red"), asp = T, legend = F)
        par(mar = org.par$mar)
        
        # get confirmation: is this the right scale item?
        conf <- readline(prompt = "Is the highlighted object the map scale? (y/n) > ")
        if (tolower(substr(conf, 1, 1)) != "y") {
            ct[candidate,2] <- reset
            cs[candidate,2] <- 0
            print("Please click on the scale marker...", quote = F)    
            scale.clump <- click(cc, n = 1, xy = T, id = F, show = F)
            
            # If not on a point, get nearest point 
            if (is.na(scale.clump$value)) {
                coords <- cbind(getValues(cc), xyFromCell(cc, cell = 1:ncell(cc)))
                coords <- coords[!(is.na(coords[,1])), ]
                nn <- knnx.index(coords[,2:3], data.frame(scale.clump$x, scale.clump$y), k = 1)
                candidate <- coords[as.numeric(nn),1]
            } else {
                candidate <- scale.clump$value
            }  
            ct[candidate,2] <- 2
            cs[candidate,2] <- 2
            plot(reclassify(cc, rcl = cs), col = c("Black", "Red"), asp = T, legend = F)
        }
    }           
    # get real length of scale
    m.scale <- "x"
    while (is.na(suppressWarnings(as.numeric(m.scale)))) {
        m.scale <- readline(prompt = "What is the length, in metres, represented by the map scale? > ")
    }
    m.scale <- as.numeric(m.scale)
    
    # update feature found as type 2
    ct <- ct
    
    # get length of scale on plan
    sc <- xyFromCell(cc, 1:ncell(cc))
    sc <- data.frame(sc[(!is.na(getValues(cc))) & (getValues(cc) == candidate),])
    scale.length <- max(sc$x) - min(sc$x)
    sc.check <- min(sc$x) + scale.length
    
    rescale <- readline(prompt = "Do you want to rescale the map coordinates now? > ")
    if (tolower(substr(rescale, 1, 1)) != "y") {
        list(true.length = m.scale, scale.length = scale.length, clump.id = candidate)
    } else {
        scale <- scale.length / m.scale
        cc.xy <- rasterFromXYZ(cbind(xyFromCell(cc, cell = 1:ncell(cc))/ scale,
                                     z = getValues(cc)))
        print("New object 'rescaled' created containing rescaled raster.", quote = F)
        rescaled <<- list(features = cc.xy, feature.types = ct)
    }
}



#' Identify N-S marker on map
#'
#' Identify the N-S marker on the image and calculate the angle of the N-S axis. Since the marker shape varies from plan to plan, it must be selected manually.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @return The angle (in radians) of the N-S axis.
#' @export
#' @examples
#' genlis.NS <- get.NS.axis(genlis)
get.NS.axis <- function(site, show.result = F) {
    
    cc <- site$features
    ct <- site$feature.types
    coords <- xyFromCell(cc, cell = 1:ncell(cc))
    coords <- cbind(getValues(cc), coords)
    coords <- coords[!(is.na(coords[,1])), ]
    
    if (length(ct[ct[,2] == 3, 1]) == 1) {
        # already identified - just need to extract angle
        target <- ct[ct[,2] == 3, 1]
    } else {
        # manually select object to use as marker
        show.features(site)
        print("Please click on the N-S marker...", quote = F)    
        scale.clump <- click(site$features, n = 1, xy = T, id = F, show = F)
        
        # If not on a point, get nearest point 
        if (is.na(scale.clump$value)) {
            nn <- knnx.index(coords[,2:3], data.frame(scale.clump$x, scale.clump$y), k = 1)
            target <- coords[as.numeric(nn),1]
        } else {
            target <- scale.clump$value
        }
    }
    
    # extract points in selected shape
    coords <- coords[coords[,1] == target, 2:3]
    
    # idenfity longest axis
    L <- coords[which.min(coords[,1]),]
    R <- coords[which.max(coords[,1]),]
    B <- coords[which.min(coords[,2]),]
    U <- coords[which.max(coords[,2]),]
    
    h <- sqrt(sum((L-R)^2))
    v <- sqrt(sum((U-B)^2))
    if (v > h) {
        axis <- atan2((U-B)[2], (U-B)[1])
        x.0 <- B[1]; y.0 <- B[2]; l <- v * 0.75
    } else {
        axis <- atan2((R-L)[2], (R-L)[1])
        x.0 <- L[1]; y.0 <- L[2]; l <- h * 0.75
    }
    
    ct[target, 2] <- 3
    
    angle.found <- list(features = site$features, feature.types = ct)
    if (show.result) {
        show.features(angle.found)
        r <- mean(res(site$features)) * 100
        Arrows(x0 = x.0, y0 = y.0, 
               x1 = x.0 + l * cos(axis), y1 = y.0 + l * sin(axis),
               code = 2, lwd = 2, col = "blue")
    }
    NS.marked <<- angle.found
    axis
}



#' Classify sparse shapes
#'
#' Identify shapes that are not compact by calculating the proportion of cells in a bounding square that are coloured, rather than white. Particularly large shapes are marked as linear features, while smaller ones are marked as annotations. Extremely small shapes are also marked as annotations, considering them to be noise.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param density A number from 0 to 1, setting the density threshold below which features are considered 'sparse'. The default is pi/8, based on the assumption that a post-hole is a filled circle, which will take up roughly pi/4 of the cells in a square drawn around it; the threshold is set at half this limit.
#' @details Density cutoff is the approximate midpoint between the density of a solid circle (pi/8) and the density of the shortest possible straight line (being 3 pixels long, so having density 1/3)
#' @param lower Number of cells to be considered as just noise. Any cluster of cells below this size is marked as an annotation. Defaults to 3.
#' @param upper (Optional) Number of cells above which a feature is considered to be a linear feature of the map, rather than an annotation. No linear features will be identified if this is omitted.
#' While we are only looking at post-hole positions, the distinction between annotations and linear features is not very important, and only really exists to speed up processing of functions that look at each annotation in turn.
#' @param plot Boolean indicator: should the new set of features be displayed on a plot?
#' @return New feature set list containing the original raster of numbered features and a matrix of newly assigned feature types.
#' @export
#' @examples
#' exclude.sparse.shapes(genlis)
#' genlis <- sparse.shapes.classified
exclude.sparse.shapes <- function(site, density = 0.55, lower = 3, upper, plot = T) {
    
    # find annotations/smaller linear features: long, thin shapes
    # get sizes of all clumps identified
    cc <- site$features
    fr <- freq(cc)
    fr <- cbind(fr[!is.na(fr[,1]),], sq.density = 0)
    
    # if no limit set for large features, ignore by setting limit just above size of largest feature 
    if (missing(upper)) {upper <- max(fr[,2] + 1)}
    
    xy <- cbind(xyFromCell(cc, cell = 1:ncell(cc)), z = getValues(cc))
    xy <- xy[!is.na(xy[,3]),]
    
    for (i in 1:nrow(fr)) {
        coords <- matrix(xy[xy[,3] == i, ], ncol = 3)
        max.l <- max(length(unique(coords[,1])),length(unique(coords[,2])))
        
        # shape density: if shape was bounded by a square, how much is filled?
        fr[i,3] <- round(fr[i,2] / max.l^2,3)
    }        
    
    ct <- site$feature.types
    
    ct[(ct[,2] <= 1) & (fr[,2] < lower), 2] <- 4
    ct[(ct[,2] <= 1) & (fr[,3] < density), 2] <- 4
    
    # treat large features separately - won't be tested for neighbours later
    ct[(ct[,2] %in% c(0, 4)) & (fr[,2] >= upper), 2] <- 5
    
    sparse.shapes.classified <<- list(features = cc, feature.types = ct)
    if (plot) {show.features(sparse.shapes.classified)
               title (main = "Features reclassified according to size & compactness")}
}



#' Remove annotations
#'
#' Identify horizontal sequences of similarly-sized features as annotations.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param plot Boolean indicator: should the new set of features be displayed on a plot?
#' @param show.process Boolean indicator: show progress bar or not? Defaults to T.
#' @param remove.similar Boolean indicator: If T, after finding all sequences of horizontal features, will also add in any features that are within a 1-cell tolerance of the modal height and width of the features already identified.
#' @return New feature set list containing the original raster of numbered features and a matrix of newly assigned feature types.
#' @export
#' @examples
#' remove.annotations(genlis)
remove.annotations <- function(site, plot = T, show.progress = T, remove.similar = F) {
    
    # identify text in same way as when extending annotations
    find.similar.neighbours <- function(feats, n) {
        xy <- xyFromCell(feats, Which(feats == n, cells=TRUE))
        
        all.x <- unique(xy[,1])
        all.y <- unique(xy[,2])
        baseline <- min(xy[,2])
        r <- res(feats)[2]
        
        block <- cbind(x = rep(all.x, length(all.y)),
                       y = sort(rep(all.y, length(all.x))))
        
        offset.x <- c(rep(max(block[,1]) - min(block[,1]), nrow(block)), rep(0, nrow(block))) 
        
        neighbours <- unique(c(extract(feats, block - offset.x), extract(feats, block + offset.x)))
        neighbours <- neighbours[(!is.na(neighbours)) & (neighbours != n)]
        
        if (length(neighbours) > 0) {
            for (k in 1:length(neighbours)) {
                # check that size is comparable
                xy.chk <- xyFromCell(feats, Which(feats == neighbours[k], cells=TRUE))
                chk.x <- abs(length(unique(xy.chk[,1])) - length(all.x))
                chk.y <- abs(length(unique(xy.chk[,2])) - length(all.y))
                base.chk <- round(abs(min(xy.chk[,2]) - baseline) / r,1)
                if (((chk.x > 3) & (chk.y > 3)) | base.chk > 1) {
                    neighbours[k] <- n
                }
            }
        }
        neighbours
    }
    
    cc <- site$features
    ct <- site$feature.types
    
    unc <- site$feature.types[site$feature.types[,2] <= 1,1]
    cv <- getValues(cc)
    cv[!(cv %in% unc)] <- NA
    cc <- setValues(cc, cv)
    
    B <- length(unc)
    if (show.progress) {pb <- txtProgressBar(min = 0, max = B, style = 3)}
    fsizes <- c()
    xsizes <- c()
    
    while (length(unc) > 0) {
        
        nbs <- unique(c(unc[1], find.similar.neighbours(cc, unc[1])))
        unc <- unc[!(unc %in% nbs)]    
        
        # find all horizontally-neighbouring features & mark as a set of letters 
        if (length(nbs) > 1) {            
            j <- 1
            while (j <= length(nbs)) {
                xy <- xyFromCell(cc, Which(cc == nbs[j], cells=TRUE))
                nbs <- unique(c(nbs, find.similar.neighbours(cc, nbs[j])))
                fsizes <- rbind(fsizes, cbind(ind = nbs[j], x = length(unique(xy[,1])), y = length(unique(xy[,2]))))
                j <- j+1
            }
            ct[nbs, 2] <- 6
        } else {
            xy <- xyFromCell(cc, Which(cc == nbs, cells=TRUE))
            xsizes <- rbind(xsizes, cbind(ind = nbs, x = length(unique(xy[,1])), y = length(unique(xy[,2]))))
        }
        if (show.progress) {setTxtProgressBar(pb, B-length(unc))}
    }
    
    same.size <- xsizes[(findInterval(xsizes[,2], c(as.numeric(names(which.max(table(fsizes[,2])))) - 1,
                                                    as.numeric(names(which.max(table(fsizes[,2])))) + 1)) == 1) &
                            (findInterval(xsizes[,3], c(as.numeric(names(which.max(table(fsizes[,3])))) - 1,
                                                        as.numeric(names(which.max(table(fsizes[,3])))) + 1)) == 1),1]
    
    fr <- freq(cc)
    fr <- fr[fr[,1] %in% fsizes[,1],]
    small <- fr[fr[,2] < as.numeric(names(which.max(table(fr))))/2]
    
    if (remove.similar) {
        ct[same.size, 2] <- 6
    } else {
        same.size <<- same.size
        cat("\n Lists of possible same-size objects and smaller objects created.")
    }
    
    if (plot) {show.features(list(features = site$features, feature.types = ct))
               title(main = "\n Sequences of similar horizontal shapes marked as annotations")}
    if (show.progress) {close(pb)}
    ct[ct[,2] == 6, 2] <- 4
    annotations.removed <<- list(features = site$features, feature.types = ct)
}



#' Remove unusually tall features
#'
#' Calculated the height of all unclassified/post-hole features and finds the threshold for an outlier in the same way as for a boxplot (1.5 x IQR above the upper quartile of the data); any features taller than this are set as annotations.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param plot Boolean indicator: should the new set of features be displayed on a plot?
#' @return New feature set list containing the original raster of numbered features and a matrix of newly assigned feature types.
#' @export
#' @examples
#' remove.tall.features(genlis)
remove.tall.features <- function(site, plot = T) {
    
    cc <- site$features
    ct <- site$feature.types
    
    xy <- data.frame(cbind(id = getValues(cc), xyFromCell(cc, 1:ncell(cc))))
    xy <- xy[!is.na(xy[,1]),]
    
    d <- ddply(xy, .(id), summarise, xm = mean(x), ym = mean(y),
               x.max = max(x), x.min = min(x),
               y.max = max(y), y.min = min(y),
               freq = length(x))
    # add width & height of shape, in cells
    d <- cbind(d, width = (d$x.max - d$x.min) / res(site$features)[1] + 1,
               height = (d$y.max - d$y.min) / res(site$features)[2] + 1)
    
    # get distribution of height & find outlier limit
    d.unc <- d[ct[,2] == 0,]
    l <- quantile(d.unc$height, 0.75) + (1.5 * IQR(d.unc$height))
    
    ct[(ct[,2] == 0) & (d$height > l), 2] <- 6
    
    if (plot) {
        show.features(list(features = cc, feature.types = ct))
        title("Unusually tall features marked as annotations")
    }
    ct[ct[,2] == 6, 2] <- 4
    tall.features.removed <<- list(features = cc, feature.types = ct)
}


#' Identify any features that lie directly between two annotations
#'
#' For each feature classed as an annotation, the function extends the feature's footprint directly left, right, up and down. Any features that appear twice in these extensions (ie. features that are directly horizontally or vertically between two annotation features) are also classed as annotations. Helps with picking up broken line boundaries, dots in text etc.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param plot Boolean indicator: should the new set of features be displayed on a plot?
#' @return New feature set list containing the original raster of numbered features and a matrix of newly assigned feature types.
#' @export
#' @examples
#' extend.annotations(genlis)
extend.annotations <- function(site, plot = T) {
    
    ct <- site$feature.types
    cc <- site$features
    
    # only consider annotation marks
    ann <- ct[ct[,2] == 4,1]
    extensions <- list()
    
    for (i in 1:length(ann)) {
        # create a solid block of points
        xy <- xyFromCell(cc, Which(cc == ann[i], cells=TRUE))
        all.x <- unique(xy[,1])
        all.y <- unique(xy[,2])
        block <- cbind(x = rep(all.x, length(all.y)),
                       y = sort(rep(all.y, length(all.x))))
        
        offset.x <- c(rep(max(block[,1]) - min(block[,1]), nrow(block)), rep(0, nrow(block)))        
        offset.y <- c(rep(0, nrow(block)), rep(max(block[,2]) - min(block[,2]), nrow(block)))
        
        neighbours <- unique(c(extract(cc, block - offset.x), extract(cc, block + offset.x),
                               extract(cc, block - offset.y), extract(cc, block + offset.y)))
        neighbours <- neighbours[(!is.na(neighbours)) & (neighbours != ann[i])]
        extensions[[i]] <- neighbours
    }
    ext <- table(unlist(extensions))
    
    ct[(ct[,1] %in% names(ext[ext == 2])) & (ct[,2] <= 1),2] <- 6
    if (plot) {show.features(list(features = cc, feature.types = ct))
               title (main = "Features directly between two other annotations reclassified as annotations")
    }
    ct[ct[,2] == 6, 2] <- 4
    with.extensions <<- list(features = cc, feature.types = ct)
}



#' Extract centre-points of post-holes
#'
#' For each post-hole feature identified, get the x and y coordinates of the centre of the feature. If no post-holes have been positively identified, treats all unclassified features as post-holes.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param plot Boolean indicator: should the new set of features be displayed on a plot?
#' @return Matrix of coordinates showing the midpoints of the features selected
#' @export
#' @examples
#' get.postholes(genlis)
get.postholes <- function(site, plot = T) {
    cc <- site$features
    ct <- site$feature.types
    
    # get mid-points of all clumps
    xy <- data.frame(cbind(id = getValues(cc), xyFromCell(cc, 1:ncell(cc))))
    mids <- ddply(xy[!is.na(xy$id),], .(id), summarise, xm = mean(x), ym = mean(y))
    
    # filter to include only centroids of post-holes
    # can use 'extract(cc, centres)' to get feature id numbers if necessary
    if (length(ct[ct[,2]==1, 2]) == 0) {
        ct[ct[,2] == 0, 2] <- 1
        final.classification <<- list(features = cc, feature.types = ct)
    }
    centres <<- mids[ct[,2] == 1,2:3]
    if (plot) {
        x.lim <- c(floor(min(xy[,2])/10) * 10, ceiling(max(xy[,2])/10) * 10)
        y.lim <- c(floor(min(xy[,3])/10) * 10, ceiling(max(xy[,3])/10) * 10)
        
        plot(cc, col = "grey", asp = T, legend = F, xlim = x.lim, ylim = y.lim)
        points(centres[,1], centres[,2], pch = 20, cex = 0.5, asp = T, xlim = x.lim, ylim = y.lim)
    }   
}


#' Overlay map showing postholes
#'
#' Plot the site's features, with the midpoints of the identified posthole features overlaid.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param postholes Matrix of coordinates showing the midpoints of the posthole features to be plotted.
#' @export
#' @examples
#' overlay.postholes(genlis)
overlay.postholes <- function(site, postholes) {
    xy <- xyFromCell(site$features, 1:ncell(site$features))
    
    x.lim <- c(floor(min(xy[,1])/10) * 10, ceiling(max(xy[,1])/10) * 10)
    y.lim <- c(floor(min(xy[,2])/10) * 10, ceiling(max(xy[,2])/10) * 10)
    
    plot(site$features, col = "darkgrey", asp = T, legend = F, xlim = x.lim, ylim = y.lim)
    points(postholes[,1], postholes[,2], pch = 20, cex = 0.5, asp = T, xlim = x.lim, ylim = y.lim)
}


#' Identify remote points
#'
#' For a set of x and y coordinates, find the distance to the nearest neighbour. Extremely isolated points (outliers) are defined in the same way as in a boxplot: any points whose nearest-neighbour distance lies more than 1.5 x the IQR above the third quartile.
#' @param pts A two-column matrix containing the coordinates of the points to be compared.
#' @param plot Boolean indicator: should the new set of features be displayed on a plot?
#' @return A Boolean array of length n that can be used to filter points or angles. TRUE indicates that the point is particularly remote from its nearest neighbours; FALSE indicates that point is relatively close to its neighbours.
#' @export
#' @examples
#' nn.filter <- filter.by.distance(centres, plot = T)
filter.by.distance <- function(pts, plot = F) {
    d <- c(knn.dist(pts, k = 1))
    l <- quantile(d, 0.75) + (1.5 * IQR(d))
    
    remote <- d <= l
    if (plot) {
        plot(pts[remote, ], pch = 20, cex = 0.5, asp = T)
        points(pts[!remote, ], col = adjustcolor("red", alpha.f = 0.5), asp = T, pch = 20, cex = 0.5)
    }
    remote
}


#' Fill in broken (-.-) line boundary
#'
#' Find all linear features in site and identify any features lying along their longest axis. Any features aligned to 2 or more linear features are considered to be part of a broken-line boundary.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param plot.progress Boolean indicator: should the function's progress be displayed on a plot?
#' @param s Threshold value between 0 and 1, indicating how 'sparse' a feature must be to be considered as a line. Default is 0.2.
#' @return Vector of ids of features belonging to the boundary.
#' @export
#' @examples
#' # find boundary features
#' fill.broken.boundary(genlis, plot.progress = T)
fill.broken.boundary <- function(site, plot.progress = F, s = 0.2) {
    z <- feature.dims(site)
    
    if (plot.progress) {plot(reclassify(site$features, cbind(z$id, z$density < s)), legend = F)}
    # only look at very sparse features
    sp <- z$id[z$density < s]
    cc <- site$features
    nbs <- c()
    n <- c()
    
    for (i in 1:length(sp)) {
        xy <- xyFromCell(cc, Which(cc == sp[i], cells = TRUE))
        if (plot.progress) {points(xy, pch = 20, cex = 0.2, col = "black")}
        ext <- extent(xy)
        
        # find ends of lines
        if ((ext@xmax - ext@xmin) > (ext@ymax - ext@ymin)) {
            e1 = xy[which.max(xy[,1]),]
            e2 = xy[which.min(xy[,1]),]
        } else {
            e1 = xy[which.max(xy[,2]),]
            e2 = xy[which.min(xy[,2]),]
        }
        l <- sqrt(sum((e1 - e2)^2))
        a <- atan2(e1[2] - e2[2], e1[1] - e2[1])
        adj <- c(l * cos(a), l * sin(a))
        ends <- rbind(e1 + adj, e2 - adj)
        transect <- SpatialLines(list(Lines(list(Line(ends)), ID = 1)))
        if (plot.progress) {lines(transect, col = "blue")}
        tmp <- unique(unlist(extract(cc, transect)))
        nbs <- c(nbs, tmp[!is.na(tmp) & tmp != sp[i]])
        n[i] <- length(tmp[!is.na(tmp) & tmp != sp[i]])
    }
    # boundary contains any features between two or more linear features on list
    # and features with neighbours
    b <- unique(c(as.numeric(rownames(table(nbs))[table(nbs) > 1]), sp[n > 0]))
    
    # remove any unusually large features (outliers)
    l <- (1.5 * IQR(z$freq[b])) + quantile(z$freq[b], 0.75)
    
    boundary <- z$id[z$id %in% b & z$freq <= l]
    site$feature.types[site$feature.types[,1] %in% boundary, 2] <- 4
    boundary.filled <<- site
}


#' Find features whose shape differs after a morphological closing
#'
#' For all unclassified features, removes holes from polygons and performs a morphological closing with radius 1 pixel, and determines which features are unaffected by this process.
#' @param site Feature set list, containing a raster of all features, and a matrix assigning each feature to a particular type.
#' @param progress.bar Boolean: display progress bar or not? Default is F.
#' @param plot.progress Boolean: plot features and colour those that have been checked, or not? Default is F
#' @param rm Radius modifier: how many pixels' widths should be used as a radius for the dilation and erosion processes of the closing? Default is 1. Higher values will lead to a greater degree of smoothing.
#' @return Matrix showing the cluster ID and area before and after the closing operation, and a list of polygon objects representing the features after closing.
#' @export
#' @examples
#' close.features(sparse.shapes.classified, plot.progress = T)
feature.closing <- function(site, progress.bar = F, plot.progress = F, rm = 1) {
    fts <- site$features
    
    # select only potential post-hole candidates for conversion
    cand <- cbind(site$feature.types[,1], site$feature.types[,1])
    cand[site$feature.types[,2] > 0, 2] <- NA
    
    r <- signif(mean(res(fts)),2) * rm
    
    site.poly <- rasterToPolygons(reclassify(fts, cand), dissolve = T)
    if (plot.progress) {plot(site.poly)}
    
    dilated.areas <- matrix(nrow = length(site.poly), ncol = 3)
    closed <- list()
    if (progress.bar) {pb <- txtProgressBar(min = 0, max = length(site.poly), style = 3)}
    
    # support functions (because coercion function has known bug)
    # functions taken from https://stat.ethz.ch/pipermail/r-sig-geo/2009-May/005781.html
    owin2Polygons <- function(x, id="1") {
        stopifnot(is.owin(x))
        x <- as.polygonal(x)
        closering <- function(df) { df[c(seq(nrow(df)), 1), ] }
        pieces <- lapply(x$bdry,
                         function(p) {
                             Polygon(coords=closering(cbind(p$x,p$y)),
                                     hole=is.hole.xypolygon(p))  })
        z <- Polygons(pieces, id)
        return(z)
    }
    
    owin2SP <- function(x) {
        stopifnot(is.owin(x))
        y <- owin2Polygons(x)
        z <- SpatialPolygons(list(y))
        return(z)
    }
    
    # function taken from https://stat.ethz.ch/pipermail/r-sig-geo/2014-January/020139.html
    remove.polygon.holes <- function(SPDF) {
        # SPDF can be a SpatialPolygonsDataFrame or a SpatialPolygons object
        polys <- slot(SPDF, "polygons")
        holes <- lapply(polys, function(x) sapply(slot(x, "Polygons"), slot,
                                                  "hole"))
        res <- lapply(1:length(polys), function(i) slot(polys[[i]],
                                                        "Polygons")[!holes[[i]]])
        IDs <- row.names(SPDF)
        SpatialPolygons(lapply(1:length(res), function(i)
            Polygons(res[[i]], ID=IDs[i])), proj4string=CRS(proj4string(SPDF)))
    }
    
    for (i in 1:length(site.poly)) {
        
        f <- remove.polygon.holes(site.poly[i,])
        closed[[i]] <- closing(as(f, "owin"), r, polygonal = T)
        
        dilated.areas[i,1] <- unique(getValues(fts)[cellFromPolygon(fts, site.poly[i,])[[1]]])
        dilated.areas[i,2] <- length(cellFromPolygon(fts, site.poly[i,])[[1]])
        dilated.areas[i,3] <- length(cellFromPolygon(fts, owin2SP(closed[[i]]))[[1]])
        
        if (plot.progress) {
            if (abs(dilated.areas[i,2] - dilated.areas[i,3]) > 1) {cl = "red"} else {cl = "blue"}
            plot(closed[[i]], col = adjustcolor(cl, alpha.f = 0.5), add = T)
        }
        
        if (progress.bar) {setTxtProgressBar(pb, i)}
    }
    
    fts <- site$feature.types
    fts[fts[,1] %in% dilated.areas[dilated.areas[,3] - dilated.areas[,2] > 1,1],2] <- 4
    features.closed <<- list(features = site$features, feature.types = fts)
    
    list(ft.areas = dilated.areas, closing = closed)
}
ClairBee/AS.preprocessing documentation built on May 6, 2019, 11:50 a.m.