# R/JunctionDetection.R In handwriter: Handwriting Analysis in R

```## Junction Detection ##
# Provide skeletonized image from ThinText.
# A black pixel becomes a node if its removal creates exactly one or at least
# three 4-connected black components in its 1-neighborhood.
# Also from Zhang thinning paper (allegedly)

#' countChanges
#'
#' Internal function for counting 4-connected components around a pixel.
#'
#' @param coords coordinates to consider
#' @param img The non-thinned image as binary bit map
#' @return The sum of the 4-connected components around a pixel.
countChanges = function(coords, img)
{
rr = coords
cc = coords
if(rr>1 & cc>1 & rr<dim(img) & cc<dim(img))
{
neighbs = c(t(img[(rr-1):(rr+1),][,(cc-1):(cc+1)]))[c(2,3,6,9,8,7,4,1,2)]
return(sum(neighbs == 1 & c(neighbs[-1], neighbs) == 0))
}
else
{
}
}

#' whichNeighbors
#'
#' Internal function for identifying which neighbors are black.
#'
#' @param coords coordinates to consider
#' @param img The image as a bitmap
#' @return Return a list of which neighbors are a black pixel
whichNeighbors = function(coords, img)
{
rr = coords
cc = coords
neighbs = c(t(img[(rr-1):(rr+1),][,(cc-1):(cc+1)]))[c(2,3,6,9,8,7,4,1)]
yesNeighbs = which(neighbs == 0)
res = as.matrix(rep(0, 8), nrow = 1)
res[yesNeighbs] = 1

return(res)
}

#' whichNeighbors0
#'
#' Internal function for identifying which neighbors are black excluding diagonals
#' to the middle point when a non-diagonal between those two vertices exists.
#'
#' @param coords coordinates to consider
#' @param img The image as a bitmap
#' @return Return a list of which neighbors are a black pixel excluding diagonals to the
#' middle point when a non-diagonal between those two vertices exists.
whichNeighbors0 = function(coords, img)
{
rr = coords
cc = coords
neighbs = c(t(img[(rr-1):(rr+1),][,(cc-1):(cc+1)]))[c(2,3,6,9,8,7,4,1)]
yesNeighbs = which(neighbs == 0)
res = as.matrix(rep(0, 8), nrow = 1)
res[yesNeighbs] = 1

if(res == 1 | res == 1)
res = 0
if(res == 1 | res == 1)
res = 0
if(res == 1 | res == 1)
res = 0
if(res == 1 | res == 1)
res = 0
return(res)
}

#' findMergeNodes
#'
#'Internal function to merge nodes that are very close together.
#'
#' @param skel_graph the skeltonized graph
#' @param mergeMat sets of the nodes to merge into a single nodes
#' @return The merged node
findMergeNodes = function(skel_graph, mergeMat)
{
newNodes = rep(NA, dim(mergeMat))
for(i in 1:dim(mergeMat))
{
fromNode = as.character(format(mergeMat[i,1], scientific = FALSE, trim = TRUE))
toNode = as.character(format(mergeMat[i,2], scientific = FALSE, trim = TRUE))
path = shortest_paths(skel_graph, from = fromNode, to = toNode, weights = E(skel_graph)\$pen_dist)\$vpath[]
len = length(path)
newNodes[i] = as.numeric(names(path[ceiling(len/2)]))
}
return(newNodes)
}

#' AllUniquePaths
#'
#' Internal function for getting a list of all non loop paths in a writing sample.
#'
#' @param graph first skeletonized graph
#' @param graph0 second skeletonized graph
#' @return a list of all non loop paths
{
#Gets all paths that are not loops
#paths = apply(adj, 1, LooplessPaths, graph = graph, graph0 = graph0)
paths = list()
#
{
fromNode = as.character(format(adj[i,1], scientific = FALSE, trim = TRUE))
toNode = as.character(format(adj[i,2], scientific = FALSE, trim = TRUE))

while(shortest.paths(graph0, v = fromNode, to = toNode, weights = E(graph0)\$nodeOnlyDist) < 3 & shortest.paths(graph0, v = fromNode, to = toNode, weights = E(graph0)\$nodeOnlyDist) >= 1)
{
shortest = shortest_paths(graph0, from = fromNode, to = toNode, weights = E(graph0)\$nodeOnlyDist)
len = length(unlist(shortest[]))
paths = c(paths, list(as.numeric(names(shortest\$vpath[]))))
if(len>2)
{
graph = delete.edges(graph, paste0(names(shortest\$vpath[])[len%/%2], "|", names(shortest\$vpath[])[len%/%2+1]))
graph0 = delete.edges(graph0, paste0(names(shortest\$vpath[])[len%/%2], "|", names(shortest\$vpath[])[len%/%2+1]))
}
else if(len == 2)
{
graph0 = delete.edges(graph0, paste0(names(shortest\$vpath[]), "|", names(shortest\$vpath[])))
}
else
stop("There must be some mistake. Single node should have nodeOnlyDist path length of 0.")
}
}

return(paths)
}

#' getLoops
#'
#' Internal function for getting looped paths.
#'
#' @param nodeList A list of all found nodes
#' @param graph first skeletonized graph
#' @param graph0 second skeletonized graph
#' @param pathList The current path list to check for loops
#' @param dims dimensions of the image
#' @return A list of all loops found
#'
#' @importFrom utils combn
getLoops = function(nodeList, graph, graph0, pathList, dims)
{
vertexNames = names(V(graph0))

fullGraph0 = graph0

used = unlist(lapply(pathList, function(x){x[-c(1, length(x))]}))
unused = as.numeric(vertexNames)[which(!(as.numeric(vertexNames) %in% used))]
unusedAdj = matrix(1, ncol = length(unused), nrow = length(unused))
colnames(unusedAdj) = as.character(format(unused, scientific = FALSE, trim = TRUE))
rownames(unusedAdj) = as.character(format(unused, scientific = FALSE, trim = TRUE))
if(length(nodeList) > 1)
{
unusedAdj[,which(unused %in% nodeList)][which(unused %in% nodeList),] = 0
}
else
unusedAdj[which(unused %in% nodeList),which(unused %in% nodeList)] = 0

graph0 = intersection(graph0, unusedGraph, keep.all.vertices = TRUE)
graph = intersection(graph, graph0, byname = TRUE, keep.all.vertices = TRUE)
check = unused[degree(graph0, as.character(format(unused, scientific = FALSE, trim = TRUE))) > 1]
check = check[which(check %in% nodeList)]

loopList = list()

neighbors = neighborhood(graph, nodes = as.character(check))

if(any(unlist(lapply(neighbors, length)) > 3))
{
warning("At least 1 of the nodes in the potential loops has more than 2 neighbors after removal of the connections. Try again! \nThe nodes in question are: \n", dput(names(neighbors)[which(unlist(lapply(neighbors, length)) > 3)]))
}

## Get paths that start and end at the same point, where that point is a node in nodeList
if(length(neighbors) > 0)
{
for(i in 1:length(neighbors))
{
neigh = as.numeric(names(neighbors[[i]]))
graph = delete.edges(graph, paste0(neigh, "|", neigh))
if(distances(graph, v = as.character(neigh), to = as.character(neigh)) < Inf)
{
newPath = as.numeric(names(unlist(shortest_paths(graph, from = format(neigh, scientific = FALSE), to = format(neigh, scientific = FALSE), weights = E(graph)\$pen_dist)\$vpath)))
loopList = append(loopList, list(c(newPath, newPath)))
}
}
}

## Eliminate loop paths that we have found and find ones that dont have vertex on the loop. This is caused by combining of nodes that are close together.
used = as.numeric(unique(c(unlist(pathList), unlist(loopList))))
unused = as.numeric(vertexNames)[which(!(as.numeric(vertexNames) %in% used))]
remaining0 = induced_subgraph(graph0, vids = format(c(unused, nodeList), scientific = FALSE, trim = TRUE))
numNeighbors = lapply(neighborhood(remaining0, nodes = V(remaining0)), length)
remaining0 = induced_subgraph(remaining0, vids = V(remaining0)[numNeighbors > 1])

roots = format(nodeList[which(nodeList %in% names(V(remaining0)))], scientific = FALSE, trim = TRUE)

if(length(roots) > 0)
{
for(i in 1:length(roots))
{
loopPart1 = names(na.omit(dfs(remaining0, roots[i], unreachable = FALSE)\$order))
loopPart2 = shortest_paths(fullGraph0, from = loopPart1[length(loopPart1)], to = roots[i])\$vpath[][-1]
loopList = append(loopList, list(as.numeric(c(loopPart1, names(loopPart2)))))
}
}

## Now get loops that are more difficult. They are close to nodes, but separated by paths already found previously. Have to dig a little further.
remaining0 = induced_subgraph(graph0, vids = format(unused, scientific = FALSE, trim = TRUE))
used = as.numeric(unique(c(unlist(pathList), unlist(loopList))))
unused = as.numeric(vertexNames)[which(!(as.numeric(vertexNames) %in% used))]
if(length(unused) > 0)
{
ends = lapply(neighborhood(fullGraph0, order = 2, nodes = format(unused, scientific = FALSE, trim = TRUE)), function(x) nodeList[which(format(nodeList, scientific = FALSE, trim = TRUE) %in% names(x))])

roots = format(unique(unlist(ends)), scientific = FALSE, trim = TRUE)
if(length(roots) > 0)
{
ends = neighborhood(fullGraph0, order = 2, nodes = roots)
ends = unlist(lapply(ends, function(x) names(x)[which(names(x) %in% format(unused, scientific = FALSE, trim = TRUE))]))

for(i in 1:length(roots))
{
loopPart1 = names(na.omit(dfs(remaining0, ends[i], unreachable = FALSE)\$order))
loopPart2a = shortest_paths(fullGraph0, from = loopPart1, to = roots[i])\$vpath[][-1]
loopPart2b = shortest_paths(fullGraph0, from = loopPart1[length(loopPart1)], to = roots[i])\$vpath[][-1]
loopList = append(loopList, list(as.numeric(c(rev(names(loopPart2a)), loopPart1, names(loopPart2b)))))
}
}
}

## And a little deeper
remaining0 = induced_subgraph(graph0, vids = format(unused, scientific = FALSE, trim = TRUE))
used = as.numeric(unique(c(unlist(pathList), unlist(loopList))))
unused = as.numeric(vertexNames)[which(!(as.numeric(vertexNames) %in% used))]
if(length(unused) > 0)
{
ends = lapply(neighborhood(fullGraph0, order = 3, nodes = format(unused, scientific = FALSE, trim = TRUE)), function(x) nodeList[which(format(nodeList, scientific = FALSE, trim = TRUE) %in% names(x))])

roots = format(unique(unlist(ends)), scientific = FALSE, trim = TRUE)
if(length(roots) > 0)
{
ends = neighborhood(fullGraph0, order = 3, nodes = roots)
ends = unlist(lapply(ends, function(x) names(x)[which(names(x) %in% format(unused, scientific = FALSE, trim = TRUE))]))

for(i in 1:length(roots))
{
loopPart1 = names(na.omit(dfs(remaining0, ends[i], unreachable = FALSE)\$order))
loopPart2a = shortest_paths(fullGraph0, from = loopPart1, to = roots[i])\$vpath[][-1]
loopPart2b = shortest_paths(fullGraph0, from = loopPart1[length(loopPart1)], to = roots[i])\$vpath[][-1]
loopList = append(loopList, list(as.numeric(c(rev(names(loopPart2a)), loopPart1, names(loopPart2b)))))
}
}
}

## All that remains now is perfect loops. Start and end at same point with no intersections or end points.
remaining0 = induced_subgraph(remaining0, vids = V(remaining0)[!(names(V(remaining0)) %in% unlist(loopList))])
while(TRUE)
{
if(length(V(remaining0)) > 0)
{
perfectLoop = names(na.omit(dfs(remaining0, V(remaining0), unreachable = FALSE)\$order))
remaining0 = delete.vertices(remaining0, v = perfectLoop)
loopList = append(loopList, list(as.numeric(c(perfectLoop, perfectLoop))))
}
else break
}

return(loopList)
}

#' checkBreakPoints
#'
#' Internal function called by processHandwriting that eliminates breakpoints based on rules to try to coherently separate letters.
#'
#' @param candidateNodes possible breakpoints
#' @param allPaths list of paths
#' @param nodeGraph graph of nodes; call the getNodeGraph function
#' @param terminalNodes nodes at the endpoints of the graph
#' @param dims graph dimensions
#'
#' @return a graph without breakpoints and separated letters
checkBreakPoints = function(candidateNodes, allPaths, nodeGraph, terminalNodes, dims)
{
#Check rules for candidate breakpoints
breakFlag = rep(TRUE, length(candidateNodes))

for(i in 1:length(allPaths))
{
tempPath = format(allPaths[[i]], scientific = FALSE, trim = TRUE)
nodeChecks = which(candidateNodes %in% tempPath)
tempNodeGraph = delete.edges(nodeGraph, paste0(tempPath, "|", tempPath[length(tempPath)]))

if(distances(tempNodeGraph, v = tempPath, to = tempPath[length(tempPath)]) < Inf)
{
#No breaking on multiple paths between nodes.
breakFlag[nodeChecks] = FALSE
}
else if(any(tempPath %in% terminalNodes))
{
# No break if path has an endpoint
breakFlag[nodeChecks] = FALSE
}
else if(any(which(tempPath %in% c(candidateNodes[nodeChecks])) <= 4 | which(tempPath %in% c(candidateNodes[nodeChecks])) >= length(tempPath) - 3) | length(tempPath) <= 10)
{
#No breaks too close to a vertex
breakFlag[nodeChecks[which(candidateNodes[nodeChecks] <= 5 | candidateNodes[nodeChecks] >= length(tempPath) - 4)]] = FALSE
}
}

return(breakFlag)
}

#' letterPaths
#'
#' Internal function that uses existing breakPoint list to assign letters to the nodes in nodeGraph0.
#'
#' @param allPaths list of every path
#' @param nodeGraph0 graph of all nodes
#' @param breakPoints breakpoint list
#' @return assigned letters to nodes in graph
letterPaths = function(allPaths, nodeGraph0, breakPoints)
{
oldVerts = V(nodeGraph0)\$name
if(any(as.character(format(breakPoints, scientific = FALSE, trim = TRUE)) %in% names(V(nodeGraph0))))
nodeGraph0 = delete_vertices(nodeGraph0, v = as.character(format(breakPoints, scientific = FALSE, trim = TRUE)))
grIDs = rep(NA, length(V(nodeGraph0)))
dists = distances(nodeGraph0, v = names(V(nodeGraph0)), to = names(V(nodeGraph0)), weights = E(nodeGraph0)\$nodeOnlyDist)
vertList = V(nodeGraph0)\$name

grPaths = list()
i = 1
while(length(vertList) > 0)
{
tempIDs = which(dists[which(V(nodeGraph0)\$name == vertList),] < Inf)
grPaths = c(grPaths, list(as.numeric(V(nodeGraph0)\$name[tempIDs])))
#  grIDs[V(nodeGraph0)\$name %in% as.character(grPaths[[i]])] = i
grIDs[tempIDs] = i
vertList = vertList[vertList %in% setdiff(vertList, format(grPaths[[i]], scientific = FALSE, trim = TRUE))]
i = i+1
}
grIDs2 = rep(NA, length(oldVerts))
grIDs2[which(!(oldVerts %in% format(breakPoints, scientific = FALSE, trim = TRUE)))] = grIDs

return(list(grPaths, grIDs2))
}

#' getNodes
#'
#' Detect intersection points of an image thinned with thinImage.
#'
#' @param indices Where to check for intersection at
#' @param dims dimensions of the image
#' @return Returns image matrix. 1 is blank, 0 is a node.
#'
#' @keywords vertex Zhang
getNodes = function(indices, dims)
{
## First, we find endpoints and intersections of skeleton.
img = matrix(1, ncol = dims, nrow = dims)
img[indices] = 0
img.m = cbind(((indices-1) %% dims) + 1, ((indices - 1) %/% dims) + 1)
changeCount = matrix(apply(X = img.m, MARGIN = 1, FUN = countChanges, img = img), byrow = F, nrow = 1)
nodes = matrix(1, dims, dims)
nodes[indices] = ifelse(changeCount == 1 | changeCount >= 3, 0, 1)

## If there is a 2x2 block in the thinned image and none of those pixels are nodes, make one of them a node.
## All will have connectivity of 2. Choose pixel with most neighbors as node. Also make opposite diagonal pixel a node.
## When nodes are combined later this will form 1 node that absorbs all connections.

node2by2fill = function(coords, img)
{
rr = coords
cc = coords

if(img[rr,cc] == 0 & img[rr+1, cc] == 0 & img[rr,cc+1] == 0 & img[rr+1,cc+1] == 0)
{
index2by2 = matrix(c(rr,cc,rr+1, cc, rr,cc+1, rr+1,cc+1), byrow = TRUE, ncol = 2)
numNeighbors = colSums(apply(X = index2by2, MARGIN = 1, FUN = whichNeighbors, img = img))
newNode = index2by2[which.max(numNeighbors),]
oppositeCorner = index2by2[(4:1)[which.max(numNeighbors)],]
return(c(newNode + (newNode - 1)*dim(img), oppositeCorner + (oppositeCorner - 1)*dim(img)))
}
else
return(c(NA,NA))
}

nodes2by2 = t(apply(img.m, 1, FUN = node2by2fill, img = img))
nodes[c(nodes2by2[apply(nodes2by2, 1, function(x){all(!is.na(x))}),])] = 0

return(list(which(nodes == 0), c(indices[changeCount >= 3], c(nodes2by2[apply(nodes2by2, 1, function(x){all(!is.na(x))}),]))))
}

#' processHandwriting
#'
#' Main driver of handwriting processing.
#' Takes in thin image form and the breakpoints suggested by getNodes and parses the writing into letters.
#' Returns final letter separation points, a list of the paths in the image, and a list of the letter paths in the image.
#'
#' @param img Thinned binary image.
#' @param dims Dimensions of thinned binary image.
#' @return Returns a list of length 3. Object [] (breakPoints) is the set of final letter separation points.
#' Object [] (pathList) is a list of the paths between the input specified nodes.
#' Object [] (letters) is a list of the pixels in the different letters in the handwriting sample.
#'
#' @useDynLib handwriter, .registration = TRUE
#'
#' @importFrom Rcpp sourceCpp
#' @importFrom reshape2 melt
#' @importFrom grDevices as.raster
#' @importFrom graphics hist
#' @importFrom stats na.omit
#' @importFrom utils install.packages
#' @import igraph
#'
#' @examples
#' twoSent_document = list()
#' twoSent_document\$image = twoSent
#' twoSent_document\$thin = thinImage(twoSent_document\$image)
#' twoSent_processList = processHandwriting(twoSent_document\$thin, dim(twoSent_document\$image))
#'
#' @export
processHandwriting = function(img, dims){

value <- from <- to <- nodeOnlyDist <- man_dist <- euc_dist <- pen_dist <- NULL

# Next, we have to follow certain rules to find non intersection breakpoints.
message("Starting Processing...")
indices = img
img = matrix(1, nrow = dims, ncol = dims)
img[indices] = 0

message("Getting Nodes...", appendLF = FALSE)
nodeList = getNodes(indices, dims)
nodeConnections = nodeList[]
terminalNodes = nodeList[!(nodeList %in% nodeConnections)]
nodeList = nodeList[]
img.m = cbind(((indices-1) %% dims) + 1, ((indices - 1) %/% dims) + 1)

neighborList = matrix(NA, nrow = indices, ncol = 8)
neighborList = t(apply(as.matrix(img.m, ncol = 2), 1, whichNeighbors, img = img))
graphdf = melt(neighborList)
graphdf = subset(graphdf, value == 1)
graphdf\$from = indices[graphdf\$Var1]
graphdf\$to = graphdf\$from + c(-1, dims-1, dims, dims + 1, 1, 1-dims, -dims, -1-dims)[graphdf\$Var2]
graphdf\$man_dist = rep(c(1,2), 4)[graphdf\$Var2]
graphdf\$euc_dist = c(1,sqrt(2), 1, sqrt(2), 1, sqrt(2), 1, sqrt(2))[graphdf\$Var2]
graphdf\$pen_dist = c(1,3,1,3,1,3,1,3)[graphdf\$Var2]

neighborList0 = matrix(NA, nrow = indices, ncol = 8)
neighborList0 = t(apply(as.matrix(img.m, ncol = 2), 1, whichNeighbors0, img = img))
graphdf0 = melt(neighborList0)
graphdf0 = subset(graphdf0, value == 1)
graphdf0\$from = indices[graphdf0\$Var1]
graphdf0\$to = graphdf0\$from + c(-1, dims-1, dims, dims + 1, 1, 1-dims, -dims, -1-dims)[graphdf0\$Var2]
graphdf0\$nodeOnlyDist = ifelse(graphdf0\$from %in% nodeList | graphdf0\$to %in% nodeList, 1, 0)
graphdf0\$from = as.character(format(graphdf0\$from, scientific = FALSE, trim = TRUE))
graphdf0\$to = as.character(format(graphdf0\$to, scientific = FALSE, trim = TRUE))
graphdf0 = subset(graphdf0, select = c(from, to, nodeOnlyDist))

graphdf\$from = as.character(format(graphdf\$from, scientific = FALSE, trim = TRUE))
graphdf\$to = as.character(format(graphdf\$to, scientific = FALSE, trim = TRUE))
graphdf = subset(graphdf, select = c(from, to, man_dist, euc_dist, pen_dist))

skel_graph = graph_from_data_frame(d = graphdf, vertices = as.character(format(indices, scientific = FALSE, trim = TRUE)), directed = FALSE)
skel_graph0 = graph_from_data_frame(d = graphdf0, vertices = as.character(format(indices, scientific = FALSE, trim = TRUE)), directed = FALSE)
skel_graph = simplify(skel_graph, remove.multiple = TRUE, edge.attr.comb="mean")
skel_graph0 = simplify(skel_graph0, remove.multiple = TRUE, edge.attr.comb="mean")

V(skel_graph)\$color = ifelse(V(skel_graph)\$name %in% nodeList, 1, 0)
V(skel_graph0)\$color = ifelse(V(skel_graph0)\$name %in% nodeList, 1, 0)

terminalNodes = nodeList[!(nodeList %in% nodeConnections)]
dists0 = distances(skel_graph0, v = as.character(format(nodeList, scientific = FALSE, trim = TRUE)), to = as.character(format(nodeList, scientific = FALSE, trim = TRUE)), weights = E(skel_graph0)\$nodeOnlyDist)
adj0 = ifelse(dists0 == 1 | dists0 == 2, 1, 0)

message("attempting to merge them...", appendLF = FALSE)
emergencyBreak = 20;
while(TRUE)
{
originalNodeList = nodeList
distsFull = distances(skel_graph0, v = as.character(format(nodeList, scientific = FALSE, trim = TRUE)), to = as.character(format(nodeList, scientific = FALSE, trim = TRUE)), weights = NA)
distsFull[!upper.tri(distsFull)] = 0
nodesToMerge = which(distsFull <= 2 & distsFull > 0)

rNodes = ((nodesToMerge-1) %% length(nodeList)) + 1
cNodes = ((nodesToMerge-1) %/% length(nodeList)) + 1
mergeSets = cbind(nodeList[rNodes], nodeList[cNodes])
mergeSets = cbind(mergeSets, apply(mergeSets, 1, function(x){all(!(x %in% terminalNodes))}))
mergeSets = mergeSets[mergeSets[,3] == 1,c(1,2)]
mergeSets = matrix(mergeSets, ncol = 2)

if(dim(mergeSets) == 0) break

if(anyDuplicated(c(mergeSets)) > 0)
{
duplicates = which(mergeSets %in% mergeSets[apply(matrix(duplicated(c(mergeSets)), ncol = 2), 1, any)])
rduplicates = ((duplicates - 1) %% dim(mergeSets)) + 1
duplicateDists = distsFull[matrix(as.character(format(mergeSets[rduplicates,], scientific = FALSE, trim = TRUE)), ncol = 2)]
}
newNodes = findMergeNodes(skel_graph, mergeSets)
nodeList = unique(c(nodeList[!(nodeList %in% c(mergeSets[,c(1,2)]))], newNodes))

#At this point have the updated nodeList, if we wanted to break it letter by letter we would do that here

### Migrate connections from original nodes to new nodes
toDelete = NULL
for(i in 1:dim(mergeSets))
{
whichRowCol = which(colnames(adj0) %in% format(mergeSets[i,c(1,2)], scientific = FALSE, trim = TRUE))
newConnectivities = apply(matrix(adj0[whichRowCol,], nrow = length(whichRowCol)), 2, function(x) x == 1 | x == 1)
newConnectivities[is.na(newConnectivities)] = 0

toDelete = c(toDelete, which(rownames(adj0) %in% format(mergeSets[i,c(1,2)], scientific = FALSE, trim = TRUE)))

}
if(length(toDelete) > 0)

emergencyBreak = emergencyBreak - 1
if(emergencyBreak == 0){
warning("Could not merge nodes... stopping execution")
stop()
}

}
message("merged successfully.")

graphdf0 = as_data_frame(skel_graph0)
graphdf0\$nodeOnlyDist = ifelse(graphdf0\$from %in% nodeList | graphdf0\$to %in% nodeList, 1, 0.00001)
skel_graph0 = graph_from_data_frame(graphdf0, directed = FALSE)

message("Finding direct paths...", appendLF = FALSE)

message("and loops...")
loopList = getLoops(nodeList, skel_graph, skel_graph0, pathList, dim(img))

allPaths = append(pathList, loopList)

graphdf0 = as_data_frame(skel_graph0)
graphdf0\$nodeOnlyDist = ifelse(graphdf0\$from %in% nodeList | graphdf0\$to %in% nodeList, 1, 0)
skel_graph0 = graph_from_data_frame(graphdf0, directed = FALSE)

#Nominate and check candidate breakpoints
message("Looking for letter break points...", appendLF = FALSE)
hasTrough = rep(FALSE, length(pathList))
troughNodes = c()
candidateNodes = c()
for(i in 1:length(pathList))
{
# Look for troughs in edges.
tempPath = pathList[[i]]
if(length(tempPath) > 10)
{
rows = ((tempPath-1) %% dims) + 1
for(j in 5:(length(rows)-4))
{
if(any(rows[1:(j-1)] < rows[j]-1) & any(rows[(j+1):length(rows)] < rows[j]-1))
{
lowerEnd = max(which(rows[1:(j-1)] < rows[j]-1))
upperEnd = min(which(rows[(j+1):length(rows)] < rows[j]-1))
if(!any(rows[lowerEnd:(j+upperEnd)] > rows[j]))
{
troughNodes = c(troughNodes, tempPath[j])
hasTrough[i] = TRUE
}
}
}
}
if(hasTrough[i] == FALSE)
{
candidateNodes = c(candidateNodes, tempPath[ceiling(length(tempPath)/2)])
}
}
breaks = which((((troughNodes[-1]-1) %% dims) + 1) != (((troughNodes[-length(troughNodes)] - 1) %% dims) + 1) |
((((troughNodes[-1]-1) %/% dims) + 1) != (((troughNodes[-length(troughNodes)] - 1) %/% dims)) &
(((troughNodes[-1]-1) %/% dims)) != (((troughNodes[-length(troughNodes)] - 1) %/% dims) + 1)))
breaks = c(1, breaks, length(troughNodes))
candidateNodes = c(candidateNodes, troughNodes[ceiling((breaks[-1] + breaks[-length(breaks)])/2)])

goodBreaks = checkBreakPoints(candidateNodes = candidateNodes, allPaths = pathList, nodeGraph = getNodeGraph(pathList, nodeList), terminalNodes = terminalNodes, dims)
preStackBreaks = candidateNodes[goodBreaks]

pathsWithBreaks = lapply(allPaths, function(x){which(x %in% preStackBreaks)})
breaksPerPath = unlist(lapply(pathsWithBreaks, length))
for(i in which(breaksPerPath > 1))
{
newBreak = floor(mean(which(allPaths[[i]] %in% preStackBreaks)))
preStackBreaks = preStackBreaks[which(!(preStackBreaks %in% allPaths[[i]]))]
preStackBreaks = c(preStackBreaks, allPaths[[i]][newBreak])
}

message("Isolating letter paths...")

## Break on breakpoints and group points by which letter they fall into. Adjust graph accordingly.
letterList = letterPaths(allPaths, skel_graph0, preStackBreaks)
letters = letterList[][unlist(lapply(letterList[], length)) > 5]

V(skel_graph0)\$letterID = letterList[]
skel_graph0 = delete.vertices(skel_graph0, V(skel_graph0)[which(V(skel_graph0)\$letterID %in% which(unlist(lapply(letterList[], length)) <= 5))])
V(skel_graph0)\$letterID[!is.na(V(skel_graph0)\$letterID)] = as.numeric(as.factor(na.omit(V(skel_graph0)\$letterID)))

# Remove breakpoints that shouldn't have broken.
finalBreaks = preStackBreaks[!(checkStacking(preStackBreaks, allPaths, letters, skel_graph0, dims))]
finalBreaks = finalBreaks[!(checkSimplicityBreaks(finalBreaks, pathList, loopList, letters, skel_graph0, nodeList, terminalNodes, hasTrough, dims))]

pathsWithBreaks = lapply(allPaths, function(x){which(x %in% preStackBreaks)})
breaksPerPath = unlist(lapply(pathsWithBreaks, length))
for(i in which(breaksPerPath > 0))
{
newNodes = pathsWithBreaks[[i]]
if(allPaths[[i]][newNodes] %in% finalBreaks)
{
E(skel_graph0, P = format(allPaths[[i]][c(newNodes - 2, newNodes - 1)], scientific = FALSE, trim = TRUE))\$nodeOnlyDist = 1
E(skel_graph0, P = format(allPaths[[i]][c(newNodes + 1, newNodes + 2)], scientific = FALSE, trim = TRUE))\$nodeOnlyDist = 1
newNodes = c(newNodes - 1, newNodes + 1)
nodeList = c(nodeList, allPaths[[i]][newNodes])
allPaths[[i]] = list(allPaths[[i]][1:(newNodes)], allPaths[[i]][(newNodes):length(allPaths[[i]])])
}
else
{
letterIDs = range(V(skel_graph0)\$letterID[names(V(skel_graph0)) %in% format(allPaths[[i]], scientific = FALSE, trim = TRUE)], na.rm = TRUE)

V(skel_graph0)\$letterID[which(V(skel_graph0)\$letterID == letterIDs)] = letterIDs
V(skel_graph0)\$letterID[which(names(V(skel_graph0)) %in% format(allPaths[[i]][newNodes], scientific = FALSE, trim = TRUE))] = letterIDs
}
}

allPaths = lapply(rapply(allPaths, enquote, how="unlist"), eval)

message("Organizing letters...")
letters = organize_letters(skel_graph0)

message("Creating letter lists...")
letterList = create_letter_lists(allPaths, letters, nodeList, nodeConnections, terminalNodes, dims)

letterList = add_character_features(img, letterList, letters, dims)

message("Document processing complete.")

return(list(nodes = nodeList, connectingNodes = nodeConnections, terminalNodes = terminalNodes, breakPoints = sort(finalBreaks), letterList = letterList))
}

organize_letters = function(skel_graph0){
letters = replicate(n = length(na.omit(unique(V(skel_graph0)\$letterID))), list())
strs = names(V(skel_graph0))
for(i in 1:length(na.omit(unique(V(skel_graph0)\$letterID))))
{
tmp = as.numeric(as.factor(V(skel_graph0)\$letterID))
letters[[i]] = as.numeric(strs[which(tmp == i)])
}
return(letters)
}

create_letter_lists = function(allPaths, letters, nodeList, nodeConnections, terminalNodes, dims){

#Assign nodes to each letter
nodesinGraph = replicate(length(letters), list(NA))
connectingNodesinGraph = replicate(length(letters), list(NA))
terminalNodesinGraph = replicate(length(letters), list(NA))

for(i in 1:length(letters))
{
nodesinGraph[[i]] = letters[[i]][which(letters[[i]] %in% nodeList)]
connectingNodesinGraph[[i]] = letters[[i]][which(letters[[i]] %in% nodeConnections)]
terminalNodesinGraph[[i]] = letters[[i]][which(letters[[i]] %in% terminalNodes)]
}

letterList = replicate(length(letters), list(path = NA, nodes = NA), simplify=FALSE)
for(i in 1:length(letters))
{
letterList[[i]]\$path = letters[[i]]
#letterList[[i]]\$nodes = nodesinGraph[[i]][nodeOrder[[i]]]
letterList[[i]]\$allPaths = pathLetterAssociate(allPaths,letters[[i]])
}

nodeOrder = replicate(list(), n = length(letters))
decCode = rep("", length(letters))
connectivityScores = replicate(list(), n = length(letters))

getConnectivity = function(pathEndings, nodesSingle)
{
res = rep(NA, length(nodesSingle))
for(j in 1:length(nodesSingle))
{
res[j] = sum(pathEndings == nodesSingle[j])
}
return(res)
}

for(i in 1:length(letters))
{
if(length(nodesinGraph[[i]]) > 0)
{
letterList[[i]]\$adjMatrix = matrix(0,ncol = length(nodesinGraph[[i]]), nrow = length(nodesinGraph[[i]]))

pathStarts = unlist(lapply(letterList[[i]]\$allPaths, function(x)x))
pathEnds = unlist(lapply(letterList[[i]]\$allPaths, function(x)x[length(x)]))

connectivityScores[[i]] = getConnectivity(pathEndings = c(pathStarts, pathEnds), nodesSingle = nodesinGraph[[i]])

nodeOrder[[i]] = getNodeOrder(letters[[i]], nodesinGraph[[i]], connectivityScores[[i]], dims)

nodeSet = nodesinGraph[[i]][order(nodeOrder[[i]])]
for(j in 1:length(pathStarts))
{
if(!(pathStarts[j] %in% nodeSet))
{
warning(paste0("Maybe a loop that didn't merge with node. letterList[[",i,"]]"))
}
else
pathStarts[j] = which(nodeSet == pathStarts[j])

if(!(pathEnds[j] %in% nodeSet))
{
warning(paste0("Maybe a loop that didn't merge with node. letterList[[",i,"]]"))
}
else
pathEnds[j] = which(nodeSet == pathEnds[j])
}
lenBinCode = length(binCode)
binCode = c(rep(0, (-1*lenBinCode)%%4), binCode)
for(j in 1:(length(binCode)/4))
{
decCode[i] = paste0(decCode[i], LETTERS[sum(binCode[(4*(j-1)+1):(4*j)]*2^((4:1) - 1))+1])
}
letterList[[i]]\$letterCode = decCode[i]
letterList[[i]]\$nodes = sort(nodesinGraph[[i]][order(nodeOrder[[i]])])
letterList[[i]]\$connectingNodes = sort(connectingNodesinGraph[[i]][order(nodeOrder[[i]])])
letterList[[i]]\$terminalNodes = sort(terminalNodesinGraph[[i]][order(nodeOrder[[i]])])
colnames(letterList[[i]]\$adjMatrix) = format(letterList[[i]]\$nodes, scientific = FALSE, trim = TRUE)
rownames(letterList[[i]]\$adjMatrix) = format(letterList[[i]]\$nodes, scientific = FALSE, trim = TRUE)
}
else
{
letterList[[i]]\$adjMatrix = matrix(0,ncol = 0, nrow = 0)
letterList[[i]]\$nodes = sort(nodesinGraph[[i]])
letterList[[i]]\$connectingNodes = sort(connectingNodesinGraph[[i]])
letterList[[i]]\$terminalNodes = sort(terminalNodesinGraph[[i]])
letterList[[i]]\$letterCode = "A"
}
}

return(letterList)
}

#'
#' Internal method that adds features to characters
#'
#' @param img thinned binary image
#' @param letterList list containing letter characters
#' @param letters individual characters from letterList
#' @param dims image graph dimensions
#' @return a list of letters with features applied
add_character_features = function(img, letterList, letters, dims){
featureSets = extract_character_features(img, letterList, dims)

for(i in 1:length(letters))
{
letterList[[i]]\$characterFeatures = featureSets[[i]]
}

letterPlaces = matrix(unlist(lapply(featureSets, FUN = function(x) {c(x\$line_number, x\$order_within_line)})), ncol = 2, byrow = TRUE)
letterOrder = order(letterPlaces[,1], letterPlaces[,2])
letterList = letterList[letterOrder]

return(letterList)
}

#' pathLetterAssociate
#'
#' Function associating entries in allPaths to each letter
#'
#' @param allPaths list of paths
#' @param letter individual character
#' @return associated path to each letter
#'
pathLetterAssociate = function(allPaths,letter){
associatedPaths = list()
for(i in 1:length(allPaths)){
if(all(allPaths[[i]] %in% letter)){
associatedPaths = c(associatedPaths,list(allPaths[[i]]))
}
}
return(associatedPaths)
}

#' checkSimplicityBreaks
#'
#' Internal function for removing breakpoints that separate graphs that are too simple to be split. Remove break if graph on left and right of the break have 4 or fewer nodes and no loops or double paths. Never remove break on a trough.
#'
#' @param candidateBreaks possible breakpoints
#' @param pathList list of paths
#' @param loopList list of loops
#' @param letters list of individual letter characters
#' @param nodeGraph0 skeletonized graph
#' @param nodeList list of nodes
#' @param terminalNodes nodes at the ends of letters
#' @param hasTrough wether or not break has a trough
#' @param dims graph dimensions
#' @return removes breakpoints on simple graphs
checkSimplicityBreaks = function(candidateBreaks, pathList, loopList, letters, nodeGraph0, nodeList, terminalNodes, hasTrough, dims)
{
tooSimpleFlag = rep(FALSE, length(candidateBreaks))
for(i in 1:length(pathList))
{
tempPath = pathList[[i]]
nodestoCheck = which(candidateBreaks %in% tempPath)
if(length(nodestoCheck) >= 1)
{
if(!hasTrough[i])
{
pathIndex = which(tempPath == candidateBreaks[nodestoCheck])

borderLetters = c(V(nodeGraph0)\$letterID[which(V(nodeGraph0)\$name == tempPath[pathIndex - 1])],
V(nodeGraph0)\$letterID[which(V(nodeGraph0)\$name == tempPath[pathIndex + 1])])
left = letters[[borderLetters]]
right = letters[[borderLetters]]

nodesOnLeft = sum(nodeList %in% left)
nodesOnRight = sum(nodeList %in% right)
terminalLeft = sum(terminalNodes %in% left)
terminalRight = sum(terminalNodes %in% right)

if(nodesOnLeft == 3 & nodesOnRight == 3 & terminalLeft == 2 & terminalRight == 2)
{
pathsOnLeft = length(pathLetterAssociate(c(pathList, loopList),left))
pathsOnRight = length(pathLetterAssociate(c(pathList, loopList), right))
if(pathsOnLeft == 2 & pathsOnRight == 2)
{
tooSimpleFlag[nodestoCheck] = TRUE
}
}
}
}
}
return(tooSimpleFlag)
}

#' checkStacking
#'
#' Internal function for removing breakpoints that follow all of the rules, but separate two letters that are stacked on top of each other.
#'
#' @param candidateBreaks possible breaks for letterpath
#' @param allPaths list of paths
#' @param letters list of individual letter characters
#' @param nodeGraph0 skeletonized graph
#' @param dims graph dimensions
#' @return stackPtFlag
checkStacking = function(candidateBreaks, allPaths, letters, nodeGraph0, dims)
{
stackPtFlag = rep(FALSE, length(candidateBreaks))

for(i in 1:length(allPaths))
{
tempPath = allPaths[[i]]
tempRow = ((tempPath - 1) %% dims) + 1
tempCol = ((tempPath - 1) %/% dims) + 1
nodeChecks = which(candidateBreaks %in% tempPath)
if(length(nodeChecks) == 1)
{
if(abs((max(tempRow) - min(tempRow))/(max(tempCol) + 1 - min(tempCol))) > 2)
{
stackPtFlag[nodeChecks] = TRUE
}
else
{
pathIndex = which(tempPath == candidateBreaks[nodeChecks])

borderLetters = c(V(nodeGraph0)\$letterID[which(V(nodeGraph0)\$name == tempPath[pathIndex - 1])],
V(nodeGraph0)\$letterID[which(V(nodeGraph0)\$name == tempPath[pathIndex + 1])])
gr1 = letters[[borderLetters]]
gr1Rows = ((gr1 - 1) %% dims) + 1
gr2 = letters[[borderLetters]]
gr2Rows = ((gr2 - 1) %% dims) + 1

# Call a break a stack point if the overlap between the bordering letters is
# less than 10% of the total range of the combined letters.

overlap = min(abs(max(gr1Rows) - min(gr2Rows)), abs(max(gr2Rows) - min(gr1Rows)))
totalRange = (diff(range(c(gr1Rows,gr2Rows))))
overlapPercentage = overlap/totalRange
if(overlapPercentage < .1)
{
stackPtFlag[nodeChecks] = TRUE
}
}
}
}
return(stackPtFlag)
}

#' countNodes
#'
#' Function for counting nodes in a list of letters.
#'
#' @param letterList list containing letter characters
#' @param nodes list of nodes
#' @return number of nodes in letterList
countNodes = function(letterList, nodes)
{
unlist(lapply(letterList, function(x){sum(x %in% nodes)}))
}

#' getNodeGraph
#'
#' Internal function for creating a graph from a path list and node list.
#'
#' @param allPaths list of paths
#' @param nodeList list of nodes
#' @return a graph of nodes
getNodeGraph = function(allPaths, nodeList)
{
nodeGraph = make_empty_graph(directed = FALSE)
nodeGraph = add_vertices(nodeGraph, length(nodeList), name = format(nodeList, scientific = FALSE, trim = TRUE))
for(i in 1:length(allPaths))
{
nodeGraph = add.edges(nodeGraph, format(c(allPaths[[i]], allPaths[[i]][length(allPaths[[i]])]), scientific = FALSE, trim = TRUE))
}
return(nodeGraph)
}

#' getNodeOrder
#'
#' Internal function for ordering nodes in a letter.
#'
#' @param letter letter graph containing nodes to be ordered
#' @param nodesInGraph how many nodes are in the letter
#' @param nodeConnectivity how nodes are connected to each other
#' @param dims graph dimensions
#' @return order of the nodes
getNodeOrder = function(letter, nodesInGraph, nodeConnectivity, dims)
{
toRC = function(nodes, dims)
{
cs = (nodes-1)%/%dims + 1
rs = (nodes-1)%%dims + 1
return(matrix(c(rs,cs), ncol = 2))
}
angleDiff = function(fromIndex, toIndex, dims)
{
vecs = toRC(c(fromIndex, toIndex), dims)
diff = c(vecs[1,1] - vecs[2,1], vecs[2,2] - vecs[1,2])
return(atan2(diff, diff))
}

if(length(nodesInGraph) == 0)
return(nodesInGraph)
else
{
nodeOrder = rep(NA, length(nodesInGraph))

nodeCounter = 1
maxConnectivity = max(nodeConnectivity)

for(i in maxConnectivity:1)
{
thisTier = which(nodeConnectivity == i)
if(length(thisTier) == 1)
{
nodeOrder[thisTier] = nodeCounter
if(i == maxConnectivity)
{
baseNode = nodesInGraph[thisTier]
}
nodeCounter = nodeCounter + 1
}
else if(length(thisTier) > 1)
{
if(i == maxConnectivity)
{
#Left most node is first. If tie, then higher one.
nodeOrder[thisTier] = nodeCounter
nodeCounter = nodeCounter + 1
baseNode = nodesInGraph[thisTier]
thisTier = thisTier[-1]
}
count = 1
angles = rep(NA, length(thisTier))
for(point in nodesInGraph[thisTier])
{
angles[count] = angleDiff(baseNode, point, dims)
count = count + 1
}
angleOrder = order(angles, decreasing = TRUE)
nodeOrder[thisTier[angleOrder]] = nodeCounter:(nodeCounter + length(thisTier) - 1)
nodeCounter = nodeCounter + length(thisTier)
}
}
return(nodeOrder)
}
}
```

## Try the handwriter package in your browser

Any scripts or data that you put into this service are public.

handwriter documentation built on Aug. 16, 2021, 5:07 p.m.