# R/plotSoilRelationGraph.R In sharpshootR: A Soil Survey Toolkit

#### Documented in plotSoilRelationGraph

```## Ideas:
# http://stackoverflow.com/questions/9471906/what-are-the-differences-between-community-detection-algorithms-in-igraph

## TODO: community evaluation will crash with most community detection algorithms:
#     examples: plano, miami

## NOTE: dendrogram representation of community structure is only possible with some community detection algorithms

## TODO: investigate some heuristics for layout algorithm selection:
### layout_with_fr works most of the time
### layout_with_lgl works for most large graphs, but not when there are many disconnected sub-graphs

.maximum.spanning.tree <- function(x){
# convert cost representation of weights to "strength"
E(x)\$weight <- -1 * E(x)\$weight
# compute min spanning tree on "strength"
x <- minimum.spanning.tree(x)
# convert "strength" representation of weights back to cost
E(x)\$weight <- -1 * E(x)\$weight
return(x)
}

# dendrogram representation relies on ape plotting functions
# ... are passed onto plot.igraph or plot.phylo
# 2015-12-22: swap layout algorithms when > 20 individuals
plotSoilRelationGraph <- function(m, s='', plot.style='network', graph.mode='upper', spanning.tree=NULL, del.edges=NULL, vertex.scaling.factor=2, edge.scaling.factor=1, edge.transparency=1, edge.col=grey(0.5), edge.highlight.col='royalblue', g.layout=layout_with_fr, ...) {

# dumb hack to make R CMD check happy
weight <- NULL

# generate graph

### TODO ###
## figure out some hueristics for selecting a method: layout_with_fr is almost always the best one

# when there are many clusters, layout_with_lgl doesn't work properly
# switch back to layout_with_fr when > 5
# g.n.clusters <-  clusters(g)\$no

# maybe this can be used as a hueristic as well:
# betweenness(g)

# 	# select layout if not provided
# 	if(missing(g.layout)) {
# 	  if(dim(m)[1] > 20 & g.n.clusters < 5) {
# 	    g.layout <- layout_with_lgl
# 	    message('layout: Large Graph Layout algorithm')
# 	  }
#
# 	  else {
# 	    g.layout <- layout_with_fr
# 	    message('layout: Fruchterman-Reingold algorithm')
# 	  }
#
# 	}
### TODO ###

# optionally prune weak edges less than threshold quantile
if(!is.null(del.edges))
g <- delete.edges(g, E(g) [ weight < quantile(weight, del.edges) ])

# optionally compute min/max spanning tree
if(! is.null(spanning.tree)) {
# min spanning tree: not clear how this is useful
if(spanning.tree == 'min'){
g <- minimum.spanning.tree(g)
}

if(spanning.tree == 'max'){
g <- .maximum.spanning.tree(g)
}

## TODO: this needs more testing
# max spanning tree + edges with weights > n-tile added back
if(is.numeric(spanning.tree)){
# select edges and store weights
es <- E(g) [ weight > quantile(weight, spanning.tree) ]
es.w <- es\$weight
# trap ovious errors
if(length(es.w) < 1)
stop('no edges selected, use a smaller value for `spanning.tree`')

# convert edge sequence to matrix of vertex ids
e <- get.edges(g, es)
# compute max spanning tree
g.m <- .maximum.spanning.tree(g)

## TODO: there must be a better way
# add edges and weight back one at a time
for(i in 1:nrow(e)){
g.m <- add.edges(g.m, e[i, ], weight=es.w[i])
}
# remove duplicate edges
g <- simplify(g.m)
}
}

# transfer names
V(g)\$label <- V(g)\$name

# adjust size of vertex based on sqrt(degree / max(degree))
g.degree <- degree(g)
V(g)\$size <- sqrt(g.degree/max(g.degree)) * 10 * vertex.scaling.factor

# optionally adjust edge width based on weight
if(!missing(edge.scaling.factor))
E(g)\$width <- sqrt(E(g)\$weight) * edge.scaling.factor

## extract communities
# the fast-greedy algorithm is fast, but dosn't work with directed graphs
if(graph.mode == 'directed')
g.com <- cluster_walktrap(g) ## this works OK with directed graphs
else
g.com <- cluster_fast_greedy(g) ## this can crash with some networks

# community metrics
g.com.length <- length(g.com)
g.com.membership <- membership(g.com)

# colors for communities: choose color palette based on number of communities
if(g.com.length <= 9 & g.com.length > 2)
cols <- brewer.pal(n=g.com.length, name='Set1')
if(g.com.length < 3)
cols <- brewer.pal(n=3, name='Set1')
if(g.com.length > 9)
cols <- colorRampPalette(brewer.pal(n=9, name='Set1'))(g.com.length)

# set colors based on community membership
cols.alpha <- alpha(cols, 0.65)
V(g)\$color <- cols.alpha[membership(g.com)]

# get an index to edges associated with series specified in 's'
el <- get.edgelist(g)
idx <- unique(c(which(el[, 1] == s), which(el[, 2] == s)))

# set default edge color
E(g)\$color <- edge.col
# set edge colors based on optional series name to highlight
E(g)\$color[idx] <- alpha(edge.highlight.col, edge.transparency)

# previous coloring of edges based on in/out community
# E(g)\$color <- alpha(c('grey','black')[crossing(g.com, g)+1], edge.transparency)

# generate vector of fonts, highlighting main soil
font.vect <- rep(1, times=length(g.com.membership))
font.vect[which(names(g.com.membership) == s)] <- 2

if(plot.style == 'network') {
set.seed(1010101) # consistant output
plot(g, layout=g.layout, vertex.label.color='black', vertex.label.font=font.vect, ...)
}
if(plot.style == 'dendrogram') {
plot_dendrogram(g.com, mode='phylo', label.offset=0.1, font=font.vect, palette=cols, ...)
}

# invisibly return the graph
invisible(g)
}
```

## Try the sharpshootR package in your browser

Any scripts or data that you put into this service are public.

sharpshootR documentation built on May 2, 2019, 4:46 p.m.