#' neuron: class to represent traced neurons
#'
#' neuron objects consist of a list containing multiple fields describing the 3D
#' location and connectivity of points in a traced neuron. The critical fields
#' of a neuron, n, are n$d which contains a dataframe in SWC format and
#' n$SegList which contains a representation of the neuron's topology used for
#' most internal calculations. For historical reasons, n$SegList is limited to a
#' \emph{single fully-connected} tree. If the tree contains multiple unconnected
#' subtrees, then these are stored in n$SubTrees and nTrees will be >1; the
#' "master" subtree (typically the one with the most points) will then be stored
#' in n$SegList and n$NumPoints will refer to the number of points in that
#' subtree, not the whole neuron.
#' @description \code{neuron} makes a neuron object from appropriate variables.
#' @details StartPoint, BranchPoints, EndPoints are indices matching the rows of
#' the vertices in \code{d} \strong{not} arbitrary point numbers typically
#' encoded in \code{d$PointNo}.
#' @rdname neuron
#' @export
#' @family neuron
#' @seealso \code{\link{neuronlist}}
#' @param d matrix of vertices and associated data in SWC format
#' @param NumPoints Number of points in master subtree
#' @param StartPoint,BranchPoints,EndPoints Nodes of the neuron
#' @param SegList List where each element contains the vertex indices for a
#' single segments of the neuron, starting at root.
#' @param SubTrees List of SegLists where a neuron has multiple unconnected
#' trees (e.g. because the soma is not part of the graph, or because the
#' neuronal arbour has been cut.)
#' @param ... Additional fields to be included in neuron. Note that if these
#' include CreatedAt, NodeName, InputFileStat or InputFileMD5, they will
#' override fields of that name that are calculated automatically.
#' @param InputFileName Character vector with path to input file
#' @param NeuronName Character vector containing name of neuron or a function
#' with one argument (the full path) which returns the name. The default
#' (\code{NULL}) sets NeuronName to the file name without the file extension.
#' @param MD5 Logical indicating whether to calculate MD5 hash of input
#' @importFrom tools md5sum
#' @examples
#' ## See help for functions listed in See Also for more detailed examples
#' ## Basic properties
#' # a sample neuron
#' n = Cell07PNs[[1]]
#'
#' # summarise it
#' n
#'
#' # inspect its internal structure
#' str(n)
#' # summary of 3D points
#' summary(xyzmatrix(n))
#' # identify 3d location of endpoints
#' xyzmatrix(n)[endpoints(n),]
#'
#' ## Other methods
#' # plot
#' plot(n)
#' # all methods for neuron objects
#' methods(class = 'neuron')
#'
#' ## Neurons as graphs
#' # convert to graph and find longest paths by number of nodes
#' ng=as.ngraph(n)
#' hist(igraph::distances(ng))
#' # ... or in distances microns
#' ngw=as.ngraph(n, weights=TRUE)
#' hist(igraph::distances(ngw))
#'
#' # converting back and forth between neurons and graphs
#' g=as.ngraph(Cell07PNs[[1]])
#' gstem=igraph::induced.subgraph(g, 1:10)
#' # this is fine
#' plot(gstem)
#' plot(as.neuron(gstem))
#'
#' # but if you had an undirected graph
#' ug=igraph::as.undirected(gstem)
#' # you get a warning because there is no explicit origin for the graph
#' as.neuron(ug)
#'
#' # If you need finer control of the conversion process
#' gstem2=as.ngraph(ug, root = 10)
#' plot(gstem2)
#' plot(as.neuron(gstem2))
neuron<-function(d, NumPoints=nrow(d), StartPoint, BranchPoints=integer(), EndPoints,
SegList, SubTrees=NULL, InputFileName=NULL, NeuronName=NULL, ...,
MD5=TRUE){get
coreFieldOrder=c("NumPoints", "StartPoint", "BranchPoints",
"EndPoints", "nTrees", "NumSegs", "SegList", "SubTrees","d" )
mcl<-as.list(match.call())
n=c(mcl,list(NumPoints=NumPoints,
nTrees=ifelse(is.null(SubTrees),1,length(SubTrees)),
NumSegs=length(SegList)))
n=n[intersect(coreFieldOrder,names(n))]
n=lapply(n, eval)
if(is.null(InputFileName)){
n$NeuronName=NeuronName
} else {
if(is.null(NeuronName)) NeuronName=sub("\\.[^.]+$","",basename(InputFileName))
else if(is.function(NeuronName)) NeuronName=NeuronName(InputFileName)
neuron_extra=list(NeuronName=NeuronName,
InputFileName=InputFileName,
CreatedAt=Sys.time(),
NodeName=Sys.info()["nodename"])
if(file.exists(InputFileName)) {
neuron_extra$InputFileStat=file.info(InputFileName)[1,]
if(MD5) neuron_extra$InputFileMD5=md5sum(InputFileName)
}
n=c(neuron_extra,n)
}
dots=list(...)
if(length(dots)) {
n[names(dots)]=dots
}
as.neuron(n)
}
#' @param x A neuron or other object to test/convert
#' @description \code{is.neuron} will check if an object looks like a neuron.
#' @param Strict Whether to check class of neuron or use a more relaxed
#' definition based on object being a list with a SegList component.
#' @export
#' @rdname neuron
is.neuron<-function(x,Strict=FALSE) {
inherits(x,"neuron") ||
(!Strict && is.list(x) && "SegList" %in% names(x))
}
#' @description \code{as.neuron} will convert a suitable object to a neuron
#' @export
#' @rdname neuron
as.neuron<-function(x, ...) UseMethod('as.neuron')
#' @export
as.neuron.neuron<-function(x, ...) x
#' @rdname neuron
#' @export
#' @details Columns will be ordered c('PointNo','Label','X','Y','Z','W','Parent')
#' @description \code{as.neuron.data.frame} expects a block of SWC format data
as.neuron.data.frame<-function(x, ...) {
x=normalise_swc(x)
as.neuron(as.ngraph(x), vertexData=x, ...)
}
#' Normalise an SWC format block of neuron morphology data
#' @param x A data.frame containing neuron morphology data
#' @param requiredColumns Character vector naming columns we should have
#' @param ifMissing What to do if \code{x} is missing a required column
#' @param includeExtraCols Whether to include any extra columns include in
#' code{x}
#' @param defaultValue A list containing default values to use for any missing
#' columns
#' @return A data.frame containing the normalised block of SWC data with
#' standard columns in standard order.
#' @seealso \code{\link{as.neuron.data.frame}}, \code{\link{seglist2swc}}
#' @details Note that row.names of the resultant data.frame will be set to NULL
#' so that they have completely standard values.
#' @export
normalise_swc<-function(x, requiredColumns=c('PointNo','Label','X','Y','Z','W','Parent'),
ifMissing=c('usedefaults','warning','stop'),
includeExtraCols=TRUE,
defaultValue=list(PointNo=seq.int(nrow(x)),Label=2L,
X=NA_real_,Y=NA_real_,Z=NA_real_,
W=NA_real_,Parent=NA_integer_)
){
cnx=colnames(x)
ifMissing=match.arg(ifMissing)
if(ifMissing!='usedefaults') ifMissing=match.fun(ifMissing)
missingColumns=setdiff(requiredColumns, cnx)
if(length(missingColumns)){
if(is.character(ifMissing)){
x[,missingColumns]=defaultValue[missingColumns]
} else {
ifMissing("Columns ", paste(missingColumns, collapse=","), " are missing from x")
}
}
# if only giving a warning for missing columns we may may be missing some
selectedCols=intersect(requiredColumns, colnames(x))
if(includeExtraCols)
selectedCols=c(selectedCols, setdiff(cnx, requiredColumns))
row.names(x)=NULL
x[,selectedCols]
}
#' Make SegList (and other core fields) from full graph of all nodes and origin
#'
#' @description \code{as.neuron.ngraph} converts a graph (typically an
#' \code{ngraph} object) to a neuron
#' @details Uses a depth first search on the tree to reorder using the given
#' origin.
#' @details When the graph contains multiple subgraphs, only one will be chosen
#' as the master tree and used to construct the SegList of the resultant
#' neuron. However all subgraphs will be listed in the SubTrees element of the
#' neuron and nTrees will be set appropriately.
#' @details When the graph vertices have a label attribute derived from PointNo,
#' the origin is assumed to be specified with respect to the vertex labels
#' rather than the raw vertex ids.
#' @param vertexData A dataframe with SWC fields especially X,Y,Z,W,PointNo,
#' Parent.
#' @param origin Root vertex, matched against names (aka PointNo) when
#' available (see details)
#' @param Verbose Whether to be verbose (default: FALSE)
#' @return A list with elements:
#' (NumPoints,StartPoint,BranchPoints,EndPoints,nTrees,NumSegs,SegList,
#' [SubTrees]) NB SubTrees will only be present when nTrees>1.
#' @export
#' @importFrom igraph V V<- vcount decompose.graph
#' @rdname neuron
#' @seealso \code{\link{graph.dfs}, \link{as.seglist}}
as.neuron.ngraph<-function(x, vertexData=NULL, origin=NULL, Verbose=FALSE, ...){
# translate origin into raw vertex id if necessary
if(length(origin)){
vertex_names=igraph::V(x)$name
if(!is.null(vertex_names)){
origin=match(origin, vertex_names)
if(is.na(origin)) stop("Invalid origin")
}
}
# save original vertex ids
igraph::V(x)$vid=seq.int(igraph::vcount(x))
# check if we have multiple subgraphs
if(igraph::no.clusters(x)>1){
if(!length(origin)){
# no origin specified, will pick the biggest subtree
# decompose into list of subgraphs
gg=igraph::decompose.graph(x)
# reorder by descending number of vertices
gg=gg[order(sapply(gg,igraph::vcount), decreasing=TRUE)]
subtrees=lapply(gg, as.seglist, Verbose=Verbose)
sl=subtrees[[1]]
masterg=gg[[1]]
} else {
# origin specified, subtree containing origin will be the master
cg=igraph::clusters(x)
master_tree_num=cg$membership[origin]
# make a master graph with the vertices from subgraph including origin
masterg=igraph::induced.subgraph(x, which(cg$membership==master_tree_num))
# ... and then corresponding seglist
sl=as.seglist(masterg, origin=origin)
# now deal with remaining vertices
remainderg=igraph::induced.subgraph(x, which(cg$membership!=master_tree_num))
gg=igraph::decompose.graph(remainderg)
# reorder by descending number of vertices
gg=gg[order(sapply(gg,igraph::vcount), decreasing=TRUE)]
subtrees=c(list(sl),lapply(gg, as.seglist, Verbose=Verbose))
}
nTrees=length(subtrees)
} else {
# this is a well-behaved graph that is immediately ready to be master graph
# of neuron
sl=as.seglist(masterg<-x, origin=origin, Verbose=Verbose)
subtrees=list(sl)
nTrees=1
}
if(length(sl)==0 || length(sl[[1]])<2)
stop("Invalid neuron! Must contain at least one segment with 2 points")
# Finalise StartPoint - should always be head point of first segment
StartPoint=sl[[1]][1]
# sort out the vertex information
# TODO refactor this into a separate function e.g. normalise.swc since
# we need to do something similar in as.neuron.dataframe and seglist2swc etc
d=data.frame(PointNo=get.vertex.attribute(x,'name'))
if(is.null(vertexData)){
# get vertex information from graph object
xyz=xyzmatrix(x)
if(!is.null(xyz)) d[,c("X","Y","Z")]=xyz[igraph::V(x),]
diam=V(x)$diam
if(!is.null(diam)) d[, "W"]=diam[igraph::V(x)]
} else {
# we were given a block of vertexData
if("PointNo"%in%colnames(vertexData)){
# to be on the safe side, let's reorder the vertex data so that PointNos
# matches PointNos stored in graph as vertex attributes
ids=match(d$PointNo, vertexData$PointNo)
if(any(is.na(ids)))
stop("Mismatch between PointNos stored in graph and those in vertexData")
d=cbind(d, vertexData[ids,colnames(vertexData)!='PointNo'])
} else {
# the datablock doesn't have a PointNo field so just assume that it
# is ordered according to the vertex indices
if(nrow(d)!=nrow(vertexData))
stop("vertexData does not have PointNo column and does not have as",
"many rows as there are points in the graph.")
d=cbind(d, vertexData)
}
}
d=seglist2swc(x=subtrees,d=d)
d=normalise_swc(d)
n=list(d=d,NumPoints=igraph::vcount(masterg),
StartPoint=StartPoint,
BranchPoints=branchpoints(masterg, original.ids='vid'),
EndPoints=endpoints(masterg, original.ids='vid'),
nTrees=nTrees,
NumSegs=length(sl),
SegList=sl)
if(nTrees>1) n=c(n,list(SubTrees=subtrees))
# NB unname is a guard against named fields coming in.
# The name would otherwise turn into a suffix in the neuron that would cause
# trouble when constructing the neuron
# e.g. InputFileName->InputFileName.XT23L1
if(!missing(...)) n=c(n, lapply(pairlist(...), unname))
do.call(neuron, n)
}
#' @description \code{as.neuron.igraph} will convert an \code{ngraph} compatible
#' \code{\link[igraph]{igraph}} object into a \code{neuron}.
#' @export
#' @rdname neuron
as.neuron.igraph <- function(x, ...) {
must_have=c("X","Y","Z","diam")
if(!all(must_have %in% igraph::vertex_attr_names(x)))
stop("Sorry this does not look like an ngraph! Missing XYZ/diam data!")
as.neuron.ngraph(x, ...)
}
#' @description \code{as.neuron.default} will add class "neuron" to a neuron-like
#' object.
#' @rdname neuron
#' @export
as.neuron.default<-function(x, ...){
if(is.null(x)) return (NULL)
if(is.neuron(x,Strict=FALSE)) class(x)=c("neuron",class(x))
x
}
#' Arithmetic for neuron coordinates
#'
#' If x is a 1-vector or a 3-vector, operate on xyz only
#' If x is a 4-vector, apply operation to xyz and diameter
#' @param e1 a neuron
#' @param e2 (a numeric vector to multiply neuron coords in neuron)
#' @return modified neuron
#' @export
#' @rdname neuron-arithmetic
#' @seealso neuron
#' @examples
#' n1<-Cell07PNs[[1]]*2
#' n2<-Cell07PNs[[1]]*c(2,2,2,1)
#' stopifnot(all.equal(n1,n2))
#' n3<-Cell07PNs[[1]]*c(2,2,4)
Ops.neuron <- function(e1, e2=NULL) {
ok <-
switch(
.Generic,
`-` = ,
`*` = ,
`/` = ,
`+` = ,
`^` = ,
'>' = ,
'<' = ,
'>=' = ,
'<=' = TRUE,
FALSE
)
if (!ok) {
stop(gettextf("%s not meaningful for neurons", sQuote(.Generic)))
}
r=e1
lx=length(e2)
e1 <- if(lx==3) {
t(xyzmatrix(e1))
} else if(lx==4) {
t(cbind(xyzmatrix(e1), e1$d$W))
} else if(lx>1) {
stop("second operand must be a numeric vector of length 0, 1, 3 or 4")
} else {
e1=xyzmatrix(r)
}
# I don't exactly know why it is necessary to change this directly, but if not
# NextMethod dispatches on original class of e1 even when I specify object=
.Class=class(e1)
res <- NextMethod(generic=.Generic)
if(lx==4) {
r$d$W=res[4,]
res=t(res[-4,])
} else if(lx==3){
res=t(res)
}
xyzmatrix(r)=res
r
}
#' Scale and centre neuron 3D coordinates
#'
#' @details If \code{scale=TRUE}, the neuron will be rescaled to unit sd in each
#' axis. If \code{center=TRUE}, the neuron will be centred around the axis
#' means. See \code{base::\link{scale.default}} for additional details.
#' @param x A neuron
#' @param center 3-vector to subtract from x,y,z coords
#' @param scale 3-vector used to divide x,y,z coords
#' @return neuron with scaled coordinates
#' @export
#' @seealso \code{\link{scale.default}}, \code{\link{Ops.neuron}}
#' @aliases scale
#' @examples
#' n1.scaledown=scale(Cell07PNs[[1]],scale=c(2,2,3))
#' n1.scaleup=scale(Cell07PNs[[1]],scale=1/c(2,2,3))
scale.neuron<-function(x, center=TRUE, scale=TRUE){
xyzmatrix(x)<-scale(xyzmatrix(x),scale=scale,center=center)
x
}
#' Check equality on key fields of neuron object
#'
#' @inheritParams base::all.equal.default
#' @param fieldsToCheck Which fields in the neuron are always checked. The
#' special value of \code{NA} indicates that \bold{all} fields in the neurons
#' will be compared.
#' @param fieldsToCheckIfPresent These fields are only checked if they are
#' present
#' @param fieldsToExclude Character vector of fields to exclude from check
#' @param CheckSharedFieldsOnly Logical whether to check shared fields only
#' (default: FALSE)
#' @param ... additional arguments passed to \code{all.equal}
#' @export
#' @seealso \code{\link[base]{all.equal}}
#' @examples
#' x=Cell07PNs[[1]]
#' y=x
#' y$NeuronName='rhubarb'
#' # NOT TRUE
#' all.equal(x, y)
#' # TRUE
#' all.equal(x, y, fieldsToExclude='NeuronName')
all.equal.neuron<-function(target,current,tolerance=1e-6,check.attributes=FALSE,
fieldsToCheck=c("NumPoints", "StartPoint", "BranchPoints",
"EndPoints", "NumSegs", "SegList", "d"),
fieldsToCheckIfPresent=c("NeuronName","nTrees","SubTrees"),
fieldsToExclude=character(),
CheckSharedFieldsOnly=FALSE, ...){
if(length(fieldsToCheck)==1 && is.na(fieldsToCheck))
fieldsToCheck=union(names(current), names(target))
if(!is.neuron(target) || !is.neuron(current))
return ("target and current must both be neurons")
fieldsInCommon=intersect(names(target),names(current))
# figure out which of the optional fields to check are present
fieldsToCheckIfPresent=intersect(fieldsInCommon,fieldsToCheckIfPresent)
# and add those to the fields to check
fieldsToCheck=unique(c(fieldsToCheck,fieldsToCheckIfPresent))
if(CheckSharedFieldsOnly){
fieldsToCheck=intersect(fieldsInCommon,fieldsToCheck)
} else{
# check all core fields
missingfields=setdiff(fieldsToCheck,names(current))
if(length(missingfields)>0)
return(paste("Current missing fields: ",missingfields))
missingfields=setdiff(fieldsToCheck,names(target))
if(length(missingfields)>0)
return(paste("Target missing fields: ",missingfields))
}
fieldsToCheck=setdiff(fieldsToCheck,fieldsToExclude)
all.equal(target[fieldsToCheck],current[fieldsToCheck],
tolerance=tolerance, check.attributes=check.attributes, ...)
}
#' Calculate length of all segments in neuron
#'
#' @param x A neuron
#' @param all Whether to calculate lengths for all segments when there are
#' multiple subtrees (default: \code{FALSE})
#' @param flatten Whether to flatten the lists of lists into a single list when
#' \code{all=TRUE}
#' @param sumsegment Whether to return the length of each segment (when
#' \code{sumsegment=TRUE}, the default) or a list of vectors of lengths of each
#' individual edge in the segment.
#' @details A segment is an unbranched portion of neurite consisting of at least
#' one vertex joined by edges.Only segments in x$SegList will be calculated
#' unless \code{all=TRUE}. Segments containing only one point will have 0
#' length.
#' @return A \code{vector} of lengths for each segment or when
#' \code{sumsegment=FALSE} a \code{list} of vectors
#' @export
#' @seealso \code{\link{as.seglist.neuron}}
#' @examples
#' summary(seglengths(Cell07PNs[[1]]))
#' hist(unlist(seglengths(Cell07PNs[[1]], sumsegment = FALSE)),
#' br=20, main='histogram of edge lengths', xlab='edge lengths /microns')
seglengths=function(x, all=FALSE, flatten=TRUE, sumsegment=TRUE){
# convert to numeric matrix without row names
sts<-as.seglist(x, all=all, flatten=flatten)
if(isTRUE(sumsegment) && use_natcpp()) {
# prefer c implementation
if(isFALSE(all) || isTRUE(flatten)) {
res=natcpp::c_seglengths(sts, x$d$X, x$d$Y, x$d$Z)
} else {
# sts should be a list of lists of vectors
res=lapply(sts, natcpp::c_seglengths, x$d$X, x$d$Y, x$d$Z)
}
return(res)
}
d=data.matrix(x$d[, c("X", "Y", "Z")])
if(all && !flatten) {
lapply(sts,
function(st) sapply(st, function(s) seglength(d[s, , drop=FALSE], sum=sumsegment),
simplify=sumsegment, USE.NAMES = FALSE ))
} else sapply(sts, function(s) seglength(d[s, , drop=FALSE], sum=sumsegment),
simplify=sumsegment, USE.NAMES = FALSE)
}
# Calculate length of single segment in neuron
seglength=function(ThisSeg, sum=TRUE){
#ThisSeg is an array of x,y and z data points
#In order to calculate the length
#Need to find dx,dy,dz
#Then find sqrt(dx^2+...)
#Then sum over the path
if(nrow(ThisSeg)==1) return(0)
ds=diff(ThisSeg)
edgelengths=sqrt(rowSums(ds*ds))
if(sum) sum(edgelengths) else unname(edgelengths)
}
#' Resample an object with a new spacing
#' @param x An object to resample
#' @param ... Additional arguments passed to methods
#' @export
resample<-function(x, ...) UseMethod("resample")
#' resample a neuron with a new spacing
#'
#' @param stepsize The new spacing along the tracing
#' @details \code{resample.neuron} Floating point columns including X,Y,Z,W will
#' be interpolated using linear interpolation, while integer or factor columns
#' will be interpolated using constant interpolation. See \code{\link{approx}}
#' for details.
#' @export
#' @rdname resample
#' @seealso \code{\link{approx}}, \code{\link{seglengths}}
#' @family neuron
resample.neuron<-function(x, stepsize, ...) {
# extract original vertex array before resampling
cols=c("X","Y","Z")
if(!is.null(x$d$W)) cols=c(cols, 'W')
# if(!is.null(x$d$Label)) cols=c(cols, 'Label')
d=data.matrix(x$d[, cols, drop=FALSE])
# if(!is.null(d$Label)) d$Label=as.integer(d$Label)
if(any(is.na(d[,1:3])))
stop("Unable to resample neurons with NA points")
# fetch all segments and process each segment in turn
sl=as.seglist(x, all = T, flatten = T)
npoints=nrow(d)
dl=list(d)
for (i in seq_along(sl)){
s=sl[[i]]
# interpolate this segment
dold=d[s, , drop=FALSE]
dnew=resample_segment(dold, stepsize=stepsize, ...)
if(is.null(dnew)) next
dl[[length(dl)+1]]=dnew
# if we've got here, we need to do something
# add new points to the end of the swc block
# and give them sequential point numbers
newids=seq.int(from = npoints+1, length.out = nrow(dnew))
npoints=npoints+nrow(dnew)
# replace internal ids in segment so that proximal point is connected to head
# and distal point is connected to tail
sl[[i]]=c(s[1], newids, s[length(s)])
}
d=do.call(rbind, dl)
d=as.data.frame(d)
rownames(d)=NULL
# let's deal with the label column which was dropped - assume that always the
# same within a segment
head_idxs=sapply(sl, "[", 1)
seglabels=x$d$Label[head_idxs]
# in order to avoid re-ordering the segments when as.neuron.ngraph is called
# we can renumber the raw indices in the seglist (and therefore the vertices)
# in a strictly ascending sequence based on the seglist
# it is *much* more efficient to compute this on a single vector rather than
# separately on each segment in the seglist. However this does involve some
# gymnastics
usl=unlist(sl)
old_ids=unique(usl)
# reorder vertex information to match this
d=d[old_ids,]
node_per_seg=sapply(sl, length)
df=data.frame(id=usl, seg=rep(seq_along(sl), node_per_seg))
df$newid=match(df$id, old_ids)
sl=split(df$newid, df$seg)
labels_by_seg=rep(seglabels, node_per_seg)
# but there will be some duplicated ids (branch points) that we must remove
d$Label=labels_by_seg[!duplicated(df$newid)]
swc=seglist2swc(sl, d)
as.neuron(swc, origin=match(x$StartPoint, old_ids))
}
#' @export
#' @rdname resample
resample.neuronlist<-function(x, stepsize, ...){
nlapply(x, resample, stepsize=stepsize, ...)
}
# Interpolate ordered 3D points (optionally w diameter)
# NB returns NULL if unchanged (when too short or <=2 points)
# and only returns _internal_ points, omitting the head and tail of a segment
#' @importFrom stats approx
resample_segment<-function(d, stepsize, ...) {
# we must have at least 2 points to resample
if(nrow(d) < 2) return(NULL)
dxyz=xyzmatrix(d)
# we should only resample if the segment is longer than the new stepsize
l=seglength(dxyz)
if(l<=stepsize) return(NULL)
# figure out linear position of new internal points
internalPoints=seq(stepsize, l, by=stepsize)
nInternalPoints=length(internalPoints)
# if the last internal point is actually in exactly the same place
# as the endpoint then discard it
if(internalPoints[nInternalPoints]==l) {
internalPoints=internalPoints[-length(internalPoints)]
nInternalPoints=length(internalPoints)
}
# find cumulative length stopping at each original point on the segment
diffs=diff(dxyz)
cumlength=c(0,cumsum(sqrt(rowSums(diffs*diffs))))
# find 3D position of new internal points
# using linear approximation on existing segments
# make an emty object for results
# will have same type (matrix/data.frame as input)
dnew=matrix(nrow=nInternalPoints, ncol=ncol(d))
colnames(dnew)=colnames(d)
if(is.data.frame(d)){
dnew=as.data.frame(dnew)
}
for(n in seq.int(ncol(dnew))) {
dnew[,n] <- if(!all(is.finite(d[,n]))) {
rep(NA, nInternalPoints)
} else {
approx(cumlength, d[,n], internalPoints,
method = ifelse(is.double(d[,n]), "linear", "constant"),
ties="ordered")$y
}
}
dnew
}
#' Smooth the 3D coordinates of a neuron skeleton
#'
#' \code{smooth_neuron} smooths a neuron.
#' @param n Neuron to smooth
#' @param method Smoothing method
#' @param ... Additional parameters passed to segment smoothing functions
#'
#' @return A new neuron with smoothed 3d coordinates
#' @export
#'
#' @examples
#' ns=smooth_neuron(Cell07PNs[[1]], sigma=2)
#' # plot in 2D zooming in on axon terminals
#' plot(Cell07PNs[[1]], col='grey', xlim=c(260,290), ylim=c(115,90))
#' plot(ns, col='red', add=TRUE)
#' \donttest{
#' # 3D plot
#' plot3d(Cell07PNs[[1]], col='grey')
#' plot3d(ns, col='red')
#' }
smooth_neuron <- function(n, method=c("gauss", "spline"), ...) {
method=match.arg(method)
FUN=get(paste0('smooth_segment_', method), mode='function')
# iterate over segments
d=xyzmatrix(n)
if(any(is.na(d[,1:3])))
stop("Unable to resample neurons with NA points")
# fetch all segments and process each segment in turn
sl=as.seglist(n, all = T, flatten = T)
for (i in seq_along(sl)){
s=sl[[i]]
# interpolate this segment
d[s,]=FUN(d[s, , drop=FALSE], ...)
}
xyzmatrix(n) <- d
n
}
#' @rdname smooth_neuron
#' @param xyz A block of 3D coordinates defining an unbranched segment
#' @param sigma The standard deviation of the Gaussian smoothing kernel (which
#' has the same spatial units as the object being smoothed)
#' @importFrom stats dnorm
smooth_segment_gauss <- function(xyz, sigma, ...){
if(nrow(xyz)<2) return(xyz)
# make variable t as the cumulative position along segment
t=c(0,cumsum(seglength(xyz, sum = F)))
xyzt=xyz
for(i in 2:(nrow(xyz)-1)){
weights=dnorm(abs(t-t[i]), sd = sigma)
weights=weights/sum(weights)
xyzt[i,]=colSums(xyz*weights)
}
xyzt
}
#' @importFrom stats smooth.spline
smooth_segment_spline <- function(xyz, ...) {
if(nrow(xyz)<4) return(xyz)
# make variable t as the cumulative position along segment
t=c(0,cumsum(seglength(xyz, sum = F)))
# ensure that ends are fixed
w=rep(1,nrow(xyz))
w[1]=1e6
w[length(w)]=w[1]
fittedxyz=apply(xyz, 2, function(u) smooth.spline(t, u, w=w, ...)$y)
fittedxyz
}
#' Subset neuron by keeping only vertices that match given conditions
#'
#' @details \code{subset} defines which vertices of the neuron to keep and is
#' one of \itemize{
#'
#' \item logical or numeric indices, in which case these are simply used to
#' index the vertices in the order of the data.frame \code{x$d}. Note that any
#' NA values are ignored.
#'
#' \item a function (which is called with the 3D points array and returns T/F
#' vector)
#'
#' \item an expression evaluated in the context of the \code{x$d} data.frame
#' containing the SWC specification of the points and connectivity of the
#' neuron. This can therefore refer e.g. to the X,Y,Z location of vertices in
#' the neuron.
#'
#' }
#'
#' Note that due to its use of
#' \href{http://adv-r.had.co.nz/Computing-on-the-language.html}{non-standard
#' evaluation} \code{subset.neuron}, which is convenient interactive use but
#' can be fragile when used inside other functions. If you run into trouble it
#' is recommended to use the underlying \code{\link{prune_vertices}} function.
#' @param x A neuron object
#' @param subset A subset of points defined by indices, an expression, or a
#' function (see Details)
#' @param invert Whether to invert the subset criteria - a convenience when
#' selecting by function or indices.
#' @param ... Additional parameters (passed on to \code{\link{prune_vertices}})
#' @return subsetted neuron
#' @export
#' @seealso \code{\link{prune.neuron}}, \code{\link{prune_vertices}},
#' \code{\link{subset.dotprops}}
#' @examples
#' n=Cell07PNs[[1]]
#' # keep vertices if their X location is > 2000
#' n1=subset(n, X>200)
#' # diameter of neurite >1
#' n2=subset(n, W>1)
#' # first 50 nodes
#' n3=subset(n, 1:50)
#' # everything but first 50 nodes
#' n4=subset(n, 1:50, invert=TRUE)
#'
#' ## subset neuron by graph structure
#' # first plot neuron and show the point that we will use to divide the neuron
#' n=Cell07PNs[[1]]
#' plot(n)
#' # this neuron has a tag defining a point at which the neuron enters a brain
#' # region (AxonLHEP = Axon Lateral Horn Entry Point)
#' points(t(xyzmatrix(n)[n$AxonLHEP, 1:2]), pch=19, cex=2.5)
#'
#' # now find the points downstream (distal) of that with respect to the root
#' ng=as.ngraph(n)
#' # use a depth first search
#' distal_points=igraph::graph.dfs(ng, root=n$AxonLHEP, unreachable=FALSE,
#' mode='out')$order
#' distal_tree=subset(n, distal_points)
#' plot(distal_tree, add=TRUE, col='red', lwd=2)
#'
#' # Find proximal tree as well
#' # nb this does not include the AxonLHEP itself as defined here
#' proximal_points=setdiff(igraph::V(ng), distal_points)
#' proximal_tree=subset(n, proximal_points)
#' plot(proximal_tree, add=TRUE, col='blue', lwd=2)
#'
#' \dontrun{
#' ## subset using interactively defined spatial regions
#' plot3d(n)
#' # nb you can save this select3d object using save or saveRDS functions
#' # for future non-interactive use
#' s3d=select3d()
#' n4=subset(n, s3d(xyzmatrix(n)))
#' # special case of previous version
#' n5=subset(n, s3d)
#' stopifnot(all.equal(n4,n5))
#' # keep the points that were removed from n1
#' n4.not=subset(n,Negate(s3d))
#' # vertices with x position > 100 and inside the selector function
#' n6=subset(n,X>100 & s3d(X,Y,Z))
#'
#' ## subset each neuron object in a whole neuronlist
#' n10=Cell07PNs[1:10]
#' plot3d(n10, lwd=0.5, col='grey')
#' n10.crop = nlapply(n10, subset, X>250)
#' plot3d(n10.crop, col='red')
#'
#' ## subset a neuron using a surface
#' library(nat.flybrains)
#' # extract left lateral horn surface and convert to mesh3d
#' lh=as.mesh3d(subset(IS2NP.surf, "LH_L"))
#' # subset neuron with this surface
#' x=subset(Cell07PNs[[1]], function(x) pointsinside(x, lh))
#' shade3d(lh, alpha=0.3)
#' plot3d(x, lwd=3, col='blue')
#' # Now find the parts of the neuron outside the surface
#' y=subset(Cell07PNs[[1]], function(x) Negate(pointsinside)(x, lh))
#' plot3d(y, col='red', lwd=2)
#' }
#' @family neuron
subset.neuron<-function(x, subset, invert=FALSE, ...){
e <- substitute(subset)
r <- eval(e, x$d, parent.frame())
if (!is.logical(r) && !is.numeric(r)) {
# a function that tells us whether a point is in or out
if(is.function(r)) r=subset(x$d[,c("X","Y","Z")])
else stop("Cannot evaluate subset")
}
if(is.logical(r)) {
r <- r & !is.na(r)
r <- which(r)
} else if(is.numeric(r)) {
r=r[!is.na(r)]
} else stop("Subset must evaluate to a logical or numeric index")
# nb !invert since prune_vertices drops vertices whereas subset.neuron keeps vertices
prune_vertices(x, r, invert=!invert, ...)
}
#' Simplify a neuron to the longest tree with n branch points
#'
#' @details If the neuron already contains fewer than or exactly the requested
#' number of branches, then the original neuron is returned. The approach is
#' to build up the new neuron starting from the longest tree including no
#' branches all the way up to the longest tree containing n branches. The
#' distance calculations are only carried out once so it should be reasonably
#' efficient. Nevertheless at each iteration, the longest path from the tree
#' so far to the newly selected leaf is calculated and it is likely that this
#' step could be avoided. Furthermore for large values of n, pruning excess
#' branches rather than building would presumably be more efficient.
#'
#' @param x A \code{\link[nat]{neuron}} to simplify
#' @param n Required number of branch points (default=1, minimum 0)
#' @param invert Whether to keep the simplified backbone (when
#' \code{invert=FALSE}, the default) or its inverse.
#' @param ... Additional arguments (currently ignored)
#'
#' @return The simplified \code{neuron} or the untouched original neuron for
#' neurons that have <=n branch points.
#' @author Gregory Jefferis \email{jefferis@gmail.com}
#' @export
#' @seealso \code{\link[nat]{spine}}
#' @examples
#' \donttest{
#' n=Cell07PNs[['ECA34L']]
#' n.simp=simplify_neuron(n)
#' n.simp4=simplify_neuron(n, n=4)
#'
#' plot(n, col='green', WithNodes = FALSE)
#' plot(n.simp, col='red', add = TRUE)
#' plot(n.simp4, col='blue', add = TRUE)
#'
#' # calculate the inverse as well
#' n.simp4.inv=simplify_neuron(n, n=4, invert=TRUE)
#' plot(n.simp4, col='blue')
#' plot(n.simp4.inv, col='red', add = TRUE)
#' }
#'
#' # 3D plots
#' \dontrun{
#' nclear3d()
#' plot3d(n.simp, col='red', add = TRUE)
#' plot3d(n.simp4, col='blue', add = TRUE)
#' plot3d(n, col='green', WithNodes = FALSE)
#' }
#'
#' # or with plotly where transparency works
#' \dontrun{
#' op <- options(nat.plotengine = 'plotly')
#' nclear3d()
#' plot3d(n.simp, col='red', alpha = 0.5, add = TRUE)
#' plot3d(n.simp4, col='blue', alpha = 0.5, add = TRUE)
#' plot3d(n, col='green', alpha = 0.5, WithNodes = FALSE)
#' }
#'
simplify_neuron <- function(x, n=1, invert=FALSE, ...) {
#Step 1a:Get the number of branch points in the neuron..
bps = branchpoints(x, original.ids=FALSE)
nbps=length(bps)
#Step 1b:Compare with the actual branch points requested..
if (nbps <= n)
return(x)
if (n < 0)
stop("Must request >=0 branch points!")
#Step 2:Convert to ngraph object..
ng = as.ngraph(x, weights = T)
if (!igraph::is_dag(ng)) {
stop("I can't simplify neurons with cycles!")
}
# Step 3a: Compute all the leaf nodes..
leaves=setdiff(endpoints(ng, original.ids=FALSE), rootpoints(ng, original.ids=FALSE))
start = rootpoints(ng, original.ids=FALSE)
# Step 3b: Compute downstream paths for all the branch nodes
bpdesccount=igraph::ego_size(ng, order = 1, nodes = bps, mode='out', mindist = 1)
names(bpdesccount)=bps
bpsused=rep(0L, length(bps))
names(bpsused)=bps
lp_verts=list()
# Find the longest tree (aka spine) from the root to the farthest leaf node
# Then add the longest possible additional branch at each iteration
for (i in 0:n) {
if (i == 0) {
# Step 4: Find path to farthest leaf from root
dists=igraph::distances(ng, v=start, to=leaves, mode = 'out')
path=leafpath(ng, from=start, to=leaves[which.max(dists)])
} else {
# Step 5: find farthest leaf from root (with some edges set to 0)
dists=igraph::distances(ng, v=start, to=leaves, mode = 'out')
maxidx=which.max(dists)
if(dists[maxidx]==0) {
warning("unable to find a new path at step:", i)
break
}
farthest_leaf=leaves[maxidx]
# now search backwards from the farthest leaf to the bps in the spine
# restricting to those that still have unused downstream paths.
bps_available = bpsused > 0 & bpsused < bpdesccount
# there will be warnings because most branch points will not be reachable
# from the selected leaf
res=suppressWarnings(
igraph::get.shortest.paths(ng, from = farthest_leaf,
to = bps[bps_available],
mode = "in", weights = NA))
pathlengths=sapply(res$vpath, length)
pathlengths[pathlengths==0]=NA
chosen_path=which.min(pathlengths)
if(length(chosen_path)==0) {
warning("unable to find a path back to spine at step:", i)
break
}
path=rev(res$vpath[[chosen_path]])
}
# set edge weights on path to 0 so that these edges no longer contribute to
# the distance to the leaves in subsequent iterations
igraph::E(ng, path=path)$weight=0
lp_verts[[i+1]]=path
# add one to count of any bps used
bpsused[bps %in% path] = bpsused[bps %in% path] + 1
}
# Step 10: Find the edgelist for the tree defined by the list of paths
el=EdgeListFromSegList(lp_verts)
# Step 11: Prune the original neuron, optionally retaining the inverse
prune_edges(ng, el, invert = !invert)
}
leafpath <- function(ng, from, to) {
res=igraph::get.shortest.paths(ng,from = from,to = to,mode = "out", weights=NA)
as.integer(res$vpath[[1]])
}
robust_max=function(x) {
x=x[is.finite(x)]
if(length(x)) max(x) else {
warning("Some points in neuron cannot be reached! Multiple trees?")
-Inf
}
}
#' Stitch multiple fragments into single neuron using minimum spanning tree
#'
#' @details The neurons are joined using the minimum spanning tree i.e. the tree
#' that minimises the sum of edge weights (here, the Euclidean distance). The
#' neuron is rooted in the largest cluster; if this cluster contained the
#' original root of the neuron, then this should be retained.
#'
#' Note that when a threshold length is used, it must be specified in the same
#' units (microns, nm etc) as the neuron being stitched.
#'
#' If you wish to process a neuronlist containing multiple neurons each of
#' which must have all their subtrees stitched then you must use
#' \code{\link{nlapply}} to process the list iteratively.
#' @param x Fragments that could be a \code{\link{neuronlist}} containing
#' multiple neurons or a single neuron with multiple unconnected subtrees.
#' @param threshold The threshold distance above which new vertices will not be
#' connected (default=\code{Inf} disables this feature). This parameter
#' prevents the merging of vertices that are so far away from the main neuron
#' that they are likely to be spurious.
#' @param k The number of nearest neighbours to consider when trying to merge
#' different clusters.
#' @return A single \code{neuron} object containing all input fragments.
#' @author Sridhar Jagannathan \email{j.sridharrajan@gmail.com}
#' @seealso \code{\link{simplify_neuron}}, \code{\link{spine}},
#' \code{\link{ngraph}}, \code{igraph::\link[igraph]{mst}}
#' @export
#' @examples
#' n=Cell07PNs[['ECA34L']]
#' # find the tree with the 10 most important branches
#' n_main=simplify_neuron(n, n = 10)
#' # and the complement
#' n_branches=simplify_neuron(n, n = 10, invert = TRUE)
#'
#' # plot the inputs
#' plot(n_main, col='red', WithNodes=FALSE)
#' plot(n_branches, col='blue', add=TRUE, WithNodes=FALSE)
#'
#' # join the two fragments back together again
#' # first make a neuronlist containing the two fragments
#' nl=neuronlist(n_main, n_branches)
#' # and stitch those
#' n_stitched=stitch_neurons_mst(nl)
#'
#' \dontrun{
#' # look at the neurons in 3D - they appear identical in this case
#' plot3d(n, alpha=.5, col='cyan', WithNodes=FALSE)
#' plot3d(n_stitched, alpha=.5, col='red', WithNodes=FALSE)
#' }
#'
#' ## second example neuron containing multiple sub trees
#' n=Cell07PNs[['ECA34L']]
#' # remove a single vertex, breaking the neuron in two
#' # note that the root (purple node) has moved
#' np=prune_vertices(n, 105)
#' plot(np)
#'
#' # now stitch the broken neuron back together again
#' nph=stitch_neurons_mst(np)
#' # note that the root remains the same as the input neuron (np)
#' plot(nph)
stitch_neurons_mst <- function(x, threshold = Inf, k=10L) {
#Step 1: First check if the input is fragmented and then proceed further..
if(is.neuron(x)){
if(x$nTrees == 1){
return(x)
}
}
#Step 2: Check input and convert to ngraph object..
if(is.neuronlist(x)){
if(length(x)==0) return(NULL)
if(length(x)==1) return(x[[1]])
for (baseidx in 1:(length(x)-1)){
for (targetidx in (baseidx+1):length(x)) {
# if there are any repeats in PointNo, augment those in subsequent neuron
if(any(x[[baseidx]]$d$PointNo%in%x[[targetidx]]$d$PointNo)){
x[[targetidx]]$d$PointNo=x[[targetidx]]$d$PointNo+max(x[[baseidx]]$d$PointNo)
x[[targetidx]]$d$Parent=x[[targetidx]]$d$Parent+max(x[[baseidx]]$d$PointNo)
}
}
}
#Convert the neuronlist to list of ngraph objects..
ngraph_list=nlapply(x, FUN = function(x) {as.ngraph(x, weights = T, method = 'seglist')})
#Make a single ngraph object..
ng=as.ngraph(igraph::disjoint_union(ngraph_list))
} else if(is.neuron(x)) {
ng = as.ngraph(x, weights = T)
} else {
stop("x must be a neuronlist or a neuron object!")
}
#make a copy of the master graph now..
masterng <- ng
#Step 3a: Find the rootnode of the largest cluster, this will be the rootnode of the stitched neuron..
cc=igraph::components(ng)
sorted=order(cc$csize, decreasing = T)
root_points <- rootpoints(ng, original.ids = FALSE)
master_root <- names(which(cc$membership[root_points] == sorted[1]))
#Step 3b: Set the weights of existing edges to zero, so they are not affected by
#any operations done below..
igraph::E(ng)$weight = 0
#Step 4: Find all the leaf nodes now, these are the potential sites to stitch..
root_id <- which(names(V(ng)) == master_root)
leaves = 1:length(V(ng))#actually use all of them including the root
#Step 5: Create list of edges that will be added (from potential sites..) and compute the distances between them..
#Now divide them into clusters and compute combinations among them..
cc_membership <- unique(cc$membership)
combedge_start <- NULL
combedge_stop <- NULL
xyz <- xyzmatrix(x) #for using with knn..
ccsort=order(cc$csize, decreasing = TRUE)
for (i in 1:(length(cc_membership)-1)){
for (j in (i+1):length(cc_membership)) {
clusterbaseidx=ccsort[i]
clustertargetidx=ccsort[j]
leaves_base <- intersect(leaves, which(cc$membership == clusterbaseidx))
leaves_target <- intersect(leaves, which(cc$membership == clustertargetidx))
#take the locations of pts in base leaf and target leaf..
base_pt=xyz[leaves_base,]
target_pt=xyz[leaves_target,]
#find the pts nearest in base leaf to the target leaf..
minval <- min(k,length(leaves_base),length(leaves_target))
if(minval == 1){#For processing point inputs..
if(!is.matrix(base_pt)){base_pt = t(base_pt)}
if(!is.matrix(target_pt)){target_pt = t(target_pt)}
}
nnres=nabor::knn(base_pt, target_pt, k=minval) #Get k nearest pts in base_pt
sortedvals <- sort(nnres$nn.dists,index.return = T)
sortedidx <- arrayInd(sortedvals$ix, dim(nnres$nn.dists))
#The first column here is coresponds to 'leaves_target', the second to 'leaves_base'
targetpt_idx = sortedidx[1:minval,1]
basept_idx_temp = sortedidx[1:minval,2]
basept_idx = NULL
for (bpidx in 1:length(basept_idx_temp)){
basept_idx = c(basept_idx,nnres$nn.idx[targetpt_idx[bpidx],basept_idx_temp[bpidx]])
}
trunc_base <- leaves_base[basept_idx]
trunc_target <- leaves_target[targetpt_idx]
#based on the nearest points in base cluster, but all pts in target cluster..
leaves_combo <- expand.grid(trunc_base,leaves_target)
#Only based on actual nearest points in both clusters..
#leaves_combo <- cbind(trunc_base,trunc_target)
#leaves_combo <- expand.grid(leaves_base,leaves_target)
combedge_start <- c(combedge_start,leaves_combo[,1])
combedge_stop <- c(combedge_stop,leaves_combo[,2])
}
}
#edge_list <- utils::combn(leaves,m =2)
edge_list <- rbind(combedge_start,combedge_stop)
starts<-edge_list[1,]
stops<-edge_list[2,]
# nb drop = FALSE to ensure that we always have a matrix
vecs=xyz[stops, , drop=FALSE] - xyz[starts, , drop=FALSE]
weights=sqrt(rowSums(vecs*vecs))
#Step 6: Add those edges and create a new graph..
mod_graph <- igraph::add_edges(ng,edge_list,"weight"= weights)
#Step 7: Find the minimum spanning tree of the new graph..
mst <- igraph::minimum.spanning.tree(mod_graph)
#Step 8: Find the new edges added by the mst..
new_edges <- igraph::difference(igraph::E(mst),igraph::E(masterng))
newedge_list <- igraph::as_ids(new_edges)
rawel <- igraph::ends(mst,new_edges)
weight_attr <- igraph::edge_attr(graph = mst, 'weight', index = new_edges)
nodenames <- names(igraph::V(masterng))
#Step 9: Now add the new edges in the master graph vertex by vertex..
stitchedng <- masterng
for (idx in 1:nrow(rawel)) {
vertex_ids <- match(rawel[idx, ], nodenames)
if (isTRUE(is.finite(threshold))){
#Choose only those edges that are below a certain threshold..
if(weight_attr[idx]<=threshold){
stitchedng <- igraph::add_edges(stitchedng, c(vertex_ids[[1]], vertex_ids[[2]]),
"weight" = weight_attr[idx])
}else{
warning(paste0("Could not connect two vertices as edge length",
round(weight_attr[idx],digits = 2), " is above threshold"))
}
} else {
stitchedng <- igraph::add_edges(stitchedng, c(vertex_ids[[1]], vertex_ids[[2]]),
"weight" = weight_attr[idx])
}
}
#Step 10: Now keep only vertices that are connected to the master root and cross verify the same..
cc_stitched=igraph::components(stitchedng)
stitchedroot_id <- which(names(V(stitchedng)) == master_root)
verticestoprune <- which(cc_stitched$membership != cc_stitched$membership[stitchedroot_id])
sorted=order(cc_stitched$csize, decreasing = T)
if(sorted[1] != cc_stitched$membership[stitchedroot_id]){
warning("The root node doesn't belong to the largest fragment")
}
stitchedng=igraph::delete.vertices(stitchedng, verticestoprune)
#Step 11: Set the root of the stitched graph now..
stitchedneuron <- as.neuron(stitchedng, origin = master_root)
if(stitchedneuron$nTrees!= 1){
stop('The neuron being returned has multiple fragments')}
stitchedneuron
}
#' Stitch two neurons together at their closest endpoint
#'
#' @param a,b Neurons to join together
#' @details This function joins two neurons at their nearest point (only one).
#' Let's say you have two neurons a and b. a and b will be joined at one point that are closest to each other.
#' However, when say there are multiple points at a and b which are closer and could be joined,
#' then do not use this function, use the function \code{\link{stitch_neurons_mst}},
#' which is slower but will merge at multiple points.
#' Note that for CATMAID neurons the neuron with the soma tag will be
#' treated as the first (master neuron). Furthermore in this case the PointNo
#' (aka node id) should already be unique. Otherwise it will be adjusted to
#' ensure this.
#' @author Gregory Jefferis \email{jefferis@gmail.com}
#' @export
#' @seealso \code{\link{stitch_neurons}}
#' @examples
#' \donttest{
#' dl1_main=simplify_neuron(dl1neuron, n = 1, invert = FALSE)
#' dl1_branches=simplify_neuron(dl1neuron, n = 1, invert = TRUE)
#' dl1_whole = stitch_neuron(dl1_main,dl1_branches)
#' }
stitch_neuron<-function(a, b){
#Step1: if there are any repeats in PointNo, augment those in second neuron
if(any(a$d$PointNo%in%b$d$PointNo)){
b$d$PointNo=b$d$PointNo+max(a$d$PointNo)
b$d$Parent=b$d$Parent+max(a$d$PointNo)
}
#Step2: Convert them to ngraph objects and make a single graph(that can have multiple subtrees)
ag=as.ngraph(a, weights = T, method = 'seglist')
bg=as.ngraph(b, weights = T, method = 'seglist')
abg=as.ngraph(igraph::disjoint_union(ag, bg))
#Step3: find closest node (or endpoint?) in each neuron and join those
ce=closest_ends(a, b) #b neuron is the query neuron let's say has 3 pts and a neuron has 230 pts, then we
#end up with a 230x3 matrix and find out the closest point in b neuron
a_pointno=a$d$PointNo[ce$a_idx]
b_pointno=b$d$PointNo[ce$b_idx]
# older versions of nat use label for nodes, newer use name
node_label=intersect(c("name","label"),
igraph::list.vertex.attributes(ag))[1]
if(all(is.na(node_label))) stop("Graph nodes are not labelled!")
abg=abg+igraph::edge(which(igraph::vertex_attr(abg, node_label)==a_pointno),
which(igraph::vertex_attr(abg, node_label)==b_pointno))
#Step4: Convert them back to neuron..
as.neuron(as.ngraph(abg))
}
# Find raw vertex ids and distance for closest end points of two neurons
closest_ends<-function(a, b){
epa=endpoints(a)
epb=endpoints(b)
axyz=a$d[epa,c("X","Y","Z")]
bxyz=b$d[epb,c("X","Y","Z")]
nnres=nabor::knn(axyz, bxyz, k=1)
b_idx=which.min(nnres$nn.dists)
a_idx=nnres$nn.idx[b_idx,1]
return(list(a_idx=epa[a_idx], b_idx=epb[b_idx], dist=min(nnres$nn.dists)))
}
#' Stitch multiple fragments into single neuron using nearest endpoints
#'
#' @details Neurons will be ordered by default such the largest (by node count)
#' neuron with a soma tag is the \code{master} neuron - i.e. the one
#' containing the root node. Fragments are joined recursively in this sort
#' order each time simply picking the closest fragment to the current
#' \emph{master}. Closest is here defined by the distance between nearest
#' endpoints.
#' @param x A neuronlist containing fragments of a single neuron
#' @param prefer_soma When TRUE (the default) the fragment tagged as the soma
#' will be used as the master neuron.
#' @param sort When TRUE (the default) the fragments will be sorted by the
#' number of nodes they contain before stitching.
#' @param warndist If distance is greater than this value, create a warning.
#' @return A single \code{neuron} object containing all input fragments.
#' @importFrom igraph E set_edge_attr
#' @seealso \code{\link{stitch_neuron}}
#' @author Gregory Jefferis \email{jefferis@gmail.com}
#' @export
#' @examples
#' \dontrun{
#' dl1_main=simplify_neuron(dl1neuron, n = 1, invert = FALSE)
#' dl1_branches=simplify_neuron(dl1neuron, n = 1, invert = TRUE)
#' dl1_branches1=simplify_neuron(dl1_branches, n = 1, invert = FALSE)
#' dl1_branches2=simplify_neuron(dl1_branches, n = 1, invert = TRUE)
#' dl1_fragment <- list(dl1_main,dl1_branches1,dl1_branches2)
#' dl1_fragment <- as.neuronlist(dl1_fragment)
#' dl1_whole = stitch_neurons(dl1_fragment)
#' }
stitch_neurons <- function(x, prefer_soma=TRUE, sort=TRUE, warndist=1000) {
#Step1: Check if it is neuronlist
if(!is.neuronlist(x)) stop("x must be a neuronlist object!")
if(length(x)==0) return(NULL)
if(length(x)==1) return(x[[1]]) # as function promises to return a neuron
#Step2a: Check if there is a soma tag associated (this will be considered as base or master neuron)
if(prefer_soma) {
svec=sapply(x, has_soma)
} else {
svec=rep(0,length(x))
}
#Step2b: If sort is enabled then you sort by number of nodes, so that the neuron with the largest number of nodes
#becomes the base or master neuron
if(sort){
nnodes=sapply(x, function(n) nrow(n$d))
eps=1/(max(nnodes)+1)
svec=(eps+svec)*nnodes #just normalising here such that the highest one gets 1 and others are weighted by it..
}
#Step3: Sort the neurons in the list so you can get a base or master neuron (the first element in the vector)
if(any(svec>0))
x=x[order(svec, decreasing = TRUE)]
#Step4: if there are only two neurons, just merge them by their closest points by knn algoirthm
if(length(x)==2) return(stitch_neuron(x[[1]], x[[2]]))
#Step4a: if there are more than two neurons, just merge them by their closest points by knn algoirthm
dists=sapply(x[-1], function(n) closest_ends(x[[1]], n)$dist) #find the closest fragment to the base fragment..
mindist=min(dists)
if(isTRUE(is.finite(warndist)) && mindist>warndist){
warning("Suspicious minimum distance between fragments ( ",mindist, ")!")
}
chosen=which.min(dists)+1 #find the index of the closest to the base fragment..
#Step4b: stitch the base fragment to the closest fragment..
x[[1]]=stitch_neuron(x[[1]], x[[chosen]])
#Step4c: iteratively do this process by removing the already merged fragment..
stitch_neurons(x[-chosen], prefer_soma=FALSE, sort=FALSE)
}
has_soma<-function(x){
!is.null(x$tags$soma)
}
#' Return indices of points in a neuron distal to a given node
#'
#' @description This function returns a list (containing the order of nodes) travelled using a depth
#' first search starting from the given node.
#' @param x A neuron
#' @param node.idx,node.pointno The id(s) of node(s) from which distal points
#' will be selected. \code{node.idx} defines the integer index (counting from
#' 1) into the neuron's point array whereas \code{node.pointno} matches the
#' PointNo column which will be the CATMAID id for a node.
#' @param root.idx,root.pointno The root node of the neuron for the purpose of
#' selection. You will rarely need to change this from the default value. See
#' \code{node} argument for the difference between \code{root.idx} and
#' \code{root.pointno} forms.
#' @return Integer 1-based indices into the point array of points that are
#' distal to the specified node(s) when traversing the neuron from the root to
#' that node. Will be a vector if only one node is specified, otherwise a list is returned
#' @export
#' @seealso \code{\link[nat]{subset.neuron}}, \code{\link[nat]{prune}}
#' @examples
#' \donttest{
#' ## Use EM finished DL1 projection neuron
#'
#' ## subset to part of neuron distal to a tag "SCHLEGEL_LH"
#' # nb distal_to can accept either the PointNo vertex id or raw index as a
#' # pivot point
#' dl1.lh=subset(dl1neuron, distal_to(dl1neuron,
#' node.pointno = dl1neuron$tags$SCHLEGEL_LH))
#' plot(dl1neuron,col='blue', WithNodes = FALSE)
#' plot(dl1.lh, col='red', WithNodes = FALSE, add=TRUE)
#' }
distal_to <- function(x, node.idx=NULL, node.pointno=NULL, root.idx=NULL,
root.pointno=NULL) {
#Step1: Check the node.idx and node.pointno argument and map them to node.idx..
if(is.null(node.idx)) {
if(is.null(node.pointno))
stop("At least one of node.idx or node.pointno must be supplied")
node.idx=match(node.pointno, x$d$PointNo)
if(any(is.na(node.idx)))
stop("One or more invalid node.pointno. Should match entries in x$d$PointNo!")
}
#Step2: Convert the neuron to ngraph object and reroot it if root.idx or root.pointno is given..
if(is.null(root.idx) && is.null(root.pointno)) {
g=as.ngraph(x)
} else {
if(!is.null(root.pointno))
root.idx=match(root.pointno, x$d$PointNo)
if(length(root.idx)>1)
stop("A single unique root point must be supplied")
# we need to re-root the graph onto the supplied root
gorig=as.ngraph(x)
g=as.directed.usingroot(gorig, root = root.idx)
}
#Step3: For each node id, travese the graph from the given node using depth first search and return the visited
#nodes..
l=sapply(node.idx, dfs_traversal, g, simplify = FALSE)
if(length(node.idx)==1) l[[1]] else l
}
dfs_traversal <- function(x, g) {
gdfs=igraph::dfs(g, root = x, unreachable = FALSE)
as.integer(gdfs$order)[!is.na(gdfs$order)]
}
#' @description \code{prune_twigs} will prune twigs less than a certain path length from a neuron
#' @export
#' @rdname prune_twigs
prune_twigs<-function(x, ...) UseMethod('prune_twigs')
#' Remove all twigs less than a certain path length from a neuron
#'
#' @param x A \code{\link{neuron}} or \code{\link{neuronlist}} object
#' @param twig_length Twigs shorter than this will be pruned
#' @param ... Additional arguments passed to \code{\link{nlapply}},
#' \code{\link{prune_vertices}} and eventually \code{\link{as.ngraph}}.
#'
#' @author Gregory Jefferis \email{jefferis@gmail.com}
#' @export
#' @rdname prune_twigs
#' @examples
#' # Prune twigs up to 5 microns long
#' pt5=prune_twigs(Cell07PNs[1:3], twig_length = 5)
#' # compare original (coloured) and pruned (black) neurons
#' plot(Cell07PNs[1:3], WithNodes=FALSE, lwd=2, xlim=c(240,300), ylim=c(120, 90))
#' plot(pt5, WithNodes=FALSE, add=TRUE, lwd=2, col='black')
prune_twigs.neuron <- function(x, twig_length, ...) {
#Step1: Convert the neuron to ngraph object..
ng = as.ngraph(x, weights = T)
if (!igraph::is_dag(ng)) {
stop("I can't prune neurons with cycles!")
}
root <- rootpoints(ng, original.ids=FALSE)
if(length(root)!=1)
stop("I can't prune neurons with more than 1 root!")
#Step2: Make a table with rows(branch pts) and columns (leaves), the item entry would be the distance..
#This will help us identify the leaves (end pts) that are farthest away from the branch pts and eliminate them..
#Step2a: Compute the leaves and branchpoints..
leaves=setdiff(endpoints(ng, original.ids=FALSE), root)
bps=branchpoints(ng, original.ids=FALSE)
#Step2b: Compute the table where rows(branch points) and columns (leaves)..
dd=igraph::distances(ng, v=bps, to=leaves, mode = 'out')
#Step3: For each column(leaf) set the bps that are farther than twig_length as -1, if all the bps are farther
#then set the particular column as NA..Then each column find the index of the element(which is the bps)
#that is farthest..
max_bp_idx <- apply(dd, 2, function(x) {
# set values that are too long to signalling length of -1
too_long=x>twig_length
if(all(too_long)) return(NA_integer_)
x[too_long]=-1
wm=which.max(x) #here choose the leaf that has the max length to prune
if(is.finite(x[wm]) & x[wm]>0) wm else NA_integer_
})
#Step4: Now take only leaves that have finite values (which are acceptable eps that have less than the twig_length)
# and also compute their corresponding starting pts(which are bps)
eps_to_prune <- leaves[is.finite(max_bp_idx)]
bps_to_start <- bps[max_bp_idx[is.finite(max_bp_idx)]]
#Step5: Now we need a structure that will hold the bps and eps that we want to keep, the easiest approach
#would be to simply remove the eps we don't need by removing path to those eps, but the path may
#actually pass through other bps that we want to keep, so we need to isolate those bps we want to keep
#and those eps we want to keep
#For e.g. we also want to keep paths up until the branch points proximal to
# the twigs that we are pruning - even if we prune all the twigs
# downstream of those branch points
#Step5a: Find the eps that we want to keep
eps_to_keep <- setdiff(leaves, eps_to_prune)
#Step5b: Also find the bps that we want to keep and collate them to nodes that we want to keep...
nodes_to_keep=c(eps_to_keep, bps_to_start)
#Step5c: Now compute the paths from the root to all the nodes that we want to keep..
# note that mode = 'o' should be fine(as we have a directed graph from root), but mode 'all' is probably safer
res2 <-
igraph::shortest_paths(
ng,
from = root,
to = nodes_to_keep,
mode = 'all',
weights = NA
)
#Step5d:Now compute the vertices you want to keep and prune the rest of the vertices
verts_to_keep=unique(unlist(res2$vpath))
# now we can basically prune everything not in that set
prune_vertices(ng, verts_to_keep, invert=TRUE, ...)
}
#' @param ... Additional arguments passed to methods
#' @export
#' @rdname reroot
reroot <- function(x, ...) UseMethod('reroot')
#' Reroot neurons
#'
#' Change the root node of a neuron (typically denoting the soma) to a new node
#' specified by a node index, identifier or an XYZ position.
#'
#' @details All neurons in the natverse have a root point, which is used for
#' during many operations on the branching structure of the neuron. This will
#' often correspond to the soma of a neuron, but the soma is not always
#' present and sometimes its position may be unknown. For example some
#' connectomics datasets will have a certain position on a neuron marked as
#' \code{to soma} when the soma is not present in the reconstruction but it is
#' known to which branch it is attached.
#'
#' The root point of a neuron is stored in the \code{StartPoint} field of the
#' neuron (see Examples) and can also be accessed using the
#' \code{\link{rootpoints}} function. For further details, please consult the
#' \href{http://natverse.org/nat/articles/neurons-as-graph.html}{Neurons as
#' graph structures} vignette. As an extension to the original nat
#' specification, the point identifier (not point index) of the anatomical
#' soma can be stored in the \code{tags$soma} field of the neuron
#'
#' The node index refers is a number between 1 and N, the number of points in
#' the neuron. It provides an index into the point array. The node id is an
#' arbitrary identifier which may sometime be the same as the index, but may
#' be e.g. a 64 bit integer that uniquely identifies nodes across all neurons
#' in a database. Node ids can be retained after neurons are pruned even if
#' the indices for each point change. For further details, again see the
#' vignette mentioned above.
#'
#' @param x A \code{\link{neuron}} or \code{\link{neuronlist}} object
#' @param idx index of the node for the new root (between 1 and the number of
#' nodes in the neuron).
#' @param pointno new root node identifier (i.e. the \code{PointNo} column in
#' the point array of the neuron, see details).
#' @param point 3-vector with X,Y,Z coordinates (data.frame or Nx3 matrix for
#' neuronlist)
#'
#' @return neuron with a new root position (unless \code{idx}, \code{pointno},
#' and \code{point} are all \code{NULL}, when the original neuron is
#' returned).
#' @export
#' @rdname reroot
#' @examples
#' newCell07PN <- reroot(Cell07PNs[[2]], 5)
#' newCell07PN$StartPoint # 5
reroot.neuron <- function(x, idx=NULL, pointno=NULL, point=NULL, ...) {
if (sum(c(!is.null(idx), !is.null(point), !is.null(pointno)))!=1)
stop("Exactly one argument (idx, pointno, point) must be specified.")
if (!is.null(idx)) {
nid <- x$d$PointNo[idx]
} else if (!is.null(pointno)) {
if(any(is.na(match(pointno, x$d$PointNo))))
stop("One or more invalid pointno. Should match entries in x$d$PointNo!")
nid <- pointno
}
else {
if ((is.matrix(point) && nrow(point) != 3) || !(is.numeric(point) && length(point) == 3))
stop("Wrong point format, see docs!")
nidx <- which.min((x$d$X-point[[1]])^2+(x$d$Y-point[[2]])^2+(x$d$Z-point[[3]])^2)
nid <- x$d$PointNo[nidx]
}
as.neuron(as.ngraph(x), origin = nid)
}
#' @export
#' @rdname reroot
reroot.neuronlist<-function(x, idx=NULL, pointno=NULL, point=NULL, ...){
if (sum(c(!is.null(idx), !is.null(point), !is.null(pointno)))!=1)
stop("Exactly one argument (idx, point, pointno) must be specified.")
if (!is.null(idx)) {
if (length(idx) == 1)
res <- nlapply(x, reroot, idx=idx)
else if (length(idx) == length(x))
res <- nmapply(reroot, x, idx=idx)
else
stop("Number of indices must be 1, or equal to the number of neurons.")
}
if (!is.null(pointno)) {
if (length(pointno) == 1)
res <- nlapply(x, reroot, pointno=pointno)
else if (length(pointno) == length(x))
res <- nmapply(reroot, x, pointno=pointno)
else
stop("Number of point ids must be 1, or equal to the number of neurons.")
}
if (!is.null(point)) {
if (is.vector(point) || (is.data.frame(point) && nrow(point) == 1))
res <- nlapply(x, reroot, point=point)
else {
stopifnot(
"Number of points must be 1, or equal to the number of neurons."=nrow(points) == length(x)
)
point <- as.data.frame(t(point))
res <- nmapply(reroot, x, point=point)
}
}
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.