#' Read a single neuron from a file
#'
#' @details This function will handle \code{neuron} and \code{dotprops} objects
#' saved in R .rds or .rda format by default. Additional file formats can be
#' registered using \code{fileformats}.
#'
#' At the moment the following formats are supported using file readers
#' already included with the nat package: \itemize{
#'
#' \item \bold{swc} See \code{\link{read.neuron.swc}}. SWC files can also
#' return an \code{\link{ngraph}} object containing the neuron structure in a
#' (permissive) general graph format that also contains the 3D positions for
#' each vertex.
#'
#' \item \bold{neuroml} See \code{\link{read.neuron.neuroml}}
#'
#' \item \bold{fijitraces} See \code{\link{read.neuron.fiji}}. The file format
#' used by the \href{https://imagej.net/plugins/snt/}{SNT} plugin of
#' Fiji/ImageJ.
#'
#' \item \bold{hxlineset,hxskel} Two distinct fileformats used by Amira.
#' \code{hxlineset} is the generic one, \code{hxskel} is used by the
#' hxskeletonize extension of Schmitt and Evers (see refs).
#'
#' \item \bold{rda,rds} Native R cross-platform binary formats (see
#' \code{\link{load}, \link{readRDS}}). Note that RDS only contains a single
#' unnamed neuron, whereas rda contains one or more named neurons.
#'
#' \item \bold{obj,ply} 3D Mesh formats encoding surface models of neurons.
#' These depend on the suggested package \code{\link[Rvcg]{Rvcg}} (for 'ply'
#' format) and \code{\link[readobj]{readobj}} (for Wavefront 'obj' format).
#'
#' }
#' @export
#' @param f Path to file. This can be a URL, in which case the file is
#' downloaded to a temporary location before reading.
#' @param format The file format of the neuron. When \code{format=NULL}, the
#' default, \code{read.neuron} will infer the file format from the extension
#' or file header (aka magic) using the \code{fileformats} registry.
#' @param class The class of the returned object - presently either
#' \code{"neuron"} or \code{"ngraph"}
#' @param ... additional arguments passed to format-specific readers
#' @seealso \code{\link{write.neuron}}, \code{\link{read.neurons}},
#' \code{\link{fileformats}}
#' @references Schmitt, S. and Evers, J. F. and Duch, C. and Scholz, M. and
#' Obermayer, K. (2004). New methods for the computer-assisted 3-D
#' reconstruction of neurons from confocal image stacks. Neuroimage 4,
#' 1283--98. \doi{10.1016/j.neuroimage.2004.06.047}
#'
#' @examples
#' \dontrun{
#' # note that we override the default NeuronName field
#' n=read.neuron(system.file("tests/testthat/testdata","neuron","EBT7R.CNG.swc",package='nat'),
#' NeuronName="EBT7R")
#' # use a function to set the NeuronName field
#' n3=read.neuron(system.file("tests/testthat/testdata","neuron","EBT7R.CNG.swc",package='nat'),
#' NeuronName=function(x) sub("\\..*","",x))
#' # show the currently registered file formats that we can read
#' fileformats(class='neuron', read=TRUE)
#' }
read.neuron<-function(f, format=NULL, class=c("neuron", "ngraph"), ...){
if(grepl("^http[s]{0,1}://", f)) {
url=f
# download remote url to local file in tempdir
f=file.path(tempdir(), basename(f))
on.exit(unlink(f))
filecontents=httr::GET(url)
writeBin(httr::content(filecontents,type = 'raw'), con = f)
} else url=NULL
#if(!file.exists(f)) stop("Unable to read file: ",f)
if(is.null(format))
format=tolower(sub(".*\\.([^.]+$)","\\1",basename(f)))
if(format=="rds")
n=readRDS(f)
else if(format=="rda"){
objname=load(f,envir=environment())
if(length(objname)>1) stop("More than 1 object in file:",f)
n=get(objname,envir=environment())
} else {
class=match.arg(class, choices = c("neuron", "ngraph"))
ffs=getformatreader(f, class = class)
if(is.null(ffs)) {
# as a special test, check to see if this looks like an swc file
# we don't do this by default since is.swc is a little slow
if(is.swc(f)) {
if(class=='neuron') n=read.neuron.swc(f, ...)
else n=read.ngraph.swc(f, ...)
} else stop("Unable to identify file type of:", f)
} else {
n=match.fun(ffs$read)(f, ...)
}
}
if(!is.null(url)){
attr(n, 'url')=url
}
# make sure that neuron actually inherits from neuron
# we can rely on dotprops/neuronlist objects to have the correct class
if(is.dotprops(n) || inherits(n, 'ngraph') || is.neuronlist(n)) n else as.neuron(n)
}
#' Read one or more neurons from file to a neuronlist in memory
#'
#' @details This function will cope with the same set of file formats offered by
#' \code{\link{read.neuron}}.
#'
#' If the \code{paths} argument specifies a (single) directory then all files
#' in that directory will be read unless an optional regex pattern is also
#' specified. Similarly, if \code{paths} specifies a zip archive, all neurons
#' within the archive will be loaded.
#'
#' \code{neuronnames} must specify a unique set of names that will be used as
#' the names of the neurons in the resultant neuronlist. If \code{neuronnames}
#' is a function then this will be applied to the path of each input file. The
#' default value of \code{basename=NULL} results in each neuron being named
#' for the input file from which it was read \emph{after trimming the file
#' extension}. This should match the \code{NeuronName} field of each
#' individual neuron.
#'
#' The optional dataframe (\code{df}) detailing each neuron should have
#' \code{rownames} that match the names of each neuron. It would also make
#' sense if the same key was present in a column of the data frame. If the
#' dataframe contains more rows than neurons, the superfluous rows are dropped
#' with a warning. If the dataframe is missing rows for some neurons an error
#' is generated. If SortOnUpdate is TRUE then updating an existing neuronlist
#' should result in a new neuronlist with ordering identical to reading all
#' neurons from scratch.
#' @param paths Paths to neuron input files \emph{or} a directory containing
#' neurons \emph{or} a \code{\link{neuronlistfh}} object, \emph{or} a zip
#' archive containing multiple neurons.
#' @param pattern If paths is a directory, \code{\link[base]{regex}} that file
#' names must match.
#' @param neuronnames Character vector or function that specifies neuron names.
#' See details.
#' @param format File format for neuron (see \code{\link{read.neuron}})
#' @param df Optional data frame containing information about each neuron
#' @param OmitFailures Omit failures (when TRUE) or leave an NA value in the
#' list
#' @param SortOnUpdate When \code{nl!=NULL} the resultant neuronlist will be
#' sorted so that neurons are ordered according to the value of the
#' \code{paths} argument.
#' @param nl An existing neuronlist to be updated (see details)
#' @param ... Additional arguments to passed to \code{read.neuron} methods
#' @return \code{\link{neuronlist}} object containing the neurons
#' @export
#' @seealso \code{\link{read.neuron}}, \code{\link{write.neurons}},
#' \code{\link{fileformats}}
#' @family neuronlist
#' @examples
#' \dontrun{
#' ## Read C. elegans neurons from OpenWorm github repository
#' vds=paste0("VD", 1:13)
#' vdurls=paste0("https://raw.githubusercontent.com/openworm/CElegansNeuroML/",
#' "103d500e066125688aa7ac5eac7e9b2bb4490561/CElegans/generatedNeuroML/",vds,
#' ".morph.xml")
#' vdnl=read.neurons(vdurls, neuronnames=vds)
#' plot3d(vdnl)
#'
#' ## The same, but this time add some metadata to neuronlist
#' # fetch table of worm neurons from wormbase
#' library(rvest)
#' nlurl="http://wormatlas.org/neurons/Individual%20Neurons/Neuronframeset.html"
#' wormneurons = html_table(read_html(nlurl), fill=TRUE)[[4]]
#' vddf=subset(wormneurons, Neuron%in%vds)
#' rownames(vddf)=vddf$Neuron
#' # attach metadata to neuronlist
#' vdnl=read.neurons(vdurls, neuronnames=vds, df=vddf)
#' # use metadata to plot a subset of neurons
#' nclear3d()
#' plot3d(vdnl, grepl("P[1-6].app", Lineage))
#' }
#' @importFrom tools file_path_sans_ext
read.neurons<-function(paths, pattern=NULL, neuronnames=NULL, format=NULL,
nl=NULL, df=NULL, OmitFailures=TRUE, SortOnUpdate=FALSE,
...){
if(is.null(neuronnames)) neuronnames=function(x, ...) file_path_sans_ext(basename(x))
if(length(paths) == 1 && grepl("\\.zip$", paths)) {
if(grepl("^http[s]{0,1}://", paths)) {
url=paths
# download remote url to local file in tempdir
paths=tempfile(pattern=basename(paths), fileext = '.zip')
on.exit(unlink(paths))
download.file(url, destfile = paths)
}
neurons_dir <- file.path(tempfile(pattern = "user_neurons"))
on.exit(unlink(neurons_dir, recursive=TRUE), add = TRUE)
unzip(paths, exdir=neurons_dir)
paths=dir(neurons_dir, full.names = TRUE, recursive=TRUE)
}
else if(is.neuronlistfh(paths)){
if(!inherits(attr(paths,'db'),'filehashRDS'))
stop("read.neurons only supports reading neuronlistfh with an RDS format filehash")
nlfh=paths
dbdir=attr(nlfh,'db')@dir
kfm=attr(nlfh,'keyfilemap')
paths=structure(file.path(dbdir,kfm),.Names=names(kfm))
if(OmitFailures) {
fep=file.exists(paths)
if(!all(fep))
warning("There are ", sum(fep), " missing neurons!")
paths=paths[fep]
}
neuronnames=names(paths)
df=attr(nlfh,'df')
format='rds'
}
if(!is.character(paths)) stop("Expects a character vector of filenames")
if(length(paths)==1 && isTRUE(file.info(paths)$isdir))
paths=dir(paths,pattern=pattern,full.names=TRUE)
nn <- if(is.function(neuronnames)) neuronnames(paths) else neuronnames
duplicateNames=nn[duplicated(nn)]
if(length(duplicateNames)) {
stop("Neurons cannot have duplicate names: ",
paste(duplicateNames, collapse=" "))
}
all_names=nn
names(paths)=nn
dfin <- NULL
if("write.neurons.dataframe" %in% nn) {
nn=setdiff(nn, "write.neurons.dataframe")
dff <- paths["write.neurons.dataframe"]
dfin <- readRDS(dff)
}
# Handle updates of an existing neuronlist
if(!is.null(nl)) {
if(!is.neuronlist(nl)) stop("nl must be a neuronlist")
new_neurons=setdiff(nn,names(nl))
old_neurons_we_can_see=intersect(names(nl),nn)
old_paths=paths[old_neurons_we_can_see]
new_md5s=md5sum(old_paths)
old_md5s=sapply(nl,"[[","InputFileMD5")
names(old_md5s)=names(nl)
old_md5s=old_md5s[old_neurons_we_can_see]
stopifnot(length(old_md5s)==length(new_md5s))
modified_neurons=old_neurons_we_can_see[new_md5s!=old_md5s]
# now just select the paths that need to be (re)loaded
nn=c(modified_neurons,new_neurons)
# no paths to load => existing list is up to date
if(!length(nn)) return(nl)
message("There are ",length(modified_neurons)," modified neurons",
" and ",length(new_neurons),' new neurons')
paths=paths[nn]
if(!is.null(dfin)) {
# only fiddle with the data.frame if there was one in the zip file
df=as.data.frame(nl)
df[nn,]=dfin[nn,]
}
} else {
if(!is.null(dfin)) {
df=dfin
# reorder to match incoming data.frame
nn=intersect(rownames(df), nn)
}
nl=neuronlist()
}
# Actually read in the neurons, making sure that warnings/errors are thrown
# immediately so that we can tell which neuron generated them
ow=options(warn=1)
on.exit(options(ow), add = TRUE)
if(interactive())
pb <- progress::progress_bar$new(format = " reading :current/:total [:bar] eta: :eta",
clear = FALSE,
total = length(paths),
show_after=2)
for(n in nn){
if(interactive())
pb$tick()
f=unname(paths[n])
x=withCallingHandlers(try(read.neuron(f, format=format, ...), silent=TRUE),
warning = function(w) message("While reading file: ",f),
error=function(e) message("While reading file: ",f))
if(inherits(x,'try-error')){
if(OmitFailures) x=NULL
else x=NA
}
if(is.neuronlist(x)) {
if(length(paths)>1)
stop("It is not possible to load multiple neuronlists or a mix of",
" neurons and neuronlists!\nPlease specfiy a single file or",
" use a pattern to exclude files you did not mean to read.")
nl=x
break
}
nl[[n]]=x
}
if(SortOnUpdate) {
names_missing_from_all_names=setdiff(names(nl),all_names)
if(length(names_missing_from_all_names)){
warning("Cannot SortOnUpdate when supplied paths do not include all neurons: ",
paste(names_missing_from_all_names,collapse=' '))
} else {
# nb names_we_have will be ordered like all_names
names_we_have=intersect(all_names,names(nl))
# resort if required
if(!isTRUE(all.equal(names_we_have,names(nl))))
nl=nl[names_we_have]
}
}
# nb only keep dataframe rows for neurons that were successfully read in
# Look after the attached dataframe
if(!is.null(df)){
data.frame(nl)=df
}
nl
}
#' Set or return list of registered file formats that we can read
#'
#' @description \code{fileformats} returns format names, a format definition
#' list or a table of information about the formats that match the given
#' filter conditions.
#' @details if a \code{format} argument is passed to \code{fileformats} it will
#' be matched with partial string matching and iif a unique match exists that
#' will be returned.
#' @param rval Character vector choosing what kind of return value
#' \code{fileformats} will give.
#' @return \itemize{
#'
#' \item \code{fileformats} returns a character vector, matrix or list
#' according to the value of rval.
#'
#' \item \code{getformatreader} returns a list. The reader can be accessed
#' with \code{$read} and the format can be accessed by \code{$format}.
#'
#' \item \code{getformatwriter} returns a list. The writer can be accessed
#' with \code{$write}.}
#' @export
#' @examples
#' # information about the currently registered file formats
#' fileformats(rval='info')
fileformats<-function(format=NULL,ext=NULL,read=NULL,write=NULL,class=NULL,
rval=c("names",'info','all')){
currentformats<-ls(envir=.fileformats)
if(!is.null(class)){
currentformats<-Filter(function(x) isTRUE(
get(x,envir=.fileformats)$class%in%class), currentformats)
}
if(isTRUE(read)){
currentformats<-Filter(function(x)
isTRUE(!is.null(get(x,envir=.fileformats)$read)), currentformats)
}
if(isTRUE(write)){
currentformats<-Filter(function(x)
isTRUE(!is.null(get(x,envir=.fileformats)$write)), currentformats)
}
if(!is.null(format)) {
m=pmatch(format, currentformats)
if(is.na(m)) stop("No format available to meet this specification: ", format)
currentformats=currentformats[m]
} else {
if(!is.null(ext) && !is.na(ext)){
if(substr(ext,1,1)!=".") ext=paste(".",sep="",ext)
currentformats<-Filter(function(x) isTRUE(
ext%in%get(x,envir=.fileformats)$ext), currentformats)
}
}
rval=match.arg(rval)
if(rval=='names'){
return(currentformats)
}
fi=mget(currentformats, envir=.fileformats)
if(rval=='info'){
l=lapply(currentformats, function(f) {
fx=fi[[f]]
data.frame(format=f, class=fx$class, ext=fx$ext,
read=!is.null(fx$read), write=!is.null(fx$write),
magic=!is.null(fx$magic))
})
do.call(rbind, l)
} else if(rval=='all') {
fi
}
}
#' @description \code{registerformat} registers a format in the io registry
#' @export
#' @param format Character vector naming the format
#' @param ext Character vector of file extensions (including periods)
#' @param read,write Functions to read and write this format
#' @param magic Function to test whether a file is of this format
#' @param magiclen Optional integer specifying maximum number of bytes required
#' from file header to determine file's type.
#' @param class The S3 class for the format (character vector e.g. 'neuron')
#' @rdname fileformats
#' @examples
#' \dontrun{
#' registerformat("swc",read=read.swc,write=read.swc,magic=is.swc,magiclen=10,
#' class='neuron')
#' }
registerformat<-function(format=NULL,ext=format,read=NULL,write=NULL,magic=NULL,
magiclen=NA_integer_,class=NULL){
currentformats=ls(envir=.fileformats)
if(format%in%currentformats)
warning("This format has already been registered")
if(is.null(read) && is.null(write))
stop("Must provide at least one read or write function")
ext=sub("^([^.])",".\\1",ext)
assign(format,list(ext=ext,read=read,write=write,magic=magic,magiclen=magiclen,
class=class),
envir=.fileformats)
invisible()
}
#' @description \code{getformatreader} gets the function to read a file
#' @rdname fileformats
#' @param file Path to a file
#' @details \code{getformatreader} starts by reading a set number of bytes from
#' the start off the current file and then checks using file extension and
#' magic functions to see if it can identify the file. Presently formats are
#' in a queue in alphabetical order, dispatching on the first match.
#' @export
#' @examples
#' swc=tempfile(fileext = '.swc')
#' write.neuron(Cell07PNs[[1]], swc)
#' stopifnot(isTRUE(getformatreader(swc)$format=='swc'))
#' unlink(swc)
getformatreader<-function(file, class=NULL){
formatsforclass<-fileformats(class=class, read = TRUE)
if(!length(formatsforclass)) return(NULL)
magiclens=sapply(formatsforclass,function(f) get(f,envir=.fileformats)$magiclen)
max_magiclen=max(c(-Inf,magiclens),na.rm=TRUE)
if(is.finite(max_magiclen)) {
magicbytes = readBin(file,what=raw(),n=max_magiclen)
# check if this looks like a gzip file
gzip_magic=as.raw(c(0x1f, 0x8b))
if(all(magicbytes[1:2]==gzip_magic)){
gzf=gzfile(file,open='rb')
on.exit(close(gzf))
magicbytes=readBin(gzf,what=raw(),n=max_magiclen)
}
} else magicbytes=NULL
ext=tolower(sub(".*(\\.[^.]+$)","\\1",basename(file)))
for(format in formatsforclass){
ffs=get(format,envir=.fileformats)
ffs$format=format
# check that we have a read function for this format
if (!"read"%in%names(ffs)) next
if(!is.null(ffs$magic)){
# we have a magic function for this file, so check by candidate magic bytes
if(ffs$magic(file, magicbytes)) return(ffs)
} else {
# else check by file extension
if(ext%in%ffs$ext) return(ffs)
}
}
return(NULL)
}
#' @description \code{getformatwriter} gets the function to write a file
#' @section getformatwriter output file: If \code{getformatwriter} is passed a
#' \code{file} argument, it will be processed based on the registered
#' fileformats information and the \code{ext} argument to give a final output
#' path in the \code{$file} element of the returned \code{list}.
#'
#' If \code{ext='.someext'} \code{getformatwriter} will use the specified
#' extension to overwrite the default value returned by \code{fileformats}.
#'
#' If \code{ext=NULL}, the default, and \code{file='somefilename.someext'}
#' then \code{file} will be untouched and \code{ext} will be set to
#' \code{'someext'} (overriding the value returned by \code{fileformats}).
#'
#' If \code{file='somefile_without_extension'} then the supplied or
#' calculated extension will be appended to \code{file}.
#'
#' If \code{ext=NA} then the input \code{file} name will not be touched (even
#' if it has no extension at all).
#'
#' Note that if \code{ext=NULL} or \code{ext=NA}, then only the specified
#' format or, failing that, the \code{file} extension will be used to query
#' the fileformats database for a match.
#'
#' See \code{\link{write.neuron}} for code to make this discussion more
#' concrete.
#' @rdname fileformats
#' @export
#' @seealso \code{\link{write.neuron}}
getformatwriter<-function(format=NULL, file=NULL, ext=NULL, class=NULL){
if(!is.null(file) && is.null(ext)){
incomingext=tools::file_ext(file)
if(nzchar(incomingext)) ext=paste0(".", incomingext)
}
ext_was_set=!is.null(ext) && !is.na(ext)
nfs=fileformats(format=format, ext=ext, class=class, rval='all', write=TRUE)
if(length(nfs)>1) stop("Ambiguous file format specification!")
if(length(nfs)==0) stop("No matching writer for this file format!")
r=nfs[[1]]
if(ext_was_set) r$ext=ext
# Process file name if one was supplied
if(!is.null(file)) {
r$file=if(length(ext) && is.na(ext)) {
# ext=NA, don't touch file name!
file
} else if(nzchar(tools::file_ext(file))){
# the file we were given has an extension, replace it
sub("\\.[^.]+$", r$ext[1], file)
} else {
# the input file did not have an extension
# append if ext was anything other than NA
paste0(file, r$ext[1])
}
}
r
}
#' Read a neuron in swc file format
#' @description \code{read.neuron.swc} reads an SWC file on disk into a fully
#' parsed \code{\link{neuron}} representation. However we normally recommend
#' using \code{\link[=read.neuron]{read.neuron(s)}} since those functions cope
#' with any file type.
#' @details These functions will accept SWC neurons with multiple trees and
#' arbitrary point index order. However only \code{read.ngraph.swc} will
#' accept SWC files with cycles.
#'
#' These functions would normally be called from
#' \code{\link[=read.neuron]{read.neuron(s)}} rather than used directly. The
#' only benefit of using \code{read.neuron.swc} is to avoid a very small
#' overhead in identifying the SWC file type. Note that only
#' \code{\link{read.neurons}} can read many files in one command to construct
#' a \code{\link{neuronlist}} object.
#'
#' @section SWC Format: According to
#' \url{http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html}
#' SWC file format has a radius not a diameter specification
#' @param f path to file
#' @param ... Additional arguments. \code{read.neuron.swc} passes these to
#' \code{\link{as.neuron}} and then on to \code{\link{neuron}}.
#' \code{read.neuron.swc} passes them to \code{\link{ngraph}}.
#' @seealso \code{\link{is.swc}}, \code{\link{read.neuron}}
#' @export
read.neuron.swc<-function(f, ...){
d=read.swc(f)
# multiply by 2 to get diam which is what I work with internally
d$W=d$W*2
as.neuron(d, InputFileName=f, ...)
}
# internal function that just reads a table of SWC format data
read.swc<-function(f){
ColumnNames<-c("PointNo","Label","X","Y","Z","W","Parent")
d=read.table(f, header = FALSE, sep = "", quote = "\"'", dec = ".",
col.names=ColumnNames, check.names = TRUE, fill = FALSE,
strip.white = TRUE, blank.lines.skip = TRUE, comment.char = "#",
colClasses=c("integer",'integer','numeric','numeric','numeric','numeric','integer'))
badpids=d$Parent < -1
if(any(badpids)) {
warning("Invalid negative point index in file: ",f, ". Converting to -1")
d$Parent[badpids]=-1
}
d
}
#' @rdname read.neuron.swc
#' @inheritParams ngraph
#' @export
#' @description \code{read.ngraph.swc} reads an SWC file on disk into the more
#' generic (and forgiving) \code{\link{ngraph}} representation which provides
#' a bridge to the \code{\link[igraph]{igraph}} library.
read.ngraph.swc<-function(f, weights=FALSE, directed=TRUE, ...){
d=read.swc(f)
# multiply by 2 to get diam which is what I work with internally
d$W=d$W*2
as.ngraph(d, weights=weights, directed=directed, ...)
}
#' Test if a file is an SWC format neuron
#'
#' @details Note that this test is somewhat expensive compared with the other
#' file tests since SWC files do not have a consistent magic value. It
#' therefore often has to read and parse the first few lines of the file in
#' order to determine whether they are consistent with the SWC format.
#' @param f Path to one or more files
#' @inheritParams is.nrrd
#' @return logical value
#' @seealso \code{\link{read.neuron}}
#' @export
is.swc<-function(f, TrustSuffix=TRUE) {
if(length(f)>1) return(sapply(f, is.swc, TrustSuffix))
if(TrustSuffix && tools::file_ext(f)=='.swc') return(TRUE)
first_char=readChar(f, nchars = 1, useBytes = TRUE)
# must start with a number or a comment character
if(!grepl(first_char, "#0123456789", fixed = TRUE, useBytes = T))
return(FALSE)
# try reading a line, treating any warnings as errors
first_line=try(withCallingHandlers(
read.table(f, nrows = 1, col.names =
c("PointNo","Label","X","Y","Z","W","Parent")),
warning=function(w) stop(w) ),
silent = TRUE)
if(inherits(first_line, 'try-error')) return(FALSE)
# these columns must be integer
if(!all(sapply(first_line[c("PointNo", "Label", "Parent")], is.integer)))
return(FALSE)
# final check these columns must be numeric (double or integer)
all(sapply(first_line[c("X", "Y", "Z", "W")], is.numeric))
}
#' Write out a neuron skeleton or mesh in any of the file formats we know about
#'
#' If file is not specified the neuron's InputFileName field will be checked
#' (for a dotprops object it will be the \code{'file'} attribute). If this is
#' missing there will be an error. If dir is specified it will be combined with
#' basename(file). If file is specified but format is not, it will be inferred
#' from file's extension.
#'
#' @details Note that if \code{file} does not have an extension then the default
#' extension for the specified \code{format} will be appended. This behaviour
#' can be suppressed by setting \code{ext=NA}.
#'
#' If you find that some software rejects your SWC files, try setting
#' \code{normalise.ids=TRUE} (see examples). This will ensure that the vertex
#' ids are sequentially ascending integers (1:N). The default value of
#' \code{normalise.ids=NA} will normalise \code{PointNo} vertex ids only when
#' a vertex is connected (by the \code{Parent} field) to a vertex that had not
#' yet been defined. Many readers make the assumption that this is true. When
#' \code{normalise.ids=FALSE} the vertex ids will not be touched.
#'
#' @param n A neuron
#' @param file Path to output file
#' @param dir Path to directory (this will replace \code{dirname(file)} if
#' specified)
#' @param format Unique abbreviation of one of the registered file formats for
#' neurons including 'swc', 'hxlineset', 'hxskel' (skeletons) and 'ply', 'obj'
#' (neuron meshes). If no format can be extracted from the filename or the
#' \code{ext} parameter, then it defaults to 'swc' for skeletons and 'ply' for
#' meshes.
#' @param ext Will replace the default extension for the filetype and should
#' include the period e.g. \code{ext='.amiramesh'} or \code{ext='_reg.swc'}.
#' The special value of ext=NA will prevent the extension from being changed
#' or added e.g. if the desired file name does not have an extension.
#' @param Force Whether to overwrite an existing file
#' @param MakeDir Whether to create directory implied by \code{file} argument.
#' @inheritParams write.neurons
#' @param ... Additional arguments passed to selected writer function
#' @return return value
#' @export
#' @seealso \code{\link{write.neurons}}, \code{\link{read.neuron}},
#' \code{\link{fileformats}}, \code{\link{saveRDS}}
#' @examples
#' # show the currently registered file formats that we can write
#' fileformats(class='neuron', write=TRUE)
#' \dontrun{
#' # write neuron to "myneuron.swc" in SWC format
#' write.neuron(Cell07PNs[[1]], file='myneuron.swc')
#' # write in SWC format, normalising the integer ids that label every node
#' # (this is required by some SWC readers e.g. Fiji)
#' write.neuron(Cell07PNs[[1]], file='myneuron.swc', normalise.ids=TRUE)
#' # write out "myneuron.swc" in SWC format withour the final extension
#' write.neuron(Cell07PNs[[1]], file='myneuron.swc')
#' # write out "myneuron.amiramesh" in Amira hxlineset format
#' write.neuron(Cell07PNs[[1]], format = 'hxlineset', file='myneuron.amiramesh')
#'
#' # write out "myneuron.am" in Amira hxlineset format
#' write.neuron(Cell07PNs[[1]], format = 'hxlineset', file='myneuron')
#'
#' # write a mesh in Stanford ply
#' write.neuron(MBL.surf, file = 'MBL.surf.ply')
#' # ... or Wavefront obj format
#' write.neuron(MBL.surf, file = 'MBL.surf.obj')
#' # specify the format directly if not evident from file suffix
#' # not recommended though as will probably just cause trouble when reading
#' write.neuron(MBL.surf, file = 'MBL.surf', format='obj')
#' }
write.neuron<-function(n, file=NULL, dir=NULL, format=NULL, ext=NULL,
Force=FALSE, MakeDir=TRUE, metadata=NULL, ...){
if(is.dotprops(n)){
# we only know how to save dotprops objects in R's internal format
format=if(is.null(format)) 'rds' else match.arg(format, c("swc", "rds", "rdsb", "qs"))
if(is.null(file)) {
file=basename(attr(n,"file"))
if(is.null(file))
stop("No file specified and dotprops obj does not have a file attribute")
# trim off the suffix unless we have specified ext=NA
if(!(length(ext) && is.na(ext))) file=tools::file_path_sans_ext(file)
}
}
if(is.null(file)){
#Check if the InputFileName exists first..
if(isTRUE(nzchar(n$InputFileName)))
file=basename(n$InputFileName)
else
stop("No file specified and neuron does not have an InputFileName")
# trim off the suffix if ext!=NA
if(!(length(ext) && is.na(ext)))
file=tools::file_path_sans_ext(file)
}
if(!is.null(format)) {
# TODO - one day it should be possible to have one file format associated
# with different R classes
if(format=='obj') format='neuron.obj'
else if(format=='ply') format='neuron.ply'
}
fw=try(getformatwriter(format=format, file=file, ext=ext, class='neuron'), silent = T)
if(inherits(fw, 'try-error')) {
if(is.null(format)){
format <- if(inherits(n, 'mesh3d')) 'neuron.ply' else 'swc'
fw=getformatwriter(format=format, file=file, ext=ext, class='neuron')
warning('write.neuron: using default format="',format,'"')
} else {
# rethrow the error
stop(fw)
}
}
file=fw$file
if(!is.null(dir)) file=file.path(dir,basename(file))
# Now check that we can write to the location that we have chosen
if(!Force && file.exists(file)){
warning(file," already exists; use Force=T to overwrite")
return(NA_character_)
}
if(!file.exists(dirname(file))){
if(MakeDir){
if(!dir.create(dirname(file)))
stop("Unable to create ",dirname(file))
} else {
stop(dirname(file)," does not exist; use MakeDir=T to overwrite")
}
}
if(!file.create(file)){
stop("Unable to write to file ",file)
}
# OK all fine, so let's write
FUN=match.fun(fw$write)
write_metadata=FALSE
if(!is.null(metadata)) {
if(!isTRUE("metadata" %in% names(formals(FUN))))
warning("neuron writing function does not accept metadata")
else {
write_metadata=TRUE
if(!is.character(metadata)){
if(!requireNamespace('jsonlite', quietly = TRUE))
stop("Please install the jsonlite package to write metadata!")
if(is.data.frame(metadata))
metadata=as.list(metadata)
metadata=jsonlite::toJSON(metadata, auto_unbox = TRUE)
}
if(length(metadata)>1)
metadata=paste(metadata, collapse = ' ')
}
}
if(isTRUE(write_metadata)) FUN(n, file=file, metadata=metadata, ...)
else FUN(n, file=file, ...)
invisible(file)
}
# write neuron to SWC file
write.neuron.swc<-function(x, file, normalise.ids=NA, metadata=NULL, ...){
if(is.dotprops(x)) {
return(write.dotprops.swc(x, file, ...))
}
# assume we have a neuron
our_col_names<-c("PointNo","Label","X","Y","Z","W","Parent")
if(!all(our_col_names%in%colnames(x$d))) stop("Some columns are missing!")
df=x$d[,our_col_names]
colnames(df)[colnames(df)=="W"]="Radius"
# nb neurolucida seems to use diam, but swc uses radius
df$Radius=df$Radius/2
if(is.na(normalise.ids)) {
# figure out if any parents refer to nodes *after* the current one
# some swc readers do not like this
parent_idx=match(df$Parent, df$PointNo, nomatch = -1L)
premature_parents = parent_idx > seq_len(nrow(df))
normalise.ids=any(premature_parents)
}
if(normalise.ids) {
g=as.ngraph(x)
r=igraph::dfs(g, root=x$StartPoint, mode = 'all')
new_order=as.integer(r$order)
df=df[new_order,]
df$Parent=match(df$Parent, df$PointNo, nomatch = -1L)
df$PointNo=seq_along(df$PointNo)
}
writeLines(c("# SWC format file",
"# based on specifications at http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html"),
con=file)
cat("# Created by nat::write.neuron.swc\n", file=file, append=TRUE)
if(!is.null(metadata)) {
# read like so
# jsonlite::fromJSON(substr(j,8,nchar(j)), bigint_as_char=TRUE)
cat(paste0("# Meta: ", metadata, "\n"), file=file, append=TRUE)
}
cat("#", colnames(df), "\n", file=file, append=TRUE)
write.table(df, file, col.names=F, row.names=F, append=TRUE, ...)
}
write.dotprops.swc<-function(x, file, ...) {
df=dotprops2swc(x, ...)
writeLines(c("# SWC dotprops format", "# Created by nat::write.dotprops.swc",
"# see http://www.neuronland.org/NLMorphologyConverter/MorphologyFormats/SWC/Spec.html"),
con=file)
cat("#", colnames(df), "\n", file=file, append=TRUE)
write.table(df, file, col.names=F, row.names=F, append=TRUE)
}
#' Write neurons from a neuronlist object to individual files, or a zip archive
#'
#' @details See \code{\link{write.neuron}} for details of how to specify the
#' file format/extension/name of the output files and how to establish what
#' output file formats are available. A zip archive of files can be written by
#' specifying a value of \code{dir} that ends in \code{.zip}. When rds files
#' (R's binary data representation, which is compressed by default) are stored
#' inside a zip file the are not further compressed (zip option 0).
#'
#' @param nl neuronlist object
#' @param dir directory to write neurons, or path to zip archive (see Details).
#' @inheritParams write.neuron
#' @param subdir String naming field in neuron that specifies a subdirectory OR
#' expression to evaluate in the context of neuronlist's df attribute
#' @param INDICES Character vector of the names of a subset of neurons in
#' neuronlist to write.
#' @param files Character vector or expression specifying output filenames. See
#' examples and \code{\link{write.neuron}} for details.
#' @param metadata Whether to encode some metadata in the header file (currently
#' only supported for SWC format). Either a data.frame or \code{TRUE} to
#' indicate that the attached data.frame should be written. Default
#' \code{FALSE}.
#' @param include.data.frame Whether to include the metadata when writing a zip
#' file (it will be called \code{"write.neurons.dataframe.rds"}).
#' @param cl Either the integer number of cores to use for parallel writes (2 or
#' 3 seem useful) or a \code{cluster} object created by
#' \code{\link{makeCluster}}. See the \code{cl} argument of
#' \code{\link[pbapply]{pbsapply}} for details.
#' @param ... Additional arguments passed to \code{\link{write.neuron}}
#' @inheritParams write.neuron
#' @return the path to the output file(s), absolute when this is a zip file.
#' @author jefferis
#' @export
#' @seealso \code{\link{write.neuron}}, \code{\link{read.neurons}},
#' \code{\link{fileformats}}
#' @family neuronlist
#' @examples
#' \dontrun{
#' # write some neurons in swc format
#' write.neurons(Cell07PNs, dir="testwn", format='swc')
#' # write some neurons in swc format for picky software
#' write.neurons(Cell07PNs, dir="testwn", format='swc', normalise.ids=TRUE)
#' # write some neurons in swc format and zip them up
#' write.neurons(Cell07PNs, dir="testwn.zip", format='swc')
#'
#' # write some neurons in R's native RDS format using 3 cores for
#' # parallel writes and then zip them up (storing rather than compressing)
#' write.neurons(Cell07PNs, dir="testwn.zip", format='rds', cl=3)
#'
#' # write some neurons in Amira hxlineset format
#' write.neurons(Cell07PNs, dir="testwn", format='hxlineset')
#'
#' # write some neuron meshes in Stanford ply format (the default for meshes)
#' write.neurons(myneurons, dir="testwn")
#' # specify the format to avoid a warning. Write to a zip file.
#' write.neurons(myneurons, dir="testmeshes.zip", format='ply')
#' # Wavefront obj format
#' write.neurons(myneurons, dir="testwn", format='obj')
#'
#' # organise new files in directory hierarchy by glomerulus and Scored.By field
#' write.neurons(Cell07PNs,dir="testwn",
#' subdir=file.path(Glomerulus,Scored.By),format='hxlineset')
#' # ensure that the neurons are named according to neuronlist names
#' write.neurons(Cell07PNs, dir="testwn", files=names(Cell07PNs),
#' subdir=file.path(Glomerulus,Scored.By),format='hxlineset')
#' # only write a subset
#' write.neurons(subset(Cell07PNs, Scored.By="ACH"),dir="testwn2",
#' subdir=Glomerulus,format='hxlineset')
#' # The same, but likely faster for big neuronlists
#' write.neurons(Cell07PNs, dir="testwn3",
#' INDICES=subset(Cell07PNs,Scored.By="ACH",rval='names'),
#' subdir=Glomerulus,format='hxlineset')
#' # set file name explicitly using a field in data.frame
#' write.neurons(subset(Cell07PNs, Scored.By="ACH"),dir="testwn4",
#' subdir=Glomerulus, files=paste0(ID,'.am'), format='hxlineset')
#' }
write.neurons<-function(nl, dir, format=NULL, subdir=NULL,
INDICES=names(nl), files=NULL,
include.data.frame=FALSE,
metadata=FALSE,
Force=FALSE, cl=NULL, ...) {
if(!inherits(nl, 'neuronlist')) {
# check if this looks like some of the other kinds of object we might
# write by accident
single_object_classes <- c("mesh3d", "neuron", "dotprops", "hxsurf")
if(any(single_object_classes %in% class(nl)))
stop("Please use `write.neuron` to write a single neuron/mesh object!")
}
if(grepl("\\.zip", dir)) {
zip_file=dir
# check if file exists (and we want to overwrite)
if(file.exists(zip_file)){
if(!Force)
stop("Zip file: ", zip_file, "already exists")
unlink(zip_file)
}
# Get absolute path of parent dir
zip_dir=tools::file_path_as_absolute(dirname(zip_file))
# ... and use that to construct absolute path to output zip
zip_file=file.path(zip_dir, basename(zip_file))
dir <- file.path(tempfile("user_neurons"))
# we will be writing to an empty temp dir, so skip the file.exists check
Force=TRUE
#
if(isTRUE(include.data.frame)) {
if(any(names(nl) %in% "write.neurons.dataframe"))
stop("Cannot write a neuronlist object with a neuron named 'write.neurons.dataframe' when include.data.frame=TRUE")
}
} else {
zip_file=NULL
}
if(!file.exists(dir)) dir.create(dir)
df=attr(nl,'df')
# Construct subdirectory structure based on variables in attached data.frame
ee=substitute(subdir)
subdirs=NULL
if(!is.null(ee) && !is.character(ee)){
if(!is.null(df)) df=df[INDICES,]
subdirs=file.path(dir, eval(ee, df, parent.frame()))
names(subdirs)=INDICES
}
ff=substitute(files)
if(!is.null(ff)){
if(!is.character(ff))
files=eval(ff, df, parent.frame())
if(is.null(names(files))) names(files)=INDICES
}
written=structure(rep("",length(INDICES)), .Names = INDICES)
if(isTRUE(metadata)) metadata=df
else if(isFALSE(metadata)) metadata=NULL
if(!is.null(metadata)) {
checkmate::assert_data_frame(metadata, nrows = length(INDICES))
# turn the data.frame into a list with one entry for each neuron
# metadata=lapply(seq_len(nrow(metadata)), function(i) as.list(metadata[i,]))
}
if(interactive())
pb <- progress::progress_bar$new(format = " writing :current/:total [:bar] eta: :eta",
clear = FALSE,
total = length(INDICES),
show_after=2)
NINDICES=stats::setNames(nm = INDICES)
written=pbapply::pbsapply(NINDICES, cl = cl, metadata=metadata, ..., FUN=function(nn, metadata=NULL, ...) {
n=nl[[nn]]
thisdir=dir
if(is.null(subdirs)){
if(!is.null(subdir)){
propval=n[[subdir]]
if(!is.null(propval)) thisdir=file.path(dir, propval)
}
} else {
thisdir=subdirs[nn]
}
if(!file.exists(thisdir)) dir.create(thisdir, recursive=TRUE)
file=files[nn]
if(!isTRUE(nchar(file)>0)){
# the filename was not specified explicitly and we can't figure it out
# from field inside the neuron, so set to name of object in neuronlist
if(!is.neuron(n) || is.null(n$InputFileName))
file=nn
}
metadatarow <- if(!is.null(metadata)) {
metadata[match(nn, NINDICES), ]
} else NULL
write.neuron(n, dir=thisdir, file = file, format=format, Force=Force, metadata=metadatarow, ...)
})
if(!is.null(zip_file)) {
owd=setwd(dir)
on.exit(setwd(owd))
if(isTRUE(include.data.frame)) {
f='write.neurons.dataframe.rds'
saveRDS(as.data.frame(nl)[NINDICES,,drop=FALSE], f)
files=c(files, f)
}
# rds is already compressed so just store
zipflags=ifelse(format%in%c("rds", "rdsb", "qs"), "-qr0X", "-qr9X")
zip(zip_file, flags = zipflags,
files=if(getRversion()<"3.6.0") dir else dir(dir, recursive = TRUE))
unlink(dir, recursive=TRUE)
written<-zip_file
}
invisible(written)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.