#' Download a large set of well-traced skeletonised neurons from FlyWire
#'
#'@description Get a large number of skeletonised neurons from FlyWire. The neuronlist is saved as a
#' \code{nat::neuronlistfh} object so certain neurons may be read from it
#' without loading the entire, large \code{neuronlist} into memory. You will need access to the hemibrain Google Team Drive and
#' have it mounted with Google filestream.
#' If you want to flag flywire neurons that should be added to the Google drive, without doing this yourself, you can use
#' \code{flywire_request}. The \code{flywire_basics} function will calculate some useful meta-data, namely
#' a point in the primary neurite tract that can be used as a stable reference for this neuron.
#'
#' @param brain the brainspace in which hemibrain neurons have been registered. Defaults to raw voxel space for the FlyWire project.
#' @param x flywire IDs for desired. If left as \code{NULL}, all flywire neurons that can be summoned from the hemibrainr Google drive are summoned.
#' @param WithConnectors logical, if \code{TRUE} the neuron fetched have predicted synapses located within each neuron at \code{neuron$connectors} and
#' have a predicted axon/dendrite split at \code{neuron$d$Label} for each point in its skeleton, created using \code{\link{flow_centrality}}.
#' @param local \code{FALSE} or path. By default (\code{FALSE}) data is read from \code{options()$remote_connectome_data}), but the user can specify an alternative path.
#' @param mirror logical, whether or not to read neurons that have been mirrored (i.e. flipped to the 'other' brain hemisphere).
#' @param zip logical. If \code{TRUE} then \code{nat::neuronlistz} is used to read neurons from a \code{.zip} archive. Otherwise, \code{nat::neuronlistfh} is used to read neurons from a \code{.rds} file with a linked 'data' folder.
#' The \code{.zip} method is preferred. Both methods load a dynamic neuronlist, which only loads skeleton data into memory at the point where it is processed.
#' @param flywire.neurons a \code{neuronlist} of flywire neurons in FlyWire space. The \code{flywire_basics} function will calculate some useful meta-data, namely
#' a point in the primary neurite tract that can be used as a stable reference for this neuron.
#' @param request a \code{neuronlist}, matrix of x,y,z position or flywire ID to add to a
#' \href{https://docs.google.com/spreadsheets/d/1rzG1MuZYacM-vbW7100aK8HeA-BY6dWAVXQ7TB6E2cQ/edit#gid=0}{Google sheet} that records flywire positions
#' flagged to be processed into neuron skeletons that can be called by \code{flywire_neurons}.
#' @param selected_sheet the Google sheet onto which to add new flywire coordinate. I.e. \href{https://docs.google.com/spreadsheets/d/1rzG1MuZYacM-vbW7100aK8HeA-BY6dWAVXQ7TB6E2cQ/edit#gid=0}{Google sheet}.
#' @param sheet the tab onto which to add your requests.
#' @param gsheet logical, whether or not the request is are googlesheet keys. If they are, every item in their \code{flywire_xyz} columns is added to
#' the sheet specified by \code{selected_sheet}.
#' @param type When using neurons with \code{flywire_neurons} from the Google drive a neuronlistfh object of neuron skeletons (default),
#' the saved \code{.swc} files, a \code{nat::dotprops} objects or a \code{nat::dotprops object} cut to the hemibrain volume. Not all brainspaces supported.
#' @param user character, the user who is added new data.
#' @param ... Additional arguments passed to \code{nat::nlapply}.and/or \code{fafbseg::skeletor}.
#'
#' @examples
#' \donttest{
#' \dontrun{
#' # Loads all processed flywire neurons as a neuronlistfh object
#' fw.neurons = flywire_neurons()
#'
#' # Get them already bridged to the JRC2018F brainsapce
#' ## (Bogovic et al. 2018, high performance brain space)
#' fw.neurons.jrc2018f = flywire_neurons(brain = "JRC2018F")
#'
#' # Get them already mirrored to the other hemispehre, i.e. flipped
#' fw.neurons.jrc2018f.m = flywire_neurons(brain = "JRC2018F", mirror = TRUE)
#'
#' # Now say you have some flywire ID~s you wants to add
#' ## to the nightly processing so they are available from Google drive.
#' ## And they are stored in a .csv:
#' library(readr)
#' csv = read_csv("/users/abates/Downloads/FlyWire_list.txt",
#' col_type = cols(.default = "c"))
#' ids = csv[,1][[1]]
#' neurons = skeletor(ids)
#' neurons.with.info = hemibrainr:::flywire_basics(neurons)
#' new.points = nat::xyzmatrix(neurons.with.info[,"flywire_xyz"])
#' flywire_request(new.points)
#' ## Now they are added to a google sheet, and will be read and
#' ## processed as part of this nightly pipeline:
#' ### https://github.com/flyconnectome/fafbpipeline
#'
#' # Now get some split neurons with synapses
#' fw.split = flywire_neurons(WithConnectors=TRUE)
#' nopen3d()
#' nlscan_split(fw.split[sample(length(fw.split),15)])
#'
#' # Get neuron dotprops object in hemibrain brainspace,
#' ## ready for NBLAST with hemibrain neurons
#' fw.dps.cut = flywire_neurons(type="cut_dotprops", brain = "JRCFIB2018F")
#'
#'}}
#'@return A \code{neuronlist} object containing flywire skeletons. In the meta-data, it might be useful for some users to note that
#'you will get:
#'
##' \itemize{
##' \item{"root_id"}{ The ID given to the corresponding volumetric body in flywire.
##' These are used to do things like fetch volumes and are the input to the \code{skeletor} function. However, they are highly volatile and
##' change a lot with active tracing.}
##' \item{"flywire_xyz"}{ The voxel coordinate of a single point in this neuron, usually a cell body fiber tract position. This is a more accurate way
##' of keeping tract of neuron as it will always correspond to the same 'neuron' even though its related root_id will change with merge/split events in flywire.}
##' \item{"hemilineage"}{ An estimated hemilineage identity from both of two naming systems, Ito et al. 2013 and Wong et al. 2013}
##' \item{"side"}{ An estimate as to the 'side', i.e. brain hemisphere, in which the neuron lies}
##' \item{"skid"}{ The 'skeleton ID' of this neuron's match in CATMAID for FAFBv14}
##' \item{"fafb_xyz"}{ The coordinates in nanometres of a point in the neuron, in FAFBv14 space}
##' \item{"hemibrain_match"}{ The bodyid of an estimated hemibrain_match}
##' }
#'
#'@export
#'@seealso \code{\link{hemibrain_read_neurons}}
#'@importFrom utils download.file
#'@importFrom googledrive drive_ls as_id
flywire_neurons <- function(x = NULL,
WithConnectors = FALSE,
local = FALSE,
version = NULL,
brain = c("FlyWire", "JRCFIB2018Fraw","JRCFIB2018F","FAFB","FAFB14","JFRC2", "JFRC2013","JRC2018F","FCWB"),
mirror = FALSE,
type = c("neurons", "swc","dotprops","cut_dotprops"),
zip = TRUE,
...){
brain = match.arg(brain)
type = match.arg(type)
pattern = ifelse(zip,"zip","rds")
if(type=="neurons"){
type=""
}
if(brain == "JRCFIB2018Fraw"){
brain = "JRCFIB2018F"
scale = TRUE
}else if (brain %in% c("FAFB")){
brain = "FAFB14"
scale = FALSE
}else{
scale = FALSE
}
# Get Google drive folder
savedir = good_savedir(local = local)
neuronsdir = file.path(savedir,"flywire_neurons/")
if(!is.null(version)){
fhdir = file.path(neuronsdir,version,brain,"/")
}else{
fhdir = file.path(neuronsdir,brain,"/")
}
# Get synapses from private drive
if(type%in%c("dotprops","cut_dotprops")){
if(brain!="JRCFIB2018F"){
brain = "JRCFIB2018F"
fhdir = file.path(neuronsdir,brain,"/")
warning("When type = 'dotprops' only JRCFIB2018F brainspace supported from hemibrain google drive")
}
}else{
if(WithConnectors){
type = ""; local = FALSE; mirror = FALSE; brain = "FlyWire"
if(brain!="FlyWire"){
warning("When WithConnectors = TRUE only FlyWire brainspace supported from hemibrain google drive")
}
}
}
# Exists?
if(!file.exists(fhdir)){
stop("Cannot find file: ", fhdir)
}
# Read
if(type=="swc"){
if(brain!="FlyWire"){
brain = "FlyWire"
warning("Only native flywire neurons in FlyWire space supported when swc = TRUE")
}
if(zip){
warning("zip set to FALSE, when swc is TRUE")
zip = FALSE
}
swc.folder = file.path(neuronsdir,"FlyWire","swc")
swc.list = list.files(swc.folder, full.names = TRUE, pattern = "swc")
if(!is.null(x)){
swc.list = swc.list[grepl(paste(x,collapse="|"),swc.list)]
}
neurons.fh = nat::read.neurons(paths = swc.list)
fw.meta = flywire_meta(sql=FALSE)
neurons.fh[,] = fw.meta[match(names(neurons.fh),fw.meta$root_id),]
}else{
message("Loading ", fhdir)
filelist = list.files(path = fhdir, pattern = pattern, full.names = TRUE)
if(type=="cut_dotprops"){
filelist = filelist[grepl(type,filelist)]
}else if (WithConnectors){
filelist = filelist[grepl('flow',filelist)]
}else{
filelist = filelist[!grepl("cut_dotprops",filelist)]
filelist = filelist[grepl(type,filelist)]
}
filelist = filelist[grepl("mirror",filelist)==mirror]
filelist = sort(filelist,decreasing = TRUE)
if(length(filelist)){
fh.file = filelist[1]
if(zip){
neurons.fh = nat::neuronlistz(fh.file)
}else{
neurons.fh = nat::read.neuronlistfh(fh.file)
}
test = tryCatch(neurons.fh[[1]], error = function(e){
warning(as.character(e))
try(file.remove(paste0(attributes(neurons.fh)$db@datafile,"___LOCK")), silent = TRUE)
})
attr(neurons.fh,"df") = neurons.fh[,]
}else{
warning(pattern, " file for neuronlist not found at: ", fhdir)
return(NULL)
}
}
# Scale neurons if needs be
if(scale){
neurons.fh = scale_neurons(neurons.fh,scaling=8/1000, ...)
}
# Return
if(!is.null(x)){
neurons.fh = neurons.fh[names(neurons.fh)%in%x]
}
neurons.fh[,] = root_id_correct(neurons.fh[,])
neurons.fh
}
#' @rdname flywire_neurons
#' @export
flywire_basics <- function(flywire.neurons, ...){
if(!nat::is.neuronlist(flywire.neurons)){
stop("flywire.neurons must be a neuronlist")
}
# Get xyz for primary branch points
simp = nat::nlapply(flywire.neurons,nat::simplify_neuron,n=1)
branchpoints = sapply(simp, function(y) nat::xyzmatrix(y)[ifelse(length(nat::branchpoints(y)),nat::branchpoints(y),max(nat::endpoints(y))),])
branchpoints = t(branchpoints)
flywire.nm.xyz = apply(branchpoints, 1, paste_coords)
# Convert to voxel space
branchpoints.flywire.raw = scale(branchpoints, scale = c(4, 4, 40), center = FALSE)
flywire_xyz = apply(branchpoints.flywire.raw, 1, paste_coords)
# Flywire svid
svids=fafbseg::flywire_xyz2id(nat::xyzmatrix(branchpoints.flywire.raw), root=FALSE, rawcoords = TRUE)
# Get FAFBv14 nm coordinates
fafb_xyz = nat.templatebrains::xform_brain(branchpoints, sample = "FlyWire", reference = "FAFB14", ...)
fafb_xyz = apply(fafb_xyz, 1, paste_coords)
#fafb_xyz = ""
# Add meta data
flywire.neurons[,"root_id"] = names(flywire.neurons)
flywire.neurons[,"flywire_xyz"] = flywire_xyz
flywire.neurons[,"flywire_svid"] = svids
flywire.neurons[,"fafb_xyz"] = fafb_xyz
flywire.neurons[,"dataset"] = "flywire"
flywire.neurons[,"id"] = NULL
# Add IDs
flywire.neurons = add_field_seq(flywire.neurons,flywire.neurons[,"root_id"],field="root_id")
# return
flywire.neurons
}
# Add neuron to request Google sheets
#' @rdname flywire_neurons
#' @export
flywire_request <- function(request,
gsheet = FALSE,
selected_sheet = options()$flywire_flagged_gsheet,
sheet = "flywire",
user = "hemibrainr",
...){
if(!requireNamespace("fafbseg", quietly = TRUE)) {
stop("Please install fafbseg using:\n", call. = FALSE,
"remotes::install_github('natverse/fafbseg')")
}
# What kind of request is it?
if(gsheet){
xyz = c()
type = "googlesheet"
for(req in request){
tabs = gsheet_manipulation(FUN = googlesheets4::sheet_names,
ss = req,
return = TRUE)
for(tab in tabs){
fts = try(flywire_tracing_sheet(ws = tab,
selected_sheet=req,
Verbose = FALSE), silent = TRUE)
xyz = unique(c(xyz,fts$flywire_xyz))
}
if(!length(xyz)){
stop("No flywire positions to add")
}
}
xyz=xyz[!is.na(xyz)]
xyz = nat::xyzmatrix(xyz)
}else{
type = if(nat::is.neuronlist(request)){
"neuronlist"
}else if(is.data.frame(request)|is.matrix(request)){
"xyz"
}else{
if(nrow(nat::xyzmatrix(request))){
"xyz"
}else{
"ids"
}
}
}
message("Request is ", type)
# Get coordinates
if(type=='ids'){
request = fafbseg::skeletor(request, ...)
}
if(nat::is.neuronlist(request)){
fb = flywire_basics(request)
xyz = do.call(rbind, lapply(fb[,"flywire_xyz"], nat::xyzmatrix))
}else if(type!="googlesheet"){
xyz = as.data.frame(nat::xyzmatrix(request), stringsAsFactors =FALSE)
}
fw.xyz = apply(xyz,1,paste_coords)
fw.xyz = fw.xyz[fw.xyz!="(NA,NA,NA)"]
# Add to Google sheet
gs = try(flywire_tracing_sheet(ws=sheet,open=FALSE,selected_sheet=selected_sheet,Verbose=FALSE), silent = FALSE)
fw.xyz = setdiff(fw.xyz,gs$flywire_xyz)
if(is.null(fw.xyz)||!length(fw.xyz)){
warning("No new flywire positions to add")
return(NULL)
}
if(class(gs)!="try-error"){
fw.xyz = setdiff(fw.xyz,gs$flywire_xyz)
update = data.frame(user = "flywire", flywire_xyz = fw.xyz)
for(col in setdiff(colnames(gs),colnames(update))){
if(col=="status"){
update[[col]] = "unassessed"
}else if(col=="workflow"){
update[[col]] = "trace"
}else if(col=="added_by"){
update[[col]] = user
}else if(col=="user"){
update[[col]] = user
}else if(col=="user"){
update[[col]] = randomwords(n=nrow(update),words= 2,collapse = "_")
}else{
update[[col]] = NA
}
}
update = update[,colnames(gs)]
}else{
update = data.frame(user = "flywire", flywire_xyz = fw.xyz)
}
batches = split(1:nrow(update), ceiling(seq_along(1:nrow(update))/500))
for(i in batches){
gsheet_manipulation(FUN = googlesheets4::sheet_append,
data = update[min(i):max(i),],
ss = selected_sheet,
sheet = sheet)
}
message(sheet, ": ", nrow(update), " FlyWire positions added")
}
#' Read precomputed flywire data from the hemibrainr Google Drive
#'
#' @description Read precomputed data available on the hemibrain Google Team
#' Drive. (see \code{\link{hemibrainr_set_drive}}) and (see
#' \code{\link{hemibrainr_rclone}}). This includes body IDs for all flywire
#' neurons ((\code{flywire_ids})), and user contributions towards their
#' creation (\code{flywire_contributions}), as well as Flywire related NBLAST
#' scores retrieved using \code{\link{hemibrain_nblast}}.
#'
#' @inheritParams hemibrainr_googledrive_data
#' @param simplified flywire synapses, where nearby Buhmann predicted synapses (often multiple per real synapses) are algorithmically combined into approximate single synapses
#' using \code{hemibrainr:::flywire_synapse_simplify}.
#'
#' @return a \code{data.frame}. Depending on which synapse function was called, it can contain the columns:
#'
#' \itemize{
#'
#' \item{"flywire_xyz"} { - coordinates of a point in the neuron in flywire voxel space. XYZ, separated by a semicolon.}
#'
#' \item{"root_id"}{ - the unique ID associated with this flywire neuron. This ID changes every time a neuron is, even slightly, modified. So it is an unstable identifier.
#' This is why \code{flywire_xyz} is sometimes used.}
#'
#' \item{"fw.x"}{ - the x coordinate of a point in the flywire neuron, in flywire voxel space..}
#'
#' \item{"fw.y"}{ - the y coordinate of a point in the flywire neuron, in flywire voxel space..}
#'
#' \item{"fw.z"}{ - the z coordinate of a point in the flywire neuron, in flywire voxel space..}
#'
#' \item{"user_name"}{ - the name of the user who made the number of edits given in this row.}
#'
#' \item{"edits"}{ - the number of edits (merges, splits, etc.) made by a user for the given \code{root_id}.}
#'
#' \item{"proportion"}{ - the proportion of total edits for this neuron, that the given user made.}
#'
#' \item{"dataset"}{ - the dataset this neuron is from, i.e. flywire.}
#'
#'}
#'
#' @examples
#' \donttest{
#' \dontrun{
#'
#' # All flywire IDs for neurons that have a split precomputed
#' fw.ids = flywire_ids()
#'
#' # For these flywire IDs, their meya data:
#' fw.meta = flywire_meta()
#'
#' # For flywire IDs, which users contributed what:
#' fw.edits = flywire_contributions()
#'
#' }}
#' @seealso \code{\link{hemibrain_splitpoints}},
#' \code{\link{hemibrain_flow_centrality}},
#' \code{\link{hemibrainr_googledrive_data}},
#' \code{\link{hemibrain_metrics}}
#' @name flywire_googledrive_data
#' @aliases flywire_meta
#' @export
flywire_meta <-function(local = FALSE, folder = "flywire_neurons/", sql = FALSE, ...){
savedir = good_savedir(local = local)
if(sql){
gcsv = find_gsql(savedir = savedir, tab = "flywire_meta", sql.db = "flywire_data.sqlite", folder = folder, ...)
}else{
gfile = find_gfile(savedir = savedir, file = "flywire_meta", folder = folder)
gcsv = suppressWarnings(readr::read_csv(gfile, col_types = sql_col_types))
}
if(!length(gcsv)){
warning('no data, file may be online-only?')
}
colnames(gcsv) = snakecase::to_snake_case(colnames(gcsv))
gcsv
}
#' @rdname flywire_googledrive_data
#' @export
flywire_failed <-function(local = FALSE, folder = "flywire_neurons/", sql = FALSE, ...){
savedir = good_savedir(local = local)
if(sql){
gcsv = find_gsql(savedir = savedir, tab = "flywire_failed", sql.db = "flywire_data.sqlite", folder = folder, ...)
}else{
gfile = find_gfile(savedir = savedir, file = "flywire_failed", folder = folder)
gcsv = suppressWarnings(readr::read_csv(gfile, col_types = sql_col_types))
}
if(!length(gcsv)){
warning('no data, file may be online-only?')
}
colnames(gcsv) = snakecase::to_snake_case(colnames(gcsv))
gcsv
}
# hidden
sql_col_types <- readr::cols(.default = "c",
edits = "i",
total.edits = "i",
total_edits = "i",
upstream = "i",
downstream = "i",
voxels = "i",
layer = "n",
ct.layer = "n",
ct_layer = "n",
orig.soma = "?",
orig_soma = "?",
soma = "?",
soma.edit = "?",
truncated = "?",
splittable = "?",
checked = "?",
edited.cable = "?",
edited_cable = "?",
skeletonization = "?",
time = "?",
total_outputs = "i",
axon_outputs = "i",
dend_outputs = "i",
total_inputs = "i",
axon_inputs = "i",
dend_inputs = "i",
axon_outputs = "i",
dend_outputs = "i",
axon_inputs = "i",
dend_inputs = "i",
total_outputs_density = "n",
total_inputs_density = "n",
axon_outputs_density = "n",
dend_outputs_density = "n",
axon_inputs_density = "n",
dend_inputs_density = "n",
total_length = "n",
axon_length = "n",
dend_length = "n",
pd_length = "n",
length = "?",
segregation_index = "n",
X = "n",
Y = "n",
Z = "n",
x = "n",
y = "n",
z = "n",
fw.x ="n",
fw.y = "n",
fw.z = "n",
fw_x ="n",
fw_y = "n",
fw_z = "n",
cable.length = "n",
cable_length = "n",
proportion = "n",
count = "i",
weight = "n",
norm = "n",
synapses = "n",
syn = "n",
syns = "n",
n.syn = "n",
nodes = "n",
endpoints = "n",
segments = "n",
branchpoints = "n",
root = "i",
top_p = "n",
scores = 'n',
cleft_scores = 'n',
top_nt = "c",
top_nt_p = 'n',
gaba = "n",
acetylcholine = "n",
glutamate = "n",
octopamine = "n",
serotonin = "n",
dopamine = "n",
dcv_count = "i",
dcv_area = "n",
prepost = "i",
total.pre = "i",
total.post = "i",
total_pre = "i",
total_post = "i",
axon.pre = "i",
axon.post = "i",
dend.post = "i",
dend.pre = "i",
overlap_locality = "n",
position = "i",
confidence = "n",
bodyid = "c",
root_id = "c",
pre_id = "c",
post_id = "c",
pre = "c",
hemibrain_nblast_1 = "c",
hemibrain_nblast_2 = "c",
flywire_mirror_nblast_1 = "c",
flywire_mirror_nblast_2 = "c",
hemilineage_nblast_1 = "c",
hemilineage_nblast_2 = "c",
postsynapse_side_index='n',
presynapse_side_index='n',
axon_postsynapse_side_index='n',
axon_presynapse_side_index='n',
dendrite_postsynapse_side_index='n',
dendrite_presynapse_side_index='n',
conf_nt = 'c',
conf_nt_p = 'n',
Tags = 'c',
tags = 'c',
neither = 'n',
conf_nt_p = 'n',
hemibrain_match_conf_nt_p='n',
hemisphere_match_conf_nt_p='n',
match_conf_nt_p='n',
prod_i='n',
sum_a='n',
conf_nt = 'c',
dcv_count = 'i',
dcv_density = 'n')
#' @rdname flywire_googledrive_data
#' @export
flywire_contributions <-function(local = FALSE, folder = "flywire_neurons/", sql = TRUE, ...){
savedir = good_savedir(local = local)
if(sql){
gcsv = find_gsql(savedir = savedir, tab = "flywire_edits", sql.db = "flywire_data.sqlite", folder = folder, ...)
}else{
gfile = find_gfile(savedir = savedir, file = "flywire_edits", folder = folder)
gcsv = suppressWarnings(readr::read_csv(gfile, col_types = sql_col_types))
}
colnames(gcsv) = snakecase::to_snake_case(colnames(gcsv))
gcsv
}
#' @rdname flywire_googledrive_data
#' @export
flywire_ids <-function(local = FALSE, folder = "flywire_neurons/", sql = FALSE, ...){
savedir = good_savedir(local = local)
if(sql){
gcsv = find_gsql(savedir = savedir, tab = "flywire_ids", sql.db = "flywire_data.sqlite", folder = folder, ...)
}else{
gfile = find_gfile(savedir = savedir, file = "flywire_ids", folder = folder)
gcsv = suppressWarnings(readr::read_csv(gfile, col_types = sql_col_types))
gcsv = as.character(gcsv$x)
}
colnames(gcsv) = snakecase::to_snake_case(colnames(gcsv))
gcsv
}
#' Update the root_id column in a set of google sheets based on flywire_xyz positions
#'
#' @description This function retrieves flywire IDs based on xyz positions in flywire voxel space, from a set of google sheets.
#' It also writes the updated flywire IDs to the same google sheets. This is often helpful because flywire IDs are inherently unstable, they change every time
#' a neuron is modified even slightly. users can record 'stable' points in a neuron that identify it, e.g. a single xyz position in the cell body fibre, or at the soma, and
#' then use this function to update and get the correct flywire ID whenever they wish.
#'
#' @param selected_sheets character vector. the google sheet(s) to update. Each entry is a unique google sheet ID. You can find these in a sheet's URL.
#' If \code{NULL} then defaults to \code{option('hemibrainr_gsheets')}.
#' @param chosen.columns as well as writing column updates to the specified google sheets, this function returns a \code{data.frame} built from all given sheets and their
#' individual tabs, that have been updated. This argument specifies which column you want returned. Filled with NAs if it does not exist.
#' @param ws character vector, tabs (i.e. work sheets) on the google sheet to query/read. This works with regex, so you only need to give the name partially.
#' If set to \code{NULL} for \code{flywire_tracing_sheets}, the whole google sheet is read and all tabs are combined using \code{plyr::rbind.fill}.
#' @param regex logical, use \code{ws} with regex.
#' @param match logical. If \code{TRUE}, hemibrain_matches given.
#' @param meta meta data for flywire neurons, e.g. as retrieved using \code{\link{flywire_meta}}. Used to efficiently input \code{flywire_xyz} column if only a \code{root_id} entry has been given.
#' Only works if that id is also in this provided \code{data.frame}, \code{meta}.
#' @param Verbose logical, whether or not to supply you with messages.
#' @param retry integer, sometimes \code{fafbseg::flywire_xyz2id} fails due to a server timeout. This is the number of times to re-attempt failed calls before accepting defeat.
#' @inheritParams matches_update
#'
#' @details For this function to work, the specified google sheet(s) must have either the column \code{flywire_xyz},
#' which gives the xyz position of points in a format that can be read by \code{nat::xyzmatrix}, for example \code{"(135767,79463,5284)"} or \code{"(135767;79463;5284)"}.
#' If this is missing, then the columns: \code{fw.x}, \code{fw.y}, \code{fw.z} must be specified. The xyz positions must be in FlyWire voxel space, which is what you get if you use the
#' copy location tool in the flywire.ai web-interface.
#'
#' The logic of the update procedure is:, find the \code{flywire_xyz} column.
#' If that does not exist, find: \code{fw.x}, \code{fw.y}, \code{fw.z}, and use that to create a \code{flywire_xyz} column.
#' We use \code{flywire_xyz} if both are given, and there is a mismatch.
#' For each row, a \code{root_id} is then found based on these points, using \code{fafbseg::flywire_xyz2id} (using the argument \code{rawcoords = TRUE}).
#' The google sheet columns \code{root_id},\code{flywire_xyz}, \code{fw.x}, \code{fw.y}, \code{fw.z} are then updated if they exist in the original google sheet.
#' If they do not, they are not updated. The function returns a \code{data.frame} combining all tabs of all googlesheets specified, but returning only the columns
#' specified by the argument \code{chosen.columns}.
#'
#' @return a \code{data.frame} with columns from the given google sheet(s), specified using the argument \code{chosen.columns}.
#'
#' @examples
#' \donttest{
#' \dontrun{
#'
#' # Update flywire_ids in the sheet:
#' ### https://docs.google.com/spreadsheets/d/
#' ### 1rzG1MuZYacM-vbW7100aK8HeA-BY6dWAVXQ7TB6E2cQ
#' ### /edit#gid=0
#' ## And return
#' fw.gsheet.meta = flywire_ids_update(selected_sheets =
#' "1rzG1MuZYacM-vbW7100aK8HeA-BY6dWAVXQ7TB6E2cQ")
#'
#' }}
#' @seealso \code{\link{flywire_ids}},
#' \code{\link{flywire_meta}},
#' \code{\link{flywire_neurons}}
#' @name flywire_ids_update
#' @export
flywire_ids_update <- function(selected_sheets = NULL, # "1rzG1MuZYacM-vbW7100aK8HeA-BY6dWAVXQ7TB6E2cQ"
chosen.columns = c('flywire_xyz', "flywire_svid",
"root_id", "skid",
"fafb_xyz", "cell_type", "side",
"ito_lee_hemilineage", "hartenstein_hemilineage",
"status"),
ws = NULL,
regex = FALSE,
meta = NULL,
match = FALSE,
matching_sheet = options()$hemibrainr_matching_gsheet,
priority = c("FAFB","hemibrain"),
Verbose = TRUE,
retry = 1){
if(is.null(selected_sheets)){
selected_sheets = getOption("hemibrainr_gsheets", stop("Please set option('hemibrainr_gsheets')"))
}
tracing.list = list()
for(selected_sheet in selected_sheets){
## Read Google sheets and extract flywire neuron positions
if(Verbose) { message("### working on google sheet: ", selected_sheet) }
if(is.null(ws)){
tabs = gsheet_manipulation(FUN = googlesheets4::sheet_names,
ss = selected_sheet,
return = TRUE,
Verbose = FALSE)
}else{
if(regex){
ws=regex_tab_names(regex=ws,selected_sheet=selected_sheet)$name
}
tabs=ws
}
pb = progress::progress_bar$new(
format = " updating tab :what [:bar] :percent eta: :eta",
clear = !Verbose, total = length(tabs))
for(tab in tabs){
fw.columns = c("root_id","fw_x","fw_y","fw_z","flywire_xyz","flywire_svid")
# Update progress bar
pb$tick(tokens = list(what = tab))
# Get google sheet tab as data frame
gs.t = gs.t.current = flywire_tracing_sheet(ws = tab, selected_sheet=selected_sheet, Verbose = FALSE)
# If post_id or pre_id a stand in for root_id
if(!"root_id"%in%gs.t.current){
if(sum(c("pre_id","post_id")%in%colnames(gs.t.current))==1){
fw.id = intersect(c("pre_id","post_id"), colnames(gs.t.current))
fw.columns = c(fw.columns,fw.id)
}
}else{
fw.id = "root_id"
}
# Get original columns for later
used.cols = colnames(gs.t.current)
## if valid data frame with flywire columnes
if(nrow(gs.t)&&ncol(gs.t)&&sum(fw.columns%in%used.cols)>0){
xyz_there = "flywire_xyz" %in% used.cols | all( c("fw_x","fw_y","fw_z") %in% used.cols )
if(xyz_there){
# Separate x,y,z positions
for(fw.c in fw.columns){
if(is.null(gs.t[[fw.c]])){
gs.t[[fw.c]] = NA
}
}
# Get seemingly good xyz flywire positions
good.xyz = sapply(gs.t$flywire_xyz,function(x) length(tryCatch(nat::xyzmatrix(x),error = function(e) NA))==3)
# If coordinates separately given, combine
gs.t[!good.xyz,c("flywire_xyz")] = apply(gs.t[!good.xyz,c("fw_x","fw_y",'fw_z')],1,paste_coords)
good.xyz = sapply(gs.t$flywire_xyz,function(x) length(tryCatch(nat::xyzmatrix(x),error = function(e) NA))==3)
# Provide separate columns for x,y,z positions
if(sum(good.xyz)){
gs.t[good.xyz,c("fw_x","fw_y","fw_z")] = nat::xyzmatrix(gs.t[good.xyz,"flywire_xyz"])
}
good.xyz = !is.na(gs.t$fw_x)
# If flywire_svid missing, update
bad.svids = (is.na(gs.t$flywire_svid)|gs.t$flywire_svid=="0")&!is.na(gs.t$flywire_xyz)
if(sum(bad.svids)>0 & "flywire_svid"%in%colnames(gs.t) & sum(good.xyz)>1){
gs.t$flywire_svid[bad.svids] = tryCatch(fafbseg::flywire_xyz2id(nat::xyzmatrix(gs.t$flywire_xyz[bad.svids]),
root=FALSE,
rawcoords = TRUE),
error = function(e){
warning(as.character(e))
NA
})
}
fids = NULL
if(!all(is.na(gs.t$flywire_xyz))){
# Get the ids that need to be updated
fwids.old = gs.t[[fw.id]]
fwids.old.not.good = is.na(gs.t[[fw.id]])|gs.t[[fw.id]]%in%c("0","NA",""," ","\n")|!grepl("^[0-9]{1,}$", gs.t[[fw.id]])
latest = !fwids.old.not.good
latest[latest] = tryCatch(fafbseg::flywire_islatest(fwids.old[latest]), error = function(e){
sapply(fwids.old[latest], function(fo) tryCatch(fafbseg::flywire_islatest(fo), error = function(e) FALSE))
}
)
latest = as.logical(latest)
latest[is.na(latest)] = FALSE
#gs.n = gs.t[!latest,]
if(sum(!latest)>0){ ### Try to work out from xyz even if out of range
# Get flywire IDs from these positions
bbx = matrix(c(5100, 1440, 16, 59200, 29600, 7062),ncol=3,byrow = TRUE)
bbx = nat::boundingbox(scale(bbx, scale = 1/c(4, 4, 40), center = FALSE))
gs.n$fw.x = as.numeric(gs.n$fw.x)
gs.n$fw.y = as.numeric(gs.n$fw.y)
gs.n$fw.z = as.numeric(gs.n$fw.z)
pos = gs.n[apply(gs.n, 1, function(row) sum(is.na(row[c("fw.x","fw.y",'fw.z')]))==0),]
p = nat::pointsinside(pos[,c("fw.x","fw.y",'fw.z')],bbx)
pos = pos[p,]
pos = pos[!is.na(pos$fw.x),]
if(nrow(pos)){
foreach.ids = try(fafbseg::flywire_xyz2id(pos[,c("fw.x","fw.y",'fw.z')], rawcoords = TRUE), silent = TRUE)
sleep = 10
if(class(foreach.ids)=="try-error" & retry>0){
retry = as.integer(retry)
while(retry>0&class(foreach.ids)=="try-error"){
Sys.sleep(sleep)
retry=retry-1
foreach.ids = try(fafbseg::flywire_xyz2id(pos[,c("fw.x","fw.y",'fw.z')], rawcoords = TRUE), silent = TRUE)
sleep = sleep+sleep
}
}
if(class(foreach.ids)=="try-error"){
stop(paste0("fafbseg::flywire_xyz2id could not be used for ", tab," in sheet ", selected_sheet))
}
names(foreach.ids) = pos[,"flywire_xyz"]
}else{
foreach.ids = NULL
}
fids = unlist(foreach.ids)
if(!is.null(fids)){
fids[is.na(fids)|is.nan(fids)] = "0"
replacement = fids[match(gs.n$flywire_xyz ,names(fids))]
coordsmissing = gs.n$flywire_xyz==paste_coords(matrix(NA,ncol=3))
coordsmissing[is.na(gs.n$flywire_xyz)] = TRUE
coordsmissing[is.na(replacement)] = TRUE
replacement[coordsmissing] = gs.n[[fw.id]][coordsmissing]
gs.t[[fw.id]][!latest] = as.character(replacement)
}
}# END latest
}
# If ID is given and no xyz
bad.xyz = (is.na(gs.t$flywire_xyz)|gs.t$flywire_xyz=="0")&!is.na(gs.t[[fw.id]])
if(sum(bad.xyz)>0){
ids.fresh = try(fafbseg::flywire_updateids(gs.t[[fw.id]][bad.xyz]),silent = TRUE)
if(!"try-error"%in%class(ids.fresh)){
gs.t[[fw.id]][bad.xyz] = as.character(ids.fresh)
}else{
warning(ids.fresh)
}
}
}
# If only skid given try and guess root_id
if("skid"%in%used.cols){
justskids=((gs.t$flywire_xyz==paste_coords(matrix(NA,ncol=3))|is.na(gs.t$flywire_xyz))
&(is.na(gs.t[[fw.id]])|gs.t[[fw.id]]=='0')
&!is.na(gs.t$skid))
justskids[is.na(justskids)] = FALSE
if(sum(justskids)>0){
if(Verbose) message("Geting flywire IDs for skids")
replacement.ids = unlist(pbapply::pbsapply(gs.t[justskids,"skid"], function(x)
tryCatch(fafb14_to_flywire_ids_timed(x, only.biggest = TRUE),error=function(e){warning(as.character(e));NA})))
if(inherits(replacement.ids,"try-error")){
warning(replacement.ids)
}else{
replacement.ids = tryCatch(as.character(replacement.ids[[fw.id]]), error = function(e) NA)
gs.t[justskids,"root_id"] = replacement.ids
}
}
}
# update with flywire_xyz if possible
if(!is.null(meta)){
justids=(gs.t$flywire_xyz==paste_coords(matrix(NA,ncol=3))
|is.na(gs.t$flywire_xyz)
&!is.na(gs.t[[fw.id]]))
justids[is.na(justids)] = FALSE
if(sum(justids)>0){
replacement.xyz = meta[match(gs.t[justids,"root_id"],meta[[fw.id]]),]$flywire_xyz
gs.t[justids,"flywire_xyz"] = replacement.xyz
}
}
# Change 0 to NA
good.xyz = sapply(gs.t$flywire_xyz,function(x) length(tryCatch(nat::xyzmatrix(x),error = function(e) NA))==3)
if(sum(good.xyz)){
gs.t$flywire_xyz[good.xyz] = try(apply(nat::xyzmatrix(gs.t$flywire_xyz[good.xyz]),1,paste_coords), silent = TRUE)
}
gs.t$flywire_xyz[gs.t$flywire_xyz==paste_coords(matrix(NA,ncol=3))] = NA
gs.t[[fw.id]][gs.t[[fw.id]]==0]=NA
# Write to google sheet
if(nrow(gs.t)!=nrow(gs.t.current)){
stop("Sheet processing corruption. Flywire.id update failed.")
}
gs.t[[fw.id]] = as.character(gs.t[[fw.id]])
gs.t$flywire_svid = as.character(gs.t$flywire_svid)
write.cols = intersect(fw.columns,used.cols)
if(length(fids) & length(write.cols)){
gsheet_update_cols(
write.cols = write.cols,
gs=gs.t[,used.cols],
selected_sheet = selected_sheet,
sheet = tab,
Verbose = FALSE)
}
# Now continue processing
gs.t = gs.t[,colnames(gs.t)%in%chosen.columns]
for(col in chosen.columns){
if(is.null(gs.t[[col]])){
gs.t[[col]] = NA
}
}
if(nrow(gs.t)){
gs.t$gsheet = selected_sheet
entry = paste0(selected_sheet,"_",tab)
tracing.list[[entry]] = gs.t
}
}
}
}
# Make this unique, but keep row with most information
master = do.call(plyr::rbind.fill, tracing.list)
if("root_id"%in%colnames(master)){
master = master[!is.na(master$flywire_xyz),]
master$filled = apply(master, 1, function(r) sum(!is.na(r)))
master = master[order(master$filled,decreasing = TRUE),]
master = master[!duplicated(master$flywire_xyz),]
rownames(master) = master$flywire_xyz
master$root_id = bit64::as.integer64(master$root_id)
master$flywire_svid = bit64::as.integer64(master$flywire_svid)
if(match){
if(is.null(matching_sheet)){
flytable = TRUE
}else{
flytable = FALSE
}
matches = hemibrain_matches(selected_file = matching_sheet, priority = priority, flytable = flytable)
matches = subset(matches, matches$match.dataset == "hemibrain" & matches$dataset == "flywire")
master$hemibrain_match = matches[match(master$root_id, matches$id), "match"]
master$hemibrain_match_quality = matches[match(master$root_id, matches$id),"quality"]
master$fafb_hemisphere_match = matches[match(master$root_id, matches$id), "fafb_hemisphere_match"]
master$fafb_hemisphere_match.quality = matches[match(master$root_id, matches$id),"fafb_hemisphere_match.quality"]
master$cell_type = matches[match(master$root_id, matches$id), "cell_type"]
}
master = subset(master, !is.na(master$root_id))
master$filled = NULL
colnames(master) = snakecase::to_snake_case(colnames(master))
master
}else{
invisible()
}
}
flywire_gsheet_update_ids <- function(selected_sheets, ws = NULL){
for(selected_sheet in selected_sheets){
## Read Google sheets and extract flywire neuron positions
message("### Working on google sheet: ", selected_sheet)
if(is.null(ws)){
tabs = gsheet_manipulation(FUN = googlesheets4::sheet_names,
ss = selected_sheet,
return = TRUE,
Verbose = FALSE)
}else{
if(regex){
ws=regex_tab_names(regex=ws,selected_sheet=selected_sheet)$name
}
tabs=ws
}
pb = progress::progress_bar$new(
format = " updating tab :what [:bar] :percent eta: :eta",
clear = FALSE, total = length(tabs))
for(tab in tabs){
pb$tick(tokens = list(what = tab))
# Get google sheet tab as data frame
gs.t = gs.t.current = flywire_tracing_sheet(ws = tab, selected_sheet=selected_sheet, Verbose = FALSE)
if(!nrow(gs.t)){
next
}
# If post_id or pre_id a stand in for root_id
if(!"root_id"%in%gs.t.current){
if(sum(c("pre_id","post_id")%in%colnames(gs.t.current))==1){
fw.id = intersect(c("pre_id","post_id"), colnames(gs.t.current))
}else{
warning("not root_id column to update for ", tab)
next
}
}else{
fw.id = "root_id"
}
# Update ID
gs.t[[fw.id]] = tryCatch(flywire_updateids(x=gs.t[[fw.id]]),
error = function(e)
pbapply::pbsapply(gs.t[[fw.id]], function(x) tryCatch(flywire_updateids(x=x),error=function(e) x))
)
# Write to google sheet
if(nrow(gs.t)!=nrow(gs.t.current)){
stop("Sheet processing corruption. Flywire.id update failed.")
}
gs.t[[fw.id]] = as.character(gs.t[[fw.id]])
gsheet_update_cols(
write.cols = fw.id,
gs=gs.t,
selected_sheet = selected_sheet,
sheet = tab,
Verbose = FALSE)
}
}
}
#' Update neuron match information on a google sheet
#'
#' @description This function retrieves neuron matches (\code{\link{hemibrain_matches}}) from a master-matching google sheet. If then
#' can update other google sheets, specified by the user to update neuron-match information.
#' Just columns giving the match, match quality and cell_type are updated.
#'
#' @param selected_sheets character vector. the google sheet(s) to update. Each entry is a unique google sheet ID. You can find these in a sheet's URL.
#' If \code{NULL} then defaults to \code{option('hemibrainr_gsheets')}.
#' @param matching_sheet the master matching sheet. Cannot be in \code{selected_sheets}. This sheet will be processed using \code{\link{hemibrain_matches}} to produce a
#' \code{data.frame} describing hemibrain-FAFB and FAFB-hemibrain_matches.
#' @param priority whether to use FAFB->hemibrain_matches (FAFB) or hemibrain->FAFB matches (hemibrain) in order to ascribe
#' cell_type names to FAFB neurons. In both cases, cell_type names are attached to hemibrain bodyids, and propagated to their FAFB matches.
#' @param id the ID specifying unique neuron identity for each row, for each tab of the google sheets specified with \code{selected_sheets}. When matches are added
#' they are for the neuron specified in the \code{id} column of each worksheet.
#' @param match.field which match to record. E.g. if \code{id} is \code{"root_id"} you may want to add the hemibrain_match from the master matching sheets. To do this
#' set \code{match.field} to \code{"hemibrain"}. Then, if there is a \code{"hemibrain_match"} and/or \code{"hemibrain_match_quality"} column, it will be updated. You
#' may also want to know the hemispheric match within FAFB, in which case \code{"FAFB.hemisphere"} could be used.
#' @param chosen.columns as well as writing column updates to the specified google sheets, this function returns a \code{data.frame} built from all given sheets and their
#' individual tabs, that have been updated. This argument specifies which column you want returned. Filled with NAs if it does not exist.
#'
#' @details For this function to work, the specified google sheet(s) must have either the column specified with the argument \code{id},
#' which gives the ID of a neuron from the FAFB (CATMAID or flywire) data set, or the hemibrain data set.
#' Each tab of each specified google sheet is read in turn. The data set match (specified with the argument \code{match.field}). A match is recovered
#' using \code{hemibrain_matching(selected_file = matching_sheet, priority=priority)}. If the right columns exist in the google sheet, i.e. 'hemibrain_match'
#' and 'hemibrain_match_quality' for a hemibrain_match, then this column in the google sheet is wiped and updated. If not, no update takes place.
#'
#' @return a \code{data.frame} with columns from the given google sheet(s), specified using the argument \code{chosen.columns}.
#'
#' @examples
#' \donttest{
#' \dontrun{
#'
#' # Update flywire_ids in the sheet:
#' ## https://docs.google.com/spreadsheets/d/
#' ## 1spGSuhUX6Hhn-8HH0U_ArIWUuPpMBFNjIjeSSh_MFVY
#' ## /edit#gid=603762040
#' lineage.gsheet.meta = matches_update(id = "root_id",
#' match.field = "hemibrain",
#' selected_sheets = "1spGSuhUX6Hhn-8HH0U_ArIWUuPpMBFNjIjeSSh_MFVY")
#'
#' }}
#' @seealso \code{\link{hemibrain_matches}},
#' \code{\link{hemibrain_matched}},
#' \code{\link{hemibrain_matching}}
#' @name matches_update
#' @export
matches_update <- function(matching_sheet = options()$hemibrainr_matching_gsheet,
priority = c("FAFB","hemibrain"),
selected_sheets,
id = c("root_id","bodyid","skid","id"),
match.field = c("hemibrain","CATMAID","flywire","LM","FAFB.hemisphere"),
chosen.columns = c("cell_type", "flywire_xyz", "side",
"ito_lee_hemilineage", "hartenstein_hemilineage")){
id = match.arg(id)
match.field = match.arg(match.field)
priority = match.arg(priority)
matches = hemibrain_matches(selected_file = matching_sheet, priority = priority)
matches = subset(matches, ! matches$quality %in% "NBLAST")
if(sum(selected_sheets%in%c(options()$hemibrainr_matching_gsheet,matching_sheet))>1){
stop("selected_sheets should not indicate master matching sheets.
I.e. in options()$hemibrainr_matching_gsheet.")
}
# iterate through sheets and tabs
if(is.null(selected_sheets)){
selected_sheets = getOption("hemibrainr_gsheets", stop("Please set option('hemibrainr_gsheets')"))
}
tracing.list = list()
for(selected_sheet in selected_sheets){
## Read Google sheets and extract flywire neuron positions
tabs = gsheet_manipulation(FUN = googlesheets4::sheet_names,
ss = selected_sheet,
return = TRUE,
Verbose = FALSE)
pb = progress::progress_bar$new(
format = " updating :what [:bar] :percent eta: :eta",
clear = FALSE, total = length(tabs))
for(tab in tabs){
pb$tick(tokens = list(what = tab))
gs.t = gs.t.current = flywire_tracing_sheet(ws = tab, selected_sheet=selected_sheet, Verbose = FALSE)
used.cols = colnames(gs.t.current)
field = match.field
if(field=="CATMAID"){ # for historic reasons. Should fix.
field = "FAFB"
}
match.column = paste0(field,".match")
quality.column = paste0(field,".match.quality")
if(!match.column%in%used.cols){
match.column = paste0(field,"_match")
if(!quality.column%in%used.cols){
quality.column = paste0(field,"_match.quality")
}
}
chosen.columns = unique(c(chosen.columns,match.column,quality.column,"cell_type"))
in.sheet = intersect(gs.t[[id]],matches$id)
if(nrow(gs.t) & length(in.sheet)){
matches.sel = subset(matches, matches$id%in%in.sheet & matches$match.dataset == match.field)
if(match.field=="hemibrain"){
gs.t[[match.column]] = matches.sel$match[match(gs.t[[id]], matches.sel$id)]
gs.t[[quality.column]] = matches.sel$quality[match(gs.t[[id]], matches.sel$id)]
}else if(match.field == "FAFB.hemisphere"){
gs.t[[match.column]] = matches.sel$fafb_hemisphere_match[match(gs.t[[id]], matches.sel$id)]
gs.t[[quality.column]] = matches.sel$fafb_hemisphere_match.quality[match(gs.t[[id]], matches.sel$id)]
}else if(match.field == "LM"){
gs.t[[match.column]] = matches.sel$LM.match[match(gs.t[[id]], matches.sel$id)]
gs.t[[quality.column]] = matches.sel$LM.match.quality[match(gs.t[[id]], matches.sel$id)]
}else if(match.field == "CATMAID"){
gs.t[[match.column]] = matches.sel$match[match(gs.t[[id]], matches.sel$id)]
gs.t[[quality.column]] = matches.sel$quality[match(gs.t[[id]], matches.sel$id)]
}
gs.t[["cell_type"]] = matches.sel$cell_type[match(gs.t[[id]], matches.sel$id)]
if(nrow(gs.t)!=nrow(gs.t.current)){
stop("Sheet processing corruption.")
}
write.cols = intersect(c(match.column,quality.column,"cell_type"),used.cols)
if(nrow(matches.sel) & length(write.cols)){
suppressMessages(gsheet_update_cols(
write.cols = write.cols,
gs=gs.t[,used.cols],
selected_sheet = selected_sheet,
sheet = tab,
Verbose = FALSE))
}
gs.t = gs.t[,colnames(gs.t)%in%chosen.columns]
for(col in chosen.columns){
if(is.null(gs.t[[col]])){
gs.t[[col]] = NA
}
}
entry = paste(selected_sheet,tab,collapse ="_")
tracing.list[[entry]] = gs.t
}
}
}
d = do.call(plyr::rbind.fill, tracing.list)
colnames(d) = snakecase::to_snake_case(colnames(d))
d
}
# flywire read swc files
flywire_read_swc <- function(swc,
meta = NULL,
ids = NULL,
id = 'root_id',
template = "Flywire",
nams = NULL){
if(!is.null(ids)){
fw.skels <- nat::read.neurons(swc,
pattern = paste(ids,collapse="|"),
OmitFailures = TRUE)
}else{
fw.skels <- nat::read.neurons(swc,
OmitFailures = TRUE)
}
if(is.null(nams)){
fw.skels <- fw.skels[!duplicated(names(fw.skels))]
attr(fw.skels,"df") <- data.frame(root_id=names(fw.skels))
}else{
new.nams <- nams[names(fw.skels)]
new.nams[is.na(new.nams)] = names(fw.skels)[is.na(new.nams)]
fw.skels <- fw.skels[!duplicated(new.nams)]
attr(fw.skels,"df") <- data.frame(root_id=new.nams)
}
fw.skels[,"dataset"] = "flywire"
nat.templatebrains::regtemplate(fw.skels) = regtemplate
if(!is.null(meta)){
meta <- meta[!duplicated(meta$root_id),]
fw.skels <- hemibrainr:::update_metdata(fw.skels, meta = meta, id = id)
}
fw.skels
}
flywire_read_synapse_csvs <- function(save.path,
prepost = NULL,
numCores = NULL,
process = TRUE,
flywire.cleft.threshold = 50,
nt.cols = c("gaba", "acetylcholine", "glutamate",
"octopamine", "serotonin", "dopamine",
"neither")){
if(!is.null(numCores)){
cl <- parallel::makeCluster(2L)
}else{
cl = NULL
}
files <- list.files(save.path, full.names = TRUE)
message("Reading synapses: ")
synapse.csvs <- pbapply::pblapply(files, function(file){
root_id <- gsub("\\.csv","",basename(file))
csv <- try(as.data.frame(suppressMessages(readr::read_csv(file, col_types = hemibrainr:::sql_col_types))))
if(is.data.frame(csv)){
if(nrow(csv)){
csv$root_id <- root_id
if(process){
csv <- flywire_process_synapses(csv,
prepost=prepost,
flywire.cleft.threshold = flywire.cleft.threshold,
nt.cols = nt.cols)
csv$treenode_id <- NULL
}else{
csv
}
}else{
NULL
}
}else{
NULL
}
},
cl = cl)
if(!is.null(cl)){
parallel::stopCluster(cl)
}
synapse.csvs <- synapse.csvs[sapply(synapse.csvs,is.data.frame)]
do.call(plyr::rbind.fill, synapse.csvs)
}
flywire_process_synapses <-function(fw.synapses,
prepost = NULL,
flywire.cleft.threshold = 50,
nt.cols = c("gaba", "acetylcholine", "glutamate", "octopamine", "serotonin", "dopamine", "neither")){
colnames(fw.synapses) <- snakecase::to_snake_case(colnames(fw.synapses))
key.cols <- c("offset", "connector_id", "treenode_id", "x", "y", "z", "scores", "cleft_scores",
"syn_top_p", "syn_top_nt", "gaba", "acetylcholine", "glutamate",
"octopamine", "serotonin", "dopamine", "prepost", "pre_id", "post_id",
"inside", "strahler_order", "label", "geodesic_distance", "geodesic_distance_norm")
fw.synapses <- fw.synapses[,!duplicated(colnames(fw.synapses))]
fw.synapses <- fw.synapses %>%
dplyr::rename(syn_top_p = top_p,
syn_top_nt = top_nt)
missin.cols <- setdiff(key.cols,colnames(fw.synapses))
for(m in missin.cols){
warning("missing synapse column: ", m, " for rootid: ", fw.synapses$root_id[[1]])
fw.synapses[[m]] <- NA
}
if(!all(key.cols%in%colnames(fw.synapses))){
warning("not all column names found, dropping a synapse list")
return(NULL)
}
fw.synapses = fw.synapses %>%
dplyr::filter(cleft_scores>flywire.cleft.threshold,
pre_id != post_id) %>%
dplyr::select(c(offset, connector_id, treenode_id,
x, y, z,
scores, cleft_scores,
syn_top_p, syn_top_nt,
gaba, acetylcholine, glutamate, octopamine, serotonin, dopamine,
prepost,
#segmentid_pre, segmentid_post,
#pre_svid, post_svid,
pre_id, post_id,
inside, strahler_order, label,
geodesic_distance, geodesic_distance_norm
)) %>%
dplyr::mutate(label = hemibrainr:::standard_compartments(label)) %>%
dplyr::group_by(pre_id, post_id, prepost, label) %>%
dplyr::mutate(count = dplyr::n(),
geodesic_distance = as.numeric(geodesic_distance),
geodesic_distance_norm = as.numeric(geodesic_distance_norm)
)%>%
dplyr::ungroup() %>%
as.data.frame(stringsAsFactors = FALSE)
if(!is.null(prepost)){
if(prepost==0){
fw.synapses <- fw.synapses %>%
dplyr::filter(prepost==0) %>%
dplyr::rowwise() %>%
dplyr::mutate(syn_top_nt = nt.cols[which.max(c(gaba, acetylcholine, glutamate, octopamine, serotonin, dopamine))]) %>%
dplyr::mutate(syn_top_p = max(c(gaba, acetylcholine, glutamate, octopamine, serotonin, dopamine))) %>%
dplyr::ungroup()
}else{
fw.synapses <- fw.synapses %>%
dplyr::filter(prepost==1) %>%
dplyr::rowwise() %>%
dplyr::mutate(syn_top_nt = nt.cols[which.max(c(gaba, acetylcholine, glutamate, octopamine, serotonin, dopamine))]) %>%
dplyr::mutate(syn_top_p = max(c(gaba, acetylcholine, glutamate, octopamine, serotonin, dopamine))) %>%
dplyr::ungroup()
}
}
fw.synapses <- round_dataframe(fw.synapses, digits = 4)
fw.synapses
}
flywire_read_metrics_csvs <- function(save.path){
files <- list.files(save.path, full.names = TRUE)
message("Reading metrics: ")
metrics.csvs <- pbapply::pblapply(files, function(file){
root_id <- gsub("\\.csv","",basename(file))
csv <- try(suppressMessages(readr::read_csv(file, col_types = hemibrainr:::sql_col_types)))
csv
})
metrics.csvs <- metrics.csvs[sapply(metrics.csvs,is.data.frame)]
do.call(plyr::rbind.fill, metrics.csvs)
}
flywire_read_swc_split <-function(swc, synapses, meta, ids = NULL, regtemplate = "Flywire"){
message("Reading skeletons: ")
fw.skels <- flywire_read_swc(swc = swc,
ids = ids,
meta = fw.meta)
pb <- progress::progress_bar$new(total = length(fw.skels))
message("reading synapses: ")
for(id in names(fw.skels)){
pb$tick()
file <- file.path(synapses,paste0(id,".csv"))
csv <- suppressMessages(readr::read_csv(file, col_types = hemibrainr:::sql_col_types))
csv <- flywire_process_synapses(csv)
fw.skels[match(id,names(fw.skels))][[1]]$connectors <- csv
}
fw.skels <- nat::nlapply(fw.skels, function(neuron){
class(neuron) <- c("catmaidneuron","neuprintneuron","neuron")
neuron
})
fw.skels
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.