R/wsx_get_poppaths.R

Defines functions wsx_get_poppaths

Documented in wsx_get_poppaths

#' Obtain paths to populations/nodes/gates in a flowjo workspace
#'
#' @param ws path to flowjo workspace or a parsed xml-document (xml2::read_xml(ws))
#' @param collapse logical whether to collapse FileNames to a list column
#' @param groups which groups in from flowjo workspace to consider
#'
#' @return data frame
#' @export
#'
#' @examples
#' \dontrun{
#' pp <- wsx_get_poppaths(ws, collapse = F)
#' # check which files have equal gating trees
#' # and get the node (or path, or population) names
#' pp <- pp %>%
#' dplyr::group_by(PopulationFullPath, Population, ws) %>%
#' dplyr::summarise(FileName = list(FileName), .groups = "drop")
#' }
wsx_get_poppaths <- function(ws,
                             groups = NULL,
                             collapse = T) {

  ws <- check_ws(ws)

  ids <- wsx_get_groups(ws)
  if (is.null(groups)) {
    groups <- unique(ids[,"FlowJoGroup", drop=T])
  }
  ids <- ids[which(ids$FlowJoGroup %in% groups),"sampleID"]
  rel_nodes <- xml2::xml_children(xml2::xml_child(ws, "SampleList"))
  rel_nodes <- rel_nodes[which(sapply(seq_along(rel_nodes), function(x) xml2::xml_attrs(xml2::xml_child(rel_nodes[[x]], "DataSet"))[["sampleID"]]) %in% ids)]
  gg <- xml2::xml_find_all(rel_nodes, ".//Gate|.//Dependents")


  gates <- lapply(seq_along(gg), function(n) {

    prnts <- xml2::xml_parents(gg[n])

    s_node <- prnts[which(xml2::xml_name(prnts) == "Sample")]
    sampleID <- xml2::xml_attr(xml2::xml_child(s_node, "DataSet")[[1]], "sampleID")
    FilePath <- gsub("^file:", "", xml2::xml_attr(xml2::xml_child(s_node, "DataSet")[[1]], "uri"))
    FileName <- basename(FilePath)

    p_nodes <- prnts[which(xml2::xml_name(prnts) %in% c("AndNode", "OrNode", "NotNode", "Population"))]
    PopulationFullPath <- paste(rev(xml2::xml_attr(p_nodes, "name")), collapse = "/")
    Population <- basename(PopulationFullPath)

    if (xml2::xml_name(gg[n]) == "Dependents") {
      origin <- "Dependents"
    } else {
      origin <- "Gate"
    }


    return(data.frame(FileName = FileName,
                      PopulationFullPath = PopulationFullPath,
                      Population = Population,
                      sampleID = sampleID,
                      origin = origin,
                      stringsAsFactors = F))
  })

  roots <- do.call(rbind, lapply(rel_nodes, function(y) {
    data.frame(FileName = basename(xml2::xml_attr(xml2::xml_child(y, "DataSet"), "uri")),
               PopulationFullPath = "root",
               Population = "root",
               sampleID = xml2::xml_attr(xml2::xml_child(y, "DataSet"), "sampleID"),
               origin = "root",
               stringsAsFactors = F)
  }))

  gates_df <- do.call(rbind, gates)
  gates_df <- rbind(roots,gates_df)
  gates_list <- split(gates_df, gates_df$sampleID)
  # remove duplicate rows from gate+dependents
  gates_list <- lapply(gates_list, function(y) {
    ex <- base::intersect(c(which(duplicated(y$PopulationFullPath)),
                            which(duplicated(y$PopulationFullPath, fromLast=T))),
                          which(y$origin == "Dependents"))
    if (length(ex) > 0) {
      y <- y[-ex,]
    }
    return(y)
  })

  full_paths <- unique(lapply(gates_list, function(y) {
    if (length(unique(y$PopulationFullPath)) != length(y$PopulationFullPath)) {
      stop("PopulationFullPaths not unique which cannot or should not be. Check.")
    }
    y$PopulationFullPath
  }))
  auto_paths <- lapply(full_paths, function(y) shortest_unique_path(y))

  for (y in seq_along(gates_list)) {
    gates_list[[y]][["Population"]] <- auto_paths[[which(sapply(full_paths, function(z) identical(z,  gates_list[[y]][["PopulationFullPath"]])))]]
  }
  gates_out <- do.call(rbind, gates_list)
  gates_out <- gates_out[order(gates_out$FileName),]
  rownames(gates_out) = seq(1,nrow(gates_out),1)
  gates_out[,"ws"] <- basename(xml2::xml_attr(ws, "nonAutoSaveFileName"))
  gates_out <- gates_out[,-which(names(gates_out) == "sampleID")]

  if (collapse) {
    gates_out <-
      gates_out %>%
      dplyr::group_by(PopulationFullPath, Population, ws) %>%
      dplyr::summarise(FileName = list(FileName), .groups = "drop")
  }

  return(as.data.frame(gates_out))
}
Close-your-eyes/fcexpr documentation built on Sept. 29, 2023, 12:27 a.m.