R/Metadata.R

#' Handle metadata in BBrowser study
Metadata <- setRefClass("MetadataClass", fields = list(
  dir = "character",
  metalist = "ANY",
  content = "list",
  default = "ANY",
  version = "numeric",
  reader = "ANY"
), methods = list(
  initialize = function(metadata.dir, reader) {
    .self$dir <- metadata.dir
    .self$metalist <- reader$new(file.path(.self$dir, "metalist.json"))
    .self$content <- NamedList()
    .self$default <- NULL
    .self$version <- 1
    .self$reader <- reader

    if (.self$metalist$exists()) {
      raw <- .self$metalist$read(simplifyVector=FALSE)
      if ("content" %in% names(raw)) {
        .self$content <- raw$content
        .self$default <- if (is.null(raw$default)) names(.self$content)[1] else raw$default
      } else { # Old version that does not have "content"
        .self$content <- raw
        .self$default <- names(raw)[1]
      }
    }
  },
  getMetaFile = function(id) {
    "Get the file location of individual metadata"

    meta.file <- file.path(.self$dir, paste0(id, ".json"))
    return(.self$reader$new(meta.file))
  },
  get = function(id) {
    "Return an array of characters/numbers"

    info <- .self$content[[id]]
    if (is.null(info)) return(NULL)

    meta.file <- .self$getMetaFile(id)
    meta <- meta.file$read()

    ## If metadata is a numeric, get info from metalist
    if (is(meta, "numeric")) {
      id <- gsub(".json$", "", basename(meta.file$path))
      meta <- list(
        id = id,
        clusters = meta,
        clusterName = .self$content[[id]]$clusterName,
        clusterLength = .self$content[[id]]$clusterLength
      )
    }
  
    cls <- meta$clusters
    cls.type <- if (is.null(meta$type)) "category" else meta$type
    if (cls.type == "numeric") return(cls) # Numeric returns as is

    cls.names <- meta$clusterName
    return(factor(cls.names[cls + 1], cls.names))
  },
  add = function(cls, name, id=GenerateUUID(), commit=CreateCommit()) {
    "Create a metadata"

    meta.file <- .self$getMetaFile(id)
    if (is.numeric(cls)) {
      info <- list(
        id = id,
        clusters = cls,
        name = name,
        type = "numeric",
        history = list(commit)
      )
    } else {
      cls <- as.factor(cls)
      lvl <- unique(trimws(levels(cls))) # Get original levels
      cls <- trimws(as.character(cls))
      cls[is.na(cls)] <- "Unassigned"
      lvl <- c("Unassigned", lvl[lvl != "Unassigned"])
      cls <- factor(cls, lvl)
      info <- list(
        id = id,
        clusters = as.integer(cls) - 1, # 0-index
        clusterName = as.list(levels(cls)),
        clusterLength = as.list(as.integer(table(cls))),
        name = name,
        type = "category",
        history = list(commit)
      )
    }
    meta.file$write(info, auto_unbox=TRUE)
    info$clusters <- NULL
    .self$content[[info$id]] <- info
    .self$metalist$write(list(
      content = .self$content,
      default = .self$default,
      version = .self$version
    ), auto_unbox=TRUE)
    return(info$id)
  },
  ls = function() {
    "Returns the content of metalist.json"

    return(data.frame(
      id = sapply(.self$content, function(x) x$id),
      name = sapply(.self$content, function(x) x$name),
      type = sapply(.self$content, function(x) x$type)
    ))
  }
))
bioturing/rBCS documentation built on May 18, 2022, 5:26 p.m.