R/WriteStudy.R

Defines functions WriteStudy

Documented in WriteStudy

#' @title Write a BBrowser study
#' @param expr.data list of counts, norms, and feature_type
#' @param metadata a data.frame of metadata
#' @param dimred.data a named list of dimensionality reductions
#' @param spatial.data a named list of spatial info
#' @param study.path path to study directory
#' @param author email of the creator
#' @param unique.limit ignore a metadata if it has number of unique labels larger than this number
WriteStudy <- function(
  expr.data, metadata, dimred.data, spatial.data, study.path, author, unique.limit
) {
  CreateCommit <- function() {
    return(list(
      hash_id = uuid::UUIDgenerate(),
      created_at = as.numeric(Sys.time()) * 1000,
      created_by = author,
      description = "Converted with rBCS"
    ))
  }

  WriteH5Matrix <- function(matrix, h5, group, write.labels=TRUE) {
    rhdf5::h5createGroup(h5, group)
    rhdf5::h5createDataset(h5, paste0(group, "/data"), storage.mode="double", dims=length(matrix@x), chunk=min(10000, length(matrix@x)), level=1)
    rhdf5::h5write(matrix@x, h5, paste0(group, "/data"))
    rhdf5::h5write(matrix@p, h5, paste0(group, "/indptr"))
    rhdf5::h5createDataset(h5, paste0(group, "/indices"), storage.mode="integer", dims=length(matrix@i), chunk=min(10000, length(matrix@i)), level=1)
    rhdf5::h5write(matrix@i, h5, paste0(group, "/indices"))
    rhdf5::h5write(dim(matrix), h5, paste0(group, "/shape"))
    if (write.labels) {
      rhdf5::h5write(colnames(matrix), h5, paste0(group, "/barcodes"))
      rhdf5::h5write(rownames(matrix), h5, paste0(group, "/features"))
    }
  }

  WriteExpressionData <- function(expr.data, study.path) {
    Meow("Writing column sums...")
    h5.path <- file.path(study.path, "main", "matrix.hdf5")
    rhdf5::h5createFile(h5.path)
    h5 <- rhdf5::H5Fopen(h5.path)
    rhdf5::h5createGroup(h5, "colsum")
    raw.sum <- Matrix::colSums(expr.data$counts)
    rhdf5::h5write(raw.sum, h5, "colsum/raw")
    rhdf5::h5write(log2(raw.sum + 1), h5, "colsum/log")
    rhdf5::h5write(Matrix::colSums(expr.data$norms), h5, "colsum/lognorm")
    Meow("Writing matrices...")
    WriteH5Matrix(expr.data$counts, h5, "bioturing")
    rhdf5::h5write(expr.data$feature_type, h5, "bioturing/feature_type")
    WriteH5Matrix(Matrix::t(expr.data$counts), h5, "countsT")
    WriteH5Matrix(Matrix::t(expr.data$norms), h5, "normalizedT")
    rhdf5::H5Fclose(h5)
    writeLines(expr.data$feature.name, file.path(study.path, "main", "genes.tsv"))
    writeLines(colnames(expr.data$norms), file.path(study.path, "main", "barcodes.tsv"))
  }

  WriteMetadata <- function(metadata, study.path) {
    Meow("Writing metadata...")
    meta.dir <- file.path(study.path, "main", "metadata")
    meta <- Metadata$new(meta.dir, JSONFile)
    for (name in colnames(metadata)) {
      meta$add(metadata[[name]], name)
    }
  }

  WriteDimred <- function(dimred.data, study.path) {
    Meow("Writing cell embeddings...")
    dimred.dir <- file.path(study.path, "main", "dimred")
    dir.create(dimred.dir)
    dimred <- list(version=1)
    dimred$data <- lapply(seq(length(dimred.data)), function(i) {
      info <- list(
        name = names(dimred.data)[i],
        id = uuid::UUIDgenerate(),
        param = dimred.data[[i]]$param,
        coords = dimred.data[[i]]$coords,
        history = list(CreateCommit())
      )
      info$size <- dim(info$coords)
      if (dimred.data[[i]]$type == "dimred") {
        jsonlite::write_json(info, file.path(dimred.dir, info$id), auto_unbox=TRUE)
      }
      info$coords <- NULL # meta does not need this info
      return(info)
    })
    dimred$data <- dimred$data[sapply(dimred.data, function(x) x$type == "dimred")]
    names(dimred$data) <- sapply(dimred$data, function(x) x$id)
    dimred$default <- tail(names(dimred$data), 1)
    jsonlite::write_json(dimred, file.path(dimred.dir, "meta"), auto_unbox=TRUE)
  }

  WriteLowDimred <- function(dimred.data, study.path) {
    Meow("Writing intermediate embeddings...")
    h5.path <- file.path(study.path, "main", "pca_result.hdf5")
    h5 <- rhdf5::H5Fcreate(h5.path)
    for (i in seq_along(dimred.data)) {
      coords <- dimred.data[[i]]$coords
      if (dimred.data[[i]]$type == "low_dimred") {
        if (class(coords)[1] == "matrix") {
          rhdf5::h5write(coords, h5.path, names(dimred.data)[i])
        } else if (class(coords)[1] == "dgCMatrix") {
          WriteH5Matrix(coords, h5.path, names(dimred.data)[i], write.labels=FALSE)
        }
      }
    }
    rhdf5::H5Fclose(h5)
  }

  WriteRunInfo <- function(study.path, meta.data, omics, n.cell, title) {
    Meow("Writing general information...")
    n.batch <- if (is.null(meta.data$bioturing_batch)) 1 else length(unique(meta.data$bioturing_batch))
    run.info <- list(
      species = "human",
      omics = as.list(omics), # Must be list
      hash_id = basename(study.path),
      version = 16,
      n_cell = n.cell,
      addon = "SingleCell",
      n_batch = n.batch,
      platform = "unknown",
      title = "Untitled study",
      unit = "umi",
      author = list(),
      abstract = "",
      ana_setting = list(inputType="bcs"),
      remote = list(),
      history = list(CreateCommit()),
      tag = list(),
      shareTag = list(),
      modified_date = as.numeric(Sys.time()) * 1000,
      created_date = as.numeric(Sys.time()) * 1000
    )
    jsonlite::write_json(run.info, file.path(study.path, "run_info.json"), auto_unbox=TRUE)
  }

  WriteSpatialDimred <- function(misc, output_path, author) {
    dimred.path <- file.path(output_path, "dimred")
    dir.create(dimred.path, recursive=TRUE)
    meta.path <- file.path(dimred.path, "meta")
    meta <- list(version=1)
    # Spatial coordinates
    if (!is.null(misc$tissue_positions)) {
      coords <- as.matrix(misc$tissue_positions)
      uuid <- (gsub('-', '', uuid::UUIDgenerate()))
      info <- list(coords = coords, name = "Spatial coordinates", id=uuid, size=dim(coords))
      # write spatial coords dimred
      info$param <- list(omics="spatial")
      history <- list(list(
        created_by = author,
        created_at = as.numeric(Sys.time()) * 1000,
        hash_id = info$id,
        description = "Imported with BioTuring Browser"
      ))
      info$history <- history
      jsonlite::write_json(info, file.path(dimred.path, info$id), auto_unbox=TRUE)
      info$coords <- NULL
      meta$data[[info$id]] <- info
    }

    jsonlite::write_json(meta, meta.path, auto_unbox=TRUE)
  }

  WriteSpatialData <- function(misc, output_path) {
    spatial.dir <- file.path(output_path, "spatial")
    if (!is.null(misc$image)) {
      dir.create(spatial.dir)
      png::writePNG(misc$image, file.path(spatial.dir, paste0(misc$image_name, ".png")))
    }
    if (!is.null(misc$spatial_info)) {
      jsonlite::write_json(misc$spatial_info, file.path(spatial.dir, "info.json"), auto_unbox=TRUE)
    }
  }

  WriteSubCluster <- function(data, expr.data, study_path, author) {
    batches <- names(data)
    sub.dir <- file.path(study_path, "sub")
    info.file <- file.path(sub.dir, "graph_cluster.json")
    dir.create(sub.dir)
    all.barcodes <- colnames(expr.data$counts)
    for (name in batches) {
      # General info
      image.data <- data[[name]]
      barcodes <- rownames(image.data$tissue_positions)
      arr.select <- sort(match(barcodes, all.barcodes))
      id <- digest::digest(jsonlite::toJSON(arr.select), algo = "md5")

      # Write result
      output_path <- file.path(sub.dir, id)
      dir.create(output_path)
      WriteSpatialData(image.data, output_path)
      WriteSpatialDimred(image.data, output_path, author)

      # Write cluster_info.json
      jsonlite::write_json(list(
        name = name,
        selectedArr = arr.select - 1, # nodejs needs index based 0
        created_at = as.numeric(Sys.time()) * 1000,
        created_by = author,
        version = 1,
        path = id
      ), file.path(output_path, "cluster_info.json"), auto_unbox=TRUE)
      
      # Update graph_cluster.json
      if (file.exists(info.file)) {
        info <- jsonlite::read_json(info.file)
        info$main <- c(info$main, id)
      } else {
        info <- list(main=id)
      }
      jsonlite::write_json(info, info.file)
    }
  }

  WriteSpatial <- function(data, expr.data, study.path, author) {
    # multiple batches
    if (length(data) > 1) {
      WriteSubCluster(data, expr.data, study.path, author)
    } else {
      output_path <- file.path(study.path, "main")
      WriteSpatialData(data[[1]], output_path)
      WriteSpatialDimred(data[[1]], output_path, author)
    }
  }

  dir.create(file.path(study.path, "main"), recursive=TRUE)
  WriteExpressionData(expr.data, study.path)
  WriteMetadata(metadata, study.path)
  WriteDimred(dimred.data, study.path)
  WriteLowDimred(dimred.data, study.path)
  omics <- unique(expr.data$feature_type)
  if (length(spatial.data) > 0) {
    WriteSpatial(spatial.data, expr.data, study.path, author)
    omics <- append(omics, "spatial")
  }
  WriteRunInfo(study.path, metadata, omics, ncol(expr.data$norms))
}
bioturing/rBCS documentation built on May 18, 2022, 5:26 p.m.