library(Matrix)
get_gene_ids = function(spmat){spmat@Dimnames[[1]]}
get_cell_ids = function(spmat){spmat@Dimnames[[2]]}
matrix_to_file = function(spmat, dir){
spmat = as(spmat, "dgTMatrix")
file_name = file.path(dir, "expression_data.csv")
df = data.frame(
cellId = get_cell_ids(spmat)[spmat@j + 1],
geneId = get_gene_ids(spmat)[spmat@i + 1],
expression = spmat@x)
message(stringr::str_interp("Saving expression matrix in a sparse format as '${file_name}'"))
write.csv(df, file_name, row.names = FALSE)
return(file_name)
}
gene_metadata_to_file = function(gene_metadata, dir){
file_name = file.path(dir, "gene_metadata.csv")
message(stringr::str_interp("Saving gene metadata as '${file_name}'"))
write.csv(gene_metadata, file_name, row.names = FALSE)
return(file_name)
}
cell_metadata_to_file = function(cell_metadata, dir){
file_name = file.path(dir, "cell_metadata.csv")
message(stringr::str_interp("Saving cell metadata as '${file_name}'"))
write.csv(cell_metadata, file_name, row.names = FALSE)
return(file_name)
}
create_tmp_files = function(matrix, cell_metadata, gene_metadata, tmpdir=NULL){
if (is.null(tmpdir)) {
tmpdir = file.path(tempdir(),
stringi::stri_rand_strings(n = 1, length = 20)[[1]])
}
dir.create(tmpdir)
tmpdir = normalizePath(tmpdir)
files = list(
matrix_csv = matrix_to_file(matrix, tmpdir),
gene_metadata = gene_metadata_to_file(gene_metadata, tmpdir),
cell_metadata = cell_metadata_to_file(cell_metadata, tmpdir))
return(files)
}
#' Submits a data set from a sparse matrix of dgTmatrix type and gene/cell metadata.
#'
#' dgTMatrix has to have named dimensions, cell and gene metadata must have cellId and
#' geneId columns respectively. The names of the dimensions in the matrix must be a
#' subset of the respective gene/cell metadata data frames.
#'
#' @param connection FASTGenomics connection object
#' @param matrix sparseMatrix storing the expression table. The first
#' dimension is assumed to be genes and the second must be cell
#' names.
#' @param cell_metadata dataframe with cell metadata, must have a
#' cellId column.
#' @param gene_metadata dataframe with gene metadata, must have a
#' geneId column.
#' @param organism_id One of 9606 (Mouse) and 10090 (Human)
#' @param title The title of the data set
#' @param description long description, Default: ""
#' @param zipfiles Weather to compress files before submitting,
#' Default: TRUE
#' @param tmpdir The location of temporary files, Default: NULL
#' @param optional_parameters Object representing further optional
#' parameters, see \code{\link{FGDatasetUploadParameters}},
#' Default: NULL
#' The properties batch_column, gene_metadata and cell_metadata of this object are always ignored
#'
#' @return either of \code{\link{FGResponse}},
#' \code{\link{FGErrorResponse}},
#' \code{\link{FGErrorModelResponse}},
#' \code{\link{FGValidationProblem}}.
#'
#' @export
create_dataset_df <- function(connection, matrix, cell_metadata,
gene_metadata, gene_nomenclature,
organism_id, title, zipfiles=TRUE,
description="",
optional_parameters=FGDatasetUploadParameters() , tmpdir=NULL)
{
assert_is_connection(connection)
match.arg(gene_nomenclature, c("Entrez", "GeneSymbol", "Ensembl"))
if (!is.numeric(organism_id))
{
stop(stringr::str_interp("The organism id '${organism_id}' is not an integer. Valid NCBI Ids are integers, e.g. Homo Sapiens: 9606 Mouse: 10090, 0: unknown organism"))
}
## check if the matrix is sparse
if ( !is(matrix, "sparseMatrix") ) {
stop("Unsupported matrix format, expected a \"sparseMatrix\".")
}
if ( !is(cell_metadata, "data.frame")) {
stop("cell_metadata must be a data frame.")
}
if ( !is(gene_metadata, "data.frame")) {
stop("gene_metadata must be a data frame.")
}
if ( !'cellId' %in% colnames(cell_metadata) ) {
stop("cell_metadata must have a cellId column.")
}
if ( !'geneId' %in% colnames(gene_metadata) ) {
stop("gene_metadata must have a geneId column.")
}
if ( length(intersect(get_cell_ids(matrix), cell_metadata$cellId)) == 0 ) {
stop("No common cell names found in matrix and cell_metadata.")
}
if ( length(intersect(get_gene_ids(matrix), gene_metadata$geneId)) == 0 ) {
stop("No common gene names found in matrix and gene_metadata.")
}
if ( nchar(title) < 5 && nchar(title) <= 200 ) {
stop("Title has to be a string with length between 5 and 200.")
}
# adds a nice progress bar
headers <- c(get_default_headers(connection), httr::progress("up"))
url <- paste(connection$base_url, "dataset/api/v4/datasets", sep = "")
files = create_tmp_files(matrix, cell_metadata, gene_metadata, tmpdir = tmpdir)
if (zipfiles)
files = lapply(files, zip_file)
body = list(
expression_data = httr::upload_file(files[["matrix_csv"]]),
title = title,
description = description,
organism_id = organism_id,
matrix_format = "sparse_cell_gene_expression",
gene_nomenclature = gene_nomenclature)
tryCatch({
if (!is.null(optional_parameters))
if (!is(optional_parameters, "FGDatasetUploadParameters"))
stop("the optional_parameters need to be either NULL or a FGDatasetUploadParameters object. Call new('FGDatasetUploadParameters', ..) to obtain such an object.")
else
{
if (optional_parameters@gene_metadata != "")
message("Warning, replacing gene_metadata with a table inferred from the Seurat object")
if (optional_parameters@cell_metadata != "")
message("Warning, replacing cell_metadata with a table inferred from the Seurat object")
optional_parameters@gene_metadata = files[["gene_metadata"]]
optional_parameters@cell_metadata = files[["cell_metadata"]]
body <- c(get_data_from_FGDatasetUploadParameters(
optional_parameters, connection), body)
}
response <- httr::POST(url, headers, body = body)},
error = stop,
finally = {lapply(files, file.remove)}
)
return(parse_response(response, "dataset"))
}
#' Uploads a seurat dataset.
#'
#' Runs \code{\link{create_dataset_df}} with matrix, cell_metadata and gene_metadata
#' inferred from the seurat object.
#'
#' @param connection FASTGenomics connection object
#' @param seurat_obj Seurat object to submitted
#' @param ... Other parameters passed to \code{\link{create_dataset_df}}
#'
#' @export
create_dataset_from_seurat <- function(connection, seurat_obj, ...){
matrix = as(seurat_obj@data, "dgTMatrix")
cell_metadata = seurat_obj@meta.data
cell_metadata = cbind(cellId = rownames(cell_metadata), cell_metadata)
gene_metadata = data.frame(geneId = seurat_obj@data@Dimnames[[1]])
create_dataset_df(connection,
matrix = matrix,
cell_metadata = cell_metadata,
gene_metadata = gene_metadata,
title = seurat_obj@project.name,
...)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.