library(methods)
library(stringr)
library(httr)
library(jsonlite)
library(lubridate)
scopes = list("All", "Public", "Private")
#' Get all datasets
#'
#' This list does not contain rejected datasets, only valid, usable datasets.
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#' @param scope Filters the datasets by their scope. Possible Values are: 'All': return all datasets, 'Private': Only your personal datasets, 'Public': Only public datasets
#'
#' @return A FGResponse object
#' @export
#' @examples
#' connection <- fastgenomicsRclient::connect("https://fastgenomics.org/", "Beaer ey...")
#' datasets <- fastgenomicsRclient::get_datasets(connection)
#' print(datasets@content) # all datasets available to you
get_datasets <- function(connection, scope="All"){
url <- paste(connection$base_url, "dataset/api/v4/datasets", sep = "")
result <- get_data_list(connection, scope, url, "dataset", queries = list(includeHateoas = "true" ))
return(result)
}
#' Get a specific dataset
#'
#' Gets information about a specific dataset. This can also be used to obtain information about a failed dataset upload
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#' @param dataset_id The id of the dataset, usually starting with dts_*****
#'
#' @return A FGResponse object
#' @export
#'
#' @examples
#' connection <- fastgenomicsRclient::connect("https://fastgenomics.org/", "Beaer ey...")
#' datasets <- fastgenomicsRclient::get_dataset(connection, "dts_abc")
#' print(datasets@content) # the dataset
get_dataset <- function(connection, dataset_id){
url <- paste(connection$base_url, "dataset/api/v4/datasets/", dataset_id, sep = "")
result <- get_data(connection, dataset_id, url, "dataset")
return(result)
}
#' Downloads the whole dataset content as a zip file to the specified folder
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#' @param dataset_id The id of the dataset, usually starting with dts_*****
#' @param folder_path Where to store the dataset? Must exist and be writeable
#'
#' @return None, see the folder for the content
#' @export
#'
#' @examples
#' connection <- fastgenomicsRclient::connect("https://fastgenomics.org/", "Beaer ey...")
#' fastgenomicsRclient::download_dataset(connection, "dts_abc", "/temp/")
download_dataset <- function(connection, dataset_id, folder_path){
if (!dir.exists(folder_path))
{
msg = stringr::str_interp("The folder '${folder_path}' does not exist, please create it")
stop(msg)
}
dataset <- get_dataset(connection, dataset_id)
download_link <- ""
for (lnk in dataset@content[["links"]]) {
rel <- lnk[["rel"]]
if (rel == "download-dataset-complete-zip")
{
download_link <- lnk[["href"]]
}
}
if (download_link == "")
{
msg = stringr::str_interp("No download link found, something is wrong. Please contact us.")
stop(msg)
}
url <- paste(substr(connection$base_url, 1, nchar(connection$base_url) - 1), download_link, sep = "")
headers <- get_default_headers(connection)
headers["output"] = httr::write_disk(
file.path(folder_path, stringr::str_interp("${dataset_id}.zip")),
overwrite = TRUE
)["output"]
headers <- c(headers, httr::progress())
response <- httr::GET(url, headers)
httr::stop_for_status(response)
}
#' Creates a new dataset on fastgenomics
#'
#' This call will create the dataset, but the validation on the server
#' can take a long time. The dataset cannot be used before the
#' validation is complete. Use
#' \code{\link{poll_dataset_until_validated}} to query the server for
#' the validation status. For details on what data formats are
#' supported by FASTGenomics refer to the documentation
#' \href{https://github.com/FASTGenomics/fastgenomics-docs/blob/master/doc/api/dataset_api.md}{here}.
#'
#' @param connection The connection to be used, call
#' \code{\link{connect}} to obtain one.
#' @param title The Title of the dataset
#' @param description A description of the dataset, ca be Markdown
#' @param organism_id The NCBI Taxonomy ID of your dataset, passed as
#' an integer. Currently supported IDs are 9606 (Homo Sapiens) and
#' 10090 (Mouse)
#' @param matrix The path to your datafile in a supported
#' \href{https://github.com/FASTGenomics/fastgenomics-docs/blob/master/doc/api/dataset_api.md}{format}
#' OR a dataframe. If it's a data frame it will be saved in a
#' temporary location on your hard drive and uploaded as a file.
#' @param matrix_format The
#' \href{https://github.com/FASTGenomics/fastgenomics-docs/blob/master/doc/api/dataset_api.md#upload-file-structure}{format}
#' of your matrix.
#' @param gene_nomenclature The gene nomenclature to be used, call
#' \code{\link{get_valid_gene_nomenclatures}} to get a list of
#' supported formats
#' @param optional_parameters Further parameters to be used, eg. gene
#' metadata or cell metadata files. Use
#' \code{\link{FGDatasetUploadParameters}} to define these
#' parameters.
#'
#' @return FGResponse in case of success, FGErrorResponse if the validation failed for any reason.
#' @export
#'
#' @examples
#' connection <- fastgenomicsRclient::connect("https://fastgenomics.org/", "Beaer ey...")
#' optional <- fastgenomicsRclient::FGDatasetUploadParameters(
#' license ="MIT",
#' technology = "Smart-Seq",
#' web_link="https://example.com",
#' notes="This is a TEST",
#' citation="FG et al",
#' current_normalization_status="Counts",
#' cell_metadata="./cell_metadata.tsv", # you can also use a dataframe directly
#' gene_metadata="./gene_metadata.tsv" ) # you can also use a dataframe directly
#' result <- fastgenomicsRclient::create_dataset(connection,
#' "R client test",
#' "description",
#' 9606,
#' "./matrix.tsv" , # you can also use a dataframe directly
#' "sparse_cell_gene_expression",
#' "Entrez",
#' optional )
#'
#' status <- fastgenomicsRclient::poll_dataset_until_validated(connection, result, 1 ) # validation messages are shown as messages
#' print(status) # should be TRUE
create_dataset <- function(connection, title, description, organism_id, matrix , matrix_format, gene_nomenclature, optional_parameters=NULL)
{
assert_is_connection(connection)
gene_nomenclatures <- get_valid_gene_nomenclatures(connection)
if (!gene_nomenclature %in% gene_nomenclatures)
{
str = paste(as.character(gene_nomenclatures), collapse = ", ")
stop(stringr::str_interp("The Gene Nomenclature '${gene_nomenclature} is unknown. Choose one of: ${str}' "))
}
if (!is.numeric(organism_id))
{
stop(stringr::str_interp("The organism id '${organism_id}' is not an integer. Valid NCBI Ids are integers, e.g. Homo Sapiens: 9606 Mouse: 10090, 0: unknown organism"))
}
matrix_formats <- get_valid_matrix_formats(connection)
if (!matrix_format %in% matrix_formats)
{
str = paste(as.character(matrix_formats), collapse = ", ")
stop(stringr::str_interp("The Matrix format '${matrix_format}' is unknown. Choose one of: ${str}' "))
}
matrix_path <- ""
if (is.character(matrix)) {
matrix_path <- matrix
}
else if (is.data.frame(matrix))
{
matrix_path <- get_df_as_file(matrix, "matrix.csv")
}
else
{
stop("the given matrix is neither a file path nor a dataframe!")
}
if (!file.exists(matrix_path))
{
stop(stringr::str_interp("The file '${matrix_path}' does not exist. Please provide a valid file!. See https://github.com/FASTGenomics/fastgenomics-docs/blob/master/doc/api/dataset_api.md for valid file formats "))
}
optional_data <- list()
if (!is.null(optional_parameters))
{
if (!is(optional_parameters, "FGDatasetUploadParameters"))
{
stop("the optional_parameters need to be either NULL or a FGDatasetUploadParameters object. Call new('FGDatasetUploadParameters', ..) to obtain such an object.")
}
optional_data <- get_data_from_FGDatasetUploadParameters(optional_parameters, connection)
}
headers <- get_default_headers(connection)
headers <- c(headers, httr::progress("up")) # adds a nice progress bar
url <- paste(connection$base_url, "dataset/api/v4/datasets", sep = "")
body = list(
expression_data = httr::upload_file(matrix_path),
title = title,
description = description,
organism_id = organism_id,
matrix_format = matrix_format,
gene_nomenclature = gene_nomenclature)
body = c(body, optional_data)
response <- httr::POST(url, headers, body = body )
return(parse_response(response, "dataset"))
}
#' Waits for the validation of the dataset to complete.
#'
#' Messages and errors are used to show messages. If you need all messages, use \code{\link{get_dataset}} with the id of this dataset
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#' @param dataset_id The id of the dataset, usually starting with dts_***** OR a FGResponse object
#' @param poll_intervall The time to wait for a new status update in seconds
#'
#' @return TRUE if the validation succeeded, otherweise FALSE
#' @export
#'
#' @examples
#' See create_dataset example
poll_dataset_until_validated <- function(connection, dataset_id, poll_intervall=10){
assert_is_connection(connection)
if (is(dataset_id, "FGResponse"))
{
dtype <- dataset_id@DataType
if (!dtype == "dataset") {
stop(stringr::str_interp("Only FgResponse with a DataType of 'dataset' can be polled! This is a ${dtype}"))
}
dataset_id <- dataset_id@Id
}
if (!is.character(dataset_id))
{
stop(stringr::str_interp("dataset_id can be either a character vector or a FgResponse object."))
}
headers <- get_default_headers(connection)
url <- paste(connection$base_url, "dataset/api/v4/datasets/", dataset_id, "/validationmessages", sep = "")
last_check <- lubridate::ymd("2010/03/17") # something old
while (TRUE) {
Sys.sleep(poll_intervall)
response <- httr::GET(url, headers)
httr::stop_for_status(response)
parsed <- jsonlite::fromJSON(httr::content(response, "text"), simplifyVector = FALSE)
for (msg in parsed) {
msg_time <- lubridate::as_datetime(msg[["timestamp"]], tz = "UTC")
if (msg_time > last_check) {
status <- msg[["status"]]
file_name <- msg[["file_name"]]
msg_text <- msg[["msg"]]
if (status == "Error") {
warning(stringr::str_interp("${msg_time} | ${status} | ${file_name} | ${msg_text}"))
}
else{
message(stringr::str_interp("${msg_time} | ${status} | ${file_name} | ${msg_text}"))
}
}
}
newest_message <- parsed[[length(parsed)]]
if (newest_message[["status"]] == "Ready")
{
# success!
return(TRUE)
}
if (newest_message[["status"]] == "Rejected")
{
# error!
warning(stringr::str_interp("There where upload errors. Call get_dataset with the id of this dataset to obtain more information."))
return(FALSE)
}
last_check <- lubridate::now(tz="UTC")
}
}
#' Get a list of all supported gene nomenclatures
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#'
#' @return a list of the valid gene nomenclatures
#' @export
#'
#' @examples
#' None
get_valid_gene_nomenclatures = function(connection){
result <- get_known_defaults(connection)
result <- result[["known_gene_nomenclatures"]]
data = lapply(result, function(x){ return(x[["key"]])})
return(data)
}
#' Get a list of all supported matrix formats
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#'
#' @return a list of the valid matrix formats
#' @export
#'
#' @examples
#' None
get_valid_matrix_formats = function(connection){
result <- get_known_defaults(connection)
result <- result[["known_csv_matrix_formats"]]
data = lapply(result, function(x){ return(x[["key"]])})
return(data)
}
#' Get a list of all supported technologies
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#'
#' @return a list of the valid technologies
#' @export
#'
#' @examples
#' None
get_valid_technologies = function(connection){
result <- get_known_defaults(connection)
result <- result[["known_technologies"]]
data = lapply(result, function(x){ return(x[["key"]])})
return(data)
}
#' Get a list of all known defaults
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain one.
#'
#' @return a list of the known defaults
#' @export
#'
#' @examples
#' None
get_known_defaults <- function(connection){
assert_is_connection(connection)
headers <- get_default_headers(connection)
url <- paste(connection$base_url, "dataset/api/v4/knowndefaults", sep = "")
response <- httr::GET(url, headers)
httr::stop_for_status(response)
parsed <- jsonlite::fromJSON(httr::content(response, "text"), simplifyVector = FALSE)
return(parsed)
}
get_df_as_file <- function(df, file_name){
column_names <- names(df)
rand_folder <- stringi::stri_rand_strings(n = 1, length = 20)[[1]]
message(stringr::str_interp("Saving dataframe for '${file_name}' to disk"))
tmp_dir <- file.path(tempdir(), rand_folder)
tmp_file <- file.path(tmp_dir, file_name)
dir.create(tmp_dir)
write.csv(df, tmp_file, row.names = FALSE)
message(stringr::str_interp("compressing file '${tmp_file}', this may take a while"))
file.zip = zip_file(tmp_file)
message(stringr::str_interp("Compressing ${tmp_file}' finished"))
return(file.zip)
}
#' Deletes a single data set.
#'
#' @param connection The connection to be used, call \code{\link{connect}} to obtain
#' one.
#' @param dataset_id Id of the data set to be deleted. To get data set ids call
#' \code{\link{get_datasets}} and parse the results.
#'
#' @export
delete_dataset <- function(connection, dataset_id){
url <- paste0(connection$base_url, "dataset/api/v4/datasets/", dataset_id)
headers <- get_default_headers(connection)
response <- httr::DELETE(url, headers)
httr::stop_for_status(response)
}
#' Modify the data set metadata
#'
#' Modifies the data set metadata. The keyword arguments are one of \code{organism_id},
#' \code{organism_id}, \code{current_normalization_status}, \code{contact},
#' \code{contact_name}, \code{description}, \code{title}, \code{license},
#' \code{short_description}, \code{web_link}, \code{notes}, \code{citation},
#' \code{technology}.
#'
#' @param connection The connection to be used, see \code{\link{connect}}.
#' @param dataset_id Id of the data set to be deleted. To get data set ids call
#' \code{\link{get_datasets}} and parse the results.
#'
#' @export
#'
#' @examples
#'
#' # modify a title and description of the data set
#' modify_dataset(connection, dataset_id, title="A new title", description="A new description")
modify_dataset <- function(connection, dataset_id, ...){
url <- paste0(connection$base_url, "dataset/api/v4/datasets/", dataset_id)
headers <- get_default_headers(connection)
body <- list(...)
allowed_keys <- list("organism_id", "organism_id", "current_normalization_status", "contact", "contact_name", "description", "title", "license", "short_description", "web_link", "notes", "citation", "technology")
unknown_keys <- setdiff(names(body), allowed_keys)
if (length(unknown_keys)>0){
stop(stringr::str_interp("Unrecognized keys ${unknown_keys}. Only the following keys are allowed: ${allowed_keys}"))
}
response <- httr::PUT(url, headers, body=body, encode="json")
httr::stop_for_status(response)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.