R/call_model_api.R

Defines functions predict_query validate_modality validate_query get_valid_query get_valid_modes get_valid_modalities

Documented in get_valid_modalities get_valid_modes get_valid_query predict_query validate_modality validate_query

#' @title Get Valid Modalities
#' @description Lists the output modalities registered for the v2.0 model in
#' `MODEL_MODALITIES`. Only the version 2 model can be reached with this
#' version of the package; to use v1 models, return to a 1.x.x release.
#'
#' @return The v2.0 entry of `MODEL_MODALITIES`, flattened with `unlist()`.
#' @examples
#' # List every supported modality
#' modalities <- get_valid_modalities()
#' print(modalities)
#'
#' # Test whether one particular modality is supported
#' "bulk_rna-seq" %in% get_valid_modalities()
#' @export
get_valid_modalities <- function() {
  v2_modalities <- MODEL_MODALITIES$`v2.0`
  unlist(v2_modalities)
}

#' @title Get Valid Modes
#' @description Lists the modes accepted by the supported model. The set is
#' fixed in this function: "sample generation", "mean estimation" and
#' "metadata prediction".
#'
#' @return A character vector holding the three valid mode strings.
#' @examples
#' # List every supported mode
#' modes <- get_valid_modes()
#' print(modes)
#'
#' # Test whether one particular mode is supported
#' "sample generation" %in% get_valid_modes()
#' @export
get_valid_modes <- function() {
  supported_modes <- c(
    "sample generation",
    "mean estimation",
    "metadata prediction"
  )
  supported_modes
}


#' @title Get Valid Query Example
#' @description Builds an example query for the v2.0 model that can be used
#' as a starting point and edited as needed. The example holds two inputs:
#' a cell line with a CRISPR perturbation, and a primary tissue sample with
#' disease information. Each input requests 5 samples.
#'
#' @return A named list with the elements `modality`, `mode`,
#' `return_classifier_probs`, `seed` and `inputs`.
#' @examples
#' # Start from the example query
#' query <- get_valid_query()
#'
#' # Switch the query to a different modality
#' query$modality <- "bulk_rna-seq"
#'
#' # Change how many samples the first input requests
#' query$inputs[[1]]$num_samples <- 10
#' @export
get_valid_query <- function() {
  # Input 1: perturbed cell line.
  cell_line_input <- list(
    metadata = list(
      cell_line_ontology_id = "CVCL_0023",
      perturbation_ontology_id = "ENSG00000156127",
      perturbation_type = "crispr",
      perturbation_time = "96 hours",
      sample_type = "cell line"
    ),
    num_samples = 5
  )

  # Input 2: primary tissue with disease annotation.
  primary_tissue_input <- list(
    metadata = list(
      disease_ontology_id = "MONDO:0011719",
      age_years = "65",
      sex = "female",
      sample_type = "primary tissue",
      tissue_ontology_id = "UBERON:0000945"
    ),
    num_samples = 5
  )

  list(
    modality = "bulk",
    mode = "sample generation",
    return_classifier_probs = TRUE,
    seed = 11,
    inputs = list(cell_line_input, primary_tissue_input)
  )
}

#' @title Validate Query Structure
#' @description Validates the top-level structure of a query for the v2.0
#' model. The query must be a list and must contain the keys `inputs`,
#' `mode` and `modality`. The values stored under those keys are not
#' inspected here.
#'
#' @param query A list containing the query data.
#' @return Invisibly returns TRUE if validation passes.
#' Throws an error if `query` is not a list or if any required key is missing.
#' @examples
#' # Create a valid query
#' query <- get_valid_query()
#' validate_query(query) # Passes validation
#'
#' # Example with invalid query (missing required key)
#' \dontrun{
#' invalid_query <- list(inputs = list(), mode = "mean estimation")
#' validate_query(invalid_query) # Throws error for missing modality
#' }
#' @export
validate_query <- function(query) {
  if (!is.list(query)) {
    # class() may return several entries (e.g. c("matrix", "array")).
    # Collapse them so stop() gets one string rather than a vector of
    # repeated messages that it would glue together.
    stop(paste0(
      "Expected `query` to be a list, but got ",
      paste(class(query), collapse = "/")
    ))
  }

  required_keys <- c("inputs", "mode", "modality")

  # names() is NULL for an unnamed list, so every key is reported missing.
  missing_keys <- setdiff(required_keys, names(query))
  if (length(missing_keys) > 0) {
    stop(paste0(
      "Missing required keys in query: ", paste(missing_keys, collapse = ", "), ". ",
      "Use `get_valid_query()` to get an example."
    ))
  }

  invisible(TRUE)
}

#' @title Validate Query Modality
#' @description Validates that the modality specified in the query is allowed
#' for the v2.0 model. The `modality` value must be a single value that is
#' present in the v2.0 entry of `MODEL_MODALITIES`.
#'
#' @param query A list containing the query data.
#' @return Invisibly returns TRUE if validation passes.
#' Throws an error if the modality key is missing, if the modality is not a
#' single value, or if the selected modality is not allowed.
#' @examples
#' # Create a valid query
#' query <- get_valid_query()
#' validate_modality(query) # Passes validation
#'
#' # Example with invalid modality
#' \dontrun{
#' invalid_query <- get_valid_query()
#' invalid_query$modality <- "unsupported_modality"
#' validate_modality(invalid_query) # Throws error for invalid modality
#' }
#' @export
validate_modality <- function(query) {
  allowed_modalities <- unlist(MODEL_MODALITIES[["v2.0"]])

  modality_key <- "modality"
  if (!(modality_key %in% names(query))) {
    stop(paste0("Query requires '", modality_key, "' key."))
  }

  selected_modality <- query[[modality_key]]

  # `if` needs a length-one condition. A NULL or multi-element modality would
  # otherwise fail with an unrelated message (or only warn on older R).
  if (length(selected_modality) != 1L) {
    stop(paste0(
      "Expected '", modality_key, "' to be a single value, but got ",
      length(selected_modality), " values."
    ))
  }

  if (!(selected_modality %in% allowed_modalities)) {
    stop(paste0(
      "Invalid modality '", selected_modality, "'. ",
      "Allowed modalities: ", paste(allowed_modalities, collapse = ", ")
    ))
  }

  invisible(TRUE)
}

#' @title Predict Gene Expression
#' @description Sends a query to the Synthesize Bio API (v2.0) for prediction
#' and retrieves gene expression samples. This function checks that an API
#' token is available, validates the query, POSTs it as JSON to the API, and
#' parses the JSON response.
#'
#' @param query A list representing the query data to send to the API.
#'        Use `get_valid_query()` to generate an example.
#' @param raw_response Logical. If TRUE, the parsed JSON response is returned
#' as-is, without being passed through `extract_expression_data()`. The
#' default (FALSE) returns only the extracted expression and metadata.
#' @param as_counts Passed to `extract_expression_data()`. Logical, if FALSE,
#' transforms the predicted expression counts into logCPM (default is TRUE,
#' returning raw counts). Ignored when `raw_response` is TRUE.
#' @return When `raw_response` is FALSE, the result of
#' `extract_expression_data()`, a list with two data frames:
#'         - 'metadata': contains metadata for each sample
#'         - 'expression': contains expression data for each sample
#' When `raw_response` is TRUE, the parsed JSON response.
#' Throws an error if no API token is set, if the query is invalid, if the
#' API request fails, if the response is not valid JSON, or if the response
#' carries an `error` or `errors` field.
#' @importFrom httr POST add_headers content http_status status_code
#' @importFrom jsonlite toJSON fromJSON
#' @examples
#' # Set your API key (in practice, use a more secure method)
#' \dontrun{
#'
#' # To start using rsynthbio, first you need to have an account with synthesize.bio.
#' # Go here to create one: https://app.synthesize.bio/
#'
#' Sys.setenv(SYNTHESIZE_API_KEY = "your_api_key_here")
#'
#' # Create a query
#' query <- get_valid_query()
#'
#' # Request raw counts
#' result <- predict_query(query, as_counts = TRUE)
#'
#' # Access the results
#' metadata <- result$metadata
#' expression <- result$expression
#'
#' # Request log CPM transformed data
#' log_result <- predict_query(query, as_counts = FALSE)
#' log_expression <- log_result$expression
#'
#' # Explore the top expressed genes in the first sample
#' head(sort(expression[1, ], decreasing = TRUE))
#' }
#' @export
predict_query <- function(query, raw_response = FALSE, as_counts = TRUE) {
  if (!has_synthesize_token()) {
    stop("Please set your API key for synthesize Bio using set_synthesize_token()")
  }

  api_url <- paste0(API_BASE_URL, "/api/model/v2.0")

  # Fail locally on malformed queries before making any network call.
  validate_query(query)
  validate_modality(query)

  # Convert the query list to JSON; auto_unbox turns length-one vectors into
  # JSON scalars instead of one-element arrays.
  query_json <- toJSON(query, auto_unbox = TRUE)

  # Make the API request
  response <- POST(
    url = api_url,
    add_headers(
      Accept = "application/json",
      Authorization = paste("Bearer", Sys.getenv("SYNTHESIZE_API_KEY")),
      `Content-Type` = "application/json"
    ),
    body = query_json,
    encode = "json"
  )

  if (http_status(response)$category != "Success") {
    stop(paste0(
      "API request to ", api_url, " failed with status ",
      status_code(response), ": ", content(response, "text")
    ))
  }

  # Parse JSON response and re-raise decoding failures with context
  parsed_content <- tryCatch(
    fromJSON(content(response, "text"), simplifyDataFrame = TRUE),
    error = function(e) {
      stop(paste0("Failed to decode JSON from API response: ", conditionMessage(e)))
    }
  )

  # If response is a single-item list, use its contents
  if (is.list(parsed_content) && length(parsed_content) == 1 && is.list(parsed_content[[1]])) {
    parsed_content <- parsed_content[[1]]
  }

  # Check for API-reported errors. `[[` is used instead of `$` because `$`
  # partially matches list names: `parsed_content$error` would also pick up
  # an `errors` field and skip the collapsing branch below.
  api_error <- parsed_content[["error"]]
  if (!is.null(api_error)) {
    stop(paste0("API error: ", api_error))
  }
  api_errors <- parsed_content[["errors"]]
  if (!is.null(api_errors)) {
    stop(paste0("API errors: ", paste(api_errors, collapse = "; ")))
  }

  if (!raw_response) {
    result <- extract_expression_data(
      parsed_content,
      as_counts = as_counts
    )
  } else {
    result <- parsed_content
  }

  result
}

Try the rsynthbio package in your browser

Any scripts or data that you put into this service are public.

rsynthbio documentation built on June 8, 2025, 12:11 p.m.