# Nothing
## ----setup, include=FALSE-----------------------------------------------------
# Global chunk defaults for this document: code is shown, messages and
# warnings are hidden, and chunks are not evaluated.
do.call(
  knitr::opts_chunk$set,
  list(eval = FALSE, warning = FALSE, message = FALSE, echo = TRUE)
)
## -----------------------------------------------------------------------------
# library(mLLMCelltype)
# library(Seurat)
# library(dplyr)
#
# # Step 1: Perform initial high-level annotation
# high_level_results <- annotate_cell_types(
# input = marker_data,
# tissue_name = "human PBMC",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY"),
# top_gene_count = 10
# )
#
# # Step 2: Add high-level annotations to Seurat object
# seurat_obj$high_level_celltype <- plyr::mapvalues(
# x = as.character(Idents(seurat_obj)),
# from = names(high_level_results),
# to = high_level_results
# )
#
# # Step 3: Subset T cells for further annotation
# t_cells <- subset(seurat_obj, high_level_celltype == "T cells")
#
# # Step 4: Find markers within T cells
# t_cell_markers <- FindAllMarkers(t_cells, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
#
# # Step 5: Perform T cell subtype annotation
# t_cell_subtypes <- annotate_cell_types(
# input = t_cell_markers,
# tissue_name = "human PBMC T cells",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY"),
# top_gene_count = 10
# )
#
# # Step 6: Add T cell subtypes back to original object
# t_cell_barcodes <- WhichCells(t_cells)
# seurat_obj$detailed_celltype <- seurat_obj$high_level_celltype
# seurat_obj$detailed_celltype[t_cell_barcodes] <- plyr::mapvalues(
# x = as.character(Idents(t_cells)),
# from = names(t_cell_subtypes),
# to = paste0("T cells: ", t_cell_subtypes)
# )
## -----------------------------------------------------------------------------
# # Check parent-child consistency between two annotation levels
# #
# # high_level:     coarse labels, one per cell
# # detailed_level: fine labels, either equal to the coarse label or of the
# #                 form "<parent>: <subtype>"
# # Returns a data.frame with columns high_level, detailed_level and
# # consistent. consistent is TRUE only when the parent extracted from
# # detailed_level equals high_level; missing labels count as inconsistent.
# validate_hierarchy <- function(high_level, detailed_level) {
#   # strsplit() rejects factors, so split a character copy of the labels
#   detailed_chr <- as.character(detailed_level)
#
#   # Extract parent type from detailed annotation (before the colon);
#   # vapply() guarantees a character vector, even for empty input
#   parent_from_detailed <- vapply(
#     strsplit(detailed_chr, ": ", fixed = TRUE),
#     function(x) x[1],
#     character(1)
#   )
#
#   # Check if parent matches high-level annotation. NA on either side is
#   # reported as FALSE so that subsetting with !consistent never yields
#   # NA-filled rows.
#   consistent <- !is.na(parent_from_detailed) &
#     !is.na(high_level) &
#     parent_from_detailed == high_level
#
#   # Return consistency check results
#   data.frame(
#     high_level = high_level,
#     detailed_level = detailed_level,
#     consistent = consistent
#   )
# }
#
# # Apply validation
# hierarchy_validation <- validate_hierarchy(
# seurat_obj$high_level_celltype,
# seurat_obj$detailed_celltype
# )
#
# # Identify inconsistencies
# inconsistencies <- hierarchy_validation[!hierarchy_validation$consistent, ]
# print(inconsistencies)
## -----------------------------------------------------------------------------
# # For noisy data, use fewer top genes
# results_fewer_genes <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY"),
# top_gene_count = 5 # Use fewer genes to focus on strongest signals
# )
## -----------------------------------------------------------------------------
# # Apply stricter filtering to marker genes
# filtered_markers <- marker_data %>%
# filter(p_val_adj < 0.01, avg_log2FC > 1.0) # Stricter thresholds
#
# # Annotate with filtered markers
# results_filtered <- annotate_cell_types(
# input = filtered_markers,
# tissue_name = "human PBMC",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
## -----------------------------------------------------------------------------
# # Set up API keys
# api_keys <- list(
# anthropic = Sys.getenv("ANTHROPIC_API_KEY"),
# openai = Sys.getenv("OPENAI_API_KEY"),
# gemini = Sys.getenv("GEMINI_API_KEY")
# )
#
# # Define multiple models to use
# models <- c(
# "claude-sonnet-4-6",
# "gpt-5.5",
# "gemini-3.1-pro-preview"
# )
#
# # Create consensus using interactive_consensus_annotation
# consensus_results <- interactive_consensus_annotation(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# models = models,
# api_keys = api_keys,
# controversy_threshold = 0.7,
# entropy_threshold = 1.0,
# consensus_check_model = "claude-sonnet-4-6"
# )
## -----------------------------------------------------------------------------
# # For data with batch effects, use consensus with lower threshold
# batch_consensus <- interactive_consensus_annotation(
# input = marker_data, # Your marker gene data with batch effects
# tissue_name = "mouse brain",
# models = c("claude-sonnet-4-6", "gpt-5.5", "gemini-3.1-pro-preview"),
# api_keys = api_keys,
# controversy_threshold = 0.4, # Lower threshold to discuss more clusters
# entropy_threshold = 0.8 # Lower entropy threshold
# )
## -----------------------------------------------------------------------------
# # Include batch information in the tissue context
# batch_aware_results <- annotate_cell_types(
# input = marker_data, # Your marker gene data with batch effects
# tissue_name = "mouse brain with technical batch effects", # Include batch context
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
## -----------------------------------------------------------------------------
# # Basic annotation without specific tissue context
# basic_results <- annotate_cell_types(
# input = marker_data,
# tissue_name = "human sample", # Generic context
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
#
# # Annotation with specific tissue context
# specific_results <- annotate_cell_types(
# input = marker_data,
# tissue_name = "human fetal liver at 20 weeks gestation", # Detailed context
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
## -----------------------------------------------------------------------------
# # Create a custom annotation prompt
# custom_prompt <- create_annotation_prompt(
# input = marker_data,
# tissue_name = "human PBMC",
# top_gene_count = 10
# )
#
# # Modify the prompt to include additional context
# modified_prompt <- paste0(
# custom_prompt$prompt,
# "\n\nAdditional context: This sample is from a patient with rheumatoid arthritis. ",
# "Previous studies have identified activated T cells, B cells, and CXCR4-high monocytes in this condition."
# )
#
# # Use the modified prompt directly
# custom_results <- get_model_response(
# prompt = modified_prompt,
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
## -----------------------------------------------------------------------------
# library(Seurat)
# library(dplyr)
#
# # Example: Using CellMarker database information to validate annotations
# # This is a conceptual example - implementation would depend on your specific needs
#
# # 1. Get annotations with mLLMCelltype
# annotations <- annotate_cell_types(
# input = marker_data,
# tissue_name = "human PBMC",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
#
# # 2. Compare with known marker genes (conceptual)
# # In a real workflow, you would query a database or use a reference dataset
# known_markers <- list(
# "T cells" = c("CD3D", "CD3E", "CD3G"),
# "B cells" = c("CD19", "MS4A1", "CD79A"),
# "Monocytes" = c("CD14", "LYZ", "CSF1R")
# )
#
# # 3. Validate annotations against known markers
# # This is a simplified example of how you might validate annotations
# #
# # annotations:   predicted cell types, one per cluster (vector or list);
# #                when it has names, they are used as the cluster IDs
# # marker_data:   data frame with cluster, gene and avg_log2FC columns
# # known_markers: named list mapping a cell type to its expected marker genes
# # Returns a list with one entry per annotation, each holding cluster,
# # predicted_type, overlap_count, overlap_genes and confidence.
# validate_annotations <- function(annotations, marker_data, known_markers) {
#   # Prefer the names as cluster IDs: an ID such as "0" does not match the
#   # position of the annotation. Fall back to the position when unnamed.
#   cluster_ids <- names(annotations)
#   if (is.null(cluster_ids)) {
#     cluster_ids <- seq_along(annotations)
#   }
#
#   # One preallocated slot per annotation
#   validation_results <- vector("list", length(annotations))
#
#   # seq_along() yields no iterations for empty input (1:0 would yield two)
#   for (i in seq_along(annotations)) {
#     cluster_id <- cluster_ids[i]
#     # [[ ]] extracts the label itself for both vectors and lists
#     predicted_type <- annotations[[i]]
#
#     # Get the top 20 markers for this cluster by log fold change
#     cluster_markers <- marker_data %>%
#       filter(cluster == cluster_id) %>%
#       arrange(desc(avg_log2FC)) %>%
#       pull(gene) %>%
#       head(20)
#
#     # Check overlap with known markers for this cell type
#     if (predicted_type %in% names(known_markers)) {
#       expected_markers <- known_markers[[predicted_type]]
#       overlap <- intersect(cluster_markers, expected_markers)
#
#       validation_results[[i]] <- list(
#         cluster = cluster_id,
#         predicted_type = predicted_type,
#         overlap_count = length(overlap),
#         overlap_genes = paste(overlap, collapse = ", "),
#         # Fraction of the expected markers found among the cluster markers
#         confidence = length(overlap) / length(expected_markers)
#       )
#     } else {
#       # No reference markers for this cell type: report zero overlap
#       validation_results[[i]] <- list(
#         cluster = cluster_id,
#         predicted_type = predicted_type,
#         overlap_count = 0,
#         overlap_genes = "",
#         confidence = 0
#       )
#     }
#   }
#
#   validation_results
# }
#
# # This is a conceptual example of how you might validate annotations
# # validation_results <- validate_annotations(annotations, marker_data, known_markers)
## -----------------------------------------------------------------------------
# library(Seurat)
# library(mLLMCelltype)
# library(ggplot2)
# library(dplyr)
#
# # Load example PBMC data
# # In a real workflow, you would use your own data
# data("pbmc_small") # Example dataset from Seurat
#
# # Find marker genes
# pbmc_markers <- FindAllMarkers(pbmc_small,
# only.pos = TRUE,
# min.pct = 0.25,
# logfc.threshold = 0.25)
#
# # Set up API keys
# api_keys <- list(
# anthropic = Sys.getenv("ANTHROPIC_API_KEY"),
# openai = Sys.getenv("OPENAI_API_KEY"),
# gemini = Sys.getenv("GEMINI_API_KEY")
# )
#
# # Use consensus annotation
# consensus_results <- interactive_consensus_annotation(
# input = pbmc_markers,
# tissue_name = "human PBMC",
# models = c("claude-sonnet-4-6", "gpt-5.5", "gemini-3.1-pro-preview"),
# api_keys = api_keys,
# controversy_threshold = 0.7,
# entropy_threshold = 1.0,
# consensus_check_model = "claude-sonnet-4-6"
# )
#
# # Add results to Seurat object
# pbmc_small$cell_type <- plyr::mapvalues(
# x = as.character(Idents(pbmc_small)),
# from = names(consensus_results$final_annotations),
# to = consensus_results$final_annotations
# )
#
# # Visualize results
# # In a real workflow, you would create a UMAP or t-SNE plot
# # DimPlot(pbmc_small, group.by = "cell_type", label = TRUE) +
# # ggtitle("PBMC Cell Types")
## -----------------------------------------------------------------------------
# # For rare cell types, use these strategies:
#
# # 1. Increase the number of marker genes considered
# rare_cell_annotation <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human bone marrow",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY"),
# top_gene_count = 20 # Use more genes for rare cell types
# )
#
# # 2. Use consensus with lower thresholds to discuss more clusters
# rare_cell_consensus <- interactive_consensus_annotation(
# input = marker_data, # Your marker gene data
# tissue_name = "human bone marrow",
# models = c("claude-sonnet-4-6", "gpt-5.5", "gemini-3.1-pro-preview"),
# api_keys = api_keys,
# controversy_threshold = 0.4, # Lower threshold to discuss more clusters
# entropy_threshold = 0.8, # Lower entropy threshold
# consensus_check_model = "claude-sonnet-4-6"
# )
#
# # 3. Provide more specific tissue context
# specific_annotation <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human bone marrow with expected rare plasma cells and basophils",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
## -----------------------------------------------------------------------------
# # Example workflow for cross-species comparison
#
# # 1. Annotate human and mouse datasets separately
# # (Assuming you have marker data for both species)
# human_annotations <- annotate_cell_types(
# input = human_marker_data, # Your human marker data
# tissue_name = "human brain cortex",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
#
# mouse_annotations <- annotate_cell_types(
# input = mouse_marker_data, # Your mouse marker data
# tissue_name = "mouse brain cortex",
# model = "claude-sonnet-4-6",
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
#
# # 2. Compare annotations
# # This is a conceptual example - in a real workflow, you would:
# # - Map annotations to Seurat objects
# # - Calculate proportions
# # - Create comparison visualizations
# # - Identify conserved and species-specific cell types
#
# # Example comparison function (conceptual)
# # Splits the cell types seen in two species into shared and species-only sets.
# compare_species_annotations <- function(human_annotations, mouse_annotations) {
#   # Distinct labels per species
#   labels_human <- unique(human_annotations)
#   labels_mouse <- unique(mouse_annotations)
#
#   # Shared labels, then the labels found in only one of the two species
#   list(
#     common_types = intersect(labels_human, labels_mouse),
#     human_specific = setdiff(labels_human, labels_mouse),
#     mouse_specific = setdiff(labels_mouse, labels_human)
#   )
# }
#
# # This is a conceptual example
# # comparison <- compare_species_annotations(human_annotations, mouse_annotations)
## -----------------------------------------------------------------------------
# # Example of cost-efficient model selection
# # Choose models based on your specific needs and budget
#
# # For initial exploration or smaller datasets
# # Use more affordable models
# affordable_annotation <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# model = "claude-haiku-4-5-20251001", # More affordable model
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
#
# # For final analysis or challenging datasets
# # Use larger models
# premium_annotation <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# model = "claude-sonnet-4-6", # Larger model
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
#
# # Use OpenRouter for access to free models
# openrouter_annotation <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# model = "meta-llama/llama-3.3-70b-instruct:free", # Free model via OpenRouter
# api_key = Sys.getenv("OPENROUTER_API_KEY")
# )
## -----------------------------------------------------------------------------
# # 1. Use caching with interactive_consensus_annotation
# consensus_with_cache <- interactive_consensus_annotation(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# models = c("claude-sonnet-4-6", "gpt-5.5"),
# api_keys = api_keys,
# use_cache = TRUE, # Enable caching
# cache_dir = NULL # Uses default system cache directory
# )
#
# # 2. Process clusters in batches
# # This is a conceptual example - implementation would depend on your workflow
# #
# # marker_data: data frame with a cluster column
# # batch_size:  number of clusters sent per annotate_cell_types() call
# # tissue_name, model, api_key: forwarded to annotate_cell_types(); the
# #              defaults reproduce the previously hard-coded values
# # Returns a list holding the combined results of all batches.
# process_in_batches <- function(marker_data,
#                                batch_size = 5,
#                                tissue_name = "human PBMC",
#                                model = "claude-sonnet-4-6",
#                                api_key = Sys.getenv("ANTHROPIC_API_KEY")) {
#   stopifnot(is.numeric(batch_size), length(batch_size) == 1, batch_size >= 1)
#
#   # Get unique clusters
#   clusters <- unique(marker_data$cluster)
#
#   # Nothing to annotate; seq(1, 0, by = batch_size) would raise an error
#   if (length(clusters) == 0) {
#     return(list())
#   }
#
#   # Assign consecutive clusters to batches of at most batch_size
#   batch_index <- ceiling(seq_along(clusters) / batch_size)
#   batches <- split(clusters, batch_index)
#
#   # Annotate each batch on the marker rows that belong to it
#   batch_results <- lapply(batches, function(batch_clusters) {
#     batch_data <- marker_data %>% filter(cluster %in% batch_clusters)
#
#     annotate_cell_types(
#       input = batch_data,
#       tissue_name = tissue_name,
#       model = model,
#       api_key = api_key
#     )
#   })
#
#   # Combine once instead of growing the result inside a loop; unname() stops
#   # the batch index from being prefixed to the result names
#   do.call(c, c(list(list()), unname(batch_results)))
# }
#
# # 3. Use faster models for initial exploration
# fast_annotation <- annotate_cell_types(
# input = marker_data, # Your marker gene data
# tissue_name = "human PBMC",
# model = "claude-haiku-4-5-20251001", # Faster model
# api_key = Sys.getenv("ANTHROPIC_API_KEY")
# )
## -----------------------------------------------------------------------------
# # Processing function for a user-defined provider
# # This function must accept prompt, model, and api_key parameters
# custom_process_fn <- function(prompt, model, api_key) {
#   # Simplified stand-in: report the call, then hand back a canned answer
#   cat("Processing prompt with custom provider\n")
#   cat("Model:", model, "\n")
#
#   # A real implementation would make the API calls at this point
#   # For example:
#   # # response <- httr::POST(
#   # #   url = "https://api.custom-provider.com/v1/chat/completions",
#   # #   body = list(prompt = prompt, model = model),
#   # #   httr::add_headers(Authorization = paste("Bearer", api_key)),
#   # #   encode = "json"
#   # # )
#   # # result <- httr::content(response)$choices[[1]]$text
#
#   # Fixed response used by this example
#   "T cells"
# }
#
# # Register the custom provider
# register_custom_provider(
# provider_name = "custom_provider",
# process_fn = custom_process_fn,
# description = "My custom LLM provider"
# )
#
# # Register a custom model
# register_custom_model(
# model_name = "custom-model",
# provider_name = "custom_provider",
# model_config = list(
# temperature = 0.7,
# max_tokens = 2000
# )
# )
#
# # Use the custom model
# # custom_results <- annotate_cell_types(
# # input = marker_data,
# # tissue_name = "human PBMC",
# # model = "custom-model",
# # api_key = "your-custom-api-key"
# # )
## -----------------------------------------------------------------------------
# # Configure the global logger (recommended approach)
# configure_logger(level = "INFO", console_output = TRUE, json_format = TRUE)
#
# # Use simple logging functions
# log_info("Starting analysis of cluster 0", list(
# cluster_id = "0",
# tissue_name = "human PBMC",
# marker_genes = c("CD3D", "CD3E", "CD2", "IL7R", "LTB")
# ))
#
# # Log API calls with performance tracking
# log_info("API call completed", list(
# provider = "anthropic",
# model = "claude-sonnet-4-6",
# duration_seconds = 2.34,
# success = TRUE
# ))
#
# # Log warnings and errors
# log_warn("Model response had unusual format", list(
# model = "gpt-5.5",
# response_length = 50
# ))
#
# log_error("API call failed", list(
# provider = "openai",
# error = "Rate limit exceeded"
# ))
#
# # Alternatively, create a custom logger instance
# custom_logger <- UnifiedLogger$new(
# base_dir = "custom_logs",
# level = "DEBUG",
# console_output = TRUE,
# json_format = TRUE
# )
#
# # Use the custom logger
# custom_logger$info("Custom log message", list(analysis_step = "preprocessing"))
# custom_logger$debug("Detailed debugging info", list(variable_state = "initialized"))
#
# # Get performance summary
# performance <- get_logger()$get_performance_summary()
# print(performance)
## -----------------------------------------------------------------------------
# # Create a cache manager
# cache_manager <- CacheManager$new(cache_dir = NULL)
#
# # Generate a cache key
# cache_key <- cache_manager$generate_key(
# input = marker_data,
# models = c("claude-sonnet-4-6", "gpt-5.5"),
# cluster_id = "0"
# )
#
# # Check if results exist in cache
# if (cache_manager$has_cache(cache_key)) {
# # Load from cache
# cached_results <- cache_manager$load_from_cache(cache_key)
# } else {
# # Process and save to cache
# # results <- process_cluster(...)
# # cache_manager$save_to_cache(cache_key, results)
# }
#
# # Get cache statistics
# cache_stats <- cache_manager$get_cache_stats()
#
# # Clear cache (with confirmation)
# # cache_manager$clear_cache(confirm = TRUE)
## -----------------------------------------------------------------------------
# # Check cache location
# mllmcelltype_cache_dir()
#
# # Use local cache
# mllmcelltype_cache_dir("local")
#
# # Clear cache
# mllmcelltype_clear_cache()
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.