# Global chunk options for this vignette: hide warnings and messages,
# and render figures at 8 x 6.
knitr::opts_chunk$set(
  warning = FALSE,
  message = FALSE,
  fig.width = 8,
  fig.height = 6
)
ZetaSuite is an R package designed for analyzing multi-dimensional high-throughput screening data, particularly two-dimensional RNAi screens and single-cell RNA sequencing data. The package addresses the limitations of simple Z-based statistics when dealing with complex multi-dimensional datasets where experimental noise and off-target effects accumulate.
# Install from CRAN (skipped when the package is already installed, so
# re-running this chunk does not trigger a needless download)
if (!requireNamespace("ZetaSuite", quietly = TRUE)) {
  install.packages("ZetaSuite")
}

# Load the package
library(ZetaSuite)
ZetaSuite includes an interactive web interface for easy-to-use analysis:
# The three calls below are alternative ways to start the app.
# NOTE(review): each call presumably blocks until the app is stopped, so
# run only the variant you need -- confirm against ZetaSuiteApp().

# Launch the Shiny app
ZetaSuiteApp()

# Launch without opening browser automatically
ZetaSuiteApp(launch.browser = FALSE)

# Launch on a specific port
ZetaSuiteApp(port = 3838)
The Shiny app provides:

- Interactive data upload and visualization
- Step-by-step analysis workflow with progress indicators
- Real-time results and interactive plots
- Data export capabilities for all analysis results
- Built-in example dataset for demonstration
- Bug report integration with GitHub issues
The package includes an example dataset from an in-house HTS2 screening experiment. This dataset contains:
library(ZetaSuite)

# Load example data
data(countMat)
data(negGene)
data(posGene)
data(nonExpGene)
data(ZseqList)
data(SVMcurve)

# Display data dimensions
cat("Count matrix dimensions:", dim(countMat), "\n")
cat("Negative controls:", nrow(negGene), "genes\n")
cat("Positive controls:", nrow(posGene), "genes\n")
cat("Non-expressed genes:", nrow(nonExpGene), "genes\n")
Quality control evaluates the ability of functional readouts to discriminate between negative and positive controls. This step provides diagnostic plots and SSMD (Strictly Standardized Mean Difference) scores.
# Perform quality control analysis
qc_results <- QC(countMat, negGene, posGene)

# Report how many elements QC() returned; the chunks below access them
# by name (score_qc, tSNE_QC, QC_box, QC_SSMD).
cat("QC analysis completed. Generated", length(qc_results), "diagnostic plots.\n")
This plot shows the distribution of raw scores across all readouts for positive and negative controls.
# Show the raw-score distribution plot from the QC results
qc_results[["score_qc"]]
Global evaluation of sample separation based on all readouts.
# Show the t-SNE plot from the QC results
qc_results[["tSNE_QC"]]
Side-by-side comparison of score distributions between control groups.
# QC_box is drawn explicitly with grid rather than auto-printed
qc_box <- qc_results[["QC_box"]]
grid::grid.draw(qc_box)
Distribution of SSMD scores with quality threshold (SSMD ≥ 2).
# Show the SSMD score distribution plot from the QC results
qc_results[["QC_SSMD"]]
Z-score normalization standardizes the data using negative controls as reference, making readouts comparable across different conditions.
# Calculate Z-scores
zscore_matrix <- Zscore(countMat, negGene)

# Display first few rows and columns
cat("Z-score matrix dimensions:", dim(zscore_matrix), "\n")
cat("First 5 rows and columns:\n")

# Clamp the preview to the matrix size so the chunk also works on inputs
# with fewer than 5 rows or columns.
preview_rows <- seq_len(min(5L, nrow(zscore_matrix)))
preview_cols <- seq_len(min(5L, ncol(zscore_matrix)))
print(zscore_matrix[preview_rows, preview_cols, drop = FALSE])
Event coverage quantifies the proportion of readouts that exceed different Z-score thresholds for each gene, creating the foundation for zeta score calculations.
# Calculate event coverage
ec_results <- EventCoverage(
  zscore_matrix,
  negGene,
  posGene,
  binNum = 100,
  combine = TRUE
)

# The plots themselves are shown in the following chunks via ec_results[[2]]
cat("Event coverage analysis completed.\n")
# Decrease direction (exon skipping)
ec_results[[2]]$EC_jitter_D

# Increase direction (exon inclusion)
ec_results[[2]]$EC_jitter_I
Zeta scores represent the area under the event coverage curve, quantifying the cumulative regulatory effect of each gene across all Z-score thresholds.
# Calculate zeta scores without SVM correction
zeta_scores <- Zeta(zscore_matrix, ZseqList, SVM = FALSE)

# Display summary statistics
cat("Zeta score summary:\n")
cat("Number of genes:", nrow(zeta_scores), "\n")
cat("Zeta_D range:", range(zeta_scores$Zeta_D), "\n")
cat("Zeta_I range:", range(zeta_scores$Zeta_I), "\n")

# Show top hits: rows sorted by Zeta_D, largest first
cat("\nTop 10 genes by Zeta_D (decrease direction):\n")
decrease_order <- order(zeta_scores$Zeta_D, decreasing = TRUE)
top_decrease <- head(zeta_scores[decrease_order, ], 10)
print(top_decrease)
SVM analysis creates decision boundaries to separate positive and negative controls, enabling background correction in zeta score calculations.
# Run SVM analysis (can be computationally intensive)
svm_results <- SVM(ec_results)

# Calculate zeta scores with SVM correction, using the curve fitted above
zeta_scores_svm <- Zeta(
  zscore_matrix,
  ZseqList,
  SVMcurve = svm_results,
  SVM = TRUE
)
Screen Strength analysis determines optimal cutoff thresholds by balancing sensitivity and specificity, using the ratio of apparent FDR to baseline FDR.
# Calculate FDR cutoffs and Screen Strength
fdr_results <- FDRcutoff(
  zeta_scores,
  negGene,
  posGene,
  nonExpGene,
  combine = TRUE
)

# The table is read from fdr_results[[1]] and the plots from
# fdr_results[[2]] in the following chunks.
cat("Screen Strength analysis completed.\n")
# Plots are stored in the second element of the FDRcutoff() result
fdr_plots <- fdr_results[[2]]

# Zeta_type plot
fdr_plots[["Zeta_type"]]

# SS_cutOff plot
fdr_plots[["SS_cutOff"]]
# Display FDR cutoff results
fdr_table <- fdr_results[[1]]
cat("FDR cutoff results summary:\n")
cat("Number of thresholds tested:", nrow(fdr_table), "\n")
cat("Screen Strength range:", range(fdr_table$SS), "\n")

# Show optimal thresholds (SS > 0.8).
# which() drops rows where SS is NA; plain logical indexing would return
# all-NA rows for them instead.
optimal_thresholds <- fdr_table[which(fdr_table$SS > 0.8), ]
cat("\nOptimal thresholds (SS > 0.8):\n")
print(head(optimal_thresholds, 10))
Based on the Screen Strength analysis, select appropriate thresholds for hit identification.
# Example: Select threshold with SS > 0.8 and reasonable number of hits.
# which() keeps only rows where the condition is TRUE, so thresholds with
# NA in SS or TotalHits are not carried along as all-NA rows.
is_candidate <- fdr_table$SS > 0.8 & fdr_table$TotalHits > 50
selected_threshold <- fdr_table[which(is_candidate), ]

if (nrow(selected_threshold) > 0) {
  # Among the candidates, take the one with the highest Screen Strength
  best_threshold <- selected_threshold[which.max(selected_threshold$SS), ]
  cat("Recommended threshold:", best_threshold$Cut_Off, "\n")
  cat("Screen Strength:", best_threshold$SS, "\n")
  cat("Total hits:", best_threshold$TotalHits, "\n")

  # Identify hits.
  # Columns extracted with `$` carry no names, so names() on their sum is
  # NULL and would always report zero hits. Index the row names of
  # zeta_scores instead.
  # NOTE(review): assumes the row names of zeta_scores are the gene
  # identifiers -- confirm against Zeta()'s return value.
  combined_zeta <- zeta_scores$Zeta_D + zeta_scores$Zeta_I
  is_hit <- which(combined_zeta >= best_threshold$Cut_Off)
  hits <- rownames(zeta_scores)[is_hit]
  cat("Number of hits identified:", length(hits), "\n")
}
ZetaSuite also provides functionality for single-cell RNA-seq quality control, helping to differentiate high-quality cells from damaged ones.
# Example single cell analysis (requires single cell count matrix).
# The call is left commented out because count_matrix_sc is not provided
# here; supply your own matrix and uncomment to run.
# single_cell_results <- ZetaSuitSC(count_matrix_sc, binNum = 10, filter = TRUE)
# Custom event coverage analysis
ec_custom <- EventCoverage(
  zscore_matrix,
  negGene,
  posGene,
  binNum = 200,    # More bins for finer resolution
  combine = FALSE  # Separate decrease/increase directions
)

# Custom zeta score calculation with SVM
zeta_custom <- Zeta(
  zscore_matrix,
  ZseqList,
  SVMcurve = svm_results,
  SVM = TRUE  # Use SVM correction
)

# Custom FDR analysis
fdr_custom <- FDRcutoff(
  zeta_scores,
  negGene,
  posGene,
  nonExpGene,
  combine = FALSE  # Analyze directions separately
)
# Example: Process multiple datasets.
# countMat1/negGene1/posGene1 (and the *2 counterparts) are placeholders
# for your own data and must exist before this chunk is run.
# Each entry carries its own posGene because EventCoverage() below reads
# ds$posGene; without it that argument would silently be NULL.
datasets <- list(
  dataset1 = list(
    countMat = countMat1,
    negGene = negGene1,
    posGene = posGene1
  ),
  dataset2 = list(
    countMat = countMat2,
    negGene = negGene2,
    posGene = posGene2
  )
)

# Run the Z-score -> event coverage -> zeta pipeline on every dataset and
# collect the three intermediate results per dataset.
results <- lapply(datasets, function(ds) {
  zscore <- Zscore(ds$countMat, ds$negGene)
  ec <- EventCoverage(zscore, ds$negGene, ds$posGene, binNum = 100)
  zeta <- Zeta(zscore, ZseqList, SVM = FALSE)
  list(zscore = zscore, ec = ec, zeta = zeta)
})
- Use `combine = TRUE` for faster event coverage analysis
- Adjust `binNum` for large datasets

Hao, Y., Shao, C., Zhao, G., Fu, X.D. (2021). ZetaSuite: A Computational Method for Analyzing Multi-dimensional High-throughput Data, Reveals Genes with Opposite Roles in Cancer Dependency. Forthcoming
Shao, C., Hao, Y., Qiu, J., Zhou, B., Li, H., Zhou, Y., Meng, F., Jiang, L., Gou, L.T., Xu, J., Li, Y., Wang, H., Yeo, G.W., Wang, D., Ji, X., Glass, C.K., Aza-Blanc, P., Fu, X.D. (2021). HTS2 Screen for Global Splicing Regulators Reveals a Key Role of the Pol II Subunit RPB9 in Coupling between Transcription and Pre-mRNA Splicing. Cell. Forthcoming
# Record the R version and loaded packages used to build this document
utils::sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.