# Vignette-wide knitr chunk defaults: collapse source and output into a single
# block, and prefix printed output lines with "#>".
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
The lisat (Longitudinal Integration Site Analysis Toolkit) package provides a comprehensive set of tools for the analysis of longitudinal integration site data. This vignette demonstrates the basic workflow, including data simulation, annotation, statistical modeling, and visualization.
First, load the lisat package.
library(lisat)
We will generate simulated raw chromosomal integration site (IS) data to demonstrate the functionality of the package.
# Simulate a raw integration-site (IS) table with one row per site and four
# columns: Sample, SCount, Chr and Locus.
set.seed(12345) # fixed seed so the simulated table is reproducible

n_rows <- 10000
sample_names <- c("Sample_A", "Sample_B", "Sample_C")
chr_list <- as.character(1:23) # chromosome labels "1" .. "23"

# Generate random data (the order of the sample() calls is kept as-is so the
# random stream, and therefore the simulated values, do not change)
Sample <- sample(sample_names, size = n_rows, replace = TRUE)
SCount <- sample(1:1000, size = n_rows, replace = TRUE)
Chr <- sample(chr_list, size = n_rows, replace = TRUE)
Locus <- sample(1:150000000, size = n_rows, replace = TRUE)

IS_raw <- data.frame(
  Sample = Sample,
  SCount = SCount,
  Chr = Chr,
  Locus = Locus,
  stringsAsFactors = FALSE
)

# Simulate some high-frequency clones: overwrite the first 100 counts with
# values several orders of magnitude above the 1-1000 background
IS_raw$SCount[1:100] <- sample(500000:800000, 100, replace = TRUE)

head(IS_raw)
Validate the data structure:
# Pass the simulated table to validate_IS_raw() and keep whatever it returns.
# NOTE(review): presumably this checks the Sample/SCount/Chr/Locus columns --
# confirm against the validate_IS_raw() documentation.
check_validity <- validate_IS_raw(IS_raw)
Create a patient-timepoint mapping table for longitudinal analysis.
# Mapping table with one row per sample: its ID, the time point it was taken
# at, and the patient it belongs to (all three samples come from "Pt1").
Patient_timepoint <- data.frame(
  Sample_ID = c("Sample_A", "Sample_B", "Sample_C"),
  Time_Point = c("3m", "12m", "24m"),
  Patient_ID = rep("Pt1", 3),
  stringsAsFactors = FALSE
)

head(Patient_timepoint)
Annotate the integration sites with genomic features.
Note: This step requires the TxDb.Hsapiens.UCSC.hg38.knownGene and org.Hs.eg.db packages; it is skipped with a message if either one is not installed.
# Annotation needs two optional packages, so it only runs when both can be
# loaded; otherwise a message is emitted and IS_raw is left unchanged.
if (requireNamespace("TxDb.Hsapiens.UCSC.hg38.knownGene", quietly = TRUE) &&
  requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  IS_raw <- get_feature(IS_raw)

  # Check for overlap with specific genomic elements; each call takes the
  # table and its result is assigned back to IS_raw
  IS_raw <- Enhancer_check(IS_raw)
  IS_raw <- Promotor_check(IS_raw)
  IS_raw <- Safeharbor_check(IS_raw)

  # Show the column names after annotation
  names(IS_raw)
} else {
  message("Skipping annotation: Required annotation packages not installed.")
}
Identify regions with recurrent integration sites.
# Call CIS() on the IS table with a connect distance of 50000, then pass its
# result to CIS_overlap() together with the same table.
CIS_top <- CIS(IS_raw = IS_raw, connect_distance = 50000)
CIS_by_sample <- CIS_overlap(CIS_data = CIS_top, IS_raw = IS_raw)

CIS_by_sample
# Compute the per-chromosome summary returned by chr_distribution() and
# print it.
chr_stats <- chr_distribution(IS_raw)
print(chr_stats)
# The gene-overlap checks are only run when both optional annotation packages
# can be loaded; there is no fallback branch.
# NOTE(review): `threashold` is kept exactly as spelled in the original calls;
# presumably it matches the argument name in lisat -- confirm before renaming.
if (requireNamespace("TxDb.Hsapiens.UCSC.hg38.knownGene", quietly = TRUE) &&
  requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  # Adverse Event genes
  ae_overlap <- is_in_AE_gene(IS_raw = IS_raw, Distance = 100000)

  # Cancer Genes
  cg_overlap <- is_in_CG_gene(IS_raw = IS_raw, threashold = 0.001)
  print(cg_overlap)

  # Immune Genes
  immune_overlap <- is_in_immune_gene(IS_raw = IS_raw, threashold = 0.001)
}
Perform Population Matching Distribution (PMD) analysis.
# Run the PMD analysis on the IS table using the sample/time-point mapping
# and preview the result.
PMD_data <- pmd_analysis(IS_raw = IS_raw, Patient_timepoint = Patient_timepoint)
head(PMD_data)

# Plot Richness and Evenness
plot_richness_evenness(PMD_data = PMD_data)

# Analyze linked timepoints
linked_data <- Linked_timepoints(
  IS_raw = IS_raw,
  Patient_timepoint = Patient_timepoint
)
print(linked_data)
Analyze potential dominant clones using cumulative distribution.
# Pass the SCount column to fit_cum_simple(), then print what
# Cumulative_curve() returns for that result.
IS_ratio <- fit_cum_simple(IS_raw$SCount)

# Function for plotting
print(Cumulative_curve(IS_ratio))
# Only call IS_treemap() when the optional treemapify package can be loaded.
if (requireNamespace("treemapify", quietly = TRUE)) {
  IS_treemap(
    IS_raw = IS_raw,
    Patient_timepoint = Patient_timepoint
  )
}
# Build the region table from the IS table and the sample/time-point mapping,
# preview it, then plot it.
Region_data <- Count_regions(
  IS_raw = IS_raw,
  Patient_timepoint = Patient_timepoint
)
head(Region_data)

plot_regions(Region_data = Region_data)
# Example usage:
# ideogram_plot(IS_raw, output_dir = tempdir())
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.