Introduction to LISAT

# Chunk defaults for this vignette: merge source and output into one block
# and prefix every output line with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

Introduction

The lisat (Longitudinal Integration Site Analysis Toolkit) package provides a comprehensive set of tools for the analysis of longitudinal integration site data. This vignette demonstrates the basic workflow, including data simulation, annotation, statistical modeling, and visualization.

Setup

First, load the lisat package.

library(lisat)

Step 1: Data Preparation

We will generate simulated chromosome integration site (IS) raw data to demonstrate the functionality of the package.

# Fix the RNG state so the simulated table is reproducible.
set.seed(12345)

n_rows <- 10000
sample_names <- c("Sample_A", "Sample_B", "Sample_C")
# NOTE(review): chromosome labels are the plain numbers 1-23; confirm that
# the downstream annotation accepts this naming.
chr_list <- as.character(1:23)

# Draw every column independently and with replacement. The draws are kept
# in this order because the seeded result depends on it.
Sample <- sample(sample_names, size = n_rows, replace = TRUE)
SCount <- sample.int(1000, size = n_rows, replace = TRUE)
Chr <- sample(chr_list, size = n_rows, replace = TRUE)
Locus <- sample.int(150000000, size = n_rows, replace = TRUE)

# Column names are taken from the variable names.
IS_raw <- data.frame(Sample, SCount, Chr, Locus, stringsAsFactors = FALSE)

# Overwrite the first 100 counts with much larger values to mimic
# high-frequency clones.
IS_raw$SCount[seq_len(100)] <- 499999L +
  sample.int(300001L, size = 100, replace = TRUE)

head(IS_raw)

Validate the data structure:

# Pass the simulated table to the package validator and keep whatever it
# returns in check_validity.
# NOTE(review): the shape of the return value is not shown here; see
# ?validate_IS_raw.
check_validity <- validate_IS_raw(IS_raw)

Create a patient-timepoint mapping table for longitudinal analysis.

# One row per sample: its ID, the time point it was collected at, and the
# patient it belongs to. All three samples come from the same patient, so
# the single patient ID is recycled across the rows.
Patient_timepoint <- data.frame(
  Sample_ID = c("Sample_A", "Sample_B", "Sample_C"),
  Time_Point = c("3m", "12m", "24m"),
  Patient_ID = "Pt1",
  stringsAsFactors = FALSE
)

head(Patient_timepoint)

Step 2: Genomic Feature Annotation

Annotate the integration sites with genomic features. Note: This step requires the TxDb.Hsapiens.UCSC.hg38.knownGene and org.Hs.eg.db packages; if either is not installed, the annotation is skipped.

# Annotation needs both optional packages. Bail out with a message if either
# one is missing; otherwise annotate IS_raw step by step.
if (!requireNamespace("TxDb.Hsapiens.UCSC.hg38.knownGene", quietly = TRUE) ||
    !requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  message("Skipping annotation: Required annotation packages not installed.")
} else {
  # Each call takes the current table and returns the updated one.
  IS_raw <- get_feature(IS_raw)

  # Overlap checks against specific genomic elements
  IS_raw <- Enhancer_check(IS_raw)
  IS_raw <- Promotor_check(IS_raw)
  IS_raw <- Safeharbor_check(IS_raw)

  # Show the column names after annotation.
  names(IS_raw)
}

Step 3: Integration Site Analysis

Common Integration Sites (CIS)

Identify regions with recurrent integration sites.

# Detect common integration sites in the IS table.
# NOTE(review): connect_distance = 50000 is presumably in base pairs;
# confirm in ?CIS.
CIS_top <- CIS(IS_raw = IS_raw, connect_distance = 50000)
# Combine the detected CIS with the original IS table.
# NOTE(review): the variable name suggests a per-sample breakdown; confirm
# in ?CIS_overlap.
CIS_by_sample <- CIS_overlap(CIS_data = CIS_top, IS_raw = IS_raw)
# Auto-print the result.
CIS_by_sample

Chromosome Distribution

# Compute the chromosome distribution from the IS table and print the result.
chr_stats <- chr_distribution(IS_raw)
print(chr_stats)

Gene Set Overlap

# The gene-set helpers run only when both annotation packages are available,
# using the same guard as the annotation step. When they are missing, say so
# explicitly instead of skipping silently.
if (requireNamespace("TxDb.Hsapiens.UCSC.hg38.knownGene", quietly = TRUE) &&
    requireNamespace("org.Hs.eg.db", quietly = TRUE)) {
  # Adverse Event genes
  # NOTE(review): Distance = 100000 is presumably in base pairs; confirm in
  # ?is_in_AE_gene.
  ae_overlap <- is_in_AE_gene(IS_raw = IS_raw, Distance = 100000)

  # Cancer Genes
  # `threashold` is the argument name exactly as used by this vignette; check
  # the function signature before "correcting" the spelling.
  cg_overlap <- is_in_CG_gene(IS_raw = IS_raw, threashold = 0.001)
  print(cg_overlap)

  # Immune Genes
  immune_overlap <- is_in_immune_gene(IS_raw = IS_raw, threashold = 0.001)
} else {
  message(
    "Skipping gene set overlap: Required annotation packages not installed."
  )
}

Step 4: Longitudinal Analysis (PMD)

Perform Population Matching Distribution (PMD) analysis.

# Run the PMD analysis on the IS table, using the sample-to-time-point
# mapping, and preview the first rows of the result.
PMD_data <- pmd_analysis(IS_raw = IS_raw, Patient_timepoint = Patient_timepoint)
head(PMD_data)
# Plot Richness and Evenness from the PMD result
plot_richness_evenness(PMD_data = PMD_data)

# Analyze linked timepoints from the same IS table and mapping, then print
# the result.
linked_data <- Linked_timepoints(IS_raw = IS_raw, Patient_timepoint = Patient_timepoint)
print(linked_data)

Step 5: Clonal Dominance Analysis

Analyze potential dominant clones using cumulative distribution.

# Fit the cumulative model on the clone counts only (the SCount column).
IS_ratio <- fit_cum_simple(IS_raw$SCount)
# Cumulative_curve() builds the plot from the fitted result; print() renders it.
print(Cumulative_curve(IS_ratio)) # Function for plotting

Step 6: Visualization

Treemap

# treemapify is an optional dependency: draw the treemap only when it is
# installed, and say so explicitly when the plot is skipped.
if (requireNamespace("treemapify", quietly = TRUE)) {
  IS_treemap(IS_raw = IS_raw, Patient_timepoint = Patient_timepoint)
} else {
  message("Skipping treemap: Package 'treemapify' not installed.")
}

Region Counts

# Count regions from the IS table and the sample-to-time-point mapping,
# preview the first rows, then plot the counts.
Region_data <- Count_regions(IS_raw = IS_raw, Patient_timepoint = Patient_timepoint)
head(Region_data)
plot_regions(Region_data = Region_data)

Ideogram

# Example usage:
# ideogram_plot(IS_raw, output_dir = tempdir())


Try the lisat package in your browser

Any scripts or data that you put into this service are public.

lisat documentation built on March 27, 2026, 5:07 p.m.