inst/doc/CNV.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

has_pkg = requireNamespace("dplyr", quietly = TRUE) &&
  requireNamespace("tidyr", quietly = TRUE) &&
  requireNamespace("gplots", quietly = TRUE)
knitr::opts_chunk$set(eval = has_pkg)

## -----------------------------------------------------------------------------
library(scPloidy)
library(dplyr)
library(tidyr)
library(gplots)

## ----eval = FALSE-------------------------------------------------------------
#  SU008_Tumor_Pre_windowcovariates =
#    read.table(
#      "multi_tissue_peaks.hg37.20MB.bed",
#      header = FALSE)
#  colnames(SU008_Tumor_Pre_windowcovariates) =
#    c("chr", "start", "end", "window", "peaks")

## ----eval = FALSE-------------------------------------------------------------
#  simpleRepeat = readr::read_tsv(
#    "~/human/publichuman/hg37_ucsc/simpleRepeat.chrom_chromStart_chromEnd.txt.gz",
#    col_names = c("chrom", "chromStart", "chromEnd"))
#  rmsk = readr::read_tsv(
#    "~/human/publichuman/hg37_ucsc/rmsk.Simple_repeat.genoName_genoStart_genoEnd.txt.gz",
#    col_names = c("chrom", "chromStart", "chromEnd"))
#  simpleRepeat = rbind(simpleRepeat, rmsk)
#  rm(rmsk)
#  # convert from 0-based position to 1-based
#  simpleRepeat[, 2] = simpleRepeat[, 2] + 1
#  simpleRepeat = GenomicRanges::makeGRangesFromDataFrame(
#    as.data.frame(simpleRepeat),
#    seqnames.field = "chrom",
#    start.field    = "chromStart",
#    end.field      = "chromEnd")
#  # remove duplicates
#  simpleRepeat = GenomicRanges::union(simpleRepeat, GenomicRanges::GRanges())

## ----eval = FALSE-------------------------------------------------------------
#  window = read.table("window.hg37.20MB.bed", header = FALSE)
#  colnames(window) = c("chr", "start", "end", "window")
#  at = GenomicRanges::makeGRangesFromDataFrame(window[, 1:3])
#  barcodesuffix = paste0(".", window$window)

## ----eval = FALSE-------------------------------------------------------------
#  sc = read.csv(
#    "GSE129785_scATAC-TME-All.cell_barcodes.txt",
#    header = TRUE,
#    sep = "\t")

## ----eval = FALSE-------------------------------------------------------------
#  sample = "GSM3722064"
#  tissue = "SU008_Tumor_Pre"
#  
#  bc = sc$Barcodes[sc$Group == tissue]
#  SU008_Tumor_Pre_fragmentoverlap =
#    fragmentoverlapcount(
#      paste0("SRX5679934/", sample, "_", tissue, "_fragments.tsv.gz"),
#      at,
#      excluderegions = simpleRepeat,
#      targetbarcodes = bc,
#      Tn5offset = c(1, 0),
#      barcodesuffix = barcodesuffix
#    )

## -----------------------------------------------------------------------------
data(GSE129785_SU008_Tumor_Pre)

## -----------------------------------------------------------------------------
levels = c(2, 4)
result = cnv(SU008_Tumor_Pre_fragmentoverlap,
             SU008_Tumor_Pre_windowcovariates,
             levels = levels,
             deltaBICthreshold = -600)

## -----------------------------------------------------------------------------
windowcovariates = SU008_Tumor_Pre_windowcovariates
windowcovariates$w =
  as.numeric(sub("window_", "", windowcovariates$window))

fragmentoverlap = SU008_Tumor_Pre_fragmentoverlap
fragmentoverlap$cell =
  sub(".window.*", "", fragmentoverlap$barcode)
fragmentoverlap$window =
  sub(".*window", "window", fragmentoverlap$barcode)
fragmentoverlap$w =
  as.numeric(sub("window_", "", fragmentoverlap$window))

x = match(fragmentoverlap$barcode,
        result$cellwindowCN$barcode)
fragmentoverlap$CN = result$cellwindowCN$CN[x]
fragmentoverlap$ploidy.moment.cell = result$cellwindowCN$ploidy.moment.cell[x]
fragmentoverlap = fragmentoverlap[!is.na(fragmentoverlap$CN), ]

# For better hierarchical clustering
fragmentoverlap$pwindownormalizedcleanedceiled =
  pmin(fragmentoverlap$CN, min(levels) * 2)

## -----------------------------------------------------------------------------
dataplot =
  fragmentoverlap %>%
  dplyr::select("w", "cell", "pwindownormalizedcleanedceiled") %>%
  tidyr::pivot_wider(names_from = "w", values_from = "pwindownormalizedcleanedceiled")
dataplot = as.data.frame(dataplot)
rownames(dataplot) = dataplot$cell
dataplot = dataplot[, colnames(dataplot) != "cell"]
dataplot = as.matrix(dataplot)
n = max(as.numeric(colnames(dataplot)))
dataplot = dataplot[, match(as.character(1:n), colnames(dataplot))]
colnames(dataplot) = as.character(1:n)

## ----eval = FALSE-------------------------------------------------------------
#  breaks = c(0, min(levels) - 1, min(levels) + 1, min(levels) * 2)
#  
#  x = windowcovariates
#  x$chr[duplicated(windowcovariates$chr)] = NA
#  x = x$chr[match(colnames(dataplot), x$w)]
#  
#  RowSideColors =
#    unlist(
#      lapply(
#        fragmentoverlap$ploidy.moment.cell[
#          match(rownames(dataplot), fragmentoverlap$cell)],
#        function (x) { which(sort(levels) == x)}))
#  RowSideColors = topo.colors(length(levels))[RowSideColors]
#  
#  gplots::heatmap.2(
#    dataplot,
#    Colv = FALSE,
#    dendrogram = "none",
#    breaks = breaks,
#    col = c("blue", "gray80", "red"),
#    trace = "none", labRow = FALSE, na.color = "white",
#    labCol = x,
#    RowSideColors= RowSideColors)

Try the scPloidy package in your browser

Any scripts or data that you put into this service are public.

scPloidy documentation built on May 29, 2024, 10:37 a.m.