# USARC CNS ----------------------------------------------------------------
# Builds the CN40 label map and the USARC copy-number signature database.
# Input:  data-raw/USARC.CNS.xlsx
# Output: inst/extdata/CN40-Map.txt, inst/extdata/CNS_signatures_USARC.rds

# `read_excel()` and `%>%` are used unqualified in this section, so attach the
# packages providing them here instead of relying on library() calls that
# appear further down the script.
library(readxl)
library(tidyverse)

usarc <- read_excel("data-raw/USARC.CNS.xlsx", sheet = 1, skip = 1)
colnames(usarc)[1] <- "class"

# Split each class label on ":" into type / type2 / size, split the size
# interval on "," and strip the "(" and "]" so the bounds become numeric.
cn40_parts <- usarc %>%
  dplyr::select("class") %>%
  tidyr::separate("class", c("type", "type2", "size"), sep = ":", remove = FALSE) %>%
  tidyr::separate("size", c("size_min", "size_max"), sep = ",") %>%
  dplyr::mutate(
    size_min = as.numeric(sub("\\(", "", size_min)),
    size_max = as.numeric(sub("\\]", "", size_max))
  ) %>%
  dplyr::mutate(
    # Recode the type keyword into the prefix used by the new label.
    type = dplyr::case_when(
      type == "amp" ~ "5+",
      type == "dup" ~ "3-4",
      type == "neut" ~ "2",
      type == "del" ~ "0-1"
    ),
    # Bounds below 1 are printed as Kb (value * 1000), the others as Mb.
    size = dplyr::case_when(
      size_min < 0 & size_max < 1 ~ paste0("0-", 1000 * size_max, "Kb"),
      # size_min < 0 & size_max >= 1 ~ paste0("0-", size_max, "Mb")
      is.infinite(size_max) ~ paste0(">", size_min, "Mb"),
      size_max < 1 ~ paste0(1000 * size_min, "Kb-", 1000 * size_max, "Kb"),
      size_min < 1 & size_max == 1 ~ paste0(1000 * size_min, "Kb-", size_max, "Mb"),
      size_min >= 1 ~ paste0(size_min, "Mb-", size_max, "Mb")
    )
  )

# case_when() returns NA for every class that matches none of its branches,
# and unite() would then paste the literal text "NA" into the label. Stop here
# rather than writing a corrupt map to inst/extdata.
if (anyNA(cn40_parts$type) || anyNA(cn40_parts$size)) {
  bad_classes <- cn40_parts$class[is.na(cn40_parts$type) | is.na(cn40_parts$size)]
  stop(
    "Cannot map USARC class label(s): ",
    paste(bad_classes, collapse = ", "),
    call. = FALSE
  )
}

# Assemble the new label, order rows by the leading digit of the label and
# keep only the old -> new label pair.
CN40_map <- cn40_parts %>%
  tidyr::unite("label", c("type", "type2", "size"), sep = ":") %>%
  dplyr::mutate(
    cn = as.integer(substr(label, 1, 1))
  ) %>%
  dplyr::arrange(cn) %>%
  dplyr::select(class, label) %>%
  dplyr::rename(
    label1 = class,
    label2 = label
  )
data.table::fwrite(CN40_map, file = "inst/extdata/CN40-Map.txt", sep = "\t")

# Named lookup vector: names are the original labels, values the new labels.
map <- CN40_map$label2
names(map) <- CN40_map$label1

# Move the class column into the row names, then relabel rows and prefix the
# signature (column) names with "USARC_".
usarc2 <- usarc %>%
  tibble::column_to_rownames("class")
rownames(usarc2) <- map[rownames(usarc2)]
colnames(usarc2) <- paste0("USARC_", colnames(usarc2))
# check sum
colSums(usarc2)

# db: signature matrix; aetiology: one-column data frame with one row per
# signature; date: fixed date string stored with the database.
CNS_USARC <- list(
  db = as.matrix(usarc2),
  aetiology = data.frame(
    V1 = colnames(usarc2),
    V2 = "See https://doi.org/10.1016/j.ccell.2019.02.002"
  ) %>% tibble::column_to_rownames("V1") %>% setNames("aetiology"),
  date = "2021/01/15"
)
saveRDS(CNS_USARC, file = "inst/extdata/CNS_signatures_USARC.rds")
# PANCAN CNS ---------------------
# Extracting from 9873 primary cancer samples
# ref: https://www.nature.com/articles/s41586-022-04738-6
# data source: https://github.com/AlexandrovLab/SigProfilerExtractor/tree/master/SigProfilerExtractor/data
# Output: inst/extdata/CNS_signatures_TCGA.rds
library(tidyverse)

cnv_sig_url <- "https://raw.githubusercontent.com/AlexandrovLab/SigProfilerExtractor/master/SigProfilerExtractor/data/CNV_signatures.txt"
cnv_sig_file <- "data-raw/CNV_signatures.txt"

# download.file() returns 0 on success; do not continue with a missing or
# partially written file.
dl_status <- download.file(cnv_sig_url, destfile = cnv_sig_file)
if (dl_status != 0L) {
  stop(
    "Download of ", cnv_sig_url, " failed with status ", dl_status,
    call. = FALSE
  )
}

cns <- read_tsv(cnv_sig_file)
colnames(cns)[1] <- "class"
# Replace every lowercase "k" in the class labels with "K".
cns$class <- gsub("k", "K", cns$class)
cns <- column_to_rownames(cns, "class")
# check sum
colSums(cns)

# The aetiology text points at the COSMIC copy-number signature page. It is
# set directly here so the object is built and saved once, instead of being
# saved, re-read, patched and saved again.
CNS_TCGA <- list(
  db = as.matrix(cns),
  aetiology = data.frame(
    V1 = colnames(cns),
    V2 = "See https://cancer.sanger.ac.uk/signatures/cn/"
  ) %>% tibble::column_to_rownames("V1") %>% setNames("aetiology"),
  date = "2021/07/23"
)
saveRDS(CNS_TCGA, file = "inst/extdata/CNS_signatures_TCGA.rds")
# PCAWG CNS ---------------------------------------------------------------
# Ref: https://doi.org/10.1093/bib/bbad053
# Input:  data-raw/PCAWG.CNS.xlsx
# Output: inst/extdata/CNS_signatures_PCAWG176.rds
library(readxl)

# Read the sheet and move its "class" column into the row names.
pcawgcns <- tibble::column_to_rownames(
  read_excel("data-raw/PCAWG.CNS.xlsx"),
  var = "class"
)

# Every "CNS" in a column name becomes "PCAWG_CNS".
colnames(pcawgcns) <- gsub("CNS", "PCAWG_CNS", colnames(pcawgcns))

# db: signature matrix; aetiology: one-column data frame with one row per
# signature, all pointing at the reference; date: fixed date string.
CNS_PCAWG <- list(
  db = as.matrix(pcawgcns),
  aetiology = setNames(
    tibble::column_to_rownames(
      data.frame(
        V1 = colnames(pcawgcns),
        V2 = "See https://doi.org/10.1093/bib/bbad053"
      ),
      "V1"
    ),
    "aetiology"
  ),
  date = "2023/03/17"
)
saveRDS(CNS_PCAWG, file = "inst/extdata/CNS_signatures_PCAWG176.rds")
# TCGA CNS ---------------------------------------------------------------
# Ref: https://doi.org/10.1093/bib/bbad053
# Input:  data-raw/TCGA.CNS.xlsx
# Output: inst/extdata/CNS_signatures_TCGA176.rds

# Read the sheet and move its "class" column into the row names.
tcgacns <- tibble::column_to_rownames(
  read_excel("data-raw/TCGA.CNS.xlsx"),
  var = "class"
)

# Every "Sig" in a column name becomes "TCGA_CNS".
colnames(tcgacns) <- gsub("Sig", "TCGA_CNS", colnames(tcgacns))

# db: signature matrix; aetiology: one-column data frame with one row per
# signature, all pointing at the reference; date: fixed date string.
CNS_TCGA <- list(
  db = as.matrix(tcgacns),
  aetiology = setNames(
    tibble::column_to_rownames(
      data.frame(
        V1 = colnames(tcgacns),
        V2 = "See https://doi.org/10.1093/bib/bbad053"
      ),
      "V1"
    ),
    "aetiology"
  ),
  date = "2023/03/17"
)
saveRDS(CNS_TCGA, file = "inst/extdata/CNS_signatures_TCGA176.rds")
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.