#' Multi-scenario analysis of single-cell epigenomic data
#'
#' Add description here
#'
#' @param path directory
#'
#' @return Add return here
#'
#' @details Add details here
#'
#' @author Pedro L. Baldoni, \email{pedrobaldoni@gmail.com}
#'
#' @importFrom magrittr %<>%
#'
#' @export
#'
runGrosselin <- function(path){
mati2 = V1 = V2 = V3 = rl = Counts = N = SumN = NULL
system2("mkdir",file.path(path))
# Running methods on original data
if(!file.exists(file.path(path,'Analysis.RData'))){
load('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/scChIPseq/datasets/HBCx-22-human-paper/scChIPseq.RData')
load('/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/scChIPseq/datasets/HBCx-22-human-paper/cor_filtered_data/HBCx-22-human-paper_99_1.RData')
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = Matrix::Matrix(dt,sparse = TRUE)),
rowRanges = GenomicRanges::makeGRangesFromDataFrame(data.table::rbindlist(lapply(strsplit(rownames(dt),"_"),FUN = function(x){
data.table::data.table(chr=paste(x[seq_len(length(x)-2)],collapse = '_'),start=x[length(x)-1],end=x[length(x)])
}))),
colData = data.frame(Cluster = as.numeric(as.factor(metadata$ChromatinGroup))))
shape <- metadata$sample_id
shape %<>% plyr::mapvalues(from = c("GSM3290891_CountTable_HBCx-22_scChIP_H3K27me3_hg38","GSM3290892_CountTable_HBCx-22-TamR_scChIP_H3K27me3_hg38"),
to = c('HBCx-22','HBCx-22-TamR')) %<>% factor(levels = c('HBCx-22','HBCx-22-TamR'))
Cusanovich2018 <- runCusanovich2018(sc,peaks = FALSE,returnSc = FALSE,legend.title = 'Grosselin Clustering',shape = shape,shape.title = 'Sample Origin')
SnapATAC <- runSnapATAC(sc,peaks = FALSE,returnSc = FALSE,legend.title = 'Grosselin Clustering',shape = shape,shape.title = 'Sample Origin')
Scasat <- runScasat(sc,peaks = FALSE,returnSc = FALSE,legend.title = 'Grosselin Clustering',shape = shape,shape.title = 'Sample Origin')
CisTopic <- runCisTopic(sc,peaks = FALSE,returnSc = FALSE,legend.title = 'Grosselin Clustering',shape = shape,shape.title = 'Sample Origin')
# Reproducing Grosseling figure
Grosselin2019 <- plot_umap(x = run_umap(mati2), labels = as.numeric(as.factor(metadata$ChromatinGroup)), title = 'Grosselin2019',legend.title = 'Grosselin Clustering',shape = shape,shape.title = 'Sample Origin')
save(Cusanovich2018,
SnapATAC,
Scasat,
CisTopic,
Grosselin2019,shape,file = file.path(path,'Analysis.RData'),compress = 'xz')
rm(list=setdiff(ls(),'path'))
}
# Analysis for 50kb
if(!file.exists(file.path(path,'Analysis_50kb.RData'))){
tmpdir <- file.path(path,'Data')
system(paste('mkdir',tmpdir))
dtname1 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_50000bp/counts/SRR7536862_hg38_50000bp_flagged_rmPCR_RT_rmDup_counts_50000_filt_1000.tsv'
dtname2 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_50000bp/counts/SRR7536863_hg38_50000bp_flagged_rmPCR_RT_rmDup_counts_50000_filt_1000.tsv'
system(paste('cp',dtname1,file.path(tmpdir,'dt1_50kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt1_50kb.txt'),'>',file.path(tmpdir,'dt1_50kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt1_50kb.txt')))
system(paste('cp',dtname2,file.path(tmpdir,'dt2_50kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt2_50kb.txt'),'>',file.path(tmpdir,'dt2_50kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt2_50kb.txt')))
dt1 <- scater::readSparseCounts(file.path(tmpdir,'dt1_50kb.txt.gz'))
dt1 <- dt1[unique(rownames(dt1)),]
dt2 <- scater::readSparseCounts(file.path(tmpdir,'dt2_50kb.txt.gz'))
dt2 <- dt2[unique(rownames(dt2)),]
sc <- Seurat::RowMergeSparseMatrices(dt1,dt2)
rn <- rownames(sc)
grDatamatrix <- with(as.data.table(do.call(rbind,strsplit(rn,":|-"))), GRanges(V1,IRanges::IRanges(as.numeric(V2),as.numeric(V3))))
orderRanges <- order(grDatamatrix)
grDatamatrix <- grDatamatrix[orderRanges]
rn <- rn[orderRanges]
dt1 <- dt1[orderRanges,]
dt2 <- dt2[orderRanges,]
sc <- sc[orderRanges,]
total_cell <- c(ncol(dt1),ncol(dt2))
sample_name <- c('HBCx-22','HBCx-22-TamR')
annot_raw <- rbindlist(list(data.table(barcode=colnames(dt1), cell_id=paste0(sample_name[1], "_c", 1:total_cell[1]), sample_id=rep(sample_name[1], total_cell[1]), batch_id=1),
data.table(barcode=colnames(dt2), cell_id=paste0(sample_name[2], "_c", 1:total_cell[2]), sample_id=rep(sample_name[2], total_cell[2]), batch_id=1)))
colnames(sc) <- c(paste0(sample_name[1], "_c", 1:total_cell[1]),
paste0(sample_name[2], "_c", 1:total_cell[2]))
rm(dt1,dt2)
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = sc),
rowRanges = grDatamatrix,
colData = data.frame(Cluster = annot_raw$sample_id))
sc <- sc[seqnames(sc)%in%c(paste0('chr',1:22),'chrX','chrY'),]
Rleagg <- data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(sc)))[,rl := rleid(Counts)][,N := 1:.N]
Rleagg <- merge(Rleagg,Rleagg[,list(SumN = .N),by = 'rl'],by = 'rl',all.x = TRUE)
sc <- sc[-Rleagg[,which(Counts==0 & SumN>2)],]
Cusanovich2018.50kb <- runCusanovich2018(sc, peaks = TRUE, returnSc = TRUE,legend.title = 'Sample Origin')
cisTopic.50kb <- runCisTopic(sc = Cusanovich2018.50kb$sc,legend.title = 'Sample Origin')
SnapATAC.50kb <- runSnapATAC(sc = Cusanovich2018.50kb$sc,legend.title = 'Sample Origin')
Scasat.50kb <- runScasat(sc = Cusanovich2018.50kb$sc,legend.title = 'Sample Origin')
save(Cusanovich2018.50kb,
cisTopic.50kb,
SnapATAC.50kb,
Scasat.50kb,file = file.path(path,'Analysis_50kb.RData'),compress = 'xz')
rm(list=setdiff(ls(),'path'))
}
if(!file.exists(file.path(path,'Analysis_25kb.RData'))){
# Analysis for 25kb
tmpdir <- file.path(path,'Data')
system(paste('mkdir',tmpdir))
dtname1 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_25000bp/counts/SRR7536862_hg38_25000bp_flagged_rmPCR_RT_rmDup_counts_25000_filt_1000.tsv'
dtname2 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_25000bp/counts/SRR7536863_hg38_25000bp_flagged_rmPCR_RT_rmDup_counts_25000_filt_1000.tsv'
system(paste('cp',dtname1,file.path(tmpdir,'dt1_25kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt1_25kb.txt'),'>',file.path(tmpdir,'dt1_25kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt1_25kb.txt')))
system(paste('cp',dtname2,file.path(tmpdir,'dt2_25kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt2_25kb.txt'),'>',file.path(tmpdir,'dt2_25kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt2_25kb.txt')))
dt1 <- scater::readSparseCounts(file.path(tmpdir,'dt1_25kb.txt.gz'))
dt1 <- dt1[unique(rownames(dt1)),]
dt2 <- scater::readSparseCounts(file.path(tmpdir,'dt2_25kb.txt.gz'))
dt2 <- dt2[unique(rownames(dt2)),]
sc <- Seurat::RowMergeSparseMatrices(dt1,dt2)
rn <- rownames(sc)
grDatamatrix <- with(as.data.table(do.call(rbind,strsplit(rn,":|-"))), GRanges(V1,IRanges::IRanges(as.numeric(V2),as.numeric(V3))))
orderRanges <- order(grDatamatrix)
grDatamatrix <- grDatamatrix[orderRanges]
rn <- rn[orderRanges]
dt1 <- dt1[orderRanges,]
dt2 <- dt2[orderRanges,]
sc <- sc[orderRanges,]
total_cell <- c(ncol(dt1),ncol(dt2))
sample_name <- c('HBCx-22','HBCx-22-TamR')
annot_raw <- rbindlist(list(data.table(barcode=colnames(dt1), cell_id=paste0(sample_name[1], "_c", 1:total_cell[1]), sample_id=rep(sample_name[1], total_cell[1]), batch_id=1),
data.table(barcode=colnames(dt2), cell_id=paste0(sample_name[2], "_c", 1:total_cell[2]), sample_id=rep(sample_name[2], total_cell[2]), batch_id=1)))
colnames(sc) <- c(paste0(sample_name[1], "_c", 1:total_cell[1]),
paste0(sample_name[2], "_c", 1:total_cell[2]))
rm(dt1,dt2)
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = sc),
rowRanges = grDatamatrix,
colData = data.frame(Cluster = annot_raw$sample_id))
sc <- sc[seqnames(sc)%in%c(paste0('chr',1:22),'chrX','chrY'),]
Rleagg <- data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(sc)))[,rl := rleid(Counts)][,N := 1:.N]
Rleagg <- merge(Rleagg,Rleagg[,list(SumN = .N),by = 'rl'],by = 'rl',all.x = TRUE)
sc <- sc[-Rleagg[,which(Counts==0 & SumN>2)],]
Cusanovich2018.25kb <- runCusanovich2018(sc, peaks = TRUE, returnSc = TRUE,legend.title = 'Sample Origin')
cisTopic.25kb <- runCisTopic(sc = Cusanovich2018.25kb$sc,legend.title = 'Sample Origin')
SnapATAC.25kb <- runSnapATAC(sc = Cusanovich2018.25kb$sc,legend.title = 'Sample Origin')
Scasat.25kb <- runScasat(sc = Cusanovich2018.25kb$sc,legend.title = 'Sample Origin')
save(Cusanovich2018.25kb,
cisTopic.25kb,
SnapATAC.25kb,
Scasat.25kb,file = file.path(path,'Analysis_25kb.RData'),compress = 'xz')
rm(list=setdiff(ls(),'path'))
}
# Analysis for 5kb
if(!file.exists(file.path(path,'Analysis_5kb.RData'))){
tmpdir <- file.path(path,'Data')
system(paste('mkdir',tmpdir))
dtname1 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_5000bp/counts/SRR7536862_hg38_5000bp_flagged_rmPCR_RT_rmDup_counts_5000_filt_1000.tsv'
dtname2 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_5000bp/counts/SRR7536863_hg38_5000bp_flagged_rmPCR_RT_rmDup_counts_5000_filt_1000.tsv'
system(paste('cp',dtname1,file.path(tmpdir,'dt1_5kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt1_5kb.txt'),'>',file.path(tmpdir,'dt1_5kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt1_5kb.txt')))
system(paste('cp',dtname2,file.path(tmpdir,'dt2_5kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt2_5kb.txt'),'>',file.path(tmpdir,'dt2_5kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt2_5kb.txt')))
dt1 <- scater::readSparseCounts(file.path(tmpdir,'dt1_5kb.txt.gz'))
dt1 <- dt1[unique(rownames(dt1)),]
dt2 <- scater::readSparseCounts(file.path(tmpdir,'dt2_5kb.txt.gz'))
dt2 <- dt2[unique(rownames(dt2)),]
sc <- Seurat::RowMergeSparseMatrices(dt1,dt2)
rn <- rownames(sc)
grDatamatrix <- with(as.data.table(do.call(rbind,strsplit(rn,":|-"))), GRanges(V1,IRanges::IRanges(as.numeric(V2),as.numeric(V3))))
orderRanges <- order(grDatamatrix)
grDatamatrix <- grDatamatrix[orderRanges]
rn <- rn[orderRanges]
dt1 <- dt1[orderRanges,]
dt2 <- dt2[orderRanges,]
sc <- sc[orderRanges,]
total_cell <- c(ncol(dt1),ncol(dt2))
sample_name <- c('HBCx-22','HBCx-22-TamR')
annot_raw <- rbindlist(list(data.table(barcode=colnames(dt1), cell_id=paste0(sample_name[1], "_c", 1:total_cell[1]), sample_id=rep(sample_name[1], total_cell[1]), batch_id=1),
data.table(barcode=colnames(dt2), cell_id=paste0(sample_name[2], "_c", 1:total_cell[2]), sample_id=rep(sample_name[2], total_cell[2]), batch_id=1)))
colnames(sc) <- c(paste0(sample_name[1], "_c", 1:total_cell[1]),
paste0(sample_name[2], "_c", 1:total_cell[2]))
rm(dt1,dt2)
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = sc),
rowRanges = grDatamatrix,
colData = data.frame(Cluster = annot_raw$sample_id))
sc <- sc[seqnames(sc)%in%c(paste0('chr',1:22),'chrX','chrY'),]
Rleagg <- data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(sc)))[,rl := rleid(Counts)][,N := 1:.N]
Rleagg <- merge(Rleagg,Rleagg[,list(SumN = .N),by = 'rl'],by = 'rl',all.x = TRUE)
sc <- sc[-Rleagg[,which(Counts==0 & SumN>2)],]
Cusanovich2018.5kb <- runCusanovich2018(sc, peaks = TRUE, returnSc = TRUE,legend.title = 'Sample Origin')
cisTopic.5kb <- runCisTopic(sc = Cusanovich2018.5kb$sc,legend.title = 'Sample Origin')
SnapATAC.5kb <- runSnapATAC(sc = Cusanovich2018.5kb$sc,legend.title = 'Sample Origin')
Scasat.5kb <- runScasat(sc = Cusanovich2018.5kb$sc,legend.title = 'Sample Origin')
save(Cusanovich2018.5kb,
cisTopic.5kb,
SnapATAC.5kb,
Scasat.5kb,file = file.path(path,'Analysis_5kb.RData'),compress = 'xz')
rm(list=setdiff(ls(),'path'))
}
if(!file.exists(file.path(path,'Analysis_1kb.RData'))){
# Analysis for 1kb
tmpdir <- file.path(path,'Data')
system(paste('mkdir',tmpdir))
dtname1 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536862/output_hg38_1000bp/counts/SRR7536862_hg38_1000bp_flagged_rmPCR_RT_rmDup_counts_1000_filt_1000.tsv'
dtname2 <- '/nas/longleaf/home/baldoni/scratch/Rashid/Project3/Data/Grosselin2019/processedFiles/SRR7536863/output_hg38_1000bp/counts/SRR7536863_hg38_1000bp_flagged_rmPCR_RT_rmDup_counts_1000_filt_1000.tsv'
system(paste('cp',dtname1,file.path(tmpdir,'dt1_1kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt1_1kb.txt'),'>',file.path(tmpdir,'dt1_1kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt1_1kb.txt')))
system(paste('cp',dtname2,file.path(tmpdir,'dt2_1kb.txt')))
system(paste('gzip -c',file.path(tmpdir,'dt2_1kb.txt'),'>',file.path(tmpdir,'dt2_1kb.txt.gz')))
system(paste('rm',file.path(tmpdir,'dt2_1kb.txt')))
dt1 <- scater::readSparseCounts(file.path(tmpdir,'dt1_1kb.txt.gz'))
dt1 <- dt1[unique(rownames(dt1)),]
dt2 <- scater::readSparseCounts(file.path(tmpdir,'dt2_1kb.txt.gz'))
dt2 <- dt2[unique(rownames(dt2)),]
sc <- Seurat::RowMergeSparseMatrices(dt1,dt2)
rn <- rownames(sc)
grDatamatrix <- with(as.data.table(do.call(rbind,strsplit(rn,":|-"))), GRanges(V1,IRanges::IRanges(as.numeric(V2),as.numeric(V3))))
orderRanges <- order(grDatamatrix)
grDatamatrix <- grDatamatrix[orderRanges]
rn <- rn[orderRanges]
dt1 <- dt1[orderRanges,]
dt2 <- dt2[orderRanges,]
sc <- sc[orderRanges,]
total_cell <- c(ncol(dt1),ncol(dt2))
sample_name <- c('HBCx-22','HBCx-22-TamR')
annot_raw <- rbindlist(list(data.table(barcode=colnames(dt1), cell_id=paste0(sample_name[1], "_c", 1:total_cell[1]), sample_id=rep(sample_name[1], total_cell[1]), batch_id=1),
data.table(barcode=colnames(dt2), cell_id=paste0(sample_name[2], "_c", 1:total_cell[2]), sample_id=rep(sample_name[2], total_cell[2]), batch_id=1)))
colnames(sc) <- c(paste0(sample_name[1], "_c", 1:total_cell[1]),
paste0(sample_name[2], "_c", 1:total_cell[2]))
rm(dt1,dt2)
sc <- SingleCellExperiment::SingleCellExperiment(assays = list(counts = sc),
rowRanges = grDatamatrix,
colData = data.frame(Cluster = annot_raw$sample_id))
sc <- sc[seqnames(sc)%in%c(paste0('chr',1:22),'chrX','chrY'),]
Rleagg <- data.table(Counts = Matrix::rowSums(SummarizedExperiment::assay(sc)))[,rl := rleid(Counts)][,N := 1:.N]
Rleagg <- merge(Rleagg,Rleagg[,list(SumN = .N),by = 'rl'],by = 'rl',all.x = TRUE)
sc <- sc[-Rleagg[,which(Counts==0 & SumN>2)],]
Cusanovich2018.1kb <- runCusanovich2018(sc, peaks = TRUE, returnSc = TRUE,legend.title = 'Sample Origin')
cisTopic.1kb <- runCisTopic(sc = Cusanovich2018.1kb$sc,legend.title = 'Sample Origin')
SnapATAC.1kb <- runSnapATAC(sc = Cusanovich2018.1kb$sc,legend.title = 'Sample Origin')
Scasat.1kb <- runScasat(sc = Cusanovich2018.1kb$sc,legend.title = 'Sample Origin')
save(Cusanovich2018.1kb,
cisTopic.1kb,
SnapATAC.1kb,
Scasat.1kb,file = file.path(path,'Analysis_1kb.RData'),compress = 'xz')
rm(list=setdiff(ls(),'path'))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.