#options
options(
dplyr.summarise.inform = FALSE,
scipen = 999
)
#parse input
option_list = list(
optparse::make_option(
c("-K", "--Kronos_conf_file"),
type = "character",
default = NULL,
help = "Kronos setting file. If provided -F,-T,-S,-b and -g are ignored. Tab file containing: Per cell stat file <TAB> tracks file <TAB> settings file <TAB> basename (optional) <TAB> group (optional) ",
metavar = "character"
),
optparse::make_option(
c("-F", "--file"),
type = "character",
default = NULL,
help = "Per cell stat file , if multiple files are provided they have to be separated by a comma",
metavar = "character"
),
optparse::make_option(
c("-T", "--tracks"),
type = "character",
default = NULL,
help = "Tracks file, if multiple files are provided they have to be separated by a comma",
metavar = "character"
),
optparse::make_option(
c("-C", "--chrSizes"),
type = "character",
default = NULL,
help = "Chromosome size file",
metavar = "character"
),
optparse::make_option(
c("-o", "--out"),
type = "character",
default = "output",
help = "Output directory [default= %default]",
metavar = "character"
),
optparse::make_option(
c("-f", "--output_file_base_name"),
type = "character",
default = "out",
help = "Base name for the output file [default= %default]",
metavar = "character"
),
optparse::make_option(
c("-b", "--base_name"),
type = "character",
default = NULL,
help = "Basename of an experiment, if multiple basenames are provided they have to be separated by a comma",
metavar = "character"
),
optparse::make_option(
c("-g", "--groups"),
type = "character",
default = NULL,
help = "Group name of an experiment, if multiple groups are provided they have to be separated by a comma",
metavar = "character"
),
optparse::make_option(
c("-S", "--settings_file"),
type = "character",
help = "File generated by Kronos diagnostic, if multiple files are provided they have to be separated by a comma",
metavar = "character"
),
optparse::make_option(
c("-B", "--binSize"),
type = "character",
default = '500Kb',
help = "RT resolution (supports units) [default= %default] ",
metavar = "character"
),
optparse::make_option(
c("-c", "--cores"),
type = "integer",
default = 3,
help = "Number of parallel jobs to run [default= %default]",
metavar = "integer"
),
optparse::make_option(
c("--min_correlation"),
type = "double",
default = 0.25,
help = "Minimum correlation value between one cell and its best correlating cell for this cell to not be discarded [default= %default]",
metavar = "double"
),
optparse::make_option(
c("--extract_G1_G2_cells"),
type = "logical",
default = F,
action = "store_true",
help = "Extract G1/G2 single cells copy numebr file [default= %default]",
metavar = "logical"
),
optparse::make_option(
c("--chr_prefix"),
type = "character",
action = 'store',
help = "Chromosome prefix, if there is no prefix use none [default= %default]",
default = "chr",
metavar = "character"
),
optparse::make_option(
c("--chr_range"),
type = "character",
action = 'store',
help = "Chromosomes to consider in the analysis (example 1:5,8,15:18,X) [default= %default]",
default = "1:22",
metavar = "character"
)
)
#recover inputs
opt = optparse::parse_args(object = optparse::OptionParser(option_list = option_list))
#set plotting theme
ggplot2::theme_set(ggplot2::theme_bw())
#define operators
`%do%` = foreach::`%do%`
`%>%` = tidyr::`%>%`
`%dopar%` = foreach::`%dopar%`
#check inputs
if ('Kronos_conf_file' %in% names(opt)) {
if (!file.exists(opt$Kronos_conf_file)) {
stop('Provided setting file does not exist')
}
settings = tryCatch(
expr = readr::read_tsv(
opt$Kronos_conf_file,
col_names = c('file', 'traks', 'settings', 'basename', 'groups'),
col_types = readr::cols()
) %>%
dplyr::mutate(
basename = ifelse(
is.na(basename),
paste0('exp', dplyr::row_number()),
basename
),
groups = ifelse(is.na(groups), basename, groups)
),
error = function(e) {
stop('Settings file does not exitst. See script usage (--help)')
},
warning = function(w) {
tmp = suppressWarnings(readr::read_tsv(
opt$Kronos_conf_file,
col_names = c('file', 'traks', 'settings'),
col_types = readr::cols()
))
return(tmp)
}
)
#reformat files
opt$file = settings$file
opt$settings_file = settings$settings
opt$tracks = settings$traks
if ('basename' %in% names(settings)) {
opt$base_name = settings$basename
}
if ('basename' %in% names(settings)) {
opt$groups = settings$groups
}
} else{
if (!'file' %in% names(opt)) {
stop("Per cell stat file or Kronos setting file must be provided. See script usage (--help)")
}
if (!'tracks' %in% names(opt)) {
stop(
"File containing cells CNV or Kronos setting file must be provided. See script usage (--help)"
)
}
if (!'settings_file' %in% names(opt)) {
stop("File containing settings sizes must be provided. See script usage (--help)")
}
if (!'groups' %in% names(opt)) {
opt$groups = opt$base_name
}
#reformat files
opt$file = stringr::str_split(opt$file, ',', simplify = T)
opt$settings_file = stringr::str_split(opt$settings_file, ',', simplify = T)
opt$tracks = stringr::str_split(opt$tracks, ',', simplify = T)
if (!is.null(opt$base_name)) {
opt$base_name = stringr::str_split(opt$base_name, ',', simplify = T)
#if provided base names are not enough
if (length(opt$base_name) != length(opt$file)) {
opt$base_name = NULL
opt$groups = NULL
}
}
if (!is.null(opt$groups)) {
opt$groups = stringr::str_split(opt$groups, ',', simplify = T)
#if provided groups are not enough
if (length(opt$groups) != length(opt$file)) {
opt$groups = NULL
opt$base_name = NULL
}
}
}
if (!'chrSizes' %in% names(opt)) {
stop("File containing Chromosomes sizes must be provided. See script usage (--help)")
}
#check per cell files
results = paste(lapply(opt$file, function(x)
Kronos.scRT::right_format(
file_path = x,
delim = ',',
columns_to_check = c(
'Cell',
'normalized_dimapd',
'mean_ploidy',
'ploidy_confidence',
'is_high_dimapd',
'is_noisy',
'coverage_per_1Mbp',
"basename",
"group"
),
wrong_message = paste(x, ',provided as a per cell file, does not have the right format. ')
)),
collapse = '')
if (results != '') {
stop(results)
}
#check tracks files
results = paste(lapply(opt$tracks, function(x)
Kronos.scRT::right_format(
file_path = x,
delim = '\t',
columns_to_check = c(
'Cell',
'chr',
'start',
'end',
'copy_number',
'reads',
"basename",
"group"
),
wrong_message = paste(x, ',provided as a track file, does not have the right format')
)),
collapse = '')
if (results != '') {
stop(results)
}
#check settings files
results = paste(lapply(opt$settings_file, function(x)
Kronos.scRT::right_format(
file_path = x,
delim = '\t',
columns_to_check = c(
'threshold_Sphase',
'threshold_G1G2phase',
'Sphase_first_part',
'Sphase_second_part',
'RPM_TH',
'Ploidy',
"basename",
"group"
),
wrong_message = paste(x, ',provided as a setting file, does not have the right format')
)),
collapse = '')
if (results != '') {
stop(results)
}
#check chr size file
results = paste(
Kronos.scRT::right_format(
file_path = opt$chrSizes,
delim = '\t',
columns_to_check = 2,
wrong_message = paste(
opt$chrSizes,
',provided as a chromosome size file, does not have the right format'
)
),
collapse = ''
)
if (results != '') {
stop(results)
}
# convert binsize to numeric
extract_unit = stringr::str_extract(opt$binSize, pattern = '.{2}$')
resolution = as.numeric(stringr::str_remove(opt$binSize, "[Bb][Pp]|[Kk][Bb]|[Mm][Bb]")) * dplyr::case_when(
grepl(x = extract_unit, pattern = '[Kk][Bb]') ~ 1000,
grepl(x = extract_unit, pattern = '[Mm][Bb]') ~ 1000000,
grepl(x = extract_unit, pattern = '[Bp][Pp]') ~ 1,
grepl(x = extract_unit, pattern = '[0-9][0-9]') ~ 1
)
if (any(is.na(resolution))) {
stop('binsize have an incorrect format')
}
# prepare name file
if (grepl(x = extract_unit, pattern = '[0-9][0-9]')) {
n_of_zeros = stringr::str_length(stringr::str_extract(opt$binSize, '0{1,10}$'))
opt$binSize = dplyr::case_when(
is.na(n_of_zeros) ~ paste0(opt$binSize, 'bp'),
n_of_zeros < 3 ~ paste0(opt$binSize, 'bp'),
n_of_zeros < 6 ~ paste0(stringr::str_remove(opt$binSize, '0{3}$'), 'Kb'),
n_of_zeros >= 6 ~ paste0(stringr::str_remove(opt$binSize, '0{6}$'), 'Mp')
)
} else{
opt$binSize = opt$binSize
}
#create directory
if (!dir.exists(opt$out)) {
dir.create(opt$out, recursive = T)
}
# check inputs
if (length(opt$tracks) != length(opt$file)) {
stop("The number stat files does not match provided trakcs. See script usage (--help)")
}
#select chrs
chr_list = paste0(
ifelse(opt$chr_prefix == 'none', '', opt$chr_prefix),
unlist(Kronos.scRT::String_to_Range(
stringr::str_split(opt$chr_range, ',', simplify = T)
))
)
#load genome sizes
Chr_Size = readr::read_tsv(
opt$chrSizes,
col_names = c('chr', 'size'),
col_types = readr::cols(chr = 'c')
) %>%
dplyr::filter(chr %in% chr_list) %>%
dplyr::mutate(chr = factor(x = chr, levels = chr_list)) %>%
tidyr::drop_na()
chr_list = chr_list[chr_list %in% unique(Chr_Size$chr)]
#load single cells info
data <- foreach::foreach(i = 1:length(opt$file),
.combine = 'rbind') %do% {
tmp = readr::read_csv(opt$file[i], col_types = readr::cols())
if (!is.null(opt$base_name)) {
tmp %>%
dplyr::mutate(
basename = factor(opt$base_name[i], levels = opt$base_name),
group = factor(opt$groups[i], levels = opt$groups)
)
} else{
tmp
}
}
#load settings
settings <- foreach::foreach(i = 1:length(opt$settings_file),
.combine = 'rbind') %do% {
tmp = readr::read_tsv(opt$settings_file[i], col_types = readr::cols())
if (!is.null(opt$base_name)) {
tmp %>%
dplyr::mutate(
basename = factor(opt$base_name[i], levels = opt$base_name),
group = factor(opt$groups[i], levels = opt$groups)
)
} else{
tmp
}
}
#load tracks
all_tracks <-
foreach::foreach(i = 1:length(opt$tracks),
.combine = 'rbind') %do% {
tmp = readr::read_delim(opt$tracks[i],
delim = '\t',
col_types = readr::cols(chr = 'c'))
if (!is.null(opt$base_name)) {
tmp %>%
dplyr::mutate(
basename = factor(opt$base_name[i], levels = opt$base_name),
group = factor(opt$groups[i], levels = opt$groups),
chr = factor(x = chr, levels = chr_list)
) %>%
tidyr::drop_na()
} else{
tmp %>%
dplyr::mutate(chr = factor(x = chr, levels = chr_list)) %>%
tidyr::drop_na()
}
}
#Adjust Percell
data = Kronos.scRT::AdjustPerCell(PerCell = data,
Settings = settings)
#adjust CNV
all_tracks = Kronos.scRT::AdjustCN(PerCell = data, scCN = all_tracks)
#create genome bins for the final output
#load chromosome file
bins = Kronos.scRT::GenomeBinning(
Chr_size = Chr_Size,
size = resolution,
Chr_filter = chr_list,
Cores = opt$cores
)
#save bins to resize References
saveRDS(
object = list(bins = bins, bs = opt$binSize),
file = paste0(file.path(opt$out,
opt$output_file_base_name),
'_bins.rds')
)
#rebin data
signal_smoothed = Kronos.scRT::Rebin(
PerCell = data,
scCN = all_tracks,
Bins = bins,
Sphase = T
)
G1G2_smoothed = Kronos.scRT::Rebin(
PerCell = data,
scCN = all_tracks,
Bins = bins,
Sphase = F
)
#free some memory
rm('all_tracks')
#calculate background
backgroud_smoothed = Kronos.scRT::BackGround(G1_scCN = G1G2_smoothed)
# binarizes data into Replicated or not replicated bins
signal_smoothed = Kronos.scRT::Replication_state(
Samples = signal_smoothed ,
background = backgroud_smoothed,
Chr_filter = chr_list,
cores = opt$cores
)
if (opt$extract_G1_G2_cells) {
# create single G1/G2 single cell file
G1G2_smoothed = Kronos.scRT::Replication_state(
Samples = G1G2_smoothed,
background = backgroud_smoothed,
Chr_filter = chr_list,
cores = opt$cores
)
#write results
G1G2_smoothed %>%
dplyr::select(
chr,
start,
end,
CN,
background,
CN_bg,
th,
Rep,
PercentageReplication,
Cell,
basename,
group,
newIndex
) %>%
readr::write_delim(
file = paste0(
file.path(opt$out,
opt$output_file_base_name),
'_G1_G2_single_cells_CNV_',
opt$binSize,
'.tsv'
),
delim = '\t',
col_names = T
)
}
# remove control track
rm('backgroud_smoothed')
rm('G1G2_smoothed')
#plot profile binning
plot = signal_smoothed %>%
dplyr::group_by(index, group) %>%
dplyr::summarise(Rep_percentage = mean(Rep)) %>%
ggplot2::ggplot(ggplot2::aes(Rep_percentage, color = group)) +
ggplot2::geom_density(ggplot2::aes(y = ..scaled..)) +
ggplot2::scale_x_continuous(labels = scales::percent) +
ggplot2::xlab('Percentage of the genome that has been replicated') +
ggplot2::ylab('density') + ggplot2::coord_cartesian(xlim = c(0, 1))
suppressMessages(ggplot2::ggsave(plot = plot,
filename = file.path(
opt$out,
paste0(
opt$output_file_base_name,
'_percentage_of_replicating_cells.pdf'
)
)))
#Filter and save plots
signal_smoothed = Kronos.scRT::FilterCells(
scCN = signal_smoothed,
min_cor = opt$min_correlation,
save_plot_basename = file.path(opt$out, paste0(opt$output_file_base_name,
'_S_phase'))
)
#plot dist after filtering
rep_percentage = signal_smoothed %>%
dplyr::group_by(Cell, basename, group, newIndex) %>%
dplyr::summarise(Rep_percentage = mean(Rep))
plot = rep_percentage %>%
ggplot2::ggplot(ggplot2::aes(Rep_percentage, color = group)) +
ggplot2::geom_density(ggplot2::aes(y = ..scaled..)) +
ggplot2::scale_x_continuous(labels = scales::percent) +
ggplot2::xlab('Percentage of the genome that has been replicated') +
ggplot2::ylab('density') + ggplot2::coord_cartesian(xlim = c(0, 1))
suppressMessages(ggplot2::ggsave(
plot = plot,
filename = paste0(
file.path(opt$out,
opt$output_file_base_name),
'_percentage_of_replicating_cells_after_filtering.pdf'
)
))
#new index
readr::write_delim(
x = signal_smoothed,
file = paste0(
file.path(opt$out,
opt$output_file_base_name),
'_single_cells_CNV_',
opt$binSize,
'.tsv'
),
delim = '\t',
col_names = T
)
#calculate pseudo bulk RT
scRT = Kronos.scRT::pseudoBulkRT(S_scCN = signal_smoothed)
#write scRT
readr::write_delim(
x = scRT,
file = paste0(
file.path(opt$out,
opt$output_file_base_name),
'_calculated_replication_timing_',
opt$binSize,
'.tsv'
),
delim = '\t',
col_names = T
)
# Variability S-phase
Var = Kronos.scRT::Variability(S_scCN = signal_smoothed, scRT = scRT)
Var %>%
readr::write_delim(
file = paste0(
file.path(opt$out,
opt$output_file_base_name),
'_variability.tsv'
),
delim = '\t',
col_names = T
)
#free some space
rm('Var')
#select used data and save the new per cell files
data =
data %>%
dplyr::filter(Type == 'G1/G2-phase cells' |
Cell %in% unique(signal_smoothed$Cell))
if (!dir.exists(file.path(opt$out, 'Cells_used_in_the_analysis_info'))) {
dir.create(file.path(opt$out, 'Cells_used_in_the_analysis_info'))
}
bs = data %>%
dplyr::select(basename, group) %>%
unique()
bs = foreach::foreach(i = 1:nrow(bs)) %do% {
data %>%
dplyr::filter(basename == bs$basename[i] &
group == bs$group[i]) %>%
dplyr::select(
Cell,
normalized_dimapd,
mean_ploidy,
ploidy_confidence,
is_high_dimapd,
is_noisy,
coverage_per_1Mbp,
basename,
group
) %>%
readr::write_csv(paste0(
file.path(opt$out,
'Cells_used_in_the_analysis_info',
bs$basename[i]),
'_',
bs$group[i],
'_per_Cell_summary_metrics.csv'
))
i
}
print('done')
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.