# brentlabRnaSeqSet methods ----------------------------------------------------
## coercetoDds
#' coerce brentlabRnaSeqSet to DESeqDataSet
#'
#' @rdname coerceToDds
#'
#' @description coerce brentlabRnaSeqSet to DESeqDataSet
#'
#' @importFrom DESeq2 DESeqDataSetFromMatrix sizeFactors
#'
#' @param x a brentlabRnaSeqSet
#'
#' @export
setMethod("coerceToDds", "brentlabRnaSeqSet", function(x) {
size_factors = sizeFactors(x)
design = design(x)
colData = extractColData(x)
counts = counts(x)
dds = DESeqDataSetFromMatrix(
colData = colData,
countData = counts,
design = design
)
if(!is.null(size_factors)){
sizeFactors(dds) = size_factors
}
dds
})
## createExperimentSet ----
#'
#' Create a pre-defined experiment set
#'
#' @description for pre-defined experiment definitions, see the github repo
#' R/ExperimentSetFunctions.R
#'
#' @rdname createExperimentSet
#'
#' @param x a brentlabRnaSeqSet object
#' @inheritParams brentlabRnaSeqExperiment
#'
#' @export
setMethod("createExperimentSet", "brentlabRnaSeqSet", function(x, set_name) {
subset_blrs = -1
if(set_name == "ninetyMin_2016Grant"){
subset_blrs = createNinetyMinuteInduction_2016grant(x)
}
else if(set_name == "ninetyMin_2016GrantWithDoubles"){
subset_blrs = createNinetyMinuteInduction_2016grantWithDoubles(x)
}
else if(set_name == "ninetyMin_non2016grant"){
subset_blrs = createNinetyMinuteInduction_non2016grant(x)
}
else if(set_name == "ninetyMin_all"){
subset_blrs = createNinetyMinuteInduction_all(x)
}
else if(set_name == "envPert_epWT"){
subset_blrs = createEnvPert_epWT(x)
}
else if(set_name == "envPert_perturbed"){
subset_blrs = createEnvPertSet_perturbed(x)
}
else if(set_name == "envPert_titrationWT"){
subset_blrs = createEnvPertSet_titrationWT(x)
}
if(is.numeric(subset_blrs)){
stop(paste0("set_name '", set_name,
"' is not recognized. see ?createExperimentSet for
acceptable set_names"))
}
brentlabRnaSeqExperiment(subset_blrs, set_name)
})
## qaFilter(brentlabRnaSeqSet) ----
#'
#' Quality filter a brentlabRnaSeqSet object
#'
#' Filter for manual passes (overrides auto fail),
#' automatic passes (unless auto failed), and optionally RLE IQR
#'
#' @note on the brentlabRnaSeqSet (as opposed to the brentlabRnaSeqExperiment),
#' this only filters on manualAudit and autoAudit. no rle filtering
#'
#' @rdname qaFilter
#'
#' @return a quality assessment filtered brentlabRnaSeqSet object
#'
#' @export
setMethod("qaFilter",
"brentlabRnaSeqSet",
function(x) {
passing_fastqFileNumbers = as_tibble(colData(x)) %>%
filter(manualAudit == FALSE |
(is.na(manualAudit) & autoAudit == FALSE)) %>%
pull(fastqFileNumber)
# create a sample quality filtered brentlabRnaSeqSet
blrs_qc_fltr = x[,colData(x)$fastqFileNumber %in% passing_fastqFileNumbers]
blrs_qc_fltr
})
## splitProtocolGroups ----
#' Split a brentlabRnaSeqSet into two by protocol
#'
#' @description split an object into 'SolexaPrep' and 'E7420L'
#'
#' @rdname splitProtocolGroups
#'
#' @param x a brentlabRnaSeqSet
#'
#' @return a list 'SolexaPrep' and 'E7420L' group. We call 'SolexaPrep' "old"
#'
#' @export
setMethod("splitProtocolGroups", "brentlabRnaSeqSet", function(x) {
solexaprep = x[,colData(x)$libraryProtocol == 'SolexaPrep']
colData(solexaprep)$libraryProtocol =
droplevels(as.factor(colData(solexaprep)$libraryProtocol))
# helper function to perform split and clean up relevant factored columns
# TODO: think about which columns to factor in R/Database.R, and which others
# to clean here
cleanSplit = function(obj, protocol){
split_obj = obj[,colData(obj)$libraryProtocol == as.character(protocol)]
colData(split_obj)$libraryProtocol =
droplevels(as.factor(colData(split_obj)$libraryProtocol))
colData(split_obj)$libraryDate =
droplevels(colData(split_obj)$libraryDate)
split_obj
}
list(
'SolexaPrep' = cleanSplit(x, 'SolexaPrep'),
'E7420L' = cleanSplit(x, 'E7420L')
)
})
## extractColData ----
#' extract colData as tibble
#'
#' @importFrom tibble as_tibble
#'
#' @rdname extractColData
#'
#' @param x a brentlabRnaSeqSet
#'
#' @return colData of the object as a tibble
#'
#' @export
setMethod("extractColData", "brentlabRnaSeqSet", function(x) {
as_tibble(colData(x))
})
#' extract the model.matrix using the design and colData
#'
#' @rdname extractDesignMatrix
#'
#' @importFrom rlang is_formula
#'
#' @param x a brentlabRnaSeqSet
#'
#' @return the model matrix, using the design and coldata
#'
#' @export
setMethod("extractDesignMatrix", "brentlabRnaSeqSet", function(x) {
if(is_formula(design(x))){
message(paste0("Model Matrix for design: ", as.character(design(x))))
model.matrix(design(x), extractColData(x))
} else if(is.matrix(design(x))){
design(x)
} else{
stop("data type of the object is not recognized")
}
})
# brentlabRnaSeqExperiment methods ---------------------------------------------
## qaFilter(brentlabRnaSeqExperiment) ----
#'
#' Quality filter a brentlabRnaSeqExperiment object
#'
#' Filter for manual passes (overrides auto fail),
#' automatic passes (unless auto failed), and optionally RLE IQR
#'
#' @note If there are less than 3 replicates, the RLE stats will be NA. These
#' are returned, also. Filter them out of the returned object if you wish to
#' have only those samples with more than 3 replicates.
#'
#' @rdname qaFilter
#'
#' @importFrom dplyr filter as_tibble
#'
#' @param x a brentlabRnaSeqSet object
#' @param rle_iqr_threshold default is NA, which means no rle_iqr_threshold filtering.
#' Set a value to filter based on RLE IQR. Note: RLE stats are all calculated
#' after removing the libraryDate effect.
#' @param iqr_colname default is NA, which creates the iqr col name from the
#' set name in the object. Pass the col name itself, eg "my_set_iqr", to
#' specify a iqr column name manually
#'
#' @return a quality assessment filtered brentlabRnaSeqExperiment object
#'
#' @export
setMethod("qaFilter",
"brentlabRnaSeqExperiment",
function(x, rle_iqr_threshold = NA, iqr_colname = NA) {
# note: I tried the filter commented out below first, but had trouble with
# handling NAs. dplyr filter works. Used the same method for rle
# filtering below. This should be fixed for clarity/speed/reducing
# dependencies eventually
# # create a filter on manualAudit and autoAudit
# qc1_fltr = (colData(x)$manualAudit == FALSE |
# is.na(colData(x)$manualAudit)) &
# colData(x)$autoAudit == FALSE
passing_fastqFileNumbers = as_tibble(colData(x)) %>%
filter(manualAudit == FALSE |
(is.na(manualAudit) & autoAudit == FALSE)) %>%
pull(fastqFileNumber)
# create a sample quality filtered brentlabRnaSeqSet
blrs_qc_fltr = x[,colData(x)$fastqFileNumber %in% passing_fastqFileNumbers]
# if rle_iqr_threshold is set, filter based on IQR
if(!is.na(rle_iqr_threshold) & is.numeric(rle_iqr_threshold)){
iqr_colname =
if(is.na(iqr_colname)){
iqr_colname = paste0(x@set_name, "_iqr")
} else{
iqr_colname
}
if(!iqr_colname %in% colnames(colData(blrs_qc_fltr))){
stop("iqr column, '", iqr_colname, "', is not in the metadata. This
is a problem in the definition of the ExperimentSet object --
please issue a report on github. Paste this error, and the name
of the experiment set that you are using.")
}
iqr_passing_fastqFileNumbers = as_tibble(colData(blrs_qc_fltr)) %>%
filter(!!rlang::sym(iqr_colname) < rle_iqr_threshold |
is.na(!!rlang::sym(iqr_colname))) %>%
pull(fastqFileNumber)
blrs_qc_fltr = blrs_qc_fltr[,colData(blrs_qc_fltr)$fastqFileNumber %in%
iqr_passing_fastqFileNumbers]
}
blrs_qc_fltr
})
## estimateSizeFactorsByProtocol ----
#'
#' Calculate Size Factors within protocol groups
#'
#' Size Factors will be calculated for the set libraryProtocol 'SolexaPrep'
#' and the complement separately
#'
#' @rdname estimateSizeFactorsByProtocol
#'
#' @importFrom DESeq2 estimateSizeFactors
#' @importFrom SummarizedExperiment cbind
#'
#' @param x a brentlabRnaSeqSet object
#'
#' @return a brentlabRnaSeqExperiment with sizeFactors estimated from within
#' protocol groups (SolexaPrep and not SolexaPrep)
#'
#' @export
setMethod("estimateSizeFactorsByProtocol",
"brentlabRnaSeqExperiment",
function(x) {
old_dds = estimateSizeFactors(x[, colData(x)$libraryProtocol == 'SolexaPrep'])
new_dds = estimateSizeFactors(x[, colData(x)$libraryProtocol != 'SolexaPrep'])
SummarizedExperiment::cbind(old_dds, new_dds)
})
## replicateByProtocolTally ----
#'
#' experimental replicates by libraryProtocol tally
#'
#' @description tally experimental replicate columns against the library prep
#' protocol
#'
#' @rdname replicateByProtocolTally
#'
#' @importFrom tibble as_tibble
#'
#' @param x a brentlabRnaSeqExperiment
#'
#' @export
setMethod("replicateByProtocolTally",
"brentlabRnaSeqExperiment",
function(x) {
if(x@set_name == "ninetyMin_2016Grant"){
# see R/ExperimentSetFunctions.R
replicateByProtocolTally_90min(as_tibble(colData(x)))
} else{
message(paste0('There is no replicateByProtocolTally for set: ', x@set_name))
}
})
## test_train_partition ----
#' create test train set
#'
#' @description For brentlabRnaSeqExperiment objects, this offers methods to
#' create train test sets
#'
#' @rdname test_train_partition
#'
#' @param x a brentlabRnaSeqExperiment object
#' @param min_set_size the minimum replicate set size to consider for hold outs
#'
#' @return a list with slots train and test, each with brentlabRnaSeqSetExperiments
#'
#' @export
setMethod("test_train_partition",
"brentlabRnaSeqExperiment",
function(x, min_set_size) {
if(x@set_name == 'ninetyMin_2016Grant'){
genotype_list = as_tibble(colData(x)) %>%
group_by(genotype1) %>%
tally() %>%
filter(genotype1 != "CKF44_00000" & n > min_set_size) %>%
pull(genotype1)
test_ffn = as_tibble(colData(x)) %>%
filter(genotype1 %in% c(genotype_list)) %>%
group_by(genotype1) %>%
sample_n(1) %>%
pull(fastqFileNumber)
test = x[,colData(x)$fastqFileNumber %in% test_ffn]
colData(test) = colData(test,droplevels(as_tibble(colData(test))))
# note -- this will include all wildtypes, also
train = x[,!colData(x)$fastqFileNumber %in% test_ffn]
colData(train) = colData(train,droplevels(as_tibble(colData(train))))
} else{
stop(paste0("There is no defined method for set: ", x@set_name, ". If you need one,
post an issues report along with a completed explanation of how you
would like it done."))
}
list(
train = train,
test = test
)
})
#' @rdname VariantExplorer
#' @param x a VariantExplorer object
#' @inheritParams brentlabRnaSeqExperiment
setMethod("igv_script", "VariantExplorer", function(x,...){
# check igv_genome attribute
if (identical(x@igv_genome, character(0))){
stop("The igv_genome slot must exist")
} else if(!file.exists(x@igv_genome)){
stop("The path to the igv genome is not valid")
}
# create the locus granges
granges = x@gff[x@gff$gene == locus & x@gff$type == "gene"]
if (length(granges) == 0){
stop(sprintf("Gene: %s is not recognized -- no ranges present in the gff",
locus))
}
# create output directories
script_output_dir = file.path(output_dir, "igv", "igv_scripts")
dir.create(script_output_dir, recursive = TRUE)
browser_output_dir = file.path(output_dir, "igv", "browser_shots")
dir.create(browser_output_dir, recursive=TRUE)
# create output paths
if (image_basename == ""){
script_output_path = file.path(script_output_dir,paste0(locus,".txt"))
browser_output_filepath = file.path(browser_output_dir,paste0(locus,".png"))
} else{
script_output_path = file.path(script_output_dir,paste0(image_basename,".txt"))
browser_output_filepath = file.path(browser_output_dir,paste0(image_basename,".png"))
}
# get bam list
sample_id_list = x@variants %>%
filter(gene_id == locus) %>%
pull(sample) %>%
unique()
bam_list = x@metadata %>%
filter(sample_id %in% sample_id_list) %>%
pull(bam) %>%
unique()
# write igv batchscript lines
load_samples = paste0(map(bam_list, ~sprintf("\tload %s\n", .)), collapse=" ")
parsed_range = paste(as.character(granges@seqnames),
as.character(granges@ranges), sep=":")
batch_script = paste0("new\nsnapshotDirectory %s\nmaxPanelHeight %s\ngenome %s\n",
load_samples, "goto %s", collapse = " ")
batch_script = sprintf(batch_script,
path.expand(output_dir),
maxPanelHeight,
path.expand(x@igv_genome),
parsed_range)
if(exit_browser){
batch_script = paste0(batch_script, "\nsnapshot %s\nexit", collapse = " ")
batch_script = sprintf(batch_script, browser_output_filepath)
}
cat(batch_script, file = script_output_path)
})
#' @export
#' @rdname VariantExplorer
#' @param object a VariantExplorer instance
setMethod('summary', signature(object = 'VariantExplorer'), function(object){
split_variants = object@variants %>%
ungroup() %>%
group_by(effect,impact) %>%
tally() %>%
dplyr::rename(num_genes = n) %>%
ungroup() %>%
group_by(impact) %>%
arrange(impact, desc(num_genes), .by_group = TRUE) %>%
droplevels() %>%
group_split()
names(split_variants) = unlist(map(split_variants, ~unique(pull(.,impact))))
pretty_print = function(impact_table, impact_level){
message(sprintf("Impact level: %s", impact_level))
dplyr::select(impact_table,-impact) %>% print()
}
for(i in names(split_variants)){
pretty_print(split_variants[[i]], i)
}
})
#' Visualize coverage at a locus
#' @rdname VariantExplorer
#' @param x a VariantExplorer object
#' @param gene the name of the gene you wish to visualize
#' @param sample the sample_id you wish to see
#' @param plot_type either 'pileup' or 'coverage'. Defaults to 'pileup'
#'
#' @importFrom withr local_options
#' @importFrom Gviz AlignmentsTrack AnnotationTrack GenomeAxisTrack SequenceTrack plotTracks
#' @importFrom dplyr filter pull
#'
#' @export
setMethod("visualize", "VariantExplorer",
function(x, gene, sample, plot_type = 'pileup'){
# set options locally -- this will revert to outer when function completes
withr::local_options(list(ucscChromosomeNames = FALSE))
gene_granges = x@gff[x@gff$gene == gene]
bamfile = filter(ve@metadata, sample_id == sample) %>%
pull(bam)
track_list = list(
axis_track = GenomeAxisTrack(),
annotation = AnnotationTrack(gene_granges, group = gene_granges$ID),
alignment = AlignmentsTrack(bamfile,
isPaired = TRUE,
showMismatches = TRUE,
showIndels = TRUE,
type = plot_type),
sequence = SequenceTrack(ve@bsgenome,chromosome = unique(gene_granges$seqnames))
)
plotTracks(track_list,
chromosome = unique(gene_granges$seqnames),
start = gene_granges[gene_granges$type == 'gene']$start,
end = gene_granges[gene_granges$type == 'gene']$end,
add53 = TRUE,
showId = TRUE,
labelPos = 'below')
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.