Nothing
## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
dev="png",
message=TRUE, error=FALSE, warning=TRUE)
## ----vignettes, eval=FALSE----------------------------------------------------
# browseVignettes("BANDITS")
## ----citation-----------------------------------------------------------------
citation("BANDITS")
## ----Bioconductor_installation, eval=FALSE------------------------------------
# if (!requireNamespace("BiocManager", quietly=TRUE))
# install.packages("BiocManager")
# BiocManager::install("BANDITS")
## ----github_installation, eval=FALSE------------------------------------------
# devtools::install_github("SimoneTiberi/BANDITS")
## ----github_installation_2, eval=FALSE----------------------------------------
# devtools::install_github("SimoneTiberi/BANDITS",
# build_opts = c("--no-resave-data", "--no-manual"))
## ----gene-transcript_from_gtf, eval=FALSE-------------------------------------
# suppressMessages(library(GenomicFeatures))
# gtf_file = system.file("extdata","GTF_files","Aedes_aegypti.partial.gtf",
# package="GenomicFeatures")
# tx = makeTxDbFromGFF(gtf_file)
# ss = unlist(transcriptsBy(tx, by="gene"))
# gene_tr_id_gtf = data.frame(gene_id = names(ss), transcript_id = ss$tx_name )
# # remove eventual NA's:
# gene_tr_id_gtf = gene_tr_id_gtf[ rowSums( is.na(gene_tr_id_gtf)) == 0, ]
# # remove eventual duplicated rows:
# gene_tr_id_gtf = unique(gene_tr_id_gtf)
## ----gene-transcript_from_fasta, eval=FALSE-----------------------------------
# suppressMessages(library(Biostrings))
# data_dir = system.file("extdata", package = "BANDITS")
# fasta = readDNAStringSet(file.path(data_dir, "Homo_sapiens.GRCh38.cdna.all.1.1.10M.fa.gz"))
# ss = strsplit(names(fasta), " ")
# gene_tr_id_fasta = data.frame(gene_id = gsub("gene:", "", sapply(ss, .subset, 4)),
# transcript_id = sapply(ss, .subset, 1))
# # remove eventual NA's
# gene_tr_id_fasta = gene_tr_id_fasta[ rowSums( is.na(gene_tr_id_fasta)) == 0, ]
# # remove eventual duplicated rows:
# gene_tr_id_fasta = unique(gene_tr_id_fasta)
## ----load_BANDITS, message=FALSE----------------------------------------------
library(BANDITS)
## ----specify_data-dir---------------------------------------------------------
data_dir = system.file("extdata", package = "BANDITS")
## ----load_gene-transcript-----------------------------------------------------
data("gene_tr_id", package = "BANDITS")
head(gene_tr_id)
## ----specify_quantification_path----------------------------------------------
sample_names = paste0("sample", seq_len(4))
quant_files = file.path(data_dir, "STAR-salmon", sample_names, "quant.sf")
file.exists(quant_files)
## ----load_counts--------------------------------------------------------------
library(tximport)
txi = tximport(files = quant_files, type = "salmon", txOut = TRUE)
counts = txi$counts
head(counts)
## ----specify_design-----------------------------------------------------------
samples_design = data.frame(sample_id = sample_names,
group = c("A", "A", "B", "B"))
samples_design
## ----groups-------------------------------------------------------------------
levels(samples_design$group)
## ----effective_transcript_length----------------------------------------------
eff_len = eff_len_compute(x_eff_len = txi$length)
head(eff_len)
## ----filter_lowly_abundant_transcripts----------------------------------------
transcripts_to_keep = filter_transcripts(gene_to_transcript = gene_tr_id,
transcript_counts = counts,
min_transcript_proportion = 0.01,
min_transcript_counts = 10,
min_gene_counts = 20)
head(transcripts_to_keep)
## ----specify_salmon_EC_path---------------------------------------------------
equiv_classes_files = file.path(data_dir, "STAR-salmon", sample_names,
"aux_info", "eq_classes.txt")
file.exists(equiv_classes_files)
## ----check_same_order_salmon--------------------------------------------------
equiv_classes_files
samples_design$sample_id
## ----create_data_salmon-------------------------------------------------------
input_data = create_data(salmon_or_kallisto = "salmon",
gene_to_transcript = gene_tr_id,
salmon_path_to_eq_classes = equiv_classes_files,
eff_len = eff_len,
n_cores = 2,
transcripts_to_keep = transcripts_to_keep)
## ----filter_genes_salmon------------------------------------------------------
input_data = filter_genes(input_data, min_counts_per_gene = 20)
## ----specify_kallisto_EC_path-------------------------------------------------
kallisto_equiv_classes = file.path(data_dir, "kallisto", sample_names, "pseudoalignments.ec")
kallisto_equiv_counts = file.path(data_dir, "kallisto", sample_names, "pseudoalignments.tsv")
file.exists(kallisto_equiv_classes); file.exists(kallisto_equiv_counts)
## ----check_same_order_kallisto------------------------------------------------
kallisto_equiv_classes; kallisto_equiv_counts
samples_design$sample_id
## ----create_data_kallisto-----------------------------------------------------
input_data_2 = create_data(salmon_or_kallisto = "kallisto",
gene_to_transcript = gene_tr_id,
kallisto_equiv_classes = kallisto_equiv_classes,
kallisto_equiv_counts = kallisto_equiv_counts,
kallisto_counts = counts,
eff_len = eff_len, n_cores = 2,
transcripts_to_keep = transcripts_to_keep)
input_data_2
## ----filter_genes_kallisto----------------------------------------------------
input_data_2 = filter_genes(input_data_2, min_counts_per_gene = 20)
## ----prior_precision----------------------------------------------------------
set.seed(61217)
precision = prior_precision(gene_to_transcript = gene_tr_id,
transcript_counts = counts, n_cores = 2,
transcripts_to_keep = transcripts_to_keep)
## ----prior--------------------------------------------------------------------
precision$prior
## ----plot_precision-----------------------------------------------------------
plot_precision(precision)
## ----test_DTU-----------------------------------------------------------------
set.seed(61217)
results = test_DTU(BANDITS_data = input_data,
precision = precision$prior,
samples_design = samples_design,
group_col_name = "group",
R = 10^4, burn_in = 2*10^3, n_cores = 2,
gene_to_transcript = gene_tr_id)
## ----visualize_results--------------------------------------------------------
results
## ----top_genes----------------------------------------------------------------
head(top_genes(results))
## ----top_genes_by_DTU_measure-------------------------------------------------
head(top_genes(results, sort_by = "DTU_measure"))
## ----top_transcripts----------------------------------------------------------
head(top_transcripts(results, sort_by = "transcript"))
## ----convergence--------------------------------------------------------------
head(convergence(results))
## ----top_gene-----------------------------------------------------------------
top_gene = top_genes(results, n = 1)
gene(results, top_gene$Gene_id)
## ----top_transcript-----------------------------------------------------------
top_transcript = top_transcripts(results, n = 1)
transcript(results, top_transcript$Transcript_id)
## ----plot_proportions---------------------------------------------------------
plot_proportions(results, top_gene$Gene_id, CI = TRUE, CI_level = 0.95)
## ----specify_design_3_groups--------------------------------------------------
samples_design_3_groups = data.frame(sample_id = sample_names,
group = c("A", "B", "B", "C"))
samples_design_3_groups
levels(samples_design_3_groups$group)
## ----test_DTU_3_groups--------------------------------------------------------
set.seed(61217)
results_3_groups = test_DTU(BANDITS_data = input_data,
precision = precision$prior,
samples_design = samples_design_3_groups,
group_col_name = "group",
R = 10^4, burn_in = 2*10^3, n_cores = 2,
gene_to_transcript = gene_tr_id)
results_3_groups
## ----top_genes_3_groups-------------------------------------------------------
head(top_genes(results_3_groups))
head(top_transcripts(results_3_groups))
## ----top_gene_3_groups--------------------------------------------------------
gene(results_3_groups, top_genes(results_3_groups)$Gene_id[1])
transcript(results_3_groups, top_transcripts(results_3_groups)$Transcript_id[1])
## ----plot_proportions_3_groups------------------------------------------------
plot_proportions(results_3_groups, top_genes(results_3_groups)$Gene_id[1], CI = TRUE, CI_level = 0.95)
## ----specify_design_1_group---------------------------------------------------
samples_design_1_group = data.frame(sample_id = sample_names,
group = c("A", "A", "A", "A"))
samples_design_1_group
levels(samples_design_1_group$group)
## ----test_DTU_1_group---------------------------------------------------------
set.seed(61217)
results_1_group = test_DTU(BANDITS_data = input_data,
precision = precision$prior,
samples_design = samples_design_1_group,
group_col_name = "group",
R = 10^4, burn_in = 2*10^3, n_cores = 2,
gene_to_transcript = gene_tr_id)
results_1_group
## ----top_genes_1_group--------------------------------------------------------
head(top_genes(results_1_group))
head(top_transcripts(results_1_group))
## ----top_gene_1_group---------------------------------------------------------
gene(results_1_group, top_genes(results_1_group)$Gene_id[1])
transcript(results_1_group, top_transcripts(results_1_group)$Transcript_id[1])
## ----plot_proportions_1_group-------------------------------------------------
plot_proportions(results_1_group, top_genes(results)$Gene_id[1], CI = TRUE, CI_level = 0.95)
## ----sessionInfo--------------------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.