inst/doc/BANDITS.R

## ----setup, echo=FALSE, results="hide"----------------------------------------
knitr::opts_chunk$set(tidy=FALSE, cache=TRUE,
dev="png",
message=TRUE, error=FALSE, warning=TRUE)

## ----vignettes, eval=FALSE----------------------------------------------------
#  browseVignettes("BANDITS")

## ----citation-----------------------------------------------------------------
citation("BANDITS")

## ----Bioconductor_installation, eval=FALSE------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#    install.packages("BiocManager")
#  BiocManager::install("BANDITS")

## ----github_installation, eval=FALSE------------------------------------------
#  devtools::install_github("SimoneTiberi/BANDITS")

## ----github_installation_2, eval=FALSE----------------------------------------
#  devtools::install_github("SimoneTiberi/BANDITS",
#                           build_opts = c("--no-resave-data", "--no-manual"))

## ----gene-transcript_from_gtf, eval=FALSE-------------------------------------
#  suppressMessages(library(GenomicFeatures))
#  gtf_file = system.file("extdata","GTF_files","Aedes_aegypti.partial.gtf",
#                         package="GenomicFeatures")
#  tx = makeTxDbFromGFF(gtf_file)
#  ss = unlist(transcriptsBy(tx, by="gene"))
#  gene_tr_id_gtf = data.frame(gene_id = names(ss), transcript_id = ss$tx_name )
#  # remove eventual NA's:
#  gene_tr_id_gtf = gene_tr_id_gtf[ rowSums( is.na(gene_tr_id_gtf)) == 0, ]
#  # remove eventual duplicated rows:
#  gene_tr_id_gtf = unique(gene_tr_id_gtf)

## ----gene-transcript_from_fasta, eval=FALSE-----------------------------------
#  suppressMessages(library(Biostrings))
#  data_dir = system.file("extdata", package = "BANDITS")
#  fasta = readDNAStringSet(file.path(data_dir, "Homo_sapiens.GRCh38.cdna.all.1.1.10M.fa.gz"))
#  ss = strsplit(names(fasta), " ")
#  gene_tr_id_fasta = data.frame(gene_id = gsub("gene:", "", sapply(ss, .subset, 4)),
#                                transcript_id = sapply(ss, .subset, 1))
#  # remove eventual NA's
#  gene_tr_id_fasta = gene_tr_id_fasta[ rowSums( is.na(gene_tr_id_fasta)) == 0, ]
#  # remove eventual duplicated rows:
#  gene_tr_id_fasta = unique(gene_tr_id_fasta)

## ----load_BANDITS, message=FALSE----------------------------------------------
library(BANDITS)

## ----specify_data-dir---------------------------------------------------------
data_dir = system.file("extdata", package = "BANDITS")

## ----load_gene-transcript-----------------------------------------------------
data("gene_tr_id", package = "BANDITS")
head(gene_tr_id)

## ----specify_quantification_path----------------------------------------------
sample_names = paste0("sample", seq_len(4))
quant_files = file.path(data_dir, "STAR-salmon", sample_names, "quant.sf")
file.exists(quant_files)

## ----load_counts--------------------------------------------------------------
library(tximport)
txi = tximport(files = quant_files, type = "salmon", txOut = TRUE)
counts = txi$counts
head(counts)

## ----specify_design-----------------------------------------------------------
samples_design = data.frame(sample_id = sample_names,
                            group = c("A", "A", "B", "B"))
samples_design

## ----groups-------------------------------------------------------------------
levels(samples_design$group)

## ----effective_transcript_length----------------------------------------------
eff_len = eff_len_compute(x_eff_len = txi$length)
head(eff_len)

## ----filter_lowly_abundant_transcripts----------------------------------------
transcripts_to_keep = filter_transcripts(gene_to_transcript = gene_tr_id,
                                         transcript_counts = counts, 
                                         min_transcript_proportion = 0.01,
                                         min_transcript_counts = 10, 
                                         min_gene_counts = 20)
head(transcripts_to_keep)

## ----specify_salmon_EC_path---------------------------------------------------
equiv_classes_files = file.path(data_dir, "STAR-salmon", sample_names, 
                                "aux_info", "eq_classes.txt")
file.exists(equiv_classes_files)

## ----check_same_order_salmon--------------------------------------------------
equiv_classes_files
samples_design$sample_id

## ----create_data_salmon-------------------------------------------------------
input_data = create_data(salmon_or_kallisto = "salmon",
                         gene_to_transcript = gene_tr_id,
                         salmon_path_to_eq_classes = equiv_classes_files,
                         eff_len = eff_len, 
                         n_cores = 2,
                         transcripts_to_keep = transcripts_to_keep)

## ----filter_genes_salmon------------------------------------------------------
input_data = filter_genes(input_data, min_counts_per_gene = 20)

## ----specify_kallisto_EC_path-------------------------------------------------
kallisto_equiv_classes = file.path(data_dir, "kallisto", sample_names, "pseudoalignments.ec")
kallisto_equiv_counts  = file.path(data_dir, "kallisto", sample_names, "pseudoalignments.tsv")
file.exists(kallisto_equiv_classes); file.exists(kallisto_equiv_counts)

## ----check_same_order_kallisto------------------------------------------------
kallisto_equiv_classes; kallisto_equiv_counts
samples_design$sample_id

## ----create_data_kallisto-----------------------------------------------------
input_data_2 = create_data(salmon_or_kallisto = "kallisto",
                           gene_to_transcript = gene_tr_id,
                           kallisto_equiv_classes = kallisto_equiv_classes,
                           kallisto_equiv_counts = kallisto_equiv_counts,
                           kallisto_counts = counts,
                           eff_len = eff_len, n_cores = 2,
                           transcripts_to_keep = transcripts_to_keep)
input_data_2

## ----filter_genes_kallisto----------------------------------------------------
input_data_2 = filter_genes(input_data_2, min_counts_per_gene = 20)

## ----prior_precision----------------------------------------------------------
set.seed(61217)
precision = prior_precision(gene_to_transcript = gene_tr_id,
                            transcript_counts = counts, n_cores = 2,
                            transcripts_to_keep = transcripts_to_keep)

## ----prior--------------------------------------------------------------------
precision$prior

## ----plot_precision-----------------------------------------------------------
plot_precision(precision)

## ----test_DTU-----------------------------------------------------------------
set.seed(61217)
results = test_DTU(BANDITS_data = input_data,
                   precision = precision$prior,
                   samples_design = samples_design,
                   group_col_name = "group",
                   R = 10^4, burn_in = 2*10^3, n_cores = 2,
                   gene_to_transcript = gene_tr_id)

## ----visualize_results--------------------------------------------------------
results

## ----top_genes----------------------------------------------------------------
head(top_genes(results))

## ----top_genes_by_DTU_measure-------------------------------------------------
head(top_genes(results, sort_by = "DTU_measure"))

## ----top_transcripts----------------------------------------------------------
head(top_transcripts(results, sort_by = "transcript"))

## ----convergence--------------------------------------------------------------
head(convergence(results))

## ----top_gene-----------------------------------------------------------------
top_gene = top_genes(results, n = 1)
gene(results, top_gene$Gene_id)

## ----top_transcript-----------------------------------------------------------
top_transcript = top_transcripts(results, n = 1)
transcript(results, top_transcript$Transcript_id)

## ----plot_proportions---------------------------------------------------------
plot_proportions(results, top_gene$Gene_id, CI = TRUE, CI_level = 0.95)

## ----specify_design_3_groups--------------------------------------------------
samples_design_3_groups = data.frame(sample_id = sample_names,
                            group = c("A", "B", "B", "C"))
samples_design_3_groups
levels(samples_design_3_groups$group)

## ----test_DTU_3_groups--------------------------------------------------------
set.seed(61217)
results_3_groups = test_DTU(BANDITS_data = input_data,
                   precision = precision$prior,
                   samples_design = samples_design_3_groups,
                   group_col_name = "group",
                   R = 10^4, burn_in = 2*10^3, n_cores = 2,
                   gene_to_transcript = gene_tr_id)
results_3_groups

## ----top_genes_3_groups-------------------------------------------------------
head(top_genes(results_3_groups))

head(top_transcripts(results_3_groups))

## ----top_gene_3_groups--------------------------------------------------------
gene(results_3_groups, top_genes(results_3_groups)$Gene_id[1])

transcript(results_3_groups, top_transcripts(results_3_groups)$Transcript_id[1])

## ----plot_proportions_3_groups------------------------------------------------
plot_proportions(results_3_groups, top_genes(results_3_groups)$Gene_id[1], CI = TRUE, CI_level = 0.95)

## ----specify_design_1_group---------------------------------------------------
samples_design_1_group = data.frame(sample_id = sample_names,
                            group = c("A", "A", "A", "A"))
samples_design_1_group
levels(samples_design_1_group$group)

## ----test_DTU_1_group---------------------------------------------------------
set.seed(61217)
results_1_group = test_DTU(BANDITS_data = input_data,
                   precision = precision$prior,
                   samples_design = samples_design_1_group,
                   group_col_name = "group",
                   R = 10^4, burn_in = 2*10^3, n_cores = 2,
                   gene_to_transcript = gene_tr_id)
results_1_group

## ----top_genes_1_group--------------------------------------------------------
head(top_genes(results_1_group))

head(top_transcripts(results_1_group))

## ----top_gene_1_group---------------------------------------------------------
gene(results_1_group, top_genes(results_1_group)$Gene_id[1])

transcript(results_1_group, top_transcripts(results_1_group)$Transcript_id[1])

## ----plot_proportions_1_group-------------------------------------------------
plot_proportions(results_1_group, top_genes(results)$Gene_id[1], CI = TRUE, CI_level = 0.95)

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the BANDITS package in your browser

Any scripts or data that you put into this service are public.

BANDITS documentation built on Nov. 8, 2020, 5:30 p.m.