knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

Hands-on

Loading package

library(txomics)
set.seed(42)
salmon_test <- structure(list(
  Name = c("AAEL000001-RA", "AAEL000004-RA", "AAEL000005-RB", "AAEL000005-RC", "AAEL000005-RD", "AAEL000006-RC", "AAEL000008-RA", "AAEL000008-RB", "AAEL000008-RC", "AAEL000010-RA", "AAEL000011-RB", "AAEL000011-RC", "AAEL000012-RA", "AAEL000013-RB", "AAEL000016-RA", "AAEL000017-RA", "AAEL000017-RB", "AAEL000018-RA", "AAEL000020-RA", "AAEL000021-RA", "AAEL000022-RA", "AAEL000024-RA", "AAEL000024-RB", "AAEL000024-RC", "AAEL000025-RB", "AAEL000026-RA", "AAEL000027-RA", "AAEL000028-RA", "AAEL000029-RB", "AAEL000032-RE", "AAEL000033-RA", "AAEL000035-RB", "AAEL000036-RA", "AAEL000037-RA", "AAEL000038-RA", "AAEL000039-RA", "AAEL000041-RA", "AAEL000041-RB", "AAEL000041-RC", "AAEL000042-RA", "AAEL000043-RA", "AAEL000044-RB", "AAEL000046-RA", "AAEL000047-RA", "AAEL000048-RB", "AAEL000049-RA", "AAEL000049-RB", "AAEL000049-RC", "AAEL000050-RA", "AAEL000051-RA", "AAEL000053-RA", "AAEL000055-RB", "AAEL000057-RB", "AAEL000058-RB", "AAEL000063-RB", "AAEL000064-RA", "AAEL000065-RA", "AAEL000065-RB", "AAEL000065-RC", "AAEL000066-RA", "AAEL000069-RA", "AAEL000071-RA", "AAEL000073-RA", "AAEL000074-RA", "AAEL000075-RA", "AAEL000076-RB", "AAEL000077-RA", "AAEL000077-RB", "AAEL000077-RC", "AAEL000079-RB", "AAEL000079-RC", "AAEL000079-RD", "AAEL000080-RA", "AAEL000081-RB", "AAEL000084-RA", "AAEL000084-RB", "AAEL000085-RA", "AAEL000087-RB", "AAEL000088-RA", "AAEL000089-RA", "AAEL000090-RA", "AAEL000090-RB", "AAEL000090-RC", "AAEL000091-RA", "AAEL000092-RA", "AAEL000094-RB", "AAEL000094-RC", "AAEL000095-RB", "AAEL000095-RC", "AAEL000097-RD", "AAEL000098-RB", "AAEL000098-RC", "AAEL000099-RA", "AAEL000101-RA", "AAEL000102-RB", "AAEL000106-RA", "AAEL000107-RA", "AAEL000108-RA", "AAEL000109-RA", "AAEL000111-RA"),
    Length = c(1745L, 2365L, 1301L, 1361L, 1379L, 2283L, 2675L, 2603L, 2615L, 599L, 2000L, 1985L, 1744L, 1558L, 1031L, 843L, 1170L, 2187L, 2023L, 1958L, 898L, 1437L, 1430L, 1345L, 2320L, 794L, 1915L, 1376L, 1440L, 1390L, 2134L, 658L, 1386L, 1289L, 1376L, 2110L, 2249L, 2053L, 2826L, 1284L, 2053L, 1549L, 974L, 2078L, 2192L, 1715L, 1505L, 1514L, 1011L, 881L, 3762L, 1676L, 3023L, 1776L, 2072L, 1634L, 2594L, 2627L, 2605L, 2058L, 1307L, 687L, 647L, 1388L, 1509L, 1114L, 1845L, 1747L, 1770L, 8621L, 8624L, 8581L, 2368L, 4419L, 1428L, 1436L, 963L, 4345L, 2544L, 2038L, 2924L, 2231L, 2221L, 1247L, 899L, 2679L, 3137L, 1997L, 2289L, 1240L, 2353L, 2404L, 1589L, 2571L, 1243L, 1244L, 2126L, 1876L, 5004L, 1385L),
    EffectiveLength = c(1496,2116, 1052, 1112, 1130, 2034, 2426, 2354, 2366, 350, 1751, 1736, 1495, 1309, 782, 594, 921, 1938, 1774, 1709, 649, 1188, 1181, 1096, 2071, 545, 1666, 1127, 1191, 1141, 1885, 409, 1137, 1040, 1127, 1861, 2000, 1804, 2577, 1035, 1804, 1300, 725, 1829, 1943, 1466, 1256, 1265, 762, 632, 3513, 1427, 2774, 1527, 1823, 1385, 2345, 2378, 2356, 1809, 1058, 438, 398, 1139, 1260, 865, 1596, 1498, 1521, 8372, 8375, 8332, 2119, 4170, 1179, 1187, 714, 4096, 2295, 1789, 2675, 1982, 1972, 998, 650, 2430, 2888, 1748, 2040, 991, 2104, 2155, 1340, 2322, 994, 995, 1877, 1627, 4755, 1136                                                                 )),
  row.names = c(NA,-100L),
  class = c("tbl_df", "tbl", "data.frame")
  )

  fs::dir_create(paste0("temp_test/quants/salmon_output",c(1,2,3,4,5,6,7,8),"/"))

for( i in 1:8){  
  salmon_test %>%
    dplyr::mutate(TPM = stats::rnbinom(n = 100,size = 5,prob = 0.05)) %>%
    dplyr::mutate(NumReads = stats::rnbinom(n = 100,size = 5,prob = 0.05)) %>%
    readr::write_delim(paste0("temp_test/quants/salmon_output",i,"/quant.sf"), delim = "\t")
}


 go_gene_sets <- structure(list(gene_ontology = c("GO:0000154_rRNA modification", 
"GO:0000179_rRNA (adenine-N6,N6-)-dimethyltransferase activity", 
"GO:0000287_magnesium ion binding", "GO:0000413_protein peptidyl-prolyl isomerization", 
"GO:0003676_nucleic acid binding", "GO:0003676_nucleic acid binding", 
"GO:0003676_nucleic acid binding", "GO:0003676_nucleic acid binding", 
"GO:0003676_nucleic acid binding", "GO:0003700_DNA binding transcription factor activity", 
"GO:0003735_structural constituent of ribosome", "GO:0003735_structural constituent of ribosome", 
"GO:0003755_peptidyl-prolyl cis-trans isomerase activity", "GO:0003824_catalytic activity", 
"GO:0003824_catalytic activity", "GO:0003824_catalytic activity", 
"GO:0004177_aminopeptidase activity", "GO:0004252_serine-type endopeptidase activity", 
"GO:0004252_serine-type endopeptidase activity", "GO:0004252_serine-type endopeptidase activity", 
"GO:0004252_serine-type endopeptidase activity", "GO:0004252_serine-type endopeptidase activity", 
"GO:0004611_phosphoenolpyruvate carboxykinase activity", "GO:0004611_phosphoenolpyruvate carboxykinase activity", 
"GO:0004611_phosphoenolpyruvate carboxykinase activity", "GO:0004613_phosphoenolpyruvate carboxykinase (GTP) activity", 
"GO:0004613_phosphoenolpyruvate carboxykinase (GTP) activity", 
"GO:0004613_phosphoenolpyruvate carboxykinase (GTP) activity", 
"GO:0004866_endopeptidase inhibitor activity", "GO:0004970_ionotropic glutamate receptor activity", 
"GO:0004970_ionotropic glutamate receptor activity", "GO:0004970_ionotropic glutamate receptor activity", 
"GO:0004970_ionotropic glutamate receptor activity", "GO:0004970_ionotropic glutamate receptor activity", 
"GO:0005515_protein binding", "GO:0005515_protein binding", "GO:0005515_protein binding", 
"GO:0005515_protein binding", "GO:0005515_protein binding", "GO:0005524_ATP binding", 
"GO:0005525_GTP binding", "GO:0005525_GTP binding", "GO:0005525_GTP binding", 
"GO:0005549_odorant binding", "GO:0005549_odorant binding", "GO:0005549_odorant binding", 
"GO:0005549_odorant binding", "GO:0005576_extracellular region", 
"GO:0005615_extracellular space", "GO:0005622_intracellular", 
"GO:0005622_intracellular", "GO:0005622_intracellular", "GO:0005634_nucleus", 
"GO:0005634_nucleus", "GO:0005634_nucleus", "GO:0005634_nucleus", 
"GO:0005737_cytoplasm", "GO:0005737_cytoplasm", "GO:0005789_endoplasmic reticulum membrane", 
"GO:0005840_ribosome", "GO:0005840_ribosome", "GO:0006094_gluconeogenesis", 
"GO:0006094_gluconeogenesis", "GO:0006094_gluconeogenesis", "GO:0006351_transcription, DNA-templated", 
"GO:0006355_regulation of transcription, DNA-templated", "GO:0006364_rRNA processing", 
"GO:0006412_translation", "GO:0006412_translation", "GO:0006464_cellular protein modification process", 
"GO:0006508_proteolysis", "GO:0006508_proteolysis", "GO:0006508_proteolysis", 
"GO:0006508_proteolysis", "GO:0006508_proteolysis", "GO:0006508_proteolysis", 
"GO:0006596_polyamine biosynthetic process", "GO:0006807_nitrogen compound metabolic process", 
"GO:0006812_cation transport", "GO:0007030_Golgi organization", 
"GO:0007165_signal transduction", "GO:0008152_metabolic process", 
"GO:0008152_metabolic process", "GO:0008235_metalloexopeptidase activity", 
"GO:0008270_zinc ion binding", "GO:0008270_zinc ion binding", 
"GO:0008324_cation transmembrane transporter activity", "GO:0008527_taste receptor activity", 
"GO:0008527_taste receptor activity", "GO:0008527_taste receptor activity", 
"GO:0008527_taste receptor activity", "GO:0008527_taste receptor activity", 
"GO:0008641_ubiquitin-like modifier activating enzyme activity", 
"GO:0008649_rRNA methyltransferase activity", "GO:0015031_protein transport", 
"GO:0015031_protein transport", "GO:0016020_membrane", "GO:0016020_membrane", 
"GO:0016020_membrane", "GO:0016020_membrane", "GO:0016020_membrane", 
"GO:0016020_membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016021_integral component of membrane", 
"GO:0016021_integral component of membrane", "GO:0016051_carbohydrate biosynthetic process", 
"GO:0016180_snRNA processing", "GO:0016757_transferase activity, transferring glycosyl groups", 
"GO:0016787_hydrolase activity", "GO:0016787_hydrolase activity", 
"GO:0017076_purine nucleotide binding", "GO:0017076_purine nucleotide binding", 
"GO:0017076_purine nucleotide binding", "GO:0019284_L-methionine salvage from S-adenosylmethionine", 
"GO:0019509_L-methionine salvage from methylthioadenosine", "GO:0019538_protein metabolic process", 
"GO:0030145_manganese ion binding", "GO:0032039_integrator complex", 
"GO:0043565_sequence-specific DNA binding", "GO:0043715_2,3-diketo-5-methylthiopentyl-1-phosphate enolase activity", 
"GO:0043716_2-hydroxy-3-keto-5-methylthiopentenyl-1-phosphate phosphatase activity", 
"GO:0043874_acireductone synthase activity", "GO:0050912_detection of chemical stimulus involved in sensory perception of taste", 
"GO:0050912_detection of chemical stimulus involved in sensory perception of taste", 
"GO:0050912_detection of chemical stimulus involved in sensory perception of taste", 
"GO:0050912_detection of chemical stimulus involved in sensory perception of taste", 
"GO:0050912_detection of chemical stimulus involved in sensory perception of taste", 
"GO:0055085_transmembrane transport"), gene = c("AAEL000076", 
"AAEL000076", "AAEL000109", "AAEL000013", "AAEL000005", "AAEL000042", 
"AAEL000049", "AAEL000065", "AAEL000107", "AAEL000041", "AAEL000010", 
"AAEL000032", "AAEL000013", "AAEL000044", "AAEL000097", "AAEL000101", 
"AAEL000108", "AAEL000028", "AAEL000037", "AAEL000038", "AAEL000074", 
"AAEL000099", "AAEL000006", "AAEL000025", "AAEL000080", "AAEL000006", 
"AAEL000025", "AAEL000080", "AAEL000087", "AAEL000011", "AAEL000018", 
"AAEL000063", "AAEL000066", "AAEL000089", "AAEL000008", "AAEL000013", 
"AAEL000057", "AAEL000084", "AAEL000092", "AAEL000081", "AAEL000006", 
"AAEL000025", "AAEL000080", "AAEL000035", "AAEL000051", "AAEL000071", 
"AAEL000073", "AAEL000087", "AAEL000087", "AAEL000010", "AAEL000032", 
"AAEL000108", "AAEL000005", "AAEL000041", "AAEL000107", "AAEL000109", 
"AAEL000108", "AAEL000109", "AAEL000004", "AAEL000010", "AAEL000032", 
"AAEL000006", "AAEL000025", "AAEL000080", "AAEL000041", "AAEL000041", 
"AAEL000076", "AAEL000010", "AAEL000032", "AAEL000091", "AAEL000028", 
"AAEL000037", "AAEL000038", "AAEL000074", "AAEL000099", "AAEL000108", 
"AAEL000044", "AAEL000111", "AAEL000077", "AAEL000088", "AAEL000057", 
"AAEL000101", "AAEL000109", "AAEL000108", "AAEL000005", "AAEL000107", 
"AAEL000077", "AAEL000012", "AAEL000043", "AAEL000048", "AAEL000069", 
"AAEL000075", "AAEL000091", "AAEL000076", "AAEL000088", "AAEL000090", 
"AAEL000011", "AAEL000018", "AAEL000063", "AAEL000066", "AAEL000088", 
"AAEL000089", "AAEL000004", "AAEL000012", "AAEL000018", "AAEL000027", 
"AAEL000039", "AAEL000043", "AAEL000047", "AAEL000048", "AAEL000066", 
"AAEL000069", "AAEL000075", "AAEL000077", "AAEL000089", "AAEL000090", 
"AAEL000097", "AAEL000033", "AAEL000004", "AAEL000016", "AAEL000109", 
"AAEL000006", "AAEL000025", "AAEL000080", "AAEL000109", "AAEL000109", 
"AAEL000108", "AAEL000108", "AAEL000033", "AAEL000041", "AAEL000109", 
"AAEL000109", "AAEL000109", "AAEL000012", "AAEL000043", "AAEL000048", 
"AAEL000069", "AAEL000075", "AAEL000077"), go_domain = c("biological_process", 
"molecular_function", "molecular_function", "biological_process", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"biological_process", "biological_process", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "cellular_component", "cellular_component", 
"cellular_component", "biological_process", "biological_process", 
"molecular_function", "molecular_function", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"biological_process", "biological_process", "biological_process", 
"molecular_function", "cellular_component", "molecular_function", 
"molecular_function", "molecular_function", "molecular_function", 
"biological_process", "biological_process", "biological_process", 
"biological_process", "biological_process", "biological_process"
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-139L))

Importing gene level counts and expression from salmon results

Salmon usually outputs a quant.sf file, you should input the path to folder containing this file to import_tx() function.

path_to_files <- "temp_test/quants/"

imported_expression <- import_tx(path_to_files,source = "salmon", accession = "vectorbase")
imported_expression$abundance %>%
  head() %>%
  knitr::kable()

Experimental design

We need to have a sample table, or experimental design table, with description for each sample

sample_table <- dplyr::data_frame(
  sample = colnames(imported_expression$abundance),
  treatment = c(rep("drug_A",4),rep("control",4)),
  day = as.character(c(1,1,2,2,1,1,2,2))
)

sample_table %>%
  knitr::kable()

Analysis

Differential expression

de_results <- imported_expression %>%
  de_analysis(sample_table, contrast_var = "treatment", "drug_A", "control")

de_results %>%
  dplyr::arrange(pvalue) %>%
  head() %>%
  knitr::kable()

Gene Set Enrichment Analysis

gsea_results <- de_results %>%
  gsea_analysis(gene_sets = go_gene_sets)

gsea_results %>%
  dplyr::arrange(pvalue) %>%
  head() %>%
  knitr::kable()

Leading Edge analysis

best_gene_set <- gsea_results %>%
  dplyr::arrange(pvalue) %>%
  dplyr::slice(1) %>%
  dplyr::pull(pathway)

best_gene_set

gsea_results %>%
  le_analysis(set = best_gene_set)

Figures

plot_heatmap(imported_expression, sample_table, color_by = c("day","treatment"))
multiplot(
  plot_pca(imported_expression, sample_table, color_by = "day"),
  plot_pca(imported_expression, sample_table, color_by = "treatment")
)
plot_volcano(de_results, lfc_threshold = 0.6)
plot_gene_expression("AAEL000043",imported_expression, sample_table, "treatment")
sessionInfo()
  fs::dir_delete("temp_test/")


luciorq/txomics documentation built on Sept. 3, 2020, 5:36 a.m.