# inst/doc/case_study.R

## ----eval=FALSE------------------------------------------------------------
#  if (!requireNamespace("BiocManager", quietly=TRUE))
#      install.packages("BiocManager")
#  BiocManager::install("CTDquerier")

## ----gala_read_csv---------------------------------------------------------
# Locate "gala_table_e1.csv" in the "extdata" directory of the installed
# CTDquerier package. system.file() joins the path components itself, so no
# manual separator handling is needed; mustWork = TRUE raises an error here if
# the file is missing instead of handing an empty path to read.csv().
table_e1_csv <- system.file(
  "extdata", "gala_table_e1.csv",
  package = "CTDquerier",
  mustWork = TRUE
)
# Read text columns as character (not factor).
table_e1 <- read.csv(table_e1_csv, stringsAsFactors = FALSE)

## ----gala_remove_na_file---------------------------------------------------
# Drop the rows that carry no gene annotation. The file marks them with the
# text "NA " (trailing blank); trimming before comparing makes the filter
# independent of that padding, and the is.na() guard keeps genuine missing
# values from turning into all-NA rows in the subset.
dim(table_e1)
table_e1 <- table_e1[
  !is.na(table_e1$Genes) & trimws(table_e1$Genes) != "NA",
]
dim(table_e1)

## ----gala_create_list------------------------------------------------------
# Each entry of the Genes column may hold several comma-separated symbols:
# split them, flatten to a single vector and strip surrounding whitespace.
gala_genes <- trimws(unlist(strsplit(table_e1$Genes, ",")))
length(gala_genes)
# head() shows at most 15 symbols and never pads with NA when fewer exist.
head(gala_genes, 15)

## ----load_ctdquerier, message=FALSE----------------------------------------
library(CTDquerier)

## ----gale_query, eval=FALSE------------------------------------------------
#  gala <- query_ctd_gene( terms = gala_genes, verbose = TRUE )

## ----gala_data-------------------------------------------------------------
# Load the `gala` object bundled with the package (the query above is not
# evaluated) and print it.
data("gala", package = "CTDquerier")
gala

## ----gala_plot_query, message=FALSE----------------------------------------
# ggplot2 is attached for ggtitle(), which is added to the plot of `gala`.
library(ggplot2)
plot(gala) + ggtitle("Lost & Found Genes from GALA Study")

## ----gala_lost-------------------------------------------------------------
# The "lost" element of the terms stored in `gala`.
get_terms(gala)[["lost"]]

## ----gala_show_2-----------------------------------------------------------
gala

## ----gala_gda_all----------------------------------------------------------
# Extract the "diseases" table from the query result and inspect its shape.
gala_all_diseases <- get_table(gala, index_name = "diseases")
colnames(gala_all_diseases)
dim(gala_all_diseases)

## ----gala_disease_genes----------------------------------------------------
# Number of distinct gene symbols in the disease table, then how many of the
# "found" terms do / do not appear among them.
length(unique(gala_all_diseases$GeneSymbol))
sum(get_terms(gala)[["found"]] %in% gala_all_diseases$GeneSymbol)
sum(!(get_terms(gala)[["found"]] %in% gala_all_diseases$GeneSymbol))

## ----gala_diseases_no_genes------------------------------------------------
# The "found" terms that are absent from the disease table.
get_terms(gala)[["found"]][
  !(get_terms(gala)[["found"]] %in% gala_all_diseases$GeneSymbol)
]

## ----gala_diseases_unique--------------------------------------------------
length(unique(gala_all_diseases$Disease.Name))

## ----gala_diseases_curated-------------------------------------------------
# Keep only the rows whose Direct.Evidence is neither missing nor empty.
gala_all_diseases_cu <- gala_all_diseases[
  !is.na(gala_all_diseases$Direct.Evidence) &
    gala_all_diseases$Direct.Evidence != "",
]
dim(gala_all_diseases_cu)
length(unique(gala_all_diseases_cu$Disease.Name))

## ----gala_diseases_asthma--------------------------------------------------
# Rows of the disease table whose disease name is exactly "Asthma".
gala_asthma <- gala_all_diseases[gala_all_diseases$Disease.Name == "Asthma", ]
dim(gala_asthma)

## ----gala_diseases_asthma_direct-------------------------------------------
# Count of Asthma rows with a non-missing, non-empty Direct.Evidence.
sum(!is.na(gala_asthma$Direct.Evidence) & gala_asthma$Direct.Evidence != "")

## ----gala_diseases_asthma_evidence-----------------------------------------
# Mean inference score over the Asthma rows, ignoring missing values.
mean(gala_asthma$Inference.Score, na.rm = TRUE)

## ----gala_diseases_asthma_reference----------------------------------------
# Total reference count over the Asthma rows, ignoring missing values.
sum(gala_asthma$Reference.Count, na.rm = TRUE)

## ----gala_diseases_asthma_evidence_plot------------------------------------
# NOTE(review): plot() is called with index_name = "disease" while get_table()
# is called with "diseases" elsewhere in this script — confirm both spellings
# are the ones each function expects.
plot(
  gala,
  index_name = "disease",
  subset.disease = "Asthma",
  filter.score = 20
) +
  ggtitle("Evidence of the association between GALA genes and Asthma")

## ----gala_chemicals--------------------------------------------------------
# Extract the "chemical interactions" table and count its distinct chemicals.
gala_chem <- get_table(gala, index_name = "chemical interactions")
colnames(gala_chem)
length(unique(gala_chem$Chemical.Name))

## ---- gala_chemicals_table, results="asis"---------------------------------
# Frequency of each Reference.Count value, transposed so the counts are laid
# out as a single row, rendered with kable().
knitr::kable(t(table(gala_chem$Reference.Count)))

## ---- gala_chemicals_plot--------------------------------------------------
plot(gala, index_name = "chemical interactions", filter.score = 6)

## ----ctd_asthma------------------------------------------------------------
# Query by disease term "Asthma" (verbose output enabled).
asthma <- query_ctd_dise(terms = "Asthma", verbose = TRUE)

## ----ctd_asthma_show-------------------------------------------------------
asthma

## ----ctd_asthma_n_genes----------------------------------------------------
# Extract the "gene interactions" table and count its distinct gene symbols.
ctd_asthma <- get_table(asthma, index_name = "gene interactions")
length(unique(ctd_asthma$Gene.Symbol))

## ----ctd_asthma_n_genes_curated--------------------------------------------
# Rows with a non-missing, non-empty Direct.Evidence.
sum(ctd_asthma$Direct.Evidence != "" & !is.na(ctd_asthma$Direct.Evidence))

## ----ctd_asthma_table, results="asis"--------------------------------------
library(knitr)
# Tabulate the disease names, label the two resulting columns and print the
# table sorted by decreasing frequency.
tt <- as.data.frame(table(ctd_asthma$Disease.Name))
names(tt) <- c("Disease", "Frequency")
kable(tt[order(tt$Frequency, decreasing = TRUE), ])

## ----ctd_asthma_chem-------------------------------------------------------
# Extract the "chemical interactions" table of the asthma query and count its
# distinct chemicals.
ctd_asthma_chem <- get_table(asthma, index_name = "chemical interactions")
colnames(ctd_asthma_chem)
length(unique(ctd_asthma_chem$Chemical.Name))

## ----ctd_asthma_chem_cur---------------------------------------------------
# Rows with a non-missing, non-empty Direct.Evidence.
sum(
  ctd_asthma_chem$Direct.Evidence != "" &
    !is.na(ctd_asthma_chem$Direct.Evidence)
)

## ----ctd_asthma_plot-------------------------------------------------------
# NOTE(review): the title mentions "GALA genes" but the object plotted is
# `asthma` — confirm the intended wording.
plot(
  asthma,
  index_name = "chemical interactions",
  subset.disease = "Asthma",
  filter.score = 30
) +
  ggtitle("Evidence of the association between GALA genes and chemicals")

## ----intersect_gala_asthma_chem_1------------------------------------------
# Chemical names present in both tables. intersect() returns each shared name
# once, so length(intr_chem) is a count of distinct chemicals.
intr_chem <- intersect(gala_chem$Chemical.Name, ctd_asthma_chem$Chemical.Name)
length(intr_chem)

## ----intersect_gala_asthma_chem_2------------------------------------------
# Share of each table's chemicals that is also found in the other one. The
# denominators count distinct chemical names (not table rows, where the same
# chemical can occur several times) so they are on the same scale as the
# numerator.
length(intr_chem) / length(unique(gala_chem$Chemical.Name)) * 100
length(intr_chem) / length(unique(ctd_asthma_chem$Chemical.Name)) * 100

## ----intersect_gala_asthma_chem_2_temp, echo=FALSE-------------------------
# Same two percentages, rounded to two decimals and stored as p1 / p2.
p1 <- round(
  length(intr_chem) / length(unique(gala_chem$Chemical.Name)) * 100, 2
)
p2 <- round(
  length(intr_chem) / length(unique(ctd_asthma_chem$Chemical.Name)) * 100, 2
)

## ----intersect_gala_asthma_chem_cur_1--------------------------------------
# Chemicals of the asthma table that have a non-missing, non-empty
# Direct.Evidence, then their overlap with the GALA chemicals. This replaces
# the previous value of intr_chem.
a <- ctd_asthma_chem$Chemical.Name[
  !is.na(ctd_asthma_chem$Direct.Evidence) &
    ctd_asthma_chem$Direct.Evidence != ""
]
intr_chem <- intersect(gala_chem$Chemical.Name, a)
length(intr_chem)

## ----intersect_gala_asthma_chem_plot_1-------------------------------------
# Restrict both tables to the shared chemicals and keep only the first row
# seen for each chemical name, so the merge below yields one row per chemical.
gala_chem_r <- gala_chem[gala_chem$Chemical.Name %in% intr_chem, ]
gala_chem_r <- gala_chem_r[!duplicated(gala_chem_r$Chemical.Name), ]
ctd_asthma_chem_r <- ctd_asthma_chem[
  ctd_asthma_chem$Chemical.Name %in% intr_chem,
]
ctd_asthma_chem_r <- ctd_asthma_chem_r[
  !duplicated(ctd_asthma_chem_r$Chemical.Name),
]

# Put the two Reference.Count columns side by side, keyed by chemical name;
# the first count column comes from the GALA table, the second from asthma.
dta <- merge(
  gala_chem_r[, c("Chemical.Name", "Reference.Count")],
  ctd_asthma_chem_r[, c("Chemical.Name", "Reference.Count")],
  by = "Chemical.Name"
)
colnames(dta) <- c("Chemical.Name", "Reference.Gala", "Reference.Asthma")
# Sort by GALA count, then asthma count, both descending.
dta <- dta[
  order(dta$Reference.Gala, dta$Reference.Asthma, decreasing = TRUE),
]
# head() shows at most five rows and never pads with NA rows when the table
# is shorter than that.
head(dta, 5)

## ----intersect_gala_asthma_chem_plot_2-------------------------------------
# Leaf plot of the first 25 rows of dta: GALA reference counts on the left,
# asthma reference counts on the right. head() is used instead of dta[1:25, ]
# so that no all-NA rows are passed to the plot when dta has fewer than 25
# rows.
leaf_plot(
  head(dta, 25),
  label = "Chemical.Name",
  valueLeft = "Reference.Gala", valueRight = "Reference.Asthma",
  titleLeft = "GALA", titleRight = "Asthma"
)

## ----load_hugo-------------------------------------------------------------
# Read the tab-separated "HGNC_Genes.tsv" file from the package's "extdata"
# directory. The argument is spelled out in full as `stringsAsFactors` rather
# than relying on partial argument matching, and mustWork = TRUE raises an
# error at the lookup if the file is missing.
hgnc_universe <- read.delim(
  system.file(
    "extdata", "HGNC_Genes.tsv",
    package = "CTDquerier",
    mustWork = TRUE
  ),
  sep = "\t",
  stringsAsFactors = FALSE
)

## ----gala_enrich_asthma_all------------------------------------------------
# enrich() of `gala` against `asthma` with use = "all"; the universe is the
# Approved.Symbol column of hgnc_universe.
enrich(
  gala, asthma,
  use = "all",
  universe = hgnc_universe$Approved.Symbol
)

## ----gala_enrich_asthma_curated--------------------------------------------
# Same call with use = "curated".
enrich(
  gala, asthma,
  universe = hgnc_universe$Approved.Symbol,
  use = "curated"
)

## ----air_ctd---------------------------------------------------------------
# Query by chemical term "Air Pollutants" and print the result.
air <- query_ctd_chem(terms = "Air Pollutants")
air

## ----gala_enrich_air-------------------------------------------------------
# enrich() of `gala` against `air` with use = "all" and the same universe.
enrich(
  gala, air,
  universe = hgnc_universe$Approved.Symbol,
  use = "all"
)

## ----sessionInfo, echo=FALSE-----------------------------------------------
sessionInfo()

# Try the CTDquerier package in your browser
#
# Any scripts or data that you put into this service are public.
#
# CTDquerier documentation built on Oct. 31, 2019, 2:57 a.m.