inst/doc/LINC.R

## ----include=FALSE, cache=FALSE------------------------------------------
require(png)
require(grid)
require(gridExtra)
suppressMessages(require(LINC))
data(BRAIN_EXPR)

## ----eval = FALSE--------------------------------------------------------
#  str(GTEX_CRBL)
#  
#   num [1:56318, 1:117] 0.0475 10.7799 0.105 0 0 ...
#   - attr(*, "dimnames")=List of 2
#    ..$ : chr [1:56318] "ENSG00000223972" "ENSG00000227232" "ENSG00000243485" "ENSG00000237613" ...
#    ..$ : chr [1:117] "GTEX-117XS-3126-SM-5GIDP" "GTEX-1192X-3226-SM-5987D" "GTEX-11DXW-1026-SM-5H11K"
#                      "GTEX-11DXY-3126-SM-5N9BT" ...
#  

## ----eval = FALSE--------------------------------------------------------
#  # STEP 1: select the high-variance genes
#  var_index <- order(apply(GTEX_CRBL, 1, var), decreasing = T)
#  GTEX_CRBL_HVAR <- GTEX_CRBL[var_index[1:5000], ]
#  
#  # STEP 2: get the gene biotype
#  require(biomaRt)
#  ensembl <- useMart("ensembl", dataset = "hsapiens_gene_ensembl")
#  biotype <- getBM(attributes=c('gene_biotype',
#                                'ensembl_gene_id'),
#                      filters = 'ensembl_gene_id',
#                      values = rownames(GTEX_CRBL_HVAR),
#                      mart = ensembl)
#  # STEP 3:
#  index <- match(rownames(GTEX_CRBL_HVAR), biotype$ensembl_gene_id)
#  GTEX_CRBL_BIOTYPE <- biotype$gene_biotype[index]

## ----eval = FALSE--------------------------------------------------------
#  table(GTEX_CRBL_BIOTYPE)
#  
#   3prime_overlapping_ncRNA                        antisense                            lincRNA
#                          8                              279                                 74
#                      miRNA                         misc_RNA                            Mt_rRNA
#                          3                                4                                  2
#                    Mt_tRNA             processed_pseudogene               processed_transcript
#                          4                              119                                 37
#             protein_coding                             rRNA                     sense_intronic
#                       4256                                1                                 11
#          sense_overlapping                           snoRNA                              snRNA
#                         13                                8                                  1
#                  TR_C_gene transcribed_processed_pseudogene transcribed_unprocessed_pseudogene
#                          1                                8                                 25
#         unitary_pseudogene           unprocessed_pseudogene
#                          2                               18
#  

## ----warning = FALSE, message = FALSE, fig.width = 17, fig.height = 11, eval = TRUE----
                                 
# the preprocessed expression matrix with 1000 genes                                 
str(cerebellum)

# a TRUE/FALSE vector with TRUE for protein-coding genes
str(pcgenes_crbl)

# STEP 1: Separate the protine-coding genes from the queries (lincRNAs)
crbl_matrix  <- linc(cerebellum, codingGenes = pcgenes_crbl)

# STEP 2: Compute the co-expression network with a fixed threshold
crbl_cluster <- clusterlinc(crbl_matrix, pvalCutOff = 0.005)

# STEP 3: Interrogate enriched biological terms for co-expressed genes
crbl_bp <- getbio(crbl_cluster)

# Show the results as a plot!
plotlinc(crbl_bp)

## ---- message = FALSE----------------------------------------------------
getcoexpr(crbl_cluster, query = "55384")[1:5]

# The co-expressed genes can also be returned as gene symbols.
getcoexpr(crbl_cluster, query = "55384", keyType = 'SYMBOL')[1:5]


## ----fig.width = 23, fig.height = 10, warning = FALSE, message = FALSE, eval = TRUE----
meg3 <- singlelinc(crbl_matrix, query = "55384",
                   onlycor = TRUE, underth = FALSE,
                   threshold = 0.5, ont = 'MF')
plotlinc(meg3)

## ----fig.width = 17, fig.height = 9.5, warning = FALSE, message = FALSE, eval = TRUE----
data(BRAIN_EXPR)

# (A) call 'linc' with no further arguments
# 'cerebellum' is a matrix of expression values; rows correspond to genes
# 'pcgenes_crbl' is a TRUE/FALSE vector; TRUE indicates a protein-coding gene 
crbl_matrix <- linc(cerebellum, codingGenes = pcgenes_crbl)

# (B) remove first seven principle components
crbl_matrix_pca <- linc(cerebellum, codingGenes = pcgenes_crbl, rmPC = c(1:7))

# (C) negative correlation by using 'userFun'
crbl_matrix_ncor <- linc(cerebellum, codingGenes = pcgenes_crbl,
                         userFun = function(x,y){ -cor(x,y) })

# (D) remove outliers using the ESD method
crbl_matrix_esd <- linc(cerebellum, codingGenes = pcgenes_crbl, outlier = "esd")

# (E) plot this object
plotlinc(crbl_matrix_esd)

## ----eval = FALSE--------------------------------------------------------
#  plotlinc(crbl_matrix)

## ----fig.width = 23, fig.height = 10, warning = FALSE, message = FALSE, eval = TRUE----
meg3_pca <- singlelinc(crbl_matrix_pca, query = "55384",
                   onlycor = TRUE, underth = FALSE,
                   threshold = 0.5, ont = 'MF')
plotlinc(meg3_pca)

## ------------------------------------------------------------------------
intersect(getcoexpr(meg3), getcoexpr(meg3_pca))

## ----warning = FALSE, eval = TRUE----------------------------------------
# results()     - results (different for subclasses) 
# correlation() - correlation matrices
# assignment()  - vector of protein-coding genes
# history()     - stored parameters 
# express()     - original expression matrix

cerebellum <- express(crbl_cluster)
str(cerebellum)

## ----warning = FALSE, eval = TRUE----------------------------------------
# getlinc() is used to accesss information
getlinc(crbl_cluster, subject = "queries")

## ----warning = FALSE, eval = TRUE----------------------------------------
# feature() can be used to convert objects
crbl_matrix <- crbl_cluster + feature(setLevel = "LINCmatrix", showLevels = FALSE)

## ----warning = FALSE, eval = TRUE----------------------------------------
# change the used gene annotation, here from "human" to "mouse"
murine_matrix <- changeOrgDb(crbl_matrix, OrgDb = 'org.Mm.eg.db')

## ----fig.width = 11, fig.height = 7, warning = FALSE, eval = TRUE--------

# scatterplots, correlations and expression level of query
plotlinc(crbl_cluster, showCor = c("647979", "6726", "3337", "3304" ,"3320"))

## ----eval = FALSE--------------------------------------------------------
#  linctable(file_name = "crbl_co_expr", input = crbl_cluster)

## ----fig.width = 14, fig.height = 12, warning = FALSE, message = FALSE, eval = FALSE----
#  justlinc(GTEX_LIVER_CRUDE) # 'justlinc' will search for the 10 best candidates
#  
#  my_lincRNAs <- c("ENSG00000197813") # This could also be a vector of ids
#  res <- justlinc(GTEX_LIVER_CRUDE, targetGenes = my_lincRNAs) # 'justlinc' with one query

## ----include=FALSE, cache=FALSE------------------------------------------
data(BRAIN_EXPR)

## ----fig.width = 13, fig.height = 7.5, warning = FALSE, message = FALSE, eval = TRUE----
# apply the distance method "correlation" instead of "dicedist", the default
crbl_cluster_cor <- clusterlinc(crbl_matrix, distMethod = "correlation")
plotlinc(crbl_cluster_cor)

## ----fig.width = 13.5, fig.height = 6.5, warning = FALSE, fig.keep = 'last', eval = TRUE----
# add custom names and colors
gbm_cluster  <-  gbm_cluster + feature(customID = "CANCER_GBM", customCol = "red")
ctx_cluster  <-  ctx_cluster + feature(customID = "HEALTHY_CTX", customCol = "blue")
hpc_cluster  <-  hpc_cluster + feature(customID = "HEALTHY_HPC", customCol = "blue")
crbl_cluster <-  crbl_cluster + feature(customID = "HEALTHY_CRBL", customCol = "blue")

# plot the dendrogram
norad <- querycluster('647979', queryTitle = 'NORAD',
             gbm_cluster,  # Glioblastoma
             ctx_cluster,  # Cortex
             hpc_cluster,  # Hippocampus
             crbl_cluster) # Cerebellum

neat1 <- querycluster('283131', queryTitle = 'NEAT1',
             gbm_cluster, ctx_cluster,
             hpc_cluster, crbl_cluster)

grid.arrange(norad, neat1, ncol = 2)

## ----eval = FALSE--------------------------------------------------------
#  # STEP 1: get the co-expressed genes
#  norad <- lapply(list(gbm_cluster,
#                       ctx_cluster,
#                       hpc_cluster,
#                       crbl_cluster),
#                  function(x){
#                  getcoexpr(x, '647979')})
#  
#  # STEP 2: name the list
#  names(norad) <- c("CANCER_GBM",
#                    "HEALTHY_CTX",
#                    "HEALTHY_HPC",
#                    "HEALTHY_CRBL")
#  
#  # STEP 3: enrichment analysis in 'clusterProfiler'
#  require(clusterProfiler)
#  norad_cluster <- compareCluster(norad,
#                                  fun = 'enrichGO',
#                                  OrgDb = 'org.Hs.eg.db',
#                                  ont = 'BP')
#  plot(norad_cluster)

## ----eval = FALSE--------------------------------------------------------
#  # WRAPPER
#  justlinc()         # gene selection, co-expression
#  
#  # MAIN FUNCTIONS
#  linc()             # cor. matrix and statistics
#  clusterlinc()      # cluster and cor. test
#  singlelinc()       # single query co-expression
#  
#  # PLOTTING FUNCTIONS
#  plotlinc()         # main plotting function
#  querycluster()     # one query in multiple data sets
#  
#  # HELPING FUNCTIONS
#  getcoexpr()        # get the co-expressed genes
#  getbio()           # enriched terms for a cluster
#  object + feature() # level and data labeling
#  getlinc()          # subsetting of 'LINC' objects
#  changeOrgDb()      # change organism
#  linctable()        # write to table

## ----echo=FALSE, fig.width = 8, fig.height = 3.5, eval = TRUE------------
  overview_img <- readPNG(system.file("extdata", "overview_img.png",
                                     package ="LINC"))
    overview_plot <- rasterGrob(overview_img, interpolate = TRUE)
    grid.arrange(overview_plot)

Try the LINC package in your browser

Any scripts or data that you put into this service are public.

LINC documentation built on April 28, 2020, 6:44 p.m.