R/database_support.R

Defines functions database_support

Documented in database_support

#' Intersect with databases for potential miRNA-target gene interactions
#'
#' This function will intersect potential miRNA-target gene interactions from
#' the input matrix, which is generated by \link{negative_cor} or
#' \link{miR_converter}, with 8 prediction databases and 2 validated databases
#' about miRNA-target gene interactions. If the input contains hundreds
#' of interactions, it would take a few minutes to intersect all of them.
#'
#' @return data.frame format. Each row represents one potential interaction.
#'    The first four columns are information about interactions: miRNA,
#'    gene symbol, Ensembl ID, gene ID. Columns 5 to 12 represent
#'    the prediction databases, column 13 (`Sum`) is the total number of hits
#'    by these 8 databases, and columns 14 to 15 are the validated databases.
#'    If a database hits the interaction, the number in its column would be 1.
#'    The column `Validate` would be TRUE if at least one validated database
#'    hit the interaction.
#'    Furthermore, `Fold-Change` and `P-adjust` can also be found in this
#'    output, and if the `de novo` column contains 1 means that row is
#'    not supported by any databases. The column `evidence` represents if
#'    the experiment for validation is strong or limited, considering
#'    \url{http://mirtarbase.mbc.nctu.edu.tw/}.
#'
#' @param cor_data matrix format generated from \link{negative_cor} or
#'    \link{miR_converter}, including miRNA, gene, correlation coefficient
#'    for column names.
#' @param org species of genes and miRNAs, only support "hsa", "mmu"
#' @param Sum.cutoff a threshold for total hits by prediction databases. This
#'    one should not be greater than 8. Default is 2.
#'
#' @examples
#' ## Use the internal dataset
#' data("mirna", package = "anamiR", envir = environment())
#' data("pheno.mirna", package = "anamiR", envir = environment())
#' data("mrna", package = "anamiR", envir = environment())
#' data("pheno.mrna", package = "anamiR", envir = environment())
#'
#' ## SummarizedExperiment class
#' require(SummarizedExperiment)
#' mirna_se <- SummarizedExperiment(
#'  assays = SimpleList(counts=mirna),
#'  colData = pheno.mirna)
#'
#' ## SummarizedExperiment class
#' require(SummarizedExperiment)
#' mrna_se <- SummarizedExperiment(
#'  assays = SimpleList(counts=mrna),
#'  colData = pheno.mrna)
#'
#' ## Finding differential miRNA from miRNA expression data with t.test
#' mirna_d <- differExp_discrete(
#'    se = mirna_se,
#'    class = "ER",
#'    method = "t.test"
#' )
#'
#' ## Finding differential mRNA from mRNA expression data with t.test
#' mrna_d <- differExp_discrete(
#'    se = mrna_se,
#'    class = "ER",
#'    method = "t.test"
#' )
#'
#' ## Convert annotation to miRBase 21
#' mirna_21 <- miR_converter(data = mirna_d, original_version = 17)
#'
#' ## Correlation
#' cor <- negative_cor(mrna_data = mrna_d, mirna_data = mirna_21)
#'
#' ## Intersect with known databases
#' sup <- database_support(cor_data = cor)
#'
#' @import RMySQL
#' @import DBI
#' @export
database_support <- function(
  cor_data,
  org = "hsa",
  Sum.cutoff = 2
) {
  # Looks up every miRNA-gene pair of `cor_data` in the remote anamiR MySQL
  # tables (`all_hsa` / `all_mmu`), appends the statistics stored in columns
  # 3 to 11 of `cor_data` plus a "de novo" flag, and finally filters the
  # combined table by `Sum.cutoff`.
  #
  # Returns a matrix-like table with one row per retained interaction, or
  # NULL (with a warning) when no interaction is retained at all.

  # Validate the arguments before any network access so that bad input
  # fails fast with a clear message instead of "object 'query' not found".
  if (length(org) != 1 || !org %in% c("hsa", "mmu")) {
    stop("`org` must be either \"hsa\" or \"mmu\".", call. = FALSE)
  }
  if (is.null(dim(cor_data)) || ncol(cor_data) < 11) {
    stop("`cor_data` must be a matrix with at least 11 columns.",
         call. = FALSE)
  }

  # connect with db
  db <- RMySQL::dbConnect(RMySQL::MySQL(), user = "visitor",
                          password = "visitor",
                          dbname = "visitor",
                          host = "anamir.cgm.ntu.edu.tw")
  # Close only the connection opened here, and do so even when a query
  # fails; other MySQL connections of the calling session stay untouched.
  on.exit(DBI::dbDisconnect(db), add = TRUE)

  # The table name and the fixed part of both queries do not depend on the
  # row being processed, so they are built once outside the loop.
  tbl <- paste0("`all_", org, "`")
  hit_select <- paste0(
    "SELECT `miRNA_21`, `Gene_symbol`, `Ensembl`, `Gene_ID`, ",
    "`DIANA_microT_CDS`, `EIMMo`, `Microcosm`, `miRDB`, `miRanda`, ",
    "`PITA`, `rna22`, `Targetscan`, `Sum`, `miRecords`, `miRTarBase`, ",
    "`Validate` FROM ", tbl, " WHERE miRNA_21 like "
  )
  gene_select <- paste0(
    "SELECT DISTINCT gene_symbol, ensembl, gene_id FROM ", tbl,
    " WHERE gene_symbol like "
  )

  # intersection
  interaction <- vector("list", nrow(cor_data))
  for (i in seq_len(nrow(cor_data))) {
    mirna <- cor_data[i, 1]
    gene <- cor_data[i, 2]
    # Correlation, fold-change, p-value and group statistics (columns 3-11)
    # are carried over unchanged; names are dropped so the output keeps the
    # same (empty) column names for these fields as before.
    stats <- unname(as.list(cor_data[i, 3:11]))

    # Quote the values through the driver instead of pasting them raw, so
    # identifiers containing quotes can neither break nor alter the SQL.
    q_mirna <- DBI::dbQuoteString(db, as.character(mirna))
    q_gene <- DBI::dbQuoteString(db, as.character(gene))

    query <- paste0(hit_select, q_mirna,
                    " AND gene_symbol like ", q_gene, " ;")
    tmp <- DBI::dbGetQuery(db, query)

    if (nrow(tmp) == 0 && Sum.cutoff > 0) {
      # Not supported by any database and a positive cutoff was requested.
      next
    }

    de_novo <- 0
    if (nrow(tmp) == 0 && Sum.cutoff == 0) {
      # Keep the unsupported pair as a "de novo" interaction: fetch the gene
      # annotation alone and fill every database column with 0.
      gene_info <- DBI::dbGetQuery(db, paste0(gene_select, q_gene, " ;"))
      tmp[1, ] <- c(mirna, gene, gene_info[1, 2], gene_info[1, 3],
                    rep(0, 11), "FALSE")
      de_novo <- 1
    }
    interaction[[i]] <- c(tmp, stats, de_novo)
  }
  # Skipped rows are NULL entries, which rbind() ignores.
  interaction <- do.call(rbind, interaction)

  if (is.null(interaction)) {
    warning("No miRNA-target gene interaction was retained.", call. = FALSE)
    return(NULL)
  }

  # add column de novo
  colnames(interaction)[ncol(interaction)] <- "de novo"

  if (Sum.cutoff > 1) {
    # Column 13 is `Sum`, column 16 is `Validate`. A logical mask is used
    # because negative indexing with an empty index would select zero rows
    # (i.e. drop everything) when no row has to be removed.
    drop_row <- vapply(
      seq_len(nrow(interaction)),
      function(i) {
        interaction[i, 13] < Sum.cutoff && interaction[i, 16] %in% "FALSE"
      },
      logical(1)
    )
    # drop = FALSE keeps the table shape when a single row remains.
    interaction <- interaction[!drop_row, , drop = FALSE]
  }
  interaction
}

Try the anamiR package in your browser

Any scripts or data that you put into this service are public.

anamiR documentation built on Oct. 31, 2019, 8:55 a.m.