R/DEm-1-arm1.R

Defines functions DEm_arm1_miRNA_gene

#file: DE-method/DEm-1-arm1-fn-v1.R

# arm1: miRNA-gene --------------------------------------------------------

###arm1_miRNA_gene function (groups anamiR steps in one function)
#case: with genetic abnormality
#control: without genetic abnormality
#' @title DEm_arm1_miRNA_gene
#' @description identifies valid miRNA-gene pairs for FFL
#' and that are found in databases
#'
#' @param mirna_expr miRNA expression matrix (miRNAs x samples):
#' matrix where rows are miRNAs and columns are samples
#' (row names are miRNA names in hsa format and column names are sampleIDs)
#' @param mrna_expr mRNA expression matrix (genes x samples):
#' matrix where rows are genes and columns are samples
#' (row names are gene names and column names are sampleIDs)
#' @param pheno phenotype matrix (samples x 1):
#' matrix where rows are samples and there is column ("group") that indicates group membership of samples
#' (row names are sampleIDs, column name is "group)
#'
#' @return list of length 5:
#' 1) mirna_se: SummarizedExperiment miRNAs
#' 2) mrna_se: Summarized Experiment mRNA,
#' 3) mirna_DE: dataframe of DE miRNAs with 5 columns added to mirna_expr (logFC, p-value, p-value adjusted, mean(group1) and mean(group2)),
#' 4) mrna_DE: dataframe of DE mRNAs with 5 columns added to mrna_expr (logFC, p-value, p-value adjusted, mean(group1) and mean(group2)),
#' 5) db_hits: dataframe of miRNA-mRNA pairs that were found in databases
DEm_arm1_miRNA_gene <- function(mirna_expr, mrna_expr, pheno){
  #####SummarizedExperiment class
  #convert matrices to SummarizedExperiment class
  mirna_se <- SummarizedExperiment::SummarizedExperiment(assays = S4Vectors::SimpleList(counts=mirna_expr), colData = pheno)
  mrna_se <- SummarizedExperiment::SummarizedExperiment(assays = S4Vectors::SimpleList(counts=mrna_expr), colData = pheno)
  print("1/4 -- converted data to SummarizedExperiment class")

  #####DE analysis
  mirna_DE <- as.data.frame(differExp_discrete(se = mirna_se, class = "group", method = "limma"))
  mrna_DE <- as.data.frame(differExp_discrete(se = mrna_se, class = "group", method = "limma"))
  print("2/4 -- finished DE miRNA & DE mRNA analyses")

  #####!!!!!!!! convert miR names to version 21?

  #####find all possible miRNA-TF pairs
  #logFC: case - control
  #logFC pos: upregulated in case group
  #logFC neg: downregulated in case group
  #pairs: miRNA up, gene down
  mirna_up <- row.names(mirna_DE[mirna_DE$`log-ratio` > 0, ]) #upregulated miRNAs
  mrna_down <- row.names(mrna_DE[mrna_DE$`log-ratio` < 0, ]) #downregulated genes
  mirna_up_mrna_down_pairs <- expand.grid(mirna_up, mrna_down) #find all possible pairs
  colnames(mirna_up_mrna_down_pairs) <- c("miRNA", "gene")
  #pairs: miRNA down, gene up
  mirna_down <- row.names(mirna_DE[mirna_DE$`log-ratio` < 0, ])  #downregulated miRNAs
  mrna_up <- row.names(mrna_DE[mrna_DE$`log-ratio` > 0, ]) #upregulated genes
  mirna_down_mrna_up_pairs <- expand.grid(mirna_down, mrna_up) #find all possible pairs
  colnames(mirna_down_mrna_up_pairs) <- c("miRNA", "gene")
  #put all pairs together
  mirna_gene_pairs <- rbind(mirna_up_mrna_down_pairs, mirna_down_mrna_up_pairs)
  mirna_gene_pairs[ , 1:2] <- apply(mirna_gene_pairs[ , 1:2], 2, as.character)
  #print info
  print("3/4 -- finished gathering potential miRNA-gene pairs")
  print(paste(dim(mirna_gene_pairs)[1], "potential miRNA-gene pairs", sep = " "))

  #####intersect with databases
  #search databases
  db_hits <- as.data.frame(database_support(cor_data = mirna_gene_pairs, org = "hsa", Sum.cutoff = 1))
  #convert df columns to class character or numeric (currently, columns are of class list)
  colnames_charac <- c("miRNA_21", "Gene_symbol", "Ensembl", "Validate")
  colnames_numeric <- colnames(db_hits)[!colnames(db_hits) %in% colnames_charac]
  db_hits[colnames_charac] = apply(db_hits[colnames_charac], 2, as.character)
  db_hits[colnames_numeric] = apply(db_hits[colnames_numeric], 2, as.numeric)
  #change column names
  names(db_hits)[names(db_hits) == "miRNA_21"] <- "miRNA"
  names(db_hits)[names(db_hits) == "Gene_symbol"] <- "gene"
  names(db_hits)[names(db_hits) == "Ensembl"] <- "Ensembl_ID_gene"
  names(db_hits)[names(db_hits) == "Gene_ID"] <- "GeneID_gene"
  names(db_hits)[names(db_hits) == "Sum"] <- "sum_db_hits_miRNAgene"
  print(paste(dim(db_hits)[1], "/", dim(mirna_gene_pairs)[1], "miRNA-gene pairs found in at least one database", sep = " "))
  print("4/4 -- finished database search")

  #####functional analysis?
  #path <- enrichment(data_support = db_hits, org = "hsa", per_time = 1000)

  #####add DE columns
  DE_cols <- c("log-ratio", "P-Value", "P-adjust", "mean_case", "mean_control")
  #add "(miRNA)" at the end of mirna DE columns
  colnames(mirna_DE)[colnames(mirna_DE) %in% DE_cols] <- paste(colnames(mirna_DE)[colnames(mirna_DE) %in% DE_cols], "(miRNA)", sep = "")
  #add mirna DE columns to db_hits
  mirna_DE_copy <- mirna_DE
  mirna_DE_copy$miRNA <- row.names(mirna_DE_copy)
  db_hits <- merge(x = db_hits,
                   y = mirna_DE_copy[, c("miRNA", "log-ratio(miRNA)", "P-Value(miRNA)", "P-adjust(miRNA)", "mean_case(miRNA)", "mean_control(miRNA)")],
                   by = "miRNA",
                   all.x = TRUE)
  #add "(gene)" at the end of mrna DE columns
  colnames(mrna_DE)[colnames(mrna_DE) %in% DE_cols] <- paste(colnames(mrna_DE)[colnames(mrna_DE) %in% DE_cols], "(gene)", sep = "")
  #add mrna DE columns to db_hits
  mrna_DE_copy <- mrna_DE
  mrna_DE_copy$gene <- row.names(mrna_DE_copy)
  db_hits <- merge(x = db_hits,
                   y = mrna_DE_copy[, c("gene", "log-ratio(gene)", "P-Value(gene)", "P-adjust(gene)", "mean_case(gene)", "mean_control(gene)")],
                   by = "gene",
                   all.x = TRUE)

  return(list(mirna_se = mirna_se, mrna_se = mrna_se,
              mirna_DE = mirna_DE, mrna_DE = mrna_DE,
              db_hits = db_hits))
}
th789/ffl documentation built on Nov. 5, 2019, 10:04 a.m.