R/miRNA_summary_validated.R

Defines functions miRNA_summary_validated

Documented in miRNA_summary_validated

#' @title miRNA_summary_validated
#' @description miRNA_summary_validated reports which of the supplied genes
#'   have known (validated) miRNAs regulating them. The genes are checked
#'   against a curated miRNA target table downloaded at call time, annotated
#'   with either protein class or subcellular location, and a series of plots
#'   is drawn to summarise the result.
#'
#' @param data Requires a data frame generated by score_genes. It must contain
#'   the columns \code{Symbol}, \code{Interactors}, \code{Association_score},
#'   \code{Specificity_score}, \code{LFCscore} and \code{log2FoldChange}.
#'   Class - data frame
#' @param type Selects the annotation used for grouping the plots: "class"
#'   (protein class, the default) or "location" (subcellular location).
#'   Class - string
#' @return A data frame with one row per input gene that has at least one
#'   entry in the validated miRNA table. Columns: \code{Number_of_miRNAs},
#'   \code{Symbol}, \code{Class} or \code{Location} (depending on \code{type};
#'   genes without an annotation get the string "NA"), \code{Interactors},
#'   \code{Association_score}, \code{Specificity_score}, \code{LFCscore} and
#'   \code{log2FoldChange}. Multiple plots summarising the data are drawn as a
#'   side effect.
#' @importFrom RCurl getURL
#' @importFrom reshape2 melt
#' @importFrom lattice densityplot
#' @importFrom lattice histogram
#' @importFrom ggExtra ggMarginal
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 geom_col
#' @importFrom ggplot2 ggplot
#' @import methods
#' @import utils
#' @examples
#' \dontrun{
#' path_to_test_data<- system.file("extdata", "test_data.tabular", package="OmicInt")
#' # basic usage of miRNA_summary_validated
#' df<-utils::read.table(path_to_test_data)
#' return_df<-miRNA_summary_validated(df)
#' head(return_df)
#' }
#' @export
miRNA_summary_validated <- function(data, type = "class") {

  # Validate arguments before any download so bad calls fail fast and offline.
  type <- match.arg(type, c("class", "location"))

  score_cols <- c("Interactors", "Association_score", "Specificity_score",
                  "LFCscore", "log2FoldChange")
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame generated by score_genes.",
         call. = FALSE)
  }
  missing_cols <- setdiff(c("Symbol", score_cols), colnames(data))
  if (length(missing_cols) > 0) {
    stop("`data` is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }

  # miRNA data preparation: only genes present in the miRNA table are reported
  miRNA <- .miRNA_validated_read_csv(
    "https://gitlab.com/Algorithm379/databases/-/raw/main/miRNA_df_validated.csv"
  )
  df <- .miRNA_validated_count(data[["Symbol"]], miRNA[["Target_Symbol"]])

  # Annotate each reported gene by *matching on the gene symbol*; a
  # position-wise ifelse() would attach annotations of unrelated rows.
  if (type == "class") {
    classes <- .miRNA_validated_read_csv(
      "https://gitlab.com/Algorithm379/databases/-/raw/main/HS_protein_classes_curated.csv"
    )
    group_col <- "Class"
    df[[group_col]] <- .miRNA_validated_label(
      df[["Symbol"]], classes[["Gene"]], classes[["Class"]]
    )
  } else {
    location_df <- .miRNA_validated_read_csv(
      "https://gitlab.com/Algorithm379/databases/-/raw/main/Subcellular.locationmerged_protein_data.csv"
    )
    group_col <- "Location"
    df[[group_col]] <- .miRNA_validated_label(
      df[["Symbol"]], location_df[["Symbol"]],
      location_df[["Subcellular.location"]]
    )
  }

  # Carry the scores over from the input; if a symbol occurs several times in
  # `data`, the first occurrence is used.
  row_in_data <- match(df[["Symbol"]], as.character(data[["Symbol"]]))
  for (score_col in score_cols) {
    df[[score_col]] <- data[[score_col]][row_in_data]
  }

  if (nrow(df) == 0L) {
    warning("None of the supplied genes has a validated miRNA entry; ",
            "nothing to plot.", call. = FALSE)
    return(df)
  }

  .miRNA_validated_plot(df, group_col, score_order = switch(
    type,
    class = c("log2FoldChange", "LFCscore", "Specificity_score",
              "Association_score", "Interactors"),
    location = c("Interactors", "LFCscore", "log2FoldChange",
                 "Association_score", "Specificity_score")
  ))

  return(df)
}

# Download a CSV file from `url` and parse it into a data frame.
.miRNA_validated_read_csv <- function(url) {
  utils::read.csv(text = RCurl::getURL(url))
}

# Count, for every distinct non-missing symbol that occurs in `targets`, how
# many entries of `targets` equal it. Rows follow the order of first
# appearance in `symbols`.
.miRNA_validated_count <- function(symbols, targets) {
  symbols <- unique(as.character(symbols))
  symbols <- symbols[!is.na(symbols) & symbols %in% targets]
  data.frame(
    Number_of_miRNAs = tabulate(match(targets, symbols),
                                nbins = length(symbols)),
    Symbol = symbols,
    stringsAsFactors = FALSE
  )
}

# Look up `keys` in `ref_keys` and return the corresponding `ref_values` as
# character; unmatched keys and missing values become the string "NA" so that
# they form their own plotting group.
.miRNA_validated_label <- function(keys, ref_keys, ref_values) {
  labels <- as.character(ref_values)[match(keys, ref_keys)]
  labels[is.na(labels)] <- "NA"
  labels
}

# Draw all summary plots for `df`, grouped by the column named `group_col`
# ("Class" or "Location"). `score_order` gives the score columns used as the
# x axis of the scatter plots, in drawing order.
.miRNA_validated_plot <- function(df, group_col, score_order) {

  # Dummy bindings: the names below are columns resolved inside the plotting
  # calls; declaring them avoids "no visible binding" notes in R CMD check.
  Number_of_miRNAs <- Symbol <- Group <- X_value <- NULL

  by_class <- identical(group_col, "Class")
  plot_df <- df
  plot_df$Group <- factor(df[[group_col]])
  n_groups <- nlevels(plot_df$Group)

  # Qualitative ColorBrewer palettes, recycled if there are more groups than
  # available colours.
  qual_pals <- RColorBrewer::brewer.pal.info
  qual_pals <- qual_pals[qual_pals$"category" %in% "qual", ]
  col_vector <- unlist(Map(RColorBrewer::brewer.pal,
                           qual_pals$"maxcolors", rownames(qual_pals)))
  col_vector <- rep_len(col_vector, n_groups)

  # lattice and ggExtra objects are only rendered when printed; inside a
  # function this has to be done explicitly.
  print(lattice::histogram(~Number_of_miRNAs, data = plot_df,
                           type = "percent",
                           xlab = "Number of known miRNAs",
                           main = "miRNA % distribution"))

  print(lattice::histogram(
    ~Number_of_miRNAs | Group, data = plot_df,
    type = "percent",
    xlab = "Number of known miRNAs",
    main = if (by_class) {
      "miRNA number distribution across protein classes"
    } else {
      "miRNA number distribution across protein cellular locations"
    }
  ))

  print(lattice::densityplot(
    ~Number_of_miRNAs, groups = Group, data = plot_df,
    plot.points = FALSE, auto.key = TRUE,
    par.settings = list(superpose.line = list(col = col_vector)),
    main = paste(
      n_groups,
      if (by_class) {
        "miRNA target gene distributions based on protein class"
      } else {
        "miRNA target gene distributions based on protein location"
      }
    )
  ))

  # Per-gene bar chart; only produced for the protein-class view.
  if (by_class) {
    print(
      ggplot2::ggplot(plot_df) +
        ggplot2::geom_col(ggplot2::aes(x = Symbol, y = Number_of_miRNAs,
                                       fill = Group)) +
        ggplot2::labs(x = "gene", fill = group_col) +
        ggplot2::theme(axis.text.x = ggplot2::element_text(
          angle = 90, vjust = 0.5, hjust = 1
        ))
    )
  }

  # One scatter plot with marginal densigrams per score column.
  for (score_col in score_order) {
    plot_df$X_value <- df[[score_col]]
    scatter <- ggplot2::ggplot(
      plot_df,
      ggplot2::aes(x = X_value, y = Number_of_miRNAs, color = Group)
    ) +
      ggplot2::geom_point() +
      ggplot2::labs(x = score_col, color = group_col)
    print(ggExtra::ggMarginal(scatter, type = "densigram",
                              size = 3, fill = "lightblue"))
  }

  invisible(NULL)
}

Try the OmicInt package in your browser

Any scripts or data that you put into this service are public.

OmicInt documentation built on Oct. 28, 2021, 5:09 p.m.