Nothing
#' @title miRNA_summary_validated
#' @description Summarises which of the supplied genes have known (validated)
#'   miRNAs regulating them. Gene symbols are checked against a curated miRNA
#'   target database that is downloaded at call time, the number of database
#'   entries per gene is counted, and several plots are drawn to assess the
#'   relationship between miRNA counts and the scores already present in
#'   \code{data}. Plots can be grouped by protein class ("class") or by
#'   subcellular location ("location").
#'
#' @param data A data frame generated by score_genes. It must contain the
#'   columns \code{Symbol}, \code{Interactors}, \code{Association_score},
#'   \code{Specificity_score}, \code{LFCscore} and \code{log2FoldChange}.
#'   Class - data frame
#' @param type Grouping used for plotting: "class" (default) groups by protein
#'   class, "location" groups by subcellular location. Any other single string
#'   triggers a warning and only the ungrouped summary is produced.
#'   Class - string
#' @return A data frame with one row per input gene that has at least one
#'   entry in the miRNA target database. Columns: \code{Number_of_miRNAs},
#'   \code{Symbol}, the grouping column (\code{Class} or \code{Location},
#'   with the string "NA" for unannotated genes), and the score columns copied
#'   from \code{data}. Plots are drawn as a side effect. If no gene matches,
#'   a warning is issued, nothing is plotted, and a zero-row data frame is
#'   returned.
#' @importFrom RCurl getURL
#' @importFrom reshape2 melt
#' @importFrom lattice densityplot
#' @importFrom lattice histogram
#' @importFrom ggExtra ggMarginal
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 geom_col
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 ggplot
#' @importFrom RColorBrewer brewer.pal
#' @importFrom RColorBrewer brewer.pal.info
#' @import methods
#' @import utils
#' @examples
#' \dontrun{
#' path_to_test_data<- system.file("extdata", "test_data.tabular", package="OmicInt")
#' # basic usage of miRNA_summary_validated
#' df<-utils::read.table(path_to_test_data)
#' return_df<-miRNA_summary_validated(df)
#' head(return_df)
#' }
#' @export
miRNA_summary_validated <- function(data, type = "class") {
  # Validate inputs first so that bad calls fail fast, before any download.
  if (!is.data.frame(data)) {
    stop("`data` must be a data frame generated by score_genes.",
         call. = FALSE)
  }
  score_cols <- c("Interactors", "Association_score", "Specificity_score",
                  "LFCscore", "log2FoldChange")
  missing_cols <- setdiff(c("Symbol", score_cols), colnames(data))
  if (length(missing_cols) > 0L) {
    stop("`data` is missing required column(s): ",
         paste(missing_cols, collapse = ", "), call. = FALSE)
  }
  if (!is.character(type) || length(type) != 1L || is.na(type)) {
    stop("`type` must be a single string: \"class\" or \"location\".",
         call. = FALSE)
  }
  if (!type %in% c("class", "location")) {
    warning("Unrecognised `type` \"", type,
            "\"; only the ungrouped miRNA summary is produced.",
            call. = FALSE)
  }

  # miRNA data preparation
  miRNA <- .mirna_read_remote_csv(
    "https://gitlab.com/Algorithm379/databases/-/raw/main/miRNA_df_validated.csv"
  )

  # Only genes present in the miRNA database are reported, in order of first
  # appearance in `data`; the count is the number of database rows per gene.
  input_symbols <- as.character(data[["Symbol"]])
  target_symbols <- as.character(miRNA[["Target_Symbol"]])
  hits <- unique(
    input_symbols[!is.na(input_symbols) & input_symbols %in% target_symbols]
  )
  counts <- table(factor(target_symbols, levels = hits))
  df <- data.frame(
    Number_of_miRNAs = as.integer(counts),
    Symbol = hits,
    stringsAsFactors = FALSE
  )

  # Annotation used for grouping. Values are looked up by gene symbol (not by
  # row position); genes without an annotation get the string "NA" so they
  # form their own group in the plots.
  group_col <- NULL
  if (type == "class") {
    classes <- .mirna_read_remote_csv(
      "https://gitlab.com/Algorithm379/databases/-/raw/main/HS_protein_classes_curated.csv"
    )
    group_col <- "Class"
    df[[group_col]] <- .mirna_lookup_label(
      df[["Symbol"]], classes[["Gene"]], classes[["Class"]]
    )
  }
  if (type == "location") {
    location_df <- .mirna_read_remote_csv(
      "https://gitlab.com/Algorithm379/databases/-/raw/main/Subcellular.locationmerged_protein_data.csv"
    )
    group_col <- "Location"
    df[[group_col]] <- .mirna_lookup_label(
      df[["Symbol"]], location_df[["Symbol"]],
      location_df[["Subcellular.location"]]
    )
  }

  # Copy the score columns from the matching row of `data` (first occurrence
  # when a symbol is duplicated). Every reported symbol comes from `data`, so
  # the match never yields NA and the column types are preserved.
  data_row <- match(df[["Symbol"]], input_symbols)
  for (col in score_cols) {
    df[[col]] <- data[[col]][data_row]
  }

  if (nrow(df) == 0L) {
    warning("None of the supplied gene symbols were found in the miRNA ",
            "target database; nothing to plot.", call. = FALSE)
    return(df)
  }

  # Plot the overall distribution. lattice objects are not auto-printed
  # inside a function, so they must be printed explicitly.
  print(lattice::histogram(~ Number_of_miRNAs, data = df,
                           type = "percent",
                           xlab = "Number of known miRNAs",
                           main = "miRNA % distribution"))

  if (identical(group_col, "Class")) {
    .mirna_plot_by_group(
      df, group_col,
      hist_title = "miRNA number distribution across protein classes",
      dens_title = "miRNA target gene distributions based on protein class",
      scatter_cols = c("log2FoldChange", "LFCscore", "Specificity_score",
                       "Association_score", "Interactors"),
      bar_plot = TRUE
    )
  }
  if (identical(group_col, "Location")) {
    .mirna_plot_by_group(
      df, group_col,
      hist_title = "miRNA number distribution across protein cellular locations",
      dens_title = "miRNA target gene distributions based on protein location",
      scatter_cols = c("Interactors", "LFCscore", "log2FoldChange",
                       "Association_score", "Specificity_score"),
      bar_plot = FALSE
    )
  }

  df
}

# Download a CSV file from `url` and parse it into a data frame.
.mirna_read_remote_csv <- function(url) {
  utils::read.csv(text = RCurl::getURL(url))
}

# Look up the label of each symbol in a key/value table; symbols without a
# (non-missing) label get the string "NA".
.mirna_lookup_label <- function(symbols, keys, values) {
  labels <- as.character(values)[match(symbols, as.character(keys))]
  labels[is.na(labels)] <- "NA"
  labels
}

# Build a vector of `n_groups` colours from the qualitative RColorBrewer
# palettes, recycling colours if more groups than colours are requested.
.mirna_group_palette <- function(n_groups) {
  pal_info <- RColorBrewer::brewer.pal.info
  qual <- pal_info[pal_info[["category"]] %in% "qual", ]
  colours <- unlist(mapply(RColorBrewer::brewer.pal,
                           qual[["maxcolors"]], rownames(qual)))
  rep_len(unname(colours), n_groups)
}

# Draw the grouped plots (conditional histogram, grouped density plot,
# optional per-gene bar plot, and one marginal scatter plot per score column).
.mirna_plot_by_group <- function(df, group_col, hist_title, dens_title,
                                 scatter_cols, bar_plot) {
  # Local bindings: lattice resolves names missing from `data` in the formula
  # environment, and ggplot2 falls back to this environment as well. Defining
  # them here also avoids "no visible binding" notes in R CMD check.
  Number_of_miRNAs <- df[["Number_of_miRNAs"]]
  gene <- df[["Symbol"]]
  Group <- factor(df[[group_col]])
  palette <- .mirna_group_palette(nlevels(Group))

  print(lattice::histogram(~ Number_of_miRNAs | Group, data = df,
                           type = "percent",
                           xlab = "Number of known miRNAs",
                           main = hist_title))
  print(lattice::densityplot(
    ~ Number_of_miRNAs, groups = Group, data = df,
    plot.points = FALSE, auto.key = TRUE,
    par.settings = list(superpose.line = list(col = palette)),
    main = paste(nlevels(Group), dens_title)
  ))

  if (bar_plot) {
    bars <- ggplot2::ggplot(df) +
      ggplot2::geom_col(ggplot2::aes(x = gene, y = Number_of_miRNAs,
                                     fill = !!as.name(group_col))) +
      ggplot2::theme(axis.text.x = ggplot2::element_text(angle = 90,
                                                         vjust = 0.5,
                                                         hjust = 1))
    print(bars)
  }

  for (score in scatter_cols) {
    # `!!as.name(...)` injects the column chosen at run time into aes().
    scatter <- ggplot2::ggplot(
      df,
      ggplot2::aes(x = !!as.name(score), y = Number_of_miRNAs,
                   color = !!as.name(group_col))
    ) + ggplot2::geom_point()
    # ggMarginal returns a plot object; it is only drawn when printed.
    print(ggExtra::ggMarginal(scatter, type = "densigram",
                              size = 3, fill = "lightblue"))
  }

  invisible(NULL)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.