Nothing
#' @title CpG_summary
#' @description CpG_summary annotates genes with CpG island regions and GC
#'   content, and draws a set of plots to assess emerging data features. Genes
#'   are looked up by symbol in a curated CpG table; the plots can be grouped
#'   either by protein class ("class") or by subcellular location ("location").
#'   Only genes with GC data are plotted.
#'
#' @param data Requires a data frame generated by score_genes; it must contain
#'   a "Symbol" column. The scatter plots additionally use the columns
#'   "log2FoldChange", "LFCscore", "Specificity_score", "Association_score" and
#'   "Interactors"; a scatter plot whose column is absent is skipped with a
#'   warning. Class - data frame
#' @param type Requires to specify if plotting is performed for location or
#'   class types; default is "class". Alternatively, select "location". Any
#'   other value produces only the overall GC% histogram. Class - string
#' @return The input data frame (all rows) with the added columns "CpG"
#'   (region string "chr<chromosome>:<start>:<end>", or the string "NA" when the
#'   gene is not in the CpG table), "GC_content" (NA when the gene is not in the
#'   CpG table) and, depending on \code{type}, "Class" or "Location" (the
#'   string "NA" when unknown). Plots are printed to the active graphics device
#'   as a side effect.
#' @importFrom RCurl getURL
#' @importFrom RColorBrewer brewer.pal
#' @importFrom RColorBrewer brewer.pal.info
#' @importFrom lattice densityplot
#' @importFrom lattice histogram
#' @importFrom ggExtra ggMarginal
#' @importFrom ggplot2 aes
#' @importFrom ggplot2 theme
#' @importFrom ggplot2 element_text
#' @importFrom ggplot2 geom_col
#' @importFrom ggplot2 geom_point
#' @importFrom ggplot2 ggplot
#' @importFrom ggplot2 labs
#' @import methods
#' @import utils
#' @examples
#' \dontrun{
#' path_to_test_data<- system.file("extdata", "test_data.tabular", package="OmicInt")
#' # basic usage of CpG_summary
#' df<-utils::read.table(path_to_test_data)
#' return_df<-CpG_summary(df)
#' head(return_df)
#' }
#' @export
CpG_summary <- function(data, type = "class") {
  stopifnot(
    is.data.frame(data),
    "Symbol" %in% names(data),
    is.character(type),
    length(type) == 1L
  )
  symbols <- as.character(data[["Symbol"]])

  # CpG data preparation: build "chr<chromosome>:<start>:<end>" region labels.
  cpg <- .cpg_read_remote_csv(
    "https://gitlab.com/Algorithm379/databases/-/raw/main/CpG_data.csv"
  )
  cpg_region <- paste0(
    "chr", cpg[["chromosome_name"]], ":",
    cpg[["start_position"]], ":", cpg[["end_position"]]
  )

  # Annotate by gene symbol. The lookup is keyed with match(), so each row
  # receives the value of its own gene rather than the value that happens to
  # sit at the same row position in the reference table.
  data$"CpG" <- .cpg_lookup(
    symbols, cpg[["hgnc_symbol"]], cpg_region, missing = "NA"
  )
  data$"GC_content" <- .cpg_lookup(
    symbols, cpg[["hgnc_symbol"]], cpg[["percentage_gene_gc_content"]],
    missing = NA
  )

  # Grouping annotation (protein class or subcellular location).
  group_col <- NULL
  if (type == "class") {
    classes <- .cpg_read_remote_csv(
      "https://gitlab.com/Algorithm379/databases/-/raw/main/HS_protein_classes_curated.csv"
    )
    data$"Class" <- .cpg_lookup(
      symbols, classes[["Gene"]], as.character(classes[["Class"]]),
      missing = "NA"
    )
    group_col <- "Class"
  } else if (type == "location") {
    location_df <- .cpg_read_remote_csv(
      "https://gitlab.com/Algorithm379/databases/-/raw/main/Subcellular.locationmerged_protein_data.csv"
    )
    location <- .cpg_lookup(
      symbols, location_df[["Symbol"]],
      as.character(location_df[["Subcellular.location"]]),
      missing = "NA"
    )
    # Genes present in the table but without a recorded location are also
    # reported as the string "NA".
    location[is.na(location)] <- "NA"
    data$"Location" <- location
    group_col <- "Location"
  }

  # Only GC data containing genes are plotted. Subsetting after annotation
  # keeps the grouping column aligned with the retained rows.
  df <- data[!is.na(data$"GC_content"), , drop = FALSE]
  if (nrow(df) == 0L) {
    warning(
      "No genes with GC content data were found; no plots were produced.",
      call. = FALSE
    )
    return(data)
  }

  # Bind the bare names used in formulas/aes() so R CMD check does not report
  # undefined global variables; the plotting data frame masks them at run time.
  gene <- GC <- group <- NULL
  plot_df <- df
  plot_df$gene <- df$"Symbol"
  plot_df$GC <- df$"GC_content"

  # Lattice and ggExtra objects are only drawn when printed explicitly inside
  # a function.
  print(lattice::histogram(~GC,
    data = plot_df,
    type = "percent",
    xlab = "GC%",
    main = "GC% distribution"
  ))

  if (is.null(group_col)) {
    return(data)
  }
  plot_df$group <- df[[group_col]]

  # Qualitative colour palette, one colour per plotted group.
  pal_info <- RColorBrewer::brewer.pal.info
  qual_col_pals <- pal_info[pal_info$"category" %in% c("qual"), ]
  col_vector <- unlist(mapply(
    RColorBrewer::brewer.pal,
    qual_col_pals$"maxcolors", rownames(qual_col_pals)
  ))
  n_groups <- length(unique(plot_df$group))
  col_vector <- col_vector[seq_len(min(n_groups, length(col_vector)))]

  if (type == "class") {
    hist_title <- "GC% distribution across protein classes"
    dens_title <- "CpG island gene distributions based on protein class"
    scatter_cols <- c(
      "log2FoldChange", "LFCscore", "Specificity_score",
      "Association_score", "Interactors"
    )
  } else {
    hist_title <- "GC% distribution across protein cellular locations"
    dens_title <- "CpG island gene distributions based on protein location"
    scatter_cols <- c(
      "Interactors", "LFCscore", "log2FoldChange",
      "Association_score", "Specificity_score"
    )
  }

  print(lattice::histogram(~GC | group,
    data = plot_df,
    type = "percent",
    xlab = "GC%",
    main = hist_title
  ))
  print(lattice::densityplot(~GC_content,
    groups = group, data = plot_df,
    plot.points = FALSE, auto.key = TRUE,
    par.settings = list(superpose.line = list(col = col_vector)),
    main = dens_title
  ))

  if (type == "class") {
    # Per-gene GC content bars, coloured by protein class.
    bars <- ggplot2::ggplot(plot_df) +
      ggplot2::geom_col(ggplot2::aes(x = gene, y = GC, fill = group)) +
      ggplot2::labs(fill = group_col) +
      ggplot2::theme(
        axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)
      )
    print(bars)
  }

  for (x_col in scatter_cols) {
    if (!x_col %in% names(df)) {
      warning(
        "Column '", x_col, "' not found in data; scatter plot skipped.",
        call. = FALSE
      )
      next
    }
    .cpg_marginal_scatter(plot_df, x_col, group_col)
  }

  return(data)
}

# Download a CSV file from `url` and parse it into a data frame.
.cpg_read_remote_csv <- function(url) {
  utils::read.csv(text = RCurl::getURL(url), stringsAsFactors = FALSE)
}

# For every element of `keys`, return the entry of `table_values` whose
# `table_keys` entry equals it (first match wins); unmatched keys get `missing`.
.cpg_lookup <- function(keys, table_keys, table_values, missing = NA) {
  idx <- match(as.character(keys), as.character(table_keys))
  out <- table_values[idx]
  out[is.na(idx)] <- missing
  out
}

# Print a scatter plot of GC content against column `x_col`, coloured by the
# `group` column of `plot_df`, with marginal "densigram" panels.
.cpg_marginal_scatter <- function(plot_df, x_col, group_col) {
  # Bound locally for R CMD check; the data frame columns are used at run time.
  x <- GC <- group <- NULL
  plot_df$x <- plot_df[[x_col]]
  p <- ggplot2::ggplot(
    plot_df,
    ggplot2::aes(x = x, y = GC, color = group)
  ) +
    ggplot2::geom_point() +
    ggplot2::labs(x = x_col, y = "GC", color = group_col)
  print(ggExtra::ggMarginal(p,
    type = "densigram",
    size = 3, fill = "lightblue"
  ))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.