R/class_map.R

Defines functions class_map

Documented in class_map

#' @title class_map
#'
#' @description Visualises how genes cluster on their LFCscore and annotates
#'   each gene with its protein class. The protein class table is downloaded
#'   from a curated online database, so an internet connection is required.
#'   The function expects a data frame generated by the score_genes function.
#'
#' @param data A data frame generated by score_genes. It must contain the
#'   columns "Symbol", "log2FoldChange", "Interactors" and "LFCscore";
#'   class - data frame
#' @param num Number of genes to take from each end of the LFCscore ranking
#'   (the \code{num} lowest and the \code{num} highest scoring genes). With the
#'   default 0 all genes are clustered. Must satisfy
#'   \code{2 * num <= nrow(data)}; class - integer
#' @return Called for its side effect: a dendrogram with a protein class colour
#'   bar and legend is drawn on the active graphics device. The value of the
#'   final \code{graphics::legend()} call is returned invisibly; class - plot
#' @importFrom  dendextend find_k
#' @importFrom  dendextend color_labels
#' @importFrom  dendextend color_branches
#' @importFrom  dendextend colored_bars
#' @import cluster
#' @import RColorBrewer
#' @importFrom graphics legend
#' @import utils
#' @importFrom stats  hclust
#' @importFrom stats as.dendrogram
#' @importFrom stats dist
#' @importFrom RCurl getURL
#' @examples
#' \dontrun{
#' path_to_test_data<- system.file("extdata", "test_data.tabular", package="OmicInt")
#' # basic usage of class_map
#' df<-utils::read.table(path_to_test_data)
#' class_map(df)}
#' @export
class_map <- function(data, num = 0) {

  # Validate the input before any download is attempted.
  required_cols <- c("Symbol", "log2FoldChange", "Interactors", "LFCscore")
  missing_cols <- setdiff(required_cols, colnames(data))
  if (length(missing_cols) > 0) {
    stop(
      "Input data is missing required column(s): ",
      paste(missing_cols, collapse = ", "),
      call. = FALSE
    )
  }
  if (!is.numeric(num) || length(num) != 1 || is.na(num) ||
      num < 0 || num != floor(num)) {
    stop("num must be a single non-negative whole number", call. = FALSE)
  }

  # Download the curated protein class table.
  classes_csv <- tryCatch(
    RCurl::getURL("https://gitlab.com/Algorithm379/databases/-/raw/main/HS_protein_classes_curated.csv"),
    error = function(e) {
      stop(
        "Could not download the protein class database: ",
        conditionMessage(e),
        call. = FALSE
      )
    }
  )
  # Force character columns so class labels are never turned into factor codes.
  classes <- utils::read.csv(text = classes_csv, stringsAsFactors = FALSE)
  if (!all(c("Gene", "Class") %in% colnames(classes))) {
    stop(
      "The downloaded protein class database does not contain the expected ",
      "'Gene' and 'Class' columns",
      call. = FALSE
    )
  }

  # Look up each gene's class by symbol. match() returns the database row of
  # every symbol (NA when absent), so each gene receives its own class rather
  # than whatever class happens to sit at the same row position.
  class_idx <- match(data$"Symbol", classes$"Gene")
  data$"Class" <- ifelse(
    is.na(class_idx),
    "NA",
    as.character(classes$"Class"[class_idx])
  )

  # Prepare the plotting data frame, keyed by gene symbol.
  df <- data[, c("log2FoldChange", "Interactors", "LFCscore", "Class")]
  rownames(df) <- data$"Symbol"

  if (num != 0) {

    if (num > nrow(data)) {
      stop("You selected more genes than your data file contains")
    }

    if (num * 2 > nrow(data)) {
      stop("There is an overlap for top and lowest scoring genes, you may want to cluster the full data instead or supply a lower number, i.e. 2*num < number of genes in your data")
    }

    # Ascending order: the first rows are the lowest scores, the last rows
    # the highest.
    df <- df[order(df$"LFCscore"), ]
    df_down <- df[seq_len(num), ]
    df_up <- df[(nrow(df) - num + 1):nrow(df), ]

    df <- rbind(df_up, df_down)
  }

  if (nrow(df) < 2) {
    stop("At least two genes are required for clustering", call. = FALSE)
  }

  # Hierarchical clustering on LFCscore only.
  df_dist <- data.frame(LFCscore = df$"LFCscore", row.names = rownames(df))
  score_dist <- stats::dist(df_dist, method = "euclidean")
  h_clust <- stats::hclust(score_dist, method = "ward.D2")
  dend <- stats::as.dendrogram(h_clust)

  # Build the colour pool from all qualitative RColorBrewer palettes.
  pal_info <- RColorBrewer::brewer.pal.info
  qual_col_pals <- pal_info[pal_info$"category" %in% c("qual"), ]
  col_vector <- unlist(mapply(
    RColorBrewer::brewer.pal,
    qual_col_pals$"maxcolors",
    rownames(qual_col_pals)
  ))

  # One colour per protein class; the pool is recycled if there are more
  # classes than colours so that no class ends up with a missing colour.
  class_levels <- levels(as.factor(data$"Class"))
  class_colors <- rep_len(col_vector, length(class_levels))
  names(class_colors) <- class_levels

  # Colour of every clustered gene, in the row order of df.
  row_class <- as.character(data$"Class"[match(rownames(df), data$"Symbol")])
  col_vector_names <- class_colors[row_class]

  dend_k <- dendextend::find_k(dend)

  dend <- dendextend::color_labels(dend, k = dend_k$nc * 2)

  plot(dendextend::color_branches(dend, h = mean(h_clust$height) * dend_k$nc * 2))

  dendextend::colored_bars(
    colors = as.vector(col_vector_names),
    dend = dend,
    rowLabels = "Class",
    add = TRUE,
    y_shift = -0.1,
    y_scale = 0.7
  )
  graphics::legend(
    "topright",
    legend = names(class_colors),
    fill = as.vector(class_colors),
    xpd = TRUE,
    cex = 0.7,
    bty = "n",
    inset = c(-0.1, -.25),
    title = "Protein class"
  )
}

Try the OmicInt package in your browser

Any scripts or data that you put into this service are public.

OmicInt documentation built on Oct. 28, 2021, 5:09 p.m.