R/plotAPCG.R

Defines functions plotAPCG

Documented in plotAPCG

#' Plot All Protein Composition Graph
#'
#' Plots a comprehensive composition mapping for a list of proteins.
#' The composition descriptors are hydrophobicity, van der Waals volume,
#' polarity, polarizability and desolvation. Hierarchical row and column
#' clustering is displayed alongside the heat map to show the similarity
#' analysis.
#'
#' @param file Path to a CSV file with a header row whose first column holds
#'   the PDB id and whose second column holds the protein sequence. The default,
#'   `"proSeq"`, uses the `proSeq` dataset instead of reading a file.
#'
#' @return
#' The value returned by [gplots::heatmap.2()] when applied to the
#' column-scaled matrix of composition descriptors (one row per protein, one
#' column per descriptor). The heat map itself is drawn as a side effect.
#'
#' @examples
#' composition_plot <- plotAPCG(file = "proSeq")
#' composition_plot
#'
#' @references
#' Xiao, N., Cao, D.-S., Zhu, M.-F., & Xu, Q.-S. (2015). protr/ProtrWeb: R
#' package and web server for generating various numerical representation
#' schemes of protein sequences. Bioinformatics, 31(11), 1857–1859.
#' https://doi.org/10.1093/bioinformatics/btv042
#'
#' @export
#' @import protr
#' @import gplots
#' @import utils
plotAPCG <- function(file = "proSeq") {
  # Reject anything that cannot be compared against the sentinel or used as a
  # path before it reaches `if ()`, where NA or length > 1 would fail obscurely.
  if (!is.character(file) || length(file) != 1L || is.na(file)) {
    stop("`file` must be a single character string.")
  }

  # "proSeq" is a sentinel for the bundled dataset; anything else must be an
  # existing regular file. `&&` short-circuits so the sentinel is never
  # probed on disk.
  use_default <- file == "proSeq"
  if (!use_default && !utils::file_test("-f", file)) {
    stop("Invalid file path.")
  }

  if (use_default) {
    raw_sequence <- proSeq
  } else {
    raw_sequence <- utils::read.csv(file, header = TRUE, sep = ",")
  }
  # Coerce every column to character so ids and sequences are plain strings
  # regardless of how they were stored or parsed.
  protein_sequence <- data.frame(
    lapply(raw_sequence, as.character),
    stringsAsFactors = FALSE
  )

  if (ncol(protein_sequence) < 2L) {
    stop("Input must have at least two columns: pdb id and sequence.")
  }
  if (nrow(protein_sequence) == 0L) {
    stop("Input contains no protein sequences.")
  }

  # Labels of the five descriptors, in the order they are computed below.
  colname <- c(
    "hydrophobicity", "VWF Volume", "polarity", "polarizability", "desolvation"
  )

  # Reduce one sequence to its five descriptor values. Elements of the
  # extractCTDC() result are addressed by position; the weights applied to
  # the van der Waals volume, polarity and polarizability groups follow the
  # three-group amino acid classification referenced in Xiao et al. (2015).
  summarise_sequence <- function(pseq) {
    ctd <- protr::extractCTDC(pseq)
    c(
      ctd[[3]],
      (ctd[[4]] * 2.78 + ctd[[5]] * 4 + ctd[[6]] * 8.08) / 3,
      (ctd[[7]] * 6.2 + ctd[[8]] * 9.2 + ctd[[9]] * 13.0) / 3,
      (ctd[[10]] * 1.08 + ctd[[11]] * 0.186 + ctd[[12]] * 0.409) / 3,
      ctd[[19]]
    )
  }

  # vapply() yields a 5 x n matrix (one column per protein); transpose it so
  # proteins are rows and descriptors are columns.
  attr_values <- vapply(
    protein_sequence[[2]],
    summarise_sequence,
    numeric(5),
    USE.NAMES = FALSE
  )
  five_attr_info <- t(attr_values)
  dimnames(five_attr_info) <- list(protein_sequence[[1]], colname)

  # Heat map of the column-scaled descriptors with row/column dendrograms.
  mapping_result <- gplots::heatmap.2(
    scale(five_attr_info),
    cexCol = 1.2,
    margins = c(7.5, 4)
  )

  return(mapping_result)
}

#Reference
#Xiao, N., Cao, D.-S., Zhu, M.-F., & Xu, Q.-S. (2015). protr/ProtrWeb: R package and web server for
#generating various numerical representation schemes of protein sequences. Bioinformatics, 31(11), 1857–1859.
#https://doi.org/10.1093/bioinformatics/btv042
dxjasmine/Rppsc documentation built on Dec. 8, 2019, 11:40 a.m.