#' Plot All Protein Composition Graph
#'
#' Draws a clustered heatmap of five composition descriptors for a list of
#' proteins: hydrophobicity, van der Waals volume, polarity, polarizability
#' and desolvation. Each descriptor is derived from the CTD composition
#' values computed by \code{protr::extractCTDC()}. The descriptor matrix is
#' column-scaled and passed to \code{gplots::heatmap.2()}, which adds
#' hierarchical row and column clustering dendrograms showing how similar the
#' proteins (rows) and descriptors (columns) are.
#'
#' @param file Either the string \code{"proSeq"} (the default), which uses the
#'   \code{proSeq} dataset, or the path to a comma-separated file with a
#'   header row whose first column holds the PDB ids and whose second column
#'   holds the protein sequences.
#'
#' @return
#' The value returned by \code{gplots::heatmap.2()} for the scaled descriptor
#' matrix. The heatmap itself is drawn on the active graphics device as a
#' side effect.
#'
#' @examples
#' composition_plot <- plotAPCG(file = "proSeq")
#' composition_plot
#'
#' @references
#' Xiao, N., Cao, D.-S., Zhu, M.-F., & Xu, Q.-S. (2015). protr/ProtrWeb: R
#' package and web server for generating various numerical representation
#' schemes of protein sequences. Bioinformatics, 31(11), 1857–1859.
#' https://doi.org/10.1093/bioinformatics/btv042
#'
#' @export
#' @import protr
#' @import gplots
#' @import utils
plotAPCG <- function(file = "proSeq") {
  # Validate the argument before touching the file system: it must be a
  # single, non-missing string.
  if (!is.character(file) || length(file) != 1L || is.na(file)) {
    stop("Invalid file path.")
  }

  use_default <- file == "proSeq"
  if (!use_default && !utils::file_test("-f", file)) {
    stop("Invalid file path.")
  }

  # Load either the default dataset or the user-supplied CSV, then coerce
  # every column to character so ids and sequences are plain strings.
  raw_sequence <- if (use_default) {
    proSeq
  } else {
    utils::read.csv(file, header = TRUE, sep = ",")
  }
  protein_sequence <- data.frame(
    lapply(raw_sequence, as.character),
    stringsAsFactors = FALSE
  )

  if (ncol(protein_sequence) < 2L) {
    stop("Input must contain a PDB id column and a sequence column.")
  }
  # heatmap.2() cannot cluster fewer than two rows, so fail early with a
  # message that names the actual problem.
  if (nrow(protein_sequence) < 2L) {
    stop("At least two proteins are required to draw the clustered heatmap.")
  }

  # Column labels of the descriptor matrix (shown on the heatmap axis).
  colname <- c(
    "hydrophobicity", "VWF Volume", "polarity",
    "polarizability", "desolvation"
  )

  # Compute the five descriptors for one sequence from the composition vector
  # returned by extractCTDC(). Elements are addressed by position.
  # NOTE(review): the positions and weights below are kept exactly as in the
  # original implementation; confirm them against the extractCTDC() output
  # order and the three-group amino-acid classification in Xiao et al. (2015).
  describe_sequence <- function(pseq) {
    ctd <- protr::extractCTDC(pseq)
    c(
      ctd[[3]],
      (ctd[[4]] * 2.78 + ctd[[5]] * 4 + ctd[[6]] * 8.08) / 3,
      (ctd[[7]] * 6.2 + ctd[[8]] * 9.2 + ctd[[9]] * 13.0) / 3,
      (ctd[[10]] * 1.08 + ctd[[11]] * 0.186 + ctd[[12]] * 0.409) / 3,
      ctd[[19]]
    )
  }

  # vapply() yields a 5 x n matrix (one column per protein); transpose it so
  # each protein is a row, then label rows with the PDB ids.
  five_attr_info <- t(vapply(
    protein_sequence[[2]],
    describe_sequence,
    numeric(5),
    USE.NAMES = FALSE
  ))
  dimnames(five_attr_info) <- list(protein_sequence[[1]], colname)

  # Heatmap of the column-scaled descriptors with row/column dendrograms.
  mapping_result <- gplots::heatmap.2(
    scale(five_attr_info),
    cexCol = 1.2,
    margins = c(7.5, 4)
  )
  mapping_result
}
#Reference
#Xiao, N., Cao, D.-S., Zhu, M.-F., & Xu, Q.-S. (2015). protr/ProtrWeb: R package and web server for
#generating various numerical representation schemes of protein sequences. Bioinformatics, 31(11), 1857–1859.
#https://doi.org/10.1093/bioinformatics/btv042
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.