Nothing
#' @title hydrophobicity_plot
#'
#' @description Function to plot amino acid sequence hydrophobicity profile using Kyte-Doolittle hydrophobicity scale; Reference= J. Mol. Biol. 157=105-132;1982. The Kyte-Doolittle scale is used for detecting hydrophobic regions in proteins. Regions with a positive value are hydrophobic ans those with negative values are hydrophylic. This scale can be used to identify both surface-exposed regions as well as transmembrane regions, depending on the window size used.
#'
#' @param pdb_df Requires a pdb data frame generated by PDB_prepare
#' @param window Size of a window between 3 and 21, default is 21
#' @param weight Relative weight of the window edges compared to the window center in percent; default=100
#' @param model Weight variation model either "linear" or "exponential", if the relative weight at the edges is selected to be < 100 percent; default="linear"
#' @return Scaled line graph
#' @ImportFrom ggplot2 aes
#' @ImportFrom ggplot2 ggplot
#' @ImportFrom ggplot2 geom_line
#' @ImportFrom ggplot2 scale_x_continuous
#' @ImportFrom ggplot2 xlab
#' @ImportFrom ggplot2 ylab
#' @ImportFrom ggplot2 ggtitle
#' @export
#' @examples
#' path_to_processed_PDB<- system.file("extdata", "pdb_df.tabular", package="Fiscore")
#' # basic usage of hydrophobicity_plot
#' pdb_df<-read.table(path_to_processed_PDB)
#' hydrophobicity_plot(pdb_df)
hydrophobicity_plot<-function(pdb_df, window=3, weight=100, model="exponential"){
#Kyte-Doolittle hydrophobicity scale values------
KD_values<-list("ALA"= 1.800,"ARG"= -4.500,"ASN"= -3.500,"ASP"= -3.500, "CYS"= 2.500, "GLN"= -3.500, "GLU"= -3.500,"GLY"= -0.400,"HIS"= -3.200,"ILE"= 4.500, "LEU"= 3.800,"LYS"= -3.900,"MET"= 1.900, "PHE"= 2.800, "PRO"= -1.600,"SER"= -0.800,"THR"= -0.700,"TRP"= -0.900,"TYR"= -1.300, "VAL"= 4.200)
#Warnings------
if(((window<3) || (window>21))||(window%%2==0)) {stop("Please select a window size between values 3 and 21 where the value is odd")}
if((weight<0) || (weight>100)) {stop("Please select weight values as a percentage, e.g. 50 for 50%")}
if(!(model %in%c("exponential","linear"))) {stop("You can only select linear or exponential models, e.g., 'exponential' ")}
#Helper functions--------
#***linear fit-----
linear_fit<-function(value, size){
#value - weight
#size - window size
#function k*x+b=y
k=(100-value)/((size-1)/2) #establish a coefficient
b= value-k #calculate the intercept for linear function
values<-c()
#calculate k>0 values
for(i in 1:((size-1)/2)){
values<-c(values,k*i+b)
}
# add 100%
values<-c(values,100, rev(values))
# add the reverse values to complete the window
return(values)
}
#***linear fit-----
exp_fit<-function(value, size){
#value - weight
#size - window size
#function a*b^x=y
b=(100/value)^(2/(size-1))
a= value/b #calculate the intercept for linear function
values<-c()
#calculate k>0 values
for(i in 1:((size-1)/2)){
values<-c(values,a*b^i)
}
# add 100%
values<-c(values,100, rev(values))
# add the reverse values to complete the window
return(values)
}
MINMAX_normalization_func<-function(array){
#MIN-MAX normalisation based on the input array
#input numeric array
#returns normalised array values
return ((array-min(array))/(max(array)-min(array)))
}
#Extract Protein sequence and build temporary data frame for analysis
df<-pdb_df[,c("df_resno","df_res" , "Type")]
rownames(df)<-1:nrow(df)
df$"Type"<-as.factor(df$"Type")
#Prepare weights
if(model=="exponential"){values<-exp_fit(weight, window) }
if(model=="linear"){ values<-linear_fit(weight, window) }
values<-values/100 #scale values
scores<-c()
weight_dist<-c()
half_window<-(window-1)/2
#process data within sliding window, i.e. amino acids that can be analysed this way
for(i in 1+half_window:(nrow(df)-half_window-1)){
scores_temp<-c()
score<-0
#calculate score for amino acid based on the window
for(j in (i-half_window):(i+half_window)){
aa<-df[j,"df_res"]
scores_temp<-c(scores_temp,KD_values[[aa]])
}
#add weighed scores for amino acid where values provide earlier calculated scores
for(n in 1:length(values)){
score<-score+scores_temp[n]*values[n] }
scores<-c(scores,score)
}
# add missing residues for beggining and end of the protein by just adding the value from the scale without weighing
scores_temp<-c()
for(i in 1:half_window){
aa<-df[i,"df_res"]
scores_temp<-c(scores_temp,KD_values[[aa]])
}
scores<-c(scores_temp,scores)
scores_temp<-c()
for(i in (nrow(df)-half_window+1):nrow(df)){
aa<-df[i,"df_res"]
scores_temp<-c(scores_temp,KD_values[[aa]])
}
scores<-c(scores,scores_temp)
#scale scores
normalised_scores<-MINMAX_normalization_func(scores)
df$"Score"<-normalised_scores
#to avoid namespace conflucts
df_resno_val<-df$"df_resno"
Score_val<-df$"Score"
Type_val<-df$"Type"
#Plot will capture breakage regions with empty
ggplot2::ggplot() +ggplot2::geom_line(data = df,ggplot2::aes(df_resno_val, Score_val, group = 1, color = Type_val))+ggplot2::scale_x_continuous(breaks=seq(min(df$"df_resno"),max(df$"df_resno"),25), limits=c(min(df$"df_resno"),max(df$"df_resno")))+ggplot2::ggtitle(label="Kyte-Doolittle hydrophobicity plot")+ ggplot2::xlab(label = "Residue number")+ggplot2::ylab(label = "Normalised score")
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.