# Copyright (C) 2018 Jochen Weile, Roth Lab
#
# This file is part of MaveVis.
#
# MaveVis is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# MaveVis is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with MaveVis. If not, see <https://www.gnu.org/licenses/>.
options(stringsAsFactors=FALSE)
# source("lib/libpdb.R")
# source("lib/cliargs.R")
# source("lib/trackdrawer.R")
# source("lib/amasLite.R")
#' Query PDB
#'
#' Queries the Protein Data Bank (PDB) for protein structure data for
#' a given database accession and writes the result to file
#'
#' @param pdb.acc the PDB accession to query
#' @param pdb.file the name of the file to which the output will be written.
#' @return the name of the output file.
#' @examples
#' \dontrun{
#' #with a pre-determined output file
#' outfile <- "sumo_conjugase_structure.pdb"
#' query.pdb("3UIP",outfile)
#'
#' #with an autogenerated output file
#' outfile <- query.pdb("3UIP")
#' }
#' @export
query.pdb <- function(pdb.acc,pdb.file=paste0(pdb.acc,".pdb")) {
library("httr")
pdb.url <- paste0("https://files.rcsb.org/download/",pdb.acc,".pdb")
htr <- GET(pdb.url)
if (http_status(htr)$category == "Success") {
writeBin(content(htr, "raw"), pdb.file)
return(pdb.file)
} else {
# logger$fatal("Unable to download PDB file")
stop("Unable to download PDB file")
}
}
#' Query Alphafold
#'
#' Queries the EMBL Alphafold database for predicted protein structure data for
#' a given database accession and writes the result to file
#'
#' @param uniprot.acc the Uniprot accession to query
#' @param pdb.file the name of the file to which the output will be written.
#' @return the name of the output file.
#' @examples
#' \dontrun{
#' #with a pre-determined output file
#' outfile <- "sumo_conjugase_structure.pdb"
#' query.pdb("3UIP",outfile)
#'
#' #with an autogenerated output file
#' outfile <- query.pdb("3UIP")
#' }
#' @export
query.alphafold <- function (uniprot.acc,pdb.file=paste0(uniprot.acc,"_af.pdb")) {
library("httr")
af.url <- paste0("https://alphafold.ebi.ac.uk/files/AF-",uniprot.acc,"-F1-model_v4.pdb")
htr <- GET(af.url)
if (http_status(htr)$category == "Success") {
writeBin(content(htr, "raw"), pdb.file)
return(pdb.file)
} else {
# logger$fatal("Unable to download PDB file")
stop("Unable to download PDB file")
}
}
#' Run FreeSASA on a PDB structure
#'
#' Calculates solvent accessible surface area on a given structure using the
#' external software FreeSASA, which must be installed and available via $PATH.
#'
#' @param pdb.file The PDB file on which to run FreeSASA
#' @return a \code{data.frame} with the output of FreeSASA
#' @examples
#' \dontrun{
#' sasa <- run.sasa("3UIP.pdb")
#' }
#' @export
run.sasa <- function(pdb.file) {
suppressMessages(library("gdata"))
widths <- c(
type=3,res=4,chain=2,pos=4,all.abs=9,all.rel=6,side.abs=7,side.rel=6,
main.abs=7,main.rel=6,nonpolar.abs=7,nonpolar.rel=6,polar.abs=7,polar.rel=6
)
lines <- system(paste("freesasa --format=rsa",pdb.file),intern=TRUE)
lines <- lines[regexpr("^RES",lines) > 0]
con <- textConnection(lines)
sasa.out <- read.fwf(con,widths,strip.white=TRUE)
close(con)
colnames(sasa.out) <- names(widths)
for (i in 4:14) sasa.out[,i] <- as.numeric(sasa.out[,i])
#fix boolean misclassification bug
if (inherits(sasa.out$chain,"logical")) {
sasa.out$chain <- sapply(sasa.out$chain,function(x) ifelse(x,"T","F"))
}
return(sasa.out)
}
#' Run DSSP on a PDB structure
#'
#' Calculates secondary structure information from a PDB file using the
#' external software DSSP, which must be installed and available via $PATH
#'
#' @param pdb.file The PDB file on which to run DSSP
#' @return a \code{data.frame} with the output of DSSP
#' @examples
#' \dontrun{
#' dssp.out <- run.dssp(pdb.file)
#' }
#' @export
run.dssp <- function(pdb.file) {
suppressMessages(library("gdata"))
cols <- c(num=5,residue=5,chain=2,aa=3,struc=3,struc.flags=7,
bp1=5,bp2=4,acc=4,no1=12,on1=11,no2=11,on2=11,tco=8,
kappa=6,alpha=6,phi=6,psi=6,x.ca=7,y.ca=7,z.ca=7)
lines <- system(paste("dssp",pdb.file),intern=TRUE)
lines <- lines[regexpr("^\\s+\\d+\\s+\\d+ \\w{1} \\w{1}",lines) > 0]
con <- textConnection(lines)
dssp.out <- read.fwf(
con,
widths=cols,
stringsAsFactors=FALSE,
strip.white=TRUE
)
close(con)
colnames(dssp.out) <- names(cols)
substr(dssp.out$structure,1,1)
return(dssp.out)
}
#' Generate PDB files for complex components
#'
#' This function finds the component chains of a given PDB structure and generates
#' new PDB files for any desired set of combinations of these components
#'
#' @param pdb.file the input PDB file for the complex
#' @param chain.sets a list of character vectors, detailing the combinations of chains to
#' produce. E.g. \code{list("A",c("A","B"),c("A","C"))} produces one file for
#' chain A, one for the combination of chains A and B, and one for the combination
#' of chains A and C.
#' @return a vector of file names corresponding to the elements of \code{chain.sets}
#' @examples
#' \dontrun{
#' subcplx.files <- sub.complex.combos("3UIP.pdb",chain.sets=list("A",c("A","B"),c("A","C")))
#' }
#' @export
subcomplex.combos <- function(pdb.file,chain.sets) {
lines <- scan(pdb.file,what="character",sep="\n")
atom.lines <- which(grepl("^ATOM|^TER",lines))
model.lines <- which(grepl("^MODEL",lines))
if (length(model.lines) > 0) {
endmdl.lines <- which(grepl("^ENDMDL",lines))
header.lines <- 1:model.lines[[1]]
footer.lines <- tail(endmdl.lines,1):length(lines)
applicable.atom.lines <- atom.lines[atom.lines > model.lines[[1]] & atom.lines < endmdl.lines[[1]]]
} else {
header.lines <- 1:(atom.lines[[1]]-1)
footer.lines <- (tail(atom.lines,1)+1):length(lines)
applicable.atom.lines <- atom.lines
}
applicable.chains <- substr(lines[applicable.atom.lines],22,22)
subfiles <- sapply(chain.sets,function(cset) {
outfile <- tempfile(fileext=".pdb")
con <- file(outfile,open="w")
writeLines(lines[header.lines],con)
writeLines(lines[applicable.atom.lines[applicable.chains %in% cset]],con)
writeLines(lines[footer.lines],con)
close(con)
return(outfile)
})
return(subfiles)
}
#' Calculate structural features
#'
#' Calculates structural features of a protein from a given PDB entry
#'
#' @param acc The PDB or Uniprot accession to use, e.g. "3UIP"
#' @param main.chain The chain identifier in the PDB file that corresponds
#' to the protein of interest. Should be a single uppercase letter, e.g. "A".
#' @param db either 'pdb' or 'alphafold'
#' @param offset position offset for incorrectly annotated pdb files (integer)
#' @param overrideCache if true, ignore cached files and query all data from scratch
#' @return a \code{data.frame} detailing secondary structure, solvent accessibility,
#' and burial in interfaces.
#' @examples
#' \dontrun{
#' sfeats <- calc.strucfeats("3UIP","A")
#' }
#' @export
calc.strucfeats <- function(acc,main.chain,db=c("pdb","alphafold"),offset=0L,overrideCache=FALSE) {
struc.cache.file <- getCacheFile(paste0(acc,":",main.chain,"_features.csv"))
if (file.exists(struc.cache.file) && !overrideCache) {
cat("Using cached features...\n")
burial.all <- read.csv(struc.cache.file)
return(burial.all)
}
library("httr")
set_config(config(ssl_verifypeer = 0L))
#set up cache file for structure
pdb.file <- getCacheFile(paste0(acc,".pdb"))
# pdb.file <- tempfile(fileext=".pdb")
if (!file.exists(pdb.file)) {
switch(match.arg(db,c("pdb","alphafold")),
pdb={
cat("Querying PDB...\n")
query.pdb(acc,pdb.file)
},
alphafold={
cat("Querying PDB...\n")
query.alphafold(acc,pdb.file)
}
)
} else {
cat("Using cached PDB file...\n")
}
cat("Parsing PDB file...\n")
cplx.struc <- new.structure(pdb.file)
chains <- cplx.struc$get.chains()
if (!(main.chain %in% chains)) {
stop("The requested chain ",main.chain," doesn't exist in ",acc)
}
chain.info <- cplx.struc$get.chain.info()
# rownames(chain.info) <- chain.info$chain
chain.names <- sapply(chains,function(ch) with(chain.info,as.character(name[db=="UNP" & chain == ch])))
chain.names <- sub("_HUMAN","",chain.names)
if (length(chains) > 1) {
cat("Splitting protein complex...\n")
#list of chain pairs
other.chains <- setdiff(chains,main.chain)
chain.sets <- c(main.chain,lapply(other.chains,c,main.chain))
#Subdivide into chain pairs
# subfiles <- cplx.struc$subcomplex.combos(main.chain)
subfiles <- subcomplex.combos(pdb.file,chain.sets)
cat("Calculating surface area...\n")
acctables <- mapply(function(cset,subfile) {
#re-order chain ids to original file order
cset <- intersect(chains,cset)
acctable <- run.sasa(subfile)
acctable[acctable$chain==main.chain,]
},cset=chain.sets,subfile=subfiles,SIMPLIFY=FALSE)
cat("Calculating interface areas...\n")
#calculate burial based on accessibities
burial <- cbind(acctables[[1]],do.call(cbind,lapply(2:length(acctables),function(i) {
before <- acctables[[1]][,"all.abs"]
data.frame(
abs.burial=before-acctables[[i]][,"all.abs"],
rel.burial=(before-acctables[[i]][,"all.abs"])/(before+.000001)
)
})))
colnames(burial)[seq(15,ncol(burial),2)] <- paste0("abs.burial.",chain.names[other.chains])
colnames(burial)[seq(16,ncol(burial),2)] <- paste0("rel.burial.",chain.names[other.chains])
} else {
acctable <- run.sasa(pdb.file)
burial <- acctable
}
cat("Calculating secondary structures...\n")
#calculate secondary structure
dssp.out <- run.dssp(pdb.file)
secstruc <- dssp.out[dssp.out$chain==main.chain,c("residue","struc")]
cat("Compiling results...\n")
rownames(burial) <- burial$pos+offset
rownames(secstruc) <- secstruc$residue+offset
allpos <- 1:max(burial$pos)
burial.all <- cbind(burial[as.character(allpos),],secstruc=secstruc[as.character(allpos),"struc"])
rownames(burial.all) <- burial.all$pos <- allpos
write.table(burial.all,struc.cache.file,sep=",")
# cat("Deleting temporary files...\n")
# #clean up temp files
# file.remove(pdb.file)
# file.remove(subfiles)
return(burial.all)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.