Nothing
`read.pqr` <-
function (file, maxlines=-1, multi=FALSE,
rm.insert=FALSE, rm.alt=TRUE, verbose=TRUE) {
if(missing(file)) {
stop("read.pqr: please specify a PQR 'file' for reading")
}
if(!is.numeric(maxlines)) {
stop("read.pqr: 'maxlines' must be numeric")
}
if(!is.logical(multi)) {
stop("read.pqr: 'multi' must be logical TRUE/FALSE")
}
cl <- match.call()
## PDB FORMAT v3.3: colpos, datatype, name, description
atom.format <- matrix(c(6, 'character', "type", # type(ATOM)
5, 'numeric', "eleno", # atom_no
-1, NA, NA, # (blank)
4, 'character', "elety", # atom_ty
1, 'character', "alt", # alt_loc
4, 'character', "resid", # res_na
1, 'character', "chain", # chain_id
4, 'numeric', "resno", # res_no
1, 'character', "insert", # ins_code
-3, NA, NA, # (blank)
8, 'numeric', "x", # x
8, 'numeric', "y", # y
8, 'numeric', "z", # z
8, 'numeric', "o", # o ### 6 for pdb
8, 'numeric', "b", # b ### 6 for pdb
-6, NA, NA, # (blank)
4, 'character', "segid", # seg_id
2, 'character', "elesy", # element symbol
2, 'character', "charge" # atom_charge (should be 'numeric']
), ncol=3, byrow=TRUE,
dimnames = list(c(1:19), c("widths","what","name")) )
trim <- function(s) {
##- Remove leading and trailing spaces from character strings
s <- sub("^ +", "", s)
s <- sub(" +$", "", s)
s[(s=="")]<-""
s
}
split.fields <- function(x) {
##- Split a character string for data.frame fwf reading
## First splits a string 'x' according to 'first' and 'last'
## then re-combines to new string with ";" as separator
x <- trim( substring(x, first, last) )
paste(x,collapse=";")
}
is.character0 <- function(x){length(x)==0 & is.character(x)}
##- Find first and last (substr) positions for each field
widths <- as.numeric(atom.format[,"widths"]) # fixed-width spec
drop.ind <- (widths < 0) # cols to ignore (i.e. -ve)
widths <- abs(widths) # absolute vales for later
st <- c(1, 1 + cumsum( widths ))
first <- st[-length(st)][!drop.ind] # substr start
last <- cumsum( widths )[!drop.ind] # substr end
names(first) = na.omit(atom.format[,"name"])
names(last) = names(first)
##- Read 'n' lines of PDB file
raw.lines <- readLines(file, n = maxlines)
type <- substring(raw.lines, first["type"], last["type"])
##- Check number of END/ENDMDL records
raw.end <- sort(c(which(type == "END"),
which(type == "ENDMDL")))
## Check if we want to store multiple model data
if (length(raw.end) > 1) {
print("PDB has multiple END/ENDMDL records")
if (!multi) {
print("multi=FALSE: taking first record only")
} else {
print("multi=TRUE: 'read.dcd/read.ncdf' will be quicker!")
raw.lines.multi <- raw.lines
type.multi <- type
}
raw.lines <- raw.lines[ (1:raw.end[1]) ]
type <- type[ (1:raw.end[1]) ]
}
##- Check for 'n' smaller than total lines in PDB file
if ( length(raw.end) !=1 ) {
if (length(raw.lines) == maxlines) {
print("You may need to increase 'maxlines'")
print("check you have all data in $atom")
}
}
##- Split input lines by record type
raw.header <- raw.lines[type == "HEADER"]
raw.seqres <- raw.lines[type == "SEQRES"]
raw.helix <- raw.lines[type == "HELIX "]
raw.sheet <- raw.lines[type == "SHEET "]
raw.atom <- raw.lines[type %in% c("ATOM ","HETATM")]
if (verbose) {
if (!is.character0(raw.header)) { cat(" ", raw.header, "\n") }
}
## Edit from Baoqiang Cao <bqcao@ices.utexas.edu> Nov 29, 2009
## Old version:
## seqres <- unlist(strsplit( trim(substring(raw.seqres,19,80))," +"))
## New version
seqres <- unlist(strsplit( trim(substring(raw.seqres,19,80))," +"))
if(!is.null(seqres)) {
seqres.ch <- substring(raw.seqres, 12, 12)
seqres.ln <- substring(raw.seqres, 13, 17)
seqres.in <- ( !duplicated(seqres.ch) )
names(seqres) <- rep(seqres.ch[seqres.in], times=seqres.ln[seqres.in])
}
## End Edit from Baoqiang:
##- Secondary structure
helix <- list(start = as.numeric(substring(raw.helix,22,25)),
end = as.numeric(substring(raw.helix,34,37)),
chain = trim(substring(raw.helix,20,20)),
type = trim(substring(raw.helix,39,40)))
sheet <- list(start = as.numeric(substring(raw.sheet,23,26)),
end = as.numeric(substring(raw.sheet,34,37)),
chain = trim(substring(raw.sheet,22,22)),
sense = trim(substring(raw.sheet,39,40)))
## 2014-04-23
## Update to use single data.frame for ATOM and HETATM records
## file="2RGF"; multi=TRUE;
## file="./4q21.pdb"; maxlines=-1; multi=FALSE;
## rm.insert=FALSE; rm.alt=TRUE; het2atom=FALSE; verbose=TRUE
atom <- read.table(text=sapply(raw.atom, split.fields),
stringsAsFactors=FALSE, sep=";", quote='',
colClasses=unname(atom.format[!drop.ind,"what"]),
col.names=atom.format[!drop.ind,"name"],
comment.char="", na.strings="")
##-- End data.frame update
##- Coordinates only object
###xyz.models <- c(t(atom[,c("x","y","z")]))
xyz.models <- matrix(as.numeric(c(t(atom[,c("x","y","z")]))), nrow=1)
##- Multi-model coordinate extraction
if (length(raw.end) > 1 && multi) {
raw.atom <- raw.lines.multi[ type.multi %in% c("ATOM ","HETATM") ]
if( (length(raw.atom)/length(raw.end)) ==nrow(atom) ){
## Only work with models with the same number of atoms)
tmp.xyz=( rbind( substr(raw.atom, first["x"],last["x"]),
substr(raw.atom, first["y"],last["y"]),
substr(raw.atom, first["z"],last["z"]) ) )
## Extract coords to nrow/frame * ncol/xyz matrix
xyz.models <- matrix( as.numeric(tmp.xyz), ncol=nrow(atom)*3,
nrow=length(raw.end), byrow=TRUE)
rownames(xyz.models) = NULL
} else {
warning(paste("Unequal number of atoms in multi-model records:", file))
}
rm(raw.lines.multi)
}
rm(raw.lines, raw.atom)
##- Possibly remove 'Alt records' (m[,"alt"] != NA)
if (rm.alt) {
if ( sum( !is.na(atom[,"alt"]) ) > 0 ) {
first.alt <- sort( unique(na.omit(atom[,"alt"])) )[1]
cat(paste(" PDB has ALT records, taking",first.alt,"only, rm.alt=TRUE\n"))
alt.inds <- which( (atom[,"alt"] != first.alt) ) # take first alt only
if(length(alt.inds)>0) {
atom <- atom[-alt.inds,]
xyz.models <- xyz.models[ ,-atom2xyz(alt.inds), drop=FALSE ]
}
}
}
##- Possibly remove 'Insert records'
if (rm.insert) {
if ( sum( !is.na(atom[,"insert"]) ) > 0 ) {
cat(" PDB has INSERT records, removing, rm.insert=TRUE\n")
insert.inds <- which(!is.na(atom[,"insert"])) # rm insert positions
atom <- atom[-insert.inds,]
xyz.models <- xyz.models[ ,-atom2xyz(insert.inds), drop=FALSE ]
}
}
##- Vector of Calpha positions
## check for calcium resid and restrict to ATOM records only
# calpha = (atom[,"elety"]=="CA") & (atom[,"resid"]!="CA") & (atom[,"type"]=="ATOM")
output<-list(atom=atom,
#het=atom[atom$type=="HETATM",],
helix=helix,
sheet=sheet,
seqres=seqres,
xyz=as.xyz(xyz.models),
calpha = NULL, call=cl)
class(output) <- c("pdb", "sse")
ca.inds <- atom.select.pdb(output, string="calpha", verbose=FALSE)
output$calpha <- seq(1, nrow(atom)) %in% ca.inds$atom
return(output)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.