Nothing
#' @title Generating an Rmoleculeset from an SDF file
#'
#' @description This function uses the ChemmineR package to read an SDF file
#' and converts it into an \code{Rmoleculeset} that can be used as input
#' for the kernel functions \code{sd2gram}, \code{sd2gramSpectrum}, ...,
#' \code{sd2gram3Dpharma}.
#'
#' @param sdfFileName The name of the SDF file containing the molecules.
#'
#' @param detectArom If the molecules in the SDF file have no annotated
#' aromatic bonds, the ChemmineR function \code{rings} is used for detecting
#' aromaticity. Default = TRUE.
#'
#' @param bound Detection of aromaticity can be time consuming if the
#' molecules are large. Detection is only done if the number of atoms is below
#' the given number. Default = Inf.
#'
#' @param type Experimental parameter to switch between to types of the
#' function. Default = 2.
#'
#' @return An instance of Rmoleculeset.
#'
#' @author Guenter Klambauer <rchemcpp@@bioinf.jku.at>
#' @export
readRmoleculeset <- function(sdfFileName, detectArom=TRUE, bound=Inf, type=2){
x <- new(Rmoleculeset)
sdf <- ChemmineR::read.SDFset(sdfFileName)
valids <- ChemmineR::validSDF(sdf)
if (!(all(valids))){
warnings("Found invalid entries in the sdf file. Suggested to remove.")
#sdf <- sdf[which(valids)]
}
molNames <- ChemmineR::sdfid(sdf)
if (type==1){
for (i in 1:length(sdf)){
mol <- sdf[[i]]
aB <- atomblock(mol)
bB <- bondblock(mol)
atoms <- sapply(strsplit(rownames(aB),"_"),.subset2,1)
#print(atoms)
k <- length(atoms)
if (detectArom & k <= bound){
ringS <- ChemmineR::rings(mol,arom=TRUE)
if (is.null(ringS$RINGS)){
aromaticIdx <- rep(FALSE,k)
} else {
aromaticIdx <- (1:k) %in% as.integer(unlist(sapply(
ringS$RINGS[which(ringS$AROMATIC)],
function(x){
sapply(strsplit(x,"_"),.subset2,2)
})))
}
aIdx <- which(aromaticIdx)
bB[which((bB[,1] %in% aIdx) & (bB[,2] %in% aIdx)),3] <- 4
}
#browser()
m <- new(Rmolecule)
sapply(atoms,function(kk) m$addAtom(kk))
bB[,1:2] <- bB[,1:2]-1
#bB[which(bB[,3]==4),3] <- 3
#browser()
apply(bB,1, function(x) m$linkAtoms(x[1],x[2],x[3]))
x$addMoleculeCopy(m)
}
} else {
if (detectArom){
for (i in 1:length(sdf)){
mol <- sdf[[i]]
aB <- atomblock(mol)
bB <- bondblock(mol)
mode(bB) <- "integer"
atoms <- sapply(strsplit(rownames(aB),"_"),.subset2,1)
k <- length(atoms)
if (k <=bound){
ringS <- ChemmineR::rings(mol,arom=TRUE)
if (is.null(ringS$RINGS)){
aromaticIdx <- rep(FALSE,k)
} else {
aromaticIdx <- (1:k) %in% as.integer(unlist(sapply(
ringS$RINGS[which(ringS$AROMATIC)],
function(x){
sapply(strsplit(x,"_"),.subset2,2)
})))
}
aIdx <- which(aromaticIdx)
bB[which((bB[,1] %in% aIdx) & (bB[,2] %in% aIdx)),3] <- 4
mol@bondblock <- bB
}
sdf[[i]] <- mol
}
}
# make 7 columns for bondblocks
bb <- ChemmineR::bondblock(sdf)
bbrep <- lapply(bb, function(b){
if (ncol(b)>7){
b <- b[,1:7,drop=FALSE]
} else if (ncol(b) <7){
b <- cbind(b,matrix(0,nrow(b),7-ncol(b)))
}
return(as.matrix(b))
})
ChemmineR::bondblock(sdf) <- bbrep
fileId <- tempfile(fileext='.sdf')
sdf2strWrite(sdf,fileId,cid=TRUE)
x$addSD(fileId,TRUE)
unlink(fileId)
}
ll <- list(x,sdf,molNames)
names(ll) <- c("Rmoleculeset","SDFset","moleculeNames")
return(ll)
#return(x)
}
#' @title Converting SDFset objects to Rmoleculesets.
#'
#' @param SDFset The SDFset object of ChemmineR.
#' @param detectArom Flag that determines, whether aromatic structures should
#' be detected. Default = TRUE.
#' @param bound Detection of aromaticity can be time consuming if the
#' molecules are large. Detection is only done if the number of atoms is below
#' the given number. Default = Inf.
#'
#' @return An instance of Rmoleculeset
#'
#' @noRd
#'
#' @author Guenter Klambauer
SDFsetToRmoleculeset <- function(SDFset, detectArom=TRUE, bound=70, type=2){
x <- new(Rmoleculeset)
valids <- ChemmineR::validSDF(SDFset)
if (!(all(valids))){
warnings("Found invalid entries in the sdf file. Suggested to remove.")
#SDFset <- SDFset[which(valids)]
}
molNames <- ChemmineR::sdfid(SDFset)
if (type==1){
for (i in 1:length(SDFset)){
mol <- SDFset[[i]]
aB <- atomblock(mol)
bB <- bondblock(mol)
mode(bB) <- "integer"
atoms <- sapply(strsplit(rownames(aB),"_"),.subset2,1)
k <- length(atoms)
if (detectArom & k<=bound){
ringS <- ChemmineR::rings(mol,arom=TRUE)
if (is.null(ringS$RINGS)){
aromaticIdx <- rep(FALSE,k)
} else {
aromaticIdx <- (1:k) %in% as.integer(unlist(sapply(
ringS$RINGS[which(ringS$AROMATIC)],
function(x){
sapply(strsplit(x,"_"),.subset2,2)
})))
}
aIdx <- which(aromaticIdx)
bB[which((bB[,1] %in% aIdx) & (bB[,2] %in% aIdx)),3] <- 4
}
#browser()
m <- new(Rmolecule)
sapply(atoms,function(kk) m$addAtom(kk))
bB[,1:2] <- bB[,1:2]-1
#bB[which(bB[,3]==4),3] <- 3
apply(bB,1, function(x) m$linkAtoms(as.integer(x[1]),
as.integer(x[2]),as.integer(x[3])))
x$addMoleculeCopy(m)
}
} else {
if (detectArom){
for (i in 1:length(SDFset)){
mol <- SDFset[[i]]
aB <- atomblock(mol)
bB <- bondblock(mol)
mode(bB) <- "integer"
atoms <- sapply(strsplit(rownames(aB),"_"),.subset2,1)
k <- length(atoms)
if (k <= bound){
ringS <- ChemmineR::rings(mol,arom=TRUE)
if (is.null(ringS$RINGS)){
aromaticIdx <- rep(FALSE,k)
} else {
aromaticIdx <- (1:k) %in% as.integer(unlist(sapply(
ringS$RINGS[which(ringS$AROMATIC)],
function(x){
sapply(strsplit(x,"_"),.subset2,2)
})))
}
aIdx <- which(aromaticIdx)
bB[which((bB[,1] %in% aIdx) & (bB[,2] %in% aIdx)),3] <- 4
mol@bondblock <- bB
}
SDFset[[i]] <- mol
}
}
# make 7 columns for bondblocks
bb <- ChemmineR::bondblock(SDFset)
bbrep <- lapply(bb, function(b){
if (ncol(b)>7){
b <- b[,1:7,drop=FALSE]
} else if (ncol(b) <7){
b <- cbind(b,matrix(0,nrow(b),7-ncol(b)))
}
return(as.matrix(b))
})
ChemmineR::bondblock(SDFset) <- bbrep
fileId <- tempfile(fileext='.sdf')
sdf2strWrite(SDFset,fileId,cid=TRUE)
x$addSD(fileId,TRUE)
unlink(fileId)
}
return(list(x,SDFset,molNames))
}
#' @title createRMolecule
#'
#' @description Creates an \"Rmolecule\" from an atom-vector and a bond-matrix
#'
#' @usage createRMolecule(atoms, bonds)
#'
#' @param atoms A vector containing the symbol names of all atoms in the molecule
#' @param bonds A matrix with the same number of rows and columns as the atoms-vector
#' containing the type of bonds between the atoms
#' @return an instance of "molecule"
#' @author Michael Mahr <rchemcpp@@bioinf.jku.at>
#'
#' @examples
#' m <- createRMolecule(c("C","C"),matrix(c(0,3,3,0),nrow=2))
#' @export
createRMolecule = function(atoms, bonds)
{
if(!is.vector(atoms)) stop("atoms must be vector")
if(!is.matrix(bonds)) stop("bonds must be matrix")
bonds = apply(bonds,1,as.integer)
if (length(atoms) != nrow(bonds)) stop("matrix must have as many rows as vector")
if (length(atoms) != ncol(bonds)) stop("matrix must have as many cols as vector has rows")
if (! isSymmetric(bonds) ) stop("matrix must be symmetric")
if (! all(diag(bonds) == 0)) stop("cannot connect atoms with themselves")
if ( (max(bonds) > 3) || (min(bonds) < 0 ) ) stop("Bond types can only range from 1 to 3")
m = new(Rchemcpp::Rmolecule)
for (k in atoms)
{
m$addAtom(k)
}
for (i in 1:length(atoms)){
for (j in 1:length(atoms)){
if ((i < j) && (bonds[i,j] != 0)){
m$linkAtoms(i-1,j-1,bonds[i,j]);
}
}
}
return (m)
}
### The following functions are taken from the ChemmineR package by Thomas Girke ###
sdf2strWrite <- function(sdf, file, cid=TRUE, ...) {
## Checks
if(class(sdf)!="SDFset") stop("Function expects molecule object of class SDF as input!")
if(cid==TRUE) { sdflist <- lapply(cid(sdf), function(x) sdf2str(sdf=sdf[[x]], cid=x, ...)) }
if(cid==FALSE) { sdflist <- lapply(cid(sdf), function(x) sdf2str(sdf=sdf[[x]], ...)) }
cat(unlist(sdflist), sep="\n", file=file)
}
sdf2str <- function (sdf, file, head, ab, bb, db, cid=NULL, sig=FALSE, ...){
if(missing(head)) {
head <- as.character(sdf[[1]])
if(sig==TRUE) head[2] <- paste("ChemmineR-", format(Sys.time(), "%m%d%y%H%M"), "XD", sep="")
if(length(cid)==1) head[1] <- cid
}
## Atom block
if(missing(ab)) {
ab <- sdf[[2]]
ab <- cbind(Indent="", format(ab[,1:3], width=9, justify="right"),
A=format(gsub("_.*", "", rownames(ab)), width=1, justify="left"),
Space="", format(ab[,-c(1:3)], width=2, justify="right"))
ab <- sapply(seq(along=ab[,1]), function(x) paste(ab[x, ], collapse=" "))
}
## Bond block
if(missing(bb)) {
bb <- sdf[[3]]
bb <- cbind(Indent="", format(bb, width=3, justify="right"))
bb <- sapply(seq(along=bb[,1]), function(x) paste(bb[x, ], collapse=""))
}
## Data block
if(missing(db)) {
db <- sdf[[4]]
if(length(db)>0) {
dbnames <- paste("> <", names(db), ">", sep="")
dbvalues <- as.character(db)
db <- as.vector(rbind(dbnames, dbvalues, ""))
} else {
db <- NULL
}
}
## Assemble in character vector
sdfstrvec <- c(head, ab, bb, "M END", db, "$$$$")
return(sdfstrvec)
}
plotStruc <- function(sdf, atomcex=1.2, atomnum=FALSE, no_print_atoms=c("C"), noHbonds=TRUE, bondspacer=0.12, colbonds=NULL, bondcol="red", ...) {
toplot <- list(atomblock=cbind(atomblock(sdf)[,c(1:2)], as.matrix(bonds(sdf, type="bonds")[,-1])), bondblock=cbind(as.matrix(as.data.frame(bondblock(sdf))[,1:3]), bondcol=1))
## Add bond color
toplot[[2]][, "bondcol"] <- toplot[[2]][,"bondcol"] + as.numeric((toplot[[2]][,"C1"] %in% colbonds) & (toplot[[2]][,"C2"] %in% colbonds))
## Create empty plot with proper dimensions
plot(toplot[[1]], type="n", axes=F, xlab="", ylab="", ...)
## Remove C-hydrogens including their bonds
if(noHbonds==TRUE) {
nonbonded <- !1:length(toplot[[1]][,1]) %in% sort(unique(as.vector(toplot[[2]][,1:2])))
nonbonded <- as.data.frame(toplot[[1]])[nonbonded,]
CHbondindex <- sapply(seq(toplot[[2]][,1]), function(x) paste(sort(gsub("_.*", "", rownames(toplot[[1]]))[toplot[[2]][x,1:2]]), collapse="") == "CH")
toplot[[1]] <- toplot[[1]][sort(unique(as.numeric(toplot[[2]][!CHbondindex,1:2]))), ]
toplot[[2]] <- as.matrix(as.data.frame(toplot[[2]])[!CHbondindex,])
toplot[[1]] <- as.matrix(rbind(toplot[[1]], nonbonded))
}
## Plot bonds
z <- toplot[[2]][, "bondcol"]; z[z==2] <- bondcol # Stores bond coloring data
for(i in seq(along=toplot[[2]][,1])) {
x <- toplot[[1]][gsub("*.*_", "", rownames(toplot[[1]])) %in% toplot[[2]][i,1:2],1]
y <- toplot[[1]][gsub("*.*_", "", rownames(toplot[[1]])) %in% toplot[[2]][i,1:2],2]
## Plot single bonds
if(toplot[[2]][i,3]==1) {
lines(x=x, y=y, lty=1, lwd=3, col=z[i])
}
## Plot double bonds
if(toplot[[2]][i,3]==2) {
rslope <- (atan(diff(y)/diff(x))*180/pi)/90
lines(x=x-rslope*bondspacer, y=y+(1-abs(rslope))*bondspacer, lty=1, lwd=3, col=z[i])
lines(x=x+rslope*bondspacer, y=y-(1-abs(rslope))*bondspacer, lty=1, lwd=3, col=z[i])
}
## Plot triple bonds
if(toplot[[2]][i,3]==3) {
rslope <- (atan(diff(y)/diff(x))*180/pi)/90
bondspacer <- bondspacer * 2
lines(x=x, y=y, lty=1, lwd=3, col=z[i])
lines(x=x-rslope*bondspacer, y=y+(1-abs(rslope))*bondspacer, lty=1, lwd=3, col=z[i])
lines(x=x+rslope*bondspacer, y=y-(1-abs(rslope))*bondspacer, lty=1, lwd=3, col=z[i])
}
## Plot aromatic or any other bonds
if(toplot[[2]][i,3] > 3 | toplot[[2]][i,3] < 1 ) {
lines(x=x, y=y, lty=2, lwd=3, col=z[i])
}
}
## Exclude certain atoms from being printed
exclude <- paste("(^", no_print_atoms, "_)", sep="", collapse="|")
labelMA <- toplot[[1]][!grepl(exclude, rownames(toplot[[1]])), , drop=FALSE] # Added July 31, 2012: 'drop=FALSE'
## Add charges
charge <- c("0"="", "3"="3+", "2"="2+", "1"="+", "-1"="-", "-2"="2-", "-3"="3-")
charge <- charge[as.character(labelMA[,"charge"])]
## Add hydrogens to non-charged/non-C atoms according to valence rules (some SD files require this)
Nhydrogens <- c("0"="", "1"="H", "2"="H2", "3"="H3", "4"="H4", "5"="H5", "6"="H6", "7"="H7", "8"="H8")
hydrogens <- (labelMA[, "Nbondrule"] + labelMA[, "charge"]) - labelMA[,"Nbondcount"]
hydrogens[labelMA[,"charge"]!=0] <- 0; hydrogens[hydrogens < 0] <- 0
hydrogens <- Nhydrogens[as.character(hydrogens)]
## Plot data
if(is.vector(labelMA)) labelMA <- matrix(labelMA, 1, 2, byrow=TRUE, dimnames=list(rownames(toplot[[1]])[!grepl(exclude, rownames(toplot[[1]]))], c("C1", "C2")))
if(is.matrix(labelMA) & length(labelMA[,1])>=1) {
atomcol <- gsub("_.*", "", rownames(labelMA)); atomcol[!grepl("N|C|O|H", atomcol)] <- "any"; mycol <- c(C="black", H="black", N="blue", O="red", any="green"); atomcol <- mycol[atomcol]
## Overplot nodes to display atom labels
points(x=labelMA[,1], y=labelMA[,2], col="white", pch=16, cex=2.8)
## Plot atom labels
if(atomnum==TRUE) {
text(x=labelMA[,1], y=labelMA[,2], paste(gsub("_", "", rownames(labelMA)), hydrogens, charge, sep=""), cex=atomcex, col=atomcol)
} else {
text(x=labelMA[,1], y=labelMA[,2], paste(gsub("_.*", "", rownames(labelMA)), hydrogens, charge, sep=""), cex=atomcex, col=atomcol)
}
}
}
setMethod(f="plot", signature="SDF", definition=function(x, print=TRUE, ...) {
plotStruc(sdf=x, ...)
if(print==TRUE) { return(x) }
}
)
setMethod(f="plot", signature="SDFset",
definition=function(x, griddim, print_cid=cid(x), print=TRUE, ...) {
if(missing(griddim)) {
mydim <- ceiling(sqrt(length(x)))
griddim <- c(mydim, mydim)
}
par(mfrow=griddim)
for(i in 1:length(x)) { plotStruc(sdf=x[[i]], main=print_cid[i], ...) }
if(print==TRUE) { return(SDFset2SDF(x)) }
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.