# R/procSDF.R

#' Parse a PubChem SDF file into a table of compound properties
#'
#' Reads the file line by line, discards blank lines, splits the remaining
#' lines into records at the `$$$$` delimiter lines and, for every record,
#' takes the line that follows each of a fixed set of `> <PUBCHEM_...>` tags.
#'
#' @param file Name of the SDF file, relative to `Path`.
#' @param Path Directory that contains `file`.
#' @return A data frame with one row per record and one column per extracted
#'   field; column names are the tag names without the `> <` / `>`
#'   decoration. A field that is absent from a record is returned as `""`.
#'   A file without any record yields a zero-row data frame.
procSDF <- function(file,Path){
	# Single source of truth for the extracted fields: the SDF tags and the
	# output column names are both derived from this vector.
	fields <- c(
		"PUBCHEM_COMPOUND_CID",
		"PUBCHEM_IUPAC_NAME",
		"PUBCHEM_IUPAC_TRADITIONAL_NAME",
		"PUBCHEM_IUPAC_INCHI",
		"PUBCHEM_IUPAC_INCHIKEY",
		"PUBCHEM_EXACT_MASS",
		"PUBCHEM_MOLECULAR_FORMULA",
		"PUBCHEM_MOLECULAR_WEIGHT",
		"PUBCHEM_OPENEYE_CAN_SMILES",
		"PUBCHEM_MONOISOTOPIC_WEIGHT",
		"PUBCHEM_TOTAL_CHARGE",
		"PUBCHEM_CACTVS_HBOND_ACCEPTOR",
		"PUBCHEM_CACTVS_HBOND_DONOR",
		"PUBCHEM_XLOGP3_AA",
		"PUBCHEM_CACTVS_COMPLEXITY",
		"PUBCHEM_COMPONENT_COUNT",
		"PUBCHEM_ISOTOPIC_ATOM_COUNT"
	)
	tags <- paste0("> <", fields, ">")

	sdf <- readLines(file.path(Path, file))
	# Logical subsetting on purpose: `sdf[-which(sdf == "")]` selects nothing
	# at all when the file has no blank line, because `-integer(0)` is an
	# empty index.
	# NOTE: since blank lines are removed before the lookup, a tag whose value
	# is empty will pick up the next non-blank line instead.
	sdf <- sdf[sdf != ""]

	# Compound IDs are read globally; records are delimited by `$$$$` lines.
	cids <- sdf[which(sdf == tags[1]) + 1]
	ends <- which(sdf == "$$$$")
	if (length(ends) != length(cids)) {
		stop(
			"Malformed SDF: found ", length(cids), " compound ID(s) but ",
			length(ends), " record delimiter(s) ('$$$$').",
			call. = FALSE
		)
	}
	# A record starts right after the previous delimiter (the first at line 1).
	starts <- c(1, ends[-length(ends)] + 1)

	db <- matrix(
		"",
		nrow = length(cids),
		ncol = length(fields),
		dimnames = list(NULL, fields)
	)
	for (x in seq_along(cids)) {
		record <- sdf[starts[x]:ends[x]]
		# `match()` keeps the first occurrence, so a repeated tag cannot
		# break the single-cell assignment; a missing tag yields NA.
		hit <- match(tags[-1], record)
		value <- record[hit + 1]
		value[is.na(hit)] <- ""
		db[x, ] <- c(cids[x], value)
	}

	# TODO: calculate elemental composition (not implemented here)
	data.frame(db)
}
# jasenfinch/pubchemPIP documentation built on May 18, 2019, 4:52 p.m.