# R/buildPubchem.R

# Build an SQLite database from a directory of PubChem SDF files.
#
# Every file listed in `in_path` is parsed with procSDF() and written to its
# own table in the SQLite database located at `out_path`/`db_name`. The table
# name is the file name with the literal substrings ".sdf.gz" and "Compound_"
# removed. Progress (file index and name, rows in the file, running row
# total) is printed with cat().
#
# Arguments:
#   in_path   directory whose files are listed and handed to procSDF()
#   out_path  directory in which the database file lives
#   db_name   file name of the SQLite database
#
# Returns NULL invisibly; the function is called for its side effects.
buildPubchem <- function(in_path, out_path, db_name) {
  files <- list.files(in_path)

  # Nothing to import: return before touching the database. Iterating over
  # `1:length(files)` would yield c(1, 0) here and fail on files[[1]].
  if (length(files) == 0) {
    return(invisible(NULL))
  }

  # Open one connection for the whole run and guarantee it is closed even if
  # parsing or writing fails part-way through.
  db <- dbConnect(dbDriver("SQLite"), dbname = file.path(out_path, db_name))
  on.exit(dbDisconnect(db), add = TRUE)

  total_rows <- 0
  for (i in seq_along(files)) {
    file_name <- files[[i]]
    cat(i, ". ", file_name, "\n", sep = "")

    records <- procSDF(file_name, in_path)
    cat("nrows: ", nrow(records), "\n")
    total_rows <- total_rows + nrow(records)
    cat("total rows: ", total_rows, "\n")

    # Strip the extension and prefix as literal text; with a regex pattern
    # each "." would match any character.
    table_name <- sub(".sdf.gz", "", file_name, fixed = TRUE)
    table_name <- sub("Compound_", "", table_name, fixed = TRUE)

    dbWriteTable(db, table_name, records)
  }

  # Disabled index creation, kept for reference. Note that it targets a
  # PUBCHEM_ALL table, whereas the loop above writes one table per input file.
  # db <- dbConnect(dbDriver("SQLite"), dbname = paste(out_path,db_name,sep="/"))
  # dbSendQuery(db,"CREATE INDEX idx_em ON PUBCHEM_ALL(PUBCHEM_EXACT_MASS)")
  # dbSendQuery(db,"CREATE INDEX idx_cid ON PUBCHEM_ALL(PUBCHEM_COMPOUND_CID)")
  # dbSendQuery(db,"CREATE INDEX idx_mf ON PUBCHEM_ALL(PUBCHEM_MOLECULAR_FORMULA)")
  # dbDisconnect(db)

  invisible(NULL)
}
# jasenfinch/pubchemPIP documentation built on May 18, 2019, 4:52 p.m.