# tools for loading data to postgres
# Joseph Guillame and Ivan Hanigan
# original by Joe 24/3/2009
# modifications
# 5/1/2010
# ihanigan
# generalise a bit more, add optional primary key, improve handling of dates
# TO DO:
## set the vacuum automatically when printcopy=F
## on linux replace the 'type' command with 'cat'
# load_newtable_to_postgres = Convert to csv and load to postgres
# pk as either column names as they appear at the end or column indices
# inspired from
#odbc_dsn="pg"
#require(RODBC)
#con<-odbcConnect(odbc_dsn,"postgres","test",case="postgresql")
#sqlSave(con,data[0,],test=TRUE,verbose=TRUE)
#close(con)
# source file could be
#source_file=paste("E'", csvfilename,"'",sep="")
if (!require(RODBC)) install.packages('RODBC'); require(RODBC) # for getSqlTypeInfo
# if (!file.exists('C:/pgutils/psql.exe')) {
# dir.create('c:/pgutils')
# download.file("http://alliance.anu.edu.au/access/content/group/4e0f55f1-b540-456a-000a-24730b59fccb/pgutils.zip","c:/pgutils/pgutils.zip",mode="wb")
# unzip("c:/pgutils/pgutils.zip",exdir="C:/pgutils")
# }
# not working
# print('please download http://alliance.anu.edu.au/access/content/group/4e0f55f1-b540-456a-000a-24730b59fccb/pgutils.zip')
load2postgres<-function(inputfilepath,schema,tablename,pk=NULL,header=TRUE,printcopy=TRUE,sheetname="Sheet1",withoids=FALSE,pguser="username",db='databasename',ip='ipaddress',source_file="STDIN",datecol=NULL,nrowscsv=10000,pgpath=c('c:\\pgutils\\psql')){
table=paste(schema,".",tablename,sep="")
ext<-substr(inputfilepath,nchar(inputfilepath)-2,nchar(inputfilepath))
#print(ext)
if (ext=="dbf"){
require(foreign)
data<-read.dbf(inputfilepath,as.is=TRUE)
csvfilename=sub(".dbf",".csv",basename(inputfilepath))
csvfilename=paste(getwd(),csvfilename,sep="/")
write.csv(data,csvfilename,row.names=FALSE,na="")
}
else if (ext=="csv" || ext=="txt"){
#or from csv originally
csvfilename<-inputfilepath
data<-read.csv(csvfilename,stringsAsFactors=FALSE,header=header,strip.white=TRUE,nrows=nrowscsv)
names(data)<-gsub("\\.","_",names(data))
names(data)<-gsub("_+","_",names(data))
}
else if (ext=="xls"){
odbcf<-odbcConnectExcel(inputfilepath)
data<-sqlFetch(odbcf,sheetname,as.is=TRUE)
csvfilename=sub(".xls",".csv",basename(inputfilepath))
csvfilename=paste(getwd(),csvfilename,sep="/")
write.csv(data,csvfilename,row.names=FALSE,na="")
}
else print("Unknown extension")
names(data)<-tolower(names(data))
if (length(pk)>0) {
if (class(pk) %in% c("integer","numeric")) pk=paste(names(data)[pk],collapse=",")
}
datatypes<-getSqlTypeInfo("PostgreSQL")
datatypes["numeric"]<-"numeric"
csvfilename=gsub("\\\\","\\\\\\\\",csvfilename)
text=""
text=paste(text,"CREATE TABLE ",table," (",sep="")
columnnames<-names(data)
#################################################################################
if (length(pk)>0) {
for (n in columnnames) {
if (length(grep(n, datecol))>0) {
text=paste(text,"\"",n,"\" date,\n",sep="")
} else {
#print(class(data[[n]]))
if (is.null(class(data[[n]]))) cat("Missing datatype:",class(data[[n]]),"\n")
text=paste(text,"\"",n,"\" ",datatypes[[class(data[[n]])]],",\n",sep="")
}
}
text=paste(text,"CONSTRAINT \"",table,"_pkey\" PRIMARY KEY (",pk,")\n",sep="")
}
if (length(pk)==0) {
for (n in columnnames[1:(length(columnnames)-1)]) {
if (length(grep(n, datecol))>0) {
text=paste(text,"\"",n,"\" date,\n",sep="")
} else {
#print(class(data[[n]]))
if (is.null(class(data[[n]]))) cat("Missing datatype:",class(data[[n]]),"\n")
text=paste(text,"\"",n,"\" ",datatypes[[class(data[[n]])]],",\n",sep="")
}
}
n=columnnames[length(columnnames)]
text=paste(text,"\"",n,"\" ",datatypes[[class(data[[n]])]],sep="")
#\"
}
###############################################################################
if (withoids) text=paste(text,") WITH (OIDS=TRUE);\n",sep="")
else text=paste(text,") WITH (OIDS=FALSE);\n",sep="")
text=paste(text,"ALTER TABLE ",table," OWNER TO ",pguser,";\n",sep="")
if (source_file=="STDIN") {
if (header) text=paste(text,"COPY ",table," FROM ",source_file," CSV HEADER;\n",sep="")
else text=paste(text,"COPY ",table," FROM ",source_file," CSV;\n",sep="")
sink("sqlquery.txt")
cat(text)
sink()
if (printcopy){
cat(paste('ok the CREATE TABLE and COPY statements have been constructed for this file and is in "sqlquery.txt", have a look and see if it is correct\nif it is ok and you have not set your password to be remembered in pgAdmin then paste this into a cmd prompt\n\n type sqlquery.txt \"',csvfilename,'\" | \"',pgpath,'\" -h ',ip,' -U ',pguser,' -d ',db,'\n\n\notherwise you can run this directly from R\n\n system(\"type sqlquery.txt \\"',csvfilename,'\\" | \"',pgpath,'\" -h ',ip,' -U ',pguser,' -d ',db,'\")',sep=''),'\n')
cat(paste("\n\nnow you probably should vaccuum the table\nVACUUM ANALYZE ",table,";\n",sep=""))
} else {
sink('go.bat')
cat(paste('type sqlquery.txt \"',csvfilename,'\" | \"',pgpath,'\" -h ',ip,' -U ',pguser,' -d ',db,'',sep=''))
sink()
shell('go.bat')
file.remove('go.bat')
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.