R/sqlsurvey.R

check.not.crazy.big<-function(conn,table, vars,byvars){
   levs<-dbGetQuery(conn, sqlsubst("select count(*),%%b%% from %%table%% group by %%b%%",list(table=table, b=paste(byvars,collapse=","))))
   big<-nchar(paste(apply(levs, 1, paste,collapse="_"),collapse="_"))^2
   if (big*length(vars)>2e5)
       stop("Query is too large: split it up")
   else TRUE
}


make.zdata<-function(db, table, factors=9){
  rval<-dbGetQuery(db,sqlsubst("select * from %%tbl%% limit 1",
                            list(tbl=table)))
  if (length(factors)==0) return(rval[FALSE,])
  rval<-as.list(rval) 	## lists are faster
  if(is.character(factors)){
    for(f in factors){
      levs<- dbGetQuery(db,sqlsubst("select distinct %%v%% from %%tbl%% order by %%v%%",
                        list(v=f,tbl=table)))[[1]]
      rval[[f]]<-factor(rval[[f]],  levels=levs)
    }
    class(rval)<-"data.frame"
    return(rval[FALSE,])
  } else {
    ##numeric limit on levels
    if (!is.numeric(factors) || length(factors)>1)
        stop("invalid specification of 'factors'")
    if (factors<=0) return(rval[FALSE,])

    cat("checking factor levels\n")
    for(f in names(rval)){
      cat(".")
       levs<- dbGetQuery(db,
                        sqlsubst("select  %%v%% from %%tbl%% limit %%n%%",
                                 list(v=f, tbl=table, n=factors*10)))[[1]]
      
      if ((length(na.omit(unique(levs)))<=factors)){
        levs<- dbGetQuery(db,
                          sqlsubst("select distinct %%v%% from %%tbl%% order by %%v%% limit %%n%%",
                                   list(v=f, tbl=table, n=factors+2)))[[1]]
        if (length(na.omit(levs))<=factors)
          rval[[f]]<-factor(rval[[f]],  levels=levs)
      }
    }
    class(rval)<-"data.frame"
    return(rval[FALSE,])
  }
  
}

close.sqlsurvey<-function(con, ...){
  gc() ## try to make sure any dead model matrices and subsets are finalized.
  dbDisconnect(con$conn)
}

open.sqlsurvey<-function(con, driver, ...){  
  con$conn<-dbConnect(driver, url=con$dbname,...)
  if (!is.null(con$subset)){
    con$subset$conn<-con$conn
  }
  con
}


open.sqlmodelmatrix<-function(con, design,...){
  con$conn<-design$conn
  con
}

sqlmmDrop<-function(mmobj){
  dbSendUpdate(mmobj$conn,sqlsubst("drop table %%mm%%", list(mm=mmobj$table)),async=TRUE)
  dbSendUpdate(mmobj$conn,sqlsubst("drop table %%mf%%", list(mf=mmobj$mf)),async=TRUE)
  invisible(NULL)
}

finalizeSubset<-function(e){
  dbSendUpdate(e$conn, sqlsubst("drop index %%idx%%",list(idx=e$idx)),async=TRUE)
  dbSendUpdate(e$conn, sqlsubst("drop table %%tbl%%",list(tbl=e$table)),async=TRUE)
}



linbin2<-function(x,table,design,M=101,lim=NULL,y=NULL){
  if (M>800) stop("too many grid points")
  if (is.null(design$subset)){
    tablename<-table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }
  if(is.null(lim)){
	xrange<-dbGetQuery(design$conn, sqlsubst("select min(%%x%%) as low, max(%%x%%) as up from %%tbl%%",
					list(x=x,tbl=tablename)))
	lim=c(xrange[1,"low"],xrange[1,"up"])
  }
  gridp<-seq(lim[1], lim[2], length=M)
  delta<-gridp[2]-gridp[1]

  bindata<-matrix(0,ncol=4,nrow=M+1)
  for(i in 2:M){
    qi<-sqlsubst("select sum(wt*(x-%%low%%)), sum((wt*(x-%%low%%)*y)),sum(wt*(%%hi%%-x)), sum((wt*(%%hi%%-x)*y)) from (select cast(%%xname%% as double) as x, cast(%%yname%% as double) as y, cast(%%wtname%% as double) as wt from %%table%%) as foo where x>%%low%% and x<=%%hi%%",list(low=gridp[i-1],hi=gridp[i],xname=x,yname=y,wtname=wtname,table=tablename)) 
    bindata[i,]<-as.numeric(as.matrix(dbGetQuery(design$conn, qi)))
  }
  bindata[is.na(bindata)]<-0
  
  N<-dbGetQuery(design$conn, sqlsubst("select sum(%%wt%%) from %%tbl%%", 
     list(wt=wtname,tbl=tablename)))[[1]]

  rval<-list(grid=gridp, xcounts=(bindata[2:(M+1),1]+bindata[1:M,3])/(delta), ycounts=(bindata[2:(M+1),2]+bindata[1:M,4])/(delta))
  rval
}

linbin1<-function(x,table,design,M=101,lim=NULL){
  if (M>800) stop("too many grid points")
  
  if (is.null(design$subset)){
    tablename<-table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }
  
  if(is.null(lim)){
	xrange<-dbGetQuery(design$conn, sqlsubst("select min(%%x%%) as low, max(%%x%%) as up from %%tbl%%",
					list(x=x,tbl=tablename)))
	lim=c(xrange[1,"low"],xrange[1,"up"])
  }
  gridp<-seq(lim[1], lim[2], length=M)
  delta<-gridp[2]-gridp[1]

  bindata<-matrix(0,ncol=2,nrow=M+1)
  for(i in 2:M){
    qi<-sqlsubst("select sum(wt*(x-%%low%%)), sum(wt*(%%hi%%-x)) from (select cast(%%xname%% as double) as x,  cast(%%wtname%% as double) as wt from %%table%%) as foo where x>%%low%% and x<=%%hi%%",
                 list(low=gridp[i-1],hi=gridp[i],xname=x,wtname=wtname,table=tablename)) 
    bindata[i,]<-as.numeric(as.matrix(dbGetQuery(design$conn, qi)))
  }
  bindata[is.na(bindata)]<-0
  
  N<-dbGetQuery(design$conn, sqlsubst("select sum(%%wt%%) from %%tbl%%", 
     list(wt=wtname,tbl=tablename)))[[1]]

  rval<-list(grid=gridp, xcounts=(bindata[2:(M+1),1]+bindata[1:M,2]/(delta*N)))
  rval
}


svysmooth.sqlrepsurvey<-svysmooth.sqlsurvey<-function(formula, design, bandwidth=NULL, M=101, sample.bandwidth=NULL,...){

  if(!is.null(sample.bandwidth)){
    ## read a subsample to estimate the bandwidth
    if (is.null(design$subset)){
      tablename<-design$table
      wtname<-design$weights
    } else {
      tablename<-sqlsubst("%%tbl%% inner join %%subset%% using(%%key%%) ",
                          list(tbl=design$table, subset=design$subset$table, key=design$key))
      wtname<-design$subset$weights
    }

    
  updates<-NULL
  metavars<-NULL
  allv<-all.vars(formula)
  needsUpdates<-any(!sapply(allv,isBaseVariable,design=design))
  if (needsUpdates){
    metavars<-with(design,c(wtname,repweights,key))
    updates<-getTableWithUpdates(design,allv,metavars,tablename)
    tablename<-updates$table
    on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,d),add=TRUE)
    for(f in updates$createfns) dbSendUpdate(design$conn,f)
  }

    
    if (!is.numeric(sample.bandwidth) || (sample.bandwidth<100) || (sample.bandwidth>1e5))
      stop("invalid sample.bandwidth")
    vars<- paste(all.vars(formula),collapse=",")
    mf <- model.frame(formula,data=dbGetQuery(design$conn, paste("select",vars, "from (select",vars,",",wtname,"from", tablename,"where (",wtname,">0) ) as foo sample",round(sample.bandwidth))))
    mm <- model.matrix(terms(formula), mf)
    if (attr(terms(formula), "intercept")) 
      mm <- mm[, -1, drop = FALSE]
    naa <- attr(mf, "na.action")
    if (length(formula) == 3) {
      Y <- mf[[1]]
      density <- FALSE
    }
    else density <- TRUE
    bandwidth <- numeric(ncol(mm))
    for (i in 1:ncol(mm)) {
      bandwidth[i] <- if (density) 
        dpik(mm[, i], gridsize = M)
      else dpill(mm[, i], Y, gridsize = M)
    }

    N<-dbGetQuery(design$conn, paste("select count(*) from",tablename,"where",wtname,">0"))
    bandwidth<-bandwidth*(sample.bandwidth/N)^(1/5)
  } else if (is.null(bandwidth)) stop("Must provide bandwidth or sample.bandwidth")
  
  
  if (length(formula)==2){
    x<-attr(terms(formula),"term.labels")
    if(length(x)>1) stop('only one variable')
    bins<-linbin1(x, table=design$table, design=design,M=M)
    rval<-list(locpoly(rep(1,M-1), bins$xcounts ,binned=TRUE,
                       bandwidth=bandwidth,range.x=range(bins$grid)))
    attr(rval,"ylab")<-"Density"
    names(rval)<-x
  } else {
    if(length(formula[[3]])>1) stop('only one variable')
    x<-attr(terms(formula),"term.labels")
    if(length(x)>1) stop('only one variable')
    y<-deparse(formula[[2]])
    bins<-linbin2(x, table=design$table, design=design,M=M,y=y)
    rval<-list(locpoly(bins$xcounts, bins$ycounts,binned=TRUE,bandwidth=bandwidth,
                       range.x=range(bins$grid)))
    names(rval)<-x
    attr(rval,"ylab")<-y
  }
  class(rval)<-"svysmooth"
  attr(rval,"call") <- sys.call()
  attr(rval,"density") <- (length(formula)==2)
  rval
}

sqlexpr<-function(expr, design){
   nms<-new.env(parent=emptyenv())
   assign("%in%"," IN ", nms)
   assign("&", " AND ", nms)
   assign("=="," = ",nms)
   assign("|"," OR ", nms)
   assign("!"," NOT ",nms)
   assign("I","",nms)
   assign("~","",nms)
   assign("(","",nms)
   out <-textConnection("str","w",local=TRUE)
   inorder<-function(e){
     if(length(e) ==1) {
       if (is.character(e))
         cat("'",e,"'",file=out,sep="")
       else
         cat(e, file=out)
     } else if (e[[1]]==quote(is.na)){
     	cat("(",file=out)
     	inorder(e[[2]])
     	cat(") IS NULL", file=out)
     } else if (length(e)==2){
       nm<-deparse(e[[1]])
       if (exists(nm, nms)) nm<-get(nm,nms)
       cat(nm, file=out)
       cat("(", file=out)
       inorder(e[[2]])
       cat(")", file=out)
     } else if (deparse(e[[1]])=="c"){
       cat("(", file=out)
       for(i in seq_len(length(e[-1]))) {
         if(i>1) cat(",", file=out)
         inorder(e[[i+1]])
       }
       cat(")", file=out)
     } else if (deparse(e[[1]])==":"){
       cat("(",file=out)
       cat(paste(eval(e),collapse=","),file=out)
       cat(")",file=out)
     } else{
         cat("(",file=out)
         inorder(e[[2]])
         nm<-deparse(e[[1]])
         if (exists(nm,nms)) nm<-get(nm,nms)
         cat(nm,file=out)
         inorder(e[[3]])
         cat(")",file=out)
       }

   }
   inorder(expr)
   close(out)
   paste("(",str,")")

}


sqlcutfn<-function (breaks) 
{
    n <- length(breaks)
    numbers<-format(1:(n+1))
    last <- NULL
    others <- NULL
    first <- paste("when x<=", breaks[1], " then return '",numbers[1],". to ", 
        breaks[1], "';", sep = "")
    if (n > 1) {
        last <- paste("when x>", breaks[n], " then return '", 
            numbers[n + 1], ". ", breaks[n], "+';", sep = "")
        others <- paste("when (x<=", breaks[-1], " and x>", breaks[-n], 
            ") then return '", numbers[2:n], ". ", breaks[-n], "+ to ", 
            breaks[-1], "';", sep = "")
    }
    whens <- paste(c(first, last, others), collapse = "\n")
    fname <- basename(tempfile("rec_"))
    query <- paste("create function", fname, "(x double precision)", 
        "returns varchar(255)", "begin", "case", whens, "else return null;", 
        "end case;", "end;", sep = "\n")
    tidy <- paste("drop function", fname)
    list(name = fname, construct = query, destroy = tidy)
}



  
cutprocess<-function(formula){

  functions<-list()
  
  descend<-function(e){
    if (length(e)==1){
      e
    } else if (e[[1]]==as.name("cut")){
      if(length(e)!=3) stop("cut(x,breaks) are the only supported arguments")
      fn<-sqlcutfn(eval(e[[3]],environment(formula)))
      functions<<-c(functions,list(fn))
      bquote(.(as.name(fn$name))(.(e[[2]])))      
    } else {
      for(i in 2:length(e)){
        e[[i]]<-descend(e[[i]])
      }
      e
    }
  }

  list(descend(formula),functions)
  
}

subset.sqlsurvey<-function(x,subset,...){

  subset<-substitute(subset)
  rval<-new.env()
  rval$subset<-sqlexpr(subset)

  rval$table<-basename(tempfile("_sbs_"))
  rval$idx<-basename(tempfile("_idx_"))
  rval$weights<-"_subset_weight_"

  allv<-all.vars(subset)
  tablename<-x$table
  if (any(sapply(allv, isCreatedVariable, design=x))){
    updates<-getTableWithUpdates(x,allv,c(x$weights,x$key),tablename)
    tablename<-updates$table
    alreadydefined<-dbGetQuery(x$conn, paste("select count(*) from functions where name = '",updates$fnames[1],"'",sep=""))[1,1]>0
    if(!alreadydefined){
      on.exit(for (d in updates$destroyfns) dbSendUpdate(x$conn,d), add=TRUE)
      for(f in updates$createfns) dbSendUpdate(x$conn,f)
    }
  } 
  
  if (is.null(x$subset)){
    query<-sqlsubst("create table %%tbl%% as (select %%key%%, %%wt%%*1.0*(1*%%subset%%) as _subset_weight_ from %%base%%) with data",
                    list(tbl=rval$table, key=x$key, wt=x$weights, subset=rval$subset, base=tablename )
                    )
    
  } else {
       query<-sqlsubst("create table %%tbl%% as (select %%key%%, (%%subsetwt%%*1.0*(1*%%subset%%)) as _subset_weight_ from  %%base%% inner join %%oldtbl%% using(%%key%%) ) with data",
                    list(tbl=rval$table, key=x$key, subset=rval$subset, base=tablename,oldtbl=x$subset$table,subsetwt=x$subset$weights)
                    )
  }
  dbSendUpdate(x$conn, query)
  dbSendUpdate(x$conn,sqlsubst("create unique index %%idx%% on %%tbl%%(%%key%%)",
                             list(idx=rval$idx,tbl=rval$table, key=x$key)))
  rval$conn<-x$conn
  reg.finalizer(rval, finalizeSubset)
  x$subset<-rval
  x$call<-sys.call(-1)
  x
}

sqlsurvey<-function(id=NULL, strata=NULL, weights=NULL, fpc="0",driver=MonetDB.R(),
                       database, table.name,
                       key, check.factors=10,...){

    if (is(database, "DBIConnection"))
        db<-database
    else
        db<-dbConnect(driver,database,...)
        
    if (missing(table.name)) stop("table.name required")

  if (is.data.frame(check.factors)){
    zdata<-check.factors
    actualnames<-dbListFields(db,table.name)
    if (!all(names(zdata) %in% actualnames)) stop("supplied data frame includes variables not in the data table")
    if (!all(actualnames %in% names(zdata))) message("levels for some variables not supplied: assumed numeric")
    for(v in setdiff(actualnames,names(zdata))) zdata[[v]]<-numeric(0)   
  } else{
    zdata<-make.zdata(db,table.name,factors=check.factors)
  }

  if (is.null(id) || id=="1") {
      if (is.null(id)) warning("No 'id' given, individual sampling assumed")
      id<-key
  }
      
      
  rval<-list(conn=db, table=table.name,
           id=id, strata=strata,weights=weights,fpc=fpc,
           call=sys.call(), zdata=zdata, key=key
           )

    rval$dbname<-database

  class(rval)<-"sqlsurvey"
  rval
}

sqlsubst<-function(strings, values){
  for(nm in names(values)){
  	if (is.null(values[[nm]])) next
    if (length(values[[nm]])>1) values[[nm]]<-paste(values[[nm]],collapse=", ")
    strings<-gsub(paste("%%",nm,"%%",sep=""),values[[nm]], strings)
  }
strings
}

print.sqlsurvey<-function(x,...){
  cat("MonetDB survey object (linearisation variances):\n")
  print(x$call)
  invisible(x)
}

dim.sqlsurvey<-function(x){
  if(is.null(x$subset))
    nrows<-dbGetQuery(x$conn, sqlsubst("select count(*) from %%table%%", list(table=x$table)))[[1]]
  else
    nrows<-dbGetQuery(x$conn, sqlsubst("select count(*) from %%table%% where %%wt%%>0",
                                       list(table=x$subset$table, wt=x$subset$weights)))[[1]]
  ncols<-ncol(x$zdata)
  c(nrows,ncols)
}


svymean.sqlsurvey<-function(x, design, na.rm=TRUE,byvar=NULL,se=FALSE, keep.estfun=FALSE,...){

  tms<-terms(x)

  ##handle subpopulations
  if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }

  updates<-NULL
  metavars<-NULL
  allv<-unique(c(all.vars(x),all.vars(byvar)))
  needsUpdates<-any(!sapply(allv,isBaseVariable,design=design))
  if (needsUpdates){
    metavars<-with(design,c(id,strata,wtname,fpc,key))
    updates<-getTableWithUpdates(design,allv,metavars,tablename)
    tablename<-updates$table
    on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,d),add=TRUE)
    for(f in updates$createfns) dbSendUpdate(design$conn,f)
  }
  
  ## handle missing values
  for(v in allv){
    nmissing<-dbGetQuery(design$conn, sqlsubst("select sum(%%wt%%) from %%table%% where %%var%% is null",
                                               list(wt=wtname, table=tablename, var=v)))[[1]][1]
    if (!is.na(nmissing) && nmissing>0) break
  }
  if (!is.na(nmissing) && nmissing>0){
    if (na.rm==FALSE) return(NA)
    notna<-parse(text=paste(paste("!is.na(",allv,")"),collapse="&"))[[1]]
    design<-do.call(subset, list(design, notna))
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights

    if (needsUpdates){
      metavars<-union(metavars,wtname)
      updates<-getTableWithUpdates(design,allv,metavars,tablename)
      tablename<-updates$table
    }
  }

  
  ## use sqlmodelmatrix if we have factors or interactions
  basevars<-rownames(attr(tms,"factors"))
  if ( any(attr(tms,"order")>1) || any(sapply(basevars, isFactorVariable, design=design))){
    mm<-sqlmodelmatrix(x,design, fullrank=FALSE)
    termnames<-mm$terms
    tablename<-sqlsubst("%%tbl%% inner join %%mm%% using(%%key%%)",
                        list(tbl=tablename,mm=mm$table, key=design$key))
  } else {
    termnames<-attr(tms,"term.labels")
  }
  vars<-paste("sum(",wtname,"*",termnames,")")
  tvars<-paste("sum(",wtname,"*(1*(",termnames," is not null)))")



  if (is.null(byvar)){
    query<-sqlsubst("select %%vars%%  from %%table%%",
                    list(vars=vars,table=tablename))
    tquery<-sqlsubst("select %%vars%% from %%table%%",
                    list(vars=tvars, table=tablename))
  }else{
    byvar<-attr(terms(byvar),"term.labels")
    check.not.crazy.big(design$conn, tablename, all.vars(x) ,byvar)

    query<-sqlsubst("select %%vars%%, %%byvars%% from %%table%% where %%wt%%<>0 group by %%byvars%% order by %%byvars%%",
                    list(vars=vars, byvars=byvar, wt=wtname, table=tablename))    
    tquery<-sqlsubst("select %%vars%%, %%byvars%% from %%table%% where %%wt%%<>0 group by %%byvars%% order by %%byvars%%",
                    list(vars=tvars, byvars=byvar, wt=wtname, table=tablename))    
  }
  result<-dbGetQuery(design$conn, query)
  p<-length(termnames)
  totwt<-dbGetQuery(design$conn, tquery)[1:p]
  result[1:p]<-result[1:p]/totwt

  names(result)<-c(termnames,byvar)
  if(se){
    utable<-basename(tempfile("_U_"))
    if (is.null(byvar)){
      means<-unlist(result[1,])
      unames<-paste("_",termnames,sep="")
      query<-sqlsubst("create table %%utbl%% as (select %%key%%, %%vars%% from %%table%%) with data",
                      list(utbl=utable, key=design$key,
                           vars=paste(termnames,"-",means," as ",unames,sep=""),
                           table=tablename))
    } else {
      means<-as.vector(t(as.matrix(result[,1:p,drop=FALSE])))
      bycols<-result[,-(1:p),drop=FALSE]
      qbycols<-lapply(bycols, function(v) if(is.character(v)) lapply(v,adquote) else v)
      bynames<-do.call(paste, c(mapply(paste, names(bycols),"=",qbycols,SIMPLIFY=FALSE), sep=" AND "))
      vnames<-paste("(",termnames,"-",means,")",sep="")
      uexpr<-paste(vnames,"*(1*(",rep(bynames,each=p),"))")
      unames<-paste0("_",make.db.names(design$conn, paste(as.vector(outer(termnames, do.call(paste, c(bycols,sep="_")), paste,sep="_")),sep=""),unique=TRUE))
      query<-sqlsubst("create table %%utbl%% as (select %%key%%, %%vars%% from %%table%%) with data",
                      list(utbl=utable, key=design$key,
                           vars=paste(uexpr,"as",unames),
                           table=tablename))
      
    }
    
    dbSendUpdate(design$conn, query)
    query<-sqlsubst("create unique index %%idx%% on  %%utbl%% (%%key%%)",
                    list(utbl=utable, key=design$key, idx=basename(tempfile("idx"))))
    dbSendUpdate(design$conn, query)
    if (!keep.estfun)
      on.exit(dbSendUpdate(design$conn, sqlsubst("drop table %%utbl%%",list(utbl=utable))),add=TRUE)
    vmat<-sqlvar(unames,utable,design)
    ##FIXME
    attr(result,"var")<-vmat/tcrossprod(as.vector(t(as.matrix(totwt))))

  }
  
  attr(result,"resultcol")<-1:length(termnames)
  class(result)<-c("sqlsvystat",class(result))
  result
  
}

coef.sqlsvystat<-function(object,...) as.vector(t(as.matrix(object[,attr(object,"resultcol"),drop=FALSE])))
vcov.sqlsvystat<-function(object,...) attr(object,"var")

print.sqlsvystat<-function(x,...){
  se<-SE(x)
  if (length(se)){
    mat<-cbind(coef(x),SE=SE(x))
  } else{
    mat<-as.data.frame(t(as.matrix(x[,attr(x,"resultcol"),drop=FALSE])))
  }
  print(mat)
  invisible(x)
}

svytotal.sqlsurvey<-function(x, design, na.rm=TRUE,byvar=NULL,se=FALSE,keep.estfun=FALSE,...){

  tms<-terms(x)

  
##handle subpopulations
  if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }

  updates<-NULL
  metavars<-NULL
  allv<-unique(c(all.vars(x),all.vars(byvar)))
  needsUpdates<-any(!sapply(allv,isBaseVariable,design=design))
  if (needsUpdates){
    metavars<-with(design,c(id,strata,wtname,fpc,key))
    updates<-getTableWithUpdates(design,allv,metavars,tablename)
    tablename<-updates$table
    on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,d),add=TRUE)
    for(f in updates$createfns) dbSendUpdate(design$conn,f)
  }
  
  ## handle missing values
  for(v in allv){
    nmissing<-dbGetQuery(design$conn, sqlsubst("select sum(%%wt%%) from %%table%% where %%var%% is null",
                                               list(wt=wtname, table=tablename, var=v)))[[1]][1]
    if (!is.na(nmissing) && nmissing>0) break
  }
  if (!is.na(nmissing) && nmissing>0){
    if (na.rm==FALSE) return(NA)
    notna<-parse(text=paste(paste("!is.na(",allv,")"),collapse="&"))[[1]]
    design<-do.call(subset, list(design, notna))
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights

    if (needsUpdates){
      metavars<-union(metavars,wtname)
      updates<-getTableWithUpdates(design,allv,metavars,tablename)
      tablename<-updates$table
    }
  }
  
  ## use modelmatrix if we have factors or interactions
  basevars<-rownames(attr(tms,"factors"))
  if ( any(attr(tms,"order")>1) || any(sapply(basevars, isFactorVariable, design=design))){
    mm<-sqlmodelmatrix(x,design, fullrank=FALSE)
    termnames<-mm$terms
    tablename<-sqlsubst("%%tbl%% inner join %%mm%% using(%%key%%)",
                        list(tbl=tablename,mm=mm$table, key=design$key))
  } else {
    termnames<-attr(tms,"term.labels")
  }

  
  vars<-paste("sum(",wtname,"*(1*",termnames,"))",sep="")
  
  if (is.null(byvar)){
    query<-sqlsubst("select %%vars%% from %%table%%",
                    list(vars=vars, wt=wtname,table=tablename))
  }else{
    byvar<-attr(terms(byvar),"term.labels")
    check.not.crazy.big(design$conn, tablename, all.vars(x) ,byvar)
    
    query<-sqlsubst("select %%vars%%, %%byvars%%  from %%table%% where %%wt%%<>0 group by %%byvars%% order by %%byvars%%",
                    list(vars=vars, byvars=byvar, wt=wtname, table=tablename))    
  }
  result<-dbGetQuery(design$conn, query)
  p<-length(termnames)
  names(result)<-c(termnames, byvar)

  if(se){
    utable<-basename(tempfile("_U_"))
    if (is.null(byvar)){
      unames<-paste("_",termnames,sep="")
      query<-sqlsubst("create table %%utbl%% as (select %%key%%, %%vars%% from %%table%%) with data",
                      list(utbl=utable, key=design$key,
                           vars=paste(termnames,"as",unames),
                           table=tablename))
    } else {
      bycols<-result[,-(1:p),drop=FALSE]
      qbycols<-lapply(bycols, function(v) if(is.character(v)) lapply(v,adquote) else v)
      bynames<-do.call(paste, c(mapply(paste, names(bycols),"=",qbycols,SIMPLIFY=FALSE), sep=" AND "))
      uexpr<-as.vector(outer(termnames, bynames, function(i,j) paste(i,"*(1*(",j,"))",sep="")))
      unames<-paste0("_",make.db.names(design$conn,as.vector(outer(termnames, do.call(paste, c(bycols,sep="_")), paste,sep="_")),unique=TRUE))
      query<-sqlsubst("create table %%utbl%% as (select %%key%%, %%vars%% from %%table%%)  with data",
                      list(utbl=utable, key=design$key,
                           vars=paste(uexpr,"as",unames),
                           table=tablename))
      
    }
    dbSendUpdate(design$conn, query)
    query<-sqlsubst("create unique index %%idx%% on  %%utbl%% (%%key%%)",
                    list(utbl=utable, key=design$key, idx=basename(tempfile("idx"))))
    dbSendUpdate(design$conn, query)
    if (!keep.estfun)
      on.exit(dbSendUpdate(design$conn, sqlsubst("drop table %%utbl%%",list(utbl=utable))),add=TRUE)
    vmat<-sqlvar(unames,utable,design)
    attr(result,"var")<-vmat
    
  }
  
  attr(result,"resultcol")<-1:length(termnames)
  class(result)<-c("sqlsvystat",class(result))
  result
  
}


svyquantile.sqlsurvey<-function(x,design, quantiles,build.index=FALSE,na.rm=TRUE,...){
  SMALL<-10000 ## 20 for testing. More like 1000 for production use
  if (is.null(design$subset)){
    tablename<-design$table
    weights<-design$weights
  }else{
    tablename<-sqlsubst("%%tbl%% inner join  ( select * from %%sub%% where %%wt%%>0) as foo using(%%key%%)",
                        list(tbl=design$table, sub=design$subset$table,
                             key=design$key,wt=design$subset$weights))
    weights<-design$subset$weights                         
    }
  bisect<-function(varname,wtname,  low, up, plow,pup, nlow,nup,quant,W){
    if (up==low) return(up)
    if (nup-nlow < SMALL){
      query<-sqlsubst("select %%var%%, %%wt%% from %%table%% where %%var%%>%%low%% and %%var%%<=%%up%%",
                      list(var=varname,wt=wtname,table= tablename,
                           low=low,up=up))
      data<-dbGetQuery(design$conn, query)
      return(bisect.in.mem(data, plow, pup,quant,W))
    }
    mid<-((pup-quant+0.5)*low+(quant-plow+0.5)*up)/(pup-plow+1)
    query<-sqlsubst("select sum(%%wt%%), count(*), max(%%var%%) from %%table%% where %%var%%<=%%mid%%",
                    list(var=varname,mid=mid,wt=wtname,table= tablename))
    result<-dbGetQuery(design$conn, query)
    mid<-result[[3]]
    nmid<-result[[2]]
    pmid<-result[[1]]/W
    if (mid==up && pmid>quant) return(mid)
    if (mid==low){
      query<-sqlsubst("select sum(%%wt%%)from %%table%% where %%var%% = %%mid%% ",
                      list(var=varname, mid=mid,table= tablename, wt=wtname))
      pexactmid<-dbGetQuery(design$conn,query)
      if (pmid+pexactmid>quant) return(up)
    }
    if (pmid>quant)
      bisect(varname,wtname,low,mid,plow,pmid,nlow,nmid,quant,W)
    else
      bisect(varname,wtname,mid,up,pmid, pup, nmid, nup, quant,W)
  }

  bisect.in.mem<-function(data, plow,pup,quant,W){
    data<-data[order(data[,1]),]
    p<-cumsum(data[,2])/W
    p<-p+plow
    p[length(p)]<-p[length(p)]+(1-pup)
    data[min(which(p>=quant)),1]
  }

  qsearch<-function(varname, quant){
    ll<-levels(design$zdata[[varname]])
    if (!is.null(ll) && length(ll)<100){
      tbl<-svytable(formula(paste("~",varname)),design)
      cdf<-cumsum(tbl[,2])/sum(tbl[,2])
      return(tbl[,1][min(which(cdf>=quant))])
    }
        lims<-dbGetQuery(design$conn,
                     sqlsubst("select min(%%var%%), max(%%var%%), count(*), sum(%%wt%%) from %%table%%",
                              list(var=varname, wt=weights, table= tablename)))
    bisect(varname,weights, lims[[1]],lims[[2]],0,1,0,lims[[3]],quant,lims[[4]])
  }

  if(length(quantiles)>1) stop("only one quantile")
  tms<-terms(x)
  rval<-sapply(attr(tms,"term.labels"), qsearch,quant=quantiles)
  names(rval)<-attr(tms,"term.labels")
  rval
}

metavars<-function(design,...) UseMethod("metavars")
metavars.sqlsurvey<-function(design, ...) with(design, c(id, strata,  fpc, key))
metavars.sqlrepsurvey<-function(design,...) with(design, c( repweights, key))

dropmissing<-function(expr,design,na.rm){
  if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }

  updates <- NULL
  metavars <- NULL
  allv <- all.vars(expr)
  needsUpdates <- any(!sapply(allv, isBaseVariable, design = design))
  if (needsUpdates) {
    metavars <- c(wtname,metavars(design))
    updates <- getTableWithUpdates(design, allv, metavars, 
                                   tablename)
    tablename <- updates$table
    on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,d), add = TRUE)
    for (f in updates$createfns) dbSendUpdate(design$conn, f)
   }
  
  for(v in allv){
    nmissing<-dbGetQuery(design$conn, sqlsubst("select sum(%%wt%%) from %%table%% where %%var%% is null", 
                                               list(wt=wtname, table=tablename, var=v)))[[1]][1]
    if (!is.na(nmissing) && nmissing>0) break
  }
  
  if (!is.na(nmissing) && nmissing>0){
    if (na.rm==FALSE) stop("missing values present and na.rm=FALSE")
    notna<-parse(text=paste(paste("!is.na(",all.vars(expr),")"),collapse="&"))[[1]]
    design<-do.call(subset, list(design, notna))
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }
  design
}

svylm<-function(formula,design,...) UseMethod("svylm",design)

svylm.sqlsurvey<-function(formula, design,na.rm=TRUE,...){
  tms<-terms(formula)
  yname<-as.character(attr(tms,"variables")[[2]])

  design<-dropmissing(formula,design,na.rm)

  ## handle subpopulations
  if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }


  
  mm<-sqlmodelmatrix(formula, design, fullrank=TRUE)
  termnames<-mm$terms
  tablename<-sqlsubst("%%tbl%% inner join %%mm%% using(%%key%%)",
                        list(tbl=tablename,mm=mm$table, key=design$key))

   p<-length(termnames)
   mfy<-basename(tempfile("_y_"))
   sumxy<-paste("sum(",termnames,"*%%mfy%%*%%wt%%) as _xy_",termnames,sep="")
   sumsq<-outer(termnames,termnames, function(i,j) paste("sum(",i,"*",j,"*%%wt%%)",sep=""))

   qxwx<-sqlsubst("select %%sumsq%% from %%table%%" ,
                     list(sumsq=sumsq, table=tablename, wt=wtname)
                     )
   xwx<-matrix(as.matrix(dbGetQuery(design$conn, qxwx)),p,p)
   qxwy <- sqlsubst("select %%sumxy%% from %%tablename%% inner join (select %%y%% as %%mfy%%, %%key%% from %%mf%%) as foo using(%%key%%)", 
        list(sumxy = sumxy, y = yname, key = design$key, tablename = tablename, 
        mf=mm$mf, wt=wtname, mfy=mfy))
   xwy<-drop(as.matrix(dbGetQuery(design$conn, qxwy)))
   beta<-solve(xwx,xwy)

   xytab<-basename(tempfile("_xyt_"))
   muname<-basename(tempfile("_mu_"))
   qmu<-paste("(",paste(termnames,"*",formatC(beta,format="fg",digits=16),collapse="+"),") as ",muname)
   qxytab<-sqlsubst("create table %%xytab%% as (select %%x%%, %%y%%, %%qmu%%, %%key%% from (select %%y%%, %%key%% from %%mf%%) as foo inner join %%mm%% using(%%key%%) ) with data",
                    list(xytab=xytab, x=termnames, y=yname, key=design$key,qmu=qmu,mf=mm$mf,mm=mm$table))       
   dbSendUpdate(design$conn, qxytab)
   on.exit(dbSendUpdate(design$conn, paste("drop table ",xytab)),add=TRUE)
   
   Utable<-basename(tempfile("_U_"))
   unames<-paste("_U_", termnames, sep="")
   u<-paste(termnames,"*(",yname,"-",muname,") as ",unames)
   qu<-sqlsubst("create table %%utable%% as (select %%u%%, %%key%% from %%xytab%%) with data",
   	list(key=design$key, utable=Utable, u=u,xytab=xytab))  
   dbSendUpdate(design$conn, qu)
   on.exit(dbSendUpdate(design$conn,paste("drop table ",Utable)),add=TRUE)

   Uvar<-sqlvar(unames,Utable, design)
   xwxinv<-solve(xwx)
  v<-xwxinv%*%Uvar%*%xwxinv
  names(beta)<-termnames
  results<-data.frame(beta=beta)
  
  dimnames(v)<-list(termnames, termnames)
  attr(results, "var")<-v
  
  attr(results,"resultcol")<-1:length(termnames)
  class(results)<-c("sqlsvystat",class(results))
   results
}

dim.sqlmm<-function(x){
	n<-dbGetQuery(x$conn, sqlsubst("select count(*) from %%table%%",list(table=x$table)))[[1]]
	p<-length(x$terms)
	c(n,p) 	
}

sqlvar<-function(U, utable, design){
   nstages<-length(design$id)
   units<-NULL
   strata<-NULL
   results<-vector("list",nstages)
   stagevar<-vector("list",nstages)
   p<-length(U)
   if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }
   sumUs<-sqlsubst(paste(paste("sum(",U,"*%%wt%%) as _s_",U,sep=""),collapse=", "),list(wt=wtname))
   Usums<-paste("_s_",U,sep="")
   avgs<-paste("avg(",Usums,")",sep="")
   avgsq<-outer(Usums,Usums, function(i,j) paste("avg(",i,"*",j,")",sep=""))
   for(stage in 1:nstages){
     oldstrata<-strata
     strata<-unique(c(units, design$strata[stage]))
     units<-unique(c(units, design$strata[stage], design$id[stage]))

     if(length(strata)>0){
         
       query<-sqlsubst("select %%avgs%%, %%avgsq%%, count(*) as _n_, _fpc_, %%strata%%
                      from 
                            (select %%strata%%, %%sumUs%%, avg(%%fpc%%) as _fpc_ from %%basetable%% inner join %%tbl%% using(%%key%%) group by %%units%%)  as r_temp
                      group by %%strata%%, _fpc_" ,
                       list(units=units, strata=strata, sumUs=sumUs, tbl=utable,avgs=avgs,
                            avgsq=avgsq,fpc=design$fpc[stage], strata=strata,
                            basetable=tablename, key=design$key
                            )
                       )
     } else {
       query<-sqlsubst("select %%avgs%%, %%avgsq%%, count(*) as _n_,  _fpc_
                      from 
                            (select  %%sumUs%%, avg(%%fpc%%) as _fpc_ from %%basetable%% inner join %%tbl%% using(%%key%%) group by %%units%%)  as r_temp group by _fpc_" ,
                       list(units=units, strata=strata, sumUs=sumUs, tbl=utable,avgs=avgs,
                            avgsq=avgsq,fpc=design$fpc[stage],
                            basetable=tablename, key=design$key
                            )
                       )
 
     }
     result<-dbGetQuery(design$conn, query)
     result<-subset(result, `_fpc_`!=`_n_`) ## remove certainty units
     if (is.null(oldstrata)){
       result$`_p_samp_`<-1
     } else {
       index<-match(result[,oldstrata], results[[stage-1]][,oldstrata])
       keep<-!is.na(index)
       result<-result[keep,,drop=FALSE]
       index<-index[keep]
       result$`_p_samp_`<-results[[stage-1]][index,"_n_"]/results[[stage-1]][index,"_fpc_"] ##assumes p = n/N (missing data?)
     }
     means<-as.matrix(result[,1:p])
     ssp<-as.matrix(result[,p+(1:(p*p))])
     meansq<-means[,rep(1:p,p)]*means[,rep(1:p,each=p)]
     nminus1<-(result$`_n_`-1)
     if (any(nminus1==0)){
       if (getOption("survey.lonely.psu")=="remove")
         nminus1[nminus1==0]<-Inf
       else
         stop("strata with only one PSU at stage ",stage)
     }
     stagevar[[stage]]<-((ssp-meansq) * (result$`_n_`^2)/nminus1)*result$`_p_samp_`

     if (any(result$`_fpc_`>0)) {## without-replacement
       stagevar[[stage]][result$`_fpc_`>0]<-stagevar[[stage]][result$`_fpc_`>0]*((result$`_fpc_`-result$`_n_`)/result$`_fpc_`)[result$`_fpc_`>0]
     }
     results[[stage]]<-result
 }
   vars<-lapply(stagevar, function(v) matrix(colSums(v),p,p))
   rval<-vars[[1]]
   for(i in seq(length=nstages-1)) rval<-rval+vars[[i+1]]
   dimnames(rval)<-list(U,U)
   rval
}

svytable.sqlsurvey<-function(formula, design,...){
  tms<-terms(formula)
  if (is.null(design$subset))
    tablename<-design$table
  else
    tablename<-sqlsubst("%%base%% inner join %%sub%% using(%%key%%) where %%wt%%>0",
                        list(base=design$table, sub=design$subset$table,
                             key=design$key, wt=design$subset$weights))
  query<-sqlsubst("select %%tms%%, sum(%%wt%%) from %%table%% group by %%tms%%",
                  list(wt=design$weights,
                       table=tablename,
                       tms=attr(tms,"term.labels")))
  dbGetQuery(design$conn, query)
}


adquote<-function(s) paste("\'",s,"\'",sep="")

sqlmodelmatrix<-function(formula, design, fullrank=TRUE){
  mmcol<-function(variables,levels, name.only=FALSE){
    if (length(variables)==0){
      if(name.only) return("_Intercept_") else return("1 as _Intercept_")
    }
    rval<-paste("(1*(",variables,"=",adquote(levels),"))",sep="")
    termname<-paste(variables,levels,sep="")
    if (length(rval)>1){
      rval<-paste(paste("(",rval,")",sep=""),collapse="*")
      termname<-paste(termname,collapse="_")
    }
    if (name.only)
      make.db.names(design$conn, termname)
    else 
      paste(rval,"as",make.db.names(design$conn, termname))
  }
  
  if (!all(sapply(all.vars(formula), isKnownVariable, design=design)))
    stop("some variables not in database")
  ok.names<-c("~","I","(","-","+","*")
  if (!all( all.names(formula) %in% c(ok.names, all.vars(formula))))
    stop("Unsupported transformations in formula")

  tablename<-design$table
  
  needsUpdates <- any(!sapply(all.vars(formula), isBaseVariable, design = design))
    if (needsUpdates) {
        metavars <- with(design, key)
        updates <- getTableWithUpdates(design, all.vars(formula), metavars, 
            tablename)
        tablename <- updates$table
        alreadydefined<-dbGetQuery(design$conn,
                                   paste("select count(*) from functions where name = '",updates$fnames[1],"'",sep="")
                                   )[1,1]>0
        if(!alreadydefined){
          on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,d), add=TRUE)
          for(f in updates$createfns) dbSendUpdate(design$conn,f)
        }
    } 


  mftable<-basename(tempfile("_mf_"))
  mmtable<-basename(tempfile("_mm_"))

  dbSendUpdate(design$conn, sqlsubst("create table %%mf%% as (select %%key%%, %%vars%% from %%table%%) with data",
                               list(mf=mftable, vars=all.vars(formula), table=tablename,
                                    key=design$key)))
  dbSendUpdate(design$conn, sqlsubst("create unique index %%idx%% on %%tbl%%(%%key%%)",
                                   list(idx=basename(tempfile("idx")), tbl=mftable,
                                        key=design$key)))
  
  tms<-terms(formula)
  mf<-zero.model.frame(formula,design)
 
  ## character variables get temporary factorness
  for(v in all.vars(formula)){
    if (is.character(mf[[v]])){
      charlevels<-dbGetQuery(design$conn, paste("select distinct",v,"from",mftable))[[1]]
      mf[[v]]<-factor(mf[[v]],levels=charlevels)
    }   
  }

  mm<-model.matrix(tms, mf)
  ntms<-max(attr(mm,"assign"))


  patmat<-attr(tms,"factors")
  nms<-attr(tms,"term.labels")
  orders<-attr(tms, "order")
  if (fullrank)
    contrastlevels<-function(f) {levels(f)[-1]}
  else
    contrastlevels<-function(f) levels(f)

  
  mmterms<-lapply(1:ntms,
                    function(i){
                      vars<-rownames(patmat)[as.logical(patmat[,nms[i]])]
                      if (orders[i]==1 && is.null(levels(mf[[vars]])))
                        return(list(paste(vars," as _",vars,sep="")))
                      levs<-as.matrix(expand.grid(lapply(mf[vars],contrastlevels)))
                      lapply(split(levs,row(levs)),
                             function(ll) mmcol(vars,ll))
                    })
  if (fullrank)  mmterms<-c(mmterms, list(mmcol(NULL,NULL)))
  
  mmnames<-lapply(1:ntms,
                    function(i){
                      vars<-rownames(patmat)[as.logical(patmat[,nms[i]])]
                      if (orders[i]==1 && is.null(levels(mf[[vars]])))
                        return(list(paste("_",vars,sep="")))
                      levs<-as.matrix(expand.grid(lapply(mf[vars],contrastlevels)))
                      lapply(split(levs,row(levs)),
                             function(ll) mmcol(vars,ll,TRUE))
                    })
  if (fullrank) mmnames<-c(mmnames, list(mmcol(NULL,NULL,TRUE)))

  
  mmquery<-sqlsubst("create table %%mm%% as (select %%key%%, %%terms%% from %%mf%%) with data",
                    list(mm=mmtable, id=design$id, strata=design$strata,
                         terms=unlist(mmterms), mf=mftable,
                         key=design$key))
  dbSendUpdate(design$conn, mmquery)
  dbSendUpdate(design$conn, sqlsubst("create unique index %%idx%% on %%tbl%%(%%key%%)",
                                   list(idx=basename(tempfile("idx")), tbl=mmtable,
                                        key=design$key)))
  rval<-new.env(parent=emptyenv())
  rval$table<-mmtable
  rval$mf<-mftable
  rval$formula<-formula
  rval$terms<-unlist(mmnames)
  rval$call<-sys.call()
  rval$conn<-design$conn
  reg.finalizer(rval, sqlmmDrop)
  class(rval)<-"sqlmm"
  rval
}


head.sqlmm<-function(x,n=6,...) dbGetQuery(x$conn,
     sqlsubst("select * from %%mm%% limit %%nn%%", list(mm=x$table,nn=n)))

hexbinmerge<-function(h1,h2){
	h<-h1
	h@cID<-NULL
	
	extra<-!(h2@cell %in% h1@cell)
	cells<-sort(unique(c(h1@cell, h2@cell)))
	i1<-match(h1@cell,cells)
	i2<-match(h2@cell,cells)
	i2new<-match(h2@cell[extra], cells)
	i2old<-match(h2@cell[!extra], cells)
	
	n<-length(cells)

	count<-integer(n)
	count[i1]<-h1@count
	count[i2new]<-h2@count[extra]
	count[i2old]<-count[i2old]+h2@count[!extra]
	h@count<-count
		
	xcm<-numeric(n)
	xcm[i1]<-h1@xcm
	xcm[i2new]<-h2@xcm[extra]
	xcm[i2old]<-xcm[i2old]*(1-h2@count[!extra]/count[i2old])+h2@xcm[!extra]*(h2@count[!extra]/count[i2old])
	h@xcm<-xcm
	
	ycm<-numeric(n)
	ycm[i1]<-h1@ycm
	ycm[i2new]<-h2@ycm[extra]
	ycm[i2old]<-ycm[i2old]*(1-h2@count[!extra]/count[i2old])+h2@ycm[!extra]*(h2@count[!extra]/count[i2old])
	h@ycm<-ycm
	
	h@n<-as.integer(sum(count))
	h@ncells<-n	
	h@cell<-cells
	h
	}

sqlhexbin<-function(formula, design, xlab=NULL,ylab=NULL, ...,chunksize=10000){
  require("hexbin")
  tms<-terms(formula)
  x<-attr(tms,"term.labels")
  if (length(x)>2) stop("only one x variable")
  y<-deparse(formula[[2]])
  if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }
  needsUpdates <- any(!sapply(all.vars(formula), isBaseVariable, design = design))
  if (needsUpdates) {
    metavars <- with(design, c(id, wtname, key))
    updates <- getTableWithUpdates(design, all.vars(formula),  metavars, tablename)
    tablename <- updates$table
    on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,  d), add = TRUE)
    for (f in updates$createfns) dbSendUpdate(design$conn,  f) 
  }
  
  query<-sqlsubst("select min(%%x%%) as xmin, max(%%x%%) as xmax, min(%%y%%) as ymin, max(%%y%%) as ymax from %%tbl%% where %%wt%%>0",
                  list(x=x,tbl=tablename,y=y, wt=wtname))
  ranges<-dbGetQuery(design$conn, query)
  xlim<-with(ranges,c(xmin,xmax))
  ylim<-with(ranges,c(ymin,ymax))
  N<-dbGetQuery(design$conn, sqlsubst("select count(*) from %%tbl%% where %%wt%%>0",
                                      list(tbl=tablename,wt=wtname)))[[1]]
  
  query<-sqlsubst("select %%x%% as x, %%y%% as y, %%wt%% as _wt from %%tbl%% where %%wt%% > 0",
                  list(x=x,y=y,tbl=tablename,wt=wtname))
  result<-dbSendQuery(design$conn, query)
  on.exit(dbClearResult(result), add=TRUE)
  
  got<-0
  htotal<-NULL
  while(got<N){
    df<-fetch(result, chunksize)
    if (nrow(df)==0) break
    h<-hexbin(df$x,df$y,IDs=TRUE,xbnds=xlim,ybnds=ylim)
    h@count<-as.vector(tapply(df[["_wt"]],h@cID,sum))
    if (is.null(htotal)){
      htotal<-h
    } else 
    htotal<-hexbinmerge(htotal,h) 
    
  }
  if (is.null(xlab)) xlab<-x
  if (is.null(ylab)) ylab<-y
  
  gplot.hexbin(htotal,xlab=xlab, ylab=ylab, ...)
  invisible(htotal)
}


svyplot.sqlsurvey<-svyplot.sqlrepsurvey<-function (formula, design, style = c("hex", "grayhex","subsample"), ...) 
{
  style <- match.arg(style)
  mf<-match.call()
  mf[[1]]<-switch(style, hex = as.name("sqlhexbin"), grayhex=as.name("sqlhexbin"),subsample = as.name("sqlscatter"))
  mf$style<-switch(style, hex="centroids",grayhex=NULL,subsample=NULL)
  invisible(eval(mf,parent.frame()))
}


sqlscatter<-function(formula,design,npoints=1000,nplots=4,...){
  if (is.null(design$subset)){
    tablename<-design$table
    wtname<-design$weights
  } else {
    tablename<-sqlsubst(" %%tbl%% inner join %%subset%% using(%%key%%) ",
                        list(tbl=design$table, subset=design$subset$table, key=design$key))
    wtname<-design$subset$weights
  }

  needsUpdates <- any(!sapply(all.vars(formula), isBaseVariable, design = design))
  if (needsUpdates) {
    metavars <- with(design, c(id, wtname, key))
    updates <- getTableWithUpdates(design, all.vars(formula),  metavars, tablename)
    tablename <- updates$table
    on.exit(for (d in updates$destroyfns) dbSendUpdate(design$conn,  d), add = TRUE)
    for (f in updates$createfns) dbSendUpdate(design$conn,  f) 
  }

  n<-round(10*sqrt(nrow(design)))
  
  vars<-union(all.vars(formula),all.vars(substitute(list(...))))
  samp<-dbGetQuery(design$conn, sqlsubst("select %%wt%%,%%vars%% from %%table%% sample %%n%%",list(vars=vars,wt=wtname, table=tablename,n=n)))
  
  m<-match.call(expand.dots=FALSE)
  for(i in 1:nplots){
    idx<-sample(1:n,npoints,prob=samp[,1], replace=TRUE)
    subsamp<-samp[idx,]
    dots<-lapply(m$...,eval,subsamp,parent.frame())
    do.call(plot, c(list(formula=formula, data=samp[idx,]),dots))
  }
  invisible(NULL)
}
  


dimnames.sqlsurvey<-function(x,...) list(character(0),allVarNames(x))

Try the sqlsurvey package in your browser

Any scripts or data that you put into this service are public.

sqlsurvey documentation built on May 2, 2019, 4:53 p.m.