Standard deviation with SQLite is zero when there is one data item, not the expected NA. Notice the sd() calculation agrees with R's local calculation when n>1, so this isn't just a sample-variance versus population-variance issue.

# Chunk options for the rendered report, plus a console width of 100.
knitr::opts_chunk$set(collapse = TRUE, comment = " # ")
options(width = 100)

# Attach the two packages under test and print the version of each.
library("dplyr")
library("RSQLite")
packageVersion("dplyr")
packageVersion("RSQLite")

# In-memory SQLite source used by every check in this script.
my_db <- dplyr::src_sqlite(":memory:", create = TRUE)

# Sanity check: copy a column that contains NA into table "d" and print the
# database-backed table, to see how the missing value is represented.
d <- data.frame(x = c(1, NA, 3))
dbData <- dplyr::copy_to(
  my_db,
  d,
  name = "d",
  create = TRUE,
  overwrite = TRUE
)
print(dbData)

# For n = 1, 2, 3: build a data frame whose x column is 1..n, then print the
# standard deviation computed locally and the one computed through the
# SQLite-backed copy of the same data.
for (n in seq_len(3)) {
  print("***********")
  print(paste("n", n))

  # Remove the previous copy of table "d" before re-creating it.
  dplyr::db_drop_table(my_db$con, "d")
  d <- data.frame(x = seq_len(n))

  print("local")
  print(
    dplyr::summarise_all(d, dplyr::funs(sd))
  )

  dbData <- dplyr::copy_to(
    my_db,
    d,
    name = "d",
    create = TRUE,
    overwrite = TRUE
  )

  print("RSQLite")
  print(
    dplyr::summarise_all(dbData, dplyr::funs(sd))
  )
  print("***********")
}

Filed as RSQLite 201.

# Show the details of the running R build.
R.version
# Remove every (non-hidden) object from the workspace, then run the garbage
# collector.
# NOTE(review): presumably this is meant to release the SQLite handle held by
# my_db -- confirm; wiping the whole workspace is a blunt way to do that.
rm(list = ls(all.names = FALSE))
gc()


WinVector/replyr documentation built on Oct. 22, 2020, 8:07 p.m.