knitr::opts_chunk$set(echo = TRUE)

Download data archive from luftdaten.info

Function luftdaten::LD_download() saves data locally.

library(luftdaten)
library(curl)
#data("sensortypes")
#for (s in sensortypes)
#{
LD_download("/media/schmid/local/data/luftdaten/", start="2018-02-28", end="2018-03-06")
#}

Plot the size of data per day:

folder="/media/schmid/local/data/luftdaten/"
here=getwd()
system(paste0("cd ",folder,"; du -s * > ",here,"/size.txt"))
size<-read.table(paste0(here,"/size.txt"), header=FALSE, col.names=c("size","date"))
size$date<-as.Date(size$date)
size$size<-size$size/(2^10)
plot(size$date,size$size, xlab="Date",main="Data per day", ylab="MB",type="s")

Prepare database (mysqlite using RSQLite package)

One database per sensor type

  source="/media/schmid/local/data/luftdaten/" 
  x=as.Date("2016-10-01")
  end<-as.Date("2018-02-28")
  st<-c()
  while(x<end)
    {
    files<-list.files(paste0(source,"/",x))
    files<-files[grep(pattern = ".csv",files)]
    if (length(files)>0)
      {
      strip<-strsplit(files,"_")
      strip<-matrix(unlist(strip),nrow=length(strip),byrow=TRUE)[,2]
      st<-unique(c(st,strip))
    }
    x<-x+1
  }
  print(st)
  sensortypes<-st
  save(sensortypes, file="../inst/data/sensortypes.rda")
library(luftdaten)
data(sensortypes)
for (s in sensortypes)
{
LD_sql2(start="2018/02/28", end="2018/02/28", source="/media/schmid/local/data/luftdaten/",dbname="db/orig/", sensortype=s,verbose=FALSE)
}
for (s in sensortypes)
{
LD_sql2(start="2018/02/28", end=Sys.Date()-1, source="/media/schmid/local/data/luftdaten/",dbname="../db/ab1803_", sensortype=s,verbose=FALSE)
}
library(RSQLite)
library(DBI)
files<-fs::dir_ls("db/orig")
for (f in files)
{
  db<-dbConnect(SQLite(),f)
  dbListTables(db)
  print(f)
  fields<-dbListFields(db, "data")

  locid<-dbReadTable(db,"locid")

  res<-dbSendQuery(db,"SELECT * from data")
  temp<-dbFetch(res,50000)

  while(dim(temp)[1]>0)
  {
    time<-strsplit(temp$timestamp,"-")
    time<-matrix(unlist(time),ncol=dim(temp)[1])
    time<-t(time[1:2,])
    ti<-unique(time)
    print(ti)
    for (i in 1:dim(ti)[1])
    {
      w<-(time[,1]==ti[i,1])&(time[,2]==ti[i,2])
      temp0<-temp[w,]
      for (field in c("P1","P2","temperature","humidity","pressure"))
      {
        if (any(names(temp0)==field))
        {
          w0<-which(names(temp0)==field)
          db0<-dbConnect(SQLite(),paste0("db/data/",field,"-",ti[i,1],"-",ti[i,2],".sqlite"))
          temp2<-data.frame("timestamp"=temp0$timestamp, "locid"=temp0$locid,"value"=temp0[,w0])
          dbWriteTable(db0,"data",temp2,append=TRUE)
          locid2<-unique(temp2$locid)
          for (l in locid)
          {
            w0<-which(locid$id==l)
          }
          temp2<-try(dbGetQuery(db0, paste0("SELECT * from locid where id=",l)),silent=TRUE)
          locid0<-locid[w0,]
          if (class(temp2)=="try-error")
          {
            dbWriteTable(db0,"locid",locid0,append=TRUE)
          }  
          else
          {
            if (dim(temp2)[1]==0)
            {
              dbWriteTable(db0,"locid",locid0,append=TRUE)
            }
          }
          dbDisconnect(db0)
        }
      }
    }
  temp<-dbFetch(res,10000)
}  
}
  if(0)
    {
    res<-dbSendQuery(db,"SELECT * from processed")
    temp<-dbFetch(res,10000)
    while(dim(temp)[1]>0)
    {
      time<-strsplit(temp$file,"-")
      time<-matrix(unlist(time),nrow=3)
      time<-t(time[-3,])
      ti<-unique(time)
      for (i in 1:dim(ti)[1])
      {
        w<-(time[,1]==ti[i,1])&(time[,2]==ti[i,2])
        temp0<-temp[w,]
        db0<-dbConnect(SQLite(),paste0("db/DHT22-",ti[1],"-",ti[2],".sqlite"))
        dbWriteTable(db0,"processed",temp0,append=TRUE)
        dbDisconnect(db0)
      }

  temp<-dbFetch(res,10000)
}  

}


volkerschmid/luftdaten documentation built on May 26, 2019, 5:35 a.m.