R/DownloadWunderground.R

Defines functions download_time_seq

Documented in download_time_seq

#' Download Wunderground data
#' @title Wunderground time series from URL
#' @description Function to download 5min weather data from the \url{www.wunderground.nl} website. For a specific time period,
#' station and variable(s) the data can be downloaded and saved in a directory. For each stationname a new folder is created (unless)
#' the folder already exists. After downloading the daily files all the .txt files are combined and the daily files are deleted.
#' @param stationname Character, name of the station
#' @param start Date, start of the download period
#' @param stop Date, stop period of the download period
#' @param var Character, variable(s) which you want to download
#' @param folder.loc Character, location were the data will be stored. For each stationname a new folder is created.
#' @importFrom data.table fread
#' @importFrom stats complete.cases
#' @importFrom lubridate day mday year month
#' @importFrom utils write.table
#' @examples
#' #Run the example below to store data at the current location
#' #download_time_seq(stationname = "IZUIDHOL87",
#' #                   start = as.Date("2018/04/01"),
#' #                   stop = as.Date("2018/05/01"),
#' #                   var = "TemperatureC",
#' #                   folder.loc = ".")
#' #For multiple stations use lapply:
#' #stations<-c('IZUIDHOL36','ILIMBURG36')
#' #lapply(stations,download_time_seq)
#' @export
download_time_seq<-function(stationname = "IUTRECHT23",
                            start = as.Date("2009/01/01"),
                            stop = as.Date("2018/11/27"),
                            var = "TemperatureF",
                            folder.loc = "C:/Users/marie/OneDrive/Documenten/uhi_max/inst/Wunderground/"){

if (dir.exists(paste0(folder.loc,"/",stationname))) {
  message(paste0("Already a directory for available for ",stationname))
  return(TRUE)
} else {
message(paste0("Creating a new directory for ",stationname))
dir.create(paste0(folder.loc,"/",stationname)) # maakt nieuw mapje voor het station waar een textfile per dag in komt
}

time.seq<-seq(start,stop,by="day")


for(i in 1:length(time.seq)){
t<-time.seq[i]
URL<-paste0("https://www.wunderground.com/weatherstation/WXDailyHistory.asp?ID=",stationname,"&day=",mday(t),"&year=",year(t),"&month=",month(t),"&format=1")

df<-fread(URL,sep=",",fill=TRUE)

#Main point of the complete.cases is to get rid of the <br> NA NA NA ... lines in the file

df<-try(df[complete.cases(df[[var]])],TRUE) # In my case I'm only interested in the temperature
try(write.table(df,paste0(folder.loc,"/",stationname,"/","daydata_",t,".txt"),row.names = FALSE,sep=","),TRUE)

}

txt.files=list.files(path=paste0(folder.loc,"/",stationname,"/"), full.names=TRUE, pattern=".txt")
df<-lapply(txt.files,function(x)fread(x))

#only select files with data
rows<-unlist(lapply(df,nrow))
I.sub<-which(rows!=0)

if(length(I.sub)==0){
  fld<-paste0(folder.loc,"/",stationname,"/")
  unlink(fld,recursive=TRUE)
  return(FALSE)
}
df<-df[I.sub]

#check if the columns exist, rm non existing
colnms<-unlist(lapply(df,function(x) "DateUTC<br>" %in% colnames(x) & var %in% colnames(x)))
df<-df[colnms]

if(length(df)==0){
  fld<-paste0(folder.loc,"/",stationname,"/")
  unlink(fld,recursive=TRUE)
  return(FALSE)
}

df.t<-lapply(df,function(x)subset(x,select=c(var,"DateUTC<br>"))) #Subsetting Columns in all the txt files
df.m<-Reduce(function(x,y) rbind(x,y,fill=TRUE),df.t)

#exit if there is no data in the files
if(nrow(df.m)==0){
  fld<-paste0(folder.loc,"/",stationname,"/")
  unlink(fld,recursive=TRUE)
  return(FALSE)
}
names(df.m)<-c(var,"Timestamp_UTC")

write.table(df.m,paste0(folder.loc,"/",stationname,"/","daydata_rbind_",start,"_until_",stop,".txt"),row.names = FALSE,sep=",")
message(paste0("Combined all the files into: ","daydata_rbind_",start,"_until_",stop,".txt"))

unlink(txt.files)   # haalt alle dagfiles weg, alleen combined_stationname.txt bestaat nog.
message("removing all the single dayfiles")

}
MariekeDirk/uhi_max documentation built on May 23, 2019, 1:44 p.m.