#' Conversion from a long dataframe to a sptm object
#'
#' This function allows you to calculate a Gaussian process kernel
#' @param df.obs entry points
#' @param frequency frequency
#' @keywords data conversion
#' @export
#' @examples
#' data("wq_analysis_week2")
#' SPTMData(wq.raw.obs, frequency = "quarter")
SPTMData = function(df.obs, agg = "month", seasonality = "month", method = "loess", debug=F, degFree = 10){
SiteID = levels(df.obs$ID)
if(agg == "year"){
df.obs$date.temp = paste0(format(df.obs$date, "%Y"), "-01-01")
temp = aggregate(obs ~ date.temp + ID, data = df.obs, FUN = mean)
colnames(temp)[1] <- "date"
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "year")
dfd = data.frame(date=dates)
}
if(agg == "6-months"){
temp = aggregate(list(obs = df.obs$obs),
list(date = cut(df.obs$date, "6 months"), ID = df.obs$ID),
mean)
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "6 months")
dfd = data.frame(date=dates)
}
if(agg == "4-months"){
temp = aggregate(list(obs = df.obs$obs),
list(date = cut(df.obs$date, "4 months"), ID = df.obs$ID),
mean)
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "4 months")
dfd = data.frame(date=dates)
}
if(agg == "quarter"){
df.obs$date.temp = as.yearqtr(df.obs$date)
temp = aggregate(obs ~ date.temp + ID, data = df.obs, FUN = mean)
colnames(temp)[1] <- "date"
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "quarter")
dfd = data.frame(date=dates)
}
if(agg == "2-months"){
temp = aggregate(list(obs = df.obs$obs),
list(date = cut(df.obs$date, "2 months"), ID = df.obs$ID),
mean)
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "2 months")
dfd = data.frame(date=dates)
}
if(agg == "month"){
df.obs$date.temp = as.yearmon(df.obs$date)
temp = aggregate(obs ~ date.temp + ID, data = df.obs, FUN = mean)
colnames(temp)[1] <- "date"
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "month")
dfd = data.frame(date=dates)
}
if(agg == "week"){
df.obs$date.temp = format(df.obs$date, "%Y-%U %u")
temp = aggregate(obs ~ date.temp + ID, data = df.obs, FUN = mean)
temp$date = format(strptime(temp$date.temp, "%Y-%U %u"), "%Y-%m-%d")
temp$date.temp <- NULL
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "week")
dfd = data.frame(date=dates)
}
if(agg == "day"){
df.obs$date.temp = format(df.obs$date, "%Y-%m-%d")
temp = aggregate(obs ~ date.temp + ID, data = df.obs, FUN = mean)
colnames(temp)[1] <- "date"
dates = seq(min(as.Date(temp$date)), max(as.Date(temp$date)), by = "day")
dfd = data.frame(date=dates)
}
temp$date = as.Date(temp$date)
dfd$date= as.Date(dfd$date)
y=NULL
for(i in SiteID){
df.m = merge(dfd, temp[temp$ID %in% i,], by="date", all.x = TRUE)
df.m[which(is.na(df.m[,3])),2] <- i
y = rbind(y, df.m)
}
y = y[order(y$date),]
if(debug==T){browser()}
basis = NULL
list.idx = NULL
if(method == "loess"){
season = array(NA, dim = c(length(dates), length(seasonality) + 2, length(SiteID)),
dimnames=list(as.character(dates), c("intercept","trend",seasonality), SiteID))
idx.n = which(!(colnames(season) %in% c("intercept","trend","year","6-months","4-months","quarter","month","week","day")))
if(length(idx.n)>0){season <- season[,-idx.n,]}
for(i in 1:length(SiteID)){
Y = y[which(y$ID %in% SiteID[i]),]
freq.gas = freqFun(agg, seasonality)
if(agg == "year"){
start.date = as.numeric(format(min(dates),"%Y"))
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(format(dates, "%Y"))
}
if(agg == "6-months"){
start.date = as.numeric(format(min(dates),"%Y")) + round((as.numeric(format(min((dates)),"%m"))-1)/6)/2 -.5
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(format((dates),"%Y")) + round((as.numeric(format(((dates)),"%m"))-1)/6)/2 -.5
}
if(agg == "4-months"){
start.date = as.numeric(format(min(dates),"%Y")) + (as.numeric(format(min((dates)),"%m"))-1)/12
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(format((dates),"%Y")) + (as.numeric(format(min((dates)),"%m"))-1)/12
}
if(agg == "quarter"){
start.date = as.numeric(format(min(dates),"%Y")) + (as.numeric(format(min(as.yearqtr(dates)),"%q"))-1)/4
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(as.yearqtr(dates))
}
if(agg == "month"){
start.date = as.numeric(format(min(dates),"%Y")) + (as.numeric(format(min(dates),"%m"))-1)/12
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(as.yearmon(dates))
}
if(agg == "week"){
dates = as.Date(dates)
start.date = as.numeric(format(min(dates),"%Y")) + (as.numeric(format(min(dates),"%U"))-1)/52
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(format(dates, "%Y")) + as.numeric(format(dates,"%U")) / 52
}
if(agg == "day"){
dates = as.Date(dates)
start.date = as.numeric(format(min(dates),"%Y")) + (as.numeric(format(min(dates),"%j"))-1)/366
gas <- ts((Y$obs),freq=freq.gas, start=start.date)
dates.i = as.numeric(format(dates, "%Y")) + as.numeric(format(dates,"%m")) / 12 + as.numeric(format(dates,"%d")) / 31
}
if (freq.gas < 2 || as.integer(length(gas)) <= 2 * freq.gas){
dim.j=1
idx.season = match(round(index(gas),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){
if(length(idx.season)- sum(is.na(as.matrix(gas)))==1){
season[idx.season,dim.j,i] <- as.matrix(gas)
}else{
season[idx.season,dim.j,i] <- as.matrix(gas - mean(gas, na.rm=T))
}
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = 0
}else{
dim.j = 1
if("decade"%in% seasonality){
s.win = round(diff(as.numeric(format(range(dates),"%Y")))/10)
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("5-years"%in% seasonality){
s.win = round(diff(as.numeric(format(range(dates),"%Y")))/5)
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("2-years"%in% seasonality){
s.win = round(diff(as.numeric(format(range(dates),"%Y")))/2)
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("year"%in% seasonality){
s.win = diff(as.numeric(format(range(dates),"%Y")))
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("quarter"%in% seasonality){
s.win = diff(as.numeric(format(range(dates),"%Y")))*4
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("month"%in% seasonality){
s.win = diff(as.numeric(format(range(dates),"%Y")))*12
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("week"%in% seasonality){
s.win = diff(as.numeric(format(range(dates),"%Y")))*52
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
if("day"%in% seasonality){
s.win = diff(as.numeric(format(range(dates),"%Y")))*365
loess.ts = stl(gas, s.window = s.win, na.action = na.approx)
idx.season = match(round(index(loess.ts$time.series),3),round(dates.i,3))
dim.j = dim.j + 1
if(dim.j==2){season[idx.season,dim.j,i] <- loess.ts$time.series[,2] - mean(loess.ts$time.series[,2],na.rm=T)
dim.j = dim.j + 1}
season[idx.season,dim.j,i] = loess.ts$time.series[,1]
start.date = index(loess.ts$time.series)[1]
gas <- ts(loess.ts$time.series[,3],freq=freq.gas, start=start.date)
}
}
season[,1,i] <- 1
}
}
if(method == "splines"){
season = array(NA, dim = c(length(dates), length(seasonality) + 2, length(SiteID)),
dimnames=list(as.character(dates), c("intercept","trend",seasonality), SiteID))
idx.n = which(!(colnames(season) %in% c("intercept","trend","year","6-months","4-months","quarter","month","week","day")))
if(length(idx.n)>0){season <- season[,-idx.n,]}
for(i in 1:length(SiteID)){
# i = 1
Y = y[which(y$ID %in% SiteID[i]),]
bbF = matrix(1, ncol = 1, nrow = length(Y$date))
list.idx = list(1)
#if("trend" %in% seasonality){
bbTemp = bs(as.numeric(Y$date)/1000,df = degFree)
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
#}
if("decade"%in% seasonality){
datef = as.numeric(format(Y$date, "%Y")) + (as.numeric(format(Y$date, "%m"))-1)/12 + (as.numeric(format(Y$date, "%d"))-1)/31
bbTemp = bs(datef,df = diff(range(as.numeric(format(Y$date, "%Y"))))/10)
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("5-years"%in% seasonality){
datef = as.numeric(format(Y$date, "%Y")) + (as.numeric(format(Y$date, "%m"))-1)/12 + (as.numeric(format(Y$date, "%d"))-1)/31
bbTemp = bs(datef,df = diff(range(as.numeric(format(Y$date, "%Y"))))/5)
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("2-years"%in% seasonality){
datef = as.numeric(format(Y$date, "%Y")) + (as.numeric(format(Y$date, "%m"))-1)/12 + (as.numeric(format(Y$date, "%d"))-1)/31
bbTemp = bs(datef,df = diff(range(as.numeric(format(Y$date, "%Y"))))/2)
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("year"%in% seasonality){
datef = as.numeric(format(Y$date, "%Y")) + (as.numeric(format(Y$date, "%m"))-1)/12 + (as.numeric(format(Y$date, "%d"))-1)/31
bbTemp = bs(datef,df = diff(range(as.numeric(format(Y$date, "%Y"))))/1)
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("quarter"%in% seasonality){
datef = as.numeric(format(Y$date, "%m")) + as.numeric(format(Y$date, "%d"))/31
bbTemp = bs(datef,df = 4)
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("month"%in% seasonality){
datef = as.numeric(format(Y$date, "%m")) + as.numeric(format(Y$date, "%d"))/31
bbTemp = bs(datef,df = length(unique(as.numeric(format(Y$date, "%m")))))
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("week"%in% seasonality){
datef =as.numeric(format(Y$date, "%W")) + as.POSIXlt(Y$date)$wday / 7
bbTemp = bs(datef,df = length(unique(as.numeric(format(Y$date, "%W")))))
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
if("day"%in% seasonality){
bbTemp = bs(as.numeric(format(Y$date, "%j")),df = length(unique(as.numeric(format(Y$date, "%j")))))
tmp <- cor(bbTemp)
tmp[upper.tri(tmp)] <- 0
diag(tmp) <- 0
bbTemp <- bbTemp[,!apply(tmp,2,function(x) any(x > 0.75))]
bbF = cbind(bbF,bbTemp)
list.idx = c(list.idx, list((ncol(bbF) - ncol(bbTemp)+1):ncol(bbF)))
}
idx.na = which(!is.na(Y$obs))
w = ginv(t(bbF[idx.na,]) %*% bbF[idx.na,]) %*% t(bbF[idx.na,]) %*% matrix(Y$obs[idx.na], ncol=1)
var.w = ginv(t(bbF[idx.na,]) %*% bbF[idx.na,]) * mean((Y$obs-bbF %*% w)^2, na.rm=T)
for(j in 1:length(list.idx)){
if(length(list.idx[[j]]) == 1){season[,j,i] = bbF[,list.idx[[j]]] * w[list.idx[[j]]]}
if(length(list.idx[[j]]) > 1){season[,j,i] = bbF[,list.idx[[j]]] %*% w[list.idx[[j]]]}
delta = diff(range(season[,j,i]))
if(delta == 0){delta = season[,j,i] = 1}else{
season[,j,i] = (season[,j,i] - min(season[,j,i]))/delta*2 - 1
}
}
}
basis = bbF
}
sptm.data = list(obs.data = y, season = season, basis = basis, list.idx = list.idx)
class(sptm.data) <- 'sptm'
return(sptm.data)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.