################################################################
#' Pivot at-site data
#'
#' Returns a matrix of a given variable with time in rows and station
#' index in columns. Useful to obtain observations that occur at
#' the same time.
#'
#' @param form Formula that specify the variable of interest in
#' \code{x} as well as time and location indexes.
#'
#' @param x Data.frame containing at-site date. If \code{x}
#' is passed directly without formula,
#' it is assumed that the first
#' three columns are: station index, time, variable.
#'
#'
#' @examples
#' site1 <- data.frame(site = 1, year = seq(2000,2010),
#' v =rnorm(11), w =rnorm(11))
#' site2 <- data.frame(site = 2, year = seq(1995,2007),
#' v =rnorm(13), w =rnorm(13))
#' site3 <- data.frame(site = 3, year = seq(1995,2010),
#' v =rnorm(16), w =rnorm(16))
#' xdf <- rbind(site1,site2, site3)
#' xdf; pivotAtSite(v~year+site,xdf)
#'
#' @export
#################################################################
pivotAtSite <- function(x,...) UseMethod('pivotAtSite',x)
#' @export
pivotAtSite.data.frame <- function(x){
vname <- names(x)
mx <- reshape2::melt(x[,vname], id = vname[1:2])
ans <- reshape2::dcast(mx,
as.formula(paste(vname[1],vname[2],sep='~')))
return(ans)
}
#' @export
#' @rdname pivotAtSite
pivotAtSite.formula <- function(form, x){
if(!is.data.frame(x))
stop('The variable x must be a data.frame')
ans <- model.frame(form,x)[,c(2,3,1)]
return(pivotAtSite(ans))
}
#' @export
pivotAtSite.matrix <- function(x){
return(pivotAtSite(as.data.frame(x)))
}
##################################################################
#' Organizing paired observation
#'
#' Provide a function that creates a new dataset containing all
#' paired observations, with distance and dependence measures.
#'
#' @param form,xform,yform Formula that specify which variables
#' of \code{x} or \code{y} are used.
#'
#' @param x,y Data.frame of the observation and
#' station characteristics.
#'
#' @param method Type of distance to compute. Either 'geo' for
#' geographical distance or 'euclidean' for euclidean distance.
#'
#' @param htol Threshold that limit the paired distances.
#'
#' @param pearson If \code{TRUE}, the pearson correlation is return.
#' @param spearman If \code{TRUE}, the Spearman rho is return.
#' @param kendall If \code{TRUE}, the Kendall tau is return.
#' @param pseudo If \code{TRUE}, the pseudo-observations are return.
#' @param coord If \code{TRUE}, the coordinates are return.
#'
#'
#' @details
#'
#' With \code{x} or the function \code{pobs}, the formula is
#' of the form :
#' \code{value ~ id + year}, which result in a data.frame with twice
#' the number of columns. In order, they represent the values that
#' composes the paired observation, the station identificator and
#' the time. This function is usually used for
#' transforming a dataset containing hydrological variables from
#' several station at several sampling times.
#'
#' With \code{y} or the function \code{pdist}, the formula is of
#' the form :
#' \code{id ~ lon + lat}, which returns a data.frame with twice
#' the number of columns, plus the paired distance and other
#' related measures of associations. In order the
#' variable represent the station identificator and the coordinates.
#' If \code{method = 'euclidean'} the distance can be calculated
#' between stations characteristics. In the case of 3 variables,
#' the formula could be of the form : \code{id ~ var1 + var2 + var3}.
#'
#' The function \code{pairwise} combine the result of \code{pobs}
#' and \code{pdist} in a list and calculate measure of association
#' between the paired observations.
#'
#' @export
#'
#' @examples
#'
#' xdf <- data.frame(id = factor(c('a','a','b','b','b','c','c'),
#' levels = c('a','b','c','d')),
#' year = 2010 + c(1:2,1:3,1:2),
#' value = rlnorm(7))
#'
#' pobs(value~id+year, xdf)
#'
#' coord <- data.frame(id = c('a','b','c','d'),
#' lon = runif(4)-80,
#' lat = runif(4)+43)
#'
#' pdist(id~lon+lat,coord)
#' pdist(id~lon+lat,coord, htol = 50)
#'
#' pairwise(value~id+year, xdf, id~lon+lat,coord)
#'
#'##################################################################
pairwise <- function(x, ...) UseMethod('pairwise',x)
pairwise.data.frame <- function(x, y, method = 'geo', htol = NULL,
pseudo = TRUE, coord = TRUE,
pearson = TRUE, spearman = TRUE,
kendall = TRUE){
xname <- names(x)
yname <- names(y)[-1]
lvl <- levels(y$id)
names(x) <- c('id','year','var')
names(y) <- append('id', yname)
## Create paired observation
px <- pobs(x)
## Compute the number of paired obs.
np <- dplyr::group_by(px, id_1, id_2)
np <- dplyr::summarise(np, nb = n())
## Pivot observation
if(pseudo | pearson | spearman | kendall){
tx <- pivotAtSite(x[,c(2,1,3)])
}
## Compute pivoted pseudo observation
if(pseudo | spearman){
u <- tx
u[,-1] <- rankit(tx[,-1])
rownames(u) <- tx$year
}
## Unpivot pseudo observation
if(pseudo){
up <- reshape2::melt(u, 'year')[,c(2,1,3)]
names(up) <- c('id','year','u')
up <- dplyr::filter(up, !is.na(u))
up$id <- factor(as.character(up$id),lvl)
up <- pobs(up)
}
## Compute pearson correlation
suppressWarnings(
cx <- cor(tx[,-1], use = "pairwise.complete.obs"))
cp <- reshape2::melt(as.matrix(cx))
names(cp) <- c('id_1','id_2', 'theta')
cp$id_1 <- factor(as.character(cp$id_1),lvl)
cp$id_2 <- factor(as.character(cp$id_2),lvl)
## Copute spearman rho
suppressWarnings(
sx <- cor(u[,-1], use = "pairwise.complete.obs"))
sp <- reshape2::melt(as.matrix(sx))
names(sp) <- c('id_1','id_2', 'rho')
sp$id_1 <- factor(as.character(sp$id_1),lvl)
sp$id_2 <- factor(as.character(sp$id_2),lvl)
## Copute spearman rho
suppressWarnings(
kx <- cor(tx[,-1], use = "pairwise.complete.obs"))
kp <- reshape2::melt(as.matrix(kx))
names(kp) <- c('id_1','id_2', 'tau')
kp$id_1 <- factor(as.character(kp$id_1),lvl)
kp$id_2 <- factor(as.character(kp$id_2),lvl)
## Compute the pairwise distance
dp <- pdist(y, method = method, htol = htol)
## Join all the information
if(pseudo) px <- dplyr::left_join(px, up,
by = c('id_1', 'id_2' ,'year'))
if(!coord) dp <- dplyr::select(dp,id_1, id_2, dist)
dp <- dplyr::left_join(dp,np, by = c("id_1", "id_2"))
if(pearson) dp <- dplyr::left_join(dp,cp, by = c("id_1", "id_2"))
if(spearman) dp <- dplyr::left_join(dp,sp, by = c("id_1", "id_2"))
if(kendall) dp <- dplyr::left_join(dp,kp, by = c("id_1", "id_2"))
ans <- list()
ans$obs <- px
ans$dist <- dp
return(ans)
}
#' @export
#' @rdname pairwise
pairwise.formula <- function(xform, x, yform, y,
method = 'geo', htol = Inf,
pseudo = TRUE, coord = TRUE,
pearson = TRUE, spearman = TRUE,
kendall = TRUE){
if(!is.data.frame(x) )
stop('The variable x must be a data.drame')
x <- x[,all.vars(xform)[c(2,3,1)]]
if(!is.data.frame(y) )
stop('The variable x must be a data.drame')
y <- y[, all.vars(yform)]
pairwise(x,y,method = 'geo', htol = htol,
pseudo = pseudo, coord = coord,
pearson = pearson, spearman = spearman,
kendall = kendall)
}
#' @export
pobs <- function(x, ...) UseMethod('pobs',x)
#' @export
pobs.data.frame <- function(x){
## Create a datasets of all paired observations
ans <- dplyr::mutate(x, nid = as.numeric(x[,1]))
names(ans) <- c('id','yr','z','nid')
ans <- dplyr::inner_join(ans,ans, by = 'yr')
ans <- dplyr::filter(ans, nid.x < nid.y)
ans <- dplyr::select(ans, yr, id.x, id.y, z.x, z.y)
names(ans) <- c(names(x)[2],
paste(names(x)[1], 1:2, sep='_'),
paste(names(x)[3], 1:2, sep='_'))
return(ans)
}
#' @export
#' @rdname pairwise
pobs.formula <- function(form, x){
if(!is.data.frame(x) )
stop('The variable x must be a data.drame')
x <- x[,all.vars(form)[c(2,3,1)]]
pobs(x)
}
#' @export
pdist <- function(y, ...) UseMethod('pdist',y)
#' @export
#' @rdname pairwise
pdist.data.frame <- function(y, method = 'geo',
htol = NULL, ...){
vname1 <- paste(names(y),'.x',sep='')
vname2 <- paste(names(y),'.y',sep='')
## Create the datasets of all paired distances
ans <- dplyr::mutate(y, nid = as.numeric(y[,1]), yr = 0)
names(ans) <- append(names(y),c('nid','yr'))
ans <- dplyr::inner_join(ans, ans, by = 'yr')
ans <- dplyr::filter(ans, nid.x < nid.y )
if(method == 'geo'){
h <- geosphere::distGeo(ans[ , vname1[-1]],
ans[ , vname2[-1]],...)/1000
} else if(method == 'euclidean'){
h <- distEuclid(ans[ ,vname1[-1]],
ans[ , vname2[-1]],...)
}
ans <- ans[,append(vname1,vname2)]
names(ans) <- append(paste(names(y),'1',sep='_'),
paste(names(y),'2',sep='_'))
ans <- cbind(ans, dist = h)
if(!is.null(htol)) ans <- dplyr::filter(ans, dist < htol)
return(ans)
}
#' @export
#' @rdname pairwise
pdist.formula <- function(form, y, method = 'geo',
htol = NULL, ...){
if(!is.data.frame(y) )
stop('The variable x must be a data.drame')
return(pdist(y[, all.vars(form)],
method = method, htol = htol, ...))
}
##################################################################
#' Euclidean distance
#'
#' Returns the euclidean distance between two vector.
#'
#' @param x,y Points between which the distance is computed.
#'
#' @details
#' if \code{x} is a vector or a matrix with one row, the funtion
#' returns vector with the same number of observations as \code{y}
#' corresponding to the
#' distance with \code{x}. If \code{x} and \code{y} are both
#' matrices of the same size, the function returns distance between
#' each row.
#'
#' @examples
#'
#' x1 <- replicate(2,rnorm(5))
#' x2 <- replicate(2,rnorm(5))
#'
#' distEuclid(x1[1,], x2)
#' distEuclid(x1, x2)
#'
#'
#' @export
distEuclid <- function(x, ...) UseMethod('distEuclid', x)
#' @rdname distEuclid
#' @export
distEuclid.numeric <- function(x, y){
if(is.vector(y)){
ans <- sqrt(sum((x -y)^2))
} else {
ans <- matrix(NA,nrow(y),ncol(y))
for(ii in seq(ncol(y)))
ans[,ii] <- x[ii] - y[ ,ii]
ans <- sqrt(rowSums((ans)^2))
}
return(ans)
}
#' @rdname distEuclid
#' @export
distEuclid.matrix <- function(x, y){
if(nrow(x) == 1){
ans <- distEuclid(as.numeric(x),y)
} else{
ans <- sqrt(rowSums((x -y)^2))
}
return(ans)
}
#' @rdname distEuclid
#' @export
distEuclid.data.frame <- function(x,y){
distEuclid(as.matrix(x), y)
}
##################################################################
#' Compute uniform observation based on ranks
#'
#' Returns uniform observations in [0,1] based on ranks
#' with preservation of the \code{NA} values. Denominator is
#' \eqn{n+1}, where \eqn{n} is the sample size.
#'
#' @param x Can be a vector, matrix or list containing data
#' to transform.
#' If it is a matrix 'rankit' is applied to the columns
#' @param ... Additional parameters past to the function 'rank'.
#' By default, last.na = 'keep'.
#'
#' @export
#'
#' @seealso \code{\link{rank}}
#'
#' @examples
#' x <- replicate(2,rnorm(10))
#' y <- rnorm(10)
#' x; rankit(x)
#' y; rankit(y)
rankit <- function(x, ...) UseMethod("rankit", x)
#' @export
#' @rdname rankit
rankit.numeric <- function(x, na.last = "keep", ...){
x <- rank(x, na.last, ...)
return(x/(1+sum(!is.na(x))))
}
#' @export
#' @rdname rankit
rankit.matrix <- function(x, ...){
apply(x, 2, rankit.numeric, ...)
}
#' @export
rankit.data.frame <- function(x, ...){
sapply(x, rankit.numeric, ...)
}
#' @export
#' @rdname rankit
rankit.list <- function(x, ...){
sapply(x, rankit.numeric, ...)
}
###########################################################
#' Relation in elliptical copula family
#'
#' Provide the relation between the measure of association
#' and the parameter \code{theta} of an elliptical copula.
#'
#' @param rho Spearman correlation
#' @param tau Kendall tau
#'
###########################################################
#' @export
rho2theta <- function(rho) 2*sin(pi/6*rho)
#' @export
#' @rdname rho2theta
tau2theta <- function(tau) sin(pi/2*tau)
#' @export
#' @rdname rho2theta
theta2rho <- function(theta) 6/pi*asin(theta/2)
#' @export
#' @rdname rho2theta
theta2tau <- function(theta) 2/pi*asin(theta)
###########################################################
#' Fisher r-to-z transformation
#'
#' Provide the link betweem the correlation coefficient and
#' the z-transformation of Fisher for normality.
#'
#' @param r vector or matrix of Pearson correlation coefficients.
#' @param z Transformation of the correlation coefficients.
#'
#' @export
r2z <- function(r) 0.5 * (log(1+r)-log(1-r))
#' @export
#' @rdname r2z
z2r <- function(z) (exp(2*z)-1)/(exp(2*z)+1)
########################################################################
#' Logit function
#'
#' Return the logit of a functon or its inverse.
#'
#' @param x Sample.
#'
#' @export
logit <- function(x) log(x/(1-x))
#' @export
#' @rdname logit
expit <- function(x) exp(x)/(1 + exp(x))
########################################################################
#' Pointwise bound
#'
#' Return vector where every elements are forced inside boundaries.
#'
#' @param x Sample.
#'
#' @param b Boundaries.
#'
#' @seealso \link{pmin}, \link{pmax}
#'
#' @export
#'
prange <- function(x, b) pmin(pmax(x,b[1]),b[2])
########################################################################
#' Local extremums
#'
#' Return the index or the value of the local extremums of a vector.
#' In case of ties, the first value is returned.
#'
#' @param x Sample.
#'
#' @export
lmin <- function(x) x[which.lmin(x)]
#' @export
#' @rdname lmin
lmax <- function(x) x[which.lmax(x)]
#' @export
#' @rdname lmin
which.lmin <- function(x) which.lmax(-x)
#' @export
#' @rdname lmin
which.lmax <- function(x){
xlow <- c(-Inf, x[1:(length(x)-1)])
xup <- c(x[2:length(x)],-Inf)
return(which(x > xlow & x >= xup))
}
#################################
#' One Dimensional Optimization
#'
#' Perform an initial search inside a grid of points before performing
#' standard optimization routine.
#'
#' @param res Number of subdivisions.
#' @param ... See \link{optimize}
#'
#' @export
goptimize <- function(fn, interval, res = 20, ...){
igrid <- seq(interval[1],interval[2], len = res)
id <- which.min(sapply(igrid,fn))
if(id == 1) bnd0 <- igrid[1:2]
else if(id == res) bnd0 <- igrid[c(res-1,res)]
else bnd0 <- igrid[c(id-1,id)]
return(optimize(fn, bnd0, ...))
}
##########################################################
#' Leap Year
#'
#' Return True if it is a leap year.
#'
#' @param y Year
#'
#' @export
#'
#' @examples
#'
#' is.leapyear(2000:2010)
#'
is.leapyear <- function(y) {((y %% 4 == 0) & (y %% 100 != 0)) | (y %% 400 == 0)}
#############################################################################
#' K-fold Cross-validation
#'
#' Function to split a vector in k samples of similar size.
#'
#' @param z Vector to be separated in groups
#' @param k Number of groups
#'
#' @export
#'
#' @examples
#'
#' idk <- kfold(1:92, k = 10)
#' sapply(idk, length)
#' lapply(idk, sort)
#'
kfold <- function(z, k = 10){
## permute the site order
z <- sample(z)
n <- length(z)
## split the sites in k
grp <- ceiling(seq(n)/n*k)
return(split(z,grp))
}
#############################################################################
#' Estimate distribution parameters from a matrix of L-moment
#'
#' Return a matrix of distribution parameters based on L-moments (in rows).
#'
#' @param x Vector or matrix of L-moments or L-Coefficients
#' @param distr Distribution string. See \link{lmom2par}.
#' @param Lscale Is the second column the L-scale (TRUE) or L-CV
#' @param Lcoef Is the columns 3+ L-coefficients (TRUE) or L-moments
#'
#' @export
#'
#' @examples
#'
#' x <- matrix(c(100,20,.2,
#' 200,50,.3), 2,3, byrow =TRUE)
#' FitLmom(x, 'gev')
#'
FitLmom <- function(x, distr, Lscale = TRUE, Lcoef = TRUE){
## Validate format
if(is.vector(x))
x <- t(x)
else
x <- as.matrix(x)
## Compute L-scale from L-CV if necessary
if(!Lscale){
x[,2] <- x[,2]*x[,1]
}
## Compute LCoefficients from Lmoments if necesssary
if(!Lcoef){
tid <- 3:ncol(x)
x[,tid] <- x[,tid] / replicate(length(tid), x[,2])
}
## Convert Lmoments to parameter for all rows
para <- list()
for(ii in 1:nrow(x))
para[[ii]] <- lmomco::lmom2par(lmomco::vec2lmom(x[ii,]), distr)$para
ans <- matrix(unlist(para), nrow(x), length(para[[1]]), byrow = TRUE)
colnames(ans) <- names(para[[1]])
## return vector if necessary
if(nrow(x) == 1)
ans <- as.vector(ans)
return(ans)
}
############################################################################
#' Load R data into a list
#'
#' Return the contain of file into a list. See \link{load}.
#'
#' @param f File
#'
#' @export
#'
#' @examples
#'
#' print(x0 <- rnorm(5))
#' x1 <- 'Hello World!'
#' save(x0, x1, file = 'temp.Rdata')
#' rm(x0,x1)
#'
#' print(ls())
#'
#' print(xlist <- ImportFromFile('temp.Rdata'))
#'
ImportFromFile <- function(f){
if(file.exists(f)){
nenv <- new.env()
load(f, envir = nenv)
ans <- mget(ls(envir = nenv), nenv)
} else {
ans <- list()
}
ans
}
###########################################################
#' Extract the Annual maximum of daily time series
#'
#' Returns a dataframe containing the annual maximums,
#' the date of the peaks occur and
#' the number of days.
#'
#' @param form Formula of the form \code{value ~ date}.
#'
#' @param x Data containing the date and variable.
#' If a data.frame is passed directly the first column is used as
#' value and the second as date.
#'
#' @export
#'
#' @examples
#'
#' xx <- canadaFlood$daily
#'
#' ExtractAmax(flow~date, xx)
#'
ExtractAmax.formula <- function(form, x){
x <- model.frame(form,x)
ExtractAmax.data.frame(x)
}
#' @export
ExtractAmax.data.frame <- function(x){
## Split data by years
yy <- format(x[,2],'%Y')
x <- cbind(x,seq(nrow(x)))
lx <- split(x,yy)
## Identify the annual maxima
nx <- sapply(lx, nrow)
mx <- sapply(lx, function(z) z[which.max(z[,1]),3])
ans <- x[mx,-3]
cbind(ans, n = nx, yy = as.numeric(format(ans[,2],'%Y')))
}
#' @export
ExtractAmax <- function(x, ...) UseMethod('ExtractAmax',x)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.