R/utils.R

################################################################
#' Pivot at-site data
#'
#' Returns a matrix of a given variable with time in rows and station
#' index in columns. Useful to obtain observations that occur at
#' the same time.
#'
#' @param form Formula that specify the variable of interest in
#'     \code{x} as well as time and location indexes.
#'
#' @param x Data.frame containing at-site date. If \code{x}
#'  is passed directly without formula,
#'  it is assumed that the first
#'  three columns are: station index, time, variable.
#'

#'
#' @examples
#' site1 <- data.frame(site = 1, year = seq(2000,2010),
#'                      v =rnorm(11), w =rnorm(11))
#' site2 <- data.frame(site = 2, year = seq(1995,2007),
#'                      v =rnorm(13), w =rnorm(13))
#' site3 <- data.frame(site = 3, year = seq(1995,2010),
#'                      v =rnorm(16), w =rnorm(16))
#' xdf <- rbind(site1,site2, site3)
#' xdf; pivotAtSite(v~year+site,xdf)
#'
#' @export
#################################################################

pivotAtSite <- function(x,...) UseMethod('pivotAtSite',x)

#' @export
pivotAtSite.data.frame <- function(x){
  vname <- names(x)
  mx <- reshape2::melt(x[,vname], id = vname[1:2])
  ans <- reshape2::dcast(mx,
                       as.formula(paste(vname[1],vname[2],sep='~')))

  return(ans)
}

#' @export
#' @rdname pivotAtSite
pivotAtSite.formula <- function(form, x){
  if(!is.data.frame(x))
    stop('The variable x must be a data.frame')

  ans <- model.frame(form,x)[,c(2,3,1)]
  return(pivotAtSite(ans))
}

#' @export
pivotAtSite.matrix <- function(x){
  return(pivotAtSite(as.data.frame(x)))
}

##################################################################
#' Organizing paired observation
#'
#' Provide a function that creates a new dataset containing all
#' paired observations, with distance and dependence measures.
#'
#' @param form,xform,yform Formula that specify which variables
#'   of \code{x} or \code{y} are used.
#'
#' @param x,y Data.frame of the observation and
#'   station characteristics.
#'
#' @param method Type of distance to compute. Either 'geo' for
#'    geographical distance or 'euclidean' for euclidean distance.
#'
#' @param htol Threshold that limit the paired distances.
#'
#' @param pearson If \code{TRUE}, the pearson correlation is return.
#' @param spearman If \code{TRUE}, the Spearman rho is return.
#' @param kendall If \code{TRUE}, the Kendall tau is return.
#' @param pseudo If \code{TRUE}, the pseudo-observations are return.
#' @param coord If \code{TRUE}, the coordinates are return.
#'
#'
#' @details
#'
#' With \code{x} or the function \code{pobs}, the formula is
#' of the form :
#' \code{value ~ id + year}, which result in a data.frame with twice
#' the number of columns. In order, they represent the values that
#' composes the paired observation, the station identificator and
#' the time. This function is usually used for
#' transforming a dataset containing hydrological variables from
#' several station at several sampling times.
#'
#' With \code{y} or the function \code{pdist}, the formula is of
#' the form :
#' \code{id ~ lon + lat}, which returns a data.frame with twice
#' the number of columns, plus the paired distance and other
#' related measures of associations. In order the
#' variable represent the station identificator and the coordinates.
#' If \code{method = 'euclidean'} the distance can be calculated
#' between stations characteristics. In the case of 3 variables,
#' the formula could be of the form : \code{id ~ var1 + var2 + var3}.
#'
#' The function \code{pairwise} combine the result of \code{pobs}
#' and \code{pdist} in a list and calculate measure of association
#' between the paired observations.
#'
#' @export
#'
#' @examples
#'
#' xdf <- data.frame(id = factor(c('a','a','b','b','b','c','c'),
#'                              levels = c('a','b','c','d')),
#'                  year = 2010 + c(1:2,1:3,1:2),
#'                  value = rlnorm(7))
#'
#' pobs(value~id+year, xdf)
#'
#' coord <- data.frame(id = c('a','b','c','d'),
#'                  lon = runif(4)-80,
#'                  lat = runif(4)+43)
#'
#' pdist(id~lon+lat,coord)
#' pdist(id~lon+lat,coord, htol = 50)
#'
#' pairwise(value~id+year, xdf, id~lon+lat,coord)
#'
#'##################################################################

pairwise <- function(x, ...) UseMethod('pairwise',x)

pairwise.data.frame <- function(x, y, method = 'geo', htol = NULL,
                     pseudo = TRUE, coord = TRUE,
                     pearson = TRUE, spearman = TRUE,
                     kendall = TRUE){

  xname <- names(x)
  yname <- names(y)[-1]
  lvl <- levels(y$id)

  names(x) <- c('id','year','var')
  names(y) <- append('id', yname)

  ## Create paired observation
  px <- pobs(x)

  ## Compute the number of paired obs.
  np <- dplyr::group_by(px, id_1, id_2)
  np <- dplyr::summarise(np, nb = n())

  ## Pivot observation
  if(pseudo | pearson | spearman | kendall){
    tx <- pivotAtSite(x[,c(2,1,3)])
  }

  ## Compute pivoted pseudo observation
  if(pseudo | spearman){
    u <- tx
    u[,-1] <- rankit(tx[,-1])
    rownames(u) <- tx$year
  }

  ## Unpivot pseudo observation
  if(pseudo){
    up <- reshape2::melt(u, 'year')[,c(2,1,3)]
    names(up) <- c('id','year','u')
    up <- dplyr::filter(up, !is.na(u))
    up$id <- factor(as.character(up$id),lvl)
    up <- pobs(up)
  }
  ## Compute pearson correlation
  suppressWarnings(
    cx <- cor(tx[,-1], use = "pairwise.complete.obs"))

  cp <- reshape2::melt(as.matrix(cx))
  names(cp) <- c('id_1','id_2', 'theta')
  cp$id_1 <- factor(as.character(cp$id_1),lvl)
  cp$id_2 <- factor(as.character(cp$id_2),lvl)

  ## Copute spearman rho
  suppressWarnings(
    sx <- cor(u[,-1], use = "pairwise.complete.obs"))

  sp <- reshape2::melt(as.matrix(sx))
  names(sp) <- c('id_1','id_2', 'rho')
  sp$id_1 <- factor(as.character(sp$id_1),lvl)
  sp$id_2 <- factor(as.character(sp$id_2),lvl)

  ## Copute spearman rho
  suppressWarnings(
    kx <- cor(tx[,-1], use = "pairwise.complete.obs"))

  kp <- reshape2::melt(as.matrix(kx))
  names(kp) <- c('id_1','id_2', 'tau')
  kp$id_1 <- factor(as.character(kp$id_1),lvl)
  kp$id_2 <- factor(as.character(kp$id_2),lvl)

  ## Compute the pairwise distance
  dp <- pdist(y, method = method, htol = htol)

  ## Join all the information
  if(pseudo) px <- dplyr::left_join(px, up,
                                    by = c('id_1', 'id_2' ,'year'))

  if(!coord) dp <- dplyr::select(dp,id_1, id_2, dist)

  dp <- dplyr::left_join(dp,np, by = c("id_1", "id_2"))

  if(pearson)  dp <- dplyr::left_join(dp,cp, by = c("id_1", "id_2"))
  if(spearman) dp <- dplyr::left_join(dp,sp, by = c("id_1", "id_2"))
  if(kendall)  dp <- dplyr::left_join(dp,kp, by = c("id_1", "id_2"))

  ans <- list()
  ans$obs <- px
  ans$dist <- dp

  return(ans)
}

#' @export
#' @rdname pairwise
pairwise.formula <- function(xform, x, yform, y,
                    method = 'geo', htol = Inf,
                     pseudo = TRUE, coord = TRUE,
                     pearson = TRUE, spearman = TRUE,
                     kendall = TRUE){

  if(!is.data.frame(x) )
    stop('The variable x  must be a data.drame')

  x <- x[,all.vars(xform)[c(2,3,1)]]

  if(!is.data.frame(y) )
    stop('The variable x  must be a data.drame')

  y <- y[, all.vars(yform)]

  pairwise(x,y,method = 'geo', htol = htol,
                     pseudo = pseudo, coord = coord,
                     pearson = pearson, spearman = spearman,
                     kendall = kendall)
}


#' @export
pobs <- function(x, ...) UseMethod('pobs',x)

#' @export
pobs.data.frame <- function(x){

  ## Create a datasets of all paired observations
  ans <- dplyr::mutate(x, nid = as.numeric(x[,1]))
  names(ans) <- c('id','yr','z','nid')
  ans <- dplyr::inner_join(ans,ans, by = 'yr')
  ans <- dplyr::filter(ans, nid.x < nid.y)
  ans <- dplyr::select(ans, yr, id.x, id.y, z.x, z.y)

  names(ans) <- c(names(x)[2],
                  paste(names(x)[1], 1:2, sep='_'),
                  paste(names(x)[3], 1:2, sep='_'))
  return(ans)
}

#' @export
#' @rdname pairwise
pobs.formula <- function(form, x){

  if(!is.data.frame(x) )
    stop('The variable x  must be a data.drame')

  x <- x[,all.vars(form)[c(2,3,1)]]
  pobs(x)
}

#' @export
pdist <- function(y, ...) UseMethod('pdist',y)

#' @export
#' @rdname pairwise
pdist.data.frame <- function(y, method = 'geo',
                                 htol = NULL, ...){
  vname1 <- paste(names(y),'.x',sep='')
  vname2 <- paste(names(y),'.y',sep='')

  ## Create the datasets of all paired distances
  ans <- dplyr::mutate(y, nid = as.numeric(y[,1]), yr = 0)
  names(ans) <- append(names(y),c('nid','yr'))

  ans <- dplyr::inner_join(ans, ans, by = 'yr')
  ans <- dplyr::filter(ans, nid.x < nid.y )

  if(method == 'geo'){
    h <- geosphere::distGeo(ans[ , vname1[-1]],
                            ans[ , vname2[-1]],...)/1000
  } else if(method == 'euclidean'){
      h <- distEuclid(ans[ ,vname1[-1]],
                      ans[ , vname2[-1]],...)
  }

  ans <- ans[,append(vname1,vname2)]
  names(ans) <- append(paste(names(y),'1',sep='_'),
                       paste(names(y),'2',sep='_'))

  ans <- cbind(ans, dist = h)

  if(!is.null(htol)) ans <- dplyr::filter(ans, dist < htol)

  return(ans)
}

#' @export
#' @rdname pairwise
pdist.formula <- function(form, y, method = 'geo',
                              htol = NULL, ...){
  if(!is.data.frame(y) )
    stop('The variable x  must be a data.drame')

  return(pdist(y[, all.vars(form)],
                   method = method, htol = htol, ...))

}

##################################################################
#' Euclidean distance
#'
#' Returns the euclidean distance between two vector.
#'
#' @param x,y Points between which the distance is computed.
#'
#' @details
#'  if \code{x} is a vector or a matrix with one row, the funtion
#'  returns vector with the same number of observations as \code{y}
#'  corresponding to the
#'  distance with \code{x}. If \code{x} and \code{y} are both
#'  matrices of the same size, the function returns distance between
#'  each row.
#'
#'  @examples
#'
#'  x1 <- replicate(2,rnorm(5))
#'  x2 <- replicate(2,rnorm(5))
#'
#'  distEuclid(x1[1,], x2)
#'  distEuclid(x1, x2)
#'
#'
#' @export
distEuclid <- function(x, ...) UseMethod('distEuclid', x)


#' @rdname distEuclid
#' @export
distEuclid.numeric <- function(x, y){

  if(is.vector(y)){
    ans <- sqrt(sum((x -y)^2))

  } else {
    ans <- matrix(NA,nrow(y),ncol(y))

    for(ii in seq(ncol(y)))
      ans[,ii] <- x[ii] - y[ ,ii]

    ans <- sqrt(rowSums((ans)^2))
  }
  return(ans)
}

#' @rdname distEuclid
#' @export
distEuclid.matrix <- function(x, y){
  if(nrow(x) == 1){
    ans <- distEuclid(as.numeric(x),y)
  } else{
    ans <- sqrt(rowSums((x -y)^2))
  }

  return(ans)
}

#' @rdname distEuclid
#' @export
distEuclid.data.frame <- function(x,y){
  distEuclid(as.matrix(x), y)
}



##################################################################
#' Compute uniform observation based on ranks
#'
#' Returns uniform observations in [0,1] based on ranks
#' with preservation of the \code{NA} values. Denominator is
#' \eqn{n+1}, where \eqn{n} is the sample size.
#'
#' @param x Can be a vector, matrix or list containing data
#'        to transform.
#'          If it is a matrix 'rankit' is applied to the columns
#' @param ... Additional parameters past to the function 'rank'.
#'            By default, last.na = 'keep'.
#'
#' @export
#'
#' @seealso \code{\link{rank}}
#'
#' @examples
#' x <- replicate(2,rnorm(10))
#' y <- rnorm(10)
#' x; rankit(x)
#' y; rankit(y)

rankit <- function(x, ...) UseMethod("rankit", x)

#' @export
#' @rdname rankit
rankit.numeric <- function(x, na.last = "keep", ...){
  x <- rank(x, na.last, ...)
  return(x/(1+sum(!is.na(x))))
}

#' @export
#' @rdname rankit
rankit.matrix <- function(x, ...){
  apply(x, 2, rankit.numeric, ...)
}

#' @export
rankit.data.frame <- function(x, ...){
  sapply(x, rankit.numeric, ...)
}

#' @export
#' @rdname rankit
rankit.list <- function(x, ...){
  sapply(x, rankit.numeric, ...)
}


###########################################################
#' Relation in elliptical copula family
#'
#' Provide the relation between the measure of association
#' and the parameter \code{theta} of an elliptical copula.
#'
#'  @param rho Spearman correlation
#'  @param tau Kendall tau
#'
###########################################################

#' @export
rho2theta <- function(rho) 2*sin(pi/6*rho)

#' @export
#' @rdname rho2theta
tau2theta <- function(tau) sin(pi/2*tau)

#' @export
#' @rdname rho2theta
theta2rho <- function(theta) 6/pi*asin(theta/2)

#' @export
#' @rdname rho2theta
theta2tau <- function(theta) 2/pi*asin(theta)


###########################################################
#' Fisher r-to-z transformation
#'
#' Provide the link betweem the correlation coefficient and
#' the z-transformation of Fisher for normality.
#'
#' @param r vector or matrix of Pearson correlation coefficients.
#' @param z Transformation of the correlation coefficients.
#'
#' @export

r2z <- function(r) 0.5 * (log(1+r)-log(1-r))

#' @export
#' @rdname r2z

z2r <- function(z) (exp(2*z)-1)/(exp(2*z)+1)

########################################################################
#' Logit function
#'
#' Return the logit of a functon or its inverse.
#'
#' @param x Sample.
#'
#' @export
logit <- function(x) log(x/(1-x))

#' @export
#' @rdname logit
expit <- function(x) exp(x)/(1 + exp(x))

########################################################################
#' Pointwise bound
#'
#' Return vector where every elements are forced inside boundaries.
#'
#' @param x Sample.
#'
#' @param b Boundaries.
#'
#' @seealso \link{pmin}, \link{pmax}
#'
#' @export
#'
prange <- function(x, b) pmin(pmax(x,b[1]),b[2])


########################################################################
#' Local extremums
#'
#' Return the index or the value of the local extremums of a vector.
#' In case of ties, the first value is returned.
#'
#' @param x Sample.
#'
#' @export
lmin <- function(x) x[which.lmin(x)]

#' @export
#' @rdname lmin
lmax <- function(x) x[which.lmax(x)]

#' @export
#' @rdname lmin
which.lmin <- function(x) which.lmax(-x)

#' @export
#' @rdname lmin
which.lmax <- function(x){
  xlow <- c(-Inf, x[1:(length(x)-1)])
  xup <- c(x[2:length(x)],-Inf)
  return(which(x > xlow & x >= xup))
}

#################################
#' One Dimensional Optimization
#'
#' Perform an initial search inside a grid of points before performing
#' standard optimization routine.
#'
#' @param res Number of subdivisions.
#' @param ... See \link{optimize}
#'

#' @export

goptimize <- function(fn, interval, res = 20, ...){

  igrid <- seq(interval[1],interval[2], len = res)
  id <- which.min(sapply(igrid,fn))

  if(id == 1) bnd0 <- igrid[1:2]
  else if(id == res) bnd0 <- igrid[c(res-1,res)]
  else bnd0 <- igrid[c(id-1,id)]

  return(optimize(fn, bnd0, ...))
}



##########################################################
#' Leap Year
#'
#' Return True if it is a leap year.
#'
#' @param y Year
#'
#' @export
#'
#' @examples
#'
#' is.leapyear(2000:2010)
#'
is.leapyear <- function(y) {((y %% 4 == 0) & (y %% 100 != 0)) | (y %% 400 == 0)}



#############################################################################
#' K-fold Cross-validation
#'
#' Function to split a vector in k samples of similar size.
#'
#' @param z Vector to be separated in groups
#' @param k Number of groups
#'
#' @export
#'
#' @examples
#'
#' idk <- kfold(1:92, k = 10)
#' sapply(idk, length)
#' lapply(idk, sort)
#'
kfold <- function(z, k = 10){

  ## permute the site order
  z <- sample(z)
  n <- length(z)

  ## split the sites in k
  grp <- ceiling(seq(n)/n*k)

  return(split(z,grp))
}

#############################################################################
#' Estimate distribution parameters from a matrix of L-moment
#'
#' Return a matrix of distribution parameters based on L-moments (in rows).
#'
#' @param x Vector or matrix of L-moments or L-Coefficients
#' @param distr Distribution string. See \link{lmom2par}.
#' @param Lscale Is the second column the L-scale (TRUE) or L-CV
#' @param Lcoef Is the columns 3+ L-coefficients (TRUE) or L-moments
#'
#' @export
#'
#' @examples
#'
#' x <- matrix(c(100,20,.2,
#'                  200,50,.3), 2,3, byrow =TRUE)
#' FitLmom(x, 'gev')
#'
FitLmom <- function(x, distr, Lscale = TRUE, Lcoef = TRUE){

  ## Validate format
  if(is.vector(x))
    x <- t(x)
  else
    x <- as.matrix(x)

  ## Compute L-scale from L-CV if necessary
  if(!Lscale){
    x[,2] <- x[,2]*x[,1]
  }

  ## Compute LCoefficients from Lmoments if necesssary
  if(!Lcoef){
    tid <- 3:ncol(x)
    x[,tid] <- x[,tid] / replicate(length(tid), x[,2])
  }

  ## Convert Lmoments to parameter for all rows
  para <- list()
  for(ii in 1:nrow(x))
    para[[ii]] <- lmomco::lmom2par(lmomco::vec2lmom(x[ii,]), distr)$para

  ans <- matrix(unlist(para), nrow(x), length(para[[1]]), byrow = TRUE)
  colnames(ans) <- names(para[[1]])

  ## return vector if necessary
  if(nrow(x) == 1)
    ans <- as.vector(ans)

  return(ans)
}

############################################################################
#' Load R data into a list
#'
#' Return the contain of file into a list. See \link{load}.
#'
#' @param f File
#'
#' @export
#'
#' @examples
#'
#' print(x0 <- rnorm(5))
#' x1 <- 'Hello World!'
#' save(x0, x1, file = 'temp.Rdata')
#' rm(x0,x1)
#'
#' print(ls())
#'
#' print(xlist <- ImportFromFile('temp.Rdata'))
#'
ImportFromFile <- function(f){

 if(file.exists(f)){
    nenv <- new.env()
    load(f, envir = nenv)
    ans <- mget(ls(envir = nenv), nenv)

  } else {
   	ans <- list()
  }

	ans
}


###########################################################
#' Extract the Annual maximum of daily time series
#'
#'  Returns a dataframe containing the annual maximums,
#'  the date of the peaks occur and
#'  the number of days.
#'
#' @param form Formula of the form \code{value ~ date}.
#'
#' @param x Data containing the date and variable.
#'  If a data.frame is passed directly the first column is used as
#'  value and the second as date.
#'
#' @export
#'
#' @examples
#'
#' xx <- canadaFlood$daily
#'
#' ExtractAmax(flow~date, xx)
#'

ExtractAmax.formula <- function(form, x){
  x <- model.frame(form,x)
  ExtractAmax.data.frame(x)
}

#' @export
ExtractAmax.data.frame <- function(x){

	## Split data by years
	yy <- format(x[,2],'%Y')
	x <- cbind(x,seq(nrow(x)))
	lx <- split(x,yy)

	## Identify the annual maxima
	nx <- sapply(lx, nrow)
	mx <- sapply(lx, function(z) z[which.max(z[,1]),3])

	ans <- x[mx,-3]
	cbind(ans, n = nx, yy = as.numeric(format(ans[,2],'%Y')))
}

#' @export
ExtractAmax <- function(x, ...) UseMethod('ExtractAmax',x)
martindurocher/floodRFA documentation built on June 5, 2019, 8:44 p.m.