Nothing
# File cmv.R
# Part of the hydroTSM R package, https://github.com/hzambran/hydroTSM ;
# https://CRAN.R-project.org/package=hydroTSM
# Copyright 2023-2023 Mauricio Zambrano-Bigiarini
# Distributed under GPL 2 or later
################################################################################
# cmv: Counting Missing Values #
################################################################################
# This function counts the percentage/amount of missing data in a zoo object,
# using a user-defined temporal scale
# 'x' : zoo object to be analysed
# 'tscale' : character with the temporal scale to be used for analysing the missing data.
# Valid values for 'tscale' are:
# -) "hourly" : the percentage/amount of missing values will be given for each
# hour and, therefore, the expected time frequency of 'x' must
# be sub-hourly.
# -) "daily" : the percentage/amount of missing values will be given for each
# day and, therefore, the expected time frequency of 'x' must
# be sub-daily (i.e., hourly or sub-hourly).
# -) "weekly" : the percentage/amount of missing values will be given for each
# week (starting on Monday) and, therefore, the expected time
# frequency of 'x' must be sub-weekly (i.e., daily, (sub)hourly).
# -) "monthly" : the percentage/amount of missing values will be given for each
# month and, therefore, the expected time frequency of 'x' must
# be sub-monthly (i.e., daily, hourly or sub-hourly).
# -) "quarterly": the percentage/amount of missing values will be given for each
# quarter and, therefore, the expected time frequency of 'x' must
# be sub-quarterly (i.e., monthly, daily, hourly or sub-hourly).
# -) "seasonal" : the percentage/amount of missing values will be given for each
# weather season (see ?time2season) and, therefore, the expected
# time frequency of 'x' must be sub-seasonal (i.e., monthly, daily,
# hourly or sub-hourly).
# -) "annual" : the percentage/amount of missing values will be given for each
# year and, therefore, the expected time frequency of 'x' must
# be sub-annual (i.e., seasonal, monthly, daily, hourly or sub-hourly).
# -) 'out.type' : character indicating how the missing values should be returned
# for each temporal scale. Valid values for 'out.type' are:
# -) "percentage": the missing values are returned as a real value, representing
# the percentage of missing values in each temporal scale
# (computed as missing/total, i.e., a value between 0 and 1).
# -) "amount" : the missing values are returned as an integer value, representing
# the absolute amount of missing values in each temporal scale.
# -) 'dec' : integer indicating the amount of decimal places included in the output.
# It is only used when "out.type=='percentage'"
# -) 'start' : character, indicating the starting time used for aggregating sub-daily time
# series into daily ones.
# It MUST be provided in the format specified by \code{start.fmt}. \cr
# This value is used to define the time when a new day begins (e.g., for some
# rain gauge stations). \cr
# -) All the values of \code{x} with a time attribute before \code{start} are
# considered as belonging to the day before the one indicated in the time
# attribute of those values. \cr
# -) All the values of \code{x} with a time attribute equal to \code{start}
# are considered to be equal
# to \code{"00:00:00"} in the output zoo object. \cr
# -) All the values of \code{x} with a time attribute after \code{start} are
# considered as belonging to the same day as the one indicated in the time
# attribute of those values. \cr
# It is useful when the daily values start at a time different from
# \code{"00:00:00"}. Use with caution. See examples.
# -) 'start.fmt' : character indicating the format in which the time is provided in \code{start}.
# By default \code{start.fmt=\%H:\%M:\%S}. See \code{format} in
# \code{\link[base]{as.POSIXct}}.
# -) 'tz' : character, with the specification of the time zone used in both
# \code{x} and \code{start}.
# System-specific (see time zones), but \code{""} is the current time zone,
# and \code{"GMT"} is UTC (Universal Time, Coordinated).
# See \code{\link[base]{Sys.timezone}} and \code{\link[base]{as.POSIXct}}. \cr
# If \code{tz} is missing (the default), it is automatically set to the
# time zone used in \code{time(x)}. \cr
# This argument can be used to force using the local time zone or any other
# time zone instead of UTC as time zone.
# S3 generic: dispatches on the class of 'x' to the matching 'cmv' method
# (e.g., cmv.zoo, cmv.data.frame, cmv.matrix, cmv.default).
cmv <- function(x, ...) {
  UseMethod("cmv")
}
################################################################################
# cmv.default #
################################################################################
# Author : Mauricio Zambrano-Bigiarini #
################################################################################
# Started: 25-Jul-2023 (Buenos Aires) #
# Updates: 28-Jul-2023 ; 03-Aug-2023 ; 27-Nov-2023 #
################################################################################
# Default method: validates the input and delegates the work to 'cmv.zoo'.
#
# 'x'         : object to be analysed; it must be a zoo object, otherwise an
#               error is raised.
# 'tscale'    : character, temporal scale used for counting the missing values.
# 'out.type'  : character, either "percentage" or "amount".
# 'dec'       : number of decimal places used when out.type == "percentage".
# 'start'     : character, time at which a new day begins.
# 'start.fmt' : character, format in which 'start' is given.
# 'tz'        : character, time zone; when missing, 'cmv.zoo' chooses its default.
# '...'       : further arguments passed on to 'cmv.zoo'.
#
# Value: whatever 'cmv.zoo' returns for the validated arguments.
cmv.default <- function(x,
                        tscale=c("hourly", "daily", "weekly", "monthly", "quarterly", "seasonal", "annual"),
                        out.type=c("percentage", "amount"),
                        dec=3,
                        start="00:00:00",
                        start.fmt= "%H:%M:%S",
                        tz,
                        ...) {

  # Checking that 'x' is a zoo object
  if ( !is.zoo(x) ) stop("Invalid argument: 'class(x)' must be 'zoo' !")

  # checking 'tscale'
  tscale <- match.arg(tscale)

  # checking 'out.type'
  out.type <- match.arg(out.type)

  # 'tz' is forwarded as well (it used to be silently dropped here). When the
  # caller did not supply it, the missing state propagates, so 'missing(tz)'
  # is still TRUE inside 'cmv.zoo' and its own default is applied.
  cmv.zoo(x, tscale=tscale, out.type=out.type, dec=dec,
          start=start, start.fmt=start.fmt, tz=tz, ...)

} # 'cmv.default' end
################################################################################
# cmv.zoo #
################################################################################
# Author : Mauricio Zambrano-Bigiarini #
################################################################################
# Started: 25-Jul-2023 (Buenos Aires) #
# Updates: 28-Jul-2023 ; 03-Aug-2023 ; 27-Nov-2023 #
################################################################################
# Counts the missing values of a zoo object at a user-defined temporal scale.
#
# 'x'         : zoo object to be analysed.
# 'tscale'    : character, temporal scale used for grouping the values of 'x'.
# 'out.type'  : character:
#               -) "percentage": number of NAs divided by the number of values
#                  in each period, rounded to 'dec' decimal places. The value
#                  is the ratio itself (between 0 and 1), it is NOT multiplied
#                  by 100.
#               -) "amount"    : number of NAs in each period.
# 'dec'       : single integer-valued number, decimal places used for rounding
#               when out.type == "percentage".
# 'start'     : character, time at which a new day begins. When different from
#               "00:00:00", the time index of 'x' is shifted backwards by the
#               difference between 'start' and midnight before grouping.
# 'start.fmt' : character, format used for parsing 'start'.
# 'tz'        : character, time zone used for parsing 'start'. When missing it
#               is set to "" (current time zone).
# '...'       : not used.
#
# Value: zoo object with one element (or one row) per period of 'tscale'.
cmv.zoo <- function(x,
                    tscale=c("hourly", "daily", "weekly", "monthly", "quarterly", "seasonal", "annual"),
                    out.type=c("percentage", "amount"),
                    dec=3,
                    start="00:00:00",
                    start.fmt= "%H:%M:%S",
                    tz,
                    ...) {

  # checking 'tscale'
  tscale <- match.arg(tscale)

  # checking 'out.type'
  out.type <- match.arg(out.type)

  # checking 'dec': it must be a single, non-missing, integer-valued number.
  # The distance to the nearest integer is tested with abs() around the whole
  # difference, so that values such as 2.7 are rejected as well.
  if ( !is.numeric(dec) || (length(dec) != 1L) || is.na(dec) ||
       ( abs(dec - round(dec)) > 1e-5 ) )
    stop("Invalid argument: 'dec' must be integer !")

  # Automatic detection of 'tz'
  if (missing(tz)) tz <- ""

  # Analysis of days different from 00:00 to 23:59 hrs
  if ( start != "00:00:00" ) {
    # Converting the new starting time provided by the user into a POSIXct object
    start <- as.POSIXct(start, format=start.fmt, tz=tz)

    # An unparseable 'start' yields NA, which would silently turn every time
    # stamp of 'x' into NA in the shift below
    if ( is.na(start) )
      stop("Invalid argument: 'start' could not be parsed using 'start.fmt' !")

    # normal starting time for a day
    nstart <- as.POSIXct("00:00:00", format="%H:%M:%S", tz=tz)

    # time difference between the desired starting time 'start' and the "normal"
    # starting time 'nstart', [s]
    delta <- difftime(start, nstart, units="secs")

    # Shifting the time index of 'x' backwards in 'delta' seconds
    time(x) <- time(x) - delta
  } # IF end

  # Time frequencies of 'x' that are compatible with each 'tscale'
  # ("quarterly" and "seasonal" share the same set)
  valid.freq <- switch( tscale,
    "hourly"    = c("minute"),
    "daily"     = c("minute", "hourly"),
    "weekly"    = c("minute", "hourly", "daily"),
    "monthly"   = c("minute", "hourly", "daily", "weekly"),
    "quarterly" = ,
    "seasonal"  = c("minute", "hourly", "daily", "weekly", "monthly"),
    "annual"    = c("minute", "hourly", "daily", "weekly", "monthly", "quarterly", "seasonal")
  ) # 'valid.freq' END

  # Checking that the time frequency of 'x' is compatible with 'tscale'
  if ( !(sfreq(x) %in% valid.freq) )
    stop("Invalid argument: 'tscale' and the time frequency of 'x' are not compatible !")

  # Function that maps each time stamp to the label of the period it belongs to.
  # It is defined once and used for both aggregations below, so that total
  # counts and NA counts are always grouped in exactly the same way.
  # NOTE(review): the "seasonal" label combines the calendar year with the
  # season, so a season spanning two calendar years is presumably split/merged
  # by calendar year -- confirm against 'time2season'.
  by.fun <- switch( tscale,
    "hourly"    = function(tt) format(tt, "%Y-%m-%d %H"),
    "daily"     = function(tt) format(tt, "%Y-%m-%d"),
    "weekly"    = function(tt) format(tt, "%Y-%W"), # week starting on Monday
    "monthly"   = function(tt) format(tt, "%Y-%m"),
    "quarterly" = function(tt) zoo::format.yearqtr(tt),
    "seasonal"  = function(tt) paste0(format(tt, "%Y"), "-", time2season(tt)),
    "annual"    = function(tt) format(tt, "%Y")
  ) # 'by.fun' END

  # Computing the total amount of data at the desired temporal scale
  ndata <- aggregate(x, by=by.fun, FUN=length)

  # Computing the amount of missing values at the desired temporal scale
  nNA <- aggregate(x, by=by.fun, FUN=function(v) sum(is.na(v)))

  if (out.type=="percentage") {
    out <- round(nNA/ndata, dec)
  } else out <- nNA # out.type=="amount"

  return(out)

} # 'cmv.zoo' END
################################################################################
# cmv.data.frame #
################################################################################
# Author : Mauricio Zambrano-Bigiarini #
################################################################################
# Started: 25-Jul-2023 (Buenos Aires) #
# Updates: 28-Jul-2023 ; 03-Aug-2023 ; 27-Nov-2023 #
################################################################################
# 'dates' : "numeric", "factor", "Date" indicating how to obtain the
# dates corresponding to the 'sname' station
# If 'dates' is a number, it indicates the index of the column in
# 'x' that stores the dates
# If 'dates' is a factor, it has to be converted into 'Date' class,
# using the date format specified by 'date.fmt'
# If 'dates' is already of Date class, the following line verifies that
# the number of days in 'dates' is equal to the number of elements in the
# time series corresponding to the 'st.name' station
# 'date.fmt': character indicating the format in which the dates are stored in 'dates'.
# ONLY required when class(dates)=="factor" or "numeric"
# 'verbose' : logical; if TRUE, progress messages are printed
# data.frame method: builds a zoo object from 'x' and 'dates' and delegates the
# counting of missing values to 'cmv.zoo'.
#
# 'x'         : data.frame with the values to be analysed (and, optionally,
#               a column with the dates).
# 'tscale', 'out.type', 'dec', 'start', 'start.fmt', 'tz' : passed unchanged to
#               'cmv.zoo'.
# 'dates'     : numeric (index of the column of 'x' storing the dates), factor
#               or Date (the dates themselves, one per row of 'x').
# 'date.fmt'  : character, format used for converting 'dates' into Date class
#               when it is numeric or factor.
# '...'       : not used.
#
# Value: whatever 'cmv.zoo' returns for the resulting zoo object.
cmv.data.frame <- function(x, tscale=c("hourly", "daily", "weekly", "monthly", "quarterly", "seasonal", "annual"),
                           out.type=c("percentage", "amount"),
                           dec=3,
                           start="00:00:00",
                           start.fmt= "%H:%M:%S",
                           tz,
                           dates=1,
                           date.fmt="%Y-%m-%d", ...) {

  # Checking that the user provided a valid argument for 'dates'.
  # is.numeric()/inherits() are used instead of matching 'class(dates)', so that
  # integer column indexes (e.g., 1L) are accepted and objects with more than
  # one class (e.g., POSIXct) get this error instead of a failing 'if'.
  if ( !( is.numeric(dates) || inherits(dates, "factor") || inherits(dates, "Date") ) )
    stop("Invalid argument: 'dates' must be of class 'numeric', 'factor', 'Date'")

  # If 'dates' is a number, it indicates the index of the column of 'x' that stores the dates.
  # The column with dates is then removed from 'x' to ease the further computations
  if ( is.numeric(dates) ) {
    date.col <- dates
    dates    <- as.Date(x[, date.col], format= date.fmt) # zoo::as.Date
    x        <- x[-date.col]
  } # IF end

  # If 'dates' is a factor, it has to be converted into 'Date' class,
  # using the date format specified by 'date.fmt'
  if ( inherits(dates, "factor") ) dates <- as.Date(dates, format= date.fmt) # zoo::as.Date

  # Verifying that there is exactly one date for each row of 'x'
  if ( inherits(dates, "Date") && (length(dates) != nrow(x)) )
    stop("Invalid argument: 'length(dates)' must be equal to 'nrow(x)'")

  # Transforming 'x' into a zoo object. A failure is reported right here,
  # instead of being swallowed and leaving 'x' as a data.frame for 'cmv.zoo'
  x <- tryCatch(
         zoo(x, dates),
         error = function(e)
           stop("Invalid argument: 'x' and 'dates' could not be transformed into a zoo object: ",
                conditionMessage(e))
       )

  cmv.zoo(x=x, tscale=tscale, out.type=out.type, dec=dec,
          start=start, start.fmt=start.fmt, tz=tz)

} #'cmv.data.frame' END
################################################################################
# cmv.matrix #
################################################################################
# Author : Mauricio Zambrano-Bigiarini #
################################################################################
# Started: 25-Jul-2023 (Buenos Aires) #
# Updates: 28-Jul-2023 ; 03-Aug-2023 ; 27-Nov-2023 #
################################################################################
# matrix method: coerces 'x' into a data.frame and hands every argument over to
# 'cmv.data.frame', which does the actual work.
#
# 'x' : matrix with the values to be analysed.
# All the remaining named arguments are passed unchanged to 'cmv.data.frame'.
# '...' : not used.
cmv.matrix <- function(x, tscale=c("hourly", "daily", "weekly", "monthly", "quarterly", "seasonal", "annual"),
                       out.type=c("percentage", "amount"),
                       dec=3,
                       start="00:00:00",
                       start.fmt= "%H:%M:%S",
                       tz,
                       dates=1,
                       date.fmt="%Y-%m-%d", ...) {

  # The data.frame method is the one that knows how to deal with 'dates'
  x.df <- as.data.frame(x)

  cmv.data.frame(x=x.df,
                 tscale=tscale,
                 out.type=out.type,
                 dec=dec,
                 start=start,
                 start.fmt=start.fmt,
                 tz=tz,
                 dates=dates,
                 date.fmt=date.fmt)

} # 'cmv.matrix ' END
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.