## helper file for dstVis
## Packages used by these helpers (dplyr is called via dplyr:: and only
## needs to be installed):
library(poweRlaw)   # displ/conpl model objects, estimate_xmin, dist_cdf, ...
library(extRemes)   # fevd, ci.fevd.mle, datagrabber
library(lubridate)  # year()
## Identify storm events: runs of points beyond evntThreshold are tagged with
## their peak value and the time of that peak, then only runs whose peak
## exceeds evntMax are kept.
calc_storms <- function(dataRaw, evntMax = NULL, evntThreshold = NULL) {
  if (is.null(evntThreshold)) {
    evntThreshold <- -20
  }
  if (is.null(evntMax)) {
    evntMax <- -100
  }
  if (evntThreshold < 0) {
    icrit <- (dataRaw$evnt <= evntThreshold)
  } else {
    icrit <- (dataRaw$evnt >= evntThreshold)
  }
  blocks <- rle(icrit)
  iLoc <- blocks$lengths
  iStorm <- blocks$values
  lenBlock <- length(iLoc)
  numStorm <- sum(iStorm)
  dstStorm <- numeric(numStorm)
  dstStormTime <- .POSIXct(rep(NA_real_, numStorm))
  # Start/end index of each run; cumsum avoids the off-by-one that
  # sum(iLoc[1:(i - 1)]) produces when the first run is a storm.
  blockEnd <- cumsum(iLoc)
  blockStart <- blockEnd - iLoc + 1
  # Identify all intervals and tag each with the peak value and the time of
  # that peak; for negative/positive thresholds the peak is the min/max.
  peakFun <- if (evntThreshold < 0) which.min else which.max
  iStormIndex <- 1
  for (i in 1:lenBlock) {
    if (iStorm[i]) {
      runIdx <- blockStart[i]:blockEnd[i]
      iPeak <- runIdx[peakFun(dataRaw$evnt[runIdx])]
      dstStorm[iStormIndex] <- dataRaw$evnt[iPeak]
      dstStormTime[iStormIndex] <- dataRaw$evnt_time[iPeak]
      iStormIndex <- iStormIndex + 1
    }
  }
  dstStruct <- data.frame(evnt_time = dstStormTime, evnt = dstStorm)
  # Only keep those storms whose peak exceeded evntMax
  if (evntMax < 0) {
    dstStruct <- dplyr::filter(dstStruct, evnt < evntMax)
  } else {
    dstStruct <- dplyr::filter(dstStruct, evnt > evntMax)
  }
  return(dstStruct)
}
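## Usage sketch (illustrative, not part of the app): `dstHourly` stands in
## for a data frame with a POSIXct `evnt_time` column and a numeric `evnt`
## column, the layout calc_storms() expects.
##   storms <- calc_storms(dstHourly, evntMax = -100, evntThreshold = -20)
##   nrow(storms)  # number of storms whose peak dropped below -100 nT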
## Fit a power law to emp_data with poweRlaw and return the KS distance,
## xmin, alpha, tail size, and total sample size, plus the sorted data.
get_estimates <- function(emp_data, continous = TRUE) {
  emp_data <- emp_data[order(emp_data)]
  # Zero values are invalid for the fit; drop them.
  if (any(emp_data == 0)) {
    emp_data <- emp_data[emp_data != 0]
  }
  if (!continous) {
    pl_m <- displ$new(emp_data)
  } else {
    pl_m <- conpl$new(emp_data)
  }
  est_pl <- estimate_xmin(pl_m, distance = "ks")
  KS <- est_pl$gof
  xmin <- est_pl$xmin
  alpha <- est_pl$pars
  ntail <- est_pl$ntail
  ntotal <- length(emp_data)
  estimates <- list(estimates = c(KS, xmin, alpha, ntail, ntotal),
                    data_ordered = emp_data)
  return(estimates)
}
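## Usage sketch: the storm peaks are signed, and the tail fit expects a
## positive-valued sample, so pass absolute values (an assumption based on
## how the thresholds above are used):
##   est <- get_estimates(abs(storms$evnt), continous = TRUE)
##   est$estimates  # c(KS, xmin, alpha, ntail, ntotal)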
## Bootstrap power-law / log-normal / exponential tail fits in the style of
## Clauset et al. and return CCDF traces plus event probabilities.
clauset <- function(data, boot = 100, bootShow = 10, xcrit = NULL, xmin = NULL,
                    xaxmin = NULL, yaxmax = NULL, continous = TRUE,
                    dt = NULL, tSpan = NULL) {
  estimates <- data[[1]]
  emp_data <- data[[2]]
  emp_data <- emp_data[order(emp_data)]
  if (is.null(xcrit)) {
    xcrit <- max(emp_data, na.rm = TRUE)
  }
  if (is.null(xmin)) {
    xmin <- estimates[2]  # could set this to a hard value of 100 to test the model fits
  }
  # Empirical CCDF: P(X >= x) at each unique value
  emp_ccdf <- data.frame(value = unique(emp_data), p_x = NA)
  n_emp_data <- length(emp_data)
  for (i in 1:nrow(emp_ccdf)) {
    emp_ccdf$p_x[i] <- length(which(emp_data >= emp_ccdf$value[i])) / n_emp_data
  }
  if (any(emp_ccdf$value == 0)) {
    emp_ccdf <- emp_ccdf[emp_ccdf$value != 0, ]
  }
  # if (is.null(xaxmin)) {
  #   xaxmin <- round(min(emp_ccdf$value))
  # }
  if (is.null(yaxmax)) {
    yaxmax <- max(emp_ccdf$p_x)
  }
  df_emp_ccdf <- data.frame(x = emp_ccdf$value, y = emp_ccdf$p_x)
  # Probabilities for an event as large as the largest event in the dataset
  qLogNorm <- rep(NA_real_, boot)
  qExp <- rep(NA_real_, boot)
  qPowLaw <- rep(NA_real_, boot)
  # Probabilities for an extreme (xcrit) event or larger
  qXcritLogNorm <- rep(NA_real_, boot)
  qXcritExp <- rep(NA_real_, boot)
  qXcritPowLaw <- rep(NA_real_, boot)
  list_ln_ccdf <- list_pl_ccdf <- list_exp_ccdf <- list()
  icount <- 0
  for (i in 1:boot) {
    # Bootstrap resample with replacement; sample() weights every index
    # equally, whereas round(runif(...)) under-weights the two endpoints.
    y <- sample(emp_data, size = length(emp_data), replace = TRUE)
    y <- y[order(y)]
    y_tail <- y[y >= xmin]  # y is already sorted, so y_tail is too
    newYTail <- emp_ccdf$value[emp_ccdf$value >= xmin]
    p_tail <- length(y_tail) / length(y)
    # compute the log-normal traces #######################
    if (!continous) {
      m_ln <- dislnorm$new(y_tail)
    } else {
      m_ln <- conlnorm$new(y_tail)
    }
    m_ln$setXmin(xmin)
    m_ln$mle()
    ln_ccdf <- data.frame(value = newYTail, p_x = NA)
    ln_ccdf$p_x <- 1 - dist_cdf(m_ln, ln_ccdf$value)
    ln_ccdf$p_x <- ln_ccdf$p_x * p_tail
    if (max(ln_ccdf$value) == max(emp_data)) {
      qLogNorm[i] <- min(ln_ccdf$p_x)
    }
    # compute the probability for an xcrit event
    qXcritLogNorm[i] <- p_tail * (1 - dist_cdf(m_ln, xcrit))
    # compute the exponential traces ##############
    if (!continous) {
      m_exp <- disexp$new(y_tail)
    } else {
      m_exp <- conexp$new(y_tail)
    }
    m_exp$setXmin(xmin)
    m_exp$mle()
    exp_ccdf <- data.frame(value = newYTail, p_x = NA)
    exp_ccdf$p_x <- 1 - dist_cdf(m_exp, exp_ccdf$value)
    exp_ccdf$p_x <- exp_ccdf$p_x * p_tail
    if (max(exp_ccdf$value) == max(emp_data)) {
      qExp[i] <- min(exp_ccdf$p_x)
    }
    # compute the probability for an xcrit event
    qXcritExp[i] <- p_tail * (1 - dist_cdf(m_exp, xcrit))
    # compute the power-law traces ##############
    # Maximum-likelihood alpha (Clauset et al. 2009). The continuous MLE
    # divides by xmin; xmin - 1/2 is the discrete-data approximation, so
    # pick the denominator to match the `continous` flag.
    xminEff <- if (continous) xmin else xmin - 0.5
    logSum <- sum(log(y_tail / xminEff))  # renamed so base::sum stays usable
    alpha <- 1 + length(y_tail) / logSum
    pl_ccdf <- data.frame(value = newYTail, p_x = NA)
    pl_ccdf$p_x <- (pl_ccdf$value / xmin)^(1 - alpha)
    pl_ccdf$p_x <- pl_ccdf$p_x * p_tail
    if (max(pl_ccdf$value) == max(emp_data)) {
      qPowLaw[i] <- min(pl_ccdf$p_x)
    }
    # compute the probability for an xcrit event
    qXcritPowLaw[i] <- p_tail * (xcrit / xmin)^(1 - alpha)
    # Keep every (boot/bootShow)-th set of fitted traces for plotting
    if ((i %% (boot / bootShow)) == 0) {
      icount <- icount + 1
      list_exp_ccdf[[icount]] <- exp_ccdf
      list_pl_ccdf[[icount]] <- pl_ccdf
      list_ln_ccdf[[icount]] <- ln_ccdf
    }
  } # end of the bootstrap loop
  ##### Probabilities backing the density plot of extreme values ###########
  nEvents <- length(emp_data)
  # Chance of at least one event as large as the observed maximum in a
  # record of nEvents events: 1 - exp(-n*q) ~ 1 - (1 - q)^n
  pHatLogNorm <- 1 - exp(-nEvents * qLogNorm)
  pHatExp <- 1 - exp(-nEvents * qExp)
  pHatPowLaw <- 1 - exp(-nEvents * qPowLaw)
  # Probability that an xcrit event occurs over the next dt years; the
  # per-record rate is scaled by dt/tSpan (both assumed in years)
  pLogNormXyears <- 1 - exp(-nEvents * dt * qXcritLogNorm / tSpan)
  pExpXyears <- 1 - exp(-nEvents * dt * qXcritExp / tSpan)
  pPowLawXyears <- 1 - exp(-nEvents * dt * qXcritPowLaw / tSpan)
  # Median (c1) and 95% bootstrap interval (c2 lower, c3 upper), in percent
  ci95_pl <- quantile(pPowLawXyears, c(0.025, 0.975), na.rm = TRUE)
  ci95_ln <- quantile(pLogNormXyears, c(0.025, 0.975), na.rm = TRUE)
  ci95_se <- quantile(pExpXyears, c(0.025, 0.975), na.rm = TRUE)
  c1 <- c(median(pPowLawXyears, na.rm = TRUE),
          median(pLogNormXyears, na.rm = TRUE),
          median(pExpXyears, na.rm = TRUE)) * 100
  c2 <- c(ci95_pl[1], ci95_ln[1], ci95_se[1]) * 100
  c3 <- c(ci95_pl[2], ci95_ln[2], ci95_se[2]) * 100
  c1 <- signif(c1, digits = 3)
  c2 <- signif(c2, digits = 3)
  c3 <- signif(c3, digits = 3)
  pEventXyears_df <- data.frame(Model = c("Power-Law", "Log-Normal", "Exponential"),
                                c1, c2, c3)
  pHat_df <- data.frame(model = factor(rep(c("Power-Law", "Log-Normal", "Exponential"),
                                           each = boot)),
                        prob = c(pHatPowLaw, pHatLogNorm, pHatExp))
  return(list(df_emp_ccdf = df_emp_ccdf, pl_ccdf = list_pl_ccdf,
              ln_ccdf = list_ln_ccdf, exp_ccdf = list_exp_ccdf,
              pHat_df = pHat_df, pEventXyears_df = pEventXyears_df,
              xcrit = xcrit))
}
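## Usage sketch: clauset() consumes the two-element list from get_estimates().
## The values below are illustrative; dt and tSpan only enter through the
## dt/tSpan ratio, so any consistent time unit works:
##   est <- get_estimates(abs(storms$evnt))
##   res <- clauset(est, boot = 100, bootShow = 10, xcrit = 850,
##                  dt = 10, tSpan = 60)
##   res$pEventXyears_df  # median and 95% CI (in percent) per tail model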
## Fit a generalized Pareto (GP) model to |evnt| exceedances over `threshold`
dst_extRemes <- function(data = NULL, threshold = 100) {
  blocks <- rle(year(data$evnt_time))
  iEvent <- blocks$lengths
  iYear <- blocks$values
  # Need the average number of events per year. Exclude the last year from
  # the average because it is likely to be incomplete and less accurate.
  events.per.year <- round(mean(iEvent[1:(length(iYear) - 1)]))
  time.units <- paste0(events.per.year, "/year")
  fitGP <- fevd(abs(data$evnt), threshold = threshold, type = "GP",
                time.units = time.units, units = "nT", verbose = TRUE)
  return(fitGP)
}
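## Usage sketch: fit the GP model to the calc_storms() output from above
## (`storms` is the hypothetical data frame used in the earlier sketches):
##   fitGP <- dst_extRemes(storms, threshold = 100)
##   plot(fitGP)  # extRemes diagnostic plots for the fit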
## Return a data frame of return-level point coordinates
getrlpoints <- function(fit) {
  xp2 <- ppoints(fit$n, a = 0)
  ytmp <- datagrabber(fit)
  y <- c(ytmp[, 1])
  sdat <- sort(y)
  npy <- fit$npy
  u <- fit$threshold
  rlpoints.x <- -1 / log(xp2)[sdat > u] / npy
  rlpoints.y <- sdat[sdat > u]
  rlpoints <- data.frame(rlpoints.x, rlpoints.y)
  return(rlpoints)
}
## Return a data frame of estimated return levels with confidence intervals
getcidf <- function(fit) {
  rperiods <- c(2, 5, 10, 50, 75, 100, 200, 500, 1000)
  bds <- ci.fevd.mle(fit, return.period = rperiods)
  c1 <- as.numeric(bds[, 1])  # lower 95% bound
  c2 <- as.numeric(bds[, 2])  # point estimate
  c3 <- as.numeric(bds[, 3])  # upper 95% bound
  ci_df <- data.frame(c1, c2, c3, rperiods)
  return(ci_df)
}
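## Usage sketch: the two helpers above supply the pieces of a return-level
## plot for the fevd fit (how they are plotted is up to the caller):
##   rlpoints <- getrlpoints(fitGP)  # empirical return-level points
##   ci_df <- getcidf(fitGP)         # modeled levels with 95% CI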
## Trim leading and trailing missing-value sentinels from a vector
trim.data.in <- function(longvec, missing = 9999) {
  noobs <- length(longvec)
  first <- 1
  last <- noobs
  while (longvec[first] == missing) first <- first + 1
  while (longvec[last] == missing) last <- last - 1
  return(list(val = longvec[first:last], first = first, last = last))
}
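## Usage sketch: strip the 9999 sentinels before any fitting (adjust
## `missing` to match the data source's fill value):
##   trimmed <- trim.data.in(rawVec, missing = 9999)
##   cleanVec <- trimmed$val  # first/last give the retained index range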
## Compare power-law, log-normal, and exponential fits over a common tail
compDistributions <- function(data, continous = TRUE) {
  if (!continous) {
    pl_m <- displ$new(data)
    ln_m <- dislnorm$new(data)
    exp_m <- disexp$new(data)
  } else {
    pl_m <- conpl$new(data)
    ln_m <- conlnorm$new(data)
    exp_m <- conexp$new(data)
  }
  est_pl <- estimate_xmin(pl_m)
  pl_m$setXmin(est_pl)
  # compare_distributions() needs the same xmin in both models, so reuse
  # the power-law estimate for the other two.
  ln_m$setXmin(pl_m$getXmin())
  est_ln <- estimate_pars(ln_m)
  ln_m$setPars(est_ln$pars)
  exp_m$setXmin(pl_m$getXmin())
  est_exp <- estimate_pars(exp_m)
  exp_m$setPars(est_exp$pars)
  comp_pl_ln <- compare_distributions(pl_m, ln_m)
  comp_pl_exp <- compare_distributions(pl_m, exp_m)
  comp_ln_exp <- compare_distributions(ln_m, exp_m)
  return(list(comp_pl_ln = comp_pl_ln, comp_pl_exp = comp_pl_exp,
              comp_ln_exp = comp_ln_exp))
}
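## Usage sketch: pairwise Vuong-style likelihood-ratio comparisons; a small
## p_two_sided means one model fits significantly better, and the sign of
## test_statistic shows which (see the poweRlaw documentation):
##   cmp <- compDistributions(abs(storms$evnt), continous = TRUE)
##   cmp$comp_pl_ln$p_two_sided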