Practical example

To demonstrate graphically the difference between correlation and experimental data we will use the Dranchuk-AbouKassem results at a $T_{pr}$=1.05 at various $P_{pr}$.

library(zFactor)
zFactor:::z.plot.range("DAK", interval = "fine")

library(tibble)
library(ggplot2)
dak_tpr <- as.tibble(z.stats("DAK"))
dak_tpr
dak_tpr <- as.tibble(z.stats("DAK"))
p1 <- ggplot(dak_tpr, aes(x = Tpr, y = z.chart-z.calc, col = Tpr)) +
           geom_point()
p2 <- ggplot(dak_tpr, aes(z.chart-z.calc)) + geom_histogram(bins=60)

gridExtra::grid.arrange(p1, p2)
sum_tpr <- as.tibble(z.stats("BB"))
bb <- ggplot(sum_tpr, aes(x = Tpr, y = RMSE, color = Tpr)) +
           geom_point() + ylim(0, 0.4) + theme(legend.position="none") +
    ggtitle("Beggs-Brill")
bb
sum_tpr <- as.tibble(z.stats("HY"))
hy <- ggplot(sum_tpr, aes(x = Tpr, y = RMSE, col = Tpr)) +
           geom_point() + ylim(0, 0.4) + theme(legend.position="none") + 
    ggtitle("Hall-Yarborough")
hy
sum_tpr <- as.tibble(z.stats("DAK"))
dak <- ggplot(sum_tpr, aes(x = Tpr, y = RMSE, col = Tpr)) +
           geom_point() + ylim(0, 0.4) + theme(legend.position="none") +
    ggtitle("Dranchuk-AbouKassem")
dak
sum_tpr <- as.tibble(z.stats("SH"))
sh <- ggplot(sum_tpr, aes(x = Tpr, y = RMSE, col = Tpr)) +
           geom_point() + ylim(0, 0.4) + theme(legend.position="none") +
    ggtitle("Shell")
sh
sum_tpr <- as.tibble(z.stats("N10"))
n10 <- ggplot(sum_tpr, aes(x = Tpr, y = RMSE, col = Tpr)) +
           geom_point() + ylim(0, 0.4) + theme(legend.position="none") +
    ggtitle("Neural-Network-10")
n10
sum_tpr <- as.tibble(z.stats("PP"))
pp <- ggplot(sum_tpr, aes(x = Tpr, y = RMSE, col = Tpr)) +
           geom_point() + ylim(0, 0.4) + theme(legend.position="none") +
    ggtitle("Papp")
pp
# gridExtra::grid.arrange(bb, hy, sh, n10, pp)
dak_tpr <- as.tibble(z.stats("DAK"))
ggplot(dak_tpr, aes(x = Ppr, y = MSE, col = Tpr)) +
           geom_point(alpha=0.7) +
    geom_smooth()
dak_tpr <- as.tibble(z.stats("DAK"))
# Change geom to freqpoly (position is identity by default) 
ggplot(dak_tpr, aes(z.chart-z.calc, fill = Tpr)) +
  geom_histogram(binwidth = 0.001)
bb_tpr <- as.tibble(z.stats("BB"))
# Change geom to freqpoly (position is identity by default) 
ggplot(bb_tpr, aes(z.chart-z.calc, fill = Tpr)) +
  geom_histogram(binwidth = 0.5)
# residual <- dak_tpr$z.chart - dak_tpr$z.calc
# ggplot(dak_tpr) + geom_bar(aes(fill = residual, stat = identity))

sandbox

library(dplyr)
library(tibble)

correlation <- "DAK"
# get all `lp` Tpr curves
tpr_all <- getStandingKatzTpr(pprRange = "lp")
# ppr <- c(0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 4.0, 4.5, 5.0, 5.5, 6.0, 6.5, 7.0)
ppr <- zFactor:::getStandingKatzPpr(interval = "fine")
sk_corr_all <- createTidyFromMatrix(ppr, tpr_all, correlation)
grouped <- group_by(sk_corr_all, Tpr, Ppr)
smry_tpr_ppr <- summarise(grouped,  z.chart, z.calc,
                          RMSE = sqrt(mean((z.chart-z.calc)^2)),
                          MPE  = sum((z.calc - z.chart) / z.chart) * 100 / n(),
                          MAPE = sum(abs((z.calc - z.chart) / z.chart)) * 100 / n(),
                          MSE  = sum((z.calc - z.chart)^2) / n(),
                          RSS  = sum((z.calc - z.chart)^2),
                          MAE  = sum(abs(z.calc - z.chart)) / n()

)
as.tibble(smry_tpr_ppr)
# dak_tpr_ppr <- smry_tpr_ppr
smry_tpr_105 <- smry_tpr_ppr[smry_tpr_ppr$Tpr=="1.05", ]
smry_tpr_105
# par(mfrow = c(2,1))
# hist(smry_tpr_105$z.chart - smry_tpr_105$z.calc)
# boxplot(smry_tpr_105$z.chart - smry_tpr_105$z.calc)
# # RMSE Tpr=1.05
# dat = data.frame(residual = smry_tpr_105$z.chart - smry_tpr_105$z.calc, yhat=smry_tpr_105$z.calc)
# plt = ezplot::plt_dist(dat)
# plt("residual")
plot(smry_tpr_105$Ppr, smry_tpr_105$RMSE)
plot(smry_tpr_105$Ppr, smry_tpr_105$z.chart, type = "l")
points(smry_tpr_105$Ppr, smry_tpr_105$z.calc, cex = 1.25)
points(smry_tpr_105$Ppr, smry_tpr_105$RMSE)
library(ggplot2)
ggplot(smry_tpr_ppr, aes(x = Ppr, y = z.chart, group = Tpr, color = Tpr)) + 
    geom_line() + 
    geom_point(aes(y=z.calc), col = "blue") +
    geom_crossbar(aes(ymin=z.chart-RMSE, ymax=z.chart+RMSE), width = 0.25, col = "orange") +
    geom_linerange(aes(ymin = z.chart-RMSE, ymax = z.chart+RMSE), col = "orange")

ggplot(smry_tpr_ppr, aes(x=Ppr, y=RMSE)) +  geom_line() + geom_point()
# MPE
library(ggplot2)
ggplot(smry_tpr_105, aes(x = Ppr, y = z.chart)) + 
    geom_line() + 
    geom_point(aes(y=z.calc), col = "blue") +
    geom_crossbar(aes(ymin=z.chart-MPE, ymax=z.chart+MPE), width = 0.25, col = "orange")
    #+ geom_linerange(aes(ymin = z.chart-MPE, ymax = z.chart+MPE), col = "orange")

ggplot(smry_tpr_105, aes(x=Ppr, y=MPE)) +  geom_line() + geom_point()
# p <- ggplot(smry_tpr_105, aes(x = Ppr, y = z.chart))
# p <- p + geom_line()    
# p <- p + geom_line(aes(y=MPE))
# p <- p + scale_y_continuous(sec.axis = sec_axis(Ppr+ MPE. * .1))
# 
# p
p <- ggplot(mtcars, aes(cyl, mpg)) +
  geom_point()

# Create a simple secondary axis
p + scale_y_continuous(sec.axis = sec_axis(~.+10))

# Inherit the name from the primary axis
p + scale_y_continuous("Miles/gallon", sec.axis = sec_axis(~.+10, name = derive()))

# Duplicate the primary axis
p + scale_y_continuous(sec.axis = dup_axis())

# You can pass in a formula as a shorthand
p + scale_y_continuous(sec.axis = ~.^2)
# MPE
library(ggplot2)
ggplot(smry_tpr_105, aes(x = Ppr, y = z.chart)) +
    geom_line() 
    # geom_line(aes(y=MPE)) #+
    #geom_point(aes(y=z.calc), col = "blue") +
    # geom_hline(aes(x=Ppr, y = MPE, yintercept=0))  +
    # scale_y_continuous(sec.axis = sec_axis(~.*2 ))
# MAPE

library(ggplot2)
ggplot(smry_tpr_105, aes(x = Ppr, y = z.chart)) + 
    geom_line() + 
    geom_point(aes(y=z.calc), col = "blue") +
    geom_crossbar(aes(ymin=z.chart-MAPE, ymax=z.chart+MAPE), width = 0.25, col = "orange")
    #+ geom_linerange(aes(ymin = z.chart-MPE, ymax = z.chart+MPE), col = "orange")

ggplot(smry_tpr_105, aes(x=Ppr, y=MAPE)) +  geom_line() + geom_point()
# MSE

library(ggplot2)
ggplot(smry_tpr_105, aes(x = Ppr, y = z.chart)) + 
    geom_line() + 
    geom_point(aes(y=z.calc), col = "blue") +
    geom_crossbar(aes(ymin=z.chart-MSE, ymax=z.chart+MSE), width = 0.25, col = "orange")
    #+ geom_linerange(aes(ymin = z.chart-MPE, ymax = z.chart+MPE), col = "orange")

ggplot(smry_tpr_105, aes(x=Ppr, y=MSE)) +  geom_line() + geom_point()
library(ggplot2)
ggplot(smry_tpr_105, aes(x=Ppr, y=RSS)) +  geom_line() + geom_point()
    #geom_point(aes(y=z.calc), col = "blue") +
    #geom_crossbar(aes(ymin=z.chart-RSS, ymax=z.chart+RSS), width = 0.25, col = "orange")
library(ggplot2)
ggplot(smry_tpr_105, aes(x=Ppr, y=MAE)) + 
    geom_line() + geom_point()
    #geom_point(aes(y=z.calc), col = "blue") +
    #geom_crossbar(aes(ymin=z.chart-RSS, ymax=z.chart+RSS), width = 0.25, col = "orange")
# RSS

library(ggplot2)
ggplot(smry_tpr_105, aes(x = Ppr, y = z.chart)) + 
    #geom_line() + 
    #geom_point(aes(y=z.calc), col = "blue") +
    geom_crossbar(aes(ymin=z.chart-RSS, ymax=z.chart+RSS), width = 0.25, col = "orange")
    #+ geom_linerange(aes(ymin = z.chart-MPE, ymax = z.chart+MPE), col = "orange")

ggplot(smry_tpr_105, aes(x=Ppr, y=RSS)) +  geom_line() + geom_point()
library(zFactor)
library(tibble)

dak <- z.stats("DAK")
dak
dak_105 <- dak[dak$Tpr=="1.05", ]
dak_105

Accuracy measurement

The comparative analysis shows tables with different error measurements:

RMSE:  Root Mean Squared Error
MPE:   Mean Percentage error
MAPE:  Mean Absolute Percentage Error
MSE:   Mean Squared Error
RSS:   Residual sum of Squares
MAE:   Mean Absolute Error

where:

$a_t$ are the observed true values. In our case the Standing-Katz chart $z$ values;
$f_t$ are the calculated or predicted values (the $z$ values calculated by the correlations); and
$n$ is the number of samples

RMSE

Measure of accuracy, to compare errors of different calculation models for the same dataset.

$$RMSE = \sqrt {\sum_{t=1}^n \frac {(a_t - f_t)^2} {n}}$$

RMSE code

RMSE = sqrt(mean((z.chart - z.calc)^2))

MPE: Mean Percentage error

$$MPE = \frac {100%} {n} \sum_{t=1}^n \frac {a_t - f_t} {a_t}$$

MPE code

MPE  = sum((z.calc - z.chart) / z.chart) * 100 / n(),

MAPE: Mean Absolute Percentage Error

$$MAPE = \frac {100} {n} \sum | \frac {a_t - f_t} {a_t}|$$

MAPE code

MAPE = sum(abs((z.calc - z.chart) / z.chart)) * 100 / n()

MSE: Mean Squared Error

$$MSE = \frac {1}{n} \sum_{t=1}^n (a_t - f_t)^2 $$ MSE code

MSE  = sum((z.calc - z.chart)^2) / n()

RSS: Residual sum of Squares

$$RSS = \sum_{t=1}^n (a_t - f_t)^2 $$ RSS code

RSS  = sum((z.calc - z.chart)^2)

MAE: Mean Absolute Error

$$MAE = \frac {1} {n} \sum | {a_t - f_t} |$$ MAE code

MAE  = sum(abs(z.calc - z.chart)) / n()
d = data.frame(
  x  = c(1:5)
  , y  = c(1.1, 1.5, 2.9, 3.8, 5.2)
  , sd = c(0.2, 0.3, 0.2, 0.0, 0.4)
)

##install.packages("Hmisc", dependencies=T)
library("Hmisc")

# add error bars (without adjusting yrange)
plot(d$x, d$y, type="n")
with (
  data = d
  , expr = errbar(x, y, y+sd, y-sd, add=T, pch=1, cap=.1)
)

# new plot (adjusts Yrange automatically)
with (
  data = d
  , expr = errbar(x, y, y+sd, y-sd, add=F, pch=1, cap=.015, log="x")
)
with (
  data = d,
qplot(x,y)+geom_errorbar(aes(x=x, ymin=y-sd, ymax=y+sd), width=0.25)
)


f0nzie/zFactor documentation built on Aug. 2, 2019, 1:42 a.m.