# this function needs to be rewritten. A lot of redundand code!
#' Calculate bias, RMSE, corellation etc of two vectors.
#'
#' @description Calculates: corelation coeficient,
#' bias (absolute and relative), RMSE (absolute and relative), and performes statistical test to check if the differences between \code{reference} and \code{estimate} are significant.
#' Correlation coefficient as well as statistical test depend on the \code{dist.normal} value.
#' Optionally statistics can be calculated for groups of data, specified with the \code{by} parameter.
#' @param reference a vector of reference values
#' @param estimate a vector of estimated values
#' @param by Optional grouping variable
#' @param noinfo Logical. Should the additional information on the calculations be displayed?
#' @param dist.normal Logical. Is the distribution normal? If TRUE t-test and Pearson correlation is calculated. If FALSE, Wilcoxon paired test and Spearman corelation is calculated.
#' @param short Logical. Calculate only a subset of summary statistics.
#' @return summary statistics of the differences between \code{reference} and \code{estimate}.
#' @export
calc.error<-function(reference,estimate,by=NULL,noinfo=TRUE,dist.normal=TRUE,short=FALSE) {
#check input variables
if (!is.numeric(reference) | !is.numeric(estimate)) {
stop("Input values not numeric!")
}
#check if input values are equal length
if(length(reference) != length(estimate)) {
stop("Input variable not of equal length")
}
if(is.null(by) == FALSE) {
if(length(reference) != length(by) | length(reference) != length(by)) {
stop("Input variable not of equal length")
}
}
if(noinfo==FALSE) { #display information about how the differnce is calculated
message("The difference is calculated with following equations:") ;
message(paste("bias =",deparse(substitute(estimate)),"-",deparse(substitute(reference))));
message(paste("bias% = (",deparse(substitute(estimate)),"-",deparse(substitute(reference)),") / ",deparse(substitute(reference)),"* 100"))
if (dist.normal == T) {
message("Correlation method: Pearson. Stat test: t-test (paired)")
} else {
message("Correlation method: Spearman Stat test: Wilcoxon (paired)")
}
}
d<-estimate-reference
#stats in groups
if(is.null(by) == FALSE) {
df <- data.frame(reference,estimate,by) #create temporary data frame
f <- levels(as.factor(df$by))
#create placeholders for results
count<-cor_coeff<-bias<-RMSE<-bias_perc<-RMSE_perc<-p_val<-stat<-c()
for (i in 1:length(f)) {
subs <- subset(df,df$by==f[i])
a <- calc_error_internal(subs,dist.normal = dist.normal, short = short)
count[i] <- a[2]
cor_coeff[i] <- a[1]
bias[i] <- a[3]
bias_perc[i] <- a[4]
RMSE[i] <- a[5]
RMSE_perc[i] <- a[6]
stat[i] <- a[7]
p_val[i] <- a[8]
}
stat_for_all<-calc.error(reference,estimate,noinfo=TRUE,dist.normal=dist.normal) #recursive!:) #stats for all data to add as the last row
out<-data.frame(factor=f,count,cor_coeff,bias,bias_perc,RMSE,RMSE_perc,stat,p_val)
out<-rbind(out,stat_for_all)
return(out)
} else { #calculations for ungrouped data
f<-"all"
df <- data.frame(reference,estimate)
a <- calc_error_internal(df,dist.normal = dist.normal, short = short)
count <- a[2]
cor_coeff <- a[1]
bias <- a[3]
bias_perc <- a[4]
RMSE <- a[5]
RMSE_perc <- a[6]
stat <- a[7]
p_val <- a[8]
return(data.frame(factor=f,count,cor_coeff,bias,bias_perc,RMSE,RMSE_perc,stat,p_val))
}
}
calc_error_internal <- function(dfs, dist.normal = dist.normal, short = short) {
count_int <- length(dfs$reference)
bias_int <- mean(dfs$estimate - dfs$reference,na.rm=T)
RMSE_int <- rmse(dfs$estimate,dfs$reference)
#bias_perc_int <- mean((dfs$estimate - dfs$reference) / dfs$reference, na.rm=T)*100
bias_perc_int <- bias_int / mean(dfs$reference, na.rm=T)*100
RMSE_perc_int <- RMSE_int / abs(mean(dfs$reference,na.rm=T))*100
#if (is.infinite(bias_perc_int)) bias_perc <- mean((dfs$estimate - dfs$reference) / ifelse(dfs$reference==0,NA,dfs$reference*100),na.rm=T)
if (dist.normal == T) {
if (short==FALSE) {test <- t.test(x = dfs$reference,y = dfs$estimate,paired=T)}
if (short==FALSE) {p_val <- test$p.value; stat <- as.numeric(test$statistic)} else {p_val<-NA; stat<-NA}
if (short==FALSE) {cor_coeff<-cor(dfs$reference,dfs$estimate,method="pearson",use="pairwise.complete.obs")} else {cor_coeff <- NA}
} else {
if (short==FALSE) {test <-wilcox.test(dfs$reference,dfs$estimate,paired=T)}
if (short==FALSE) {p_val<-test$p.value; stat<-as.numeric(test$statistic)} else {p_val<-NA; stat<-NA}
if (short==FALSE) {cor_coeff<-cor(dfs$reference,dfs$estimate,method="spearman",use="pairwise.complete.obs")} else {cor_coeff <- NA}
}
return(c(cor_coeff,count_int, bias_int, bias_perc_int, RMSE_int,RMSE_perc_int, stat,p_val))
}
# dist.normal<-T
#
# df %>%
# group_by(by) %>%
# summarize(count = n(),
# bias = mean(estimate-reference, na.rm=T),
# bias_perc = (mean((estimate - reference) / reference, na.rm=T)*100),
# RMSE = rmse(estimate,reference),
# RMSE_perc = (rmse(estimate,reference) / abs(mean(reference,na.rm=T))*100),
# if(dist.normal == T) {
# cor_coeff = cor(reference,estimate,method="pearson",use="pairwise.complete.obs")
# })
#
# calc.error(reference, estimate, by)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.