#' Create a bib file for R packages, including the citations of user-defined packages.
#' @param pkg packages
#' @param bibfile file path and name to save the bib entries.
#' @return a bib file
#' @export
#' @examples
#' mf_bib()
#' mf_bib(pkg = c('base', 'knitr'))
mf_bib <- function(pkg = c('base'), bibfile = 'packages.bib'){
pkg <- unique(pkg[order(pkg)])
for (i in pkg){
# if (class(try(citation(i))) == 'try-error') install.packages(i)
cti <- toBibtex(citation(i))
entryloc <- grep(pattern = '^@', cti)
cti[entryloc] <- gsub(',', paste('R-',i, ',', sep =''), cti[entryloc])
symbol6loc <- grep('&', cti)
for (j in symbol6loc) {
cti[j] <- gsub(pattern = ' &', replacement = ' \\\\&', cti[j])
}
if (length(entryloc) > 1) cti[entryloc] <- paste(substr(cti[entryloc], 1, nchar(cti[entryloc])-1), 1:length(entryloc), ',', sep ='')
cat(cti, sep = '\n', file = bibfile, append = TRUE)
}
}
#' Convert the unit of air humidity according to Sonntag 1990. a in g/m-3 , RH in \%, q in kg/kg.
#' @param convert a character defining what is converted to what
#' @param x a given humidity in g/m-3 (a), \% (RH), kg/kg (q)
#' @param t temperature, in degree C
#' @param p air pressure, in Pa
#' @param surface ice surface or water surface
#' @return new value in destined unit
#' @export
#' @examples
#' mf_convertRH()
mf_convertRH <- function(convert = c('a to RH', 'RH to a', 'q to RH')[1], x = 10, t = 20, p = 100000, surface = c("water", "ice")[1]){
E <- switch(surface, # nach Sonntag 1990
"water" = 6.112 * exp((17.62*t) / (243.12 + t)), # over Wasser
"ice" = 6.112 * exp((22.46*t) / (272.62 + t)), # over Eis
stop("surface must be \"water\" or \"ice\"")
)
y <- switch(convert,
'a to RH' = 100 * x * (t + 273.15) / 216.67 / E,
'RH to a' = 216.67 * x/100 * E / (t + 273.15),
'q to RH' = 100 * p * q / ( 0.622 + 0.378 * q ) / E,
stop("convert must be \"a to RH\", or \"RH to a\", or \"q to RH\"")
)
return(y) # returns rH in \%
}
#' Convert air concentration between ppm and micromol m-3
#' @param convert a character defining what is converted to what
#' @param x a given concentration in ppm
#' @param t temperature, in °C
#' @param p air pressure, in Pa
#' @param w water vapour mixing ratio, dimensionless
#' @return new value in destined unit
#' @export
#' @examples
#' mf_convertc()
mf_convertc <- function(convert = c('ppm to mmol m-3', 'ppm to mmol m-3')[1], x = 400, t = 20, p = 100000, w = 0){ # x in ppm, t in °C, p in Pa, w dimensionless
y <- switch(convert,
'ppm to micro mol m-3' = x * p * (1 - w) / 8.314 / (t + 273.15),
'ppm to mmol m-3' = x * p * (1 - w) / 8.314 / (t + 273.15) / 1000,
stop("convert must be \"ppm to micro mol m-3\" or \"ppm to mmol m-3\".")
)
return(y)
}
#' Define daytime or nighttime
#' @param mytime given time, POSIXct
#' @param longtitude given longitute, numeric
#' @param latitude given latituede, numeric
#' @return 'day' or 'night', character
#' @export
#' @examples
#' mf_daynight()
mf_daynight <- function(mytime = as.POSIXct('2016-09-12', tz = 'GMT'), longitude = 11.40, latitude = 47.27){
mysunrise <- sunriset(crds = matrix(c(longitude, latitude), nrow = 1), mytime, direction="sunrise", POSIXct.out=TRUE)$time
mysunset <- sunriset(crds = matrix(c(longitude, latitude), nrow = 1), mytime, direction="sunset", POSIXct.out=TRUE)$time
dn <- ifelse(mytime > mysunrise & mytime < mysunset, 'day', 'night')
return(dn)
}
#' Plot a dataframe, multiple ys against one x
#' @param x a vector
#' @param y a vector or a dataframe with the same length as x
#' @param add logical, whether to add this plot to the previous one
#' @param xlab character
#' @param ylab character
#' @param myaxes logical, whether to display axes automatically
#' @param xlim
#' @param ylim
#' @param mycol colours
#' @param mytype
#' @param mypch
#' @param mycex
#' @param mylty
#' @param lwd
#' @param xerror errorbar, same dimension of x
#' @param yerror same dimension of y
#' @param mycolerrorbar error bar colours
#' @param mylegend
#' @param mylegendcol
#' @param myledendcex numeric
#' @param legendpos character
#' @return a figure
#' @export
#' @examples
#'
mf_dfplot <- function(x, y,
add = FALSE,
xlab = '', ylab = '',
myaxes = FALSE,
xlim = NULL, ylim = NULL,
mycol = NULL,
mytype = 'l',
mypch = 20,
mycex = 1,
mylty = NULL,
lwd = 1,
xerror = NULL,
yerror = NULL,
mycolerrorbar = NULL,
mylegend = NULL,
mylegendcol = mycol,
mylegendcex = 1,
legendpos = 'top') {
y <- as.data.frame(y)
if (is.null(ylim)) ylim <- range(y, na.rm = TRUE)
if (add == FALSE) {
plot(x, y[, 1],
xlab = xlab, ylab = ylab,
axes = myaxes, xlim = xlim, ylim = ylim, type = 'n', cex = mycex, lty = ifelse(is.null(mylty), 1, mylty[1]))
}
# ny <- ifelse(is.data.frame(y), dim(y)[2], 1)
ny <- ncol(y)
if (is.null(mycol)) {
mycol <- rainbow(ny)
}
if (is.null(mylty)) {
mylty <- rep(1, ny)
}
if (is.null(mycolerrorbar)) {
mycolerrorbar <- rainbow(ny, alpha = 0.5)
}
for (i in 1:ny){
if (!is.null(xerror)){
polygon(x = c(x + xerror, rev(x - xerror)),
y = c(y[, i], rev(y[, i])),
col = mycolerrorbar[i], border = NA)
}
if (!is.null(yerror)){
yerror <- as.data.frame(yerror)
yerror[is.na(yerror[, i]), i] <- 0
polygon(c(x, rev(x)),
c(y[, i] + yerror[, i], rev(y[, i] - yerror[, i])),
col = mycolerrorbar[i], border = NA)
}
points(x, y[, i], type = mytype, col = mycol[i], lwd = lwd, lty = mylty[i], cex = mycex, pch = mypch)
}
if (!is.null(mylegend)){
legend(legendpos, legend = mylegend, col = mylegendcol, bty = 'n', lty = mylty, lwd = lwd, cex = mylegendcex)
}
box()
}
#' Plot a dataframe, one y against multiple xs
#' @param x a vector or a dataframe with the same length as x
#' @param y a vector
#' @param xlab character
#' @param ylab character
#' @param xlim
#' @param ylim
#' @param mycol colours
#' @param xerror errorbar, same dimension of x
#' @param yerror same dimension of y
#' @param mycolerrorbar error bar colours
#' @param mylegend
#' @return a figure
#' @export
#' @examples
#'
mf_dfplot2 <- function(x, y,
xlab = 'x', ylab = 'y',
xlim = NULL, ylim = NULL,
mycol = NULL,
mylty = NULL,
xerror = NULL,
yerror = NULL,
mycolerrorbar = NULL,
mylegend = NULL) {
par(las=1)
x <- as.data.frame(x)
plot(x[, 1], y,
xlab = xlab, ylab = ylab,
axes = FALSE, xlim = xlim, ylim = ylim, type = 'n')
# ny <- ifelse(is.data.frame(y), dim(y)[2], 1)
nx <- dim(x)[2]
if (is.null(mycol)) {
mycol <- rainbow(nx)
}
if (is.null(mylty)) {
mylty <- rep(1, nx)
}
if (is.null(mycolerrorbar)) {
mycolerrorbar <- rainbow(nx, alpha = 0.5)
}
for (i in 1:nx){
if (!is.null(yerror)){
polygon(x = c(x[, i] + xerror[, i], rev(x[, i] - xerror[, i])),
y = c(y, rev(y)),
col = mycolerrorbar[i], border = NA)
}
if (!is.null(xerror)){
xerror <- as.data.frame(xerror)
polygon(y = c(y, rev(y)),
x = c(x[, i] + xerror[, i], rev(x[, i] - xerror[, i])),
col = mycolerrorbar[i], border = NA)
}
points(x[, i], y, type = 'l', col = mycol[i], lwd = 2, lty = mylty[i])
}
if (!is.null(mylegend)){
legend('top', legend = mylegend, col = mycol, bty = 'n', lty = mylty)
}
box()
}
#' add error bars to a scatterplot.
#' @param x
#' @param y
#' @param xupper
#' @param xlower
#' @param yupper
#' @param ylower
#' @param col
#' @param lty
#' @return errorbars in a plot
#' @export
#' @examples
#'
mf_errorbar <- function(x, y, xupper = NULL, xlower = NULL, yupper = NULL, ylower = NULL, col = 'black', lty = 1)
{
if (!is.null(yupper)){
arrows(x, y, x, y + yupper, angle = 90, length = 0.03, col = col, lty = lty)
}
if (!is.null(ylower)){
arrows(x, y, x, y - ylower, angle = 90, length = 0.03, col = col, lty = lty)
}
if (!is.null(xupper)){
arrows(x, y, x+xupper, y, angle = 90, length = 0.03, col = col, lty = lty)
}
if (!is.null(xlower)){
arrows(x, y, x-xlower, y, angle = 90, length = 0.03, col = col, lty = lty)
}
}
#' Find peaks of a curve
#' #https://rtricks.wordpress.com/2009/05/03/an-algorithm-to-find-local-extrema-in-a-vector/
#' @param vec
#' @param bw
#' @param x.coo
#' @return peaks
#' @export
#' @examples
#'
mf_findpeaks <- function(vec, bw = 1, x.coo = c(1:length(vec))){
pos.x.max <- NULL
pos.y.max <- NULL
pos.x.min <- NULL
pos.y.min <- NULL
for(i in 1:(length(vec)-1)) {
if((i+1+bw)>length(vec)){
sup.stop <- length(vec)} else {sup.stop <- i+1+bw
}
if((i-bw)<1){inf.stop <- 1}else{inf.stop <- i-bw}
subset.sup <- vec[(i+1):sup.stop]
subset.inf <- vec[inf.stop:(i-1)]
is.max <- sum(subset.inf > vec[i]) == 0
is.nomin <- sum(subset.sup > vec[i]) == 0
no.max <- sum(subset.inf > vec[i]) == length(subset.inf)
no.nomin <- sum(subset.sup > vec[i]) == length(subset.sup)
if(is.max & is.nomin){
pos.x.max <- c(pos.x.max,x.coo[i])
pos.y.max <- c(pos.y.max,vec[i])
}
if(no.max & no.nomin){
pos.x.min <- c(pos.x.min,x.coo[i])
pos.y.min <- c(pos.y.min,vec[i])
}
}
return(list(pos.x.max,pos.y.max,pos.x.min,pos.y.min))
}
#' Find peaks of a curve
#' http://stackoverflow.com/questions/31220307/calculate-x-value-of-curve-maximum-of-a-smooth-line-in-r-and-ggplot2
#' @param x
#' @param y
#' @param bw
#' @return peaks
#' @export
#' @examples
#'
mf_findpeaks2 <- function(x, y, bw = 3) {
data <- structure(list(x = x, y = y), .Names = c("x", "y"), class = "data.frame")
p1 <- ggplot(data, aes(x=x, y=y))
p1 <- p1 + stat_smooth(method = "lm", formula = y ~ ns(x, bw))
p1 <- p1 + geom_point()
gb <- ggplot_build(p1)
exact_x_value_of_the_curve_maximum <- gb$data[[1]]$x[which(diff(sign(diff(gb$data[[1]]$y)))== -2)+1] # 2, when minimum
exact_x_value_of_the_curve_minimum <- gb$data[[1]]$x[which(diff(sign(diff(gb$data[[1]]$y)))== 2)+1] # 2, when minimum
p2 <- p1 + geom_vline(xintercept=c(exact_x_value_of_the_curve_maximum, exact_x_value_of_the_curve_minimum))
return(list(p2 = p2, max = exact_x_value_of_the_curve_maximum, min = exact_x_value_of_the_curve_minimum))
}
#' Fill a time series with NAs. in: a dataframe and a timestamp vector. out: a dataframe.
#' @param data dataframe
#' @param timestamps can be character
#' @param informat format of input timestamps
#' @param dtin time interval in timestamps
#' @param dtout time interval in output file
#' @param starttime starttime in the output data. the same format as 'informat'
#' @param ndays how many days in the output data
#' @param outformat format of output timestamps
#' @param agg.function
#' @param digits
#' @return a dataframe
#' @export
#' @examples
#'
mf_fillna <- function(data, # a data frame
timestamps, # timestamps of data. can be character, but its format must be specified in 'informat'
informat, # format of timestamps
dtin, # time interval in timestamps
dtout = dtin, # time interval in output file
starttime, # starttime in the output data. the same format as 'informat'
ndays, # how many days in the output data
outformat=informat,
# limits= NULL, # a list for consistency limit of the values in each column.
# value=NA, # only valid when limits!=NULL. a vector with string or numeric with length of the column number. used to replace values out of bounds defined by limits
agg.function=expression(mean),digits)
{
# this function creates a subset of a given data set "data" by the respective timestamp ("timestamps")
# (the inputdata has neither to be continuous, nor equidistant)
# and returns a dataframe, beginning with "starttime" and length "ndays" with a time step of "dtout".
# "dtin" is the time step of the input data and if "dtout" is not equal "dtin", the output data will be
# aggregated by arithmetic mean of dtout/dtin records
# consisterncy limits will be checked according to the argument limits
# output data and timestamp are continuous and equidistant according to dtout
# missing values are filled with NA
# data: dataframe or matrix containing only NUMERIC or INTEGER
# timestamps: character vector of the timestamp related to data, given in format "informat"
# informat: format character string, see option format in ?strptime
# dtin: time step of the input data (delta t) in sec (numeric or integer)
# starttime: character string with format "informat" or POSIXct object (created by strptime)
# ndays: length of the output in days (integer or numeric)
# outformat: format of the time stamp in the output, defaults to informat
# dtout: time step of the output data (delta t) in sec, defaults to dtin
# if dtout = dtin, no aggregation will be processeed
# limits: list of length of the column number of the input data, each item containing the
# lower[1] and upper[2] consistency limit of the values. Values out of the range
# are set to the string specified in value. If limits = NULL (default), no check will be performed
# value: vector with string or numeric with length of the column number of the input data
# which replace values out of bounds defined by limits
data <- as.matrix(data)
suppressWarnings( if (class(starttime)=="character"){
starttime <- strptime(starttime, format=informat)
})
nrow.raw <- floor(86400*ndays/dtin)
dates.raw <- seq(starttime, by=dtin, length.out=nrow.raw)
# print(nrow.raw)
x.raw <- matrix(NA, nrow=nrow.raw, ncol=ncol(data))
time.in <- strptime(as.character(timestamps), format=informat)
index.in <- match(format(dates.raw,format=informat),format(time.in ,format=informat), nomatch=0)
index.raw <- match(format(time.in ,format=informat),format(dates.raw,format=informat), nomatch=0)
x.raw[index.raw,] <- data[index.in,]
# apply consistency limits
# for (j in 1:ncol(data)){ # clean raw tdr from negative values
# ind1 <- which(x.raw[,j] < limits[[j]][1])
# ind2 <- which(x.raw[,j] > limits[[j]][2])
# x.raw[union(ind1,ind2),j] <- value[j]
# } #end loop j
# return(x.raw)
# aggregate to the specified output time interval
if (dtin==dtout){
return(cbind.data.frame(format(dates.raw,format=outformat), x.raw))
} else {
n.agg <- dtout/dtin
if(n.agg > floor(n.agg)){
print("WARNING: dtout not a multiple of dtin:")
print(paste(" dtout / dtin: ",n.agg,sep=""))
print(paste(" ",floor(n.agg)," will be used for dtout / dtin instead ",sep=""))
n.agg <- floor(n.agg)
}
nrow.out <- floor(nrow.raw/n.agg)
dates.out <- seq(starttime, by=n.agg*dtin, length.out=nrow.out)
x.raw.ts <- ts(x.raw, frequency=n.agg)
x.out <- aggregate.ts(x.raw.ts,nfrequency=1, ndeltat=1,FUN=eval(agg.function))
x.out <- round(x.out,digits)
return(cbind.data.frame(format(dates.out,format=outformat), x.out))
}
}
#' Plot a user-customized hist
#' @param data
#' @param mybreaks
#' @param myxlim
#' @param myylim
#' @param eightlines
#' @param eightdigit
#' @param eightcex
#' @param eightcolors
#' @param mylegend
#' @param myxlab
#' @param show_n
#' @param show_skewness
#' @param show_density
#' @param myaxes
#' @return a hist plot
#' @export
#' @examples
#'
mf_hist <- function(data, mybreaks = "Sturges", myxlim = NULL, myylim = NULL,
eightlines = TRUE, eightdigit = 0, eightcex = 0.8, eightcolors = c('red','darkgreen','blue', 'black', 'purple', 'gold')[c(1,2,3,2,1,6,6,5,4,5)],
mylegend = '', myxlab = '',
show_n = TRUE, show_skewness = TRUE, show_density = FALSE,
myaxes = c(1, 2)) {
if (is.null(myylim)) myylim <- c(0, max(hist(data, breaks = mybreaks, plot = FALSE)$density) * 1.1)
if (is.null(myxlim)) {
hist(data, col = 'grey', border = NA, main = '', freq = FALSE, breaks = mybreaks, xlab = myxlab, ylim = myylim,
axes = FALSE)#, axes = FALSE, breaks = mybreaks)
} else {
hist(data, col = 'grey', border = NA, main = '', freq = FALSE, breaks = mybreaks, xlab = myxlab, xlim = myxlim, ylim = myylim,
axes = FALSE)#, axes = FALSE, breaks = mybreaks)
}
if(!is.na(myaxes[1])){
for (i in myaxes) axis(i)
}
box()
if (show_density) lines(density(data[!is.na(data)], bw = "SJ"))
rug(data, col = 'darkgrey')
legend('topleft', bty = 'n', legend = mylegend)
legend(
'topright', bty = 'n',
legend = paste(ifelse(show_n, paste('n = ', sum(!is.na(data)), '\n', sep = ''), ''),
ifelse(show_skewness, paste('skewness = ', round(mf_skewness(data), 2), sep = ''), ''),
sep = '')
)
if (eightlines) {
myfive <- fivenum(data)
threshold <- IQR(data, na.rm = TRUE) * 1.5
abline(v = c(myfive, myfive[2] - threshold, myfive[4] + threshold), col = eightcolors[1:7])
mtext(text = round(myfive, eightdigit), side = 3, line = c(0, 1, 0, 1, 0), at = myfive, col = eightcolors[1:5], cex = eightcex)
# mtext(text = round(myfive[c(1, 3, 5)], eightdigit), side = 3, line = 0, at = myfive[c(1, 3, 5)], col = eightcolors[c(1, 3, 5)], cex = eightcex)
# mtext(text = round(myfive[c(2, 4)], eightdigit), side = 3, line = 1, at = myfive[c(2, 4)], col = eightcolors[c(3, 5)], cex = eightcex)
mymean <- mean(data, na.rm = TRUE)
mysd <- sd(data, na.rm = TRUE)
abline(v = seq(from = mymean - mysd, by = mysd, length.out = 3), col = eightcolors[8:10], lty = 2)
}
box()
# return(c(myfive, threshold, mymean, mysd))
return(data.frame(para = c('min', '1q', 'median', '3q', 'max', 'lower', 'upper', 'mean', 'sd'),
value =c(myfive, myfive[2] - threshold, myfive[4] + threshold, mymean, mysd)))
}
#' Plot a Hovmoeller or fingerprint plot
#' @param data a dataframe
#' @param col_plot column to plot
#' @param title_var title of each sub plot
#' @param inter_var user defined interval if au.range = FALSE
#' @param auto.range logical auto.class=9,
#' @param auto.class
#' @param colset
#' @param ny how many ys. for example, ny = 48 for harf hourly data
#' @param y y value (row number) in the plot. must be integer. 0:24 for harf hourly data
#' @param yat
#' @param ylab
#' @param x
#' @param xat
#' @return multiple figures
#' @export
#' @examples
#'
mf_hmplot <- function(data, # a dataframe
col_plot, # column to plot
title_var = col_plot, # title of each sub plot
inter_var, # user defined interval if au.range = FALSE
auto.range = TRUE,
auto.class=9,
colset = 2,
ny, # how many ys. for example, ny = 48 for harf hourly data
y, # y value (row number) in the plot. must be integer. 0:24 for harf hourly data
yat,
ylab,
x,
xat)
{
# dev.off()
layout(matrix(1:8, 4,2, byrow=TRUE), widths = c(6, 1))
# par(mar=c(2,3,2,4), cex.axis = 0.946, cex.main = 1.046, col.axis = F, tck = 0, mgp=c(3, 0.8, 0))
# par(mar=c(1,1,1,3))
for (i in 1: length(col_plot)) {
# i <- 1
val <- as.numeric(as.character(data[, col_plot[i]]))
val <- matrix(val, ncol = ny, byrow = TRUE)
range.val <- range(val,na.rm=TRUE)
if (auto.range) {
inter <- seq(range.val[1], range.val[2], length.out=auto.class+1)
} else {
inter <- inter_var[[i]]
}
y2lab <- as.character(inter)
nclasses <- length(y2lab) - 1
if(colset==1) colo=grey.colors(nclasses,start=0.9,end=0.3,gamma=2.2) else
if(colset==2 && nclasses==8)
colo=c( "#00008F","#0020FF","#00AFFF","#40FFBF","grey85","#FF9F00","#FF1000","#800000") else
if(colset==2 && nclasses==5)
colo=c("#00AFFF","lightskyblue2","grey85","#FF9F00","#FF1000") else #"#00008F","#008BFF","#CFFF30","#FF7C00","#800000"
if(colset==2 && nclasses==3) colo=c("#00AFFF","grey85","#FF1000") else #"#008BFF","#CFFF30","#FF7C00"
colo=tim.colors(nclasses)
par(mar=c(2,3,2,0), cex.axis = 0.946, cex.main = 1.046, mgp=c(3, 0.8, 0))
image(x = x, y = y, z = val, breaks=inter,font.main=4,axes=F,xlab="",ylab="",col=colo, main=title_var[i])
if (class(x[1])[1] == 'POSIXct') {
axis.POSIXct(1, x, col.axis='black', tck = NA, las = 1.7, font = 2,mgp = c(3,0.6,0))
# axis.POSIXct(1, col.axis='black', tck = NA, at= xat, format="\%m.\%d",las=1.7,font=2,mgp=c(3,0.6,0))
} else {
axis(1, col.axis = 'black', tck = NA, at = xat,las = 1.7, font = 2,mgp = c(3,0.6,0))
}
axis(2,col.axis='black',tck = NA,at=yat,labels=ylab,las=1.7,font=2)
box()
par(mar=c(2,0.1,2,5), cex.axis = 0.946, cex.main = 1.046, mgp=c(3, 0.8, 0))
mf_imagescale(val, col= colo, breaks=inter, horiz = FALSE, yaxt = 'n', xlab = '', ylab = '')
axis(4, at=inter, las=2, font = 2)
box()
}
}
#' Create a legend
#' @param z
#' @param zlim
#' @param col
#' @param breaks
#' @param horiz
#' @param ylim
#' @param xlim
#' @return a lagend
#' @export
#' @examples
#'
mf_imagescale <- function(z, zlim, col = heat.colors(12),
breaks, horiz=TRUE, ylim=NULL, xlim=NULL, ...){
if(!missing(breaks)){
if(length(breaks) != (length(col)+1)){stop("must have one more break than colour")}
}
if(missing(breaks) & !missing(zlim)){
breaks <- seq(zlim[1], zlim[2], length.out=(length(col)+1))
}
if(missing(breaks) & missing(zlim)){
zlim <- range(z, na.rm=TRUE)
zlim[2] <- zlim[2]+c(zlim[2]-zlim[1])*(1E-3)#adds a bit to the range in both directions
zlim[1] <- zlim[1]-c(zlim[2]-zlim[1])*(1E-3)
breaks <- seq(zlim[1], zlim[2], length.out=(length(col)+1))
}
poly <- vector(mode="list", length(col))
for(i in seq(poly)){
poly[[i]] <- c(breaks[i], breaks[i+1], breaks[i+1], breaks[i])
}
xaxt <- ifelse(horiz, "s", "n")
yaxt <- ifelse(horiz, "n", "s")
if(horiz){YLIM<-c(0,1); XLIM<-range(breaks)}
if(!horiz){YLIM<-range(breaks); XLIM<-c(0,1)}
if(missing(xlim)) xlim=XLIM
if(missing(ylim)) ylim=YLIM
plot(1,1,t="n",ylim=ylim, xlim=xlim, xaxt=xaxt, yaxt=yaxt, xaxs="i", yaxs="i", ...)
for(i in seq(poly)){
if(horiz){
polygon(poly[[i]], c(0,0,1,1), col=col[i], border=NA)
}
if(!horiz){
polygon(c(0,0,1,1), poly[[i]], col=col[i], border=NA)
}
}
}
#' Save a list into an ASCII file. in: a list. out: a file.
#' @param x a list
#' @param file
#' @return a file
#' @export
#' @examples
#'
mf_list2ascii <- function(x, file = paste(deparse(substitute(x)), ".txt", sep = ""))
{
# MHP July 7, 2004
# R or S function to write an R list to an ASCII file.
# This can be used to create files for those who want to use
# a spreadsheet or other program on the data.
#
tmp.wid = getOption("width") # save current width
options(width=10000) # increase output width
sink(file) # redirect output to file
print(x) # print the object
sink() # cancel redirection
options(width=tmp.wid) # restore linewidth
return(invisible(NULL)) # return (nothing) from function
}
#' plot a linear regression figure and return a list of parameters. in: two vectors. out: a figure and a list.
#' @param x
#' @param y
#' @param xlim
#' @param ylim
#' @param plot.title
#' @param xlab
#' @param ylab
#' @param refline logical, reference line
#' @param slope slope of refline
#' @param intercept of refline
#' @param showr2 logical
#' @param showleg logical
#' @return a figure
#' @export
#' @examples
#'
mf_lm <- function(x, y,
xlim = range(as.numeric(x), na.rm = TRUE), ylim = range(as.numeric(y), na.rm = TRUE),
plot.title="linear regression", xlab = 'x', ylab = 'y',
refline = FALSE, slope = 1, intercept = 0,
showr2 = TRUE,
showleg = TRUE){
x <- as.numeric(x)
y <- as.numeric(y)
plot(x, y, xlim = xlim, ylim = ylim, xlab = xlab, ylab = ylab, col = "grey", pch = 19)
lm.my <- lm(y ~ x)
lm.sum <- summary(lm.my)
lm.rs <- round(lm.sum$r.squared, digits = 3)
b <- signif(lm.my$coefficients[1], 3)
a <- signif(lm.my$coefficients[2], 3)
abline(lm.my, col = "black", lwd = 2)
if (refline) abline(a = intercept, b = slope, col = 'blue', lty = 2)
text(xlim[1],
ylim[2] - diff(ylim) * 0.1,
# substitute(paste(italic(y), ' = ', a, italic(x), ' + ', b), list(a = a, b = b)),
substitute(paste(italic(y), ' = ', a, italic(x), c, b), list(a = a, b = b, c = ifelse(b < 0, '', ' + '))),
cex = 1.2, pos = 4)
text(xlim[1],
ylim[2]- diff(ylim) * 0.2,
# expression(italic(R)^2==r, list(r = lm.rs)),
as.expression(substitute(italic(n)==r, list(r = length(x)))),
cex = 1.2, pos = 4)
if (showr2) text(xlim[1],
ylim[2]- diff(ylim)* 0.3,
# expression(italic(R)^2==r, list(r = lm.rs)),
as.expression(substitute(italic(R)^2==r, list(r = lm.rs))),
cex = 1.2, pos = 4)
if (showleg) legend('bottomright', legend = c('data', 'linear', '1:1'), col = c('darkgrey', 'black', 'blue'), pch = c(19, -1, -1), lty = c(0, 1, 2), bty = 'n')
return(list(lm.sum$coefficients, lm.sum$r.squared))
}
#' calculate linear regression between every two columns in a data frame. in: a dataframes. out: a dataframe showing the linear regression.
#' @param data a datafrram
#' @return another dataframe
#' @export
#' @examples
#'
mf_lmdf <- function(data, simply = FALSE, intercept = TRUE){
ncol <- ncol(data)
output <- data.frame()
k <- 1
for (i in 1:(ifelse(simply, ncol - 1, ncol))){
x <- data[, i]
for (j in (ifelse(simply, i + 1, 1)): ncol){
if (j!=i) {
y <- data[, j]
if (intercept) {
lm.my <- lm(y ~ x)
outputcol <- c('x', 'y', 'r.squared', 'adj.r.squared', 'intercept', 'slope', 'Std.Error.intercept', 'Std.Error.slope', 't.intercept', 't.slope', 'Pr.intercept', 'Pr.slope')
} else {
lm.my <- lm(y ~ x + 0)
outputcol <- c('x', 'y', 'r.squared', 'adj.r.squared', 'slope', 'Std.Error.slope', 't.slope', 'Pr.slope')
}
lm.sum <- summary(lm.my)
output <- rbind(output, rep(NA, length(outputcol)))
output[k, 1:2] <- names(data)[c(i,j)]
output[k, 3:length(outputcol)] <- c(lm.sum$r.squared, lm.sum$adj.r.squared, c(lm.sum$coefficients))
# output <- rbind(output, c(names(data)[c(i,j)], lm.sum$r.squared, lm.sum$adj.r.squared, c(lm.sum$coefficients)))
k <- k + 1
}
}
}
names(output) <- outputcol
return(output)
}
#' Enhancement of names()
#' @param data a dataframe
#' @return a list
#' @export
#' @examples
#'
mf_names <- function(data) {
# print(paste(names(data), collapse = "','"))
# print(matrix(names(data), nrow = 1))
y <- as.data.frame(matrix(names(data), nrow = 1))
names(y) <- 1:length(names(data))
list(names(data),
paste("'", paste(names(data), collapse = "','"), "'", sep = ''),
y)
}
#' optim function for the dmodel in chamber flux calculation. in: initial values for optim function. out: best fitted parameters
#' @param cx_ini
#' @param a_ini
#' @param t0_ini
#' @param max_it
#' @param mymethod
#' @param mylower
#' @param myupper
#' @return optimized function
#' @export
#' @examples
#'
mf_optimdmodel <- function(cx_ini = cx, a_ini = a, t0_ini = t0, max_it = 500, mymethod = "Nelder-Mead", mylower = -Inf, myupper = Inf){
rec <- 0
dfpar <- data.frame(cx_ini = NA, a_ini = NA, t0_ini = NA, cx = NA, a = NA, t0 = NA, optim_value = NA, optim_function = NA, optim_gradient = NA, optim_convergence = NA) #, optim_message = NA) # save parameters
for (x1 in cx_ini){
for (x2 in a_ini){
for (x3 in t0_ini){
par_ini <- c(x1, x2, x3)
# print(par_ini)
# par.new <- optim(par_ini, dmodel, method = 'L-BFGS-B', lower = c(0, -10, -20))
par_new <- try(optim(par_ini, dmodel, control = list(maxit = max_it), method = mymethod, lower = mylower, upper = myupper), silent = TRUE)
if (class(par_new) == 'try-error') par_new <- try(optim(par_ini, dmodel, control = list(maxit = max_it), method = mymethod, lower = mylower, upper = c(Inf, Inf, Inf)), silent = TRUE)
if (class(par_new) == 'try-error') par_new <- try(optim(par_ini, dmodel, control = list(maxit = max_it), method = 'Nelder-Mead'), silent = TRUE)
dmodelcx <- par_new$par[1]
dmodela <- par_new$par[2]
dmodelt0 <- par_new$par[3]
dmodelrmse <- par_new$value
rec <- rec + 1
# deltapar[rec, ] <- c(par_ini, round(dmodelcx, 0), signif(dmodela,3), round(dmodelt0, 1), par_new$value, par_new$counts[1], par_new$counts[2], par_new$convergence) #, par.new$message)
dfpar[rec, ] <- c(par_ini, dmodelcx, dmodela, dmodelt0, par_new$value, par_new$counts[1], par_new$counts[2], par_new$convergence) #, par.new$message)
}
}
}
pc <- dfpar$cx > 0 & dfpar$a > 0 & dfpar$a < 0.1 & dfpar$t0 > 0
if (sum(pc) != 0) {
return(dfpar[pc, ][which.min(dfpar[pc, 'optim_value']), ])
} else {
pc <- dfpar$cx > 0 & dfpar$a > 0 & dfpar$a < 0.1
if (sum(pc) != 0) {
return(dfpar[pc, ][which.min(dfpar[pc, 'optim_value']), ])
} else {
pc <- dfpar$a > 0 & dfpar$a < 0.1
if (sum(pc) != 0) {
return(dfpar[pc, ][which.min(dfpar[pc, 'optim_value']), ])
} else {
return(dfpar[which.min(dfpar[, 'optim_value']), ])
}
}
}
}
#' check outliers. in: a vector. out: a plot and a vector of flags. the blank flag means passing all checks.
#' @param x data to be checked
#' @param ifplot logical
#' @param rangecheck logical
#' @param r.min lower threshold of rangecheck
#' @param r.max upper threshold of rangecheck
#' @param boxcheck logical
#' @param errorcheck logical
#' @param e.w window width of absolute deviation calculation
#' @param d.w window width of quantile calculation
#' @param d.quantile
#' @param d.quantile.factor
#' @param sd.check logical
#' @param sd.w
#' @param sd.factor.lower
#' @param sd.factor.upper
#' @return outlier makers. 'R' means the value has failed the range check. E the error check. S the sd check. B the boxplot check.
#' @export
#' @examples
#'
mf_outlier <- function(x, # data to be checked
ifplot = FALSE,
rangecheck = FALSE,
r.min =-Inf,
r.max = Inf, # range
boxcheck = FALSE,
errorcheck = FALSE,
e.w = 7, # window width of absolute deviation calculation
d.w = 21, # window width of quantile calculation
d.quantile = 0.95, # quantile
d.quantile.factor = 1.3, # quantile times
sd.check = FALSE,
sd.w = 29, # window width of sd
sd.factor.lower = 3,
sd.factor.upper = 3) # sd times
{
n.row = length(x)
y <- x
# 1 range
flag <- rep('', n.row)
if (rangecheck) {
flag[y < r.min | y > r.max] <- 'R'
y[flag == 'R'] <- NA
}
if (boxcheck) {
# par(mfrow = c(1,2))
ybox <- boxplot(y, plot = FALSE)
flag[y < ybox$stats[1, 1] | y > ybox$stats[5, 1]] <- 'B'
y[flag == 'B'] <- NA
}
# 2 absolute deviation
if (errorcheck){
abs_d <- rep(NA, n.row)
w.half <- (e.w - 1)/2
for (i in (1 + w.half) : (n.row - w.half))
{
ii <- c(i - w.half, i + w.half)
abs_d[i] <- ifelse(is.na(y[i]) |
length(y[ii[1] : ii[2]][is.na(y[ii[1] : ii[2]])]) > w.half,
NA,
abs(y[i] - mean(c(y[ii[1] : (i - 1)], y[(i + 1) : ii[2]]), na.rm = TRUE)))
}
d.w.half <- (d.w - 1) / 2
for (i in (1 + w.half + d.w.half) : (n.row - w.half - d.w.half))
{
if (is.na(abs_d[i]) | abs_d[i] > d.quantile.factor * quantile(abs_d[(i - d.w.half):(i + d.w.half)], d.quantile, na.rm = TRUE))
{
flag[i] <- paste(flag[i], 'E', sep = '')
y[i] <- NA
}
}
}
# 3 standard deviation
if (sd.check) {
sd.w.half <- (sd.w - 1)/2
sd_l <- rep(NA, n.row)
sd_u <- rep(NA, n.row)
for (i in (1 + sd.w.half) : (n.row - sd.w.half))
{
ii <- c(i - sd.w.half, i + sd.w.half)
sd_u[i] <- ifelse(is.na(y[i]),
NA,
median(y[ii[1] : ii[2]], na.rm = TRUE) + sd.factor.upper * sd(y[ii[1] : ii[2]], na.rm = TRUE))
sd_l[i] <- ifelse(is.na(y[i]),
NA,
median(y[ii[1] : ii[2]], na.rm = TRUE) - sd.factor.lower * sd(y[ii[1] : ii[2]], na.rm = TRUE))
if ((!is.na(sd_u[i]) & y[i] > sd_u[i]) | (!is.na(sd_l[i]) & y[i] < sd_l[i]))
{
flag[i] <- paste(flag[i], 'S')
}
}
}
if (ifplot) {
color <- ifelse(regexpr('R', flag) > 0, 'black',
ifelse(regexpr('B', flag) > 0, 'green',
ifelse(regexpr('E', flag) > 0, 'blue',
ifelse(regexpr('S', flag) > 0, 'red', 'grey'))))
pch <- ifelse(flag == '', 20, 4)
plot(x, type = 'l', col = 'grey')
points(x, col = color, pch = pch, type = 'p')
}
# return(data.frame(x = x, flag = flag, color = color))
return(flag)
}
# graphics.off()
# flag <- mf_outlier(x$MeanT, rangecheck = T, r.min = -7, r.max = 5, errorcheck = T, sd.check = T)
#' plot pair-wise correlations. in: a dataframe. out: a figure.
#' @param data a dataframe
#' @param lower.panel
#' @param upper.panel
#' @param diag.panel
#' @param lwd
#' @param col
#' @param labels
#' @param cex.labels
#' @return a pair plot
#' @export
#' @examples
#'
mf_pairs <- function(data, lower.panel = c(panel.lm, panel.smooth)[[1]], upper.panel=panel.cor, diag.panel = panel.diag, lwd = 2, col = "grey", labels=names(data), cex.labels=4){
# remove character columns and NA values
data <- data[, lapply(data, class) != 'character']
datana <- is.na(data)
data <- data[(rowSums(datana) == 0), ]
panel.hist <- function(x, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(usr[1:2], 0, 1.5) )
h <- hist(x, plot = FALSE)
breaks <- h$breaks; nB <- length(breaks)
y <- h$counts; y <- y/max(y)
rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
}
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y)
# txt <- format(c(r, 0.123456789), digits=digits)[1]
test <- cor.test(x,y)
Signif <- ifelse(test$p.value < 0.01, "p < 0.01",
ifelse(0.01 <= test$p.value & test$p.value < 0.05,
"p < 0.05",
paste("p = ",round(test$p.value,3), sep="")))
txt <- format(round(r, 2), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
# text(0.5, 0.5, txt, cex = 2 * r + 4, col=c(rgb(1,seq(0,0.1,length.out=10),seq(0,0.9,length.out=10)),rgb(0.5,0.5,0.5), rgb(seq(0.1,0,length.out=10),seq(1,0,length.out=10),1))[round(r*10, 0)+11]) # size and gradient color
text(0.5, 0.5, paste('R =', txt), cex = 2 * abs(r), col=c(rgb(1,seq(0,0.5,length.out=10),seq(0,0.5,length.out=10)),rgb(0.5,0.5,0.5), rgb(seq(0.5,0,length.out=10),seq(0.5,0,length.out=10),1))[round(r*10, 0)+11]) # size and gradient color
text(0.5, 0.2, Signif, col=ifelse(round(test$p.value,3)<0.05, "red", "black"), font=ifelse(round(test$p.value,3)<0.01, 2, 1), cex=1)
# text(0.5, 0.5, txt, cex = cex.cor * r) # size
# text(0.5, 0.5, txt, col=rainbow(21)[round(r*10, 0)+11]) #rainbow color
}
panel.diag = function (x, ...) {
par(new = TRUE)
hist(x,
# col = "light blue",
col = "grey",
probability = TRUE,
axes = FALSE,
main = "")
lines(density(x),
# col = "red",
col = "blue",
lwd = 3)
rug(x)
}
panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"),
cex = 1, col.regres = "red", ...)
{
points(x, y, pch = pch, col = col, bg = bg, cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok))
abline(stats::lm(y[ok] ~ x[ok]), col = col.regres, ...)
}
pairs(data, lower.panel=lower.panel, upper.panel=upper.panel, diag.panel = diag.panel, lwd = 2, col = "darkgrey", labels=labels, cex.labels=2, pch = 16)
}
#' plot pair-wise correlations with p value. in: a dataframe. out: a figure.
#' @param data a dataframe
#' @param lower.panel
#' @param upper.panel
#' @param diag.panel
#' @param lwd
#' @param col
#' @param labels
#' @param cex.labels
#' @return a pair plot
#' @export
#' @examples
#'
mf_pairs2 <- function(data, lower.panel=panel.smooth, upper.panel=panel.cor, diag.panel = panel.diag, lwd = 2, col = "grey", labels='', cex.labels=4){
panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y)
txt <- format(c(r, 0.123456789), digits=digits)[1]#防止末位为0
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
test <- cor.test(x,y)
Signif <- if(round(test$p.value,3)<0.01)
{print("p<0.01")}
else
{
if (0.01<=round(test$p.value,3) & round(test$p.value,3)<0.05)
{print("p<0.05")}
else
{paste("p=",round(test$p.value,3), sep="")}
}
text(0.5, 0.35, Signif, col=ifelse(round(test$p.value,3)<0.05, "red", "black"), font=ifelse(round(test$p.value,3)<0.01, 2, 1), cex=1)
text(0.5, 0.65, txt, col=ifelse(r<0, "red", "blue"), cex=1.3)
}
panel.smooth<-function (x, y, col = "grey", bg = NA, pch = 18, cex = 0.8, col.smooth = "red", span = 2/3, iter = 3, ...)
{
points(x, y, pch = pch, col = col, bg = bg, cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok))
lines(stats::lowess(x[ok], y[ok], f = span, iter = iter),
col = col.smooth, ...)
}
panel.diag = function (x, ...) {
par(new = TRUE)
hist(x,
#col = "light blue",
col = "grey",
probability = TRUE,
axes = FALSE,
main = "")
lines(density(x),
#col = "red",
col = "blue",
lwd = 2)
dnormseq <-round(min(x),digits=0):round(max(x),digits=0)
lines(dnormseq, dnorm(dnormseq, mean(x), sd(x)), col="red", lwd=2)
}
pairs(data, lower.panel=lower.panel, upper.panel=upper.panel, diag.panel = diag.panel, lwd = 2, col = "grey", labels=labels, cex.labels=4)
}
#' plot pair-wise correlations with linear regression. in: a dataframe. out: a figure. # not done yet.
#' @param data a dataframe
#' @param lower.panel
#' @param upper.panel
#' @param diag.panel
#' @param lwd
#' @param col
#' @param labels
#' @param cex.labels
#' @return a pair plot
#' @export
#' @examples
#'
mf_pairslm <- function(data, lower.panel = panel.lm, upper.panel=panel.cor, diag.panel = panel.diag, lwd = 2, col = "grey", labels='', cex.labels=4){
# panel.hist <- function(x, ...)
# {
# usr <- par("usr"); on.exit(par(usr))
# par(usr = c(usr[1:2], 0, 1.5) )
# h <- hist(x, plot = FALSE)
# breaks <- h$breaks; nB <- length(breaks)
# y <- h$counts; y <- y/max(y)
# rect(breaks[-nB], 0, breaks[-1], y, col="cyan", ...)
# }
# panel.cor <- function(x, y, digits=2, prefix="", cex.cor, ...)
panel.cor <- function(x, y, digits=2, cex.cor, ...)
{
usr <- par("usr"); on.exit(par(usr))
par(usr = c(0, 1, 0, 1))
r <- cor(x, y)
# txt <- format(c(r, 0.123456789), digits=digits)[1]
txt <- format(round(r, 2), digits=digits)[1]
txt <- paste(prefix, txt, sep="")
if(missing(cex.cor)) cex.cor <- 0.8/strwidth(txt)
# text(0.5, 0.5, txt, cex = 2 * r + 4, col=c(rgb(1,seq(0,0.1,length.out=10),seq(0,0.9,length.out=10)),rgb(0.5,0.5,0.5), rgb(seq(0.1,0,length.out=10),seq(1,0,length.out=10),1))[round(r*10, 0)+11]) # size and gradient color
# text(0.5, 0.5, txt, cex = 2 * abs(r) + 4, col=c(rgb(1,seq(0,0.5,length.out=10),seq(0,0.5,length.out=10)),rgb(0.5,0.5,0.5), rgb(seq(0.5,0,length.out=10),seq(0.5,0,length.out=10),1))[round(r*10, 0)+11]) # size and gradient color
text(0.5, 1, txt, cex = 2) # size and gradient color
# text(0.5, 0.5, txt, cex = cex.cor * r) # size
# text(0.5, 0.5, txt, col=rainbow(21)[round(r*10, 0)+11]) #rainbow color
}
panel.diag = function (x, ...) {
par(new = TRUE)
hist(x,
# col = "light blue",
col = "grey",
probability = TRUE,
axes = FALSE,
main = "")
# lines(density(x),
# # col = "red",
# col = "blue",
# lwd = 3)
rug(x)
}
panel.lm <- function (x, y, col = par("col"), bg = NA, pch = par("pch"),
cex = 1, col.regres = "red", ...)
{
points(x, y, pch = pch, col = col, bg = bg, cex = cex)
ok <- is.finite(x) & is.finite(y)
if (any(ok))
abline(stats::lm(y[ok] ~ x[ok]), col = col.regres, ...)
}
pairs(data, lower.panel=lower.panel, upper.panel=upper.panel, diag.panel = diag.panel, lwd = 2, col = "grey", labels=labels, cex.labels=4)
}
#' A filter to pick out data within a range by quantile of ref. in: a numeric vector. out: a logical vector.
#' @param x
#' @param x
#' @param quantile_lower
#' @param quantile_upper
#' @return filtered data
#' @export
#' @examples
#'
mf_pickquantile <- function(x, ref = x, quantile_lower = 0.1, quantile_upper = 0.9)
{
y <- x > quantile(ref, prob = quantile_lower, na.rm = TRUE) & x < quantile(ref, prob = quantile_upper, na.rm = TRUE)
return(y)
}
#' calculate the planafit coefficients from 3D wind measurement. in: three vectors or one-column dataframes. out: a list.
#' @param u
#' @param v
#' @param w
#' @param Ulower
#' @param Uupper
#' @param ifplot logcial
#' @return planafit coefficients
#' @export
#' @examples
#'
mf_planarfit <- function(u, v, w, Ulower = 1, Uupper = 5, ifplot = FALSE){
U <- sqrt(u^2 + v^2 + w^2)
pc <- U > Ulower & U < Uupper
u <- matrix(u[pc], ncol = 1)
v <- matrix(v[pc], ncol = 1)
w <- matrix(w[pc], ncol = 1)
l = length(u)
su = sum(u)
sv = sum(v)
sw = sum(w)
suv = t(u) %*% v
suw = t(u) %*% w
svw = t(v) %*% w
su2 = t(u) %*% u
sv2 = t(v) %*% v
H = matrix(c(l, su, sv, su, su2, suv, sv, suv, sv2), byrow = TRUE, ncol = 3)
g = matrix(c(sw, suw, svw), ncol = 1)
b <- drop(solve(a = H, b = g))
names(b) <- c('b0', 'b1', 'b2')
k3 = 1 / sqrt(1 + b[2] ^ 2 + b[3] ^ 2)
k <- c(-b[2] * k3, -b[3] * k3, k3)
names(k) <- c('k1', 'k2', 'k3')
w_sim <- (b[1] + b[2] * u + b[3] * v)
w_summary <- summary(lm(w_sim ~ w))
if (ifplot) plot(w, w_sim, xlim = c(-0.5, 0.5), ylim = c(-0.5, 0.5))
return(list(b = b, k = k, w_summary = w_summary))
}
#' plot a blank figure
#' @return a blank figure
#' @export
#' @examples
#'
mf_plotblank <- function() plot(1, type = 'n', axes = FALSE, xlab = '', ylab = '')
#' A reminder for colors
#' @return a figure
#' @export
#' @examples
#'
mf_plotcolors <- function(){
SetTextContrastColor <- function(color)
{
ifelse( mean(col2rgb(color)) > 127, "black", "white")
}
# Define this array of text contrast colors that correponds to each
# member of the colors() array.
TextContrastColor <- unlist( lapply(colors(), SetTextContrastColor) )
oldpar <- par(mfrow = c(2, 1), mar = c(0, 0, 0, 0))
# 1a. Plot matrix of R colors, in index order, 25 per row.
# This example plots each row of rectangles one at a time.
colCount <- 25 # number per row
rowCount <- 27
plot( c(1,colCount), c(0,rowCount), type="n", ylab="", xlab="",
axes=FALSE, ylim=c(rowCount,0))
# title("R colors")
for (j in 0:(rowCount-1))
{
base <- j*colCount
remaining <- length(colors()) - base
RowSize <- ifelse(remaining < colCount, remaining, colCount)
rect((1:RowSize)-0.5,j-0.5, (1:RowSize)+0.5,j+0.5,
border="black",
col=colors()[base + (1:RowSize)])
text((1:RowSize), j, paste(base + (1:RowSize)), cex=0.7,
col=TextContrastColor[base + (1:RowSize)])
}
# 1b. Plot matrix of R colors, in "hue" order, 25 per row.
# This example plots each rectangle one at a time.
RGBColors <- col2rgb(colors()[1:length(colors())])
HSVColors <- rgb2hsv( RGBColors[1,], RGBColors[2,], RGBColors[3,],
maxColorValue=255)
HueOrder <- order( HSVColors[1,], HSVColors[2,], HSVColors[3,] )
plot(0, type="n", ylab="", xlab="",
axes=FALSE, ylim=c(rowCount,0), xlim=c(1,colCount))
# title("R colors -- Sorted by Hue, Saturation, Value")
for (j in 0:(rowCount-1))
{
for (i in 1:colCount)
{
k <- j*colCount + i
if (k <= length(colors()))
{
rect(i-0.5,j-0.5, i+0.5,j+0.5, border="black", col=colors()[ HueOrder[k] ])
text(i,j, paste(HueOrder[k]), cex=0.7, col=TextContrastColor[ HueOrder[k] ])
}
}
}
par(oldpar)
}
#' lty
#' @return a figure reminding you lty
#' @export
#' @examples
#'
mf_plotlty <- function(){
ltynr <- 6
plot(0:ltynr + 1, 0:ltynr + 1, type = 'n', axes = FALSE, xlab = "", ylab = "")
axis(2, las = 1, lwd = 0, at = seq(1, ltynr))
abline(h = seq(1, ltynr), lty = 1:ltynr )
}
#' pch numbers
#' @return a figure reminding you pch
#' @export
#' @examples
#'
mf_plotpch <- function(){
mypch <- 0:25
x <- rep(1:13, 2)
y <- rep(c(1, 1.8), each = 13)
plot(x, y, pch = mypch, ylim = c(0.5, 2.5), cex = 5, xlab = '', ylab = '', axes = FALSE)
text(x, y, labels = mypch, pos = 1, offset = 2, cex = 2)
}
#' type
#' @return a figure reminding you type
#' @export
#' @examples
#'
mf_plottype <- function(){
par(mfrow = c(3,3), cex = 1.2, mar = c(0, 0, 0, 0))
y <- rnorm(n = 6)
for (i in c("p", 'l', "b", "c", "o", "h", "s", "S", "n")) {
plot(x= 1:6, y, type = i, axes = FALSE, cex = 1.5)
box()
legend('bottomright', legend = paste('type = "', i, '"', sep = ''), bty = "n", text.col = 'blue')
}
}
#' fill 0 before a string. in and out: a vector with length 1. if length(x) > 1, use unlist(lapply(x,FUN = mf_fill0)). e.g. 12 --- 012
#' @param x
#' @param d digits
#' @return character
#' @export
#' @examples
#'
mf_prefix0 <- function(x, d = 2) #x must be a vector with length 1. if length(x) > 1, use unlist(lapply(x,FUN = mf_fill0))
{
# Peng Zhao, Aug 19, 2011
# To fill 0 before a string.
#
if(length(x) > 1) {
print('Error: length(x) > 1. use unlist(lapply(x,FUN = mf_fill0))')
return(NULL)
} else {
x <- as.character(x)
x.n <- nchar(x)
if(any(x.n > d, na.rm = TRUE)) {print("Warning: at least one of the length of x is larger than d")}
y <- ifelse(x.n <= d,
paste(paste(rep("0", d - x.n), collapse = ""), x, sep = ""),
x)
return(y)
}
}
#' # give each x a color by groups.
#' @param x numeric vector
#' @param ngroups
#' @return a vector of color codes dependent on the value of x. in: a numeric vector. out: a vector of color.
#' @export
#' @examples
#'
mf_rainbow <- function(x, ngroup = 20)
{
rainbow(ngroup)[round((x - min(x, na.rm = TRUE)) / diff(range(x, na.rm = TRUE)) * (ngroup - 1), 0) + 1]
}
#' batch read several files. in: a path. out: a list containing all data frames
#' @param wd working dir
#' @param sep sep of each file
#' @return a list
#' @export
#' @examples
#'
mf_readdir <- function(wd = ".", sep = c(","))
{
x <- dir("./")
y <- list()
for (i in 1:length(input.ls))
{
input.p <- paste(wd, x[i], sep = "\\")
y[[as.character(i)]] <- read.table(input.p,
header = TRUE,
sep = sep[i],
fill = TRUE,
na.strings = c("Inf","-Inf","NA", "NaN"),
stringsAsFactors = FALSE)
}
return(y)
}
#' calculation of saturation vapour pressure (Pa) at T(degree C). in and out: a vector. for -45 to 60 degree over water according to Sonntag 1990
#' @param t temperature in degree C
#' @return saturation vapour pressure (Pa)
#' @export
#' @examples
#'
mf_satpress <- function(t)
{
6.112 * exp(17.62 * t / (243.12 + t)) * 100
}
#' standard error
#' @param x
#' @param na.rm logical
#' @return se
#' @export
#' @examples
#'
mf_se <- function(x, na.rm = TRUE) {sd(x, na.rm = na.rm)/sqrt(sum(!is.na(x)))}
#' A template to create a folder with data files to share
#' @param foldername
#' @param filename
#' @param df_my dataframe to share
#' @param header logical
#' @param meta logical
#' @return a folder to share
#' @export
#' @examples
#'
mf_sharedata <- function(foldername = 'foo', filename = 'bar', df_my = NULL, header = TRUE, meta = TRUE){
filepre <- paste0('./', foldername, '/', filename)
# DataUsePolicy
dir.create(foldername)
dupcontent <- paste('Data use policy:
Inform those who contributed the data on your plans to
use the data and any plans for publication; the data
contributors should be given the opportunity to contribute
substantively to publications and, as result, to be a co-author.
Contacts: Peng Zhao <peng.zhao@uibk. ac.at>, Georg Wohlfahrt <georg.wohlfahrt@uibk. ac.at>'
)
writeLines(text = dupcontent, con = paste0(filepre, '_DataUsePolicy.txt'))
# data frame
if (!is.null(df_my)) write.csv(x = df_my, file = paste0(filepre, '.csv'), row.names = FALSE)
# header
if (!is.null(df_my) & header) {
df_header <- data.frame(names = names(df_my), unit = '', class = '', description = '')
write.csv(x = df_header, file = paste0(filepre, '_header.csv'), row.names = FALSE)
}
# meta
if (meta) {
metacontent <- paste0('"key","value"
"Contact","Peng Zhao <peng.zhao@uibk. ac.at>"
"Data Prepared By","Peng Zhao <peng.zhao@uibk. ac.at>"
"Created On",', Sys.time(), '
"Note1","The 9mEC sonics anemometer data were provided by Matthias Zeeman (KIT/IMK-IFU)"
"Note2","--"
"Note3","--"
')
writeLines(text = metacontent, con = paste0(filepre, '_meta.csv'))
}
}
#' normality test. if either skewness/se_skew is outside -1.96 -- 1.96, the data are unlikely to be normally distributed. Or Kolmogorov-Smirnov test, Shapiro-Wilks' W test. But a visual examination is the best. Negative values of the skewness indicate data that are skewed to the left(negativelz skewed)
#' @param x
#' @return skewness
#' @export
#' @examples
#'
mf_skewness <- function(x){
x <- x[!is.na(x)]
n <- length(x)
skewness <- n / (n-1) / (n-2) * sum((x - mean(x)) ^ 3) / sd(x) ^3
se_skewness <- sqrt(6/length(x))
return(skewness/se_skewness)
}
#' Kurtosis measures whether the data are peaked or flat relative to a normal curve. positive: wide. negative: narrow
#' @param x
#' @return Kurtosis
#' @export
#' @examples
#'
mf_kurtosis <- function(x){
x <- x[!is.na(x)]
n <- length(x)
kurtosis <- n*(n+1)/(n-1)/(n-2)*sum((x-mean(x))^4) / sd(x)^4 - 3*(n-1)^2/(n-2)/(n-3)
se_kurtosis <- sqrt(24/n)
return(kurtosis/se_kurtosis)
}
#' smooth a series with a giving width. in and out: a vector.
#' @param x
#' @param width.half
#' @return smoothed data
#' @export
#' @examples
#'
mf_smooth <- function(x, width.half = 0)
{
y <- x
width.half <- ceiling(width.half)
if (width.half > 0 & length(x) > (width.half * 2) )
{
for (i in 1:width.half)
{
y[i] <- mean(x[1 : (i + width.half)], na.rm = TRUE)
}
for (i in (width.half + 1) : (length(x) - width.half))
{
y[i] <- mean(x[(i - width.half): (i + width.half)], na.rm = TRUE)
}
for (i in (length(x) - width.half + 1))
{
y[i] <- mean(x[(i - width.half): length(x)], na.rm = TRUE)
}
}
return(y)
}
#' A simplied version of strptime. only for '\%Y-\%m-\%d \%H:\%M:\%S'
#' @param x character
#' @param myformat
#' @param mytz
#' @return time
#' @export
#' @examples
#'
mf_strptime <- function(x, myformat = '%Y-%m-%d %H:%M:%S', mytz = 'GMT'){
strptime(x, format = myformat, tz = mytz)
}
#' calculate sunrise and sunset time in a friendly way. in: a given date and coordinates. out: a dataframe with sunrise and sunset time.
#' @param mydate POSIXct
#' @param lon
#' @param lat
#' @return a dataframe
#' @export
#' @examples
#'
mf_sunriset <- function(mydate = as.POSIXct("2008-08-08", tz="GMT"), lon = 116.39, lat = 39.91)
{
data.frame(date = format(mydate, "%Y-%m-%d"),
sunrise = format(sunriset(matrix(c(lon, lat), nrow = 1), mydate, direction=c("sunrise"), POSIXct.out = TRUE)$time + 3600 * ceiling(lon/15), format = '%H:%M:%S'),
sunset = format(sunriset(matrix(c(lon, lat), nrow = 1), mydate, direction=c("sunset"), POSIXct.out = TRUE)$time + 3600 * ceiling(lon/15), format = '%H:%M:%S'))
}
#' a friendly version of tapply for dataframes. in and out: same as tapply(). the built-in function tapply returns a matrix with unfriendly row name and colname. mf-tapply returns a friendly dataframe
#' @param data dataframe
#' @param select character, column names to calc
#' @param myfactor a colname as factor
#' @param na.rm
#' @return a dataframe
#' @export
#' @examples
#'
mf_tapply <- function(data, select = names(data), myfactor, ..., na.rm = c(TRUE, FALSE, NULL)[1])
{
if (is.null(na.rm)) {
y <- data.frame(tapply(data[, select[1]], data[, myfactor],...))
} else {
y <- data.frame(tapply(data[, select[1]], data[, myfactor],..., na.rm = na.rm))
}
names(y) <- select[1]
y[, myfactor] <- rownames(y)
y <- y[, c(2,1)]
if (length(select) > 1){
for (i in select[-1]){
if (is.null(na.rm)) {
yi <- data.frame(tapply(data[, i], data[, myfactor],...))
} else {
yi <- data.frame(tapply(data[, i], data[, myfactor],..., na.rm = na.rm))
}
names(yi) <- i
yi[, myfactor] <- rownames(yi)
y <- merge(y, yi, by = myfactor)
}
}
return(y)
}
#' mf_taylor: plot a taylor diagram to compare reference (x) and model (y). in: two vectors. out: a figure.
#' @param ref a vector
#' @param model a vector
#' @param add
#' @param col
#' @param pch
#' @param pos.cor
#' @param xlab
#' @param ylab
#' @param main
#' @param show.gamma
#' @param ngamma
#' @param gamma.col
#' @param sd.arcs
#' @param ref.sd
#' @param grad.corr.lines
#' @return a taylor diagram
#' @export
#' @examples
#'
mf_taylor <- function (ref, model, add = FALSE, col = "red", pch = 19, pos.cor = TRUE,
xlab = "", ylab = "", main = "Taylor Diagram", show.gamma = TRUE,
ngamma = 3, gamma.col = 8, sd.arcs = 0, ref.sd = FALSE, grad.corr.lines = c(0.2,
0.4, 0.6, 0.8, 0.9), pcex = 1, normalize = FALSE, mar = c(5,
4, 6, 6), ...)
{
grad.corr.full <- c(0, 0.2, 0.4, 0.6, 0.8, 0.9, 0.95, 0.99,
1)
R <- cor(ref, model, use = "pairwise")
sd.r <- sd(ref)
sd.f <- sd(model)
if (normalize) {
sd.f <- sd.f/sd.r
sd.r <- 1
}
maxsd <- 1.5 * max(sd.f, sd.r)
oldpar <- par("mar", "xpd", "xaxs", "yaxs")
if (!add) {
if (pos.cor) {
if (nchar(ylab) == 0)
ylab = "Standard deviation"
par(mar = mar)
plot(0, xlim = c(0, maxsd), ylim = c(0, maxsd), xaxs = "i",
yaxs = "i", axes = FALSE, main = main, xlab = xlab,
ylab = ylab, type = "n", ...)
if (grad.corr.lines[1]) {
for (gcl in grad.corr.lines) lines(c(0, maxsd *
gcl), c(0, maxsd * sqrt(1 - gcl^2)), lty = 3)
}
segments(c(0, 0), c(0, 0), c(0, maxsd), c(maxsd,
0))
axis.ticks <- pretty(c(0, maxsd))
axis.ticks <- axis.ticks[axis.ticks <= maxsd]
axis(1, at = axis.ticks)
axis(2, at = axis.ticks)
if (sd.arcs[1]) {
if (length(sd.arcs) == 1)
sd.arcs <- axis.ticks
for (sdarc in sd.arcs) {
xcurve <- cos(seq(0, pi/2, by = 0.03)) * sdarc
ycurve <- sin(seq(0, pi/2, by = 0.03)) * sdarc
lines(xcurve, ycurve, col = "blue", lty = 3)
}
}
if (show.gamma[1]) {
if (length(show.gamma) > 1)
gamma <- show.gamma
else gamma <- pretty(c(0, maxsd), n = ngamma)[-1]
if (gamma[length(gamma)] > maxsd)
gamma <- gamma[-length(gamma)]
labelpos <- seq(45, 70, length.out = length(gamma))
for (gindex in 1:length(gamma)) {
xcurve <- cos(seq(0, pi, by = 0.03)) * gamma[gindex] +
sd.r
endcurve <- which(xcurve < 0)
endcurve <- ifelse(length(endcurve), min(endcurve) -
1, 105)
ycurve <- sin(seq(0, pi, by = 0.03)) * gamma[gindex]
maxcurve <- xcurve * xcurve + ycurve * ycurve
startcurve <- which(maxcurve > maxsd * maxsd)
startcurve <- ifelse(length(startcurve), max(startcurve) +
1, 0)
lines(xcurve[startcurve:endcurve], ycurve[startcurve:endcurve],
col = gamma.col)
boxed.labels(xcurve[labelpos[gindex]], ycurve[labelpos[gindex]],
gamma[gindex], border = FALSE)
}
}
xcurve <- cos(seq(0, pi/2, by = 0.01)) * maxsd
ycurve <- sin(seq(0, pi/2, by = 0.01)) * maxsd
lines(xcurve, ycurve)
bigtickangles <- acos(seq(0.1, 0.9, by = 0.1))
medtickangles <- acos(seq(0.05, 0.95, by = 0.1))
smltickangles <- acos(seq(0.91, 0.99, by = 0.01))
segments(cos(bigtickangles) * maxsd, sin(bigtickangles) *
maxsd, cos(bigtickangles) * 0.97 * maxsd, sin(bigtickangles) *
0.97 * maxsd)
par(xpd = TRUE)
if (ref.sd) {
xcurve <- cos(seq(0, pi/2, by = 0.01)) * sd.r
ycurve <- sin(seq(0, pi/2, by = 0.01)) * sd.r
lines(xcurve, ycurve)
}
points(sd.r, 0)
text(cos(bigtickangles) * 1.05 * maxsd,
sin(bigtickangles) * 1.05 * maxsd,
seq(0.1, 0.9, by = 0.1))
text(c(0.95, 0.99) * 1.07 * maxsd,
sin(acos(c(0.95, 0.99))) * 1.05 * maxsd,
c(0.95, 0.99))
text(maxsd * 0.7, maxsd * 0.9, "Correlation", srt = -35, cex = 1.43)
segments(cos(medtickangles) * maxsd, sin(medtickangles) *
maxsd, cos(medtickangles) * 0.98 * maxsd, sin(medtickangles) *
0.98 * maxsd)
segments(cos(smltickangles) * maxsd, sin(smltickangles) *
maxsd, cos(smltickangles) * 0.99 * maxsd, sin(smltickangles) *
0.99 * maxsd)
}
else {
x <- ref
y <- model
R <- cor(x, y, use = "pairwise.complete.obs")
E <- mean(x, na.rm = TRUE) - mean(y, na.rm = TRUE)
xprime <- x - mean(x, na.rm = TRUE)
yprime <- y - mean(y, na.rm = TRUE)
sumofsquares <- (xprime - yprime)^2
Eprime <- sqrt(sum(sumofsquares)/length(complete.cases(x)))
E2 <- E^2 + Eprime^2
if (add == FALSE) {
maxray <- 1.5 * max(sd.f, sd.r)
plot(c(-maxray, maxray), c(0, maxray), type = "n",
asp = 1, bty = "n", xaxt = "n", yaxt = "n",
xlab = xlab, ylab = ylab, main = main)
discrete <- seq(180, 0, by = -1)
listepoints <- NULL
for (i in discrete) {
listepoints <- cbind(listepoints, maxray *
cos(i * pi/180), maxray * sin(i * pi/180))
}
listepoints <- matrix(listepoints, 2, length(listepoints)/2)
listepoints <- t(listepoints)
lines(listepoints[, 1], listepoints[, 2])
lines(c(-maxray, maxray), c(0, 0))
lines(c(0, 0), c(0, maxray))
for (i in grad.corr.lines) {
lines(c(0, maxray * i), c(0, maxray * sqrt(1 -
i^2)), lty = 3)
lines(c(0, -maxray * i), c(0, maxray * sqrt(1 -
i^2)), lty = 3)
}
for (i in grad.corr.full) {
text(1.05 * maxray * i, 1.05 * maxray * sqrt(1 -
i^2), i, cex = 0.6)
text(-1.05 * maxray * i, 1.05 * maxray * sqrt(1 -
i^2), -i, cex = 0.6)
}
seq.sd <- seq.int(0, 2 * maxray, by = (maxray/10))[-1]
for (i in seq.sd) {
xcircle <- sd.r + (cos(discrete * pi/180) *
i)
ycircle <- sin(discrete * pi/180) * i
for (j in 1:length(xcircle)) {
if ((xcircle[j]^2 + ycircle[j]^2) < (maxray^2)) {
points(xcircle[j], ycircle[j], col = "darkgreen",
pch = ".")
if (j == 10)
text(xcircle[j], ycircle[j], signif(i,
2), cex = 0.5, col = "darkgreen")
}
}
}
seq.sd <- seq.int(0, maxray, length.out = 5)
for (i in seq.sd) {
xcircle <- (cos(discrete * pi/180) * i)
ycircle <- sin(discrete * pi/180) * i
if (i)
lines(xcircle, ycircle, lty = 3, col = "blue")
text(min(xcircle), -0.03 * maxray, signif(i,
2), cex = 0.5, col = "blue")
text(max(xcircle), -0.03 * maxray, signif(i,
2), cex = 0.5, col = "blue")
}
text(0, -0.08 * maxray, "Standard Deviation",
cex = 0.7, col = "blue")
text(0, -0.12 * maxray, "Centered RMS Difference",
cex = 0.7, col = "darkgreen")
points(sd.r, 0, pch = 22, bg = "darkgreen", cex = 1.1)
text(0, 1.1 * maxray, "Correlation Coefficient",
cex = 0.7)
}
S <- (2 * (1 + R))/(sd.f + (1/sd.f))^2
}
}
points(sd.f * R, sd.f * sin(acos(R)), pch = pch, col = col,
cex = pcex)
invisible(oldpar)
}
#' Fill time series with NA
#' @param data data frame
#' @param timestamps timestamps of data. can be character, but its format must be specified in 'informat'
#' @param informat format of timestamps
#' @param interval time interval in timestamps
#' @param intervalunit
#' @param starttime starttime in the output data. the same format as 'informat'
#' @param endtime
#' @param outformat
#' @param fillwith
#' @return a dataframe
#' @export
#' @examples
#'
# mf_timefiller <- function(data, # a data frame
# timestamps, # timestamps of data. can be character, but its format must be specified in 'informat'
# informat = '%Y-%m-%d %H:%M:%S', # format of timestamps
# interval = 1800, # time interval in timestamps
# starttime = timestamps[1], # starttime in the output data. the same format as 'informat'
# endtime = timestamps[length(timestamps)],
# outformat = informat)
# {
# datanames <- names(data)
# data <- as.matrix(data)
# starttime <- strptime(starttime, format = informat, tz = 'GMT')
# endtime <- strptime(endtime, format=informat, tz = 'GMT')
# dates.raw <- seq(starttime, by = interval, to = endtime)
# nrow.raw <- length(dates.raw)
#
# x.raw <- matrix(NA, nrow = nrow.raw, ncol = ncol(data))
#
# time.in <- strptime(as.character(timestamps), format = informat)
# index.in <- match(format(dates.raw, format = informat), format(time.in, format = informat), nomatch = 0)
# index.raw <- match(format(time.in, format = informat), format(dates.raw, format = informat), nomatch = 0)
# x.raw[index.raw,] <- data[index.in,]
# zz <- cbind.data.frame(format(dates.raw, format=outformat), x.raw)
# names(zz) <- c('TimeFiller', datanames)
# return(zz)
# }
mf_timefiller <- function(data, # a data frame
timestamps, # timestamps of data. can be character, but its format must be specified in 'informat'
informat = '%Y-%m-%d %H:%M:%S', # format of timestamps
interval = 1800, # time interval in timestamps
intervalunit = 'seconds',
starttime = timestamps[1], # starttime in the output data. the same format as 'informat'
endtime = timestamps[length(timestamps)],
outformat = informat,
fillwith = NA)
{
# if ((informat == '%Y-%m-%d %H:%M:%S' | informat == '%d.%m.%Y %H:%M:%S') & nchar(timestamps[1]) == 16) timestamps <- paste(timestamps, ':00', sep = '')
# if ((informat == '%Y-%m-%d %H:%M:%S' | informat == '%d.%m.%Y %H:%M:%S') & nchar(timestamps[1]) == 10) timestamps <- paste(timestamps, ' 00:00:00', sep = '')
# if (informat == '%Y%m%d%H%M%S' & nchar(timestamps[1]) == 12) timestamps <- paste(timestamps, '00', sep = '')
timestamps <- mf_timestampfull(timestamps, informat)
interval <- interval * switch (intervalunit,
'seconds' = 1,
'minutes' = 60,
'hours' = 3600,
'days' = 3600 * 24
)
datanames <- names(data)
data <- as.matrix(data)
starttime <- strptime(mf_timestampfull(starttime, informat), format = informat, tz = 'GMT')
endtime <- strptime(mf_timestampfull(endtime, informat), format = informat, tz = 'GMT')
dates.raw <- seq(starttime, by = interval, to = endtime)
nrow.raw <- length(dates.raw)
x.raw <- matrix(fillwith, nrow = nrow.raw, ncol = ncol(data))
time.in <- strptime(as.character(timestamps), format = informat, tz = 'GMT')
rpc <- time.in >= starttime & time.in <= endtime
time.in <- time.in[rpc]
# index.in <- match(format(dates.raw, format = informat), format(time.in, format = informat), nomatch = 0)
index.raw <- match(format(time.in, format = informat), format(dates.raw, format = informat), nomatch = 0)
x.raw[index.raw, ] <- data[rpc, ]
# x.raw[index.raw,] <- data[index.in,]
zz <- cbind.data.frame(format(dates.raw, format=outformat), x.raw)
names(zz) <- c('TimeFiller', datanames)
write.csv(zz, 'weather_gf.csv', row.names = TRUE)
zz <- read.csv('weather_gf.csv', stringsAsFactors = FALSE)
file.remove('weather_gf.csv')
return(zz)
}
#' convert time stamps into a full format, i.e.
#' convert 2016-07-14 or 2016-07-14 00:00 into 2016-07-14 00:00:00
#' convert 14.07.2016 or 14.07.2016 00:00 into 14.07.2016 00:00:00
#' convert 14.07.2016 or 14.07.2016 00:00 into 14.07.2016 00:00:00
#' convert 20160714 or 2016071400 or 201607140000 into 20160714000000
#' @param timestamps can be character
#' @param informat
#' @return time stamps
#' @export
#' @examples
#'
mf_timestampfull <- function(timestamps, informat){
if ((informat == '%Y-%m-%d %H:%M:%S' | informat == '%d.%m.%Y %H:%M:%S') & nchar(timestamps[1]) == 16) timestamps <- paste(timestamps, ':00', sep = '')
if ((informat == '%Y-%m-%d %H:%M:%S' | informat == '%d.%m.%Y %H:%M:%S') & nchar(timestamps[1]) == 10) timestamps <- paste(timestamps, ' 00:00:00', sep = '')
if (informat == '%Y%m%d%H%M%S' & nchar(timestamps[1]) == 12) timestamps <- paste(timestamps, '00', sep = '')
if (informat == '%Y%m%d%H%M%S' & nchar(timestamps[1]) == 10) timestamps <- paste(timestamps, '0000', sep = '')
if (informat == '%Y%m%d%H%M%S' & nchar(timestamps[1]) == 8) timestamps <- paste(timestamps, '000000', sep = '')
timestamps
}
#' a friendly version of tapply
#' @param colname
#' @param x
#' @param factor
#' @return a dataframe
#' @export
#' @examples
#'
mf_vtapply <- function(colname = "tapply", x, factor, ...) # x must be a vector
{
y <- data.frame(tapply(x, factor, ..., na.rm = TRUE))
names(y) <- colname
y$rownames <- rownames(y)
y <- y[, c(2,1)]
return(y)
}
#' calculate resultant mean wind spead and resultant mean wind direction.# in: a wind speed vector and a wind direction vector. # out: a mean speed and a mean direction.
#' @param ws
#' @param wd
#' @return resultant mean wind speed
#' @export
#' @examples
#'
mf_windmean <- function(ws, wd){
u <- - ws * sin(2 * pi * wd/360)
v <- - ws * cos(2 * pi * wd/360)
mean.u <- mean(u, na.rm = TRUE)
mean.v <- mean(v, na.rm = TRUE)
mean.wd <- (atan2(mean.u, mean.v) * 360/2/pi) + 180
mean.ws <- ((mean.u^2 + mean.v^2)^0.5)
return(c(mean.ws, mean.wd))
}
#' not recommended! alculate resultant mean wind speed and resultant mean wind direction. especially used in tapply(). this calc is sometimes confusing. better to calc step by step. in: a character vector with wind speed and direction separated with semi colon ';'. out: a chracter vector with resultant mean wind speed and resultant mean direction separated with semi colon ';'.
#' @param wswd
#' @param na.rm
#' @return mean
#' @export
#' @examples
#'
mf_windmean2 <- function(wswd, na.rm = TRUE){
wswd <- as.numeric(unlist(strsplit(wswd, ';')))
ws <- wswd[seq(1, length(wswd), by = 2)]
wd <- wswd[seq(2, length(wswd), by = 2)]
u <- - ws * sin(2 * pi * wd/360)
v <- - ws * cos(2 * pi * wd/360)
mean.u <- mean(u, na.rm = na.rm)
mean.v <- mean(v, na.rm = na.rm)
mean.wd <- (atan2(mean.u, mean.v) * 360/2/pi) + 180
mean.ws <- ((mean.u^2 + mean.v^2)^0.5)
return(paste(mean.ws, mean.wd, sep = ';'))
}
#' calculate u component of wind. in: a vector of wind speed and a vector of wind direction. out: a vector of u
#' @param ws
#' @param wd
#' @return u
#' @export
#' @examples
#'
mf_windu <- function(ws, wd) {- ws * sin(2 * pi * wd/360)}
#' calculate v component of wind. in: a vector of wind speed and a vector of wind direction. out: a vector of v
#' @param ws
#' @param wd
#' @return v
#' @export
#' @examples
#'
mf_windv <- function(ws, wd) {- ws * cos(2 * pi * wd/360)}
#' calculate resultant wind direction from u and v. in: a vector of u and v. out: a vector of wind direction.
#' @param u
#' @param v
#' @return resultant wind direction
#' @export
#' @examples
#'
mf_windd <- function(u, v) {atan2(u, v) * 360/2/pi + 180}
#' convert wind direction out of the range [0, 360) into the range.
#' @param winddir
#' @param ymin
#' @param ymax
#' @return corrected dir
#' @export
#' @examples
#'
mf_winddclear <- function(winddir, ymin = 0, ymax = 360) {
winddclear <- ifelse(winddir < ymin, winddir + 360, ifelse(winddir >= ymax, winddir -360, winddir))
return(winddclear)
}
#' Draw my windrose
#' @param mydata a dataframe
#' @param ws colname of wind speed
#' @param wd colname of wind dir
#' @param mytype
#' @param myceiling lagical, if change the maximum wind speed shown in the legend
#' @return a windrose figure
#' @export
#' @examples
#'
mf_windrose <- function(mydata, ws = 'ws', wd = 'wd', mytype = 'default', myceiling = FALSE) {
if (myceiling) mydata[which.max(mydata[, ws]), ws] <- ceiling(mydata[which.max(mydata[, ws]), ws])
windRose(mydata = mydata, ws = ws, wd = wd, key.position = "right", paddle = FALSE, seg = 0.9, angle = 22.5, ws.int = 0.5, cex = 3, annotate = FALSE, type = mytype)
}
#' save csv file with asking if the file already exists.
#' @param data
#' @param writefile destination file
#' @param row.names logical
#' @return write a file
#' @export
#' @examples
#'
mf_write <- function(data, writefile, row.names = FALSE)
if (file.exists(writefile)){
warning('File exists! New file is not saved!')
} else {
write.csv(data, file = writefile, row.names = row.names)
}
##' Hour rose plot
##'
##' @param mydata
##' @param ws
##' @param wd
##' @param ws2
##' @param wd2
##' @param ws.int
##' @param angle
##' @param type
##' @param cols
##' @param grid.line
##' @param width
##' @param seg
##' @param auto.text
##' @param breaks
##' @param offset
##' @param paddle
##' @param key.header
##' @param key.footer
##' @param key.position
##' @param key
##' @param dig.lab
##' @param statistic
##' @param pollutant
##' @param annotate
##' @param border
##' @param cust_labels
##' @param ...
##'
#' @return a figure
#' @export
#' @examples
#' mf_hourrose(mydata, breaks = seq(0, 24, 1), angle = 15, key.footer = 'votes')
mf_hourrose <- function (mydata, ws = "ws", wd = "wd", ws2 = NA, wd2 = NA, ws.int = 2,
angle = 30, type = "default", cols = "default", grid.line = NULL,
width = 1, seg = 0.9, auto.text = TRUE, breaks = 4, offset = 10,
paddle = FALSE, key.header = NULL, key.footer = "(m/s)", key.position = "right",
key = TRUE, dig.lab = 5, statistic = "prop.count", pollutant = NULL,
cust_labels = c(0, 6, 12, 18),
annotate = TRUE, border = NA, ...)
{
if (is.null(seg))
seg <- 0.9
if (length(cols) == 1 && cols == "greyscale") {
lattice::trellis.par.set(list(strip.background = list(col = "white")))
calm.col <- "black"
}
else {
calm.col <- "forestgreen"
}
current.strip <- lattice::trellis.par.get("strip.background")
on.exit(lattice::trellis.par.set("strip.background", current.strip))
if (360/angle != round(360/angle)) {
warning("In windRose(...):\n angle will produce some spoke overlap",
"\n suggest one of: 5, 6, 8, 9, 10, 12, 15, 30, 45, etc.",
call. = FALSE)
}
if (angle < 3) {
warning("In windRose(...):\n angle too small", "\n enforcing 'angle = 3'",
call. = FALSE)
angle <- 3
}
extra.args <- list(...)
extra.args$xlab <- if ("xlab" %in% names(extra.args))
quickText(extra.args$xlab, auto.text)
else quickText("", auto.text)
extra.args$ylab <- if ("ylab" %in% names(extra.args))
quickText(extra.args$ylab, auto.text)
else quickText("", auto.text)
extra.args$main <- if ("main" %in% names(extra.args))
quickText(extra.args$main, auto.text)
else quickText("", auto.text)
if (is.character(statistic)) {
ok.stat <- c("prop.count", "prop.mean", "abs.count",
"frequency")
if (!is.character(statistic) || !statistic[1] %in% ok.stat) {
warning("In windRose(...):\n statistic unrecognised",
"\n enforcing statistic = 'prop.count'", call. = FALSE)
statistic <- "prop.count"
}
if (statistic == "prop.count") {
stat.fun <- length
stat.unit <- "%"
stat.scale <- "all"
stat.lab <- "Frequency of counts (%)"
stat.fun2 <- function(x) signif(mean(x, na.rm = TRUE),
3)
stat.lab2 <- "mean"
stat.labcalm <- function(x) round(x, 1)
}
if (statistic == "prop.mean") {
stat.fun <- function(x) sum(x, na.rm = TRUE)
stat.unit <- "%"
stat.scale <- "panel"
stat.lab <- "Proportion contribution to the mean (%)"
stat.fun2 <- function(x) signif(mean(x, na.rm = TRUE),
3)
stat.lab2 <- "mean"
stat.labcalm <- function(x) round(x, 1)
}
if (statistic == "abs.count" | statistic == "frequency") {
stat.fun <- length
stat.unit <- ""
stat.scale <- "none"
stat.lab <- "Count"
stat.fun2 <- function(x) round(length(x), 0)
stat.lab2 <- "count"
stat.labcalm <- function(x) round(x, 0)
}
}
if (is.list(statistic)) {
stat.fun <- statistic$fun
stat.unit <- statistic$unit
stat.scale <- statistic$scale
stat.lab <- statistic$lab
stat.fun2 <- statistic$fun2
stat.lab2 <- statistic$lab2
stat.labcalm <- statistic$labcalm
}
vars <- c(wd, ws)
diff <- FALSE
rm.neg <- TRUE
if (!is.na(ws2) & !is.na(wd2)) {
vars <- c(vars, ws2, wd2)
diff <- TRUE
rm.neg <- FALSE
mydata$ws <- mydata[, ws2] - mydata[, ws]
mydata$wd <- mydata[, wd2] - mydata[, wd]
id <- which(mydata$wd < 0)
if (length(id) > 0)
mydata$wd[id] <- mydata$wd[id] + 360
pollutant <- "ws"
key.footer <- "ws"
wd <- "wd"
ws <- "ws"
vars <- c("ws", "wd")
if (missing(angle))
angle <- 10
if (missing(offset))
offset <- 20
if (is.na(breaks[1])) {
max.br <- max(ceiling(abs(c(min(mydata$ws, na.rm = TRUE),
max(mydata$ws, na.rm = TRUE)))))
breaks <- c(-1 * max.br, 0, max.br)
}
if (missing(cols))
cols <- c("lightskyblue", "tomato")
seg <- 1
}
if (any(type %in% openair:::dateTypes))
vars <- c(vars, "date")
if (!is.null(pollutant))
vars <- c(vars, pollutant)
mydata <- openair:::checkPrep(mydata, vars, type, remove.calm = FALSE,
remove.neg = rm.neg)
mydata <- na.omit(mydata)
if (is.null(pollutant))
pollutant <- ws
mydata$x <- mydata[, pollutant]
mydata[, wd] <- angle * ceiling(mydata[, wd]/angle - 0.5)
mydata[, wd][mydata[, wd] == 0] <- 360
mydata[, wd][mydata[, ws] == 0] <- -999
if (length(breaks) == 1)
breaks <- 0:(breaks - 1) * ws.int
if (max(breaks) < max(mydata$x, na.rm = TRUE))
breaks <- c(breaks, max(mydata$x, na.rm = TRUE))
if (min(breaks) > min(mydata$x, na.rm = TRUE))
warning("Some values are below minimum break.")
breaks <- unique(breaks)
mydata$x <- cut(mydata$x, breaks = breaks, include.lowest = FALSE,
dig.lab = dig.lab)
theLabels <- gsub("[(]|[)]|[[]|[]]", "", levels(mydata$x))
theLabels <- gsub("[,]", " to ", theLabels)
prepare.grid <- function(mydata) {
if (all(is.na(mydata$x)))
return()
levels(mydata$x) <- c(paste("x", 1:length(theLabels),
sep = ""))
all <- stat.fun(mydata[, wd])
calm <- mydata[mydata[, wd] == -999, ][, pollutant]
mydata <- mydata[mydata[, wd] != -999, ]
calm <- stat.fun(calm)
weights <- tapply(mydata[, pollutant], list(mydata[,
wd], mydata$x), stat.fun)
if (stat.scale == "all") {
calm <- calm/all
weights <- weights/all
}
if (stat.scale == "panel") {
temp <- stat.fun(stat.fun(weights)) + calm
calm <- calm/temp
weights <- weights/temp
}
weights[is.na(weights)] <- 0
weights <- t(apply(weights, 1, cumsum))
if (stat.scale == "all" | stat.scale == "panel") {
weights <- weights * 100
calm <- calm * 100
}
panel.fun <- stat.fun2(mydata[, pollutant])
u <- mean(sin(2 * pi * mydata[, wd]/360))
v <- mean(cos(2 * pi * mydata[, wd]/360))
mean.wd <- atan2(u, v) * 360/2/pi
if (all(is.na(mean.wd))) {
mean.wd <- NA
}
else {
if (mean.wd < 0)
mean.wd <- mean.wd + 360
if (mean.wd > 180)
mean.wd <- mean.wd - 360
}
weights <- cbind(data.frame(weights), wd = as.numeric(row.names(weights)),
calm = calm, panel.fun = panel.fun, mean.wd = mean.wd)
weights
}
if (paddle) {
poly <- function(wd, len1, len2, width, colour, x.off = 0,
y.off = 0) {
theta <- wd * pi/180
len1 <- len1 + off.set
len2 <- len2 + off.set
x1 <- len1 * sin(theta) - width * cos(theta) + x.off
x2 <- len1 * sin(theta) + width * cos(theta) + x.off
x3 <- len2 * sin(theta) - width * cos(theta) + x.off
x4 <- len2 * sin(theta) + width * cos(theta) + x.off
y1 <- len1 * cos(theta) + width * sin(theta) + y.off
y2 <- len1 * cos(theta) - width * sin(theta) + y.off
y3 <- len2 * cos(theta) + width * sin(theta) + y.off
y4 <- len2 * cos(theta) - width * sin(theta) + y.off
lpolygon(c(x1, x2, x4, x3), c(y1, y2, y4, y3), col = colour,
border = border)
}
}
else {
poly <- function(wd, len1, len2, width, colour, x.off = 0,
y.off = 0) {
len1 <- len1 + off.set
len2 <- len2 + off.set
theta <- seq((wd - seg * angle/2), (wd + seg * angle/2),
length.out = (angle - 2) * 10)
theta <- ifelse(theta < 1, 360 - theta, theta)
theta <- theta * pi/180
x1 <- len1 * sin(theta) + x.off
x2 <- rev(len2 * sin(theta) + x.off)
y1 <- len1 * cos(theta) + x.off
y2 <- rev(len2 * cos(theta) + x.off)
lpolygon(c(x1, x2), c(y1, y2), col = colour, border = border)
}
}
mydata <- cutData(mydata, type, ...)
results.grid <- plyr::ddply(mydata, type, prepare.grid)
results.grid$calm <- stat.labcalm(results.grid$calm)
results.grid$mean.wd <- stat.labcalm(results.grid$mean.wd)
strip.dat <- openair:::strip.fun(results.grid, type, auto.text)
strip <- strip.dat[[1]]
strip.left <- strip.dat[[2]]
pol.name <- strip.dat[[3]]
if (length(theLabels) < length(cols)) {
col <- cols[1:length(theLabels)]
}
else {
col <- openColours(cols, length(theLabels))
}
max.freq <- max(results.grid[, (length(type) + 1):(length(theLabels) +
length(type))], na.rm = TRUE)
off.set <- max.freq * (offset/100)
box.widths <- seq(0.002^0.25, 0.016^0.25, length.out = length(theLabels))^4
box.widths <- box.widths * max.freq * angle/5
legend <- list(col = col, space = key.position, auto.text = auto.text,
labels = theLabels, footer = key.footer, header = key.header,
height = 0.6, width = 1.5, fit = "scale", plot.style = if (paddle) "paddle" else "other")
legend <- openair:::makeOpenKeyLegend(key, legend, "windRose")
temp <- paste(type, collapse = "+")
myform <- formula(paste("x1 ~ wd | ", temp, sep = ""))
mymax <- 2 * max.freq
myby <- if (is.null(grid.line))
pretty(c(0, mymax), 10)[2]
else grid.line
if (myby/mymax > 0.9)
myby <- mymax * 0.9
xyplot.args <- list(x = myform, xlim = 1.03 * c(-max.freq -
off.set, max.freq + off.set), ylim = 1.03 * c(-max.freq -
off.set, max.freq + off.set), data = results.grid, type = "n",
sub = stat.lab, strip = strip, strip.left = strip.left,
as.table = TRUE, aspect = 1, par.strip.text = list(cex = 0.8),
scales = list(draw = FALSE), panel = function(x, y, subscripts,
...) {
panel.xyplot(x, y, ...)
angles <- seq(0, 2 * pi, length = 360)
sapply(seq(off.set, mymax, by = myby), function(x) llines(x *
sin(angles), x * cos(angles), col = "grey85",
lwd = 1))
subdata <- results.grid[subscripts, ]
upper <- max.freq + off.set
# larrows(-upper, 0, upper, 0, code = 3, length = 0.1)
# larrows(0, -upper, 0, upper, code = 3, length = 0.1)
# ltext(upper * -1 * 0.95, 0.07 * upper, "18", cex = 0.7)
# ltext(0.07 * upper, upper * -1 * 0.95, "12", cex = 0.7)
# ltext(0.07 * upper, upper * 0.95, "0", cex = 0.7)
# ltext(upper * 0.95, 0.07 * upper, "6", cex = 0.7)
larrows(-upper * 0.9, 0, upper * 0.9, 0, code = 3, length = 0)
larrows(0, -upper * 0.9, 0, upper * 0.9, code = 3, length = 0)
ltext(0 * upper, upper * 0.95, cust_labels[1], cex = 1)
ltext(upper * 0.95, 0 * upper, cust_labels[2], cex = 1)
ltext(0 * upper, upper * -1 * 0.95, cust_labels[3], cex = 1)
ltext(upper * -1 * 0.95, 0 * upper, cust_labels[4], cex = 1)
if (nrow(subdata) > 0) {
for (i in 1:nrow(subdata)) {
with(subdata, {
for (j in 1:length(theLabels)) {
if (j == 1) {
temp <- "poly(wd[i], 0, x1[i], width * box.widths[1], col[1])"
} else {
temp <- paste("poly(wd[i], x", j - 1,
"[i], x", j, "[i], width * box.widths[",
j, "], col[", j, "])", sep = "")
}
eval(parse(text = temp))
}
})
}
}
ltext(seq((myby + off.set), mymax, myby) * sin(pi/4),
seq((myby + off.set), mymax, myby) * cos(pi/4),
paste(seq(myby, mymax, by = myby), stat.unit,
sep = ""), cex = 0.7)
if (annotate) if (statistic != "prop.mean") {
if (!diff) {
ltext(max.freq + off.set, -max.freq - off.set,
label = paste(stat.lab2, " = ", subdata$panel.fun[1],
"\ncalm = ", subdata$calm[1], stat.unit,
sep = ""), adj = c(1, 0), cex = 0.7, col = calm.col)
}
if (diff) {
ltext(max.freq + off.set, -max.freq - off.set,
label = paste("mean ws = ", round(subdata$panel.fun[1],
1), "\nmean wd = ", round(subdata$mean.wd[1],
1), sep = ""), adj = c(1, 0), cex = 0.7,
col = calm.col)
}
} else {
ltext(max.freq + off.set, -max.freq - off.set,
label = paste(stat.lab2, " = ", subdata$panel.fun[1],
stat.unit, sep = ""), adj = c(1, 0), cex = 0.7,
col = calm.col)
}
}, legend = legend)
xyplot.args <- openair:::listUpdate(xyplot.args, extra.args)
plt <- do.call(xyplot, xyplot.args)
if (length(type) == 1)
plot(plt)
else plot(useOuterStrips(plt, strip = strip, strip.left = strip.left))
newdata <- results.grid
output <- list(plot = plt, data = newdata, call = match.call())
class(output) <- "openair"
invisible(output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.