#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# author: Reza Hosseini
library("RColorBrewer")
## color functions with transparency
ColAlpha = function(colors, alpha=0.5) {
r = col2rgb(colors, alpha=TRUE)
r[4, ] = alpha * 255
r = r / 255.0
return(rgb(r[1, ], r[2, ], r[3, ], r[4, ]))
}
## compare the distribution of two variables
# making sure that the data are cut in the same way
DichomHist = function(
x,
pltTitle="",
xlab="",
signif=2,
step=0.1,
labelCol=ColAlpha("red", 0.7),
srt=75) {
x = signif(x, signif)
x = na.omit(x)
x = sort(x)
cutoffs = c(min(x), quantile(x, probs=seq(step, 1-step, step)), max(x))
cutoffs = unique(cutoffs)
x1 = cut(x, cutoffs, include.lowest=TRUE)
labels = unique(x1)
k = length(labels)
eps = 0.1
maxVal = max(as.numeric(100*table(x1) / sum(table(x1))))
par(mar=c(10.1, 6.1, 5.1, 5.1))
plot(
1:k,
as.numeric(100*table(x1) / sum(table(x1))),
col=ColAlpha("blue", 0.6),
type="h",
lwd=20,
xaxt="n",
main=pltTitle,
xlab="",
ylab="Frequency (%)",
xlim=c(1, k),
ylim=c(0, maxVal + 2),
cex.lab=1.5)
#axis(2, cex.axis=1.2)
axis(1, at=1:k, labels=FALSE, cex.axis=1.2)
text(
1:k, par("usr")[3]-1, labels=labels, srt=srt, pos=1,
xpd=TRUE, cex=1.5, col=labelCol)
title(xlab=xlab, line=9, cex.lab=1.6, family="Calibri Light")
}
TestDichomHist = function() {
x = c(rep(0, 100), 10:100, 1000:1200)
DichomHist(x, step=0.05, labelCol="red")
}
## makes gg box plots which zoom in and get rid of extreme outliers
ZoomedBoxP = function(df, x, y, fill) {
p = ggplot(df, aes_string(x=x, y=y, fill=fill)) +
geom_boxplot()
if ("data.table" %in% class(df)) {
values = eval(parse(text = paste0("values = df[ ,", y, "]")))
} else {
values = df[ , y]
}
ylim = boxplot.stats(values)[["stats"]][c(1, 5)]
# scale y limits based on ylim
p = p + coord_cartesian(ylim=ylim*1.05)
return(p)
}
TestZoomedBoxP = function() {
m = 100
x = sample(c("cat", "dog", "horse"), m, replace=TRUE)
y = rnorm(m)
fill = sample(c("me", "myself", "dude"), m, replace=TRUE)
df = data.frame("x"=x, "y"=y, "fill"=fill)
ZoomedBoxP(df=df, x="x", y="y", fill="fill")
ZoomedBoxP(df=data.table(df), x="x", y="y", fill="fill")
}
## compare the distribution of two conti variables
# making sure that the data are cut in the same way
PltCompare_contiDist = function(
x,
y,
pltTitle="",
xlab="",
varNames=c("var1", "var2"),
signif=2,
step=0.1) {
x = signif(x, signif)
y = signif(y, signif)
z = na.omit(c(x, y))
z = sort(z)
cutoffs = c(min(z), quantile(z, probs=seq(step, 1-step, step)), max(z))
cutoffs = unique(cutoffs)
x = sort(x)
y = sort(y)
x1 = cut(x, cutoffs, include.lowest=TRUE)
y1 = cut(y, cutoffs, include.lowest=TRUE)
z1 = cut(z, cutoffs, include.lowest=TRUE)
labels = unique(z1)
k = length(labels)
eps = 0.1
maxVal = max(
as.numeric(100*table(x1) / sum(table(x1))),
as.numeric(100*table(y1) / sum(table(y1))))
plot(
1:k,
as.numeric(100*table(x1) / sum(table(x1))),
col=ColAlpha("blue", 0.6),
type="h",
lwd=10,
xaxt="n",
main=pltTitle,
xlab=xlab,
ylab="Frequency (%)",
xlim=c(1, k),
ylim=c(0, maxVal + 2),
cex.lab=1.5)
axis(2, cex.axis=1.2)
lines(
(1:k) + eps,
as.numeric(100*table(y1) / sum(table(y1))),
col=ColAlpha("red", 0.6),
lwd=10,
type="h")
axis(1, at=1:k, labels=labels, cex.axis=1.2)
legend(
"topright", inset=c(0, 0), legend=varNames, lwd=c(8, 8),
col=ColAlpha(c("blue", "red"), 0.5), title="", cex=1.3, pt.cex=1)
}
TestPltCompare_contiDist = function() {
x = runif(100, 1, 2)
y = runif(100, 1.5, 2.5)
PltCompare_contiDist(1:10, 1:11)
}
## compare categ distbn for labelCol across compareCol
# propThresh is to limit number of categories shows
PltCompare_categDist = function(
df,
labelCol,
compareCol,
propThresh=NULL,
angle=30) {
dt = data.table(df)
freqDt = dt[ , .(num=.N), by=c(labelCol, compareCol)]
freqDt2 = freqDt[ , .(total_num=sum(num)), by=compareCol]
freqDt3 = merge(freqDt, freqDt2, by=compareCol, all.x=TRUE, all=FALSE)
freqDt3[ , "prop"] = round(100*freqDt3[ , num] / freqDt3[ , total_num], 2)
freqDt3[ , maxprop := max(prop), by=labelCol]
if (!is.null(propThresh)) {
freqDt3 = freqDt3[maxprop > propThresh, ]
}
p = ggplot(
data.frame(freqDt3),
aes_string(x=labelCol, y="prop", fill=compareCol)) +
geom_bar(stat="identity", width=.5, position="dodge") +
ylab("freq(%)") +
xlab(labelCol) +
guides(fill=guide_legend(title=compareCol)) +
theme(
text=element_text(size=16),
axis.text.x=element_text(angle=angle, hjust=1))
return(list("p"=p, "propDt"=freqDt3))
}
TestPltCompare_categDist = function() {
n = 100
m = 90
df = data.frame(
country=sample(c("IR", "US"), n, replace=TRUE),
animal=sample(paste0("a", 1:m), n, replace=TRUE))
res = PltCompare_categDist(df, labelCol="animal", compareCol="country")
res[["p"]]
res = PltCompare_categDist(
df,
labelCol="animal",
compareCol="country",
propThresh=5)
res[["p"]]
res[["propDt"]]
}
PltCompare_categDist_stacked = function(
df,
xCol,
fillCol,
pltTitle=NULL) {
dt = data.table(df)
freqDt = dt[ , .(num=.N), by=c(xCol, fillCol)]
freqDt2 = freqDt[ , .(total_num=sum(num)), by=xCol]
freqDt3 = merge(freqDt, freqDt2, by=xCol, all.x=TRUE, all=FALSE)
freqDt3[ , "prop"] = round(100*freqDt3[ , num] / freqDt3[ , total_num], 2)
if (is.null(pltTitle)) {
pltTitle = paste(
"cond. dtbn. of ",
fillCol,
"\n for each val. of",
xCol)
}
p = StackPlt(
meltDf=data.frame(freqDt3),
xCol=xCol,
yCol="prop",
fill=fillCol,
pltTitle=pltTitle)
return(list("p"=p, "propDt"=freqDt3))
}
# plots a bivariate categorical variable
PltStack_bivarCateg = function(df, xCol, fillCol, pltTitle=NULL) {
dt = data.table(df)
freqDt = dt[ , .(num=.N), by=c(xCol, fillCol)]
freqDt[ , "prop"] = 100 * freqDt[ , num] / sum(freqDt[ , num])
if (is.null(pltTitle)) {
pltTitle = paste("prop wrt", xCol, "and", fillCol)
}
p = StackPlt(
meltDf=data.frame(freqDt),
xCol=xCol,
yCol="prop",
fill=fillCol,
pltTitle=pltTitle)
return(list("p"=p, "freqDt"=freqDt))
}
## makes a histogram for d, also adds the mean and median lines for d
Plt_compareDiffWithZero = function(
d, pltTitle="", xlab="", varNames=c("var1", "var2")) {
med = median(d, na.rm=TRUE)
m = mean(d, na.rm=TRUE)
hist(
d,
col=ColAlpha("red", 0.5),
main=paste0(pltTitle,": ", varNames[1], " - ", varNames[2]),
xlab="Diff",
probability=TRUE)
abline(v=med, lty=2, lwd=3, col=ColAlpha("blue", 0.5))
abline(v=m, lty=2, lwd=3, col=ColAlpha("green", 0.5))
legend("topright", legend=c("median", "mean"), lty=c(1, 1), lwd=c(5, 5),
col=ColAlpha(c("blue","green"), 0.5), cex=0.8)
}
TestPlt_compareDiffWithZero = function() {
d = rnorm(100)
Plt_compareDiffWithZero(d)
}
## comparing a valueCol boxplots across categories
Plt_compareBoxPlot = function(df, compareCol, valueCol, pltTitle="") {
p = (
ggplot(df, aes_string(compareCol, valueCol, fill=compareCol)) +
geom_boxplot() + labs(title=compareCol) + xlab(compareCol) +
ylab(valueCol) + ggtitle(pltTitle))
return(p)
}
Plt_compareDensity = function(
df, compareCol, valueCol, addMeans=TRUE, pltTitle="") {
df = df[ , c(compareCol, valueCol)]
meanDf = data.frame(
data.table(df)[ , lapply(.SD, mean, na.rm=TRUE), by=compareCol])
colnames(meanDf) = c(compareCol, valueCol)
print(meanDf)
p = (
ggplot(df, aes_string(x=valueCol, fill=compareCol, color=compareCol))
+ geom_density(alpha=0.3, size=2)
+ labs(title=compareCol)
+ xlab(compareCol)
+ ylab(valueCol)
+ ggtitle(pltTitle))
if (addMeans) {
p = p + geom_vline(
data=meanDf,
aes_string(xintercept=valueCol, color=compareCol),
linetype="dashed",
size=1, alpha=0.5)
}
return(p)
}
## plotting multiple plots in ggplots
Multiplot = function(pltList=NULL, ncol=NULL) {
# Make a list from the ... arguments and pltList
numPlots = length(pltList)
# Make the panel
# Number of columns of plots
if (is.null(ncol)) {
ncol = ceiling(sqrt(numPlots))
}
do.call(what=function(...) {grid.arrange(..., ncol=ncol)}, pltList)
#pltCols = cols
#pltRows = ceiling(numPlots / pltCols)
# Set up the page
#grid.newpage()
#pushViewport(viewport(layout=grid.layout(pltRows, pltCols)))
#vplayout = function(x, y) {
# viewport(layout.pos.row=x, layout.pos.col=y)
#}
# Make each plot, in the correct location
#for (i in 1:numPlots) {
# curRow = ceiling(i / pltCols)
# curCol = (i-1) %% pltCols + 1
# print(plots[[i]], vp=vplayout(curRow, curCol), newpage=FALSE)
#}
}
## saving multiple plots using ggsave in one page
GgsaveMulti = function(
fn,
pltList,
ncol=NULL,
Device=function(...)Cairo::CairoPNG(..., units="in", dpi=120),
width=6,
height=6) {
if (is.null(ncol)) {
ncol = round(sqrt(length(pltList)))
}
grd = do.call(gridExtra::arrangeGrob, c(pltList, ncol=ncol))
ggplot2::ggsave(
fn,
grd,
width=width,
height=height,
device=Device)
dev.off()
close(fn)
}
TestGgsaveMulti = function() {
plt1 = ggplot2::ggplot(iris, aes(Species)) +
ggplot2::geom_bar()
plt2 = ggplot2::ggplot(iris, aes(Sepal.Width, Sepal.Length)) +
ggplot2::geom_point()
pltList = list(plt1, plt2)
fn = "test.png"
GgsaveMulti(
fn=file(fn, "w"),
pltList=pltList
)
}
## it plots multiple lines in the same plot
# xCols are the x-axis columns
# yCols are the y-axis columns
# if x-axis variable are the same for all lines, xCols can be passed as one var
# if y-axis variable are the same for all lines, yCols can be passed as one var
Plt_dfColsLines = function(
df,
xCols,
yCols,
ylim=NULL,
xlim=NULL,
xlab=NULL,
ylab=NULL,
main="",
legend=NULL,
legPos="topleft",
lwd=2.5,
cex=1.5,
cex.main=1.5,
cex.axis=1.6,
cex.lab=1.5,
cex.legend=1.5,
varLty=FALSE,
sizeAlpha=1) {
n_x = length(xCols)
n_y = length(yCols)
k = max(n_x, n_y)
if (n_x != n_y && n_x != 1 && n_y != 1) {
warnings(
"length of the xCols, yCols is not the same. Also none is 1.")
return(NULL)
}
if (n_x == 1) {
xCols = rep(xCols, k)
}
if (n_y == 1) {
yCols = rep(yCols, k)
}
if (is.null(ylab)) {
ylab = paste(unique(yCols), collapse=";")
}
if (is.null(xlab)) {
xlab = paste(unique(xCols), collapse=";")
}
if (is.null(legend)) {
legend = yCols
}
if (is.null(ylim)) {
yMax = max(df[ , yCols], na.rm=TRUE)
yMin = min(df[ , yCols], na.rm=TRUE)
yMax = yMax + (yMax - yMin) / 2
ylim = c(yMin, yMax)
}
if (is.null(xlim)) {
xMax = max(df[ , xCols], na.rm=TRUE)
xMin = min(df[ , xCols], na.rm=TRUE)
xlim = c(xMin, xMax)
}
colors = rainbow(k)
ltyVec = rep(1, k)
if (varLty) {
ltyVec = 1:k
}
plot(
df[ , xCols[1]],
df[ , yCols[1]],
ylim=ylim,
xlim=xlim,
type="l",
col=ColAlpha(colors[1], 0.75),
ylab=ylab,
xlab=xlab,
lwd=lwd*sizeAlpha,
main=main,
cex.main=cex.main*sizeAlpha,
cex=cex*sizeAlpha,
cex.axis=cex.axis*sizeAlpha,
cex.lab=cex.lab*sizeAlpha,
lty=1)
if (k > 1) {
for (i in 2:k) {
lines(
df[ , xCols[i]], df[ , yCols[i]], col=ColAlpha(colors[i], 0.75),
lwd=lwd, lty=ltyVec[i])
}
}
legend(
legPos, legend=legend, col=colors, cex=cex.legend*sizeAlpha,
lty=ltyVec, lwd=lwd*sizeAlpha)
}
## plot bands given a lower column and an upper column: usiful for CIs
PltBands = function(
x,
yLower,
yUpper,
col=ColAlpha("grey", 0.5),
border=NA,
angle=NULL,
density=NULL,
lwd=3) {
polygon(
c(rev(x), x), c(rev(yLower), yUpper), col=col,
border=NA, angle=angle, density=density, lwd=lwd)
}
TestPltBands = function() {
plot(-20:20, -20:20)
x = sort(rnorm(100))
yLower = x - 6 + rnorm(100)
yUpper = x + 6 + rnorm(100)
polygon(
c(rev(x), x), c(rev(yLower), yUpper),
col=ColAlpha("blue", 0.5), border=NA,
lwd=2, angle=45, density=20)
legend(
"top", legend=1, ncol=1, fill=TRUE, col=1, angle=45, density=20)
}
## it compares the CI from different methods (groups)
# across xCol (e.g. sample size)
# the data is given in long format i.e the groups data are stacked
Plt_compareCiGroups = function(
df,
xCol,
lowerCol,
upperCol,
compareCol,
compareValues=NULL,
xlab=NULL,
ylab="",
main="",
lwd=3,
addMidPoint=TRUE) {
yMin = min(df[ , c(lowerCol, upperCol)], na.rm=TRUE)
yMax = max(df[ , c(lowerCol, upperCol)], na.rm=TRUE)
yMax = yMax + (yMax - yMin) / 3
xMin = min(df[ , xCol], na.rm=TRUE)
xMax = max(df[ , xCol], na.rm=TRUE)
if (is.null(xlab)) {
xlab = xCol
}
plot(
x=c(xMin, xMax),
y=c(yMin, yMax),
type="n",
xlab=xlab,
ylab=ylab,
main=main,
cex.main=1.5,
cex.axis=1.2,
cex.lab=1.2)
if (is.null(compareValues)) {
compareValues = unique(df[ , compareCol])
}
k = length(compareValues)
angles = (1:k)*180 / (k + 1)
cols = rainbow(k)
for (i in 1:k) {
value = compareValues[i]
df0 = df[df[ , compareCol] == value, ]
PltBands(
x=df0[ , xCol],
yLower=df0[ , lowerCol],
yUpper=df0[ , upperCol],
col=ColAlpha(cols[i], 0.3),
angle=angles[i],
density=60,
lwd=lwd)
}
legend(
"top",
legend=compareValues,
col=cols,
lwd=lwd,
bty="n",
angle=angles,
density=60,
cex=1.2)
dt = data.table(df)
aggDt = DtSimpleAgg(
dt=dt,
valueCols=c(lowerCol, upperCol),
gbCols=compareCol,
AggFunc=mean)
aggDt[ , "midPoint"] = (aggDt[ , get(lowerCol)] + aggDt[ , get(upperCol)]) / 2
if (addMidPoint) {
for (i in 1:k) {
value = compareValues[i]
df0 = df[df[ , compareCol] == value, ]
midPoint = aggDt[(aggDt[ , get(compareCol)] == value), midPoint]
x = df0[ , xCol]
points(
x=x, y=rep(midPoint, length(x)), col=ColAlpha(cols[i], 0.5),
pch=10, cex=0.3)
}
}
}
TestPlt_compareCiGroups = function() {
x = seq(0, 1, 0.01)
yLower = sin(2 * pi * x) - 0.1
yUpper = sin(2 * pi * x) + 0.1
group = "1"
df1 = data.frame(x, yLower, yUpper, group)
yLower = cos(2 * pi * x) - 0.1
yUpper = cos(2 * pi * x) + 0.1
group = "2"
df2 = data.frame(x, yLower, yUpper, group)
df = rbind(df1, df2)
Plt_compareCiGroups(
df=df, xCol="x", lowerCol="yLower", upperCol="yUpper",
compareCol="group", lwd=3)
}
## plots selected columns of df (cols) as side by side bars in a plot,
# it uses different color per col
Plt_barsSideBySide = function(
df, cols=NULL, legendPosition="topright",
xLabels=NULL, xlim=NULL, ylim=NULL, xlab=NULL, ylab=NULL,
legendLabels=NULL, colors=NULL, xpd=FALSE) {
if (is.null(cols)) {
cols = colnames(df)
}
df = df[ , cols, drop=FALSE]
n = dim(df)[2]
if (is.null(colors)) {
colors = rainbow(n)
}
if (is.null(legendLabels)) {
legendLabels = cols
}
if (is.null(ylim)) {
ylim = c(min(df, na.rm=TRUE), max(df, na.rm=TRUE))
}
if (is.null(xlab)) {
xlab = ""
}
if (is.null(ylab)) {
ylab = ""
}
par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=xpd)
eps = 3/(4*n)
lwd = 100/n
x = df[ , 1]
if (is.null(xlim)) {
xlim = c(1, length(x) + 1)
}
par(font.axis = 2)
plot(
1:length(x), x, col=ColAlpha(colors[1], 0.5),
xlim=xlim, ylim=ylim, type="h", lwd=lwd, xlab=xlab,
ylab=ylab, xaxt = "n")
if (is.null(xLabels)) {
xLabels = 1:length(x)
}
for (i in 2:n) {
y = df[ , i]
lines(
1:length(y) + (i-1)*eps, y, col=ColAlpha(colors[i], 0.5) ,
xlim=xlim, ylim=ylim, type="h", lwd=lwd, xlab=xlab, ylab=ylab)
}
legend(
legendPosition, inset=rep(0, n), legend=legendLabels,
lwd=rep(80/(n), n), col=ColAlpha(colors, 0.5), title="")
axis(1, at=1:length(x), labels=xLabels, las=2, cex.axis=0.95)
}
TestPlt_barsSideBySide = function() {
df0 = data.frame("x"=1:10, "y"=2:11)
Plt_barsSideBySide(df0, legendPosition="topleft")
}
## plots resp wrt wrtCol
## while it slices wrt groupCol
# if there are multilple values for each c(wrtCol, groupCol), it aggregates
Plt_wrtGroup = function(
df,
resp,
wrtCol,
groupCol,
AggF=function(x){mean(x, na.rm=TRUE)},
group=NULL,
main="",
ColPattern=rainbow,
lwd=4,
lty=NULL,
xlab=NULL,
ylab=NULL,
alpha=0.5,
gridAlpha=0.2,
gridLwd=2) {
df2 = df[ , c(resp, wrtCol, groupCol)]
formulaText = paste0(resp, '~', wrtCol, '+', groupCol)
formula = as.formula(formulaText)
dfAgg = aggregate(formula, data=df2, FUN=AggF)
if (is.null(group)) {
group = unique(df[ , groupCol])
}
yMax = max(dfAgg[ , resp], na.rm=TRUE)
yMin = min(dfAgg[ , resp], na.rm=TRUE)
delta = (yMax - yMin)/5
yMax = yMax + delta
yMin = yMin - delta
colors = rainbow(length(group))
if (is.null(lty)) {
lty = rep(1, length(group))
}
if (is.null(xlab)) {
xlab = wrtCol
}
if (is.null(ylab)) {
ylab = resp
}
#c(bottom, left, top, right)
par(mar=c(5.1, 4.1, 4.1, 14.1), xpd=TRUE)
par(font=2)
for (i in 1:length(group)) {
g = group[i]
dfAgg2 = dfAgg[dfAgg[ , groupCol] == g, ]
if (i == 1) {
plot(
dfAgg2[ , wrtCol], dfAgg2[ , resp],
col=ColAlpha(colors[i], alpha), main=main,
ylim=c(yMin, yMax), ylab=resp, xlab=wrtCol, type='l', cex.main=2,
lwd=lwd, lty=lty[i], cex.lab=1.5, cex.axis=1.5, font.lab=2)
} else {
lines(
dfAgg2[ , wrtCol],
dfAgg2[ , resp],
col=ColAlpha(colors[i], alpha),
lwd=lwd, lty=lty[i])
}
}
legend("topright", inset=c(-0.2, 0), legend=group,
lwd=rep(lwd, length(group)), title=groupCol,
col=ColAlpha(colors, alpha), cex=1, lty=lty)
par(xpd=FALSE)
grid(nx=NULL, ny=NULL, col=ColAlpha('black', gridAlpha), lwd=gridLwd)
}
## create stackplot with ggplot
StackPlt = function(
meltDf,
xCol,
yCol,
fill,
xlab=NULL,
ylab=NULL,
labelCol=NULL,
xAxisRotation=0,
manualPalette=TRUE,
colors=NULL,
palette="Paired",
legendTitle="variable",
pltTitle=NULL,
ylim=NULL, fontSizeAlpha=1) {
if (is.null(pltTitle)) {
pltTitle = paste(yCol, "wrt", xCol, "partitioned by", fill)
}
if (is.null(xlab)) {
xlab = xCol
}
if (is.null(ylab)) {
ylab=ylab
}
p = ggplot(meltDf, aes_string(x=xCol, y=yCol, fill=fill)) +
xlab(xlab) + ylab(ylab) +
geom_bar(stat="identity", colour=ColAlpha("black", 0.3), lwd=0.5) +
ggtitle(pltTitle) +
scale_y_continuous(limits=ylim) +
guides(
fill=guide_legend(override.aes=list(colour=ColAlpha("black", 0.5))),
lwd=1) +
guides(colour=FALSE) +
theme(
axis.text.x=element_text(
angle=xAxisRotation,
hjust=0.5, face="bold", size=14*fontSizeAlpha)) +
theme(
axis.text=element_text(
size=14*fontSizeAlpha, face="bold"),
axis.title=element_text(size=14*fontSizeAlpha, face="bold")) +
theme(plot.title=element_text(
face="bold", size=16*fontSizeAlpha, hjust=0)) +
guides(
fill=guide_legend(
keywidth=(2.5)*fontSizeAlpha, keyheight=(2.5)*fontSizeAlpha)) +
theme(legend.text=element_text(size=12*fontSizeAlpha)) +
guides(fill=guide_legend(title=legendTitle)) +
guides(colour=guide_legend(title.hjust=2*fontSizeAlpha)) +
guides(colour=guide_legend(override.aes=list(size=20*fontSizeAlpha)))
if (manualPalette) {
if (is.null(colors)) {
n = length(unique(meltDf[ , fill]))
colors = GenColors2(n)
}
p = p + scale_color_manual(values=colors)
} else {
p = p + scale_fill_brewer(palette=palette)
}
if (!is.null(labelCol)) {
p = p + geom_text(aes_string(
label=labelCol),
position=position_stack(vjust=0.5),
size=5*fontSizeAlpha)
}
p = p + theme(
text=element_text(size=16),
axis.text.x=element_text(angle=30, hjust=1))
return(p)
}
## creates level plots
LevPlot = function(
df,
pltTitle="",
at=seq(-1, 1, 0.01),
colRange=c("yellow", "blue")) {
rgb.palette = colorRampPalette(colRange, space = "rgb")
p = levelplot(
df, main=pltTitle, xlab="", ylab="",
col.regions=rgb.palette(length(at)), cuts=length(at), at=at,
scales=list(x=list(rot=90)))
return(p)
}
## level plot with ggplot library
LevPlotGg = function(
df,
pltTitle="",
limits=NULL,
colRange=c("yellow", "grey", "blue"),
xlab=NULL,
ylab=NULL,
xLabelsAngel=90,
fontSizeAlpha=1,
fontFace="plain") {
meltedDf = melt(df)
#myPalette = colorRampPalette(rev(RColorBrewer::brewer.pal(11, "Spectral")))
#sc = ggplot2::scale_fill_gradientn(colors = myPalette(100), limits=c(-1, 1))
#sc = ggplot2::scale_fill_gradientn(colors = myPalette(100), low="yellow", high="blue", limits=c(-1, 1))
#sc = ggplot2::scale_fill_distiller(palette = "Spectral", low=-1, high=1)
#sc = ggplot2::scale_colour_gradient(limits=c(-1, 1), low="red", high="white")
#sc = ggplot2::scale_fill_distiller(palette = palette)
if (is.null(limits)) {
limits = c(
min(meltedDf[ , "value"], na.rm=TRUE),
max(meltedDf[ , "value"], na.rm=TRUE))
}
sc = ggplot2::scale_fill_gradientn(limits=limits, colours=colRange)
p = (
ggplot(data=meltedDf, aes(x=Var1, y=Var2, fill=value)) +
geom_tile(stat="identity", colour=ColAlpha("black", 0.2), lwd=0.4) +
ggtitle(pltTitle) +
theme(axis.text.x=element_text(angle=xLabelsAngel, hjust=1, vjust=1)) +
sc +
theme(
axis.text=element_text(size=18*fontSizeAlpha, face=fontFace),
axis.title=element_text(size=18*fontSizeAlpha, face=fontFace)) +
theme(
plot.title=element_text(
face=fontFace, size=20*fontSizeAlpha, hjust=0)) +
guides(colour=FALSE) +
theme(legend.text=element_text(size=15*fontSizeAlpha)))
#theme(panel.background = element_rect(colour = "black")) +
#guides(fill = guide_legend(override.aes = list(colour = ColAlpha("black", 0.5)))) +
if (!is.null(xlab)) {
p = p + labs(x = xlab)
}
if (!is.null(ylab)) {
p = p + labs(y = ylab)
}
return(p)
}
## make 2d correlation plot
CorPlt = function(
df,
cols=NULL,
subsetIndList=NULL,
colRange=c("red", "white", "green"),
tightLimits=FALSE,
limits=c(-1, 1),
xLabelsAngel=90,
pltTitle="",
fontSizeAlpha=0.8,
fontFace="plain") {
if (!is.null(cols)) {
df = df[ , cols, drop=FALSE]
}
dfCor = cor(df, use="pairwise.complete.obs")
if (!is.null(subsetIndList)) {
dfCor = dfCor[subsetIndList[[1]], subsetIndList[[2]]]
}
if (tightLimits) {
limits = c(min(dfCor), max(dfCor))
}
p = LevPlotGg(
df=dfCor, pltTitle=pltTitle, limits=limits, colRange=colRange,
xlab="", ylab="", xLabelsAngel=xLabelsAngel,
fontSizeAlpha=fontSizeAlpha, fontFace=fontFace)
#p = LevPlot(dfCor, pltTitle=pltTitle, colRange=colRange)
return(p)
}
## creates corr plots
CorPerSlice = function(
df,
cols=NULL,
conditions=NULL,
colRange=c("yellow", "grey", "blue"),
pltTitle="",
fontSizeAlpha=0.8) {
sliceName = ""
if (!is.null(conditions)) {
res = SliceDfCondition(df=df, conditions=conditions)
df = res[["df"]]
sliceName = res[["sliceName"]]
}
out = CorPlt(
df, cols=cols, colRange=colRange, pltTitle=paste(pltTitle, sliceName),
fontSizeAlpha=fontSizeAlpha)
return(out)
}
## create a continuous variable set correlation plot
# and latex table and save them
SaveCorPlt_andLatex = function(
df,
subsetIndList=NULL,
tightLimits=FALSE,
limits=c(-1, 1),
colRange=c("red", "white", "green"),
xLabelsAngel=90,
figsPath,
tablesPath,
fnSuffix,
cropColsNum=NULL,
tableSize=NULL) {
fn0 = paste0(figsPath, "cor_", fnSuffix, ".png")
Mark(fn0, "filename")
fn = file(fn0, "w")
r = 1.9
Cairo(
width=850*r, height=480*r, file=fn, type="png", dpi=120*r,
pointsize=12*r)
print(CorPlt(
df=df, subsetIndList=subsetIndList, tightLimits=tightLimits,
limits=limits, colRange=colRange, xLabelsAngel=xLabelsAngel))
dev.off()
close(fn)
cropColsNum = 3
corMat = round(cor(df), 2)
corMatCropped = corMat
if (!is.null(cropColsNum)) {
corMatCropped = corMat[ , 1:min(cropColsNum, dim(corMat)[1])]
}
corLat = xtable(
corMatCropped,
caption=gsub("_", " ", x=paste(fnSuffix, "corr")),
label=paste0(fnSuffix, "corr", sep="-"))
if (!is.null(tableSize)) {
corLat = gsub("\\centering", paste("\\centering ", tableSize), x=corLat)
}
fn0 = paste0("cor_", fnSuffix, ".tex")
fn0 = paste0(tablesPath, fn0)
fn0 = tolower(fn0)
Mark(fn0, "filename")
fn = file(fn0, "w")
print(x=corLat, file=fn)
close(fn)
return(list(
"corMat"=corMat, "corLat"=corLat, "corMatCropped"=corMatCropped))
}
## save both plot and latex table in the specified paths
SaveCorPlt_andLatex_andShow = function(
df,
subsetIndList,
label,
fnSuffix,
figsPath,
tablesPath,
tightLimits=FALSE,
limits=c(-0.05, 0.7),
colRange=c("white", "yellow", "green")) {
fnSuffix2 = paste0(fnSuffix, label)
res = SaveCorPlt_andLatex(
df=df, subsetIndList=subsetIndList, tightLimits=tightLimits,
limits=limits, colRange=colRange, xLabelsAngel=45,
figsPath=figsPath, tablesPath=tablesPath, fnSuffix=fnSuffix2,
cropColsNum=3, tableSize=NULL)
out = CorPlt(
df=df, subsetIndList=subsetIndList, tightLimits=tightLimits,
limits=limits, colRange=colRange, xLabelsAngel=45)
return(out)
}
## generate colors
GenColors = function(n) {
library(RColorBrewer)
ind = RColorBrewer::brewer.pal.info[ , "category"] == "qual"
qualColPals = RColorBrewer::brewer.pal.info[ind, ]
colVec = unlist(mapply(
RColorBrewer::brewer.pal,
qualColPals[ , "maxcolors"],
rownames(qualColPals)))
return(colVec)
}
TestGenColors = function() {
n = 64
colVec = GenColors(n)
pie(rep(1, n), col=sample(colVec, n))
}
# generates nice color pallettes
GenColors2 = function(n, pals=NULL) {
df = data.frame(
"pal"=c(
"Accent", "Dark2", "Paired", "Pastel1",
"Pastel2", "Set1", "Set2", "Set3"),
"num"=c(8, 8, 12, 9, 8, 9, 8, 12))
if (!is.null(pals)) {
df = df[df[ , "pal"] %in% pals, ]
}
colors = NULL
for (i in 1:nrow(df)) {
colors = c(
colors,
RColorBrewer::brewer.pal(
n=df[i, "num"],
name=as.character(df[i, "pal"])))
}
m = length(colors)
if (m < n) {
warning(paste("n is larger than the number of avail cols: ", m))
return(NULL)
}
return(sample(x=colors, size=n, replace=FALSE))
}
TestGenColors2 = function() {
n = 20
colVec = GenColors2(n)
pie(rep(1, n), col=sample(colVec, n))
}
## Plots a value column (valueCol) vs xCol wrt two grouping:
# first grouping (spec. by splitCurveCol) is done using
# a different color for each element
# second grouping (spec. by splitPanelCol) is done by using a different panel
Plt_splitCurve_splitPanel = function(
df,
xCol,
valueCol,
splitCurveCol,
splitPanelCol=NULL,
errBars=NULL,
xlab=NULL,
ylab=NULL,
type="line",
size=2,
alpha=0.5,
remove_xAxis=FALSE,
pltTitle=NULL,
savePlt=FALSE,
fileName=NULL,
fnLabel=NULL,
figsPath="") {
if (is.null(pltTitle)) {
pltTitle = gsub("_", " ", valueCol)
}
if (is.null(xlab)) {
xlab = gsub("_", " ", xCol)
}
if (is.null(ylab)) {
ylab = gsub("_", " ", valueCol)
}
geom_custom = geom_line
if (type == "points") {
geom_custom = geom_point
}
p = (
ggplot(df, aes(get(xCol), get(valueCol))) +
geom_custom(aes(color=get(splitCurveCol)), alpha=alpha, size=size) +
scale_y_continuous() +
theme_bw() +
xlab(xlab) +
ylab(ylab) +
theme(
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=14),
legend.position="top",
legend.title=element_blank(),
legend.text=element_text(size=14),
strip.text.x=element_text(size=14, face="bold"),
plot.title=element_text(size=18, face="bold", hjust=0.5)) +
ggtitle(pltTitle)
)
if (remove_xAxis) {
p = p + theme(
axis.title.x=element_blank(),
axis.text.x=element_blank(),
axis.ticks.x=element_blank())
}
if (!is.null(splitPanelCol)) {
p = p + facet_wrap(~get(splitPanelCol))
}
if (!is.null(errBars)) {
p = p + geom_errorbar(errBars, width=.3)
}
if (savePlt) {
if (is.null(fileName)) {
fileName = paste0(
valueCol, "_foreach_", xCol, "_foreach_", splitCurveCol)
## is an extra label is to be added to the figure, we do so here
if (!is.null(fnLabel)) {
fileName = paste0(fileName, "_", fnLabel)
}
fileName = paste0(fileName, ".png")
}
print(paste("file name:", fileName))
fn = paste0(figsPath, fileName)
print(paste("file name with path:", fn))
#print(fn)
p + ggsave(file=fn, width=10, height=6)
}
return(p)
}
## Test function for above
TestPlt_splitCurve_splitPanel = function() {
n = 100
gender = sample(c("men", "women"), n, replace=TRUE)
country = sample(c("uk", "us", "jp"), n, replace=TRUE)
genderEffect = list("men"=1, "women"=-5)
countryEffect = list("uk"=0, "us"=2, "jp"=-2)
F = function(x) {
genderEffect[[x[1]]] + countryEffect[[x[2]]]
}
df = data.frame("gender"=gender, "country"=country)
df[ , "x"] = 1:n
df[ , "y"] = unlist(apply(X=df, MARGIN=1, FUN=F)) + 2 * df[ , "x"]
Plt_splitCurve_splitPanel(
df=df, xCol="x", valueCol="y", splitCurveCol="gender",
splitPanelCol="country", type="line", savePlt=FALSE)
}
## heatmap of value column vs xCol values
# and across two possible groupings
# grouping 1: specified in splitVertCol which is mapped across y-axis
# grouping 2 (optional): specified in splitPanelCol and used to split plots
# into various panels
HeatMap_splitVertCol_splitPanel = function(
df,
xCol,
splitVertCol,
valueCol,
splitPanelCol=NULL,
breaks=NULL,
xlab=NULL,
ylab=NULL,
pltTitle=NULL,
scaleFillValues=NULL,
savePlt=FALSE,
fileName=NULL,
fnLabel=NULL,
figsPath="") {
if (is.null(pltTitle)) {
pltTitle = gsub("_", " ", valueCol)
}
if (is.null(xlab)) {
xlab = gsub("_", " ", xCol)
}
if (is.null(ylab)) {
ylab = gsub("_", " ", splitVertCol)
}
if (is.null(scaleFillValues)) {
scaleFillValues = 1:n
}
if (class(df[ , valueCol]) %in% c("character", "factor")) {
n = length(unique(df[ , valueCol]))
scaleFill = scale_fill_manual(values=ColAlpha(scaleFillValues, 0.95))
} else {
scaleFill = ggplot2::scale_fill_gradientn(
colours=c(ColAlpha("grey", 0.75), "yellow", "red"), na.value="black")
}
p = (
ggplot(df, aes_string(xCol, splitVertCol)) +
geom_tile(
aes_string(fill=valueCol),
colour=ColAlpha("grey", 0.1),
lwd=0.4) +
xlab(xlab) +
ylab(ylab) +
scaleFill +
guides(fill=guide_legend(title="")) +
theme_bw() +
theme(
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=10),
strip.text.x=element_text(size=14, face="bold"),
plot.title = element_text(size=18, face="bold", hjust=0.5),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank()) +
ggtitle(pltTitle))
if (!is.null(breaks)) {
p = p + scale_x_discrete(breaks=breaks)
}
if (!is.null(splitPanelCol)) {
p = p + facet_wrap(~get(splitPanelCol))
}
if (savePlt) {
if (is.null(fileName)) {
fileName = paste0(valueCol, "_foreach_", xCol, "_across_", yCol)
## is an extra label is to be added to the figure, we do so here
if (!is.null(fnLabel)) {
fileName = paste0(fileName, "_", fnLabel)
}
fileName = paste0(fileName, ".png")
}
print(paste("file name:", fileName))
fn = paste0(figsPath, fileName)
p + ggsave(file=fn, width=10, height=6)
}
return(p)
}
TestHeatMap_splitVertCol_splitPanel = function() {
n = 10000
app = sample(c("fb", "ch", "agsa", "wa", "sc"), n, replace=TRUE)
country = sample(c("uk", "us", "jp"), n, replace=TRUE)
genderEffect = list("fb"=1, "ch"=-15, "agsa"=10, "wa"=2, "sc"=9)
countryEffect = list("uk"=-20, "us"=20, "jp"=-9)
Func = function(x) {
genderEffect[[x[1]]] + countryEffect[[x[2]]]
}
df = data.frame("app"=app, "country"=country)
df[ , "x"] = sample(1:10, size=n, replace=TRUE)
df[ , "value"] = unlist(apply(X=df, MARGIN=1, FUN=Func)) + 2 * df[ , "x"]
HeatMap_splitVertCol_splitPanel(
df=df, xCol="x", splitVertCol="app", valueCol="value",
splitPanelCol="country")
## categorical response
df = data.frame(
person=factor(paste0("id ", 1:50),
levels =rev(paste0("id ", 1:50))),
matrix(sample(LETTERS[1:3], 150, TRUE), ncol=3))
df2 = melt(df, id.var="person")
HeatMap_splitVertCol_splitPanel(
df=df2,
xCol="variable",
splitVertCol="person",
valueCol="value",
splitPanelCol=NULL)
}
## plot a bar chart for value column for each categ given in a categ column
# add actual values close the bars
BarChart_valueAdded = function(
df,
categCol,
valueCol,
xlab=NULL,
ylab=NULL,
rounding=2,
pltTitle=NULL,
savePlt=FALSE,
fileName=NULL,
fnLabel=NULL,
figsPath="") {
if (is.null(pltTitle)) {
pltTitle = gsub("_", " ", valueCol)
}
if (is.null(xlab)) {
xlab = gsub("_", " ", categCol)
}
if (is.null(ylab)) {
ylab = gsub("_", " ", valueCol)
}
dodge = position_dodge(width=0.9)
p = (
ggplot(
df,
aes(
reorder(get(categCol), get(valueCol)),
get(valueCol),
fill=get(valueCol))) +
geom_bar(stat="identity", position=dodge) +
scale_color_discrete(name="") +
scale_y_continuous() +
theme_bw() +
xlab(xlab) +
ylab(ylab) +
geom_text(
aes(y=get(valueCol), label=round(get(valueCol), rounding)),
position='stack',
hjust=-0.5,
vjust=0,
color="black",
size=4) +
theme(
axis.text.x=element_text(size=14),
axis.text.y=element_text(size=14),
strip.text.x=element_text(size=14, face="bold"),
plot.title = element_text(size=18, face="bold", hjust=0.5),
panel.grid.major=element_blank(),
panel.grid.minor=element_blank(),
legend.position="none") +
ggtitle(pltTitle) +
coord_flip()
)
if (savePlt) {
if (is.null(fileName)) {
fileName = paste0(valueCol, "_foreach_", categCol)
## is an extra label is to be added to the figure, we do so here
if (!is.null(fnLabel)) {
fileName = paste0(fileName, "_", fnLabel)
}
fileName = paste0(fileName, ".png")
}
print(paste("file name:", fileName))
fn = paste0(figsPath, fileName)
p + ggsave(file=fn, width=10, height=6)
}
return(p)
}
## Quick plot save
QuickPltSave = function(p, fn, r=1.5) {
fn = file(fn, "w")
Cairo(
width=640*r, height=480*r, file=fn, type="png", dpi=120*r,
pointsize=10*r)
print(p)
dev.off()
close(fn)
}
## hex plot
GgPlotHex = function(
x,
y,
x1=NA,
y1=NA,
xlim=NA,
ylim=NA,
xlab='x',
ylab='y',
name='Freq',
ablineVec=NA,
lwd=1) {
df = data.frame(x=x, y=y)
plot1 = (
ggplot(df, aes(x=x, y=y)) +
theme_bw() +
stat_binhex(colour=gray(0.8), na.rm=TRUE) +
scale_fill_gradientn(
colours=c(gray(0.8), "black"),
name=name,
na.value=NA) +
xlab(xlab) +
ylab(ylab))
if (!is.na(ablineVec[1])) {
plot1 = (
plot1 +
geom_abline(
intercept=ablineVec[2],
slope=ablineVec[2],
col=1,
lwd=lwd))
}
if (!is.na(x1[1])) {
plot1 = plot1 + geom_line(aes(x=x1, y=y1), col=1, lwd=lwd)
}
if (!is.na(xlim)) {
plot1 = plot1 + xlim(xlim[1], xlim[2])
}
if (!is.na(ylim)) {
plot1 = plot1 + xlim(ylim[1], ylim[2])
}
return(plot1)
}
Example = function() {
library(ggplot2)
library(grid)
vplayout = function(x, y) {
viewport(layout.pos.row=x, layout.pos.col=y)
}
plot1 = qplot(
mtcars, x=wt, y=mpg, geom="point", main="scatterplot of wt vs. mpg")
plot2 = qplot(
mtcars, x=wt, y=disp, geom="point", main="Scatterplot of wt vs disp")
plot3 = qplot(wt, data=mtcars)
plot4 = qplot(wt, mpg, data=mtcars, geom="boxplot")
plot5 = qplot(wt, data=mtcars)
plot6 = qplot(mpg, data=mtcars)
plot7 = qplot(disp, data=mtcars)
# 4 figures arranged in 2 rows and 2 columns
grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 2)))
print(plot1, vp = vplayout(1, 1))
print(plot2, vp = vplayout(1, 2))
print(plot3, vp = vplayout(2, 1))
print(plot4, vp = vplayout(2, 2))
# One figure in row 1 and two figures in row 2
grid.newpage()
pushViewport(viewport(layout = grid.layout(2, 2)))
print(plot5, vp = vplayout(1, 1:2))
print(plot6, vp = vplayout(2, 1))
print(plot7, vp = vplayout(2, 2))
}
## plots resp wrt wrtCol
## while it slices wrt groupcol
Plt_wrtGroup = function(
data,
resp,
wrtCol,
groupCol,
AggFcn=function(x){mean(x, na.rm=TRUE)},
group=NULL,
ColPattern=rainbow,
lwd=2,
lty=NULL,
xlab=NULL,
ylab=NULL,
alpha=0.5,
gridAlpha=0.2,
gridLwd=2) {
# plots resp wrt wrtCol
# while it slices wrt groupcol
data2 = data[ , c(resp, wrtCol, groupCol)]
formulaText = paste0(resp, '~', wrtCol, '+', groupCol)
formula = as.formula(formulaText)
dataAgg = aggregate(formula, data=data2, FUN=AggFcn)
if (is.null(group)) {
group = unique(data[ , groupCol])
}
yMax = max(dataAgg[ , resp], na.rm=TRUE)
yMin = min(dataAgg[ , resp], na.rm=TRUE)
delta = (yMax - yMin)/5
yMax = yMax + delta
yMin = yMin - delta
colors = rainbow(length(group))
if (is.null(lty)) {
lty = rep(1, length(group))
}
if (is.null(xlab)) {
xlab = wrtCol
}
if (is.null(ylab)) {
ylab = resp
}
par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE)
for (i in 1:length(group)) {
g = group[i]
dataAgg2 = dataAgg[dataAgg[ , groupCol]==g, ]
if (i == 1) {
plot(dataAgg2[ , wrtCol], dataAgg2[ , resp],
col=ColAlpha(colors[i], alpha),
ylim=c(yMin, yMax), ylab=resp, xlab=wrtCol, type='l',
lwd=lwd, lty=lty[i])
} else {
lines(
dataAgg2[ , wrtCol],
dataAgg2[ , resp],
col=ColAlpha(colors[i], alpha),
lwd=lwd, lty=lty[i])
}
}
legend(
"topright",
inset=c(-0.2, 0),
legend=group,
lwd=rep(lwd, length(group)),
title=groupCol,
col=ColAlpha(colors, alpha),
cex=0.7,
lty=lty)
par(xpd=FALSE)
grid(nx=NULL, ny=NULL, col=ColAlpha('black', gridAlpha), lwd=gridLwd)
}
## plot df
## makes n plots of cols2 versus cols1
PltDfCols = function(data, cols1, cols2) {
n = length(cols1)
m = length(cols2)
data2 = data[ , cols2]
ymin = min(data2, na.rm=TRUE)
ymax = max(data2, na.rm=TRUE)
colNames = colnames(data2)
par(mfrow=c(1, n))
for (i in 1:n) {
col1 = cols1[i]
x = data[ , col1]
xmin = min(x, na.rm=TRUE)
xmax = max(x, na.rm=TRUE)
delta = (xmax-xmin)/8
for (j in 1:m) {
col2 = cols2[j]
y = data2[ , col2]
if (j == 1) {
plot(x, y, ylim=c(ymin, ymax), xlim=c(xmin, xmax), col=j, pch=j)
}
if (j > 1) {
points(x, y, col=j, pch=j)
}
}
legend(
x=(xmax-delta),
y=ymax,
col=(1:m),
legend=colNames,
pch=(1:m),
cex=0.5)
}
}
Example = function() {
n = 100
x1 = rnorm(n)
x2 = rnorm(n)
y1 = sin(x1)
y2 = cos(x2) + 3
data = data.frame(x1, x2, y1, y2)
PltDfCols(data, cols1=c('x1', 'x2'), cols2=c('y1', 'y2'))
}
### following functions are mainly used in time series context
# but they are applicable more generally
## build summary data wrt pivot data
PlotSummPivot = function(
df, resp, pivotVar=c("day", "year"),
ylim=NULL, plotIt=TRUE, pltTitle="",
xlab=NULL) {
library("moments")
varList = c(pivotVar, resp)
ymax = max(df[ , resp], na.rm=TRUE)
ymin = min(df[ , resp], na.rm=TRUE)
if (is.null(ylim)) {
ylim = c(ymin, ymax)
}
InstallIf("reshape")
library(reshape)
castText = paste("dfPivot = cast(df,", pivotVar[1], "~",
pivotVar[2], ",value=\"",
resp,"\", fun.aggregate=mean)", sep="")
eval(parse(text=castText))
QSkew = QskewFcn(pDelta=1/4)
Median = function(x, ...) {
as.double(median(x, ...))
}
fcnList = list(RemoveNAfcn(Median), RemoveNAfcn(mean),
RemoveNAfcn(sd), RemoveNAfcn(min), RemoveNAfcn(max),
RemoveNAfcn(skewness), RemoveNAfcn(QSkew), RemoveNAfcn(kurtosis),
RemoveNAfcn(IQR))
fcnNames = c("median", "mean", "sd", "min", "max",
"skewness", "QSkew", "kurtosis", "IQR")
pivotSummDf = SummWrt(df, valueCol=resp, wrtCol=pivotVar[1],
probs=c(0.25, 0.5, 0.75), fcnList=fcnList, fcnNames=fcnNames,
addValuePrefix=FALSE)
k = dim(dfPivot)[1]
if (plotIt) {
n = dim(dfPivot)[1]
m = dim(dfPivot)[2]
if (is.null(xlab)) {
xlab = pivotVar[1]
}
plot(
dfPivot[ , 1], dfPivot[ , 2], typ="l", ylim=ylim,
xlab=xlab, ylab=resp, col=rgb(0, 0, 0, alpha=0.1),
main=pltTitle)
for (i in 2:m) {
lines(dfPivot[ , 1], dfPivot[ , i], type="l",
col=rgb(0, 0, 0, alpha=10/k), lwd=1.5)
}
pivotSummDf = pivotSummDf[order(pivotSummDf[, pivotVar[1]]), ]
lines(pivotSummDf[ , pivotVar[1]], pivotSummDf[ , 2],
col=ColAlpha(2, 0.7), lwd=2)
lines(pivotSummDf[ , pivotVar[1]], pivotSummDf[ , 3],
col=ColAlpha(3, 0.7), lwd=2)
lines(pivotSummDf[ , pivotVar[1]], pivotSummDf[ , 4],
col=ColAlpha(4, 0.7), lwd=2)
legend(
"topright", c("Q1", "median", "Q3"), lwd=2,
col=ColAlpha(2:4, 0.7), bty="n")
}
pivotSummDf = pivotSummDf[order(pivotSummDf[ , pivotVar[1]]), ]
return(pivotSummDf)
}
TestPlotSummPivot = function() {
df = data.frame(
"day"=c(1:365, 1:365, 1:365, 1:365),
"year"=c(
rep(2011, 365),
rep(2012, 365),
rep(2013, 365),
rep(2014, 365)),
"y"=c(
(1:365) + 100*rnorm(365),
(1:365) + 100*rnorm(365),
(1:365) + 100*rnorm(365),
(1:365) + 100*rnorm(365)))
summDf = PlotSummPivot(
df=df, resp="y", pivotVar=c("day", "year"),
ylim=NULL, plotIt=TRUE, pltTitle="")
}
## plot important summaries wrt pivot variable
PlotDistMeas = function(
df,
measures,
respCols,
pivotVar=c("day", "year"),
parDim=NULL) {
ylims = list(c(-1, 1), c(-1, 1), c(-1, 1), c(-1, 1), c(-1, 1))
if (is.null(parDim)) {
parDim = c(length(respCols), length(measures))
}
par(mfrow=parDim)
for (resp in respCols) {
for (measure in measures) {
res = PlotSummPivot(
df=df, resp=resp, pivotVar=pivotVar,
ylim=ylims[[i]], plotIt=FALSE)
plot(res[ , pivotVar[1]], res[ , measure], type="l",
main=paste(resp, measure, sep=": "),
ylab=measure, xlab=pivotVar[1])
}
}
}
## plot a few functions on top of each other
# after aggregating resp with each wrt a col
Plt_overlayFcnList = function(
df,
resp,
fcnList,
wrtCol,
fcnNames=NULL,
pltTitle="",
logScale="",
ymin=NULL,
ymax=NULL,
colSuffix="",
colPrefix="",
addToPlot=FALSE,
cex.main=1.5,
cex.axis=1.5,
cex.lab=1.5,
cex.legend=0.8) {
varList = c(wrtCol, resp)
df = df[ , varList]
if (is.null(fcnNames)) {
fcnNames = paste0("fcn", 1:length(fcnList))
}
outDfColNames = paste(resp, fcnNames, sep="_")
outDfColNames = paste0(colPrefix, outDfColNames, colSuffix)
for (i in 1:length(fcnList)) {
Fcn = fcnList[[i]]
colName = outDfColNames[i]
l = list()
l[[wrtCol]] = df[ , wrtCol]
dfAgg = aggregate(df[ , resp], l, Fcn)
colnames(dfAgg) = c(wrtCol, colName)
if (i == 1) {
outDf = dfAgg
} else {
outDf[ , colName] = dfAgg[ , colName]
}
ymax = max(c(dfAgg[ , 2]), ymax)
ymin = min(c(dfAgg[ , 2]), ymin)
ymax = ymax + (ymax - ymin) / 3
}
lwd = 5
colInten = 0.4
lty = 1
if (addToPlot) {
lwd = 2.5
colInten = 0.9
lty = 2
}
for (i in 1:length(fcnList)) {
if (i == 1 & !addToPlot) {
plot(outDf[ , 1], outDf[ , outDfColNames[i]],
ylim=c(ymin, ymax), col=ColAlpha(1, colInten),
typ="l", xlab=wrtCol, ylab=resp, log=logScale,
main=pltTitle, lwd=lwd, lty=lty, cex.main=cex.main,
cex.lab=cex.lab, cex.axis=cex.axis)
} else if (i == 1) {
lines(
outDf[ , 1],
outDf[ , outDfColNames[i]],
col=ColAlpha(1, colInten),
lwd=lwd, lty=lty)
}
else {
lines(
outDf[ , 1],
outDf[, outDfColNames[i]],
col=ColAlpha(i, colInten),
lwd=lwd, lty=lty)
}
}
if (is.null(fcnNames)) {
fcnNames = names(fcnList)
}
if (!is.null(fcnNames) & !addToPlot) {
legend("topright",
fcnNames,
lty=lty,
lwd=lwd,
col=ColAlpha(1:length(fcnList), colInten),
bty="n",
cex=cex.legend)
}
if (!is.null(fcnNames) & addToPlot) {
legend("topleft",
fcnNames,
lty=lty,
lwd=lwd,
col=ColAlpha(1:length(fcnList), colInten),
bty="n",
cex=cex.legend)
}
return(outDf)
}
TestPlt_overlayFcnList = function() {
n = 100
df = data.frame(
y=1:n,
x=sample(c("a", "b", "c", "d"), n, replace=TRUE))
x = 2 * pi * rep(seq(0, 1, 0.01), 5)
n = 1000
df = data.frame(
x=x,
y1=sin(x) + rnorm(length(x), sd=0.1),
y2=sin(x) + rnorm(length(x), sd=0.05))
fcnList = list(mean, sd)
Plt_overlayFcnList(
df=df,
resp="y1",
fcnList=fcnList,
fcnNames=c("mean obs", "sd obs"),
wrtCol="x",
pltTitle="",
addToPlot=FALSE)
fcnList = list(mean, sd)
Plt_overlayFcnList(
df=df,
resp="y2",
fcnList=fcnList,
fcnNames=c("mean sim", "sd sim"),
wrtCol="x",
pltTitle="",
addToPlot=TRUE)
}
## overlay by splitting wrt values of a column using ggplot
# can be deprecated because ggplot does not allow easy way to add legend
Plt_splitDfOverlayGg = function(
df, splitCol, wrtCol, yCol, varyColor=FALSE) {
df = df[ , c(splitCol, wrtCol, yCol)]
dfList = split(df, df[[splitCol]])
dfNum = length(dfList)
alpha = 2/sqrt(dfNum)
p = (
ggplot(dfList[[1]], aes(x=get(wrtCol), y=get(yCol))) +
geom_line(color=ColAlpha("black", alpha)) +
xlab(wrtCol) + ylab(yCol))
for (i in 2:dfNum) {
color = ColAlpha("black", alpha)
if (varyColor) {
color = ColAlpha(i, alpha)
}
p = p + geom_line(data=dfList[[i]], color=color)
}
return(p)
}
## overlay by splitting wrt values of a column using base R
Plt_splitDfOverlay = function(
df, splitCol, wrtCol, yCol, varyColor=FALSE, main="", lwd=2) {
df = df[ , c(splitCol, wrtCol, yCol)]
dfList = split(df, df[[splitCol]])
dfNum = length(dfList)
alpha = 2 / sqrt(dfNum)
yMax = max(df[ , yCol], na.rm=TRUE)
yMin = min(df[ , yCol], na.rm=TRUE)
xMax = max(df[ , wrtCol], na.rm=TRUE)
xMin = min(df[ , wrtCol], na.rm=TRUE)
yDelta = (yMax - yMin) / 5
yMax = yMax + yDelta
yMin = yMin - yDelta
df1 = dfList[[1]]
plot(
df1[ , wrtCol], df1[ , yCol], xlim=c(xMin, xMax), ylim=c(yMin, yMax),
xlab=wrtCol, ylab=yCol, main=main, col=ColAlpha("black", alpha),
type="l", lwd=lwd)
for (i in 2:dfNum) {
col = ColAlpha("black", alpha)
if (varyColor) {
col = ColAlpha(i, alpha)
}
lines(dfList[[i]][ , wrtCol], dfList[[i]][ , yCol], col=col, lwd=lwd)
}
}
TestPlt_splitDfOverlay = function() {
m = 30
x = rep(1:100, m)/100
y = sin(x*2*pi) + rnorm(length(x))
z = unlist(lapply(X=1:m, FUN=function(i){rep(i, 100)}))
df = data.frame(x=x, y=y, z=z)
Plt_splitDfOverlay (
df=df,
splitCol="z",
wrtCol="x",
yCol="y",
varyColor=FALSE,
main="")
}
CategBarPlt = function(x, pltTitle="bar plot", xlab="", col="blue") {
tab = table(x)
tab = 100.0 * tab / sum(tab)
barplot(tab, main=pltTitle, xlab="", col=col, ylab="freq (%)")
}
TestCategBarPlt = function() {
x = sample(c("cat", "cat", "dog", "horse"), 100, replace=TRUE)
CategBarPlt(x)
}
CategBarPlt_conti = function(
x,
dichomNum=10,
pltTitle="bar plot",
xlab="",
col="blue") {
res = DichomVar(x, num=dichomNum)
xCateg = res[["var"]]
CategBarPlt(x=xCateg, pltTitle=pltTitle, xlab="", col="blue")
return(res)
}
TestCategBarPlt_conti = function() {
x = rnorm(1000)
x = round(x)
CategBarPlt_conti(x)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.