# Nothing
## ----echo=FALSE---------------------------------------------------------------
# Default figure dimensions applied to every chunk in this document.
knitr::opts_chunk$set(fig.width = 6, fig.height = 6)

# Only adjust data.table's thread count when the package is installed.
if (requireNamespace("data.table", quietly = TRUE)) {
  # don't multi-thread during CRAN checks
  data.table::setDTthreads(1)
}
## -----------------------------------------------------------------------------
# Synthetic regression data: y is a quadratic function of x plus normal noise.
set.seed(34903490)
x <- rnorm(50)
y <- 0.5 * x^2 + 2 * x + rnorm(length(x))

# Assemble the frame with its derived columns in one step:
#   yC   - TRUE where y is at or above its 80th percentile
#   absY - absolute value of y
#   posY - TRUE where y is strictly positive
frm <- data.frame(
  x = x,
  y = y,
  yC = y >= as.numeric(quantile(y, probs = 0.8)),
  absY = abs(y),
  posY = y > 0,
  stringsAsFactors = FALSE
)
## -----------------------------------------------------------------------------
# ScatterHist of y against x, leaving every optional argument at its default.
WVPlots::ScatterHist(
  frm, "x", "y",
  title = "Example Fit"
)
## -----------------------------------------------------------------------------
# Same columns, requesting the "lm" smoothing method and a significance estimate.
WVPlots::ScatterHist(
  frm, "x", "y",
  smoothmethod = "lm",
  title = "Example Linear Fit",
  estimate_sig = TRUE
)
## -----------------------------------------------------------------------------
# Same columns, requesting the "identity" smoothing method.
WVPlots::ScatterHist(
  frm, "x", "y",
  smoothmethod = "identity",
  title = "Example Relation Plot",
  estimate_sig = TRUE
)
## -----------------------------------------------------------------------------
# Two independent standard-normal columns; `cat` is TRUE where x + y > 0.
set.seed(34903490)
fmScatterHistC <- data.frame(
  x = rnorm(50),
  y = rnorm(50),
  stringsAsFactors = FALSE
)
fmScatterHistC$cat <- with(fmScatterHistC, x + y > 0)

# Plot x against y, passing "cat" as the conditioning column.
WVPlots::ScatterHistC(
  fmScatterHistC, "x", "y", "cat",
  title = "Example Conditional Distribution"
)
## -----------------------------------------------------------------------------
# Two independent standard-normal columns; `z` is their sum.
set.seed(34903490)
frmScatterHistN <- data.frame(
  x = rnorm(50),
  y = rnorm(50),
  stringsAsFactors = FALSE
)
frmScatterHistN$z <- with(frmScatterHistN, x + y)

# Plot x against y, passing the numeric column "z" as the third variable.
WVPlots::ScatterHistN(
  frmScatterHistN, "x", "y", "z",
  title = "Example Joint Distribution"
)
## -----------------------------------------------------------------------------
# The same binary-outcome plot drawn twice, once with use_glm off ...
WVPlots::BinaryYScatterPlot(
  frm, "x", "posY",
  use_glm = FALSE,
  title = "Example 'Probability of Y' Plot (ggplot2 smoothing)"
)
# ... and once with use_glm on.
WVPlots::BinaryYScatterPlot(
  frm, "x", "posY",
  use_glm = TRUE,
  title = "Example 'Probability of Y' Plot (GLM smoothing)"
)
## -----------------------------------------------------------------------------
# This example runs only when the optional hexbin package is installed.
if (requireNamespace("hexbin", quietly = TRUE)) {
  set.seed(5353636)
  # Stack two 1000-row Gaussian samples: one centred at (1, 1) and one
  # centred at (-1, -1).
  df <- rbind(
    data.frame(
      x = rnorm(1000, mean = 1),
      y = rnorm(1000, mean = 1, sd = 0.5),
      stringsAsFactors = FALSE
    ),
    data.frame(
      x = rnorm(1000, mean = -1, sd = 0.5),
      y = rnorm(1000, mean = -1, sd = 0.5),
      stringsAsFactors = FALSE
    ),
    stringsAsFactors = FALSE
  )
  # The returned plot object is printed explicitly.
  print(WVPlots::HexBinPlot(df, "x", "y", "Two gaussians"))
}
## -----------------------------------------------------------------------------
# Synthetic scoring data: `value` is a positive outcome and `model` is a
# noisy, non-negative score derived from it.
set.seed(34903490)
y <- abs(rnorm(20)) + 0.1
x <- abs(y + 0.5 * rnorm(20))
frm <- data.frame(
  model = x,
  value = y,
  stringsAsFactors = FALSE
)

# Every row costs 1 except the first, which costs 5.
frm$costs <- c(5, rep(1, nrow(frm) - 1))
# Value obtained per unit of cost.
frm$rate <- frm$value / frm$costs
# Flag rows whose value is at or above the 80th percentile.
frm$isValuable <- frm$value >= as.numeric(quantile(frm$value, probs = 0.8))
## -----------------------------------------------------------------------------
# Gain curve of the "model" column against the "value" column.
WVPlots::GainCurvePlot(
  frm, "model", "value",
  title = "Example Continuous Gain Curve"
)
## -----------------------------------------------------------------------------
# Annotate the top 10% most valuable points as sorted by the model.
gainx <- 0.10

# Build the label for the annotated point.
#   gx, gy: fractions; each is multiplied by 100 to display as a percentage.
# Returns a single two-line character string.
labelfun <- function(gx, gy) {
  paste0(
    "The top ", gx * 100, "% most valuable points by the model\n",
    "are ", gy * 100, "% of total actual value"
  )
}
# Gain curve annotated at gainx, with the label text produced by labelfun.
WVPlots::GainCurvePlotWithNotation(
  frm, "model", "value",
  title = "Example Gain Curve with annotation",
  gainx = gainx,
  labelfun = labelfun
)
## -----------------------------------------------------------------------------
# Gain curve variant that also receives the "costs" column.
WVPlots::GainCurvePlotC(
  frm, "model", "costs", "value",
  title = "Example Continuous Gain CurveC"
)
## -----------------------------------------------------------------------------
set.seed(34903490)
# data with two different regimes of behavior
frm <- rbind(
  # 1000 scores around 0; TRUE is drawn with probability 0.02
  data.frame(
    model = rnorm(1000),
    isValuable = sample(
      c(TRUE, FALSE),
      size = 1000,
      replace = TRUE,
      prob = c(0.02, 0.98)
    )
  ),
  # 200 scores shifted up by 5; TRUE and FALSE are equally likely
  data.frame(
    model = rnorm(200) + 5,
    isValuable = sample(c(TRUE, FALSE), size = 200, replace = TRUE)
  )
)

# ROC plot of the "model" score against "isValuable", with TRUE as the target.
WVPlots::ROCPlot(
  frm, "model", "isValuable", TRUE,
  title = "Example ROC plot"
)
## -----------------------------------------------------------------------------
# Two candidate scores, x1 and x2; the outcome depends on both plus noise.
set.seed(34903490)
x1 <- rnorm(50)
x2 <- rnorm(length(x1))
y <- 0.2 * x2^2 + 0.5 * x2 + x1 + rnorm(length(x1))

# yC flags y at or above its 80th percentile.
frmP <- data.frame(
  x1 = x1,
  x2 = x2,
  yC = y >= as.numeric(quantile(y, probs = 0.8)),
  stringsAsFactors = FALSE
)
# WVPlots::ROCPlot(frmP, "x1", "yC", TRUE, title="Example ROC plot")
# WVPlots::ROCPlot(frmP, "x2", "yC", TRUE, title="Example ROC plot")
WVPlots::ROCPlotPair(
  frmP, "x1", "x2", "yC", TRUE,
  title = "Example ROC pair plot"
)
## -----------------------------------------------------------------------------
set.seed(2342458)

# Simulate `nrows` observations.
#   x  - standard normal draws
#   y  - sin(x) plus normal noise scaled by 0.25
#   x2 - a further set of standard normal draws, not used to build y
#   yc - TRUE where y exceeds 0.5
# Returns a data.frame with columns x, y, x2, yc (in that order).
make_data <- function(nrows) {
  d <- data.frame(x = rnorm(nrows))
  d$y <- sin(d$x) + 0.25 * rnorm(n = nrows)
  d$x2 <- rnorm(n = nrows)
  d$yc <- d$y > 0.5
  d
}
# Simulate separate training and test samples.
training <- make_data(500)
test <- make_data(200)

# Logistic regression fitted on the training sample only.
model <- glm(yc ~ x + x2, family = binomial, data = training)

# Score both samples on the response (probability) scale.
training$pred <- predict(model, newdata = training, type = "response")
test$pred <- predict(model, newdata = test, type = "response")

WVPlots::ROCPlotPair2(
  # model 1
  nm1 = "Training",
  frame1 = training,
  xvar1 = "pred",
  truthVar1 = "yc",
  truthTarget1 = TRUE,
  # model 2
  nm2 = "Test",
  frame2 = test,
  xvar2 = "pred",
  truthVar2 = "yc",
  truthTarget2 = TRUE,
  title = "Model performance, training vs test",
  estimate_sig = FALSE
)
## -----------------------------------------------------------------------------
# Three candidate scores; x3 does not enter the formula for y.
set.seed(34903490)
x1 <- rnorm(50)
x2 <- rnorm(length(x1))
x3 <- rnorm(length(x1))
y <- 0.2 * x2^2 + 0.5 * x2 + x1 + rnorm(length(x1))

# yC flags y at or above its 80th percentile.
frm_m <- data.frame(
  x1 = x1,
  x2 = x2,
  x3 = x3,
  yC = y >= as.numeric(quantile(y, probs = 0.8))
)

WVPlots::ROCPlotPairList(
  frame = frm_m,
  xvar_names = c("x1", "x2", "x3"),
  truthVar = "yC",
  truthTarget = TRUE,
  title = "Example ROC list plot"
)
## -----------------------------------------------------------------------------
# Precision/recall plot for the "model" score with TRUE as the target.
WVPlots::PRTPlot(
  frm, "model", "isValuable", TRUE,
  title = "Example Precision-Recall plot"
)
## -----------------------------------------------------------------------------
# replicate PRTPlot. Looks a little different because ThresholdPlot does different smoothing
WVPlots::ThresholdPlot(
  frm, "model", "isValuable",
  title = "Reproduce PRTPlot",
  truth_target = TRUE, # default
  metrics = c("precision", "recall")
)
# default: sensitivity/specificity
WVPlots::ThresholdPlot(
  frm, "model", "isValuable",
  title = "Sensitivity and Specificity as a Function of Threshold"
)
## -----------------------------------------------------------------------------
# Threshold plot of the true and false positive rates.
WVPlots::ThresholdPlot(
  frm, "model", "isValuable",
  title = "ROC 'unrolled'",
  metrics = c("true_positive_rate", "false_positive_rate")
)
## -----------------------------------------------------------------------------
# Density view of "model", split on "isValuable".
WVPlots::DoubleDensityPlot(
  frm, "model", "isValuable",
  title = "Example double density plot"
)
## -----------------------------------------------------------------------------
# Histogram view of the same two columns.
WVPlots::DoubleHistogramPlot(
  frm, "model", "isValuable",
  title = "Example double histogram plot"
)
## -----------------------------------------------------------------------------
set.seed(34903490)
# discrete variable: letters of the alphabet
# frequencies of letters in English, listed in the order of `letters`
# source: http://en.algoritmy.net/article/40379/Letter-frequency-English
letterFreqs <- c(
  8.167, 1.492, 2.782, 4.253, 12.702, 2.228,
  2.015, 6.094, 6.966, 0.153, 0.772, 4.025, 2.406, 6.749, 7.507, 1.929,
  0.095, 5.987, 6.327, 9.056, 2.758, 0.978, 2.360, 0.150, 1.974, 0.074
) / 100 # the listed figures are divided by 100
letterFrame <- data.frame(
  letter = letters,
  freq = letterFreqs,
  stringsAsFactors = FALSE
)

# now let's generate letters according to their letter frequencies
N <- 1000
randomDraws <- data.frame(
  draw = seq_len(N),
  letter = sample(
    letterFrame$letter,
    size = N,
    replace = TRUE,
    prob = letterFrame$freq
  ),
  stringsAsFactors = FALSE
)

# Four renderings of the same draws, each with different options.
WVPlots::ClevelandDotPlot(
  randomDraws, "letter",
  title = "Example Cleveland-style dot plot"
)
WVPlots::ClevelandDotPlot(
  randomDraws, "letter",
  limit_n = 10,
  title = "Top 10 most frequent letters"
)
WVPlots::ClevelandDotPlot(
  randomDraws, "letter",
  sort = 0,
  title = "Example Cleveland-style dot plot, unsorted"
)
WVPlots::ClevelandDotPlot(
  randomDraws, "letter",
  sort = 1,
  stem = FALSE,
  title = "Example with increasing sort order + coord_flip, no stem"
) +
  ggplot2::coord_flip()
## -----------------------------------------------------------------------------
set.seed(34903490)
N <- 1000
# Possible values of num_cars and their relative weights.
ncar_vec <- 0:5
prob <- c(1.5, 3, 3.5, 2, 1, 0.75)
# Normalise the weights so they sum to one.
prob <- prob / sum(prob)

df <- data.frame(
  num_cars = sample(ncar_vec, size = N, replace = TRUE, prob = prob),
  stringsAsFactors = FALSE
)
WVPlots::ClevelandDotPlot(
  df, "num_cars",
  sort = 0,
  title = "Distribution of household vehicle ownership"
)
## -----------------------------------------------------------------------------
set.seed(354534)
N <- 100

# rough proportions of eye colors
eprobs <- c(0.37, 0.36, 0.16, 0.11)
eye_color <- sample(
  c("Brown", "Blue", "Hazel", "Green"),
  size = N,
  replace = TRUE,
  prob = eprobs
)
# Sex is drawn uniformly from the two labels.
sex <- sample(c("Male", "Female"), size = N, replace = TRUE)

# A data frame of eye color by sex
dframe <- data.frame(
  eye_color = eye_color,
  sex = sex,
  stringsAsFactors = FALSE
)
WVPlots::ShadowPlot(
  dframe, "eye_color", "sex",
  title = "Shadow plot of eye colors by sex"
)
## -----------------------------------------------------------------------------
set.seed(354534)
N <- 100

# Every row starts in "region 2"; rows in the tails are then relabelled.
dframe <- data.frame(
  x = rnorm(N),
  gp = "region 2",
  stringsAsFactors = FALSE
)
dframe$gp[dframe$x < -0.5] <- "region 1"
dframe$gp[dframe$x > 0.5] <- "region 3"

WVPlots::ShadowHist(
  dframe, "x", "gp",
  title = "X values by region"
)
## -----------------------------------------------------------------------------
# Same histogram with the monochrome option switched on.
WVPlots::ShadowHist(
  dframe, "x", "gp",
  title = "X values by region",
  monochrome = TRUE
)
## -----------------------------------------------------------------------------
# Same histogram with palette = NULL and a manual ggplot2 fill scale added.
colormap <- c("#1F968BFF", "#29AF7FFF", "#55C667FF")
WVPlots::ShadowHist(
  dframe, "x", "gp",
  title = "X values by region",
  palette = NULL
) +
  ggplot2::scale_fill_manual(values = colormap)
## -----------------------------------------------------------------------------
# Seed the generator so this chunk's random labels and measurements do not
# depend on whatever RNG state earlier chunks (or plotting code) left behind.
set.seed(34903490)

# Three classes, each with its own mean measurement.
classes <- c("a", "b", "c")
means <- c(2, 4, 3)
names(means) <- classes

# 1000 random class labels; each measurement is its class mean plus
# standard-normal noise (means[label] looks the mean up by class name).
label <- sample(classes, size = 1000, replace = TRUE)
meas <- means[label] + rnorm(1000)
frm2 <- data.frame(
  label = label,
  meas = meas,
  stringsAsFactors = FALSE
)

# The same data drawn with ScatterBoxPlot and with ScatterBoxPlotH; note the
# two calls take the column names in opposite order.
WVPlots::ScatterBoxPlot(
  frm2, "label", "meas",
  pt_alpha = 0.2,
  title = "Example Scatter/Box plot"
)
WVPlots::ScatterBoxPlotH(
  frm2, "meas", "label",
  pt_alpha = 0.2,
  title = "Example Scatter/Box plot"
)
## -----------------------------------------------------------------------------
# Seed the generator so the binomial draws below are the same on every run,
# independent of the RNG state left by earlier chunks.
set.seed(34903490)

# 1000 draws from Binomial(size = 20, prob = 0.5).
frmx <- data.frame(
  x = rbinom(1000, 20, 0.5),
  stringsAsFactors = FALSE
)
WVPlots::DiscreteDistribution(frmx, "x", "Discrete example")
## -----------------------------------------------------------------------------
set.seed(52523)
# 100 normal draws scaled by 100.
d <- data.frame(
  wt = 100 * rnorm(100),
  stringsAsFactors = FALSE
)
# Count-based and density-based views of the same column.
WVPlots::PlotDistCountNormal(d, "wt", "example")
WVPlots::PlotDistDensityNormal(d, "wt", "example")
## -----------------------------------------------------------------------------
set.seed(13951)
trial_size <- 20 # one trial is 20 flips
ntrial <- 100 # run 100 trials
true_frate <- 0.4 # true heads probability

# Number of heads observed in each trial.
fdata <- data.frame(
  n_heads = rbinom(ntrial, trial_size, true_frate),
  stringsAsFactors = FALSE
)
title <- paste("Distribution of head counts, trial size =", trial_size)

# compare to empirical p
WVPlots::PlotDistCountBinomial(fdata, "n_heads", trial_size, title)
## -----------------------------------------------------------------------------
# compare to theoretical p of 0.5
WVPlots::PlotDistCountBinomial(
  fdata, "n_heads", trial_size, title,
  p = 0.5
)
## -----------------------------------------------------------------------------
set.seed(349521)
N <- 100 # number of cohorts
psucc <- 0.15 # true success rate in population
# sizes of observed sample groups
group_size <- round(runif(N, min = 25, max = 50))
# successes in each group
nsucc <- rbinom(N, group_size, psucc)

hdata <- data.frame(
  n_success = nsucc,
  group_size = group_size,
  # observed rate of successes in each group
  rate_success = nsucc / group_size,
  stringsAsFactors = FALSE
)

title <- "Observed prevalence of success in population"
# Histogram-based and density-based views of the observed rates.
WVPlots::PlotDistHistBeta(hdata, "rate_success", title)
WVPlots::PlotDistDensityBeta(hdata, "rate_success", title)
## -----------------------------------------------------------------------------
# A short deterministic series, indexed 1..length(y).
y <- c(1, 2, 3, 4, 5, 10, 15, 18, 20, 25)
x <- seq_along(y)
df <- data.frame(
  x = x,
  y = y,
  stringsAsFactors = FALSE
)

# One group (no grouping column), once per alignment option.
WVPlots::ConditionalSmoothedScatterPlot(
  df, "x", "y", NULL,
  title = "centered smooth, one group"
)
WVPlots::ConditionalSmoothedScatterPlot(
  df, "x", "y", NULL,
  title = "left smooth, one group",
  align = "left"
)
WVPlots::ConditionalSmoothedScatterPlot(
  df, "x", "y", NULL,
  title = "right smooth, one group",
  align = "right"
)

# Seed the generator so the noise added below is the same on every run,
# independent of the RNG state left by earlier chunks or by the plots above.
set.seed(34903490)
n <- length(x)

# Three noisy copies of the series (scaled by 1, 1/2 and 2), labelled by `gp`.
df <- rbind(
  data.frame(x = x, y = y + rnorm(n), gp = "times 1",
             stringsAsFactors = FALSE),
  data.frame(x = x, y = 0.5 * y + rnorm(n), gp = "times 1/2",
             stringsAsFactors = FALSE),
  data.frame(x = x, y = 2 * y + rnorm(n), gp = "times 2",
             stringsAsFactors = FALSE),
  stringsAsFactors = FALSE
)

# The same three alignments, now grouped by "gp".
WVPlots::ConditionalSmoothedScatterPlot(
  df, "x", "y", "gp",
  title = "centered smooth, multigroup"
)
WVPlots::ConditionalSmoothedScatterPlot(
  df, "x", "y", "gp",
  title = "left smooth, multigroup",
  align = "left"
)
WVPlots::ConditionalSmoothedScatterPlot(
  df, "x", "y", "gp",
  title = "right smooth, multigroup",
  align = "right"
)
## -----------------------------------------------------------------------------
set.seed(52523)
# 100 standard-normal measurements.
d <- data.frame(
  meas = rnorm(100),
  stringsAsFactors = FALSE
)
threshold <- -1.5

# First call uses the threshold as given; the second negates it and asks for
# the right tail.
WVPlots::ShadedDensity(
  d, "meas", threshold,
  title = "Example shaded density plot, left tail"
)
WVPlots::ShadedDensity(
  d, "meas", -threshold,
  tail = "right",
  title = "Example shaded density plot, right tail"
)
## -----------------------------------------------------------------------------
set.seed(52523)
# 100 standard-normal measurements.
d <- data.frame(
  meas = rnorm(100),
  stringsAsFactors = FALSE
)

# first and third quartiles of the data (central 50%)
boundaries <- quantile(d$meas, c(0.25, 0.75))
WVPlots::ShadedDensityCenter(
  d, "meas", boundaries,
  title = "Example center-shaded density plot"
)
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.