Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
library(hypervolume)
library(palmerpenguins)
library(ggplot2)
library(gridExtra)
library(raster)
## ---- results = "hide"--------------------------------------------------------
data(penguins)
data(quercus)
## ---- results = "hide", eval = FALSE------------------------------------------
# data("quercus")
# data_alba = subset(quercus, Species=="Quercus alba")[,c("Longitude","Latitude")]
# data_rubra = subset(quercus, Species=="Quercus rubra")[,c("Longitude","Latitude")]
# climatelayers <- getData('worldclim', var='bio', res=10, path=tempdir())
#
# # z-transform climate layers to make axes comparable
# climatelayers_ss = climatelayers[[c(1,4,12,15)]]
# for (i in 1:nlayers(climatelayers_ss))
# {
# climatelayers_ss[[i]] <- (climatelayers_ss[[i]] - cellStats(climatelayers_ss[[i]], 'mean')) / cellStats(climatelayers_ss[[i]], 'sd')
# }
# climatelayers_ss_cropped = crop(climatelayers_ss, extent(-150,-50,15,60))
#
# # extract transformed climate values
# climate_alba = extract(climatelayers_ss_cropped, data_alba)
# climate_rubra = extract(climatelayers_ss_cropped, data_rubra)
# combined_sample = data.frame(rbind(climate_rubra, climate_alba))
# combined_sample["Species"] = quercus$Species
#
# # Create hypervolumes
# hv_alba = hypervolume(climate_alba)
# hv_rubra = hypervolume(climate_rubra)
## ----eval=FALSE---------------------------------------------------------------
# alba_seq = hypervolume_resample("alba_seq", hv_alba, "bootstrap seq", n = 20, seq = seq(100, 1700, 400), cores = 32)
# rubra_seq = hypervolume_resample("rubra_seq", hv_rubra, "bootstrap seq", n = 20, seq = seq(100, 2100, 400), cores = 32)
#
# # Funnel Plots
# alba_plot = hypervolume_funnel(alba_seq) +
# geom_point(aes(y = upperq)) +
# geom_point(aes(y = lowerq)) +
# geom_point(aes(y = sample_mean), col = "blue") +
# theme_bw() +
# labs(title = "a)", subtitle = NULL) +
# ylab("Volume") +
# ylim(0, 0.8) +
# xlim(0, 2100)
# rubra_plot = hypervolume_funnel(rubra_seq) +
# geom_point(aes(y = upperq)) +
# geom_point(aes(y = lowerq)) +
# geom_point(aes(y = sample_mean), col = "blue") +
# theme_bw() +
# labs(title = "b)", subtitle = NULL) +
# ylab("Volume") +
# ylim(0, 0.8) +
# xlim(0, 2100)
# grid.arrange(alba_plot, rubra_plot, nrow = 1)
## ----out.width="100%", echo=FALSE---------------------------------------------
url = "funnel_plot_comparisons.png"
knitr::include_graphics(url)
## ---- eval = FALSE------------------------------------------------------------
# data("quercus")
# data_alba = subset(quercus, Species=="Quercus alba")[,c("Longitude","Latitude")]
# climatelayers <- getData('worldclim', var='bio', res=10, path=tempdir())
#
# # z-transform climate layers to make axes comparable
# climatelayers_ss = climatelayers[[c(1,4,12,15)]]
# for (i in 1:nlayers(climatelayers_ss))
# {
# climatelayers_ss[[i]] <- (climatelayers_ss[[i]] - cellStats(climatelayers_ss[[i]], 'mean')) / cellStats(climatelayers_ss[[i]], 'sd')
# }
# climatelayers_ss_cropped = crop(climatelayers_ss, extent(-150,-50,15,60))
#
# # extract transformed climate values
# climate_alba = extract(climatelayers_ss_cropped, data_alba)
#
# # Generate Hypervolumes
# hv_alba = hypervolume(climate_alba,name='alba',samples.per.point=10)
#
# # Give all samples to the east of -75 longitude double the weight of all other observations to compensate for undersampling
# weights = ifelse(data_alba$Longitude > -75, 2, 1)
#
# # Create new hypervolume using weighted bootstrapping
# weighted_hv_alba = hypervolume_resample(hv = hv_alba, n=1, method = "weighted bootstrap", weights = weights, to_file = FALSE)
# weighted_hv_alba = weighted_hv_alba[[1]]
#
# # Perform overlap test to see if the weights changed the distribution
# alba_biased_combined_data = rbind(hv_alba@Data, weighted_hv_alba@Data)
# alba_biased_combined_hv = hypervolume(alba_biased_combined_data)
# alba_biased_combined_samples = hypervolume_resample("combined_biased_resample", hv = alba_biased_combined_hv, method = "bootstrap", n = 128, points_per_resample = nrow(alba_biased_combined_data)/2, verbose = FALSE, cores = 32)
# result = hypervolume_overlap_test(hv_alba, weighted_hv_alba, alba_biased_combined_samples, cores = 32)
#
# # Show null distribution and observed value
# result$plots$sorensen +
# theme_bw() +
# labs(title = NULL) +
# xlab("Sorensen Distance") +
# ylab("Density")
## ----out.width="100%", echo = FALSE-------------------------------------------
url = "biased_bootstrap_overlap.png"
knitr::include_graphics(url)
## ---- eval=FALSE--------------------------------------------------------------
# hv = hypervolume(na.omit(penguins)[,3:6], verbose = FALSE)
# cols_to_weigh = c("bill_length_mm", "bill_depth_mm")
#
# # Highest weights are assigned to max bill length and max bill depth
# mu = apply(hv@Data, 2, max)[cols_to_weigh]
# sigma = apply(hv@Data, 2, var)[cols_to_weigh]*2
# biased_path = hypervolume_resample("Bill bias", hv, method = "weighted bootstrap", n = 1, mu = mu, sigma = sigma, cols_to_weigh = cols_to_weigh)
#
# # Read in hypervolume object from file
# biased_hv = readRDS(file.path(biased_path, "resample 1.rds"))
#
# combined_dat = data.frame(rbind(hv@Data, biased_hv@Data))
# combined_dat['Type'] = rep(c('original', 'biased'), each = nrow(hv@Data))
## ---- eval=FALSE--------------------------------------------------------------
# plot1 = ggplot(combined_dat) +
# geom_histogram(aes(x = bill_depth_mm, fill = Type), bins = 20) +
# facet_wrap(~Type) +
# ggtitle("Distribution of Bill Depth") +
# xlab("Bill depth (mm)") +
# ylab("Count") +
# theme_bw() +
# theme(legend.position = "none")
# plot2 = ggplot(combined_dat) +
# geom_histogram(aes(x = bill_length_mm, fill = Type), bins = 20) +
# facet_wrap(~Type) +
# ggtitle("Distribution of Bill Length") +
# xlab("Bill length (mm)") +
# ylab(NULL) +
# theme_bw() +
# theme(legend.position = "none")
# plot3 = ggplot(combined_dat) +
# geom_histogram(aes(x = flipper_length_mm, fill = Type), bins = 20) +
# facet_wrap(~Type) +
# ggtitle("Distribution of Flipper Length") +
# xlab("Flipper length (mm)") +
# ylab("Count")+
# theme_bw() +
# theme(legend.position = "none")
# plot4 = ggplot(combined_dat) +
# geom_histogram(aes(x = body_mass_g, fill = Type), bins = 20) +
# facet_wrap(~Type) +
# ggtitle("Distribution of Body Mass") +
# xlab("Body mass (g)") +
# ylab(NULL) +
# theme_bw() +
# theme(legend.position = "none")
# grid.arrange(plot1, plot2, plot3, plot4, nrow = 2)
## ----out.width="100%", echo = FALSE-------------------------------------------
url = "penguin_weights.png"
knitr::include_graphics(url)
## ---- eval=FALSE--------------------------------------------------------------
# data("quercus")
# data_alba = subset(quercus, Species=="Quercus alba")[,c("Longitude","Latitude")]
# data_rubra = subset(quercus, Species=="Quercus rubra")[,c("Longitude","Latitude")]
# climatelayers <- getData('worldclim', var='bio', res=10, path=tempdir())
#
# # z-transform climate layers to make axes comparable
# climatelayers_ss = climatelayers[[c(1,4,12,15)]]
# for (i in 1:nlayers(climatelayers_ss))
# {
# climatelayers_ss[[i]] <- (climatelayers_ss[[i]] - cellStats(climatelayers_ss[[i]], 'mean')) / cellStats(climatelayers_ss[[i]], 'sd')
# }
# climatelayers_ss_cropped = crop(climatelayers_ss, extent(-150,-50,15,60))
#
# # extract transformed climate values
# climate_alba = extract(climatelayers_ss_cropped, data_alba)
# climate_rubra = extract(climatelayers_ss_cropped, data_rubra)
#
# # Generate Hypervolumes
# hv_alba = hypervolume(climate_alba,name='alba',samples.per.point=10)
# hv_rubra = hypervolume(climate_rubra,name='rubra',samples.per.point=10)
#
# # Method 1: 2hr runtime with 12 threads
# combined_sample = rbind(climate_alba, climate_rubra)
# population_hat = hypervolume(combined_sample)
#
# # Create bootstrapped hypervolumes of both sample sizes
# method1_path_size_1669 = hypervolume_resample("quercus_1669_boot", population_hat, "bootstrap", n = 100, cores = 12)
# method1_path_size_2110 = hypervolume_resample("quercus_2110_boot", population_hat, "bootstrap", n = 100, cores = 12)
#
#
# result1 = hypervolume_overlap_test(hv_alba, hv_rubra, c(method1_path_size_1669, method1_path_size_2110), cores = 12)
#
# #Method 2: 9hr runtime with 12 threads
# method2_path = hypervolume_permute("rubra_alba_permutation", hv1, hv2, n = 1357, cores = 12)
#
# result2 = hypervolume_overlap_test(hv1, hv2, method2_path, cores = 2)
#
# # Graphical Results of null sorensen statistic
# plot1 = result1$plots$sorensen +
# xlab("Sorensen distance") +
# ylab("Density") +
# ggtitle("a)") +
# xlim(0.7, 1) +
# theme_bw()
# plot1 = result2$plots$sorensen +
# xlab("Sorensen distance") +
# ylab("Density") +
# ggtitle("b)") +
# xlim(0.7, 1) +
# theme_bw()
# grid.arrange(plot1, plot2, ncol=2)
## ----out.width="100%", echo = FALSE-------------------------------------------
url = "overlap_test_demos.png"
knitr::include_graphics(url)
## ---- eval=FALSE--------------------------------------------------------------
# library(foreach)
# library(mvtnorm)
# library(doParallel)
#
# # Choose number of threads to use for parallel computing
# nthreads = detectCores()
#
# # Load required libraries in the environment of each thread and register cluster to parallel backend
# cl = makeCluster(nthreads)
# clusterEvalQ(cl, {
# library(hypervolume)
# library(mvtnorm)
# })
# registerDoParallel(cl)
#
# # Set shift distance and number of test replications
# N = 40
# M = 20
# offset = # shift distance
# reps = # Number of replications of the test
#
# # Each replication takes around 4hrs. Total run time equals reps/nthreads * 4
# # Returns a list of p values given by the overlap test
# pvals = foreach(i = 1:reps, .combine = rbind) %dopar% {
# # Random data N(0, I) and offset mean
# x1 = rmvnorm(M, rep(0, 5))
# x2 = rmvnorm(N, rep(offset, 5))
# hv1 = hypervolume(x1)
# hv2 = hypervolume(x2)
# hv_combined = hypervolume(rbind(x1, x2))
#
# path_M = hypervolume_resample(paste0("shift_", offset, "_M_", i), hv_combined, method = "bootstrap", n = 20, points_per_resample = M)
# path_N = hypervolume_resample(paste0("shift_", offset, "_N_", i), hv_combined, method = "bootstrap", n = 20, points_per_resample = N)
# result = hypervolume_overlap_test(hv1, hv2, c(path_M, path_N))
# result$p_values
# }
#
# # Power is calculated as the percent of test replications that result in rejections which depends on the rejection threshold or alpha value.
# # We will use alpha = 0.05
# power = mean(pvals[,"sorensen"] <= 0.05)
## ---- eval = FALSE------------------------------------------------------------
# power0 = mean(pvals0[,"sorensen"] <= 0.05)
# power1 = mean(pvals1[,"sorensen"] <= 0.05)
# power2 = mean(pvals2[,"sorensen"] <= 0.05)
# power3 = mean(pvals3[,"sorensen"] <= 0.05)
# power4 = mean(pvals4[,"sorensen"] <= 0.05)
# power5 = mean(pvals5[,"sorensen"] <= 0.05)
#
# ggplot(data =NULL, aes(x = c(0, 0.4472136 0.8944272 1.3416408 1.7888544 2.2360680), y = c(power0, power1, power2, power3, power4, power5))) +
# geom_line() +
# theme_bw() +
# ylab("Power") +
# xlab("Shift distance")
## ----out.width="100%", echo = FALSE-------------------------------------------
url = "shift_powers.png"
knitr::include_graphics(url)
## ----fig.width=7, fig.height=7 , echo=FALSE-----------------------------------
set.seed(123)
library(mvtnorm)
alpha_squared = 1 - 0:5 * 1/6
data1 = data.frame(rmvnorm(1000, mean = c(0, 0)))
data2 = data.frame(rmvnorm(1000, mean = c(0, 0), sigma = diag(c(alpha_squared[2], 1/alpha_squared[2]))))
data3 = data.frame(rmvnorm(1000, mean = c(0, 0), sigma = diag(c(alpha_squared[3], 1/alpha_squared[3]))))
data4 = data.frame(rmvnorm(1000, mean = c(0, 0), sigma = diag(c(alpha_squared[4], 1/alpha_squared[4]))))
data5 = data.frame(rmvnorm(1000, mean = c(0, 0), sigma = diag(c(alpha_squared[5], 1/alpha_squared[5]))))
data6 = data.frame(rmvnorm(1000, mean = c(0, 0), sigma = diag(c(alpha_squared[6], 1/alpha_squared[6]))))
data = rbind(data1, data2, data3, data4, data5, data6)
data["Scale factor"] = rep(c("a = 1", "a = 0.913", "a = 0.816", "a = 0.707", "a = 0.577", "a = 0.408"), each = 1000)
ggplot(data = data, aes(x = X1, y = X2, color = `Scale factor`)) +
geom_point(alpha = 0.4) +
theme_bw() +
coord_fixed(ratio = 1) +
facet_wrap(~`Scale factor`) +
theme(legend.position = "none")
## ----eval=FALSE---------------------------------------------------------------
# N = 40
# M = 20
# offset = # Scale factor
# reps = # Number of replications of the test
#
# # Since the data is only 2 dimensions each replication only takes a few minutes
# pvals = foreach(i = 1:reps, .combine = rbind) %dopar% {
# x1 = rmvnorm(M, rep(0, 2))
# x2 = rmvnorm(N, rep(0, 2), diag(c(offset^2, 1/(offset^2))))
# hv1 = hypervolume(x1)
# hv2 = hypervolume(x2)
# hv_combined = hypervolume(rbind(x1, x2))
#
# path_M = hypervolume_resample(paste0("scale_", offset, "_M_", i), hv_combined, method = "bootstrap", n = 20, points_per_resample = M)
# path_N = hypervolume_resample(paste0("scale_", offset, "_N_", i), hv_combined, method = "bootstrap", n = 20, points_per_resample = N)
# result = hypervolume_overlap_test(hv1, hv2, c(path_M, path_N))
# result$p_values
# }
## ----eval=FALSE---------------------------------------------------------------
# power0 = mean(pvals0[,"sorensen"] <= 0.05)
# power1 = mean(pvals1[,"sorensen"] <= 0.05)
# power2 = mean(pvals2[,"sorensen"] <= 0.05)
# power3 = mean(pvals3[,"sorensen"] <= 0.05)
# power4 = mean(pvals4[,"sorensen"] <= 0.05)
# power5 = mean(pvals5[,"sorensen"] <= 0.05)
#
# ggplot(data =NULL, aes(x = sqrt(1 - 0:5 * 1/6), y = c(power0, power1, power2, power3, power4, power5))) +
# geom_line() +
# theme_bw() +
# ylab("Power") +
# xlab("Scale factor") +
# scale_x_reverse()
## ----out.width="100%", echo = FALSE-------------------------------------------
url = "shape_powers.png"
knitr::include_graphics(url)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.