## -----------------------------------------------------------------------------
# Load libraries
library("netdist")
library("purrr")
## -----------------------------------------------------------------------------
# Set source directory for Virus PPI graph edge files
source_dir <- system.file(file.path("extdata", "VRPINS"), package = "netdist")
# Load query graphs
graph_1 <- read_simple_graph(file.path(source_dir, "EBV.txt"),
format = "ncol")
graph_2 <- read_simple_graph(file.path(source_dir, "ECL.txt"),
format = "ncol")
## -----------------------------------------------------------------------------
# Maximum graphlet size to calculate counts and netdis statistic for.
max_graphlet_size <- 4
# Ego network neighbourhood size
neighbourhood_size <- 2
# Minimum size of ego networks to consider
min_ego_nodes <- 3
min_ego_edges <- 1
# Ego network density binning parameters
min_bin_count <- 5
num_bins <- 100
## -----------------------------------------------------------------------------
# Get ego networks for query graphs and reference graph
ego_1 <- make_named_ego_graph(graph_1,
order = neighbourhood_size,
min_ego_nodes = min_ego_nodes,
min_ego_edges = min_ego_edges)
ego_2 <- make_named_ego_graph(graph_2,
order = neighbourhood_size,
min_ego_nodes = min_ego_nodes,
min_ego_edges = min_ego_edges)
## -----------------------------------------------------------------------------
# Count graphlets for ego networks in query and reference graphs
graphlet_counts_1 <- ego_to_graphlet_counts(ego_1, max_graphlet_size = max_graphlet_size)
graphlet_counts_2 <- ego_to_graphlet_counts(ego_2, max_graphlet_size = max_graphlet_size)
## -----------------------------------------------------------------------------
# Get ego-network densities
densities_1 <- ego_network_density(graphlet_counts_1)
densities_2 <- ego_network_density(graphlet_counts_2)
# Adaptively bin ego-network densities
binned_densities_1 <- binned_densities_adaptive(densities_1,
min_counts_per_interval = min_bin_count,
num_intervals = num_bins)
ego_density_bins_1 <- binned_densities_1$breaks
binned_densities_2 <- binned_densities_adaptive(densities_2,
min_counts_per_interval = min_bin_count,
num_intervals = num_bins)
ego_density_bins_2 <- binned_densities_2$breaks
## -----------------------------------------------------------------------------
#' INTERNAL FUNCTION - DO NOT CALL DIRECTLY
#' Calculate expected counts with geometric poisson (Polya-Aeppli)
#' approximation for a single density bin.
#' @param bin_idx Density bin index to calculate expected counts for.
#' @param graphlet_counts Graphlet counts for a number of ego_networks.
#' @param density_interval_indexes Density bin index for
#' each ego network.
exp_counts_bin_gp <- function(bin_idx, graphlet_counts,
density_interval_indexes,
mean_binned_graphlet_counts,
max_graphlet_size) {
counts <- graphlet_counts[density_interval_indexes == bin_idx, ]
means <- mean_binned_graphlet_counts[bin_idx, ]
mean_sub_counts <- sweep(counts, 2, means)
Vd_sq <- colSums(mean_sub_counts^2) / (nrow(mean_sub_counts) - 1)
theta_d <- 2 * means / (Vd_sq + means)
exp_counts_dk <- vector()
for (k in 2:max_graphlet_size) {
graphlet_idx <- graphlet_ids_for_size(k)
lambda_dk <- mean(2 * means[graphlet_idx]^2 /
(Vd_sq[graphlet_idx] + means[graphlet_idx]),
na.rm = TRUE)
exp_counts_dk <- append(exp_counts_dk,
lambda_dk / theta_d[graphlet_idx])
}
exp_counts_dk
}
#' Calculate expected counts in density bins using the
#' geometric poisson (Polya-Aeppli) approximation.
#' @param graphlet_counts Graphlet counts for a number of ego_networks.
#' @param density_interval_indexes Density bin index for
#' each ego network.
#' @param max_graphlet_size Determines the maximum size of graphlets
#' included in graphlet_counts.
#' @export
density_binned_counts_gp <- function(graphlet_counts,
density_interval_indexes,
max_graphlet_size) {
mean_binned_graphlet_counts <- mean_density_binned_graphlet_counts(
graphlet_counts,
density_interval_indexes)
nbins <- length(unique(density_interval_indexes))
expected_counts_bin <- t(sapply(1:nbins,
exp_counts_bin_gp,
graphlet_counts = graphlet_counts,
density_interval_indexes = density_interval_indexes,
mean_binned_graphlet_counts = mean_binned_graphlet_counts,
max_graphlet_size = max_graphlet_size))
# deal with NAs caused by bins with zero counts for a graphlet
expected_counts_bin[is.nan(expected_counts_bin)] <- 0
expected_counts_bin
}
binned_graphlet_counts_1 <- density_binned_counts_gp(graphlet_counts_1,
binned_densities_1$interval_indexes,
max_graphlet_size)
binned_graphlet_counts_2 <- density_binned_counts_gp(graphlet_counts_2,
binned_densities_2$interval_indexes,
max_graphlet_size)
## -----------------------------------------------------------------------------
# Calculate expected graphlet counts for each ego network
exp_graphlet_counts_1 <- netdis_expected_counts(graphlet_counts_1,
ego_density_bins_1,
binned_graphlet_counts_1,
max_graphlet_size,
scale_fn = NULL)
exp_graphlet_counts_2 <- netdis_expected_counts(graphlet_counts_2,
ego_density_bins_2,
binned_graphlet_counts_2,
max_graphlet_size,
scale_fn = NULL)
# Centre graphlet counts by subtracting expected counts
centred_graphlet_counts_1 <- netdis_subtract_exp_counts(graphlet_counts_1,
exp_graphlet_counts_1,
max_graphlet_size)
centred_graphlet_counts_2 <- netdis_subtract_exp_counts(graphlet_counts_2,
exp_graphlet_counts_2,
max_graphlet_size)
## -----------------------------------------------------------------------------
sum_graphlet_counts_1 <- colSums(centred_graphlet_counts_1)
sum_graphlet_counts_2 <- colSums(centred_graphlet_counts_2)
## -----------------------------------------------------------------------------
netdis_result <- netdis_uptok(sum_graphlet_counts_1,
sum_graphlet_counts_2,
max_graphlet_size)
print(netdis_result)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.