library(RColorBrewer)
library(colorspace)
library(igraph)
# PWR
pwr <- function(net, y,X, bandwidth="cv_pwr"){
igraph::igraph_options(add.vertex.names=FALSE)
N <- length(igraph::V(net))
X <- as.matrix(X)
if(igraph::is.igraph(net)==FALSE){
warning("net should be a igraph object")
} else if(bandwidth=="cv_pwr"){
cv <- pwr_cv(net=net, y=y, X=X, n=500)
bandwidth <- cv$bandwidth
N <- length(igraph::V(net))
cwr.net <- array(0,dim=c(length(igraph::V(net)),ncol(X)+1))
for(i in 1:N){
d <- exp(-(c(c(.Call(igraph:::C_R_igraph_shortest_paths, net, as.numeric(i)-1, igraph::V(net)-1,3,1,1)))/bandwidth)^2)
cwr.net[i,] <- coef(lm(y~X, weights=d))
cwr.net <- as.data.frame(cwr.net)
colnames(cwr.net) <- c("intercept", paste0("Beta_", 1:ncol(X)))
}
results <- list(coefficients=cwr.net, cv_band=cv)
return(results)
} else{
N <- length(igraph::V(net))
cwr.net <- array(0,dim=c(length(igraph::V(net)),ncol(X)+1))
for(i in 1:N){
d <- exp(-(c(c(.Call(igraph:::C_R_igraph_shortest_paths, net, as.numeric(i)-1, igraph::V(net)-1,3,1,1)))/bandwidth)^2)
cwr.net[i,] <- coef(lm(y~X, weights=d))
cwr.net <- as.data.frame(cwr.net)
colnames(cwr.net) <- c("intercept", paste0("Beta_", 1:ncol(X)))
}
return(cwr.net)
}}
pwr_cv <- function(net, y, X, bandwidth=exp(seq(from=-1, to=3, by=.05)),
n=length(igraph::V(net))) {
# making sure is a matrix
X <- as.matrix(X)
# Selecting a sample from the total network
sample_obs <- sample(length(igraph::V(net)), n) # must be n here
# Selecting y and x
y_sample <- y[sample_obs]
x_sample <- X[sample_obs, ]
# Calculating the distance matrix
distance.w <- sapply(sample_obs, function(z)
igraph::distances(net, v = igraph::V(net)[z],mode="all")[sample_obs])
# running the model with different bandwidths
fitted_models_sample <- purrr::map(bandwidth, ~ cwr.matrix_out(y=y_sample,
x=x_sample,
net=net, obs=sample_obs,
bandwidth = .x,
distance.w = distance.w))
# Getting the mse
mse_values <- purrr::map_df(fitted_models_sample, ~ mse(data=.x, x=x_sample, y=y_sample)) %>%
dplyr::mutate(bandwidth=bandwidth)
# Analytical solution
res <- unname(lm(mse_values$mse~mse_values$bandwidth)$residuals)
opt<-mse_values$bandwidth[which(res==min(res))]
#graph
cv_graph <- ggplot2::ggplot(mse_values, ggplot2::aes(y=mse, x=bandwidth)) +
ggplot2::geom_point(size=3) +
ggplot2::xlab("Bandwidth") + ggplot2::ylab("Mean Squared Error") +
ggplot2::ggtitle("Plot for bandwidth selection") +
ggplot2::theme_minimal() +
ggplot2::geom_smooth(linetype="dashed", color="red") +
ggplot2::geom_vline(xintercept =opt,
linetype="dashed", color="tomato2",
alpha=1)
# Outputs
results <- list()
results[["bandwidth"]] <- opt
results[["cv_graph"]] <- cv_graph
return(results)
}
#' This function runs the leave-one-out cross validation
cwr.matrix_out <- function(y=y_sample,x=x_sample, net=net,
bandwidth=bandwidth, obs=sample_obs,
distance.w=distance.w){
# Repeat here is a matrix. It was returning error when I had only one covariate
x <- as.matrix(x)
#weights
matrix.w <- (exp(-distance.w/bandwidth)^2)
# The trick here is to set the diag to zero.
diag(matrix.w) <-0
cwr.net <- t(sapply(1:ncol(matrix.w), function(z) coef(lm(y[-z]~ as.matrix(x[-z,]), # excluding the obs again
weights= matrix.w[-z,z]))))
cwr.net
}
#' MSE function
mse <- function(data, x=x_sample, y=y_sample){
df <- data*cbind(1,x)
df <- cbind(df, y_pred=rowSums(df), y) %>%
as.data.frame() %>%
dplyr::mutate(error.sq=(y_pred-y)^2) %>%
dplyr::select(y_pred, y, error.sq)
df %>% dplyr::summarise(., mse=mean(.$error.sq, na.rm = TRUE))
}
# Run an example
load("~/Dropbox/packages/pwr/data/net.rda")
fair_cols <- c("#38170B","#BF1B0B", "#FFC465", "#66ADE5", "#252A52")
fair_ramp <- scales::colour_ramp(fair_cols)
mod.1 <- pwr(net=net,y=log(V(net)$in_degree+1),
X=as.matrix(log(V(net)$out_degree+1)))
mod.1$coefficients
mod.1$cv_band
plot(V(net)$l1,V(net)$l2,cex=log(V(net)$ind+1), col= fair_ramp(exp(mod.1$coefficients[,1])/6),pch=16)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.