pwr_example.R

library(RColorBrewer)
library(colorspace)
library(igraph)


# PWR
pwr <- function(net, y,X, bandwidth="cv_pwr"){

  igraph::igraph_options(add.vertex.names=FALSE)
  N <- length(igraph::V(net))
  X <- as.matrix(X)

  if(igraph::is.igraph(net)==FALSE){
    warning("net should be a igraph object")
  }  else if(bandwidth=="cv_pwr"){

    cv <- pwr_cv(net=net, y=y, X=X, n=500)
    bandwidth <- cv$bandwidth
    N <- length(igraph::V(net))

    cwr.net <- array(0,dim=c(length(igraph::V(net)),ncol(X)+1))
    for(i in 1:N){
      d <- exp(-(c(c(.Call(igraph:::C_R_igraph_shortest_paths, net, as.numeric(i)-1, igraph::V(net)-1,3,1,1)))/bandwidth)^2)
      cwr.net[i,] <- coef(lm(y~X, weights=d))
      cwr.net <- as.data.frame(cwr.net)
      colnames(cwr.net) <- c("intercept", paste0("Beta_", 1:ncol(X)))
    }

    results <- list(coefficients=cwr.net, cv_band=cv)
    return(results)

  }  else{
    N <- length(igraph::V(net))
    cwr.net <- array(0,dim=c(length(igraph::V(net)),ncol(X)+1))
    for(i in 1:N){
      d <- exp(-(c(c(.Call(igraph:::C_R_igraph_shortest_paths, net, as.numeric(i)-1, igraph::V(net)-1,3,1,1)))/bandwidth)^2)
      cwr.net[i,] <- coef(lm(y~X, weights=d))

      cwr.net <- as.data.frame(cwr.net)
      colnames(cwr.net) <- c("intercept", paste0("Beta_", 1:ncol(X)))
    }

    return(cwr.net)

  }}

pwr_cv <- function(net, y, X, bandwidth=exp(seq(from=-1, to=3, by=.05)),
                   n=length(igraph::V(net))) {


  # making sure is a matrix

  X <- as.matrix(X)


  # Selecting a sample from the total network

  sample_obs <- sample(length(igraph::V(net)), n) # must be n here

  # Selecting y and x

  y_sample <- y[sample_obs]
  x_sample <- X[sample_obs, ]

  # Calculating the distance matrix

  distance.w <- sapply(sample_obs, function(z)
    igraph::distances(net, v = igraph::V(net)[z],mode="all")[sample_obs])


  # running the model with different bandwidths

  fitted_models_sample <-   purrr::map(bandwidth, ~ cwr.matrix_out(y=y_sample,
                                                                   x=x_sample,
                                                                   net=net, obs=sample_obs,
                                                                   bandwidth = .x,
                                                                   distance.w = distance.w))


  # Getting the mse

  mse_values <- purrr::map_df(fitted_models_sample, ~ mse(data=.x, x=x_sample, y=y_sample)) %>%
    dplyr::mutate(bandwidth=bandwidth)



  # Analytical solution
  res <- unname(lm(mse_values$mse~mse_values$bandwidth)$residuals)
  opt<-mse_values$bandwidth[which(res==min(res))]

  #graph
  cv_graph <-  ggplot2::ggplot(mse_values, ggplot2::aes(y=mse, x=bandwidth)) +
    ggplot2::geom_point(size=3) +
    ggplot2::xlab("Bandwidth") + ggplot2::ylab("Mean Squared Error") +
    ggplot2::ggtitle("Plot for bandwidth selection") +
    ggplot2::theme_minimal() +
    ggplot2::geom_smooth(linetype="dashed", color="red") +
    ggplot2::geom_vline(xintercept =opt,
                        linetype="dashed", color="tomato2",
                        alpha=1)



  # Outputs
  results <- list()
  results[["bandwidth"]] <- opt
  results[["cv_graph"]] <- cv_graph



  return(results)
}

#' This function runs the leave-one-out cross validation

cwr.matrix_out <- function(y=y_sample,x=x_sample, net=net,
                           bandwidth=bandwidth, obs=sample_obs,
                           distance.w=distance.w){



  # Repeat here is a matrix. It was returning error when I had only one covariate
  x <- as.matrix(x)

  #weights
  matrix.w <- (exp(-distance.w/bandwidth)^2)
  # The trick here is to set the diag to zero.
  diag(matrix.w) <-0

  cwr.net <- t(sapply(1:ncol(matrix.w), function(z) coef(lm(y[-z]~ as.matrix(x[-z,]), # excluding the obs again
                                                            weights= matrix.w[-z,z]))))
  cwr.net
}


#' MSE function

mse <- function(data, x=x_sample, y=y_sample){
  df <- data*cbind(1,x)
  df <- cbind(df, y_pred=rowSums(df), y)  %>%
    as.data.frame() %>%
    dplyr::mutate(error.sq=(y_pred-y)^2) %>%
    dplyr::select(y_pred, y, error.sq)

  df %>% dplyr::summarise(., mse=mean(.$error.sq, na.rm = TRUE))
}



# Run an example
load("~/Dropbox/packages/pwr/data/net.rda")

fair_cols <- c("#38170B","#BF1B0B", "#FFC465", "#66ADE5", "#252A52")
fair_ramp <- scales::colour_ramp(fair_cols)

mod.1 <- pwr(net=net,y=log(V(net)$in_degree+1),
             X=as.matrix(log(V(net)$out_degree+1)))

mod.1$coefficients
mod.1$cv_band

plot(V(net)$l1,V(net)$l2,cex=log(V(net)$ind+1), col=  fair_ramp(exp(mod.1$coefficients[,1])/6),pch=16)
TiagoVentura/pwr documentation built on Oct. 26, 2021, 2:13 p.m.