R/reshape_dyadic_data.R

Defines functions long_to_wide copy_var_to_new_suffix upper_triangular_to_full

Documented in copy_var_to_new_suffix long_to_wide

## This R package implements the methods proposed in 
## Dzemski, Andreas: An empirical model of dyadic link formation in 
## a network with unobserved heterogeneity, Review of Economics and Statistics, forthcoming

## Copyright (C) 2018  Andreas Dzemski

## This program is free software: you can redistribute it and/or modify
## it under the terms of the GNU General Public License as published by
## the Free Software Foundation, either version 3 of the License, or
## (at your option) any later version.

## This program is distributed in the hope that it will be useful,
## but WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
## GNU General Public License for more details.

## You should have received a copy of the GNU General Public License
## along with this program.  If not, see <https://www.gnu.org/licenses/>.

#' Reshape dyadic network data from long (ij, i != j) to wide (ij, i<j)
#'
#' @param links network data in long format
#' @param col_sender name of column containing the sender of the link
#' @param col_receiver name of column containing the receiver of the link
#' @param upper_triangle =TRUE (default) wide format respects i<j,
#'  =FALSE then i != j but each row contains information about everything in dyad
#' @param nodes all node indices
#' @param ij_variables variables that will be reported as dyad information
#'
#' @return network data in wide format
#'
#' @import data.table
#' @importFrom utils combn
long_to_wide <- function(links, col_sender, col_receiver, upper_triangle = TRUE,
                         nodes = unique(links[[col_sender]]),
                         ij_variables = setdiff(colnames(links), c(col_sender, col_receiver))) {

  add_vars <- function(by.x, dyads, suffix) {
    dyads <- merge(dyads, data.table(links)[, c(col_sender, col_receiver, ij_variables), with = FALSE],
                   by.x = by.x, by.y = c(col_sender, col_receiver))

    newv <- paste0(ij_variables, suffix)
    dyads[, (newv) := as.list(dyads[, ij_variables, with = FALSE])]
    dyads[, (ij_variables) := NULL]

    dyads
  }

  if (upper_triangle)
    dyads <- combn(nodes, 2)
  else {
    dyads <- t(as.matrix(links[, c(col_sender, col_receiver), with = FALSE]))
  }

  wide <- data.table(i = dyads[1, ], j = dyads[2, ])

  wide <- add_vars(c("j", "i"), wide, suffix = "_ji")
  wide <- add_vars(c("i", "j"), wide, suffix = "_ij")

  setkeyv(wide, cols = c("i", "j"))

  wide
}

#' Copy variable, new name changes the suffix
#'
#' @param dt data.table
#' @param variables name of variables (columns)
#' @param old_suffix old suffix
#' @param new_suffix new suffix
#'
#' @return view of data.table (new variables are added by reference)
copy_var_to_new_suffix <- function(dt, variables, old_suffix, new_suffix) {
  dt[, paste0(variables, new_suffix) := .SD, .SDcols = paste0(variables, old_suffix)]
}

upper_triangular_to_full <- function(wide_triangular) {

  vars_all <- setdiff(colnames(wide_triangular), c("i", "j"))

  vars_ij <- gsub("(.+)\\_ij", "\\1", grep(".+\\_ij", vars_all, perl = TRUE, value = TRUE),
                  perl = TRUE)
  vars_ji <- gsub("(.+)\\_ji", "\\1", grep(".+\\_ji", vars_all, perl = TRUE, value = TRUE),
                  perl = TRUE)
  vars_both <- intersect(vars_ij, vars_ji)
  vars_symmetric <- setdiff(vars_ij, vars_both)

  vij1 <- wide_triangular[, paste0(vars_both, "_ij"), with = FALSE]
  vji1 <- wide_triangular[, paste0(vars_both, "_ji"), with = FALSE]
  ij1 <- wide_triangular[, .(i, j)]

  vij2 <- wide_triangular[, setNames(.SD, paste0(vars_both, "_ji")),
                          .SDcols = paste0(vars_both, "_ij")]
  vji2 <- wide_triangular[, setNames(.SD, paste0(vars_both, "_ij")),
                          .SDcols = paste0(vars_both, "_ji")]
  ij2 <- wide_triangular[, .(i = j, j = i)]

  d1 <- cbind(ij1, vij1, vji1)
  d2 <- cbind(ij2, vij2, vji2)

  if (length(vars_symmetric) > 0) {
    vij_sym <- wide_triangular[, setNames(.SD, paste0(vars_symmetric, "_ij")),
                               .SDcols = paste0(vars_symmetric, "_ij")]
    d1 <- cbind(d1, vij_sym)
    d2 <- cbind(d2, vij_sym)
  }

  d <- rbind(d1, d2)
  setkey(d, i, j)

  d
}

#' Make new variables that for entry i,j contains the j,i values of given variables
#'
#' data table is updated by reference,
#' new variable names are generated by adding "_ji" subscript
#'
#' @param wide data table with link data
#' @param var_names names of variables to copy
#'
#' @return view on update data table
copy_ij_value_to_ji <- function(wide, var_names) {

  wide_ji <- copy(wide[, c("i", "j", var_names), with = FALSE])
  setkey(wide_ji, j, i)
  var_names_new <- sprintf("%s_ji", gsub("(.+)\\_ij", "\\1", var_names, perl = TRUE))

  for (k in seq_along(var_names)) {
    wide[, (var_names_new[[k]]) := wide_ji[[var_names[[k]] ]] ]
  }
}
adzemski/netprobitFE documentation built on May 17, 2019, 11:40 a.m.