#' Compute a score of demographic difference using the procedure suggested by Li and Hambrick (2005) for the study of faultlines.
#' @param x dataframe. Table informing about team members' characteristics (all team members should belong to the same team), numeric or binary. Ideally, all values should be scaled between 0 and 1 before running the analysis.
#' @param subgroup character. Name of the variable indicating subgroups memberships.
#' @return A dataframe with one row (for the whole group) containing the index of demographic difference (DemDiff).
#' @references Li, J., and D. C. Hambrick. 2005. Factional groups: A new vantage on demographic faultlines, conflict, and disintegration in work teams. Academy of Management Journal 48 (5): 794–813.
#' @examples
#' library(construct)
#' data("fictiveteams")
#' # prepare the data: group the observations per team, scale between 0 and 1, identify subgroups
#' library(dplyr)
#' library(tidyr)
#' library(purrr)
#' fl <- fictiveteams %>%
#' group_by(team) %>%
#' nest() %>%
#' mutate(data = map(data, make_0_to_1)) %>%
#' mutate(subgp = map(data, find_subgroups, gpnbr = 2)) %>%
#' unnest()
#' # Compute demographic differences
#' fl %>%
#' group_by(team) %>%
#' nest() %>%
#' mutate(data = map(data, faultlines_demdiff, subgroup = "subgroup")) %>%
#' unnest()
#' @seealso make_0_to_1
#' @seealso find_subgroups
#' @importFrom dplyr %>%
#' @importFrom dplyr select
#' @importFrom dplyr everything
#' @importFrom dplyr summarise_all
#' @importFrom dplyr group_by
#' @importFrom dplyr mutate
#' @importFrom dplyr mutate_if
#' @importFrom dplyr left_join
#' @importFrom tidyr gather
#' @importFrom tidyr spread
#' @export
faultlines_demdiff <- function(x, subgroup){
# bind variables
attribute <- NULL
meanGp <- NULL
meanSubGp <- NULL
gp0 <- NULL
gp1 <- NULL
sd <- NULL
sdSubGp <- NULL
diffMean <- NULL
prodSD <- NULL
x <- x %>%
dplyr::select(subgroup = subgroup, everything()) %>%
mutate_if(is.factor, function(x) as.integer(as.character(x)))
# Find the average values for each variable
center <- x %>%
summarise_all(mean) %>%
gather(attribute, meanGp)
# Find for each variable the centroid of each group
centroids <- x %>%
group_by(subgroup) %>%
summarise_all(mean) %>%
gather(attribute, meanSubGp, -subgroup)
# Compute de Demographic Difference index of Li and Hambrick
if (length(unique(centroids$subgroup)) > 1){
#Compute the distances between subgroup centroids for each variable
centroidsMeanDiff <- centroids %>%
mutate(subgroup = paste0("gp", subgroup)) %>%
spread(subgroup, meanSubGp, fill = NA) %>%
mutate(diff = (gp0-gp1)^2) %>%
dplyr::select(attribute, diffMean = diff)
# Compute within subgroup standard deviations on each attribute and compute the demographic distance
dem_diff <- x %>%
group_by(subgroup) %>%
summarise_all(stats::sd) %>%
gather(attribute, sdSubGp, -subgroup) %>%
mutate(subgroup = paste0("gp", subgroup)) %>%
spread(subgroup, sdSubGp, fill = 0) %>%
mutate(prod = ((gp0*gp1)/2)+1) %>%
dplyr::select(attribute, prodSD = prod) %>%
left_join(centroidsMeanDiff, by = "attribute") %>%
mutate(dem_diff = diffMean / prodSD) %>%
dplyr::select(dem_diff) %>%
colMeans() %>%
as.vector()
} else dem_diff <- as.vector(NA)
names(dem_diff) <- c("DemDiff")
results <- data.frame(as.list(c(dem_diff)))
return(results)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.