# inst/scripts/ie-data2.R

#' Investment Executive Weighted Means 
#' 
#' Compute, for each company and statement, the mean response weighted
#' by the importance rating given to that statement.
#' 
#' @author Brandon Bertelsen, Research Director, Credo Consulting

library(dplyr)
library(tidyr)
# Every relative path below -- the input CSV here and the output CSV written
# at the end of the script -- is resolved against this directory.
setwd("../Dropbox/cross_pc/ie/")

# Load the raw export. Both spellings "s. o." / "s.o." are read as NA, and
# check.names = FALSE keeps the column headers exactly as they appear in the
# file instead of converting them to syntactic R names.
d <- read.csv(
  file = "donneesbrandon/CSV/Top des courtiers québécois 2018.csv",
  na.strings = c("s. o.", "s.o."),
  stringsAsFactors = FALSE,
  check.names = FALSE,
  encoding = "UTF-8"
)

# The first data row is searched for per-column labels; it appears to act as
# a second header line (it is dropped again before any reshaping).
label_row <- d[1, ]

# Column positions, by role.
firm_cols <- grep("Firme", names(d))
importance_cols <- grep("Importance|importance", label_row)
comments <- grep("Commentaires|Open-Ended|commentaires", label_row)

# Give each importance column the header of the column immediately to its
# left, so a rating and its importance share the same statement name.
names(d)[importance_cols] <- names(d)[importance_cols - 1]

# Build the long-format table of importance ratings, used later as weights:
# one row per firm / respondent row / statement.
importance <- d[c(firm_cols, importance_cols)]

# Drop the first row of `d`: it is the label row that was searched to find
# the importance and comment columns, not a response.
importance <- importance[-1, ]

# seq_len() stays correct when no rows remain; 1:nrow() would yield c(1, 0)
# and make the assignment fail.
importance$rowid <- seq_len(nrow(importance))
names(importance)[1] <- "Firme"

# Stack every importance column into Statement / weight pairs, keeping Firme
# and rowid as identifier columns, then drop incomplete rows. The key and
# value names are given explicitly rather than relying on gather()'s
# fallback of excluding columns that happen to share the key/value names.
importance <- na.omit(gather(importance, Statement, weight, -Firme, -rowid))

# Keep only the part of the statement name before " - "; the remainder
# (Type) is not needed for the join.
importance <- separate(
  importance,
  Statement,
  into = c("Statement", "Type"),
  sep = " - "
) %>%
  select(-Type)

# Build the long-format table of responses: one row per firm / respondent
# row / statement.
# Columns 1-11 are dropped along with the importance and comment columns
# (presumably respondent metadata -- confirm against the CSV layout); the
# first remaining column is treated as the firm.
answers <- d[-c(1:11, importance_cols, comments)]

# Drop the first row of `d`: it is the label row that was searched to find
# the importance and comment columns, not a response.
answers <- answers[-1, ]

# seq_len() stays correct when no rows remain; 1:nrow() would yield c(1, 0)
# and make the assignment fail.
answers$rowid <- seq_len(nrow(answers))
names(answers)[1] <- "Firme"

# Stack every response column into Statement / response pairs, keeping Firme
# and rowid as identifier columns, then drop incomplete rows. The key and
# value names are given explicitly rather than relying on gather()'s
# fallback of excluding columns that happen to share the key/value names.
answers <- na.omit(gather(answers, Statement, response, -Firme, -rowid))

# Remove the ".1" suffix from this label so it matches the un-suffixed
# spelling (the suffix presumably comes from de-duplicated column names --
# confirm).
answers$Statement <- gsub("Importance accordée.1", "Importance accordée", answers$Statement, fixed = TRUE)

# Note: unlike `importance`, Statement is deliberately NOT split on " - "
# here; the corresponding separate() call was left disabled.

# Pair every response with the importance rating given by the same
# respondent row, for the same firm and statement.
scored <- left_join(answers, importance, by = c("Firme", "rowid", "Statement"))

# Per firm and statement: number of usable responses and the
# importance-weighted mean response.
weighted_means <- scored %>%
  # Values that cannot be read as integers become NA and are dropped by
  # na.omit() below.
  mutate(
    response = as.integer(response),
    weight = as.integer(weight)
  ) %>%
  group_by(Firme, Statement) %>%
  na.omit() %>%
  summarize(
    n = n(),
    wmean = weighted.mean(x = response, w = weight, na.rm = TRUE)
  )

# Written relative to the current working directory.
write.csv(weighted_means, "Weighted Means by Company and Statement.csv")
# credoinc/credoc documentation built on May 23, 2019, 8:39 a.m.