R/ESGspecific.R

Defines functions generateDataTables sumFactor toPercentString getFundsByBenchmarkInfoTable plotFundsStatsByBenchmarkBar plotFundsStatsByBenchmarkPie getUniverseComparisonInfo plotRatiobyProviders plotRatioTwoVectors plotUniverseByBrandName getInstitutionChoiceList getInstitutionChoiceCountTable plotInstitutionChoice getYearCountForUniverse plotTimeSeriesForUniverse plotTimeSeriesForAllScope plotTimeSeriesForEsgEnv plotTimeSeriesForProviders

# ---- demo ----
generateDataTables <- function(csvFilePath,
                               sysKeyword="(Summary|Percentile|Sum|Average|Count|Maximum|Minimum|Median|Deviation)",
                               msciKeyword="MSCI",
                               ftseKeyword="(FTSE|Russell)",
                               stoxxKeyword="STOXX",
                               spdjKeyword="(S&P|Dow|DJ)",
                               esgKeywords="(Sustain|ESG|esg|SRI|sri|Social|Governance|Catholic|Ethical)",
                               envKeywords="(Water|Carbon|Climate|Enviro|Green|Energy|Renew|Tech|Fossil|Alternative|Clean|Fuel|Pollution|Prevention)",
                               esgIndexTotal=c(82,5,35,67),
                               envIndexTotal=c(7,34,5,11),
                               provider=c("MSCI","FTSE Russell","STOXX","SPDJ")
                               ) {
  result <- list()
  rawTable <- read.csv(csvFilePath, header=TRUE, sep=",")
  result$universe <- rawTable[!duplicated(rawTable[c('Name')]),] %>%
    filter(Name!="") %>%
    filter(!is.na(Oldest.Share.Class)) %>%
    filter(!grepl(sysKeyword, Name))

  result$MSCI <- result$universe %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  result$FTSE <- result$universe %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  result$STOXX <- result$universe %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  result$SPDJ <- result$universe %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  result$universeEsg <- result$universe %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
  result$MSCIesg <- result$MSCI %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
  result$FTSEesg <- result$FTSE %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
  result$STOXXesg <- result$STOXX %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
  result$SPDJesg <- result$SPDJ %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
  result$universeEnv <- result$universe %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
  result$MSCIenv <- result$MSCI %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
  result$FTSEenv <- result$FTSE %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
  result$STOXXenv <- result$STOXX %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
  result$SPDJenv <- result$SPDJ %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
  return(result)
}

sumFactor <- function(vec) {
  sum(as.numeric(gsub(",", "", as.character(vec))), na.rm=TRUE)
}

toPercentString <- function(c,d) {
  paste(as.character(c), "(",as.character(round(d*100,digits=1)),"%)",sep="")
}

getFundsByBenchmarkInfoTable <- function(universe, MSCI, FTSE, STOXX, SPDJ, provider=c("MSCI","FTSE Russell","STOXX","SPDJ")) {
  fundsCount <- c(length(MSCI$Name),length(FTSE$Name),length(STOXX$Name),length(SPDJ$Name))
  fundsPercent <- fundsCount/sum(fundsCount)
  AUM <- c(sumFactor(MSCI$Fund.Size.USD),sumFactor(FTSE$Fund.Size.USD),sumFactor(STOXX$Fund.Size.USD),sumFactor(SPDJ$Fund.Size.USD))
  countTable <- data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent, AUM=AUM)
}

plotFundsStatsByBenchmarkBar <- function(fundInfoTable, colName) {
  fundInfoTable %>%
    ggplot(aes_string(x="provider", y=colName, fill="provider")) +
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=toPercentString(fundsCount,fundsPercent)),fill='white')
}

plotFundsStatsByBenchmarkPie <- function(fundInfoTable, colName) {
  fundInfoTable %>%
    ggplot(aes_string(y=colName, fill="provider")) +
    geom_bar(width=1, stat="identity", aes(x=factor(1))) +
    coord_polar(theta="y") +
    theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank())
}


getUniverseComparisonInfo <- function(infoTable1, infoTable2) {
  result <- data.frame(totalCount=c(sum(infoTable1$fundsCount), sum(infoTable2$fundsCount)),
                       totalAUM=c(sum(infoTable1$AUM), sum(infoTable2$AUM)),
                       ratio=c(sum(infoTable1$fundsCount)/sum(infoTable2$fundsCount),
                               sum(infoTable1$AUM)/sum(infoTable2$AUM)
                               )
                       )
  return(result)
}

plotRatiobyProviders <- function(infoTable1, infoTable2, compareCol) {
    data.frame(ratio=infoTable1[,compareCol] / infoTable2[,compareCol], provider=infoTable1$provider) %>%
    ggplot(aes(x=provider, y=ratio, fill=provider)) +
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=round(ratio, digits=1)))
}

plotRatioTwoVectors <- function(colNames, vec1, vec2) {
  data.frame(provider=colNames, ratio=vec1/vec2) %>%
    ggplot(aes(x=provider, y=ratio, fill=provider)) +
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=round(ratio, digits=1)))
}


plotUniverseByBrandName <- function(universe, floor=5) {
  universe %>%
    group_by(Branding.Name) %>%
    summarize(count=n()) %>%
    filter(count >= floor) %>%
    ggplot(aes(x=Branding.Name, y=count, fill=Branding.Name)) +
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=count)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}


getInstitutionChoiceList <- function(keyword, universe, msciKeyword="MSCI",
                                     ftseKeyword="(FTSE|Russell)",
                                     stoxxKeyword="STOXX",
                                     spdjKeyword="(S&P|Dow|DJ)",
                                     provider=c("MSCI","FTSE Russell","STOXX","SPDJ")) {
  companyUniverse <- universe %>% filter(grepl(keyword, Name))
  MSCI <- companyUniverse %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  FTSE <- companyUniverse %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  STOXX <- companyUniverse %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  SPDJ <- companyUniverse %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  return(list(MSCI=MSCI, FTSE=FTSE, STOXX=STOXX, SPDJ=SPDJ))
}

getInstitutionChoiceCountTable <- function(keyword, universe, msciKeyword="MSCI",
                                           ftseKeyword="(FTSE|Russell)",
                                           stoxxKeyword="STOXX",
                                           spdjKeyword="(S&P|Dow|DJ)",
                                           provider=c("MSCI","FTSE Russell","STOXX","SPDJ")) {
  companyUniverse <- universe %>% filter(grepl(keyword, Name))
  MSCI <- companyUniverse %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  FTSE <- companyUniverse %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  STOXX <- companyUniverse %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  SPDJ <- companyUniverse %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  fundsCount <- c(length(MSCI$Name),length(FTSE$Name),length(STOXX$Name),length(SPDJ$Name))
  fundsPercent <- fundsCount/sum(fundsCount)
  countTable <- data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent)
  return(countTable)
}

plotInstitutionChoice <- function(keyword, universe, msciKeyword="MSCI",
                                  ftseKeyword="(FTSE|Russell)",
                                  stoxxKeyword="STOXX",
                                  spdjKeyword="(S&P|Dow|DJ)",
                                  provider=c("MSCI","FTSE Russell","STOXX","SPDJ")) {
  companyUniverse <- universe %>% filter(grepl(keyword, Name))
  MSCI <- companyUniverse %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  FTSE <- companyUniverse %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  STOXX <- companyUniverse %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  SPDJ <- companyUniverse %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  fundsCount <- c(length(MSCI$Name),length(FTSE$Name),length(STOXX$Name),length(SPDJ$Name))
  fundsPercent <- fundsCount/sum(fundsCount)
  data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent) %>%
    ggplot(aes(x=provider, y=fundsCount, fill=provider)) +
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=toPercentString(fundsCount, fundsPercent)))
}

getYearCountForUniverse <- function(universe, keyword="", yearLimit=1930) {
  dateU <- universe %>%
    filter(grepl(keyword, Name)) %>%
    filter(grepl("/",as.character(Inception.Date))) %>%
    mutate(date=as.character(Inception.Date))
  yearVec <- dateU$date
  for (i in 1:length(yearVec)) {
    d <- yearVec[i]
    st <- substring(d, nchar(d)-1, nchar(d))
    if (as.numeric(st) < 17) {
      yearVec[i] <- as.numeric(paste("20", st, sep=""))
    } else {
      yearVec[i] <- as.numeric(paste("19", st, sep=""))
    }
  }
  yearVec <- as.numeric(yearVec)
  yearDF <- data.frame(year=yearVec) %>%
    group_by(year) %>%
    filter(year >= yearLimit) %>%
    summarize(count=n())
  return(yearDF)
}

plotTimeSeriesForUniverse <- function(universe, keyword="",  yearLimit=1930) {
  getYearCountForUniverse(universe, keyword, yearLimit) %>%
    ggplot(aes(x=year, y=count)) +
    geom_line() +
    scale_x_continuous(breaks=seq(yearLimit, 2016, 5))
}

plotTimeSeriesForAllScope <- function(universe, esgU, envU, keyword="",  yearLimit=1930) {
  yearDF <- getYearCountForUniverse(universe, keyword, yearLimit)
  esgDF <- getYearCountForUniverse(esgU, keyword, yearLimit)
  envDF <- getYearCountForUniverse(envU, keyword, yearLimit)
  colnames(esgDF) <- c('year', 'esg')
  colnames(envDF) <- c('year', 'env')
  fullYears <- yearDF %>%
    full_join(esgDF, by="year") %>%
    full_join(envDF, by="year")
  fullYears[is.na(fullYears)] <- 0
  fullYearsNarrow <- gather(fullYears, year)
  colnames(fullYearsNarrow) <- c("year", "type", "count")
  fullYearsNarrow %>%
    filter(year >= yearLimit) %>%
    ggplot(aes(x=year, y=count, group=type, col=type)) +
    geom_line()
}

plotTimeSeriesForEsgEnv <- function(esgU, envU, keyword="",  yearLimit=1930) {
  esgDF <- getYearCountForUniverse(esgU, keyword, yearLimit)
  envDF <- getYearCountForUniverse(envU, keyword, yearLimit)
  colnames(esgDF) <- c('year', 'esg')
  colnames(envDF) <- c('year', 'env')
  fullYears <- esgDF %>%
    full_join(envDF, by="year")
  fullYears[is.na(fullYears)] <- 0
  fullYearsNarrow <- gather(fullYears, year)
  colnames(fullYearsNarrow) <- c("year", "type", "count")
  fullYearsNarrow %>%
    filter(year >= yearLimit) %>%
    ggplot(aes(x=year, y=count, group=type, col=type)) +
    geom_line()
}

plotTimeSeriesForProviders <- function(MSCI, FTSE, SPDJ, STOXX, keyword="",  yearLimit=1930) {
  msciDF <- getYearCountForUniverse(MSCI, keyword, yearLimit)
  ftseDF <- getYearCountForUniverse(FTSE, keyword, yearLimit)
  spdjDF <- getYearCountForUniverse(SPDJ, keyword, yearLimit)
  stoxxDF <- getYearCountForUniverse(STOXX, keyword, yearLimit)
  colnames(msciDF) <- c('year', 'MSCI')
  colnames(ftseDF) <- c('year', 'FTSE')
  colnames(spdjDF) <- c('year', 'SPDJ')
  colnames(stoxxDF) <- c('year', 'STOXX')
  fullYears <- msciDF %>%
    full_join(ftseDF, by="year") %>%
    full_join(spdjDF, by="year") %>%
    full_join(stoxxDF, by="year")
  fullYears[is.na(fullYears)] <- 0
  fullYearsNarrow <- gather(fullYears, year)
  colnames(fullYearsNarrow) <- c("year", "type", "count")
  fullYearsNarrow %>%
    filter(year >= yearLimit) %>%
    ggplot(aes(x=year, y=count, group=type, col=type)) +
    geom_line()
}
yezhaoqin/morningStarAnalytics documentation built on May 4, 2019, 2:32 p.m.