knitr::opts_chunk$set(echo = TRUE)
library(DataComputing)
library(printr)
Sys.setlocale('LC_ALL','C')

Golbal Configuration

filePath <- "~/Desktop/morningstar.csv"
sysKeyword <- "(Summary|Percentile|Sum|Average|Count|Maximum|Minimum|Median|Deviation)"
msciKeyword <-"MSCI"
ftseKeyword <-"(FTSE|Russell)"
stoxxKeyword <-"STOXX"
spdjKeyword <-"(S&P|Dow|DJ)"
esgKeywords <- '(Sustain|ESG|esg|SRI|sri|Social|Governance|Catholic|Ethical)'
envKeywords <-'(Water|Carbon|Climate|Enviro|Green|Energy|Renew|Tech|Fossil|Alternative|Clean|Fuel|Pollution|Prevention)'
blackrockKeyword <- '(iShare|BlackRock|BLK|Blackrock)'
sumF <- function(vec) {sum(as.numeric(gsub(",", "", as.character(vec))), na.rm=TRUE)}
toP <- function(c,d) {paste(as.character(c), "(",as.character(round(d*100,digits=1)),"%)",sep="")}
"^" <- function(x,y) ifelse(y==0,0,base:::"/"(x,y))
provider <- c("MSCI","FTSE Russell","STOXX","SPDJ")
esgIndexTotal <- c(82,5,35,67)
envIndexTotal <- c(7,34,5,11)
myPalette <- c("#722287","#0079C1","#009941","#002960")
fillTheme <- scale_fill_manual(values=myPalette)
colorTheme <- scale_color_manual(values=myPalette)
rawTable <- read.csv(filePath, header=TRUE, sep=",")
universe <- rawTable[!duplicated(rawTable[c('Name')]),] %>%
  filter(Name!="") %>%
  filter(!is.na(Oldest.Share.Class)) %>%
  filter(!grepl(sysKeyword, Name))
MSCI <- universe %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
FTSE <- universe %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
STOXX <- universe %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
SPDJ <- universe %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))

fundsCount <- c(length(MSCI$Name),length(FTSE$Name),length(STOXX$Name),length(SPDJ$Name))
fundsPercent <- fundsCount/sum(fundsCount)
AUM <- c(sumF(MSCI$Fund.Size.USD),sumF(FTSE$Fund.Size.USD),sumF(STOXX$Fund.Size.USD),sumF(SPDJ$Fund.Size.USD))
countTable <- data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent, AUM=AUM)

\newpage

Analysis of funds tracking each provider

r countTable

Count bar chart

countTable %>%
  ggplot(aes_string(x='provider', y='fundsCount', fill='provider')) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=toP(fundsCount,fundsPercent))) +
  fillTheme

AUM bar chart

countTable %>%
  ggplot(aes(x=provider, y=AUM, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=toP(paste(as.character(round(AUM/1000000000, digits=2)),"B", sep=""), AUM/sum(AUM)))) +
  fillTheme

Count pie chart

countTable %>%
  ggplot(aes(x=factor(1), y=fundsPercent, fill=provider)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar(theta="y") + 
  theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank()) +
  fillTheme

AUM pie chart

countTable %>%
  ggplot(aes(x=factor(1), y=AUM/sum(AUM), fill=provider)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar(theta="y") + 
  theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank()) +
  fillTheme

\newpage

Analysis of funds tracking ESG indexes of each provider

universeesg <- universe %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
MSCIesg <- MSCI %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
FTSEesg <- FTSE %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
STOXXesg <- STOXX %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))
SPDJesg <- SPDJ %>% filter(grepl(esgKeywords,Primary.Prospectus.Benchmark))

esgCount <- c(length(MSCIesg$Name),length(FTSEesg$Name),length(STOXXesg$Name),length(SPDJesg$Name))
esgPercent <- esgCount/sum(esgCount)
esgAUM <- c(sumF(MSCIesg$Fund.Size.USD),sumF(FTSEesg$Fund.Size.USD),sumF(STOXXesg$Fund.Size.USD),sumF(SPDJesg$Fund.Size.USD))
esgTable <- data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent, AUM=AUM, esgCount=esgCount, esgPercent=esgPercent, esgAUM=esgAUM, esgRatio=esgCount/fundsCount, aumRatio=esgAUM/AUM)

r select(esgTable, provider, esgCount, esgPercent, esgAUM)

Count bar chart

esgTable %>%
  ggplot(aes(x=provider, y=esgCount, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=toP(esgCount, esgPercent))) +
  fillTheme

AUM bar comparison

esgTable %>%
  ggplot(aes(x=provider, y=esgAUM, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=toP(paste(as.character(round(esgAUM/1000000000, digits=2)),"B", sep=""), esgAUM/sum(esgAUM)))) +
  fillTheme

Count pie chart

esgTable %>%
  ggplot(aes(x=factor(1), y=esgPercent, fill=provider)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar(theta="y") + 
  theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank()) +
  fillTheme

AUM pie chart

esgTable %>%
  ggplot(aes(x=factor(1), y=esgAUM/sum(esgAUM), fill=provider)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar(theta="y") + 
  theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank()) +
  fillTheme

\newpage

Percentage of funds tracking ESG indexes for each provider

total tracking ESG count: r sum(esgTable$esgCount)

total funds count: r sum(esgTable$fundsCount)

percentage count of total: r sum(esgTable$esgCount)/sum(esgTable$fundsCount)

percentage AUM of total: r sum(esgTable$esgAUM)/sum(esgTable$AUM)

Count ratio for each provider

esgTable %>%
  ggplot(aes(x=provider, y=esgRatio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(esgRatio, digits=4))) +
  fillTheme

AUM ratio for each provider

esgTable %>%
  ggplot(aes(x=provider, y=aumRatio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(aumRatio, digits=4))) +
  fillTheme

\newpage

Analysis of funds tracking Environment indexes of each provider

universeenv <- universe %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
MSCIenv <- MSCI %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
FTSEenv <- FTSE %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
STOXXenv <- STOXX %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))
SPDJenv <- SPDJ %>% filter(grepl(envKeywords,Primary.Prospectus.Benchmark))

envCount <- c(length(MSCIenv$Name),length(FTSEenv$Name),length(STOXXenv$Name),length(SPDJenv$Name))
envPercent <- envCount/sum(envCount)
envAUM <- c(sumF(MSCIenv$Fund.Size.USD),sumF(FTSEenv$Fund.Size.USD),sumF(STOXXenv$Fund.Size.USD),sumF(SPDJenv$Fund.Size.USD))
envTable <- data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent, AUM=AUM, envCount=envCount, envPercent=envPercent, envAUM=envAUM, envRatio=envCount/fundsCount, aumRatio=envAUM/AUM)

r select(envTable, provider, envCount, envPercent, envAUM)

Count bar chart

envTable %>%
  ggplot(aes(x=provider, y=envCount, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=toP(envCount, envPercent))) +
  fillTheme

AUM bar chart

envTable %>%
  ggplot(aes(x=provider, y=envAUM, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=toP(paste(as.character(round(envAUM/1000000000, digits=2)),"B", sep=""), envAUM/sum(envAUM)))) +
  fillTheme

Count pie chart

envTable %>%
  ggplot(aes(x=factor(1), y=envPercent, fill=provider)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar(theta="y") + 
  theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank()) +
  fillTheme

AUM pie chart

envTable %>%
  ggplot(aes(x=factor(1), y=envAUM/sum(envAUM), fill=provider)) + 
  geom_bar(width=1, stat="identity") + 
  coord_polar(theta="y") + 
  theme(axis.text=element_blank(), axis.title=element_blank(), panel.background=element_blank()) +
  fillTheme

\newpage

Percentage of Environment funds tracking Environment indexes for each provider

total tracking Environment count: r sum(envTable$envCount)

total funds count: r sum(envTable$fundsCount)

percentage count of total: r sum(envTable$envCount)/sum(envTable$fundsCount)

percentage AUM of total: r sum(envTable$envAUM)/sum(envTable$AUM)

Count ratio for each provider

envTable %>%
  ggplot(aes(x=provider, y=envRatio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(envRatio, digits=4))) +
  fillTheme

AUM ratio for each provider

envTable %>%
  ggplot(aes(x=provider, y=aumRatio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(aumRatio, digits=4))) +
  fillTheme

\newpage

ESG & Environment cross comparison

ESG/Environment (# of Indexes) Ratio

data.frame(esg=esgIndexTotal, env=envIndexTotal, ratio=esgIndexTotal/envIndexTotal) %>%
  ggplot(aes(x=provider, y=ratio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(ratio, digits=1))) +
  fillTheme + ylim(0, 12)

ESG/Environment (# of Indexes In Use) Ratio

esgTable %>%
  mutate(ratio=esgCount^envCount) %>%
  ggplot(aes(x=provider, y=ratio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(ratio, digits=1))) +
  fillTheme + ylim(0, 12)

ESG/Environment (% of Indexes In Use) Ratio

esgTable %>%
  mutate(ratio=(esgCount/esgIndexTotal)^(envCount/envIndexTotal)) %>%
  ggplot(aes(x=provider, y=ratio, fill=provider)) + 
  geom_bar(stat="identity") +
  geom_label(fill='white',aes(label=round(ratio, digits=1))) +
  fillTheme + ylim(0, 12)

\newpage

Function 1: get timeseries given any universe and keyword

getYearCountForUniverse <- function(universe, keyword="", yearLimit=1930) {
  dateU <- universe %>%
    filter(grepl(keyword, Name)) %>%
    filter(grepl("/",as.character(Inception.Date))) %>%
    mutate(date=as.character(Inception.Date))
  yearVec <- dateU$date
  for (i in 1:length(yearVec)) {
    d <- yearVec[i]
    st <- substring(d, nchar(d)-1, nchar(d))
    if (as.numeric(st) < 17) {
      yearVec[i] <- as.numeric(paste("20", st, sep=""))
    } else {
      yearVec[i] <- as.numeric(paste("19", st, sep=""))
    }
  }
  yearVec <- as.numeric(yearVec)
  yearDF <- data.frame(year=yearVec) %>%
    group_by(year) %>%
    filter(year >= yearLimit) %>%
    summarize(count=n())
  return(yearDF)
}

plotTimeSeriesForUniverse <- function(universe, keyword="",  yearLimit=1930) {

  getYearCountForUniverse(universe, keyword, yearLimit) %>%
    ggplot(aes(x=year, y=count)) + 
    geom_line() + 
    scale_x_continuous(breaks=seq(yearLimit, 2016, 5)) +
    colorTheme
}

plotTimeSeriesForAllScope <- function(universe, esgU, envU, keyword="",  yearLimit=1930) {
  yearDF <- getYearCountForUniverse(universe, keyword, yearLimit)
  esgDF <- getYearCountForUniverse(esgU, keyword, yearLimit)
  envDF <- getYearCountForUniverse(envU, keyword, yearLimit)
  colnames(esgDF) <- c('year', 'esg')
  colnames(envDF) <- c('year', 'env')
  fullYears <- yearDF %>%
    full_join(esgDF, by="year") %>%
    full_join(envDF, by="year")
  fullYears[is.na(fullYears)] <- 0
  fullYearsNarrow <- gather(fullYears, year)
  colnames(fullYearsNarrow) <- c("year", "type", "count")
  fullYearsNarrow %>%
    filter(year >= yearLimit) %>%
    ggplot(aes(x=year, y=count, group=type, col=type)) + 
    geom_line() +
    colorTheme
}

plotTimeSeriesForEsgEnv <- function(esgU, envU, keyword="",  yearLimit=1930) {
  esgDF <- getYearCountForUniverse(esgU, keyword, yearLimit)
  envDF <- getYearCountForUniverse(envU, keyword, yearLimit)
  colnames(esgDF) <- c('year', 'esg')
  colnames(envDF) <- c('year', 'env')
  fullYears <- esgDF %>%
    full_join(envDF, by="year")
  fullYears[is.na(fullYears)] <- 0
  fullYearsNarrow <- gather(fullYears, year)
  colnames(fullYearsNarrow) <- c("year", "type", "count")
  fullYearsNarrow %>%
    filter(year >= yearLimit) %>%
    ggplot(aes(x=year, y=count, group=type, col=type)) + 
    geom_line() +
    colorTheme
}

plotTimeSeriesForProviders <- function(MSCI, FTSE, SPDJ, STOXX, keyword="",  yearLimit=1930) {
  msciDF <- getYearCountForUniverse(MSCI, keyword, yearLimit)
  ftseDF <- getYearCountForUniverse(FTSE, keyword, yearLimit)
  spdjDF <- getYearCountForUniverse(SPDJ, keyword, yearLimit)
  stoxxDF <- getYearCountForUniverse(STOXX, keyword, yearLimit)
  colnames(msciDF) <- c('year', 'MSCI')
  colnames(ftseDF) <- c('year', 'FTSE')
  colnames(spdjDF) <- c('year', 'SPDJ')
  colnames(stoxxDF) <- c('year', 'STOXX')
  fullYears <- msciDF %>%
    full_join(ftseDF, by="year") %>%
    full_join(spdjDF, by="year") %>%
    full_join(stoxxDF, by="year")
  fullYears[is.na(fullYears)] <- 0
  fullYearsNarrow <- gather(fullYears, year)
  colnames(fullYearsNarrow) <- c("year", "type", "count")
  fullYearsNarrow %>%
    filter(year >= yearLimit) %>%
    ggplot(aes(x=year, y=count, group=type, col=type)) + 
    geom_line() +
    colorTheme
}

example: universe time series

plotTimeSeriesForAllScope(universe, universeesg, universeenv)

example: ESG vs ENV time series

plotTimeSeriesForEsgEnv(universeesg, universeenv)

example: MSCI ESG vs ENV time series

plotTimeSeriesForEsgEnv(MSCIesg, MSCIenv)

example: all provider time series

 plotTimeSeriesForProviders(MSCI, FTSE, SPDJ, STOXX, yearLimit=1980)

\newpage

Function 2: check any keyword's distribution of index provider

institutionChoice <- function(keyword, universe) {
  companyUniverse <- universe %>% filter(grepl(keyword, Name))
  MSCI <- companyUniverse %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  FTSE <- companyUniverse %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  STOXX <- companyUniverse %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  SPDJ <- companyUniverse %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  fundsCount <- c(length(MSCI$Name),length(FTSE$Name),length(STOXX$Name),length(SPDJ$Name))
  fundsPercent <- fundsCount/sum(fundsCount)
  countTable <- data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent)
  return(countTable)
}

plotInstitutionChoice <- function(keyword, universe) {
  companyUniverse <- universe %>% filter(grepl(keyword, Name))
  MSCI <- companyUniverse %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  FTSE <- companyUniverse %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  STOXX <- companyUniverse %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  SPDJ <- companyUniverse %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  fundsCount <- c(length(MSCI$Name),length(FTSE$Name),length(STOXX$Name),length(SPDJ$Name))
  fundsPercent <- fundsCount/sum(fundsCount)
  data.frame(provider=provider, fundsCount=fundsCount, fundsPercent=fundsPercent) %>%
    ggplot(aes(x=provider, y=fundsCount, fill=provider)) + 
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=toP(fundsCount, fundsPercent))) +
    fillTheme
}

getInstitutionChoice <- function(keyword, universe) {
  companyUniverse <- universe %>% filter(grepl(keyword, Name))
  MSCI <- companyUniverse %>% filter(grepl(msciKeyword,Primary.Prospectus.Benchmark))
  FTSE <- companyUniverse %>% filter(grepl(ftseKeyword,Primary.Prospectus.Benchmark))
  STOXX <- companyUniverse %>% filter(grepl(stoxxKeyword,Primary.Prospectus.Benchmark))
  SPDJ <- companyUniverse %>% filter(grepl(spdjKeyword,Primary.Prospectus.Benchmark))
  return(list(MSCI=MSCI, FTSE=FTSE, STOXX=STOXX, SPDJ=SPDJ))
}

exsample: BlackRock

institutionChoice(blackrockKeyword, universe)
plotInstitutionChoice(blackrockKeyword, universe)

exsample: BlackRock ESG

plotInstitutionChoice(blackrockKeyword, universeesg)

exsample: BlackRock ESG

institutionChoice(blackrockKeyword, universeesg)
plotInstitutionChoice(blackrockKeyword, universeesg)

exsample: ETF

institutionChoice('ETF', universe)
plotInstitutionChoice('ETF', universe)

\newpage

Function 3: check any company's index choice provider popularity

plotUniverseByBrandName <- function(universe, floor=5) {
  universe %>%
    group_by(Branding.Name) %>%
    summarize(count=n()) %>%
    filter(count >= floor) %>%
    ggplot(aes(x=Branding.Name, y=count, fill=Branding.Name)) +
    geom_bar(stat="identity") +
    geom_label(fill='white',aes(label=count)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1))
}

exsample: universe plot

plotUniverseByBrandName(universe, floor=30)

exsample: universe ESG plot

plotUniverseByBrandName(universeesg, floor=2)

exsample: universe ENV plot

plotUniverseByBrandName(universeenv, floor=0)

exsample: MSCI plot

plotUniverseByBrandName(MSCI, floor=10)

exsample: MSCI ESG plot

plotUniverseByBrandName(MSCIesg, floor=0)


yezhaoqin/morningStarAnalytics documentation built on May 4, 2019, 2:32 p.m.