# File: R/credo.companycode.r

#' Recode Canadian mutual fund company names from verbatim responses
#'
#' Takes free-text company names (with typos, abbreviations, or French/English
#' variants) and recodes them to canonical company names using an ordered
#' table of find-and-replace patterns. Each response is lower-cased and
#' trimmed first, and the first pattern that matches decides its company.
#' Only companies on the internal "credobb3" whitelist are kept in
#' \code{$verbatims}; everything else becomes \code{NA} or \code{"Other"}.
#'
#' @param verbatim A character vector or factor to be recoded.
#' @param hold.na Logical. If \code{FALSE} (default), blank responses are
#'   converted to \code{NA} before matching and therefore are not reported in
#'   \code{$unmatched}. If \code{TRUE}, blank responses are kept as empty
#'   strings and are reported in \code{$unmatched}. The length and order of
#'   \code{$verbatims} always match the input.
#' @param other Logical. If \code{TRUE}, every response that does not resolve
#'   to a whitelisted company (including missing values) is returned as
#'   \code{"Other"}; if \code{FALSE} (default) it is returned as \code{NA}.
#' @param year Full year to extract from the credobb3 companies list.
#'   Currently unused: the whitelist is hard-coded in the function.
#' @return A list with two character vectors: \code{$verbatims}, the recoded
#'   company names (same length and order as \code{verbatim}), and
#'   \code{$unmatched}, the cleaned (lower-cased, trimmed) responses that no
#'   pattern matched.
#' @references Trim functionality courtesy of f3lix: \url{http://stackoverflow.com/a/2261149/170352}
#' @export credo.companycode
#' @examples
#' verbatim <- c("CI","bmo","cibc","renaisance","vpi",NA,"Aim","why")
#' credo.companycode(verbatim)
#' credo.companycode(verbatim, TRUE)
#' credo.companycode(verbatim, TRUE, FALSE)
credo.companycode <- function(verbatim, hold.na = FALSE, other = FALSE,
                              year = format(Sys.Date(), format = "%Y")) {
  # Courtesy of http://stackoverflow.com/a/2261149/170352
  trim <- function(x) gsub("^\\s+|\\s+$", "", x)

  cleaned <- trim(tolower(as.character(verbatim)))

  if (!hold.na) {
    cleaned[!is.na(cleaned) & cleaned == ""] <- NA_character_ # Drop blanks
  }

  # Ordered recode table: regular expression -> canonical company name.
  # ORDER MATTERS: the first matching pattern wins, so specific patterns
  # (e.g. "bmo etfs", "rbc e", "power ?shares") must precede the generic ones
  # that would also match the same text ("bmo", "rbc", "shares").
  # Patterns are matched against lower-cased text; literal dots are escaped.
  rules <- matrix(c(
    "aeg", "Aegon Capital Management",
    "agf", "AGF Investments",
    "aim", "Invesco Trimark",
    "aic", "AIC",
    "alta", "National Bank",
    "altra", "National Bank",
    "arrow", "Arrow Capital",
    "acuity", "AGF Investments",
    "afg", "AGF Investments",
    "acut", "AGF Investments",
    "aston", "Aston Hill",
    "ashton", "Aston Hill",
    "atb", "ATB Financial",
    "b2b", "B2B Bank",
    "b to b", "B2B Bank",
    "bmo etfs", "BMO ETFs",
    "bmo", "BMO Mutual Funds",
    "bank of montreal", "BMO Mutual Funds",
    "blackrock", "iShares",
    "bullion", "The Bullion Group",
    "beut", "Beutel Goodman Investment Counsel",
    "buet", "Beutel Goodman Investment Counsel",
    "brande", "Bridgehouse-Brandes",
    "bridge", "Bridgehouse-Brandes",
    "bridgehouse", "Bridgehouse-Brandes",
    "^cam$", "Renaissance",
    "canoe", "Canoe Financial",
    "^canada", "Canada Life Insurance",
    "canada life", "Canada Life Insurance",
    "canada vie", "Canada Life Insurance",
    "cibc", "Renaissance",
    "ci$", "CI Investments",
    "^ci", "CI Investments",
    "c\\.i", "CI Investments",
    "c i", "CI Investments",
    "c\\. i\\.", "CI Investments",
    "ci investments", "CI Investments",
    "ci funds", "CI Investments",
    "clarin", "IA Clarington",
    "connor", "Connor, Clark & Lunn",
    "counsel", "Counsel Portfolio Services",
    "consel", "Counsel Portfolio Services",
    "compass", "Compass",
    "credential", "Credential",
    "dimen", "Dimensional Fund Advisors",
    "desjar", "Desjardins Financial",
    "^des", "Desjardins Financial",
    "dominion", "TD Mutual Funds",
    "dfa", "Dimensional Fund Advisors",
    "dundee", "Dundee Corporation",
    "^dynam", "Dynamic Funds",
    # Spelled "EdgePoint" so it agrees with the whitelist below.
    "edge", "EdgePoint",
    "edward jones", "Edward Jones",
    "empire", "Empire Life",
    "ethical", "NEI",
    "excel", "Excel Funds",
    "equity asso", "Equity Associates",
    "equitable", "Equitable Life",
    "fid", "Fidelity Investments",
    "fedelity", "Fidelity Investments",
    "fieli", "Fidelity Investments",
    "fiera", "Fiera Capital",
    "first a", "First Asset",
    "first t", "First Trust ETFs",
    "freedom 55", "Freedom 55",
    "frankli", "Franklin Templeton",
    "front", "Front Street",
    "ggof", "BMO Mutual Funds",
    "growthwork", "Matrix Asset Management",
    "great", "Great West Life",
    "gwl", "Great West Life",
    "guardian", "BMO Mutual Funds",
    "harvest", "Harvest Portfolios",
    "hesperian", "Norrep Capital Management",
    "horizon", "Horizons ETFs",
    "^ia$", "IA Clarington",
    "ia clar", "IA Clarington",
    "industrial", "IA Clarington",
    "industrielle alliance", "IA Clarington",
    "industri", "IA Clarington",
    "invesc", "Invesco Trimark",
    "investco", "Invesco Trimark",
    "investors", "Investors Group",
    # Must come before "shares", which would otherwise claim "powershares".
    "power ?shares", "Powershares",
    "ishares", "iShares",
    "shares", "iShares",
    "jov", "Horizons ETFs",
    "kenzie", "Mackenzie Financial",
    "london life", "London Life",
    "lysand", "Lysander Funds",
    "mack", "Mackenzie Financial",
    "^mac.*zie$", "Mackenzie Financial",
    "manu", "Manulife Investments",
    "^man.*life$", "Manulife Investments",
    "matrix", "Matrix Asset Management",
    "macq", "Macquarie",
    "mawer", "Mawer",
    "merit", "Meritas",
    "middlefield", "Middlefield",
    "morning", "Morningstar",
    "natix", "Natixis GAM",
    "national", "National Bank",
    "nbc", "National Bank",
    # Uses the whitelisted label "NEI" (formerly "Northwest Ethical").
    "^nei", "NEI",
    "nex", "Natixis GAM",
    "northwest", "NEI",
    "norre", "Norrep Funds",
    "o'leary", "O'Leary Funds",
    "pfsl", "PFSL Investments",
    "pimc", "PIMCO",
    "^picton", "Picton Mahoney Asset Management",
    "phillips", "Phillips Hager North",
    "ph&n", "Phillips Hager North",
    "ph & n", "Phillips Hager North",
    "phn", "Phillips Hager North",
    "^power", "Powershares",
    "^purpose", "Purpose Investments",
    "primerica", "Primerica",
    "quadrus", "Quadrus Investments",
    "queensbur", "Queensbury Group",
    "^roi", "ROI Capital",
    "rbc e", "RBC ETFs",
    "rbc", "RBC Mutual Funds",
    "reni", "Renaissance",
    "renn", "Renaissance",
    "russ", "Russell Investments",
    "royal", "RBC Mutual Funds",
    "scotia", "Scotia Mutual Funds",
    "^sei", "SEI",
    "sance", "Renaissance",
    "sentry", "Sentry Investments",
    "senty", "Sentry Investments",
    "standar", "Standard Life",
    "stone", "Stone & Co Funds",
    "sprott", "Sprott Asset Management",
    "sun", "Sun Life Global Investments",
    "ssq", "SSQ",
    "^td", "TD Mutual Funds",
    "t\\.d\\.", "TD Mutual Funds",
    "td$", "TD Mutual Funds",
    "temple", "Franklin Templeton",
    "tempel", "Franklin Templeton",
    "templ", "Franklin Templeton",
    "transamerica", "Transamerica",
    "trima", "Invesco Trimark",
    "trim", "Invesco Trimark",
    "vanguard", "Vanguard ETFs",
    "value", "Value Partner Investments",
    "vpi", "Value Partner Investments",
    "rena", "Renaissance",
    "^wis", "Wisdom Tree",
    "world financial", "World Financial Group Insurance Agency of Canada"
  ), ncol = 2, byrow = TRUE, dimnames = list(NULL, c("pattern", "company")))

  # Apply the rules in order. `pending` marks responses that have not been
  # recoded yet, so an already-assigned company name can never be re-matched
  # by a later pattern (e.g. "Powershares" contains "shares").
  coded <- cleaned
  pending <- !is.na(cleaned)
  for (i in seq_len(nrow(rules))) {
    hit <- pending & grepl(rules[i, "pattern"], cleaned)
    coded[hit] <- rules[i, "company"]
    pending[hit] <- FALSE
  }

  # Companies tracked in credobb3; any other company is collapsed below.
  companies_bb3 <- c(
    "AGF Investments", "Invesco Trimark", "BMO Mutual Funds",
    "Bridgehouse-Brandes", "CI Investments", "First Asset",
    "Desjardins Financial", "Dynamic Funds", "NEI", "Fidelity Investments",
    "Franklin Templeton", "Horizons ETFs", "Sun Life Global Investments",
    "IA Clarington", "iShares", "Mackenzie Financial", "Manulife Investments",
    "National Bank", "Phillips Hager North", "RBC Mutual Funds", "Renaissance",
    "Scotia Mutual Funds", "Sentry Investments", "TD Mutual Funds", "PIMCO",
    "BMO ETFs", "Excel Funds", "Mawer", "Russell Investments", "EdgePoint",
    "Vanguard ETFs", "Canoe Financial", "RBC ETFs", "Powershares",
    "First Trust ETFs", "Natixis GAM", "Purpose Investments"
  )

  # Everything off the whitelist (unmatched text, non-tracked companies and
  # missing values) becomes "Other" or NA depending on `other`.
  recoded <- coded
  recoded[!(coded %in% companies_bb3)] <- if (other) "Other" else NA_character_

  list(verbatims = recoded, unmatched = cleaned[which(pending)])
}
# credoinc/credoc documentation built on May 23, 2019, 8:39 a.m.