# R/credo.companycode_old.r

#' Recodes Mutual Fund Company Names in Canada from Verbatim Data
#'
#' Takes a character vector or factor of free-text company names and recodes
#' misspelled or abbreviated entries into canonical company labels, using an
#' ordered list of pre-programmed find-and-replace rules.
#'
#' The input is converted to character, lower-cased and trimmed before
#' matching; blank strings are treated as \code{NA}. Rules are applied from top
#' to bottom and a later rule may overwrite the result of an earlier one, so
#' rule order matters. Every canonical label starts with an upper-case letter,
#' which is how recoded values are told apart from unmatched (lower-cased)
#' input afterwards.
#'
#' @param verbatim A character vector or factor to be recoded.
#' @param hold.na Logical. If \code{TRUE}, missing (and blank) values stay
#'   \code{NA} in \code{$verbatims} and are left out of \code{$unmatched}. If
#'   \code{FALSE} (default), they are handled like any other unmatched value.
#' @param other Logical. If \code{TRUE} (default), unmatched values are
#'   replaced by \code{"Other"} in \code{$verbatims}; if \code{FALSE}, they are
#'   left as their cleaned (lower-cased, trimmed) text.
#' @return A list with two elements: \code{verbatims}, a character vector of
#'   the same length and order as \code{verbatim} holding the recoded values,
#'   and \code{unmatched}, a character vector of the cleaned values that no
#'   rule recognised.
#' @references Trim functionality courtesy of f3lix: \url{http://stackoverflow.com/a/2261149/170352}
#' @examples
#' verbatim <- c("CI","bmo","cibc","renaisance","vpi",NA,"Aim","why")
#' credo.companycode_old(verbatim)
#' credo.companycode_old(verbatim, TRUE)
#' credo.companycode_old(verbatim, TRUE, FALSE)
credo.companycode_old <- function(verbatim, hold.na = FALSE, other = TRUE) {
  # Strip leading/trailing whitespace.
  # Courtesy of http://stackoverflow.com/a/2261149/170352
  trim <- function(x) gsub("^\\s+|\\s+$", "", x)

  # Normalise: character, lower case, trimmed. All patterns below are lower
  # case and grep() is case sensitive, so already-recoded (capitalised) labels
  # are largely shielded from later rules.
  m <- trim(tolower(as.character(verbatim)))
  m[!is.na(m) & m == ""] <- NA # Blanks count as missing

  # Recoding rules ----
  # Applied strictly in order; do not reorder without checking interactions
  # (e.g. "bmo etfs" must run before the generic "bmo" rule).
  # NOTE(review): "Captial" looks like a typo for "Capital"; the label is kept
  # as is because downstream code may compare against it.
  m[grep("aeg", m)] <- "Aegon Captial Management"
  m[grep("agf", m)] <- "AGF"
  m[grep("aim", m)] <- "Invesco"
  m[grep("aic", m)] <- "AIC"
  m[grep("alta", m)] <- "National Bank"
  m[grep("altra", m)] <- "National Bank"
  m[grep("arrow", m)] <- "Arrow Capital"
  m[grep("acuity", m)] <- "AGF"
  m[grep("afg", m)] <- "AGF"
  m[grep("acut", m)] <- "AGF"
  m[grep("aston", m)] <- "Aston Hill"
  m[grep("ashton", m)] <- "Aston Hill"
  m[grep("atb", m)] <- "ATB Financial"
  m[grep("b2b", m)] <- "B2B Bank"
  m[grep("b to b", m)] <- "B2B Bank"
  m[grep("bmo etfs", m)] <- "BMO ETFs"
  m[grep("bmo", m)] <- "BMO Mutual Funds"
  m[grep("bank of montreal", m)] <- "BMO Mutual Funds"
  m[grep("blackrock", m)] <- "BlackRock"
  m[grep("bullion", m)] <- "The Bullion Group"
  m[grep("beut", m)] <- "Beutel Goodman Investment Counsel"
  m[grep("buet", m)] <- "Beutel Goodman Investment Counsel"
  m[grep("brande", m)] <- "Bridgehouse"
  m[grep("bridge", m)] <- "Bridgehouse"
  m[grep("bridgehouse", m)] <- "Bridgehouse"
  m[grep("^cam$", m)] <- "Renaissance"
  m[grep("canoe", m)] <- "Canoe Financial"
  m[grep("^canada", m)] <- "Canada Life Insurance"
  m[grep("canada life", m, fixed = TRUE)] <- "Canada Life Insurance"
  m[grep("canada vie", m, fixed = TRUE)] <- "Canada Life Insurance"
  m[grep("cibc", m)] <- "Renaissance"
  m[grep("ci$", m)] <- "CI"
  m[grep("^ci", m)] <- "CI"
  m[grep("c.i", m, fixed = TRUE)] <- "CI"
  # "c i" must start a word: an unanchored match also hit names such as
  # "rbc investments" or "dynamic investments" and coded them as CI before
  # their own rules further down could run.
  m[grep("(^|[^[:alnum:]])c i", m)] <- "CI"
  m[grep("c. i.", m, fixed = TRUE)] <- "CI"
  m[grep("ci investments", m, fixed = TRUE)] <- "CI"
  m[grep("ci funds", m, fixed = TRUE)] <- "CI"
  m[grep("clarin", m)] <- "IA Clarington"
  m[grep("connor", m)] <- "Connor, Clark & Lunn"
  m[grep("counsel", m)] <- "Counsel Portfolio Services"
  m[grep("consel", m)] <- "Counsel Portfolio Services"
  m[grep("compass", m)] <- "Compass"
  m[grep("credential", m)] <- "Credential"
  m[grep("dimen", m)] <- "Dimensional Fund Advisors"
  m[grep("desjar", m)] <- "Desjardins Financial"
  m[grep("^des", m)] <- "Desjardins Financial"
  m[grep("dominion", m)] <- "TD Mutual Funds"
  m[grep("dfa", m)] <- "Dimensional Fund Advisors"
  m[grep("dundee", m)] <- "Dundee Corporation"
  m[grep("dy", m)] <- "Dynamic"
  m[grep("edge", m)] <- "Edgepoint"
  m[grep("edward jones", m)] <- "Edward Jones"
  m[grep("empire", m)] <- "Empire Life"
  m[grep("ethical", m)] <- "Northwest and Ethical"
  m[grep("excel", m)] <- "Excel Funds"
  m[grep("equity asso", m)] <- "Equity Associates"
  m[grep("equitable", m)] <- "Equitable Life"
  m[grep("fid", m)] <- "Fidelity"
  m[grep("fedelity", m)] <- "Fidelity"
  m[grep("fieli", m)] <- "Fidelity"
  m[grep("fiera", m)] <- "Fiera Capital"
  m[grep("first", m)] <- "First Asset"
  m[grep("freedom 55", m)] <- "Freedom 55"
  m[grep("frankli", m)] <- "Franklin Templeton"
  m[grep("front", m)] <- "Front Street"
  m[grep("ggof", m)] <- "BMO Mutual Funds"
  m[grep("growthwork", m)] <- "Matrix Asset Management"
  m[grep("great", m)] <- "Great West Life"
  m[grep("gwl", m)] <- "Great West Life"
  m[grep("guardian", m)] <- "BMO Mutual Funds"
  m[grep("harvest", m)] <- "Harvest Portfolios"
  m[grep("hesperian", m)] <- "Norrep Capital Management"
  m[grep("horizon", m)] <- "Horizons ETFs"
  m[grep("^ia$", m)] <- "IA Clarington"
  m[grep("ia clar", m, fixed = TRUE)] <- "IA Clarington"
  m[grep("industrial", m)] <- "IA Clarington"
  m[grep("industrielle alliance", m)] <- "IA Clarington"
  m[grep("industri", m)] <- "IA Clarington"
  m[grep("invesc", m)] <- "Invesco"
  m[grep("investco", m)] <- "Invesco"
  m[grep("investors", m)] <- "Investors Group"
  m[grep("ishares", m)] <- "IShares"
  m[grep("shares", m)] <- "IShares"
  m[grep("jov", m)] <- "Horizons ETFs"
  m[grep("kenzie", m)] <- "Mackenzie"
  m[grep("london life", m)] <- "London Life"
  m[grep("lysand", m)] <- "Lysander Funds"
  m[grep("mack", m)] <- "Mackenzie"
  m[grep("^mac.*zie$", m)] <- "Mackenzie"
  m[grep("manu", m)] <- "Manulife"
  m[grep("^man.*life$", m)] <- "Manulife"
  m[grep("matrix", m)] <- "Matrix Asset Management"
  m[grep("macq", m)] <- "Macquarie"
  m[grep("mawer", m)] <- "Mawer"
  m[grep("merit", m)] <- "Meritas"
  m[grep("middlefield", m)] <- "Middlefield"
  m[grep("morning", m)] <- "Morningstar"
  m[grep("natix", m)] <- "Natixis GAM"
  m[grep("national", m)] <- "National Bank"
  m[grep("nbc", m)] <- "National Bank"
  m[grep("^nei", m)] <- "Northwest and Ethical"
  m[grep("nex", m)] <- "Natixis GAM"
  m[grep("northwest", m)] <- "Northwest and Ethical"
  m[grep("norre", m)] <- "Norrep Funds"
  m[grep("o'leary", m, fixed = TRUE)] <- "O'Leary Funds"
  m[grep("pfsl", m)] <- "PFSL Investments"
  m[grep("pimc", m)] <- "PIMCO"
  m[grep("^picton", m)] <- "Picton Mahoney Asset Management"
  m[grep("phillips", m, fixed = TRUE)] <- "Phillips Hager and North"
  m[grep("ph&n", m, fixed = TRUE)] <- "Phillips Hager and North"
  m[grep("ph & n", m, fixed = TRUE)] <- "Phillips Hager and North"
  m[grep("phn", m)] <- "Phillips Hager and North"
  m[grep("primerica", m)] <- "Primerica"
  m[grep("quadrus", m)] <- "Quadrus Investments"
  m[grep("queensbur", m)] <- "Queensbury Group"
  m[grep("^roi", m)] <- "ROI Capital"
  m[grep("rbc", m)] <- "RBC Mutual Funds"
  m[grep("reni", m)] <- "Renaissance"
  m[grep("renn", m)] <- "Renaissance"
  m[grep("russ", m)] <- "Russell Investments"
  m[grep("royal", m)] <- "RBC Mutual Funds"
  m[grep("scotia", m)] <- "Scotia Mutual Funds"
  m[grep("^sei", m)] <- "SEI"
  m[grep("sance", m)] <- "Renaissance"
  m[grep("sentry", m)] <- "Sentry Investments"
  m[grep("senty", m)] <- "Sentry Investments"
  m[grep("standar", m)] <- "Standard Life"
  m[grep("stone", m)] <- "Stone & Co Funds"
  m[grep("sprott", m)] <- "Sprott Asset Management"
  m[grep("sun", m)] <- "Sun Life"
  m[grep("ssq", m)] <- "SSQ"
  m[grep("^td", m)] <- "TD Mutual Funds"
  m[grep("t.d.", m, fixed = TRUE)] <- "TD Mutual Funds"
  m[grep("td$", m)] <- "TD Mutual Funds"
  m[grep("temple", m)] <- "Franklin Templeton"
  m[grep("tempel", m)] <- "Franklin Templeton"
  m[grep("templ", m)] <- "Franklin Templeton"
  m[grep("transamerica", m)] <- "Transamerica"
  m[grep("trima", m)] <- "Invesco"
  m[grep("trim", m)] <- "Invesco"

  m[grep("vanguard", m)] <- "Vanguard ETFs"
  m[grep("value", m)] <- "Value Partner Investments"
  m[grep("vpi", m)] <- "Value Partner Investments"
  m[grep("rena", m)] <- "Renaissance"
  m[grep("^wis", m)] <- "Wisdom Tree"
  m[grep("world financial", m)] <- "World Financial Group Insurance Agency of Canada"

  # Unmatched ----
  # Input was lower-cased, so anything not starting with a capital letter was
  # never recoded. NA also lands here because `%in%` returns FALSE for it.
  is_unmatched <- !(substr(m, 1, 1) %in% LETTERS)
  if (isTRUE(hold.na)) {
    # Keep missing values as NA rather than reporting/relabelling them.
    is_unmatched <- is_unmatched & !is.na(m)
  }

  recoded <- m
  # Anything other than an explicit FALSE keeps the historical relabelling.
  if (!identical(other, FALSE)) {
    recoded[is_unmatched] <- "Other"
  }

  list(verbatims = recoded, unmatched = m[is_unmatched])
}
# credoinc/credoc documentation built on May 23, 2019, 8:39 a.m.