##' Prepare data for the neural network model
##'
##' @description Cleans the fundamentals table, restricts it to companies that
##'   have price data at both ends of the study window and a 2015 year-end
##'   report, replaces missing values in selected columns with the column
##'   mean, and standardizes every numeric column.
##'
##' @param x data frame of company fundamentals. The code reads the columns
##'   \code{Ticker.Symbol} and \code{Period.Ending} (parsed as
##'   \code{"\%Y-\%m-\%d"}) and drops \code{X} and \code{For.Year} when they
##'   are present.
##' @param y data frame of prices. The code reads the columns \code{symbol}
##'   and \code{date} (parsed as \code{"\%m/\%d/\%y"}).
##'
##' @import dplyr
##' @import tidyr
##'
##' @return a list with two elements: \code{x}, the filtered, imputed and
##'   scaled fundamentals, and \code{y}, the price data with \code{symbol}
##'   coerced to character and \code{date} coerced to Date (rows of \code{y}
##'   are not filtered)
##'
##' @export
prepare_data <- function(x, y) {
  ## Remove variables we don't want (inputs that lack them are accepted)
  x <- x[setdiff(names(x), c("X", "For.Year"))]

  ## Coerce to correct classes
  x$Ticker.Symbol <- as.character(x$Ticker.Symbol)
  y$symbol <- as.character(y$symbol)
  ## Price dates are written as month/day/two-digit year
  y$date <- as.Date(as.character(y$date), format = "%m/%d/%y")
  ## Fundamentals dates are already "%Y-%m-%d"
  x$Period.Ending <- as.Date(as.character(x$Period.Ending),
                             format = "%Y-%m-%d")

  ## Keep fundamentals reported from 2012-01-01 through 2015-12-31; rows
  ## whose date could not be parsed (NA) are dropped as well
  period_start <- as.Date("2012-01-01")
  period_end <- as.Date("2015-12-31")
  in_period <- !is.na(x$Period.Ending) &
    x$Period.Ending >= period_start &
    x$Period.Ending <= period_end
  x <- x[in_period, , drop = FALSE]

  ## Only work with companies whose price data contains at least one of the
  ## last three calendar days of 2013 AND at least one of the last three
  ## calendar days of 2016.
  ## NOTE(review): an earlier comment described this window as "end of 2012
  ## to end of 2016", but the dates checked are in December 2013 -- confirm
  ## which is intended.
  lower_dates <- as.Date(c("2013-12-31", "2013-12-30", "2013-12-29"))
  upper_dates <- as.Date(c("2016-12-31", "2016-12-30", "2016-12-29"))
  has_lower <- unique(y$symbol[y$date %in% lower_dates])
  has_upper <- unique(y$symbol[y$date %in% upper_dates])
  companies_keep <- intersect(has_lower, has_upper)
  ## A missing symbol never identifies a company
  companies_keep <- companies_keep[!is.na(companies_keep)]
  x <- x[x$Ticker.Symbol %in% companies_keep, , drop = FALSE]

  ## Filter out all companies that do not contain fundamentals data
  ## for the date of "2015-12-31"
  report_date <- as.Date("2015-12-31")
  new_companies_keep <- unique(
    x$Ticker.Symbol[x$Period.Ending %in% report_date]
  )
  x <- x[x$Ticker.Symbol %in% new_companies_keep, , drop = FALSE]
  ## Base subsetting keeps the old row labels; renumber the remaining rows
  rownames(x) <- NULL

  ## Deal with missing values: set them to the mean of the remaining rows.
  ## Columns that are absent from `x` are skipped.
  columns <- c("Cash.Ratio",
               "Current.Ratio",
               "Quick.Ratio",
               "Earnings.Per.Share",
               "Estimated.Shares.Outstanding")
  for (column in intersect(columns, names(x))) {
    is_missing <- is.na(x[[column]])
    x[[column]][is_missing] <- mean(x[[column]], na.rm = TRUE)
  }

  ## Normalize all numeric fundamentals variables. Columns are tested with
  ## `[[`-style access (via vapply) so the check also works when `x` is a
  ## tibble, where `x[, i]` would return a one-column table, not a vector.
  numeric_names <- names(x)[vapply(x, is.numeric, logical(1))]
  for (column in numeric_names) {
    ## scale() returns a one-column matrix; keep only the plain vector
    x[[column]] <- as.vector(scale(x[[column]]))
  }

  list("x" = x, "y" = y)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.