server <- function(input, output, session) {
############################################################# Stocks
# Load the full stock datasets (cached reactives). Each invalidates only when
# correct_path() changes; the validate() guard blocks downstream consumers
# with a user-facing message until a valid data path is configured.
# NOTE: isTRUE() replaces `== T` — `T` is reassignable and `== T` can yield
# NA/logical(0) for odd inputs, whereas isTRUE() is always a scalar logical.
stockdata_DE <- reactive({
  validate(need(isTRUE(correct_path()), "Please choose the correct path"))
  load_all_stocks_DE()
})
stockdata_US <- reactive({
  validate(need(isTRUE(correct_path()), "Please choose the correct path"))
  load_all_stocks_US()
})
# output$stock_choice <- renderUI({
# validate(need(correct_path() == T, "Please choose the correct path"))
#
#
# input <- selectizeInput("Stock","Choose Companies:",
# c(COMPONENTS_DE()[["Company.Name"]],"DAX" = "GDAXI",
# COMPONENTS_US()[["Company.Name"]],"DJI" = "DJI"),
# selected = "Bayer ",multiple = TRUE)
#
#
#
# })
#
#
#
# # reset button for stock selection
# observeEvent(input$reset,{
# updateSelectizeInput(session,"Stock",selected = "")
# })
# # plot of the stocks
# output$plot_DE <- renderPlot({
# req(input$Stock)
# validate(need(correct_path() == T, "Please choose the correct path"))
#
# if (input$country_stocks == "Germany"){
# plotdata <- filter(stockdata_DE(),
# .data$name %in% (c(COMPONENTS_DE()[["Symbol"]],"GDAXI")[c(COMPONENTS_DE()[["Company.Name"]],"GDAXI") %in% .env$input$Stock]) &
# .data$Dates >= .env$input$dates[1] & .data$Dates <= .env$input$dates[2])
# } else {
# plotdata <- filter(stockdata_US(),
# .data$name %in% (c(COMPONENTS_US()[["Symbol"]], "DJI")[c(COMPONENTS_US()[["Company.Name"]], "DJI") %in% .env$input$Stock]) &
# .data$Dates >= .env$input$dates[1] & .data$Dates <= .env$input$dates[2])
# }
#
# if (!is.null(ranges$x)) {
# ranges$x <- as.Date(ranges$x, origin = "1970-01-01")
# }
# ggplot(plotdata,aes_string("Dates",input$stock_outcome,color = "name"))+
# geom_line()+
# theme_classic()+
# coord_cartesian(xlim = ranges$x, ylim = ranges$y, expand = FALSE)
# })
# # hover info box
# output$hover_info_DE <- renderUI({
# req(input$hovering)
# create_hover_info_DE(input$plot_hover_DE,stockdata_DE())
# })
# # zoom functionality
# ranges <- reactiveValues(x = NULL, y = NULL)
# observeEvent(input$plot1_dblclick, {
# brush <- input$plot1_brush
# if (!is.null(brush)) {
# ranges$x <- c(brush$xmin, brush$xmax)
# ranges$y <- c(brush$ymin, brush$ymax)
#
# } else {
# ranges$x <- NULL
# ranges$y <- NULL
# }
# })
#####################################################################
##################################################################### Corona
# corona_data <- reactive({
# validate(need(correct_path() == T, "Please choose the correct path"))
#
# CORONA(input$CoronaCountry,input$dates_corona[1],input$dates_corona[2])
# })
#
# output$corona_plot <- renderPlot({
# if (!is.null(ranges2$x)) {
# ranges2$x <- as.Date(ranges2$x, origin = "1970-01-01")
# }
#
# ggplot(corona_data(), aes_string("date",input$corona_measurement,color = "location"))+
# geom_line() +
# theme_classic() +
# coord_cartesian(xlim = ranges2$x, ylim = ranges2$y, expand = FALSE)
# })
#
# # hover info box
# output$hover_info_corona <- renderUI({
# req(input$hovering_corona)
# create_hover_info_corona(input$plot_hover_corona, corona_data(),input$corona_measurement)
# })
#
# # zoom functionality
# ranges2 <- reactiveValues(x = NULL, y = NULL)
# observeEvent(input$plot_corona_dblclick, {
# brush <- input$plot_corona_brush
# if (!is.null(brush)) {
# ranges2$x <- c(brush$xmin, brush$xmax)
# ranges2$y <- c(brush$ymin, brush$ymax)
#
# } else {
# ranges2$x <- NULL
# ranges2$y <- NULL
# }
# })
####################################################################### Instruction texts ##################################################
###############################################granger instructions
###### In the following, the instruction text for the models are prepared:
# Launch the interactive cicerone tour for the Granger tab on button click.
# (Safe to reference guide_granger before its definition below: the observer
# body is evaluated lazily, after the server function has run.)
observeEvent(input$instructions_granger,{
  guide_granger$init()$start()
})
# Step-by-step tour describing each Granger-analysis input widget.
guide_granger <- cicerone::Cicerone$
  new()$
  step(
    el = "first_variable_granger",
    title = "Choose the first variable",
    description = "Here, you can select the first variable for the Granger causality analysis. First, choose a country for the company/index. Secondly select the desired
company/index. Lastly pick a variable."
  )$
  step(
    "second_variable_granger",
    # Fixed: this step previously repeated the title "Choose the first variable".
    "Choose the second variable",
    "Here, you can select the second variable. If you decide to use Sentiment, click on the switch and then specify the type of sentiment using the 'Filter sentiment
input' tab at the top. Alternatively, a variable from the first dropdown or second dropdown can be selected."
  )$
  step(
    "date_variable_granger",
    "Choose timeseries",
    "Here, you can choose which period to focus on. The analysis is based on the selected range of Dates. Choose at least a period of 30 days!"
  )$
  step(
    "direction_variable_granger",
    "Direction",
    "Here, you can choose the direction of the granger causality. If this box is checked, the analysis verifies whether the second variable granger causes the first."
  )
############################################################# reg instructions
# Launch the interactive cicerone tour for the regression tab on button click.
observeEvent(input$instructions_regression,{
  guide_regression$init()$start()
})
# Step-by-step tour describing each regression input widget.
guide_regression <- cicerone::Cicerone$
  new()$
  step(
    el = "first_variable_regression",
    title = "Choose the first variable",
    # Fixed missing space after "analysis." in the user-facing text.
    description = "Here, you can select the dependent variable for the regression analysis. First, choose a country for the company/index. Secondly select the desired
company/index. Lastly pick a variable."
  )$
  step(
    "control_variable_regression",
    "Choose the control variable(s)",
    "Here, you can select control variables. If you decide to use Sentiment, click on the switch and then specify the type of sentiment using the 'Filter sentiment
input' tab at the top. Alternatively, a variable from the first dropdown or second dropdown can be selected."
  )$
  step(
    "date_variable_regression",
    "Choose timeseries",
    "Here, you can choose which period to focus on. The analysis is based on the selected range of Dates. Choose at least a period of 30 days!"
  )
################################################################## var
# Launch the interactive cicerone tour for the VAR tab on button click.
observeEvent(input$instructions_var,{
  guide_var$init()$start()
})
# Step-by-step tour describing each VAR input widget.
guide_var <- cicerone::Cicerone$
  new()$
  step(
    el = "first_variable_var",
    title = "Choose the first variable",
    description = "Here, you can select the dependent variable for the VAR. First, choose a country for the company/index. Secondly select the desired
company/index. Lastly pick a variable."
  )$
  step(
    "control_variable_var",
    "Choose the control variable(s)",
    "Here, you can select control variables. If you decide to use Sentiment, click on the switch and then specify the type of sentiment using the 'Filter sentiment
input' tab at the top. Alternatively, a variable from the first dropdown or second dropdown can be selected. Caution! If no control variable is selected,
an AR model is built using only the dependent variable!"
  )$
  step(
    "date_variable_var",
    "Choose timeseries",
    "Here, you can choose which period to focus on. The analysis is based on the selected range of Dates. Choose at least a period of 30 days!"
  )$
  step(
    "forecast_var",
    "Choose how many days to forecast",
    # Fixed user-facing typo "forcast" -> "forecast".
    "Here, a number of days to forecast into the future can be selected. On the 'validity' tab, this is the period on which to calculate performance.
On the 'actual forecast' tab, these are the future dates for the forecast."
  )
#############################################################################################################################################
#############################################################################################################################################
#############################################################################################################################################
##################################################################################### Granger Model
##Check that first input cannot equal the second input(in case of DAX and DJI)
# Guard for the Granger tab: the market index may not serve simultaneously as
# the dependent variable and as a control. Emits a validation message (which
# blanks the output and shows the text) when violated, otherwise renders
# nothing.
# NOTE: identical() replaces `==` so a not-yet-initialised (NULL) selection
# yields FALSE instead of a zero-length condition error; the redundant
# `(...) == TRUE` wrappers are dropped. The DAX/DJI controls are mutually
# exclusive (country-specific dropdown), so two independent ifs are
# equivalent to the original if/else-if chain.
output$gra_con_check <- renderText({
  chosen_controls <- input$Controls_GRANGER
  if ("DAX" %in% chosen_controls && identical(input$Stock_Granger, "GDAXI")) {
    validate("Index control not feasible if Index chosen as dependent variable")
  }
  if ("DJI" %in% chosen_controls && identical(input$Stock_Granger, "DJI")) {
    validate("Index control not feasible if Index chosen as dependent variable")
  }
})
##### Guard: the Granger date range must span at least 30 days.
# Shows a validation message when no date is picked or when the picked range
# is shorter than 30 days; renders nothing otherwise.
output$gra_date_check <- renderText({
  picked <- input$date_granger
  if (is.null(picked)) {
    validate("Need to select at least one day.")
  } else if (length(picked) > 1) {
    span_days <- difftime(as.Date(picked[2]), as.Date(picked[1]), units = "days")
    if (span_days < 30) {
      validate("Less than 30 days selected. Please choose more days.")
    }
  }
})
### Dropdown for the first Granger variable: German or US companies/index,
### depending on the selected country.
# NOTE: the original assigned the widget to a local variable named `input`,
# shadowing Shiny's `input` object inside this expression; the widget is now
# simply returned. `isTRUE()` replaces `== T`.
output$Stock_Granger <- renderUI({
  validate(need(isTRUE(correct_path()), "Please choose the correct path"))
  if (input$country_granger == "Germany") {
    selectizeInput("Stock_Granger", "Choose company or Index:",
                   company_terms_stock_ger,
                   selected = "DAX", multiple = FALSE)
  } else {
    selectizeInput("Stock_Granger", "Choose company or Index:",
                   company_terms_stock_us,
                   selected = "Dow Jones Industrial", multiple = FALSE)
  }
})
#### Dropdown for the second (control) Granger variable. The four macro
#### controls are shared; only the market-index entry depends on the country
#### (DAX for Germany, DJI for the US).
# NOTE: deduplicated the two almost-identical choice lists and removed the
# local variable `input` that shadowed Shiny's `input` object.
output$ControlsGranger <- renderUI({
  shared_choices <- c("Google-Trends Coronavirus" = "trend_corona",
                      "VIX" = "VIX",
                      "Financial Distress Index" = "fin_distress",
                      "Economic Uncertainty Index" = "econ_uncertain")
  index_choice <- if (input$country_granger == "Germany") {
    c("DAX" = "DAX")
  } else {
    c("DJI" = "DJI")
  }
  selectizeInput("Controls_GRANGER", "Choose control variables:",
                 c(shared_choices, index_choice),
                 selected = "VIX", multiple = FALSE)
})
##### Mutual exclusivity of the three "second variable" sources (corona
##### measure, macro control, sentiment switch): selecting one clears the
##### other two, so granger_data() always sees exactly one active source.
# Selecting a corona measure clears the macro control and the sentiment switch.
observeEvent(req(input$corona_measurement_granger != ""), { #Observe event from input (model choices)
updateSelectizeInput(session, "Controls_GRANGER", selected = "")
updateSwitchInput(session, "senti_yesno_gra", value = FALSE)
})
# Selecting a macro control clears the corona measure and the sentiment switch.
observeEvent(req(input$Controls_GRANGER!=""), { #Observe event from input (model choices)
updateSelectizeInput(session, "corona_measurement_granger", selected = "")
updateSwitchInput(session, "senti_yesno_gra", value = FALSE)
})
# Enabling the sentiment switch clears both dropdown-based sources.
observeEvent(req(input$senti_yesno_gra==TRUE), { #Observe event from input (model choices)
updateSelectizeInput(session, "corona_measurement_granger", selected = "")
updateSelectizeInput(session, "Controls_GRANGER", selected = "")
})
### Assemble the Granger analysis dataset: a three-column data frame of
### Dates, the chosen stock outcome, and exactly one second variable
### (macro control, corona measure, or sentiment aggregate).
granger_data <- reactive({
validate(need(correct_path() == T, "Please choose the correct path"))
# Exactly one second-variable source must be active; the observers above
# keep the three sources mutually exclusive.
validate(need(input$senti_yesno_gra==TRUE | input$corona_measurement_granger != ""|input$Controls_GRANGER!="", "Choose the second variable!"))
req(input$Stock_Granger)
## Stock data: filter the selected company/index to the chosen date window
## and keep only Dates plus the selected outcome column.
if (input$country_granger == "Germany"){
granger1 <- dplyr::filter(stockdata_DE(),
.data$name %in% .env$input$Stock_Granger &
.data$Dates >= .env$input$date_granger[1] & .data$Dates <= .env$input$date_granger[2])[c("Dates", input$Granger_outcome)]
} else {
granger1 <-dplyr:: filter(stockdata_US(),
.data$name %in% .env$input$Stock_Granger &
.data$Dates >= .env$input$date_granger[1] & .data$Dates <= .env$input$date_granger[2])[c("Dates", input$Granger_outcome)]
}
## Control data: either the global macro controls joined with the market
## index close, or the corona measurements for the matching country.
if (input$country_granger == "Germany"){
if(input$Controls_GRANGER!=""){
global_controls <- global_controls_test_DE() #load controls
global_controls$Date <- as.Date(global_controls$Date) #transform date
colnames(global_controls) <- c("Date","VIX","trend_corona","fin_distress","Credit","Volatility","Safe.assets","Equity.valuation","econ_uncertain")
dax <- dplyr::filter(stockdata_DE(),.data$name %in% c("GDAXI")&
.data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
colnames(dax)[1]<-"Date"
# NOTE(review): the original author reports this rename does not survive
# into the final dataset — TODO confirm where it gets overwritten.
colnames(dax)[2] <- "DAX"
global_controls <- dplyr::left_join(dax,global_controls,by = c("Date")) #join final
}else if (input$corona_measurement_granger!=""){
global_controls <- CORONA_neu("Germany")[c("date",input$corona_measurement_granger)]
colnames(global_controls)[1]<-"Dates"
}
}else {
if(input$Controls_GRANGER!=""){
global_controls <- global_controls_test_US() #same procedure as above
global_controls$Date <- as.Date(global_controls$Date)
colnames(global_controls) <- c("Date","VIX","trend_corona","fin_distress","Credit","Volatility","Safe.assets","Equity.valuation","econ_uncertain")
dji <- dplyr::filter(stockdata_US(),.data$name %in% c("DJI")&
.data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
colnames(dji)[1]<-"Date"
colnames(dji)[2] <- "DJI"
global_controls <- dplyr::left_join(dji,global_controls,by = c("Date"))
}else if (input$corona_measurement_granger!=""){
global_controls <- CORONA_neu("United States")[c("date",input$corona_measurement_granger)]
colnames(global_controls)[1]<-"Dates"
}
}
## Merge the control source onto the stock series by date (if one is active).
if (input$Controls_GRANGER!=""|input$corona_measurement_granger!=""){
names(global_controls)[1] <- "Dates"
granger <- left_join(granger1,global_controls,by = c("Dates"))
}else{
granger<- granger1
}
## Sentiment data: join the full sentiment table when the switch is on;
## otherwise join only its date column so the merge adds no value columns.
if (input$senti_yesno_gra == TRUE){
res <- get_sentiment_granger()
} else {
res <- get_sentiment_granger()[1]
}
res$created_at <- as.Date(res$created_at)
granger <- dplyr::left_join(granger,res,by=c("Dates" = "created_at"))
## Reduce to the three final columns: Dates, outcome, active second variable.
if (input$senti_yesno_gra == TRUE){
granger <- granger[c("Dates",input$Granger_outcome,colnames(get_sentiment_granger())[2])]
}else if (input$Controls_GRANGER!=""){
granger <- granger[c("Dates",input$Granger_outcome,input$Controls_GRANGER)]
}else{
granger <- granger[c("Dates",input$Granger_outcome,input$corona_measurement_granger)]
}
# Remaining NAs (non-trading days, unmatched join dates) are zero-filled.
granger[is.na(granger)]<-0
granger
})
# Keep the hidden sentiment-parameter tabset in sync with the chosen
# sentiment type (shows the matching parameter panel).
observeEvent(input$Sentiment_type_gra, { #Observe event from input (model choices)
req(input$Sentiment_type_gra)
updateTabsetPanel(session, "params_gra", selected = input$Sentiment_type_gra)
})
# Same synchronisation for the industry-sentiment sub-tabset.
observeEvent(input$industry_sentiment_gra, { #Observe event from input (model choices)
req(input$industry_sentiment_gra)
updateTabsetPanel(session, "industry_tab_gra", selected = input$industry_sentiment_gra)
})
################################################################# sql data granger
# Normalise the Granger date input to a two-element start/end vector so the
# SQL query builder can always index dates[1] and dates[2], even when the
# user has picked only a single day.
dates_gra <- reactive({
  picked <- input$date_granger
  if (length(picked) > 1) picked else rep(picked, 2)
})
# Build the SQL query string for the selected sentiment filters. When no
# company filter is active the per-language summary table is queried;
# otherwise the per-company summary table is queried with a company and
# language filter.
querry_sentiment_model_gra <- reactive({
#### Tweet-length filter: 81 selects long tweets only, 0 means no restriction.
if (input$tweet_length_gra == T){
tweetLength <- 81
} else {
tweetLength <- 0
}
# Two-element start/end date vector (see dates_gra()).
dates <- dates_gra()
### No company filter: query the language-level summary table.
if (input$sentiment_company_granger == "NoFilter"){
test <- glue('select created_at, {input$aggregation_gra} from sum_stats_{tolower(input$language_gra)} where
created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
retweets_count = {input$minRetweets_gra} and likes_count = {input$minLikes_gra} and
tweet_length = {tweetLength}')
} else {
# Re-encode umlauts to match how company names are stored in the
# database (mojibake form); get_sentiment_granger() applies the
# reverse mapping on the results.
comp <- gsub("ö","ö", input$sentiment_company_granger)
comp <- gsub("ü", "ü", comp)
test<-glue('SELECT created_at, {input$aggregation_gra} FROM sum_stats_companies WHERE
created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
retweets_count = {input$minRetweets_gra} and likes_count = {input$minLikes_gra} and
tweet_length = {tweetLength} and company = "{comp}" and
language = "{tolower(input$language_gra)}"' )
}
# Return the assembled query string.
test
})
# Execute the sentiment SQL query and return the result as a data frame
# (first column created_at, second column the chosen aggregation).
get_sentiment_granger <- reactive({
###### need correct path
validate(need(correct_path() == T, "Please choose the correct path"))
###### need database connection
validate(need(database_connector(), "Could not connect to database"))
###### need at least one date selected
validate(need(!is.null(input$date_granger), "Please select a date."))
####### store database connection
con <- database_connector()
###### query data from sql
df_need <- DBI::dbGetQuery(con, querry_sentiment_model_gra())
#### Undo the mojibake umlaut encoding in company names (inverse of the
#### replacement done in querry_sentiment_model_gra()).
if ("company" %in% names(df_need)){
df_need$company <- gsub("ö", "ö", df_need$company)
df_need$company <- gsub("ü", "ü", df_need$company)
}
#### return df
df_need
})
#################################### Granger analyis:
# Optimal VAR lag order (1..7) for the assembled Granger dataset, chosen by
# the Akaike information criterion. The first column (Dates) is dropped.
# NOTE: `!is.null(x)` replaces the `is.null(x)==FALSE` anti-idiom.
optlags <- reactive({
  req(!is.null(granger_data()))
  vars::VARselect(granger_data()[-1], lag.max = 7, type = "const")$selection[["AIC(n)"]]
})
# Stationarity preprocessing: while either series fails the augmented
# Dickey-Fuller test (p > 0.1), difference BOTH series once (keeping them
# aligned on a common length) and retest.
# NOTE: the scalar short-circuit operator `||` replaces the vectorized `|`,
# which is the idiomatic (and since R 4.3 the required) form for scalar
# while/if conditions; adf.test()$p.value is always a single number.
dickey_fuller <- reactive({
  data <- granger_data()
  while (tseries::adf.test(data[[2]], k = optlags())$p.value > 0.1 ||
         tseries::adf.test(data[[3]], k = optlags())$p.value > 0.1) {
    data[2] <- c(diff(data[[2]], 1), NA)
    data[3] <- c(diff(data[[3]], 1), NA)
    data <- tidyr::drop_na(data)
  }
  data
})
# Fit a VAR on the (possibly differenced) series and run the Granger
# causality test. The "cause" series follows the direction checkbox:
# checked = test whether the second variable Granger-causes the first.
# NOTE: the original wrapped identical if/else branches in a redundant
# Controls_GRANGER check and used scalar ifelse(); both are simplified away
# without changing behavior.
granger_result <- reactive({
  varobject <- vars::VAR(dickey_fuller()[-1], p = optlags(), type = "const")
  cause <- if (isTRUE(input$direction_granger)) {
    colnames(granger_data())[3]
  } else {
    input$Granger_outcome
  }
  vars::causality(varobject, cause = cause)$Granger
})
# Raw R printout of the Granger causality test result.
output$granger_result <- renderPrint({
  granger_result()
})
######### visualize tab:
# Dygraph of the first (dependent) variable over the selected period, with a
# white shading band spanning the full date range.
output$stocks_granger <- dygraphs::renderDygraph({
  gd <- granger_data()
  series <- xts::xts(gd[input$Granger_outcome], order.by = gd[["Dates"]])
  dygraphs::dygraph(series,
                    ylab = colnames(gd[2]),
                    main = glue::glue("Plot of the first variable")) %>%
    dygraphs::dyShading(from = min(gd[["Dates"]]),
                        to = max(gd[["Dates"]]),
                        color = "white")
})
# Dygraph of the second (control/corona/sentiment) variable over the same
# period, shaded like the first plot.
output$second_granger <- dygraphs::renderDygraph({
  gd <- granger_data()
  second_col <- colnames(gd)[3]
  series <- xts::xts(gd[second_col], order.by = gd[["Dates"]])
  dygraphs::dygraph(series,
                    ylab = colnames(gd[3]),
                    main = glue::glue("Plot of the second variable")) %>%
    dygraphs::dyShading(from = min(gd[["Dates"]]),
                        to = max(gd[["Dates"]]),
                        color = "white")
})
######### background steps tab
# Sentence reporting the AIC-optimal lag order.
output$grangertext1 <- renderUI({
  lag_sentence <- paste("The optimal lag order for the VAR model using the Akaike information criterium (AIC) is ", optlags(), " lags.")
  htmltools::HTML(lag_sentence)
})
# Full VARselect printout (all information criteria) shown for transparency.
output$optimallags <- renderPrint({
  vars::VARselect(granger_data()[-1], lag.max = 7, type = "const")
})
# Sentence summarising the stationarity outcome. If dickey_fuller() dropped
# rows, it had to difference, so at least one series was non-stationary.
# Fixed user-facing typo "tranformations" -> "transformations".
output$grangertext2 <- renderUI({
  if (nrow(dickey_fuller()) != nrow(granger_data())){
    paste("The Dickey Fuller test found one of the timeseries to be non-stationary:")
  } else {
    paste("The Dickey Fuller test found both timeseries to be stationary.
Hence, the granger causality analysis can be performed without transformations:")
  }
})
# Dickey-Fuller test printout for the first variable (original series).
output$dickey_fuller <- renderPrint({
  first_series <- granger_data()[[2]]
  tseries::adf.test(first_series, k = optlags())
})
# Dickey-Fuller test printout for the second variable (original series).
output$dickey_fuller_second <- renderPrint({
  second_series <- granger_data()[[3]]
  tseries::adf.test(second_series, k = optlags())
})
# Sentence shown only when differencing was required; reports how many
# differencing passes were applied.
output$grangertext3 <- renderUI({
  req(nrow(dickey_fuller()) != nrow(granger_data()))
  n_diffs <- nrow(granger_data()) - nrow(dickey_fuller())
  paste("Differencing the series ", n_diffs, "times achieved stationarity:")
})
# Dickey-Fuller printout for the first variable after differencing.
output$dickey_fuller_diff <- renderPrint({
  req(nrow(dickey_fuller()) != nrow(granger_data()))
  tseries::adf.test(dickey_fuller()[[2]], k = optlags())
})
# Dickey-Fuller printout for the second variable after differencing.
output$dickey_fuller_second_diff <- renderPrint({
  req(nrow(dickey_fuller()) != nrow(granger_data()))
  tseries::adf.test(dickey_fuller()[[3]], k = optlags())
})
######### Result tab: human-readable sentence at the 10% significance level.
# BUGFIX: granger_result() returns an htest object (a classed list);
# `x["p.value"]` yields a one-element LIST, and comparing a list with `<`
# raises "comparison of these types is not implemented" at runtime. `[[`
# extracts the numeric p-value itself.
output$granger_satz <- renderUI({
  significant <- granger_result()[["p.value"]] < 0.1
  first_lab <- htmltools::em(input$Granger_outcome)
  second_lab <- htmltools::em(colnames(granger_data())[3])
  if (input$direction_granger == TRUE) {
    # Direction checked: second variable -> first variable.
    sentence <- if (significant) {
      paste(second_lab, " granger causes ", first_lab)
    } else {
      paste(second_lab, " does not granger cause ", first_lab)
    }
  } else {
    # Default direction: first variable (of the chosen stock) -> second.
    sentence <- if (significant) {
      paste(first_lab, "of", input$Stock_Granger, "granger causes ", second_lab)
    } else {
      paste(first_lab, "of", input$Stock_Granger, "does not granger cause ", second_lab)
    }
  }
  htmltools::HTML(paste(sentence))
})
### Static information/help HTML for the Granger tab.
# Fixed user-facing typos: "This includes," -> "This includes", "form opf" ->
# "form of", "a autoregression" -> "an autoregression", "built the model" ->
# "build the model", "sationarity" -> "stationarity". Removed commented-out
# bullet items.
output$info_granger <- renderUI({
htmltools::HTML(paste(htmltools::h1(htmltools::strong("Granger Causality Analysis"), align="center", style = "font-family: 'Times', serif;
font-weight: 30px; font-size: 30px; line-height: 1;"),
htmltools::h2(htmltools::strong("Visualize") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("On this tab, both variables are plotted for comparison.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Background-steps") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("Here, all relevant steps required in order to achieve the results are displayed. This includes the number of optimal lags,
repeatedly testing for stationarity and differencing. Note that the content of this tab is basically extra information and can be skipped if one is only interested in the results.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Results") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("This tab includes the result of the granger causality test, providing the R-output of the test and the result in form of a sentence.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::tags$hr(),
htmltools::h2(htmltools::strong("Important information") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("In this section, the user is able to perform a Granger causality test, which is a statistical hypothesis test for determining whether one time series is useful in forecasting another.
The term ", htmltools::em("causality"), " in this context means nothing more than predictive causality and should not be mistaken for ",
htmltools::em("true causality"),". It rather measures the ability of past values of one time series to predict future values of another time series.",htmltools::tags$br(),
"To test the null hypothesis that time series ", htmltools::em("x")," does not Granger cause", htmltools::em("y"), ", one first finds the optimal lagged values of ", htmltools::em("y")," to include in an autoregression of ", htmltools::em("y:")
,style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
withMathJax("$$y_t = \\alpha_0 + \\alpha_1y_{t-1} + \\alpha_2y_{t-1} + ... + \\alpha_my_{t-m} + error_t$$"),
htmltools::p("In the next step, lagged values of ", htmltools::em("x"),"are added to the regression: ",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
withMathJax("$$y_t = \\alpha_0 + \\alpha_1y_{t-1} + \\alpha_2y_{t-1} + ... + \\alpha_my_{t-m} + \\beta_1x_{t-1} + \\beta_qx_{t-q} + error_t$$"),
htmltools::p("The lagged values of ", htmltools::em("x")," are kept as long as they add explanatory power to the regression according to an F-test.
The null hypothesis that ", htmltools::em("x")," does not Granger cause", htmltools::em("y"), "is accepted if and only if no lagged values of ", htmltools::em("x")," are included.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Instructions:") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("In order to perform the Granger causality Analysis, build the model using the panel on the left: ",htmltools::tags$br(),
htmltools::div("- select the first variable",htmltools::tags$br(),
"- select the second variable",htmltools::tags$br(),
"- choose the direction of the causality test using the checkbox",htmltools::tags$br()
, style="margin-left: 1em;font-weight: 18px; font-size: 18px; line-height: 1;"),style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Analysis steps:") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("The following steps are automatically performed after the user selects two time series: ",htmltools::tags$br(),
htmltools::div("1. The optimal number of lags is calculated",htmltools::tags$br(),
"2. Stationarity is repeatedly tested and the series are differenced until stationarity is achieved",htmltools::tags$br(),
"3. A VAR model is estimated with the optimal number of lags and the (if necessary) transformed series",htmltools::tags$br(),
"4. A granger causality test is performed.",
style="margin-left: 1em;font-weight: 18px; font-size: 18px; line-height: 1;"),
style = "font-weight: 18px; font-size: 18px; line-height: 1;")
))
})
################################################################################################### Regression
##### Static information/help HTML for the regression tab.
# Fixed user-facing typos: "is displayed" -> "are displayed" (agreement),
# "dependet" -> "dependent", "built the model" -> "build the model".
# German placeholder comments replaced by an English TODO note.
output$info_regression <- renderUI({
HTML(paste(htmltools::h1(htmltools::strong("Regression Analysis"), align="center", style = "font-family: 'Times', serif;
font-weight: 30px; font-size: 30px; line-height: 1;"),
htmltools::h2(htmltools::strong("Summary Statistics") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("On this tab, you can find summary statistics for the selected variables, including a correlation plot.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Linear Regression") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("Here, the results of a linear regression using the selected variables are displayed. Significance is indicated at the 0.01, 0.05 and 0.1
level with ***, **, * respectively.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Quantile Regression") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("This tab includes the result of a quantile regression for the chosen quantile, using the selected variables. Significance is indicated at the 0.01, 0.05 and 0.1
level with ***, **, * respectively.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::tags$hr(),
htmltools::h2(htmltools::strong("Important information") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("In this section, the user is able to perform a simple linear regression and a quantile regression. Here, one can test which variables
help to explain the stock prices of a specific company. By adding and dropping the variables, one can observe their potential of adding explanatory
power to the regression.
The linear regression estimates the conditional mean of the dependent variable and is of the form:",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
withMathJax("$$y_i = \\beta_0 + \\beta_1x_{i1} + ... + \\beta_px_{ip} + \\epsilon_i$$"),
htmltools::p("Quantile regressions estimate the conditional median (or quantile) of the dependent variable. They allow to quantify the effect
of the independent variables at specified parts of the distribution. For example, in this application one can verify if companies
with lower (or higher) stock returns are significantly more (or less) affected by the explanatory variables. The regression is fitted, by minimizing the median
absolute deviation of the following equation: ",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
withMathJax("$$Q_{\\tau}(y_i) = \\beta_0(\\tau) + \\beta_1(\\tau)x_{i1} + ... + \\beta_p(\\tau)x_{ip} + \\epsilon_i$$"),
# TODO(review): add motivation for the regression section (why regression,
# which dependent variables are supported, toggling sentiment in and out).
htmltools::h2(htmltools::strong("Instructions:") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("In order to perform the regression analysis, build the model using the panel on the left: ",htmltools::tags$br(),
htmltools::div("- select the dependent variable",htmltools::tags$br(),
"- select the control variable(s)",htmltools::tags$br(),
"- choose whether sentiment variable should be included",htmltools::tags$br(),
"- if sentiment is added, switch to the tab ",htmltools::em("Filter sentiment input")," on top of the sidebar and specify the sentiment",htmltools::tags$br(),
"- the tab ",htmltools::em("Summary Statistics")," contains information on the selected variables",htmltools::tags$br(),
"- the results can be accessed on the tab ",htmltools::em("Linear Regression")," and ",htmltools::em("Quantile Regression")," respectively.",htmltools::tags$br(),
"- on the tab ",htmltools::em("Quantile Regression")," specify the desired quantile for which to compute the regression", style="margin-left: 1em;font-weight: 18px; font-size: 18px; line-height: 1;"),
style = "font-weight: 18px; font-size: 18px; line-height: 1;")))
})
###### Guard for the regression tab: the market index may not serve both as
###### the dependent variable and as a control (DAX vs GDAXI, DJI vs DJI).
# NOTE: identical() replaces `==` so a not-yet-initialised (NULL) selection
# yields FALSE instead of a zero-length condition error; redundant
# `(...) == TRUE` wrappers dropped. DAX/DJI controls are country-exclusive,
# so two independent ifs match the original if/else-if behavior.
output$reg_con_check <- renderText({
  chosen_controls <- input$Controls
  if ("DAX" %in% chosen_controls && identical(input$Stock_Regression, "GDAXI")) {
    validate("Index control not feasible if Index chosen as dependent variable")
  }
  if ("DJI" %in% chosen_controls && identical(input$Stock_Regression, "DJI")) {
    validate("Index control not feasible if Index chosen as dependent variable")
  }
})
##### Guard: the regression date range must span at least 30 days.
# Shows a validation message when no date is picked or when the picked range
# is shorter than 30 days; renders nothing otherwise.
output$reg_date_check <- renderText({
  picked <- input$date_regression
  if (is.null(picked)) {
    validate("Need to select at least one day.")
  } else if (length(picked) > 1) {
    span_days <- difftime(as.Date(picked[2]), as.Date(picked[1]), units = "days")
    if (span_days < 30) {
      validate("Less than 30 days selected. Please choose more days.")
    }
  }
})
### Flexible stock dropdown for the regression tab: shows German or US
### companies/index depending on the selected country.
# NOTE: removed the local variable `input` that shadowed Shiny's `input`
# object; the widget is simply returned. `isTRUE()` replaces `== T`.
output$stock_regression <- renderUI({
  validate(need(isTRUE(correct_path()), "Please choose the correct path"))
  if (input$country_regression == "Germany") {
    selectizeInput("Stock_Regression", "Choose company or Index:",
                   company_terms_stock_ger,
                   selected = "DAX", multiple = FALSE)
  } else {
    selectizeInput("Stock_Regression", "Choose company or Index:",
                   company_terms_stock_us,
                   selected = "Dow Jones Industrial", multiple = FALSE)
  }
})
### control variable selection
### Control-variable selector for the regression. The four global controls are
### shared between countries; only the market-index choice differs. This
### removes the previous duplication and the local `input` shadowing.
output$Controls <- renderUI({
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  shared_controls <- c("Google-Trends Coronavirus" = "trend_corona",
                       "VIX" = "VIX",
                       "Financial Distress Index" = "fin_distress",
                       "Economic Uncertainty Index" = "econ_uncertain")
  # country-specific index control (DAX for Germany, DJI otherwise)
  index_control <- if (input$country_regression == "Germany") {
    c("DAX" = "DAX")
  } else {
    c("DJI" = "DJI")
  }
  selectizeInput("Controls", "Control variables:",
                 c(shared_controls, index_control),
                 selected = "VIX", multiple = TRUE)
})
####dataset for analysis
#### Assemble the regression dataset: the dependent stock series (filtered to
#### the selected date range) left-joined with global controls, the market
#### index, and optional Corona measures; remaining NAs are set to 0.
dataset <- reactive({
validate(need(correct_path() == T, "Please choose the correct path"))
req(input$country_regression)
# --- dependent variable: selected stock, restricted to the chosen dates ---
if (input$country_regression == "Germany"){
data_reg <- dplyr::filter(stockdata_DE(), # filter by date only here; everything else is merged onto it
.data$name %in% .env$input$Stock_Regression &
.data$Dates >= .env$input$date_regression[1] & .data$Dates <= .env$input$date_regression[2])[c("Dates",input$regression_outcome,"name")] # TODO: make the outcome column fully flexible here
} else {
data_reg <- dplyr::filter(stockdata_US(), # filter by date only here; everything else is merged onto it
.data$name %in% .env$input$Stock_Regression &
.data$Dates >= .env$input$date_regression[1] & .data$Dates <= .env$input$date_regression[2])[c("Dates",input$regression_outcome,"name")] # TODO: make the outcome column fully flexible here
}
# --- controls: global indicators + market index (+ optional Corona metric) ---
if (input$country_regression == "Germany"){
global_controls <- global_controls_test_DE() #load controls
global_controls$Date <- as.Date(global_controls$Date) #transform date
colnames(global_controls) <- c("Date","VIX","trend_corona","fin_distress","Credit","Volatility","Safe.assets","Equity.valuation","econ_uncertain")
# DAX closing prices over the controls' date range, joined on as another control
dax <- dplyr::filter(stockdata_DE(),.data$name %in% c("GDAXI")&
.data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
colnames(dax)[1]<-"Date"
colnames(dax)[2] <- "DAX" # rename -> NOTE(review): reportedly not renamed in the final dataset -- cause unknown, verify
global_controls <- dplyr::left_join(dax,global_controls,by = c("Date")) #join final
# optionally append the selected Corona measure for Germany
if(input$corona_measurement_regression!=""){
help <- CORONA_neu("Germany")[c("date",input$corona_measurement_regression)]
colnames(help)[1]<-"Date"
global_controls <- dplyr::left_join(global_controls,help,by=c("Date"))
} else {}
}else {
global_controls <- global_controls_test_US() #same procedure as above
global_controls$Date <- as.Date(global_controls$Date)
colnames(global_controls) <- c("Date","VIX","trend_corona","fin_distress","Credit","Volatility","Safe.assets","Equity.valuation","econ_uncertain")
dji <- dplyr::filter(stockdata_US(),.data$name %in% c("DJI")&
.data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
colnames(dji)[1]<-"Date"
colnames(dji)[2] <- "DJI"
global_controls <- dplyr::left_join(dji,global_controls,by = c("Date"))
if(input$corona_measurement_regression!=""){
help <- CORONA_neu("United States")[c("date",input$corona_measurement_regression)]
colnames(help)[1]<-"Date"
global_controls <- dplyr::left_join(global_controls,help,by=c("Date"))
} else {}
}
# align key column name with the stock data, then merge everything together
names(global_controls)[1] <- "Dates"
datareg2 <- dplyr::left_join(data_reg,global_controls,by = c("Dates"))
# replace remaining NAs (e.g. non-trading days) with 0
datareg2[is.na(datareg2)]<-0
datareg2
})
###select relevant variables
### Keep only the user-selected columns: Dates, the outcome, then (when
### present) the chosen controls and the chosen Corona measure.
df_selected_controls <- reactive({
  full_data <- dataset()
  wanted <- c("Dates", input$regression_outcome)
  if (!is.null(input$Controls)) {
    wanted <- c(wanted, input$Controls)
  }
  if (input$corona_measurement_regression != "") {
    wanted <- c(wanted, input$corona_measurement_regression)
  }
  full_data[wanted]
})
# Keep the sentiment-filter tabset in sync with the selected sentiment type.
observeEvent(input$Sentiment_type, { #Observe event from input (model choices)
req(input$Sentiment_type)
updateTabsetPanel(session, "params", selected = input$Sentiment_type)
})
# Keep the industry tabset in sync with the industry-sentiment choice.
observeEvent(input$industry_sentiment, { #Observe event from input (model choices)
req(input$industry_sentiment)
updateTabsetPanel(session, "industry_tab", selected = input$industry_sentiment)
})
# dataset_senti <- reactive({
# req(input$Sentiment_type)
# validate(need(correct_path() == T, "Please choose the correct path"))
# if(input$Sentiment_type == "NoFilter"){
#
# res <- En_NoFilter_0_0_yes() # still fix as it is not clear yet if sql or csv
# #res <- eval(parse(text = paste('En', '_NoFilter_',input$minRetweet,'_',
# # input$minminLikes,'_',input$tweet_length,'()', sep='')))
# #input$language
# }else{
# req(input$Stock_reg)
# ticker <- ticker_dict(input$Stock_reg) # dict for a few stock
# res <- eval(parse(text = paste(ticker,'()', sep=''))) # example: ADS.DE()
#
# }
#
#
# })
# # filter
# filtered_df <- reactive({
# validate(need(correct_path() == T, "Please choose the correct path"))
# req(input$Sentiment_type)
# req(input$minRetweet_stocks1)
# req(input$minRetweet_stocks2)
#
# if(input$Sentiment_type == "NoFilter"){
#
# res <- dataset_senti()
# }else{ # live filtering
# req(input$industry_sentiment)
# res <- dataset_senti()
# if(input$industry_sentiment == "no"){
# res <- dataset_senti()
# if(input$tweet_length_stock1 == "yes"){
#
# res <- res %>% filter((retweets_count > as.numeric(input$minRetweet_stocks1)) &
# (tweet_length > 81))}
# else{
# res <- res %>% filter((retweets_count > as.numeric(input$minRetweet_stocks1)))
# }
# }#else{
# #res <- dataset_senti()
# #if(input$tweet_length_stock2 == "yes"){
# # res <- res %>% filter((retweets_count > as.numeric(input$minRetweet_stocks2)) &
# # (tweet_length > 81))
# #}else{
# # res <- res %>% filter(retweets_count > as.numeric(input$minRetweet_stocks2))
# #}
# #}
# }
# })
#
# # aggregate dataset to get one sentiment per day
# aggri_select <- reactive({
#
# if(input$Sentiment_type == "NoFilter"){ # NoFilter files already aggregated
# res <- filtered_df()
# aggregation <- key(input$aggregation) # select aggregation type: Mean, mean weighted by,...
# res <- res %>% tidyr::gather("id", "aggregation", aggregation)
# res <- res[c("date","aggregation")]
# }else{
# if(input$industry_sentiment == "no"){
# res <- filtered_df()
# res <- aggregate_sentiment(res) # function to aggregate sentiment per day
# res <- res %>% filter(language == input$language1)
# aggregation <- key(input$aggregation1)
# res <- res %>% tidyr::gather("id", "aggregation", aggregation)
# res <- res[c("date","aggregation")]
# }else{
# res <- get_industry_sentiment(COMPONENTS_DE(),input$industry,input$minRetweet_stocks2,
# input$tweet_length_stock2) #function to gather all stock in certain industry
# aggregation <- key(input$aggregation2) #--> also calculates aggregation inside function
# res <- res %>% tidyr::gather("id", "aggregation", aggregation)
# res <- res[c("date","aggregation")]
# }
# }
#
# })
#reset button for variables
# Reset button: clear the control-variable and Corona-measure selections.
observeEvent(input$reset_regression,{
updateSelectizeInput(session,"Controls",selected = "")
updateSelectizeInput(session,"corona_measurement_regression",selected = "")
})
#merge sentiment with control+dep vars
# Merge the sentiment series onto the controls + dependent variable. When the
# user opts out of sentiment, only the date column of the sentiment query is
# kept so the join leaves the remaining columns untouched.
final_regression_df <- reactive ({
  if (input$senti_yesno_reg == TRUE) {
    sentiment <- get_sentiment_regression()      # date + aggregated sentiment
  } else {
    sentiment <- get_sentiment_regression()[1]   # date column only
  }
  sentiment$created_at <- as.Date(sentiment$created_at)
  selected <- df_selected_controls()
  merged <- dplyr::left_join(selected, sentiment,
                             by = c("Dates" = "created_at"))
  merged[-1]  # drop the Dates column before modelling
})
########################### sql data
### call to our SQL database to load the sentiment for the desired filters
# Normalise the date input to a two-element (from, to) vector; a single picked
# date becomes a one-day range.
dates_reg <- reactive({
  picked <- input$date_regression
  if (length(picked) > 1) picked else c(picked, picked)
})
### Build the SQL query fetching the aggregated sentiment for the regression.
### Fixes: explicit return of `test` (consistent with querry_sentiment_model_var)
### and `TRUE` instead of the reassignable `T`.
querry_sentiment_model_reg <- reactive({
#### tweet-length filter: 81 characters when "long tweets only" is selected
if (input$tweet_length == TRUE){
tweetLength <- 81
} else {
tweetLength <- 0
}
dates <- dates_reg()
###### choose the table: global summary stats vs. per-company stats
if (input$sentiment_company_regression == "NoFilter"){
test <- glue('select created_at, {input$aggregation} from sum_stats_{tolower(input$language)} where
created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
retweets_count = {input$minRetweets} and likes_count = {input$minLikes} and
tweet_length = {tweetLength}')
} else {
# normalise umlaut encoding of the company name for the DB lookup
# NOTE(review): pattern and replacement render identically here -- verify the
# intended source/target encodings
comp <- gsub("ö","ö", input$sentiment_company_regression)
comp <- gsub("ü", "ü", comp)
test <- glue('SELECT created_at, {input$aggregation} FROM sum_stats_companies WHERE
created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
retweets_count = {input$minRetweets} and likes_count = {input$minLikes} and
tweet_length = {tweetLength} and company = "{comp}" and
language = "{tolower(input$language)}"' )
}
test
})
### Query the sentiment data from the SQL database for the regression model.
get_sentiment_regression <- reactive({
  # Preconditions: valid data path, live DB connection, at least one date.
  validate(need(correct_path() == T, "Please choose the correct path"))
  validate(need(database_connector(), "Could not connect to database"))
  validate(need(!is.null(input$date_regression), "Please select a date."))
  connection <- database_connector()
  sentiment_df <- DBI::dbGetQuery(connection, querry_sentiment_model_reg())
  # normalise umlaut encoding in company names, when that column is returned
  # NOTE(review): pattern and replacement render identically here -- verify
  # the intended encodings
  if ("company" %in% names(sentiment_df)) {
    sentiment_df$company <- gsub("ö", "ö", sentiment_df$company)
    sentiment_df$company <- gsub("ü", "ü", sentiment_df$company)
  }
  sentiment_df
})
####################################################Summary statistics Regression #####################################################
###table
### Summary statistics table for the regression data: selected columns of
### psych::describe() (indices 3,4,5,8,9 = mean, sd, median, min, max).
df_need_reg <- reactive({
  summary_tab <- round(psych::describe(final_regression_df())[c(3, 4, 5, 8, 9)], 2)
  # Bug fix: the old check `nrow(df_need == 1)` counted rows of a comparison
  # matrix and was therefore truthy for any non-empty table; the intent
  # (evidenced by the unused `nrow(df_need)==1` variable) was a row-count test.
  if (nrow(summary_tab) == 1) {
    # single-variable case: label the lone row with the outcome name
    row.names(summary_tab)[1] <- input$regression_outcome
  }
  names(summary_tab) <- names(summary_tab) %>% toupper()
  summary_tab
})
### Render the regression summary statistics as a styled kable table.
output$reg_summary <- function(){
#colnames(df_need)<- "value"
#names(df_need) <- names(df_need) %>% toupper()
# NOTE(review): `colnames` is not a knitr::kable() argument (possibly meant
# `col.names`); it is absorbed by `...` -- verify the intended header output.
knitr::kable(df_need_reg(),colnames = NULL) %>%
column_spec(1:6, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
#### correlation plot
#### Pairwise correlation plot of the regression variables; columns containing
#### any NA are excluded so GGally::ggpairs does not fail.
output$correlation_reg <- renderPlot({
  regression_data <- final_regression_df()
  complete_columns <- regression_data %>% select_if(~ !any(is.na(.)))
  if (any(is.na(regression_data))) {
    # user notification about dropped columns is currently disabled
    # showNotification(glue("Removed {colnames(regression_data)[ncol(regression_data)]} for plot due to missing values"),
    #                  type = "message")
  }
  GGally::ggpairs(complete_columns,
                  upper = list(continuous = wrap(ggally_cor, size = 8)),
                  lower = list(continuous = 'smooth'))
})
###################################################################################
#regression
# OLS regression of the outcome on all selected variables, reported with
# heteroskedasticity-robust (HC1) standard errors.
regression_result <- reactive({
  req(ncol(final_regression_df()) >= 2)
  ols_fit <- stats::lm(stats::reformulate(".", input$regression_outcome),
                       data = final_regression_df())
  lmtest::coeftest(ols_fit, vcov = sandwich::vcovHC(ols_fit, "HC1"))
})
#Qregression
# Quantile regression of the outcome on all selected variables at the
# user-chosen quantile(s).
regression_result_Qreg <- reactive({
  req(ncol(final_regression_df()) >= 2)
  quantreg::rq(stats::reformulate(".", input$regression_outcome),
               tau = input$Quantiles,
               data = final_regression_df())
})
#### regression table
#### Tidy regression table with significance stars appended to the p-values.
#### Bug fix: the original else-if chain tested `< 0.05` twice, so the `*`
#### (p < 0.1) branch was unreachable; conventional stars are
#### *** p<0.01, ** p<0.05, * p<0.1.
reg_table <- reactive({
  tidy_tab <- broom::tidy(regression_result())
  colnames(tidy_tab) <- c("variable","estimate","std.error","test-statistic","p-value")
  tidy_tab[5] <- round(tidy_tab[5], 4)
  p_values <- tidy_tab[["p-value"]]
  stars <- ifelse(p_values < 0.01, "***",
           ifelse(p_values < 0.05, "**",
           ifelse(p_values < 0.1,  "*", "")))
  # p-value column becomes character: rounded value plus its stars
  tidy_tab[["p-value"]] <- paste0(p_values, stars)
  tidy_tab
})
### Render the OLS coefficient table (with robust SEs) as a styled kable.
output$regression_result <- function(){
# NOTE(review): `colnames` is not a knitr::kable() argument (possibly meant
# `col.names`); it is absorbed by `...` -- verify the intended header output.
knitr::kable(reg_table(),colnames = NULL) %>%
column_spec(1:5, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
### regression equation
### Display the fitted model formula (outcome ~ controls) as HTML.
output$regression_equation <- renderUI({
  rhs_terms <- paste(input$Controls, collapse = " + ")
  equation_text <- paste("Linear regression: ", input$regression_outcome,
                         "~", rhs_terms, "<br/>")
  htmltools::HTML(paste(equation_text, sep = '<br/>'))
})
#### quantile regression table
#### Tidy quantile-regression table with significance stars.
#### Bug fix: as in reg_table, the original tested `< 0.05` twice, making the
#### `*` (p < 0.1) branch unreachable; stars are *** p<0.01, ** p<0.05, * p<0.1.
qreg_table <- reactive({
  tidy_tab <- broom::tidy(regression_result_Qreg(), se.type = "nid")
  colnames(tidy_tab) <- c("variable","estimate","std.error","test-statistic","p-value","quantile")
  tidy_tab[5] <- round(tidy_tab[5], 4)
  p_values <- tidy_tab[["p-value"]]
  stars <- ifelse(p_values < 0.01, "***",
           ifelse(p_values < 0.05, "**",
           ifelse(p_values < 0.1,  "*", "")))
  # p-value column becomes character: rounded value plus its stars
  tidy_tab[["p-value"]] <- paste0(p_values, stars)
  tidy_tab
})
### Render the quantile-regression coefficient table as a styled kable.
output$regression_result_Qreg <- function(){
# NOTE(review): `colnames` is not a knitr::kable() argument (possibly meant
# `col.names`); it is absorbed by `...` -- verify the intended header output.
knitr::kable(qreg_table(),colnames = NULL) %>%
column_spec(1:6, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
###############################################################################
######################## VAR #############################################
###############################################################################
#infotext information var
### Static info text for the VAR-forecasting tab. Fixes several user-facing
### typos ("usefull", "sationarity", "forcasted", "built the model", a garbled
### sentence and a missing space) -- markup and structure are unchanged.
output$info_var <- renderUI({
htmltools::HTML(paste(htmltools::h1(htmltools::strong("VAR-Forecasting"), align="center", style = "font-family: 'Times', serif;
font-weight: 30px; font-size: 30px; line-height: 1;"),
htmltools::h2(htmltools::strong("Summary Statistics") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("On this tab, you can find summary statistics for the selected variables, including a correlation plot.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Validity") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("Here, you can verify the predictive performance of the model. Performance metrics are calculated by cutting
the timeseries by the selected number of days and using this period as a testing-sample. Next to the metrics for the forecast,
the fitted values of the complete series are used to calculate the insample performance, in order to check for overfitting.
In addition, a Breusch-Godfrey LM test is provided to test for autocorrelation in the residuals. For unbiased results,
the residuals should be uncorrelated.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Actual Forecast") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("On this tab, you can observe the actual forecast for the selected number of future days. Again, the Breusch-Godfrey LM test is provided to test for autocorrelation in the residuals.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::tags$hr(),
htmltools::p("In this section, the user is able to calculate forecasts of the stock variable using Vector-Autoregressions (VAR).
VAR models are especially useful for forecasting a collection of related variables where no explicit interpretation is required.
Similar to the concept of Granger causality, it can be observed whether a timeseries is useful in forecasting another.
In a VAR model each variable has an equation including its own lagged values and the lagged values of the other variables.
For example, a VAR model with 2 variables and 1 lag is of the following form:",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
withMathJax("$$y_{1,t} = \\alpha_{1} + \\beta_{11}y_{1,t-1} + \\beta_{12}y_{2,t-1}+ \\epsilon_{i,t}$$"),
withMathJax("$$y_{2,t} = \\alpha_{2} + \\beta_{21}y_{1,t-1} + \\beta_{22}y_{2,t-1}+ \\epsilon_{2,t}$$"),
htmltools::p("A VAR is able to understand and use the relationships of several variables, allowing better description of dynamic behavior
and better forecasting results. Here, different variable combinations can be assessed and used for forecasting.
If only one variable is chosen, a univariate autoregressive model (AR) is applied and the variable is explained by its own lags only.",style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Analysis steps:") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("The analysis consists of the following steps, which are performed automatically: ",htmltools::tags$br(),
htmltools::div("1. The optimal number of lags is calculated",htmltools::tags$br(),
"2. Stationarity is repeatedly tested and the series are differenced until stationarity is achieved",htmltools::tags$br(),
"3. A VAR model is estimated with the optimal number of lags and the (if necessary) transformed series",htmltools::tags$br(),
"4. The residuals of the model are tested for serial correlation",htmltools::tags$br(),
"5. The series is forecasted n-steps ahead",style="margin-left: 1em;font-weight: 18px; font-size: 18px; line-height: 1;"),style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
htmltools::h2(htmltools::strong("Instructions:") ,style = "font-family: 'Times', serif; font-weight: 20px; font-size: 20px; line-height: 1;"),
htmltools::p("In order to perform the regression analysis, build the model using the panel on the left: ",htmltools::tags$br(),
htmltools::div("- select the dependent variable",htmltools::tags$br(),
"- select the control variables (optional)",htmltools::tags$br(),
"- choose whether sentiment variable should be included",htmltools::tags$br(),
"- if sentiment is added, switch to the tab ",htmltools::em("Filter sentiment input")," on top of the sidebar and specify the sentiment",htmltools::tags$br(),
"- the tab ",htmltools::em("Summary Statistics")," contains information on the selected variables",htmltools::tags$br(),
"- the tab ",htmltools::em("Validity")," performs a robustness check, including performance measurements for the model",htmltools::tags$br(),
"- the tab ",htmltools::em("Actual Forecast")," displays the results for future-forecasts", style="margin-left: 1em;font-weight: 18px; font-size: 18px; line-height: 1;"),
style = "font-weight: 18px; font-size: 18px; line-height: 1;")))
})
###################################################### dataset ###############################################################
## the dependent variable and the index control cannot both be DAX or DJI
# Guard: the market index may not be both the dependent variable and a control
# in the VAR model; emits a validation message on that combination.
output$var_con_check <- renderText({
  chosen_controls <- input$Controls_var
  conflict <- FALSE
  if ("DAX" %in% chosen_controls) {
    conflict <- input$Stock_Regression_var == "GDAXI"
  } else if ("DJI" %in% chosen_controls) {
    conflict <- input$Stock_Regression_var == "DJI"
  }
  if (conflict) {
    validate("Index control not feasible if Index chosen as dependent variable")
  }
})
##### check if the selected date range spans at least 30 days
# Validate the VAR date selection: at least one date must be picked, and a
# picked range must span 30 days or more.
output$var_date_check <- renderText({
  picked <- input$date_regression_var
  if (length(picked) > 1) {
    # size of the selected window in days
    span_days <- difftime(as.Date(picked[2]), as.Date(picked[1]),
                          units = c("days"))
    if (span_days < 30) {
      validate("Less than 30 days selected. Please choose more days.")
    }
  } else if (is.null(picked)) {
    validate("Need to select at least one day.")
  }
})
###flexible input for stocks: show either german or us companies
### Flexible stock selector for the VAR tab: German or US companies depending
### on the chosen country. Returns the widget directly instead of assigning to
### a local variable named `input`, which shadowed Shiny's `input` object.
output$stock_regression_var <- renderUI({
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  if (input$country_regression_var == "Germany") {
    selectizeInput("Stock_Regression_var", "Choose company or Index:",
                   #c(COMPONENTS_DE()[["Company.Name"]],"GDAXI"),
                   company_terms_stock_ger,
                   selected = "DAX", multiple = FALSE)
  } else {
    selectizeInput("Stock_Regression_var", "Choose company or Index:",
                   #c(COMPONENTS_US()[["Company.Name"]],"DJI"),
                   company_terms_stock_us,
                   selected = "Dow Jones Industrial", multiple = FALSE)
  }
})
###controls select
### Control-variable selector for the VAR model. The four global controls are
### shared between countries; only the market-index choice differs. Removes
### the previous duplication and the local `input` shadowing.
output$Controls_var <- renderUI({
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  shared_controls <- c("Google-Trends Coronavirus" = "trend_corona",
                       "VIX" = "VIX",
                       "Financial Distress Index" = "fin_distress",
                       "Economic Uncertainty Index" = "econ_uncertain")
  # country-specific index control (DAX for Germany, DJI otherwise)
  index_control <- if (input$country_regression_var == "Germany") {
    c("DAX" = "DAX")
  } else {
    c("DJI" = "DJI")
  }
  selectizeInput("Controls_var", "Control variables:",
                 c(shared_controls, index_control),
                 selected = "VIX", multiple = TRUE)
})
##dataframe
#### Assemble the VAR dataset: the dependent stock series (filtered to the
#### selected date range) left-joined with global controls, the market index,
#### and optional Corona measures; remaining NAs are set to 0.
dataset_var <- reactive({
validate(need(correct_path() == T, "Please choose the correct path"))
# --- dependent variable: selected stock, restricted to the chosen dates ---
if (input$country_regression_var == "Germany"){
data_reg <- dplyr::filter(stockdata_DE(), # filter by date only here; everything else is merged onto it
.data$name %in% .env$input$Stock_Regression_var &
.data$Dates >= .env$input$date_regression_var[1] & .data$Dates <= .env$input$date_regression_var[2])[c("Dates",input$regression_outcome_var,"name")] # TODO: make the outcome column fully flexible here
} else {
data_reg <- dplyr::filter(stockdata_US(), # filter by date only here; everything else is merged onto it
.data$name %in% .env$input$Stock_Regression_var &
.data$Dates >= .env$input$date_regression_var[1] & .data$Dates <= .env$input$date_regression_var[2])[c("Dates",input$regression_outcome_var,"name")] # TODO: make the outcome column fully flexible here
}
# --- controls: global indicators + market index (+ optional Corona metric) ---
if (input$country_regression_var == "Germany"){
global_controls <- global_controls_test_DE() #load controls
global_controls$Date <- as.Date(global_controls$Date) #transform date
colnames(global_controls) <- c("Date","VIX","trend_corona","fin_distress","Credit","Volatility","Safe.assets","Equity.valuation","econ_uncertain")
# DAX closing prices over the controls' date range, joined on as another control
dax <- dplyr::filter(stockdata_DE(),.data$name %in% c("GDAXI")&
.data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
colnames(dax)[1]<-"Date"
colnames(dax)[2] <- "DAX" # rename -> NOTE(review): reportedly not renamed in the final dataset -- cause unknown, verify
global_controls <- dplyr::left_join(dax,global_controls,by = c("Date")) #join final
# optionally append the selected Corona measure for Germany
if(input$corona_measurement_var!=""){
help <- CORONA_neu("Germany")[c("date",input$corona_measurement_var)]
colnames(help)[1]<-"Date"
global_controls <- dplyr::left_join(global_controls,help,by=c("Date"))
} else {}
}else {
global_controls <- global_controls_test_US() #same procedure as above
global_controls$Date <- as.Date(global_controls$Date)
colnames(global_controls) <- c("Date","VIX","trend_corona","fin_distress","Credit","Volatility","Safe.assets","Equity.valuation","econ_uncertain")
dji <- dplyr::filter(stockdata_US(),.data$name %in% c("DJI")&
.data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
colnames(dji)[1]<-"Date"
colnames(dji)[2] <- "DJI"
global_controls <- dplyr::left_join(dji,global_controls,by = c("Date"))
if(input$corona_measurement_var!=""){
help <- CORONA_neu("United States")[c("date",input$corona_measurement_var)]
colnames(help)[1]<-"Date"
global_controls <- dplyr::left_join(global_controls,help,by=c("Date"))
} else {}
}
# align key column name with the stock data, then merge everything together
names(global_controls)[1] <- "Dates"
data_reg2 <- dplyr::left_join(data_reg,global_controls,by = c("Dates"))
# replace remaining NAs (e.g. non-trading days) with 0
data_reg2[is.na(data_reg2)]<-0
data_reg2
})
#select desired variables
### Keep only the user-selected columns for the VAR: Dates, the outcome, then
### (when present) the chosen controls and the chosen Corona measure.
df_selected_controls_var <- reactive({
  full_data <- dataset_var()
  wanted <- c("Dates", input$regression_outcome_var)
  if (!is.null(input$Controls_var)) {
    wanted <- c(wanted, input$Controls_var)
  }
  if (input$corona_measurement_var != "") {
    wanted <- c(wanted, input$corona_measurement_var)
  }
  full_data[wanted]
})
# Keep the sentiment-filter tabset in sync with the VAR sentiment type.
observeEvent(input$Sentiment_type_var, { #Observe event from input (model choices)
req(input$Sentiment_type_var)
updateTabsetPanel(session, "params", selected = input$Sentiment_type_var)
})
# Keep the industry tabset in sync with the VAR industry-sentiment choice.
observeEvent(input$industry_sentiment_var, { #Observe event from input (model choices)
req(input$industry_sentiment_var)
updateTabsetPanel(session, "industry_tab", selected = input$industry_sentiment_var)
})
# Reset button: clear the VAR control-variable and Corona-measure selections.
observeEvent(input$reset_regression_var,{
updateSelectizeInput(session,"Controls_var",selected = "")
updateSelectizeInput(session,"corona_measurement_var",selected = "")
})
#merge sentiment with control+dep vars
# Merge the sentiment series onto the VAR controls + dependent variable. When
# sentiment is opted out, only the date column of the sentiment query is kept
# so the join leaves the remaining columns untouched. Unlike the regression
# counterpart, the Dates column is retained here.
final_regression_df_var <- reactive ({
  if (input$senti_yesno_var == TRUE) {
    sentiment <- get_sentiment_var()      # date + aggregated sentiment
  } else {
    sentiment <- get_sentiment_var()[1]   # date column only
  }
  sentiment$created_at <- as.Date(sentiment$created_at)
  selected <- df_selected_controls_var()
  dplyr::left_join(selected, sentiment, by = c("Dates" = "created_at"))
})
################################################################# sql data var
### call to our sql database to get sentiment with the respective filter
# Normalise the VAR date input to a two-element (from, to) vector; a single
# picked date becomes a one-day range.
dates_var <- reactive({
  picked <- input$date_regression_var
  if (length(picked) > 1) picked else c(picked, picked)
})
### Build the SQL query fetching the aggregated sentiment for the VAR model.
### Fix: `TRUE` instead of the reassignable shorthand `T`.
querry_sentiment_model_var <- reactive({
#### tweet-length filter: 81 characters when "long tweets only" is selected
if (input$tweet_length_var == TRUE){
tweetLength <- 81
} else {
tweetLength <- 0
}
dates <- dates_var()
###### choose the table: global summary stats vs. per-company stats
if (input$sentiment_company_var == "NoFilter"){
test <- glue('select created_at, {input$aggregation_var} from sum_stats_{tolower(input$language_var)} where
created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
retweets_count = {input$minRetweets_var} and likes_count = {input$minLikes_var} and
tweet_length = {tweetLength}')
} else {
# normalise umlaut encoding of the company name for the DB lookup
# NOTE(review): pattern and replacement render identically here -- verify the
# intended source/target encodings
comp <- gsub("ö","ö", input$sentiment_company_var)
comp <- gsub("ü", "ü", comp)
test<-glue('SELECT created_at, {input$aggregation_var} FROM sum_stats_companies WHERE
created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
retweets_count = {input$minRetweets_var} and likes_count = {input$minLikes_var} and
tweet_length = {tweetLength} and company = "{comp}" and
language = "{tolower(input$language_var)}"' )
}
test
})
### Query the sentiment data from the SQL database for the VAR model.
get_sentiment_var <- reactive({
  # Preconditions: valid data path, live DB connection, at least one date.
  validate(need(correct_path() == T, "Please choose the correct path"))
  validate(need(database_connector(), "Could not connect to database"))
  validate(need(!is.null(input$date_regression_var), "Please select a date."))
  connection <- database_connector()
  sentiment_df <- DBI::dbGetQuery(connection, querry_sentiment_model_var())
  # normalise umlaut encoding in company names, when that column is returned
  # NOTE(review): pattern and replacement render identically here -- verify
  # the intended encodings
  if ("company" %in% names(sentiment_df)) {
    sentiment_df$company <- gsub("ö", "ö", sentiment_df$company)
    sentiment_df$company <- gsub("ü", "ü", sentiment_df$company)
  }
  sentiment_df
})
####################################################Summary statistics #####################################################
#summary table
### Summary statistics table for the VAR variables (Dates column excluded):
### selected columns of psych::describe() (3,4,5,8,9 = mean, sd, median,
### min, max).
df_need <- reactive({
  summary_tab <- round(psych::describe(final_regression_df_var()[-1])[c(3, 4, 5, 8, 9)], 2)
  # Bug fix: the old check `nrow(df_need == 1)` counted rows of a comparison
  # matrix and was therefore truthy for any non-empty table; the intent
  # (evidenced by the unused `nrow(df_need)==1` variable) was a row-count test.
  if (nrow(summary_tab) == 1) {
    # single-variable case: label the lone row with the outcome name
    row.names(summary_tab)[1] <- input$regression_outcome_var
  }
  summary_tab
})
### Render the VAR summary statistics as a styled kable table.
output$var_summary <- function(){
#colnames(df_need)<- "value"
# NOTE(review): `colnames` is not a knitr::kable() argument (possibly meant
# `col.names`); it is absorbed by `...` -- verify the intended header output.
knitr::kable(df_need(),colnames = NULL) %>%
column_spec(1:6, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
### correlation plot
### Pairwise correlation plot of the VAR variables (Dates column excluded);
### columns containing any NA are dropped so GGally::ggpairs does not fail.
output$correlation_var <- renderPlot({
  var_data <- final_regression_df_var()[-1]
  complete_columns <- var_data %>% select_if(~ !any(is.na(.)))
  if (any(is.na(var_data))) {
    # user notification about dropped columns is currently disabled
    # showNotification(glue("Removed {colnames(var_data)[ncol(var_data)]} for plot due to missing values"),
    #                  type = "message")
  }
  GGally::ggpairs(complete_columns,
                  upper = list(continuous = wrap(ggally_cor, size = 8)),
                  lower = list(continuous = 'smooth'))
})
################################################## Validity-Tab
###load reactive dataframe:
### Debug/preview output: first rows of the merged VAR dataset.
output$datensatz_var <- renderPrint ({
head(final_regression_df_var())
})
###for validity cut dataframe at selected number of forecast days
### Training sample for the validity check: the full series minus the last
### `ahead` rows (held out as the test period) and minus the Dates column.
forecast_data <- reactive({
  full_df <- final_regression_df_var()
  full_df[1:(nrow(full_df) - input$ahead), -1, drop = FALSE]
})
### true values for the days that were cut off
### Observed values of the target variable (column 2) for the held-out rows.
actual_values <- reactive({
  full_df <- final_regression_df_var()
  full_df[((nrow(full_df) + 1) - input$ahead):nrow(full_df), 2]
})
#test for stationarity and difference if necessary
# Difference the series once if the ADF test cannot reject non-stationarity
# (p > 0.1).
# NOTE(review): only the FIRST column drives the decision, yet ALL columns are
# differenced together -- confirm this is intended.
stationary <- reactive({
data <- forecast_data()
if (tseries::adf.test(data[[1]],k=2)$p.value > 0.1){
for (i in 1:ncol(data)){
data[i] <- c(diff(data[[i]],1),NA) # first difference, NA-padded to keep length
}
data <- drop_na(data) # drop the NA padding row (tidyr::drop_na)
}else{}
data
})
#optimal lags
### Optimal lag order by the Schwarz criterion, searching up to 10 lags.
optlags_var <- reactive({
  lag_selection <- vars::VARselect(stationary(), lag.max = 10, type = "none")$selection
  lag_selection[["SC(n)"]]
})
#fit model
### Fit the model: a univariate AR (via stats::arima) when only one series is
### selected, otherwise a VAR with the optimal lag order.
var_model <- reactive({
  if (ncol(forecast_data()) == 1) {
    stats::arima(stationary(), order = c(optlags_var(), 0, 0))
  } else {
    vars::VAR(stationary(), p = optlags_var(), type = "none")
  }
})
#test for autocorrelation: rejection = bad (means presence of correlated errors)
### Residual autocorrelation test: Box-Pierce for the univariate model,
### Breusch-Godfrey (BG) for the VAR. Rejection indicates correlated errors.
serial_test <- reactive({
  if (ncol(forecast_data()) == 1) {
    stats::Box.test(var_model()$residuals, type = "Box-Pierce")
  } else {
    vars::serial.test(var_model(), type = "BG", lags.bg = optlags_var())
  }
})
#forecast
# n-step-ahead forecast of the target series. When the data were differenced
# for stationarity (detected via the row-count mismatch caused by drop_na),
# the differenced forecasts are re-integrated by cumulative summation from the
# last observed level.
forecast_var <- reactive({
fcast <- stats::predict(var_model(), n.ahead = input$ahead)
# row counts differ iff stationary() dropped the NA padding row, i.e. the
# model was fit on differenced data
if(nrow(stationary())!=nrow(forecast_data())){
if (ncol(forecast_data()) == 1) {
x <- fcast$pred[1:input$ahead] # ARIMA point forecasts (differenced scale)
x <- cumsum(x) + forecast_data()[nrow(forecast_data()),1] # undo differencing
}else {
x <- fcast$fcst[[1]] # VAR forecast matrix of the first (target) variable
x <- x[,1] # point forecasts only
x <- cumsum(x) + forecast_data()[nrow(forecast_data()),1] # undo differencing
}
}else{
if (ncol(forecast_data()) == 1) {
x <- fcast$pred[1:input$ahead]
}else {
x <- fcast$fcst[[1]]
x <- x[,1]
}
}
x
})
#plot the actual vs. the predicted forecast
# Plot actual vs. predicted values: either the forecast window only, or the
# whole series with the prediction start marked by a vertical event line.
output$plot_forecast <- dygraphs::renderDygraph({
  if (input$var_which_plot == "Forecasted period only"){
    # dates of the held-out horizon plus forecasted and actual values
    plot <- data.frame(final_regression_df_var()$Dates[(nrow(forecast_data())+1):(nrow(forecast_data())+input$ahead)],
                       forecast_var(),
                       actual_values())
    colnames(plot) <- c("a","forecast","actual")
    help <- plot
    plot <- xts::xts(plot[c("forecast","actual")],order.by=plot[["a"]])
    dygraphs::dygraph(plot)%>%
      dygraphs::dyShading(from = min(help$a), to = max(help$a), color = "white")
  }else{
    # full series: observed values followed by the forecasts
    plot <- data.frame(final_regression_df_var()$Dates,
                       c(forecast_data()[[1]],forecast_var()),
                       final_regression_df_var()[2])
    colnames(plot) <- c("a","forecast","actual")
    help <- plot
    plot <- xts::xts(plot[c("forecast","actual")],order.by=plot[["a"]])
    dygraphs::dygraph(plot) %>%
      # bug fix: dyEvent() was the only unqualified dygraphs call in this
      # file; it errors at runtime when the package is not attached
      dygraphs::dyEvent(final_regression_df_var()$Dates[(nrow(forecast_data()))], "Start of prediction", labelLoc = "bottom")%>%
      dygraphs::dyShading(from = min(help$a), to = max(help$a), color = "white")
  }
})
##calculate performance based on fitted values of complete series
# In-sample fitted values of the first endogenous variable of the VAR,
# integrated back to levels when the data was differenced. Not available for
# the univariate ARIMA fallback (returns NA).
insample_var <- reactive({
# NOTE(review): stationary() is passed as the second positional argument of
# stats::predict here; predict.varest expects n.ahead rather than new data --
# confirm this argument is actually used by the method.
fcast <- stats::predict(var_model(), stationary())
# differenced case: cumulate fitted values starting from the first observation
if(nrow(stationary())!=nrow(forecast_data())){
if (ncol(forecast_data()) == 1) {
x <- NA
}else {
x <- fcast$model$varresult[[1]]$fitted.values
x <- cumsum(x) + forecast_data()[1,1]
}
}else{
if (ncol(forecast_data()) == 1) {
x <- NA
}else {
x <- fcast$model$varresult[[1]]$fitted.values
}
}
x
})
# Performance metrics (RMSE, MAE, MAPE) of the out-of-sample forecast and,
# for VAR models, of the in-sample fit; rendered as a styled HTML kable.
output$var_metrics <- function(){
  if (ncol(forecast_data()) == 1){
    # in-sample fitted values are not extracted for the ARIMA fallback
    test <- c("Not available for ARIMA","Not available for ARIMA","Not available for ARIMA")
  }else{
    # align the observed values with the length of the fitted series
    forecast_data <- forecast_data()[1:length(insample_var()),1]
    test <- c(sqrt(mean((insample_var()-forecast_data)^2)),
              mean(abs(insample_var()-forecast_data)),
              mean(abs((forecast_data-insample_var())/forecast_data * 100)))
  }
  df_need <- data.frame(c(sqrt(mean((forecast_var()-actual_values())^2)),
                          mean(abs(forecast_var()-actual_values())),
                          mean(abs((actual_values()-forecast_var())/actual_values()) * 100)),
                        row.names = c("RMSE","MAE","MAPE"))
  colnames(df_need)<- "forecast"
  df_need$insample <- test
  # bug fix: kable() has no `colnames` argument -- the header-suppressing
  # argument is spelled `col.names`
  knitr::kable(df_need, col.names = NULL) %>%
    # qualified with kableExtra:: for consistency with kable_styling();
    # bare column_spec()/row_spec() fail when the package is not attached
    kableExtra::column_spec(1:3, color = "lightgrey") %>%
    kableExtra::column_spec(1, bold = TRUE, color = "white") %>%
    kableExtra::row_spec(1, bold = TRUE) %>%
    kableExtra::kable_styling(c("striped","hover"), full_width = FALSE,
                              position = "center",
                              font_size = 16)
}
#print serial correlation test
# Print the full autocorrelation test object for inspection.
output$serial_test <- renderPrint({
  print(serial_test())
})
#print result of correlation test as sentence
# Verbalise the autocorrelation test result as two HTML lines: which test was
# run, and whether serially uncorrelated residuals can be rejected at p = 0.1.
output$var <- renderUI({
  if (ncol(forecast_data()) == 1) {
    str1 <- "Box-Pierce test statistic to test for autocorrelation in the AR-residuals:"
    p_val <- serial_test()$p.value
  } else {
    str1 <- "Breusch-Godfrey LM-statistic to test for autocorrelation in the AR-residuals:"
    p_val <- serial_test()$serial$p.value
  }
  if (p_val > 0.1) {
    str2 <- "The hypothesis of serially uncorrelated residuals cannot be rejected."
  } else {
    str2 <- "The hypothesis of serially uncorrelated residuals can be rejected."
  }
  htmltools::HTML(paste(str1, str2, sep = '<br/>'))
})
################################################## actual forecast-tab
#data-frame
# Full data set (date column dropped) for the real out-of-sample forecast.
forecast_data_real <- reactive({
  final_regression_df_var()[, -1, drop = FALSE]
})
#test stationarity and difference if required
# ADF test on the first series; difference all columns when a unit root
# cannot be rejected (p > 0.1).
stationary_real <- reactive({
  data <- forecast_data_real()
  adf_p <- tseries::adf.test(data[[1]], k = 2)$p.value
  if (adf_p > 0.1) {
    for (col in seq_len(ncol(data))) {
      data[col] <- c(diff(data[[col]], 1), NA)
    }
    data <- drop_na(data)
  }
  data
})
#optimal lags
# Lag order minimising the Schwarz criterion.
optlags_var_real <- reactive({
  sel <- vars::VARselect(stationary_real(), lag.max = 10, type = "none")
  sel$selection[["SC(n)"]]
})
#fit model
# Univariate series -> AR model via arima(); several series -> VAR.
var_model_real <- reactive({
  if (ncol(forecast_data_real()) == 1) {
    stats::arima(stationary_real(), order = c(optlags_var_real(), 0, 0))
  } else {
    vars::VAR(stationary_real(), p = optlags_var_real(), type = "none")
  }
})
###correlation test for residuals
# Residual autocorrelation test for the real-forecast model.
serial_test_real <- reactive({
  # bug fix: this previously branched on forecast_data() (the validity-tab
  # data) instead of forecast_data_real(), so the wrong test could be chosen
  if (ncol(forecast_data_real()) == 1) {
    test <- stats::Box.test(var_model_real()$residuals, type = "Box-Pierce")
  } else {
    test <- vars::serial.test(var_model_real(), type = "BG", lags.bg = optlags_var_real())
  }
  test
})
#forecast
# Forecast `input$ahead` steps ahead on the full series; integrate the
# predictions back to levels when stationary_real() differenced the data.
forecast_var_real <- reactive({
  fcast <- stats::predict(var_model_real(), n.ahead = input$ahead)
  # bug fix: this condition previously compared stationary()/forecast_data()
  # from the validity tab; it must compare the *_real reactives this model
  # was actually fit on
  if (nrow(stationary_real()) != nrow(forecast_data_real())) {
    if (ncol(forecast_data_real()) == 1) {
      x <- fcast$pred[1:input$ahead]
      x <- cumsum(x) + forecast_data_real()[nrow(forecast_data_real()), 1]
    } else {
      x <- fcast$fcst[[1]]
      x <- x[, 1]
      x <- cumsum(x) + forecast_data_real()[nrow(forecast_data_real()), 1]
    }
  } else {
    if (ncol(forecast_data_real()) == 1) {
      x <- fcast$pred[1:input$ahead]
    } else {
      x <- fcast$fcst[[1]]
      x <- x[, 1]
    }
  }
  x
})
### plot the whole series with the forcasted values
# Plot the observed series extended by the forecasted values; the start of
# the prediction window is marked with a vertical event line.
output$plot_forecast_real <- dygraphs::renderDygraph({
  # extend the date axis by `ahead` calendar days past the last observation
  plot <- data.frame(c(final_regression_df_var()[["Dates"]],seq(as.Date(tail(final_regression_df_var()$Dates,1))+1,by = "day",length.out = input$ahead)),
                     c(forecast_data_real()[[1]],forecast_var_real()))
  colnames(plot) <- c("a","forecast")
  help<-plot
  plot <- xts::xts(plot["forecast"],order.by=plot[["a"]])
  dygraphs::dygraph(plot) %>%
    # bug fix: bare dyEvent() errors when dygraphs is not attached; every
    # other dygraphs call in this file is namespace-qualified
    dygraphs::dyEvent(max(final_regression_df_var()$Dates), "Start of prediction", labelLoc = "bottom")%>%
    dygraphs::dyShading(from = min(help$a), to = max(help$a), color = "white")
})
#correlation test print
# Show the raw test object of the real-forecast model.
output$serial_test_real <- renderPrint({
  print(serial_test_real())
})
#sentence correlation test
# Verbalise the autocorrelation test of the real-forecast model.
output$var_real <- renderUI({
  # bug fix: this previously branched on forecast_data() (validity tab)
  # instead of forecast_data_real(), which could label the wrong statistic
  if (ncol(forecast_data_real()) == 1) {
    str1 <- "Box-Pierce test statistic to test for autocorrelation in the AR-residuals:"
    if (serial_test_real()$p.value > 0.1){
      str2 <- "The hypothesis of serially uncorrelated residuals cannot be rejected."
    } else{
      str2 <- "The hypothesis of serially uncorrelated residuals can be rejected."
    }
  } else {
    str1 <- "Breusch-Godfrey LM-statistic to test for autocorrelation in the AR-residuals:"
    if (serial_test_real()$serial$p.value > 0.1){
      str2 <- "The hypothesis of serially uncorrelated residuals cannot be rejected."
    } else {
      str2 <- "The hypothesis of serially uncorrelated residuals can be rejected."
    }
  }
  htmltools::HTML(paste(str1,str2, sep = '<br/>'))
})
#################################################################################################### twitter
###########################################################################################
################################### Instructions #########################################
###########################################################################################
# start introjs when button is pressed with custom options and events
# Start the guided tour of the descriptive tab when the button is pressed.
# NOTE(review): the input id "instrucitons_desc" is misspelled; it must stay
# in sync with the matching UI element, so confirm there before renaming.
observeEvent(input$instrucitons_desc,{
guide1$init()$start()
})
# Cicerone tour for the descriptive tab: each step() highlights one UI
# element (first argument = element id) with a title and description.
guide1 <- cicerone::Cicerone$
new()$
step(
el = "lang_instr",
title = "Language",
description = "Here you can select the language of the tweets."
)$
step(
"comp_instr",
"Select a company",
"Here you can either select company specific tweets or tweets that were randomly scraped
without any search term"
)$
step(
"date_instr",
"Date range",
"Here you can select the date range to analyse. We have data starting from 2018-11-30 until yesterday.
The app is updated daily. You can always reset your choice to the entire date range."
)$
step(
"desc_filter_instr",
"Filters",
"Here you may choose to filter the tweets according to minimum number of retweets or likes a tweet needs to have.
You can also display only 'Long Tweets' which are tweets with at least 80 characters"
)$
step(
"metric_desc_instr",
"Time series",
"In this section you can select a metric to display in the time series plots. Sentiment is also available as weighted metrics.
You can choose between the mean, the standard deviation and the median. You can also choose to show the number of tweets per day.
Multiple selections are possible. Once multiple metrics are selected all values get scaled with mean 0 and SD 1 in order to allow
for plotting on a similar scale"
)$
step(
"time_series1_instr",
"Time series",
"Here the time series for the current selection are depicted. You can zoom in (select area with mouse) and out (double click) of the plot.
The ribbon below is only shown when a single metric is selected. It is green when the values are above and red when they are below the average over
the selected period."
)$
step(
"plot_saver_button",
"Save button",
"You can temporarily store a plot in the area below the first plot. This may help you with comparing plots from different companies
or comparing different metrics without overcrowding the plot."
)$
step(
"time_series2_instr",
"Saved time series",
"In this area the temporarily saved plot will appear."
)$
step(
"sum_stats_table_instr",
"Summary Statistics",
"Here you can see the summary statistics for the currently selected inputs."
)$
step(
"histo_instr",
"Histogram",
"Here you can select for which metric you would like to see the histogram. You may choose one of sentiment, retweets, likes or tweet length.
You can adjust the number of bins of the histogram.
"
)$
step(
"log_scale_instr",
"Log scale",
"For retweets, likes and tweet length it may be more interesting to show the log transformed distribution as these values have many outliers.
For sentiment this option is blocked as a log transformation of negative values is not possible (sentiment goes from -1 to 1)."
)$
step(
"histo_plot_instr",
"Histogram output",
"Here the histogram for the current selection is shown. Note that the histogram also depends on the tweet type, date range, retweets, likes
and tweets length filters from above."
)
# Start the guided tour of the comparison tab.
observeEvent(input$instructions_comp,{
guide2$init()$start()
})
# Cicerone tour for the comparison tab (stocks / twitter / control variables).
guide2 <- cicerone::Cicerone$
new()$
step(
el = "stock_instr",
title = "Stocks",
description = "Here you can select any stock from the DAX or DJI or the indeces themselves. You may choose to show either the
returns or the adjusted closing prices. Multiple selection are possible. Once more than one stock is selected and the metric
'Adjusted Close' is selected the values will be automatically be set to base value of 100 at the start of the period."
)$
step(
"stock_roll_instr",
"Moving Average",
"With the switch you can depict a 7-day moving average instead of the normal daily values."
)$
step(
"stocks_comp_plot_instr",
"Stock Plot",
"Here the stock values are depicted. Note that you can zoom in and out (through double click) of the plot. The zooming will affect
all plots in this tab."
)$
step(
"twitter_comp_instr",
"Twitter",
"Here you have the same options as on the previous tabs. You can also choose to see the 7-day moving average by toggling
the switch at the bottom."
)$
step(
"twitter_comp_plot_instr",
"Twitter Plot",
"Like above the plot has a zooming ability and is scaled once mutliple metrics are selected."
)$
step(
"covid_comp_instr",
"Control variables",
"Here you can select different COVID-19 related metrics or other financial control variables
for Germany and the US (can plot both simultaneously). Again you may choose
to depict a 7-day moving average."
)$
step(
"covid_plot_comp_instr",
"Control Variables Plot",
"Here the plot for the control variables is shown. Note that COVID-19 data is only available starting in
March 2020. Again, you can zoom in and out of the plot."
)
# Start the guided tour of the word-frequency (exploration) tab.
# NOTE(review): "instrucitons_expl" is misspelled; must match the UI id.
observeEvent(input$instrucitons_expl, {
guide_expl$init()$start()
})
# Cicerone tour for the word-frequency tab (bar plot / word cloud).
guide_expl <- cicerone::Cicerone$
new()$
step(
el = "tweets_all_expl",
title = "Tweets",
description = "For the tweets you have the same filter options as on the previous tab."
)$
step(
"emoji_instr",
"Emoji filter",
"Here you can omit words the stem form emoji replacements. Click the question mark for more info."
)$
step(
"plot_type_expl_instr",
"Plot type",
"You can either show a bar plot with the most frequent words or a word cloud. Both outputs have a hovering ability.
As the option for the word cloud and bar plot vary simply click on the question marks for further information on their
controls. Note that you have an additional search option when selecting bigrams."
)$
step("ngram_sel_instr",
"NGram Selection",
"You can decide between showing single words or bigrams. For bigrams you also have the option to search for bigrams
containing specific words."
)$
step(
"num_words_expl_instr",
"Info",
"Here we tell you the number of total umber of tweets for the current selection as well the the number of unique words/bigrams.
Note that it can happen that 0 words/bigrams are found despite the info showing a positive amount of tweets
for the current selection. This is due to the pre-processing of the data where we set an abosulte minimum frequency requirement of 5 and 2 (or
1% and 0.1% of total tweets per day depending on which is higher) occurences per day for words and bigrams respectively. "
)$
step(
"expl_plots_instr",
"Outputs",
"Here the bar plot or wordcloud are shown. Note that the wordcloud can have a bug that it disappears when resizing the window. This bug can be avoided
by installing the github version of the wordcloud2 package with : devtools::install_github('lchiffon/wordcloud2')",
position = "bottom"
)
##### instructions for network plot
# Start the guided tour of the network-analysis tab.
observeEvent(input$net_instr, {
guide_net$init()$start()
})
# Cicerone tour for the network tab (bigram / word-pair networks).
guide_net <- cicerone::Cicerone$
new()$
step(
el = "tweet_net_instr",
title = "Tweets",
description = "Here you have similar controls for selecting tweets as beofre. However, now you can select and arbitaray number of minimum
likes, retweets and can also filter for specific sentiments. However, a maximum of only 2 days can analysed at a time."
)$
step(
"sentiment_net_instr",
"Sentiment",
"Here you can select to show tweets for certain range of sentiments."
)$
step(
"emoji_net_instr",
"Emoji words",
"You can again omit emoji words from your analysis."
)$
step(
"search_net_instr",
"Search terms",
"Here you can look for tweets containing a certain word or tweets from specific users. Note that in both cases the search is not case sensitive and search
terms will be stemmed to fit the cleaned tweet data. You can not only look for exact matches in usernames but also partial. For example you may search
for all tweets from users that have the word trump in their usernames."
)$
step(
"net_type_instr",
"Word Combinations",
"Here you can either select to analyse bigrams or word pairs."
)$
step(
"adv_settings_net_instr",
"Adv. Settings",
"Toggling this button allows you to access the advanced settings. Here you can adjust the minimum thresholds of word occruences and correlations."
)$
step(
"buttons_net_instr",
"Buttons",
"As the network analysis takes some time we want you to actively ask for the computation. You may cancel the process (takes a few seconds) or remove the
already computed network."
)$
step(
"num_tweets_info_net_instr",
"Info",
"Here you get information about the number of available tweets for your current selection. This updates in realtime."
)$
step(
"placeholder",
"Network plot",
"Here the network will appear once it has been computed after the button has been pressed"
)$
step(
"data_table_instr",
"Data",
"Here you can take a look at the tweets contained in the network. This also updates in realtime."
)
############################################################################
################# Directory ###############################################
###########################################################################
# selecting directory
# find home direcoty of user
# Roots offered in the directory chooser: the user's home directory, the R
# installation and all mounted volumes.
volumes <- c(Home = fs::path_home(), "R Installation" = R.home(), shinyFiles::getVolumes()())
# allow for searching directories
# Wire up the shinyFiles directory picker (directory creation disabled).
shinyFiles::shinyDirChoose(input, "directory", roots = volumes, session = session, restrictions = system.file(package = "base"), allowDirCreate = FALSE)
##### set wd with shinyfiles
# Set the working directory from the shinyFiles picker.
observeEvent(input$directory,{
  # Before any directory has been chosen the input is a plain integer.
  if (is.integer(input$directory)) {
    # bug fix: setwd(volumes) passed the whole roots vector, but setwd()
    # accepts exactly one path -- fall back to the user's home directory
    setwd(volumes[["Home"]])
  } else {
    # a directory was chosen: resolve it against the roots and switch to it
    path <- shinyFiles::parseDirPath(volumes, input$directory)
    setwd(path)
  }
})
####### manually entered path
# Manually entered path: show feedback and switch the working directory when
# the entered path exists.
# NOTE(review): assigning output$path_checker_man inside an observer works,
# but re-registers the render on every button press.
observeEvent(input$dir_path_man_btn, {
if (!dir.exists(input$dir_path_man)){
output$path_checker_man <- renderText({
"Please enter a valid path"
})
} else {
# clear the warning once the path is valid
output$path_checker_man <- renderText({
""
})
}
#### require that the path exists before changing the working directory
validate(need(dir.exists(input$dir_path_man), "Please enter a valid path"))
### when the manual path button is pressed set wd to the entered path
setwd(input$dir_path_man)
})
#### checks if sqlitstudio dir exists, if yes then wd is set correctly
# TRUE when the working directory contains the SQLiteStudio folder, i.e. the
# data path is set correctly. The bare input references only register the two
# path inputs as reactive dependencies so this re-evaluates after each change.
correct_path <- reactive({
  input$directory
  input$dir_path_man_btn
  dir.exists("SQLiteStudio")
})
####### output text as feedback for user whether directory seems corrct
# Feedback text telling the user whether the current directory looks correct.
# The bare input references register both path inputs as dependencies.
output$directorypath <- renderText({
input$directory
input$dir_path_man_btn
#### when the SQLiteStudio dir exists the path is considered correct
if(dir.exists("SQLiteStudio")) {
glue("Current path is set to: {getwd()}. This seems correct.")
} else {
#### if SQLiteStudio does not exist the path is probably wrong
glue("Current path is set to: {getwd()}. Data could not be found in this \n
directory. Are you sure it is set correctly?")
}
})
###### connect to database when path has been set correctly
# Open a connection to the SQLite database, or return NULL (invisibly) when
# the data directory is missing. Callers are responsible for disconnecting.
database_connector <- function(){
  if (!dir.exists("SQLiteStudio")) {
    return(invisible(NULL))
  }
  DBI::dbConnect(RSQLite::SQLite(), "SQLiteStudio/databases/clean_database.db")
}
###############################################################################
##################### twitter logo directory page ############################
###############################################################################
# Render the Twitter logo from the data directory on the directory page.
output$twitter_logo <- renderImage({
  ###### the data path needs to be configured first
  req(correct_path() == TRUE)
  ##### path to the png inside the data directory
  filename <- "shiny/images/twitter_logo_wordcloud2.png"
  # bug fix: contentType must be a valid MIME type ("image/png");
  # "Images/png" is not one
  list(src = filename,
       alt = "This is the Twitter Logo",
       contentType = "image/png",
       height = "100%", width = "80%")
}, deleteFile = FALSE)
###############################################################################
############################### twitter descriptive ###########################
###############################################################################
### reset daterange
# Reset the date picker to the full available range when the button is pressed.
# NOTE(review): date_avail is defined outside this chunk -- presumably the
# most recent date with data (the app is updated daily); verify at its source.
observeEvent(input$reset_dates_desc,{
#### when reset button is pressed, reset date range to entire date range available
shinyWidgets::updateAirDateInput(session, "dates_desc",
clear = T,
value = c("2018-11-30", date_avail))
})
######## disconnect from database after exit
# Close the database connection (when one can be opened) after the session
# ends, so the SQLite file is not left locked.
cancel.onSessionEnded <- session$onSessionEnded(function() {
  con <- database_connector()
  if (is.null(con)) {
    return(invisible(NULL))
  }
  DBI::dbDisconnect(con)
})
###### create reactive long variable for filtering from user input
# Minimum tweet length implied by the "Long Tweets" switch: 81 characters
# when enabled, otherwise 0 (no restriction).
long <- reactive({
  if (input$long == TRUE) 81 else 0
})
################################## date_variable that accounts for single dates
# Date selection normalised to a length-2 vector: a single picked date is
# duplicated so downstream code can always index [1] and [2] without special
# cases.
dates_desc <- reactive({
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  validate(need(!is.null(input$dates_desc), "Please select a date."))
  selected <- input$dates_desc
  if (length(selected) > 1) {
    selected
  } else {
    c(selected, selected)
  }
})
################################### path finder for histo files
# Build the CSV file name holding the pre-computed histogram data for the
# current metric / language / company / filter combination.
querry_histo <- reactive({
##### the working directory must point at the data
validate(need(correct_path() == T, "Please choose the correct path"))
##### need at least one date selected
validate(need(!is.null(input$dates_desc), "Please select a date."))
#### convert the long switch to the name add-on used in the file names
if (input$long == T){
long_name <- "long_only"
} else{
long_name <- "all"
}
#### language add-on ("En"/"De" style, see lang_converter)
lang <- lang_converter()
### account for the case where sentiment is selected:
# "sentiment" is referenced as "senti" in the file names
value_var <- stringr::str_replace(input$histo_value,"sentiment", "senti")
# "tweet_length" is referenced as "long" in the file names
value_var <- stringr::str_replace(value_var, "tweet_length", "long")
# unfiltered tweets use language-specific files
if (input$comp == "NoFilter"){
#### filename for nofilter data
glue("histo_{value_var}_{lang}_NoFilter_rt_{input$rt}_li_{input$likes}_lo_{long_name}.csv")
} else { # for a chosen company
req(!is.null(input$comp))
#### filename for companies
glue("histo_{value_var}_{input$comp}_rt_{input$rt}_li_{input$likes}_lo_{long_name}.csv")
}
}) #reactive closed
##################### summary statistics table data and time series data
# Build the SQL query for the summary-statistics table matching the current
# retweet/like/length filters, company and language.
querry_sum_stats_table <- reactive({
  #### tweet_length filter: 81 when "Long Tweets" is on, otherwise 0
  long <- long()
  ##### unfiltered tweets live in per-language tables
  if (input$comp == "NoFilter"){
    table_name <- glue("sum_stats_{tolower(input$lang)}")
    glue('SELECT * FROM {table_name} WHERE
           retweets_count = {input$rt} and likes_count = {input$likes} and
           tweet_length = {long}' )
  } else { # a company is chosen
    ### Münchener Rück / Deutsche Börse are stored both with proper and with
    ### mojibake umlauts, so both spellings are matched. The OR is wrapped in
    ### parentheses -- SQL AND binds tighter than OR, so without them the
    ### second company alternative ignored every other filter (which produced
    ### the duplicate rows removed downstream).
    if( input$comp == "Münchener Rück"){
      glue('SELECT * FROM sum_stats_companies WHERE
             retweets_count = {input$rt} and likes_count = {input$likes} and
             tweet_length = {long} and
             (company = "Münchener Rück" or company = "MÃ¼nchener RÃ¼ck") and
             language = "{tolower(input$lang)}"' )
    } else if (input$comp == "Deutsche Börse") {
      ### bug fix: this branch previously queried "Münchener Rück"
      glue('SELECT * FROM sum_stats_companies WHERE
             retweets_count = {input$rt} and likes_count = {input$likes} and
             tweet_length = {long} and
             (company = "Deutsche Börse" or company = "Deutsche BÃ¶rse") and
             language = "{tolower(input$lang)}"' )
    } else {
      glue('SELECT * FROM sum_stats_companies WHERE
             retweets_count = {input$rt} and likes_count = {input$likes} and
             tweet_length = {long} and company = "{input$comp}" and
             language = "{tolower(input$lang)}"' )
    }
  }
})
#########################################################################
############################# get data for sum stats table
# Fetch the summary-statistics rows from the SQLite database for the current
# filter query (querry_sum_stats_table()), deduplicated.
get_data_sum_stats_tables <- reactive({
###### the working directory must point at the data
validate(need(correct_path() == T, "Please choose the correct path"))
###### need a working database connection
validate(need(database_connector(), "Could not connect to database"))
###### need at least one date selected
validate(need(!is.null(input$dates_desc), "Please select a date."))
####### store database connection
con <- database_connector()
##### guard again against a NULL connection
string_value <- is.null(con)
req(!string_value)
###### run the prepared query against the database
df_need <- DBI::dbGetQuery(con, querry_sum_stats_table())
#### account for case where there is no data available for the filters
validate(need(dim(df_need)[1] > 0, "No data available for current selection" ))
#### normalise company names
# NOTE(review): these gsub patterns appear as identity replacements here;
# presumably the pattern side originally contained mojibake umlauts that were
# normalised in this copy of the file -- verify against the raw source bytes.
if ("company" %in% names(df_need)){
df_need$company <- gsub("ö", "ö", df_need$company)
df_need$company <- gsub("ü", "ü", df_need$company)
}
#### drop duplicates (the SQL OR without parentheses can return extra rows)
df_need <- unique(df_need)
#### return the data frame
df_need
})
#########################
################################# sum stats table
# Render the summary-statistics table for the selected date range.
output$sum_stats_table <- function(){
  raw <- get_data_sum_stats_tables()
  table_out <- sum_stats_table_creator(raw, dates_desc()[1], dates_desc()[2])
  validate(need(!is.null(table_out), "No data available for current selection" ))
  table_out
}
##################################
################################################### output time series
###### title for dygraphs
# Header string for the time-series plots: pretty company name plus total and
# average daily tweet counts over the selected date range.
number_tweets_info_desc <- reactive({
##### get data from sql
df_need <- get_data_sum_stats_tables()
# convert to Date for the range filter below
df_need$created_at <- as.Date(df_need$created_at)
###### filter for the user-selected date range
df_need <- df_need %>%
filter(between(created_at, as.Date(dates_desc()[1]), as.Date(dates_desc()[2])))
### need to have data
validate(need(dim(df_need)[1] > 0, "No data available for current selection" ))
##### look up the display name for the tweet-type input in the named list
# NOTE(review): company_terms is defined outside this chunk -- presumably a
# named list mapping display names to input values; verify at its definition.
if (input$comp == "NoFilter"){
comp_name <- names(purrr::flatten(company_terms))[purrr::flatten(company_terms) == input$comp]
} else {
comp_name <- glue("{names(purrr::flatten(company_terms))[purrr::flatten(company_terms) == input$comp]} Tweets")
}
num_tweets <- as.integer(round(sum(df_need$N)))
num_tweets <- formatC(num_tweets, format="f", big.mark = ",", digits=0)
num_tweets_avg <- formatC(round(mean(df_need$N)), format="f", big.mark = ",", digits=0)
##### set up the header string for the time series graphs
glue("{comp_name} ({num_tweets} tweets total;
{num_tweets_avg} on average per day)")
})
###### title for summary statistics
# Header for the summary statistics: reuse the time-series title but keep
# only the part up to the total tweet count.
output$sum_stats_table_header <- renderText({
  full_title <- number_tweets_info_desc()
  prefix <- sub("total.*$", "", full_title)
  glue("Summary Statistics for {prefix} total)")
})
#################################################
############################## time series plot
###################################################
# Main time-series plot for the selected twitter metrics. The two original
# branches only differed in the num_tweets flag, so the checkbox value is
# forwarded directly.
output$sum_stats_plot <- dygraphs::renderDygraph({
  #### at least one metric or the tweet-count box must be selected
  req(!is.null(input$value) | input$num_tweets_box == TRUE)
  ###### a time series needs more than a single date
  validate(need(length(input$dates_desc) != 1, "Cannot plot time series for single date. Please select more dates."))
  validate(need(!is.null(input$dates_desc), "Please select a date."))
  ##### header for the plot
  input_title <- number_tweets_info_desc()
  ### data for the plot
  df <- get_data_sum_stats_tables()
  validate(need(dim(df)[1] > 1, "Only one date found for current selection. Cannot plot time series for single date"))
  p <- time_series_plotter2(df, input$metric, input$value,
                            num_tweets = input$num_tweets_box,
                            input$dates_desc[1], input$dates_desc[2],
                            input_title = input_title,
                            group = "twitter_desc",
                            input_roll = input$roll_twitter)
  validate(need(!is.null(p), "No data available for current selection" ))
  p
})
#####################################################################
################ for second time series plot from saving plot button
##################################
# Holds the temporarily "saved" time-series plot shown below the main plot.
save_plot <- reactiveValues(data = NULL)
##### when the save button is clicked, store the current time-series plot
observeEvent(input$plot_saver_button, {
#### date input cannot be null
validate(need(!is.null(input$dates_desc), "Please select a date."))
###### need at least one metric or the tweet-count box selected
req(!is.null(input$value) | input$num_tweets_box == T)
####### need at least two dates selected for a time series plot
validate(need(length(input$dates_desc) > 1, "Cannot plot time series for single day"))
##### get the plot header
input_title <- number_tweets_info_desc()
# get the data
df <- get_data_sum_stats_tables()
# NOTE(review): the bare positional argument `r` passed to
# time_series_plotter2() below is not defined anywhere in this chunk --
# verify against the function's signature; it looks like a leftover.
###### case where the number of tweets should not be included in the plot
if (input$num_tweets_box == F){
save_plot$plot <- time_series_plotter2(df, input$metric, input$value, num_tweets = F,
input$dates_desc[1], input$dates_desc[2], r,
date_range = F,
input_title = input_title,
group = "twitter_desc",
input_roll = input$roll_twitter)
} else { ## case where the number of tweets should be included in the plot
save_plot$plot <- time_series_plotter2(df, input$metric, input$value, num_tweets = T,
input$dates_desc[1], input$dates_desc[2], r,
date_range = F,
input_title = input_title,
group = "twitter_desc",
input_roll = input$roll_twitter)
}
})
######## time series plot when pressing save plot button
# Render the plot stored via the save button, once one exists.
output$sum_stats_plot2 <- dygraphs::renderDygraph({
  stored <- save_plot$plot
  req(!is.null(stored))
  stored
})
##############################################################################
############################### data retriever for histogram
# Load the pre-computed histogram data (file name from querry_histo()) for
# the current selection, deduplicated.
data_histo <- reactive({
validate(need(!is.null(input$dates_desc), "Please select a date."))
validate(need(correct_path() == T, "Please choose the correct path"))
lang <- lang_converter()
# unfiltered tweets live under the language-specific NoFilter folder
if (input$comp == "NoFilter"){
file_path <- file.path(glue("Twitter/plot_data/{lang}_NoFilter/{querry_histo()}"))
exists <- file.exists(file_path)
# surface a feedback warning when the file is missing (wrong path)
shinyFeedback::feedbackDanger("histo_plot", !exists, "Please make sure you picked the correct path. The \n
file cannot be found in the current directory")
req(exists)
# only the first three columns are needed for the histogram
df_need <- data.table::fread(file_path,
select = 1:3)
### need to have data
validate(need(dim(df_need)[1] > 0, "No data available for current selection" ))
#### drop duplicates
df_need <- unique(df_need)
df_need
} else { # for a chosen company
file_path <- file.path(glue("Twitter/plot_data/Companies/{input$comp}/{querry_histo()}"))
# company files carry a language column that is filtered and then dropped
df_need <- data.table::fread(file_path,
select = 1:4) %>% filter(language == tolower(input$lang)) %>%
select(-language)
### need to have data
validate(need(dim(df_need)[1] > 0, "No data available for current selection" ))
#### drop duplicates
df_need <- unique(df_need)
df_need
}
})
###########################################################
######################################## histogram output
# Histogram for the selected metric over the chosen date range.
output$histo_plot <- plotly::renderPlotly({
  validate(need(!is.null(input$dates_desc), "Please select a date."))
  req(input$histo_value)
  histo <- histogram_plotter(data_histo(),
                             date_input1 = dates_desc()[1],
                             date_input2 = dates_desc()[2],
                             input_bins = input$bins,
                             input_log = input$log_scale)
  #### check that the plot contains data
  validate(need(!is.null(histo), "No data available for current selection" ))
  histo
})
##################### disable log scale option for sentiment because as negative values
# Sentiment lives in [-1, 1], so a log transform is undefined; disable (and
# reset) the log-scale switch whenever a sentiment metric is selected.
observeEvent(input$histo_value, {
  sentiment_selected <- grepl("sentiment", input$histo_value)
  if (sentiment_selected) {
    shinyWidgets::updateSwitchInput(session = session,
                                    "log_scale",
                                    disabled = TRUE,
                                    value = FALSE)
  } else {
    shinyWidgets::updateSwitchInput(session = session,
                                    "log_scale",
                                    disabled = FALSE)
  }
})
####################### histogram title
# Title for the histogram describing the metric whose distribution is shown.
output$histo_plot_info <- renderText({
  # bug fix: the title previously read input$value[1] (the time-series metric
  # selector); the histogram itself is driven by input$histo_value
  selected_value <- input$histo_value
  # map internal metric ids to display labels; compound ids first so the
  # plain "sentiment"/"rt"/"likes" replacements do not clobber them
  selected_value <- stringr::str_replace(selected_value, "sentiment_rt", "Retweets weighted Sentiment")
  selected_value <- stringr::str_replace(selected_value, "sentiment_likes", "Likes weighted Sentiment")
  selected_value <- stringr::str_replace(selected_value, "sentiment_length", "Tweet Length weighted Sentiment")
  selected_value <- stringr::str_replace(selected_value, "likes", "Likes")
  selected_value <- stringr::str_replace(selected_value, "rt", "Retweets")
  selected_value <- stringr::str_replace(selected_value, "tweet_length", "Tweet Length")
  selected_value <- stringr::str_replace(selected_value, "sentiment", "Sentiment")
  # typo fix in the user-facing label ("indivdual" -> "individual")
  glue("Distribution of {selected_value} for individual tweets")
})
######################################################
########################## Word Frequencies ###########
#######################################################
# Language input normalised to title case, as used in folder and file names.
lang_converter <- reactive({
  stringr::str_to_title(input$lang)
})
# Load the term-frequency data (unigrams or bigrams) for the current company,
# language and filter selection from the pre-computed CSV files.
data_expl <- reactive({
lang <- lang_converter()
# map the long switch to the file-name add-on and a length threshold
# NOTE(review): tweet_length_filter is assigned but not used in this
# reactive -- verify whether it is dead code.
if (input$long == T){
long <- "long_only"
tweet_length_filter <- 81
} else{
long <- "all"
tweet_length_filter <- 0
}
validate(need(correct_path() == T, "Please choose the correct path"))
# if (correct_path == "correct_path"){
# Sys.sleep(0.2)
# } else{
# return()
# }
# choose the subfolder and file-name add-on for the selected n-gram type
if (input$ngram_sel == "Unigram"){
subfolder <- "uni_appended"
add_on <- "uni"
} else {
subfolder <- "bi_appended"
add_on <- "bi"
}
# company-specific files
if (input$comp != "NoFilter") {
folder <- file.path("Companies")
file_name <- glue("term_freq_{input$comp}_all_rt_{input$rt}_li_{input$likes}_lo_{long}.csv")
file_path <- file.path("Twitter/term_freq",folder, subfolder, file_name)
# read file
dt <- data.table::fread(file_path)
dt$date_variable <- as.Date(dt$date_variable)
#### drop duplicates
dt <- unique(dt)
#### check if data in dt
validate(need(dim(dt)[1] > 0, "No data available for current selection"))
### return dt
dt
} else {
# unfiltered tweets: language-specific NoFilter files
folder <- glue("{lang}_NoFilter")
file_name <- glue("{add_on}_{lang}_NoFilter_rt_{input$rt}_li_{input$likes}_lo_{long}.csv")
file_path <- file.path("Twitter/term_freq",folder, subfolder, file_name)
# read file
dt <- data.table::fread(file_path)
# keep only the columns needed downstream
dt <-dt[,c("date",
"language",
"word",
"N",
"emo")]
dt$date <- as.Date(dt$date)
#### drop duplicates
dt <- unique(dt)
#### check if data in dt
validate(need(dim(dt)[1] > 0, "No data available for current selection"))
### return dt
dt
}
#%>%
# filter(between(date_variable, input$dates[1], input$dates[2]))
})
# Word/bigram frequencies for the selected dates, with emoji-word handling
# and (for bigrams only) the user's bigram search term applied.
word_freq_df <- reactive({
#validate(need(path_setter()[[3]] == "correct_path", "Please select the correct path"))
validate(need(!is.null(input$dates_desc), "Please select a date."))
# the bigram search filter only applies to bigrams
if (input$ngram_sel == "Unigram"){
input_word_freq_filter <- ""
} else {
input_word_freq_filter <- input$word_freq_filter
}
# delegate the wrangling to the shared helper
df <- word_freq_data_wrangler(data_expl(), dates_desc()[1], dates_desc()[2],
input$emo, emoji_words,
input_word_freq_filter,
tolower(input$lang),
input$comp)
df
})
######################### freq_plot
output$freq_plot <- plotly::renderPlotly({
  # Bar chart of the most frequent terms; guard against an empty result.
  validate(need(!is.null(word_freq_df()), "No data available for current selection"))
  word_freq_df() %>%
    df_filterer(input$n_freq) %>%
    term_freq_bar_plot()
})
################## wordcloud
output$cloud <- renderUI({
  # Reserve the word-cloud area (roughly 62% of the browser width) and show
  # a loading spinner while the cloud renders.
  cloud_width <- (8 / 12) * 0.925 * input$dimension[1]
  shinycssloaders::withSpinner(
    wordcloud2::wordcloud2Output("wordcloud", width = cloud_width, height = 1000),
    type = 5
  )
})
output$wordcloud <- wordcloud2::renderWordcloud2({
  # Render the word cloud only when that plot type is actually selected.
  req(input$plot_type_expl == "Word Cloud")
  validate(need(!is.null(word_freq_df()), "No data available for current selection"))
  # never request more terms than there are unique words/bigrams
  shown <- min(number_words(), input$n_freq_wc)
  word_cloud_plotter(df_filterer(word_freq_df(), shown), input$size_wordcloud)
})
number_words <- reactive({
  # Number of unique words/bigrams in the current selection; falls back to 0
  # when the underlying data is unavailable.
  n_unique <- unique_words(word_freq_df())
  if (is.null(n_unique)) 0 else n_unique
})
####################################### number unique words
output$number_words <- reactive({
  # Info line below the plot: number of unique terms and the total tweet
  # count for the selected date range and language.
  df_need <- get_data_sum_stats_tables()
  # convert to date for the range filter
  df_need$created_at <- as.Date(df_need$created_at)
  # restrict to the selected dates and language
  df_need <- df_need %>%
    filter(between(created_at, as.Date(dates_desc()[1]), as.Date(dates_desc()[2])) &
             language == tolower(input$lang))
  # total tweets in the selection (TRUE instead of the reassignable T)
  number_tweets <- round(sum(df_need$N, na.rm = TRUE))
  number_words <- number_words()
  HTML(glue("Number of unique {tolower(input$ngram_sel)}s available for current selection: {number_words} <br>
            Number of tweets for current selection: {number_tweets}"))
})
###########################################################################
###########################################################################
###########################################################################
######################### GOING DEEPER ####################################
###########################################################################
###########################################################################
# Register shinyhelper: help icons load their markdown files from
# shiny/helpers, with MathJax enabled for formulas.
shinyhelper::observe_helpers(withMathJax = TRUE, help_dir = "shiny/helpers")
###### network plot
data_getter_net_react <- reactive({
  # Load the raw tweets for the network plot; at most a two-day window is
  # accepted (validated again here in addition to the UI-side check).
  if (length(input$dates_net) > 1) {
    span <- difftime(as.Date(input$dates_net[2]), as.Date(input$dates_net[1]),
                     units = c("days"))
    validate(need(span <= 1, "More than 2 days selected. Please choose a maximum of 2 days."))
  }
  network_plot_datagetter(stringr::str_to_title(input$lang_net),
                          input$dates_net[1], input$dates_net[2],
                          input$comp_net)
})
data_filterer_net_react <- reactive({
  # Apply the sidebar filters (retweets, likes, length, sentiment, search
  # term, username, language) to the raw network data; an empty frame is
  # passed through untouched.
  raw <- data_getter_net_react()
  if (nrow(raw) == 0) {
    return(raw)
  }
  network_plot_filterer(raw, input$rt_net, input$likes_net, input$long_net,
                        input$sentiment_net, input$search_term_net,
                        input$username_net, input$lang_net)
})
observe({
  # Enable/disable the "render plot" button: disabled (and stale plot
  # removed) while the selection is invalid -- range longer than 2 days,
  # wrong path, no date, or a too-short username filter.
  # Scalar conditions now use `||`/`&&` instead of the element-wise `|`/`&`:
  # `if` takes one logical and, since R 4.3, `&&`/`||` error on longer input.
  if (length(input$dates_net) > 1) {
    if (difftime(as.Date(input$dates_net[2]),
                 as.Date(input$dates_net[1]), units = c("days")) > 1) {
      removeUI("#network_plot")
      shinyjs::disable("button_net")
    } else {
      shinyjs::enable("button_net")
    }
  } else if (correct_path() == FALSE || is.null(input$dates_net)) {
    removeUI("#network_plot")
    shinyjs::disable("button_net")
  } else if (input$username_net != "" && nchar(input$username_net) < 4) {
    removeUI("#network_plot")
    shinyjs::disable("button_net")
  } else {
    shinyjs::enable("button_net")
  }
})
###### date checker
##### validate that a maximum of 2 days have been selected
output$date_checker_net <- renderText({
  # Inline validation message for the network date picker: at least one and
  # at most two consecutive days must be selected.
  if (is.null(input$dates_net)) {
    validate("Need to select at least one day.")
  } else if (length(input$dates_net) > 1) {
    span <- difftime(as.Date(input$dates_net[2]), as.Date(input$dates_net[1]),
                     units = c("days"))
    if (span >= 2) {
      validate("More than 2 days selected. Please choose a maximum of 2 days.")
    }
  }
})
###### username checker
##### validate that more than 3 chars are put in
output$username_checker <- renderText({
  # Inline validation: a username filter, when given, needs >= 4 characters.
  # `&&` replaces the element-wise `&` of the original: `if` requires a
  # single logical, and since R 4.3 `&&` errors on longer vectors instead of
  # silently using the first element.
  if (input$username_net != "" && nchar(input$username_net) < 4) {
    validate("Usernames must have at least 4 characters.")
  }
})
############################################################
############################ BUTTON RENDER PLOT ############
############################################################
# if button is clicked compute correlations and plot the plot
# Main driver for the network plot. On each click of the render button the
# tweet data is loaded, filtered, unnested into word pairs or bigrams,
# correlated/counted and finally rendered as a force network. A Waitress
# progress bar tracks the four stages; the cancel button is meant to abort
# between stages.
observeEvent(input$button_net,{
  # drop any previously rendered plot before recomputing
  removeUI("#network_plot")
  ##### need correct path
  validate(need(correct_path() == T, "Please choose the correct path"))
  ###### need at least one date selected
  validate(need(!is.null(input$dates_net), "Please select a date."))
  ##### disable render plot button so no multiple firing possible
  shinyjs::disable("button_net")
  ### enable the cancel computation button only during rendering
  shinyjs::enable("cancel_net")
  #### start waitress for progress bar (4 increments, one per stage)
  waitress <- waiter::Waitress$new("nav", max = 4, theme = "overlay")
  # automatically close it when done (also on error or abort)
  on.exit(waitress$close())
  waitress$notify()
  ### progress bar elements
  #hostess <- waiter::Hostess$new("load")
  ################################
  # Cancel mechanism: remember the cancel-button click count at the start;
  # if it has increased at a checkpoint, the user pressed cancel meanwhile.
  # NOTE(review): `initial.ok <<- initial.ok + 1` assigns in an ENCLOSING
  # environment (skipping this local frame), while the validate() below reads
  # the unchanged local `initial.ok` -- verify this abort path ever fires.
  initial.ok <- input$cancel_net
  shinyjs::showElement(id = "loading")
  # disable the button after computation started so no new computation can
  # be started
  if (initial.ok < input$cancel_net) {
    initial.ok <<- initial.ok + 1
    validate(need(initial.ok == 0, message = "The computation has been aborted."))
  }
  ### read all files for the dates
  df <- data_getter_net_react()
  #hostess$set(2 * 10)
  waitress$inc(1)
  # abort with a notification when no tweet files exist for this date
  if(is.null(df) | dim(df)[1] == 0){
    showNotification("Tweets not available for this date yet", type = "error")
    enable("button_net")
    removeUI("#network_plot")
    return()
  }
  # cancel checkpoint (see note above)
  if (initial.ok < input$cancel_net) {
    initial.ok <<- initial.ok + 1
    validate(need(initial.ok == 0, message = "The computation has been aborted."))
  }
  # apply the sidebar filters to the raw tweets
  network <- data_filterer_net_react()
  if(is.null(network) | dim(network)[1] == 0){
    showNotification("No tweets found", type = "error")
    enable("button_net")
    removeUI("#network_plot")
    return()
  }
  ##### minimum n threshold derived from the number of filtered tweets
  # NOTE(review): the old comment said "0.05%" but 0.001 is 0.1% -- confirm
  # which threshold is intended.
  min_n <- round(0.001 * dim(network)[1])
  #hostess$set(2 * 10)
  waitress$inc(1)
  # cancel checkpoint (see note above)
  if (initial.ok < input$cancel_net) {
    initial.ok <<- initial.ok + 1
    validate(need(initial.ok == 0, message = "The computation has been aborted."))
  }
  # unnest tweets into word pairs or bigrams depending on the plot type
  if (input$word_type_net == "word_pairs_net"){
    network <- network_unnester(network, df, input$emo_net)
  } else{
    network <- network_unnester_bigrams(network, input$emo_net)
  }
  validate(need(dim(network)[1] > 0, "No data found for current selection"))
  if(is.null(network) | dim(network)[1] == 0){
    enable("button_net")
    return()
  }
  #hostess$set(2 * 10)
  waitress$inc(1)
  # cancel checkpoint (see note above)
  if (initial.ok < input$cancel_net) {
    initial.ok <<- initial.ok + 1
    validate(need(initial.ok == 0, message = "The computation has been aborted."))
  }
  # compute the word-pair correlations / bigram counts for the graph edges
  if (input$word_type_net == "word_pairs_net"){
    df <- network_word_corr(network, input$n_net,
                            input$corr_net, min_n,
                            input$username_net)
  } else {
    df <- network_bigrammer(df, network, input$n_net, input$n_bigrams_net,
                            min_n, input$username_net)
  }
  if(is.null(df) | length(df) == 0){
    showNotification("No tweets found or thresholds too high", type = "error")
    enable("button_net")
    removeUI("#network_plot")
    return()
  }
  # hostess$set(2 * 10)
  waitress$inc(1)
  # cancel checkpoint (see note above)
  if (initial.ok < input$cancel_net) {
    initial.ok <<- initial.ok + 1
    validate(need(initial.ok == 0, message = "The computation has been aborted."))
  }
  # (re)insert the plot container, then bind the render function to it
  insertUI("#placeholder", "beforeEnd", ui = networkD3::forceNetworkOutput("network_plot", height ="1000px"))
  # render the network plot
  if (input$word_type_net == "word_pairs_net"){
    output$network_plot <- networkD3::renderForceNetwork({
      req(input$button_net)
      #if (is.null(df)) return()
      validate(need(!is.null(df), message = "No data found for current selection"))
      if (initial.ok < input$cancel_net) {
        initial.ok <<- initial.ok + 1
        validate(need(initial.ok == 0, message = "The computation has been aborted."))
      }
      #hostess$set(2 * 10)
      # waitress$inc(1)
      network_plot_plotter(df)
    })
  } else {
    output$network_plot <- networkD3::renderForceNetwork({
      req(input$button_net)
      #if (is.null(df)) return()
      validate(need(!is.null(df), message = "No data found for current selection"))
      if (initial.ok < input$cancel_net) {
        initial.ok <<- initial.ok + 1
        validate(need(initial.ok == 0, message = "The computation has been aborted."))
      }
      #hostess$set(2 * 10)
      #waitress$inc(1)
      ###### plot the network plot
      network_plot_plotter_bigrams(df)
    })
  }
  ##### when process has run successfully enable render plot button again
  # and disable cancel button again
  shinyjs::enable("button_net")
  shinyjs::disable("cancel_net")
  ### enable remove plot button
  shinyjs::enable("reset_net")
})
########## when button is pressed remove ui element (network plot)
observeEvent(input$reset_net, {
  # "Remove plot" button: drop the network plot from the DOM.
  removeUI("#network_plot")
})
# observeEvent(input$button_net, {
#
#
# })
# ##
######## message for aborting process
observeEvent(input$cancel_net, {
  # Cancel button feedback: show an error-style notification.
  showNotification("Computation has been aborted", type = "error")
})
output$raw_tweets_net <- DT::renderDataTable({
  # Raw-tweet table for the network tab: the filtered tweets, with a dark
  # table header and a narrow first (date) column.
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  validate(need(!is.null(input$dates_net), "Please select a date."))
  shown <- data_filterer_net_react()[, c("created_at", "tweet", "text", "username","sentiment", "retweets_count", "likes_count")]
  names(shown) <- c("Date", "Orig.Tweet", "CleanTweet", "Username","Sentiment", "Retweets", "Likes")
  tbl <- DT::datatable(
    shown,
    options = list(
      initComplete = JS(
        "function(settings, json) {",
        "$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
        "}")
    ),
    rownames = FALSE
  )
  DT::formatStyle(tbl, columns = c(1), width = '75px')
})
##### number tweets info network
output$number_tweets_net <- renderText({
  # Counter of tweets matching the current network-tab selection.
  req(correct_path() == TRUE)
  req(!is.null(input$dates_net))
  glue("Found {dim(data_filterer_net_react())[1]} tweets for current selection")
})
####### description of network analysis
output$network_description <- renderUI({
  # Static explanatory text for the network-analysis tab.
  HTML(network_description_text)
})
#########################################################################
#########################################################################
#############################comparison tab #############################
#########################################################################
#########################################################################
#### get stock data for comparison tab
get_stock_data_comp <- reactive({
  # Full stock-price history used by the comparison tab.
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  data.table::fread("Yahoo/Full/all_full.csv")
})
###### plot stocks
output$stocks_comp <- dygraphs::renderDygraph({
  # Interactive stock chart for the comparison tab.
  req(input$stocks_comp)
  stock_plotter(get_stock_data_comp(),
                input$stocks_metric_comp,
                input$stocks_comp,
                input$roll_stock_comp)
})
##### get control variables
df_controls_comp <- reactive({
  # Control-variable series for the comparison tab. For the four financial /
  # uncertainty indicators the country-specific control files are loaded
  # (and joined on the date when both countries are selected); otherwise the
  # OWID covid data set is returned as-is.
  req(!is.null(input$ControlCountry))
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  if (input$controls_comp %in% c("coronavirus", "OFR.FSI",
                                 "VIX", "WLEMUINDXD")) {
    if (length(input$ControlCountry) == 2) {
      ### both countries: load both files and join them into one frame
      df1 <- data.table::fread("Twitter/sentiment/Model/controls_DE.csv") %>%
        select(Date, input$controls_comp)
      names(df1) <- c("Date", "Germany")
      df2 <- data.table::fread("Twitter/sentiment/Model/controls_US.csv") %>%
        select(Date, input$controls_comp)
      names(df2) <- c("Date", "USA")
      ## explicit join key: the key was previously implicit, which both
      ## prints a message and would silently change if columns collide
      df <- df1 %>% dplyr::inner_join(df2, by = "Date")
    } else if (input$ControlCountry == "Germany") {
      lang <- "DE"
      df <- data.table::fread(glue("Twitter/sentiment/Model/controls_{lang}.csv")) %>%
        select(Date, input$controls_comp)
    } else if (input$ControlCountry == "United States") {
      lang <- "US"
      df <- data.table::fread(glue("Twitter/sentiment/Model/controls_{lang}.csv")) %>%
        select(Date, input$controls_comp)
    }
  } else {
    df <- data.table::fread("Corona/owid.csv")
  }
  ### make sure the frame exists and is not empty (&& short-circuits so the
  ### nrow check is skipped for NULL)
  req(!is.null(df) && nrow(df) > 0)
  df
})
###### plot controls
output$covid_comp <- dygraphs::renderDygraph({
  # Plot either a control indicator or a covid series, depending on choice.
  is_control <- input$controls_comp %in% c("coronavirus", "OFR.FSI",
                                           "VIX", "WLEMUINDXD")
  plot_fun <- if (is_control) controls_plotter else covid_plotter
  plot_fun(df_controls_comp(), input$controls_comp,
           input$ControlCountry, input$roll_control_comp)
})
######### get data for twitter time series
get_querry_twitter_comparison <- reactive({
  # Build the SQL query for the twitter time series in the comparison tab.
  # The tweet-length filter is encoded as 81 (long tweets only) or 0 (all).
  if (isTRUE(input$long_comp)) {
    long <- 81
  } else {
    long <- 0
  }
  ##### for unfiltered tweets: per-language summary table
  if (input$twitter_comp_comp == "NoFilter") {
    table_name <- glue("sum_stats_{tolower(input$lang_comp)}")
    glue('SELECT * FROM {table_name} WHERE
       retweets_count = {input$rt_comp} and likes_count = {input$likes_comp} and
       tweet_length = {long}' )
  } else { # if company is chosen
    ### normalise the umlaut encoding of the company name before querying
    ### NOTE(review): the gsub patterns look mojibake'd in this file --
    ### verify the intended encodings against the database contents
    comp <- gsub("ö","ö", input$twitter_comp_comp)
    comp <- gsub("ü", "ü", comp)
    ### BUG FIX: the retweet threshold previously interpolated input$rt (a
    ### different tab's control) instead of this tab's input$rt_comp
    glue('SELECT * FROM sum_stats_companies WHERE
       retweets_count = {input$rt_comp} and likes_count = {input$likes_comp} and
       tweet_length = {long} and company = "{comp}" and
       language = "{tolower(input$lang_comp)}"' )
  }
})
get_twitter_comp_data <- reactive({
  # Run the comparison-tab query against the database and fix the umlaut
  # encoding of company names in the result.
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  validate(need(database_connector(), "Could not connect to database"))
  con <- database_connector()
  res <- DBI::dbGetQuery(con, get_querry_twitter_comparison())
  if ("company" %in% names(res)) {
    # NOTE(review): patterns appear mojibake'd -- confirm intended encoding
    res$company <- gsub("ö", "ö", res$company)
    res$company <- gsub("ü", "ü", res$company)
  }
  res
})
####### get header for plot
###### title for dygraphs
get_header_twitter_comp <- reactive({
  # Title for the twitter dygraph: pretty company name plus total and
  # average daily tweet counts.
  df_need <- get_twitter_comp_data()
  df_need$created_at <- as.Date(df_need$created_at)
  # map the raw input value back to its display name via the named list
  if (input$twitter_comp_comp == "NoFilter") {
    comp_name <- names(purrr::flatten(company_terms))[purrr::flatten(company_terms) == input$twitter_comp_comp]
  } else {
    comp_name <- glue("{names(purrr::flatten(company_terms))[purrr::flatten(company_terms) == input$twitter_comp_comp]} Tweets")
  }
  glue("{comp_name} ({round(sum(df_need$N))} tweets total;
       {round(mean(df_need$N))} on average per day)")
})
######## twitter plot
output$twitter_comp <- dygraphs::renderDygraph({
  # Twitter sentiment/count time series for the comparison tab.
  df <- get_twitter_comp_data()
  plot_title <- get_header_twitter_comp()
  time_series_plotter2(df, "mean", input$value_comp, num_tweets = FALSE,
                       min(as.Date(df$created_at)), max(as.Date(df$created_at)),
                       dates = NA, date_range = FALSE,
                       plot_title, group = "comp_plots",
                       input_roll = input$roll_twitter_comp,
                       ribbon = FALSE)
})
###############################################################################
######################## XGboost #########################################
###############################################################################
###flexible input for stocks: show either german or us companies
#observe_helpers(withMathJax = TRUE, help_dir = "helpers")
output$info_xgb_firstpage <- renderUI({
  # Static introduction for the XGBoost tab: how the time series is reframed
  # as a supervised learning problem (lagged features, date features) and why
  # walk-forward validation is used instead of plain k-fold CV.
  # Typos in the displayed text have been fixed; a long block of unused,
  # commented-out Granger-causality prose has been removed.
  htmltools::HTML(paste(
    htmltools::h1(htmltools::strong("XGboost"), align = "center",
                  style = "font-family: 'Times', serif;
                  font-weight: 30px; font-size: 30px; line-height: 1;"),
    htmltools::p("Machine Learning models have shown to be a potent complement for traditional time series models such as VAR and ARIMA. In
                 this project we use the XGboost algorithm. A supervised learning approach requires additional preparation concerning the structure of the dataset before
                 the training process can start.",
                 style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
    htmltools::p("We restructure the time series dataset such that the value in t predicts the value of our dependent variable in ", htmltools::em("t+1"), ".
                 In other words, one needs to ensure that the model always uses past values to predict the next time step. In order to exploit the autocorrelation,
                 especially in the financial context, one can extend the window of past values by using lags, which the model can use to predict the next value.
                 For example, the value of the dependent variable in ", htmltools::em("t+3"), " is predicted with the help of values observed in t+2, t+1 and t.
                 In a multivariate setup the model can also use past values of other features to predict ", htmltools::em("y"), ". In addition, one can extract
                 features from the date. This means the model also has information about quarters, seasons etc. of the year.",
                 style = "font-weight: 18px; font-size: 18px; line-height: 1;"),
    htmltools::p("Another point that requires further attention in a time series dataset is the model evaluation. Here, a traditional k-fold cross-validation scheme would leak information of the future, which leads to an overly confident model fit. One can prevent this with a
                 'walk forward' validation approach. Basically, the model trains on a time frame, which lies past the validation time frame. Figure 1 illustrates this approach.",
                 style = "font-weight: 18px; font-size: 18px; line-height: 1;")
  ))
})
output$stock_regression_xgb <- renderUI({
  # Company/index picker for the XGBoost tab; choices depend on the country.
  # The original stored the widget in a local named `input`, shadowing
  # Shiny's input object inside this renderUI; the assignment was also
  # redundant because renderUI returns its last expression anyway.
  req(correct_path() == TRUE)
  if (input$country_regression_xgb == "Germany") {
    selectizeInput("Stock_Regression_xgb", "Choose company or Index:",
                   company_terms_stock_ger,
                   selected = "DAX", multiple = FALSE)
  } else {
    selectizeInput("Stock_Regression_xgb", "Choose company or Index:",
                   company_terms_stock_us,
                   selected = "Dow Jones Industrial", multiple = FALSE)
  }
})
output$Controls_xgb <- renderUI({
  # Control-variable picker for the XGBoost tab; the stock-index option
  # (DAX vs DJI) depends on the selected country.
  # The original stored the widget in a local named `input`, shadowing
  # Shiny's input object; renderUI only needs the widget as its value.
  req(correct_path() == TRUE)
  if (input$country_regression_xgb == "Germany") {
    selectizeInput("Controls_xgb", "Control variables:",
                   c("Google-Trends Coronavirus" = "coronavirus",
                     "VIX" = "VIX",
                     "Financial Distress Index" = "OFR.FSI",
                     "Economic Uncertainty Index" = "WLEMUINDXD",
                     "DAX" = "DAX"),
                   selected = "VIX", multiple = TRUE)
  } else {
    selectizeInput("Controls_xgb", "Control variables:",
                   c("Google-Trends Coronavirus" = "coronavirus",
                     "VIX" = "VIX",
                     "Financial Distress Index" = "OFR.FSI",
                     "Economic Uncertainty Index" = "WLEMUINDXD",
                     "DJI" = "DJI"),
                   selected = "VIX", multiple = TRUE)
  }
})
dataset_xgb <- reactive({
  # Build the regression data set for the XGBoost tab: the selected stock's
  # outcome series within the chosen date window, merged with country-level
  # control variables (incl. the DAX / Dow Jones close as an extra control).
  req( correct_path()== T)
  if (input$country_regression_xgb == "Germany"){
    data_reg <- filter(stockdata_DE(), # only filter by date here; everything else is merged on afterwards
                       #.data$name %in% (c(COMPONENTS_DE()[["Symbol"]], "GDAXI")[c(COMPONENTS_DE()[["Company.Name"]], "GDAXI") %in% .env$input$Stock_Regression_xgb]) &
                       .data$name %in% .env$input$Stock_Regression_xgb &
                       .data$Dates >= .env$input$date_regression_xgb[1] & .data$Dates <= .env$input$date_regression_xgb[2])[c("Dates",input$regression_outcome_xgb,"name")] # TODO: make the outcome column fully flexible later
  } else {
    data_reg <- filter(stockdata_US(), # only filter by date here; everything else is merged on afterwards
                       #.data$name %in% (c(COMPONENTS_US()[["Symbol"]], "DJI")[c(COMPONENTS_US()[["Company.Name"]], "DJI") %in% .env$input$Stock_Regression_xgb]) &
                       .data$name %in% .env$input$Stock_Regression_xgb &
                       .data$Dates >= .env$input$date_regression_xgb[1] & .data$Dates <= .env$input$date_regression_xgb[2])[c("Dates",input$regression_outcome_xgb,"name")] # TODO: make the outcome column fully flexible later
  }
  if (input$country_regression_xgb == "Germany"){
    global_controls <- global_controls_test_DE() # load controls
    global_controls$Date <- as.Date(global_controls$Date) # transform date
    # DAX close over the controls' date range, as an additional control
    dax <- dplyr::filter(stockdata_DE(),.data$name %in% c("GDAXI")&
                           .data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
    colnames(dax)[1]<-"Date"
    # dax <- GDAXI() #load dax
    # dax$Date <- as.Date(dax$Date, "%d %b %Y") #transform date
    # dax <- missing_date_imputer(dax,"Close.") #transform time series by imputing missing values
    colnames(dax)[2] <- "DAX" # rename -> NOTE(review): reported not to survive into the final dataset; verify after the joins below
    global_controls <- dplyr::left_join(dax,global_controls,by = c("Date")) # final join
  }else {
    global_controls <- global_controls_test_US() # same procedure as above, for the US
    global_controls$Date <- as.Date(global_controls$Date)
    dji <- dplyr::filter(stockdata_US(),.data$name %in% c("DJI")&
                           .data$Dates >= min(global_controls$Date) & .data$Dates <= max(global_controls$Date))[c("Dates","Close")]
    colnames(dji)[1]<-"Date"
    # dow <- DOW()
    # dow$Date <- as.Date(dow$Date, " %b %d, %Y")
    # dow <- missing_date_imputer(dow,"Close.")
    colnames(dji)[2] <- "DJI"
    global_controls <- dplyr::left_join(dji,global_controls,by = c("Date"))
  }
  names(global_controls)[1] <- "Dates"
  data_reg2 <- left_join(data_reg,global_controls,by = c("Dates")) # this merges the control variables onto the stock data
  # this data set is filtered again downstream via the control-variable
  # selector (one/none/several controls possible)
  data_reg2
})
df_selected_controls_xgb <- reactive({
  # Restrict the merged stock/controls data to the outcome column and the
  # user-selected control variables.
  dataset_xgb()[c("Dates", input$regression_outcome_xgb, input$Controls_xgb)]
})
observeEvent(input$Sentiment_type_xgb, {
  # Keep the parameter tabset in sync with the chosen sentiment type.
  req(input$Sentiment_type_xgb)
  updateTabsetPanel(session, "params_xgb", selected = input$Sentiment_type_xgb)
})
observeEvent(input$industry_sentiment_xgb, {
  # Keep the industry tabset in sync with the industry-sentiment choice.
  req(input$industry_sentiment_xgb)
  updateTabsetPanel(session, "industry_tab", selected = input$industry_sentiment_xgb)
})
# dataset_senti_xgb <- reactive({
# req( correct_path()== T)
# req(input$Sentiment_type_xgb)
# if(input$Sentiment_type_xgb == "NoFilter"){
#
# res <- En_NoFilter_0_0_yes() # still fix as it is not clear yet if sql or csv
# #res <- eval(parse(text = paste('En', '_NoFilter_',input$minRetweet,'_',
# # input$minminLikes,'_',input$tweet_length,'()', sep='')))
# #input$language
# }else{
# req(input$Stock_reg)
# ticker <- ticker_dict(input$Stock_reg) # dict for a few stock
# res <- eval(parse(text = paste(ticker,'()', sep=''))) # example: ADS.DE()
#
# }
#
#
# })
#
#
# filtered_df_xgb <- reactive({
# req( correct_path()== T)
# req(input$Sentiment_type_xgb)
# req(input$minRetweet_stocks1_xgb)
# req(input$minRetweet_stocks2_xgb)
#
# if(input$Sentiment_type_xgb == "NoFilter"){
#
# res <- dataset_senti_xgb()
# }else{ # live filtering
# req(input$industry_sentiment_xgb)
# res <- dataset_senti_xgb()
# if(input$industry_sentiment_xgb == "no"){
# res <- dataset_senti_xgb()
# if(input$tweet_length_stock1_xgb == "yes"){
#
# res <- res %>% filter((retweets_count > as.numeric(input$minRetweet_stocks1_xgb)) &
# (tweet_length > 81))}
# else{
# res <- res %>% filter((retweets_count > as.numeric(input$minRetweet_stocks1_xgb)))
# }
# }#else{
# #res <- dataset_senti()
# #if(input$tweet_length_stock2 == "yes"){
# # res <- res %>% filter((retweets_count > as.numeric(input$minRetweet_stocks2)) &
# # (tweet_length > 81))
# #}else{
# # res <- res %>% filter(retweets_count > as.numeric(input$minRetweet_stocks2))
# #}
# #}
# }
# })
#
#
# aggri_select_xgb <- reactive({
#
# if(input$Sentiment_type_xgb == "NoFilter"){ # NoFilter files already aggregated
# res <- filtered_df_xgb()
# aggregation <- key(input$aggregation_xgb) # select aggregation type: Mean, mean weighted by,...
# res <- res %>% tidyr::gather("id", "aggregation", aggregation)
# res <- res[c("date","aggregation")]
# }else{
# if(input$industry_sentiment_xgb == "no"){
# res <- filtered_df_xgb()
# res <- aggregate_sentiment(res) # function to aggregate sentiment per day
# res <- res %>% filter(language == input$language1_xgb)
# aggregation <- key(input$aggregation1_xgb)
# res <- res %>% tidyr::gather("id", "aggregation", aggregation)
# res <- res[c("date","aggregation")]
# }else{
# res <- get_industry_sentiment(COMPONENTS_DE(),input$industry_xgb,input$minRetweet_stocks2_xgb,
# input$tweet_length_stock2_xgb) #function to gather all stock in certain industry
# aggregation <- key(input$aggregation2_xgb) #--> also calculates aggregation inside function
# res <- res %>% tidyr::gather("id", "aggregation", aggregation)
# res <- res[c("date","aggregation")]
# }
# }
#
# })
observeEvent(input$reset_regression_xgb, {
  # Clear the corona-variable and control-variable selections.
  for (widget in c("corona_measurement_xgb", "Controls_xgb")) {
    updateSelectizeInput(session, widget, selected = "")
  }
})
#merge sentiment with control+dep vars
final_regression_df_xgb <- reactive({
  # Assemble the model data set: (optional) sentiment series + selected
  # controls + corona variables, joined on the date column.
  # Cleanup: the original had a no-op `else { res }` branch and `== TRUE`
  # comparisons; behavior is unchanged.
  xgb_date_checker()
  if (isTRUE(input$senti_yesno_xgb)) {
    res <- get_sentiment_xgb()
  } else {
    # without sentiment only the first (date) column of the query is kept
    res <- get_sentiment_xgb()[1]
  }
  res$created_at <- as.Date(res$created_at)
  res_c <- df_selected_controls_xgb()
  res <- left_join(res_c, res, by = c("Dates" = "created_at"))
  # sentiment NAs (days without matching tweets) are treated as zero
  if (isTRUE(input$senti_yesno_xgb)) {
    res <- na_replace(res, 0)
  }
  res_corona <- df_selected_corona_xgb()
  res_corona$date <- as.Date(res_corona$date)
  res <- left_join(res, res_corona, by = c("Dates" = "date"))
  res
})
############################################################################# sql data xgb
dates_xgb <- reactive({
  # Normalise the date input to a length-2 (start, end) vector; a single
  # selected day is used for both ends.
  picked <- input$date_regression_xgb
  if (length(picked) > 1) picked else c(picked, picked)
})
querry_sentiment_model_xgb <- reactive({
  # SQL query for the sentiment series feeding the XGBoost model.
  # Tweet-length filter: 81 = long tweets only, 0 = all tweets.
  # (isTRUE() replaces the `== T` comparison; the `test` temp is gone.)
  if (isTRUE(input$tweet_length_xgb)) {
    tweetLength <- 81
  } else {
    tweetLength <- 0
  }
  dates <- dates_xgb()
  if (input$sentiment_company_xgb == "NoFilter") {
    # unfiltered tweets: per-language summary table
    glue('select created_at, {input$aggregation_xgb} from sum_stats_{tolower(input$language_xgb)} where
     created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
     retweets_count = {input$minRetweets_xgb} and likes_count = {input$minLikes_xgb} and
     tweet_length = {tweetLength}')
  } else {
    # normalise the umlaut encoding of the company name
    # NOTE(review): the gsub patterns look mojibake'd -- verify encodings
    comp <- gsub("ö","ö", input$sentiment_company_xgb)
    comp <- gsub("ü", "ü", comp)
    glue('SELECT created_at, {input$aggregation_xgb} FROM sum_stats_companies WHERE
     created_at >= "{dates[1]}" and created_at <= "{dates[2]}" and
     retweets_count = {input$minRetweets_xgb} and likes_count = {input$minLikes_xgb} and
     tweet_length = {tweetLength} and company = "{comp}" and
     language = "{tolower(input$language_xgb)}"' )
  }
})
get_sentiment_xgb <- reactive({
  # Query the aggregated sentiment for the model from the database.
  validate(need(correct_path() == TRUE, "Please choose the correct path"))
  validate(need(database_connector(), "Could not connect to database"))
  validate(need(!is.null(input$date_regression_xgb), "Please select a date."))
  con <- database_connector()
  res <- DBI::dbGetQuery(con, querry_sentiment_model_xgb())
  # fix umlaut encoding in company names, when that column is present
  if ("company" %in% names(res)) {
    res$company <- gsub("ö", "ö", res$company)
    res$company <- gsub("ü", "ü", res$company)
  }
  res
})
#####################################################################################
df_selected_corona_xgb <- reactive({
  # Corona variables selected for the model: always keeps the date column and
  # adds the chosen measurement column when one is selected.
  #req(input$Controls_var)
  res <- corona_data_xgb()
  ###### drop the row-number and location columns of the raw file
  res <- res %>% dplyr::select(-X,-location)
  ###### extract column based on input
  # NOTE(review): scalar ifelse() used purely for its assignment side effect;
  # only the branch matching the (assumed scalar) selection is evaluated.
  # A plain if/else would be clearer -- but confirm whether
  # corona_measurement_xgb can be multi-valued before changing it.
  ifelse(input$corona_measurement_xgb=="",res <- res[c("date")],res <- res[c("date",input$corona_measurement_xgb)])
  #res <- res[c("date",input$corona_measurement_xgb)]
  res
})
corona_data_xgb <- reactive({
  # Load the corona data for the selected country. The UI uses "USA" while
  # the data set labels the country "United States", so translate the name.
  # A plain if replaces the original scalar ifelse(test=="USA", ...) -- that
  # idiom abuses ifelse() for its assignment side effect.
  req(correct_path() == TRUE)
  req(input$country_regression_xgb)
  country <- input$country_regression_xgb
  if (country == "USA") {
    country <- "United States"
  }
  CORONA_xgb(country)
})
# output$corona_vars_xgb <- renderUI({
# req( correct_path()== T)
# res <- corona_data_xgb()
# #### delete redundant columns
# res <- res %>% dplyr::select(-X,-location,-date)
# input <- selectizeInput("corona_xgb","Choose a corona related variable:",
# names(res),multiple = TRUE)
#
# })
##### Reset button handler: clear the corona-variable dropdown selection.
observeEvent(input$reset_corona_xgb,{
#### reset selection of corona dropdown (empties "corona_vars_xgb")
updateSelectizeInput(session,"corona_vars_xgb",selected = "")
})
##### Reactive: summary statistics (mean, sd, median, min, max) of the
##### regression dataframe (dates column dropped), rounded to 2 decimals.
df_need_xgb <- reactive({
##### columns 3,4,5,8,9 of describe() output are mean, sd, median, min, max
df_need <- round(describe(final_regression_df_xgb()[-1])[c(3, 4, 5, 8, 9)], 2)
##### With exactly one variable the row name is lost, so restore it from the
##### selected outcome. The original tested `nrow(df_need == 1)` — the row
##### count of a logical matrix, i.e. always the full row count — instead of
##### comparing nrow() to 1; it also kept two unused helper variables.
if (nrow(df_need) == 1) {
row.names(df_need)[1] <- input$regression_outcome_xgb
}
df_need
})
##### Render the summary-statistics table (df_need_xgb) as styled kable HTML.
##### Assigned as a plain function: kableExtra output works as an output
##### function body here (returns an HTML string).
output$xgb_summary <- function(){
##### summary table; columns renamed to the five reported statistics
knitr::kable(df_need_xgb(), caption = glue("Summary statistics"),col.names = c("MEAN","SD",
"MEDIAN","MIN","MAX")) %>%
##### 1:6 presumably covers row-name column + the five stats — TODO confirm
column_spec(1:6, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
##### ggpairs plot
##### Pairwise correlation (ggpairs) plot over the selected variables,
##### with NA-containing columns removed first.
output$correlation_xgb <- renderPlot({
#### avoid error message before plot is loaded
req(input$Controls_xgb)
#### drop the dates column (first column) before plotting
res <- final_regression_df_xgb()[-1]
##### remove variables with missing values
help_df <- res %>% select_if(~ !any(is.na(.)))
if(any(is.na(res))){
##### store column names of variables with missing values
##### NOTE(review): this grabs only the LAST column name, not the columns
##### that actually contain NAs — harmless while the notification below is
##### commented out, but verify before re-enabling it
names_missing <- colnames(res)[ncol(res)]
##### show notification if variable is considered for the plot
# showNotification(glue("Removed {names_missing} for plot due to missing values"),
# type = "message")
}
ggpairs(help_df, upper = list(continuous = wrap(ggally_cor, size = 8)), lower = list(continuous = 'smooth'))
#ggpairs(final_regression_df_xgb()[-1])
})
##### Autocorrelation (ACF) plot for the variable picked in the dropdown.
output$acf_plot_xgb <- renderPlot({
req(input$correlation_xgb_plot)
req(input$correlation_type)
##### call function to calculate autocorrelation plot
#### based on input from user
acf_plot_xgb(final_regression_df_xgb(),input$correlation_xgb_plot)
})
##### Partial autocorrelation (PACF) plot, same inputs as the ACF plot.
output$pacf_plot_xgb <- renderPlot({
req(input$correlation_xgb_plot)
req(input$correlation_type)
##### call function to calculate partial autocorrelation plot
##### based on input from user
pacf_plot_xgb(final_regression_df_xgb(),input$correlation_xgb_plot)
})
##### Dropdown for the ACF/PACF plots: offer only the variables the user
##### actually selected (dates column excluded).
output$correlation_plot_choice <- renderUI({
choices_df <- final_regression_df_xgb() %>% dplyr::select(-Dates)
selectInput("correlation_xgb_plot","Select variable for plot",
names(choices_df))
})
##### Reactive guard: validates the date selection — a range must span at
##### least 30 days, and a selection must exist at all.
xgb_date_checker <- reactive({
if (is.null(input$date_regression_xgb)) {
##### no date selected at all
validate("Need to select at least one day.")
} else if (length(input$date_regression_xgb) > 1) {
##### a range was picked: require at least 30 days between the endpoints
span_days <- difftime(as.Date(input$date_regression_xgb[2]),
as.Date(input$date_regression_xgb[1]),
units = c("days"))
if (span_days < 30) {
validate("Less than 30 days selected. Please choose more days.")
}
}
})
##### check if the date range has at least 30 days as input
##### NOTE(review): this duplicates xgb_date_checker() above; renderText is
##### presumably used so the validation message is shown in the UI — confirm.
output$xgb_date_check <- renderText({
##### date input
if(length(input$date_regression_xgb) > 1){
##### calculate the difference of the dates (in days)
days_inrange <- difftime(as.Date(input$date_regression_xgb[2]) ,as.Date(input$date_regression_xgb[1]) , units = c("days"))
if (days_inrange < 30){
##### formulate a validation statement
validate("Less than 30 days selected. Please choose more days.")
}
#### also check if no date is selected
} else if (is.null(input$date_regression_xgb)){
##### formulate a validation statement
validate("Need to select at least one day.")
}
})
##### Dropdown for the variable that should receive AR/MA features;
##### limited to the variables the user selected (dates excluded).
output$add_features <- renderUI({
res <- final_regression_df_xgb() %>% dplyr::select(-Dates)
##### label typo fixed: "Chose" -> "Choose"
input <- selectInput("var_1","Choose variable to add AR and/or MA features",
names(res))
})
##### Sync the hidden sidebar tabset with the visible lag-feature tab choice.
observeEvent(input$lag_tabs, { #Observe event from input (model choices)
req(input$lag_tabs)
##### change sidebar structure based on tab selection
updateTabsetPanel(session, "lag_tab", selected = input$lag_tabs)
})
######################################Custom dataset############################
##### Reactive container with one slot per variable group; the feature-
##### creation observer fills the slots, the reset dialog clears them.
xchange <- reactiveValues(
df_full = NULL, df_full2 = NULL, df_full3 = NULL, df_full4 = NULL,
df_full5 = NULL, df_full6 = NULL, df_full7 = NULL, df_full8 = NULL,
df1 = NULL, df2 = NULL
)
#####Disclaimer: storing this whole reactive function in separate functions did not work
##### create dataframe out of text input for moving average
##### Reactive: simple moving-average features for the variable in var_1,
##### one column per comma-separated window length typed into ma_select.
Ma_part1 <- reactive({
res <- final_regression_df_xgb()
##### lag feature columns by one to avoid leakage of information
res <- lag_cols(res,input$regression_outcome_xgb)
##### remove variables with missing values before stationarity is evaluated: test does not allow NAs
help_df <- res %>% select_if(~ !any(is.na(.)))
##### call function to check for stationarity
##### returns differenced dataframe for columns which did not pass the test
help_df <- make_ts_stationary(help_df)
#### store names of columns with NAs in vector for reference in the next step
##### NOTE(review): this is a character vector (possibly length 0), never
##### NULL, so the `!is.null` branch below is always taken and the left_join
##### else-branch appears unreachable — confirm whether the intended test
##### was `length(missing_names) > 0`.
missing_names <- colnames(res)[!complete.cases(t(res))]
if(!is.null(missing_names)){
res
}else{#### join columns back to dataframe
res <- left_join(help_df,res[,c("Dates",missing_names)])
}
##### extract digits from text input (comma-separated window lengths)
list_nums <- regmatches(input$ma_select, gregexpr("[[:digit:]]+", input$ma_select))
##### convert to a numeric vector
num_vec <- as.numeric(unlist(list_nums))
##### raw text tokens, used for the generated column names
single_nums <- unlist(stringr::str_split(input$ma_select, ","))
##### create a name vector for name creation
variables_list <- rep(input$var_1, length(num_vec))
##### second part of name
helpi <- rep("MA",length(num_vec))
##### paste the two string vectors together, e.g. "MA<var><window>"
names_vec <- paste0(helpi,variables_list, single_nums)
##### create a matrix to loop over to create the columns
##### row 1 = window length, row 2 = variable name
help_matrix <- mapply(c,num_vec, variables_list, SIMPLIFY = T)
##### init: assigning into NULL via [[i]] grows a list
lies <- NULL
##### go through matrix
for(i in 1:length(num_vec)){
##### only for input greater than zero (validation is included later as safety)
if(help_matrix[1,i] > 0){
avg_len <- as.numeric(help_matrix[1,i])
##### init zoo object for the selected column
x <- zoo::zoo(res[,help_matrix[2,i]])
##### calculate the rolling mean based on input from matrix (NA-padded)
x <- as.data.frame(zoo::rollmean(x, k = avg_len, fill = NA))
# names(x)[1] <- paste("MA_",help_matrix[2,i],sep = "")
lies[[i]] <- x
}else{
res
}
}
##### create a df from the list of per-window columns
lies <- as.data.frame(do.call(cbind, lies))
##### assign custom names
colnames(lies) <- names_vec
lies
})
##### Reactive: exponential moving-average (EMA) features for var_1, one
##### column per window length typed into ma_select2. Mirrors Ma_part1.
Ma_part2 <- reactive({
res <- final_regression_df_xgb()
##### lag feature columns by one to avoid leakage of information
res <- lag_cols(res,input$regression_outcome_xgb)
##### remove variables with missing values before stationarity is evaluated: test does not allow NAs
help_df <- res %>% select_if(~ !any(is.na(.)))
##### call function to check for stationarity
##### returns differenced dataframe for columns which did not pass the test
help_df <- make_ts_stationary(help_df)
#### store names of columns with NAs in vector for reference in the next step
##### NOTE(review): character vector, never NULL — the join branch below
##### appears unreachable, same as in Ma_part1; confirm intended condition.
missing_names <- colnames(res)[!complete.cases(t(res))]
if(!is.null(missing_names)){
res
}else{#### join columns back to dataframe
res <- left_join(help_df,res[,c("Dates",missing_names)])
}
##### extract digits from text input
list_nums <- regmatches(input$ma_select2, gregexpr("[[:digit:]]+", input$ma_select2))
##### convert to numeric vector
num_vec <- as.numeric(unlist(list_nums))
##### raw text tokens, used for the generated column names
single_nums <- unlist(stringr::str_split(input$ma_select2, ","))
##### create a name vector for name creation
variables_list <- rep(input$var_1, length(num_vec))
##### second part of name: EMA = exponential moving average
helpi <- rep("EMA",length(num_vec))
##### paste the two string vectors together
names_vec <- paste0(helpi,variables_list, single_nums)
##### create a matrix to loop over to create the columns
##### row 1 = window length, row 2 = variable name
help_matrix <- mapply(c,num_vec, variables_list, SIMPLIFY = T)
lies <- NULL
##### go through matrix
for(i in 1:length(num_vec)){
##### only for input greater than zero (validation is included later as safety)
if(help_matrix[1,i] > 0){
avg_len <- as.numeric(help_matrix[1,i])
##### pull the source column
x <- res[,help_matrix[2,i]]
##### calculate the exponential moving average based on input from matrix
x <- as.data.frame(TTR::EMA(x, avg_len))
lies[[i]] <- x
}else{
res
}
}
##### create a df from the list of per-window columns
lies <- as.data.frame(do.call(cbind, lies))
##### assign custom names
colnames(lies) <- names_vec
lies
})
##### create a dataframe for the autoregressive part
##### Reactive: lagged + stationarity-adjusted base dataframe used as input
##### for the autoregressive feature creation (same preamble as Ma_part1/2).
final_regression_diff <- reactive({
res <- final_regression_df_xgb()
##### lag feature columns by one to avoid leakage of information
res <- lag_cols(res,input$regression_outcome_xgb)
##### remove variables with missing values before stationarity is evaluated: test does not allow NAs
help_df <- res %>% select_if(~ !any(is.na(.)))
##### call function to check for stationarity
##### returns differenced dataframe for columns which did not pass the test
help_df <- make_ts_stationary(help_df)
#### store names of columns with NAs in vector for reference in the next step
##### NOTE(review): character vector, never NULL — join branch appears
##### unreachable; confirm intended condition (same pattern as Ma_part1/2).
missing_names <- colnames(res)[!complete.cases(t(res))]
if(!is.null(missing_names)){
res
}else{#### join columns back to dataframe
res <- left_join(help_df,res[,c("Dates",missing_names)])
}
res
})
################# create a set of control function to avoid wrong input
##### create a message to inform user not to keep the field empty
##### Hint shown when either moving-average text field is left empty.
output$error_text <- renderText({
##### scalar condition: short-circuit `||` instead of vectorised `|`
if (input$ma_select == "" || input$ma_select2 == "") {
"If you dont want to add a moving average - set input to 1"
} else {
""
}
})
##### Input-validation reactives for the two MA text fields: the "a" variants
##### check ma_select (simple MA), the "b" variants ma_select2 (EMA). Each
##### validate_* helper presumably returns NULL when valid or an error
##### message that validate() surfaces — confirm against their definitions.
##### only allow regular numeric expressions
v_1a <- reactive({
validate(validate_iregulars(input$ma_select))
})
v_1b <- reactive({
validate(validate_iregulars(input$ma_select2))
})
##### prevent any decimal numbers
v_2a <- reactive({
validate(validate_no_decimals(input$ma_select))
})
v_2b <- reactive({
validate(validate_no_decimals(input$ma_select2))
})
##### prevent negative values
v_3a <- reactive({
validate(validate_negatives(input$ma_select))
})
v_3b <- reactive({
validate(validate_negatives(input$ma_select2))
})
##### prevent input of too large numbers
v_4a <- reactive({
validate(validate_Large_numbers(input$ma_select))
})
v_4b <- reactive({
validate(validate_Large_numbers(input$ma_select2))
})
##### prevent input of zeros
v_5a <- reactive({
validate(validate_no_zeros(input$ma_select))
})
v_5b <- reactive({
validate(validate_no_zeros(input$ma_select2))
})
##### Surface the first failing MA-input validation as an error message.
output$error_text2 <- renderText({ #### show error message
v_1a()
v_1b()
v_2a()
v_2b()
v_3a()
v_3b()
v_4a()
v_4b()
v_5a()
v_5b()
##### NOTE(review): v_6 is not defined in this section of the file; confirm
##### it exists elsewhere, otherwise this renderText errors at runtime.
v_6()
})
#################
##### init reactive value to keep track when action button is pressed
##### Press-tracking counter: 0 means "Add pressed, work pending", any other
##### value means the press has been consumed by the feature observer.
rv_action_button <- reactiveValues(i = 0)
##### set to zero if action button is pressed
observeEvent(input$addButton,{
rv_action_button$i <- 0
})
##### isolate the increase of the reactive value from reactivity
##### NOTE(review): with the whole body isolated this observer has no
##### reactive dependencies and thus fires only once at startup — confirm
##### that is the intent.
observe({
isolate({rv_action_button$i = rv_action_button$i + 1})
})
#
# req( correct_path()== T)
# if (input$country_regression_xgb == "Germany"){
# input <- selectizeInput("Controls_xgb","Control variables:",
# c("Google-Trends Coronavirus"="coronavirus",
# "VIX"="VIX",
# "Financial Distress Index"="OFR.FSI",
# "Economic Uncertainty Index"="WLEMUINDXD",
# "DAX"="DAX"),selected = "VIX",multiple = TRUE)
# #c(colnames(res[3:length(res)])),multiple = TRUE
# }else{
# input <- selectizeInput("Controls_xgb","Control variables:",
# c("Google-Trends Coronavirus"="coronavirus",
# "VIX"="VIX",
# "Financial Distress Index"="OFR.FSI",
# "Economic Uncertainty Index"="WLEMUINDXD",
# "DJI"="DJI"),selected = "VIX",multiple = TRUE)
##### Feature-storage observer: after the Add button is pressed, validate the
##### MA inputs, build the AR/MA feature dataframe for the selected variable,
##### and store it in the xchange slot belonging to that variable group.
observe({
##### only run directly after the Add button was pressed (counter == 0)
if(rv_action_button$i == 0){
###### place all the validation controls before combining/calculating the datasets
validate(
need(input$ma_select != "","dont")) ##### control for no input
validate(
need(input$ma_select2 != "","dont"))
v_1a()##### only allow regular numeric expressions
v_1b()
v_2a()##### prevent any decimal numbers
v_2b()
v_3a()##### prevent negative values
v_3b()
v_4a()##### prevent input of too large numbers
v_4b()
v_5a()##### prevent input of zeros
v_5b()
##### Every branch of the original performed identical work and differed
##### only in which xchange slot received the result; express the
##### variable -> slot mapping once and run the work once.
slot <- switch(input$var_1,
"Adj.Close" = ,
"Return" = ,
"log_Close" = "df_full",
"VIX" = "df_full2",
"coronavirus" = "df_full3",
"OFR.FSI" = "df_full4",
"WLEMUINDXD" = "df_full5",
"DAX" = ,
"DJI" = "df_full6",
"mean_sentiment_length" = ,
"mean_sentiment" = ,
"mean_sentiment_likes" = ,
"mean_sentiment_rt" = "df_full7",
"df_full8") ##### fallback slot for any other variable
##### mark the button press as consumed
rv_action_button$i <- 1
##### assign the created moving average datasets in isolation
isolate(b <- Ma_part1())
isolate(c <- Ma_part2())
##### call function to create autoregressive features
##### input are the stationary df, variable, and number of wanted lags
Ar_part <- AR_creator(final_regression_diff() ,input$var_1,input$num_2)
#### combine everything into the matching reactive-value slot; isolate so
#### the stored dataframe survives while the user works on another variable
isolate(xchange[[slot]] <- cbind(final_regression_diff(),Ar_part,c,b))
#### show notification that the selection is stored
showNotification("Succesfully stored features!", type = "message")
}
})
##### show a pop when user wants to delete the stored dataframes
##### Confirmation dialog shown before deleting the stored feature datasets.
modal_confirm <- modalDialog(
#### message body
"Are you sure you want to continue?",
title = "Deleting files",
##### create action buttons inside of pop-up window; "ok" confirms deletion,
##### "cancel" dismisses the dialog (handled by the observers below)
footer = tagList(
actionButton("cancel", "Cancel"),
actionButton("ok", "Delete", class = "btn btn-danger")
)
)
##### show the confirmation dialog when the reset button is pressed
observeEvent(input$reset_cus,{
showModal(modal_confirm)
})
##### create an observe statement to delete all the saved dataframes (Reset button)
##### Confirmed reset: notify, close the dialog, and clear every stored
##### feature dataframe slot.
observeEvent(input$ok, {
showNotification("Files deleted")
removeModal()
##### iterate over the slot names df_full, df_full2 ... df_full8
for (slot in paste0("df_full", c("", 2:8))) {
xchange[[slot]] <- NULL
}
})
##### Cancel button of the confirmation dialog: just close it.
observeEvent(input$cancel, {
removeModal()
})
##### create logic of showing the finish button
##### only show finish button if at least one dataframe was created
##### Show the Finish button once at least one feature dataframe exists.
##### The original omitted df_full6 and df_full7 even though custom_df()
##### combines df_full through df_full7 — include them so the button also
##### appears when only an index or sentiment variable was stored.
output$finish_button <- renderUI({
stored <- list(xchange$df_full, xchange$df_full2, xchange$df_full3,
xchange$df_full4, xchange$df_full5, xchange$df_full6,
xchange$df_full7)
req(any(!vapply(stored, is.null, logical(1))))
actionButton("finish", "Finish")
})
# ###### combine the single dataframes from each variable in on large dataframe
# ##### start combination based on action button
##### Combine the per-variable feature dataframes into the final custom
##### dataset when the Finish button is pressed.
custom_df <- eventReactive(input$finish, {
##### combine the dataframes in a list; c() on data.frames flattens them
##### into one list of columns and silently drops NULL slots — presumably
##### the shape prep_custom() expects (confirm against its definition).
##### NOTE(review): df_full8 (the fallback slot filled by the feature
##### observer) is not included here — confirm this is intentional.
list_dfs <- c(xchange$df_full,xchange$df_full2,xchange$df_full3,xchange$df_full4,
xchange$df_full5,xchange$df_full6,xchange$df_full7)
#### call function for preprocessing
df <- prep_custom(list_dfs)
df
})
##### Render the user-built custom dataset as a scrollable table with a
##### dark header.
output$tableCustom <- DT::renderDataTable({
table_opts <- list(
scrollX = T,
autoWidth = T,
initComplete = JS(
"function(settings, json) {",
"$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
"}")
)
widget <- DT::datatable(custom_df(), options = table_opts, rownames = F)
DT::formatStyle(widget, columns = c(1))
})
######################################Default dataset###########################
##### Default dataset: AR/MA features generated automatically for the
##### selected outcome variable.
df_xgb <- reactive({
base_df <- final_regression_df_xgb()
##### delegate feature creation to the shared helper
ARMA_creator(base_df, input$regression_outcome_xgb)
})
##### inform user if no further variable is selected
##### Warn the user when only dates + outcome remain (ncol == 2), i.e. no
##### additional control variables were selected.
observeEvent(input$lag_tabs, {
req(input$lag_tabs)
if((ncol(final_regression_df_xgb()) == 2)){
showNotification("No further variables selected. Model will be trained on features from dep. variable", type = "warning")}
})
##### Render the default (auto-generated) feature dataset, dark header,
##### narrow first column.
output$df_xgb_default <- DT::renderDataTable({
##### create datatable
DT::datatable(df_xgb(),options = list(
scrollX = T,
autoWidth = T,
initComplete = JS(
"function(settings, json) {",
"$(this.api().table().header()).css({'background-color': '#000', 'color': '#fff'});",
"}")
), rownames = F
) %>% DT::formatStyle(columns = c(1), width='75px')
})
# options = list(
# autoWidth = FALSE, scrollX = TRUE)) %>% DT::formatStyle(names(df_xgb()),
# lineHeight = '80%',
# lineWidth = '80%')
########################### Validity tab #######################################
####### create dataframe for the model
##### Reactive: build the train/forecast split used on the validity tab,
##### from either the default or the user-built custom dataset.
df_xgb_train <- reactive({
if(input$lag_tabs == "default"){ ##### If user selected the default dataframe
##### read in original dataset, which is created on the first tab
res <- final_regression_df_xgb()
##### call function to pre-process and split
list_dfs <- default_prep(res,input$regression_outcome_xgb,input$n_ahead,input$ftpye,1)
}else{
##### call the created custom dataframe
res <- custom_df()
##### does not need much more pre-processing
##### split function can be directly called
list_dfs <- split_data_for(res,input$n_ahead,input$ftpye,input$regression_outcome_xgb)
}
##### expand the created features (custom or default) to the forecast
##### dataframe so its column structure matches the training set
res <- ARMA_creator_for(list_dfs$df_forecast,list_dfs$df_train)
#### replace forecast frame with the column-aligned version
list_dfs$df_forecast <- res
list_dfs
})
###### create reactive model
###### Train the xgboost model when Run is pressed; the chosen specification
###### decides between default parameters, user parameters, or tuning.
model_xgbi <- eventReactive(input$run,{
req(input$model_spec)
##### the training split is needed by every specification, fetch it once
res <- df_xgb_train()
if (input$model_spec == "default") {
##### model trained on default parameters
model_xgb(res$df_train)
} else if (input$model_spec == "custom") {
##### model trained on user-supplied parameters
model_xgb_custom(res$df_train,input$mtry,input$trees,input$min_n,input$tree_depth,
input$learn_rate,input$loss_reduction,input$sample_size)
} else {
##### model performs hyperparameter tuning
model_xgb_hyp(res$df_train,input$trees_hyp,input$grid_size)
}
})
#
# output$model_xgb <- renderPrint({
# model_xgbi()[[1]]
# })
##### update sidebar structure based on input (validity tab)
##### Sync the sidebar tabset with the model-specification choice
##### (validity tab).
observeEvent(input$model_spec, { #Observe event from input (model choices)
req(input$model_spec)
updateTabsetPanel(session, "mod_spec", selected = input$model_spec)
})
##### Same sync for the actual-forecast tab's model choice.
observeEvent(input$model_spec_for, { #Observe event from input (model choices)
req(input$model_spec_for)
updateTabsetPanel(session, "mod_spec_for", selected = input$model_spec_for)
})
####### create logic when action buttons are activated
####### Goal: prevent user from prediciting when the inputs have changed
###### --> force user to rerun model when parameters are changed
##### init random value
##### Counter gating the Predict button: 0 = model freshly trained, predict
##### allowed; anything else disables prediction until the model is rerun.
rv_disable <- reactiveValues(i = 1)
##### set to zero when the user runs the model
observeEvent(input$run,{
rv_disable$i <- 0
})
##### keep track of value
##### NOTE(review): fully isolated body -> no reactive dependencies, so this
##### observer fires only once at startup — confirm intent.
observe({
isolate({rv_disable$i = rv_disable$i + 1})
})
##### enable prediction button after model was created
observe({
if(rv_disable$i == 0){
shinyjs::enable("pred")}
else{ ##### disable and show the "rerun model" hint
shinyjs::disable("pred")
shinyjs::show("text2")
}
})
##### create a reactive value to track if the selected forecast changes
##### Track the forecast-horizon selection; changing it invalidates the
##### trained model and disables prediction.
rv_prev_input <- reactiveValues(prev_input = NULL)
##### observe input
observeEvent(input$n_ahead, {
###### compare first-recorded and current selection; if they differ, disable
###### prediction. NOTE(review): prev_input grows by one element per change
###### but only element [1] is ever read — confirm this is intended.
rv_prev_input$prev_input <- c(rv_prev_input$prev_input , input$n_ahead)
if(rv_prev_input$prev_input[1] != input$n_ahead){
rv_disable$i <- 1
}
})
#### create the same logic for the model selection
#### prevent that user can predict if he/she changes a different model set up
##### Second gate for the Predict button, driven by model-setup changes
##### (mirrors the rv_disable cluster above).
rv_disable_2 <- reactiveValues(i = 1)
##### reset when the model is (re)run
observeEvent(input$run,{
rv_disable_2$i <- 0
})
##### NOTE(review): isolated body -> runs once at startup (see rv_disable).
observe({
isolate({rv_disable_2$i = rv_disable_2$i + 1})
})
##### enable/disable the Predict button based on the counter
observe({
if(rv_disable_2$i == 0){
shinyjs::enable("pred")}
else{
shinyjs::disable("pred")
shinyjs::show("text2")
}
})
##### Track the model-specification tab; switching models invalidates the
##### trained model until Run is pressed again.
rv_prev_input_2 <- reactiveValues(prev_input = NULL)
#### observe if user changes the model
observeEvent(input$mod_spec, {
##### prevent prediction if model changes by setting reactive value to 1
rv_prev_input_2$prev_input <- c(rv_prev_input_2$prev_input , input$mod_spec)
if((rv_prev_input_2$prev_input[1] != input$mod_spec)){
rv_disable_2$i <- 1
} else{
##### re-enable only when the horizon selection also matches the trained
##### model again; otherwise keep prediction disabled
if(rv_prev_input$prev_input[1] == input$n_ahead){
rv_disable_2$i <- 0}
else{
rv_disable_2$i <- 1
}
}
})
######################### Prediction
##### Generate out-of-sample predictions when the Predict button is pressed.
prediction_xgb <- eventReactive(input$pred,{
##### load train/forecast split
res <- df_xgb_train()
##### call function to generate predictions from the trained model
preds <- pred_output(res,input$regression_outcome_xgb,model_xgbi()[[1]])
#### load original (undifferenced) df
df_orig <- final_regression_df_xgb()
### If the dependent variable was differenced upstream, restore the original
### scale: cumulative-sum the predicted changes and add the last observed
### training level. The ADF p-value > 0.1 check presumably mirrors the
### threshold used in make_ts_stationary — TODO confirm they stay in sync.
if(adf.test(df_orig[,2],k=2)$p.value > 0.1){
preds <- cumsum(preds) + df_orig[(nrow(res$df_train)-1),2]
}
preds
})
##### disable run and prediction button while model trains
##### While the model trains: lock both buttons, show the "training" text;
##### the outputs below re-enable them when rendering finishes.
observeEvent(input$run, {
shinyjs::disable("run")
shinyjs::show("text1")
shinyjs::disable("pred")
shinyjs::hide("text2")
})
##### In-sample fit metrics (RMSE/MAE/MAPE) for the trained model, rendered
##### as a styled kable table; blank while inputs are out of sync.
output$model_fit <- function(){
if(rv_prev_input$prev_input[1] != input$n_ahead){ #### if model parameters are not in line
return()
}else{
##### load train/forecast split
res <- df_xgb_train()
#### rename the outcome column to the generic "y" used in the formula
colnames(res$df_train)[which(names(res$df_train) == input$regression_outcome_xgb)] <- "y"
#### fit the model spec on the training data (first column = dates, dropped)
model_xgboost <- model_xgbi()[[1]] %>%
parsnip::fit(formula = y ~ .,data = res$df_train[,c(-1)])
#### get fitted values on the training set
fits <- predict(model_xgboost,res$df_train[,c(-1)])
######### calculate metrics; `rsme` spelling comes from metrics_out()
out <- metrics_out(res,fits)
#########
##### collect metrics in dataframe
df_need <- data.frame(c(out$rsme,
out$mean_abs,
out$mape),
row.names = c("RMSE","MAE","MAPE"))
colnames(df_need)<- "value"
##### NOTE(review): `colnames = NULL` is not a kable() argument (kable uses
##### `col.names`); it is presumably swallowed by `...` — confirm.
knitr::kable(df_need, caption = glue("Performance metrics"),colnames = NULL) %>%
column_spec(1:2, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
}
##### Residual autocorrelation test table (Ljung-Box via type = "Lj"); also
##### re-enables the action buttons as the last step of a Run cycle.
output$serial_out_xgb <- function(){
###### enable action buttons (this is the last calculation when the run
###### button is pressed)
shinyjs::enable("run")
shinyjs::hide("text1")
shinyjs::enable("pred")
shinyjs::hide("text2")
######
if(rv_prev_input$prev_input[1] != input$n_ahead){ #### if model parameters are not in line -> remove
return()
}else{
res <- df_xgb_train()
#### rename outcome column to the generic "y" used in the formula
colnames(res$df_train)[which(names(res$df_train) == input$regression_outcome_xgb)] <- "y"
model_xgboost <- model_xgbi()[[1]] %>%
parsnip::fit(formula = y ~ .,data = res$df_train[,c(-1)])
fits <- predict(model_xgboost,res$df_train[,c(-1)])
#### training residuals
resids <- (res$df_train$y - fits)
m <- Box.test(resids, lag = 12, type="Lj")
##### NOTE(review): the bare `m` below is a no-op inside a function body —
##### it evaluates and discards the value; safe to remove.
m
df_need <- data.frame(c(round(m$statistic,5),
round(m$p.value,5),
m$method),
row.names = c("statistic","p.value","method"))
colnames(df_need)<- "Summary"
##### NOTE(review): `colnames = NULL` is not a kable() argument — confirm
##### it is harmlessly swallowed by `...`.
knitr::kable(df_need, caption = glue("Performance metrics"),colnames = NULL) %>%
column_spec(1:2, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
}
#
#
# xgb_metrics_1 <- eventReactive(input$pred,{
#
# ####load prediction
# preds <- prediction_xgb()
# #### load prepared dataframe
# res <- df_xgb_train()
# #### load original values to compare prediction
# df_orig <- final_regression_df_xgb()
# #### select correct time horizon for observed values
# y <- df_orig %>% filter(Dates >= min(res$f_dates) & Dates <= max(res$f_dates))
#
# #########calculate metrics
# out <- metrics_out_final(preds,y)
# #########
#
# ##### collect metrics in dataframe
# df_need <- data.frame(c(out$rsme,
# out$mean_abs,
# out$mape),
# row.names = c("RMSE","MAE","MAPE"))
#
# df_need
# })
#
# output$xgb_metrics <- renderTable({
#
# df_need <- xgb_metrics_1()
# colnames(df_need)<- "value"
# knitr::kable(df_need, caption = glue("Performance metrics"),colnames = NULL) %>%
# column_spec(1:2, color = "lightgrey") %>%
# column_spec(1, bold = T, color = "white") %>%
# row_spec(1, bold = T) %>%
# kableExtra::kable_styling(c("striped","hover"), full_width = F,
# position = "center",
# font_size = 16)
#
#
# })
#
##### Reactive: refit on the training data and run the autocorrelation test
##### on the residuals; result feeds the dynamic summary text below.
##### NOTE(review): shinyjs side effects inside a reactive run only when the
##### reactive is (re)computed — confirm this ordering is relied upon.
box_test <- reactive({
shinyjs::enable("run")
shinyjs::hide("text1")
shinyjs::enable("pred")
shinyjs::hide("text2")
##### load train/forecast split
res <- df_xgb_train()
##### rename outcome column to the generic "y" used in the formula
colnames(res$df_train)[which(names(res$df_train) == input$regression_outcome_xgb)] <- "y"
##### fit model (first column = dates, dropped)
model_xgboost <- model_xgbi()[[1]] %>%
parsnip::fit(formula = y ~ .,data = res$df_train[,c(-1)])
##### predict the training dataset
fits <- predict(model_xgboost,res$df_train[,c(-1)])
##### calculate residuals (fits is the predict() output; presumably a
##### one-column frame, so subtraction works column-wise — TODO confirm)
resids <- (res$df_train$y - fits)
##### check for autocorrelation; type = "Lj" selects the Ljung-Box variant,
##### while the UI text calls it Box-Pierce — NOTE(review): confirm which
##### is intended.
m <- stats::Box.test(resids, lag = 12, type="Lj")
m
})
#### create a dynamic text output to summarize test
##### Dynamic two-line summary of the residual autocorrelation test.
output$test_text_xgb <- renderUI({
intro <- "Box-Pierce test statistic to test for autocorrelation in the AR-residuals:"
#### p-value above 0.1 -> no evidence against uncorrelated residuals
verdict <- if (box_test()$p.value > 0.1) {
"The hypothesis of serially uncorrelated residuals cannot be rejected."
} else {
"The hypothesis of serially uncorrelated residuals can be rejected."
}
##### wrap text in html output
htmltools::HTML(paste(intro, verdict, sep = '<br/>'))
})
##### create plots with dygraphs
##### Interactive forecast plot (dygraphs): observed series plus predictions.
output$forecast_xgb <- renderDygraph({
##### load original dataframe
full_df <- final_regression_df_xgb()
##### load train/forecast split
res <- df_xgb_train()
#### load predictions
preds <- prediction_xgb()
#### delegate plot construction to the shared helper
plot <- forcast_plot_xgb(res,preds,full_df,input$forecast_plot_choice,input$regression_outcome_xgb,
input$Stock_Regression_xgb)
plot
})
#
# ############################ Actual forecast tab ###############################
#
# ##### this preparation only differes in the way that the complete dataset is used for
# training the model
##### Train/forecast split for the actual-forecast tab; differs from
##### df_xgb_train only in that the complete dataset is used for training.
df_xgb_train_for <- reactive({
if(input$lag_tabs == "default"){ ##### default dataset is based on automatic AR/MA creation
res <- final_regression_df_xgb()
##### call function to pre-process and use the full dataset
list_dfs <- default_prep(res,input$regression_outcome_xgb,input$n_ahead2,input$ftpye2,2)
}else{##### dataframe when user created his/her own dataframe
res <- custom_df()
##### create train and forecast dataset
list_dfs <- split_data_for_ahead(res,input$n_ahead2,input$ftpye2)
}
##### create the selected features also for the forecast dataset
##### xgboost cannot predict a dataframe with a different column structure
res <- ARMA_creator_for(list_dfs$df_forecast,list_dfs$df_train)
#### replace forecast frame with the column-aligned version
list_dfs$df_forecast <- res
list_dfs
})
#
# ####### create logic when action buttons are activated
# ####### Goal: prevent user from prediciting when the inputs have changed
# ###### --> force user to rerun model when parameters are changed
#
##### Reactive flag controlling the prediction button:
##### 0 = model freshly run, anything else = inputs changed since last run
rv_disable_act <- reactiveValues(i = 1)
##### reset to zero whenever the user (re-)runs the model
observeEvent(input$run2, {
  rv_disable_act$i <- 0
})
##### bump the counter once at start-up (body is fully isolated,
##### so this observer has no reactive dependencies)
observe({
  isolate({
    rv_disable_act$i <- rv_disable_act$i + 1
  })
})
##### the prediction button is only enabled while the flag is exactly zero
observe({
  if (rv_disable_act$i == 0) {
    shinyjs::enable("pred2")
  } else {
    ##### otherwise disable it and show the "re-run the model" hint
    shinyjs::disable("pred2")
    shinyjs::show("text2_act")
  }
})
##### Remember the previously selected forecast horizon to detect changes
rv_prev_input_act <- reactiveValues(prev_input = NULL)
##### whenever the horizon changes, compare against the first recorded
##### value; on a mismatch force the user to re-run the model first
observeEvent(input$n_ahead2, {
  rv_prev_input_act$prev_input <- c(rv_prev_input_act$prev_input, input$n_ahead2)
  if (rv_prev_input_act$prev_input[1] != input$n_ahead2) {
    rv_disable_act$i <- 1
  }
})
#### Same enable/disable logic, but for changes to the model specification:
#### the user must not predict with a stale model after switching set-ups.
rv_disable_2_act <- reactiveValues(i = 1)
##### reset when the model is (re-)run
observeEvent(input$run2, {
  rv_disable_2_act$i <- 0
})
##### one-off bump at start-up (fully isolated body, no reactive deps)
observe({
  isolate({
    rv_disable_2_act$i <- rv_disable_2_act$i + 1
  })
})
##### enable prediction only while this flag is zero as well
observe({
  if (rv_disable_2_act$i == 0) {
    shinyjs::enable("pred2")
  } else {
    shinyjs::disable("pred2")
    shinyjs::show("text2_act")
  }
})
##### Remember the previously selected model specification (forecast tab)
rv_prev_input_2_act <- reactiveValues(prev_input = NULL)
#### observe if user changes the model
#### NOTE(review): this watches input$mod_spec_for, while model_xgbi2 below
#### reads input$model_spec_for -- confirm these are two distinct inputs
#### rather than a typo for the same control
observeEvent(input$mod_spec_for, {
##### prevent prediction if model changes by setting reactive value to 1
##### (the first recorded value acts as the "trained with" reference)
rv_prev_input_2_act$prev_input <- c(rv_prev_input_2_act$prev_input , input$mod_spec_for)
if((rv_prev_input_2_act$prev_input[1] != input$mod_spec_for)){
rv_disable_2_act$i <- 1
} else{
##### enable if pre-model calculation selection is in line again,
##### i.e. the forecast horizon also matches its first recorded value
if(rv_prev_input_act$prev_input[1] == input$n_ahead2){
rv_disable_2_act$i <- 0}
else{
rv_disable_2_act$i <- 1
}
}
})
###### Reactive model for the actual-forecast tab, (re-)trained on the
###### full dataset each time the user presses the run button.
###### Fix: the prepared data was loaded with an identical
###### `res <- df_xgb_train_for()` call in every branch -- hoisted once,
###### and the if/else-if chain replaced with a switch() dispatch.
model_xgbi2 <- eventReactive(input$run2, {
  req(input$model_spec_for)
  ##### needed by every branch, so load it a single time
  res <- df_xgb_train_for()
  ##### any value other than "default"/"custom" falls through to the
  ##### unnamed branch (hyperparameter tuning), mirroring the original else
  switch(input$model_spec_for,
    ##### model trained on default parameters
    default = model_xgb(res$df_train),
    ##### model trained on user-supplied custom parameters
    custom = model_xgb_custom(res$df_train, input$mtry1, input$trees1,
                              input$min_n1, input$tree_depth1,
                              input$learn_rate1, input$loss_reduction1,
                              input$sample_size1),
    ##### otherwise: perform hyperparameter tuning
    model_xgb_hyp(res$df_train, input$trees_hyp1, input$grid_size1)
  )
})
#
# #################### Prediction
##### Out-of-sample predictions, computed when the predict button is pressed
prediction_xgb_actual <- eventReactive(input$pred2,{
##### load the dataframe prepared for the forecast tab
res <- df_xgb_train_for()
##### predict with the stored model (first list element of model_xgbi2())
preds <- pred_output(res,input$regression_outcome_xgb,model_xgbi2()[[1]])
#### load original df (untransformed values, column 2 = dependent variable)
df_orig <- final_regression_df_xgb()
#### if dependent variable was differenced apply cumsum to restore original values,
#### anchored at the last training observation of the original series
#### NOTE(review): adf.test is unnamespaced (presumably tseries::adf.test);
#### p > 0.1 is taken to mean the series was differenced upstream -- confirm
#### this mirrors the criterion used when the training data was built
if(adf.test(df_orig[,2],k=2)$p.value > 0.1){
preds <- cumsum(preds) + df_orig[(nrow(res$df_train)-1),2]
}
preds
})
##### While the model trains: lock both action buttons and swap the
##### status texts (training hint on, "re-run" hint off)
observeEvent(input$run2, {
  shinyjs::disable("run2")
  shinyjs::disable("pred2")
  shinyjs::show("text1_act")
  shinyjs::hide("text2_act")
})
##### Table of in-sample performance metrics (RMSE/MAE/MAPE) for the model
##### trained on the full dataset (actual-forecast tab); built with
##### knitr::kable + kableExtra styling.
output$model_fit_act <- function(){
if(rv_prev_input_act$prev_input[1] != input$n_ahead2){ #### horizon changed since training
return() #### remove the table instead of displaying stale metrics
}else{
##### load the dataframe prepared for the forecast tab
res <- df_xgb_train_for()
#### rename the outcome column to the generic "y" used in the model formula
colnames(res$df_train)[which(names(res$df_train) == input$regression_outcome_xgb)] <- "y"
#### fit the stored model spec on the training data (first column dropped)
model_xgboost <- model_xgbi2()[[1]] %>%
parsnip::fit(formula = y ~ .,data = res$df_train[,c(-1)])
#### get the fitted (in-sample) values
fits <- predict(model_xgboost,res$df_train[,c(-1)])
######### calculate the error metrics via the helper
out <- metrics_out(res,fits)
#########
##### collect metrics in a one-column dataframe
##### NOTE(review): "rsme" is the field name exposed by metrics_out,
##### even though the row label below reads RMSE
df_need <- data.frame(c(out$rsme,
out$mean_abs,
out$mape),
row.names = c("RMSE","MAE","MAPE"))
colnames(df_need)<- "value"
#### NOTE(review): kable's argument is `col.names`, not `colnames`; the
#### `colnames = NULL` below is likely swallowed by `...` -- confirm intent
knitr::kable(df_need, caption = glue("Performance metrics"),colnames = NULL) %>%
column_spec(1:2, color = "lightgrey") %>%
column_spec(1, bold = T, color = "white") %>%
row_spec(1, bold = T) %>%
kableExtra::kable_styling(c("striped","hover"), full_width = F,
position = "center",
font_size = 16)
}
}
##### Ljung-Box test on the training residuals of the actual-forecast model
box_test_act <- reactive({
  ##### prepared data for the forecast tab
  dat <- df_xgb_train_for()
  ##### rename the outcome column to the generic "y" used in the formula
  colnames(dat$df_train)[which(names(dat$df_train) == input$regression_outcome_xgb)] <- "y"
  ##### fit the stored model spec on the training data (first column dropped)
  fitted_model <- model_xgbi2()[[1]] %>%
    parsnip::fit(formula = y ~ ., data = dat$df_train[, c(-1)])
  ##### in-sample predictions on the training set
  fitted_vals <- predict(fitted_model, dat$df_train[, c(-1)])
  ##### residuals = observed minus fitted
  residual_vals <- (dat$df_train$y - fitted_vals)
  ##### Ljung-Box ("Lj") test for autocorrelation up to lag 12
  stats::Box.test(residual_vals, lag = 12, type = "Lj")
})
##### Table output summarising the Ljung-Box test on the training residuals
##### of the actual-forecast model. Fixes: the caption wrongly read
##### "Performance metrics" for a serial-correlation test table; Box.test is
##### now namespaced (consistent with box_test_act); T/F spelled out.
output$serial_out_xgb_for <- function(){
  ###### re-enable the action buttons here: this is the last calculation
  ###### performed after the run button is pressed
  shinyjs::enable("run2")
  shinyjs::hide("text1_act")
  shinyjs::enable("pred2")
  shinyjs::hide("text2_act")
  ###### if the selected horizon no longer matches the trained model,
  ###### remove the table instead of showing stale results
  if (rv_prev_input_act$prev_input[1] != input$n_ahead2) {
    return()
  } else {
    ##### rebuild the fitted model and its training residuals
    res <- df_xgb_train_for()
    colnames(res$df_train)[which(names(res$df_train) == input$regression_outcome_xgb)] <- "y"
    model_xgboost <- model_xgbi2()[[1]] %>%
      parsnip::fit(formula = y ~ ., data = res$df_train[, c(-1)])
    fits <- predict(model_xgboost, res$df_train[, c(-1)])
    resids <- (res$df_train$y - fits)
    ##### Ljung-Box ("Lj") test for autocorrelation up to lag 12
    m <- stats::Box.test(resids, lag = 12, type = "Lj")
    ##### collect the test summary in a one-column dataframe
    df_need <- data.frame(c(round(m$statistic, 5),
                            round(m$p.value, 5),
                            m$method),
                          row.names = c("statistic", "p.value", "method"))
    colnames(df_need) <- "Summary"
    #### table output
    #### NOTE(review): kable's argument is `col.names`, not `colnames` --
    #### the `colnames = NULL` is kept for byte-compatibility; confirm intent
    knitr::kable(df_need, caption = glue("Ljung-Box test for serial correlation"), colnames = NULL) %>%
      column_spec(1:2, color = "lightgrey") %>%
      column_spec(1, bold = TRUE, color = "white") %>%
      row_spec(1, bold = TRUE) %>%
      kableExtra::kable_styling(c("striped", "hover"), full_width = FALSE,
                                position = "center",
                                font_size = 16)
  }
}
#### Dynamic text output summarising the residual autocorrelation test
#### (actual-forecast tab). Fix: the test run by box_test_act() is
#### stats::Box.test(type = "Lj"), i.e. the Ljung-Box variant, so the
#### headline now names the correct test instead of "Box-Pierce".
output$test_text_xgb_act <- renderUI({
  str1 <- "Ljung-Box test statistic to test for autocorrelation in the AR-residuals:"
  #### a large p-value means no evidence of serial correlation
  if (box_test_act()$p.value > 0.1) {
    str2 <- "The hypothesis of serially uncorrelated residuals cannot be rejected."
  } else {
    str2 <- "The hypothesis of serially uncorrelated residuals can be rejected."
  }
  ##### wrap the two lines in an HTML output with a line break
  htmltools::HTML(paste(str1, str2, sep = '<br/>'))
})
##### Dygraph of the actual out-of-sample forecast
output$plot_1_xgb_actual <- renderDygraph({
  ##### original (untransformed) regression dataframe
  original_df <- final_regression_df_xgb()
  ##### prepared dataframe for the forecast tab
  prepared <- df_xgb_train_for()
  ##### predictions made after pressing the predict button
  predictions <- prediction_xgb_actual()
  ##### helper creates the zoo object and the dygraph
  forcast_plot_xgb_2(prepared, predictions, original_df,
                     input$regression_outcome_xgb,
                     input$Stock_Regression_xgb, input$n_ahead2)
})
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.