研究各股票的利润情况
# Packages used throughout the report ----
library(tidyverse)
library(lubridate)
library(stringr)
library(shiny)
library(ZStockModels)

# Global knitr chunk options: code, warnings and messages are hidden, and
# output is collapsed and prefixed with "#>".
knitr::opts_chunk$set(
  comment = "#>",
  echo = FALSE,
  warning = FALSE,
  message = FALSE,
  collapse = TRUE
)

# Tibble printing limits (tibble.print_min / tibble.print_max).
options(tibble.print_min = 10L, tibble.print_max = 20L)
# Open the GTA stock database ----
# `gta_db` is not defined in this chunk; it is expected to be in scope already.
stock_db <- stock_db(gta_db, "GTA_SQLData")
open_stock_db(stock_db)

# Initialise the stock database; invisible() keeps the return value from
# being printed in the report.
invisible(init_stock_db(stock_db))

# Lookup lists for stock names and field names. Each assignment reuses the
# name of the function that builds it.
stock_name_list <- stock_name_list(stock_db)
stock_field_list <- stock_field_list(stock_db)

# Names of all stock tables in the database, rendered as a report table.
db_tables <- list_stock_tables(stock_db)
knitr::kable(
  x = db_tables,
  caption = "Tables in GTA Stock Database",
  align = "l"
)
共有 `r length(db_tables)` 个数据表
全部股票与行业信息如下:
# Company master data ----
# Load the company table and convert it to a tibble. as_tibble() is used
# because tibble::as.tibble() is deprecated.
trd_company.df <- get_table_dataset(
  stock_db,
  table_name = "TRD_Co_公司基本情况"
)
trd_company.tib <- tibble::as_tibble(trd_company.df)

# All stocks in the investment universe, with the columns renamed to the
# short names used in the rest of the report, ordered by industry then code.
universe_stocks <- trd_company.tib %>%
  select(stkcd, stkname = stknme, indcd = nnindcd, indname = nnindnme) %>%
  arrange(indcd, stkcd)

# All industries in the investment universe with their stock counts.
# summarise() only drops the last grouping level, so ungroup() explicitly to
# avoid handing a still-grouped tibble to later code.
universe_industries <- universe_stocks %>%
  group_by(indcd, indname) %>%
  summarise(stknum = n()) %>%
  ungroup() %>%
  arrange(indcd, desc(stknum))

# Output the results
universe_industries
universe_stocks
共 `r nrow(universe_industries)` 个行业, `r nrow(universe_stocks)` 只股票
组合股票明细:
# Study parameters ----
# Database table holding the indicators under study.
table_indicator <- "FR_T5_盈利能力"

# Stock codes that make up the portfolio.
# NOTE(review): these are numeric literals, so leading zeros are dropped
# (000550 is the number 550). The %in% match below only finds those stocks if
# `stkcd` is stored as a number -- confirm the column type.
stock_stkcds_list <- c(600066, 000550, 600031, 000157, 000651, 000333)
portfolio_stocks <- filter(universe_stocks, stkcd %in% stock_stkcds_list)

# Indicator fields of interest, with their display names looked up from the
# field list.
focus_fields_code <- c("f053301c", "f050504c", "f052401c")
focus_fields_name <- code2name(stock_field_list, focus_fields_code)
focus_fields.tib <- tibble(
  code = focus_fields_code,
  name = focus_fields_name
)

# The single indicator analysed in this report: the second focus field.
the_indicator <- focus_fields_code[2]
the_indicator_name <- code2name(stock_field_list, the_indicator)

# Number of industries shown in the top / last rankings.
top_last_n_industies <- 5

# Output the results
portfolio_stocks
focus_fields.tib
研究指标: `r str_c(the_indicator, "/", the_indicator_name)`
加载组合指标数据如下:
# Indicator data ----
# Load the indicator table for all stocks as a tibble. as_tibble() replaces
# the deprecated as.tibble(); the original converted the same object twice,
# so the second name is now a plain alias of the first.
universe_data.df <- as_tibble(
  get_table_dataset(stock_db, table_name = table_indicator)
)
universe_data.tib <- universe_data.df

# Indicators of all stocks, enriched with company information.
# - periodtype: "annual" when the accounting period ends in December,
#   otherwise "quarter".
# - only rows with typrep == "A" are kept.
# - `indcd.x` is the indicator table's copy of `indcd`, as named by the
#   join's default suffixing.
# - contains("0") keeps every column whose name contains "0" (presumably the
#   indicator code columns such as f050504c -- verify against the table).
universe_data <- universe_data.tib %>%
  left_join(trd_company.tib, by = "stkcd") %>%
  mutate(
    periodtype = ifelse(lubridate::month(accper) == 12, "annual", "quarter")
  ) %>%
  filter(typrep == "A") %>%
  select(
    accper, periodtype, stkcd,
    stkname = stknme, indcd = indcd.x, indname = nnindnme,
    contains("0")
  )

# Indicators restricted to the portfolio stocks.
portfolio_data <- universe_data %>%
  dplyr::filter(stkcd %in% stock_stkcds_list)

# Output the results
portfolio_data
组合指标字段信息如下:
# Indicator field catalogue ----
# Indicator columns are the ones whose name contains a digit; pair each code
# with its display name from the field list.
stock_field_code_list <- colnames(portfolio_data)
stock_field_code_list <- stringr::str_subset(stock_field_code_list, "\\d")
stock_field_name_list <- code2name(stock_field_list, stock_field_code_list)
portfolio_fields <- tibble::tibble(
  code = stock_field_code_list,
  name = stock_field_name_list
)

# Keep the identifying columns plus the single indicator under study.
# `the_indicator` holds a column name as a string, so it is unquoted with !!.
# A bare `the_indicator` inside select() is ambiguous: it would pick a data
# column literally named "the_indicator" if one existed.

# Indicator data for all stocks.
universe_indicators <- universe_data %>%
  select(accper:indname, !!the_indicator)

# Indicator data for the portfolio.
portfolio_indicators <- portfolio_data %>%
  select(accper:indname, !!the_indicator)

# Output the results
portfolio_fields
portfolio_indicators
研究指标: `r str_c(the_indicator, "/", the_indicator_name)`
1.各行业指标数据统计分布
各行业各指标中值分布如下:
# Industry-level indicator per period ----
# For every industry and accounting period compute mean / median / sd / max /
# min of the indicator across stocks. The per-period median is then renamed
# to the indicator's own code, so it stands in as the industry's value.
# No na.rm is passed, so a missing value in a group makes its statistics NA.
industry_indicator_peroid <- universe_indicators %>%
  group_by(indname, accper) %>%
  summarise_at(the_indicator, .funs = funs(mean, median, sd, max, min)) %>%
  mutate(
    periodtype = ifelse(lubridate::month(accper) == 12, "annual", "quarter")
  ) %>%
  select(
    indname, accper, periodtype,
    !!the_indicator := median,
    everything()
  ) %>%
  arrange(indname, accper) %>%
  ungroup()

# Industry summary over all periods ----
# Summarise the per-period industry value for each industry, drop rows
# without an industry name, and rank by the mean.
industry_indicator_summary <- industry_indicator_peroid %>%
  group_by(indname) %>%
  summarise_at(the_indicator, .funs = funs(mean, median, sd, max, min)) %>%
  filter(!is.na(indname)) %>%
  arrange(desc(mean))

# Output the results
industry_indicator_summary

# Top and last industries ----
# `sort_var` names the summary column used for ranking; it is resolved with
# get() inside the dplyr / ggplot2 calls below.
sort_var <- "mean"
top_industries <- industry_indicator_summary %>%
  top_n(top_last_n_industies, get(sort_var)) %>%
  pull(indname)
last_industries <- industry_indicator_summary %>%
  top_n(-top_last_n_industies, get(sort_var)) %>%
  pull(indname)

# Bar chart of the top industries.
# NOTE(review): the y label says "Median" while the plotted column is
# `sort_var` ("mean") -- confirm which wording is intended.
industry_indicator_summary %>%
  filter(indname %in% top_industries) %>%
  ggplot(aes(x = reorder(indname, get(sort_var)), y = get(sort_var))) +
  geom_col() +
  labs(
    title = str_c("Top Industries of Indicator"),
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = "Industry",
    y = "Median of Indicator"
  ) +
  geom_label(
    aes(label = format(get(sort_var), digits = 2, nsmall = 2)),
    hjust = TRUE
  ) +
  coord_flip()

# Bar chart of the last industries.
industry_indicator_summary %>%
  filter(indname %in% last_industries) %>%
  ggplot(aes(x = reorder(indname, get(sort_var)), y = get(sort_var))) +
  geom_col() +
  labs(
    title = str_c("Last Industries of Indicator"),
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = "Industry",
    y = "Median of Indicator"
  ) +
  geom_label(
    aes(label = format(get(sort_var), digits = 2, nsmall = 2)),
    hjust = FALSE
  ) +
  coord_flip()

# Histogram of the per-period distribution for the top industries,
# one facet per industry, with the x axis zoomed to [-1, 1].
industry_indicator_peroid %>%
  filter(indname %in% top_industries) %>%
  ggplot(aes(x = get(the_indicator))) +
  geom_histogram(binwidth = 0.01) +
  geom_density() +
  labs(
    title = "Distribution Summarry of Top Industries",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  coord_cartesian(xlim = c(-1, 1)) +
  facet_wrap(~indname)

# Histogram of the per-period distribution for the last industries.
industry_indicator_peroid %>%
  filter(indname %in% last_industries) %>%
  ggplot(aes(x = get(the_indicator))) +
  geom_histogram(binwidth = 0.01) +
  geom_density() +
  labs(
    title = "Distribution Summarry of of Last Industries ",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  coord_cartesian(xlim = c(-1, 1)) +
  facet_wrap(~indname)

# Disabled alternative: boxplot of the top industries.
# industry_indicator_peroid %>%
#   filter(indname %in% top_industries) %>%
#   ggplot(aes(x = reorder(indname, median), y = median)) +
#   geom_boxplot() +
#   coord_flip()
2.各行业指标数据时间趋势
# Time trends of the industry-level indicator ----
# Every chart is a column plot over the accounting period, faceted by
# industry, with the y axis zoomed to [-1, 1].

# Top industries: annual periods only.
industry_indicator_peroid %>%
  filter(indname %in% top_industries) %>%
  filter(periodtype == "annual") %>%
  ggplot(aes(x = accper, y = get(the_indicator))) +
  geom_col() +
  labs(
    title = "Annual Trends of Top Industries",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  coord_cartesian(ylim = c(-1, 1)) +
  facet_wrap(~indname)

# Top industries: all periods (no filter on periodtype).
industry_indicator_peroid %>%
  filter(indname %in% top_industries) %>%
  ggplot(aes(x = accper, y = get(the_indicator))) +
  geom_col() +
  labs(
    title = "Quarterly Trends of Top Industries",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  coord_cartesian(ylim = c(-1, 1)) +
  facet_wrap(~indname)

# Last industries: annual periods only.
industry_indicator_peroid %>%
  filter(indname %in% last_industries) %>%
  filter(periodtype == "annual") %>%
  ggplot(aes(x = accper, y = get(the_indicator))) +
  geom_col() +
  labs(
    title = "Annual Trends of Last Industries",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  coord_cartesian(ylim = c(-1, 1)) +
  facet_wrap(~indname)

# Last industries: all periods (no filter on periodtype).
industry_indicator_peroid %>%
  filter(indname %in% last_industries) %>%
  ggplot(aes(x = accper, y = get(the_indicator))) +
  geom_col() +
  labs(
    title = "Quarterly Trends of Last Industries",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  coord_cartesian(ylim = c(-1, 1)) +
  facet_wrap(~indname)
1.各股指标数据统计分布
# Distribution of the indicator per portfolio stock ----
# Summary statistics per stock, ignoring missing values. The indicator column
# is looked up by name with get() because its name is held in `the_indicator`.
portfolio_indicators_summary <- portfolio_indicators %>%
  group_by(stkname) %>%
  summarise(
    mean = mean(get(the_indicator), na.rm = TRUE),
    std = sqrt(var(get(the_indicator), na.rm = TRUE)),
    max = max(get(the_indicator), na.rm = TRUE),
    Q3 = quantile(get(the_indicator), probs = c(0.75), na.rm = TRUE),
    median = median(get(the_indicator), na.rm = TRUE),
    Q1 = quantile(get(the_indicator), probs = c(0.25), na.rm = TRUE),
    min = min(get(the_indicator), na.rm = TRUE)
  )

# Render the summary as a report table.
knitr::kable(
  x = portfolio_indicators_summary,
  caption = "Portfolio Indicator Summary",
  align = "l"
)

# Boxplot per stock, ordered by the stock's median and coloured by industry.
portfolio_indicators %>%
  ggplot(
    aes(
      x = reorder(stkname, get(the_indicator), FUN = median),
      y = get(the_indicator),
      colour = indname
    )
  ) +
  geom_boxplot() +
  labs(
    title = "Box Summarry ",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  theme(legend.position = "bottom") +
  coord_flip()

# Histogram with density overlay, one facet per stock.
portfolio_indicators %>%
  ggplot(aes(x = get(the_indicator))) +
  geom_histogram(binwidth = 0.01) +
  geom_density() +
  labs(
    title = "Distribution Summarry ",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  facet_wrap(~stkname)
2.各股指标数据时间趋势
# Time trends of the indicator per portfolio stock ----
# Both charts are column plots over the accounting period, one facet per
# stock.

# Annual periods only.
portfolio_indicators %>%
  filter(periodtype == "annual") %>%
  ggplot(aes(x = accper, y = get(the_indicator))) +
  geom_col() +
  labs(
    title = "Annual Trends",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  facet_wrap(~stkname)

# All periods (no filter on periodtype).
portfolio_indicators %>%
  ggplot(aes(x = accper, y = get(the_indicator))) +
  geom_col() +
  labs(
    title = "Quarterly Trends",
    subtitle = str_c("(", the_indicator, "/", the_indicator_name, ")"),
    x = NULL,
    y = NULL
  ) +
  facet_wrap(~stkname)
# Close the GTA stock database ----
# Releases the database handle held in `stock_db` once the report is done.
close_stock_db(stock_db)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.