#' Time Difference Relevance (Cross Correlation Coefficient)
#'
#' calculate correlation coefficient when crossing date of two time series,
#' and then separate leading, consistent, lagging indicators from economic indicators.
#'
#' @details this function and it's role is equal to \link{KL},
#' just base on different method for dividing groups. Compare to \link{KL}, \code{crosscor}
#' don't request the time series must include only positive values.
#' @param base a numeric vector, containing a basic indicator for comparing to other indicators in order to recognize who is leading indicator ...
#' @param divergent a data frame containing indicators to be compared with \code{base },
#' and then to divide to 3 data frame, storing lagging, convenient, lagging indicators respectively.
#' @param lag max lag (forward or afterward) for calculating \code{KL information}, at range of \eqn{[-lag, lag]}.
#' @param group_lag threshold value of lag for divide different indicator, default is 3, means that
#' lagging or leading lag at range of \eqn{[-3, 3]} will group the indicator to consistent indicator.
#' @param method method to calculate correlation coefficient, refer to \link{cor}.
#' @export crosscor
#' @return A list includes some value as follow :
#' \describe{
#' \item{Coincident_Indicator}{data frame, including coincident indicator}
#' \item{Leading_Indicator}{data frame, including leading indicator}
#' \item{Lagging_Indicator}{data frame, lagging indicator}
#' \item{Cor_Matrix}{matrix including correlation coefficient and lag information}
#' \item{Cor_Max}{data frame, including vector of \code{Cor_lag} : lag corresponding max
#' correlation coefficient and vector of \code{Cor_max} : max correlation coefficient}
#' }
#' @examples
#' x1 <- c( 5, 9, 5, 12, 13, 8, 14, 14, 8, 10)
#' x2 <- c( 9, 15, 5, 14, 8, 6, 12, 19,8, 7)
#' base<-c( 26, 13, 25, 27 ,25 ,13 ,32 ,20 ,23 ,29)
#' dt<-data.frame(x1, x2, base)
#' crosscor(base,dt,lag = 3,group_lag = 1)
crosscor<-function(base, divergent, lag=12, group_lag=3, method = c("pearson", "kendall", "spearman")) {
# --异常控制--------------------------------
# if (any(is.na(base))) stop("'base' has value of 'NA'") # 检查输入的数据是否有NA值
# if (any(base<=0)) stop("'base' isn't a positive sequence")
# if (any(is.na(divergent))) stop("'divergent' has value of 'NA'")
# --定义存放三类指标的数据框----------------
Leading<-data.frame(rep(NA,length(base)))
Coincident<-data.frame((rep(NA,length(base))))
Lagging<-data.frame((rep(NA,length(base))))
# --初始化数据框列顺序
le_i<-0
co_i<-0
la_i<-0
X_num=length(divergent) #得到数据框divergent中含有的向量个数
lag<-abs(lag)
# --定义kl信息数据框
# 列数==2*lag+1,行数==X_num
# 先生成矩阵
Cor_Matrix<-matrix(rep(NA,(2*lag+1)*(X_num)),X_num,2*lag+1)
# 转成数据框
Cor_Matrix <- as.data.frame(Cor_Matrix)
colnames(Cor_Matrix) <- c(-lag:lag)
row.names(Cor_Matrix) <- names(divergent)
# 定义存放最大相关系数值及期数的数据框
Cor_Max <- matrix(rep(NA,2*X_num),ncol = 2)
Cor_Max <- as.data.frame(Cor_Max)
names(Cor_Max) <- c("Cor_lag","Cor_max")
row.names(Cor_Max) <- names(divergent)
for (i in 1:X_num) {
# print(class(divergent[i]))
# break
# kl_min <- 0 #最小kl值初始为0,不同向量比较的基准kl值不同
# pxi<-divergent[,i]/sum(divergent[,i]) #注意divergent[,i] 和 divergent[i]的区别,前者是向量;后者是数据框,赋值后,是数据框
xi <- divergent[,i]
# --错误控制---
# if (any(xi<=0)) stop(paste("column",i,"of 'divergent' isn't a positive sequence"))
# --lag为负时
for (l in -lag:0) {
# 按滞后或超前期截取向量
y_sub<-base[(abs(l)+1):length(base)]
xi_sub<-xi[1:(length(xi)-abs(l))]
# 先截取,后标准化 ---------后期可以加上(通用的标准化,对负值依然有效)!!
# py_sub<-y_sub/sum(y_sub,na.rm = T) ##考虑缺失值
# pxi_sub<-xi_sub/sum(xi_sub,na.rm = T)
py_sub = y_sub # 没有标准化代码,权且这么着
pxi_sub = xi_sub
# 计算相关系数
cor_l = cor(pxi_sub,py_sub)
Cor_Matrix[i,l+lag+1] <- cor_l #相应的kl值填到kl矩阵中
# 第一次计算时得到的相关系数,即认为最大值
if (l == - lag) { # lag 在前面已做绝对值处理
cor_max = cor_l # 用来存放目前为止最大相关系数值
cor_lag = l # 最优滞后或超前期,后续用来决定所计算的向量是先行,滞后,还是一致
}
else {
if (cor_l > cor_max) {
cor_max = cor_l
cor_lag = l #将期数换成最大相关系数对应的期数
}
}
}
# --lag为正时
for (l in 1:lag) {
y_sub<-base[1:(length(base)-abs(l))]
xi_sub<-xi[(abs(l)+1):length(xi)]
py_sub = y_sub # 没有标准化代码,权且这么着
pxi_sub = xi_sub
# 计算相关系数
cor_l = cor(pxi_sub,py_sub)
Cor_Matrix[i,l+lag+1]<-cor_l #相应的kl值填到kl矩阵中
# 第一次计算时得到的相关系数,即认为最大值
if (l == - lag) { # lag 在前面已做绝对值处理
cor_max = cor_l # 用来存放目前为止最大相关系数值
cor_lag = l # 最优滞后或超前期,后续用来决定所计算的向量是先行,滞后,还是一致
}
else {
if (cor_l > cor_max) {
cor_max = cor_l
cor_lag = l #将期数换成最大相关系数对应的期数
}
}
}
# 将最大相关系数和期数填入数据框Cor_Max
Cor_Max[i,1]<-cor_lag
Cor_Max[i,2]<-cor_max
# 将divergent[i]分到相应的组中去
group_lag=abs(group_lag)
if (cor_lag<(-group_lag)) {
# 先行组
le_i<-le_i+1
Leading[le_i]<-divergent[i]
colnames(Leading)[le_i]<-names(divergent[i])
}
else if (cor_lag>group_lag) {
# 滞后组
la_i<-la_i+1
Lagging[la_i]<-divergent[i]
colnames(Lagging)[la_i]<-names(divergent[i])
}
else { ##[-group_lag,group_lag]为一致组
# 一致
co_i<-co_i+1
Coincident[co_i]<-divergent[i]
colnames(Coincident)[co_i]<-names(divergent[i])
}
}
# 展示结果
# print(Coincident_Indicator)
# print(Leading_Indicator)
# print(Lagging_Indicator)
Coincident_Indicator <- if (!(length(Coincident)==1 & all(is.na(Coincident)))) Coincident
Leading_Indicator <- if (!(length(Leading)==1 & all(is.na(Leading)))) Leading
Lagging_Indicator <- if (!(length(Lagging)==1 & all(is.na(Lagging)))) Lagging
Cor_Matrix <- Cor_Matrix
Cor_Max <- Cor_Max
lst = list(Coincident_Indicator = Coincident_Indicator,
Leading_Indicator = Leading_Indicator,
Lagging_Indicator = Lagging_Indicator,
Cor_Matrix = Cor_Matrix,
Cor_Max
)
lst
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.