gmtools: My small tools for efficiency

#' 计算灰色关联度
#' @author lgm
#'
#' @description m个评价对象（如5中可比资产、3个竞标方案），n个指标的系统；使用n个指标，计算m-1个对比对象和参考对象之间的关联度。
#'
#' @param df_vec  a data.frame in vector format with x0 ,x1,xi in rows,norm_by_row = False for time series（标准化第一列为1）,but true for others (xij for j = 1,..m 值差异较大时，标准指标第一行为1)
#' @param rownum number of row
#' @param norm 标准化的方法。(1) "simp_col", 最简单的情况，如学习成绩、收入，适用于对象间差距较大，但指标（如年份）差异不大。以某一列为分母，进行标准化。（2）"simp_row",同上。（3）maxmin: 极差变换法（标准0-1变换,正向指标（越多越好）:适用于指标的量纲差距大，但是，每个指标在不同的对象间差距较小.(4)linear: 线性比例变换法（正向指标）
#' @return grey relation
#' @export
#' @examples
#'
#' ## df_vec ="x0= 20 30	24;x1=	8	10	9; x2=5	6	7"
#' df_vec ="20 30 24 8 10 9 5 6 7"
#' rownum <- 3
#' grey_relation(df_vec,3,norm="maxmin")
#'
#'## 成绩例子
#' df_vec2 = "100	95	60 90	80	50 1	0.9	0.8"
#' grey_relation(df_vec2,3,norm="maxmin")
#'
#' ## 林木评估例子（标准化后值要在1左右）
#' df_vec3 = "24	15	840	246.43	985	1.05 18.2	12.5	1200	182.63	850	0.82 23.5	15	900	254.04	950	1.05 22.3	14	975	236.26	925	1 26.2	15.5	750	265.42	980	1.05 20.7	13	1050	208.91	930	0.94"
#' grey_relation(df_vec3,6,"maxmin")
#'
#' ##公路建设招标例子(x0,x1,x2,x3 在行里)（标准化后值要在1左右
#' df3 <- "1.1 1.3 5 110 1.1 1.8 4	80 1.2 1.5 3 110 1.5 1.3 5 100"
#' grey_relation(df3,4,"linear")
#'
#' ## 土地评估例子
#' land = "4.51 4.4  4.59  4.56  4.46  4.59  4.55  4.43  4.37  4.64  4.56  4.25 4.4  4.16  4.63 1  1  1  2.33  2.8  2.26  2.48  2.6 2.6  2  2  1  1  3.2  2 1  1  1  1.7  2  2  1.8  2.09  2 1.57  2  2  1  2.17  2 1.2  1  1  1.17  1.06  3.3  1.32  1.21  1.33  3  3  3  1.25  4  1.5 2.21  2.14  2  1.17  1.09  1.09  1.32  1.21  1.33  1  1.29  1.33  2  1.2  1.5 1 4 1 2 4 4 4 3 1 4 1 4 1  1   1 2.46  2.59  2.24  3.27  3.23  3.45 3.06  3.25  3.29  3.23  3.12  2.8  2.7  3.44  3.59 7.2  6.7  7.7  5.9  8.1  7.2  5.7 5.7  6.8  7.8  5.6  7.1  6.6  5.7  6 1.14  3.16  1.84  10.73  1.33 2.99  1.42  2.34  5.72  3.01 2.01  1.52  4.35  1.27  2.62"
#' grey_relation(land,9,"maxmin")

grey_relation <- function(df_vec, rownum, norm = "simp_col") {
	##
	library(magrittr)
	df = scan(text = df_vec) %>%
		matrix(nrow = rownum,byrow = TRUE) %>%
		 data.frame
	rownames(df) <- paste0("x",0:(rownum-1))

	nrow <- dim(df)[1]
	ncol <- dim(df)[2]

	## 标准化
	## 标准化(最简单的情况，如学习成绩、收入，适用于对象间差距较大，但指标（如年份）差异不大。
	if (norm == "simp_row") {
		dft = t(df)
		dfn <- apply(dft,2,function(x) x/dft[,1]) %>% t
	} else if (norm == "simp_col" ) {
		dfn <- apply(df,2,function(x) x/df[,1])
	} else if (norm == "maxmin") {
		# 采用极差变换法（标准0-1变换）
		## 正向指标（越多越好）:适用于指标的量纲差距大，但是，每个指标在不同的对象间差距较小
		## r_ij = [x_{ij} - min_{1<=j<= m} x_{ij} ] / [max_{1<=j<= m} x_{ij} - min_{1<=j<= m} x_{ij} ]
		dfn <- apply(df,2, function(x) (x - min(x))/(max(x)-min(x)))

	} else if (norm == "linear"){
		# 线性比例变换法（正向指标）r_{ij} = x_{ij}/max_{1 <= i <= m} x_{ij}
		dfn <- apply(df,2, function(x) x/max(x))
	} else if (norm == "col_one"){
		#向量归一法：其列向量的模为1 r_{ij} = x_{ij} / sqrt{sum(x_{ij}^2)}
		dfn <- apply(df,2, function(x) x/sqrt(sum(x^2)))

	}


	## 对应差数列表
	df_diff <- apply(dfn,1,function(x) abs(dfn[1,] - x)) %>% t %>% .[-1,]
	df_diff
	minmin <- min(apply(df_diff,1,min))
	maxmax <- max(apply(df_diff,1,max))
	minmin
	maxmax

	## 关联系数计算
	zeta <- 0.5

	xi <- (minmin + zeta*maxmax)/(df_diff + zeta*maxmax)

	## 关联度
	r <- apply(xi,1,mean)

	## 标准化权重
	rweight <- sort(r/sum(r),decreasing = TRUE)

	return(list(df = df,df_norm = dfn,df_diff = df_diff,xi_mat = xi,grey_relation_with_x0 = r, rweight = rweight))

}