#関連を計算
calcor <- function(formula, data,
OOmethod = "Kendall",
OFmethod = "Kruskal-Wallis",
FFmethod = "Fisher"){
#データを解析用に変換
data0 <- model.frame(formula, data, na.action = NULL)
#説明変数がNAの場合、丸ごと列を除去。
#NAでない位置を返す。
notnapos <- function(x){
ret <- c(1:length(x))[!is.na(x)]
return(ret)
}
notnay <- notnapos(data0[,1])
notnadata <- data0[notnay, ]
#NAを除去した変数
y <- notnadata[,1] #説明変数
x <- notnadata[,-1] #目的変数
#yとxをdata.frameにしてNAを除去
na.omit.df <- function(x1, x2){
df <- data.frame(x1 = x1, x2 = x2)
df <- na.omit(df)
return(df)
}
#名義尺度か判断
#factorかcharacterの場合のみnominalと判断
is.nominal <- function(x){
if(is.factor(x) || is.character(x)){ return(TRUE) }else{ return(FALSE) }
}
#順序尺度として扱えるか判断
#numericかintegerの場合のみorderと判断。
is.order <- function(x){
if(is.numeric(x) || is.integer(x)){ return(TRUE) }else { return(FALSE) }
}
#名義尺度か順序尺度のどちらかとして扱えるか判断
is.nominal.order <- function(x){
if(is.nominal(x) || is.order(x)){ return(TRUE)}else{ return(FALSE)}
}
#長さが1の場合、0を返す標準偏差。
sd2 <- function(x){
if(length(x) == 1){return(0)}else{return(sd(x))}
}
#順序尺度と順序尺度
#p値を返す。
OOtest <- function(x1, x2){
#x1かx2のどちらかが全てNAならNAを返す。
if(sum(!is.na(x1)) ==0 || sum(!is.na(x2)) == 0){return(NA)}
#x1かx2の標準偏差がゼロならNAを返す。
if(sd2(x1) == 0 || sd2(x2) == 0){return(NA)}
if(OOmethod == "Kendall"){
result <- cor.test(x1, x2, method = "kendall")
return(result$p.value)
}
if(OOmethod == "Spearman"){
result <- cor.test(x1, x2, method = "spearman")
return(result$p.value)
}
stop("The verification method between order scale and ordinal scale is incorrect.")
}
#順序尺度と名義尺度
#p値を返す。
OFtest <- function(x1, x2){
#x1かx2のどちらかが全てNAならNAを返す。
if(sum(!is.na(x1)) ==0 || sum(!is.na(x2)) == 0){return(NA)}
if(OFmethod == "Kruskal-Wallis"){
if(is.order(x1)){
if(sd2(x1) == 0 || length(unique(x2)) == 1){return(NA)}
df <- data.frame(x1 = x1, x2 = x2)
}else{
if(sd2(x2) == 0 || length(unique(x1)) == 1){return(NA)}
df <- data.frame(x1 = x2, x2 = x1)
}
result <- kruskal.test(x1 ~ x2, df)
return(result$p.value)
}
stop("The verification method between the order scale and the nominal scale is incorrect.")
}
#名義尺度と名義尺度
FFtest <- function(x1, x2){
#x1かx2のどちらかが全てNAならNAを返す。
if(sum(!is.na(x1)) ==0 || sum(!is.na(x2)) == 0){return(NA)}
if(length(unique(x1)) == 1 || length(unique(x2)) == 1){return(NA)}
tab <- table(x1, x2)
if(FFmethod == "Fisher"){
result <- fisher.test(tab)
return(result$p.value)
}
if(FFmethod == "chisq"){
result <- chisq.test(tab)
return(result$p.value)
}
stop("The verification method between the nominal scale and the nominal scale is incorrect.")
}
#データに適用
#目的変数のチェック
if(!is.nominal.order(y)){
stop("Evaluation target is neither numeric, integer, order nor character.")
}
#初期設定
xl <- length(x[1, ])
df <- NULL
grp_df <- NULL
#dfを追加
adddf <- function(df, name, OO, OF, FF){
df0 <- data.frame(name, OO, OF, FF)
if(is.null(df)){df <- df0}else{df <- rbind(df, df0)}
return(df)
}
#yが順序尺度の場合
if(is.order(y)){
yis <- "order"
for(i in 1:xl){
data_df <- na.omit.df(x1 = y, x2 = x[, i])
if(is.order(data_df$x2)){ OO <- OOtest(x1 = data_df$x1, x2 = data_df$x2)}else{ OO <- NA}
if(is.nominal(data_df$x2)){ OF <- OFtest(x1 = data_df$x1, x2 = data_df$x2)}else{ OF <- NA}
FF <- NA
df <- adddf(df, name = names(x)[i], OO = OO, OF = OF, FF = FF)
data_df <- data.frame(data_df[,2], data_df[,1])
names(data_df) <- c(names(data0)[i + 1], names(data0)[1])
grp_df[[i]] <- data_df
}
}
#yが名義尺度の場合
if(is.nominal(y)){
yis <- "nominal"
for(i in 1:xl){
data_df <- na.omit.df(x1 = y, x2 = x[, i])
OO <- NA
if(is.order(data_df$x2)){ OF <- OFtest(x1 = data_df$x2, x2 = data_df$x1)}else{ OF <- NA}
if(is.nominal(data_df$x2)){ FF <- FFtest(x1 = data_df$x1, x2 = data_df$x2)}else{ FF <- NA}
df <- adddf(df, name = names(x)[i], OO = OO, OF = OF, FF = FF)
data_df <- data.frame(data_df[,2], data_df[,1])
names(data_df) <- c(names(data0)[i + 1], names(data0)[1])
grp_df[[i]] <- data_df
}
}
#各手法で出したp値の表に、代表のp値を追加。
pcal <- function(x){
if(sum(!is.na(x)) == 0){return(NA)}else{x[!is.na(x)]}
}
pp <- apply(df[,-1], 1, pcal)
df <- data.frame(df, p.value = pp)
#ベクトルに数値を代入
#名前をつけ、NAを除去し、降順に並べ替え
vec_sort <- function(data, names){
names(data) <- names
data1 <- data[!is.na(data)]
data_sort <- data1[order(data1, decreasing = TRUE)]
return(data_sort)
}
#データを成形
vec_OO <- vec_sort(data = df$OO, names = df$name)
vec_OF <- vec_sort(data = df$OF, names = df$name)
vec_FF <- vec_sort(data = df$FF, names = df$name)
#返り値
ret <- list()
ret$df <- df
ret$grp_df <- grp_df
ret$OO <- vec_OO
ret$OF <- vec_OF
ret$FF <- vec_FF
ret$method <- c(OOmethod, OFmethod, FFmethod)
ret$yis <- yis
class(ret) <- "calcor"
return(ret)
}
#結果を棒グラフで表示
plot.calcor <- function(data, significance = 0.05, cex = 0.5){
num_check <- function(x1, x2, x3){
num <- 0
if(is.numeric(x1)){num <- num + 1}
if(is.numeric(x2)){num <- num + 1}
if(is.numeric(x3)){num <- num + 1}
return(num)
}
num <- num_check(x1 = data$OO, x2 = data$OF, x3 = data$FF)
par(mfrow = c(1, num))
check_plot <- function(data, method){
if(is.numeric(data)){
#色の定義
dl <- length(data)
dsl <- sum(data < significance)
col = c(rep("gray", times = (dl - dsl)), rep("red", times = dsl))
#棒グラフを描く
barplot(data, horiz = TRUE, las = 1,
xlab = "p-value", xlim = c(0, 1),
main = method,
cex.axis = cex, cex.names = cex,
col = col)
}
}
#par(plt = c(0.5,0.9,0.3,0.8))
check_plot(data$OO, data$method[1])
check_plot(data$OF, data$method[2])
check_plot(data$FF, data$method[3])
par(mfrow = c(1, 1))
}
#p値の一覧を表示
summary.calcor <- function(data){
#p値で並べ替え
catdf <- data$df[!is.na(data$df$p.value),]
catdf <- catdf[order(catdf$p.value),]
#計算手法と結果の表示
cat("Test method.\n\n")
if(data$yis == "order"){
cat(paste0("Order vs Order : ", data$method[1], "\n"))
cat(paste0("Order vs Nominal : ", data$method[2], "\n\n\n"))
cat("List of p values of items successfully calculated for the test.\n\n")
print(data.frame(name = catdf$name, Order.Order = catdf$OO, Order.Nominal = catdf$OF))
}
if(data$yis == "nominal"){
cat(paste0("Nominal vs Order : ", data$method[2], "\n"))
cat(paste0("Nominal vs Nominal : ", data$method[3], "\n\n\n"))
cat("List of p values of items successfully calculated for the test.\n\n")
print(data.frame(name = catdf$name, Nominal.Order = catdf$OF, Nominal.Nominal = catdf$FF))
}
#計算に失敗した項目
cat("\nItem failed to calculate p value.\n")
cat(as.character(data$df$name[!is.na(data$df$p.value)]))
}
#p値が小さい順に並べて表示。
slideshow <- function(data, significance = 0.05, num = NULL, cex = 0.7, name = NULL){
if(class(data) != "calcor"){stop("Please enter the variable created by calcor function.")}
#表示するi
#p.value < significanceでNAでない。p.valueで並び替え
slidenumdf <- data.frame(i = c(1:length(data$df$p.value)),
p.value = data$df$p.value, name = data$df$name)
slidenumdf <- na.omit(slidenumdf)
if(is.null(name)){
slidenumdf <- slidenumdf[slidenumdf$p.value < significance, ]
slidenumdf <- slidenumdf[order(slidenumdf$p.value), ]
}else{
slidenumdf <- slidenumdf[slidenumdf$name == name, ]
}
slidenum <- slidenumdf$i
if(!is.null(num)){slidenum <- slidenum[num]}
for(i in slidenum){
plot(data$grp_df[[i]],
cex.axis = cex, cex.lab = cex, cex.main = cex, cex.sub = cex)
pp <- data$df$p.value[i]
nn <- length(data$grp_df[[i]][,1])
par(xpd = TRUE)
par(oma = c(0, 0, 0, 0))
legend("topleft", cex = cex, bty = "n",
legend = paste0("p = ",format(pp, digits = 4) ,"\nn = ", nn))
par(xpd = FALSE)
if(i != slidenum[length(slidenum)]){
cat("Hit <Return> to see next plot: \n")
readline()
}
}
}
#p-valueで並べ替え
psort <- function(x){
df <- x$df
names(df) <- c("name", "Order to Order", "Order to Factor", "Factor to Factor", "p-value")
if(length(x$OO) == 0){df$"Order to Order" <- NULL}
if(length(x$OF) == 0){df$"Order to Factor" <- NULL}
if(length(x$FF) == 0){df$"Factor to Factor" <- NULL}
ret <- df[order(df$"p-value"),]
return(ret)
}
#チェック用
#data <- read.csv("test.csv")
#test <- calcor(A~.,data)
#test <- calcor(B~.,data)
#plot(test)
#test2 <- psort(test)
#slideshow(test,significance =0.3)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.