calcor: Calculate correlation

#関連を計算
calcor <- function(formula, data,
                    OOmethod = "Kendall",
                    OFmethod = "Kruskal-Wallis",
                    FFmethod = "Fisher"){

  #データを解析用に変換
  data0 <- model.frame(formula, data, na.action = NULL)

  #説明変数がNAの場合、丸ごと列を除去。
  #NAでない位置を返す。
  notnapos <- function(x){
    ret <- c(1:length(x))[!is.na(x)]
    return(ret)
  }

  notnay <- notnapos(data0[,1])
  notnadata <- data0[notnay, ]

  #NAを除去した変数
  y <- notnadata[,1] #説明変数
  x <- notnadata[,-1] #目的変数

  #yとxをdata.frameにしてNAを除去
  na.omit.df <- function(x1, x2){
    df <- data.frame(x1 = x1, x2 = x2)
    df <- na.omit(df)
    return(df)
  }

  #名義尺度か判断
  #factorかcharacterの場合のみnominalと判断
  is.nominal <- function(x){
    if(is.factor(x) || is.character(x)){ return(TRUE) }else{ return(FALSE) }
  }

  #順序尺度として扱えるか判断
  #numericかintegerの場合のみorderと判断。
  is.order <- function(x){
    if(is.numeric(x) || is.integer(x)){ return(TRUE) }else { return(FALSE) }
  }

  #名義尺度か順序尺度のどちらかとして扱えるか判断
  is.nominal.order <- function(x){
    if(is.nominal(x) || is.order(x)){ return(TRUE)}else{ return(FALSE)}
  }

  #長さが1の場合、0を返す標準偏差。
  sd2 <- function(x){
    if(length(x) == 1){return(0)}else{return(sd(x))}
  }



  #順序尺度と順序尺度
  #p値を返す。
  OOtest <- function(x1, x2){
    #x1かx2のどちらかが全てNAならNAを返す。
    if(sum(!is.na(x1)) ==0 || sum(!is.na(x2)) == 0){return(NA)}
    #x1かx2の標準偏差がゼロならNAを返す。
    if(sd2(x1) == 0 || sd2(x2) == 0){return(NA)}

    if(OOmethod == "Kendall"){
      result <- cor.test(x1, x2, method = "kendall")
      return(result$p.value)
    }
    if(OOmethod == "Spearman"){
      result <- cor.test(x1, x2, method = "spearman")
      return(result$p.value)
    }
    stop("The verification method between order scale and ordinal scale is incorrect.")
  }


  #順序尺度と名義尺度
  #p値を返す。
  OFtest <- function(x1, x2){
    #x1かx2のどちらかが全てNAならNAを返す。
    if(sum(!is.na(x1)) ==0 || sum(!is.na(x2)) == 0){return(NA)}

    if(OFmethod == "Kruskal-Wallis"){
      if(is.order(x1)){
        if(sd2(x1) == 0 || length(unique(x2)) == 1){return(NA)}
        df <- data.frame(x1 = x1, x2 = x2)
      }else{
        if(sd2(x2) == 0 || length(unique(x1)) == 1){return(NA)}
        df <- data.frame(x1 = x2, x2 = x1)
      }
      result <- kruskal.test(x1 ~ x2, df)
      return(result$p.value)
    }
    stop("The verification method between the order scale and the nominal scale is incorrect.")
  }

  #名義尺度と名義尺度
  FFtest <- function(x1, x2){
    #x1かx2のどちらかが全てNAならNAを返す。
    if(sum(!is.na(x1)) ==0 || sum(!is.na(x2)) == 0){return(NA)}

    if(length(unique(x1)) == 1 || length(unique(x2)) == 1){return(NA)}
    tab <- table(x1, x2)

    if(FFmethod == "Fisher"){
      result <- fisher.test(tab)
      return(result$p.value)
    }

    if(FFmethod == "chisq"){
      result <- chisq.test(tab)
      return(result$p.value)
    }
    stop("The verification method between the nominal scale and the nominal scale is incorrect.")
  }


  #データに適用
  #目的変数のチェック
  if(!is.nominal.order(y)){
    stop("Evaluation target is neither numeric, integer, order nor character.")
  }

  #初期設定
  xl <- length(x[1, ])
  df <- NULL
  grp_df <- NULL

  #dfを追加
  adddf <- function(df, name, OO, OF, FF){
    df0 <- data.frame(name, OO, OF, FF)
    if(is.null(df)){df <- df0}else{df <- rbind(df, df0)}
    return(df)
  }

  #yが順序尺度の場合
  if(is.order(y)){
    yis <- "order"
    for(i in 1:xl){
      data_df <- na.omit.df(x1 = y, x2 = x[, i])
      if(is.order(data_df$x2)){ OO <- OOtest(x1 = data_df$x1, x2 = data_df$x2)}else{ OO <- NA}
      if(is.nominal(data_df$x2)){ OF <- OFtest(x1 = data_df$x1, x2 = data_df$x2)}else{ OF <- NA}
      FF <- NA
      df <- adddf(df, name = names(x)[i], OO = OO, OF = OF, FF = FF)
      data_df <- data.frame(data_df[,2], data_df[,1])
      names(data_df) <- c(names(data0)[i + 1], names(data0)[1])
      grp_df[[i]] <- data_df
    }
  }


  #yが名義尺度の場合
  if(is.nominal(y)){
    yis <- "nominal"
    for(i in 1:xl){
      data_df <- na.omit.df(x1 = y, x2 = x[, i])
      OO <- NA
      if(is.order(data_df$x2)){ OF <- OFtest(x1 = data_df$x2, x2 = data_df$x1)}else{ OF <- NA}
      if(is.nominal(data_df$x2)){ FF <- FFtest(x1 = data_df$x1, x2 = data_df$x2)}else{ FF <- NA}
      df <- adddf(df, name = names(x)[i], OO = OO, OF = OF, FF = FF)
      data_df <- data.frame(data_df[,2], data_df[,1])
      names(data_df) <- c(names(data0)[i + 1], names(data0)[1])
      grp_df[[i]] <- data_df
    }
  }

  #各手法で出したp値の表に、代表のp値を追加。
  pcal <- function(x){
    if(sum(!is.na(x)) == 0){return(NA)}else{x[!is.na(x)]}
  }

  pp <- apply(df[,-1], 1, pcal)
  df <- data.frame(df, p.value = pp)

  #ベクトルに数値を代入
  #名前をつけ、NAを除去し、降順に並べ替え
  vec_sort <- function(data, names){
    names(data) <- names
    data1 <- data[!is.na(data)]
    data_sort <- data1[order(data1, decreasing = TRUE)]
    return(data_sort)
  }

  #データを成形
  vec_OO <- vec_sort(data = df$OO, names = df$name)
  vec_OF <- vec_sort(data = df$OF, names = df$name)
  vec_FF <- vec_sort(data = df$FF, names = df$name)

  #返り値
  ret <- list()
  ret$df <- df
  ret$grp_df <- grp_df

  ret$OO <- vec_OO
  ret$OF <- vec_OF
  ret$FF <- vec_FF

  ret$method <- c(OOmethod, OFmethod, FFmethod)
  ret$yis <- yis

  class(ret) <- "calcor"
  return(ret)
}

#結果を棒グラフで表示
plot.calcor <- function(data, significance = 0.05, cex = 0.5){
  num_check <- function(x1, x2, x3){
    num <- 0
    if(is.numeric(x1)){num <- num + 1}
    if(is.numeric(x2)){num <- num + 1}
    if(is.numeric(x3)){num <- num + 1}
    return(num)
  }

  num <- num_check(x1 = data$OO, x2 = data$OF, x3 = data$FF)
  par(mfrow = c(1, num))

  check_plot <- function(data, method){
    if(is.numeric(data)){

      #色の定義
      dl <- length(data)
      dsl <- sum(data < significance)
      col = c(rep("gray", times = (dl - dsl)), rep("red", times = dsl))

      #棒グラフを描く
      barplot(data, horiz = TRUE, las = 1,
              xlab = "p-value", xlim = c(0, 1),
              main = method,
              cex.axis = cex, cex.names = cex,
              col = col)
      }
  }
  #par(plt = c(0.5,0.9,0.3,0.8))

  check_plot(data$OO, data$method[1])
  check_plot(data$OF, data$method[2])
  check_plot(data$FF, data$method[3])

  par(mfrow = c(1, 1))
}

#p値の一覧を表示
summary.calcor <- function(data){

  #p値で並べ替え
  catdf <- data$df[!is.na(data$df$p.value),]
  catdf <- catdf[order(catdf$p.value),]

  #計算手法と結果の表示
  cat("Test method.\n\n")
  if(data$yis == "order"){
    cat(paste0("Order vs Order : ", data$method[1], "\n"))
    cat(paste0("Order vs Nominal : ", data$method[2], "\n\n\n"))
    cat("List of p values of items successfully calculated for the test.\n\n")
    print(data.frame(name = catdf$name, Order.Order = catdf$OO, Order.Nominal = catdf$OF))
  }
  if(data$yis == "nominal"){
    cat(paste0("Nominal vs Order : ", data$method[2], "\n"))
    cat(paste0("Nominal vs Nominal : ", data$method[3], "\n\n\n"))
    cat("List of p values of items successfully calculated for the test.\n\n")
    print(data.frame(name = catdf$name, Nominal.Order = catdf$OF, Nominal.Nominal = catdf$FF))
  }

  #計算に失敗した項目
  cat("\nItem failed to calculate p value.\n")
  cat(as.character(data$df$name[!is.na(data$df$p.value)]))
}


#p値が小さい順に並べて表示。
slideshow <- function(data, significance = 0.05, num = NULL, cex = 0.7, name = NULL){
  if(class(data) != "calcor"){stop("Please enter the variable created by calcor function.")}

  #表示するi
  #p.value < significanceでNAでない。p.valueで並び替え
  slidenumdf <- data.frame(i = c(1:length(data$df$p.value)),
                           p.value = data$df$p.value, name = data$df$name)
  slidenumdf <- na.omit(slidenumdf)

  if(is.null(name)){
    slidenumdf <- slidenumdf[slidenumdf$p.value < significance, ]
    slidenumdf <- slidenumdf[order(slidenumdf$p.value), ]
  }else{
    slidenumdf <- slidenumdf[slidenumdf$name == name, ]
  }

  slidenum <- slidenumdf$i

  if(!is.null(num)){slidenum <- slidenum[num]}

  for(i in slidenum){

    plot(data$grp_df[[i]],
         cex.axis = cex, cex.lab = cex, cex.main = cex, cex.sub = cex)

    pp <- data$df$p.value[i]
    nn <- length(data$grp_df[[i]][,1])

    par(xpd = TRUE)
    par(oma = c(0, 0, 0, 0))
    legend("topleft", cex = cex, bty = "n",
           legend = paste0("p = ",format(pp, digits = 4) ,"\nn = ", nn))
    par(xpd = FALSE)

    if(i != slidenum[length(slidenum)]){
      cat("Hit <Return> to see next plot: \n")
      readline()
    }
  }
}



#p-valueで並べ替え
psort <- function(x){
  df <- x$df
  names(df) <- c("name", "Order to Order", "Order to Factor", "Factor to Factor", "p-value")

  if(length(x$OO) == 0){df$"Order to Order" <- NULL}
  if(length(x$OF) == 0){df$"Order to Factor" <- NULL}
  if(length(x$FF) == 0){df$"Factor to Factor" <- NULL}

  ret <- df[order(df$"p-value"),]
  return(ret)
}








#チェック用

#data <- read.csv("test.csv")
#test <- calcor(A~.,data)
#test <- calcor(B~.,data)
#plot(test)
#test2 <- psort(test)

#slideshow(test,significance =0.3)