# File: R/hello.R

# You can learn more about package authoring with RStudio at:
#
#   http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
#   Build and Reload Package:  'Ctrl + Shift + B'
#   Check Package:             'Ctrl + Shift + E'
#   Test Package:              'Ctrl + Shift + T'

#' Summarise the columns of a data frame
#'
#' Prints the size of `dataf` and a per-class column count to the console and
#' builds an overview table with one row per column of `dataf`.
#'
#' @param dataf A data frame to inspect. Any other input raises the error
#'   `"not DataFrame"`.
#' @param type If `"num"`, a scatterplot matrix of the numeric columns is
#'   drawn with `pairs()` when there are between 2 and 10 of them; otherwise a
#'   short notice is printed. Any other value draws nothing.
#' @param target Optional name of a target variable. It is only echoed to the
#'   console; the summary itself does not use it.
#' @return A data frame with one row per column of `dataf` and these columns:
#'   `name`; `mode` (`"num"`, `"chr"` or `"unknown"`); `class` (`"logical"`,
#'   `"int"`, `"num"`, `"ordered"`, `"factor"`, `"chr"` or `"unknown"`);
#'   `n_of_NA`; `min` and `max` (numeric columns only); `levels` (number of
#'   levels, or of distinct non-missing values for character columns); `ex`
#'   (preview of up to three factor levels); `like_normal` and `like_n(log)`
#'   (significance code of the Shapiro-Wilk p-value on the raw and on the
#'   log-transformed values: `"***"` < 0.001, `"**"` < 0.01, `"*"` < 0.05,
#'   `"."` < 0.1, `""` otherwise). Cells that do not apply, or whose test could
#'   not be computed, are `NA`.
#' @examples
#' begin(iris)
begin <- function(dataf = iris, type = "all", target = "") {

  # Only data frames are supported.
  if (!is.data.frame(dataf)) {
    stop("not DataFrame")
  }

  # Echo the target variable when one was named; it is not used further.
  if (length(target) == 1L && !is.na(target) && target != "") {
    print(paste0("target variable : ", target))
  }

  # Preview of a factor's levels: "/a/b/c", with "/..." appended when there
  # are more than three. A factor without levels gives an empty string.
  exlevels <- function(column) {
    lev <- levels(column)
    if (length(lev) == 0L) {
      return("")
    }
    shown <- lev[seq_len(min(length(lev), 3L))]
    if (length(lev) > 3L) {
      shown <- c(shown, "...")
    }
    paste0("/", paste(shown, collapse = "/"))
  }

  # Map a p-value to the usual significance code. Intervals are closed on the
  # left, so p = 0.001 maps to "**", exactly like a chain of `p < cut` tests.
  signif_code <- function(p) {
    codes <- c("***", "**", "*", ".", "")
    codes[findInterval(p, c(0.001, 0.01, 0.05, 0.1)) + 1L]
  }

  # Shapiro-Wilk significance code for `x`. shapiro.test() stops on fewer
  # than 3 or more than 5000 observations and on constant data; such a column
  # gets NA instead of aborting the whole summary.
  normality_code <- function(x) {
    p <- tryCatch(
      stats::shapiro.test(x)$p.value,
      error = function(e) NA_real_
    )
    signif_code(p)
  }

  # Basic size information first.
  di <- dim(dataf)
  print(paste0("Size : ", di[1], " records and ", di[2], " variables"))

  # Result table, one row per column of `dataf`; every cell starts as NA.
  n_var <- ncol(dataf)
  val <- data.frame(
    name = colnames(dataf),
    mode = rep(NA_character_, n_var),
    class = rep(NA_character_, n_var),
    n_of_NA = rep(NA_integer_, n_var),
    min = rep(NA_real_, n_var),
    max = rep(NA_real_, n_var),
    levels = rep(NA_integer_, n_var),
    ex = rep(NA_character_, n_var),
    like_normal = rep(NA_character_, n_var),
    `like_n(log)` = rep(NA_character_, n_var),
    stringsAsFactors = FALSE,
    check.names = FALSE
  )

  # Fill the table column by column. seq_len() keeps the loop empty for a
  # data frame without columns.
  for (i in seq_len(n_var)) {
    # `[[` always yields the column itself, whereas `[, i]` may not drop.
    column <- dataf[[i]]
    val[i, "n_of_NA"] <- sum(is.na(column))

    if (is.logical(column)) {
      val[i, "mode"] <- "chr"
      val[i, "class"] <- "logical"
    } else if (is.numeric(column)) {
      val[i, "mode"] <- "num"
      val[i, "class"] <- if (is.integer(column)) "int" else "num"

      observed <- column[!is.na(column)]
      # min()/max() of an empty vector would warn and return Inf/-Inf.
      if (length(observed) > 0L) {
        val[i, "min"] <- min(observed)
        val[i, "max"] <- max(observed)
      }

      # Normality test on the raw values, and on the logs when every value
      # is strictly positive.
      val[i, "like_normal"] <- normality_code(observed)
      if (isTRUE(val[i, "min"] > 0)) {
        val[i, "like_n(log)"] <- normality_code(log(observed))
      }
    } else if (is.factor(column)) {
      val[i, "mode"] <- "chr"
      val[i, "class"] <- if (is.ordered(column)) "ordered" else "factor"
      val[i, "levels"] <- nlevels(column)
      val[i, "ex"] <- exlevels(column)
    } else if (is.character(column)) {
      val[i, "mode"] <- "chr"
      val[i, "class"] <- "chr"
      val[i, "levels"] <- length(unique(column[!is.na(column)]))
    } else {
      val[i, "mode"] <- "unknown"
      val[i, "class"] <- "unknown"
    }
  }

  # Optional scatterplot matrix of the numeric columns.
  if (identical(type, "num")) {
    is_num_col <- val[, "mode"] == "num"
    n_num <- sum(is_num_col)
    if (n_num < 2L) {
      # pairs() needs at least two columns.
      print("need at least two numeric variables for a pairs plot")
    } else if (n_num <= 10L) {
      graphics::pairs(dataf[, is_num_col, drop = FALSE])
    } else {
      print("figure margins too large")
    }
  }

  print(paste0("Details... numeric       :", sum(val[, "class"] == "num")))
  print(paste0("           integer       :", sum(val[, "class"] == "int")))
  print(paste0("           factor        :", sum(val[, "class"] == "factor")))
  print(paste0("           orderd factor :", sum(val[, "class"] == "ordered")))

  val
}
# Source: yuto16/Rlibrary_begin (documentation built on May 26, 2019, 6:32 a.m.)