# You can learn more about package authoring with RStudio at:
#
# http://r-pkgs.had.co.nz/
#
# Some useful keyboard shortcuts for package authoring:
#
# Build and Reload Package: 'Ctrl + Shift + B'
# Check Package: 'Ctrl + Shift + E'
# Test Package: 'Ctrl + Shift + T'
# Summarise every column of a data frame.
#
# Prints the size of `dataf` (and a few per-class counts) to the console and
# returns one row per column describing its type, number of missing values,
# range, factor levels and Shapiro-Wilk significance codes.
#
# Arguments:
#   dataf  : data frame to inspect (defaults to `iris`).
#   type   : when "num", a pairs() scatterplot matrix of the numeric columns
#            is drawn, provided there are between 2 and 10 of them.
#   target : optional name of a target variable; it is only echoed to the
#            console and not used in any computation.
#
# Returns a data frame with one row per column of `dataf` and the columns
#   name, mode, class, n_of_NA, min, max, levels, ex, like_normal, like_n(log).
#   like_normal / like_n(log) hold the significance code of the Shapiro-Wilk
#   p-value for the raw / log-transformed values ("***" < 0.001, "**" < 0.01,
#   "*" < 0.05, "." < 0.1, "" otherwise). They are NA when the test cannot be
#   run; like_n(log) is only computed when the column minimum is positive.
#
# Stops with "not DataFrame" when `dataf` is not a data frame.
begin <- function(dataf = iris, type = "all", target = "") {
  if (!is.data.frame(dataf)) {
    stop("not DataFrame")
  }

  # The target is only echoed; NULL, NA and "" all mean "no target".
  if (length(target) == 1L && !is.na(target) && target != "") {
    print(paste0("target variable : ", target))
  }

  # Build a preview of the first three levels, e.g. "/a/b/c/...".
  # The leading "/" is kept so the output format stays unchanged.
  example_levels <- function(column) {
    lv <- levels(column)
    shown <- lv[seq_len(min(3L, length(lv)))]
    paste(c("", shown, if (length(lv) > 3L) "..."), collapse = "/")
  }

  # Map the Shapiro-Wilk p-value of `x` to a significance code.
  # shapiro.test() stops for fewer than 3 or more than 5000 values and for
  # constant data; such columns are reported as NA instead of aborting the
  # whole summary.
  signif_code <- function(x) {
    p <- tryCatch(shapiro.test(x)$p.value, error = function(e) NA_real_)
    if (is.na(p)) {
      return(NA_character_)
    }
    codes <- c("***", "**", "*", ".", "")
    codes[findInterval(p, c(0.001, 0.01, 0.05, 0.1)) + 1L]
  }

  print(paste0(
    "Size : ", nrow(dataf), " records and ", ncol(dataf), " variables"
  ))

  index <- c(
    "name", "mode", "class", "n_of_NA", "min", "max",
    "levels", "ex", "like_normal", "like_n(log)"
  )

  # Preallocate one typed result vector per output column.
  n <- ncol(dataf)
  col_mode <- rep(NA_character_, n)
  col_class <- rep(NA_character_, n)
  n_na <- integer(n)
  col_min <- rep(NA_real_, n)
  col_max <- rep(NA_real_, n)
  n_levels <- rep(NA_integer_, n)
  ex_levels <- rep(NA_character_, n)
  normal <- rep(NA_character_, n)
  log_normal <- rep(NA_character_, n)

  for (i in seq_len(n)) {
    # `[[` always yields the column itself, also for tibble-like inputs
    # where `dataf[, i]` would return a one-column table.
    column <- dataf[[i]]
    n_na[i] <- sum(is.na(column))

    if (is.logical(column)) {
      # Logical columns have always been reported with mode "chr".
      col_mode[i] <- "chr"
      col_class[i] <- "logical"
    } else if (is.integer(column) || is.numeric(column)) {
      col_mode[i] <- "num"
      col_class[i] <- if (is.integer(column)) "int" else "num"
      observed <- column[!is.na(column)]
      # With no observed values min/max stay NA (instead of Inf/-Inf plus a
      # warning) and no normality test is attempted.
      if (length(observed) > 0L) {
        col_min[i] <- min(observed)
        col_max[i] <- max(observed)
        normal[i] <- signif_code(observed)
        # log() is only defined for strictly positive data.
        if (col_min[i] > 0) {
          log_normal[i] <- signif_code(log(observed))
        }
      }
    } else if (is.ordered(column) || is.factor(column)) {
      col_mode[i] <- "chr"
      col_class[i] <- if (is.ordered(column)) "ordered" else "factor"
      n_levels[i] <- nlevels(column)
      ex_levels[i] <- example_levels(column)
    } else if (is.character(column)) {
      col_mode[i] <- "chr"
      col_class[i] <- "chr"
      # Number of distinct non-missing strings.
      n_levels[i] <- length(unique(column[!is.na(column)]))
    } else {
      col_mode[i] <- "unknown"
      col_class[i] <- "unknown"
    }
  }

  val <- data.frame(
    names(dataf), col_mode, col_class, n_na, col_min, col_max,
    n_levels, ex_levels, normal, log_normal,
    stringsAsFactors = FALSE
  )
  names(val) <- index

  if (identical(type, "num")) {
    numeric_cols <- which(col_mode == "num")
    if (length(numeric_cols) > 10L) {
      print("figure margins too large")
    } else if (length(numeric_cols) >= 2L) {
      # drop = FALSE keeps a data frame even for few columns.
      pairs(dataf[, numeric_cols, drop = FALSE])
    } else {
      message("pairs plot skipped: needs at least two numeric columns")
    }
  }

  print(paste0("Details... numeric :", sum(col_class == "num")))
  print(paste0(" integer :", sum(col_class == "int")))
  print(paste0(" factor :", sum(col_class == "factor")))
  print(paste0(" orderd factor :", sum(col_class == "ordered")))

  val
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.