hello: Hello, World!
In tkwk6428/yjsc:

Description Usage Examples

Prints 'Hello, world!'.

hello()

# Matrix operations
A <- matrix(c(1, -1, 4, -1, 1, 3, 4, 3, 2), nrow = 3, ncol = 3, byrow = TRUE)
B <- matrix(c(3, -2, 4, -1, 1, 0, 4, 0, 5), nrow = 3, ncol = 3, byrow = TRUE)
# NOTE(review): only three values are supplied for a 3 x 3 matrix, so R
# recycles them and every row of X (and of Y) is identical. If column vectors
# were intended, use ncol = 1 -- confirm.
X <- matrix(c(1, -2, 4), nrow = 3, ncol = 3, byrow = TRUE)
Y <- matrix(c(3, 2, 1), nrow = 3, ncol = 3, byrow = TRUE)
# Matrix multiplication uses the %*% operator, e.g. A %*% B
# (a plain * multiplies element by element).
# Transpose a matrix with t(), e.g. t(A).

# Female turtles: column-wise summary statistics.
# NOTE(review): TT is not defined in this file; it is assumed to be a numeric
# matrix or data frame loaded beforehand -- confirm.
apply(TT, 2, mean)
apply(TT, 2, var)
apply(TT, 2, sd)
# Standard deviation with divisor n instead of n - 1: rescale each column's
# sample variance by (n - 1) / n before taking the square root. The original
# line had an unbalanced parenthesis and did not group "nrow(TT) - 1".
sqrt(apply(TT, 2, var) * (nrow(TT) - 1) / nrow(TT))

# Principal component analysis
library(graphics)
# The working directory is switched to d:/ ; later relative reads rely on it.
setwd("d:/")
TP <- read.csv("temp.csv")
head(TP)
# Columns 2 and 3 of TP are the inputs to the PCA.
x <- TP[, 2:3]
pr <- princomp(x)
summary(pr)
# Scores on the first two components.
pr[["scores"]][, c(1, 2)]
biplot(pr)


# Regression analysis: fit y ~ x1 + x2 by batch gradient descent.
yj <- read.csv("data2.csv")
head(yj)
y <- yj$y
x1 <- yj$x1
x2 <- yj$x2
# Design matrix with a leading column of ones for the intercept.
x <- cbind(1, x1, x2)
theta <- c(0, 0, 0)
m <- nrow(x)
# Squared-error cost at the starting point (theta = 0).
cost <- sum(((x %*% theta) - y)^2) / (2 * m)
alpha <- 0.01
iterations <- 10000
for (i in seq_len(iterations)) {
  # All three coefficients are updated from the same residual vector
  # (simultaneous update); the original changed theta[1] before computing the
  # gradient for theta[2] and theta[3].
  residual <- as.vector(x %*% theta) - y
  theta <- theta - alpha * (1 / m) * as.vector(crossprod(x, residual))
}
theta[1]
theta[2]
theta[3]

# Eigen decomposition of a 2 x 2 matrix.
# NOTE(review): the off-diagonal entries differ (-188.69 vs -188.68), so B is
# not exactly symmetric -- possibly a typo in the data; confirm.
B <- matrix(c(379.63, -188.69, -188.68, 197.32), ncol = 2)
lambda <- eigen(B)
lambda$values
# [1] 498.02514  78.92486
lambda$vectors
#            [,1]      [,2]
# [1,]  0.8470483 0.5314956
# [2,] -0.5315159 0.8470610
lambda$vectors[, 1]
# [1]  0.8470483 -0.5315159
lambda$vectors[, 2]
# [1] 0.5314956 0.8470610


#보스턴 ridge, lasso
library(pastecs)
library(glmnet)
library(lasso2)


Boston <- read.csv(file.choose())
head(Boston)

# Outlier handling (the original heading said "missing values", but this
# section flags and removes outliers): rows with medv == 50 and rm < 6.5.
outlier <- which(Boston$rm < 6.5 & Boston$medv == 50)
Boston[outlier, ]
plot(medv ~ rm, data = Boston)
points(Boston[outlier, ]$rm, Boston[outlier, ]$medv, pch = 16, col = "red")
# Data with the outliers removed. Boston[-integer(0), ] would select zero
# rows, so rows are only dropped when something was actually flagged.
BostonOut <- if (length(outlier) > 0) Boston[-outlier, ] else Boston

# 80 / 20 train / test split.
set.seed(97)
sample.no <- sample(seq_len(nrow(BostonOut)), nrow(BostonOut) * 0.8)
BostonOut.train <- BostonOut[sample.no, ]
BostonOut.test <- BostonOut[-sample.no, ]

# Case 1: OLS of medv on every other column.
fit.Case1 <- lm(medv ~ ., data = BostonOut.train)
summary(fit.Case1)

# Training RMSE.
sqrt(mean((fit.Case1$residuals)^2))
# Test residuals, R squared and RMSE. R squared needs the sum of squared
# residuals, sum(res^2); the original squared the sum of the residuals.
res.Case1 <- BostonOut.test$medv - predict(fit.Case1, newdata = BostonOut.test)
1 - sum(res.Case1^2) / sum((BostonOut.test$medv - mean(BostonOut.test$medv))^2)
sqrt(mean(res.Case1^2))
cor(Boston)

# Reduced model on the log of the response.
fit3.var <- lm(log(medv) ~ lstat + rm + ptratio, data = BostonOut.train)
summary(fit3.var)
# Total sum of squares of medv in the training set.
sum((BostonOut.train$medv - mean(BostonOut.train$medv))^2)

# R squared and RMSE on the training data.
# The response was log-transformed, so fitted values are exponentiated back
# to the original scale before computing R squared and RMSE.
1 - sum((BostonOut.train$medv - exp(fit3.var$fitted.values))^2) / sum((BostonOut.train$medv - mean(BostonOut.train$medv))^2)
sqrt(mean((exp(fit3.var$fitted.values) - BostonOut.train$medv)^2))

# R squared and RMSE on the test data.
1 - sum((BostonOut.test$medv - exp(predict(fit3.var, newdata = BostonOut.test)))^2) / sum((BostonOut.test$medv - mean(BostonOut.test$medv))^2)
sqrt(mean((BostonOut.test$medv - exp(predict(fit3.var, newdata = BostonOut.test)))^2))
# AIC of the log-response model.
AIC(fit3.var)

predict(fit.Case1, newdata = BostonOut.test)
# Treat RAD as a categorical variable
BostonCat <- BostonOut
BostonCat[["rad"]] <- as.factor(BostonCat[["rad"]])

# Fresh 80 / 20 split; sample.no is overwritten here.
set.seed(32)
sample.no <- sample(1:nrow(BostonCat), nrow(BostonCat) * 0.8)
BostonCat.train <- BostonCat[sample.no, ]
BostonCat.test <- BostonCat[-sample.no, ]
head(BostonCat)

# log(medv) on all columns, with rm and nox replaced by their squares and
# lstat and dis replaced by their logs.
fit.Cat <- lm(
  log(medv) ~ . + I(rm^2) + I(nox^2) + log(lstat) + log(dis) - rm - nox - lstat - dis,
  data = BostonCat.train
)
summary(fit.Cat)

# R squared and RMSE on the training set (fitted values are exponentiated
# back to the medv scale first).
with(
  BostonCat.train,
  1 - sum((medv - exp(fit.Cat$fitted.values))^2) / sum((medv - mean(medv))^2)
)
with(
  BostonCat.train,
  sqrt(mean((exp(fit.Cat$fitted.values) - medv)^2))
)

# Refit without any nox term; this replaces fit.Cat.
fit.Cat <- lm(
  log(medv) ~ . + I(rm^2) + log(lstat) + log(dis) - rm - nox - lstat - dis,
  data = BostonCat.train
)


# OLS, Stepwise, Ridge, Lasso
# 0/1 indicator columns for the rad levels 2-8 and 24.
rad2 <- as.numeric(BostonCat$rad == 2)
rad3 <- as.numeric(BostonCat$rad == 3)
rad4 <- as.numeric(BostonCat$rad == 4)
rad5 <- as.numeric(BostonCat$rad == 5)
rad6 <- as.numeric(BostonCat$rad == 6)
rad7 <- as.numeric(BostonCat$rad == 7)
rad8 <- as.numeric(BostonCat$rad == 8)
rad24 <- as.numeric(BostonCat$rad == 24)
BostonTrans <- cbind(
  BostonOut,
  rad2, rad3, rad4, rad5, rad6, rad7, rad8, rad24
)

head(BostonTrans)


# Variable transformations: rm^2, nox^2, ln(lstat), ln(dis)
BostonTrans$rm2 <- BostonTrans[["rm"]]^2
BostonTrans$nox2 <- BostonTrans[["nox"]]^2
BostonTrans$llstat <- log(BostonTrans[["lstat"]])
BostonTrans$ldis <- log(BostonTrans[["dis"]])
head(BostonTrans)
str(BostonTrans)

# Split with the same sample.no that was drawn after set.seed(32)
BostonTrans.train <- BostonTrans[sample.no, ]
BostonTrans.test <- BostonTrans[-sample.no, ]

# R squared and RMSE of predictions against observed values, both on the
# original (not log) scale of the response.
fit_metrics <- function(actual, predicted) {
  c(
    R2 = 1 - sum((actual - predicted)^2) / sum((actual - mean(actual))^2),
    RMSE = sqrt(mean((predicted - actual)^2))
  )
}

# OLS training: log(medv) on everything except the raw columns that were
# replaced by dummies or transformed versions.
fit.Trans1 <- lm(log(medv) ~ . - rm - rad - nox - lstat - dis, data = BostonTrans.train)
summary(fit.Trans1)
# Training-set R squared and RMSE (fitted values exponentiated back to medv).
fit_metrics(BostonTrans.train$medv, exp(fit.Trans1$fitted.values))

# OLS evaluation: score the model fitted on the training set against the
# held-out test rows. The original refit a model on the test set and compared
# its fitted values with the training responses, which have a different
# length.
pred.Trans1 <- exp(predict(fit.Trans1, newdata = BostonTrans.test))
fit_metrics(BostonTrans.test$medv, pred.Trans1)

# Stepwise training
fit.step1 <- step(fit.Trans1, direction = "both")
# Training-set R squared and RMSE
fit_metrics(BostonTrans.train$medv, exp(fit.step1$fitted.values))

# Stepwise evaluation: test-set R squared and RMSE of the model selected on
# the training set.
pred.step1 <- exp(predict(fit.step1, newdata = BostonTrans.test))
fit_metrics(BostonTrans.test$medv, pred.step1)

# Ridge and lasso on log(medv).
# Predictors are every column except the response. Selecting by name rather
# than by position (-14) keeps medv out of the design matrix whatever the
# column order of the CSV.
# NOTE(review): the raw rm, rad, nox, lstat and dis columns stay in this
# design matrix although the OLS models exclude them -- confirm intent.
predictor.cols <- setdiff(names(BostonTrans.train), "medv")
x.train <- as.matrix(BostonTrans.train[, predictor.cols])
x.test <- as.matrix(BostonTrans.test[, predictor.cols])

# Per-lambda R squared and RMSE on the medv scale; log_pred holds one column
# of log-scale predictions per lambda value.
glmnet_metrics <- function(actual, log_pred) {
  pred <- exp(log_pred)
  rbind(
    R2 = 1 - colSums((actual - pred)^2) / sum((actual - mean(actual))^2),
    RMSE = sqrt(colMeans((pred - actual)^2))
  )
}

# NOTE(review): cv.glmnet assigns cross-validation folds at random, so
# lambda.min / lambda.1se vary between runs unless a seed is set first.
fit.cv.ridge <- cv.glmnet(x.train, log(BostonTrans.train$medv), alpha = 0)
grid <- seq(fit.cv.ridge$lambda.min, fit.cv.ridge$lambda.1se, length.out = 5)

# When the lambda values are given in advance:
# grid <- c(0.0001, 0.0002, 0.0003)

# Ridge training
fit.ridge1 <- glmnet(x.train, log(BostonTrans.train$medv), alpha = 0, lambda = grid)
head(fit.ridge1)
ridge.fitted.value <- predict(fit.ridge1, newx = x.train)
glmnet_metrics(BostonTrans.train$medv, ridge.fitted.value)
# Ridge test: predict the held-out rows with the model fitted on the training
# set. The original refit on the test set and scored against training data.
ridge.test.value <- predict(fit.ridge1, newx = x.test)
glmnet_metrics(BostonTrans.test$medv, ridge.test.value)

# Lasso training
# NOTE(review): this reuses the lambda grid tuned for ridge; a separate
# cv.glmnet(..., alpha = 1) run may be more appropriate -- confirm.
fit.lasso1 <- glmnet(x.train, log(BostonTrans.train$medv), alpha = 1, lambda = grid)
head(fit.lasso1)
lasso.fitted.value <- predict(fit.lasso1, newx = x.train)
glmnet_metrics(BostonTrans.train$medv, lasso.fitted.value)
# Lasso test
lasso.test.value <- predict(fit.lasso1, newx = x.test)
glmnet_metrics(BostonTrans.test$medv, lasso.test.value)
# End of the ridge / lasso section



# Gradient descent on f(x) = 1.2 * (x - 2)^2 + 3.2
f <- function(x) {
  1.2 * (x - 2)^2 + 3.2
}
# Derivative of f.
grad <- function(x) {
  1.2 * 2 * (x - 2)
}

# Plot the curve and mark the location of the minimum at x = 2.
xs <- seq(0, 4, length.out = 20)
plot(xs, f(xs), type = "l", xlab = "x", ylab = expression(1.2(x-2)^2+3.2))
lines(c(2, 2), c(3, 8), col = "red", lty = 2)
text(2.1, 7, "Closed form solution", col = "red", pos = 4)

# Start at x = 0.1 and take 100 steps; slot 1 of each trace holds the
# starting point, slots 2..101 the iterates.
x <- 0.1
stepFactor <- 0.6
xtrace <- c(x, numeric(100))
ftrace <- c(f(x), numeric(100))
for (step in 1:100) {
  x <- x - stepFactor * grad(x)
  xtrace[step + 1] <- x
  ftrace[step + 1] <- f(x)
}

# Overlay the path taken by the iterates.
lines(xtrace, ftrace, type = "b", col = "blue")
text(0.5, 6, "Gradient descent", col = "blue", pos = 4)
print(x)


# Mean imputation of missing values
data <- read.csv(file.choose())
# Column to impute (index or name). The mean and the replacement must refer
# to the same column; the original averaged column 1 but wrote into a
# placeholder column name that does not exist in real data.
target_col <- 1
mean_1 <- mean(data[[target_col]], na.rm = TRUE) # mean of the target column
data[[target_col]][is.na(data[[target_col]])] <- mean_1


# Stepwise selection (for the assignment)
# NOTE(review): `concrete` is not defined in this file; it is assumed to be a
# data frame holding CCS, Cement, FlyAsh, BFS, Water, Sp and CA -- confirm.
stat.desc(concrete) # descriptive statistics
cor(concrete) # correlation analysis
# step() needs a fitted model as its starting point, not a bare formula, so
# the full model is fitted first.
fit.full <- lm(CCS ~ Cement + FlyAsh + BFS + Water + Sp + CA, data = concrete)
fit.step <- step(fit.full, direction = "both")