# Matrix operations
A=matrix(c(1,-1,4,-1,1,3,4,3,2),nrow=3,ncol=3, byrow=TRUE)
B=matrix(c(3,-2,4,-1,1,0,4,0,5),nrow=3,ncol=3, byrow=TRUE)
X=matrix(c(1,-2,4),nrow=3,ncol=1) # column vector
Y=matrix(c(3,2,1),nrow=3,ncol=1)
A%*%B # matrix multiplication uses the %*% operator
t(X)  # t() returns the transpose
# Female turtle shell data (matrix TT)
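# TT is assumed to be a numeric matrix of shell measurements; it is not
# created in this script. A minimal placeholder (hypothetical values) so
# the apply() calls below run:
TT <- matrix(c(98, 81, 38,
               103, 84, 38,
               103, 86, 42,
               105, 86, 40),
             nrow = 4, byrow = TRUE,
             dimnames = list(NULL, c("length", "width", "height")))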
apply(TT,2,mean) # column means
apply(TT,2,var)  # column variances
apply(TT,2,sd)   # column standard deviations
sqrt(var(TT)*(nrow(TT)-1)/nrow(TT)) # population SDs: rescale the sample (co)variance by (n-1)/n
# Principal component analysis
setwd("d:/")
TP <- read.csv("temp.csv")
head(TP)
x<-TP[,c(2:3)]
pr<-princomp(x)
summary(pr)
pr$scores[,1:2]
library(graphics)
biplot(pr)
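# Optional inspection of the fitted PCA using standard stats functions:
screeplot(pr, type = "lines") # variance explained by each component
pr$loadings                   # variable loadings on each component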
# Regression analysis via gradient descent
yj=read.csv("data2.csv")
head(yj)
y=yj$y
x1=yj$x1
x2=yj$x2
x=cbind(1,x1,x2)
theta=c(0,0,0)
m=nrow(x)
cost=sum(((x%*%theta)-y)^2)/(2*m) # squared-error cost at the initial theta
alpha=0.01
iterations=10000
for(i in 1:iterations){
  error <- (x%*%theta)-y # residuals under the current theta, so all three updates use the same values
  theta[1]=theta[1]-alpha*(1/m)*sum(error)
  theta[2]=theta[2]-alpha*(1/m)*sum(error*x[,2])
  theta[3]=theta[3]-alpha*(1/m)*sum(error*x[,3])
}
theta[1]
theta[2]
theta[3]
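# Sanity check (optional; fit.check is a name introduced here): the
# closed-form OLS fit from lm() should give nearly the same coefficients
# as the gradient descent above.
fit.check <- lm(y ~ x1 + x2, data = yj)
coef(fit.check)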
B=matrix(c(379.63,-188.69,-188.68,197.32),nc=2) # 2x2 matrix (note: the name B is reused from above)
lambda=eigen(B)
lambda$values
# [1] 498.02514  78.92486
lambda$vectors
#            [,1]      [,2]
# [1,]  0.8470483 0.5314956
# [2,] -0.5315159 0.8470610
lambda$vectors[,1]
# [1]  0.8470483 -0.5315159
lambda$vectors[,2]
# [1] 0.5314956 0.8470610
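# Optional check that the decomposition reproduces B: for each eigenpair,
# B %*% v equals lambda * v, and V diag(lambda) V^{-1} recovers B.
B %*% lambda$vectors[,1] - lambda$values[1]*lambda$vectors[,1] # approximately zero
all.equal(lambda$vectors %*% diag(lambda$values) %*% solve(lambda$vectors), B)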
# Boston housing: ridge and lasso
library(pastecs)
library(glmnet)
library(lasso2) # note: not used below; glmnet handles both ridge and lasso
Boston=read.csv(file.choose())
head(Boston)
# Outliers: observations with rm < 6.5 but medv == 50
outlier<-c(which(Boston$rm<6.5& Boston$medv==50))
Boston[outlier,]
plot(medv~rm,data=Boston)
points(Boston[outlier,]$rm,Boston[outlier,]$medv,pch=16,col='red')
BostonOut=Boston[-outlier,] # data with the outliers removed
set.seed(97)
sample.no=sample(1:nrow(BostonOut),nrow(BostonOut)*0.8)
BostonOut.train=BostonOut[sample.no,]
BostonOut.test=BostonOut[-sample.no,]
fit.Case1<-lm(medv~.,data=BostonOut.train)
summary(fit.Case1)
sqrt(mean((fit.Case1$residuals)^2)) # training RMSE
res.Case1<-BostonOut.test$medv-predict(fit.Case1,newdata=BostonOut.test)
1-sum(res.Case1^2)/sum((BostonOut.test$medv-mean(BostonOut.test$medv))^2) # test R squared
sqrt(mean(res.Case1^2)) # test RMSE
cor(Boston)
fit3.var<-lm(log(medv)~lstat+rm+ptratio, data=BostonOut.train)
summary(fit3.var)
sum( (BostonOut.train$medv - mean(BostonOut.train$medv))^2 ) # SST on the training data
# R squared and RMSE on the training data
# Because the response was log-transformed, fitted values must be exponentiated back before computing R squared and RMSE.
1 - sum( (BostonOut.train$medv - exp(fit3.var$fitted.values))^2 ) / sum( (BostonOut.train$medv - mean(BostonOut.train$medv))^2 )
sqrt(mean( (exp(fit3.var$fitted.values) - BostonOut.train$medv)^2 ))
# R squared and RMSE on the test data
1 - sum( (BostonOut.test$medv - exp(predict(fit3.var, newdata = BostonOut.test)))^2 ) / sum( (BostonOut.test$medv - mean(BostonOut.test$medv))^2 )
sqrt(mean( (BostonOut.test$medv - exp(predict(fit3.var, newdata = BostonOut.test)))^2 ))
AIC(fit3.var)
predict(fit.Case1,newdata=BostonOut.test)
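# The R squared / RMSE computations above repeat one pattern. A small helper
# (eval.log.fit is a name introduced here, not part of the original script)
# that back-transforms log-scale fitted values before scoring:
eval.log.fit <- function(fitted.log, actual) {
  pred <- exp(fitted.log) # undo the log transform
  r2   <- 1 - sum((actual - pred)^2) / sum((actual - mean(actual))^2)
  rmse <- sqrt(mean((pred - actual)^2))
  c(R2 = r2, RMSE = rmse)
}
# Example: eval.log.fit(fit3.var$fitted.values, BostonOut.train$medv)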
# Treat rad as a categorical variable
BostonCat=BostonOut
BostonCat$rad=as.factor(BostonCat$rad)
set.seed(32)
sample.no=sample(1:nrow(BostonCat),nrow(BostonCat)*0.8)
BostonCat.train=BostonCat[sample.no,]
BostonCat.test=BostonCat[-sample.no,]
head(BostonCat)
fit.Cat <- lm(log(medv) ~ . + I(rm^2)+I(nox^2)+log(lstat)+log(dis) - rm - nox - lstat - dis, data= BostonCat.train)
summary(fit.Cat)
# R squared and RMSE on the training set
1 - sum( (BostonCat.train$medv - exp(fit.Cat$fitted.values))^2 ) / sum( (BostonCat.train$medv - mean(BostonCat.train$medv))^2 )
sqrt(mean( (exp(fit.Cat$fitted.values) - BostonCat.train$medv)^2 ))
fit.Cat<-lm(log(medv)~.+I(rm^2)+log(lstat)+log(dis)-rm-nox-lstat-dis, data=BostonCat.train) # refit without I(nox^2)
# OLS, Stepwise, Ridge, Lasso
rad2 <- ifelse(BostonCat$rad == 2, 1, 0)
rad3 <- ifelse(BostonCat$rad == 3, 1, 0)
rad4 <- ifelse(BostonCat$rad == 4, 1, 0)
rad5 <- ifelse(BostonCat$rad == 5, 1, 0)
rad6 <- ifelse(BostonCat$rad == 6, 1, 0)
rad7 <- ifelse(BostonCat$rad == 7, 1, 0)
rad8 <- ifelse(BostonCat$rad == 8, 1, 0)
rad24 <- ifelse(BostonCat$rad == 24, 1, 0)
BostonTrans <- cbind(BostonOut, rad2, rad3, rad4, rad5, rad6, rad7, rad8, rad24)
head(BostonTrans)
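# The dummy columns above can also be generated automatically; a minimal
# sketch with model.matrix (dropping the intercept column keeps rad == 1
# as the baseline, matching the manual coding):
rad.dummies <- model.matrix(~ rad, data = BostonCat)[, -1]
head(rad.dummies)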
# Variable transformations: rm^2, nox^2, log(lstat), log(dis)
BostonTrans <- with(BostonTrans, cbind(BostonTrans, rm2=rm^2, nox2=nox^2, llstat=log(lstat), ldis=log(dis)))
head(BostonTrans)
str(BostonTrans)
# Split using the same sample.no obtained under set.seed(32)
BostonTrans.train <- BostonTrans[sample.no,]
BostonTrans.test <- BostonTrans[-sample.no,]
# OLS: training
fit.Trans1 <- lm(log(medv) ~ .-rm-rad-nox-lstat-dis , data= BostonTrans.train)
summary(fit.Trans1)
1 - sum( (BostonTrans.train$medv - exp(fit.Trans1$fitted.values))^2 ) / sum( (BostonTrans.train$medv - mean(BostonTrans.train$medv))^2 )
sqrt(mean( (exp(fit.Trans1$fitted.values) - BostonTrans.train$medv)^2 ))
# OLS: evaluation (model refit on the test split)
fit.Trans2 <- lm(log(medv) ~ .-rm-rad-nox-lstat-dis , data= BostonTrans.test)
summary(fit.Trans2)
1 - sum( (BostonTrans.test$medv - exp(fit.Trans2$fitted.values))^2 ) / sum( (BostonTrans.test$medv - mean(BostonTrans.test$medv))^2 )
sqrt(mean( (exp(fit.Trans2$fitted.values) - BostonTrans.test$medv)^2 ))
# Stepwise: training
fit.step1 <- step(fit.Trans1, direction = "both")
# R squared and RMSE on the training set
1 - sum( (BostonTrans.train$medv - exp(fit.step1$fitted.values))^2 ) / sum( (BostonTrans.train$medv - mean(BostonTrans.train$medv))^2 )
sqrt(mean( (exp(fit.step1$fitted.values) - BostonTrans.train$medv)^2 ))
# Stepwise: evaluation
fit.step2 <- step(fit.Trans2, direction = "both")
# R squared and RMSE on the test set
1 - sum( (BostonTrans.test$medv - exp(fit.step2$fitted.values))^2 ) / sum( (BostonTrans.test$medv - mean(BostonTrans.test$medv))^2 )
sqrt(mean( (exp(fit.step2$fitted.values) - BostonTrans.test$medv)^2 ))
fit.cv.ridge <- cv.glmnet(as.matrix(BostonTrans.train[,-14]), log(BostonTrans.train$medv), alpha = 0)
grid <- seq(fit.cv.ridge$lambda.min, fit.cv.ridge$lambda.1se, length.out = 5)
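# Optional: inspect the cross-validation result (standard glmnet functions).
plot(fit.cv.ridge)       # CV error versus log(lambda)
fit.cv.ridge$lambda.min  # lambda minimizing CV error
fit.cv.ridge$lambda.1se  # largest lambda within one SE of the minimum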
# If the lambda values are specified in advance, e.g.:
# grid <- c(0.0001, 0.0002, 0.0003)
# Ridge: training
fit.ridge1 <- glmnet(as.matrix(BostonTrans.train[,-14]), log(BostonTrans.train$medv), alpha = 0, lambda=grid)
head(fit.ridge1)
ridge.fitted.value <- predict(fit.ridge1, newx=as.matrix(BostonTrans.train[,-14]))
1 - colSums( (BostonTrans.train$medv - exp(ridge.fitted.value))^2 ) / sum( (BostonTrans.train$medv - mean(BostonTrans.train$medv))^2 ) # R squared
sqrt(colMeans( (exp(ridge.fitted.value) - BostonTrans.train$medv)^2 )) # RMSE
# Ridge: evaluation (model refit on the test split)
fit.ridge2 <- glmnet(as.matrix(BostonTrans.test[,-14]), log(BostonTrans.test$medv), alpha = 0, lambda=grid)
head(fit.ridge2)
ridge.fitted.value <- predict(fit.ridge2, newx=as.matrix(BostonTrans.test[,-14]))
1 - colSums( (BostonTrans.test$medv - exp(ridge.fitted.value))^2 ) / sum( (BostonTrans.test$medv - mean(BostonTrans.test$medv))^2 ) # R squared
sqrt(colMeans( (exp(ridge.fitted.value) - BostonTrans.test$medv)^2 )) # RMSE
# Lasso: training
fit.lasso1 <- glmnet(as.matrix(BostonTrans.train[,-14]), log(BostonTrans.train$medv), alpha = 1, lambda=grid)
head(fit.lasso1)
lasso.fitted.value <- predict(fit.lasso1, newx=as.matrix(BostonTrans.train[,-14]))
1 - colSums( (BostonTrans.train$medv - exp(lasso.fitted.value))^2 ) / sum( (BostonTrans.train$medv - mean(BostonTrans.train$medv))^2 ) # R squared
sqrt(colMeans( (exp(lasso.fitted.value) - BostonTrans.train$medv)^2 )) # RMSE
# Lasso: evaluation (model refit on the test split)
fit.lasso2 <- glmnet(as.matrix(BostonTrans.test[,-14]), log(BostonTrans.test$medv), alpha = 1, lambda=grid)
head(fit.lasso2)
lasso.fitted.value <- predict(fit.lasso2, newx=as.matrix(BostonTrans.test[,-14]))
1 - colSums( (BostonTrans.test$medv - exp(lasso.fitted.value))^2 ) / sum( (BostonTrans.test$medv - mean(BostonTrans.test$medv))^2 ) # R squared
sqrt(colMeans( (exp(lasso.fitted.value) - BostonTrans.test$medv)^2 )) # RMSE
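# Optional comparison: unlike ridge, lasso can shrink coefficients exactly
# to zero, so coef() shows which predictors were dropped at each lambda.
coef(fit.ridge1)  # dense coefficients
coef(fit.lasso1)  # sparse coefficients; '.' marks zeroed-out predictors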
# End of the ridge/lasso section
#gradient descent
xs=seq(0,4,len=20)
f=function(x){1.2*(x-2)^2+3.2}
plot(xs,f(xs),type="l", xlab="x", ylab=expression(1.2(x-2)^2+3.2))
grad=function(x){1.2*2*(x-2)}
lines(c(2,2), c(3,8), col="red", lty=2)
text(2.1,7, "Closed form solution", col="red", pos=4)
x=0.1
xtrace<-x
ftrace<-f(x)
stepFactor<-0.6
for(step in 1:100){
x<-x-stepFactor*grad(x)
xtrace<-c(xtrace,x)
ftrace<-c(ftrace,f(x))
}
lines(xtrace, ftrace, type="b",col="blue")
text(0.5,6, "Gradient descent", col="blue",pos=4)
print(x)
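# Note on the step size: grad() has slope 2.4, so the update converges only
# when |1 - 2.4*stepFactor| < 1, i.e. stepFactor < 1/1.2 (about 0.83).
# A quick illustration with a step that is too large (x.bad is a name
# introduced here):
x.bad <- 0.1
for (step in 1:10) x.bad <- x.bad - 0.9*grad(x.bad)
print(x.bad) # oscillates away from the minimum at x = 2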
# Mean imputation of missing values
data <- read.csv(file.choose())
mean_1 <- mean(data[,1], na.rm=TRUE) # mean of column 1
data$column_name[is.na(data$column_name)] <- mean_1 # replace column_name with the target column's name
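# A small self-contained example of the same imputation pattern
# (toy values chosen for illustration):
toy <- data.frame(score = c(10, NA, 30))
toy$score[is.na(toy$score)] <- mean(toy$score, na.rm = TRUE)
toy # the NA is replaced by the column mean, 20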
# Stepwise (assignment)
stat.desc(concrete) # descriptive statistics (pastecs)
cor(concrete) # correlation matrix
fit.full <- lm(CCS ~ Cement + FlyAsh + BFS + Water + Sp + CA, data = concrete) # step() needs a fitted model, not a bare formula
fit.step <- step(fit.full, direction = "both")
summary(fit.step)