# R programming example 4
####################------working with T41-----------------------
T41=read.table("D:/T4-1.DAT", header=F)
dim(T41)
hist(T41)
T41=matrix(scan("D:/T4-1.DAT"), ncol=1)
hist(T41)
hist(T41, nclass="fd")
hist(T41, nclass="scott")
hist(T41, nclass="sturges")
hist(T41, nclass=40)
hist(T41, nclass=30)
hist(T41, nclass=20,col="lightgreen", border="purple", xlab="Radiation data (door closed)")
#--------data look like non-normal----------------
######################-------play with "normal" points-------------------
set.seed(493)
y=rnorm(30)
z=qqnorm(y); qqline(y, col=2)
cov(z$x, z$y) #compare with the table given in page 182
y=rnorm(100)
qqnorm(y,main="100 points from N(0,1)",sub="Q-Q plot with line"); qqline(y, col=3)
y=rnorm(1000)
qqnorm(y); qqline(y, col=4)
#####################-----check normality of T41------------------------------------
par(mfrow=c(1,2))
z=qqnorm(T41, main="42 radiation data points",sub="\nQ-Q plot with line",ylab="radiation")
#compare this with Figure 4.6
qqline(T41,col="purple")
hist(T41, nclass=20,col="lightgreen", border="purple", xlab="Radiation data (door closed)")
cor(z$x,z$y); cor(sort(T41),qnorm((seq(1:42)-0.5)/42))
#check Table 4.2, reject normality at 0.01, 0.05, 0.1 levels
y=rnorm(42); qqplot(T41, y, pch=19, col="red"); qqplot(rnorm(42), y, pch=19, col=4)
#---Explain short and long tails------
######################----------load mvtnorm package-----------------
plot(rmvnorm(30, sigma=matrix(c(1,.9,.9,2),2)), xlab="", ylab="",xlim=c(-6,6), ylim=c(-6,6),col=2)
plot(rmvnorm(300, sigma=matrix(c(1,.9,.9,2),2)), xlab="", ylab="",xlim=c(-6,6), ylim=c(-6,6),col=3)
######################------------working on T4.3 ----lumber stiffness-------------------
T43=matrix(scan("D:T4-3.DAT"),ncol=5,byrow=T)
T43
#--------------check uni(bi)-variate normality, ---------------
qqnorm(T43[,1]); qqline(T43[,1],col="blue")
hist(T43[,1],nclass=20,col="lightgreen", border="purple",xlab="")
#do the same for other columns
pairs(T43[,1:4], col="blue", labels=c("x1","x2","x3","x4"), main="Pairwise plot of stiffness measures")
#compare this with Figure 4.11
#---------chi-square plot-----
par(mfrow=c(1,1))
plot(qchisq((seq(1:30)-0.5)/30,4),sort(T43d),pch=19,main="Chi-square plot",xlab="Chi-square quantiles",ylab="generalized distances")
abline(0,1,lty=1,cex=1.5,col="red")
#compare with Figure 4.9------------
# --------------outlier-detection---------------------
#------------one-D plots---------------
plot(T43[,1],rep(0,30),pch=18)
plot(T43[,2],rep(0,30),pch=19)
plot(T43[,3],rep(0,30),pch=20)
plot(T43[,4],rep(0,30),pch=21)
hist(T43[,1],nclass=20)
hist(T43[,2],nclass=20)
hist(T43[,3],nclass=20)
hist(T43[,4],nclass=20)
#-----------standardize and scatter plot--------------------------
colMeans(T43[,1:4])
T43S=t(t(T43[,1:4])-colMeans(T43[,1:4])) #= T43[,1:4]-rep(1/n,dim(T43)[1])%*%rep(1,dim(T43)[1])%*%T43[,1:4]
T43S
T43S=t(t(T43S)/sqrt(apply(T43[,1:4],2,var)))
cbind(T43S, T43[,5]) #compare this with Table 4.4
pairs(T43S, col="blue")
#----------- d^2_j---------
T43xbar=colMeans(T43[,1:4])
T43S=var(T43[,1:4])
T43SI=solve(T43S)
T43d=rep(0,30)
for (i in 1:30){T43d[i]=t(T43[i,1:4]-T43xbar)%*%T43SI%*%(T43[i,1:4]-T43xbar)}
T43d #compare with T43[,5]
#compare with qchisq(0.95, 4)=9.487729
###############--------------NORMAL TRANSFORMATION----------------
xlambda=function(x,lamb) #box-cox transformation
{
if (lamb != 0) {(x^(lamb)-1)/lamb}
else {log(x)}
}
llambda=function(x,lamb) #l-lambda
{
n=length(x); m=length(lamb); ll=rep(0,m);
for (i in 1:m)
{ ll[i]=(lamb[i]-1)*n*mean(log(x)) - n/2*log( (n-1)/n*var(xlambda(x,lamb[i])) ) }
ll }
lambda=seq(-4,4,0.2)
plot(lambda,llambda(T41,lambda))
lambda=seq(-0.5,1,0.01)
plot(lambda,llambda(T41,lambda))
lambda=seq(-0.0,0.5,0.001)
plot(lambda,llambda(T41,lambda))
lambda=seq(0.25,0.3,length=30)
plot(lambda,llambda(T41,lambda))
lambda=seq(0.27,0.28,length=30)
plot(lambda,llambda(T41,lambda))
#0.276
par(mfrow=c(1,2))
qqnorm(T41,main="42 radiation data points",sub="\nQ-Q plot with line",ylab="radiation")
qqline(T41,col="red")
T41trans=xlambda(T41,0.276)
qqnorm(T41trans,main="42 transformed radiation data points",sub="\nQ-Q plot with line",ylab="radiation")
qqline(T41trans,col="blue")