# Draw three independent samples of size n from the standard normal
# distribution.
n <- 50
x <- rnorm(n, mean = 0, sd = 1)
y <- rnorm(n, mean = 0, sd = 1)
z <- rnorm(n, mean = 0, sd = 1)

# Introduce missing values at different positions in each vector.
x[c(1, 8, 15)] <- NA # replace the 1st, 8th and 15th values of x with NA
y[c(3, 10, 17)] <- NA
z[c(5, 12, 19)] <- NA

# Without a `use` option, any NA in the input makes the result NA as well.
cor(x, y)
[1] NA
# Restrict the computation to the pairs in which neither value is NA.
cor(x, y, use = "pairwise.complete.obs")
[1] 0.173194
# cor.test() applies the pairwise NA handling automatically.
cor.test(x, y)
Pearson's product-moment correlation
data: x and y
t = 1.1396, df = 42, p-value = 0.2609
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.1303908 0.4470861
sample estimates:
cor
0.173194
# cor() can also compute the correlation matrix of three or more vectors.
# Pairwise deletion: a pair of values is used whenever neither of them is NA.
cor(cbind(x, y, z), use = "pairwise.complete.obs")
x y z
x 1.0000000 0.1731940 -0.1450286
y 0.1731940 1.0000000 0.2372616
z -0.1450286 0.2372616 1.0000000
# Only the cases that contain no NA in any of the vectors are used.
cor(cbind(x, y, z), use = "complete.obs")
x y z
x 1.0000000 0.2070665 -0.1428626
y 0.2070665 1.0000000 0.2736448
z -0.1428626 0.2736448 1.0000000
# Check that the estimate agrees with the pairwise variant of cor().
cor.test(y, z)
Pearson's product-moment correlation
data: y and z
t = 1.5828, df = 42, p-value = 0.121
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.06413646 0.49899368
sample estimates:
cor
0.2372616
# Test the correlation between z and x.
cor.test(z, x)
Pearson's product-moment correlation
data: z and x
t = -0.94994, df = 42, p-value = 0.3476
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
-0.4236675 0.1586841
sample estimates:
cor
-0.1450286
# Apply a linear transformation to x1 and y1 whose slope and intercept are
# drawn at random, so the result is not known in advance.
x2 <- runif(1, min = -2, max = 2) * x1 + runif(1, min = -2, max = 2)
y2 <- runif(1, min = -2, max = 2) * y1 + runif(1, min = -2, max = 2)
# The mean changes under the transformation.
mean(x1)
mean(x2)
[1] 1.397943e-17
[1] -1.235336
# Means of y before (y1) and after (y2) the transformation.
mean(y1)
mean(y2)
[1] 1.307331e-17
[1] 0.8497124
# The standard deviation also changes under the transformation.
sd(x1)
sd(x2)
[1] 1
[1] 0.5217183
# Standard deviations of y before (y1) and after (y2) the transformation.
sd(y1)
sd(y2)
[1] 1
[1] 1.902866
# The covariance also changes under the transformation.
cov(x1, y1)
cov(x2, y2)
[,1]
[1,] 0.7
[,1]
[1,] 0.6949321
# The correlation coefficient, however, is unchanged in absolute value by the
# transformation.
cor(x1, y1)
cor(x2, y2)
[,1]
[1,] 0.7
[,1]
[1,] 0.7
# Overlay scatter plots of the original (x1, y1) and transformed (x2, y2) data
# on shared axes, with lines marking the arithmetic mean of each variable.
x_limits <- range(x1, x2)
y_limits <- range(y1, y2)

plot(
  x1, y1,
  xlim = x_limits, ylim = y_limits,
  xlab = "x", ylab = "y",
  main = "変換前(黒)と変換後(赤)の散布図",
  sub = "黒の破線と赤の点線はそれぞれの算術平均"
)
# NOTE(review): the subtitle describes these dashed lines as black, but they
# are drawn in gray -- confirm which is intended.
abline(v = mean(x1), col = "gray", lty = "dashed")
abline(h = mean(y1), col = "gray", lty = "dashed")

# Draw the next plot on top of the current one instead of starting a new page.
par(new = TRUE)
plot(
  x2, y2,
  col = "red",
  xlim = x_limits, ylim = y_limits,
  bty = "n", ann = FALSE, axes = FALSE
)
abline(v = mean(x2), col = "red", lty = "dotted")
abline(h = mean(y2), col = "red", lty = "dotted")
Kendall's rank correlation tau
data: edu2 and q1900
z = 0.9294, p-value = 0.3527
alternative hypothesis: true tau is not equal to 0
sample estimates:
tau
0.05737565