> library(rstan) # Rでベイズ推計を行うパッケイジrstanを読み込む > > # 演習用データフレイムの読み込み > d0 <- read.csv("documents/practice.csv") > names(d0) [1] "id" "mode" "sex" "age" "q0101" "q0102" "q0103" "q0200" "q0301" [10] "q0302" "q0401" "q0402" "q0403" "edu1" "edu2" "edu" "job" "q0700" [19] "q0900" "q1000" "q1101" "q1102" "q1400" "q1501" "q1502" "q1601" "q1602" [28] "q1603" "q1604" "q1605" "q1700" "q1801" "q1802" "q1900" "q2100" "q2200" [37] "q2601" "q2602" "income" "fincome" "q3000" > > # 以下で使用する4つの変数がmissingでないケースだけを残す > d1 <- d0[complete.cases(d0$income, d0$sex, d0$age, d0$edu),] > d1$male <- c(1, 0)[d1$sex] # 男性ダミー変数の作成 > > # rstan( )関数に渡すデータをリスト形式で準備 > data02 <- list(N=nrow(d1), X1=d1$male, X2=d1$age, X3=d1$edu, Y=d1$income) > data02 # 一応,中身を確認 $N [1] 338 $X1 [1] 1 0 0 1 0 1 1 0 0 0 0 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 1 0 0 1 0 1 0 0 1 1 0 1 0 0 1 0 0 1 1 0 [47] 1 0 0 1 0 1 0 0 1 0 0 0 1 0 1 0 0 0 0 1 1 0 0 1 1 0 1 0 1 1 1 1 0 1 0 1 0 1 1 1 0 0 0 0 0 0 [93] 0 1 1 1 1 0 0 0 0 0 1 1 0 1 0 1 0 1 1 0 0 0 0 1 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 1 1 1 0 0 1 [139] 0 0 0 0 1 1 1 0 1 1 0 1 0 0 1 1 1 0 1 0 0 1 0 0 0 1 0 0 1 1 1 0 1 0 0 0 0 0 0 0 0 0 1 1 1 0 [185] 0 1 0 1 1 0 1 1 0 0 0 1 0 1 1 0 0 0 1 0 1 0 1 0 0 0 0 0 0 0 0 0 1 0 1 1 0 1 0 1 0 0 1 0 1 0 [231] 0 1 0 1 0 0 0 1 1 0 0 1 1 1 1 0 1 0 0 1 1 0 0 0 0 0 0 1 1 1 0 1 0 0 0 0 1 0 0 1 1 1 1 1 1 0 [277] 1 1 0 0 0 0 0 0 1 0 0 0 1 0 1 0 1 1 0 1 0 1 1 1 1 0 0 1 0 0 1 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 [323] 1 0 0 0 0 1 0 0 0 1 0 0 1 0 1 1 $X2 [1] 39 52 48 55 56 54 39 39 43 34 45 36 38 41 58 35 51 41 52 59 32 35 55 44 56 58 53 34 54 57 [31] 54 53 51 57 37 41 43 37 55 52 40 47 39 34 31 40 34 54 50 35 39 54 40 45 59 36 55 50 47 44 [61] 43 46 54 44 54 36 36 47 49 41 30 55 55 45 34 51 41 44 53 30 49 59 33 44 37 42 53 56 41 41 [91] 32 54 45 37 38 43 46 46 35 44 55 57 56 56 47 33 41 34 38 49 36 43 57 57 44 55 59 47 59 58 [121] 38 43 56 53 37 52 46 50 56 52 57 51 42 48 30 50 43 44 40 44 49 54 34 43 59 48 52 48 38 59 [151] 49 40 59 36 43 55 40 41 39 59 59 58 37 34 44 46 44 40 52 35 58 43 48 36 38 37 37 34 45 48 [181] 33 36 50 31 49 57 53 49 40 49 44 31 58 31 49 50 59 31 49 31 36 57 53 53 57 56 52 49 41 55 [211] 48 31 51 55 43 41 41 37 40 44 58 37 47 41 52 33 37 40 45 38 50 36 44 46 44 51 53 44 51 36 [241] 47 51 43 46 49 52 49 37 45 53 48 36 50 45 39 36 32 51 34 45 46 52 52 45 59 39 48 49 56 56 [271] 53 56 49 50 51 45 31 53 52 49 49 33 37 41 50 46 53 48 47 47 46 50 44 35 37 44 58 48 38 46 [301] 53 48 57 50 39 33 47 49 52 55 40 41 58 37 53 57 57 45 57 34 58 35 51 43 37 45 49 54 39 32 [331] 35 42 48 35 40 56 48 48 $X3 [1] 14 14 12 12 14 12 12 14 14 14 16 16 16 16 16 16 16 12 16 16 12 12 12 16 12 12 16 14 14 14 [31] 12 11 12 12 14 16 16 16 14 12 18 14 12 16 18 14 16 12 9 14 12 16 14 14 9 11 12 14 16 12 [61] 9 14 12 14 16 9 18 16 12 12 14 14 16 12 14 16 12 12 16 14 12 9 16 16 14 12 12 14 14 14 [91] 14 14 14 18 16 16 12 12 14 14 14 14 12 16 16 18 12 18 9 16 16 14 12 14 12 9 16 16 12 12 [121] 12 14 16 14 14 16 12 14 12 12 14 16 14 14 18 14 14 12 14 14 12 12 16 12 9 14 16 12 16 16 [151] 14 12 9 16 14 14 16 14 16 16 12 12 12 12 12 16 16 16 12 14 14 14 16 12 14 12 12 11 12 12 [181] 16 16 16 14 12 12 12 14 14 16 16 16 16 18 12 12 16 18 16 16 16 12 12 14 12 14 14 12 14 14 [211] 14 16 14 14 12 14 12 14 12 12 12 16 14 16 16 16 16 12 12 12 12 16 14 14 14 12 16 16 14 12 [241] 14 14 14 12 16 12 9 16 14 16 16 12 12 9 14 16 16 16 16 12 12 9 12 16 14 16 14 16 16 16 [271] 18 16 16 14 12 16 14 12 14 14 14 14 12 14 16 12 16 14 12 14 16 12 14 12 14 12 14 14 14 16 [301] 16 14 14 14 14 14 14 9 14 14 14 16 14 12 12 12 14 14 9 16 16 14 16 14 12 12 12 12 16 16 [331] 14 12 12 14 16 12 12 9 $Y [1] 800 300 75 925 75 400 200 200 0 25 125 1750 0 600 2500 925 75 925 [19] 800 700 0 200 200 75 125 500 1750 125 75 400 0 500 200 200 400 1125 [37] 1375 600 125 600 700 400 25 800 200 25 300 400 0 500 400 600 125 0 [55] 75 0 75 125 925 400 125 25 200 0 925 700 500 0 25 600 300 75 [73] 600 0 500 700 600 700 75 300 300 300 300 800 500 400 125 125 75 200 [91] 0 0 125 700 600 1125 600 25 125 125 400 0 200 1125 500 500 200 400 [109] 75 800 500 200 125 200 25 300 300 300 0 700 400 300 0 25 0 75 [127] 300 300 75 25 125 0 500 300 600 75 25 400 300 25 75 0 600 200 [145] 200 25 1125 0 0 500 75 300 500 700 700 400 500 0 300 0 0 125 [163] 400 300 75 200 925 0 200 125 400 0 400 300 200 200 25 125 75 125 [181] 300 400 500 200 200 500 125 600 600 400 500 300 1125 400 125 600 25 500 [199] 925 600 500 400 925 25 400 200 600 400 0 75 300 200 25 0 75 400 [217] 700 0 300 400 200 400 0 600 700 300 500 75 925 300 600 200 800 75 [235] 300 125 925 925 925 300 125 800 700 700 300 125 125 600 125 800 700 25 [253] 300 0 300 200 0 500 300 400 200 600 25 500 700 25 700 200 700 500 [271] 800 800 1125 1750 500 400 200 500 25 0 200 0 25 75 500 0 0 0 [289] 600 200 600 75 600 200 25 400 125 300 300 500 500 400 700 200 200 0 [307] 200 500 125 0 75 0 200 500 400 0 125 75 0 600 25 200 800 0 [325] 0 0 200 125 200 300 200 925 25 300 600 800 600 600 > > # rstanの結果をオブジェクト保存。seed=は乱数の種の番号で特に意味は無い > bayesMRA <- stan(file='MRA-3IV.stan', data=data02, seed=5978) SAMPLING FOR MODEL 'MRA-3IV' NOW (CHAIN 1). Gradient evaluation took 6.1e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.61 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 4.22074 seconds (Warm-up) 2.50718 seconds (Sampling) 6.72791 seconds (Total) SAMPLING FOR MODEL 'MRA-3IV' NOW (CHAIN 2). Gradient evaluation took 4.6e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.46 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 4.27844 seconds (Warm-up) 2.28698 seconds (Sampling) 6.56542 seconds (Total) SAMPLING FOR MODEL 'MRA-3IV' NOW (CHAIN 3). Gradient evaluation took 5.8e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.58 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 4.2271 seconds (Warm-up) 2.34835 seconds (Sampling) 6.57545 seconds (Total) SAMPLING FOR MODEL 'MRA-3IV' NOW (CHAIN 4). Gradient evaluation took 6.7e-05 seconds 1000 transitions using 10 leapfrog steps per transition would take 0.67 seconds. Adjust your expectations accordingly! Iteration: 1 / 2000 [ 0%] (Warmup) Iteration: 200 / 2000 [ 10%] (Warmup) Iteration: 400 / 2000 [ 20%] (Warmup) Iteration: 600 / 2000 [ 30%] (Warmup) Iteration: 800 / 2000 [ 40%] (Warmup) Iteration: 1000 / 2000 [ 50%] (Warmup) Iteration: 1001 / 2000 [ 50%] (Sampling) Iteration: 1200 / 2000 [ 60%] (Sampling) Iteration: 1400 / 2000 [ 70%] (Sampling) Iteration: 1600 / 2000 [ 80%] (Sampling) Iteration: 1800 / 2000 [ 90%] (Sampling) Iteration: 2000 / 2000 [100%] (Sampling) Elapsed Time: 3.92507 seconds (Warm-up) 2.40422 seconds (Sampling) 6.32929 seconds (Total) > > # 結果は,summary( )の方が詳しく表示され,オブジェクト名だけを指示すると最終結果のみ > summary(bayesMRA) $summary mean se_mean sd 2.5% 25% 50% 75% b0 -623.231898 3.38771222 153.756482 -921.728403 -725.781937 -626.393569 -519.689519 b1 352.673796 0.52062594 29.898071 294.377953 333.164088 352.603488 371.766881 b2 5.335034 0.03756853 1.923838 1.550765 4.048194 5.360098 6.627664 b3 42.075930 0.17421232 7.867556 26.073755 36.979958 42.160319 47.402669 sigma 277.022694 0.20249452 10.709596 256.640732 269.537061 276.812781 284.153775 lp__ -2063.245199 0.03947811 1.571806 -2067.185275 -2064.115779 -2062.942266 -2062.052792 97.5% n_eff Rhat b0 -304.322699 2059.937 1.0025994 b1 413.284925 3297.879 1.0004803 b2 9.003297 2622.337 1.0016307 b3 56.992365 2039.492 1.0021757 sigma 298.896702 2797.175 0.9995965 lp__ -2061.176380 1585.204 1.0019393 $c_summary , , chains = chain:1 stats parameter mean sd 2.5% 25% 50% 75% b0 -617.924965 155.770495 -922.284417 -722.263314 -623.847236 -514.645754 b1 351.520847 32.176780 292.107342 330.444910 350.029050 372.148817 b2 5.186789 2.043988 1.214848 3.793311 5.163375 6.610174 b3 42.228963 7.761022 25.891916 36.956063 42.549146 47.561131 sigma 277.044463 10.759760 256.076084 269.650277 276.978867 284.275931 lp__ -2063.334228 1.639128 -2067.363720 -2064.234155 -2063.011645 -2062.044296 stats parameter 97.5% b0 -301.193841 b1 417.975977 b2 9.159747 b3 57.627187 sigma 298.651104 lp__ -2061.223937 , , chains = chain:2 stats parameter mean sd 2.5% 25% 50% 75% b0 -619.419864 149.677914 -904.713256 -719.530472 -622.124719 -514.824734 b1 352.569037 29.380208 296.461413 333.011345 352.587501 372.028731 b2 5.330022 1.811562 1.928934 4.097053 5.354231 6.534042 b3 41.811430 7.784277 26.639559 37.254832 41.907671 46.971730 sigma 276.852766 10.838192 256.379791 269.036171 276.678307 283.829720 lp__ -2063.199824 1.569289 -2067.185647 -2064.007911 -2062.948930 -2062.053746 stats parameter 97.5% b0 -324.854664 b1 409.764080 b2 8.960024 b3 55.939186 sigma 298.488771 lp__ -2061.146848 , , chains = chain:3 stats parameter mean sd 2.5% 25% 50% 75% b0 -618.753946 153.115273 -916.72116 -723.951243 -620.284645 -515.965020 b1 352.766264 29.952841 291.28126 333.814792 353.989895 370.990805 b2 5.415499 1.926709 1.64962 4.139919 5.403632 6.778267 b3 41.499876 7.939414 25.04742 36.430000 41.128821 47.092733 sigma 277.300872 10.876147 257.66931 269.764233 277.112221 284.326258 lp__ -2063.331131 1.551608 -2067.22070 -2064.242820 -2062.999464 -2062.171713 stats parameter 97.5% b0 -305.057189 b1 414.182933 b2 8.982646 b3 56.147057 sigma 300.429542 lp__ -2061.200877 , , chains = chain:4 stats parameter mean sd 2.5% 25% 50% 75% 97.5% b0 -636.828817 155.804797 -936.836826 -736.28667 -643.120721 -541.47515 -288.323114 b1 353.839036 27.925601 301.481317 335.43680 353.515952 371.52170 408.966700 b2 5.407826 1.900016 1.616178 4.16573 5.478735 6.60682 9.025422 b3 42.763452 7.938395 27.259488 37.33069 43.019262 48.08810 57.703161 sigma 276.892676 10.366920 257.393555 269.58761 276.300310 283.75359 297.599816 lp__ -2063.115612 1.516122 -2066.729699 -2063.96012 -2062.796616 -2061.96738 -2061.172686 > bayesMRA Inference for Stan model: MRA-3IV. 4 chains, each with iter=2000; warmup=1000; thin=1; post-warmup draws per chain=1000, total post-warmup draws=4000. mean se_mean sd 2.5% 25% 50% 75% 97.5% n_eff Rhat b0 -623.23 3.39 153.76 -921.73 -725.78 -626.39 -519.69 -304.32 2060 1 b1 352.67 0.52 29.90 294.38 333.16 352.60 371.77 413.28 3298 1 b2 5.34 0.04 1.92 1.55 4.05 5.36 6.63 9.00 2622 1 b3 42.08 0.17 7.87 26.07 36.98 42.16 47.40 56.99 2039 1 sigma 277.02 0.20 10.71 256.64 269.54 276.81 284.15 298.90 2797 1 lp__ -2063.25 0.04 1.57 -2067.19 -2064.12 -2062.94 -2062.05 -2061.18 1585 1 Samples were drawn using NUTS(diag_e) at Sun May 7 16:17:08 2017. For each parameter, n_eff is a crude measure of effective sample size, and Rhat is the potential scale reduction factor on split chains (at convergence, Rhat=1). > > # 同じ分析を,古典的・頻度論的重回帰で実行 > summary(lm01 <- lm(income ~ male + age + edu, data=d1)); confint(lm01) Call: lm(formula = income ~ male + age + edu, data = d1) Residuals: Min 1Q Median 3Q Max -717.50 -169.66 -46.16 113.65 1787.86 Coefficients: Estimate Std. Error t value Pr(>|t|) (Intercept) -624.486 153.254 -4.075 5.76e-05 *** male 352.892 30.670 11.506 < 2e-16 *** age 5.366 1.929 2.782 0.00571 ** edu 42.031 7.735 5.434 1.06e-07 *** --- Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1 Residual standard error: 275.7 on 334 degrees of freedom Multiple R-squared: 0.3465, Adjusted R-squared: 0.3406 F-statistic: 59.02 on 3 and 334 DF, p-value: < 2.2e-16 2.5 % 97.5 % (Intercept) -925.950274 -323.022099 male 292.561188 413.222923 age 1.571567 9.160603 edu 26.816721 57.245668 >