Rでの因果グラフの描き方
Rで描くグラフは統計分析のグラフに留まらない。
因果関係の表現や分析に用いられる有向非巡回グラフ (DAG; Directed Acyclic Graph) もRで描く事が出来る。
ここではそのごく初歩だけを紹介しよう。
Webブラウザ上でDAGを描いて分析する事も出来る。
install.packages ("DiagrammeR" )
library (DiagrammeR)
# Minimal example: pass an inline Graphviz DOT specification to grViz().
# The graph 'dag_1s' contains a single directed edge from x to y; the nodes
# are created implicitly by the edge statement and use default styling.
grViz ("
digraph dag_1s {
x -> y
}
" )
# Same x -> y graph with explicit styling: both nodes are declared as boxes
# with italic HTML-like labels (<<I>...</I>>), and the edge carries an italic
# label (β₁).
grViz ("
digraph dag_1a {
x[shape = box, label = <<I>x</I>>]
y[shape = box, label = <<I>y</I>>]
x -> y[label = <<I> β₁</I>>]
}
" )
# Common-cause diagram (graph label 'common cause'), laid out top-to-bottom
# (rankdir = TB) with nodesep = 1.0 and arrowsize = .5 for every edge.
# z (circle) points to both x and y (boxes); the x -> y edge is drawn dashed.
# The { rank = same; ... } subgraph asks Graphviz to place x and y on the
# same rank.
grViz ("
digraph dag_2 {
graph [rankdir = TB, nodesep = 1.0, label = 'common cause']
edge [arrowsize = .5]
x[shape = box, label = <<I>x</I>>, width = .2, height = .2]
y[shape = box, label = <<I>y</I>>, width = .2, height = .2]
z[shape = circle, label = <<I>z</I>>, width = .15, height = .15]
x -> y[style = dashed]
z -> x
z -> y
{ rank = same; x; y; }
}
" )
# Mediator diagram (graph label 'mediator'): x -> z -> y, plus a dashed
# direct edge x -> y. Node shapes and sizes match the common-cause diagram
# (x, y boxes; z circle).
# x and y are listed in both a rank = same and a rank = max subgraph.
# NOTE(review): rank = max alone presumably already puts x and y on one
# (the last) rank, so the rank = same line may be redundant -- confirm
# against the rendered output before removing it.
grViz ("
digraph dag_3 {
graph [rankdir = TB, nodesep = 1.0, label = 'mediator']
edge [arrowsize = .5]
x[shape = box, label = <<I>x</I>>, width = .2, height = .2]
y[shape = box, label = <<I>y</I>>, width = .2, height = .2]
z[shape = circle, label = <<I>z</I>>, width = .15, height = .15]
x -> y[style = dashed]
x -> z
z -> y
{ rank = same; x; y; }
{ rank = max; x; y; }
}
" )
# Exploratory factor analysis diagram; the italic title is placed at the top
# (labelloc = t).
# Structure: factor1 and factor2 each point to all six boxed variables
# var1..var6, and each borderless node e1..e6 (shape = none) points to the
# variable with the same index.
# Rank subgraphs group the factors, the variables, and the e-nodes onto one
# rank each.
# NOTE(review): e1..e6 appear in both a rank = same and a rank = max
# subgraph; rank = max alone presumably suffices -- confirm before changing.
grViz ("
digraph EFA {
graph [rankdir = TB, label = <<I>Exploratory Factor Analysis</I>>, labelloc = t];
factor1[label = <factor₁>];
factor2[label = <factor₂>];
var1[shape=box, label = <var₁>];
var2[shape=box, label = <var₂>];
var3[shape=box, label = <var₃>];
var4[shape=box, label = <var₄>];
var5[shape=box, label = <var₅>];
var6[shape=box, label = <var₆>];
e1[shape=none, label = <e₁>];
e2[shape=none, label = <e₂>];
e3[shape=none, label = <e₃>];
e4[shape=none, label = <e₄>];
e5[shape=none, label = <e₅>];
e6[shape=none, label = <e₆>];
factor1 -> var1;
factor1 -> var2;
factor1 -> var3;
factor1 -> var4;
factor1 -> var5;
factor1 -> var6;
factor2 -> var1;
factor2 -> var2;
factor2 -> var3;
factor2 -> var4;
factor2 -> var5;
factor2 -> var6;
e1 -> var1
e2 -> var2
e3 -> var3
e4 -> var4
e5 -> var5
e6 -> var6
{ rank = same; factor1; factor2; }
{ rank = same; var1; var2; var3; var4; var5; var6; }
{ rank = same; e1; e2; e3; e4; e5; e6; }
{ rank = max; e1; e2; e3; e4; e5; e6; }
}" )
# Principal component analysis diagram; the italic title is placed at the top
# (labelloc = t).
# Structure: each of the six boxed variables var1..var6 points to both
# component nodes pc1 and pc2, i.e. the arrows run from the variables to the
# components. No rank constraints are given, so the default layout applies.
grViz ("
digraph PCA {
graph [label = <<I>Principal Component Analysis</I>>, labelloc = t ];
var1[shape=box, label = <var₁>];
var2[shape=box, label = <var₂>];
var3[shape=box, label = <var₃>];
var4[shape=box, label = <var₄>];
var5[shape=box, label = <var₅>];
var6[shape=box, label = <var₆>];
pc1[label = <principal component₁>];
pc2[label = <principal component₂>];
var1 -> pc1;
var2 -> pc1;
var3 -> pc1;
var4 -> pc1;
var5 -> pc1;
var6 -> pc1;
var1 -> pc2;
var2 -> pc2;
var3 -> pc2;
var4 -> pc2;
var5 -> pc2;
var6 -> pc2;
}" )
# Partial correlation diagram drawn with the neato layout engine; every node
# has an explicit pos = 'x,y!' coordinate (in Graphviz the trailing '!' pins
# the node at that position).
# Structure: z -> x and z -> y; ex -> x and ey -> y; a red edge between ex
# and ey drawn with arrowheads at both ends (dir = both) and labelled
# r_xy|z via headlabel.
# NOTE(review): the graph ID is PCE while the title reads
# 'Partial Correlation Coefficient' -- possibly PCC was intended; the ID does
# not appear in the rendered output as far as this snippet shows.
# NOTE(review): the ex/ey attribute lists use ';' as separator while the
# other nodes use ','; the DOT grammar accepts both.
grViz ("
digraph PCE {
graph [layout = neato, fontname='times-italic', splines = true,
label ='Partial Correlation Coefficient', labelloc = t];
node [fontname='times-italic'];
z[pos = '3,4!'];
x[shape=box, pos = '2,3!'];
y[shape=box, pos = '4,3!'];
ex[shape=none; label=<e<sub>x</sub>>; pos = '2,2!'];
ey[shape=none; label=<e<sub>y</sub>>; pos = '4,2!'];
z -> x;
z -> y;
ex -> x;
ey -> y;
ex -> ey[color = red,
headlabel = <<I>r<SUB>xy|z</SUB></I>>,
dir = both,
labeldistance = '4.5'];
}
" )
# Multiple regression diagram; the graph label is placed at the bottom
# (labelloc = b) and the neato engine is selected with the graph-level
# statement layout = neato.
# Nodes have explicit pinned positions (pos = 'x, y!'). Each predictor
# x1..x4 points to y, and each edge carries its coefficient (β₁..β₄) as a
# taillabel, i.e. next to the predictor end; labeldistance sets how far the
# label sits from that end.
grViz ("
digraph MRA {
graph [label = 'Multiple Regression Analysis', labelloc = b ];
layout = neato
y[pos = '5.0, 3.0!', label = <<I>y</I>>]
x1[pos = '3.0, 4.0!', label = <<I>x</I>₁>]
x2[pos = '2.0, 3.4!', label = <<I>x</I>₂>]
x3[pos = '2.0, 2.6!', label = <<I>x</I>₃>]
x4[pos = '3.0, 2.0!', label = <<I>x</I>₄>]
x1 -> y[taillabel = <<I>β₁</I>>, labeldistance = 2]
x2 -> y[taillabel = <<I>β₂</I>>, labeldistance = 3]
x3 -> y[taillabel = <<I>β₃</I>>, labeldistance = 3]
x4 -> y[taillabel = <<I>β₄</I>>, labeldistance = 3]
}
" )
# Path diagram with two outcome nodes (y1, y2) and four x nodes, drawn with
# neato and pinned positions; the graph label is placed at the bottom
# (labelloc = b).
# Edges: x1 -> y1, x1 -> x3, x2 -> y1, x2 -> y2, x3 -> y1, x3 -> y2,
# x4 -> y2 and y1 -> y2. Each edge carries its coefficient as a headlabel
# (next to the arrowhead); labeldistance is tuned per edge to position the
# labels.
grViz ("
digraph PATHD {
graph [label = 'Path Diagram', labelloc = b ];
layout = neato
y1[pos = '4.5, 2.5!', label = <<I>y</I>₁>]
y2[pos = '5.0, 1.0!', label = <<I>y</I>₂>]
x1[pos = '3.5, 4.0!', label = <<I>x</I>₁>]
x2[pos = '3.2, 3.0!', label = <<I>x</I>₂>]
x3[pos = '2.0, 2.5!', label = <<I>x</I>₃>]
x4[pos = '2.0, 1.0!', label = <<I>x</I>₄>]
x1 -> y1[headlabel = <<I>α₁₁</I>>, labeldistance = 4]
x1 -> x3[headlabel = <<I>γ₁₃</I>>, labeldistance = 4]
x2 -> y1[headlabel = <<I>α₂₁</I>>, labeldistance = 3]
x2 -> y2[headlabel = <<I>β₂₂</I>>, labeldistance = 4]
x3 -> y1[headlabel = <<I>α₃₁</I>>, labeldistance = 2]
x3 -> y2[headlabel = <<I>β₃₂</I>>, labeldistance = 3]
x4 -> y2[headlabel = <<I>β₄₂</I>>, labeldistance = 2]
y1 -> y2[headlabel = <<I>β₁₂</I>>, labeldistance = 5]
}
" )
ggdagによる分析
単にパス図を描くだけではなく,グラフを用いて分析を行うのがDAGの本来の使用法である。
ここではggdagパッケイジについてごく簡単に紹介する。
本来は,多くの要因が複雑に絡んで関連が見通しがたいモデルでより力を発揮する。以下の例示は全体構造が見て取れる程度なのでわざわざ色々な関数を使わなくてもと感じるだろうが,あくまで例示である。
また,紹介の為に,見栄えを変更するtheme_dag()などのテーマ関数を適宜幾つか付けている。
install.packages ("ggdag" )
library (ggdag)
library (dagitty)
ggdagは,細かく好みのグラフを描く道具ではなく,グラフを用いて分析のアドヴァイスを示す為の道具と言える。
# Confounding example: z points to both x and y, and x points to y.
# x is declared as the exposure and y as the outcome inside the DAG itself.
dag01 <- dagitty::dagitty("dag{
x <- z -> y
x -> y
x[exposure]
y[outcome]
}")

# Convert the dagitty object into ggdag's tidy representation.
ggdag01 <- ggdag::tidy_dagitty(dag01)

# Plot the DAG, then plot the adjustment set(s); exposure and outcome are
# not passed here, so they come from the declarations in the DAG.
ggdag::ggdag(ggdag01)
ggdag::ggdag_adjustment_set(ggdag01)
# Mediation example: x reaches y both directly and through m.
# x is declared as the exposure and y as the outcome inside the DAG itself.
dag02 <- dagitty::dagitty("dag{
x -> m -> y
x -> y
x[exposure]
y[outcome]
}")

# Convert the dagitty object into ggdag's tidy representation.
ggdag02 <- ggdag::tidy_dagitty(dag02)

# Same two plots as for a plain ggdag() call, this time with theme_dag()
# added to change the appearance.
ggdag::ggdag(ggdag02) + ggdag::theme_dag()
ggdag::ggdag_adjustment_set(ggdag02) + ggdag::theme_dag()
# Partial-correlation structure: z points to x and y, x points to y, and the
# nodes ex and ey point to x and y respectively. ex and ey are joined by a
# bidirected edge (<->) and are both marked as unobserved.
dag03 <- dagitty ("dag{
x <- z -> y
x -> y
ex -> x
ey -> y
ex <-> ey
ex[unobserved]
ey[unobserved]
}" )
# Convert the dagitty object into ggdag's tidy representation.
ggdag03 <- tidy_dagitty (dag03)
# Plot with the grid variant of the DAG theme. The theme is added once:
# the original line added theme_dag_grid() twice, and the second addition
# only re-applied the same theme.
ggdag (ggdag03) + theme_dag_grid ()
グラフによる分析
# Larger example DAG used to demonstrate ggdag's query/plot helpers.
# Edges: z -> x, z -> y, x -> m, m -> y, p -> x, x -> y, p -> z, p -> m.
# No exposure/outcome is declared inside the DAG.
dag10 <- dagitty::dagitty("dag{
x <- z -> y
x -> m -> y
p -> x -> y
p -> z
p -> m
}")

# Convert the dagitty object into ggdag's tidy representation.
ggdag10 <- ggdag::tidy_dagitty(dag10)

# Overall picture of the DAG, drawn with the grid theme.
ggdag::ggdag(ggdag10) + ggdag::theme_dag_grid()

# Relatives of a node: ancestors and parents of m, descendants and
# children of z.
ggdag::ggdag_ancestors(ggdag10, "m")
ggdag::ggdag_parents(ggdag10, "m")
ggdag::ggdag_descendants(ggdag10, "z")
ggdag::ggdag_children(ggdag10, "z")

# Exogenous nodes and colliders, each shown with a different gray theme.
ggdag::ggdag_exogenous(ggdag10) + ggdag::theme_dag_gray()
ggdag::ggdag_collider(ggdag10) + ggdag::theme_dag_gray_grid()

# Adjustment sets for the effect of x on y; exposure and outcome are given
# explicitly, once for the total effect and once for the direct effect.
ggdag::ggdag_adjustment_set(ggdag10, exposure = "x", outcome = "y", effect = "total")
ggdag::ggdag_adjustment_set(ggdag10, exposure = "x", outcome = "y", effect = "direct")