超初心者向けのRガイド

DAG. Directed Acyclic Graph

Author

SUGINO Isamu, Build with R4.4.1

Published

January 6, 2025

1 全体の構成

2 Rでの因果グラフの描き方

Rで描くグラフは統計分析のグラフに留まらない。
因果関係の表現や分析に用いられる有向非巡回グラフ(DAG; Directed Acyclic Graph)もRで描く事が出来る。
ここではそのごく初歩だけを紹介しよう(脚注1)。

Webブラウザ上でDAGを描いて分析する事も出来る。

# Install DiagrammeR only when it is not available yet, so that re-running the
# script does not download and re-install the package every time.
if (!requireNamespace("DiagrammeR", quietly = TRUE)) {
  install.packages("DiagrammeR")
}
library(DiagrammeR)
# Smallest example: a single directed edge from x to y with default styling.
# The DOT source is kept in a variable so the graph definition and the
# rendering call are visually separated.
dot_dag_1s <- "
      digraph dag_1s {
        x -> y
      }
      "
grViz(diagram = dot_dag_1s)
# Same x -> y graph, now with box-shaped nodes, italic HTML-like labels and an
# edge label (beta with a subscript 1).
dot_dag_1a <- "
      digraph dag_1a {
        x[shape = box, label = <<I>x</I>>]
        y[shape = box, label = <<I>y</I>>]
        x -> y[label = <<I>  &beta;&#8321;</I>>]
      }
      "
grViz(diagram = dot_dag_1a)
# Common-cause structure: z points to both x and y, and the x -> y edge is
# drawn dashed. x and y are forced onto the same rank.
dot_dag_2 <- "
      digraph dag_2 {
        graph [rankdir = TB, nodesep = 1.0, label = 'common cause']
        edge [arrowsize = .5]
        x[shape = box, label = <<I>x</I>>, width = .2, height = .2]
        y[shape = box, label = <<I>y</I>>, width = .2, height = .2]
        z[shape = circle, label = <<I>z</I>>, width = .15, height = .15]
        x -> y[style = dashed]
        z -> x
        z -> y
      { rank = same; x; y; }
      }
      "
grViz(diagram = dot_dag_2)
# Mediator structure: x -> z -> y, with the x -> y edge drawn dashed.
# x and y are placed on the same rank and that rank is pushed to the maximum
# (rank = max) position.
dot_dag_3 <- "
      digraph dag_3 {
        graph [rankdir = TB, nodesep = 1.0, label = 'mediator']
        edge [arrowsize = .5]
        x[shape = box, label = <<I>x</I>>, width = .2, height = .2]
        y[shape = box, label = <<I>y</I>>, width = .2, height = .2]
        z[shape = circle, label = <<I>z</I>>, width = .15, height = .15]
        x -> y[style = dashed]
        x -> z
        z -> y
      { rank = same; x; y; }
      { rank = max;  x; y; }
      }
      "
grViz(diagram = dot_dag_3)
# Exploratory factor analysis diagram: two factors each point to all six
# observed variables (boxes), and every variable also receives its own error
# term e1..e6 (drawn without a node outline). Rank constraints keep factors,
# variables and error terms on three separate rows.
dot_efa <- "
digraph EFA {
  graph [rankdir = TB, label = <<I>Exploratory Factor Analysis</I>>, labelloc = t];
  factor1[label = <factor&#8321;>];
  factor2[label = <factor&#8322;>];
  var1[shape=box, label = <var&#8321;>];
  var2[shape=box, label = <var&#8322;>];
  var3[shape=box, label = <var&#8323;>];
  var4[shape=box, label = <var&#8324;>];
  var5[shape=box, label = <var&#8325;>];
  var6[shape=box, label = <var&#8326;>];
  e1[shape=none, label = <e&#8321;>];
  e2[shape=none, label = <e&#8322;>];
  e3[shape=none, label = <e&#8323;>];
  e4[shape=none, label = <e&#8324;>];
  e5[shape=none, label = <e&#8325;>];
  e6[shape=none, label = <e&#8326;>];
  factor1 -> var1;
  factor1 -> var2;
  factor1 -> var3;
  factor1 -> var4;
  factor1 -> var5;
  factor1 -> var6;
  factor2 -> var1;
  factor2 -> var2;
  factor2 -> var3;
  factor2 -> var4;
  factor2 -> var5;
  factor2 -> var6;
  e1 -> var1
  e2 -> var2
  e3 -> var3
  e4 -> var4
  e5 -> var5
  e6 -> var6
  { rank = same; factor1; factor2; }
  { rank = same; var1; var2; var3; var4; var5; var6; }
  { rank = same; e1; e2; e3; e4; e5; e6; }
  { rank = max; e1; e2; e3; e4; e5; e6; }
}"
grViz(diagram = dot_efa)
# Principal component analysis diagram: the arrows run from the six observed
# variables (boxes) to the two principal components, i.e. the opposite
# direction from the factor analysis diagram.
dot_pca <- "
digraph PCA {
  graph [label = <<I>Principal Component Analysis</I>>, labelloc = t ];
  var1[shape=box, label = <var&#8321;>];
  var2[shape=box, label = <var&#8322;>];
  var3[shape=box, label = <var&#8323;>];
  var4[shape=box, label = <var&#8324;>];
  var5[shape=box, label = <var&#8325;>];
  var6[shape=box, label = <var&#8326;>];
  pc1[label = <principal component&#8321;>];
  pc2[label = <principal component&#8322;>];
  var1 -> pc1;
  var2 -> pc1;
  var3 -> pc1;
  var4 -> pc1;
  var5 -> pc1;
  var6 -> pc1;
  var1 -> pc2;
  var2 -> pc2;
  var3 -> pc2;
  var4 -> pc2;
  var5 -> pc2;
  var6 -> pc2;
}"
grViz(diagram = dot_pca)
# Partial correlation diagram laid out with the neato engine; every node has a
# pinned position (the trailing '!' in pos). z points to x and y, the error
# terms ex and ey point to x and y, and the red double-headed edge between
# ex and ey carries the r_xy|z label.
dot_pce <- "
digraph PCE {
  graph [layout = neato, fontname='times-italic', splines = true,
         label ='Partial Correlation Coefficient', labelloc = t];
  node [fontname='times-italic'];
  z[pos = '3,4!'];
  x[shape=box, pos = '2,3!'];
  y[shape=box, pos = '4,3!'];
  ex[shape=none; label=<e<sub>x</sub>>; pos = '2,2!'];
  ey[shape=none; label=<e<sub>y</sub>>; pos = '4,2!'];
  z -> x;
  z -> y;
  ex -> x;
  ey -> y;
  ex -> ey[color = red,
           headlabel = <<I>r<SUB>xy|z</SUB></I>>,
           dir = both,
           labeldistance = '4.5'];
}
      "
grViz(diagram = dot_pce)
# Multiple regression diagram (neato layout, pinned positions): four
# predictors x1..x4 each point to y, and each edge carries its beta
# coefficient as a tail label.
dot_mra <- "
digraph MRA {
     graph [label = 'Multiple Regression Analysis', labelloc = b ];
     layout = neato
     y[pos  = '5.0, 3.0!', label = <<I>y</I>>]
     x1[pos = '3.0, 4.0!', label = <<I>x</I>&#8321;>]
     x2[pos = '2.0, 3.4!', label = <<I>x</I>&#8322;>]
     x3[pos = '2.0, 2.6!', label = <<I>x</I>&#8323;>]
     x4[pos = '3.0, 2.0!', label = <<I>x</I>&#8324;>]
     x1 -> y[taillabel = <<I>&beta;&#8321;</I>>, labeldistance = 2]
     x2 -> y[taillabel = <<I>&beta;&#8322;</I>>, labeldistance = 3]
     x3 -> y[taillabel = <<I>&beta;&#8323;</I>>, labeldistance = 3]
     x4 -> y[taillabel = <<I>&beta;&#8324;</I>>, labeldistance = 3]
}
      "
grViz(diagram = dot_mra)
# Path diagram (neato layout, pinned positions) with four x variables and two
# outcomes y1 and y2. Each edge carries its path coefficient as a head label;
# labeldistance is tuned per edge to place the label along the arrow.
dot_pathd <- "
digraph PATHD {
     graph [label = 'Path Diagram', labelloc = b ];
     layout = neato
     y1[pos = '4.5, 2.5!', label = <<I>y</I>&#8321;>]
     y2[pos = '5.0, 1.0!', label = <<I>y</I>&#8322;>]
     x1[pos = '3.5, 4.0!', label = <<I>x</I>&#8321;>]
     x2[pos = '3.2, 3.0!', label = <<I>x</I>&#8322;>]
     x3[pos = '2.0, 2.5!', label = <<I>x</I>&#8323;>]
     x4[pos = '2.0, 1.0!', label = <<I>x</I>&#8324;>]
     x1 -> y1[headlabel = <<I>&alpha;&#8321;&#8321;</I>>, labeldistance = 4]
     x1 -> x3[headlabel = <<I>&gamma;&#8321;&#8323;</I>>, labeldistance = 4]
     x2 -> y1[headlabel = <<I>&alpha;&#8322;&#8321;</I>>, labeldistance = 3]
     x2 -> y2[headlabel = <<I>&beta;&#8322;&#8322;</I>>,  labeldistance = 4]
     x3 -> y1[headlabel = <<I>&alpha;&#8323;&#8321;</I>>, labeldistance = 2]
     x3 -> y2[headlabel = <<I>&beta;&#8323;&#8322;</I>>,  labeldistance = 3]
     x4 -> y2[headlabel = <<I>&beta;&#8324;&#8322;</I>>,  labeldistance = 2]
     y1 -> y2[headlabel = <<I>&beta;&#8321;&#8322;</I>>,  labeldistance = 5]
}
      "
grViz(diagram = dot_pathd)

3 ggdagによる分析

単にパス図を描くだけではなく,グラフを用いて分析を行うのがDAGの本来の使用法である。
ここではggdagパッケイジについてごく簡単に紹介する(脚注2)。
 本来は,多くの要因が複雑に絡んで関連が見通しがたいモデルでより力を発揮する。以下の例示は全体構造が見て取れる程度なのでわざわざ色々な関数を使わなくてもと感じるだろうが,あくまで例示である。
 また,適宜幾つか見栄えを変更するtheme_dag()を紹介の為に付けている。

# Install ggdag only when it is not available yet, so that re-running the
# script does not download and re-install the package every time.
if (!requireNamespace("ggdag", quietly = TRUE)) {
  install.packages("ggdag")
}
library(ggdag)
# dagitty is attached explicitly because dagitty() is called directly below.
# NOTE(review): it is expected to be installed as a dependency of ggdag.
library(dagitty)

細かく好みのグラフを描く道具ではなく,グラフを用いて分析のアドヴァイスを示す為の道具と言える。

# Common-cause model: z affects both x and y, and x affects y.
# x is declared as the exposure and y as the outcome.
dag01 <- dagitty("dag{
                 x <- z -> y
                 x -> y
                 x[exposure]
                 y[outcome]
                 }")

# Convert to a tidy DAG object and draw it.
ggdag01 <- dag01 |>
  tidy_dagitty()
ggdag01 |>
  ggdag()

# Plot the adjustment set(s) for the declared exposure and outcome.
ggdag01 |>
  ggdag_adjustment_set()

# Mediator model: x affects y both directly and through m.
# x is declared as the exposure and y as the outcome.
dag02 <- dagitty("dag{
                 x -> m -> y
                 x -> y
                 x[exposure]
                 y[outcome]
                 }")

# Convert to a tidy DAG object and draw it with the plain DAG theme.
ggdag02 <- dag02 |>
  tidy_dagitty()
ggdag02 |>
  ggdag() +
  theme_dag()

# Plot the adjustment set(s) for the declared exposure and outcome.
ggdag02 |>
  ggdag_adjustment_set() +
  theme_dag()

# Common-cause model extended with the error terms ex and ey, which are marked
# as unobserved and connected by a bidirected edge (ex <-> ey).
dag03 <- dagitty("dag{
                 x <- z -> y
                 x -> y
                 ex -> x
                 ey -> y
                 ex <-> ey
                 ex[unobserved]
                 ey[unobserved]
                 }")
ggdag03 <- tidy_dagitty(dag03)
# The theme only needs to be added once; the original line added
# theme_dag_grid() twice, which had no additional effect.
ggdag(ggdag03) + theme_dag_grid()

3.1 グラフによる分析

# A slightly larger model used to demonstrate the graph-query helpers:
# z is a common cause of x and y, m mediates x -> y, and p points to x, z and m.
dag10 <- dagitty("dag{
                 x <- z -> y
                 x -> m -> y
                 p -> x -> y
                 p -> z
                 p -> m
                 }")
ggdag10 <- dag10 |>
  tidy_dagitty()

# Overall structure.
ggdag10 |>
  ggdag() +
  theme_dag_grid()

# Relatives of m: all ancestors, then direct parents only.
ggdag10 |>
  ggdag_ancestors("m")

ggdag10 |>
  ggdag_parents("m")

# Relatives of z: all descendants, then direct children only.
ggdag10 |>
  ggdag_descendants("z")

ggdag10 |>
  ggdag_children("z")

# Exogenous variables.
ggdag10 |>
  ggdag_exogenous() +
  theme_dag_gray()

# Colliders.
ggdag10 |>
  ggdag_collider() +
  theme_dag_gray_grid()

効果を推定する
# Adjustment set(s) for the total effect of x on y.
ggdag10 |>
  ggdag_adjustment_set(exposure = "x", outcome = "y", effect = "total")

# Adjustment set(s) for the direct effect of x on y.
ggdag10 |>
  ggdag_adjustment_set(exposure = "x", outcome = "y", effect = "direct")

Footnotes

  1. RでDAGを描くには,dagittyパッケイジやggdagパッケイジなど幾つかの方法があるが,ここでは主に,分析まで行う事は考えずに単にモデル図を描く事だけに限定し,DiagrammeRパッケイジでGraphvizを使う。↩︎

  2. Rにはよくある事だが,同じ事を行うのに色々な方法がある。ここで紹介するものもそのうちの一つに過ぎない。↩︎