1 module pry.grammar.graph; 2 3 import pry; 4 import pry.grammar.ast; 5 import pry.grammar.parser; 6 import std.exception; 7 8 private: 9 10 class CollectDependencies(T) : Visitor { 11 T[string] defs; 12 T[string] deps; 13 14 this(T[string] definitions) { 15 defs = definitions; 16 } 17 18 override void visit(Grammar grammar) { 19 foreach(def; grammar.defs) 20 def.accept(this); 21 } 22 23 override void visit(Definition def) { 24 def.ast.accept(this); 25 } 26 27 override void visit(Sequence seq) { 28 foreach(ast; seq.seq) { 29 ast.accept(this); 30 } 31 } 32 33 override void visit(Alternative alt) { 34 foreach(ast; alt.alt) { 35 ast.accept(this); 36 } 37 } 38 39 override void visit(Map map) { 40 map.ast.accept(this); 41 } 42 43 override void visit(NegativeLookahead neg) { 44 neg.ast.accept(this); 45 } 46 47 override void visit(PositiveLookahead pos) { 48 pos.ast.accept(this); 49 } 50 51 override void visit(SimpleSequence seq){} 52 53 override void visit(CharClass charClass){} 54 55 override void visit(Literal literal){} 56 57 override void visit(Reference reference) { 58 enforce(reference.name in defs, 59 "Undefined non-terminal '"~reference.name~"'"); 60 deps[reference.name] = defs[reference.name]; 61 } 62 63 override void visit(Combinator comb) { 64 foreach(ast; comb.args) { 65 ast.accept(this); 66 } 67 } 68 } 69 70 T[string] collectDependenices(T)(Definition target, T[string] defs) { 71 auto visitor = new CollectDependencies!T(defs); 72 target.accept(visitor); 73 return visitor.deps; 74 } 75 76 class Node { 77 Definition def; 78 Node[] deps; // directed dependencies of this node 79 int priority; // order of definition, bigger is higher priority 80 81 this(Definition def){ 82 this.def = def; 83 } 84 } 85 86 struct Graph { 87 Node[] nodes; 88 89 this(Definition[] definitions) { 90 import std.algorithm, std.range, std.array, std.typecons; 91 nodes = definitions.map!(x => new Node(x)).array; 92 Node[string] name2node; 93 foreach(i, d; definitions) { 94 name2node[d.name] = nodes[i]; 95 } 96 foreach(n; nodes) { 97 n.deps = collectDependenices(n.def, name2node).values(); 98 } 99 } 100 101 void dfs(Node n) { 102 import std.algorithm, std.array; 103 Node[] path; 104 void _dfs(Node start, int priority){ 105 if(start.priority < priority) start.priority = priority; 106 path ~= start; 107 for(size_t i = 0; i < start.deps.length;){ 108 Node next = start.deps[i]; 109 if(!find!"a is b"(path, next).empty) { 110 // Found a cycle. 111 next.def.dynamic = true; 112 // Break it. 113 start.deps = remove!(SwapStrategy.unstable)(start.deps, i); 114 continue; 115 } 116 _dfs(next, priority+1); 117 i++; 118 } 119 path = path[0..$-1]; 120 } 121 _dfs(n, 1); 122 } 123 124 Node[] topologicalSort() { 125 import std.algorithm; 126 foreach(n; nodes){ 127 if(n.priority == 0) dfs(n); 128 } 129 sort!((a,b) => a.priority > b.priority, SwapStrategy.stable)(nodes); 130 return nodes; 131 } 132 } 133 134 unittest { 135 import std.algorithm, std.array; 136 Definition[] defs = ` 137 G: 138 A <- 'a' B?; 139 B <- 'b' C?; 140 C <- 'c' A?; 141 `.parse(pegParser).defs; 142 auto nodes = Graph(defs).topologicalSort(); 143 assert(nodes.map!(x => x.def.name).equal(["C", "B", "A"])); 144 assert(!nodes[0].def.dynamic); 145 assert(!nodes[1].def.dynamic); 146 assert(nodes[2].def.dynamic); 147 } 148 149 unittest { 150 import std.algorithm, std.array; 151 Definition[] defs = ` 152 G: 153 D <- 'd' D?; 154 C <- 'c' D; 155 B <- 'b' C; 156 A <- 'a' B; 157 E <- '12'; 158 `.parse(pegParser).defs; 159 auto nodes = Graph(defs).topologicalSort(); 160 assert(nodes.map!(x => x.def.name).equal(["D", "C", "B", "A", "E"])); 161 assert(nodes[0].def.dynamic); 162 assert(!nodes[1].def.dynamic); 163 assert(!nodes[2].def.dynamic); 164 assert(!nodes[3].def.dynamic); 165 assert(!nodes[4].def.dynamic); 166 } 167 168 169 class CodeGen : Visitor { 170 import std.algorithm : filter, map; 171 import std.array, std.format; 172 import std.range: iota; 173 Appender!string app; 174 string streamType; 175 bool skipWhite, useRep; 176 177 this(string streamType, bool skipWhite) { 178 app = appender!string(); 179 this.streamType = streamType; 180 this.skipWhite = skipWhite; 181 } 182 183 void outputModifier(Modifier mod) { 184 if(mod.min == 1 && mod.max == 1) 185 return; 186 if(mod.min == 0 && mod.max == 1){ 187 formattedWrite(app, ".optional"); 188 return; 189 } 190 if(useRep) 191 formattedWrite(app, ".rep!(%d, %d)", mod.min, mod.max); 192 else 193 formattedWrite(app, ".array!(%d, %d)", mod.min, mod.max); 194 } 195 196 override void visit(Grammar g) { 197 auto nodes = Graph(g.defs).topologicalSort(); 198 formattedWrite(app, 199 "immutable %s = (){\nimport pry, std.uni, std.typecons;\nwith(parsers!(%s)){\n", 200 g.name, streamType); 201 foreach(n; nodes) { 202 if(!n.def.dynamic) continue; 203 if(n.def.type == "") 204 throw new Exception("PEG needs type for circular definition: "~n.def.name); 205 formattedWrite(app, "auto %s = dynamic!%s;\n", n.def.name, n.def.type); 206 } 207 foreach(n; nodes) { 208 n.def.accept(this); 209 } 210 formattedWrite(app, "return %s;\n", g.defs[0].name); 211 formattedWrite(app, "}\n}();"); 212 } 213 214 override void visit(Definition def) { 215 formattedWrite(app, "%s %s = ", def.dynamic ? "" : "auto", def.name); 216 def.ast.accept(this); 217 formattedWrite(app, ";\n"); 218 } 219 220 override void visit(Sequence seq) { 221 if(seq.seq.length != 1) formattedWrite(app, "seq("); 222 foreach(i, ast; seq.seq) { 223 if(i != 0) formattedWrite(app, ","); 224 ast.accept(this); 225 } 226 if(seq.seq.length != 1) formattedWrite(app, ")"); 227 // Map away ignored pieces in the grammar 228 auto liveIdx = iota(0, seq.seq.length) 229 .filter!(i => !seq.seq[i].ignored).array; 230 if(liveIdx.length != seq.seq.length) { 231 bool noTuple = liveIdx.length == 1; 232 if(noTuple) { 233 formattedWrite(app, ".map!(x => x[%s])", liveIdx[0]); 234 } 235 else { 236 formattedWrite(app, ".map!(x => tuple("); 237 foreach(i, index; liveIdx) { 238 formattedWrite(app, "%sx[%s]", i != 0 ? "," : "", index); 239 } 240 formattedWrite(app, "))"); 241 } 242 } 243 } 244 245 override void visit(Alternative alt) { 246 if(alt.alt.length != 1) formattedWrite(app, "any("); 247 foreach(i, ast; alt.alt) { 248 if(i != 0) formattedWrite(app, ","); 249 ast.accept(this); 250 } 251 if(alt.alt.length != 1) formattedWrite(app, ")"); 252 outputModifier(alt.mod); 253 } 254 255 override void visit(Map map) { 256 map.ast.accept(this); 257 formattedWrite(app, ".map!((it)%s)", map.code); 258 outputModifier(map.mod); 259 } 260 261 override void visit(NegativeLookahead neg) { 262 neg.ast.accept(this); 263 formattedWrite(app, ".negLookahead"); 264 } 265 266 override void visit(PositiveLookahead pos) { 267 pos.ast.accept(this); 268 formattedWrite(app, ".lookahead"); 269 } 270 271 override void visit(SimpleSequence seq){ 272 if(seq.seq.length != 1) 273 formattedWrite(app, "seq("); 274 bool save = skipWhite; 275 skipWhite = false; 276 useRep = true; 277 foreach(i, ast; seq.seq) { 278 if(i != 0) formattedWrite(app, ","); 279 ast.accept(this); 280 } 281 useRep = false; 282 skipWhite = save; 283 if(seq.seq.length != 1) 284 formattedWrite(app, ").slice"); 285 if(skipWhite) 286 formattedWrite(app, ".skipWs"); 287 } 288 289 override void visit(CharClass charClass) { 290 formattedWrite(app, "set!(CodepointSet("); 291 bool first = true; 292 foreach(ival; charClass.set.byInterval) { 293 formattedWrite(app, "%s%s,%s", first ? "" : ",", ival.a, ival.b); 294 if(first) first = false; 295 } 296 formattedWrite(app, "))"); 297 outputModifier(charClass.mod); 298 if(skipWhite) formattedWrite(app, ".skipWs"); 299 } 300 301 override void visit(Literal literal) { 302 if(literal.lit.length == 1 303 || (literal.lit.length == 2 && literal.lit[0] == '\\')) { 304 formattedWrite(app, "tk!'%s'", literal.lit); 305 } 306 else { 307 formattedWrite(app, `literal!"%s"`, literal.lit); 308 } 309 outputModifier(literal.mod); 310 if(skipWhite) formattedWrite(app, ".skipWs"); 311 } 312 313 override void visit(Reference reference) { 314 formattedWrite(app, "%s", reference.name); 315 outputModifier(reference.mod); 316 } 317 318 override void visit(Combinator comb) { 319 formattedWrite(app, "%s(", comb.combinator); 320 foreach(i, arg; comb.args) { 321 if(i != 0) formattedWrite(app, ","); 322 arg.accept(this); 323 } 324 formattedWrite(app, ")"); 325 } 326 } 327 328 public enum PegOption: uint { 329 none = 0, 330 skipWhite = 1 331 }; 332 333 public string grammar(S=string)(string peg, PegOption options=PegOption.skipWhite) { 334 Grammar g; 335 try { 336 g = peg.parse(pegParser); 337 } 338 catch(ParseFailure!(SimpleStream!string) failure) { 339 throw new Exception("PEG grammar error " 340 ~ peg[0..failure.err.location] ~ " --HERE--> " 341 ~ peg[failure.err.location..$]); 342 } 343 auto generator = new CodeGen(S.stringof, (options & PegOption.skipWhite) != 0); 344 generator.visit(g); 345 return generator.app.data(); 346 } 347 348 unittest { 349 enum string expr = ` 350 calc: 351 expr : int <- 352 (term '+' expr) { return it[0] + it[2]; } 353 / (term '-' expr) { return it[0] - it[2]; } 354 / term ; 355 term : int <- 356 (primary '*' term) { return it[0] * it[2]; } 357 / (primary '/' term) { return it[0] / it[2]; } 358 / primary ; 359 primary <- 360 [0-9]+ { return to!int(it); } 361 / :'(' expr :')'; 362 `; 363 import std.stdio, std.conv, pry; 364 writeln(grammar(expr, PegOption.skipWhite)); 365 mixin(grammar(expr, PegOption.skipWhite)); 366 assert(" ( 2 + 4) * 2".parse(calc) == 12); 367 enum string values = ` 368 object: 369 obj <- ((identifier :':' value) $aa(:',' identifier :':' value) ) { 370 auto aa = it[1]; 371 aa[it[0][0]] = it[0][1]; 372 return aa; 373 }; 374 identifier <- [a-zA-Z_][a-zA-Z0-9_]* ; 375 value <- [0-9]+ { return it.to!int; } ; 376 `; 377 writeln(grammar(values)); 378 mixin(grammar(values)); 379 assert("abc:90, def:13".parse(object)["abc"] == 90); 380 } 381