module pry.grammar.parser;

import pry;
import pry.grammar.ast, pry.grammar.printer;
import std.conv, std.exception, std.uni;

/// Input stream type used by every parser in this module.
alias Stream = SimpleStream!string;

/**
    Builds the parser for repetition modifiers.

    Accepted forms and the `Modifier` they produce:
    `*` -> `Modifier(0, uint.max)`, `+` -> `Modifier(1, uint.max)`,
    `?` -> `Modifier(0, 1)`, `{n}` -> `Modifier(n, n)` and
    `{n, m}` -> `Modifier(n, m)`.
*/
auto modifier() {
    with(parsers!Stream) {
        // a run of decimal digits converted to an int
        auto digits = range!('0', '9').rep.skipWs.map!(x => to!int(x));
        return any(
            tk!'*'.map!(x => Modifier(0, uint.max)),
            tk!'+'.map!(x => Modifier(1, uint.max)),
            tk!'?'.map!(x => Modifier(0, 1)),
            seq(tk!'{', digits, seq(stk!',', digits).optional, stk!'}')
                // without the optional ", m" part the single count is used for both bounds
                .map!(x => Modifier(x[1], x[2].isNull ? x[1] : x[2][1]))
        );
    }
}

unittest {
    auto m = modifier();
    assert("{ 2, 4 }".parse(m) == Modifier(2, 4));
    assert("{ 1 }".parse(m) == Modifier(1, 1));
    assert("*".parse(m) == Modifier(0, uint.max));
    assert("+".parse(m) == Modifier(1, uint.max));
    assert("?".parse(m) == Modifier(0, 1));
}

/**
    Parser that delegates character class parsing to `unicode.parseSet`.

    On success the parsed set is stored in `set`. If `unicode.parseSet`
    throws an `Exception`, the stream is restored to where parsing started,
    the exception message and the restored location are recorded in `err`,
    and `false` is returned.
*/
struct CharClassParser {
    bool parse(ref Stream stream, ref CodepointSet set, ref Stream.Error err) const {
        auto m = stream.mark();
        try {
            set = unicode.parseSet(stream);
            return true;
        }
        catch(Exception e){
            // rewind first so that the reported location is the start of the class
            stream.restore(m);
            err.reason = e.msg;
            err.location = stream.location;
            return false;
        }
    }
}

/// Builds a parser that yields a `CharClass` AST node for a character class.
auto charClass(){
    with(parsers!Stream) {
        return CharClassParser().map!(x => cast(Ast)new CharClass(x));
    }
}

unittest {
    auto cs = charClass();
    assert((cast(CharClass)"[0-9]".parse(cs)).set == CodepointSet('0', '9'+1));
    auto s = "[0-9".stream;
    Stream.Error err;
    Ast set;
    assert(!cs.parse(s, set, err));
    assert(s.front == '[');
}

/**
    Builds the parser for single-quoted literals, yielding a `Literal` AST node.

    Inside the quotes any character except `'` and `\` stands for itself.
    A backslash introduces an escape: `\"`, `\'`, `\\`, `\/`, `\b`, `\f`,
    `\n`, `\r`, `\t`, `\xHH` (two hex digits) or `\uHHHH` (four hex digits).
*/
auto literalAtom(){
    enum notQuote = CodepointSet('\'', '\''+1, '\\', '\\'+1).inverted;
    enum hex = CodepointSet('0', '9'+1, 'a', 'f'+1, 'A', 'F'+1);
    with(parsers!Stream) {
        auto p = seq(
            tk!'\'',
            any(
                set!notQuote,
                seq(tk!'\\', any(
                    tk!'"',
                    tk!'\'',
                    tk!'\\',
                    tk!'/',
                    tk!'b'.map!(_ => cast(dchar)'\b'),
                    tk!'f'.map!(_ => cast(dchar)'\f'),
                    tk!'n'.map!(_ => cast(dchar)'\n'),
                    tk!'r'.map!(_ => cast(dchar)'\r'),
                    tk!'t'.map!(_ => cast(dchar)'\t'),
                    seq(tk!'x', set!hex.rep!(2,2)).map!(x => cast(dchar)to!int(x[1], 16)),
                    seq(tk!'u', set!hex.rep!(4,4)).map!(x => cast(dchar)to!int(x[1], 16))
                )).map!(x => x[1]) // drop the backslash, keep the decoded character
            ).utfString!(char, 0),
            tk!'\''
        ).map!(x => cast(Ast)new Literal(x[1]));
        return p;
    }
}

unittest {
    assert((cast(Literal)`'abc\''`.parse(literalAtom)).lit == `abc'`);
    assert((cast(Literal)`'\u2340\x90'`.parse(literalAtom)).lit == "\u2340\u0090");
}

/**
    Builds the parser for identifiers: an ASCII letter or `_` followed by
    any number of ASCII letters, digits or `_`. Yields the matched text.
*/
auto identifier(){
    enum start = CodepointSet('a', 'z'+1, 'A', 'Z'+1, '_', '_'+1);
    enum end = CodepointSet('a', 'z'+1, 'A', 'Z'+1, '0', '9'+1, '_', '_'+1);
    with(parsers!Stream) {
        return seq(set!start, set!end.rep!0).slice;
    }
}

unittest {
    assert("a".parse(identifier) == "a");
    assert("_90".parse(identifier) == "_90");
}

/**
    Parser for a brace-delimited block of embedded code.

    The input must start with `{`. On success `code` receives the whole
    block including the outer braces and the stream is positioned right
    after the closing `}`. On failure the stream is left where it was and
    `err` describes the problem.

    Braces that appear inside double-quoted strings, `r"..."` and backtick
    WYSIWYG strings, character literals and `//`, `/* */`, `/+ +/` comments
    are not counted. Delimited strings (`q"..."`) are not recognised; their
    content is scanned like ordinary code.
*/
struct Balanced {
    bool parse(ref Stream s, ref string code, ref Stream.Error err) const {
        if(s.empty) {
            err.location = s.location;
            err.reason = "unexpected end of input";
            return false;
        }
        if(s.front != '{') {
            err.location = s.location;
            err.reason = "expected '{'";
            return false;
        }
        auto m = s.mark();
        s.popFront();
        int count = 1;
        // previous character, needed to tell r"..." from a plain "..."
        dchar prev = '{';
        while(count != 0 && !s.empty){
            auto c = s.front;
            s.popFront();
            switch(c) {
            case '{':
                count++;
                break;
            case '}':
                count--;
                break;
            case '"':
                // backslash escapes are inactive in r"..." strings
                skipQuoted(s, '"', prev != 'r');
                break;
            case '`':
                skipQuoted(s, '`', false);
                break;
            case '\'':
                skipQuoted(s, '\'', true);
                break;
            case '/':
                skipComment(s);
                break;
            default:
                break;
            }
            prev = c;
        }
        // also reached when a literal or comment ran to the end of input
        if(count != 0) {
            s.restore(m);
            err.location = s.location;
            err.reason = "unbalanced braces";
            return false;
        }
        code = s.slice(m);
        return true;
    }

    // Consumes input up to and including the closing `quote`; the opening
    // quote must already be consumed. Stops at end of input if unterminated.
    private static void skipQuoted(ref Stream s, dchar quote, bool escapes) {
        while(!s.empty) {
            auto c = s.front;
            s.popFront();
            if(c == quote) return;
            // a backslash hides the next character, even if it is the quote
            if(escapes && c == '\\' && !s.empty) s.popFront();
        }
    }

    // Called right after a '/' was consumed: skips the rest of a comment if
    // one starts here, otherwise consumes nothing (the '/' was an operator).
    private static void skipComment(ref Stream s) {
        if(s.empty) return;
        auto kind = s.front;
        if(kind == '/') {
            // line comment: everything up to (not including) the newline
            while(!s.empty && s.front != '\n') s.popFront();
        }
        else if(kind == '*') {
            s.popFront();
            dchar last = 0;
            while(!s.empty) {
                auto c = s.front;
                s.popFront();
                if(last == '*' && c == '/') return;
                last = c;
            }
        }
        else if(kind == '+') {
            s.popFront();
            int nesting = 1;
            dchar last = 0;
            while(nesting != 0 && !s.empty) {
                auto c = s.front;
                s.popFront();
                // reset `last` after a match so one character is never
                // used by two delimiters (e.g. the '+' in "+/+")
                if(last == '/' && c == '+') { nesting++; last = 0; }
                else if(last == '+' && c == '/') { nesting--; last = 0; }
                else last = c;
            }
        }
    }
}

/// Returns a parser for a balanced `{ ... }` code block, see `Balanced`.
auto balanced(){
    return Balanced();
}

/**
    Builds the parser for a complete grammar description.

    A grammar is `Name:` followed by definitions of the form
    `name [: Type] <- alternative ;` and must extend to the end of input.
    The result is a `Grammar` AST node.
*/
auto pegParser() {
    with(parsers!Stream) {
        // forward declaration: alternatives are referenced recursively by atoms
        auto alternative = dynamic!Ast;
        auto simpleAtom = any(
            charClass,
            literalAtom
        );
        auto atomBase = seq(
            stk!':'.optional,
            any(
                // run of character classes / literals, each with an optional modifier
                seq(simpleAtom, modifier.optional).map!((x){
                    auto ast = x[0];
                    if(!x[1].isNull) ast.mod = x[1];
                    return ast;
                }).skipWs.array.map!(x => cast(Ast)new SimpleSequence(x)),
                // reference to another rule by name
                seq(identifier, modifier.optional).map!((x){
                    auto ast = cast(Ast)new Reference(x[0]);
                    if(!x[1].isNull) ast.mod = x[1];
                    return ast;
                }).skipWs,
                // parenthesised group
                seq(tk!'(', alternative, stk!')', modifier.optional).map!((x){
                    auto ast = x[1];
                    if(!x[3].isNull) ast.mod = x[3];
                    return ast;
                }).skipWs,
                // $name(arg, arg, ...) combinator call
                seq(tk!'$', identifier, stk!'(',
                    delimited(alternative, stk!','), stk!')'
                ).map!(x => cast(Ast) new Combinator(x[1], x[3])).skipWs
            )
        // a leading ':' marks the atom as ignored
        ).map!((x){ if(!x[0].isNull) x[1].ignored = true; return x[1]; });
        // an atom optionally followed by a { ... } code block becomes a Map node
        auto mappedAtom = seq(
            atomBase, balanced.skipWs.optional
        ).map!((x){ return x[1].isNull ? x[0] : cast(Ast)new Map(x[0], x[1]); });
        auto atom = any(
            seq(tk!'!', mappedAtom).map!(x => cast(Ast)new NegativeLookahead(x[1])),
            seq(tk!'&', mappedAtom).map!(x => cast(Ast)new PositiveLookahead(x[1])),
            mappedAtom
        );
        auto sequence = atom.array.map!(x => new Sequence(x));
        alternative = delimited(sequence, stk!'/').skipWs
            .map!(x => cast(Ast)new Alternative(x));
        auto definitions = seq(
            identifier.skipWs, seq(stk!':', identifier.skipWs).optional,
            literal!"<-".skipWs, alternative, stk!';'
        // a missing ": Type" part is passed on as an empty string
        ).map!(x => new Definition(x[0], x[1].isNull ? "" : x[1][1], x[3]))
        .array.skipWs;
        auto grammar = seq(identifier.skipWs, stk!':', definitions, eof.skipWs)
            .map!(x => new Grammar(x[0], x[2]));
        return grammar;
    }
}

unittest {
    import std.stdio;
    string s = `
Test:
    abc : Type <- [0-9]+ :'a' / 'b' abc { return it; };
    def <- :( '456' abc ){2} '90' !([a-c][d-f])[a-z]+ ;
    j <- $comb([a-z]+ :':' [0-9]+, 'abc');
`;
    // NOTE(review): a parse failure is only printed here, it does not fail the test
    try {
        prettyPrint(s.parse(pegParser));
    }
    catch(ParseFailure!Stream ex){
        writeln(ex.err.reason, ":", s[ex.err.location .. $]);
    }
}