1 module pry.grammar.parser;
2 
3 import pry;
4 import pry.grammar.ast, pry.grammar.printer;
5 import std.conv, std.exception, std.uni;
6 
7 alias Stream = SimpleStream!string;
8 
/**
 * Builds the parser for a repetition modifier.
 *
 * Accepted forms and the `Modifier` each one produces:
 *   `*`      -> Modifier(0, uint.max)
 *   `+`      -> Modifier(1, uint.max)
 *   `?`      -> Modifier(0, 1)
 *   `{n}`    -> Modifier(n, n)
 *   `{n,m}`  -> Modifier(n, m)
 */
auto modifier() {
	with(parsers!Stream) {
		// A run of decimal digits, wrapped in skipWs and converted to int.
		auto number = range!('0', '9').rep.skipWs.map!(text => to!int(text));

		auto star = tk!'*'.map!(_ => Modifier(0, uint.max));
		auto plus = tk!'+'.map!(_ => Modifier(1, uint.max));
		auto question = tk!'?'.map!(_ => Modifier(0, 1));

		// Optional ", m" part of the brace form.
		auto upperBound = seq(stk!',', number).optional;
		auto braces = seq(tk!'{', number, upperBound, stk!'}').map!((parts){
			auto low = parts[1];
			// Without an explicit upper bound the count is exact: {n} == {n,n}.
			auto high = parts[2].isNull ? low : parts[2][1];
			return Modifier(low, high);
		});

		return any(star, plus, question, braces);
	}
}
21 
// Each modifier syntax must produce the matching Modifier bounds.
unittest {
	auto parser = modifier();
	string[] inputs = ["{ 2, 4 }", "{ 1 }", "*", "+", "?"];
	auto expected = [
		Modifier(2, 4),
		Modifier(1, 1),
		Modifier(0, uint.max),
		Modifier(1, uint.max),
		Modifier(0, 1)
	];
	foreach(i, text; inputs) {
		assert(text.parse(parser) == expected[i]);
	}
}
30 
/**
 * Parser that reads a character class by delegating to
 * `unicode.parseSet` from std.uni.
 */
struct CharClassParser {
	/**
	 * Tries to parse a codepoint set at the current stream position.
	 *
	 * On success `set` receives the parsed set and true is returned.
	 * If `unicode.parseSet` throws, the stream is rewound to where it
	 * started, `err` is filled with the exception message and the
	 * (restored) stream location, and false is returned.
	 */
	bool parse(ref Stream stream, ref CodepointSet set, ref Stream.Error err) const {
		auto start = stream.mark();
		bool parsed = false;
		try {
			set = unicode.parseSet(stream);
			parsed = true;
		}
		catch(Exception failure){
			// Rewind first so the reported location is the start of the attempt.
			stream.restore(start);
			err.reason = failure.msg;
			err.location = stream.location;
		}
		return parsed;
	}
}
46 
/// Parser for a character class; the parsed set is wrapped in a `CharClass` node typed as `Ast`.
auto charClass(){
	with(parsers!Stream) {
		auto asAst = CharClassParser().map!(codepoints => cast(Ast)new CharClass(codepoints));
		return asAst;
	}
}
52 
// A well-formed class parses to the expected set; a malformed one fails
// and leaves the stream at its starting position.
unittest {
	auto parser = charClass();

	auto digits = cast(CharClass)"[0-9]".parse(parser);
	assert(digits.set == CodepointSet('0', '9'+1));

	auto broken = "[0-9".stream;
	Stream.Error failure;
	Ast result;
	immutable ok = parser.parse(broken, result, failure);
	assert(!ok);
	assert(broken.front == '[');
}
62 
/**
 * Parser for a single-quoted string literal, producing a `Literal` node
 * typed as `Ast`.
 *
 * Inside the quotes any character except `'` and `\` stands for itself.
 * A backslash introduces one of the escapes: `\"`, `\'`, `\\`, `\/`,
 * `\b`, `\f`, `\n`, `\r`, `\t`, `\xHH` (two hex digits) or
 * `\uHHHH` (four hex digits).
 */
auto literalAtom(){
	enum plainChars = CodepointSet('\'', '\''+1, '\\', '\\'+1).inverted;
	enum hexDigits = CodepointSet('0', '9'+1, 'a', 'f'+1, 'A', 'F'+1);
	with(parsers!Stream) {
		// What may follow the backslash, each alternative yielding one code point.
		auto escapeCode = any(
			tk!'"',
			tk!'\'',
			tk!'\\',
			tk!'/',
			tk!'b'.map!(_ => cast(dchar)'\b'),
			tk!'f'.map!(_ => cast(dchar)'\f'),
			tk!'n'.map!(_ => cast(dchar)'\n'),
			tk!'r'.map!(_ => cast(dchar)'\r'),
			tk!'t'.map!(_ => cast(dchar)'\t'),
			seq(tk!'x', set!hexDigits.rep!(2,2)).map!(h => cast(dchar)to!int(h[1], 16)),
			seq(tk!'u', set!hexDigits.rep!(4,4)).map!(h => cast(dchar)to!int(h[1], 16))
		);
		// Backslash plus escape code; only the decoded character is kept.
		auto escapeSeq = seq(tk!'\\', escapeCode).map!(pair => pair[1]);
		// Zero or more characters collected into a UTF-8 string.
		auto contents = any(set!plainChars, escapeSeq).utfString!(char, 0);
		return seq(tk!'\'', contents, tk!'\'')
			.map!(parts => cast(Ast)new Literal(parts[1]));
	}
}
90 
// Escaped quote, \u and \x escapes are decoded into the literal text.
unittest {
	auto quoted = cast(Literal)`'abc\''`.parse(literalAtom);
	assert(quoted.lit == `abc'`);

	auto escaped = cast(Literal)`'\u2340\x90'`.parse(literalAtom);
	assert(escaped.lit == "\u2340\u0090");
}
95 
/**
 * Parser for an identifier: an ASCII letter or underscore followed by
 * any number of ASCII letters, digits or underscores. Yields the matched
 * slice of the input.
 */
auto identifier(){
	enum leading = CodepointSet('a', 'z'+1, 'A', 'Z'+1, '_', '_'+1);
	enum trailing = CodepointSet('a', 'z'+1, 'A', 'Z'+1, '0', '9'+1, '_', '_'+1);
	with(parsers!Stream) {
		auto tail = set!trailing.rep!0;
		return seq(set!leading, tail).slice;
	}
}
103 
// Identifiers are returned verbatim, including a leading underscore and digits.
unittest {
	foreach(name; ["a", "_90"]) {
		assert(name.parse(identifier) == name);
	}
}
108 
/**
 * Parser for a brace-balanced block of embedded D code, e.g. `{ return it; }`.
 *
 * Braces that appear inside D string literals, character literals or
 * comments are not counted, so `{ return "}"; }` is taken as one block.
 */
struct Balanced {
	/**
	 * Consumes a `{ ... }` block starting at the current position.
	 *
	 * On success `code` receives the slice from the opening brace to the
	 * matching closing brace (both included) and true is returned.
	 * On failure false is returned, `err` is filled in and the stream is
	 * left at the position where the attempt started.
	 */
	bool parse(ref Stream s, ref string code, ref Stream.Error err) const {
		if(s.empty) {
			err.location = s.location;
			err.reason = "unexpected end of input";
			return false;
		}
		if(s.front != '{') {
			err.location = s.location;
			err.reason = "expected '{'";
			return false;
		}
		auto m = s.mark();
		s.popFront();
		int count = 1;
		// Character preceding the current one; needed to recognise r"..." strings.
		dchar prev = '{';
		while(count != 0 && !s.empty){
			const dchar c = s.front;
			s.popFront();
			switch(c) {
				case '{':
					count++;
					break;
				case '}':
					count--;
					break;
				case '"':
					// Double-quoted string: backslash escapes the next character.
					skipQuoted(s, '"', true);
					break;
				case '\'':
					// Character literal, e.g. '{' or '\''.
					skipQuoted(s, '\'', true);
					break;
				case '`':
					// Backtick (wysiwyg) string: no escapes.
					skipQuoted(s, '`', false);
					break;
				case 'r':
					// r"..." wysiwyg string, but only when 'r' starts a token.
					if(!isIdentChar(prev) && !s.empty && s.front == '"') {
						s.popFront();
						skipQuoted(s, '"', false);
					}
					break;
				case '/':
					if(!s.empty) skipComment(s);
					break;
				default:
					break;
			}
			prev = c;
		}
		// An unterminated literal or comment runs to the end of input and
		// therefore also ends up here with a non-zero count.
		if(count != 0) {
			s.restore(m);
			err.location = s.location;
			err.reason = "unbalanced parens";
			return false;
		}
		code = s.slice(m);
		return true;
	}

	/// Skips up to and including the closing `quote`; the opening one is already consumed.
	/// With `escapes` set, a backslash makes the following character literal.
	private static void skipQuoted(ref Stream s, dchar quote, bool escapes) {
		while(!s.empty) {
			const dchar c = s.front;
			s.popFront();
			if(c == quote) return;
			if(escapes && c == '\\' && !s.empty) s.popFront();
		}
	}

	/// Called right after a '/' was consumed on a non-empty stream: skips a
	/// `//`, `/* */` or nesting `/+ +/` comment if one starts here, otherwise does nothing.
	private static void skipComment(ref Stream s) {
		const dchar kind = s.front;
		if(kind == '/') {
			// Line comment: stop before the newline, the main loop consumes it.
			while(!s.empty && s.front != '\n') s.popFront();
		}
		else if(kind == '*') {
			s.popFront();
			dchar last = 0;
			while(!s.empty) {
				const dchar c = s.front;
				s.popFront();
				if(last == '*' && c == '/') return;
				last = c;
			}
		}
		else if(kind == '+') {
			s.popFront();
			int depth = 1;
			dchar last = 0;
			while(depth != 0 && !s.empty) {
				const dchar c = s.front;
				s.popFront();
				if(last == '/' && c == '+') {
					depth++;
					last = 0; // do not reuse the '+' as the start of "+/"
				}
				else if(last == '+' && c == '/') {
					depth--;
					last = 0; // do not reuse the '/' as the start of "/+"
				}
				else {
					last = c;
				}
			}
		}
	}

	/// True when `c` can be part of an identifier (letter, digit or underscore).
	private static bool isIdentChar(dchar c) {
		import std.uni : isAlpha, isNumber;
		return c == '_' || isAlpha(c) || isNumber(c);
	}
}
142 
/// Returns a default-initialized `Balanced` parser.
auto balanced(){
	Balanced parser;
	return parser;
}
146 
/**
 * Builds the parser for a whole grammar description of the form
 *
 *   Name:
 *       rule [: Type] <- alternative ;
 *       ...
 *
 * and yields a `Grammar` built from the grammar name and its definitions.
 */
auto pegParser() {
	with(parsers!Stream) {
		// Created up front because atoms refer to it recursively; it is
		// assigned its real parser further down.
		auto alternative = dynamic!Ast;

		auto simpleAtom = any(charClass, literalAtom);

		// Run of character classes / literals, each with an optional modifier.
		auto simpleRun = seq(simpleAtom, modifier.optional).map!((parts){
			auto node = parts[0];
			if(!parts[1].isNull) node.mod = parts[1];
			return node;
		}).skipWs.array.map!(nodes => cast(Ast)new SimpleSequence(nodes));

		// Identifier with an optional modifier, wrapped in a Reference node.
		auto ruleRef = seq(identifier, modifier.optional).map!((parts){
			auto node = cast(Ast)new Reference(parts[0]);
			if(!parts[1].isNull) node.mod = parts[1];
			return node;
		}).skipWs;

		// Parenthesized alternative with an optional modifier.
		auto grouped = seq(tk!'(', alternative, stk!')', modifier.optional).map!((parts){
			auto node = parts[1];
			if(!parts[3].isNull) node.mod = parts[3];
			return node;
		}).skipWs;

		// $name(arg, arg, ...)
		auto combinatorCall = seq(
			tk!'$', identifier, stk!'(',
			delimited(alternative, stk!','), stk!')'
		).map!(parts => cast(Ast) new Combinator(parts[1], parts[3])).skipWs;

		// A leading ':' sets the `ignored` flag on the atom.
		auto atomBase = seq(
			stk!':'.optional,
			any(simpleRun, ruleRef, grouped, combinatorCall)
		).map!((parts){
			if(!parts[0].isNull) parts[1].ignored = true;
			return parts[1];
		});

		// Atom optionally followed by a { ... } code block, which wraps it in a Map node.
		auto mappedAtom = seq(atomBase, balanced.skipWs.optional)
			.map!(parts => parts[1].isNull ? parts[0] : cast(Ast)new Map(parts[0], parts[1]));

		auto negative = seq(tk!'!', mappedAtom)
			.map!(parts => cast(Ast)new NegativeLookahead(parts[1]));
		auto positive = seq(tk!'&', mappedAtom)
			.map!(parts => cast(Ast)new PositiveLookahead(parts[1]));
		auto atom = any(negative, positive, mappedAtom);

		auto sequence = atom.array.map!(atoms => new Sequence(atoms));

		// Sequences separated by '/'.
		alternative = delimited(sequence, stk!'/').skipWs
			.map!(branches => cast(Ast)new Alternative(branches));

		// name [: Type] <- alternative ;
		auto typeAnnotation = seq(stk!':', identifier.skipWs).optional;
		auto definitions = seq(
			identifier.skipWs, typeAnnotation,
			literal!"<-".skipWs, alternative, stk!';'
		).map!(parts => new Definition(parts[0], parts[1].isNull ? "" : parts[1][1], parts[3]))
			.array.skipWs;

		return seq(identifier.skipWs, stk!':', definitions, eof.skipWs)
			.map!(parts => new Grammar(parts[0], parts[2]));
	}
}
198 
// Smoke test: parse a small grammar and pretty-print it; on a parse
// failure print the reason followed by the remaining input.
unittest {
	import std.stdio;
	string source = `
	Test:
		abc : Type <- [0-9]+ :'a' / 'b' abc { return it; };
		def <- :( '456' abc ){2} '90' !([a-c][d-f])[a-z]+ ;
		j <- $comb([a-z]+ :':' [0-9]+, 'abc');
	`;
	try {
		auto tree = source.parse(pegParser);
		prettyPrint(tree);
	}
	catch(ParseFailure!Stream failure){
		auto rest = source[failure.err.location .. $];
		writeln(failure.err.reason, ":", rest);
	}
}