1 // SDLang-D
2 // Written in the D programming language.
3 
4 module gfx.decl.sdlang.parser;
5 
6 import std.file;
7 
8 import gfx.decl.sdlang.libInputVisitor;
9 import gfx.decl.sdlang.taggedalgebraic;
10 
11 import gfx.decl.sdlang.ast;
12 import gfx.decl.sdlang.exception;
13 import gfx.decl.sdlang.lexer;
14 import gfx.decl.sdlang.symbol;
15 import gfx.decl.sdlang.token;
16 import gfx.decl.sdlang.util;
17 
/// Reads the SDLang document stored in the file `filename`, parses it and
/// returns the root tag.
Tag parseFile(string filename)
{
	// The file name is forwarded as well so that it is available to
	// parseSource for its error messages.
	return parseSource(cast(string) read(filename), filename);
}
24 
/// Parses the SDLang document contained in `source` and returns its root tag.
///
/// The optional `filename` parameter can be supplied so that the document's
/// filename (if any) can be displayed with any syntax error messages.
Tag parseSource(string source, string filename=null)
{
	auto dom = DOMParser(new Lexer(source, filename));
	return dom.parseRoot();
}
34 
/++
Parses an SDL document using StAX/Pull-style. Returns an InputRange with
element type ParserEvent.

The pullParseFile version reads a file and parses it, while pullParseSource
parses a string passed in. The optional `filename` parameter in pullParseSource
can be included so that the SDLang document's filename (if any) can be displayed
with any syntax error messages.

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.

Example:
------------------
parent 12 attr="q" {
	childA 34
	childB 56
}
lastTag
------------------

The ParserEvent sequence emitted for that SDL document would be as
follows (indented for readability):
------------------
TagStartEvent (parent)
	ValueEvent (12)
	AttributeEvent (attr, "q")
	TagStartEvent (childA)
		ValueEvent (34)
	TagEndEvent
	TagStartEvent (childB)
		ValueEvent (56)
	TagEndEvent
TagEndEvent
TagStartEvent (lastTag)
TagEndEvent
------------------
+/
auto pullParseFile(string filename)
{
	auto source = cast(string)read(filename);

	// Delegate to the pull-style entry point (not the DOM-building
	// parseSource) so the caller receives the documented range of
	// ParserEvent rather than a root Tag.
	return pullParseSource(source, filename);
}
85 
///ditto
auto pullParseSource(string source, string filename=null)
{
	// The pull parser consumes tokens from a lexer over `source`; wrapping it
	// with inputVisitor yields the ParserEvent values it emits.
	auto pull = PullParser(new Lexer(source, filename));
	return inputVisitor!ParserEvent(pull);
}
93 
///
@("pullParseFile/pullParseSource example")
unittest
{
	import std.stdio;

	// stuff.sdl
	immutable stuffSdl = `
		name "sdlang-d"
		description "An SDL (Simple Declarative Language) library for D."
		homepage "http://github.com/Abscissa/SDLang-D"

		configuration "library" {
			targetType "library"
		}
	`;

	// Walk the event stream and print a line for every event encountered.
	foreach(event; pullParseSource(stuffSdl))
	{
		final switch(event.kind)
		{
		case ParserEvent.Kind.tagStart:
			auto start = cast(TagStartEvent) event;
			writeln("TagStartEvent: ", start.namespace, ":", start.name, " @ ", start.location);
			break;

		case ParserEvent.Kind.tagEnd:
			auto end = cast(TagEndEvent) event;
			writeln("TagEndEvent");
			break;

		case ParserEvent.Kind.value:
			auto val = cast(ValueEvent) event;
			writeln("ValueEvent: ", val.value);
			break;

		case ParserEvent.Kind.attribute:
			auto attr = cast(AttributeEvent) event;
			writeln("AttributeEvent: ", attr.namespace, ":", attr.name, "=", attr.value);
			break;
		}
	}
}
135 
// Backing union for ParserEvent (declared in this module as
// TaggedAlgebraic!ParserEventUnion). It has one member per kind of event
// emitted by the pull parser; the member names are the ones used to tell
// events apart via ParserEvent.Kind (tagStart, tagEnd, value, attribute).
private union ParserEventUnion
{
	// A tag begins.
	TagStartEvent  tagStart;
	// The current tag ends.
	TagEndEvent    tagEnd;
	// A value belonging to the current tag.
	ValueEvent     value;
	// An attribute belonging to the current tag.
	AttributeEvent attribute;
}
143 
/++
The element of the InputRange returned by pullParseFile and pullParseSource.

This is a tagged union, built from the following:
-------
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
private union ParserEventUnion
{
	TagStartEvent  tagStart;
	TagEndEvent    tagEnd;
	ValueEvent     value;
	AttributeEvent attribute;
}
-------

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.
+/
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
170 
///
@("ParserEvent example")
unittest
{
	// Create: a ParserEvent can be built from any of the four event structs.
	auto startEvent = ParserEvent(TagStartEvent());
	auto endEvent   = ParserEvent(TagEndEvent());
	auto valueEvent = ParserEvent(ValueEvent());
	auto attrEvent  = ParserEvent(AttributeEvent());

	// Check type
	assert(startEvent.kind == ParserEvent.Kind.tagStart);
	assert(endEvent.kind   == ParserEvent.Kind.tagEnd);
	assert(valueEvent.kind == ParserEvent.Kind.value);
	assert(attrEvent.kind  == ParserEvent.Kind.attribute);

	// Cast to base type
	auto asStart = cast(TagStartEvent) startEvent;
	auto asEnd   = cast(TagEndEvent) endEvent;
	auto asValue = cast(ValueEvent) valueEvent;
	auto asAttr  = cast(AttributeEvent) attrEvent;
	//auto noGood = cast(AttributeEvent) startEvent; // AssertError: startEvent is a TagStartEvent, not AttributeEvent.

	// Use as base type.
	// In many cases, no casting is even needed.
	startEvent.name = "foo";
	//auto noGood = valueEvent.name; // AssertError: ValueEvent doesn't have a member 'name'.

	// Final switch is supported:
	final switch(startEvent.kind)
	{
		case ParserEvent.Kind.tagStart:
			break;
		case ParserEvent.Kind.tagEnd:
			break;
		case ParserEvent.Kind.value:
			break;
		case ParserEvent.Kind.attribute:
			break;
	}
}
208 
/// Event: Start of tag
///
/// Emitted by the pull parser when it begins a tag, before any of the tag's
/// values, attributes or children are reported.
struct TagStartEvent
{
	// Location of the first token of the tag (its name, or its first value
	// when the tag is anonymous).
	Location location;
	// Namespace part of the tag name: "" when the name has no namespace,
	// null for an anonymous tag.
	string namespace;
	// Tag name; null for an anonymous tag.
	string name;
}
216 
/// Event: End of tag
///
/// Emitted once the tag's values, attributes, optional child block and
/// terminator have been parsed. It currently carries no data.
struct TagEndEvent
{
	// A location field exists here only as commented-out code.
	//Location location;
}
222 
/// Event: Found a Value in the current tag
///
/// Only emitted for values that are not part of an attribute.
struct ValueEvent
{
	// Location of the value token.
	Location location;
	// The value carried by that token.
	Value value;
}
229 
/// Event: Found an Attribute in the current tag
struct AttributeEvent
{
	// Location recorded for the attribute. As emitted by the pull parser in
	// this module, this is the location of the attribute's *value* token,
	// not of its name.
	Location location;
	// Namespace part of the attribute name; "" when no namespace was given.
	string namespace;
	// Attribute name.
	string name;
	// The attribute's value.
	Value value;
}
238 
// The actual pull parser.
//
// A hand-written parser with one routine per grammar production (the grammar
// is given in the comments on each routine). It reads tokens from `lexer` and,
// instead of building a tree, reports what it finds by emitting ParserEvent
// values through the InputVisitor handed to `visit`.
//
// All syntax errors are reported by throwing ParseException.
private struct PullParser
{
	private Lexer lexer;

	// A (possibly namespace-qualified) identifier. `namespace` is "" when the
	// identifier was written without a namespace.
	private struct IDFull
	{
		string namespace;
		string name;
	}

	// Throws a ParseException located at the lexer's current token.
	private void error(string msg)
	{
		error(lexer.front.location, msg);
	}

	// Throws a ParseException located at `loc`. The message is prefixed
	// with "Error: ". Never returns normally.
	private void error(Location loc, string msg)
	{
		throw new ParseException(loc, "Error: "~msg);
	}

	// The visitor through which events are yielded; set by `visit`.
	private InputVisitor!(PullParser, ParserEvent) v;

	// Remembers the visitor used for yielding events, then parses the whole
	// document. Presumably invoked by the InputVisitor machinery once the
	// resulting range is consumed -- confirm against libInputVisitor.
	void visit(InputVisitor!(PullParser, ParserEvent) v)
	{
		this.v = v;
		parseRoot();
	}

	// Wraps `event` in a ParserEvent and yields it through the visitor.
	private void emit(Event)(Event event)
	{
		v.yield( ParserEvent(event) );
	}

	/// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
	private void parseRoot()
	{
		//trace("Starting parse of file: ", lexer.filename);
		//trace(__FUNCTION__, ": <Root> ::= <Tags> EOF  (Lookaheads: Anything)");

		parseTags();

		// parseTags stops at the first token that cannot start a tag; at the
		// top level only EOF is acceptable there.
		auto token = lexer.front;
		if(token.matches!":"())
		{
			// A leading ':' means a namespace separator with no namespace
			// before it. Peek at the next token to tailor the message.
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				error("Missing namespace. If you don't wish to use a namespace, then say '"~token.data~"', not ':"~token.data~"'");
				assert(0);
			}
			else
			{
				error("Missing namespace. If you don't wish to use a namespace, then omit the ':'");
				assert(0);
			}
		}
		else if(!token.matches!"EOF"())
			error("Expected a tag or end-of-file, not " ~ token.symbol.name);
	}

	/// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
	///        |   EOL   <Tags>  (Lookaheads: EOL)
	///        |   {empty}       (Lookaheads: Anything else, except '{')
	void parseTags()
	{
		//trace("Enter ", __FUNCTION__);
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"() || token.matches!"Value"())
			{
				//trace(__FUNCTION__, ": <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)");
				parseTag();
				continue;
			}
			else if(token.matches!"EOL"())
			{
				//trace(__FUNCTION__, ": <Tags> ::= EOL <Tags>  (Lookaheads: EOL)");
				lexer.popFront();
				continue;
			}
			else if(token.matches!"{"())
			{
				// error() always throws, so the loop cannot spin here.
				error("Found start of child block, but no tag name. If you intended an anonymous "~
				"tag, you must have at least one value before any attributes or child tags.");
			}
			else
			{
				//trace(__FUNCTION__, ": <Tags> ::= {empty}  (Lookaheads: Anything else, except '{')");
				break;
			}
		}
	}

	/// <Tag>
	///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
	///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
	void parseTag()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			//trace(__FUNCTION__, ": <Tag> ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)");
			//trace("Found tag named: ", tag.fullName);
			auto id = parseIDFull();
			emit( TagStartEvent(token.location, id.namespace, id.name) );
		}
		else if(token.matches!"Value"())
		{
			//trace(__FUNCTION__, ": <Tag> ::= <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)");
			//trace("Found anonymous tag.");
			// Anonymous tag: the value token is not consumed here, it is
			// picked up by parseValues() below.
			emit( TagStartEvent(token.location, null, null) );
		}
		else
			error("Expected tag name or value, not " ~ token.symbol.name);

		// An '=' right after the tag name means the "name" was really the
		// start of an attribute on a tag that has no name and no value.
		if(lexer.front.matches!"="())
			error("Found attribute, but no tag name. If you intended an anonymous "~
			"tag, you must have at least one value before any attributes.");

		parseValues();
		parseAttributes();
		parseOptChild();
		parseTagTerminator();

		emit( TagEndEvent() );
	}

	/// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
	IDFull parseIDFull()
	{
		auto token = lexer.front;
		if(token.matches!"Ident"())
		{
			//trace(__FUNCTION__, ": <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)");
			lexer.popFront();
			return parseIDSuffix(token.data);
		}
		else
		{
			error("Expected namespace or identifier, not " ~ token.symbol.name);
			assert(0);
		}
	}

	/// <IDSuffix>
	///     ::= ':' Ident  (Lookaheads: ':')
	///     ::= {empty}    (Lookaheads: Anything else)
	///
	/// `firstIdent` is the identifier already consumed by parseIDFull: it is
	/// the namespace when a ':' follows, otherwise it is the name itself.
	IDFull parseIDSuffix(string firstIdent)
	{
		auto token = lexer.front;
		if(token.matches!":"())
		{
			//trace(__FUNCTION__, ": <IDSuffix> ::= ':' Ident  (Lookaheads: ':')");
			lexer.popFront();
			token = lexer.front;
			if(token.matches!"Ident"())
			{
				lexer.popFront();
				return IDFull(firstIdent, token.data);
			}
			else
			{
				error("Expected name, not " ~ token.symbol.name);
				assert(0);
			}
		}
		else
		{
			//trace(__FUNCTION__, ": <IDSuffix> ::= {empty}  (Lookaheads: Anything else)");
			return IDFull("", firstIdent);
		}
	}

	/// <Values>
	///     ::= Value <Values>  (Lookaheads: Value)
	///     |   {empty}         (Lookaheads: Anything else)
	void parseValues()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Value"())
			{
				//trace(__FUNCTION__, ": <Values> ::= Value <Values>  (Lookaheads: Value)");
				parseValue();
				continue;
			}
			else
			{
				//trace(__FUNCTION__, ": <Values> ::= {empty}  (Lookaheads: Anything else)");
				break;
			}
		}
	}

	/// Handle Value terminals that aren't part of an attribute
	void parseValue()
	{
		auto token = lexer.front;
		if(token.matches!"Value"())
		{
			//trace(__FUNCTION__, ": (Handle Value terminals that aren't part of an attribute)");
			auto value = token.value;
			//trace("In tag '", parent.fullName, "', found value: ", value);
			emit( ValueEvent(token.location, value) );

			lexer.popFront();
		}
		else
			error("Expected value, not "~token.symbol.name);
	}

	/// <Attributes>
	///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
	///     |   {empty}                   (Lookaheads: Anything else)
	void parseAttributes()
	{
		while(true)
		{
			auto token = lexer.front;
			if(token.matches!"Ident"())
			{
				//trace(__FUNCTION__, ": <Attributes> ::= <Attribute> <Attributes>  (Lookaheads: Ident)");
				parseAttribute();
				continue;
			}
			else
			{
				//trace(__FUNCTION__, ": <Attributes> ::= {empty}  (Lookaheads: Anything else)");
				break;
			}
		}
	}

	/// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
	void parseAttribute()
	{
		//trace(__FUNCTION__, ": <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)");
		auto token = lexer.front;
		if(!token.matches!"Ident"())
			error("Expected attribute name, not "~token.symbol.name);

		auto id = parseIDFull();

		token = lexer.front;
		if(!token.matches!"="())
			error("Expected '=' after attribute name, not "~token.symbol.name);

		lexer.popFront();
		token = lexer.front;
		if(!token.matches!"Value"())
			error("Expected attribute value, not "~token.symbol.name);

		//trace("In tag '", parent.fullName, "', found attribute '", attr.fullName, "'");
		// NOTE(review): `token` is the value token at this point, so the
		// event's location is that of the value, not of the attribute name.
		emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );

		lexer.popFront();
	}

	/// <OptChild>
	///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
	///      |   {empty}             (Lookaheads: Anything else)
	void parseOptChild()
	{
		auto token = lexer.front;
		if(token.matches!"{"())
		{
			//trace(__FUNCTION__, ": <OptChild> ::= '{' EOL <Tags> '}'  (Lookaheads: '{')");
			lexer.popFront();
			token = lexer.front;
			if(!token.matches!"EOL"())
				error("Expected newline or semicolon after '{', not "~token.symbol.name);

			lexer.popFront();
			parseTags();

			token = lexer.front;
			if(!token.matches!"}"())
				error("Expected '}' after child tags, not "~token.symbol.name);
			lexer.popFront();
		}
		else
		{
			//trace(__FUNCTION__, ": <OptChild> ::= {empty}  (Lookaheads: Anything else)");
			// Do nothing, no error.
		}
	}

	/// <TagTerminator>
	///     ::= EOL      (Lookahead: EOL)
	///     |   {empty}  (Lookahead: EOF)
	void parseTagTerminator()
	{
		auto token = lexer.front;
		if(token.matches!"EOL"())
		{
			//trace(__FUNCTION__, ": <TagTerminator> ::= EOL  (Lookahead: EOL)");
			lexer.popFront();
		}
		else if(token.matches!"EOF"())
		{
			//trace(__FUNCTION__, ": <TagTerminator> ::= {empty}  (Lookahead: EOF)");
			// Do nothing
		}
		else
			error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
	}
}
552 
// Builds a tree of Tag objects by consuming the ParserEvent stream produced
// by PullParser over the same lexer.
private struct DOMParser
{
	Lexer lexer;

	// Parses the document and returns the tag that is current once all
	// events have been consumed. Parsing starts inside a synthetic tag named
	// "root"; every TagStartEvent descends into a new child and every
	// TagEndEvent climbs back to the parent.
	Tag parseRoot()
	{
		Tag current = new Tag(null, null, "root");
		current.location = Location(lexer.filename, 0, 0, 0);

		auto pull = PullParser(lexer);

		foreach(event; inputVisitor!ParserEvent( pull ))
		{
			final switch(event.kind)
			{
			case ParserEvent.Kind.tagStart:
				// Open a child of the current tag and descend into it.
				auto start = cast(TagStartEvent) event;
				auto child = new Tag(current, start.namespace, start.name);
				child.location = start.location;
				current = child;
				break;

			case ParserEvent.Kind.tagEnd:
				// Climb back up; running out of parents means the events
				// were unbalanced.
				current = current.parent;
				if(!current)
					pull.error("Internal Error: Received an extra TagEndEvent");
				break;

			case ParserEvent.Kind.value:
				auto found = cast(ValueEvent) event;
				current.add(found.value);
				break;

			case ParserEvent.Kind.attribute:
				auto found = cast(AttributeEvent) event;
				current.add(new Attribute(found.namespace, found.name, found.value, found.location));
				break;
			}
		}

		return current;
	}
}
596 
597 // Other parser tests are part of the AST's tests over in the ast module.
598 
// Regression test, issue #13: https://github.com/Abscissa/SDLang-D/issues/13
// "Incorrectly accepts ":tagname" (blank namespace, tagname prefixed with colon)"
@("parser: Regression test issue #13")
unittest
{
	import std.exception : assertThrown;

	// A bare ':' followed by either a name or a value must be rejected.
	foreach(source; [`:test`, `:4`])
		assertThrown!ParseException(parseSource(source));
}
608 
// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
@("parser: Regression test issue #16")
unittest
{
	// Shouldn't crash
	auto events = pullParseSource(`tag "data"`);
	foreach(event; events)
	{
		// Only tag-start events are cast; everything else is skipped.
		if(event.kind != ParserEvent.Kind.tagStart)
			continue;

		auto start = cast(TagStartEvent) event;
	}
}
620 
// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
@("parser: Regression test issue #31")
unittest
{
	// A string value containing escaped quotes.
	immutable source = `test "\"foo\""`;

	// Shouldn't get a Range violation
	parseSource(source);
}