// SDLang-D
// Written in the D programming language.

module gfx.decl.sdlang.parser;

import std.file;

import gfx.decl.sdlang.libInputVisitor;
import gfx.decl.sdlang.taggedalgebraic;

import gfx.decl.sdlang.ast;
import gfx.decl.sdlang.exception;
import gfx.decl.sdlang.lexer;
import gfx.decl.sdlang.symbol;
import gfx.decl.sdlang.token;
import gfx.decl.sdlang.util;

/// Reads the file `filename` and parses its content as an SDLang document.
/// Returns root tag.
Tag parseFile(string filename)
{
    auto source = cast(string)read(filename);
    return parseSource(source, filename);
}

/// Returns root tag. The optional `filename` parameter can be included
/// so that the SDLang document's filename (if any) can be displayed with
/// any syntax error messages.
Tag parseSource(string source, string filename=null)
{
    auto lexer = new Lexer(source, filename);
    auto parser = DOMParser(lexer);
    return parser.parseRoot();
}

/++
Parses an SDL document using StAX/Pull-style. Returns an InputRange with
element type ParserEvent.

The pullParseFile version reads a file and parses it, while pullParseSource
parses a string passed in. The optional `filename` parameter in pullParseSource
can be included so that the SDLang document's filename (if any) can be displayed
with any syntax error messages.

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.

Example:
------------------
parent 12 attr="q" {
    childA 34
    childB 56
}
lastTag
------------------

The ParserEvent sequence emitted for that SDL document would be as
follows (indented for readability):
------------------
TagStartEvent (parent)
    ValueEvent (12)
    AttributeEvent (attr, "q")
    TagStartEvent (childA)
        ValueEvent (34)
    TagEndEvent
    TagStartEvent (childB)
        ValueEvent (56)
    TagEndEvent
TagEndEvent
TagStartEvent (lastTag)
TagEndEvent
------------------
+/
auto pullParseFile(string filename)
{
    auto source = cast(string)read(filename);
    // Must go through the pull parser: calling parseSource here would
    // build and return a DOM Tag instead of the documented event range.
    return pullParseSource(source, filename);
}

///ditto
auto pullParseSource(string source, string filename=null)
{
    auto lexer = new Lexer(source, filename);
    auto parser = PullParser(lexer);
    return inputVisitor!ParserEvent( parser );
}

// Compile-time regression check: both pull entry points must yield the
// same range type (nothing is executed, no file is read).
@("pullParseFile returns the same range type as pullParseSource")
unittest
{
    static assert(is(typeof(pullParseFile("")) == typeof(pullParseSource(""))));
}

///
@("pullParseFile/pullParseSource example")
unittest
{
    // stuff.sdl
    immutable stuffSdl = `
name "sdlang-d"
description "An SDL (Simple Declarative Language) library for D."
homepage "http://github.com/Abscissa/SDLang-D"

configuration "library" {
    targetType "library"
}
`;

    import std.stdio;

    foreach(event; pullParseSource(stuffSdl))
    final switch(event.kind)
    {
    case ParserEvent.Kind.tagStart:
        auto e = cast(TagStartEvent) event;
        writeln("TagStartEvent: ", e.namespace, ":", e.name, " @ ", e.location);
        break;

    case ParserEvent.Kind.tagEnd:
        auto e = cast(TagEndEvent) event;
        writeln("TagEndEvent");
        break;

    case ParserEvent.Kind.value:
        auto e = cast(ValueEvent) event;
        writeln("ValueEvent: ", e.value);
        break;

    case ParserEvent.Kind.attribute:
        auto e = cast(AttributeEvent) event;
        writeln("AttributeEvent: ", e.namespace, ":", e.name, "=", e.value);
        break;
    }
}

private union ParserEventUnion
{
    TagStartEvent  tagStart;
    TagEndEvent    tagEnd;
    ValueEvent     value;
    AttributeEvent attribute;
}

/++
The element of the InputRange returned by pullParseFile and pullParseSource.

This is a tagged union, built from the following:
-------
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;
private union ParserEventUnion
{
    TagStartEvent  tagStart;
    TagEndEvent    tagEnd;
    ValueEvent     value;
    AttributeEvent attribute;
}
-------

Note: The old FileStartEvent and FileEndEvent events
$(LINK2 https://github.com/Abscissa/SDLang-D/issues/17, were deemed unnecessary)
and removed as of SDLang-D v0.10.0.

Note: Previously, in SDLang-D v0.9.x, ParserEvent was a
$(LINK2 http://dlang.org/phobos/std_variant.html#.Algebraic, std.variant.Algebraic).
As of SDLang-D v0.10.0, it is now a
$(LINK2 https://github.com/s-ludwig/taggedalgebraic, TaggedAlgebraic),
so usage has changed somewhat.
+/
alias ParserEvent = TaggedAlgebraic!ParserEventUnion;

///
@("ParserEvent example")
unittest
{
    // Create
    ParserEvent event1 = TagStartEvent();
    ParserEvent event2 = TagEndEvent();
    ParserEvent event3 = ValueEvent();
    ParserEvent event4 = AttributeEvent();

    // Check type
    assert(event1.kind == ParserEvent.Kind.tagStart);
    assert(event2.kind == ParserEvent.Kind.tagEnd);
    assert(event3.kind == ParserEvent.Kind.value);
    assert(event4.kind == ParserEvent.Kind.attribute);

    // Cast to base type
    auto e1 = cast(TagStartEvent) event1;
    auto e2 = cast(TagEndEvent) event2;
    auto e3 = cast(ValueEvent) event3;
    auto e4 = cast(AttributeEvent) event4;
    //auto noGood = cast(AttributeEvent) event1; // AssertError: event1 is a TagStartEvent, not AttributeEvent.

    // Use as base type.
    // In many cases, no casting is even needed.
    event1.name = "foo";
    //auto noGood = event3.name; // AssertError: ValueEvent doesn't have a member 'name'.

    // Final switch is supported:
    final switch(event1.kind)
    {
        case ParserEvent.Kind.tagStart:  break;
        case ParserEvent.Kind.tagEnd:    break;
        case ParserEvent.Kind.value:     break;
        case ParserEvent.Kind.attribute: break;
    }
}

/// Event: Start of tag
struct TagStartEvent
{
    Location location;
    string namespace;
    string name;
}

/// Event: End of tag
struct TagEndEvent
{
    //Location location;
}

/// Event: Found a Value in the current tag
struct ValueEvent
{
    Location location;
    Value value;
}

/// Event: Found an Attribute in the current tag
struct AttributeEvent
{
    Location location;
    string namespace;
    string name;
    Value value;
}

// The actual pull parser.
//
// A recursive-descent parser over the token stream of `lexer`. Rather than
// building a tree, each recognized construct is emitted as a ParserEvent
// through the InputVisitor handed to `visit`.
private struct PullParser
{
    private Lexer lexer;

    // A (namespace, name) pair, as produced by <IDFull>.
    private struct IDFull
    {
        string namespace;
        string name;
    }

    // Throws a ParseException located at the lexer's current token.
    private void error(string msg)
    {
        error(lexer.front.location, msg);
    }

    // Throws a ParseException at `loc`; never returns normally.
    private void error(Location loc, string msg)
    {
        throw new ParseException(loc, "Error: "~msg);
    }

    // Visitor that receives the emitted events; set by `visit`.
    private InputVisitor!(PullParser, ParserEvent) v;

    // Entry point called with the InputVisitor: stores the visitor and
    // parses the whole document, emitting events through it.
    void visit(InputVisitor!(PullParser, ParserEvent) v)
    {
        this.v = v;
        parseRoot();
    }

    // Wraps `event` in a ParserEvent and yields it to the visitor.
    private void emit(Event)(Event event)
    {
        v.yield( ParserEvent(event) );
    }

    /// <Root> ::= <Tags> EOF  (Lookaheads: Anything)
    private void parseRoot()
    {
        parseTags();

        // Whatever stopped <Tags> must be EOF; anything else is an error.
        auto token = lexer.front;
        if(token.matches!":"())
        {
            // A leading ':' means a namespace separator with no namespace.
            lexer.popFront();
            token = lexer.front;
            if(token.matches!"Ident"())
            {
                error("Missing namespace. If you don't wish to use a namespace, then say '"~token.data~"', not ':"~token.data~"'");
                assert(0);
            }
            else
            {
                error("Missing namespace. If you don't wish to use a namespace, then omit the ':'");
                assert(0);
            }
        }
        else if(!token.matches!"EOF"())
            error("Expected a tag or end-of-file, not " ~ token.symbol.name);
    }

    /// <Tags> ::= <Tag> <Tags>  (Lookaheads: Ident Value)
    ///        |   EOL   <Tags>  (Lookaheads: EOL)
    ///        |   {empty}       (Lookaheads: Anything else, except '{')
    void parseTags()
    {
        while(true)
        {
            auto token = lexer.front;
            if(token.matches!"Ident"() || token.matches!"Value"())
            {
                // <Tags> ::= <Tag> <Tags>
                parseTag();
                continue;
            }
            else if(token.matches!"EOL"())
            {
                // <Tags> ::= EOL <Tags>
                lexer.popFront();
                continue;
            }
            else if(token.matches!"{"())
            {
                // error() always throws, so the loop cannot spin here.
                error("Found start of child block, but no tag name. If you intended an anonymous "~
                "tag, you must have at least one value before any attributes or child tags.");
            }
            else
            {
                // <Tags> ::= {empty}
                break;
            }
        }
    }

    /// <Tag>
    ///     ::= <IDFull> <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Ident)
    ///     |   <Value>  <Values> <Attributes> <OptChild> <TagTerminator>  (Lookaheads: Value)
    void parseTag()
    {
        auto token = lexer.front;
        if(token.matches!"Ident"())
        {
            // Named tag: consume the (optionally namespaced) identifier.
            auto id = parseIDFull();
            emit( TagStartEvent(token.location, id.namespace, id.name) );
        }
        else if(token.matches!"Value"())
        {
            // Anonymous tag: the value token is left for parseValues below.
            emit( TagStartEvent(token.location, null, null) );
        }
        else
            error("Expected tag name or value, not " ~ token.symbol.name);

        if(lexer.front.matches!"="())
            error("Found attribute, but no tag name. If you intended an anonymous "~
            "tag, you must have at least one value before any attributes.");

        parseValues();
        parseAttributes();
        parseOptChild();
        parseTagTerminator();

        emit( TagEndEvent() );
    }

    /// <IDFull> ::= Ident <IDSuffix>  (Lookaheads: Ident)
    IDFull parseIDFull()
    {
        auto token = lexer.front;
        if(token.matches!"Ident"())
        {
            lexer.popFront();
            return parseIDSuffix(token.data);
        }
        else
        {
            error("Expected namespace or identifier, not " ~ token.symbol.name);
            assert(0);
        }
    }

    /// <IDSuffix>
    ///     ::= ':' Ident  (Lookaheads: ':')
    ///     ::= {empty}    (Lookaheads: Anything else)
    IDFull parseIDSuffix(string firstIdent)
    {
        auto token = lexer.front;
        if(token.matches!":"())
        {
            // "ns:name" form: firstIdent was the namespace.
            lexer.popFront();
            token = lexer.front;
            if(token.matches!"Ident"())
            {
                lexer.popFront();
                return IDFull(firstIdent, token.data);
            }
            else
            {
                error("Expected name, not " ~ token.symbol.name);
                assert(0);
            }
        }
        else
        {
            // No ':' follows: firstIdent is the name, namespace is empty.
            return IDFull("", firstIdent);
        }
    }

    /// <Values>
    ///     ::= Value <Values>  (Lookaheads: Value)
    ///     |   {empty}         (Lookaheads: Anything else)
    void parseValues()
    {
        while(true)
        {
            auto token = lexer.front;
            if(token.matches!"Value"())
            {
                parseValue();
                continue;
            }
            else
            {
                break;
            }
        }
    }

    /// Handle Value terminals that aren't part of an attribute
    void parseValue()
    {
        auto token = lexer.front;
        if(token.matches!"Value"())
        {
            auto value = token.value;
            emit( ValueEvent(token.location, value) );

            lexer.popFront();
        }
        else
            error("Expected value, not "~token.symbol.name);
    }

    /// <Attributes>
    ///     ::= <Attribute> <Attributes>  (Lookaheads: Ident)
    ///     |   {empty}                   (Lookaheads: Anything else)
    void parseAttributes()
    {
        while(true)
        {
            auto token = lexer.front;
            if(token.matches!"Ident"())
            {
                parseAttribute();
                continue;
            }
            else
            {
                break;
            }
        }
    }

    /// <Attribute> ::= <IDFull> '=' Value  (Lookaheads: Ident)
    void parseAttribute()
    {
        auto token = lexer.front;
        if(!token.matches!"Ident"())
            error("Expected attribute name, not "~token.symbol.name);

        auto id = parseIDFull();

        token = lexer.front;
        if(!token.matches!"="())
            error("Expected '=' after attribute name, not "~token.symbol.name);

        lexer.popFront();
        token = lexer.front;
        if(!token.matches!"Value"())
            error("Expected attribute value, not "~token.symbol.name);

        // The event carries the location of the value token.
        emit( AttributeEvent(token.location, id.namespace, id.name, token.value) );

        lexer.popFront();
    }

    /// <OptChild>
    ///      ::= '{' EOL <Tags> '}'  (Lookaheads: '{')
    ///      |   {empty}             (Lookaheads: Anything else)
    void parseOptChild()
    {
        auto token = lexer.front;
        if(token.matches!"{"())
        {
            lexer.popFront();
            token = lexer.front;
            if(!token.matches!"EOL"())
                error("Expected newline or semicolon after '{', not "~token.symbol.name);

            lexer.popFront();
            parseTags();

            token = lexer.front;
            if(!token.matches!"}"())
                error("Expected '}' after child tags, not "~token.symbol.name);
            lexer.popFront();
        }
        else
        {
            // <OptChild> ::= {empty}
            // Do nothing, no error.
        }
    }

    /// <TagTerminator>
    ///     ::= EOL      (Lookahead: EOL)
    ///     |   {empty}  (Lookahead: EOF)
    void parseTagTerminator()
    {
        auto token = lexer.front;
        if(token.matches!"EOL"())
        {
            lexer.popFront();
        }
        else if(token.matches!"EOF"())
        {
            // Do nothing: EOF is left for parseRoot to see.
        }
        else
            error("Expected end of tag (newline, semicolon or end-of-file), not " ~ token.symbol.name);
    }
}

// Builds a Tag tree (DOM) by consuming the event stream of a PullParser
// running over the same lexer.
private struct DOMParser
{
    Lexer lexer;

    // Parses the whole document and returns a tag named "root" under which
    // the document's tags are created.
    Tag parseRoot()
    {
        auto currTag = new Tag(null, null, "root");
        currTag.location = Location(lexer.filename, 0, 0, 0);

        auto parser = PullParser(lexer);
        auto eventRange = inputVisitor!ParserEvent( parser );

        foreach(event; eventRange)
        final switch(event.kind)
        {
        case ParserEvent.Kind.tagStart:
            // Descend: the new tag becomes the current tag.
            auto newTag = new Tag(currTag, event.namespace, event.name);
            newTag.location = event.location;

            currTag = newTag;
            break;

        case ParserEvent.Kind.tagEnd:
            // Ascend back to the enclosing tag.
            currTag = currTag.parent;

            if(!currTag)
                parser.error("Internal Error: Received an extra TagEndEvent");
            break;

        case ParserEvent.Kind.value:
            currTag.add((cast(ValueEvent)event).value);
            break;

        case ParserEvent.Kind.attribute:
            auto e = cast(AttributeEvent) event;
            auto attr = new Attribute(e.namespace, e.name, e.value, e.location);
            currTag.add(attr);
            break;
        }

        return currTag;
    }
}

// Other parser tests are part of the AST's tests over in the ast module.

// Regression test, issue #13: https://github.com/Abscissa/SDLang-D/issues/13
// "Incorrectly accepts ":tagname" (blank namespace, tagname prefixed with colon)"
@("parser: Regression test issue #13")
unittest
{
    import std.exception;
    assertThrown!ParseException(parseSource(`:test`));
    assertThrown!ParseException(parseSource(`:4`));
}

// Regression test, issue #16: https://github.com/Abscissa/SDLang-D/issues/16
@("parser: Regression test issue #16")
unittest
{
    // Shouldn't crash
    foreach(event; pullParseSource(`tag "data"`))
    {
        if(event.kind == ParserEvent.Kind.tagStart)
            auto e = cast(TagStartEvent) event;
    }
}

// Regression test, issue #31: https://github.com/Abscissa/SDLang-D/issues/31
// "Escape sequence results in range violation error"
@("parser: Regression test issue #31")
unittest
{
    // Shouldn't get a Range violation
    parseSource(`test "\"foo\""`);
}