1 // SDLang-D 2 // Written in the D programming language. 3 4 module gfx.decl.sdlang.lexer; 5 6 import std.algorithm; 7 import std.array; 8 static import std.ascii; 9 import std.base64; 10 import std.bigint; 11 import std.conv; 12 import std.datetime; 13 import std.file; 14 import std.format; 15 import std.traits; 16 import std.typecons; 17 import std.uni; 18 import std.utf; 19 import std.variant; 20 21 import gfx.decl.sdlang.exception; 22 import gfx.decl.sdlang.symbol; 23 import gfx.decl.sdlang.token; 24 import gfx.decl.sdlang.util; 25 26 alias gfx.decl.sdlang.util.startsWith startsWith; 27 28 Token[] lexFile(string filename) 29 { 30 auto source = cast(string)read(filename); 31 return lexSource(source, filename); 32 } 33 34 Token[] lexSource(string source, string filename=null) 35 { 36 auto lexer = scoped!Lexer(source, filename); 37 38 // Can't use 'std.array.array(Range)' because 'lexer' is scoped 39 // and therefore cannot have its reference copied. 40 Appender!(Token[]) tokens; 41 foreach(tok; lexer) 42 tokens.put(tok); 43 44 return tokens.data; 45 } 46 47 // Kind of a poor-man's yield, but fast. 48 // Only to be used inside Lexer.popFront (and Lexer.this). 49 private template accept(string symbolName) 50 { 51 static assert(symbolName != "Value", "Value symbols must also take a value."); 52 enum accept = acceptImpl!(symbolName, "null"); 53 } 54 private template accept(string symbolName, string value) 55 { 56 static assert(symbolName == "Value", "Only a Value symbol can take a value."); 57 enum accept = acceptImpl!(symbolName, value); 58 } 59 private template accept(string symbolName, string value, string startLocation, string endLocation) 60 { 61 static assert(symbolName == "Value", "Only a Value symbol can take a value."); 62 enum accept = (" 63 { 64 _front = makeToken!"~symbolName.stringof~"; 65 _front.value = "~value~"; 66 _front.location = "~(startLocation==""? "tokenStart" : startLocation)~"; 67 _front.data = source[ 68 "~(startLocation==""? "tokenStart.index" : startLocation)~" 69 .. 70 "~(endLocation==""? "location.index" : endLocation)~" 71 ]; 72 return; 73 } 74 ").replace("\n", ""); 75 } 76 private template acceptImpl(string symbolName, string value) 77 { 78 enum acceptImpl = (" 79 { 80 _front = makeToken!"~symbolName.stringof~"; 81 _front.value = "~value~"; 82 return; 83 } 84 ").replace("\n", ""); 85 } 86 87 class Lexer 88 { 89 string source; 90 string filename; 91 Location location; /// Location of current character in source 92 93 private dchar ch; // Current character 94 private dchar nextCh; // Lookahead character 95 private size_t nextPos; // Position of lookahead character (an index into source) 96 private bool hasNextCh; // If false, then there's no more lookahead, just EOF 97 private size_t posAfterLookahead; // Position after lookahead character (an index into source) 98 99 private Location tokenStart; // The starting location of the token being lexed 100 101 // Length so far of the token being lexed, not including current char 102 private size_t tokenLength; // Length in UTF-8 code units 103 private size_t tokenLength32; // Length in UTF-32 code units 104 105 // Slight kludge: 106 // If a numeric fragment is found after a Date (separated by arbitrary 107 // whitespace), it could be the "hours" part of a DateTime, or it could 108 // be a separate numeric literal that simply follows a plain Date. If the 109 // latter, then the Date must be emitted, but numeric fragment that was 110 // found after it needs to be saved for the the lexer's next iteration. 111 // 112 // It's a slight kludge, and could instead be implemented as a slightly 113 // kludgey parser hack, but it's the only situation where SDLang's lexing 114 // needs to lookahead more than one character, so this is good enough. 115 private struct LookaheadTokenInfo 116 { 117 bool exists = false; 118 string numericFragment = ""; 119 bool isNegative = false; 120 Location tokenStart; 121 } 122 private LookaheadTokenInfo lookaheadTokenInfo; 123 124 this(string source=null, string filename=null) 125 { 126 this.filename = filename; 127 this.source = source; 128 129 _front = Token(symbol!"Error", Location()); 130 lookaheadTokenInfo = LookaheadTokenInfo.init; 131 132 if( source.startsWith( ByteOrderMarks[BOM.UTF8] ) ) 133 { 134 source = source[ ByteOrderMarks[BOM.UTF8].length .. $ ]; 135 this.source = source; 136 } 137 138 foreach(bom; ByteOrderMarks) 139 if( source.startsWith(bom) ) 140 error(Location(filename,0,0,0), "SDL spec only supports UTF-8, not UTF-16 or UTF-32"); 141 142 if(source == "") 143 mixin(accept!"EOF"); 144 145 // Prime everything 146 hasNextCh = true; 147 nextCh = source.decode(posAfterLookahead); 148 advanceChar(ErrorOnEOF.Yes); 149 location = Location(filename, 0, 0, 0); 150 popFront(); 151 } 152 153 @property bool empty() 154 { 155 return _front.symbol == symbol!"EOF"; 156 } 157 158 Token _front; 159 @property Token front() 160 { 161 return _front; 162 } 163 164 @property bool isEOF() 165 { 166 return location.index == source.length && !lookaheadTokenInfo.exists; 167 } 168 169 private void error(string msg) 170 { 171 error(location, msg); 172 } 173 174 //TODO: Take varargs and use output range sink. 175 private void error(Location loc, string msg) 176 { 177 throw new ParseException(loc, "Error: "~msg); 178 } 179 180 private Token makeToken(string symbolName)() 181 { 182 auto tok = Token(symbol!symbolName, tokenStart); 183 tok.data = tokenData; 184 return tok; 185 } 186 187 private @property string tokenData() 188 { 189 return source[ tokenStart.index .. location.index ]; 190 } 191 192 /// Check the lookahead character 193 private bool lookahead(dchar ch) 194 { 195 return hasNextCh && nextCh == ch; 196 } 197 198 private bool lookahead(bool function(dchar) condition) 199 { 200 return hasNextCh && condition(nextCh); 201 } 202 203 private static bool isNewline(dchar ch) 204 { 205 return ch == '\n' || ch == '\r' || ch == lineSep || ch == paraSep; 206 } 207 208 /// Returns the length of the newline sequence, or zero if the current 209 /// character is not a newline 210 /// 211 /// Note that there are only single character sequences and the two 212 /// character sequence `\r\n` as used on Windows. 213 private size_t isAtNewline() 214 { 215 if(ch == '\n' || ch == lineSep || ch == paraSep) return 1; 216 else if(ch == '\r') return lookahead('\n') ? 2 : 1; 217 else return 0; 218 } 219 220 /// Is 'ch' a valid base 64 character? 221 private bool isBase64(dchar ch) 222 { 223 if(ch >= 'A' && ch <= 'Z') 224 return true; 225 226 if(ch >= 'a' && ch <= 'z') 227 return true; 228 229 if(ch >= '0' && ch <= '9') 230 return true; 231 232 return ch == '+' || ch == '/' || ch == '='; 233 } 234 235 /// Is the current character one that's allowed 236 /// immediately *after* an int/float literal? 237 private bool isEndOfNumber() 238 { 239 if(isEOF) 240 return true; 241 242 return !isDigit(ch) && ch != ':' && ch != '_' && !isAlpha(ch); 243 } 244 245 /// Is current character the last one in an ident? 246 private bool isEndOfIdentCached = false; 247 private bool _isEndOfIdent; 248 private bool isEndOfIdent() 249 { 250 if(!isEndOfIdentCached) 251 { 252 if(!hasNextCh) 253 _isEndOfIdent = true; 254 else 255 _isEndOfIdent = !isIdentChar(nextCh); 256 257 isEndOfIdentCached = true; 258 } 259 260 return _isEndOfIdent; 261 } 262 263 /// Is 'ch' a character that's allowed *somewhere* in an identifier? 264 private bool isIdentChar(dchar ch) 265 { 266 if(isAlpha(ch)) 267 return true; 268 269 else if(isNumber(ch)) 270 return true; 271 272 else 273 return 274 ch == '-' || 275 ch == '_' || 276 ch == '.' || 277 ch == '$'; 278 } 279 280 private bool isDigit(dchar ch) 281 { 282 return ch >= '0' && ch <= '9'; 283 } 284 285 private enum KeywordResult 286 { 287 Accept, // Keyword is matched 288 Continue, // Keyword is not matched *yet* 289 Failed, // Keyword doesn't match 290 } 291 private KeywordResult checkKeyword(dstring keyword32) 292 { 293 // Still within length of keyword 294 if(tokenLength32 < keyword32.length) 295 { 296 if(ch == keyword32[tokenLength32]) 297 return KeywordResult.Continue; 298 else 299 return KeywordResult.Failed; 300 } 301 302 // At position after keyword 303 else if(tokenLength32 == keyword32.length) 304 { 305 if(isEOF || !isIdentChar(ch)) 306 { 307 debug assert(tokenData == to!string(keyword32)); 308 return KeywordResult.Accept; 309 } 310 else 311 return KeywordResult.Failed; 312 } 313 314 assert(0, "Fell off end of keyword to check"); 315 } 316 317 enum ErrorOnEOF { No, Yes } 318 319 /// Advance one code point. 320 private void advanceChar(ErrorOnEOF errorOnEOF) 321 { 322 if(auto cnt = isAtNewline()) 323 { 324 if (cnt == 1) 325 location.line++; 326 location.col = 0; 327 } 328 else 329 location.col++; 330 331 location.index = nextPos; 332 333 nextPos = posAfterLookahead; 334 ch = nextCh; 335 336 if(!hasNextCh) 337 { 338 if(errorOnEOF == ErrorOnEOF.Yes) 339 error("Unexpected end of file"); 340 341 return; 342 } 343 344 tokenLength32++; 345 tokenLength = location.index - tokenStart.index; 346 347 if(nextPos == source.length) 348 { 349 nextCh = dchar.init; 350 hasNextCh = false; 351 return; 352 } 353 354 nextCh = source.decode(posAfterLookahead); 355 isEndOfIdentCached = false; 356 } 357 358 /// Advances the specified amount of characters 359 private void advanceChar(size_t count, ErrorOnEOF errorOnEOF) 360 { 361 while(count-- > 0) 362 advanceChar(errorOnEOF); 363 } 364 365 void popFront() 366 { 367 // -- Main Lexer ------------- 368 369 eatWhite(); 370 371 if(isEOF) 372 mixin(accept!"EOF"); 373 374 tokenStart = location; 375 tokenLength = 0; 376 tokenLength32 = 0; 377 isEndOfIdentCached = false; 378 379 if(lookaheadTokenInfo.exists) 380 { 381 tokenStart = lookaheadTokenInfo.tokenStart; 382 383 auto prevLATokenInfo = lookaheadTokenInfo; 384 lookaheadTokenInfo = LookaheadTokenInfo.init; 385 lexNumeric(prevLATokenInfo); 386 return; 387 } 388 389 if(ch == '=') 390 { 391 advanceChar(ErrorOnEOF.No); 392 mixin(accept!"="); 393 } 394 395 else if(ch == '{') 396 { 397 advanceChar(ErrorOnEOF.No); 398 mixin(accept!"{"); 399 } 400 401 else if(ch == '}') 402 { 403 advanceChar(ErrorOnEOF.No); 404 mixin(accept!"}"); 405 } 406 407 else if(ch == ':') 408 { 409 advanceChar(ErrorOnEOF.No); 410 mixin(accept!":"); 411 } 412 413 else if(ch == ';') 414 { 415 advanceChar(ErrorOnEOF.No); 416 mixin(accept!"EOL"); 417 } 418 419 else if(auto cnt = isAtNewline()) 420 { 421 advanceChar(cnt, ErrorOnEOF.No); 422 mixin(accept!"EOL"); 423 } 424 425 else if(isAlpha(ch) || ch == '_') 426 lexIdentKeyword(); 427 428 else if(ch == '"') 429 lexRegularString(); 430 431 else if(ch == '`') 432 lexRawString(); 433 434 else if(ch == '\'') 435 lexCharacter(); 436 437 else if(ch == '[') 438 lexBinary(); 439 440 else if(ch == '-' || ch == '.' || isDigit(ch)) 441 lexNumeric(); 442 443 else 444 { 445 if(ch == ',') 446 error("Unexpected comma: SDLang is not a comma-separated format."); 447 else if(std.ascii.isPrintable(ch)) 448 error(text("Unexpected: ", ch)); 449 else 450 error("Unexpected character code 0x%02X".format(ch)); 451 452 advanceChar(ErrorOnEOF.No); 453 } 454 } 455 456 /// Lex Ident or Keyword 457 private void lexIdentKeyword() 458 { 459 assert(isAlpha(ch) || ch == '_'); 460 461 // Keyword 462 struct Key 463 { 464 dstring name; 465 Value value; 466 bool failed = false; 467 } 468 static Key[5] keywords; 469 static keywordsInited = false; 470 if(!keywordsInited) 471 { 472 // Value (as a std.variant-based type) can't be statically inited 473 keywords[0] = Key("true", Value(true )); 474 keywords[1] = Key("false", Value(false)); 475 keywords[2] = Key("on", Value(true )); 476 keywords[3] = Key("off", Value(false)); 477 keywords[4] = Key("null", Value(null )); 478 keywordsInited = true; 479 } 480 481 foreach(ref key; keywords) 482 key.failed = false; 483 484 auto numKeys = keywords.length; 485 486 do 487 { 488 foreach(ref key; keywords) 489 if(!key.failed) 490 { 491 final switch(checkKeyword(key.name)) 492 { 493 case KeywordResult.Accept: 494 mixin(accept!("Value", "key.value")); 495 496 case KeywordResult.Continue: 497 break; 498 499 case KeywordResult.Failed: 500 key.failed = true; 501 numKeys--; 502 break; 503 } 504 } 505 506 if(numKeys == 0) 507 { 508 lexIdent(); 509 return; 510 } 511 512 advanceChar(ErrorOnEOF.No); 513 514 } while(!isEOF); 515 516 foreach(ref key; keywords) 517 if(!key.failed) 518 if(key.name.length == tokenLength32+1) 519 mixin(accept!("Value", "key.value")); 520 521 mixin(accept!"Ident"); 522 } 523 524 /// Lex Ident 525 private void lexIdent() 526 { 527 if(tokenLength == 0) 528 assert(isAlpha(ch) || ch == '_'); 529 530 while(!isEOF && isIdentChar(ch)) 531 advanceChar(ErrorOnEOF.No); 532 533 mixin(accept!"Ident"); 534 } 535 536 /// Lex regular string 537 private void lexRegularString() 538 { 539 assert(ch == '"'); 540 541 Appender!string buf; 542 size_t spanStart = nextPos; 543 544 // Doesn't include current character 545 void updateBuf() 546 { 547 if(location.index == spanStart) 548 return; 549 550 buf.put( source[spanStart..location.index] ); 551 } 552 553 advanceChar(ErrorOnEOF.Yes); 554 while(ch != '"') 555 { 556 if(ch == '\\') 557 { 558 updateBuf(); 559 560 bool wasEscSequence = true; 561 if(hasNextCh) 562 { 563 switch(nextCh) 564 { 565 case 'n': buf.put('\n'); break; 566 case 'r': buf.put('\r'); break; 567 case 't': buf.put('\t'); break; 568 case '"': buf.put('\"'); break; 569 case '\\': buf.put('\\'); break; 570 default: wasEscSequence = false; break; 571 } 572 } 573 574 if(wasEscSequence) 575 { 576 advanceChar(ErrorOnEOF.Yes); 577 spanStart = nextPos; 578 } 579 else 580 { 581 eatWhite(false); 582 spanStart = location.index; 583 } 584 } 585 586 else if(isNewline(ch)) 587 error("Unescaped newlines are only allowed in raw strings, not regular strings."); 588 589 advanceChar(ErrorOnEOF.Yes); 590 } 591 592 updateBuf(); 593 advanceChar(ErrorOnEOF.No); // Skip closing double-quote 594 mixin(accept!("Value", "buf.data")); 595 } 596 597 /// Lex raw string 598 private void lexRawString() 599 { 600 assert(ch == '`'); 601 602 do 603 advanceChar(ErrorOnEOF.Yes); 604 while(ch != '`'); 605 606 advanceChar(ErrorOnEOF.No); // Skip closing back-tick 607 mixin(accept!("Value", "tokenData[1..$-1]")); 608 } 609 610 /// Lex character literal 611 private void lexCharacter() 612 { 613 assert(ch == '\''); 614 advanceChar(ErrorOnEOF.Yes); // Skip opening single-quote 615 616 dchar value; 617 if(ch == '\\') 618 { 619 advanceChar(ErrorOnEOF.Yes); // Skip escape backslash 620 switch(ch) 621 { 622 case 'n': value = '\n'; break; 623 case 'r': value = '\r'; break; 624 case 't': value = '\t'; break; 625 case '\'': value = '\''; break; 626 case '\\': value = '\\'; break; 627 default: error("Invalid escape sequence."); 628 } 629 } 630 else if(isNewline(ch)) 631 error("Newline not alowed in character literal."); 632 else 633 value = ch; 634 advanceChar(ErrorOnEOF.Yes); // Skip the character itself 635 636 if(ch == '\'') 637 advanceChar(ErrorOnEOF.No); // Skip closing single-quote 638 else 639 error("Expected closing single-quote."); 640 641 mixin(accept!("Value", "value")); 642 } 643 644 /// Lex base64 binary literal 645 private void lexBinary() 646 { 647 assert(ch == '['); 648 advanceChar(ErrorOnEOF.Yes); 649 650 void eatBase64Whitespace() 651 { 652 while(!isEOF && isWhite(ch)) 653 { 654 if(isNewline(ch)) 655 advanceChar(ErrorOnEOF.Yes); 656 657 if(!isEOF && isWhite(ch)) 658 eatWhite(); 659 } 660 } 661 662 eatBase64Whitespace(); 663 664 // Iterates all valid base64 characters, ending at ']'. 665 // Skips all whitespace. Throws on invalid chars. 666 struct Base64InputRange 667 { 668 Lexer lexer; 669 private bool isInited = false; 670 private int numInputCharsMod4 = 0; 671 672 @property bool empty() 673 { 674 if(lexer.ch == ']') 675 { 676 if(numInputCharsMod4 != 0) 677 lexer.error("Length of Base64 encoding must be a multiple of 4. ("~to!string(numInputCharsMod4)~")"); 678 679 return true; 680 } 681 682 return false; 683 } 684 685 @property dchar front() 686 { 687 return lexer.ch; 688 } 689 690 void popFront() 691 { 692 auto lex = lexer; 693 694 if(!isInited) 695 { 696 if(lexer.isBase64(lexer.ch)) 697 { 698 numInputCharsMod4++; 699 numInputCharsMod4 %= 4; 700 } 701 702 isInited = true; 703 } 704 705 lex.advanceChar(lex.ErrorOnEOF.Yes); 706 707 eatBase64Whitespace(); 708 709 if(lex.isEOF) 710 lex.error("Unexpected end of file."); 711 712 if(lex.ch != ']') 713 { 714 if(!lex.isBase64(lex.ch)) 715 lex.error("Invalid character in base64 binary literal."); 716 717 numInputCharsMod4++; 718 numInputCharsMod4 %= 4; 719 } 720 } 721 } 722 723 // This is a slow ugly hack. It's necessary because Base64.decode 724 // currently requires the source to have known length. 725 //TODO: Remove this when DMD issue #9543 is fixed. 726 dchar[] tmpBuf = array(Base64InputRange(this)); 727 728 Appender!(ubyte[]) outputBuf; 729 // Ugly workaround for DMD issue #9102 730 //TODO: Remove this when DMD #9102 is fixed 731 struct OutputBuf 732 { 733 void put(ubyte ch) 734 { 735 outputBuf.put(ch); 736 } 737 } 738 739 try 740 //Base64.decode(Base64InputRange(this), OutputBuf()); 741 Base64.decode(tmpBuf, OutputBuf()); 742 743 catch(Base64Exception e) 744 error("Invalid character in base64 binary literal."); 745 746 advanceChar(ErrorOnEOF.No); // Skip ']' 747 mixin(accept!("Value", "outputBuf.data")); 748 } 749 750 private BigInt toBigInt(bool isNegative, string absValue) 751 { 752 auto num = BigInt(absValue); 753 assert(num >= 0); 754 755 if(isNegative) 756 num = -num; 757 758 return num; 759 } 760 761 /// Lex [0-9]+, but without emitting a token. 762 /// This is used by the other numeric parsing functions. 763 private string lexNumericFragment() 764 { 765 if(!isDigit(ch)) 766 error("Expected a digit 0-9."); 767 768 auto spanStart = location.index; 769 770 do 771 { 772 advanceChar(ErrorOnEOF.No); 773 } while(!isEOF && isDigit(ch)); 774 775 return source[spanStart..location.index]; 776 } 777 778 /// Lex anything that starts with 0-9 or '-'. Ints, floats, dates, etc. 779 private void lexNumeric(LookaheadTokenInfo laTokenInfo = LookaheadTokenInfo.init) 780 { 781 bool isNegative; 782 string firstFragment; 783 if(laTokenInfo.exists) 784 { 785 firstFragment = laTokenInfo.numericFragment; 786 isNegative = laTokenInfo.isNegative; 787 } 788 else 789 { 790 assert(ch == '-' || ch == '.' || isDigit(ch)); 791 792 // Check for negative 793 isNegative = ch == '-'; 794 if(isNegative) 795 advanceChar(ErrorOnEOF.Yes); 796 797 // Some floating point with omitted leading zero? 798 if(ch == '.') 799 { 800 lexFloatingPoint(""); 801 return; 802 } 803 804 firstFragment = lexNumericFragment(); 805 } 806 807 // Long integer (64-bit signed)? 808 if(ch == 'L' || ch == 'l') 809 { 810 advanceChar(ErrorOnEOF.No); 811 812 // BigInt(long.min) is a workaround for DMD issue #9548 813 auto num = toBigInt(isNegative, firstFragment); 814 if(num < BigInt(long.min) || num > long.max) 815 error(tokenStart, "Value doesn't fit in 64-bit signed long integer: "~to!string(num)); 816 817 mixin(accept!("Value", "num.toLong()")); 818 } 819 820 // Float (32-bit signed)? 821 else if(ch == 'F' || ch == 'f') 822 { 823 auto value = to!float(tokenData); 824 advanceChar(ErrorOnEOF.No); 825 mixin(accept!("Value", "value")); 826 } 827 828 // Double float (64-bit signed) with suffix? 829 else if((ch == 'D' || ch == 'd') && !lookahead(':') 830 ) 831 { 832 auto value = to!double(tokenData); 833 advanceChar(ErrorOnEOF.No); 834 mixin(accept!("Value", "value")); 835 } 836 837 // Decimal (128+ bits signed)? 838 else if( 839 (ch == 'B' || ch == 'b') && 840 (lookahead('D') || lookahead('d')) 841 ) 842 { 843 auto value = to!real(tokenData); 844 advanceChar(ErrorOnEOF.No); 845 advanceChar(ErrorOnEOF.No); 846 mixin(accept!("Value", "value")); 847 } 848 849 // Some floating point? 850 else if(ch == '.') 851 lexFloatingPoint(firstFragment); 852 853 // Some date? 854 else if(ch == '/' && hasNextCh && isDigit(nextCh)) 855 lexDate(isNegative, firstFragment); 856 857 // Some time span? 858 else if(ch == ':' || ch == 'd') 859 lexTimeSpan(isNegative, firstFragment); 860 861 // Integer (32-bit signed)? 862 else if(isEndOfNumber()) 863 { 864 auto num = toBigInt(isNegative, firstFragment); 865 if(num < int.min || num > int.max) 866 error(tokenStart, "Value doesn't fit in 32-bit signed integer: "~to!string(num)); 867 868 mixin(accept!("Value", "num.toInt()")); 869 } 870 871 // Invalid suffix 872 else 873 error("Invalid integer suffix."); 874 } 875 876 /// Lex any floating-point literal (after the initial numeric fragment was lexed) 877 private void lexFloatingPoint(string firstPart) 878 { 879 assert(ch == '.'); 880 advanceChar(ErrorOnEOF.No); 881 882 auto secondPart = lexNumericFragment(); 883 884 try 885 { 886 // Double float (64-bit signed) with suffix? 887 if(ch == 'D' || ch == 'd') 888 { 889 auto value = to!double(tokenData); 890 advanceChar(ErrorOnEOF.No); 891 mixin(accept!("Value", "value")); 892 } 893 894 // Float (32-bit signed)? 895 else if(ch == 'F' || ch == 'f') 896 { 897 auto value = to!float(tokenData); 898 advanceChar(ErrorOnEOF.No); 899 mixin(accept!("Value", "value")); 900 } 901 902 // Decimal (128+ bits signed)? 903 else if(ch == 'B' || ch == 'b') 904 { 905 auto value = to!real(tokenData); 906 advanceChar(ErrorOnEOF.Yes); 907 908 if(!isEOF && (ch == 'D' || ch == 'd')) 909 { 910 advanceChar(ErrorOnEOF.No); 911 if(isEndOfNumber()) 912 mixin(accept!("Value", "value")); 913 } 914 915 error("Invalid floating point suffix."); 916 } 917 918 // Double float (64-bit signed) without suffix? 919 else if(isEOF || !isIdentChar(ch)) 920 { 921 auto value = to!double(tokenData); 922 mixin(accept!("Value", "value")); 923 } 924 925 // Invalid suffix 926 else 927 error("Invalid floating point suffix."); 928 } 929 catch(ConvException e) 930 error("Invalid floating point literal."); 931 } 932 933 private Date makeDate(bool isNegative, string yearStr, string monthStr, string dayStr) 934 { 935 BigInt biTmp; 936 937 biTmp = BigInt(yearStr); 938 if(isNegative) 939 biTmp = -biTmp; 940 if(biTmp < int.min || biTmp > int.max) 941 error(tokenStart, "Date's year is out of range. (Must fit within a 32-bit signed int.)"); 942 auto year = biTmp.toInt(); 943 944 biTmp = BigInt(monthStr); 945 if(biTmp < 1 || biTmp > 12) 946 error(tokenStart, "Date's month is out of range."); 947 auto month = biTmp.toInt(); 948 949 biTmp = BigInt(dayStr); 950 if(biTmp < 1 || biTmp > 31) 951 error(tokenStart, "Date's month is out of range."); 952 auto day = biTmp.toInt(); 953 954 return Date(year, month, day); 955 } 956 957 private DateTimeFrac makeDateTimeFrac( 958 bool isNegative, Date date, string hourStr, string minuteStr, 959 string secondStr, string millisecondStr 960 ) 961 { 962 BigInt biTmp; 963 964 biTmp = BigInt(hourStr); 965 if(biTmp < int.min || biTmp > int.max) 966 error(tokenStart, "Datetime's hour is out of range."); 967 auto numHours = biTmp.toInt(); 968 969 biTmp = BigInt(minuteStr); 970 if(biTmp < 0 || biTmp > int.max) 971 error(tokenStart, "Datetime's minute is out of range."); 972 auto numMinutes = biTmp.toInt(); 973 974 int numSeconds = 0; 975 if(secondStr != "") 976 { 977 biTmp = BigInt(secondStr); 978 if(biTmp < 0 || biTmp > int.max) 979 error(tokenStart, "Datetime's second is out of range."); 980 numSeconds = biTmp.toInt(); 981 } 982 983 int millisecond = 0; 984 if(millisecondStr != "") 985 { 986 biTmp = BigInt(millisecondStr); 987 if(biTmp < 0 || biTmp > int.max) 988 error(tokenStart, "Datetime's millisecond is out of range."); 989 millisecond = biTmp.toInt(); 990 991 if(millisecondStr.length == 1) 992 millisecond *= 100; 993 else if(millisecondStr.length == 2) 994 millisecond *= 10; 995 } 996 997 Duration fracSecs = millisecond.msecs; 998 999 auto offset = hours(numHours) + minutes(numMinutes) + seconds(numSeconds); 1000 1001 if(isNegative) 1002 { 1003 offset = -offset; 1004 fracSecs = -fracSecs; 1005 } 1006 1007 return DateTimeFrac(DateTime(date) + offset, fracSecs); 1008 } 1009 1010 private Duration makeDuration( 1011 bool isNegative, string dayStr, 1012 string hourStr, string minuteStr, string secondStr, 1013 string millisecondStr 1014 ) 1015 { 1016 BigInt biTmp; 1017 1018 long day = 0; 1019 if(dayStr != "") 1020 { 1021 biTmp = BigInt(dayStr); 1022 if(biTmp < long.min || biTmp > long.max) 1023 error(tokenStart, "Time span's day is out of range."); 1024 day = biTmp.toLong(); 1025 } 1026 1027 biTmp = BigInt(hourStr); 1028 if(biTmp < long.min || biTmp > long.max) 1029 error(tokenStart, "Time span's hour is out of range."); 1030 auto hour = biTmp.toLong(); 1031 1032 biTmp = BigInt(minuteStr); 1033 if(biTmp < long.min || biTmp > long.max) 1034 error(tokenStart, "Time span's minute is out of range."); 1035 auto minute = biTmp.toLong(); 1036 1037 biTmp = BigInt(secondStr); 1038 if(biTmp < long.min || biTmp > long.max) 1039 error(tokenStart, "Time span's second is out of range."); 1040 auto second = biTmp.toLong(); 1041 1042 long millisecond = 0; 1043 if(millisecondStr != "") 1044 { 1045 biTmp = BigInt(millisecondStr); 1046 if(biTmp < long.min || biTmp > long.max) 1047 error(tokenStart, "Time span's millisecond is out of range."); 1048 millisecond = biTmp.toLong(); 1049 1050 if(millisecondStr.length == 1) 1051 millisecond *= 100; 1052 else if(millisecondStr.length == 2) 1053 millisecond *= 10; 1054 } 1055 1056 auto duration = 1057 dur!"days" (day) + 1058 dur!"hours" (hour) + 1059 dur!"minutes"(minute) + 1060 dur!"seconds"(second) + 1061 dur!"msecs" (millisecond); 1062 1063 if(isNegative) 1064 duration = -duration; 1065 1066 return duration; 1067 } 1068 1069 // This has to reproduce some weird corner case behaviors from the 1070 // original Java version of SDL. So some of this may seem weird. 1071 private Nullable!Duration getTimeZoneOffset(string str) 1072 { 1073 if(str.length < 2) 1074 return Nullable!Duration(); // Unknown timezone 1075 1076 if(str[0] != '+' && str[0] != '-') 1077 return Nullable!Duration(); // Unknown timezone 1078 1079 auto isNegative = str[0] == '-'; 1080 1081 string numHoursStr; 1082 string numMinutesStr; 1083 if(str[1] == ':') 1084 { 1085 numMinutesStr = str[1..$]; 1086 numHoursStr = ""; 1087 } 1088 else 1089 { 1090 numMinutesStr = str.find(':'); 1091 numHoursStr = str[1 .. $-numMinutesStr.length]; 1092 } 1093 1094 long numHours = 0; 1095 long numMinutes = 0; 1096 bool isUnknown = false; 1097 try 1098 { 1099 switch(numHoursStr.length) 1100 { 1101 case 0: 1102 if(numMinutesStr.length == 3) 1103 { 1104 numHours = 0; 1105 numMinutes = to!long(numMinutesStr[1..$]); 1106 } 1107 else 1108 isUnknown = true; 1109 break; 1110 1111 case 1: 1112 case 2: 1113 if(numMinutesStr.length == 0) 1114 { 1115 numHours = to!long(numHoursStr); 1116 numMinutes = 0; 1117 } 1118 else if(numMinutesStr.length == 3) 1119 { 1120 numHours = to!long(numHoursStr); 1121 numMinutes = to!long(numMinutesStr[1..$]); 1122 } 1123 else 1124 isUnknown = true; 1125 break; 1126 1127 default: 1128 if(numMinutesStr.length == 0) 1129 { 1130 // Yes, this is correct 1131 numHours = 0; 1132 numMinutes = to!long(numHoursStr[1..$]); 1133 } 1134 else 1135 isUnknown = true; 1136 break; 1137 } 1138 } 1139 catch(ConvException e) 1140 isUnknown = true; 1141 1142 if(isUnknown) 1143 return Nullable!Duration(); // Unknown timezone 1144 1145 auto timeZoneOffset = hours(numHours) + minutes(numMinutes); 1146 if(isNegative) 1147 timeZoneOffset = -timeZoneOffset; 1148 1149 // Timezone valid 1150 return Nullable!Duration(timeZoneOffset); 1151 } 1152 1153 /// Lex date or datetime (after the initial numeric fragment was lexed) 1154 private void lexDate(bool isDateNegative, string yearStr) 1155 { 1156 assert(ch == '/'); 1157 1158 // Lex months 1159 advanceChar(ErrorOnEOF.Yes); // Skip '/' 1160 auto monthStr = lexNumericFragment(); 1161 1162 // Lex days 1163 if(ch != '/') 1164 error("Invalid date format: Missing days."); 1165 advanceChar(ErrorOnEOF.Yes); // Skip '/' 1166 auto dayStr = lexNumericFragment(); 1167 1168 auto date = makeDate(isDateNegative, yearStr, monthStr, dayStr); 1169 1170 if(!isEndOfNumber() && ch != '/') 1171 error("Dates cannot have suffixes."); 1172 1173 // Date? 1174 if(isEOF) 1175 mixin(accept!("Value", "date")); 1176 1177 auto endOfDate = location; 1178 1179 while( 1180 !isEOF && 1181 ( ch == '\\' || ch == '/' || (isWhite(ch) && !isNewline(ch)) ) 1182 ) 1183 { 1184 if(ch == '\\' && hasNextCh && isNewline(nextCh)) 1185 { 1186 advanceChar(ErrorOnEOF.Yes); 1187 if(isAtNewline()) 1188 advanceChar(ErrorOnEOF.Yes); 1189 advanceChar(ErrorOnEOF.No); 1190 } 1191 1192 eatWhite(); 1193 } 1194 1195 // Date? 1196 if(isEOF || (!isDigit(ch) && ch != '-')) 1197 mixin(accept!("Value", "date", "", "endOfDate.index")); 1198 1199 auto startOfTime = location; 1200 1201 // Is time negative? 1202 bool isTimeNegative = ch == '-'; 1203 if(isTimeNegative) 1204 advanceChar(ErrorOnEOF.Yes); 1205 1206 // Lex hours 1207 auto hourStr = ch == '.'? "" : lexNumericFragment(); 1208 1209 // Lex minutes 1210 if(ch != ':') 1211 { 1212 // No minutes found. Therefore we had a plain Date followed 1213 // by a numeric literal, not a DateTime. 1214 lookaheadTokenInfo.exists = true; 1215 lookaheadTokenInfo.numericFragment = hourStr; 1216 lookaheadTokenInfo.isNegative = isTimeNegative; 1217 lookaheadTokenInfo.tokenStart = startOfTime; 1218 mixin(accept!("Value", "date", "", "endOfDate.index")); 1219 } 1220 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1221 auto minuteStr = lexNumericFragment(); 1222 1223 // Lex seconds, if exists 1224 string secondStr; 1225 if(ch == ':') 1226 { 1227 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1228 secondStr = lexNumericFragment(); 1229 } 1230 1231 // Lex milliseconds, if exists 1232 string millisecondStr; 1233 if(ch == '.') 1234 { 1235 advanceChar(ErrorOnEOF.Yes); // Skip '.' 1236 millisecondStr = lexNumericFragment(); 1237 } 1238 1239 auto dateTimeFrac = makeDateTimeFrac(isTimeNegative, date, hourStr, minuteStr, secondStr, millisecondStr); 1240 1241 // Lex zone, if exists 1242 if(ch == '-') 1243 { 1244 advanceChar(ErrorOnEOF.Yes); // Skip '-' 1245 auto timezoneStart = location; 1246 1247 if(!isAlpha(ch)) 1248 error("Invalid timezone format."); 1249 1250 while(!isEOF && !isWhite(ch)) 1251 advanceChar(ErrorOnEOF.No); 1252 1253 auto timezoneStr = source[timezoneStart.index..location.index]; 1254 if(timezoneStr.startsWith("GMT")) 1255 { 1256 auto isoPart = timezoneStr["GMT".length..$]; 1257 auto offset = getTimeZoneOffset(isoPart); 1258 1259 if(offset.isNull()) 1260 { 1261 // Unknown time zone 1262 mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)")); 1263 } 1264 else 1265 { 1266 auto timezone = new immutable SimpleTimeZone(offset.get()); 1267 mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezone)")); 1268 } 1269 } 1270 1271 try 1272 { 1273 version(Windows) 1274 auto timezone = WindowsTimeZone.getTimeZone(timezoneStr); 1275 else version(Posix) 1276 auto timezone = PosixTimeZone.getTimeZone(timezoneStr); 1277 else 1278 static assert(0); 1279 1280 if(timezone) 1281 mixin(accept!("Value", "SysTime(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezone)")); 1282 } 1283 catch(TimeException e) 1284 { 1285 // Time zone not found. So just move along to "Unknown time zone" below. 1286 } 1287 1288 // Unknown time zone 1289 mixin(accept!("Value", "DateTimeFracUnknownZone(dateTimeFrac.dateTime, dateTimeFrac.fracSecs, timezoneStr)")); 1290 } 1291 1292 if(!isEndOfNumber()) 1293 error("Date-Times cannot have suffixes."); 1294 1295 mixin(accept!("Value", "dateTimeFrac")); 1296 } 1297 1298 /// Lex time span (after the initial numeric fragment was lexed) 1299 private void lexTimeSpan(bool isNegative, string firstPart) 1300 { 1301 assert(ch == ':' || ch == 'd'); 1302 1303 string dayStr = ""; 1304 string hourStr; 1305 1306 // Lexed days? 1307 bool hasDays = ch == 'd'; 1308 if(hasDays) 1309 { 1310 dayStr = firstPart; 1311 advanceChar(ErrorOnEOF.Yes); // Skip 'd' 1312 1313 // Lex hours 1314 if(ch != ':') 1315 error("Invalid time span format: Missing hours."); 1316 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1317 hourStr = lexNumericFragment(); 1318 } 1319 else 1320 hourStr = firstPart; 1321 1322 // Lex minutes 1323 if(ch != ':') 1324 error("Invalid time span format: Missing minutes."); 1325 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1326 auto minuteStr = lexNumericFragment(); 1327 1328 // Lex seconds 1329 if(ch != ':') 1330 error("Invalid time span format: Missing seconds."); 1331 advanceChar(ErrorOnEOF.Yes); // Skip ':' 1332 auto secondStr = lexNumericFragment(); 1333 1334 // Lex milliseconds, if exists 1335 string millisecondStr = ""; 1336 if(ch == '.') 1337 { 1338 advanceChar(ErrorOnEOF.Yes); // Skip '.' 1339 millisecondStr = lexNumericFragment(); 1340 } 1341 1342 if(!isEndOfNumber()) 1343 error("Time spans cannot have suffixes."); 1344 1345 auto duration = makeDuration(isNegative, dayStr, hourStr, minuteStr, secondStr, millisecondStr); 1346 mixin(accept!("Value", "duration")); 1347 } 1348 1349 /// Advances past whitespace and comments 1350 private void eatWhite(bool allowComments=true) 1351 { 1352 // -- Comment/Whitepace Lexer ------------- 1353 1354 enum State 1355 { 1356 normal, 1357 lineComment, // Got "#" or "//" or "--", Eating everything until newline 1358 blockComment, // Got "/*", Eating everything until "*/" 1359 } 1360 1361 if(isEOF) 1362 return; 1363 1364 Location commentStart; 1365 State state = State.normal; 1366 bool consumeNewlines = false; 1367 bool hasConsumedNewline = false; 1368 while(true) 1369 { 1370 final switch(state) 1371 { 1372 case State.normal: 1373 1374 if(ch == '\\') 1375 { 1376 commentStart = location; 1377 consumeNewlines = true; 1378 hasConsumedNewline = false; 1379 } 1380 1381 else if(ch == '#') 1382 { 1383 if(!allowComments) 1384 return; 1385 1386 commentStart = location; 1387 state = State.lineComment; 1388 continue; 1389 } 1390 1391 else if(ch == '/' || ch == '-') 1392 { 1393 commentStart = location; 1394 if(lookahead(ch)) 1395 { 1396 if(!allowComments) 1397 return; 1398 1399 advanceChar(ErrorOnEOF.No); 1400 state = State.lineComment; 1401 continue; 1402 } 1403 else if(ch == '/' && lookahead('*')) 1404 { 1405 if(!allowComments) 1406 return; 1407 1408 advanceChar(ErrorOnEOF.No); 1409 state = State.blockComment; 1410 continue; 1411 } 1412 else 1413 return; // Done 1414 } 1415 else if(isAtNewline()) 1416 { 1417 if(consumeNewlines) 1418 hasConsumedNewline = true; 1419 else 1420 return; // Done 1421 } 1422 else if(!isWhite(ch)) 1423 { 1424 if(consumeNewlines) 1425 { 1426 if(hasConsumedNewline) 1427 return; // Done 1428 else 1429 error("Only whitespace can come between a line-continuation backslash and the following newline."); 1430 } 1431 else 1432 return; // Done 1433 } 1434 1435 break; 1436 1437 case State.lineComment: 1438 if(lookahead(&isNewline)) 1439 state = State.normal; 1440 break; 1441 1442 case State.blockComment: 1443 if(ch == '*' && lookahead('/')) 1444 { 1445 advanceChar(ErrorOnEOF.No); 1446 state = State.normal; 1447 } 1448 break; 1449 } 1450 1451 advanceChar(ErrorOnEOF.No); 1452 if(isEOF) 1453 { 1454 // Reached EOF 1455 1456 if(consumeNewlines && !hasConsumedNewline) 1457 error("Missing newline after line-continuation backslash."); 1458 1459 else if(state == State.blockComment) 1460 error(commentStart, "Unterminated block comment."); 1461 1462 else 1463 return; // Done, reached EOF 1464 } 1465 } 1466 } 1467 } 1468 1469 version(unittest) 1470 { 1471 import std.stdio; 1472 1473 version(Have_unit_threaded) import unit_threaded; 1474 else { enum DontTest; } 1475 1476 private auto loc = Location("filename", 0, 0, 0); 1477 private auto loc2 = Location("a", 1, 1, 1); 1478 1479 @("lexer: EOL") 1480 unittest 1481 { 1482 assert([Token(symbol!"EOL",loc) ] == [Token(symbol!"EOL",loc) ] ); 1483 assert([Token(symbol!"EOL",loc,Value(7),"A")] == [Token(symbol!"EOL",loc2,Value(7),"B")] ); 1484 } 1485 1486 private int numErrors = 0; 1487 @DontTest 1488 private void testLex(string source, Token[] expected, bool test_locations = false, string file=__FILE__, size_t line=__LINE__) 1489 { 1490 Token[] actual; 1491 try 1492 actual = lexSource(source, "filename"); 1493 catch(ParseException e) 1494 { 1495 numErrors++; 1496 stderr.writeln(file, "(", line, "): testLex failed on: ", source); 1497 stderr.writeln(" Expected:"); 1498 stderr.writeln(" ", expected); 1499 stderr.writeln(" Actual: ParseException thrown:"); 1500 stderr.writeln(" ", e.msg); 1501 return; 1502 } 1503 1504 bool is_same = actual == expected; 1505 if (is_same && test_locations) { 1506 is_same = actual.map!(t => t.location).equal(expected.map!(t => t.location)); 1507 } 1508 1509 if(!is_same) 1510 { 1511 numErrors++; 1512 stderr.writeln(file, "(", line, "): testLex failed on: ", source); 1513 stderr.writeln(" Expected:"); 1514 stderr.writeln(" ", expected); 1515 stderr.writeln(" Actual:"); 1516 stderr.writeln(" ", actual); 1517 1518 if(expected.length > 1 || actual.length > 1) 1519 { 1520 stderr.writeln(" expected.length: ", expected.length); 1521 stderr.writeln(" actual.length: ", actual.length); 1522 1523 if(actual.length == expected.length) 1524 foreach(i; 0..actual.length) 1525 if(actual[i] != expected[i]) 1526 { 1527 stderr.writeln(" Unequal at index #", i, ":"); 1528 stderr.writeln(" Expected:"); 1529 stderr.writeln(" ", expected[i]); 1530 stderr.writeln(" Actual:"); 1531 stderr.writeln(" ", actual[i]); 1532 } 1533 } 1534 } 1535 } 1536 1537 private void testLexThrows(string file=__FILE__, size_t line=__LINE__)(string source) 1538 { 1539 bool hadException = false; 1540 Token[] actual; 1541 try 1542 actual = lexSource(source, "filename"); 1543 catch(ParseException e) 1544 hadException = true; 1545 1546 if(!hadException) 1547 { 1548 numErrors++; 1549 stderr.writeln(file, "(", line, "): testLex failed on: ", source); 1550 stderr.writeln(" Expected ParseException"); 1551 stderr.writeln(" Actual:"); 1552 stderr.writeln(" ", actual); 1553 } 1554 } 1555 } 1556 1557 @("sdlang lexer") 1558 unittest 1559 { 1560 testLex("", []); 1561 testLex(" ", []); 1562 testLex("\\\n", []); 1563 testLex("/*foo*/", []); 1564 testLex("/* multiline \n comment */", []); 1565 testLex("/* * */", []); 1566 testLexThrows("/* "); 1567 1568 testLex(":", [ Token(symbol!":", loc) ]); 1569 testLex("=", [ Token(symbol!"=", loc) ]); 1570 testLex("{", [ Token(symbol!"{", loc) ]); 1571 testLex("}", [ Token(symbol!"}", loc) ]); 1572 testLex(";", [ Token(symbol!"EOL",loc) ]); 1573 testLex("\n", [ Token(symbol!"EOL",loc) ]); 1574 1575 testLex("foo", [ Token(symbol!"Ident",loc,Value(null),"foo") ]); 1576 testLex("_foo", [ Token(symbol!"Ident",loc,Value(null),"_foo") ]); 1577 testLex("foo.bar", [ Token(symbol!"Ident",loc,Value(null),"foo.bar") ]); 1578 testLex("foo-bar", [ Token(symbol!"Ident",loc,Value(null),"foo-bar") ]); 1579 testLex("foo.", [ Token(symbol!"Ident",loc,Value(null),"foo.") ]); 1580 testLex("foo-", [ Token(symbol!"Ident",loc,Value(null),"foo-") ]); 1581 testLexThrows(".foo"); 1582 1583 testLex("foo bar", [ 1584 Token(symbol!"Ident",loc,Value(null),"foo"), 1585 Token(symbol!"Ident",loc,Value(null),"bar"), 1586 ]); 1587 testLex("foo \\ \n \n bar", [ 1588 Token(symbol!"Ident",loc,Value(null),"foo"), 1589 Token(symbol!"Ident",loc,Value(null),"bar"), 1590 ]); 1591 testLex("foo \\ \n \\ \n bar", [ 1592 Token(symbol!"Ident",loc,Value(null),"foo"), 1593 Token(symbol!"Ident",loc,Value(null),"bar"), 1594 ]); 1595 testLexThrows("foo \\ "); 1596 testLexThrows("foo \\ bar"); 1597 testLexThrows("foo \\ \n \\ "); 1598 testLexThrows("foo \\ \n \\ bar"); 1599 1600 testLex("foo : = { } ; \n bar \n", [ 1601 Token(symbol!"Ident",loc,Value(null),"foo"), 1602 Token(symbol!":",loc), 1603 Token(symbol!"=",loc), 1604 Token(symbol!"{",loc), 1605 Token(symbol!"}",loc), 1606 Token(symbol!"EOL",loc), 1607 Token(symbol!"EOL",loc), 1608 Token(symbol!"Ident",loc,Value(null),"bar"), 1609 Token(symbol!"EOL",loc), 1610 ]); 1611 1612 testLexThrows("<"); 1613 testLexThrows("*"); 1614 testLexThrows(`\`); 1615 1616 // Integers 1617 testLex( "7", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]); 1618 testLex( "-7", [ Token(symbol!"Value",loc,Value(cast( int)-7)) ]); 1619 testLex( "7L", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]); 1620 testLex( "7l", [ Token(symbol!"Value",loc,Value(cast(long) 7)) ]); 1621 testLex("-7L", [ Token(symbol!"Value",loc,Value(cast(long)-7)) ]); 1622 testLex( "0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]); 1623 testLex( "-0", [ Token(symbol!"Value",loc,Value(cast( int) 0)) ]); 1624 1625 testLex("7/**/", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]); 1626 testLex("7#", [ Token(symbol!"Value",loc,Value(cast( int) 7)) ]); 1627 1628 testLex("7 A", [ 1629 Token(symbol!"Value",loc,Value(cast(int)7)), 1630 Token(symbol!"Ident",loc,Value( null),"A"), 1631 ]); 1632 testLexThrows("7A"); 1633 testLexThrows("-A"); 1634 testLexThrows(`-""`); 1635 1636 testLex("7;", [ 1637 Token(symbol!"Value",loc,Value(cast(int)7)), 1638 Token(symbol!"EOL",loc), 1639 ]); 1640 1641 // Floats 1642 testLex("1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]); 1643 testLex("1.2f" , [ Token(symbol!"Value",loc,Value(cast( float)1.2)) ]); 1644 testLex("1.2" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]); 1645 testLex("1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]); 1646 testLex("1.2d" , [ Token(symbol!"Value",loc,Value(cast(double)1.2)) ]); 1647 testLex("1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); 1648 testLex("1.2bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); 1649 testLex("1.2Bd", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); 1650 testLex("1.2bD", [ Token(symbol!"Value",loc,Value(cast( real)1.2)) ]); 1651 1652 testLex(".2F" , [ Token(symbol!"Value",loc,Value(cast( float)0.2)) ]); 1653 testLex(".2" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]); 1654 testLex(".2D" , [ Token(symbol!"Value",loc,Value(cast(double)0.2)) ]); 1655 testLex(".2BD", [ Token(symbol!"Value",loc,Value(cast( real)0.2)) ]); 1656 1657 testLex("-1.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-1.2)) ]); 1658 testLex("-1.2" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]); 1659 testLex("-1.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-1.2)) ]); 1660 testLex("-1.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-1.2)) ]); 1661 1662 testLex("-.2F" , [ Token(symbol!"Value",loc,Value(cast( float)-0.2)) ]); 1663 testLex("-.2" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]); 1664 testLex("-.2D" , [ Token(symbol!"Value",loc,Value(cast(double)-0.2)) ]); 1665 testLex("-.2BD", [ Token(symbol!"Value",loc,Value(cast( real)-0.2)) ]); 1666 1667 testLex( "0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); 1668 testLex( "0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); 1669 testLex( "0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); 1670 testLex("-0.0" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); 1671 testLex("-0.0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); 1672 testLex("-0.0BD", [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); 1673 testLex( "7F" , [ Token(symbol!"Value",loc,Value(cast( float)7.0)) ]); 1674 testLex( "7D" , [ Token(symbol!"Value",loc,Value(cast(double)7.0)) ]); 1675 testLex( "7BD" , [ Token(symbol!"Value",loc,Value(cast( real)7.0)) ]); 1676 testLex( "0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); 1677 testLex( "0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); 1678 testLex( "0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); 1679 testLex("-0F" , [ Token(symbol!"Value",loc,Value(cast( float)0.0)) ]); 1680 testLex("-0D" , [ Token(symbol!"Value",loc,Value(cast(double)0.0)) ]); 1681 testLex("-0BD" , [ Token(symbol!"Value",loc,Value(cast( real)0.0)) ]); 1682 1683 testLex("1.2 F", [ 1684 Token(symbol!"Value",loc,Value(cast(double)1.2)), 1685 Token(symbol!"Ident",loc,Value( null),"F"), 1686 ]); 1687 testLexThrows("1.2A"); 1688 testLexThrows("1.2B"); 1689 testLexThrows("1.2BDF"); 1690 1691 testLex("1.2;", [ 1692 Token(symbol!"Value",loc,Value(cast(double)1.2)), 1693 Token(symbol!"EOL",loc), 1694 ]); 1695 1696 testLex("1.2F;", [ 1697 Token(symbol!"Value",loc,Value(cast(float)1.2)), 1698 Token(symbol!"EOL",loc), 1699 ]); 1700 1701 testLex("1.2BD;", [ 1702 Token(symbol!"Value",loc,Value(cast(real)1.2)), 1703 Token(symbol!"EOL",loc), 1704 ]); 1705 1706 // Booleans and null 1707 testLex("true", [ Token(symbol!"Value",loc,Value( true)) ]); 1708 testLex("false", [ Token(symbol!"Value",loc,Value(false)) ]); 1709 testLex("on", [ Token(symbol!"Value",loc,Value( true)) ]); 1710 testLex("off", [ Token(symbol!"Value",loc,Value(false)) ]); 1711 testLex("null", [ Token(symbol!"Value",loc,Value( null)) ]); 1712 1713 testLex("TRUE", [ Token(symbol!"Ident",loc,Value(null),"TRUE") ]); 1714 testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]); 1715 testLex("true ", [ Token(symbol!"Value",loc,Value(true)) ]); 1716 testLex("tru", [ Token(symbol!"Ident",loc,Value(null),"tru") ]); 1717 testLex("truX", [ Token(symbol!"Ident",loc,Value(null),"truX") ]); 1718 testLex("trueX", [ Token(symbol!"Ident",loc,Value(null),"trueX") ]); 1719 1720 // Raw Backtick Strings 1721 testLex("`hello world`", [ Token(symbol!"Value",loc,Value(`hello world` )) ]); 1722 testLex("` hello world `", [ Token(symbol!"Value",loc,Value(` hello world ` )) ]); 1723 testLex("`hello \\t world`", [ Token(symbol!"Value",loc,Value(`hello \t world`)) ]); 1724 testLex("`hello \\n world`", [ Token(symbol!"Value",loc,Value(`hello \n world`)) ]); 1725 testLex("`hello \n world`", [ Token(symbol!"Value",loc,Value("hello \n world")) ]); 1726 testLex("`hello \r\n world`", [ Token(symbol!"Value",loc,Value("hello \r\n world")) ]); 1727 testLex("`hello \"world\"`", [ Token(symbol!"Value",loc,Value(`hello "world"` )) ]); 1728 1729 testLexThrows("`foo"); 1730 testLexThrows("`"); 1731 1732 // Double-Quote Strings 1733 testLex(`"hello world"`, [ Token(symbol!"Value",loc,Value("hello world" )) ]); 1734 testLex(`" hello world "`, [ Token(symbol!"Value",loc,Value(" hello world " )) ]); 1735 testLex(`"hello \t world"`, [ Token(symbol!"Value",loc,Value("hello \t world")) ]); 1736 testLex(`"hello \n world"`, [ Token(symbol!"Value",loc,Value("hello \n world")) ]); 1737 testLex("\"hello \\\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]); 1738 testLex("\"hello \\ \n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]); 1739 testLex("\"hello \\ \n\n world\"", [ Token(symbol!"Value",loc,Value("hello world" )) ]); 1740 testLex(`"\"hello world\""`, [ Token(symbol!"Value",loc,Value(`"hello world"` )) ]); 1741 testLex(`""`, [ Token(symbol!"Value",loc,Value("" )) ]); // issue #34 1742 1743 testLexThrows("\"hello \n world\""); 1744 testLexThrows(`"foo`); 1745 testLexThrows(`"`); 1746 1747 // Characters 1748 testLex("'a'", [ Token(symbol!"Value",loc,Value(cast(dchar) 'a')) ]); 1749 testLex("'\\n'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\n')) ]); 1750 testLex("'\\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]); 1751 testLex("'\t'", [ Token(symbol!"Value",loc,Value(cast(dchar)'\t')) ]); 1752 testLex("'\\''", [ Token(symbol!"Value",loc,Value(cast(dchar)'\'')) ]); 1753 testLex(`'\\'`, [ Token(symbol!"Value",loc,Value(cast(dchar)'\\')) ]); 1754 1755 testLexThrows("'a"); 1756 testLexThrows("'aa'"); 1757 testLexThrows("''"); 1758 testLexThrows("'\\\n'"); 1759 testLexThrows("'\n'"); 1760 testLexThrows(`'\`); 1761 testLexThrows(`'\'`); 1762 testLexThrows("'"); 1763 1764 // Unicode 1765 testLex("日本語", [ Token(symbol!"Ident",loc,Value(null), "日本語") ]); 1766 testLex("`おはよう、日本。`", [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]); 1767 testLex(`"おはよう、日本。"`, [ Token(symbol!"Value",loc,Value(`おはよう、日本。`)) ]); 1768 testLex("'月'", [ Token(symbol!"Value",loc,Value("月"d.dup[0])) ]); 1769 1770 // Base64 Binary 1771 testLex("[aGVsbG8gd29ybGQ=]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]); 1772 testLex("[ aGVsbG8gd29ybGQ= ]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]); 1773 testLex("[\n aGVsbG8g \n \n d29ybGQ= \n]", [ Token(symbol!"Value",loc,Value(cast(ubyte[])"hello world".dup))]); 1774 1775 testLexThrows("[aGVsbG8gd29ybGQ]"); // Ie: Not multiple of 4 1776 testLexThrows("[ aGVsbG8gd29ybGQ ]"); 1777 1778 // Date 1779 testLex( "1999/12/5", [ Token(symbol!"Value",loc,Value(Date( 1999, 12, 5))) ]); 1780 testLex( "2013/2/22", [ Token(symbol!"Value",loc,Value(Date( 2013, 2, 22))) ]); 1781 testLex("-2013/2/22", [ Token(symbol!"Value",loc,Value(Date(-2013, 2, 22))) ]); 1782 1783 testLexThrows("7/"); 1784 testLexThrows("2013/2/22a"); 1785 testLexThrows("2013/2/22f"); 1786 1787 testLex("1999/12/5\n", [ 1788 Token(symbol!"Value",loc,Value(Date(1999, 12, 5))), 1789 Token(symbol!"EOL",loc), 1790 ]); 1791 1792 // DateTime, no timezone 1793 testLex( "2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1794 testLex( "2013/2/22 \t 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1795 testLex( "2013/2/22/*foo*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1796 testLex( "2013/2/22 /*foo*/ \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1797 testLex( "2013/2/22 /*foo*/ \\\n\n \n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1798 testLex( "2013/2/22 /*foo*/ \\\n\\\n \\\n /*bar*/ 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1799 testLex( "2013/2/22/*foo*/\\\n/*bar*/07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0)))) ]); 1800 testLex("-2013/2/22 07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 7, 53, 0)))) ]); 1801 testLex( "2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]); 1802 testLex("-2013/2/22 -07:53", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53)))) ]); 1803 testLex( "2013/2/22 07:53:34", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34)))) ]); 1804 testLex( "2013/2/22 07:53:34.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs))) ]); 1805 testLex( "2013/2/22 07:53:34.12", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 120.msecs))) ]); 1806 testLex( "2013/2/22 07:53:34.1", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 34), 100.msecs))) ]); 1807 testLex( "2013/2/22 07:53.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs))) ]); 1808 1809 testLex( "2013/2/22 34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0)))) ]); 1810 testLex( "2013/2/22 34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds(77), 123.msecs))) ]); 1811 testLex( "2013/2/22 34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) + hours(34) + minutes(65) + seconds( 0), 123.msecs))) ]); 1812 1813 testLex( "2013/2/22 -34:65", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0)))) ]); 1814 testLex( "2013/2/22 -34:65:77.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds(77), -123.msecs))) ]); 1815 testLex( "2013/2/22 -34:65.123", [ Token(symbol!"Value",loc,Value(DateTimeFrac(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), -123.msecs))) ]); 1816 1817 testLexThrows("2013/2/22 07:53a"); 1818 testLexThrows("2013/2/22 07:53f"); 1819 testLexThrows("2013/2/22 07:53:34.123a"); 1820 testLexThrows("2013/2/22 07:53:34.123f"); 1821 testLexThrows("2013/2/22a 07:53"); 1822 1823 testLex(`2013/2/22 "foo"`, [ 1824 Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), 1825 Token(symbol!"Value",loc,Value("foo")), 1826 ]); 1827 1828 testLex("2013/2/22 07", [ 1829 Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), 1830 Token(symbol!"Value",loc,Value(cast(int)7)), 1831 ]); 1832 1833 testLex("2013/2/22 1.2F", [ 1834 Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), 1835 Token(symbol!"Value",loc,Value(cast(float)1.2)), 1836 ]); 1837 1838 testLex("2013/2/22 .2F", [ 1839 Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), 1840 Token(symbol!"Value",loc,Value(cast(float)0.2)), 1841 ]); 1842 1843 testLex("2013/2/22 -1.2F", [ 1844 Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), 1845 Token(symbol!"Value",loc,Value(cast(float)-1.2)), 1846 ]); 1847 1848 testLex("2013/2/22 -.2F", [ 1849 Token(symbol!"Value",loc,Value(Date(2013, 2, 22))), 1850 Token(symbol!"Value",loc,Value(cast(float)-0.2)), 1851 ]); 1852 1853 // DateTime, with known timezone 1854 testLex( "2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]); 1855 testLex("-2013/2/22 07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(0) )))) ]); 1856 testLex( "2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]); 1857 testLex("-2013/2/22 -07:53-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), new immutable SimpleTimeZone( hours(0) )))) ]); 1858 testLex( "2013/2/22 07:53-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); 1859 testLex( "2013/2/22 07:53-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); 1860 testLex( "2013/2/22 07:53:34-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(0) )))) ]); 1861 testLex( "2013/2/22 07:53:34-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); 1862 testLex( "2013/2/22 07:53:34-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); 1863 testLex( "2013/2/22 07:53:34.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]); 1864 testLex( "2013/2/22 07:53:34.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); 1865 testLex( "2013/2/22 07:53:34.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); 1866 testLex( "2013/2/22 07:53.123-GMT+00:00", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(0) )))) ]); 1867 testLex( "2013/2/22 07:53.123-GMT+02:10", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone( hours(2)+minutes(10))))) ]); 1868 testLex( "2013/2/22 07:53.123-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); 1869 1870 testLex( "2013/2/22 -34:65-GMT-05:30", [ Token(symbol!"Value",loc,Value(SysTime(DateTime( 2013, 2, 22, 0, 0, 0) - hours(34) - minutes(65) - seconds( 0), new immutable SimpleTimeZone(-hours(5)-minutes(30))))) ]); 1871 1872 // DateTime, with Java SDLang's occasionally weird interpretation of some 1873 // "not quite ISO" variations of the "GMT with offset" timezone strings. 1874 Token testTokenSimpleTimeZone(Duration d) 1875 { 1876 auto dateTime = DateTime(2013, 2, 22, 7, 53, 0); 1877 auto tz = new immutable SimpleTimeZone(d); 1878 return Token( symbol!"Value", loc, Value(SysTime(dateTime,tz)) ); 1879 } 1880 Token testTokenUnknownTimeZone(string tzName) 1881 { 1882 auto dateTime = DateTime(2013, 2, 22, 7, 53, 0); 1883 auto frac = 0.msecs; 1884 return Token( symbol!"Value", loc, Value(DateTimeFracUnknownZone(dateTime,frac,tzName)) ); 1885 } 1886 testLex("2013/2/22 07:53-GMT+", [ testTokenUnknownTimeZone("GMT+") ]); 1887 testLex("2013/2/22 07:53-GMT+:", [ testTokenUnknownTimeZone("GMT+:") ]); 1888 testLex("2013/2/22 07:53-GMT+:3", [ testTokenUnknownTimeZone("GMT+:3") ]); 1889 testLex("2013/2/22 07:53-GMT+:03", [ testTokenSimpleTimeZone(minutes(3)) ]); 1890 testLex("2013/2/22 07:53-GMT+:003", [ testTokenUnknownTimeZone("GMT+:003") ]); 1891 1892 testLex("2013/2/22 07:53-GMT+4", [ testTokenSimpleTimeZone(hours(4)) ]); 1893 testLex("2013/2/22 07:53-GMT+4:", [ testTokenUnknownTimeZone("GMT+4:") ]); 1894 testLex("2013/2/22 07:53-GMT+4:3", [ testTokenUnknownTimeZone("GMT+4:3") ]); 1895 testLex("2013/2/22 07:53-GMT+4:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]); 1896 testLex("2013/2/22 07:53-GMT+4:003", [ testTokenUnknownTimeZone("GMT+4:003") ]); 1897 1898 testLex("2013/2/22 07:53-GMT+04", [ testTokenSimpleTimeZone(hours(4)) ]); 1899 testLex("2013/2/22 07:53-GMT+04:", [ testTokenUnknownTimeZone("GMT+04:") ]); 1900 testLex("2013/2/22 07:53-GMT+04:3", [ testTokenUnknownTimeZone("GMT+04:3") ]); 1901 testLex("2013/2/22 07:53-GMT+04:03", [ testTokenSimpleTimeZone(hours(4)+minutes(3)) ]); 1902 testLex("2013/2/22 07:53-GMT+04:03abc", [ testTokenUnknownTimeZone("GMT+04:03abc") ]); 1903 testLex("2013/2/22 07:53-GMT+04:003", [ testTokenUnknownTimeZone("GMT+04:003") ]); 1904 1905 testLex("2013/2/22 07:53-GMT+004", [ testTokenSimpleTimeZone(minutes(4)) ]); 1906 testLex("2013/2/22 07:53-GMT+004:", [ testTokenUnknownTimeZone("GMT+004:") ]); 1907 testLex("2013/2/22 07:53-GMT+004:3", [ testTokenUnknownTimeZone("GMT+004:3") ]); 1908 testLex("2013/2/22 07:53-GMT+004:03", [ testTokenUnknownTimeZone("GMT+004:03") ]); 1909 testLex("2013/2/22 07:53-GMT+004:003", [ testTokenUnknownTimeZone("GMT+004:003") ]); 1910 1911 testLex("2013/2/22 07:53-GMT+0004", [ testTokenSimpleTimeZone(minutes(4)) ]); 1912 testLex("2013/2/22 07:53-GMT+0004:", [ testTokenUnknownTimeZone("GMT+0004:") ]); 1913 testLex("2013/2/22 07:53-GMT+0004:3", [ testTokenUnknownTimeZone("GMT+0004:3") ]); 1914 testLex("2013/2/22 07:53-GMT+0004:03", [ testTokenUnknownTimeZone("GMT+0004:03") ]); 1915 testLex("2013/2/22 07:53-GMT+0004:003", [ testTokenUnknownTimeZone("GMT+0004:003") ]); 1916 1917 testLex("2013/2/22 07:53-GMT+00004", [ testTokenSimpleTimeZone(minutes(4)) ]); 1918 testLex("2013/2/22 07:53-GMT+00004:", [ testTokenUnknownTimeZone("GMT+00004:") ]); 1919 testLex("2013/2/22 07:53-GMT+00004:3", [ testTokenUnknownTimeZone("GMT+00004:3") ]); 1920 testLex("2013/2/22 07:53-GMT+00004:03", [ testTokenUnknownTimeZone("GMT+00004:03") ]); 1921 testLex("2013/2/22 07:53-GMT+00004:003", [ testTokenUnknownTimeZone("GMT+00004:003") ]); 1922 1923 // DateTime, with unknown timezone 1924 testLex( "2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo")), "2013/2/22 07:53-Bogus/Foo") ]); 1925 testLex("-2013/2/22 07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 7, 53, 0), 0.msecs, "Bogus/Foo"))) ]); 1926 testLex( "2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]); 1927 testLex("-2013/2/22 -07:53-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime(-2013, 2, 22, 0, 0, 0) - hours(7) - minutes(53), 0.msecs, "Bogus/Foo"))) ]); 1928 testLex( "2013/2/22 07:53:34-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 0.msecs, "Bogus/Foo"))) ]); 1929 testLex( "2013/2/22 07:53:34.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 34), 123.msecs, "Bogus/Foo"))) ]); 1930 testLex( "2013/2/22 07:53.123-Bogus/Foo", [ Token(symbol!"Value",loc,Value(DateTimeFracUnknownZone(DateTime( 2013, 2, 22, 7, 53, 0), 123.msecs, "Bogus/Foo"))) ]); 1931 1932 // Time Span 1933 testLex( "12:14:42", [ Token(symbol!"Value",loc,Value( days( 0)+hours(12)+minutes(14)+seconds(42)+msecs( 0))) ]); 1934 testLex("-12:14:42", [ Token(symbol!"Value",loc,Value(-days( 0)-hours(12)-minutes(14)-seconds(42)-msecs( 0))) ]); 1935 testLex( "00:09:12", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 9)+seconds(12)+msecs( 0))) ]); 1936 testLex( "00:00:01.023", [ Token(symbol!"Value",loc,Value( days( 0)+hours( 0)+minutes( 0)+seconds( 1)+msecs( 23))) ]); 1937 testLex( "23d:05:21:23.532", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(532))) ]); 1938 testLex( "23d:05:21:23.53", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(530))) ]); 1939 testLex( "23d:05:21:23.5", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs(500))) ]); 1940 testLex("-23d:05:21:23.532", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(532))) ]); 1941 testLex("-23d:05:21:23.5", [ Token(symbol!"Value",loc,Value(-days(23)-hours( 5)-minutes(21)-seconds(23)-msecs(500))) ]); 1942 testLex( "23d:05:21:23", [ Token(symbol!"Value",loc,Value( days(23)+hours( 5)+minutes(21)+seconds(23)+msecs( 0))) ]); 1943 1944 testLexThrows("12:14:42a"); 1945 testLexThrows("23d:05:21:23.532a"); 1946 testLexThrows("23d:05:21:23.532f"); 1947 1948 // Combination 1949 testLex("foo. 7", [ 1950 Token(symbol!"Ident",loc,Value( null),"foo."), 1951 Token(symbol!"Value",loc,Value(cast(int)7)) 1952 ]); 1953 1954 testLex(` 1955 namespace:person "foo" "bar" 1 23L name.first="ひとみ" name.last="Smith" { 1956 namespace:age 37; namespace:favorite_color "blue" // comment 1957 somedate 2013/2/22 07:53 -- comment 1958 1959 inventory /* comment */ { 1960 socks 1961 } 1962 } 1963 `, 1964 [ 1965 Token(symbol!"EOL",loc,Value(null),"\n"), 1966 1967 Token(symbol!"Ident", loc, Value( null ), "namespace"), 1968 Token(symbol!":", loc, Value( null ), ":"), 1969 Token(symbol!"Ident", loc, Value( null ), "person"), 1970 Token(symbol!"Value", loc, Value( "foo" ), `"foo"`), 1971 Token(symbol!"Value", loc, Value( "bar" ), `"bar"`), 1972 Token(symbol!"Value", loc, Value( cast( int) 1 ), "1"), 1973 Token(symbol!"Value", loc, Value( cast(long)23 ), "23L"), 1974 Token(symbol!"Ident", loc, Value( null ), "name.first"), 1975 Token(symbol!"=", loc, Value( null ), "="), 1976 Token(symbol!"Value", loc, Value( "ひとみ" ), `"ひとみ"`), 1977 Token(symbol!"Ident", loc, Value( null ), "name.last"), 1978 Token(symbol!"=", loc, Value( null ), "="), 1979 Token(symbol!"Value", loc, Value( "Smith" ), `"Smith"`), 1980 Token(symbol!"{", loc, Value( null ), "{"), 1981 Token(symbol!"EOL", loc, Value( null ), "\n"), 1982 1983 Token(symbol!"Ident", loc, Value( null ), "namespace"), 1984 Token(symbol!":", loc, Value( null ), ":"), 1985 Token(symbol!"Ident", loc, Value( null ), "age"), 1986 Token(symbol!"Value", loc, Value( cast(int)37 ), "37"), 1987 Token(symbol!"EOL", loc, Value( null ), ";"), 1988 Token(symbol!"Ident", loc, Value( null ), "namespace"), 1989 Token(symbol!":", loc, Value( null ), ":"), 1990 Token(symbol!"Ident", loc, Value( null ), "favorite_color"), 1991 Token(symbol!"Value", loc, Value( "blue" ), `"blue"`), 1992 Token(symbol!"EOL", loc, Value( null ), "\n"), 1993 1994 Token(symbol!"Ident", loc, Value( null ), "somedate"), 1995 Token(symbol!"Value", loc, Value( DateTimeFrac(DateTime(2013, 2, 22, 7, 53, 0)) ), "2013/2/22 07:53"), 1996 Token(symbol!"EOL", loc, Value( null ), "\n"), 1997 Token(symbol!"EOL", loc, Value( null ), "\n"), 1998 1999 Token(symbol!"Ident", loc, Value(null), "inventory"), 2000 Token(symbol!"{", loc, Value(null), "{"), 2001 Token(symbol!"EOL", loc, Value(null), "\n"), 2002 2003 Token(symbol!"Ident", loc, Value(null), "socks"), 2004 Token(symbol!"EOL", loc, Value(null), "\n"), 2005 2006 Token(symbol!"}", loc, Value(null), "}"), 2007 Token(symbol!"EOL", loc, Value(null), "\n"), 2008 2009 Token(symbol!"}", loc, Value(null), "}"), 2010 Token(symbol!"EOL", loc, Value(null), "\n"), 2011 ]); 2012 2013 if(numErrors > 0) 2014 stderr.writeln(numErrors, " failed test(s)"); 2015 } 2016 2017 @("lexer: Regression test issue #8") 2018 unittest 2019 { 2020 testLex(`"\n \n"`, [ Token(symbol!"Value",loc,Value("\n \n"),`"\n \n"`) ]); 2021 testLex(`"\t\t"`, [ Token(symbol!"Value",loc,Value("\t\t"),`"\t\t"`) ]); 2022 testLex(`"\n\n"`, [ Token(symbol!"Value",loc,Value("\n\n"),`"\n\n"`) ]); 2023 } 2024 2025 @("lexer: Regression test issue #11") 2026 unittest 2027 { 2028 void test(string input) 2029 { 2030 testLex( 2031 input, 2032 [ 2033 Token(symbol!"EOL", loc, Value(null), "\n"), 2034 Token(symbol!"Ident",loc,Value(null), "a") 2035 ] 2036 ); 2037 } 2038 2039 test("//X\na"); 2040 test("//\na"); 2041 test("--\na"); 2042 test("#\na"); 2043 } 2044 2045 @("ast: Regression test issue #28") 2046 unittest 2047 { 2048 enum offset = 1; // workaround for an of-by-one error for line numbers 2049 testLex("test", [ 2050 Token(symbol!"Ident", Location("filename", 0, 0, 0), Value(null), "test") 2051 ], true); 2052 testLex("\ntest", [ 2053 Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\n"), 2054 Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test") 2055 ], true); 2056 testLex("\rtest", [ 2057 Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"), 2058 Token(symbol!"Ident", Location("filename", 1, 0, 1), Value(null), "test") 2059 ], true); 2060 testLex("\r\ntest", [ 2061 Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"), 2062 Token(symbol!"Ident", Location("filename", 1, 0, 2), Value(null), "test") 2063 ], true); 2064 testLex("\r\n\ntest", [ 2065 Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r\n"), 2066 Token(symbol!"EOL", Location("filename", 1, 0, 2), Value(null), "\n"), 2067 Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test") 2068 ], true); 2069 testLex("\r\r\ntest", [ 2070 Token(symbol!"EOL", Location("filename", 0, 0, 0), Value(null), "\r"), 2071 Token(symbol!"EOL", Location("filename", 1, 0, 1), Value(null), "\r\n"), 2072 Token(symbol!"Ident", Location("filename", 2, 0, 3), Value(null), "test") 2073 ], true); 2074 }