/**
 * Some helper functions to work with strings
 */
module bc.string.string;

import bc.core.intrinsics;
import bc.core.memory : enforceMalloc, enforceRealloc, heapAlloc, heapDealloc;
import core.atomic : atomicOp;
import std.range : ElementEncodingType, hasLength, isInputRange;
import std.traits : ForeachType, isSomeChar, isSomeString, isStaticArray, Unqual;
// debug import core.stdc.stdio;

nothrow @nogc:

alias CString = const(char)[];

/// True for input ranges, strings and static arrays whose element encoding type is a character type.
template isAcceptableString(S)
{
    enum isAcceptableString =
        (isInputRange!S || isSomeString!S || isStaticArray!S) &&
        isSomeChar!(ElementEncodingType!S);
}

/**
 * Temporary string buffer.
 * It can be used to build temporary \0 ended C strings.
 * Strings of up to 255 code units are kept in the internal static array (256 items including
 * the terminating zero), longer ones use a malloc-ed buffer.
 *
 * NOTE: be careful that pointer becomes invalid as soon as the struct comes out of scope!
 * NOTE: inspired by std.internal.cstring.TempCStringBuffer in Phobos library
 */
struct TempCString(C)
{
    @trusted pure nothrow @nogc:

    @disable this();
    @disable this(this);
    alias ptr this;

    /// Pointer to the first code unit (internal static buffer or the heap one).
    @property inout(C)* bufPtr() inout
    {
        return _ptr == useStack ? _buf.ptr : _ptr;
    }

    /// Const pointer to the zero terminated data.
    @property const(C)* ptr() const { return bufPtr; }

    /// Slice of the data without the terminating zero.
    const(C)[] opIndex() const pure { return bufPtr[0 .. _length]; }

    ~this()
    {
        // `useStack` is just a marker value, there is nothing to free in that case
        if (_ptr != useStack)
        {
            import core.memory : pureFree;
            pureFree(_ptr);
        }
    }

private:
    C* _ptr;
    size_t _length;
    C[256] _buf;

    // sentinel pointer value that marks usage of the internal `_buf`
    enum C* useStack = () @trusted { return cast(C*)size_t.max; }();
    static TempCString initialize() { TempCString res = void; return res; }
}

/// ditto
auto tempCString(C = char, S)(scope S str) if (isAcceptableString!S)
{
    alias CF = Unqual!(ElementEncodingType!S);
    auto res = TempCString!C.initialize();

    static if (isSomeString!S)
    {
        // null string results in null pointer
        if (str is null)
        {
            res._length = 0;
            res._ptr = null;
            return res;
        }
    }

    static if (C.sizeof == CF.sizeof && is(typeof(res._buf[0 .. str.length] = str[])))
    {
        // same code unit size - plain copy
        if (str.length < res._buf.length)
        {
            res._buf[0..str.length] = str[];
            res._buf[str.length] = 0;
            res._ptr = res.useStack;
        }
        else
        {
            res._ptr = () @trusted {
                auto p = cast(C*)enforceMalloc((str.length + 1) * C.sizeof);
                p[0 .. str.length] = str[];
                p[str.length] = 0;
                return cast(C*)p;
            }();
        }
        res._length = str.length;
        return res;
    }
    else
    {
        static assert(!(isSomeString!S && CF.sizeof == C.sizeof), "Should be using slice assignment.");
        C[] p = res._buf;
        size_t i;

        size_t strLength;
        static if (hasLength!S) strLength = str.length;

        import bc.internal.utf : byUTF;
        static if (isSomeString!S)
            auto r = cast(const(CF)[])str; // because inout(CF) causes problems with byUTF
        else
            alias r = str;

        C[] heapBuffer;
        foreach (const c; r.byUTF!(Unqual!C))
        {
            // keep one item free for the terminating zero
            if (i + 1 == p.length)
            {
                heapBuffer = trustedRealloc(p, strLength, heapBuffer is null);
                p = heapBuffer;
            }
            p[i++] = c;
        }
        p[i] = 0;
        res._length = i;
        res._ptr = (heapBuffer is null ? res.useStack : &heapBuffer[0]);
        return res;
    }
}

///
@("tempCString")
nothrow @nogc @system unittest
{
    import core.stdc.string : strlen;

    string str = "abc";

    // Intended usage
    assert(strlen(str.tempCString()) == 3);

    // Correct usage
    auto tmp = str.tempCString();
    assert(strlen(tmp) == 3); // or `tmp.ptr`, or `tmp.bufPtr`

    // $(RED WARNING): $(RED Incorrect usage)
    auto pInvalid1 = str.tempCString().ptr;
    const char* pInvalid2 = str.tempCString();
}

@("tempCString - char, wchar, dchar")
nothrow @nogc @trusted unittest
{
    import std.algorithm : filter;
    import bc.internal.utf : byCodeUnit;

    {
        auto tmp = "baz".byCodeUnit.filter!(a => a == 'z').tempCString;
        assert(tmp._length == 1);
        assert(tmp._buf[0] == 'z');
        assert(tmp._buf[1] == '\0');
    }

    {
        auto tmp = "baz".byCodeUnit.filter!(a => a == 'z').tempCString!wchar;
        assert(tmp._length == 1);
        assert(tmp._buf[0] == 'z');
        assert(tmp._buf[1] == '\0');
    }

    {
        auto tmp = "baz".tempCString!dchar;
        assert(tmp._buf[0..3] == "baz"d);
    }
}

@("tempCString - static array")
nothrow @nogc @trusted unittest
{
    import core.stdc.string : strlen;

    immutable(char)[3] str = "abc";
    assert(strlen(str.tempCString()) == 3);
}

/**
 * Refcounted string implementation.
 *
 * It uses malloc for string buffer.
 *
 * Types with `RC` prefix are reference counted, so they can be moved around freely.
 * Types without `RC` prefix have disabled copy constructor and can be only moved (passing ownership) or cloned.
 *
 * There are variants with `W` and `D` before `String` that correspond to payloads `wchar` and `dchar` as usual.
 *
 * Types that end with `Z` internally manage trailing '\0' and so can be safely used with C interop.
 *
 * NOTE: Beware of using exposed data pointer stored before some more content is added to RCString as internal buffer can be reallocated / resized if needed.
 */
alias RCString = StringImpl!(char, RC.yes, Zero.no);

/// ditto
alias RCWString = StringImpl!(wchar, RC.yes, Zero.no);

/// ditto
alias RCDString = StringImpl!(dchar, RC.yes, Zero.no);

/// ditto
alias RCStringZ = StringImpl!(char, RC.yes, Zero.yes);

/// ditto
alias RCWStringZ = StringImpl!(wchar, RC.yes, Zero.yes);

/// ditto
alias RCDStringZ = StringImpl!(dchar, RC.yes, Zero.yes);

/**
 * String with unique ownership implementation.
 *
 * Similar to RCString but can be only moved passing its ownership.
 * Furthermore it uses a stack allocated buffer of 512 code units for short strings.
 */
alias String = StringImpl!(char, RC.no, Zero.no);

/// ditto
alias WString = StringImpl!(wchar, RC.no, Zero.no);

/// ditto
alias DString = StringImpl!(dchar, RC.no, Zero.no);

/// ditto
alias StringZ = StringImpl!(char, RC.no, Zero.yes);

/// ditto
alias WStringZ = StringImpl!(wchar, RC.no, Zero.yes);

/// ditto
alias DStringZ = StringImpl!(dchar, RC.no, Zero.yes);

private enum RC { no, yes }
private enum Zero { no, yes }

private struct StringImpl(C, RC rc, Zero zero)
{
    @safe nothrow @nogc:

    // number of extra items needed for the terminating zero
    static if (zero) enum Z = 1;
    else enum Z = 0;

    static if (rc)
    {
        private
        {
            struct Payload
            {
                shared size_t refs;
                size_t len;
                C[] buf;

                ~this() @trusted pure nothrow @nogc
                {
                    import core.memory : pureFree;
                    if (buf) pureFree(buf.ptr);
                }
            }

            Payload* pay;
        }

        /// Copy constructor
        this(ref return scope StringImpl rhs) pure @safe
        {
            if (rhs.pay)
            {
                this.pay = rhs.pay;
                atomicOp!"+="(this.pay.refs, 1);
            }
        }

        /// Destructor
        ~this()
        {
            if (pay && atomicOp!"-="(pay.refs, 1) == 0) heapDealloc(pay);
        }
    }
    else
    {
        private
        {
            enum STACK_LEN = 512;
            size_t len;
            C[STACK_LEN] stackBuf;
            C[] buf;
            bool useStackBuf;
            alias pay = typeof(this); // to access fields through pay.xx too
        }

        ~this() pure @trusted
        {
            import core.memory : pureFree;
            if (buf) pureFree(buf.ptr);
        }

        @disable this(this);

        // constructor used by move
        private this(C[] sbuf, C[] buf, size_t len)
        {
            this.stackBuf[0..sbuf.length] = sbuf[];
            this.buf = buf;
            this.len = len;
        }

        /// Passes ownership of the content to the returned instance, this one is left empty.
        StringImpl move() scope @trusted
        {
            import std.algorithm : min;
            auto obuf = buf;
            auto olen = len;
            buf = null;
            len = 0;
            return StringImpl(stackBuf[0..min(STACK_LEN, olen)], obuf, olen);
        }

        ///
        StringImpl clone() scope @trusted
        {
            return StringImpl(this[]);
        }
    }

    /**
     * Constructor for cases when we know prior to the creation total length of the future string.
     * It preallocates internal buffer with `initialSize`.
     */
    this(size_t initialSize) pure
    {
        static if (rc) pay = heapAlloc!Payload(1, 0);
        immutable len = initialSize + Z;
        static if (!rc) {
            if (len <= STACK_LEN) return; // we can use stack buffer for that
        }
        pay.buf = () @trusted { return (cast(C*)enforceMalloc(len * C.sizeof))[0..len]; }();
    }

    /// Creates string with the content of the provided string or range.
    this(S)(auto ref scope S str)
    {
        put(str);
    }

    /**
     * Creates RCString from the provided arguments formatted to string with nogcFormatter
     */
    static StringImpl from(ARGS...)(auto ref ARGS args)
    {
        import bc.string.format : getFormatSize, nogcFormatTo;

        size_t total;
        // calculate total size needed so we don't have to reallocate
        static foreach (a; args) total += getFormatSize(a);

        // and format arguments to RCString
        auto ret = StringImpl(total);
        static foreach (a; args) ret.nogcFormatTo(a);
        return ret;
    }

    alias data this;

    /**
     * Access internal string including the reserved block if any.
     */
    @property inout(C)[] data() pure inout
    {
        if (!length) return null;

        static if (!rc) {
            if (len + Z <= STACK_LEN) return stackBuf[0..len];
        }

        assert(pay.buf);
        return pay.buf[0..pay.len];
    }

    static if (zero)
    {
        /// Pointer to string data that can be directly used in a C functions expecting '\0' terminal char.
        @property inout(C*) ptr() pure inout @trusted
        {
            if (!length) return null;
            static if (!rc) {
                if (len + Z <= STACK_LEN) return stackBuf.ptr;
            }
            return pay.buf.ptr;
        }
    }

    /// Slicing support for the internal buffer data
    @property inout(C)[] opSlice() pure inout
    {
        return this.data;
    }

    /// ditto
    @property inout(C)[] opSlice(size_t start, size_t end) pure inout
    {
        if (start > length || end > length) assert(0, "Index out of bounds");
        if (start > end) assert(0, "Invalid slice indexes");
        return this.data[start .. end];
    }

    /// Indexed access to the buffer data
    @property ref C opIndex(size_t idx) pure return
    {
        if (idx >= length) assert(0, "Index out of bounds");
        return this.data[idx];
    }

    /// opDollar implementation
    alias length opDollar;

    /// Managed string length
    @property size_t length() pure const
    {
        static if (rc)
            return pay ? pay.len : 0;
        else
            return len;
    }

    /// Returns: capacity that can be used without reallocation
    size_t capacity() pure const
    {
        static if (rc)
            return pay ? (pay.buf.length - pay.len - Z) : 0;
        else
            return (buf ? buf.length : STACK_LEN) - pay.len - Z;
    }

    /**
     * Reserves space for requested number of characters that also increments string length.
     * This can be used for example in cases when we need to fill slice of string with some known length data.
     * To return reserved data, use `dropBack`.
     *
     * For `Z` variants the terminating zero is written after the reserved block.
     */
    void reserve(size_t sz)
    {
        ensureAvail(sz);
        pay.len += sz;

        static if (zero)
        {
            // ensureAvail guarantees room for the terminator in the buffer that holds the data now
            static if (!rc)
            {
                if (len + Z <= STACK_LEN)
                {
                    stackBuf[len] = 0;
                    return;
                }
            }
            pay.buf[pay.len] = 0;
        }
    }

    /**
     * Drops defined amount of characters from the back.
     */
    void dropBack(size_t sz)
    {
        assert(length >= sz, "Not enough data");
        if (!sz) return;

        static if (!rc)
        {
            immutable wasOnHeap = len + Z > STACK_LEN;
            len -= sz;
            if (len + Z <= STACK_LEN)
            {
                // data lives (again) in the stack buffer - heap buffer may not even exist
                if (wasOnHeap) stackBuf[0..len] = buf[0..len]; // switch from heap buffer back to stack one
                static if (zero) stackBuf[len] = 0;
            }
            else
            {
                static if (zero) buf[len] = 0;
            }
        }
        else
        {
            pay.len -= sz;
            static if (zero) pay.buf[pay.len] = 0;
        }
    }

    /**
     * Clears content of the data, but keeps internal buffer as is so it can be used to build another string.
     */
    void clear() pure
    {
        static if (rc) {
            if (pay) pay.len = 0;
        }
        else len = 0;
    }

    alias opOpAssign(string op : "~") = put;

    /// Appends single code unit.
    void put(in C val) pure
    {
        static if (!rc)
        {
            if (len + 1 + Z <= STACK_LEN)
            {
                stackBuf[len++] = val;
                static if (zero) stackBuf[len] = 0;
                return;
            }
        }
        ensureAvail(1);
        pay.buf[pay.len++] = val;
        static if (zero) pay.buf[pay.len] = 0;
    }

    /// Appends string, static array or character range (transcoded to `C` when needed).
    void put(S)(auto ref scope S str) if (isAcceptableString!S)
    {
        alias CF = Unqual!(ElementEncodingType!S);

        static if (C.sizeof == CF.sizeof && is(typeof(pay.buf[0 .. str.length] = str[])))
        {
            // same code unit size and sliceable input - block copy
            static if (!rc)
            {
                if (len + str.length + Z <= STACK_LEN)
                {
                    stackBuf[len .. len + str.length] = str[];
                    len += str.length;
                    static if (zero) stackBuf[len] = 0;
                    return;
                }
            }

            ensureAvail(str.length);
            pay.buf[pay.len .. pay.len + str.length] = str[];
            pay.len += str.length;
            static if (zero) pay.buf[pay.len] = 0;
        }
        else
        {
            // copy range
            import bc.internal.utf : byUTF;
            static if (isSomeString!S)
                auto r = cast(const(CF)[])str; // because inout(CF) causes problems with byUTF
            else
                alias r = str;

            // preallocate when the input length is known, it's just a hint as transcoding
            // can produce different number of code units
            static if (hasLength!S) ensureAvail(str.length);

            // Single unit `put` uses the same stack/heap criteria as `ensureAvail` and `data`
            // and keeps the terminating zero in the buffer that actually holds the data.
            foreach (ch; r.byUTF!(Unqual!C)) this.put(ch);
        }
    }

    // Makes sure that `sz` more items (plus terminating zero for Z variants) can be stored.
    private void ensureAvail(size_t sz) pure
    {
        static if (__VERSION__ >= 2094) pragma(inline, true);
        else pragma(inline);
        import core.bitop : bsr;
        import std.algorithm : max, min;

        static if (rc)
        {
            if (!pay)
            {
                // allocate new payload with required size
                pay = heapAlloc!Payload(1, 0);
                immutable l = max(sz+Z, 64); // allocates at least 64 items
                pay.buf = () @trusted { return (cast(C*)enforceMalloc(l * C.sizeof))[0..l]; }();
                return;
            }

            if (pay.len + sz + Z <= pay.buf.length) return; // we can fit in what we've already allocated
        }
        else
        {
            if (len + sz + Z <= STACK_LEN) return; // still fits to stack buffer
            if (buf is null)
            {
                immutable l = max(len + sz + Z, STACK_LEN + 64); // allocates at least 64 items over
                buf = () @trusted { return (cast(C*)enforceMalloc(l * C.sizeof))[0..l]; }();
                buf[0..len] = stackBuf[0..len]; // copy data from stack buffer, we'll use heap allocated one from now
                return;
            }
            if (len + Z <= STACK_LEN)
            {
                // some buffer is already preallocated, but we're still on stackBuffer and need to move to heap allocated one
                assert(buf.length > STACK_LEN);
                buf[0..len] = stackBuf[0..len]; // copy current data from the stack
            }

            if (len + sz + Z <= buf.length) return; // we can fit in what we've already allocated
        }

        // reallocate buffer
        // Note: new length calculation taken from std.array.appenderNewCapacity
        immutable ulong mult = 100 + (1000UL) / (bsr((pay.len + sz + Z)) + 1);
        immutable l = cast(size_t)(((pay.len + sz + Z) * min(mult, 200) + 99) / 100);
        // debug printf("realloc %lu -> %lu\n", pay.len, l);
        pay.buf = () @trusted { return (cast(C*)enforceRealloc(pay.buf.ptr, l * C.sizeof))[0..l]; }();
    }
}

/// Creates refcounted string (with code unit type `C`) from the provided string or range.
auto rcString(C = char, S)(auto ref S str)
{
    StringImpl!(C, RC.yes, Zero.no) ret;
    ret.put(str);
    return ret;
}

@("RCString")
@system @nogc unittest
{
    import bc.internal.utf : byCodeUnit;
    import std.algorithm : filter;

    RCStringZ s;
    s ~= "fo";
    assert(s.pay.len == 2);
    assert(s.pay.buf.length >= 3);

    s ~= 'o';
    assert(s.pay.len == 3);
    assert(s.pay.buf.length >= 4);

    s ~= "bar";
    assert(s.pay.len == 6);
    assert(s.pay.buf.length >= 7);
    assert(s == "foobar");

    s ~= "baz".byCodeUnit.filter!(a => a == 'z');
    assert(s.length == "foobarz".length);
    assert(s.data == "foobarz");
    assert(s == "foobarz");
    assert(s.ptr == &s.data[0]);
    assert((s.ptr["foobarz".length]) == 0);
}

@("RCString.from")
@nogc @safe unittest
{
    {
        auto str = RCString.from("foo", 42, "bar");
        assert(str == "foo42bar");
    }

    {
        auto str = RCWString.from("foo");
        assert(str == "foo"w);
    }
}

@("rcString")
@nogc @safe unittest
{
    auto str = "foo".rcString();
    assert(str == "foo");
}

@("String")
@nogc @safe unittest
{
    auto s = String("Hello");
    assert(s.capacity == String.stackBuf.length - 5);
    assert(s[] == "Hello", s[]);
    s ~= " String";
    assert(s[] == "Hello String", s[]);
    auto s2 = s.clone();
    assert(s[] == s2[]);
    () @trusted { assert(s.ptr != s2.ptr); }();

    auto s3 = s.move();
    assert(s.buf is null);
    assert(s.len == 0);
    assert(s3 == "Hello String");
}

@("String - put static array")
@nogc @safe unittest
{
    String s;
    immutable(char)[3] foo = "foo";
    s ~= foo;
    assert(s == "foo");
}

@("String stack to heap")
@nogc @safe unittest
{
    import std.algorithm : each;
    import std.range : repeat;

    StringZ s;
    'a'.repeat(s.stackBuf.length-1).each!(c => s.put(c));
    assert(s.length == s.stackBuf.length-1);
    assert(s.stackBuf[$-2] == 'a');
    assert(s.stackBuf[$-1] == '\0');
    assert(s.buf is null);
    assert(&s.data[0] == &s.stackBuf[0]);
    s ~= 'b';
    assert(s.stackBuf[$-1] == '\0'); // doesn't change on stack to heap switch
    assert(s.buf !is null);
    assert(&s.data[0] == &s.buf[0]);
    assert(s.buf[s.stackBuf.length-1] == 'b');
    s ~= "foo";

    s.clear();
    s ~= 'c';
    assert(&s.data[0] == &s.stackBuf[0]); // back to stack usage
    assert(s.buf !is null); // but heap buffer is still there
    'd'.repeat(s.stackBuf.length).each!(c => s.put(c));
    assert(&s.data[0] == &s.buf[0]);
    assert(s.length == 1 + s.stackBuf.length);
    assert(s.buf[1 + s.stackBuf.length] == '\0');
}

@("String reserve")
@nogc @safe unittest
{
    String buf;
    assert(buf.length == 0);
    assert(buf.capacity == buf.stackBuf.length);
    buf.reserve(64);
    assert(buf.length == 64);
    assert(buf.buf is null);
    buf[][0..3] = "foo";
    buf.dropBack(61);
    assert(buf[] == "foo");
    buf.reserve(buf.stackBuf.length);
    assert(buf.buf !is null);
    assert(buf.buf[0..3] == "foo");
    buf.buf[0..3] = "bar";
    buf.dropBack(buf.stackBuf.length);
    assert(buf.buf !is null); // left allocated for reuse
    assert(buf.stackBuf[0..3] == "bar"); // copy from heap
}

// Grows the buffer used by `tempCString` to 3/2 of its current length.
// When `bufIsOnStack` is set, new heap block is allocated and the content of `buf` copied to it,
// otherwise `buf` is reallocated.
private C[] trustedRealloc(C)(scope C[] buf, size_t strLength, bool bufIsOnStack)
    @trusted @nogc pure nothrow
{
    pragma(inline, false); // because it's rarely called

    import bc.core.memory : enforceMalloc, enforceRealloc;

    size_t newlen = buf.length * 3 / 2;

    if (bufIsOnStack)
    {
        if (newlen <= strLength)
            newlen = strLength + 1; // +1 for terminating 0
        auto ptr = cast(C*) enforceMalloc(newlen * C.sizeof);
        ptr[0 .. buf.length] = buf[];
        return ptr[0 .. newlen];
    }
    else
    {
        if (buf.length >= size_t.max / (2 * C.sizeof))
        {
            version (D_Exceptions)
            {
                import core.exception : onOutOfMemoryError;
                onOutOfMemoryError();
            }
            else assert(0, "Memory allocation failed");
        }
        auto ptr = cast(C*) enforceRealloc(buf.ptr, newlen * C.sizeof);
        return ptr[0 .. newlen];
    }
}

/**
 * Alternative implementation of `std.string.outdent` that differs in:
 *
 *   * meant for dedent string literals in CT
 *   * if first line is not indented, other lines are dedented still (std.string.outdent returns original text in that case)
 *   * empty lines at the text start are removed
 */
template dedent(alias str)
{
    // returns first line of the text including its newline character(s)
    static S getLine(S)(S str)
    {
        if (!str.length) return null;
        for (size_t i = 0; i < str.length; ++i)
        {
            if (str[i] == '\r')
            {
                if (i+1 < str.length && str[i+1] == '\n')
                    return str[0..i+2];
            }
            if (str[i] == '\n') return str[0..i+1];
        }
        return str;
    }

    // strip line whitespace but keep newline characters
    static S stripWS(S)(S str)
    {
        if (!str.length) return null;
        for (size_t i = 0; i < str.length; ++i)
        {
            if (str[i] <= ' ' && str[i] != '\r' && str[i] != '\n') continue;
            return str[i..$];
        }
        return null;
    }

    // smallest indentation found in the lines of `str` (but at most `prev`)
    template shortestIndent(alias str, size_t prev = size_t.max)
    {
        enum line = getLine(str);
        enum stripped = stripWS(line);
        static if (line.length == 0) enum shortestIndent = prev;
        else static if (line.length == stripped.length) enum shortestIndent = 0;
        else
        {
            enum cur = prev > line.length - stripped.length ? line.length - stripped.length : prev;
            enum next = shortestIndent!(str[line.length..$], cur);
            enum shortestIndent = cur > next ? next : cur;
        }
    }

    // removes `indent` characters from the start of each line (shorter lines are kept as they are)
    template dedentNext(alias str, size_t indent)
    {
        enum ln = getLine(str);
        static if (!ln.length)
            enum dedentNext = null;
        else static if (ln.length < indent)
            enum dedentNext = ln ~ dedentNext!(str[ln.length..$], indent);
        else
            enum dedentNext = ln[indent..$] ~ dedentNext!(str[ln.length..$], indent);
    }

    enum line = getLine(str);
    enum stripped = stripWS(line);

    static if (!line.length) enum dedent = null;
    else static if (
        (stripped.length == 1 && stripped[0] == '\n')
        || (stripped.length == 2 && stripped[0] == '\r' && stripped[1] == '\n'))
        enum dedent = dedent!(str[line.length..$]); // drop first empty lines
    else
    {
        // ignore no indentation of the first line
        enum shortest = shortestIndent!(
            str[line.length..$],
            stripped.length == line.length ? size_t.max : (line.length - stripped.length));

        static if (shortest == 0)
            enum dedent = str; // no indent used
        else
            enum dedent = stripped ~ dedentNext!(str[line.length..$], shortest);
    }
}

@("dedent")
unittest
{
    // with empty first line
    {
        enum str1 = `
                DELETE FROM elements.element
                WHERE id=ANY($1) AND type_id IN (
                    SELECT id FROM elements.element_type WHERE owner=$2
                )`;

        enum str2 =
            "DELETE FROM elements.element\n" ~
            "WHERE id=ANY($1) AND type_id IN (\n" ~
            "    SELECT id FROM elements.element_type WHERE owner=$2\n" ~
            ")";

        static assert(dedent!str1 == str2);
    }

    // with not indented first line
    {
        enum str1 = `DELETE FROM elements.element
                WHERE id=ANY($1) AND type_id IN (
                    SELECT id FROM elements.element_type WHERE owner=$2
                )`;

        enum str2 = "DELETE FROM elements.element\n" ~
            "WHERE id=ANY($1) AND type_id IN (\n" ~
            "    SELECT id FROM elements.element_type WHERE owner=$2\n" ~
            ")";

        static assert(dedent!str1 == str2);
    }

    // test that we didn't touch number of lines
    {
        static assert(dedent!`
            2
            3
            ` == "2\n3\n"); // first line is dropped, last newline is kept
    }

    // test we don't dedent when some line is not indented
    {
        enum str = `aa
            bb
cc`;
        assert(dedent!str == str);
    }

    // test that we don't touch space after last line text
    {
        assert(dedent!"    foo " == "foo ");
        assert(dedent!`foo
            bar ` == "foo\nbar ");
    }
}

/**
 * Builds valid char map from the provided ranges of invalid ones
 *
 * For example when provided with "\0/:\xff" means that only characters 0-9 would have true in the generated map.
 *
 * Params:
 *   invalidRanges = pairs of characters, each pair defines inclusive range of invalid characters
 */
bool[256] buildValidCharMap()(string invalidRanges)
{
    assert(invalidRanges.length % 2 == 0, "Uneven ranges");
    bool[256] res = true;

    for (int i=0; i < invalidRanges.length; i+=2)
        for (int j=invalidRanges[i]; j <= invalidRanges[i+1]; ++j)
            res[j] = false;
    return res;
}

///
@("buildValidCharMap")
@safe unittest
{
    string ranges = "\0 \"\"(),,//:@[]{{}}\x7f\xff";
    assert(buildValidCharMap(ranges) ==
        cast(bool[])[
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
            0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        ]);
}

/*
 * Advances index over the token to the next character while checking for valid characters.
 * On success, buffer index is left on the next character.
 *
 * Params:
 *   - ranges = ranges of characters to stop on
 *   - next  = next character/s to stop on (must be present in the provided ranges too)
 *   - sseRanges =
 *         as SSE optimized path is limited to 8 pairs, here one can provide merged ranges for a fast
 *         SSE path that would be precised with `ranges`. Otherwise `ranges` is used for SSE path too.
 *
 * Returns:
 *     * 0 on success
 *     * -1 when token hasn't been found (ie not enough data in the buffer)
 *     * -2 when character from invalid ranges was found but not matching one of next characters (ie invalid token)
 */
int parseToken(string ranges, alias next, string sseRanges = null, C)(const(C)[] buffer, ref size_t i) pure
    if (is(C == ubyte) || is(C == char))
{
    version (DigitalMars) {
        static if (__VERSION__ >= 2094) pragma(inline, true); // older compilers can't inline this
    } else pragma(inline, true);

    immutable charMap = parseTokenCharMap!(ranges)();

    static if (LDC_with_SSE42)
    {
        // CT function to prepare input for SIMD vector enum
        static byte[16] padRanges()(string ranges)
        {
            byte[16] res;
            // res[0..ranges.length] = cast(byte[])ranges[]; - broken on macOS betterC tests
            foreach (i, c; ranges) res[i] = cast(byte)c;
            return res;
        }

        static if (sseRanges) alias usedRng = sseRanges;
        else alias usedRng = ranges;
        static assert(usedRng.length <= 16, "Ranges must be at most 16 characters long");
        static assert(usedRng.length % 2 == 0, "Ranges must have even number of characters");
        enum rangesSize = usedRng.length;
        enum byte16 rngE = padRanges(usedRng);

        if (_expect(buffer.length - i >= 16, true))
        {
            size_t left = (buffer.length - i) & ~15; // round down to multiple of 16
            byte16 ranges16 = rngE;

            do
            {
                byte16 b16 = () @trusted { return cast(byte16)_mm_loadu_si128(cast(__m128i*)&buffer[i]); }();
                immutable r = _mm_cmpestri(
                    ranges16, rangesSize,
                    b16, 16,
                    _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS
                );

                // anything else than 16 is an offset of the first matching character in the block
                if (r != 16)
                {
                    i += r;
                    goto FOUND;
                }
                i += 16;
                left -= 16;
            }
            while (_expect(left != 0, true));
        }
    }
    else
    {
        // faster unrolled loop to iterate over 8 characters
        loop: while (_expect(buffer.length - i >= 8, true))
        {
            static foreach (_; 0..8)
            {
                if (_expect(!charMap[buffer[i]], false)) goto FOUND;
                ++i;
            }
        }
    }

    // handle the rest
    if (_expect(i >= buffer.length, false)) return -1;

    FOUND:
    while (true)
    {
        static if (is(typeof(next) == char)) {
            static assert(!charMap[next], "Next character is not in ranges");
            if (buffer[i] == next) return 0;
        } else {
            static assert(next.length > 0, "Next character not provided");
            static foreach (c; next) {
                static assert(!charMap[c], "Next character is not in ranges");
                if (buffer[i] == c) return 0;
            }
        }
        if (_expect(!charMap[buffer[i]], false)) return -2;
        if (_expect(++i == buffer.length, false)) return -1;
    }
}

///
@("parseToken")
@safe unittest
{
    size_t idx;
    string buf = "foo\nbar";
    auto ret = parseToken!("\0\037\177\377", "\r\n")(buf, idx);
    assert(ret == 0); // no error
    assert(idx == 3); // index of newline character

    idx = 0;
    ret = parseToken!("\0\037\177\377", "\r\n")(buf[0..3], idx);
    assert(ret == -1); // not enough data to find next character
    assert(idx == 3);

    idx = 0;
    buf = "foo\t\nbar";
    ret = parseToken!("\0\037\177\377", "\r\n")(buf, idx);
    assert(ret == -2); // invalid character '\t' found in token
    assert(idx == 3); // invalid character on index 3
}

// Valid char map for the provided invalid ranges, built once per `invalidRanges` template instance.
private immutable(bool[256]) parseTokenCharMap(string invalidRanges)() {
    static immutable charMap = buildValidCharMap(invalidRanges);
    return charMap;
}