1 /**
2  * Some helper functions to work with strings
3  */
4 module bc..string.string;
5 
6 import bc.core.intrinsics;
7 import bc.core.memory : enforceMalloc, enforceRealloc, heapAlloc, heapDealloc;
8 import std.range : ElementEncodingType, hasLength, isInputRange;
9 import std.traits : ForeachType, isSomeChar, isSomeString, isStaticArray, Unqual;
10 // debug import core.stdc.stdio;
11 
12 nothrow @nogc:
13 
14 alias CString = const(char)[];
15 
16 template isAcceptableString(S)
17 {
18     enum isAcceptableString =
19         (isInputRange!S || isSomeString!S || isStaticArray!S) &&
20         isSomeChar!(ElementEncodingType!S);
21 }
22 
23 /**
24  * Temporary string buffer.
25  * It can be used to build temporary \0 ended C strings.
26  * For lengths < 255, it uses static char array, mallocated buffer otherwise.
27  *
28  * NOTE: be careful that pointer becomes invalid as soon as the struct comes out of scope!
29  * NOTE: inspired by std.internal.cstring.TempCStringBuffer in Phobos library
30  */
31 struct TempCString(C)
32 {
33     @trusted pure nothrow @nogc:
34 
35     @disable this();
36     @disable this(this);
37     alias ptr this;
38 
39     @property inout(C)* bufPtr() inout
40     {
41         return _ptr == useStack ? _buf.ptr : _ptr;
42     }
43 
44     @property const(C)* ptr() const { return bufPtr; }
45     const(C)[] opIndex() const pure { return bufPtr[0 .. _length]; }
46 
47     ~this()
48     {
49         if (_ptr != useStack)
50         {
51             import core.memory : pureFree;
52             pureFree(_ptr);
53         }
54     }
55 
56     private:
57     C* _ptr;
58     size_t _length;
59     C[256] _buf;
60 
61     enum C* useStack = () @trusted { return cast(C*)size_t.max; }();
62     static TempCString initialize() { TempCString res = void; return res; }
63 }
64 
65 /// ditto
66 auto tempCString(C = char, S)(scope S str) if (isAcceptableString!S)
67 {
68     alias CF = Unqual!(ElementEncodingType!S);
69     auto res = TempCString!C.initialize();
70 
71     static if (isSomeString!S)
72     {
73         if (str is null)
74         {
75             res._length = 0;
76             res._ptr = null;
77             return res;
78         }
79     }
80 
81     static if (C.sizeof == CF.sizeof && is(typeof(res._buf[0 .. str.length] = str[])))
82     {
83         if (str.length < res._buf.length)
84         {
85             res._buf[0..str.length] = str[];
86             res._buf[str.length] = 0;
87             res._ptr = res.useStack;
88         }
89         else
90         {
91             res._ptr = () @trusted {
92                 auto p = cast(C*)enforceMalloc((str.length + 1) * C.sizeof);
93                 p[0 .. str.length] = str[];
94                 p[str.length] = 0;
95                 return cast(C*)p;
96             }();
97         }
98         res._length = str.length;
99         return res;
100     }
101     else
102     {
103         static assert(!(isSomeString!S && CF.sizeof == C.sizeof), "Should be using slice assignment.");
104         C[] p = res._buf;
105         size_t i;
106 
107         size_t strLength;
108         static if (hasLength!S) strLength = str.length;
109 
110         import bc.internal.utf : byUTF;
111         static if (isSomeString!S)
112             auto r = cast(const(CF)[])str;  // because inout(CF) causes problems with byUTF
113         else
114             alias r = str;
115 
116         C[] heapBuffer;
117         foreach (const c; r.byUTF!(Unqual!C))
118         {
119             if (i + 1 == p.length)
120             {
121                 heapBuffer = trustedRealloc(p, strLength, heapBuffer is null);
122                 p = heapBuffer;
123             }
124             p[i++] = c;
125         }
126         p[i] = 0;
127         res._length = i;
128         res._ptr = (heapBuffer is null ? res.useStack : &heapBuffer[0]);
129         return res;
130     }
131 }
132 
133 ///
134 @("tempCString")
135 nothrow @nogc @system unittest
136 {
137     import core.stdc.string : strlen;
138 
139     string str = "abc";
140 
141     // Intended usage
142     assert(strlen(str.tempCString()) == 3);
143 
144     // Correct usage
145     auto tmp = str.tempCString();
146     assert(strlen(tmp) == 3); // or `tmp.ptr`, or `tmp.buffPtr`
147 
148     // $(RED WARNING): $(RED Incorrect usage)
149     auto pInvalid1 = str.tempCString().ptr;
150     const char* pInvalid2 = str.tempCString();
151 }
152 
153 @("tempCString - char, wchar, dchar")
154 nothrow @nogc @trusted unittest
155 {
156     import std.algorithm : filter;
157     import bc.internal.utf : byCodeUnit;
158 
159     {
160         auto tmp = "baz".byCodeUnit.filter!(a => a == 'z').tempCString;
161         assert(tmp._length == 1);
162         assert(tmp._buf[0] == 'z');
163         assert(tmp._buf[1] == '\0');
164     }
165 
166     {
167         auto tmp = "baz".byCodeUnit.filter!(a => a == 'z').tempCString!wchar;
168         assert(tmp._length == 1);
169         assert(tmp._buf[0] == 'z');
170         assert(tmp._buf[1] == '\0');
171     }
172 
173     {
174         auto tmp = "baz".tempCString!dchar;
175         assert(tmp._buf[0..3] == "baz"d);
176     }
177 }
178 
179 @("tempCString - static array")
180 nothrow @nogc @trusted unittest
181 {
182     import core.stdc.string : strlen;
183 
184     immutable(char)[3] str = "abc";
185     assert(strlen(str.tempCString()) == 3);
186 }
187 
188 /**
189  * Refcounted string implementation.
190  *
191  * It uses malloc for string buffer.
192  *
193  * Types with `RC` prefix are reference counted, so they can be moved around freely.
194  * Types without `RC` prefix has disabled copy constructor and can be only moved (passing ownership) or cloned.
195  *
196  * There are wariants with `W` and `D` before `String` that corresponds to payloads `wchar` and `dchar` as usual.
197  *
198  * Types that ends with `Z` means that they internally manages trailing '\0' and so can be safely used with C interop.
199  *
200  * NOTE: Beware of using exposed data pointer stored before some more content is added to RCString as internal buffer can be reallocated / resized if needed.
201  */
202 alias RCString = StringImpl!(char, RC.yes, Zero.no);
203 
204 /// ditto
205 alias RCWString = StringImpl!(wchar, RC.yes, Zero.no);
206 
207 /// ditto
208 alias RCDString = StringImpl!(dchar, RC.yes, Zero.no);
209 
210 /// ditto
211 alias RCStringZ = StringImpl!(char, RC.yes, Zero.yes);
212 
213 /// ditto
214 alias RCWStringZ = StringImpl!(wchar, RC.yes, Zero.yes);
215 
216 /// ditto
217 alias RCDStringZ = StringImpl!(dchar, RC.yes, Zero.yes);
218 
219 /**
220  * String with unique ownership implementation.
221  *
222  * Similar to RCString but can be only moved passing it's ownership.
223  * Furthermore it uses 512B stack allocated buffer for short strings.
224  */
225 alias String = StringImpl!(char, RC.no, Zero.no);
226 
227 /// ditto
228 alias WString = StringImpl!(wchar, RC.no, Zero.no);
229 
230 /// ditto
231 alias DString = StringImpl!(dchar, RC.no, Zero.no);
232 
233 /// ditto
234 alias StringZ = StringImpl!(char, RC.no, Zero.yes);
235 
236 /// ditto
237 alias WStringZ = StringImpl!(wchar, RC.no, Zero.yes);
238 
239 /// ditto
240 alias DStringZ = StringImpl!(dchar, RC.no, Zero.yes);
241 
242 private enum RC { no, yes }
243 private enum Zero { no, yes }
244 
245 private struct StringImpl(C, RC rc, Zero zero)
246 {
247     @safe nothrow @nogc:
248 
249     static if (zero) enum Z = 1;
250     else enum Z = 0;
251 
252     static if (rc)
253     {
254         private
255         {
256             struct Payload
257             {
258                 size_t refs;
259                 size_t len;
260                 C[] buf;
261 
262                 ~this() @trusted pure nothrow @nogc
263                 {
264                     import core.memory : pureFree;
265                     if (buf) pureFree(buf.ptr);
266                 }
267             }
268 
269             Payload* pay;
270         }
271 
272         /// Copy constructor
273         this(ref return scope inout StringImpl rhs) pure @safe inout
274         {
275             pay = rhs.pay;
276             if (pay) () @trusted { (cast(Payload*)pay).refs++; }();
277         }
278 
279         /// Destructor
280         ~this()
281         {
282             if (pay && --pay.refs == 0) heapDealloc(pay);
283         }
284     }
285     else
286     {
287         private
288         {
289             enum STACK_LEN = 512;
290             size_t len;
291             C[STACK_LEN] stackBuf;
292             C[] buf;
293             bool useStackBuf;
294             alias pay = typeof(this); // to access fields through pay.xx too
295         }
296 
297         ~this() pure @trusted
298         {
299             import core.memory : pureFree;
300             if (buf) pureFree(buf.ptr);
301         }
302 
303         @disable this(this);
304 
305         // constructor used by move
306         private this(C[] sbuf, C[] buf, size_t len)
307         {
308             this.stackBuf[0..sbuf.length] = sbuf[];
309             this.buf = buf;
310             this.len = len;
311         }
312 
313         StringImpl move() scope @trusted
314         {
315             import std.algorithm : min;
316             auto obuf = buf;
317             auto olen = len;
318             buf = null;
319             len = 0;
320             return StringImpl(stackBuf[0..min(STACK_LEN, olen)], obuf, olen);
321         }
322 
323         ///
324         StringImpl clone() scope @trusted
325         {
326             return StringImpl(this[]);
327         }
328     }
329 
330     /**
331      * Constructor for cases when we know prior to the creation total length of the future string.
332      * It preallocates internal buffer with `initialSize`.
333      */
334     this(size_t initialSize) pure
335     {
336         static if (rc) pay = heapAlloc!Payload(1, 0);
337         immutable len = initialSize + Z;
338         static if (!rc) {
339             if (len <= STACK_LEN) return; // we can use stack buffer for that
340         }
341         pay.buf = () @trusted { return (cast(C*)enforceMalloc(len * C.sizeof))[0..len]; }();
342     }
343 
344     this(S)(auto ref scope S str)
345     {
346         put(str);
347     }
348 
349     /**
350      * Creates RCString from the provided arguments formated to string with nogcFormatter
351      */
352     static StringImpl from(ARGS...)(auto ref ARGS args)
353     {
354         import bc.string.format : getFormatSize, nogcFormatTo;
355 
356         size_t total;
357         // calculate total size needed so we don't have to reallocate
358         static foreach (a; args) total += getFormatSize(a);
359 
360         // and format arguments to RCString
361         auto ret = StringImpl(total);
362         static foreach (a; args) ret.nogcFormatTo(a);
363         return ret;
364     }
365 
366     alias data this;
367 
368     /**
369      * Access internal string including the reserved block if any.
370      */
371     @property inout(C)[] data() pure inout
372     {
373         if (!length) return null;
374 
375         static if (!rc) {
376             if (len + Z <= STACK_LEN) return stackBuf[0..len];
377         }
378 
379         assert(pay.buf);
380         return pay.buf[0..pay.len];
381     }
382 
383     static if (zero)
384     {
385         /// Pointer to string data that can be directly used in a C functions expecting '\0' terminal char.
386         @property inout(C*) ptr() pure inout @trusted
387         {
388             if (!length) return null;
389             static  if (!rc) {
390                 if (len + Z <= STACK_LEN) return stackBuf.ptr;
391             }
392             return pay.buf.ptr;
393         }
394     }
395 
396     /// Slicing support for the internal buffer data
397     @property inout(C)[] opSlice() pure inout
398     {
399         return this.data;
400     }
401 
402     /// ditto
403     @property inout(C)[] opSlice(size_t start, size_t end) pure inout
404     {
405         if (start > length || end > length) assert(0, "Index out of bounds");
406         if (start > end) assert(0, "Invalid slice indexes");
407         return this.data[start .. end];
408     }
409 
410     /// Indexed access to the buffer data
411     @property ref C opIndex(size_t idx) pure return
412     {
413         if (idx >= length) assert(0, "Index out of bounds");
414         return this.data[idx];
415     }
416 
417     /// opDollar implementation
418     alias length opDollar;
419 
420     /// Managed string length
421     @property size_t length() pure const
422     {
423         static if (rc)
424             return pay ? pay.len : 0;
425         else
426             return len;
427     }
428 
429     /// Returns: capacity that can be used without reallocation
430     size_t capacity() pure const
431     {
432         static if (rc)
433             return pay ? (pay.buf.length - pay.len - Z) : 0;
434         else
435             return (buf ? buf.length : STACK_LEN) - pay.len - Z;
436     }
437 
438     /**
439      * Reserves space for requested number of characters that also increments string length.
440      * This can be used for example in cases when we need to fill slice of string with some known length data.
441      * To return reserved data, use `dropBack`.
442      */
443     void reserve(size_t sz)
444     {
445         ensureAvail(sz);
446         pay.len += sz;
447     }
448 
449     /**
450      * Drops defined amount of characters from the back.
451      */
452     void dropBack(size_t sz)
453     {
454         assert(length >= sz, "Not enough data");
455         if (!sz) return;
456 
457         static if (!rc)
458         {
459             if (len + Z > STACK_LEN && len + Z - sz <= STACK_LEN)
460             {
461                 // switch from heap buffer back to stack one
462                 len -= sz;
463                 stackBuf[0..len] = buf[0..len];
464                 static if (zero) stackBuf[len] = 0;
465                 return;
466             }
467         }
468         pay.len -= sz;
469         static if (zero) pay.buf[pay.len] = 0;
470     }
471 
472     /**
473      * Clears content of the data, but keeps internal buffer as is so it can be used to build another string.
474      */
475     void clear() pure
476     {
477         static if (rc) {
478             if (pay) pay.len = 0;
479         }
480         else len = 0;
481     }
482 
483     alias opOpAssign(string op : "~") = put;
484 
485     void opAssign(S)(auto ref scope S str)
486     if (isAcceptableString!S || is(Unqual!S == C))
487     {
488         clear();
489         put(str);
490     }
491 
492     void put(in C val) pure
493     {
494         static if (!rc)
495         {
496             if (len + 1 + Z <= STACK_LEN)
497             {
498                 stackBuf[len++] = val;
499                 static if (zero) stackBuf[len] = 0;
500                 return;
501             }
502         }
503         ensureAvail(1);
504         pay.buf[pay.len++] = val;
505         static if (zero) pay.buf[pay.len] = 0;
506     }
507 
508     void put(S)(auto ref scope S str) if (isAcceptableString!S)
509     {
510         alias CF = Unqual!(ElementEncodingType!S);
511 
512         static if (C.sizeof == CF.sizeof && is(typeof(pay.buf[0 .. str.length] = str[])))
513         {
514             static if (!rc)
515             {
516                 if (len + str.length + Z <= STACK_LEN)
517                 {
518                     stackBuf[len .. len + str.length] = str[];
519                     len += str.length;
520                     static if (zero) stackBuf[len] = 0;
521                     return;
522                 }
523             }
524 
525             ensureAvail(str.length);
526             pay.buf[pay.len .. pay.len + str.length] = str[];
527             pay.len += str.length;
528             static if (zero) pay.buf[pay.len] = 0;
529         }
530         else
531         {
532             // copy range
533 
534             // special case when we can determine that it still fits to stack buffer
535             static if (!rc && hasLength!S && is(C == CF))
536             {
537                 if (pay.len + Z <= STACK_LEN)
538                 {
539                     foreach (ch; r.byUTF!(Unqual!C))
540                     {
541                         stackBuf[pay.len++] = ch;
542                         static if (zero) stackBuf[pay.dlen] = 0;
543                     }
544                     return;
545                 }
546             }
547 
548             static if (!rc) size_t nlen = pay.len;
549             static if (hasLength!S) {
550                 ensureAvail(str.length);
551                 static if (!rc) nlen += str.length;
552             }
553             import bc.internal.utf : byUTF;
554             static if (isSomeString!S)
555                 auto r = cast(const(CF)[])str;  // because inout(CF) causes problems with byUTF
556             else
557                 alias r = str;
558 
559             foreach (ch; r.byUTF!(Unqual!C))
560             {
561                 static if (!hasLength!S || !is(C == CF))
562                 {
563                     ensureAvail(1);
564                     static if (!rc) {
565                         static if (!hasLength!S) nlen++;
566                         else {
567                             if (pay.len == nlen) nlen++;
568                         }
569                     }
570                 }
571                 static if (!rc)
572                 {
573                     if (nlen + Z + 1 <= STACK_LEN) // we can still use stack buffer
574                     {
575                         stackBuf[len++] = ch;
576                         continue;
577                     }
578                 }
579                 pay.buf[pay.len++] = ch;
580             }
581             static if (zero) pay.buf[pay.len] = 0;
582             static if (!rc) assert(nlen == pay.len);
583         }
584     }
585 
586     private void ensureAvail(size_t sz) pure
587     {
588         static if (__VERSION__ >= 2094) pragma(inline, true);
589         else pragma(inline);
590         import core.bitop : bsr;
591         import std.algorithm : max, min;
592 
593         static if (rc)
594         {
595             if (!pay)
596             {
597                 // allocate new payload with required size
598                 pay = heapAlloc!Payload(1, 0);
599                 immutable l = max(sz+Z, 64); // allocates at leas 64B
600                 pay.buf = () @trusted { return (cast(C*)enforceMalloc(l * C.sizeof))[0..l]; }();
601                 return;
602             }
603 
604             if (pay.len + sz + Z <= pay.buf.length) return; // we can fit in what we've already allocated
605         }
606         else
607         {
608             if (len + sz + Z <= STACK_LEN) return; // still fits to stack buffer
609             if (buf is null)
610             {
611                 immutable l = max(len + sz + Z, STACK_LEN + 64); // allocates at leas 64B over
612                 buf = () @trusted { return (cast(C*)enforceMalloc(l * C.sizeof))[0..l]; }();
613                 buf[0..len] = stackBuf[0..len]; // copy data from stack buffer,  we'll use heap allocated one from now
614                 return;
615             }
616             if (len + Z <= STACK_LEN)
617             {
618                 // some buffer is already preallocated, but we're still on stackBuffer and need to move to heap allocated one
619                 assert(buf.length > STACK_LEN);
620                 buf[0..len] = stackBuf[0..len]; // copy current data from the stack
621             }
622 
623             if (len + sz + Z <= buf.length) return; // we can fit in what we've already allocated
624         }
625 
626         // reallocate buffer
627         // Note: new length calculation taken from std.array.appenderNewCapacity
628         immutable ulong mult = 100 + (1000UL) / (bsr((pay.len + sz + Z)) + 1);
629         immutable l = cast(size_t)(((pay.len + sz + Z) * min(mult, 200) + 99) / 100);
630         // debug printf("realloc %lu -> %lu\n", pay.len, l);
631         pay.buf = () @trusted { return (cast(C*)enforceRealloc(pay.buf.ptr, l * C.sizeof))[0..l]; }();
632     }
633 }
634 
635 auto rcString(C = char, S)(auto ref S str)
636 {
637     StringImpl!(C, RC.yes, Zero.no) ret;
638     ret.put(str);
639     return ret;
640 }
641 
642 @("RCString")
643 @system @nogc unittest
644 {
645     import bc.internal.utf : byCodeUnit;
646     import std.algorithm : filter;
647 
648     RCStringZ s;
649     s ~= "fo";
650     assert(s.pay.len == 2);
651     assert(s.pay.buf.length >= 3);
652 
653     s ~= 'o';
654     assert(s.pay.len == 3);
655     assert(s.pay.buf.length >= 4);
656 
657     s ~= "bar";
658     assert(s.pay.len == 6);
659     assert(s.pay.buf.length >= 7);
660     assert(s == "foobar");
661 
662     s ~= "baz".byCodeUnit.filter!(a => a == 'z');
663     assert(s.length == "foobarz".length);
664     assert(s.data == "foobarz");
665     assert(s == "foobarz");
666     assert(s.ptr == &s.data[0]);
667     assert((s.ptr["foobarz".length]) == 0);
668 }
669 
670 @("RCString.from")
671 @nogc @safe unittest
672 {
673     {
674         auto str = RCString.from("foo", 42, "bar");
675         assert(str == "foo42bar");
676     }
677 
678     {
679         auto str = RCWString.from("foo");
680         assert(str == "foo"w);
681     }
682 }
683 
684 version (D_Exceptions)
685 {
686     @("RCString with Nullable")
687     @nogc @safe unittest
688     {
689         import std.typecons : Nullable;
690         Nullable!RCString sn = RCString("foo");
691     }
692 }
693 
694 @("rcString")
695 @nogc @safe unittest
696 {
697     auto str = "foo".rcString();
698     assert(str == "foo");
699 }
700 
701 @("String")
702 @nogc @safe unittest
703 {
704     auto s = String("Hello");
705     assert(s.capacity == String.stackBuf.length - 5);
706     assert(s[] == "Hello");
707     s ~= " String";
708     assert(s[] == "Hello String");
709     auto s2 = s.clone();
710     assert(s[] == s2[]);
711     () @trusted { assert(s.ptr != s2.ptr); }();
712 
713     auto s3 = s.move();
714     assert(s.buf is null);
715     assert(s.len == 0);
716     assert(s3 == "Hello String");
717 }
718 
719 @("String - put static array")
720 @nogc @safe unittest
721 {
722     String s;
723     immutable(char)[3] foo = "foo";
724     s ~= foo;
725     assert(s == "foo");
726 }
727 
728 @("String stack to heap")
729 @nogc @safe unittest
730 {
731     import std.algorithm : each;
732     import std.range : repeat;
733 
734     StringZ s;
735     'a'.repeat(s.stackBuf.length-1).each!(c => s.put(c));
736     assert(s.length == s.stackBuf.length-1);
737     assert(s.stackBuf[$-2] == 'a');
738     assert(s.stackBuf[$-1] == '\0');
739     assert(s.buf is null);
740     assert(&s.data[0] == &s.stackBuf[0]);
741     s ~= 'b';
742     assert(s.stackBuf[$-1] == '\0'); // doesn't change on stack to heap switch
743     assert(s.buf !is null);
744     assert(&s.data[0] == &s.buf[0]);
745     assert(s.buf[s.stackBuf.length-1] == 'b');
746     s ~= "foo";
747 
748     s.clear();
749     s ~= 'c';
750     assert(&s.data[0] == &s.stackBuf[0]); // back to stack usage
751     assert(s.buf !is null); // but heap buffer is still there
752     'd'.repeat(s.stackBuf.length).each!(c => s.put(c));
753     assert(&s.data[0] == &s.buf[0]);
754     assert(s.length == 1 + s.stackBuf.length);
755     assert(s.buf[1 + s.stackBuf.length] == '\0');
756 }
757 
758 @("String reserve")
759 @nogc @safe unittest
760 {
761     String buf;
762     assert(buf.length == 0);
763     assert(buf.capacity == buf.stackBuf.length);
764     buf.reserve(64);
765     assert(buf.length == 64);
766     assert(buf.buf is null);
767     buf[][0..3] = "foo";
768     buf.dropBack(61);
769     assert(buf[] == "foo");
770     buf.reserve(buf.stackBuf.length);
771     assert(buf.buf !is null);
772     assert(buf.buf[0..3] == "foo");
773     buf.buf[0..3] = "bar";
774     buf.dropBack(buf.stackBuf.length);
775     assert(buf.buf !is null); // left allocated for reuse
776     assert(buf.stackBuf[0..3] == "bar"); // copy from heap
777 }
778 
779 private C[] trustedRealloc(C)(scope C[] buf, size_t strLength, bool bufIsOnStack)
780     @trusted @nogc pure nothrow
781 {
782     pragma(inline, false);  // because it's rarely called
783 
784     import bc.core.memory : enforceMalloc, enforceRealloc;
785 
786     size_t newlen = buf.length * 3 / 2;
787 
788     if (bufIsOnStack)
789     {
790         if (newlen <= strLength)
791             newlen = strLength + 1; // +1 for terminating 0
792         auto ptr = cast(C*) enforceMalloc(newlen * C.sizeof);
793         ptr[0 .. buf.length] = buf[];
794         return ptr[0 .. newlen];
795     }
796     else
797     {
798         if (buf.length >= size_t.max / (2 * C.sizeof))
799         {
800             version (D_Exceptions)
801             {
802                 import core.exception : onOutOfMemoryError;
803                 onOutOfMemoryError();
804             }
805             else assert(0, "Memory allocation failed");
806         }
807         auto ptr = cast(C*) enforceRealloc(buf.ptr, newlen * C.sizeof);
808         return ptr[0 .. newlen];
809     }
810 }
811 
812 /// Strips leading whitespace ('\t', '\n', '\r', ' ')
813 S stripLeft(S)(S str)
814 {
815     pragma(inline, true);
816     /// All chars except for whitespace ('\t', '\n', '\r', ' ')
817     enum AllExceptWhitespaceRanges = "\0\10\13\14\16\37\41\377";
818 
819     size_t rpos;
820     immutable rs = parseToken!(AllExceptWhitespaceRanges, '"')(str, rpos);
821     if(rs == -1) return str[$..$]; // Only whitespace string, return empty range.
822     else return str[rpos..$];
823 }
824 
825 @("stripLeft")
826 unittest
827 {
828     assert(stripLeft("\t\n\r foobar\t\n\r ") == "foobar\t\n\r ");
829     assert(stripLeft("\t\n\r\t\n\r ") == "");
830 }
831 
832 bool startsWith(S, char[] chars)(S str)
833 {
834     enum validCharMap = buildValidCharMap(chars, true);
835     return validCharMap[str[0]];
836 }
837 
838 @("startsWith")
839 unittest
840 {
841     assert(startsWith!(string, ['+', '-'])("-42"));
842     assert(startsWith!(string, ['+', '-'])("+42"));
843     assert(!startsWith!(string, ['+', '-'])("42"));
844 }
845 
846 /**
847  * Alternative implementation of `std.string.outdent` that differs in:
848  *
849  *   * meant for dedent string literals in CT
850  *   * if first line is not indented, other lines are dedented still (std.string.outdent returns original text in that case)
851  *   * empty lines at the text start are removed
852  */
853 template dedent(alias str)
854 {
855     static S getLine(S)(S str)
856     {
857         if (!str.length) return null;
858         for (size_t i = 0; i < str.length; ++i)
859         {
860             if (str[i] == '\r')
861             {
862                 if (i+1 < str.length && str[i+1] == '\n')
863                     return str[0..i+2];
864             }
865             if (str[i] == '\n') return str[0..i+1];
866         }
867         return str;
868     }
869 
870     // strip line whitespace but keep newline characters
871     static S stripWS(S)(S str)
872     {
873         if (!str.length) return null;
874         for (size_t i = 0; i < str.length; ++i)
875         {
876             if (str[i] <= ' ' && str[i] != '\r' && str[i] != '\n') continue;
877             return str[i..$];
878         }
879         return null;
880     }
881 
882     template shortestIndent(alias str, size_t prev = size_t.max)
883     {
884         enum line = getLine(str);
885         enum stripped = stripWS(line);
886         static if (line.length == 0) enum shortestIndent = prev;
887         else static if (line.length == stripped.length) enum shortestIndent = 0;
888         else
889         {
890             enum cur = prev > line.length - stripped.length ? line.length - stripped.length : prev;
891             enum next = shortestIndent!(str[line.length..$], cur);
892             enum shortestIndent = cur > next ? next : cur;
893         }
894     }
895 
896     template dedentNext(alias str, size_t indent)
897     {
898         enum ln = getLine(str);
899         static if (!ln.length)
900             enum dedentNext = null;
901         else static if (ln.length < indent)
902             enum dedentNext = ln ~ dedentNext!(str[ln.length..$], indent);
903         else
904             enum dedentNext = ln[indent..$] ~ dedentNext!(str[ln.length..$], indent);
905     }
906 
907     enum line = getLine(str);
908     enum stripped = stripWS(line);
909 
910     static if (!line.length) enum dedent = null;
911     else static if (
912             (stripped.length == 1 && stripped[0] == '\n')
913             || (stripped.length == 2 && stripped[0] == '\r' && stripped[1] == '\n'))
914         enum dedent = dedent!(str[line.length..$]); // drop first empty lines
915     else
916     {
917         // ignore no indentation of the first line
918         enum shortest = shortestIndent!(
919             str[line.length..$],
920             stripped.length == line.length ? size_t.max : (line.length - stripped.length));
921 
922         static if (shortest == 0)
923             enum dedent = str; // no indent used
924         else
925             enum dedent = stripped ~ dedentNext!(str[line.length..$], shortest);
926     }
927 }
928 
929 @("dedent")
930 unittest
931 {
932     // with empty first line
933     {
934         enum str1 = `
935                 DELETE FROM elements.element
936                 WHERE id=ANY($1) AND type_id IN (
937                     SELECT id FROM elements.element_type WHERE owner=$2
938                 )`;
939 
940         enum str2 =
941                     "DELETE FROM elements.element\n" ~
942                     "WHERE id=ANY($1) AND type_id IN (\n" ~
943                     "    SELECT id FROM elements.element_type WHERE owner=$2\n" ~
944                     ")";
945 
946         static assert(dedent!str1 == str2);
947     }
948 
949     // with not indented first line
950     {
951         enum str1 = `DELETE FROM elements.element
952                 WHERE id=ANY($1) AND type_id IN (
953                     SELECT id FROM elements.element_type WHERE owner=$2
954                 )`;
955 
956         enum str2 = "DELETE FROM elements.element\n" ~
957                     "WHERE id=ANY($1) AND type_id IN (\n" ~
958                     "    SELECT id FROM elements.element_type WHERE owner=$2\n" ~
959                     ")";
960 
961         static assert(dedent!str1 == str2);
962     }
963 
964     // test that we didn't touch number of lines
965     {
966         static assert(dedent!`
967             2
968             3
969             ` == "2\n3\n"); // first line is dropped, last newline is kept
970     }
971 
972     // test we don't dedent when some line is not indented
973     {
974         enum str = `aa
975             bb
976 cc`;
977         assert(dedent!str == str);
978     }
979 
980     // test that we don't touch space after last line text
981     {
982         assert(dedent!"  foo " == "foo ");
983         assert(dedent!`foo
984             bar ` == "foo\nbar ");
985     }
986 }
987 
988 /**
989  * Builds char map from the provided ranges.
990  *
991  * Params:
992  *      ranges = ranges of ascii characters.
993  *      valid = wheteher range characters are valid or not.
994  *              For example:
995  *              buildValidCharMap("\0/:\xff", false)   means that only characters 0-9 would have true in the generated map.
996  *              buildValidCharMap("\0/:\xff", true)    means that all characters except 0-9 would have true in the generated map.
997  *
998  * Returns: generated table
999  */
1000 bool[256] buildValidCharMap(S)(S ranges, bool valid = false)
1001 {
1002     assert(ranges.length % 2 == 0, "Uneven ranges");
1003     bool[256] res = valid ? false : true;
1004 
1005     for (int i=0; i < ranges.length; i+=2)
1006         for (int j=ranges[i]; j <= ranges[i+1]; ++j)
1007             res[j] = valid ? true : false;
1008     return res;
1009 }
1010 
1011 ///
1012 @("buildValidCharMap")
1013 @safe unittest
1014 {
1015     string ranges = "\0 \"\"(),,//:@[]{{}}\x7f\xff";
1016     assert(buildValidCharMap(ranges, false) ==
1017         cast(bool[])[
1018             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1019             0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
1020             0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,
1021             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,
1022             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1023             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1024             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1025             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
1026         ]);
1027 
1028     assert(buildValidCharMap(ranges, true) ==
1029         cast(bool[])[
1030             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1031             1,0,1,0,0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,0,0,0,0,0,0,0,1,1,1,1,1,1,
1032             1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,1,1,0,0,
1033             0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1,0,1,0,1,
1034             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1035             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1036             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1037             1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
1038         ]);
1039 }
1040 
1041 /*
1042  * Advances index over the token to the next character while checking for valid characters.
1043  * On success, buffer index is left on the next character.
1044  *
1045  * Params:
1046  *   - ranges = ranges of characters to stop on
1047  *   - next  = next character/s to stop on (must be present in the provided ranges too)
1048  *   - sseRanges =
1049  *         as SSE optimized path is limited to 8 pairs, here one can provide merged ranges for a fast
1050  *         SSE path that would be precised with `ranges`. Otherwise `ranges` is used for SSE path too.
1051  *
1052  * Returns:
1053  *     * 0 on success
1054  *     * -1 when token hasn't been found (ie not enough data in the buffer)
1055  *     * -2 when character from invalid ranges was found but not matching one of next characters (ie invalid token)
1056  */
1057 int parseToken(string ranges, alias next, string sseRanges = null, C)(const(C)[] buffer, ref size_t i) pure
1058     if (is(C == ubyte) || is(C == char))
1059 {
1060     version (DigitalMars) {
1061         static if (__VERSION__ >= 2094) pragma(inline, true); // older compilers can't inline this
1062     } else pragma(inline, true);
1063 
1064     immutable charMap = parseTokenCharMap!(ranges)();
1065 
1066     static if (LDC_with_SSE42)
1067     {
1068         // CT function to prepare input for SIMD vector enum
1069         static byte[16] padRanges()(string ranges)
1070         {
1071             byte[16] res;
1072             // res[0..ranges.length] = cast(byte[])ranges[]; - broken on macOS betterC tests
1073             foreach (i, c; ranges) res[i] = cast(byte)c;
1074             return res;
1075         }
1076 
1077         static if (sseRanges) alias usedRng = sseRanges;
1078         else alias usedRng = ranges;
1079         static assert(usedRng.length <= 16, "Ranges must be at most 16 characters long");
1080         static assert(usedRng.length % 2 == 0, "Ranges must have even number of characters");
1081         enum rangesSize = usedRng.length;
1082         enum byte16 rngE = padRanges(usedRng);
1083 
1084         if (_expect(buffer.length - i >= 16, true))
1085         {
1086             size_t left = (buffer.length - i) & ~15; // round down to multiple of 16
1087             byte16 ranges16 = rngE;
1088 
1089             do
1090             {
1091                 byte16 b16 = () @trusted { return cast(byte16)_mm_loadu_si128(cast(__m128i*)&buffer[i]); }();
1092                 immutable r = _mm_cmpestri(
1093                     ranges16, rangesSize,
1094                     b16, 16,
1095                     _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS
1096                 );
1097 
1098                 if (r != 16)
1099                 {
1100                     i += r;
1101                     goto FOUND;
1102                 }
1103                 i += 16;
1104                 left -= 16;
1105             }
1106             while (_expect(left != 0, true));
1107         }
1108     }
1109     else
1110     {
1111         // faster unrolled loop to iterate over 8 characters
1112         loop: while (_expect(buffer.length - i >= 8, true))
1113         {
1114             static foreach (_; 0..8)
1115             {
1116                 if (_expect(!charMap[buffer[i]], false)) goto FOUND;
1117                 ++i;
1118             }
1119         }
1120     }
1121 
1122     // handle the rest
1123     if (_expect(i >= buffer.length, false)) return -1;
1124 
1125     FOUND:
1126     while (true)
1127     {
1128         static if (is(typeof(next) == char)) {
1129             static assert(!charMap[next], "Next character is not in ranges");
1130             if (buffer[i] == next) return 0;
1131         } else {
1132             static assert(next.length > 0, "Next character not provided");
1133             static foreach (c; next) {
1134                 static assert(!charMap[c], "Next character is not in ranges");
1135                 if (buffer[i] == c) return 0;
1136             }
1137         }
1138         if (_expect(!charMap[buffer[i]], false)) return -2;
1139         if (_expect(++i == buffer.length, false)) return -1;
1140     }
1141 }
1142 
1143 ///
1144 @("parseToken")
1145 @safe unittest
1146 {
1147     size_t idx;
1148     string buf = "foo\nbar";
1149     auto ret = parseToken!("\0\037\177\377", "\r\n")(buf, idx);
1150     assert(ret == 0); // no error
1151     assert(idx == 3); // index of newline character
1152 
1153     idx = 0;
1154     ret = parseToken!("\0\037\177\377", "\r\n")(buf[0..3], idx);
1155     assert(ret == -1); // not enough data to find next character
1156     assert(idx == 3);
1157 
1158     idx = 0;
1159     buf = "foo\t\nbar";
1160     ret = parseToken!("\0\037\177\377", "\r\n")(buf, idx);
1161     assert(ret == -2); // invalid character '\t' found in token
1162     assert(idx == 3); // invalid character on index 3
1163 }
1164 
1165 private immutable(bool[256]) parseTokenCharMap(string invalidRanges)() {
1166     static immutable charMap = buildValidCharMap(invalidRanges);
1167     return charMap;
1168 }