1 /**
2  * Some helper functions to work with strings
3  */
module bc.string.string;
5 
6 import bc.core.intrinsics;
7 import bc.core.memory : enforceMalloc, enforceRealloc, heapAlloc, heapDealloc;
8 import core.atomic : atomicOp;
9 import std.range : ElementEncodingType, hasLength, isInputRange;
10 import std.traits : ForeachType, isSomeChar, isSomeString, isStaticArray, Unqual;
11 // debug import core.stdc.stdio;
12 
13 nothrow @nogc:
14 
/// Read-only slice of `char` code units.
alias CString = const(char)[];
16 
/**
 * Evaluates to `true` when `S` is an input range, a string or a static array
 * whose encoding element type is some character type.
 */
template isAcceptableString(S)
{
    // container shape check first, element type check second
    enum bool hasSupportedShape = isInputRange!S || isSomeString!S || isStaticArray!S;
    enum bool hasCharElements = isSomeChar!(ElementEncodingType!S);

    enum isAcceptableString = hasSupportedShape && hasCharElements;
}
23 
/**
 * Temporary string buffer.
 * It can be used to build temporary \0 ended C strings.
 * Strings of up to 255 code units (plus the terminator) are kept in the inline
 * 256 element array, longer ones in a heap buffer that is released by the destructor.
 *
 * NOTE: be careful that pointer becomes invalid as soon as the struct comes out of scope!
 * NOTE: inspired by std.internal.cstring.TempCStringBuffer in Phobos library
 */
struct TempCString(C)
{
    @trusted pure nothrow @nogc:

    @disable this();
    @disable this(this);
    alias ptr this;

    /// Pointer to the first code unit: the inline buffer when `_ptr` holds the `useStack` marker, `_ptr` itself otherwise.
    @property inout(C)* bufPtr() inout
    {
        if (_ptr == useStack) return _buf.ptr;
        return _ptr;
    }

    /// Read-only variant of `bufPtr`, also used for the implicit conversion (`alias this`).
    @property const(C)* ptr() const { return bufPtr; }

    /// Slice over the stored code units, without the trailing '\0'.
    const(C)[] opIndex() const pure
    {
        const first = bufPtr;
        return first[0 .. _length];
    }

    /// Releases the heap buffer; nothing to do when the inline buffer is in use.
    ~this()
    {
        if (_ptr == useStack) return;

        import core.memory : pureFree;
        pureFree(_ptr);
    }

    private:
    C* _ptr;        // heap buffer, null, or the `useStack` marker
    size_t _length; // number of code units without the terminator
    C[256] _buf;    // inline storage

    // marker value stored in `_ptr` to say that `_buf` holds the data
    enum C* useStack = () @trusted { return cast(C*)size_t.max; }();

    // hands out a void-initialized instance; the caller has to set `_ptr` and `_length`
    static TempCString initialize() { TempCString res = void; return res; }
}
65 
/// ditto
auto tempCString(C = char, S)(scope S str) if (isAcceptableString!S)
{
    alias SrcChar = Unqual!(ElementEncodingType!S);
    auto result = TempCString!C.initialize();

    // a null string is represented by a null pointer, not by an empty buffer
    static if (isSomeString!S)
    {
        if (str is null)
        {
            result._ptr = null;
            result._length = 0;
            return result;
        }
    }

    static if (C.sizeof == SrcChar.sizeof && is(typeof(result._buf[0 .. str.length] = str[])))
    {
        // same code unit size: plain slice copy
        if (str.length >= result._buf.length)
        {
            // too long for the inline buffer (terminator included)
            result._ptr = () @trusted {
                auto mem = cast(C*)enforceMalloc((str.length + 1) * C.sizeof);
                mem[0 .. str.length] = str[];
                mem[str.length] = 0;
                return cast(C*)mem;
            }();
        }
        else
        {
            result._buf[0..str.length] = str[];
            result._buf[str.length] = 0;
            result._ptr = result.useStack;
        }
        result._length = str.length;
        return result;
    }
    else
    {
        static assert(!(isSomeString!S && SrcChar.sizeof == C.sizeof), "Should be using slice assignment.");
        import bc.internal.utf : byUTF;

        // length hint for the first heap allocation, 0 when unknown
        size_t lengthHint;
        static if (hasLength!S) lengthHint = str.length;

        static if (isSomeString!S)
            auto r = cast(const(SrcChar)[])str;  // because inout(CF) causes problems with byUTF
        else
            alias r = str;

        C[] target = result._buf; // current destination, starts with the inline buffer
        C[] heap;                 // stays null while the inline buffer is sufficient
        size_t count;

        foreach (const unit; r.byUTF!(Unqual!C))
        {
            // always keep one slot free for the terminator
            if (count + 1 == target.length)
            {
                heap = trustedRealloc(target, lengthHint, heap is null);
                target = heap;
            }
            target[count++] = unit;
        }
        target[count] = 0;
        result._length = count;
        result._ptr = (heap is null ? result.useStack : &heap[0]);
        return result;
    }
}
133 
///
@("tempCString")
nothrow @nogc @system unittest
{
    import core.stdc.string : strlen;

    string str = "abc";

    // Intended usage: the temporary lives until the end of the full expression
    assert(strlen(str.tempCString()) == 3);

    // Correct usage: keep the buffer alive in a local variable
    auto tmp = str.tempCString();
    assert(strlen(tmp) == 3); // or `tmp.ptr`, or `tmp.bufPtr`

    // $(RED WARNING): $(RED Incorrect usage)
    // the temporary is destroyed right after the statement, both pointers dangle
    auto pInvalid1 = str.tempCString().ptr;
    const char* pInvalid2 = str.tempCString();
}
153 
@("tempCString - char, wchar, dchar")
nothrow @nogc @trusted unittest
{
    import bc.internal.utf : byCodeUnit;
    import std.algorithm : filter;

    // filtered range into a char buffer
    {
        auto narrow = "baz".byCodeUnit.filter!(ch => ch == 'z').tempCString;
        assert(narrow._length == 1);
        assert(narrow._buf[0] == 'z');
        assert(narrow._buf[1] == '\0');
    }

    // filtered range transcoded into a wchar buffer
    {
        auto wide = "baz".byCodeUnit.filter!(ch => ch == 'z').tempCString!wchar;
        assert(wide._length == 1);
        assert(wide._buf[0] == 'z');
        assert(wide._buf[1] == '\0');
    }

    // plain string transcoded into a dchar buffer
    {
        auto utf32 = "baz".tempCString!dchar;
        assert(utf32._buf[0..3] == "baz"d);
    }
}
179 
@("tempCString - static array")
nothrow @nogc @trusted unittest
{
    import core.stdc.string : strlen;

    // static arrays are accepted directly, no slicing needed at the call site
    immutable(char)[3] str = "abc";
    assert(strlen(str.tempCString()) == 3);
}
188 
/**
 * Refcounted string implementation.
 *
 * It uses malloc for string buffer.
 *
 * Types with `RC` prefix are reference counted, so they can be copied and moved around freely.
 * Types without `RC` prefix have a disabled postblit and can only be moved (passing ownership) or cloned.
 *
 * There are variants with `W` and `D` before `String` that correspond to `wchar` and `dchar` payloads as usual.
 *
 * Types ending with `Z` internally manage a trailing '\0' and so can be safely used with C interop.
 *
 * NOTE: Beware of keeping an exposed data pointer across further appends, as the internal buffer can be
 * reallocated / resized when more content is added.
 */
alias RCString = StringImpl!(char, RC.yes, Zero.no);

/// ditto
alias RCWString = StringImpl!(wchar, RC.yes, Zero.no);

/// ditto
alias RCDString = StringImpl!(dchar, RC.yes, Zero.no);

/// ditto
alias RCStringZ = StringImpl!(char, RC.yes, Zero.yes);

/// ditto
alias RCWStringZ = StringImpl!(wchar, RC.yes, Zero.yes);

/// ditto
alias RCDStringZ = StringImpl!(dchar, RC.yes, Zero.yes);

/**
 * String with unique ownership implementation.
 *
 * Similar to RCString but it can only be moved, passing its ownership.
 * Furthermore it uses a buffer of 512 code units embedded in the struct for short strings.
 */
alias String = StringImpl!(char, RC.no, Zero.no);

/// ditto
alias WString = StringImpl!(wchar, RC.no, Zero.no);

/// ditto
alias DString = StringImpl!(dchar, RC.no, Zero.no);

/// ditto
alias StringZ = StringImpl!(char, RC.no, Zero.yes);

/// ditto
alias WStringZ = StringImpl!(wchar, RC.no, Zero.yes);

/// ditto
alias DStringZ = StringImpl!(dchar, RC.no, Zero.yes);

// Selects the reference counted (`yes`) or the unique ownership (`no`) flavor of StringImpl
private enum RC { no, yes }
// Selects whether StringImpl maintains a trailing '\0' behind the data
private enum Zero { no, yes }
245 
/*
 * Shared implementation behind all the string aliases.
 *
 * Params:
 *   C    = code unit type of the stored data
 *   rc   = RC.yes for the reference counted flavor with a heap allocated payload,
 *          RC.no for the unique ownership flavor with an embedded buffer for short strings
 *   zero = Zero.yes to keep the data terminated with '\0'
 */
private struct StringImpl(C, RC rc, Zero zero)
{
    @safe nothrow @nogc:

    // number of code units kept free behind the data for the trailing '\0'
    static if (zero) enum Z = 1;
    else enum Z = 0;

    static if (rc)
    {
        private
        {
            // state shared by all copies of one string
            struct Payload
            {
                shared size_t refs; // number of instances referencing this payload
                size_t len;         // used length in code units, terminator excluded
                C[] buf;            // buffer obtained from enforceMalloc / enforceRealloc

                ~this() @trusted pure nothrow @nogc
                {
                    import core.memory : pureFree;
                    if (buf) pureFree(buf.ptr);
                }
            }

            Payload* pay;
        }

        /// Copy constructor, shares the payload and increments its reference count
        this(ref return scope StringImpl rhs) pure @safe
        {
            if (rhs.pay)
            {
                this.pay = rhs.pay;
                atomicOp!"+="(this.pay.refs, 1);
            }
        }

        /// Destructor, releases the payload with the last reference
        ~this()
        {
            if (pay && atomicOp!"-="(pay.refs, 1) == 0) heapDealloc(pay);
        }
    }
    else
    {
        private
        {
            enum STACK_LEN = 512;
            size_t len;             // used length in code units, terminator excluded
            C[STACK_LEN] stackBuf;  // holds the data while `len + Z <= STACK_LEN`
            C[] buf;                // heap buffer, null until it is needed for the first time
            bool useStackBuf;
            alias pay = typeof(this); // to access fields through pay.xx too
        }

        ~this() pure @trusted
        {
            import core.memory : pureFree;
            if (buf) pureFree(buf.ptr);
        }

        @disable this(this);

        // constructor used by move
        private this(C[] sbuf, C[] buf, size_t len)
        {
            this.stackBuf[0..sbuf.length] = sbuf[];
            this.buf = buf;
            this.len = len;
            // the terminator is not part of `sbuf`, so restore it when the data lives in the stack buffer
            static if (zero)
            {
                if (len + Z <= STACK_LEN) this.stackBuf[len] = 0;
            }
        }

        /// Passes ownership of the content to the returned instance, this one is left empty.
        StringImpl move() scope @trusted
        {
            import std.algorithm : min;
            auto obuf = buf;
            auto olen = len;
            buf = null;
            len = 0;
            return StringImpl(stackBuf[0..min(STACK_LEN, olen)], obuf, olen);
        }

        /// Returns: new instance holding a copy of the current content
        StringImpl clone() scope @trusted
        {
            return StringImpl(this[]);
        }
    }

    /**
     * Constructor for cases when we know prior to the creation total length of the future string.
     * It preallocates internal buffer with `initialSize`.
     */
    this(size_t initialSize) pure
    {
        static if (rc) pay = heapAlloc!Payload(1, 0);
        immutable len = initialSize + Z;
        static if (!rc) {
            if (len <= STACK_LEN) return; // we can use stack buffer for that
        }
        pay.buf = () @trusted { return (cast(C*)enforceMalloc(len * C.sizeof))[0..len]; }();
    }

    /// Creates the string with the content of `str`, see `put`.
    this(S)(auto ref scope S str)
    {
        put(str);
    }

    /**
     * Creates the string from the provided arguments formatted with `nogcFormatTo`.
     */
    static StringImpl from(ARGS...)(auto ref ARGS args)
    {
        import bc.string.format : getFormatSize, nogcFormatTo;

        size_t total;
        // calculate total size needed so we don't have to reallocate
        static foreach (a; args) total += getFormatSize(a);

        // and format arguments to the preallocated string
        auto ret = StringImpl(total);
        static foreach (a; args) ret.nogcFormatTo(a);
        return ret;
    }

    alias data this;

    /**
     * Access internal string including the reserved block if any.
     *
     * Returns: slice of the used part of the buffer, `null` for an empty string
     */
    @property inout(C)[] data() pure inout
    {
        if (!length) return null;

        static if (!rc) {
            if (len + Z <= STACK_LEN) return stackBuf[0..len];
        }

        assert(pay.buf);
        return pay.buf[0..pay.len];
    }

    static if (zero)
    {
        /// Pointer to string data that can be directly used in C functions expecting a terminating '\0'.
        /// Returns `null` for an empty string.
        @property inout(C*) ptr() pure inout @trusted
        {
            if (!length) return null;
            static  if (!rc) {
                if (len + Z <= STACK_LEN) return stackBuf.ptr;
            }
            return pay.buf.ptr;
        }
    }

    /// Slicing support for the internal buffer data
    @property inout(C)[] opSlice() pure inout
    {
        return this.data;
    }

    /// ditto
    @property inout(C)[] opSlice(size_t start, size_t end) pure inout
    {
        if (start > length || end > length) assert(0, "Index out of bounds");
        if (start > end) assert(0, "Invalid slice indexes");
        return this.data[start .. end];
    }

    /// Indexed access to the buffer data
    @property ref C opIndex(size_t idx) pure return
    {
        if (idx >= length) assert(0, "Index out of bounds");
        return this.data[idx];
    }

    /// opDollar implementation
    alias length opDollar;

    /// Managed string length
    @property size_t length() pure const
    {
        static if (rc)
            return pay ? pay.len : 0;
        else
            return len;
    }

    /// Returns: capacity that can be used without reallocation
    size_t capacity() pure const
    {
        static if (rc)
            return pay ? (pay.buf.length - pay.len - Z) : 0;
        else
            return (buf ? buf.length : STACK_LEN) - pay.len - Z;
    }

    /**
     * Reserves space for requested number of characters that also increments string length.
     * This can be used for example in cases when we need to fill slice of string with some known length data.
     * To return reserved data, use `dropBack`.
     *
     * The reserved code units are not initialized; for the '\0' terminated variants the terminator
     * is written right behind the reserved block.
     */
    void reserve(size_t sz)
    {
        ensureAvail(sz);
        pay.len += sz;

        static if (zero)
        {
            static if (!rc)
            {
                if (len + Z <= STACK_LEN)
                {
                    stackBuf[len] = 0;
                    return;
                }
            }
            pay.buf[pay.len] = 0;
        }
    }

    /**
     * Drops defined amount of characters from the back.
     */
    void dropBack(size_t sz)
    {
        assert(length >= sz, "Not enough data");
        if (!sz) return;

        static if (!rc)
        {
            immutable nlen = len - sz;
            if (nlen + Z <= STACK_LEN)
            {
                // the result lives in the stack buffer
                if (len + Z > STACK_LEN)
                    stackBuf[0..nlen] = buf[0..nlen]; // switch from heap buffer back to stack one
                len = nlen;
                static if (zero) stackBuf[len] = 0;
                return;
            }
        }
        pay.len -= sz;
        static if (zero) pay.buf[pay.len] = 0;
    }

    /**
     * Clears content of the data, but keeps internal buffer as is so it can be used to build another string.
     */
    void clear() pure
    {
        static if (rc) {
            if (pay) pay.len = 0;
        }
        else len = 0;
    }

    alias opOpAssign(string op : "~") = put;

    /// Appends single code unit
    void put(in C val) pure
    {
        static if (!rc)
        {
            if (len + 1 + Z <= STACK_LEN)
            {
                stackBuf[len++] = val;
                static if (zero) stackBuf[len] = 0;
                return;
            }
        }
        ensureAvail(1);
        pay.buf[pay.len++] = val;
        static if (zero) pay.buf[pay.len] = 0;
    }

    /// Appends string, static array or range of characters, transcoded to `C` when needed
    void put(S)(auto ref scope S str) if (isAcceptableString!S)
    {
        alias CF = Unqual!(ElementEncodingType!S);

        static if (C.sizeof == CF.sizeof && is(typeof(pay.buf[0 .. str.length] = str[])))
        {
            // same code unit size and sliceable source: copy in one go
            static if (!rc)
            {
                if (len + str.length + Z <= STACK_LEN)
                {
                    stackBuf[len .. len + str.length] = str[];
                    len += str.length;
                    static if (zero) stackBuf[len] = 0;
                    return;
                }
            }

            ensureAvail(str.length);
            pay.buf[pay.len .. pay.len + str.length] = str[];
            pay.len += str.length;
            static if (zero) pay.buf[pay.len] = 0;
        }
        else
        {
            // copy range one code unit at a time
            import bc.internal.utf : byUTF;
            static if (isSomeString!S)
                auto r = cast(const(CF)[])str;  // because inout(CF) causes problems with byUTF
            else
                alias r = str;

            // use the known source length as a preallocation hint
            static if (hasLength!S) ensureAvail(str.length);

            // the single code unit overload picks the right buffer (stack or heap), grows it when
            // needed and keeps the terminator in place
            foreach (ch; r.byUTF!(Unqual!C)) put(ch);
        }
    }

    // Makes sure that `sz` more code units (plus the terminator when used) can be stored.
    // For the unique ownership flavor it also moves the data from the stack buffer to the heap one
    // as soon as the stack buffer isn't sufficient anymore.
    private void ensureAvail(size_t sz) pure
    {
        static if (__VERSION__ >= 2094) pragma(inline, true);
        else pragma(inline);
        import core.bitop : bsr;
        import std.algorithm : max, min;

        static if (rc)
        {
            if (!pay)
            {
                // allocate new payload with required size
                pay = heapAlloc!Payload(1, 0);
                immutable l = max(sz+Z, 64); // allocates at least 64 code units
                pay.buf = () @trusted { return (cast(C*)enforceMalloc(l * C.sizeof))[0..l]; }();
                return;
            }

            if (pay.len + sz + Z <= pay.buf.length) return; // we can fit in what we've already allocated
        }
        else
        {
            if (len + sz + Z <= STACK_LEN) return; // still fits to stack buffer
            if (buf is null)
            {
                immutable l = max(len + sz + Z, STACK_LEN + 64); // allocates at least 64 code units over
                buf = () @trusted { return (cast(C*)enforceMalloc(l * C.sizeof))[0..l]; }();
                buf[0..len] = stackBuf[0..len]; // copy data from stack buffer,  we'll use heap allocated one from now
                return;
            }
            if (len + Z <= STACK_LEN)
            {
                // some buffer is already preallocated, but we're still on stackBuffer and need to move to heap allocated one
                assert(buf.length > STACK_LEN);
                buf[0..len] = stackBuf[0..len]; // copy current data from the stack
            }

            if (len + sz + Z <= buf.length) return; // we can fit in what we've already allocated
        }

        // reallocate buffer
        // Note: new length calculation taken from std.array.appenderNewCapacity
        immutable ulong mult = 100 + (1000UL) / (bsr((pay.len + sz + Z)) + 1);
        immutable l = cast(size_t)(((pay.len + sz + Z) * min(mult, 200) + 99) / 100);
        // debug printf("realloc %lu -> %lu\n", pay.len, l);
        pay.buf = () @trusted { return (cast(C*)enforceRealloc(pay.buf.ptr, l * C.sizeof))[0..l]; }();
    }
}
631 
/// Creates reference counted string with `C` code units holding the content of `str`.
auto rcString(C = char, S)(auto ref S str)
{
    alias Result = StringImpl!(C, RC.yes, Zero.no);

    Result created;
    created.put(str);
    return created;
}
638 
@("RCString")
@system @nogc unittest
{
    import bc.internal.utf : byCodeUnit;
    import std.algorithm : filter;

    // append a string to a not yet allocated instance
    RCStringZ s;
    s ~= "fo";
    assert(s.pay.len == 2);
    assert(s.pay.buf.length >= 3); // data + terminator

    // append single character
    s ~= 'o';
    assert(s.pay.len == 3);
    assert(s.pay.buf.length >= 4);

    // append another string
    s ~= "bar";
    assert(s.pay.len == 6);
    assert(s.pay.buf.length >= 7);
    assert(s == "foobar");

    // append a lazy range, only 'z' passes the filter
    s ~= "baz".byCodeUnit.filter!(a => a == 'z');
    assert(s.length == "foobarz".length);
    assert(s.data == "foobarz");
    assert(s == "foobarz");
    assert(s.ptr == &s.data[0]);
    assert((s.ptr["foobarz".length]) == 0); // terminator is right behind the data
}
666 
@("RCString.from")
@nogc @safe unittest
{
    // mixed arguments formatted into a char string
    {
        auto formatted = RCString.from("foo", 42, "bar");
        assert(formatted == "foo42bar");
    }

    // wchar flavor
    {
        auto wide = RCWString.from("foo");
        assert(wide == "foo"w);
    }
}
680 
@("rcString")
@nogc @safe unittest
{
    // UFCS helper creating a reference counted string
    auto created = "foo".rcString();
    assert(created == "foo");
}
687 
@("String")
@nogc @safe unittest
{
    // construction and append
    auto s = String("Hello");
    assert(s.capacity == String.stackBuf.length - 5);
    assert(s[] == "Hello", s[]);
    s ~= " String";
    assert(s[] == "Hello String", s[]);

    // clone has the same content in its own storage
    auto copy = s.clone();
    assert(s[] == copy[]);
    () @trusted { assert(s.ptr != copy.ptr); }();

    // move leaves the source empty
    auto moved = s.move();
    assert(s.buf is null);
    assert(s.len == 0);
    assert(moved == "Hello String");
}
705 
@("String - put static array")
@nogc @safe unittest
{
    immutable(char)[3] fixed = "foo";

    // static array is appended without slicing at the call site
    String target;
    target ~= fixed;
    assert(target == "foo");
}
714 
@("String stack to heap")
@nogc @safe unittest
{
    import std.algorithm : each;
    import std.range : repeat;

    // fill the stack buffer completely, the last element is left for the terminator
    StringZ s;
    'a'.repeat(s.stackBuf.length-1).each!(c => s.put(c));
    assert(s.length == s.stackBuf.length-1);
    assert(s.stackBuf[$-2] == 'a');
    assert(s.stackBuf[$-1] == '\0');
    assert(s.buf is null);
    assert(&s.data[0] == &s.stackBuf[0]);

    // one more character doesn't fit anymore, data moves to the heap buffer
    s ~= 'b';
    assert(s.stackBuf[$-1] == '\0'); // doesn't change on stack to heap switch
    assert(s.buf !is null);
    assert(&s.data[0] == &s.buf[0]);
    assert(s.buf[s.stackBuf.length-1] == 'b');
    s ~= "foo";

    // after clear the short content is stored in the stack buffer again
    s.clear();
    s ~= 'c';
    assert(&s.data[0] == &s.stackBuf[0]); // back to stack usage
    assert(s.buf !is null); // but heap buffer is still there

    // and growing over the stack buffer size switches to the kept heap buffer
    'd'.repeat(s.stackBuf.length).each!(c => s.put(c));
    assert(&s.data[0] == &s.buf[0]);
    assert(s.length == 1 + s.stackBuf.length);
    assert(s.buf[1 + s.stackBuf.length] == '\0');
}
744 
@("String reserve")
@nogc @safe unittest
{
    String buf;
    assert(buf.length == 0);
    assert(buf.capacity == buf.stackBuf.length);

    // small reservation stays in the stack buffer
    buf.reserve(64);
    assert(buf.length == 64);
    assert(buf.buf is null);
    buf[][0..3] = "foo";
    buf.dropBack(61); // give back the unused part
    assert(buf[] == "foo");

    // reservation over the stack buffer size moves the data to the heap
    buf.reserve(buf.stackBuf.length);
    assert(buf.buf !is null);
    assert(buf.buf[0..3] == "foo");
    buf.buf[0..3] = "bar";

    // dropping it returns the remaining data to the stack buffer
    buf.dropBack(buf.stackBuf.length);
    assert(buf.buf !is null); // left allocated for reuse
    assert(buf.stackBuf[0..3] == "bar"); // copy from heap
}
765 
/*
 * Grows the buffer used by `tempCString` to one and a half of its current length.
 *
 * Params:
 *   buf          = current buffer
 *   strLength    = known length of the source, used as a lower bound for the first heap allocation
 *   bufIsOnStack = true when `buf` is not a heap buffer yet, so a new one has to be allocated
 *                  and the content copied; the heap buffer is reallocated otherwise
 *
 * Returns: heap buffer of the new length starting with the content of `buf`
 */
private C[] trustedRealloc(C)(scope C[] buf, size_t strLength, bool bufIsOnStack)
    @trusted @nogc pure nothrow
{
    pragma(inline, false);  // because it's rarely called

    import bc.core.memory : enforceMalloc, enforceRealloc;

    size_t grown = buf.length * 3 / 2;

    if (!bufIsOnStack)
    {
        // guard the size computation against overflow before touching the heap buffer
        if (buf.length >= size_t.max / (2 * C.sizeof))
        {
            version (D_Exceptions)
            {
                import core.exception : onOutOfMemoryError;
                onOutOfMemoryError();
            }
            else assert(0, "Memory allocation failed");
        }
        auto resized = cast(C*) enforceRealloc(buf.ptr, grown * C.sizeof);
        return resized[0 .. grown];
    }

    // first switch to the heap: allocate enough for the whole source when its length is known
    if (grown <= strLength)
        grown = strLength + 1; // +1 for terminating 0
    auto fresh = cast(C*) enforceMalloc(grown * C.sizeof);
    fresh[0 .. buf.length] = buf[];
    return fresh[0 .. grown];
}
798 
/**
 * Alternative implementation of `std.string.outdent` that differs in:
 *
 *   * meant for dedent string literals in CT
 *   * if first line is not indented, other lines are dedented still (std.string.outdent returns original text in that case)
 *   * empty lines at the text start are removed
 *
 * Empty lines inside the text are kept as they are and don't take part in the computation of the
 * common indentation. A first line that is indented more than the rest keeps its relative indentation.
 * When some non-empty line other than the first one is not indented, the text is returned unchanged.
 */
template dedent(alias str)
{
    // Returns the first line of `str` including its terminator ("\n" or "\r\n"), null for empty input
    static S getLine(S)(S str)
    {
        if (!str.length) return null;
        for (size_t i = 0; i < str.length; ++i)
        {
            if (str[i] == '\r')
            {
                if (i+1 < str.length && str[i+1] == '\n')
                    return str[0..i+2];
            }
            if (str[i] == '\n') return str[0..i+1];
        }
        return str;
    }

    // strip line whitespace but keep newline characters
    static S stripWS(S)(S str)
    {
        if (!str.length) return null;
        for (size_t i = 0; i < str.length; ++i)
        {
            if (str[i] <= ' ' && str[i] != '\r' && str[i] != '\n') continue;
            return str[i..$];
        }
        return null;
    }

    // true when the line consists of the line terminator only
    static bool isEmptyLine(S)(S ln)
    {
        return (ln.length == 1 && ln[0] == '\n')
            || (ln.length == 2 && ln[0] == '\r' && ln[1] == '\n');
    }

    // Smallest indentation of the lines in `str`, not bigger than `prev`.
    // Evaluates to 0 as soon as a non-empty line without indentation is found.
    template shortestIndent(alias str, size_t prev = size_t.max)
    {
        enum line = getLine(str);
        enum stripped = stripWS(line);
        static if (line.length == 0) enum shortestIndent = prev;
        else static if (isEmptyLine(line))
            enum shortestIndent = shortestIndent!(str[line.length..$], prev); // empty lines carry no indentation info
        else static if (line.length == stripped.length) enum shortestIndent = 0;
        else
        {
            enum cur = prev > line.length - stripped.length ? line.length - stripped.length : prev;
            enum next = shortestIndent!(str[line.length..$], cur);
            enum shortestIndent = cur > next ? next : cur;
        }
    }

    // Removes up to `indent` leading whitespace characters from every line of `str`
    template dedentNext(alias str, size_t indent)
    {
        enum ln = getLine(str);
        static if (!ln.length)
            enum dedentNext = null;
        else
        {
            // never cut into the line content (ie empty lines have nothing to remove)
            enum own = ln.length - stripWS(ln).length;
            enum cut = own < indent ? own : indent;
            enum dedentNext = ln[cut..$] ~ dedentNext!(str[ln.length..$], indent);
        }
    }

    enum line = getLine(str);
    enum stripped = stripWS(line);

    static if (!line.length) enum dedent = null;
    else static if (isEmptyLine(stripped))
        enum dedent = dedent!(str[line.length..$]); // drop first empty lines
    else
    {
        enum firstIndent = line.length - stripped.length;

        // ignore no indentation of the first line
        enum shortest = shortestIndent!(
            str[line.length..$],
            firstIndent == 0 ? size_t.max : firstIndent);

        static if (shortest == 0)
            enum dedent = str; // no indent used
        else
        {
            // the first line loses the common indentation only, so it stays aligned with the others
            enum firstCut = firstIndent < shortest ? firstIndent : shortest;
            enum dedent = line[firstCut..$] ~ dedentNext!(str[line.length..$], shortest);
        }
    }
}
881 
@("dedent")
unittest
{
    // leading empty line is dropped, common indentation removed
    {
        enum input = `
                DELETE FROM elements.element
                WHERE id=ANY($1) AND type_id IN (
                    SELECT id FROM elements.element_type WHERE owner=$2
                )`;

        enum expected =
            "DELETE FROM elements.element\n" ~
            "WHERE id=ANY($1) AND type_id IN (\n" ~
            "    SELECT id FROM elements.element_type WHERE owner=$2\n" ~
            ")";

        static assert(dedent!input == expected);
    }

    // first line without indentation doesn't prevent dedent of the others
    {
        enum input = `DELETE FROM elements.element
                WHERE id=ANY($1) AND type_id IN (
                    SELECT id FROM elements.element_type WHERE owner=$2
                )`;

        enum expected =
            "DELETE FROM elements.element\n" ~
            "WHERE id=ANY($1) AND type_id IN (\n" ~
            "    SELECT id FROM elements.element_type WHERE owner=$2\n" ~
            ")";

        static assert(dedent!input == expected);
    }

    // number of lines is not touched
    {
        static assert(dedent!`
            2
            3
            ` == "2\n3\n"); // first line is dropped, last newline is kept
    }

    // nothing is dedented when some line is not indented
    {
        enum untouched = `aa
            bb
cc`;
        assert(dedent!untouched == untouched);
    }

    // space after the last line text is kept
    {
        assert(dedent!"  foo " == "foo ");
        assert(dedent!`foo
            bar ` == "foo\nbar ");
    }
}
940 
/**
 * Builds valid char map from the provided ranges of invalid ones
 *
 * `invalidRanges` is read as pairs of inclusive lower and upper bounds. Every character is valid
 * unless it falls in one of the ranges.
 *
 * For example when provided with "\0/:\xff" means that only characters 0-9 would have true in the generated map.
 */
bool[256] buildValidCharMap()(string invalidRanges)
{
    assert(invalidRanges.length % 2 == 0, "Uneven ranges");
    bool[256] map = true;

    for (size_t pair = 0; pair < invalidRanges.length; pair += 2)
    {
        immutable size_t first = invalidRanges[pair];
        immutable size_t last = invalidRanges[pair + 1];

        // inclusive upper bound; nothing is marked when the bounds are reversed
        foreach (code; first .. last + 1)
            map[code] = false;
    }
    return map;
}
956 
///
@("buildValidCharMap")
@safe unittest
{
    // pairs of inclusive bounds of the invalid characters
    string invalid = "\0 \"\"(),,//:@[]{{}}\x7f\xff";

    // one row per 32 characters, 1 marks a valid one
    assert(buildValidCharMap(invalid) ==
        cast(bool[])[
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,1,0,1,1,1,1,1,0,0,1,1,0,1,1,0,1,1,1,1,1,1,1,1,1,1,0,0,0,0,0,0,
            0,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,0,0,1,1,
            1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,0,1,0,1,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
            0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,
        ]);
}
974 
/*
 * Advances the index over a token up to its terminating character while validating each
 * character of the token on the way.
 * On success, the buffer index is left on the terminating (`next`) character.
 *
 * Params:
 *   - ranges = pairs of characters (inclusive lower and upper bound) that are not allowed in the token
 *   - next  = character/s that terminate the token (must be present in the provided ranges too)
 *   - sseRanges =
 *         as the SSE optimized path is limited to 8 pairs, merged (coarser) ranges can be provided
 *         here for the fast SSE scan; what it finds is then rechecked precisely against `ranges`.
 *         When null or empty, `ranges` is used for the SSE path too.
 *   - buffer = data to scan
 *   - i = index to start scanning from, advanced as characters are consumed
 *
 * Returns:
 *     * 0 on success
 *     * -1 when token hasn't been found (ie not enough data in the buffer, or `i` is at/behind its end)
 *     * -2 when character from invalid ranges was found but not matching one of next characters (ie invalid token)
 */
int parseToken(string ranges, alias next, string sseRanges = null, C)(const(C)[] buffer, ref size_t i) pure
    if (is(C == ubyte) || is(C == char))
{
    version (DigitalMars) {
        static if (__VERSION__ >= 2094) pragma(inline, true); // older compilers can't inline this
    } else pragma(inline, true);

    // lookup table: true for characters allowed in the token, false for the ones listed in `ranges`
    immutable charMap = parseTokenCharMap!(ranges)();

    // Guard the fast paths below: `buffer.length - i` is unsigned, so for an index behind the end
    // of the buffer it would wrap around to a huge value and the scan would read out of bounds.
    if (_expect(i >= buffer.length, false)) return -1;

    static if (LDC_with_SSE42)
    {
        // CT function to prepare input for SIMD vector enum
        static byte[16] padRanges()(string ranges)
        {
            byte[16] res;
            // res[0..ranges.length] = cast(byte[])ranges[]; - broken on macOS betterC tests
            foreach (i, c; ranges) res[i] = cast(byte)c;
            return res;
        }

        // Test the length, not just the reference: an empty `sseRanges` must fall back to `ranges`,
        // otherwise the SIMD compare would get an empty range set that matches nothing.
        static if (sseRanges.length) alias usedRng = sseRanges;
        else alias usedRng = ranges;
        static assert(usedRng.length <= 16, "Ranges must be at most 16 characters long");
        static assert(usedRng.length % 2 == 0, "Ranges must have even number of characters");
        enum rangesSize = usedRng.length;
        enum byte16 rngE = padRanges(usedRng);

        if (_expect(buffer.length - i >= 16, true))
        {
            size_t left = (buffer.length - i) & ~15; // round down to multiple of 16
            byte16 ranges16 = rngE;

            do
            {
                // unaligned load of the next 16 characters
                byte16 b16 = () @trusted { return cast(byte16)_mm_loadu_si128(cast(__m128i*)&buffer[i]); }();
                immutable r = _mm_cmpestri(
                    ranges16, rangesSize,
                    b16, 16,
                    _SIDD_LEAST_SIGNIFICANT | _SIDD_CMP_RANGES | _SIDD_UBYTE_OPS
                );

                // 16 means that no character of this block falls into the ranges
                if (r != 16)
                {
                    i += r;
                    goto FOUND;
                }
                i += 16;
                left -= 16;
            }
            while (_expect(left != 0, true));
        }
    }
    else
    {
        // faster unrolled loop to iterate over 8 characters
        while (_expect(buffer.length - i >= 8, true))
        {
            static foreach (_; 0..8)
            {
                if (_expect(!charMap[buffer[i]], false)) goto FOUND;
                ++i;
            }
        }
    }

    // handle the rest (the fast path might have consumed the whole buffer)
    if (_expect(i >= buffer.length, false)) return -1;

    FOUND:
    // character by character scan: terminator check first, precise validation after it
    while (true)
    {
        static if (is(typeof(next) == char)) {
            static assert(!charMap[next], "Next character is not in ranges");
            if (buffer[i] == next) return 0;
        } else {
            static assert(next.length > 0, "Next character not provided");
            static foreach (c; next) {
                static assert(!charMap[c], "Next character is not in ranges");
                if (buffer[i] == c) return 0;
            }
        }
        if (_expect(!charMap[buffer[i]], false)) return -2;
        if (_expect(++i == buffer.length, false)) return -1;
    }
}
1076 
///
@("parseToken")
@safe unittest
{
    // characters 0 .. 31 and 127 .. 255 are not allowed within the token
    enum invalid = "\0\037\177\377";

    // token terminated with a newline
    size_t pos;
    string data = "foo\nbar";
    auto res = parseToken!(invalid, "\r\n")(data, pos);
    assert(res == 0); // no error
    assert(pos == 3); // index of newline character

    // terminator is missing in the provided data
    pos = 0;
    res = parseToken!(invalid, "\r\n")(data[0..3], pos);
    assert(res == -1); // not enough data to find next character
    assert(pos == 3);

    // token contains a character from the invalid ranges that isn't a terminator
    pos = 0;
    data = "foo\t\nbar";
    res = parseToken!(invalid, "\r\n")(data, pos);
    assert(res == -2); // invalid character '\t' found in token
    assert(pos == 3); // invalid character on index 3
}
1098 
// Returns the map of characters valid within a token for the given `invalidRanges`
// (true = valid). The map is a static immutable, so it is built during compilation,
// once per distinct `invalidRanges` template argument.
private immutable(bool[256]) parseTokenCharMap(string invalidRanges)()
{
    static immutable bool[256] validChars = buildValidCharMap(invalidRanges);
    return validChars;
}