/**
 * Utilities for working with narrow strings.
 * Safe because of strong type checking.
 *
 * Authors: Denis Shelomovskij <verylonglogin.reg@gmail.com>
 */
module java.nonstandard.SafeUtf;

import java.nonstandard.UtfBase;

// Configuration flag declared before the mixin below.
// NOTE(review): presumably read by the mixed-in UtfBaseText code to enable the
// strongly typed index/shift types — confirm in java.nonstandard.UtfBase.
private const bool UTFTypeCheck = true;
// String mixin; UtfBaseText is expected to come from the import above. Every
// UTF type and helper used by the unittest below is introduced by this mixin.
mixin(UtfBaseText);

// Exercises the mixed-in helpers on one sample text expressed as UTF-8,
// UTF-16 and UTF-32, comparing results against hand-written expectation tables.
unittest {
    // Sample text as UTF-8; the sum lists the code units of each character
    // (1,1 ASCII; 2,2 Cyrillic; 3,3 CJK; 4,4 supplementary plane; 1 ASCII).
    auto s = "abаб回家\U00064321\U00064321d"; assert(s.length == 1+1+2+2+3+3+4+4+1);
    // Same text as UTF-16; only the two \U00064321 characters take 2 code units.
    auto ws = "abаб回家\U00064321\U00064321d"w; assert(ws.length == 1+1+1+1+1+1+2+2+1);
    // Same text as UTF-32: one element per character, used as the reference.
    auto dchars = "abаб回家\U00064321\U00064321d"d;
    // One entry per code unit of `s`: 1 where a character starts, 0 otherwise.
    auto starts = [1, 1, 1,0, 1,0, 1,0,0, 1,0,0, 1,0,0,0, 1,0,0,0, 1];
    // One entry per code unit of `ws`: 1 where a character starts, 0 otherwise.
    auto wstarts = [1, 1, 1, 1, 1, 1, 1,0, 1,0 , 1];
    // Sanity checks: the tables must cover every code unit.
    assert(s.length == starts.length);
    assert(ws.length == wstarts.length);

    // Expected code-unit width of each character in `s`.
    auto strides = [1, 1, 2, 2, 3, 3, 4, 4, 1];
    // Expected code-unit width of each character in `ws`.
    auto wstrides = [1, 1, 1, 1, 1, 1, 2, 2, 1];
    // Expected code-unit offset of each character from the start of `s`
    // (running sum of `strides`).
    auto shifts0 = [0, 1, 1+1, 1+1+2, 1+1+2+2, 1+1+2+2+3, 1+1+2+2+3+3, 1+1+2+2+3+3+4, 1+1+2+2+3+3+4+4];
    // Sanity checks: the per-character tables must have one entry per character.
    assert(strides.length == dchars.length);
    assert(wstrides.length == dchars.length);
    assert(shifts0.length == dchars.length);

    // Index of the most recently seen character start in `s`.
    UTF8index prevStart = 0;
    // Number of character starts seen so far, i.e. the current character's
    // position in the per-character tables.
    UCSindex n = 0;
    // Walk every UTF-8 code unit, including the non-start ones.
    foreach(UTF8index i, char ch; s) {
        // The start/non-start classification must match the table.
        assert(s.isUTF8sequenceStart(i) == starts[i]);
        if(starts[i]) {
            // Called only at character starts; the test relies on it not failing here.
            s.validateUTF8index(i);
            assert(s.UTF8strideAt(i) == strides[n]);
            // Going n characters forward from offset 0 must give this character's offset...
            assert(s.toUTF8shift(0, n) == shifts0[n]);
            // ...and going n characters back from that offset must give its negation.
            assert(s.toUTF8shift(shifts0[n], -n) == -shifts0[n]);
            // Skipped at index 0, where there is no preceding character.
            if(i) assert(s.offsetBefore(i) == prevStart);
            // The prefix before `i` holds n characters; the rest holds the remainder.
            assert(s[0 .. val(i)].UCScount == n);
            assert(s[val(i) .. $].UCScount == strides.length - n);

            // `di` is checked after the call, so dcharAt is expected to store the
            // decoded character's code-unit width in it.
            UTF8shift di;
            assert(s.dcharAt(i, di) == dchars[n]);
            assert(di == strides[n]);
            // Looking backwards from `i` / forwards from `prevStart` must agree
            // with decoding directly at the neighbouring start index.
            if(i) assert(s.dcharBefore(i) == s.dcharAt(prevStart));
            if(i) assert(s.dcharAfter(prevStart) == s.dcharAt(i));
            // The code units that make up the current character.
            auto dcharStr = s[val(i) .. val(i) + strides[n]];
            assert(s.dcharAsStringAt(i, di) == dcharStr && di == dcharStr.length);
            // Re-encoding the decoded character must reproduce the same code units.
            assert(dcharToString(s.dcharAt(i)) == dcharStr);
            prevStart = i;
            ++n;
        }
        // For every code unit, start or not, adjusting the index is expected to
        // yield the start of the character that contains it.
        UTF8index t = i;
        s.adjustUTF8index(t);
        assert(t == prevStart);
    }

    // Repeat the decoding checks for the UTF-16 form of the text.
    n = 0;
    foreach(UTF16index i, wchar ch; ws)
        if(wstarts[i]) {
            // NOTE(review): disabled in the original; it also names `s` rather
            // than `ws` — confirm the intent before re-enabling.
            //s.validateUTF16index(i);
            UTF16shift di;
            assert(ws.dcharAt(i, di) == dchars[n]);
            assert(di == wstrides[n]);
            ++n;
        }

    // Called with the one-past-the-end index; the test relies on this not failing.
    s.validateUTF8index(s.length);
}