UtfBaseText

Undocumented in source.

package const

/**
 * D source text for the UTF-8 / UTF-16 index helpers (UTF8index, UTF8shift,
 * stride lookup, code point navigation and decoding).
 *
 * The text is kept in a WYSIWYG string; presumably it is pulled into other
 * modules with a string mixin - confirm at the use sites.
 *
 * NOTE(review): the special line directive at the top of the text pins
 * diagnostics to line numbers of UtfBase.d. The text below has been
 * re-flowed and commented, so reported line numbers may no longer match
 * that file exactly - confirm if this matters.
 */
auto UtfBaseText = `
# line 11 "java\nonstandard\UtfBase.d"
import java.lang.util;

version(Tango){
    static import tango.text.convert.Utf;
} else { // Phobos
    static import std.utf;
    static import std.conv;
}

///The Universal Character Set (UCS), defined by the International Standard ISO/IEC 10646
/*typedef*/alias ptrdiff_t UCSindex;
alias UCSindex UCSshift;

static if(UTFTypeCheck) {
    ///UTF-16 (16-bit Unicode Transformation Format)
    /*struct UTF16index {
        ptrdiff_t internalValue;
        alias internalValue val;

        private static UTF16index opCall(ptrdiff_t _val) {
            UTF16index t = { _val };
            return t;
        }

        void opOpAssign(string op)(in UTF16shift di) if (op == "+") {
            val += di;
        }

        void opOpAssign(string op)(in UTF16shift di) if (op == "-") {
            val -= di;
        }

        mixin(constFuncs!("
            UTF16index opBinary(string op)(in UTF16shift di) if (op == \"+\") {
                return UTF16index(val + di);
            }
            UTF16index opBinary(string op)(in UTF16shift di) if (op == \"-\") {
                return UTF16index(val - di);
            }
            version(Windows) {
                UTF16index opBinary(string op)(in ptrdiff_t di) if (op == \"+\") {
                    return UTF16index(val + di);
                }
                UTF16index opBinary(string op)(in ptrdiff_t di) if (op == \"-\") {
                    return UTF16index(val - di);
                }
            }
            int opCmp(in UTF16index i2) {
                return (val > i2.val) - (val < i2.val);
            }
        "));
    }*/
    alias ptrdiff_t UTF16index;
    alias ptrdiff_t UTF16shift;

    ///UTF-8 (UCS Transformation Format — 8-bit)
    //typedef ptrdiff_t UTF8index;
    //alias UTF8index UTF8shift;
    ///Distinct wrapper type for a byte position inside a UTF-8 string.
    struct UTF8index {
        ptrdiff_t internalValue;
        alias internalValue val;

        // Only code that can see this private opCall can build an index from a raw integer.
        private static UTF8index opCall(ptrdiff_t _val) {
            UTF8index t = { _val };
            return t;
        }

        void opOpAssign(string op)(in UTF8shift di) if (op == "+") {
            val += di.val;
        }

        void opOpAssign(string op)(in UTF8shift di) if (op == "-") {
            val -= di.val;
        }

        // index +/- shift gives an index; index - index gives a shift.
        // opCmp compares the two values explicitly instead of casting the
        // ptrdiff_t difference to int, which could truncate and report the
        // wrong sign where ptrdiff_t is wider than int.
        mixin(constFuncs!("
            UTF8index opBinary(string op)(in UTF8shift di) if (op == \"+\") {
                return UTF8index(val + di.val);
            }
            UTF8index opBinary(string op)(in UTF8shift di) if (op == \"-\") {
                return UTF8index(val - di.val);
            }
            UTF8shift opBinary(string op)(in UTF8index di) if (op == \"-\") {
                return UTF8shift(val - di.val);
            }
            int opCmp(in UTF8index i2) {
                return (val > i2.val) - (val < i2.val);
            }
        "));
    }

    ///Wraps a raw integer into an UTF8index.
    private UTF8index newUTF8index(ptrdiff_t i) {
        return UTF8index(i);
    }

    ///Returns the raw integer behind an index or shift value.
    private ptrdiff_t val(T)(T i) {
        // UTF16index is a plain ptrdiff_t alias here, so it is cast directly;
        // every other type is expected to expose a val member.
        static if(is(T : UTF16index))
            return cast(ptrdiff_t) i;
        else
            return i.val;
    }

    ///Moves the index one byte back.
    private void dec(ref UTF8index i) {
        --i.val;
    }

    ///Distinct wrapper type for a distance, in bytes, between two UTF8index values.
    struct UTF8shift {
        ptrdiff_t internalValue;
        alias internalValue val;

        private static UTF8shift opCall(ptrdiff_t _val) {
            UTF8shift t = { _val };
            return t;
        }

        void opOpAssign(string op)(in UTF8shift di) if (op == "+") {
            val += di.val;
        }

        void opOpAssign(string op)(in UTF8shift di) if (op == "-") {
            val -= di.val;
        }

        // opCmp compares explicitly; see the note on UTF8index about the truncating cast.
        mixin(constFuncs!("
            UTF8shift opBinary(string op)(in UTF8shift di) if (op == \"+\") {
                return UTF8shift(val + di.val);
            }
            UTF8shift opBinary(string op)(in UTF8shift di) if (op == \"-\") {
                return UTF8shift(val - di.val);
            }
            int opCmp(in UTF8shift di2) {
                return (val > di2.val) - (val < di2.val);
            }
        "));
    }

    ///Public conversion from a raw integer to an UTF8index.
    UTF8index asUTF8index(ptrdiff_t i) {
        return UTF8index(i);
    }

    ///Public conversion from a raw integer to an UTF8shift.
    UTF8shift asUTF8shift(int i) {
        return UTF8shift(i);
    }
} else {
    // Without type checking all index and shift types are plain ptrdiff_t.
    alias ptrdiff_t UTF16index;
    alias ptrdiff_t UTF16shift;
    alias ptrdiff_t UTF8index;
    alias ptrdiff_t UTF8shift;

    private ptrdiff_t val(ptrdiff_t i) {
        return i;
    }

    private void dec(ref UTF8index i) {
        --i;
    }
}

///Returns the byte of s located at index i.
char charByteAt(in char[] s, in UTF8index i) {
    return s[val(i)];
}

///Returns the index -1, i.e. the position just before the first byte.
UTF8index preFirstIndex(in char[] s) {
    return cast(UTF8index) -1;
}

///Returns the index 0.
UTF8index firstIndex(in char[] s) {
    return cast(UTF8index) 0;
}

///Returns the index equal to the length of s (one past the last byte).
UTF8index endIndex(in char[] s) {
    return cast(UTF8index) cast(int)/*64bit*/s.length;
}

///Returns the index one code point before the end index of s.
UTF8index beforeEndIndex(in char[] s) {
    return s.offsetBefore(s.endIndex());
}

//These variables aren't in TLS so it can be used only for writing
mixin(gshared!("
    private UCSindex UCSdummyShift;
    private UTF8shift UTF8dummyShift;
    private UTF16shift UTF16dummyShift;
"));

// Sequence length indexed by the value of the first byte:
//   0x00-0x7F -> 1, 0xC0-0xDF -> 2, 0xE0-0xEF -> 3,
//   0xF0-0xF7 -> 4, 0xF8-0xFB -> 5, 0xFC-0xFD -> 6.
// 0xFF (for 0x80-0xBF and 0xFE-0xFF) marks a byte that
// isUTF8sequenceStart does not accept as the start of a sequence.
private const ubyte[256] p_UTF8stride = [
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,0xFF,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
    3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,
    4,4,4,4,4,4,4,4,5,5,5,5,6,6,0xFF,0xFF,
];

// Builds the diagnostic text used by UTF8Exception and takeIndexArg.
// It reads the byte of s at i, so i has to be a valid position inside s.
private String toUTF8infoString(in char[] s, UTF8index i) {
    return Format("i = {}, s[i] = {}, s = {}", val(i), cast(ubyte)s.charByteAt(i), cast(ubyte[])s);
}

///Exception whose message is msg followed by a dump of i, the byte at i and the bytes of s.
class UTF8Exception : Exception {
    this( String msg, in char[] s, UTF8index i){
        super( Format("{}:\n{}", msg, toUTF8infoString(s, i)));
    }
}

///Returns true if the byte at i is not marked with 0xFF in the stride table.
bool isUTF8sequenceStart( in char[] s, in UTF8index i ) {
    return p_UTF8stride[s.charByteAt(i)] != 0xFF;
}

///Throws an UTF8Exception unless i is the end index or points at the start of a sequence.
void validateUTF8index( in char[] s, in UTF8index i ) {
    if(i != s.endIndex() && !s.isUTF8sequenceStart(i))
        throw new UTF8Exception("Not a start of an UTF-8 sequence", s, i);
}

///Returns the length in bytes of the UTF-8 sequence starting at i, after validating i.
UTF8shift UTF8strideAt( in char[] s, in UTF8index i ) {
    s.validateUTF8index(i);
    version(Tango) {
        return cast(UTF8shift)p_UTF8stride[s.charByteAt(i)];
    } else { // Phobos
        return cast(UTF8shift)std.utf.stride( s, val(i) );
    }
}

///Returns the length in UTF-16 code units of the sequence starting at i.
UTF16shift UTF16strideAt( in wchar[] s, in UTF16index i ) {
    //s.validateUTF16index(i);
    version(Tango) {
        uint u = s[val(i)];
        // A code unit in 0xD800 .. 0xDBFF adds one more unit to the stride.
        return cast(UTF16shift)(1 + (u >= 0xD800 && u <= 0xDBFF));
    } else { // Phobos
        return cast(UTF16shift)std.utf.stride( s, val(i) );
    }
}

///Returns the number of code points in s.
UCSindex UCScount( in char[] s ){
    version(Tango){
        // Convert to UTF-32 into a scratch buffer and take the resulting length.
        scope dchar[] buf = new dchar[]( s.length );
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( s, buf, &ate );
        assert( ate is s.length );
        return res.length;
    } else { // Phobos
        return cast(UCSindex)/*64bit*/std.utf.count(s);
    }
}

/**
    Returns the distance in bytes between i and the position reached by
    moving dn code points from i (forward for dn > 0, backward for dn < 0).

    Moving backward from index 0 is only accepted as the very last step and
    then yields the position of preFirstIndex. An UTF8Exception is thrown
    when i is not valid, when a forward step passes the end index, or when
    the bytes before the current position do not form one sequence whose
    length matches its stride.
*/
UTF8shift toUTF8shift( in char[] s, in UTF8index i, in UCSshift dn ) {
    s.validateUTF8index(i);
    UTF8index j = i;
    UCSshift tdn = dn;
    if(tdn > 0) {
        do {
            j += s.UTF8strideAt(j);
            if(j > s.endIndex()) {
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): No end of the UTF-8 sequence", dn), s, i);
            }
        } while(--tdn);
    } else if(tdn < 0) {
        do {
            if(!val(j)) {
                // Already at index 0: only one remaining step (to -1) is allowed.
                if(tdn == -1) {
                    j = s.preFirstIndex();
                    break;
                } else {
                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): Can only go down to -1, not {}", dn, tdn), s, i);
                }
            }
            // l counts the bytes walked back until a sequence start is found.
            int l = 0;
            do {
                if(!val(j)) {
                    throw new UTF8Exception(Format("toUTF8shift (dn = {}): No start of the UTF-8 sequence before", dn), s, i);
                }
                ++l;
                dec(j);
            } while(!s.isUTF8sequenceStart(j));
            // The number of bytes walked must equal the stride of the sequence found.
            l -= val(s.UTF8strideAt(j));
            if(l > 0) {
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Overlong UTF-8 sequence before", dn), s, i);
            } else if(l < 0) {
                throw new UTF8Exception(Format("toUTF8shift (dn = {}): Too short UTF-8 sequence before", dn), s, i);
            }
        } while(++tdn);
    }
    return j - i;
}

///Returns the index one code point before i.
UTF8index offsetBefore( in char[] s, in UTF8index i ) {
    return i + s.toUTF8shift(i, -1);
}

///Returns the index one code point after i.
UTF8index offsetAfter( in char[] s, in UTF8index i ) {
    return i + s.toUTF8shift(i, 1);
}

/**
    If the index is in a middle of an UTF-8 byte sequence, it
    will return the position of the first byte of this sequence.
*/
void adjustUTF8index( in char[] s, ref UTF8index i ){
    if(i == s.endIndex() || s.isUTF8sequenceStart(i))
        return;

    int l = 0;
    // res is just another name for the ref parameter i, so i itself is moved back.
    alias i res;
    do {
        if(!val(res))
            throw new UTF8Exception("adjustUTF8index: No start of the UTF-8 sequence", s, i);
        ++l;
        dec(res);
    } while(!s.isUTF8sequenceStart(res));
    l -= val(s.UTF8strideAt(i));
    if(l > 0)
        throw new UTF8Exception("adjustUTF8index: Overlong UTF-8 sequence", s, i);
}

/**
    Converts the integer i_arg into an UTF8index for s.

    When 0 < i_arg < s.length the index is passed through adjustUTF8index;
    if that changed it, a warning mentioning location is written to the
    logger returned by getDwtLogger, using F and L as file and line.
*/
UTF8index takeIndexArg(String F = __FILE__, uint L = __LINE__)(String s, int i_arg, String location) {
    UTF8index res = cast(UTF8index) i_arg;
    if(i_arg > 0 && i_arg < s.length) {
        auto t = res;
        s.adjustUTF8index(res);
        if(t != res)
            getDwtLogger().warn(F, L, Format("Fixed invalid UTF-8 index at {}:\nnew i = {}, {}", location, val(res), toUTF8infoString(s, t)));
    }
    return res;
}

/**
    Decodes the code point of s that starts at i.
    stride receives the number of bytes consumed.
*/
dchar dcharAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
    s.validateUTF8index(i);
    auto str = s[val(i) .. $];
    version(Tango){
        dchar[1] buf;
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
        assert( ate > 0 && res.length is 1 );
        stride = cast(UTF8shift)ate;
        return res[0];
    } else { // Phobos
        size_t ate = 0;
        dchar res = std.utf.decode(str, ate);
        stride = cast(UTF8shift)cast(int)/*64bit*/ate;
        return res;
    }
}

/**
    Decodes the code point of the UTF-16 text s that starts at i.
    stride receives the number of code units consumed.
*/
dchar dcharAt( in wchar[] s, in UTF16index i, out UTF16shift stride = UTF16dummyShift ) {
    //s.validateUTF16index(i);
    auto str = s[val(i) .. $];
    version(Tango){
        dchar[1] buf;
        uint ate;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
        assert( ate > 0 && res.length is 1 );
        stride = cast(UTF16shift)ate;
        if( ate is 0 || res.length is 0 ){
            getDwtLogger().trace( __FILE__, __LINE__, "str.length={} str={:X2}", str.length, cast(ubyte[])str );
        }
        return res[0];
    } else { // Phobos
        size_t ate = 0;
        dchar res = std.utf.decode(str, ate);
        stride = cast(UTF16shift)ate;
        return res;
    }
}

///Returns the code point that starts one code point before i.
dchar dcharBefore( in char[] s, in UTF8index i ) {
    return s.dcharAt(s.offsetBefore(i));
}

///Returns the code point that starts one code point after i.
dchar dcharAfter( in char[] s, in UTF8index i ) {
    return s.dcharAt(i + s.toUTF8shift(i, 1));
}

///Get that String, that contains the next codepoint of a String.
String dcharAsStringAt( in char[] s, in UTF8index i, out UTF8shift stride = UTF8dummyShift ) {
    s.validateUTF8index(i);
    auto str = s[val(i) .. $];
    uint ate;
    version(Tango){
        dchar[1] buf;
        dchar[] res = tango.text.convert.Utf.toString32( str, buf, &ate );
    } else { // Phobos
        ate = std.utf.stride( str, 0 );
    }
    stride = cast(UTF8shift)ate;
    return str[ 0 .. ate ]._idup();
}
`;

UtfBaseText

Meta

Source