Difference between revisions of "UTF-8"

From Second Life Wiki
Jump to navigation Jump to search
 
m (wrong version ^^;)
Line 5: Line 5:
//===================================================//
//===================================================//
//                Combined Library                  //
//                Combined Library                  //
//            "May  4 2007", "20:06:45"            //
//            "May  4 2007", "20:17:12"            //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//    http://creativecommons.org/licenses/by/3.0/    //
Line 24: Line 24:
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),["%"],[])) >> 1))));
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),["%"],[])) >> 1))));
     return result;
     return result >> 24;
}
}


Line 41: Line 41:
     else if (input >= 0x800)
     else if (input >= 0x800)
         bytes = 2;
         bytes = 2;
     while((count = ~-count) < bytes)
     while((count = -~count) < bytes)
         result = "%" + byte2hex((((input >> (6 * count)) | 0x80) & 0xBF)) + result;
         result = "%" + byte2hex((((input >> (6 * count)) | 0x80) & 0xBF)) + result;
     result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) * !!bytes)) + result;
     result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) * !!bytes)) + result;

Revision as of 17:17, 4 May 2007

SL uses UTF-8 for storing and transmitting strings; with these functions you can work with Unicode characters.

These functions are part of the Combined Library (which includes many other functions written by Strife).

//===================================================//
//                 Combined Library                  //
//             "May  4 2007", "20:17:12"             //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//===================================================//
//{

// Decodes the first character of `input` into an integer code value.
// Only the first character is examined; anything after it is discarded.
// Single-byte characters return the byte value; multibyte characters return
// the 6-bit payload groups of their bytes packed into one integer.
integer UTF8ToUnicodeInteger(string input)
{
    // Trim `input` to its first character and load it as an integer via base64.
    // Presumably the character's leading bytes land in the high-order end of
    // `result` (the sign-bit test and the final ">> 24" both rely on that)
    // -- TODO confirm against llBase64ToInteger's behavior for short inputs.
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000)//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        // NOTE(review): this single expression reassigns both `input` and `result`
        // part-way through, so its value depends on operand evaluation order. It
        // appears to assume right-to-left evaluation of binary operators: the last
        // OR term runs first, and the trailing mask sees the original base64 value
        // of `result`. Confirm before reordering or splitting anything here.
        //
        // Reading it in that (assumed) order:
        //  - last OR term: `input` becomes llEscapeURL(input) with the "%" separators
        //    stripped (a run of hex digits); `result` = everything except the final
        //    8 hex digits, of which bit 0x100 is moved to bit 30;
        //  - next term up: the low 6 bits of that same value are moved to bits 24-29;
        //  - next term up: `result` = the final 8 hex digits; bits 24-29 move to 18-23;
        //  - first three terms: bits 16-21, 8-13 and 0-5 of that value are packed
        //    down to bits 12-17, 6-11 and 0-5.
        // The trailing mask is 0x7FFFFFFF >> (5 * (floor(log2(~result)) - 25));
        // the long constant is ln(2), turning llLog's result into a base-2 logarithm.
        // Presumably this trims the lead byte's length-marker bits -- TODO confirm.
        return  (   (  0x0000003f &  result       ) |
                    (( 0x00003f00 &  result) >> 2 ) | 
                    (( 0x003f0000 &  result) >> 4 ) | 
                    (( 0x3f000000 & (result = (integer)("0x"+llGetSubString(input,-8,-1)))) >> 6 ) | 
                    (( 0x0000003f &  result) << 24) | 
                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input = (string)llParseString2List(llEscapeURL(input),["%"],[]),-8,-1)))) << 22)
                ) & (  0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
// Earlier alternative for the last OR term and the mask (mask derived from the
// length of the escaped string instead of a logarithm); kept for reference only.
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),["%"],[])) >> 1))));
    // Top bit clear: shift the highest byte of `result` down and return it.
    return result >> 24;
}

// Encodes a positive integer as a UTF-8 style byte sequence (one lead byte plus
// up to five continuation bytes) and returns the resulting string.
// Zero and negative inputs yield the empty string.
string UnicodeIntegerToUTF8(integer input)
{
    if (input <= 0)
        return "";

    // How many continuation bytes follow the lead byte (stays 0 below 0x80).
    integer bytes;
    if (input >= 0x4000000)
        bytes = 5;
    else if (input >= 0x200000)
        bytes = 4;
    else if (input >= 0x10000)
        bytes = 3;
    else if (input >= 0x800)
        bytes = 2;
    else if (input >= 0x80)
        bytes = 1;

    // Emit continuation bytes from least to most significant, prepending each
    // one so the final string reads most-significant first.
    string tail = "";
    integer index;
    for (index = 0; index < bytes; ++index)
    {
        // Take the next 6-bit group and force the top two bits to "10".
        integer continuation = ((input >> (6 * index)) | 0x80) & 0xBF;
        tail = "%" + byte2hex(continuation) + tail;
    }

    // Lead byte: the remaining high bits, plus the length marker when the
    // sequence is multibyte. byte2hex keeps only the low 8 bits.
    integer lead = input >> (6 * bytes);
    if (bytes)
        lead = lead | (0x3F80 >> bytes);

    // The bytes were built as %XX escapes; llUnescapeURL turns them into text.
    return llUnescapeURL("%" + byte2hex(lead) + tail);
}

string byte2hex(integer x)
{//Helper function for use with unicode characters.
    // Returns the low 8 bits of x as two hex digits taken from hexc.
    // Both nibbles are computed up front so the result does not depend on the
    // order in which the operands of "+" or the call arguments are evaluated.
    integer hi = (x >> 4) & 0xF;
    integer lo = x & 0xF;
    return llGetSubString(hexc, hi, hi) + llGetSubString(hexc, lo, lo);
}

// Hex digit lookup table: the character at index n (0-15) is the digit for n.
string hexc="0123456789ABCDEF";

//} Combined Library