Difference between revisions of "UTF-8"

From Second Life Wiki
Jump to navigation Jump to search
Line 2: Line 2:


These functions are part of the Combined Library (which includes many other functions written by Strife.
These functions are part of the Combined Library (which includes many other functions written by Strife.
== Mono Unsafe==
These versions won't work in Mono but will in LSO.
<pre>
<pre>
//===================================================//
//===================================================//
//                Combined Library                  //
//                Combined Library                  //
//            "Nov  2 2007", "22:30:43"            //
//            "Nov  3 2007", "00:51:16"            //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//    http://creativecommons.org/licenses/by/3.0/    //
Line 30: Line 33:
{
{
     integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818);
     integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818);
     string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !(bytes = ((input >= 0x80) * (bytes + (((1 << bytes) - input) <= 0) - 2 ) / 5))));
     string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !(bytes = ((input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5))));
     while (bytes)
     while (bytes)
         result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
         result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
Line 39: Line 42:
{//Helper function for use with unicode characters.
{//Helper function for use with unicode characters.
     return llGetSubString(hexc, x = ((x >> 4) & 0xF), x) + llGetSubString(hexc, x & 0xF, x & 0xF);
     return llGetSubString(hexc, x = ((x >> 4) & 0xF), x) + llGetSubString(hexc, x & 0xF, x & 0xF);
}//This function would benifit greatly from the DUP opcode, it would remove 19 bytes.
string hexc="0123456789ABCDEF";
//} Combined Library
</pre>
==Mono Safe==
These versions are Mono and LSO safe but will not execute as quickly in LSO as the Mono Unsafe versions
<pre>
//===================================================//
//                Combined Library                  //
//            "Nov  3 2007", "00:46:15"            //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//===================================================//
//{
integer UTF8ToUnicodeInteger(string input)//Mono Safe, LSO Safe
{
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000){//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        integer end = (integer)("0x"+llGetSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1));
        integer begin = (integer)("0x"+llDeleteSubString(input,-8,-1));
        return  (  (  0x0000003f &  end      ) |
                    (( 0x00003f00 &  end) >> 2 ) |
                    (( 0x003f0000 &  end) >> 4 ) |
                    (( 0x3f000000 &  end) >> 6 ) |
                    (( 0x0000003f &  begin) << 24) |
                    (( 0x00000100 &  begin) << 22) ) &
                    (0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
    }
    return result >> 24;
}
string UnicodeIntegerToUTF8(integer input)//Mono Safe, LSO Safe
{
    integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818);
    bytes = (input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5;//adjust
    string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !bytes));
    while (bytes)
        result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
    return llUnescapeURL(result);
}
string byte2hex(integer x)//Mono Safe, LSO Safe
{//Helper function for use with unicode characters.
    integer y = (x >> 4) & 0xF;
    return llGetSubString(hexc, y, y) + llGetSubString(hexc, x & 0xF, x & 0xF);
}//This function would benifit greatly from the DUP opcode, it would remove 19 bytes.
}//This function would benifit greatly from the DUP opcode, it would remove 19 bytes.



Revision as of 21:52, 2 November 2007

SL uses UTF-8 for storing and transmitting strings and with these functions you can work with Unicode characters.

These functions are part of the Combined Library (which includes many other functions written by Strife.

Mono Unsafe

These versions won't work in Mono but will in LSO.

//===================================================//
//                 Combined Library                  //
//             "Nov  3 2007", "00:51:16"             //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//===================================================//
//{

integer UTF8ToUnicodeInteger(string input)//Mono Unsafe, LSO Safe
{
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000)//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        return  (   (  0x0000003f &  result       ) |
                    (( 0x00003f00 &  result) >> 2 ) | 
                    (( 0x003f0000 &  result) >> 4 ) | 
                    (( 0x3f000000 & (result = (integer)("0x"+llGetSubString(input,-8,-1)))) >> 6 ) | 
                    (( 0x0000003f &  result) << 24) | 
                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1)))) << 22)
                ) & (  0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[])) >> 1))));
    return result >> 24;
}

string UnicodeIntegerToUTF8(integer input)//Mono Unsafe, LSO Safe
{
    integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818);
    string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !(bytes = ((input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5))));
    while (bytes)
        result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
    return llUnescapeURL(result);
}

string byte2hex(integer x)//Mono Unsafe, LSO Safe
{//Helper function for use with unicode characters.
    return llGetSubString(hexc, x = ((x >> 4) & 0xF), x) + llGetSubString(hexc, x & 0xF, x & 0xF);
}//This function would benifit greatly from the DUP opcode, it would remove 19 bytes.

string hexc="0123456789ABCDEF";

//} Combined Library

Mono Safe

These versions are Mono and LSO safe but will not execute as quickly in LSO as the Mono Unsafe versions

//===================================================//
//                 Combined Library                  //
//             "Nov  3 2007", "00:46:15"             //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//===================================================//
//{

integer UTF8ToUnicodeInteger(string input)//Mono Safe, LSO Safe
{
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000){//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        integer end = (integer)("0x"+llGetSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1));
        integer begin = (integer)("0x"+llDeleteSubString(input,-8,-1));
        return  (   (  0x0000003f &  end       ) |
                    (( 0x00003f00 &  end) >> 2 ) | 
                    (( 0x003f0000 &  end) >> 4 ) | 
                    (( 0x3f000000 &  end) >> 6 ) |
                    (( 0x0000003f &  begin) << 24) |
                    (( 0x00000100 &  begin) << 22) ) & 
                    (0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
    }
    return result >> 24;
}

string UnicodeIntegerToUTF8(integer input)//Mono Safe, LSO Safe
{
    integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818);
    bytes = (input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5;//adjust
    string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !bytes));
    while (bytes)
        result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
    return llUnescapeURL(result);
}

string byte2hex(integer x)//Mono Safe, LSO Safe
{//Helper function for use with unicode characters.
    integer y = (x >> 4) & 0xF;
    return llGetSubString(hexc, y, y) + llGetSubString(hexc, x & 0xF, x & 0xF);
}//This function would benifit greatly from the DUP opcode, it would remove 19 bytes.

string hexc="0123456789ABCDEF";

//} Combined Library