Difference between revisions of "UTF-8"

From Second Life Wiki
Jump to navigation Jump to search
m
(Out of date)
Line 5: Line 5:
//===================================================//
//===================================================//
//                Combined Library                  //
//                Combined Library                  //
//            "May 4 2007", "20:17:12"            //
//            "Nov 2 2007", "22:30:43"            //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//    http://creativecommons.org/licenses/by/3.0/    //
Line 11: Line 11:
//{
//{


integer UTF8ToUnicodeInteger(string input)
integer UTF8ToUnicodeInteger(string input)//Mono Unsafe, LSO Safe
{
{
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000)//multibyte, continuing to use base64 is impractical because it requires smart shifting.
if(result & 0x80000000)//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        return  (  (  0x0000003f &  result      ) |
return  (  (  0x0000003f &  result      ) |
                    (( 0x00003f00 &  result) >> 2 ) |  
(( 0x00003f00 &  result) >> 2 ) |  
                    (( 0x003f0000 &  result) >> 4 ) |  
(( 0x003f0000 &  result) >> 4 ) |  
                    (( 0x3f000000 & (result = (integer)("0x"+llGetSubString(input,-8,-1)))) >> 6 ) |  
(( 0x3f000000 & (result = (integer)("0x"+llGetSubString(input,-8,-1)))) >> 6 ) |  
                    (( 0x0000003f &  result) << 24) |  
(( 0x0000003f &  result) << 24) |  
                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1)))) << 22)
(( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1)))) << 22)
                ) & (  0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
) & (  0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[])) >> 1))));
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[])) >> 1))));
    return result >> 24;
return result >> 24;
}
}


string UnicodeIntegerToUTF8(integer input)
string UnicodeIntegerToUTF8(integer input)//Mono Unsafe, LSO Safe
{
{
    if(input <= 0) return "";
integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818);
    integer count = -1;
string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !(bytes = ((input >= 0x80) * (bytes + (((1 << bytes) - input) <= 0) - 2 ) / 5))));
    integer bytes = (input >= 0x80);
while (bytes)
    string result;
result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
    if (input >= 0x4000000)
return llUnescapeURL(result);
        bytes = 5;
    else if (input >= 0x200000)
        bytes = 4;
    else if (input >= 0x10000)
        bytes = 3;
    else if (input >= 0x800)
        bytes = 2;
    while((count = -~count) < bytes)
        result = "%" + byte2hex((((input >> (6 * count)) | 0x80) & 0xBF)) + result;
    result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) * !!bytes)) + result;
    return llUnescapeURL(result);
}
}


string byte2hex(integer x)
string byte2hex(integer x)//Mono Unsafe, LSO Safe
{//Helper function for use with unicode characters.
{//Helper function for use with unicode characters.
    return llGetSubString(hexc, x = ((x >> 4) & 0xF), x) + llGetSubString(hexc, x & 0xF, x & 0xF);
return llGetSubString(hexc, x = ((x >> 4) & 0xF), x) + llGetSubString(hexc, x & 0xF, x & 0xF);
}
}//This function would benifit greatly from the DUP opcode, it would remove 19 bytes.


string hexc="0123456789ABCDEF";
string hexc="0123456789ABCDEF";

Revision as of 19:32, 2 November 2007

SL uses UTF-8 for storing and transmitting strings and with these functions you can work with Unicode characters.

These functions are part of the Combined Library, which includes many other functions written by Strife.

//===================================================//
//                 Combined Library                  //
//             "Nov  2 2007", "22:30:43"             //
//  Copyright (C) 2004-2007, Strife Onizuka (cc-by)  //
//    http://creativecommons.org/licenses/by/3.0/    //
//===================================================//
//{

// Converts the first character of `input` into an integer built from the
// payload bits of that character's byte sequence.
//
// input  : string; only its first character (llGetSubString(input,0,0)) is used.
// returns: if bit 31 of the base64-decoded value is clear, that value shifted
//          right by 24; otherwise the value assembled by the masked/shifted
//          OR chain below.
//
// NOTE(review): the return expression assigns to `result` and `input` in the
// middle of the OR chain while other operands read them. The operand that
// parses the last 8 characters of `input` as hex appears textually BEFORE the
// operand that turns `input` into a hex string, so the expression presumably
// relies on later operands being evaluated first - likely what the
// "Mono Unsafe, LSO Safe" tag refers to. Confirm before reordering anything.
integer UTF8ToUnicodeInteger(string input)//Mono Unsafe, LSO Safe
{
	// Keep only the first character, base64-encode it and decode that back to an integer.
	// NOTE(review): presumably this places the character's first byte in bits 24-31 - confirm against llBase64ToInteger.
	integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
	if(result & 0x80000000)//multibyte, continuing to use base64 is impractical because it requires smart shifting.
		return  (   (  0x0000003f &  result       ) |// low 6 bits of the lowest byte, kept in place
					(( 0x00003f00 &  result) >> 2 ) | // low 6 bits of the next byte, moved down to bits 6-11
					(( 0x003f0000 &  result) >> 4 ) | // low 6 bits of the third byte, moved down to bits 12-17
					(( 0x3f000000 & (result = (integer)("0x"+llGetSubString(input,-8,-1)))) >> 6 ) | // result := last 8 chars of `input` read as hex; top byte's low 6 bits go to bits 18-23
					(( 0x0000003f &  result) << 24) | // low 6 bits of the leading-part value, moved up to bits 24-29
					(( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1)))) << 22)
					// ^ input := llEscapeURL(input) with the "%" separators removed; result := that string minus its last 8 chars, read as hex; bit 8 is moved to bit 30
				) & (  0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
				// ^ final mask: 0x7FFFFFFF shifted right by 5 * ((integer)(llLog(~result) / ln 2) - 25); the long constant is ln 2.
				//   NOTE(review): assumes llLog is the natural logarithm, making the quotient log2(~result) - confirm.
// Earlier variant of the last two operands, kept commented out by the author:
//                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//                ) & (  0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[])) >> 1))));
	// Bit 31 clear: return the top byte of the decoded value.
	return result >> 24;
}

// Encodes the integer `input` as a UTF-8 style byte sequence and returns it
// as a string.
//
// input  : integer value to encode. Values below 0x80 (including zero and
//          negative values) produce a single escaped byte taken from the low
//          8 bits of `input`.
// returns: llUnescapeURL() applied to the "%XX" escapes of the lead byte
//          followed by 0-5 continuation bytes.
//
// The continuation-byte count is chosen with integer range checks rather than
// a floating-point logarithm, and no variable is both assigned and read inside
// the same expression, so the result does not depend on float rounding or on
// operand evaluation order.
string UnicodeIntegerToUTF8(integer input)
{
	// Number of continuation bytes (10xxxxxx) that follow the lead byte.
	integer bytes = 0;
	if (input >= 0x4000000)
		bytes = 5;
	else if (input >= 0x200000)
		bytes = 4;
	else if (input >= 0x10000)
		bytes = 3;
	else if (input >= 0x800)
		bytes = 2;
	else if (input >= 0x80)
		bytes = 1;

	// Lead byte: the bits left over above the continuation payloads, plus the
	// length prefix. The low 8 bits of (0x3F80 >> bytes) are 0xC0, 0xE0, 0xF0,
	// 0xF8, 0xFC for bytes = 1..5; byte2hex only looks at the low 8 bits.
	integer lead = input >> (6 * bytes);
	if (bytes)
		lead = lead | (0x3F80 >> bytes);
	string result = "%" + byte2hex(lead);

	// Continuation bytes, most significant 6-bit group first.
	while (bytes)
	{
		--bytes;
		result += "%" + byte2hex(((input >> (6 * bytes)) | 0x80) & 0xBF);
	}
	return llUnescapeURL(result);
}

// Helper function for use with unicode characters.
// Returns the low 8 bits of `x` as two uppercase hexadecimal digits
// (high nibble first). Bits above the low 8 are ignored.
//
// Both nibbles are computed into locals before any string is built, so the
// result does not depend on the order in which operands or arguments are
// evaluated.
string byte2hex(integer x)
{
	integer hi = (x >> 4) & 0xF;
	integer lo = x & 0xF;
	return llGetSubString(hexc, hi, hi) + llGetSubString(hexc, lo, lo);
}

// Digit lookup table for byte2hex: the character at index n is the hex digit for n.
string hexc="0123456789ABCDEF";

//} Combined Library