Difference between revisions of "UTF-8"
Line 48: | Line 48: | ||
//} Combined Library</lsl> | //} Combined Library</lsl> | ||
== | ==LSLEditor Safe== | ||
This version will work in Mono, LSO & LSLEditor. There will be a slight performance hit in LSO as compared to the LSLEditor Unsafe version. | |||
< | <lsl>//===================================================// | ||
//===================================================// | |||
// Combined Library // | // Combined Library // | ||
// " | // "Feb 4 2008", "08:38:13" // | ||
// Copyright (C) 2004- | // Copyright (C) 2004-2008, Strife Onizuka (cc-by) // | ||
// http://creativecommons.org/licenses/by/3.0/ // | // http://creativecommons.org/licenses/by/3.0/ // | ||
//===================================================// | //===================================================// | ||
//{ | //{ | ||
integer UTF8ToUnicodeInteger(string input)// | integer UTF8ToUnicodeInteger(string input)//LSLEditor Safe, LSO Safe | ||
{ | { | ||
integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0))); | integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0))); | ||
Line 70: | Line 69: | ||
(( 0x3f000000 & end) >> 6 ) | | (( 0x3f000000 & end) >> 6 ) | | ||
(( 0x0000003f & begin) << 24) | | (( 0x0000003f & begin) << 24) | | ||
(( 0x00000100 & begin) << 22) ) & | (( 0x00000100 & begin) << 22) | ||
) & (0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25))); | |||
} | } | ||
return result >> 24; | return result >> 24; | ||
} | } | ||
string UnicodeIntegerToUTF8(integer input)// | string UnicodeIntegerToUTF8(integer input)//LSLEditor Safe, LSO Safe | ||
{ | { | ||
integer bytes = llCeil(llLog(input) / 0.69314718055994530941723212145818); | integer bytes = llCeil((llLog(input) / 0.69314718055994530941723212145818)); | ||
bytes = (input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5;//adjust | bytes = (input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5;//adjust | ||
string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !bytes)); | string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !bytes)); | ||
Line 86: | Line 85: | ||
} | } | ||
string byte2hex(integer x)// | string byte2hex(integer x)//LSLEditor Safe, LSO Safe | ||
{//Helper function for use with unicode characters. | {//Helper function for use with unicode characters. | ||
integer y = (x >> 4) & 0xF; | integer y = (x >> 4) & 0xF; | ||
return llGetSubString(hexc, y, y) + llGetSubString(hexc, x & 0xF, x & 0xF); | return llGetSubString(hexc, y, y) + llGetSubString(hexc, x & 0xF, x & 0xF); | ||
}//This function would | }//This function would benefit greatly from the DUP opcode, it would remove 19 bytes. | ||
string hexc="0123456789ABCDEF"; | string hexc="0123456789ABCDEF"; | ||
//} Combined Library | //} Combined Library</lsl> | ||
</ |
Revision as of 06:39, 4 February 2008
Second Life (SL) stores and transmits strings as UTF-8. The functions below convert between a single UTF-8 encoded character and its Unicode code point, expressed as an integer.
These functions are part of the Combined Library written by Strife Onizuka.
General Use
This version will work fine in LSO and Mono but not in LSLEditor. <lsl>//===================================================// // Combined Library // // "Feb 4 2008", "08:35:00" // // Copyright (C) 2004-2008, Strife Onizuka (cc-by) // // http://creativecommons.org/licenses/by/3.0/ // //===================================================// //{
// Returns the Unicode code point of the FIRST character of `input`.
//
// Only the first character is examined; an ASCII character is returned
// directly from the top byte of its base64-decoded form. Multibyte
// sequences (lead byte has its high bit set) of up to 6 bytes are decoded
// from the character's URL-escaped hex digits.
//
// NOTE: this variant reassigns `result` and `input` INSIDE the return
// expression and only produces the right answer when operands are
// evaluated right-to-left (the mask on the right of the final `&` must be
// computed before `result` is overwritten, then the 6th, 5th, 4th ... OR
// terms in that order). Presumably that is why it is tagged
// "LSLEditor Unsafe". Do not reorder or split the expression.
integer UTF8ToUnicodeInteger(string input)//LSLEditor Unsafe, LSO Safe
{
    // First (up to) 4 bytes of the first character, big-endian, zero padded.
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000)//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        // Evaluation (right-to-left):
        //  1. mask:  0.693... is ln(2), so llLog(~result)/ln(2) is log2 of the
        //     inverted bytes, i.e. the position of the highest ZERO bit of the
        //     lead byte (29 for 2-byte ... 25 for 6-byte sequences). The shift
        //     of 20/15/10/5/0 keeps 11/16/21/26/31 payload bits.
        //  2. last OR term: `input` becomes the hex digits of the escaped
        //     character (the "%" separators removed) and `result` becomes the
        //     integer value of everything BEFORE the last 8 hex digits
        //     (the leading bytes of 5- and 6-byte sequences).
        //  3. 4th OR term: `result` becomes the integer value of the LAST 8 hex
        //     digits (the trailing up-to-4 bytes); terms 1-3 reuse it.
        // Each term extracts the 6 payload bits of one byte and packs them.
        return  (   (  0x0000003f & result       ) |
                    (( 0x00003f00 & result) >> 2 ) |
                    (( 0x003f0000 & result) >> 4 ) |
                    (( 0x3f000000 & (result = (integer)("0x"+llGetSubString(input,-8,-1)))) >> 6 ) |
                    (( 0x0000003f & result) << 24) |
                    (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1)))) << 22)
                ) & ( 0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
// Alternative tail kept from the original (derives the mask from the hex length instead of llLog):
//                  (( 0x00000100 & (result = (integer)("0x"+llDeleteSubString(input,-8,-1)))) << 22)
//              ) & ( 0x7FFFFFFF >> (30 - (5 * (llStringLength(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[])) >> 1))));
    // Single-byte (ASCII) character: its value is the top byte.
    return result >> 24;
}
// Returns a one-character string holding the code point `input`, built by
// assembling the UTF-8 bytes as "%XX" escapes and passing them through
// llUnescapeURL. Sequences of 1 to 6 bytes are produced.
//
// NOTE: `bytes` is reassigned INSIDE the lead-byte expression; the
// assignment is the right-most operand and must be evaluated before the
// uses of `bytes` to its left (right-to-left operand evaluation).
// Presumably that is why this variant is tagged "LSLEditor Unsafe".
// Do not reorder or split the expression.
string UnicodeIntegerToUTF8(integer input)//LSLEditor Unsafe, LSO Safe
{
    // 0.693... is ln(2): this is ceil(log2(input)), an estimate of the bit length.
    integer bytes = llCeil((llLog(input) / 0.69314718055994530941723212145818));
    // Inner assignment: corrects the estimate to (highest set bit index - 1)
    // whether or not the float log rounded up, divides by 5, and forces 0 for
    // input < 0x80; `bytes` is now the number of continuation bytes.
    // Lead byte: top payload bits OR'd with the length prefix (0x3F80 >> bytes
    // gives 0xC0/0xE0/0xF0/0xF8/0xFC in its low byte). For bytes == 0 the
    // `<< !bytes` shifts the prefix out of the low byte so ASCII passes through.
    string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !(bytes = ((input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5))));
    // Continuation bytes, most significant first. `~-bytes` is bytes - 1;
    // `| 0x80) & 0xBF` forces the top two bits to 10.
    while (bytes)
        result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
    return llUnescapeURL(result);
}
// Returns the low 8 bits of `x` as exactly two uppercase hexadecimal digits
// (high nibble first). Bits above the low byte are ignored.
string byte2hex(integer x)//LSLEditor Safe, LSO Safe
{//Helper function for use with unicode characters.
    integer y = (x >> 4) & 0xF; // high nibble of the low byte
    return llGetSubString(hexc, y, y) + llGetSubString(hexc, x & 0xF, x & 0xF);
}//This function would benefit greatly from the DUP opcode, it would remove 19 bytes.

// Digit lookup table indexed by nibble value (0-15).
string hexc="0123456789ABCDEF";
//} Combined Library</lsl>
LSLEditor Safe
This version will work in Mono, LSO & LSLEditor. There will be a slight performance hit in LSO as compared to the LSLEditor Unsafe version. <lsl>//===================================================// // Combined Library // // "Feb 4 2008", "08:38:13" // // Copyright (C) 2004-2008, Strife Onizuka (cc-by) // // http://creativecommons.org/licenses/by/3.0/ // //===================================================// //{
// Returns the Unicode code point of the FIRST character of `input`.
//
// Only the first character is examined; an ASCII character is returned
// directly from the top byte of its base64-decoded form. Multibyte
// sequences (lead byte has its high bit set) of up to 6 bytes are decoded
// from the character's URL-escaped hex digits.
// Unlike the "LSLEditor Unsafe" variant, every intermediate value is stored
// in its own statement, so the result does not depend on operand
// evaluation order.
integer UTF8ToUnicodeInteger(string input)//LSLEditor Safe, LSO Safe
{
    // First (up to) 4 bytes of the first character, big-endian, zero padded.
    integer result = llBase64ToInteger(llStringToBase64(input = llGetSubString(input,0,0)));
    if(result & 0x80000000){//multibyte, continuing to use base64 is impractical because it requires smart shifting.
        // `input` becomes the hex digits of the escaped character ("%" removed);
        // `end` is the integer value of its last 8 hex digits (trailing up-to-4 bytes).
        integer end = (integer)("0x"+llGetSubString(input = (string)llParseString2List(llEscapeURL(input),(list)"%",[]),-8,-1));
        // `begin` is the integer value of whatever precedes those 8 digits
        // (the leading bytes of 5- and 6-byte sequences).
        integer begin = (integer)("0x"+llDeleteSubString(input,-8,-1));
        // Each OR term extracts the 6 payload bits of one byte and packs them.
        // Final mask: 0.693... is ln(2), so llLog(~result)/ln(2) is log2 of the
        // inverted bytes, i.e. the position of the highest ZERO bit of the lead
        // byte (29 for 2-byte ... 25 for 6-byte sequences). The resulting shift
        // of 20/15/10/5/0 keeps 11/16/21/26/31 payload bits and discards the
        // lead byte's length-prefix bits.
        return  (   (  0x0000003f & end       ) |
                    (( 0x00003f00 & end) >> 2 ) |
                    (( 0x003f0000 & end) >> 4 ) |
                    (( 0x3f000000 & end) >> 6 ) |
                    (( 0x0000003f & begin) << 24) |
                    (( 0x00000100 & begin) << 22)
                ) & (0x7FFFFFFF >> (5 * ((integer)(llLog(~result) / 0.69314718055994530941723212145818) - 25)));
    }
    // Single-byte (ASCII) character: its value is the top byte.
    return result >> 24;
}
// Returns a one-character string holding the code point `input`, built by
// assembling the UTF-8 bytes as "%XX" escapes and passing them through
// llUnescapeURL. Sequences of 1 to 6 bytes are produced.
// The length adjustment is its own statement here, so the result does not
// depend on operand evaluation order.
string UnicodeIntegerToUTF8(integer input)//LSLEditor Safe, LSO Safe
{
    // 0.693... is ln(2): this is ceil(log2(input)), an estimate of the bit length.
    integer bytes = llCeil((llLog(input) / 0.69314718055994530941723212145818));
    // Corrects the estimate to (highest set bit index - 1) whether or not the
    // float log rounded up, divides by 5, and forces 0 for input < 0x80;
    // `bytes` is now the number of continuation bytes.
    bytes = (input >= 0x80) * (bytes + ~(((1 << bytes) - input) > 0)) / 5;//adjust
    // Lead byte: top payload bits OR'd with the length prefix (0x3F80 >> bytes
    // gives 0xC0/0xE0/0xF0/0xF8/0xFC in its low byte). For bytes == 0 the
    // `<< !bytes` shifts the prefix out of the low byte so ASCII passes through.
    string result = "%" + byte2hex((input >> (6 * bytes)) | ((0x3F80 >> bytes) << !bytes));
    // Continuation bytes, most significant first. `~-bytes` is bytes - 1;
    // `| 0x80) & 0xBF` forces the top two bits to 10.
    while (bytes)
        result += "%" + byte2hex((((input >> (6 * (bytes = ~-bytes))) | 0x80) & 0xBF));
    return llUnescapeURL(result);
}
// Returns the low 8 bits of `x` as exactly two uppercase hexadecimal digits
// (high nibble first). Bits above the low byte are ignored.
string byte2hex(integer x)//LSLEditor Safe, LSO Safe
{//Helper function for use with unicode characters.
    integer y = (x >> 4) & 0xF; // high nibble of the low byte
    return llGetSubString(hexc, y, y) + llGetSubString(hexc, x & 0xF, x & 0xF);
}//This function would benefit greatly from the DUP opcode, it would remove 19 bytes.

// Digit lookup table indexed by nibble value (0-15).
string hexc="0123456789ABCDEF";
//} Combined Library</lsl>