UTF-8 Test
Revision as of 14:17, 26 April 2007 by Milo Linden (talk | contribs)
Tests taken from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt (special thanks to Strife Onizuka and Kelly Linden for wrangling them into LSL).
1) Create a script and a notecard from the script and notecard text given below, and put them both into a box. Name the box "utf-8 test".
If you have a Unicode font installed, you should see Unicode characters, e.g.:
- utf-8 test: Greek kosme
- utf-8 test: κόσμε
If you do not have a Unicode font installed, you should see placeholder boxes instead, e.g.:
- utf-8 test: Greek kosme
- utf-8 test: ■■■μ■
2) Let the script finish. It should report that all tests passed.
The Script follows
// UTF-8 decoding test harness.
//
// Reads the first notecard in the prim's inventory one line at a time.
// Each notecard line is one of:
//   #some text            - a heading; echoed to the owner without the '#'
//   <url-escaped>|<count> - a test case; the escaped bytes are decoded with
//                           llUnescapeURL and the resulting string length is
//                           compared against <count>
// Spaces are stripped from a line before it is classified and split.
// When the last line has been processed (or EOF is read) the pass/fail
// totals are reported to the owner.

string card;            // name of the notecard being read
integer lines = -1;     // notecard line count; -1 until the count has arrived
integer line = 0;       // index of the notecard line currently being fetched
integer pass;           // number of test cases whose length matched
integer fail;           // number of test cases whose length did not match
key query;              // id of the dataserver request we are waiting on

default
{
    state_entry()
    {
        // Nothing to do unless the prim contains at least one notecard.
        if (llGetInventoryNumber(INVENTORY_NOTECARD))
        {
            card = llGetInventoryName(INVENTORY_NOTECARD, 0);
            query = llGetNumberOfNotecardLines(card);
            llOwnerSay("Test-Starting: " + card);
        }
    }

    // Any of these events restarts the whole test run from scratch.
    on_rez(integer a)
    {
        llResetScript();
    }

    touch_start(integer a)
    {
        llResetScript();
    }

    changed(integer a)
    {
        if (a & CHANGED_INVENTORY)
            llResetScript();
    }

    dataserver(key a, string b)
    {
        // Ignore replies that do not answer the request we issued, so an
        // unrelated dataserver event cannot be mistaken for notecard data.
        if (a != query)
            return;

        // The first reply is the answer to llGetNumberOfNotecardLines.
        if (lines == -1)
        {
            lines = (integer)b;
            query = llGetNotecardLine(card, line);
            return;
        }

        // Strip every space, then split on '|'. EOF is kept as a spacer so
        // that an end-of-notecard reply shows up as an element of the list.
        string stripped = (string)llParseString2List(b, [" "], []);
        list fields = llParseString2List(stripped, ["|"], [EOF]);

        if (llGetSubString(stripped, 0, 0) == "#")
        {
            // Heading line: echo the raw text minus its first character.
            llOwnerSay(llDeleteSubString(b, 0, 0));
        }
        else if (llGetListLength(fields) >= 2)
        {
            string decoded = llUnescapeURL(llList2String(fields, 0));
            integer actual = llStringLength(decoded);
            integer expected = llList2Integer(fields, 1);
            integer ok = (actual == expected);

            pass += ok;
            fail += !ok;

            string out = (string)line + ": ";
            out += llList2String(["Fail", "Pass"], ok) + " ";
            out += "(" + (string)actual + " - " + (string)expected + ") ";

            // Enable this section to test llUnescapeURL
            // out += "(" + (string)((
            //             llStringLength(
            //                 (string)llParseString2List( // strips off the evil pad
            //                     llStringToBase64(decoded), ["="], []
            //                 )
            //             ) * 3) / 4); // that's how many bytes should be in it (assuming all escaped)
            // out += " - ";
            // out += (string)(llStringLength(llList2String(fields, 0)) / 3) + ")";

            // This will IM all the tests to the owner! This is slow because
            // IM sleeps the script for 2 seconds.
            // NOTE(review): the single-line original does not show whether
            // this call was meant to be commented out; it is kept active
            // because the decoded text is what the owner is asked to inspect.
            llInstantMessage(llGetOwner(), decoded);
            llOwnerSay(out);
        }

        // Fetch the next line unless EOF was read or the last line is done.
        // The pre-increment is deliberately part of the condition, as in the
        // original expression.
        if (llListFindList(fields, [EOF]) == -1 && ++line < lines)
        {
            query = llGetNotecardLine(card, line);
        }
        else
        {
            llOwnerSay("Finished");
            llOwnerSay("Passed: " + (string)pass);
            llOwnerSay("Failed: " + (string)fail);
        }
    }
}
The Notecard follows
#Greek kosme %ce%ba%e1%bd%b9%cf%83%ce%bc%ce%b5|5 #Boundary condition test cases - First possible sequence of a certain length %00|0 %c2%80|1 %e0%a0%80|1 %f0%90%80%80|1 %f8%88%80%80%80|1 %fc%84%80%80%80%80|1 #Last Possible Sequence of a certain length %7f|1 %df%bf|1 %ef%bf%bf|1 %f7%bf%bf%bf|1 %fb%bf%bf%bf%bf|1 %fd%bf%bf%bf%bf%bf|1 #Other boundary conditions %ed%9f%bf|1 %ee%80%80|1 %ef%bf%bd|1 %f4%8f%bf%bf|1 %f4%90%80%80|1 #Unexpected continuation bytes %80|1 %bf|1 %80%bf|2 %80%bf%80|3 %80%bf%80%bf|4 %80%bf%80%bf%80|5 %80%bf%80%bf%80%bf|6 %80%bf%80%bf%80%bf%80|7 #All 64 possible continuation bytes %80%81%82%83%84%85%86%87%88%89%8a%8b%8c%8d%8e%8f%90%91%92%93%94%95%96%97%98%99%9a%9b%9c%9d%9e%9f%a0%a1%a2%a3%a4%a5%a6%a7%a8%a9%aa%ab%ac%ad%ae%af%b0%b1%b2%b3%b4%b5%b6%b7%b8%b9%ba%bb%bc%bd%be%bf|64 #All 32 first bytes of 2-byte sequences %c0%20%c1%20%c2%20%c3%20%c4%20%c5%20%c6%20%c7%20%c8%20%c9%20%ca%20%cb%20%cc%20%cd%20%ce%20%cf%20%d0%20%d1%20%d2%20%d3%20%d4%20%d5%20%d6%20%d7%20%d8%20%d9%20%da%20%db%20%dc%20%dd%20%de%20%df%20|64 #All 16 first bytes of 3-byte sequences %e0%20%e1%20%e2%20%e3%20%e4%20%e5%20%e6%20%e7%20%e8%20%e9%20%ea%20%eb%20%ec%20%ed%20%ee%20%ef%20|32 #All 8 first bytes of 4-byte sequences %f0%20%f1%20%f2%20%f3%20%f4%20%f5%20%f6%20%f7%20|16 #All 4 first bytes of 5-byte sequences %f8%20%f9%20%fa%20%fb%20|8 #All 2 first bytes of 6-byte sequences %fc%20%fd%20|4 #Sequences with last continuation byte missing %c0|1 %e0%80|1 %f0%80%80|1 %f8%80%80%80|1 %fc%80%80%80%80|1 %df|1 %ef%bf|1 %f7%bf%bf|1 %fb%bf%bf%bf|1 %fd%bf%bf%bf%bf|1 %c0%e0%80%f0%80%80%f8%80%80%80%fc%80%80%80%80%df%ef%bf%f7%bf%bf%fb%bf%bf%bf%fd%bf%bf%bf%bf|10 #Impossible bytes %fe|1 %ff|1 %fe%fe%ff%ff|4 #Examples of an overlong ASCII character %c0%af|1 %e0%80%af|1 %f0%80%80%af|1 %f8%80%80%80%af|1 %fc%80%80%80%80%af|1 #Maximum overlong sequences %c1%bf|1 %e0%9f%bf|1 %f0%8f%bf%bf|1 %f8%87%bf%bf%bf|1 %fc%83%bf%bf%bf%bf|1 #Overlong sequences - Overlong representation of the NUL character %c0%80|1 %e0%80%80|1 
%f0%80%80%80|1 %f8%80%80%80%80|1 %fc%80%80%80%80%80|1 #Illegal code positions - Single UTF-16 surrogates %ed%a0%80|1 %ed%ad%bf|1 %ed%ae%80|1 %ed%af%bf|1 %ed%b0%80|1 %ed%be%80|1 %ed%bf%bf|1 #Illegal code positions - Paired UTF-16 surrogates %ed%a0%80%ed%b0%80|2 %ed%a0%80%ed%bf%bf|2 %ed%ad%bf%ed%b0%80|2 %ed%ad%bf%ed%bf%bf|2 %ed%ae%80%ed%b0%80|2 %ed%ae%80%ed%bf%bf|2 %ed%af%bf%ed%b0%80|2 %ed%af%bf%ed%bf%bf|2 #Illegal code positions - Other illegal code positions %ef%bf%be|1 %ef%bf%bf|1