UTF-8 Test

From Second Life Wiki
Jump to navigation Jump to search

The tests are taken from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt . Special thanks to Strife Onizuka and Kelly Linden for wrangling them into LSL.

1) Make a script from the following script and a notecard from the following notecard and put them both into a box. Name the box "utf-8 test".

If you have a Unicode font installed, you should see Unicode characters, e.g.:

  • utf-8 test: Greek kosme
  • utf-8 test: κόσμε

If you do not have a Unicode font installed, you should see something like boxes, e.g.:

  • utf-8 test: Greek kosme
  • utf-8 test: ■■■μ■

2) Let the script finish. It says "Finished" and then reports the "Passed" and "Failed" counts; every test should pass, i.e. the "Failed" count should be 0.


The Script follows

string card;
integer lines = -1;
integer line = 0;
list resuts;
integer pass;
integer fail;

default
{
    state_entry()
    {
        if(llGetInventoryNumber(INVENTORY_NOTECARD))
        {
            llGetNumberOfNotecardLines(
                card = llGetInventoryName(INVENTORY_NOTECARD,0));
            llOwnerSay("Test-Starting: "+card);
        }
    }
    on_rez(integer a)
    {
        llResetScript();
    }
    touch_start(integer a)
    {
        llResetScript();
    }
    changed(integer a)
    {
        if(a & CHANGED_INVENTORY)
            llResetScript();
    }
    
    dataserver(key a, string b)
    {
        if(lines == -1)
        {
            lines = (integer)b;
            llGetNotecardLine(card,line);
        }
        else
        {
            list c = llParseString2List(a=(string)llParseString2List(b,[" "],[]),["|"],[EOF]);
            integer d;
            integer e;
            integer f;
            if(llGetSubString(a,0,0) == "#")
                llOwnerSay(llDeleteSubString(b,0,0));
            else if(llGetListLength(c) >= 2)
            {
                d = llStringLength(b = llUnescapeURL(llList2String(c,0)));
                pass += e = (d == f = llList2Integer(c,1));
                fail += !e;
                string out = (string)line +": ";
                    out += llList2String(["Fail","Pass"],e) + " ";
                    out += "(" + (string)d + " - " + (string)f + ") ";
//Enable this section to test llUnescapeURL
//                       out += "(" + (string)((
//                                llStringLength(
//                                    (string)llParseString2List( //strips off the evil pad
//                                        llStringToBase64(b),["="],[]
//                                    )
//                                ) * 3 ) / 4); //thats how many bytes should be in it (assuming all escaped)
//                       out += " - ";
//                       out += (string)(llStringLength(llList2String(c,0))/3) + ")";
                
                
                // This will IM all the tests to the owner!  This is slow because IM sleeps the script for 2 seconds.
                llInstantMessage(llGetOwner(),b);
                llOwnerSay(out);
            }
            if(llListFindList(c,[EOF]) == -1 && ++line < lines)
                llGetNotecardLine(card,line);
            else
            {
                llOwnerSay("Finished");
                llOwnerSay("Passed: "+(string)pass);
                llOwnerSay("Failed: "+(string)fail);
            }
        }
    }
} 


The Notecard follows

#Greek kosme
%ce%ba%e1%bd%b9%cf%83%ce%bc%ce%b5|5

#Boundary condition test cases - First possible sequence of a certain length
%00|0
%c2%80|1
%e0%a0%80|1
%f0%90%80%80|1
%f8%88%80%80%80|1
%fc%84%80%80%80%80|1

#Last Possible Sequence of a certain length
%7f|1
%df%bf|1
%ef%bf%bf|1
%f7%bf%bf%bf|1
%fb%bf%bf%bf%bf|1
%fd%bf%bf%bf%bf%bf|1

#Other boundary conditions
%ed%9f%bf|1
%ee%80%80|1
%ef%bf%bd|1
%f4%8f%bf%bf|1
%f4%90%80%80|1

#Unexpected continuation bytes
%80|1
%bf|1
%80%bf|2
%80%bf%80|3
%80%bf%80%bf|4
%80%bf%80%bf%80|5
%80%bf%80%bf%80%bf|6
%80%bf%80%bf%80%bf%80|7

#All 64 possible continuation bytes
%80%81%82%83%84%85%86%87%88%89%8a%8b%8c%8d%8e%8f%90%91%92%93%94%95%96%97%98%99%9a%9b%9c%9d%9e%9f%a0%a1%a2%a3%a4%a5%a6%a7%a8%a9%aa%ab%ac%ad%ae%af%b0%b1%b2%b3%b4%b5%b6%b7%b8%b9%ba%bb%bc%bd%be%bf|64

#All 32 first bytes of 2-byte sequences
%c0%20%c1%20%c2%20%c3%20%c4%20%c5%20%c6%20%c7%20%c8%20%c9%20%ca%20%cb%20%cc%20%cd%20%ce%20%cf%20%d0%20%d1%20%d2%20%d3%20%d4%20%d5%20%d6%20%d7%20%d8%20%d9%20%da%20%db%20%dc%20%dd%20%de%20%df%20|64

#All 16 first bytes of 3-byte sequences
%e0%20%e1%20%e2%20%e3%20%e4%20%e5%20%e6%20%e7%20%e8%20%e9%20%ea%20%eb%20%ec%20%ed%20%ee%20%ef%20|32

#All 8 first bytes of 4-byte sequences
%f0%20%f1%20%f2%20%f3%20%f4%20%f5%20%f6%20%f7%20|16

#All 4 first bytes of 5-byte sequences
%f8%20%f9%20%fa%20%fb%20|8

#All 2 first bytes of 6-byte sequences
%fc%20%fd%20|4

#Sequences with last continuation byte missing
%c0|1
%e0%80|1
%f0%80%80|1
%f8%80%80%80|1
%fc%80%80%80%80|1
%df|1
%ef%bf|1
%f7%bf%bf|1
%fb%bf%bf%bf|1
%fd%bf%bf%bf%bf|1
%c0%e0%80%f0%80%80%f8%80%80%80%fc%80%80%80%80%df%ef%bf%f7%bf%bf%fb%bf%bf%bf%fd%bf%bf%bf%bf|10

#Impossible bytes
%fe|1
%ff|1
%fe%fe%ff%ff|4

#Examples of an overlong ASCII character
%c0%af|1
%e0%80%af|1
%f0%80%80%af|1
%f8%80%80%80%af|1
%fc%80%80%80%80%af|1

#Maximum overlong sequences
%c1%bf|1
%e0%9f%bf|1
%f0%8f%bf%bf|1
%f8%87%bf%bf%bf|1
%fc%83%bf%bf%bf%bf|1

#Overlong sequences - Overlong representation of the NUL character
%c0%80|1
%e0%80%80|1
%f0%80%80%80|1
%f8%80%80%80%80|1
%fc%80%80%80%80%80|1

#Illegal code positions - Single UTF-16 surrogates
%ed%a0%80|1
%ed%ad%bf|1
%ed%ae%80|1
%ed%af%bf|1
%ed%b0%80|1
%ed%be%80|1
%ed%bf%bf|1

#Illegal code positions - Paired UTF-16 surrogates
%ed%a0%80%ed%b0%80|2
%ed%a0%80%ed%bf%bf|2
%ed%ad%bf%ed%b0%80|2
%ed%ad%bf%ed%bf%bf|2
%ed%ae%80%ed%b0%80|2
%ed%ae%80%ed%bf%bf|2
%ed%af%bf%ed%b0%80|2
%ed%af%bf%ed%bf%bf|2

#Illegal code positions - Other illegal code positions
%ef%bf%be|1
%ef%bf%bf|1