UTF-8 Test

From Second Life Wiki
Jump to navigation Jump to search
The printable version is no longer supported and may have rendering errors. Please update your browser bookmarks and please use the default browser print function instead.

The tests are taken from http://www.cl.cam.ac.uk/~mgk25/ucs/examples/UTF-8-test.txt, with special thanks to Strife Onizuka and Kelly Linden for wrangling them into LSL.

1) Create a script from the script below and a notecard from the notecard text below, and put both into a box. Name the box "utf-8 test"; the object's name is what prefixes each line of chat output in the examples that follow.

If you have a Unicode font installed, you should see Unicode characters, e.g.

  • utf-8 test: Greek kosme
  • utf-8 test: κόσμε

If you do not have a Unicode font installed, you should see something like boxes in place of the characters, e.g.

  • utf-8 test: Greek kosme
  • utf-8 test: ■■■μ■

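2) Let the script finish. After the last line it says "Finished" followed by the "Passed:" and "Failed:" counts; every test has passed when the "Failed:" count is 0.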


The Script follows

// UTF-8 decoder test driver.
//
// Reads the first notecard in the object's inventory one line at a time.
// Each line is one of:
//   #text            - a heading; echoed to the owner without the leading '#'
//   <escaped>|<n>    - a URL-escaped byte sequence and the string length
//                      expected after it has been run through llUnescapeURL
//   anything else    - ignored (blank lines, for example)
// A running pass/fail tally is kept and reported once the notecard's end
// (or the reported line count) is reached.

string  card;           // name of the notecard being read
integer lines = -1;     // notecard line count; stays -1 until the count arrives
integer line  = 0;      // zero-based index of the line currently being read
integer pass;           // number of lines whose decoded length matched
integer fail;           // number of lines whose decoded length did not match
key     countQuery;     // query id returned by llGetNumberOfNotecardLines
key     lineQuery;      // query id returned by the latest llGetNotecardLine

default
{
    // Start a run: request the line count of the first notecard, if any.
    state_entry()
    {
        if (llGetInventoryNumber(INVENTORY_NOTECARD) == 0)
        {
            // Without this message an empty inventory looks like a hung test.
            llOwnerSay("No notecard found; add the test notecard to this object.");
            return;
        }

        card = llGetInventoryName(INVENTORY_NOTECARD, 0);
        countQuery = llGetNumberOfNotecardLines(card);
        llOwnerSay("Test-Starting: " + card);
    }

    // Rezzing, touching, or changing the inventory all restart the test
    // from scratch (llResetScript also resets every global above).
    on_rez(integer start_param)
    {
        llResetScript();
    }

    touch_start(integer total_number)
    {
        llResetScript();
    }

    changed(integer change)
    {
        if (change & CHANGED_INVENTORY)
            llResetScript();
    }

    // Handles both kinds of reply: the line count, then each notecard line.
    dataserver(key queryid, string data)
    {
        // Match replies to our own requests by query id. dataserver events
        // are raised in every script in the prim, so an unrelated reply must
        // not be mistaken for the line count or for a test line.
        if (queryid == countQuery)
        {
            lines = (integer)data;
            lineQuery = llGetNotecardLine(card, line);
            return;
        }
        if (queryid != lineQuery)
            return;

        // Strip every space, then split on '|'. EOF is passed as a spacer so
        // that it survives the split as its own list entry and can be
        // detected with llListFindList below.
        string packed = (string)llParseString2List(data, [" "], []);
        list fields = llParseString2List(packed, ["|"], [EOF]);

        if (llGetSubString(packed, 0, 0) == "#")
        {
            // Heading line: echo the original text minus its first character.
            llOwnerSay(llDeleteSubString(data, 0, 0));
        }
        else if (llGetListLength(fields) >= 2)
        {
            string  decoded  = llUnescapeURL(llList2String(fields, 0));
            integer actual   = llStringLength(decoded);
            integer expected = llList2Integer(fields, 1);
            integer ok       = (actual == expected);   // 1 on match, 0 otherwise

            pass += ok;
            fail += !ok;

            string out = (string)line + ": ";
            out += llList2String(["Fail", "Pass"], ok) + " ";
            out += "(" + (string)actual + " - " + (string)expected + ") ";
//Enable this section to test llUnescapeURL
//            out += "(" + (string)((
//                     llStringLength(
//                         (string)llParseString2List( //strips off the evil pad
//                             llStringToBase64(decoded),["="],[]
//                         )
//                     ) * 3 ) / 4); //thats how many bytes should be in it (assuming all escaped)
//            out += " - ";
//            out += (string)(llStringLength(llList2String(fields,0))/3) + ")";

            // This will IM all the tests to the owner!  This is slow because
            // IM sleeps the script for 2 seconds.
            llInstantMessage(llGetOwner(), decoded);
            llOwnerSay(out);
        }

        // Keep reading until EOF is returned or the reported count is used up.
        if (llListFindList(fields, [EOF]) == -1 && ++line < lines)
            lineQuery = llGetNotecardLine(card, line);
        else
        {
            llOwnerSay("Finished");
            llOwnerSay("Passed: " + (string)pass);
            llOwnerSay("Failed: " + (string)fail);
        }
    }
}


The Notecard follows

#Greek kosme
%ce%ba%e1%bd%b9%cf%83%ce%bc%ce%b5|5

#Boundary condition test cases - First possible sequence of a certain length
%00|0
%c2%80|1
%e0%a0%80|1
%f0%90%80%80|1
%f8%88%80%80%80|1
%fc%84%80%80%80%80|1

#Last Possible Sequence of a certain length
%7f|1
%df%bf|1
%ef%bf%bf|1
%f7%bf%bf%bf|1
%fb%bf%bf%bf%bf|1
%fd%bf%bf%bf%bf%bf|1

#Other boundary conditions
%ed%9f%bf|1
%ee%80%80|1
%ef%bf%bd|1
%f4%8f%bf%bf|1
%f4%90%80%80|1

#Unexpected continuation bytes
%80|1
%bf|1
%80%bf|2
%80%bf%80|3
%80%bf%80%bf|4
%80%bf%80%bf%80|5
%80%bf%80%bf%80%bf|6
%80%bf%80%bf%80%bf%80|7

#All 64 possible continuation bytes
%80%81%82%83%84%85%86%87%88%89%8a%8b%8c%8d%8e%8f%90%91%92%93%94%95%96%97%98%99%9a%9b%9c%9d%9e%9f%a0%a1%a2%a3%a4%a5%a6%a7%a8%a9%aa%ab%ac%ad%ae%af%b0%b1%b2%b3%b4%b5%b6%b7%b8%b9%ba%bb%bc%bd%be%bf|64

#All 32 first bytes of 2-byte sequences
%c0%20%c1%20%c2%20%c3%20%c4%20%c5%20%c6%20%c7%20%c8%20%c9%20%ca%20%cb%20%cc%20%cd%20%ce%20%cf%20%d0%20%d1%20%d2%20%d3%20%d4%20%d5%20%d6%20%d7%20%d8%20%d9%20%da%20%db%20%dc%20%dd%20%de%20%df%20|64

#All 16 first bytes of 3-byte sequences
%e0%20%e1%20%e2%20%e3%20%e4%20%e5%20%e6%20%e7%20%e8%20%e9%20%ea%20%eb%20%ec%20%ed%20%ee%20%ef%20|32

#All 8 first bytes of 4-byte sequences
%f0%20%f1%20%f2%20%f3%20%f4%20%f5%20%f6%20%f7%20|16

#All 4 first bytes of 5-byte sequences
%f8%20%f9%20%fa%20%fb%20|8

#All 2 first bytes of 6-byte sequences
%fc%20%fd%20|4

#Sequences with last continuation byte missing
%c0|1
%e0%80|1
%f0%80%80|1
%f8%80%80%80|1
%fc%80%80%80%80|1
%df|1
%ef%bf|1
%f7%bf%bf|1
%fb%bf%bf%bf|1
%fd%bf%bf%bf%bf|1
%c0%e0%80%f0%80%80%f8%80%80%80%fc%80%80%80%80%df%ef%bf%f7%bf%bf%fb%bf%bf%bf%fd%bf%bf%bf%bf|10

#Impossible bytes
%fe|1
%ff|1
%fe%fe%ff%ff|4

#Examples of an overlong ASCII character
%c0%af|1
%e0%80%af|1
%f0%80%80%af|1
%f8%80%80%80%af|1
%fc%80%80%80%80%af|1

#Maximum overlong sequences
%c1%bf|1
%e0%9f%bf|1
%f0%8f%bf%bf|1
%f8%87%bf%bf%bf|1
%fc%83%bf%bf%bf%bf|1

#Overlong sequences - Overlong representation of the NUL character
%c0%80|1
%e0%80%80|1
%f0%80%80%80|1
%f8%80%80%80%80|1
%fc%80%80%80%80%80|1

#Illegal code positions - Single UTF-16 surrogates
%ed%a0%80|1
%ed%ad%bf|1
%ed%ae%80|1
%ed%af%bf|1
%ed%b0%80|1
%ed%be%80|1
%ed%bf%bf|1

#Illegal code positions - Paired UTF-16 surrogates
%ed%a0%80%ed%b0%80|2
%ed%a0%80%ed%bf%bf|2
%ed%ad%bf%ed%b0%80|2
%ed%ad%bf%ed%bf%bf|2
%ed%ae%80%ed%b0%80|2
%ed%ae%80%ed%bf%bf|2
%ed%af%bf%ed%b0%80|2
%ed%af%bf%ed%bf%bf|2

#Illegal code positions - Other illegal code positions
%ef%bf%be|1
%ef%bf%bf|1