LZW LSL Implementation

From Second Life Wiki
Revision as of 14:25, 18 September 2008 by Haravikk Mistral (talk | contribs) (New page: {{LSL Header}} = LZW LSL Implementation = == Description == === Overview === The following is an LSL implementation of the LZW light-weight compression algorithm. It gives modest comp...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)
Jump to navigation Jump to search

LZW LSL Implementation

Description

Overview

The following is an LSL implementation of the LZW light-weight compression algorithm. It gives modest compression (20-30% for natural language) with good performance and low memory consumption.

License

You may use the following code freely in your own work, provided you credit the author (Haravikk Mistral) and supply a link to this wiki article where feasible. You may use this implementation in commercial works. If you add enhanced support for alternate modes of operation, padding schemes or produce other enhancements to the engine, then you are asked to please contribute them to this wiki-article.

Details

Basic usage

This script is designed to operate as a compression "server", in that you communicate with it using linked-messages to compress/decompress your text. To learn how to do this, you are perhaps best off looking at the helper functions and examples available for this script.

Script

<lsl>// These variables are used to build communications. Commands are sent as // combined bits in the integer argument of a link-message, and are // recovered using masks, you may wish to read about bit-masks before // editing these values. These are used so the string argument is // kept free for data only. // // Commands take the following form (in hex): // 0xFFMMIOvv // Where the letters are: // F Filter, used to quickly determine if a message is for us. // C Command; encrypt/decrypt etc. // I Type of data provided (hex, base64, etc.). // O Desired type of data to be returned (hex, base64, etc.), // this is unused in replies as the reply's value for I will // be the request's value for O. // v Variable, depends on mode.

// This mask allows the filter byte to be retrieved quickly integer LSLLZW_FILTER_MASK = 0xFF000000; // This mask allows the mask byte to be retrieved quickly integer LSLLZW_COMMAND_MASK = 0x00FF0000; // This mask allows the input type to be retrieved quickly integer LSLLZW_INPUT_TYPE_MASK = 0x0000F000; // This mask allows the output type to be retireved quickly integer LSLLZW_OUTPUT_TYPE_MASK = 0x00000F00; // This mask allows the variable to retrieved quickly integer LSLLZW_VARIABLE_MASK = 0x000000FF; // How many bits right variable must be shifted integer LSLLZW_VARIABLE_SHIFT = 0;

// A request integer LSLLZW_FILTER_REQUEST = 0x83000000; // A reply integer LSLLZW_FILTER_REPLY = 0x84000000;

// An error occurred integer LSLLZW_COMMAND_ERROR = 0x00000000; // Prime engine with key integer LSLLZW_COMMAND_COMPRESS = 0x00010000; // Encrypt message using expanded key integer LSLLZW_COMMAND_DECOMPRESS = 0x00020000;

// Input type is hex integer LSLLZW_INPUT_HEX = 0x00000000; // Input type is base64 integer LSLLZW_INPUT_BASE64 = 0x00001000;

// Output type is hex integer LSLLZW_OUTPUT_HEX = 0x00000000; // Output type is base64 integer LSLLZW_OUTPUT_BASE64 = 0x00000100;

// Refuse any data longer than this many characters integer LSLLZW_MAX_SIZE = 8192;

integer LSLLZW_CODE_WIDTH = 9; // Maximum code-width, restricts table integer LSLLZW_MIN_DATA_SIZE = 32; // Data must be at least this many

                                      // bits long or compression is 
                                      // pointless.

string LSLLZW_HEX_CHARS = "0123456789abcdef"; string LSLLZW_BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";

// Load value as integers from a base64 string list lslLZWBase64ToBytes(string base64Data) {

   integer x = llSubStringIndex(base64Data, "=");
   if (x > 0) 
       base64Data = llDeleteSubString(
           (base64Data = "") + base64Data, 
           x,
           -1
       );
   return lslLZWStringToBytes(
       (base64Data = "") + base64Data, 
       6, 
       LSLLZW_BASE64_CHARS
   );

}

// Load value as integers from a hex string list lslLZWHexToBytes(string hexData) {

   if (llGetSubString(hexData, 0, 1) == "0x") 
       hexData = llDeleteSubString((hexData = "") + hexData, 0, 1);
   return lslLZWStringToBytes(
       llToLower((hexData = "") + hexData), 
       4, 
       LSLLZW_HEX_CHARS
   );

}

// Loads a string as a list of integers, using the given bit <width> and // appropriate <alphabet> into Key list lslLZWStringToBytes(string s, integer width, string alphabet) {

   integer l = llStringLength(s);

   list n = [l * width]; // Add bit-length
   integer bitbuf = 0;
   integer adjust = 32;

   integer i = 0;
   integer val;
   while (i < l) {
       val = llSubStringIndex(alphabet, llGetSubString(s, i, i));
       if (val < 0) {
           s = "";
           return (n = []) + 
               ["Invalid character at index "+(string)i];
       }

       if ((adjust -= width) <= 0) {
           bitbuf = bitbuf | (val >> -adjust);
           n = (n = []) + n + [bitbuf];

           adjust += 32;
           if (adjust < 32) bitbuf = (val << adjust);
           else bitbuf = 0;
       } else bitbuf = bitbuf | (val << adjust);

       ++i;
   }

   s = "";
   if (adjust < 32) 
       return (n = []) + n + [bitbuf];
   return (n = []) + n;

}

// Returns a hex string representing integers b, preceeded with "0x". string lslLZWBytesToHex(list b) {

   return "0x" + lslLZWBytesToString((b = []) + b, 4, LSLLZW_HEX_CHARS);

}

// Returns a base64 string representing integers b, with padding equals signs string lslLZWBytesToBase64(list b) {

   string s = lslLZWBytesToString((b = []) + b, 6, LSLLZW_BASE64_CHARS);
   integer l = llStringLength(s) % 4;
   if (l) {
       if (l == 2) return (s = "") + s + "==";
       return (s = "") + s + "=";
   }
   return (s = "") + s;

}

// Outputs integer data from b as characters representing width bits taken from // alphabet string lslLZWBytesToString(list b, integer width, string alphabet) {

   integer bits = llList2Integer(b, 0);

   integer i = 0;
   integer mask = ~(-1 << width);
   integer shift = 32 - width;

   integer available = 0;
   integer prev = 0;
   integer buf;
   integer extra;
   integer value;

   string s = "";

   @lslLZWBytesToStringLoop;
   if((bits -= 32) > -32) {
       available += 32 + (bits * (0 > bits));
       buf = llList2Integer(b, ++i);
       if (available >= width) {
           if (prev) {
               s = (s = "") + s + 
                   llGetSubString(
                       alphabet, 
                       value = (
                           extra | 
                           (
                               (buf >> (shift + prev)) & 
                               ~(-1 << (width - prev))
                           )
                       ), 
                       value
                   );
               buf = buf << (width - prev);
               available -= width;
           }
           while(available >= width) {
               s = (s = "") + s + 
                   llGetSubString(
                       alphabet, 
                       value = ((buf >> shift) & mask),
                       value
                   );
               buf = buf << width;
               available -= width;
           }
           if (prev = available) // Update prev
               extra = (buf >> shift) & mask;
           jump lslLZWBytesToStringLoop;
       }
   }
   if(available) {
       mask = -1 << (width - prev);
       return (s = "") + s + 
           llGetSubString(
               alphabet, 
               value = ((extra & mask) | 
                       (
                           (buf >> (shift + prev)) & 
                           ((-1 << (width - available)) ^ mask))
                       ), 
               value
           );
   }
   return (s = "") + s;

}

// Compresses the binary data using LZW compression list lslLZWCompress(list data) {

   integer dataBits = llList2Integer(data, 0);
   if (dataBits <= LSLLZW_MIN_DATA_SIZE) 
       return [
           "Data too short! Must be at least " + 
           (string)LSLLZW_MIN_DATA_SIZE + " bits long to compress"
       ];
   
   // Remove header
   data = llDeleteSubList((data = []) + data, 0, 0);
   
   // Set-up output
   integer outBits = 0;
   list    output  = [];
   integer outBuf  = 0;
   integer outBit  = 32;
   
   // Set-up dictionary
   list    dictionary  = [];
   integer nextCode    = 256; // 0-255 are the basic codes
   integer maxCode     = (1 << LSLLZW_CODE_WIDTH) - 1;
   
   // Set-up reading
   integer inBuf  = llList2Integer(data, 0);
   integer inBit  = 24;
   
   // Process characters
   integer code   = (inBuf >> inBit) & 0xFF;
   integer char   = 0;
   integer hash   = 0;
   integer x      = 0;
   
   while ((dataBits -= 8) > 0) {
       // Load more data as required
       if ((inBit -= 8) < 0) {
           data  = llDeleteSubList((data = []) + data, 0, 0);
           inBuf = llList2Integer(data, 0);
           inBit = 24;
       }
       
       // Grab a new character
       char = (inBuf >> inBit) & 0xFF;
       
       // Create a hash and look-it up
       hash = (code << 8) | char;
       
       if ((x = llListFindList(dictionary, [hash])) >= 0) 
           code = 256 + x; // Entry exists, update code
       else { // Need to create a new entry
           if (nextCode <= maxCode) {
               dictionary = (dictionary = []) + dictionary + [hash];
               ++nextCode;
           }
           
           // Output the currently held code
           outBit  -= LSLLZW_CODE_WIDTH;
           outBits += LSLLZW_CODE_WIDTH;
           
           if (outBit > 0) 
               outBuf = outBuf | (code << outBit);
           else {
               outBuf = outBuf | (code >> -outBit);
               output = (output = []) + output + [outBuf];
               
               outBit += 32;
               if (outBit >= 32) outBuf = 0;
               else outBuf = (code << outBit);
           }
           
           code = char;
       }
   }
   
   // Output final code(s)
   outBit  -= LSLLZW_CODE_WIDTH;
   outBits += LSLLZW_CODE_WIDTH;
   list  t  = [];
   
   if (outBit >= 0) t = [outBuf | (code << outBit)];
   else {
       outBuf = outBuf | (code >> -outBit);
       outBit += 32;
       t = [(code << outBit), outBuf];
   }
   
   return (t = data = output = dictionary = []) + [outBits] + output + t;

}

list lslLZWDecompress(list data) {

   integer dataBits = llList2Integer(data, 0);
   if (dataBits <= LSLLZW_MIN_DATA_SIZE) 
       return [
           "Data too short! Must be at least " + 
           (string)LSLLZW_MIN_DATA_SIZE + " bits long to compress"
       ];
   
   // Remove header
   data = llDeleteSubList((data = []) + data, 0, 0);
   
   // Set-up dictionary
   list    dictionary  = [];
   list    prefixes    = [];
   integer nextCode    = 256; // 0-255 are the basic codes
   integer maxCode     = (1 << LSLLZW_CODE_WIDTH) - 1;
   
   // Set-up reading
   integer inBuf  = llList2Integer(data, 0);
   integer inPrev = 0;
   integer inBit  = 32 - LSLLZW_CODE_WIDTH;
   
   // Process characters
   integer oldCode = (inBuf >> inBit) & maxCode;
   integer code    = 0;
   integer char    = oldCode;
   list    stack   = [];
   integer x       = 0;
   integer t       = 0;
   
   // Set-up output
   integer outBits = 8;
   list    output  = [];
   integer outBuf  = oldCode << 24;
   integer outBit  = 24;
   
   integer DELETE = 0;
   
   while ((dataBits -= LSLLZW_CODE_WIDTH) > 0) {
       // Load more data as required
       if ((inBit -= LSLLZW_CODE_WIDTH) < 0) {
           inPrev = inBuf;
           data   = llDeleteSubList((data = []) + data, 0, 0);
           inBuf  = llList2Integer(data, 0);
           inBit  += 32;
       }
       
       // Grab a new character
       if (!inBit || (inBit >= 32)) code = 0;
       else code = (inPrev << (32 - inBit)) & maxCode;
       
       x = 32 - LSLLZW_CODE_WIDTH;
       if (inBit > x) // Watch out for sign-bit carry distorting result!
           code = code | ((inBuf >> inBit) & ((1 << (32 - inBit)) - 1));
       else code = code | ((inBuf >> inBit) & maxCode);
       
       // Once we have the code we need to look-up it's contents
       if (code >= nextCode) {
           stack = [code];
           x  = oldCode;
       } else x = code;
       
       // Produce a stack of codes and sub-codes
       while (x > 255) {
           x -= 256;
           stack = (stack = []) + stack + 
               llList2List(dictionary, x, x);
           x  = llList2Integer(prefixes, x);
       }
       stack = (stack = []) + stack + [x];
       
       // Now we output the contents of the stack as the original 
       // characters
       x = stack != [];
       char = llList2Integer(stack, -1);
       while ((--x) >= 0) {
           t = llList2Integer(stack, x);
           
           outBit  -= 8;
           outBits += 8;
           
           if (outBit > 0) outBuf = outBuf | (t << outBit);
           else {
               output = (output = []) + output + [outBuf | t];
               outBit = 32;
               outBuf = 0;
           }
       }
       stack = [];
       
       if (nextCode <= maxCode) {
           prefixes   = (prefixes   = []) + prefixes   + [oldCode];
           dictionary = (dictionary = []) + dictionary + [char];
           ++nextCode;
       }
       
       oldCode = code;
   }
   
   // Output final code(s)
   if (outBit < 32) 
       output = (output = []) + output + [outBuf];
   
   return (data = stack = output = dictionary = []) + [outBits] + output;

}

error(integer link, string str, key id) {

   llMessageLinked(
       link,
       LSLLZW_FILTER_REPLY | LSLLZW_COMMAND_ERROR,
       str,
       id
   );

}

default {

   link_message(integer x, integer y, string msg, key id) {        
       // Is the message for us?
       if ((y & LSLLZW_FILTER_MASK) == LSLLZW_FILTER_REQUEST) {
           // Refuse overly large messages
           if (llStringLength(msg) > LSLLZW_MAX_SIZE) {
               error(
                   x, 
                   "Maxmimum message length is " + 
                   (string)LSLLZW_MAX_SIZE + 
                   " characters", 
                   id
               );
               return;
           }
           
           // What type of data do we have?
           integer type = y & LSLLZW_INPUT_TYPE_MASK;
           list data = [];
           if (type == LSLLZW_INPUT_HEX) 
               data = lslLZWHexToBytes((msg = "") + msg);
           else if (type == LSLLZW_INPUT_BASE64) 
               data = lslLZWBase64ToBytes((msg = "") + msg);
           else data = [(msg = "") + "Unsupported input-type"];
               
           // Was data parsed successfully?
           if (llGetListEntryType(data, 0) != TYPE_INTEGER) {
               error(x, llList2String((data = []) + data, 0), id);
               return;
           }
           
           // Now determine mode of operation
           type = y & LSLLZW_COMMAND_MASK;
           if (type == LSLLZW_COMMAND_COMPRESS) 
               data = lslLZWCompress((data = []) + data);
           else if (type == LSLLZW_COMMAND_DECOMPRESS) 
               data = lslLZWDecompress((data = []) + data);
           else data = ["Unsupported mode"];
           
           // Was mode executed successfully?
           if (llGetListEntryType(data, 0) != TYPE_INTEGER) {
               error(x, llList2String((data = []) + data, 0), id);
               return;
           }
           
           // Convert into requested output type
           integer output = y & LSLLZW_OUTPUT_TYPE_MASK;
           if (output == LSLLZW_OUTPUT_HEX) {
               msg = lslLZWBytesToHex((data = []) + data);
               output = LSLLZW_INPUT_HEX;
           } else if (output == LSLLZW_OUTPUT_BASE64) {
               msg = lslLZWBytesToBase64((data = []) + data);
               output = LSLLZW_INPUT_BASE64;
           } else {
               error(x, "Invalid output type", id);
               return;
           }
           
           // Construct reply
           llMessageLinked(
               x,
               LSLLZW_FILTER_REPLY | type | output,
               (msg = "") + msg,
               id
           );
       }
   }

}</lsl>