LSL Regex Engine

From Second Life Wiki
Jump to navigation Jump to search

Created by Kira Komarov.

Introduction

We have heard that a regular expression engine in LSL will not work well and decided to prove that it does by creating our own mini-engine that supports several of the standard regex functions. All functions use recursion to delete the input string, which provides a better memory performance for fragile, limited stacks such as the LSL VM.

After a chat with Marky Helstein we created the the kleene function which is disabled since it behaves differently than a traditional Kleene star. The function behaves like a kleene plus sign with the exception that it continues to match till the end of the string. Later on, we based the alter and plus on the kleene function and expanded the engine.

Supported Symbols

  • Kleene-* closures.
  • | alternation.

Current Status: Test Drives

Here are some test runs made with the regex engine:

Input Expression Result Status
rraaats r|a rraaa PASS
rraaats a* aaa PASS
rraaats r* rr PASS
rraaats r|t rrt PASS
rraaats a|t aaat PASS
rraaats r|a|t rraaat PASS
rraaats r|a|t|s rraaats PASS
rraaats s|t|a|r rraaats PASS
rraaats t|a|s aaats PASS

Code

////////////////////////////////////////////
//               CONFIGURATION            //
////////////////////////////////////////////
// Test string to test regular expression on.
string stringInput = "rraaats";

////////////////////////////////////////////
//               INTERNALS                //
////////////////////////////////////////////
string mem = "";

////////////////////////////////////////////
//////////////// UNUSED ////////////////////
////////////////////////////////////////////
string kleene(string input, string c) {
  if(llStringLength(input) == 0) return "";
  string d = llGetSubString(input, 0, 0);
  input = llDeleteSubString(input, 0, 0);
  if(c == d) {
    return c + kleene(input, c);
  }
  return kleene(input, c);
}
////////////////////////////////////////////

////////////////////////////////////////////
// Kleene Star: zero or more characters.  //
////////////////////////////////////////////
star(string input, string c) {
    if(llStringLength(input) == 0) return;
    string d = llGetSubString(input, 0, 0);
    input = llDeleteSubString(input, 0, 0);
    if (c == d) {
      mem += c;
      star(input, c);
    }
    if(mem == "") star(input, c);
}

////////////////////////////////////////////
// Alternation: either-or characters.     //
////////////////////////////////////////////
alter(string input, string ei, string or) {
    if(llStringLength(input) == 0) return;
    string d = llGetSubString(input, 0, 0);
    input = llDeleteSubString(input, 0, 0);
    if(ei == d || or == d) {
        mem += d;
        alter(input, ei, or);
    }
    if(ei != d && or != d) alter(input, ei, or);
}

////////////////////////////////////////////
/////// AUXILIARY : PRESERVES ORDER ////////
////////////////////////////////////////////
string order(string input, string refer) {

    integer itra = llStringLength(refer)-1;
    do {
        string s = llGetSubString(refer, itra, itra);
        if(llSubStringIndex(input, s) == -1) {
            refer = llDeleteSubString(refer, itra, itra);
        }
    } while(--itra>=0);
    return refer;
    
}

default {
    state_entry() {
        llListen(0, "", llGetOwner(), "");
    }
    listen(integer channel, string name, key id, string message) {

        string prev;
        string match = stringInput;
 
        do { 
            // Grab next character from input.
            string in = llGetSubString(message, 0, 0);
 
            if(in == "*") {
                star(match, prev);
            }
 
            if(in == "|") {
                string next = llDeleteSubString(message, 0, 0);
                alter(match, prev, next);
            }
            
            // Store current character for backtracking.
            prev = in;
        } while(message = llDeleteSubString(message, 0, 0));
        
        llOwnerSay(order(mem, stringInput));
        llResetScript();
    }
}

See also