LSL Regex Engine
Created by Kira Komarov.
Introduction
We have heard that a regular expression engine in LSL will not work well and decided to prove that it does by creating our own mini-engine that supports several of the standard regex functions. All functions use recursion to delete the input string, which provides a better memory performance for fragile, limited stacks such as the LSL VM.
After a chat with Marky Helstein we created the the kleene function which is disabled since it behaves differently than a traditional Kleene star. The function behaves like a kleene plus sign with the exception that it continues to match till the end of the string. Later on, we based the alter and plus on the kleene function and expanded the engine.
Supported Symbols
- Kleene-* closures.
- | alternation.
Current Status: Test Drives
Here are some test runs made with the regex engine:
Input | Expression | Result | Status |
rraaats | r|a | rraaa | PASS |
rraaats | a* | aaa | PASS |
rraaats | r* | rr | PASS |
rraaats | r|t | rrt | PASS |
rraaats | a|t | aaat | PASS |
rraaats | r|a|t | rraaat | PASS |
rraaats | r|a|t|s | rraaats | PASS |
rraaats | s|t|a|r | rraaats | PASS |
rraaats | t|a|s | aaats | PASS |
Code
////////////////////////////////////////////
// CONFIGURATION //
////////////////////////////////////////////
// Test string to test regular expression on.
string stringInput = "rraaats";
////////////////////////////////////////////
// INTERNALS //
////////////////////////////////////////////
string mem = "";
////////////////////////////////////////////
//////////////// UNUSED ////////////////////
////////////////////////////////////////////
string kleene(string input, string c) {
if(llStringLength(input) == 0) return "";
string d = llGetSubString(input, 0, 0);
input = llDeleteSubString(input, 0, 0);
if(c == d) {
return c + kleene(input, c);
}
return kleene(input, c);
}
////////////////////////////////////////////
////////////////////////////////////////////
// Kleene Star: zero or more characters. //
////////////////////////////////////////////
star(string input, string c) {
if(llStringLength(input) == 0) return;
string d = llGetSubString(input, 0, 0);
input = llDeleteSubString(input, 0, 0);
if (c == d) {
mem += c;
star(input, c);
}
if(mem == "") star(input, c);
}
////////////////////////////////////////////
// Alternation: either-or characters. //
////////////////////////////////////////////
alter(string input, string ei, string or) {
if(llStringLength(input) == 0) return;
string d = llGetSubString(input, 0, 0);
input = llDeleteSubString(input, 0, 0);
if(ei == d || or == d) {
mem += d;
alter(input, ei, or);
}
if(ei != d && or != d) alter(input, ei, or);
}
////////////////////////////////////////////
/////// AUXILIARY : PRESERVES ORDER ////////
////////////////////////////////////////////
string order(string input, string refer) {
integer itra = llStringLength(refer)-1;
do {
string s = llGetSubString(refer, itra, itra);
if(llSubStringIndex(input, s) == -1) {
refer = llDeleteSubString(refer, itra, itra);
}
} while(--itra>=0);
return refer;
}
default {
state_entry() {
llListen(0, "", llGetOwner(), "");
}
listen(integer channel, string name, key id, string message) {
string prev;
string match = stringInput;
do {
// Grab next character from input.
string in = llGetSubString(message, 0, 0);
if(in == "*") {
star(match, prev);
}
if(in == "|") {
string next = llDeleteSubString(message, 0, 0);
alter(match, prev, next);
}
// Store current character for backtracking.
prev = in;
} while(message = llDeleteSubString(message, 0, 0));
llOwnerSay(order(mem, stringInput));
llResetScript();
}
}
See also
- Independent fully capable regular expression engine in LSL by Sei Lisa