User:Andrew Linden/ChatWick

From Second Life Wiki
Jump to navigation Jump to search

This is the latest version of the script that generates the wiki format text for the Simulator_User_Group transcripts.

#!/usr/bin/env python
#
# chatwick.py -- translate chat logs into wiki format
#
# License = common domain
# 
# Usage: chatwick [-s] [-v] [-T] [-l] [-c config_file] [-o output_file] chat_log_file

import datetime
import optparse
import sys
import random
import re
import os.path


# Bit offsets of the red, green and blue channels inside a packed
# 0xRRGGBB integer color value.
RED_SHIFT = 16
GREEN_SHIFT = 8
BLUE_SHIFT = 0

# Wiki table markup fragments used when writing the list of speakers:
# table open, cell start, row separator and table close.
TABLE_START = "{|\n"
TABLE_COLUMN_START = "|"
TABLE_ROW_END = "|-\n"
TABLE_END = "|}\n"

# Mid-gray color used for 'Object' chat lines unless the config file
# assigns a different color to 'Object'.
DEFAULT_OBJECT_COLOR = "#808080"


def replace_link(match_obj):
    """Build the substitution text for a regex match of a URL.

    match_obj must expose a named group 'link' holding the URL.  The
    result is that URL preceded by a closing nowiki tag.
    """
    # NOTE(review): only a closing tag is produced here and no opening
    # tag follows the URL -- presumably the matching opening tag was lost
    # when this script was pasted into the wiki; confirm against the
    # original script.
    url = match_obj.group('link')
    return "</nowiki>%s" % (url,)
    

def angle_to_hex(angle, color):
    """Return the 0..255 intensity of one primary color at a hue angle.

    angle -- hue angle in degrees; any value is accepted and wrapped
             into the range [0, 360).
    color -- 'red', 'green' or 'blue'.

    Raises ValueError when color is not one of the three primaries.
    """
    # Here is the angle curve for red
    #     |
    # 255 +-------                               --------+
    #     |       \                             /
    #     |        \                           /
    #     |         \                         /
    #     |          \                       /
    #     |           \                     /
    #     |            \                   /
    #     |             \                 /
    #   0-+-------|------|=======|=======|-------|-------|--
    #     0      60     120     180     240     300     360
    #
    # Green and blue are the same curve, just shifted by 120 and 240 degrees.
    #
    offsets = {'red': 0, 'green': -120, 'blue': -240}
    if color not in offsets:
        # String exceptions are not valid; raise a real exception type.
        raise ValueError("Unknown primary color %s" % color)

    # Python's % with a positive divisor never yields a negative result,
    # so this alone wraps the shifted angle into [0, 360).
    color_angle = (angle + offsets[color]) % 360

    if color_angle < 60 or color_angle >= 300:
        return 255
    if color_angle < 120:
        # falling edge: 255 at 60 degrees down to 0 at 120 degrees
        return int(255.0 * (1.0 - (float(color_angle) - 60.0) / 60.0))
    if color_angle < 240:
        return 0
    # rising edge: 0 at 240 degrees up to 255 at 300 degrees
    return int(255.0 * ((float(color_angle) - 240.0) / 60.0))
        

def generate_colors(hues, saturations, max_saturation, min_saturation):
    """Return a list of '#rrggbb' color strings.

    hues           -- requested number of hue angles around the color wheel
    saturations    -- number of brightness levels to generate
    max_saturation -- upper bound of the scale factor (never reached, see below)
    min_saturation -- scale factor of the first level

    Each level scales the full-intensity channel values by a factor that
    starts at min_saturation and grows by
    (max_saturation - min_saturation) / saturations per level, so the last
    level stays one step below max_saturation.

    The hue step is 360 / hues truncated to a whole number of degrees, so
    when hues does not divide 360 evenly more than `hues` angles are
    produced per level (e.g. 13 gives a step of 27 and 14 angles).

    Returns an empty list when hues or saturations is not positive.
    """
    if hues <= 0 or saturations <= 0:
        # nothing to generate; also avoids dividing by zero below
        return []

    # never let the step truncate to 0, which range() rejects
    hue_step = max(1, int(360 / hues))
    saturation_step = (max_saturation - min_saturation) / float(saturations)

    colors = []
    saturation = min_saturation
    level = 0
    while level < saturations:
        for angle in range(0, 360, hue_step):
            red = int(angle_to_hex(angle, 'red') * saturation)
            green = int(angle_to_hex(angle, 'green') * saturation)
            blue = int(angle_to_hex(angle, 'blue') * saturation)
            colors.append('#%02x%02x%02x' % (red, green, blue))
        saturation += saturation_step
        level += 1
    return colors


def write_output(output_fp, line):
    """Write line to output_fp; do nothing when output_fp is falsy (e.g. None)."""
    if not output_fp:
        return
    output_fp.write(line)


class LineProcessor:
    """Translate chat-log lines into wiki markup with one color per speaker.

    Typical use: optionally readConfig() and replaceKeywordsInConfig(),
    then buildColorMap() over the chat log, then processLine() for every
    line of the log.

    Public attributes:
      configured_color_map -- name -> color taken from the config file
      assigned_color_map   -- name -> color actually in use
      deletes              -- names whose chat lines are dropped
      prefix / postfix     -- lines to emit before / after the transcript
    """

    # [00:00] text
    re_timestamped_line = re.compile(r'(?P<timestamp>^\[\d\d:\d\d\])\s+(?P<text>.*)$')
    # User Name: text
    re_two_word_name_line = re.compile(r'^(?P<username>\w+ \w+):\s+(?P<text>.*)$')
    # User Name text
    re_me_line = re.compile(r'^(?P<username>\w+ \w+)\s+(?P<text>.*)$')
    # UserName: text
    re_one_word_name_line = re.compile(r'^(?P<username>\w+):\s+(?P<text>.*)$')
    # Object with arbitrarily long name: text
    re_object_line = re.compile(r'^(?P<objectname>.*?):\s+(?P<text>.+)$')
    # [0000/00/00 00:00] anything
    re_old_timestamp = re.compile(r'^\[\d{4,4}/\d\d/\d\d \d\d:\d\d]\s+.*$')
    # Something Linden
    re_special_name = re.compile(r'^\w+ Linden$')
    # http://something
    re_http_link = re.compile(r'(?P<link>https?://[-_:/=\w\.\?\$]+)')
    # <nowiki></nowiki> pair that encloses nothing but whitespace
    re_useless_nowiki = re.compile(r'<nowiki>(?P<space>\s*)</nowiki>')

    def __init__(self):
        self.configured_color_map = {}
        self.assigned_color_map = {}
        self.deletes = {}
        self.last_color = None
        self.available_colors = generate_colors(13, 4, 0.90, 0.4)
        self.special_colors = generate_colors(8, 1, 0.4, 0.3)
        self.prefix = []
        self.postfix = []

    def listColors(self):
        """Debug helper: print every available and special color to stdout."""
        for color in self.available_colors:
            print('<font color="%s"><b>%s</b> available color</font>\n' % (color, color))
        for color in self.special_colors:
            print('<font color="%s"><b>%s</b> special color</font>\n' % (color, color))

    def readConfig(self, file):
        """Load colors, deletions and prefix/postfix lines from a config file.

        All config lines have the following format:
            command 'User Name' [option]
        Currently supported commands are:
            color 'User Name' colorname
            delete 'User Name'
            prefix 'prefix line'
            postfix 'postfix line'
        The quoted part may use single or double quotes.  Blank lines,
        lines starting with '#' and lines that do not fit the format are
        ignored.

        The prefix/postfix commands will pre/postpend the output with those
        lines, in the order they are given.  Some ALL_CAPS keywords in the
        pre/postfix lines will be replaced (if applicable).  See
        replaceKeywordsInConfig().
        """
        with open(file) as fp:
            for line in fp:
                clean_line = line.strip()
                if not clean_line or clean_line.startswith('#'):
                    continue
                parts = clean_line.split(None, 1)
                if len(parts) != 2:
                    # a command with no argument cannot be interpreted
                    continue
                key, value = parts

                # exactly one quoted section is expected: before'quoted'after
                words = value.split("'")
                if len(words) != 3:
                    words = value.split('"')
                if len(words) != 3:
                    continue

                if key == "color":
                    self.configured_color_map[words[1]] = words[2].strip().lower()
                elif key == "delete":
                    self.deletes[words[1]] = 1
                elif key == "prefix":
                    self.prefix.append(value.strip("\"'"))
                elif key == "postfix":
                    self.postfix.append(value.strip("\"'"))

        if 'Object' not in self.configured_color_map:
            # by default objects show up in DEFAULT_OBJECT_COLOR
            self.configured_color_map['Object'] = DEFAULT_OBJECT_COLOR

        # explicitly configured colors must not be handed out automatically
        configured_colors = set(self.configured_color_map.values())
        self.available_colors = [color for color in self.available_colors
                                 if color not in configured_colors]
        self.special_colors = [color for color in self.special_colors
                               if color not in configured_colors]

    def replaceKeywordsInConfig(self, input_file):
        """Substitute date keywords in the prefix and postfix lines.

        The following keywords are replaced:
            THIS_WEEK = "YYYY.MM.DD" for the date in the input file name
            PREV_WEEK = "YYYY.MM.DD" for one week before that date
            NEXT_WEEK = "YYYY.MM.DD" for one week after that date
        This only works if the input_file name contains the date in one of
        the following formats:
            YYYYMMDD
            YYYY.MM.DD (where . can be any character)
        When no valid date is found the lines are left untouched.
        """
        this_week = self._dateFromFileName(input_file)
        if this_week is None:
            return
        one_week = datetime.timedelta(days=7)
        replacements = (
            ("THIS_WEEK", self._dottedDate(this_week)),
            ("PREV_WEEK", self._dottedDate(this_week - one_week)),
            ("NEXT_WEEK", self._dottedDate(this_week + one_week)),
        )
        self.prefix = [self._replaceKeywords(line, replacements)
                       for line in self.prefix]
        self.postfix = [self._replaceKeywords(line, replacements)
                        for line in self.postfix]

    def _dateFromFileName(self, input_file):
        """Return the datetime.date embedded in input_file, or None."""
        # (pattern, index of month digits, index of day digits)
        layouts = (
            (r'20\d\d\d\d\d\d', 4, 6),
            (r'20\d\d.\d\d.\d\d', 5, 8),
        )
        for pattern, month_at, day_at in layouts:
            found = re.search(pattern, input_file)
            if not found:
                continue
            date_str = found.group(0)
            try:
                return datetime.date(int(date_str[0:4]),
                                     int(date_str[month_at:month_at + 2]),
                                     int(date_str[day_at:day_at + 2]))
            except ValueError:
                # digits that look like a date but are not one (month 13...)
                continue
        return None

    def _dottedDate(self, date):
        """Format a date as YYYY.MM.DD."""
        return str(date).replace('-', '.')

    def _replaceKeywords(self, line, replacements):
        """Return line with every (keyword, value) pair substituted."""
        for keyword, value in replacements:
            line = line.replace(keyword, value)
        return line

    def buildColorMap(self, file):
        """Scan the chat log once and assign a color to every speaker.

        Two-word names followed by a colon are assumed to be users (except
        "Second Life") and get a color through getColor().  One-word names
        only get a color when the config file assigns one.  'Object' always
        ends up in the map.
        """
        with open(file, 'r') as fp:
            for line in fp:
                timestamp_result = self.re_timestamped_line.match(line)
                if not timestamp_result:
                    continue
                text = timestamp_result.group('text')
                # hunt for 2-word names which we assume to be users
                # rather than objects
                name_result = self.re_two_word_name_line.match(text)
                if name_result:
                    name = name_result.group('username')
                    if name != "Second Life":
                        self.getColor(name)
                    continue
                # hunt for 1-word names that match configured colors
                name_result = self.re_one_word_name_line.match(text)
                if name_result:
                    name = name_result.group('username')
                    if name in self.configured_color_map:
                        self.assigned_color_map[name] = self.configured_color_map[name]

        if 'Object' not in self.assigned_color_map:
            if 'Object' in self.configured_color_map:
                color = self.configured_color_map['Object']
            else:
                color = DEFAULT_OBJECT_COLOR
            self.assigned_color_map['Object'] = color

    def getAssignedColorMap(self):
        """Return the name -> color dict in use (the live dict, not a copy)."""
        return self.assigned_color_map

    def getColor(self, name):
        """Return the color for name, assigning one on first use.

        Order of preference: already assigned, configured, a special color
        for "Something Linden" names, then the next available color.  When
        the palette is exhausted DEFAULT_OBJECT_COLOR is used.
        """
        if name in self.assigned_color_map:
            return self.assigned_color_map[name]

        if name in self.configured_color_map:
            color = self.configured_color_map[name]
        elif self.re_special_name.match(name) and self.special_colors:
            color = self.special_colors.pop()
        elif self.available_colors:
            color = self.available_colors.pop()
        else:
            # more speakers than colors: share the default instead of crashing
            color = DEFAULT_OBJECT_COLOR
        self.assigned_color_map[name] = color
        return color

    def _wikiText(self, text):
        """Escape chat text for the wiki while keeping URLs clickable.

        The text is wrapped in nowiki tags so wiki markup typed in chat is
        shown literally; every URL is moved outside the tags.  Tag pairs
        left holding only whitespace are reduced to that whitespace.
        """
        linked = self.re_http_link.sub(r'</nowiki>\g<link><nowiki>', text)
        wrapped = "<nowiki>%s</nowiki>" % linked
        return self.re_useless_nowiki.sub(r'\g<space>', wrapped)

    def _formatTimestampedLine(self, timestamp, text):
        """Format the text following a [hh:mm] timestamp, or return None.

        None means the line is dropped: system ("Second Life") chat, a
        deleted name, or text with no recognizable speaker.
        """
        name = None
        color = None
        # check for two-word name
        resi_result = self.re_two_word_name_line.match(text)
        if resi_result:
            candidate = resi_result.group('username')
            if candidate != "Second Life" and candidate not in self.deletes:
                name = candidate
                color = self.getColor(name)
                text = ": %s" % resi_result.group('text')
        else:
            # check for /me line; only names already known as speakers count
            me_result = self.re_me_line.match(text)
            if me_result and me_result.group('username') in self.assigned_color_map:
                candidate = me_result.group('username')
                if candidate not in self.deletes:
                    name = candidate
                    color = self.getColor(name)
                    text = " %s" % me_result.group('text')
            else:
                # check for object line
                object_result = self.re_object_line.match(text)
                if object_result:
                    candidate = object_result.group('objectname')
                    if candidate not in self.deletes:
                        name = candidate
                        if name in self.assigned_color_map:
                            color = self.assigned_color_map[name]
                        else:
                            color = self.assigned_color_map['Object']
                        text = ": %s" % object_result.group('text')
        if not color:
            return None
        self.last_color = color
        return "%s <font color=%s><b>%s</b>%s</font>\n" % (
            timestamp, color, name, self._wikiText(text))

    def processLine(self, line):
        """Return the wiki markup for one chat-log line, or None to skip it.

        Lines without a [hh:mm] timestamp are treated as a continuation of
        the previous speaker's text, except lines carrying the old
        [YYYY/MM/DD hh:mm] timestamp, which are skipped.
        """
        line = line.rstrip()

        timestamp_result = self.re_timestamped_line.match(line)
        if timestamp_result:
            return self._formatTimestampedLine(
                timestamp_result.group('timestamp'),
                timestamp_result.group('text'))

        # some strange line
        if self.re_old_timestamp.match(line):
            # skip this line
            return None

        # assume this was continuation of the last line
        if self.last_color:
            return "<font color=%s>%s</font>\n" % (self.last_color,
                                                   self._wikiText(line))
        # no speaker seen yet, so there is no color to continue with
        return "%s\n" % self._wikiText(line)

def main():
    """Command-line entry point: convert one chat log into wiki markup.

    Reads the chat log named on the command line, optionally applies a
    config file (default ~/.chatwickrc), and writes a list of speakers
    followed by the colored transcript to stdout, to the file given with
    -o, or to the input file name with its extension replaced by '.wiki'.
    With --test-only everything is processed but nothing is written.

    Usage errors are reported through the option parser, which prints the
    message and exits with a non-zero status.
    """
    parser = optparse.OptionParser('%prog [options] chatlog.txt\n\tby default automatically stores result in chatlog.wiki')
    parser.add_option( '-s', '--stdout',
        action='store_true', dest='stdout', 
        default=False,
        help='print to stdout rather than default output file',)
    parser.add_option( '-o', '--output-file',
        type='string', dest='output_file',
        default=None,
        help='write to OUTPUT_FILE')
    parser.add_option( '-c', '--config-file',
        type='string', dest='config_file',
        default=None,
        help='resource config file (default is ~/.chatwickrc)')
    parser.add_option( '-v', '--verbose',
        action='store_true', dest='verbose', 
        default=False,
        help='print verbose output to stdout',)
    parser.add_option( '-l', '--list-colors',
        action='store_true', dest='list_colors', 
        default=False,
        help='list colors available and exit',)
    parser.add_option( '-T', '--test-only',
        action='store_true', dest='test_only', 
        default=False,
        help='set --verbose and do not write output',)
    options, args = parser.parse_args()

    reader = LineProcessor()

    if options.list_colors:
        reader.listColors()
        sys.exit()

    # make sure input file is specified and valid
    if len(args) == 0:
        print("No input file was specified.")
        parser.print_help()
        sys.exit(1)

    input_file = args[0]

    if options.test_only:
        options.verbose = True
    if not os.path.isfile(input_file):
        parser.error("Could not find input file '%s'" % input_file)
    if input_file == options.output_file:
        parser.error("Input and output are the same file")
    if options.verbose:
        print("input_file = '%s'" % input_file)

    if options.config_file is None:
        # expanduser() also works when HOME is not set in the environment
        options.config_file = os.path.join(os.path.expanduser("~"), ".chatwickrc")

    # read config file
    if os.path.isfile(options.config_file):
        reader.readConfig(options.config_file)
        reader.replaceKeywordsInConfig(input_file)

    # open the output file
    output_fp = None
    owns_output = False     # True only for a file opened here
    if options.stdout:
        if not options.test_only:
            output_fp = sys.stdout
    else:
        if options.output_file is None:
            (file_name, ext) = os.path.splitext(input_file)
            if ext == ".wiki":
                parser.error("Will not auto overwrite filename with '.wiki' extension.")
            options.output_file = file_name + ".wiki"
            label = "auto output_file"
        else:
            label = "output_file"
        if not options.test_only:
            output_fp = open(options.output_file, 'w')
            owns_output = True
        if options.verbose:
            print("%s = '%s'" % (label, options.output_file))

    try:
        reader.buildColorMap(input_file)

        # 'Object' must stay in the reader's map (processLine() looks it up),
        # so it is filtered out here rather than deleted.  Sorting is
        # case-insensitive; the name itself breaks ties so that names that
        # differ only by case are both listed.
        color_map = reader.getAssignedColorMap()
        speakers = sorted((name for name in color_map if name != "Object"),
                          key=lambda name: (name.lower(), name))

        # one column per five speakers, at least 1 and at most 3
        column_count = min(3, max(1, len(speakers) // 5))

        for line in reader.prefix:
            write_output(output_fp, line)
            write_output(output_fp, "\n")
        write_output(output_fp, "== List of Speakers ==\n")
        write_output(output_fp, TABLE_START)

        # list each name in its color
        column_index = 1
        for name in speakers:
            line = "%s<font color=%s><b>%s</b></font>\n" % (
                TABLE_COLUMN_START, color_map[name], name)
            write_output(output_fp, line)
            if column_index == column_count:
                write_output(output_fp, TABLE_ROW_END)
                column_index = 1
            else:
                column_index += 1
        write_output(output_fp, TABLE_END)

        write_output(output_fp, "\n")
        write_output(output_fp, "== Transcript ==\n")
        with open(input_file, 'r') as input_fp:
            for line in input_fp:
                new_line = reader.processLine(line)
                if new_line:
                    write_output(output_fp, new_line)
                    write_output(output_fp, "\n")

        for line in reader.postfix:
            write_output(output_fp, line)
            write_output(output_fp, "\n")
    finally:
        # never close sys.stdout, and there is nothing to close in test mode
        if owns_output:
            output_fp.close()

# Run main() only when this file is executed as a script; whatever main()
# returns is passed to sys.exit() as the exit status.
if __name__ == '__main__':
    sys.exit(main())