User:Andrew Linden/ChatWick
Jump to navigation
Jump to search
This is the latest version of the script that generates the wiki format text for the Simulator_User_Group transcripts.
#!/usr/bin/env python
#
# chatwick.py -- translate chat logs into wiki format
#
# License = common domain
#
# Usage: chatwick [-c config_file] [-o output_file] chat_log_file
import datetime
import optparse
import sys
import random
import re
import os.path
# Left-shift amounts used when packing red/green/blue channel values into a
# single integer (see generate_colors).
RED_SHIFT = 16
GREEN_SHIFT = 8
BLUE_SHIFT = 0
# Wiki table markup fragments; main() writes them around the speaker list.
TABLE_START = "{|\n"
TABLE_COLUMN_START = "|"
TABLE_ROW_END = "|-\n"
TABLE_END = "|}\n"
# Color given to the 'Object' entry when the config file does not set one.
DEFAULT_OBJECT_COLOR = "#808080"
def replace_link(match_obj):
    """Build the replacement text for one matched URL.

    Meant to be passed as the callback of a regex ``sub()`` call whose
    pattern defines a named group ``link``.

    match_obj -- match object exposing a ``link`` group
    returns   -- the matched link preceded by a closing ``</nowiki>`` tag

    NOTE(review): only a closing tag is emitted; nothing here reopens the
    nowiki span after the link -- confirm the surrounding markup balances.
    """
    link = match_obj.group('link')
    return "</nowiki>%s" % link
def angle_to_hex(angle, color):
    """Return the 0..255 intensity of one primary color at a hue angle.

    angle  -- hue angle in degrees; any value is accepted and wrapped
              into [0, 360)
    color  -- 'red', 'green' or 'blue'
    returns -- integer channel intensity in the range 0..255
    raises  -- ValueError if color is not one of the three primaries
    """
    # Intensity curve for red:
    #
    #  255 +------+                              +------+
    #      |       \                            /
    #      |        \                          /
    #      |         \                        /
    #    0 +----------+======+======+--------+----------+--
    #      0    60   120    180    240      300        360
    #
    # Green and blue follow the same curve, shifted by 120 and 240 degrees.
    offsets = {'red': 0, 'green': -120, 'blue': -240}
    if color not in offsets:
        # A real exception class is required; raising a bare string is
        # itself an error and hides the intended message.
        raise ValueError("Unknown primary color %s" % color)

    # Python's % already yields a value in [0, 360) for a positive modulus,
    # so no extra fix-up for negative angles is needed.
    color_angle = (angle + offsets[color]) % 360

    if color_angle < 60 or color_angle >= 300:
        return 255
    if color_angle < 120:
        # falling edge: 255 at 60 degrees down to 0 at 120 degrees
        return int(255.0 * (1.0 - (float(color_angle) - 60.0) / 60.0))
    if color_angle < 240:
        return 0
    # rising edge: 0 at 240 degrees up to 255 at 300 degrees
    return int(255.0 * ((float(color_angle) - 240.0) / 60.0))
def generate_colors(hues, saturations, max_saturation, min_saturation):
    """Build a palette of '#rrggbb' color strings.

    hues           -- requested number of hue steps around the color wheel
    saturations    -- number of brightness levels to generate
    max_saturation -- upper bound of the scale factor (never reached; the
                      last level is one step below it)
    min_saturation -- scale factor of the first level
    returns        -- list of lowercase '#rrggbb' strings, ordered level by
                      level and, within a level, by increasing hue angle;
                      empty when hues or saturations is not positive

    The hue step is int(360 / hues) degrees, so when 360 is not a multiple
    of that step one more hue than requested is produced (13 -> a step of
    27 degrees -> 14 hues per level).
    """
    if hues <= 0 or saturations <= 0:
        # nothing sensible to generate; avoids dividing by zero below
        return []

    # never let the step collapse to 0 (hues > 360), range() rejects that
    hue_step = max(1, int(360 / hues))
    saturation_step = (max_saturation - min_saturation) / float(saturations)

    colors = []
    saturation = min_saturation
    level = 0
    while level < saturations:
        for angle in range(0, 360, hue_step):
            red = int(angle_to_hex(angle, 'red') * saturation)
            green = int(angle_to_hex(angle, 'green') * saturation)
            blue = int(angle_to_hex(angle, 'blue') * saturation)
            colors.append('#%02x%02x%02x' % (red, green, blue))
        saturation += saturation_step
        level += 1
    return colors
def write_output(output_fp, line):
    """Write line to output_fp, doing nothing when output_fp is falsy.

    output_fp -- object with a write() method, or None to discard output
    line      -- text to write; no newline is appended
    """
    if not output_fp:
        return
    output_fp.write(line)
class LineProcessor:
    """Convert chat-log lines into colored wiki markup, one line at a time.

    Instance attributes:
      configured_color_map -- name -> color taken from the config file
      assigned_color_map   -- name -> color actually in use for this log
      deletes              -- names whose lines are dropped from the output
      last_color           -- color of the most recent emitted speaker line,
                              reused for continuation lines
      available_colors     -- unassigned colors for ordinary speakers
      special_colors       -- unassigned colors for "<word> Linden" names
      prefix / postfix     -- lines written before / after the transcript
    """

    # Arguments handed to generate_colors() for the two palettes.
    _AVAILABLE_PALETTE = (13, 4, 0.90, 0.4)
    _SPECIAL_PALETTE = (8, 1, 0.4, 0.3)

    # The patterns are shared by all instances; they are raw strings so the
    # backslashes reach the regex engine untouched.
    # [00:00] text
    re_timestamped_line = re.compile(r'(?P<timestamp>^\[\d\d:\d\d\])\s+(?P<text>.*)$')
    # User Name: text
    re_two_word_name_line = re.compile(r'^(?P<username>\w+ \w+):\s+(?P<text>.*)$')
    # User Name text
    re_me_line = re.compile(r'^(?P<username>\w+ \w+)\s+(?P<text>.*)$')
    # UserName: text
    re_one_word_name_line = re.compile(r'^(?P<username>\w+):\s+(?P<text>.*)$')
    # Object with arbitrarily long name: text
    re_object_line = re.compile(r'^(?P<objectname>.*?):\s+(?P<text>.+)$')
    # [0000/00/00 00:00] anything
    re_old_timestamp = re.compile(r'^\[\d{4,4}/\d\d/\d\d \d\d:\d\d]\s+.*$')
    # Something Linden
    re_special_name = re.compile(r'^\w+ Linden$')
    # http://something
    re_http_link = re.compile(r'(?P<link>https?://[-_:/=\w\.\?\$]+)')
    # <nowiki></nowiki> with nothing but whitespace inside.
    # NOTE(review): the pattern used to be '\s*', which deleted ALL
    # whitespace from every output line; the tags were presumably lost when
    # the script was pasted into a wiki page -- confirm against the
    # author's copy.
    re_useless_nowiki = re.compile(r'<nowiki>\s*</nowiki>')

    def __init__(self):
        self.configured_color_map = {}
        self.assigned_color_map = {}
        self.deletes = {}
        self.last_color = None
        self.available_colors = generate_colors(*self._AVAILABLE_PALETTE)
        self.special_colors = generate_colors(*self._SPECIAL_PALETTE)
        self.prefix = []
        self.postfix = []

    def listColors(self):
        """Debug helper: print every unassigned color as a wiki font tag."""
        for color in self.available_colors:
            print('<font color="%s"><b>%s</b> available color</font>\n' % (color, color))
        for color in self.special_colors:
            print('<font color="%s"><b>%s</b> special color</font>\n' % (color, color))

    def readConfig(self, file):
        """Load colors, deletions and prefix/postfix lines from a config file.

        All config lines have the following format:
            command 'User Name' [option]
        Currently supported commands are:
            color 'User Name' colorname
            delete 'User Name'
            prefix 'prefix line'
            postfix 'postfix line'
        The prefix/postfix commands will pre/postpend the output with those
        lines, in the order they are given. Some ALL_CAPS keywords in the
        pre/postfix lines will be replaced (if applicable). See
        replaceKeywordsInConfig().

        Blank lines, lines starting with '#', lines consisting of a single
        word and lines whose argument is not exactly one quoted string are
        skipped.

        file -- path of the config file
        """
        with open(file) as fp:
            for line in fp:
                clean_line = line.strip()
                if not clean_line or clean_line[0] == '#':
                    continue
                parts = clean_line.split(None, 1)
                if len(parts) != 2:
                    # a bare command without an argument cannot be used
                    continue
                key, value = parts
                # the argument must contain exactly one quoted string,
                # delimited by either single or double quotes
                words = value.split("'")
                if len(words) != 3:
                    words = value.split('"')
                    if len(words) != 3:
                        continue
                if key == "color":
                    self.configured_color_map[words[1]] = words[2].strip().lower()
                elif key == "delete":
                    self.deletes[words[1]] = 1
                elif key == "prefix":
                    self.prefix.append(value.strip("\"'"))
                elif key == "postfix":
                    self.postfix.append(value.strip("\"'"))

        if 'Object' not in self.configured_color_map:
            # objects get the default object color unless configured
            self.configured_color_map['Object'] = DEFAULT_OBJECT_COLOR

        # configured colors must not also be handed out automatically
        self._drop_configured_colors(self.available_colors)
        self._drop_configured_colors(self.special_colors)

    def _drop_configured_colors(self, palette):
        """Remove, in place, every color of palette that the config uses."""
        configured = set(self.configured_color_map.values())
        palette[:] = [color for color in palette if color not in configured]

    def replaceKeywordsInConfig(self, input_file):
        """Substitute date keywords in the prefix and postfix lines.

        The following keywords are replaced:
            THIS_WEEK = "YYYY.MM.DD" for the date in the input file name
            PREV_WEEK = "YYYY.MM.DD" for one week before that date
            NEXT_WEEK = "YYYY.MM.DD" for one week after that date
        This only works if the input file name contains the date in one of
        the following formats:
            YYYYMMDD
            YYYY.MM.DD (where . can be any character)
        When no valid date is found the lines are left untouched.

        input_file -- name (or path) of the chat log being converted
        """
        this_week = self._date_from_filename(input_file)
        if this_week is None:
            return
        one_week = datetime.timedelta(days=7)
        replacements = (
            ("THIS_WEEK", str(this_week).replace('-', '.')),
            ("PREV_WEEK", str(this_week - one_week).replace('-', '.')),
            ("NEXT_WEEK", str(this_week + one_week).replace('-', '.')),
        )
        self.prefix = [self._replace_keywords(line, replacements)
                       for line in self.prefix]
        self.postfix = [self._replace_keywords(line, replacements)
                        for line in self.postfix]

    def _replace_keywords(self, line, replacements):
        """Return line with every (keyword, text) pair substituted."""
        for keyword, text in replacements:
            line = line.replace(keyword, text)
        return line

    def _date_from_filename(self, input_file):
        """Return the first valid 20YY date found in input_file, or None."""
        candidates = []
        simple_date = re.search(r'20\d\d\d\d\d\d', input_file)
        if simple_date:
            digits = simple_date.group(0)
            candidates.append((digits[0:4], digits[4:6], digits[6:8]))
        # the separators are deliberately matched by '.', i.e. any character
        fancy_date = re.search(r'20\d\d.\d\d.\d\d', input_file)
        if fancy_date:
            text = fancy_date.group(0)
            candidates.append((text[0:4], text[5:7], text[8:10]))
        for year, month, day in candidates:
            try:
                return datetime.date(int(year), int(month), int(day))
            except ValueError:
                # digits that merely look like a date (month 13, day 42...)
                continue
        return None

    def buildColorMap(self, file):
        """Scan the chat log once and assign a color to every speaker.

        Two-word names in timestamped lines are assumed to be users (except
        "Second Life") and get a color via getColor(). One-word names are
        only picked up when the config file assigned them a color. An
        'Object' entry is always present afterwards.

        file -- path of the chat log
        """
        with open(file, 'r') as fp:
            for line in fp:
                stamped = self.re_timestamped_line.match(line)
                if not stamped:
                    continue
                text = stamped.group('text')
                # hunt for 2-word names which we assume to be users
                # rather than objects
                two_word = self.re_two_word_name_line.match(text)
                if two_word:
                    name = two_word.group('username')
                    if name != "Second Life":
                        self.getColor(name)
                    continue
                # hunt for 1-word names that match configured colors
                one_word = self.re_one_word_name_line.match(text)
                if one_word:
                    name = one_word.group('username')
                    if name in self.configured_color_map:
                        self.assigned_color_map[name] = self.configured_color_map[name]

        if 'Object' not in self.assigned_color_map:
            self.assigned_color_map['Object'] = self.configured_color_map.get(
                'Object', DEFAULT_OBJECT_COLOR)

    def getAssignedColorMap(self):
        """Return the live name -> color dictionary (not a copy)."""
        return self.assigned_color_map

    def getColor(self, name):
        """Return the color for name, assigning one on first use.

        Order of preference: an already assigned color, a configured color,
        a special color for "<word> Linden" names, then the next available
        color.
        """
        if name in self.assigned_color_map:
            return self.assigned_color_map[name]
        if name in self.configured_color_map:
            color = self.configured_color_map[name]
        elif self.re_special_name.match(name) and self.special_colors:
            color = self.special_colors.pop()
        else:
            color = self._next_available_color()
        self.assigned_color_map[name] = color
        return color

    def _next_available_color(self):
        """Pop an unassigned color, recycling the palette once it runs dry."""
        if not self.available_colors:
            # more speakers than colors: start reusing the palette instead
            # of failing with an IndexError on pop()
            self.available_colors = generate_colors(*self._AVAILABLE_PALETTE)
            self._drop_configured_colors(self.available_colors)
        if self.available_colors:
            return self.available_colors.pop()
        # every palette color is claimed by the config file
        return DEFAULT_OBJECT_COLOR

    def processLine(self, line):
        """Translate one chat-log line into wiki markup.

        line    -- raw line from the chat log
        returns -- the formatted line (ending in a newline), or None when
                   the line is to be dropped (deleted speaker, "Second
                   Life" messages, unrecognized timestamped lines, lines
                   carrying the old [YYYY/MM/DD HH:MM] timestamp)

        The chat text is wrapped in <nowiki> so wiki markup typed by
        speakers is shown literally; links are moved outside the nowiki
        span so they stay clickable.
        """
        line = line.rstrip()
        stamped = self.re_timestamped_line.match(line)
        if stamped:
            formatted_line = self._format_stamped(stamped.group('timestamp'),
                                                  stamped.group('text'))
        elif self.re_old_timestamp.match(line):
            # skip this line
            formatted_line = None
        else:
            # assume this was continuation of the last line; before any
            # speaker has been seen there is no color to continue with
            color = self.last_color or DEFAULT_OBJECT_COLOR
            formatted_line = "<font color=%s><nowiki>%s</nowiki></font>\n" % (color, line)

        if formatted_line:
            # check for http links
            formatted_line = self.re_http_link.sub(self._link_outside_nowiki,
                                                   formatted_line)
            # check for useless <nowiki></nowiki> pairs
            formatted_line = self.re_useless_nowiki.sub("", formatted_line)
        return formatted_line

    def _link_outside_nowiki(self, match_obj):
        """Close the nowiki span before a link and reopen it afterwards."""
        return "</nowiki>%s<nowiki>" % match_obj.group('link')

    def _format_stamped(self, timestamp, text):
        """Format the text of a timestamped line, or return None to drop it."""
        name = None
        color = None
        resi_result = self.re_two_word_name_line.match(text)
        if resi_result:
            # "User Name: text"
            name = resi_result.group('username')
            if name != "Second Life" and name not in self.deletes:
                color = self.getColor(name)
                text = ": %s" % resi_result.group('text')
        else:
            me_result = self.re_me_line.match(text)
            if me_result and me_result.group('username') in self.assigned_color_map:
                # "/me" line of a known speaker: "User Name text"
                name = me_result.group('username')
                if name not in self.deletes:
                    color = self.getColor(name)
                    text = " %s" % me_result.group('text')
            else:
                object_result = self.re_object_line.match(text)
                if object_result:
                    # "Object with arbitrarily long name: text"
                    name = object_result.group('objectname')
                    if name not in self.deletes:
                        if name in self.assigned_color_map:
                            color = self.assigned_color_map[name]
                        else:
                            color = self.assigned_color_map.get(
                                'Object', DEFAULT_OBJECT_COLOR)
                        text = ": %s" % object_result.group('text')
        if not color:
            return None
        self.last_color = color
        return "%s <font color=%s><b>%s</b><nowiki>%s</nowiki></font>\n" % (
            timestamp, color, name, text)
def main():
    """Command-line entry point: convert one chat log into wiki markup.

    Parses the options, loads the config file (default ~/.chatwickrc) if it
    exists, assigns a color to every speaker and writes the prefix lines, a
    table of speakers, the formatted transcript and the postfix lines.

    The output goes to stdout (--stdout), to --output-file, or by default
    to the input file name with its extension replaced by '.wiki'. With
    --test-only the log is processed but nothing is written.

    Usage errors (missing input file, input equal to output, refusing to
    overwrite a '.wiki' input) are reported through the option parser,
    which prints the message and exits.
    """
    parser = optparse.OptionParser('%prog [options] chatlog.txt\n\tby default automatically stores result in chatlog.wiki')
    parser.add_option('-s', '--stdout',
                      action='store_true', dest='stdout',
                      default=False,
                      help='print to stdout rather than default output file',)
    parser.add_option('-o', '--output-file',
                      type='string', dest='output_file',
                      default=None,
                      help='write to OUTPUT_FILE')
    parser.add_option('-c', '--config-file',
                      type='string', dest='config_file',
                      default=None,
                      help='resource config file (default is ~/.chatwickrc)')
    parser.add_option('-v', '--verbose',
                      action='store_true', dest='verbose',
                      default=False,
                      help='print verbose output to stdout',)
    parser.add_option('-l', '--list-colors',
                      action='store_true', dest='list_colors',
                      default=False,
                      help='list colors available and exit',)
    parser.add_option('-T', '--test-only',
                      action='store_true', dest='test_only',
                      default=False,
                      help='set --verbose and do not write output',)
    options, args = parser.parse_args()

    reader = LineProcessor()
    if options.list_colors:
        reader.listColors()
        sys.exit()

    # make sure input file is specified and valid
    if len(args) == 0:
        print("No input file was specified.")
        parser.print_help()
        sys.exit()
    input_file = args[0]
    if options.test_only:
        options.verbose = True
    if not os.path.isfile(input_file):
        parser.error("Could not find input file '%s'" % input_file)
    if input_file == options.output_file:
        parser.error("Input and output are the same file")
    if options.verbose:
        print("input_file = '%s'" % input_file)

    if options.config_file is None:
        # expanduser also works when HOME is not set in the environment
        options.config_file = os.path.join(os.path.expanduser("~"), ".chatwickrc")

    # read config file
    if os.path.isfile(options.config_file):
        reader.readConfig(options.config_file)
        reader.replaceKeywordsInConfig(input_file)

    # open the output file
    output_fp = None
    owns_output = False  # True only for a file opened here; stdout is never closed
    if options.stdout:
        if not options.test_only:
            output_fp = sys.stdout
    else:
        if options.output_file is None:
            (file_name, ext) = os.path.splitext(input_file)
            if ext == ".wiki":
                parser.error("Will not auto overwrite filename with '.wiki' extension.")
            options.output_file = file_name + ".wiki"
            # the "auto" label belongs to the derived name, not to -o
            label = "auto output_file"
        else:
            label = "output_file"
        if not options.test_only:
            output_fp = open(options.output_file, 'w')
            owns_output = True
        if options.verbose:
            print("%s = '%s'" % (label, options.output_file))

    try:
        reader.buildColorMap(input_file)
        color_map = reader.getAssignedColorMap()

        # The 'Object' entry is not a speaker. It is filtered out here
        # rather than deleted, because color_map is the reader's own
        # dictionary and processLine() still needs that entry.
        speakers = sorted((name for name in color_map if name != "Object"),
                          key=lambda name: name.lower())
        column_count = min(3, len(speakers) // 5)

        for line in reader.prefix:
            write_output(output_fp, line)
            write_output(output_fp, "\n")

        write_output(output_fp, "== List of Speakers ==\n")
        write_output(output_fp, TABLE_START)
        column_index = 1
        # list each name in its color, alphabetically ignoring case
        for name in speakers:
            line = "%s<font color=%s><b>%s</b></font>\n" % (
                TABLE_COLUMN_START, color_map[name], name)
            write_output(output_fp, line)
            if column_index == column_count:
                write_output(output_fp, TABLE_ROW_END)
                column_index = 1
            else:
                column_index += 1
        write_output(output_fp, TABLE_END)
        write_output(output_fp, "\n")

        write_output(output_fp, "== Transcript ==\n")
        with open(input_file, 'r') as input_fp:
            for line in input_fp:
                # processLine runs even in test-only mode so that problems
                # in the log still surface
                new_line = reader.processLine(line)
                if new_line:
                    write_output(output_fp, new_line)
                    write_output(output_fp, "\n")

        for line in reader.postfix:
            write_output(output_fp, line)
            write_output(output_fp, "\n")
    finally:
        if owns_output:
            output_fp.close()
if __name__ == '__main__':
    # Run the converter only when executed as a script, and hand main()'s
    # return value to the interpreter as the exit status.
    exit_status = main()
    sys.exit(exit_status)