User:Andrew Linden/ChatWick
Jump to navigation
Jump to search
This is the latest version of the script that generates the wiki format text for the Simulator_User_Group transcripts.
#!/usr/bin/env python
#
# chatwick.py -- translate chat logs into wiki format
#
# License = common domain
#
# Usage: chatwick [-c config_file] [-o output_file] chat_log_file
#
# The code below is written to run unchanged on Python 2.6+ and Python 3.

import datetime
import optparse
import sys
import random
import re
import os.path

RED_SHIFT = 16
GREEN_SHIFT = 8
BLUE_SHIFT = 0

TABLE_START = "{|\n"
TABLE_COLUMN_START = "|"
TABLE_ROW_END = "|-\n"
TABLE_END = "|}\n"

DEFAULT_OBJECT_COLOR = "#808080"

# NOTE(review): the nowiki literals used in this file are a reconstruction.
# The previous revision only ever emitted a dangling closing tag and its
# "useless nowiki" pattern was a bare whitespace pattern, which removed every
# whitespace run from the output.  Confirm against an authoritative copy of
# the script.
NOWIKI_OPEN = "<nowiki>"
NOWIKI_CLOSE = "</nowiki>"

# Phase shift (in degrees) of each primary color's curve relative to red.
_COLOR_OFFSETS = {'red': 0, 'green': -120, 'blue': -240}

# (pattern, slice bounds for year/month/day) tried in order on the file name.
_DATE_PATTERNS = (
    (re.compile(r'20\d\d\d\d\d\d'), (0, 4, 4, 6, 6, 8)),
    (re.compile(r'20\d\d.\d\d.\d\d'), (0, 4, 5, 7, 8, 10)),
)


def replace_link(match_obj):
    """Return the replacement for one URL found inside nowiki-wrapped text.

    The nowiki section is closed before the link and reopened after it so the
    URL itself is left outside the nowiki section.
    """
    return "%s%s%s" % (NOWIKI_CLOSE, match_obj.group('link'), NOWIKI_OPEN)


def angle_to_hex(angle, color):
    """Return the 0..255 intensity of one primary color at a hue angle.

    The curve for red, as a function of the angle in degrees:
        [  0,  60)  255
        [ 60, 120)  linear ramp from 255 down to 0
        [120, 240)  0
        [240, 300)  linear ramp from 0 up to 255
        [300, 360)  255
    Green and blue use the same curve shifted by 120 and 240 degrees.

    angle -- hue angle in degrees; any value is wrapped into [0, 360)
    color -- 'red', 'green' or 'blue'

    Raises ValueError for any other color name.
    """
    try:
        offset = _COLOR_OFFSETS[color]
    except KeyError:
        raise ValueError("Unknown primary color %s" % color)

    # Python's % already yields a non-negative result for a positive modulus.
    color_angle = (angle + offset) % 360

    if color_angle < 60:
        return 255
    if color_angle < 120:
        return int(255.0 * (1.0 - (float(color_angle) - 60.0) / 60.0))
    if color_angle < 240:
        return 0
    if color_angle < 300:
        return int(255.0 * ((float(color_angle) - 240.0) / 60.0))
    return 255


def generate_colors(hues, saturations, max_saturation, min_saturation):
    """Build a list of '#rrggbb' strings covering the hue circle.

    hues           -- number of hue steps; the angle step is int(360 / hues)
    saturations    -- number of passes around the hue circle
    max_saturation -- upper bound of the per-channel scale factor
    min_saturation -- scale factor used for the first pass

    Each pass multiplies every channel by the current scale factor, which
    starts at min_saturation and grows by
    (max_saturation - min_saturation) / saturations after each pass, so
    max_saturation itself is never used.
    """
    colors = []
    hue_step = int(360 / hues)
    saturation_step = (max_saturation - min_saturation) / float(saturations)
    saturation = min_saturation
    for _ in range(saturations):
        for angle in range(0, 360, hue_step):
            red = int(angle_to_hex(angle, 'red') * saturation)
            green = int(angle_to_hex(angle, 'green') * saturation)
            blue = int(angle_to_hex(angle, 'blue') * saturation)
            colors.append('#%02x%02x%02x' % (red, green, blue))
        saturation += saturation_step
    return colors


def write_output(output_fp, line):
    """Write line to output_fp, or do nothing when output_fp is None/falsy."""
    if output_fp:
        output_fp.write(line)


def _date_from_file_name(input_file):
    """Return the datetime.date embedded in input_file, or None.

    Recognizes YYYYMMDD and YYYY.MM.DD (any single separator character),
    both starting with '20'.  A match that is not a valid calendar date is
    ignored instead of raising.
    """
    for pattern, (y0, y1, m0, m1, d0, d1) in _DATE_PATTERNS:
        found = pattern.search(input_file)
        if not found:
            continue
        date_str = found.group(0)
        try:
            return datetime.date(int(date_str[y0:y1]),
                                 int(date_str[m0:m1]),
                                 int(date_str[d0:d1]))
        except ValueError:
            continue
    return None


class LineProcessor(object):
    """Turns chat log lines into colored wiki markup, one color per speaker."""

    def __init__(self):
        # name -> color taken from the config file
        self.configured_color_map = {}
        # name -> color actually handed out for this chat log
        self.assigned_color_map = {}
        # names listed with the 'delete' config command
        self.deletes = {}
        # color of the most recent formatted speaker line
        self.last_color = None
        self.available_colors = generate_colors(13, 4, 0.90, 0.4)
        self.special_colors = generate_colors(8, 1, 0.4, 0.3)
        self.prefix = []
        self.postfix = []

        # [00:00] text
        self.re_timestamped_line = re.compile(
            r'(?P<timestamp>^\[\d\d:\d\d\])\s+(?P<text>.*)$')
        # User Name: text
        self.re_two_word_name_line = re.compile(
            r'^(?P<username>\w+ \w+):\s+(?P<text>.*)$')
        # User Name text
        self.re_me_line = re.compile(
            r'^(?P<username>\w+ \w+)\s+(?P<text>.*)$')
        # UserName: text
        self.re_one_word_name_line = re.compile(
            r'^(?P<username>\w+):\s+(?P<text>.*)$')
        # Object with arbitrarily long name: text
        self.re_object_line = re.compile(
            r'^(?P<objectname>.*?):\s+(?P<text>.+)$')
        # [0000/00/00 00:00] anything
        self.re_old_timestamp = re.compile(
            r'^\[\d{4,4}/\d\d/\d\d \d\d:\d\d]\s+.*$')
        # Something Linden
        self.re_special_name = re.compile(r'^\w+ Linden$')
        # http://something
        self.re_http_link = re.compile(
            r'(?P<link>https?://[-_:/=\w\.\?\$]+)')
        # an empty nowiki pair (left behind when a link sits at the edge of
        # the text) plus any whitespace that follows it
        self.re_useless_nowiki = re.compile(
            re.escape(NOWIKI_OPEN + NOWIKI_CLOSE) + r'\s*')

    def listColors(self):
        """Debug method: print every color that can still be handed out."""
        for color in self.available_colors:
            print('<font color="%s"><b>%s</b> available color</font>\n'
                  % (color, color))
        for color in self.special_colors:
            print('<font color="%s"><b>%s</b> special color</font>\n'
                  % (color, color))

    def readConfig(self, file):
        """Load commands from the config file at path `file`.

        All config lines have the following format:
            command 'User Name' [option]
        Currently supported commands are:
            color 'User Name' colorname
            delete 'User Name'
            prefix 'prefix line'
            postfix 'postfix line'
        The prefix/postfix commands will pre/postpend the output with those
        lines, in the order they are given.  Some ALL_CAPS keywords in the
        pre/postfix lines will be replaced (if applicable).  See the
        implementation of replaceKeywordsInConfig().

        Blank lines, lines starting with '#', and lines that do not have a
        command followed by exactly one quoted argument are skipped.
        """
        with open(file) as fp:
            for raw_line in fp:
                clean_line = raw_line.strip()
                if not clean_line or clean_line[0] == '#':
                    continue
                parts = clean_line.split(None, 1)
                if len(parts) != 2:
                    # a command without an argument; nothing to do
                    continue
                key, value = parts

                # the argument may be quoted with either ' or "
                words = value.split("'")
                if len(words) != 3:
                    words = value.split('"')
                if len(words) != 3:
                    continue

                if key == "color":
                    assigned_color = words[2].strip().lower()
                    if assigned_color:
                        self.configured_color_map[words[1]] = assigned_color
                elif key == "delete":
                    self.deletes[words[1]] = 1
                else:
                    value = value.strip("\"'")
                    if key == "prefix":
                        self.prefix.append(value)
                    elif key == "postfix":
                        self.postfix.append(value)

        if 'Object' not in self.configured_color_map:
            # by default objects show up in DEFAULT_OBJECT_COLOR (grey)
            self.configured_color_map['Object'] = DEFAULT_OBJECT_COLOR

        # configured colors must not be handed out automatically as well
        configured_colors = set(self.configured_color_map.values())
        self.available_colors = [color for color in self.available_colors
                                 if color not in configured_colors]
        self.special_colors = [color for color in self.special_colors
                               if color not in configured_colors]

    def replaceKeywordsInConfig(self, input_file):
        """Substitute date keywords in the prefix and postfix lines.

        The following keywords are replaced:
            THIS_WEEK = "YYYY.MM.DD" for the date in the input file name
            PREV_WEEK = "YYYY.MM.DD" for one week before that date
            NEXT_WEEK = "YYYY.MM.DD" for one week after that date
        This only works if input_file contains the date in one of the
        following formats:
            YYYYMMDD
            YYYY.MM.DD (where . can be any character)
        When no valid date is found the lines are left untouched.
        """
        this_week = _date_from_file_name(input_file)
        if this_week is None:
            return

        one_week = datetime.timedelta(days=7)
        replacements = (
            ("THIS_WEEK", str(this_week).replace('-', '.')),
            ("PREV_WEEK", str(this_week - one_week).replace('-', '.')),
            ("NEXT_WEEK", str(this_week + one_week).replace('-', '.')),
        )

        def substitute(line):
            for keyword, date_str in replacements:
                line = line.replace(keyword, date_str)
            return line

        self.prefix = [substitute(line) for line in self.prefix]
        self.postfix = [substitute(line) for line in self.postfix]

    def buildColorMap(self, file):
        """Scan the chat log at path `file` and assign a color per speaker."""
        with open(file, 'r') as fp:
            for line in fp:
                timestamp_result = self.re_timestamped_line.match(line)
                if not timestamp_result:
                    continue
                text = timestamp_result.group('text')

                # hunt for 2-word names which we assume to be users
                # rather than objects
                name_result = self.re_two_word_name_line.match(text)
                if name_result:
                    name = name_result.group('username')
                    if name != "Second Life":
                        self.getColor(name)
                    continue

                # hunt for 1-word names that match configured colors
                name_result = self.re_one_word_name_line.match(text)
                if name_result:
                    name = name_result.group('username')
                    if name in self.configured_color_map:
                        self.assigned_color_map[name] = \
                            self.configured_color_map[name]

        if 'Object' not in self.assigned_color_map:
            self.assigned_color_map['Object'] = \
                self.configured_color_map.get('Object', DEFAULT_OBJECT_COLOR)

    def getAssignedColorMap(self):
        """Return the live name -> color dict (not a copy)."""
        return self.assigned_color_map

    def getColor(self, name):
        """Return the color for name, assigning one on first use.

        Order of preference: a color already assigned, a configured color,
        a special color for names matching re_special_name, then the next
        available color.  When every color has been handed out
        DEFAULT_OBJECT_COLOR is used instead of failing.
        """
        if name in self.assigned_color_map:
            return self.assigned_color_map[name]

        if name in self.configured_color_map:
            color = self.configured_color_map[name]
        elif self.re_special_name.match(name) and self.special_colors:
            color = self.special_colors.pop()
        elif self.available_colors:
            color = self.available_colors.pop()
        else:
            color = DEFAULT_OBJECT_COLOR
        self.assigned_color_map[name] = color
        return color

    def _classifySpeaker(self, text):
        """Split the text after a timestamp into (name, color, rest).

        color is None when the line must not be emitted: a "Second Life"
        line, a deleted name, or text with no recognizable speaker.
        """
        # check for two-word name
        resi_result = self.re_two_word_name_line.match(text)
        if resi_result:
            name = resi_result.group('username')
            if name == "Second Life" or name in self.deletes:
                return name, None, text
            return name, self.getColor(name), \
                ": %s" % resi_result.group('text')

        # check for /me line; only names already known as speakers qualify,
        # because the pattern matches any two leading words
        me_result = self.re_me_line.match(text)
        if me_result and me_result.group('username') in self.assigned_color_map:
            name = me_result.group('username')
            if name in self.deletes:
                return name, None, text
            return name, self.getColor(name), " %s" % me_result.group('text')

        # check for object line
        object_result = self.re_object_line.match(text)
        if object_result:
            name = object_result.group('objectname')
            if name in self.deletes:
                return name, None, text
            if name in self.assigned_color_map:
                color = self.assigned_color_map[name]
            else:
                color = self.assigned_color_map.get('Object',
                                                    DEFAULT_OBJECT_COLOR)
            return name, color, ": %s" % object_result.group('text')

        return None, None, text

    def processLine(self, line):
        """Return the wiki markup for one chat log line, or None to skip it.

        Lines with a [HH:MM] timestamp are attributed to a speaker and
        colored accordingly.  Lines with the old [YYYY/MM/DD HH:MM] timestamp
        and blank lines are skipped.  Any other line is treated as a
        continuation of the previous line and reuses its color.
        """
        formatted_line = None
        line = line.rstrip()

        timestamp_result = self.re_timestamped_line.match(line)
        if timestamp_result:
            timestamp = timestamp_result.group('timestamp')
            name, color, text = self._classifySpeaker(
                timestamp_result.group('text'))
            if color:
                self.last_color = color
                formatted_line = "%s <font color=%s><b>%s</b>%s%s%s</font>\n" % (
                    timestamp, color, name, NOWIKI_OPEN, text, NOWIKI_CLOSE)
        elif line and not self.re_old_timestamp.match(line):
            # assume this was continuation of the last line; before any
            # speaker has been seen there is no last color yet
            color = self.last_color or DEFAULT_OBJECT_COLOR
            formatted_line = "<font color=%s>%s%s%s</font>\n" % (
                color, NOWIKI_OPEN, line, NOWIKI_CLOSE)

        if formatted_line:
            # move http links outside the nowiki section
            formatted_line = self.re_http_link.sub(replace_link, formatted_line)
            # drop empty nowiki pairs left over by the link replacement
            formatted_line = self.re_useless_nowiki.sub("", formatted_line)
        return formatted_line


def _write_speaker_table(output_fp, speakers, column_count):
    """Write the wiki table listing each speaker name in its color."""
    write_output(output_fp, TABLE_START)
    column_index = 1
    # alphabetical, ignoring case
    for name in sorted(speakers, key=lambda speaker: speaker.lower()):
        write_output(output_fp, "%s<font color=%s><b>%s</b></font>\n"
                     % (TABLE_COLUMN_START, speakers[name], name))
        if column_index == column_count:
            write_output(output_fp, TABLE_ROW_END)
            column_index = 1
        else:
            column_index += 1
    write_output(output_fp, TABLE_END)


def main(argv=None):
    """Command line entry point.

    argv -- argument list without the program name; None means sys.argv[1:]

    Returns 0 on success and 1 when no input file was given.
    Raises IOError when the input file does not exist, and ValueError when
    the output would overwrite the input or an existing '.wiki' input name.
    """
    parser = optparse.OptionParser('%prog [options] chatlog.txt\n\tby default automatically stores result in chatlog.wiki')
    parser.add_option(
        '-s', '--stdout', action='store_true', dest='stdout', default=False,
        help='print to stdout rather than default output file',)
    parser.add_option(
        '-o', '--output-file', type='string', dest='output_file', default=None,
        help='write to OUTPUT_FILE')
    parser.add_option(
        '-c', '--config-file', type='string', dest='config_file', default=None,
        help='resource config file (default is ~/.chatwickrc)')
    parser.add_option(
        '-v', '--verbose', action='store_true', dest='verbose', default=False,
        help='print verbose output to stdout',)
    parser.add_option(
        '-l', '--list-colors', action='store_true', dest='list_colors',
        default=False, help='list colors available and exit',)
    parser.add_option(
        '-T', '--test-only', action='store_true', dest='test_only',
        default=False, help='set --verbose and do not write output',)
    options, args = parser.parse_args(argv)

    reader = LineProcessor()
    if options.list_colors:
        reader.listColors()
        return 0

    # make sure input file is specified and valid
    if not args:
        print("No input file was specified.")
        parser.print_help()
        return 1
    input_file = args[0]

    if options.test_only:
        options.verbose = True

    if not os.path.isfile(input_file):
        raise IOError("Could not find input file '%s'" % input_file)
    if (options.output_file is not None and
            os.path.abspath(input_file) == os.path.abspath(options.output_file)):
        raise ValueError("Input and output are the same file")

    if options.verbose:
        print("input_file = '%s'" % input_file)

    if options.config_file is None:
        # expanduser also works when HOME is not set in the environment
        options.config_file = os.path.join(os.path.expanduser("~"),
                                           ".chatwickrc")

    # read config file
    if os.path.isfile(options.config_file):
        reader.readConfig(options.config_file)
        reader.replaceKeywordsInConfig(input_file)

    # pick the output destination; --stdout wins over --output-file
    if not options.stdout:
        if options.output_file is None:
            (file_name, ext) = os.path.splitext(input_file)
            if ext == ".wiki":
                raise ValueError(
                    "Will not auto overwrite filename with '.wiki' extension.")
            options.output_file = file_name + ".wiki"
            if options.verbose:
                print("auto output_file = '%s'" % options.output_file)
        elif options.verbose:
            print("output_file = '%s'" % options.output_file)

    reader.buildColorMap(input_file)

    # Work on a copy: processLine() still needs the 'Object' entry of the
    # reader's own map, but 'Object' is not a speaker.
    speakers = dict(reader.getAssignedColorMap())
    speakers.pop('Object', None)
    column_count = min(len(speakers) // 5, 3)

    output_fp = None
    opened_here = False
    if not options.test_only:
        if options.stdout:
            output_fp = sys.stdout
        else:
            output_fp = open(options.output_file, 'w')
            opened_here = True

    try:
        for line in reader.prefix:
            write_output(output_fp, line)
            write_output(output_fp, "\n")

        write_output(output_fp, "== List of Speakers ==\n")
        _write_speaker_table(output_fp, speakers, column_count)
        write_output(output_fp, "\n")

        write_output(output_fp, "== Transcript ==\n")
        with open(input_file, 'r') as input_fp:
            for line in input_fp:
                new_line = reader.processLine(line)
                if new_line:
                    write_output(output_fp, new_line)
                    write_output(output_fp, "\n")

        for line in reader.postfix:
            write_output(output_fp, line)
            write_output(output_fp, "\n")
    finally:
        # never close sys.stdout, and there is nothing to close in test mode
        if opened_here:
            output_fp.close()
    return 0


if __name__ == '__main__':
    sys.exit(main())