User:Thickbrick Sleaford: Difference between revisions

Revision as of 16:05, 1 September 2008

A perl script I've written, to help merchants make sense of their transaction histories. Copy and paste into a .pl file and run. Use at your own risk... --Thickbrick Sleaford 15:47, 1 September 2008 (PDT)

Start copy from here

#!/usr/bin/perl
#
# Analyze a collection of SL transaction history XML files.
#
# Depends on libxml-simpleobject-perl (debian package). Only tested on Linux so far.
#
# Limitations: If there is a period that didn't have any sales in it,
# the resulting csv table will skip it.  Probably not secure.
#
# Please report bugs to thickbrick.sleaford (at) gmail.com
# License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0
#

use warnings;
use strict;
use Getopt::Std;
use File::Temp qw(tempfile);
use XML::SimpleObject;
use Time::Local;
use POSIX qw(strftime);

$main::VERSION='0.1 (2008-08-27)';
$Getopt::Std::STANDARD_HELP_VERSION=1;

sub read_files(@);
sub read_ll_xml_file($);
sub read_slexchange_csv_file($);
sub totals_by_desc(\@\@);
sub print_csv($$);
sub rows_to_csv(\@);
sub row_total($);

my $oocalc_name='oocalc';	#the executable for OpenOffice Calc
my $slexchange_comission_percent=5.0;
my %transactions;

#main:
if (@ARGV < 1) {
	main::VERSION_MESSAGE();
	main::HELP_MESSAGE();
} else {
	my @in_files;
	my %options;	#command line options
	my $split_period=7*24*3600;
	my $temp_filename;
	my $outfile_handler;

#process command line options:
	getopts('co:p:', \%options);
	if (exists $options{'p'} and defined $options{'p'}) {
		$split_period = 24*3600 * $options{'p'};
	} else {
		$split_period = 24*3600*7;
	}
	if (exists $options{'c'}) {
		($outfile_handler, $temp_filename) = tempfile(SUFFIX => '.csv');	#oocalc wants '.csv' suffix
		open($outfile_handler,'>', $temp_filename) or die "\nCan't open temporary file '" . $temp_filename . "' for writing: $!\n";
	} elsif (exists $options{'o'} and defined $options{'o'}) {
		if (-e $options{'o'}) {
			die "\nOutput file '" . $options{'o'} . "' already exists. Will not try to overwrite it.\n\n";
		} else {
			open($outfile_handler,'>', $options{'o'}) or die "\nCan't open file '" . $options{'o'} . "' for writing: $!\n";
		}
	}
#read input file(s)
	@in_files=@ARGV;
	if (@in_files < 1) {
		die "\nNo input file specified.\nUse --help for more info.\n\n"
	}
	read_files(@in_files);
#process/print output
	print_csv($outfile_handler, $split_period);
#run openoffice calc if needed
	if (exists $options{'c'}) {
		system($oocalc_name, $temp_filename) == 0 or die "\nCan't execute OpenOffice Calc: '$oocalc_name'.\n\n";
	}
	close($outfile_handler) if defined $outfile_handler;
	print "\n";
}

#read xml files and store in a hash ref
sub read_files (@) {
	print STDERR "\n";
	foreach my $file (@_) {
		my $fh;
		open $fh, '<', $file or die "Can't open file '$file': $!\n";
		my $first_line=<$fh>;
		if ($first_line =~ m/<\?xml version="1.0" \?>/) {
			read_ll_xml_file($file);
		} else {
			#assume it's an slexchange csv file
			read_slexchange_csv_file($file);
		}
	}
	my $num = keys %transactions;
	print STDERR "Total unique sales processed: $num.\n";
	print STDERR "\n";
}

# read an SL transactions xml file and add it to the global hash
sub read_ll_xml_file($) {
	my $file = shift or return;
	my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
	print STDERR "Reading LL XML file: \'$file\' ... ";
	my $added=0;
	my $parser = XML::Parser->new(ErrorContext => 2, Style => "Tree");
	my $xso = XML::SimpleObject->new( $parser->parsefile($file) );

	foreach my $transaction ($xso->child('transactions')->children) {
		#begin by filtering for interesting transaction only (object sales and payments that are also deposits):
		if ($transaction->child('deposit') and $transaction->child('deposit')->value ne 
			and $transaction->child('type') and ($transaction->child('type')->value eq 'Payment' or
			$transaction->child('type')->value eq 'Object Sale'))
		{
			#store the (hash-ref) transaction in the global hash if it has unique id
			my $t_id = $transaction->child('id') ? $transaction->child('id')->value : ;
			my $t;
			$t->{'price'} = $transaction->child('deposit') ? $transaction->child('deposit')->value : ;
			$t->{'description'} = $transaction->child('description') ? $transaction->child('description')->value : ;
			my $time = $transaction->child('time') ? $transaction->child('time')->value : ;
			my @time_l = split /[- :]/,$time;
			$time_l[1]-=1; #if int $time_l[1] > 0;	#timelocal thinks januaray is month 0, not 1;
			# (SL transactions are written in SLT/PST. this converts to gmt)
			$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
			if (not exists $transactions{$t_id})  {
				$transactions{$t_id}=$t;
				++$added;
			}
		}
	}
	print STDERR " $added unique sales added.\n";
}

# read an slexchange.com transactions csv file and add it to the global hash
# the CSV is : ID, Date, Type, Transaction_Description, Amount, Balance
sub read_slexchange_csv_file($) {
	my $file = shift or return;
	my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
	my $added=0;
	my $fh;
	open $fh, '<', $file or die "Can't open file '$file': $!\n";
	print STDERR "Reading SLExchange CSV file: \'$file\' ... ";
	my $discard_line = <$fh>;	#discard first line;
	while (<$fh>) {
		my ($t_id, $time, $type, $desc, $amount, $balance)=split(/,/);
		#create a hash ref;
		my $t;
		$amount = $amount ? $amount : 0;
		$amount *= (1 - ($slexchange_comission_percent/100));	#substract slex comission
		$t->{'price'}= sprintf("%.0f", $amount);	#round it
		$t->{'description'} = $desc ? $desc : ;
		my @time_l = split /[- :]/,($time ? $time : );
		$time_l[1] -= 1; #if int $time_l[1] > 0;	#timelocal thinks januaray is month 0, not 1;
		# (SL transactions are written in SLT/PST. this converts to gmt)
		$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
		if ($type eq '"SLL Item Purchase"' and $amount > 0 and $t_id ne  and $desc ne ) {
			$t_id='SLEX' . $t_id;	#make sure ids don't collide with LL transaction ids
			#add hasref to global hash if unique:
			if (not exists $transactions{$t_id})  {
				$transactions{$t_id}=$t;
				++$added;
			}
		}
	}
	print STDERR " $added unique sales added.\n";
}

#Accepts 2 array refs, unique descriptions, transaction ids.
#returns 2 lists, units sold per each description, and L$ sums per description
sub totals_by_desc(\@\@) {
	my ($aref_descs, $aref_tids) = @_;
	my $items;
	my @units;
	my @sums;
	# go over all ids and create a
	# hash ref of: tids->descriptions->totals:
	foreach my $tid (@{$aref_tids}) {
		my $t=$transactions{$tid};
		if (defined $t->{'description'}) {
			if (exists $items->{$t->{'description'}}) {
				$items->{$t->{'description'}}->{'sum'} += $t->{'price'};
				$items->{$t->{'description'}}->{'units'} += 1;
			} else {
				$items->{$t->{'description'}}->{'sum'} = $t->{'price'};
				$items->{$t->{'description'}}->{'units'} = 1;
			}
		}
	}
	# go over all descriptions and make a list of totals
	foreach my $desc (@{$aref_descs}) {
		if (exists $items->{$desc}) {
			push @sums, $items->{$desc}->{'sum'};
			push @units, $items->{$desc}->{'units'};
		} else {
			push @sums, 0;
			push @units, 0;
		}
	}
	return \@sums, \@units;
}

# accept and array of refs to row arrays
# return a csv string of the rows
sub rows_to_csv(\@) {
	my $aref_row_refs = shift;
	my $output=;
	foreach my $row (@{$aref_row_refs}) {
		$output .= join (',', @$row) . ",\n";
	}
	return $output;
}

#accepts a ref to an array (row), returns the sum of the items in the arra
sub row_total($) {
	my $aref_row=shift;
	my $total = 0;
	foreach (@{$aref_row}) {
		$total += $_;
	}
	return $total;
}

#print csv totals seperated to different tables, per period (in seconds)
sub print_csv($$) {
	my $file_desc = shift;
	my $duration = shift or 0;	#duration of each period
	my $output=;
	my $periods;		#hash of arrays or transaction ids
	my %all_seen_descriptions;	#keeps all seen descriptions, since not all will appear in a given period
	my $items;
	#order by periods:
	foreach my $tid (keys %transactions) {
		my $t=$transactions{$tid};
		if (defined $t->{'time'}) {
			#store the transaction id in an array, which is stored in a hash to the relevant period:
#if no duration, use time()+1dascalar.htmly as duration, to make sure only 1 period will be stored
			my $period = $duration ? int ($t->{'time'} / $duration) : time()+(24*3600);
			push @{$periods->{$period}}, $tid;
		}
		#if we haven't seen this description before, remember it:
		if (defined $t->{'description'}) {
			if (not exists $all_seen_descriptions{$t->{'description'}}) {
				$all_seen_descriptions{$t->{'description'}}=;
			}
		}
	}
	my @descs_arr =  sort keys %all_seen_descriptions;
	my @periods_arr = sort keys %{$periods};
	#my @descs_arr =  sort keys %all_seen_descriptions;
	print STDERR 'Total unique sale descriptions: ' . @descs_arr . "\n";
	print STDERR 'Total periods (' . (int $duration)/(24*3600) . ' days): ' . @periods_arr . "\n\n";

	#prepare each period's totals:
	my @sums_row_refs;
	my @units_row_refs;
	foreach my $period (@periods_arr) {
		my $start = strftime "%Y-%b-%d", localtime((int $period) * int $duration);
		my ($sums_row_ref, $units_row_ref) = totals_by_desc(@descs_arr, @{$periods->{$period}});
		#add totals and start, and convert to real array;
		my @sums_row = ($start, @{$sums_row_ref}, row_total($sums_row_ref));
		my @units_row = ($start, @{$units_row_ref} , row_total($units_row_ref));
		#add row to rows collections:
		push @sums_row_refs, \@sums_row;
		push @units_row_refs, \@units_row;
	}

	$output .= "SL Sales (all times in SLT)\n\nSales over time (L\$ sum)\n";
	$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
	$output .= join(',', @descs_arr) . ",Total,\n";	#column heads:
	$output .= rows_to_csv(@sums_row_refs);

	$output .= "\n\nSales over time (units sold)\n";
	$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
	$output .= join(',', @descs_arr) . ",Total,\n";
	$output .= rows_to_csv(@units_row_refs);

	#print to STDOUT or file:
	if (defined $file_desc) {
		print $file_desc $output;
	} else {
		print $output;
	}
}

sub main::VERSION_MESSAGE {
	print "  SL transactions analyzer version $main::VERSION.\n";
 	print '  Written by Thickbrick Sleaford <thickbrick.sleaford (at) gmail.com>.' . "\n";
 	print "  License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0.\n";
}

sub main::HELP_MESSAGE {
	my $help = <<END;

  Run this program with a list of Transaction History files as arguments.
  It will extract transactions from the files, get rid of duplicates, remove
  transactions that aren't sales by you (Object Payment and Object Sale) and
  aggregate them by weeks.  The result is comma seperated values (csv).
  Supports LL XML files and slexchange CSV files.

  transactions.pl [-p <days>] [-c] [-o <output_file>] <input_file1> [<input_file2> <input_file3>..]

    -p <days>
      Aggregate sales in periods of this length days (integer).
      Default is 7 Days.

    -c
      Save the resulting CSV in a temporary file and open it with OpenOffice Calc

    -o <output_file>
      Save the resulting CSV to a file.
      Use caution not to overwrite an input file!

    --help
      This helpful messages.

END
	print $help;

End copy here

User:Thickbrick Sleaford: Difference between revisions

Revision as of 16:05, 1 September 2008

Navigation menu

Search