Difference between revisions of "User:Thickbrick Sleaford"

From Second Life Wiki
Jump to navigation Jump to search
(New page: A perl script I've written, to help merchants make sense of their transaction histories. Copy and paste into a .pl file and run. '''Use at your own risk'''... --~~~~ <nowiki> #!/usr/bin...)
 
Line 2: Line 2:
--[[User:Thickbrick Sleaford|Thickbrick Sleaford]] 15:47, 1 September 2008 (PDT)
--[[User:Thickbrick Sleaford|Thickbrick Sleaford]] 15:47, 1 September 2008 (PDT)


<nowiki>
#!/usr/bin/perl
#
# Analyze a collection of SL transaction history XML files.
#
# Depends on libxml-simpleobject-perl (debian package). Only tested on Linux so far.
#
# Limitations: If there is a period that didn't have any sales in it,
# the resulting csv table will skip it.  Probably not secure.
#
# Please report bugs to thickbrick.sleaford (at) gmail.com
# License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0
#


use warnings;
use strict;
use Getopt::Std;
use File::Temp qw(tempfile);
use XML::SimpleObject;
use Time::Local;
use POSIX qw(strftime);


$main::VERSION='0.1 (2008-08-27)';
'''Start copy from here'''
$Getopt::Std::STANDARD_HELP_VERSION=1;
#!/usr/bin/perl
#
# Analyze a collection of SL transaction history XML files.
#
# Depends on libxml-simpleobject-perl (debian package). Only tested on Linux so far.
#
# Limitations: If there is a period that didn't have any sales in it,
# the resulting csv table will skip it.  Probably not secure.
#
# Please report bugs to thickbrick.sleaford (at) gmail.com
# License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0
#


sub read_files(@);
use warnings;
sub read_ll_xml_file($);
use strict;
sub read_slexchange_csv_file($);
use Getopt::Std;
sub totals_by_desc(\@\@);
use File::Temp qw(tempfile);
sub print_csv($$);
use XML::SimpleObject;
sub rows_to_csv(\@);
use Time::Local;
sub row_total($);
use POSIX qw(strftime);


my $oocalc_name='oocalc'; #the executable for OpenOffice Calc
$main::VERSION='0.1 (2008-08-27)';
my $slexchange_comission_percent=5.0;
$Getopt::Std::STANDARD_HELP_VERSION=1;
my %transactions;


sub read_files(@);
sub read_ll_xml_file($);
sub read_slexchange_csv_file($);
sub totals_by_desc(\@\@);
sub print_csv($$);
sub rows_to_csv(\@);
sub row_total($);


#main:
my $oocalc_name='oocalc'; #the executable for OpenOffice Calc
if (@ARGV < 1) {
my $slexchange_comission_percent=5.0;
main::VERSION_MESSAGE();
my %transactions;
main::HELP_MESSAGE();
} else {
my @in_files;
my %options; #command line options
my $split_period=7*24*3600;
my $temp_filename;
my $outfile_handler;


#process command line options:
getopts('co:p:', \%options);
if (exists $options{'p'} and defined $options{'p'}) {
$split_period = 24*3600 * $options{'p'};
} else {
$split_period = 24*3600*7;
}
if (exists $options{'c'}) {
($outfile_handler, $temp_filename) = tempfile(SUFFIX => '.csv'); #oocalc wants '.csv' suffix
open($outfile_handler,'>', $temp_filename) or die "\nCan't open temporary file '" . $temp_filename . "' for writing: $!\n";
} elsif (exists $options{'o'} and defined $options{'o'}) {
if (-e $options{'o'}) {
die "\nOutput file '" . $options{'o'} . "' already exists. Will not try to overwrite it.\n\n";
} else {
open($outfile_handler,'>', $options{'o'}) or die "\nCan't open file '" . $options{'o'} . "' for writing: $!\n";
}
}
#read input file(s)
@in_files=@ARGV;
if (@in_files < 1) {
die "\nNo input file specified.\nUse --help for more info.\n\n"
}
read_files(@in_files);
#process/print output
print_csv($outfile_handler, $split_period);
#run openoffice calc if needed
if (exists $options{'c'}) {
system($oocalc_name, $temp_filename) == 0 or die "\nCan't execute OpenOffice Calc: '$oocalc_name'.\n\n";
}
close($outfile_handler) if defined $outfile_handler;
print "\n";
}


#main:
if (@ARGV < 1) {
main::VERSION_MESSAGE();
main::HELP_MESSAGE();
} else {
my @in_files;
my %options; #command line options
my $split_period=7*24*3600;
my $temp_filename;
my $outfile_handler;


#read xml files and store in a hash ref
#process command line options:
sub read_files (@) {
getopts('co:p:', \%options);
print STDERR "\n";
if (exists $options{'p'} and defined $options{'p'}) {
foreach my $file (@_) {
$split_period = 24*3600 * $options{'p'};
my $fh;
} else {
open $fh, '<', $file or die "Can't open file '$file': $!\n";
$split_period = 24*3600*7;
my $first_line=<$fh>;
}
if ($first_line =~ m/<\?xml version="1.0" \?>/) {
if (exists $options{'c'}) {
read_ll_xml_file($file);
($outfile_handler, $temp_filename) = tempfile(SUFFIX => '.csv'); #oocalc wants '.csv' suffix
} else {
open($outfile_handler,'>', $temp_filename) or die "\nCan't open temporary file '" . $temp_filename . "' for writing: $!\n";
#assume it's an slexchange csv file
} elsif (exists $options{'o'} and defined $options{'o'}) {
read_slexchange_csv_file($file);
if (-e $options{'o'}) {
}
die "\nOutput file '" . $options{'o'} . "' already exists. Will not try to overwrite it.\n\n";
}
} else {
my $num = keys %transactions;
open($outfile_handler,'>', $options{'o'}) or die "\nCan't open file '" . $options{'o'} . "' for writing: $!\n";
print STDERR "Total unique sales processed: $num.\n";
}
print STDERR "\n";
}
}
#read input file(s)
@in_files=@ARGV;
if (@in_files < 1) {
die "\nNo input file specified.\nUse --help for more info.\n\n"
}
read_files(@in_files);
#process/print output
print_csv($outfile_handler, $split_period);
#run openoffice calc if needed
if (exists $options{'c'}) {
system($oocalc_name, $temp_filename) == 0 or die "\nCan't execute OpenOffice Calc: '$oocalc_name'.\n\n";
}
close($outfile_handler) if defined $outfile_handler;
print "\n";
}


# read an SL transactions xml file and add it to the global hash
sub read_ll_xml_file($) {
my $file = shift or return;
my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
print STDERR "Reading LL XML file: \'$file\' ... ";
my $added=0;
my $parser = XML::Parser->new(ErrorContext => 2, Style => "Tree");
my $xso = XML::SimpleObject->new( $parser->parsefile($file) );


foreach my $transaction ($xso->child('transactions')->children) {
#read xml files and store in a hash ref
#begin by filtering for interesting transaction only (object sales and payments that are also deposits):
sub read_files (@) {
if ($transaction->child('deposit') and $transaction->child('deposit')->value ne ''
print STDERR "\n";
and $transaction->child('type') and ($transaction->child('type')->value eq 'Payment' or
foreach my $file (@_) {
$transaction->child('type')->value eq 'Object Sale'))
my $fh;
{
open $fh, '<', $file or die "Can't open file '$file': $!\n";
#store the (hash-ref) transaction in the global hash if it has unique id
my $first_line=<$fh>;
my $t_id = $transaction->child('id') ? $transaction->child('id')->value : '';
if ($first_line =~ m/<\?xml version="1.0" \?>/) {
my $t;
read_ll_xml_file($file);
$t->{'price'} = $transaction->child('deposit') ? $transaction->child('deposit')->value : '';
} else {
$t->{'description'} = $transaction->child('description') ? $transaction->child('description')->value : '';
#assume it's an slexchange csv file
my $time = $transaction->child('time') ? $transaction->child('time')->value : '';
read_slexchange_csv_file($file);
my @time_l = split /[- :]/,$time;
}
$time_l[1]-=1; #if int $time_l[1] > 0; #timelocal thinks januaray is month 0, not 1;
}
# (SL transactions are written in SLT/PST. this converts to gmt)
my $num = keys %transactions;
$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
print STDERR "Total unique sales processed: $num.\n";
if (not exists $transactions{$t_id}{
print STDERR "\n";
$transactions{$t_id}=$t;
}
++$added;
}
}
}
print STDERR " $added unique sales added.\n";
}


# read an slexchange.com transactions csv file and add it to the global hash
# read an SL transactions xml file and add it to the global hash
# the CSV is : ID, Date, Type, Transaction_Description, Amount, Balance
sub read_ll_xml_file($) {
sub read_slexchange_csv_file($) {
my $file = shift or return;
my $file = shift or return;
my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
print STDERR "Reading LL XML file: \'$file\' ... ";
my $added=0;
my $added=0;
my $fh;
my $parser = XML::Parser->new(ErrorContext => 2, Style => "Tree");
open $fh, '<', $file or die "Can't open file '$file': $!\n";
my $xso = XML::SimpleObject->new( $parser->parsefile($file) );
print STDERR "Reading SLExchange CSV file: \'$file\' ... ";
my $discard_line = <$fh>; #discard first line;
while (<$fh>) {
my ($t_id, $time, $type, $desc, $amount, $balance)=split(/,/);
#create a hash ref;
my $t;
$amount = $amount ? $amount : 0;
$amount *= (1 - ($slexchange_comission_percent/100)); #substract slex comission
$t->{'price'}= sprintf("%.0f", $amount); #round it
$t->{'description'} = $desc ? $desc : '';
my @time_l = split /[- :]/,($time ? $time : '');
$time_l[1] -= 1; #if int $time_l[1] > 0; #timelocal thinks januaray is month 0, not 1;
# (SL transactions are written in SLT/PST. this converts to gmt)
$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
if ($type eq '"SLL Item Purchase"' and $amount > 0 and $t_id ne '' and $desc ne '') {
$t_id='SLEX' . $t_id; #make sure ids don't collide with LL transaction ids
#add hasref to global hash if unique:
if (not exists $transactions{$t_id}) {
$transactions{$t_id}=$t;
++$added;
}
}
}
print STDERR " $added unique sales added.\n";
}


foreach my $transaction ($xso->child('transactions')->children) {
#begin by filtering for interesting transaction only (object sales and payments that are also deposits):
if ($transaction->child('deposit') and $transaction->child('deposit')->value ne ''
and $transaction->child('type') and ($transaction->child('type')->value eq 'Payment' or
$transaction->child('type')->value eq 'Object Sale'))
{
#store the (hash-ref) transaction in the global hash if it has unique id
my $t_id = $transaction->child('id') ? $transaction->child('id')->value : '';
my $t;
$t->{'price'} = $transaction->child('deposit') ? $transaction->child('deposit')->value : '';
$t->{'description'} = $transaction->child('description') ? $transaction->child('description')->value : '';
my $time = $transaction->child('time') ? $transaction->child('time')->value : '';
my @time_l = split /[- :]/,$time;
$time_l[1]-=1; #if int $time_l[1] > 0; #timelocal thinks januaray is month 0, not 1;
# (SL transactions are written in SLT/PST. this converts to gmt)
$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
if (not exists $transactions{$t_id})  {
$transactions{$t_id}=$t;
++$added;
}
}
}
print STDERR " $added unique sales added.\n";
}


#Accepts 2 array refs, unique descriptions, transaction ids.
# read an slexchange.com transactions csv file and add it to the global hash
#returns 2 lists, units sold per each description, and L$ sums per description
# the CSV is : ID, Date, Type, Transaction_Description, Amount, Balance
sub totals_by_desc(\@\@) {
sub read_slexchange_csv_file($) {
my ($aref_descs, $aref_tids) = @_;
my $file = shift or return;
my $items;
my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
my @units;
my $added=0;
my @sums;
my $fh;
# go over all ids and create a
open $fh, '<', $file or die "Can't open file '$file': $!\n";
# hash ref of: tids->descriptions->totals:
print STDERR "Reading SLExchange CSV file: \'$file\' ... ";
foreach my $tid (@{$aref_tids}) {
my $discard_line = <$fh>; #discard first line;
my $t=$transactions{$tid};
while (<$fh>) {
if (defined $t->{'description'}) {
my ($t_id, $time, $type, $desc, $amount, $balance)=split(/,/);
if (exists $items->{$t->{'description'}}) {
#create a hash ref;
$items->{$t->{'description'}}->{'sum'} += $t->{'price'};
my $t;
$items->{$t->{'description'}}->{'units'} += 1;
$amount = $amount ? $amount : 0;
} else {
$amount *= (1 - ($slexchange_comission_percent/100)); #substract slex comission
$items->{$t->{'description'}}->{'sum'} = $t->{'price'};
$t->{'price'}= sprintf("%.0f", $amount); #round it
$items->{$t->{'description'}}->{'units'} = 1;
$t->{'description'} = $desc ? $desc : '';
}
my @time_l = split /[- :]/,($time ? $time : '');
}
$time_l[1] -= 1; #if int $time_l[1] > 0; #timelocal thinks januaray is month 0, not 1;
}
# (SL transactions are written in SLT/PST. this converts to gmt)
# go over all descriptions and make a list of totals
$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
foreach my $desc (@{$aref_descs}) {
if ($type eq '"SLL Item Purchase"' and $amount > 0 and $t_id ne '' and $desc ne '') {
if (exists $items->{$desc}) {
$t_id='SLEX' . $t_id; #make sure ids don't collide with LL transaction ids
push @sums, $items->{$desc}->{'sum'};
#add hasref to global hash if unique:
push @units, $items->{$desc}->{'units'};
if (not exists $transactions{$t_id}) {
} else {
$transactions{$t_id}=$t;
push @sums, 0;
++$added;
push @units, 0;
}
}
}
}
}
return \@sums, \@units;
print STDERR " $added unique sales added.\n";
}
}




# accept and array of refs to row arrays
#Accepts 2 array refs, unique descriptions, transaction ids.
# return a csv string of the rows
#returns 2 lists, units sold per each description, and L$ sums per description
sub rows_to_csv(\@) {
sub totals_by_desc(\@\@) {
my $aref_row_refs = shift;
my ($aref_descs, $aref_tids) = @_;
my $output='';
my $items;
foreach my $row (@{$aref_row_refs}) {
my @units;
$output .= join (',', @$row) . ",\n";
my @sums;
}
# go over all ids and create a
return $output;
# hash ref of: tids->descriptions->totals:
}
foreach my $tid (@{$aref_tids}) {
my $t=$transactions{$tid};
if (defined $t->{'description'}) {
if (exists $items->{$t->{'description'}}) {
$items->{$t->{'description'}}->{'sum'} += $t->{'price'};
$items->{$t->{'description'}}->{'units'} += 1;
} else {
$items->{$t->{'description'}}->{'sum'} = $t->{'price'};
$items->{$t->{'description'}}->{'units'} = 1;
}
}
}
# go over all descriptions and make a list of totals
foreach my $desc (@{$aref_descs}) {
if (exists $items->{$desc}) {
push @sums, $items->{$desc}->{'sum'};
push @units, $items->{$desc}->{'units'};
} else {
push @sums, 0;
push @units, 0;
}
}
return \@sums, \@units;
}


#accepts a ref to an array (row), returns the sum of the items in the arra
sub row_total($) {
my $aref_row=shift;
my $total = 0;
foreach (@{$aref_row}) {
$total += $_;
}
return $total;
}


#print csv totals seperated to different tables, per period (in seconds)
# accept and array of refs to row arrays
sub print_csv($$) {
# return a csv string of the rows
my $file_desc = shift;
sub rows_to_csv(\@) {
my $duration = shift or 0; #duration of each period
my $aref_row_refs = shift;
my $output='';
my $output='';
my $periods; #hash of arrays or transaction ids
foreach my $row (@{$aref_row_refs}) {
my %all_seen_descriptions; #keeps all seen descriptions, since not all will appear in a given period
$output .= join (',', @$row) . ",\n";
my $items;
}
#order by periods:
return $output;
foreach my $tid (keys %transactions) {
}
my $t=$transactions{$tid};
if (defined $t->{'time'}) {
#store the transaction id in an array, which is stored in a hash to the relevant period:
#if no duration, use time()+1dascalar.htmly as duration, to make sure only 1 period will be stored
my $period = $duration ? int ($t->{'time'} / $duration) : time()+(24*3600);
push @{$periods->{$period}}, $tid;
}
#if we haven't seen this description before, remember it:
if (defined $t->{'description'}) {
if (not exists $all_seen_descriptions{$t->{'description'}}) {
$all_seen_descriptions{$t->{'description'}}='';
}
}
}
my @descs_arr =  sort keys %all_seen_descriptions;
my @periods_arr = sort keys %{$periods};
#my @descs_arr =  sort keys %all_seen_descriptions;
print STDERR 'Total unique sale descriptions: ' . @descs_arr . "\n";
print STDERR 'Total periods (' . (int $duration)/(24*3600) . ' days): ' . @periods_arr . "\n\n";


#prepare each period's totals:
#accepts a ref to an array (row), returns the sum of the items in the arra
my @sums_row_refs;
sub row_total($) {
my @units_row_refs;
my $aref_row=shift;
foreach my $period (@periods_arr) {
my $total = 0;
my $start = strftime "%Y-%b-%d", localtime((int $period) * int $duration);
foreach (@{$aref_row}) {
my ($sums_row_ref, $units_row_ref) = totals_by_desc(@descs_arr, @{$periods->{$period}});
$total += $_;
#add totals and start, and convert to real array;
}
my @sums_row = ($start, @{$sums_row_ref}, row_total($sums_row_ref));
return $total;
my @units_row = ($start, @{$units_row_ref} , row_total($units_row_ref));
}
#add row to rows collections:
push @sums_row_refs, \@sums_row;
push @units_row_refs, \@units_row;
}


$output .= "SL Sales (all times in SLT)\n\nSales over time (L\$ sum)\n";
#print csv totals seperated to different tables, per period (in seconds)
$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
sub print_csv($$) {
$output .= join(',', @descs_arr) . ",Total,\n"; #column heads:
my $file_desc = shift;
$output .= rows_to_csv(@sums_row_refs);
my $duration = shift or 0; #duration of each period
my $output='';
my $periods; #hash of arrays or transaction ids
my %all_seen_descriptions; #keeps all seen descriptions, since not all will appear in a given period
my $items;
#order by periods:
foreach my $tid (keys %transactions) {
my $t=$transactions{$tid};
if (defined $t->{'time'}) {
#store the transaction id in an array, which is stored in a hash to the relevant period:
#if no duration, use time()+1dascalar.htmly as duration, to make sure only 1 period will be stored
my $period = $duration ? int ($t->{'time'} / $duration) : time()+(24*3600);
push @{$periods->{$period}}, $tid;
}
#if we haven't seen this description before, remember it:
if (defined $t->{'description'}) {
if (not exists $all_seen_descriptions{$t->{'description'}}) {
$all_seen_descriptions{$t->{'description'}}='';
}
}
}
my @descs_arr =  sort keys %all_seen_descriptions;
my @periods_arr = sort keys %{$periods};
#my @descs_arr = sort keys %all_seen_descriptions;
print STDERR 'Total unique sale descriptions: ' . @descs_arr . "\n";
print STDERR 'Total periods (' . (int $duration)/(24*3600) . ' days): ' . @periods_arr . "\n\n";


$output .= "\n\nSales over time (units sold)\n";
#prepare each period's totals:
$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
my @sums_row_refs;
$output .= join(',', @descs_arr) . ",Total,\n";
my @units_row_refs;
$output .= rows_to_csv(@units_row_refs);
foreach my $period (@periods_arr) {
my $start = strftime "%Y-%b-%d", localtime((int $period) * int $duration);
my ($sums_row_ref, $units_row_ref) = totals_by_desc(@descs_arr, @{$periods->{$period}});
#add totals and start, and convert to real array;
my @sums_row = ($start, @{$sums_row_ref}, row_total($sums_row_ref));
my @units_row = ($start, @{$units_row_ref} , row_total($units_row_ref));
#add row to rows collections:
push @sums_row_refs, \@sums_row;
push @units_row_refs, \@units_row;
}


#print to STDOUT or file:
$output .= "SL Sales (all times in SLT)\n\nSales over time (L\$ sum)\n";
if (defined $file_desc) {
$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
print $file_desc $output;
$output .= join(',', @descs_arr) . ",Total,\n"; #column heads:
} else {
$output .= rows_to_csv(@sums_row_refs);
print $output;
}
}


sub main::VERSION_MESSAGE {
$output .= "\n\nSales over time (units sold)\n";
print "  SL transactions analyzer version $main::VERSION.\n";
  $output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
  print '  Written by Thickbrick Sleaford <thickbrick.sleaford (at) gmail.com>.' . "\n";
$output .= join(',', @descs_arr) . ",Total,\n";
  print "  License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0.\n";
  $output .= rows_to_csv(@units_row_refs);
}


sub main::HELP_MESSAGE {
#print to STDOUT or file:
my $help = <<END;
if (defined $file_desc) {
print $file_desc $output;
} else {
print $output;
}
}


  Run this program with a list of Transaction History files as arguments.
sub main::VERSION_MESSAGE {
  It will extract transactions from the files, get rid of duplicates, remove
print "  SL transactions analyzer version $main::VERSION.\n";
  transactions that aren't sales by you (Object Payment and Object Sale) and
  print ' Written by Thickbrick Sleaford <thickbrick.sleaford (at) gmail.com>.' . "\n";
  aggregate them by weeks. The result is comma seperated values (csv).
  print "  License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0.\n";
  Supports LL XML files and slexchange CSV files.
}


  transactions.pl [-p <days>] [-c] [-o <output_file>] <input_file1> [<input_file2> <input_file3>..]
sub main::HELP_MESSAGE {
my $help = <<END;


    -p <days>
  Run this program with a list of Transaction History files as arguments.
      Aggregate sales in periods of this length days (integer).
  It will extract transactions from the files, get rid of duplicates, remove
      Default is 7 Days.
  transactions that aren't sales by you (Object Payment and Object Sale) and
  aggregate them by weeks.  The result is comma seperated values (csv).
  Supports LL XML files and slexchange CSV files.


    -c
  transactions.pl [-p <days>] [-c] [-o <output_file>] <input_file1> [<input_file2> <input_file3>..]
      Save the resulting CSV in a temporary file and open it with OpenOffice Calc


    -o <output_file>
    -p <days>
      Save the resulting CSV to a file.
      Aggregate sales in periods of this length days (integer).
      Use caution not to overwrite an input file!
      Default is 7 Days.


    --help
    -c
      This helpful messages.
      Save the resulting CSV in a temporary file and open it with OpenOffice Calc


END
    -o <output_file>
print $help;
      Save the resulting CSV to a file.
      Use caution not to overwrite an input file!


}
    --help
      This helpful messages.


#end of perl script...
END
print $help;


</nowiki>
}
 
'''End copy here'''

Revision as of 15:05, 1 September 2008

A perl script I've written, to help merchants make sense of their transaction histories. Copy and paste into a .pl file and run. Use at your own risk... --Thickbrick Sleaford 15:47, 1 September 2008 (PDT)


Start copy from here

#!/usr/bin/perl
#
# Analyze a collection of SL transaction history XML files.
#
# Depends on libxml-simpleobject-perl (debian package). Only tested on Linux so far.
#
# Limitations: If there is a period that didn't have any sales in it,
# the resulting csv table will skip it.  Probably not secure.
#
# Please report bugs to thickbrick.sleaford (at) gmail.com
# License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0
#
use warnings;
use strict;
use Getopt::Std;
use File::Temp qw(tempfile);
use XML::SimpleObject;
use Time::Local;
use POSIX qw(strftime);
$main::VERSION='0.1 (2008-08-27)';
$Getopt::Std::STANDARD_HELP_VERSION=1;
sub read_files(@);
sub read_ll_xml_file($);
sub read_slexchange_csv_file($);
sub totals_by_desc(\@\@);
sub print_csv($$);
sub rows_to_csv(\@);
sub row_total($);
my $oocalc_name='oocalc';	#the executable for OpenOffice Calc
my $slexchange_comission_percent=5.0;
my %transactions;


#main:
if (@ARGV < 1) {
	main::VERSION_MESSAGE();
	main::HELP_MESSAGE();
} else {
	my @in_files;
	my %options;	#command line options
	my $split_period=7*24*3600;
	my $temp_filename;
	my $outfile_handler;
#process command line options:
	getopts('co:p:', \%options);
	if (exists $options{'p'} and defined $options{'p'}) {
		$split_period = 24*3600 * $options{'p'};
	} else {
		$split_period = 24*3600*7;
	}
	if (exists $options{'c'}) {
		($outfile_handler, $temp_filename) = tempfile(SUFFIX => '.csv');	#oocalc wants '.csv' suffix
		open($outfile_handler,'>', $temp_filename) or die "\nCan't open temporary file '" . $temp_filename . "' for writing: $!\n";
	} elsif (exists $options{'o'} and defined $options{'o'}) {
		if (-e $options{'o'}) {
			die "\nOutput file '" . $options{'o'} . "' already exists. Will not try to overwrite it.\n\n";
		} else {
			open($outfile_handler,'>', $options{'o'}) or die "\nCan't open file '" . $options{'o'} . "' for writing: $!\n";
		}
	}
#read input file(s)
	@in_files=@ARGV;
	if (@in_files < 1) {
		die "\nNo input file specified.\nUse --help for more info.\n\n"
	}
	read_files(@in_files);
#process/print output
	print_csv($outfile_handler, $split_period);
#run openoffice calc if needed
	if (exists $options{'c'}) {
		system($oocalc_name, $temp_filename) == 0 or die "\nCan't execute OpenOffice Calc: '$oocalc_name'.\n\n";
	}
	close($outfile_handler) if defined $outfile_handler;
	print "\n";
}


#read xml files and store in a hash ref
sub read_files (@) {
	print STDERR "\n";
	foreach my $file (@_) {
		my $fh;
		open $fh, '<', $file or die "Can't open file '$file': $!\n";
		my $first_line=<$fh>;
		if ($first_line =~ m/<\?xml version="1.0" \?>/) {
			read_ll_xml_file($file);
		} else {
			#assume it's an slexchange csv file
			read_slexchange_csv_file($file);
		}
	}
	my $num = keys %transactions;
	print STDERR "Total unique sales processed: $num.\n";
	print STDERR "\n";
}
# read an SL transactions xml file and add it to the global hash
sub read_ll_xml_file($) {
	my $file = shift or return;
	my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
	print STDERR "Reading LL XML file: \'$file\' ... ";
	my $added=0;
	my $parser = XML::Parser->new(ErrorContext => 2, Style => "Tree");
	my $xso = XML::SimpleObject->new( $parser->parsefile($file) );
	foreach my $transaction ($xso->child('transactions')->children) {
		#begin by filtering for interesting transaction only (object sales and payments that are also deposits):
		if ($transaction->child('deposit') and $transaction->child('deposit')->value ne 
			and $transaction->child('type') and ($transaction->child('type')->value eq 'Payment' or
			$transaction->child('type')->value eq 'Object Sale'))
		{
			#store the (hash-ref) transaction in the global hash if it has unique id
			my $t_id = $transaction->child('id') ? $transaction->child('id')->value : ;
			my $t;
			$t->{'price'} = $transaction->child('deposit') ? $transaction->child('deposit')->value : ;
			$t->{'description'} = $transaction->child('description') ? $transaction->child('description')->value : ;
			my $time = $transaction->child('time') ? $transaction->child('time')->value : ;
			my @time_l = split /[- :]/,$time;
			$time_l[1]-=1; #if int $time_l[1] > 0;	#timelocal thinks januaray is month 0, not 1;
			# (SL transactions are written in SLT/PST. this converts to gmt)
			$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
			if (not exists $transactions{$t_id})  {
				$transactions{$t_id}=$t;
				++$added;
			}
		}
	}
	print STDERR " $added unique sales added.\n";
}
# read an slexchange.com transactions csv file and add it to the global hash
# the CSV is : ID, Date, Type, Transaction_Description, Amount, Balance
sub read_slexchange_csv_file($) {
	my $file = shift or return;
	my $gmt_to_slt = (-7)*3600; #SLT is 'America/Los_Angeles' (-0700). TODO: find the proper way to do this.
	my $added=0;
	my $fh;
	open $fh, '<', $file or die "Can't open file '$file': $!\n";
	print STDERR "Reading SLExchange CSV file: \'$file\' ... ";
	my $discard_line = <$fh>;	#discard first line;
	while (<$fh>) {
		my ($t_id, $time, $type, $desc, $amount, $balance)=split(/,/);
		#create a hash ref;
		my $t;
		$amount = $amount ? $amount : 0;
		$amount *= (1 - ($slexchange_comission_percent/100));	#substract slex comission
		$t->{'price'}= sprintf("%.0f", $amount);	#round it
		$t->{'description'} = $desc ? $desc : ;
		my @time_l = split /[- :]/,($time ? $time : );
		$time_l[1] -= 1; #if int $time_l[1] > 0;	#timelocal thinks januaray is month 0, not 1;
		# (SL transactions are written in SLT/PST. this converts to gmt)
		$t->{'time'} = (timelocal reverse @time_l) - $gmt_to_slt;
		if ($type eq '"SLL Item Purchase"' and $amount > 0 and $t_id ne  and $desc ne ) {
			$t_id='SLEX' . $t_id;	#make sure ids don't collide with LL transaction ids
			#add hasref to global hash if unique:
			if (not exists $transactions{$t_id})  {
				$transactions{$t_id}=$t;
				++$added;
			}
		}
	}
	print STDERR " $added unique sales added.\n";
}


#Accepts 2 array refs, unique descriptions, transaction ids.
#returns 2 lists, units sold per each description, and L$ sums per description
sub totals_by_desc(\@\@) {
	my ($aref_descs, $aref_tids) = @_;
	my $items;
	my @units;
	my @sums;
	# go over all ids and create a
	# hash ref of: tids->descriptions->totals:
	foreach my $tid (@{$aref_tids}) {
		my $t=$transactions{$tid};
		if (defined $t->{'description'}) {
			if (exists $items->{$t->{'description'}}) {
				$items->{$t->{'description'}}->{'sum'} += $t->{'price'};
				$items->{$t->{'description'}}->{'units'} += 1;
			} else {
				$items->{$t->{'description'}}->{'sum'} = $t->{'price'};
				$items->{$t->{'description'}}->{'units'} = 1;
			}
		}
	}
	# go over all descriptions and make a list of totals
	foreach my $desc (@{$aref_descs}) {
		if (exists $items->{$desc}) {
			push @sums, $items->{$desc}->{'sum'};
			push @units, $items->{$desc}->{'units'};
		} else {
			push @sums, 0;
			push @units, 0;
		}
	}
	return \@sums, \@units;
}


# accept and array of refs to row arrays
# return a csv string of the rows
sub rows_to_csv(\@) {
	my $aref_row_refs = shift;
	my $output=;
	foreach my $row (@{$aref_row_refs}) {
		$output .= join (',', @$row) . ",\n";
	}
	return $output;
}
#accepts a ref to an array (row), returns the sum of the items in the arra
sub row_total($) {
	my $aref_row=shift;
	my $total = 0;
	foreach (@{$aref_row}) {
		$total += $_;
	}
	return $total;
}
#print csv totals seperated to different tables, per period (in seconds)
sub print_csv($$) {
	my $file_desc = shift;
	my $duration = shift or 0;	#duration of each period
	my $output=;
	my $periods;		#hash of arrays or transaction ids
	my %all_seen_descriptions;	#keeps all seen descriptions, since not all will appear in a given period
	my $items;
	#order by periods:
	foreach my $tid (keys %transactions) {
		my $t=$transactions{$tid};
		if (defined $t->{'time'}) {
			#store the transaction id in an array, which is stored in a hash to the relevant period:
#if no duration, use time()+1dascalar.htmly as duration, to make sure only 1 period will be stored
			my $period = $duration ? int ($t->{'time'} / $duration) : time()+(24*3600);
			push @{$periods->{$period}}, $tid;
		}
		#if we haven't seen this description before, remember it:
		if (defined $t->{'description'}) {
			if (not exists $all_seen_descriptions{$t->{'description'}}) {
				$all_seen_descriptions{$t->{'description'}}=;
			}
		}
	}
	my @descs_arr =  sort keys %all_seen_descriptions;
	my @periods_arr = sort keys %{$periods};
	#my @descs_arr =  sort keys %all_seen_descriptions;
	print STDERR 'Total unique sale descriptions: ' . @descs_arr . "\n";
	print STDERR 'Total periods (' . (int $duration)/(24*3600) . ' days): ' . @periods_arr . "\n\n";
	#prepare each period's totals:
	my @sums_row_refs;
	my @units_row_refs;
	foreach my $period (@periods_arr) {
		my $start = strftime "%Y-%b-%d", localtime((int $period) * int $duration);
		my ($sums_row_ref, $units_row_ref) = totals_by_desc(@descs_arr, @{$periods->{$period}});
		#add totals and start, and convert to real array;
		my @sums_row = ($start, @{$sums_row_ref}, row_total($sums_row_ref));
		my @units_row = ($start, @{$units_row_ref} , row_total($units_row_ref));
		#add row to rows collections:
		push @sums_row_refs, \@sums_row;
		push @units_row_refs, \@units_row;
	}
	$output .= "SL Sales (all times in SLT)\n\nSales over time (L\$ sum)\n";
	$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
	$output .= join(',', @descs_arr) . ",Total,\n";	#column heads:
	$output .= rows_to_csv(@sums_row_refs);
	$output .= "\n\nSales over time (units sold)\n";
	$output .= "Period (" . (int $duration)/(24*3600) . " days starting with),";
	$output .= join(',', @descs_arr) . ",Total,\n";
	$output .= rows_to_csv(@units_row_refs);
	#print to STDOUT or file:
	if (defined $file_desc) {
		print $file_desc $output;
	} else {
		print $output;
	}
}
sub main::VERSION_MESSAGE {
	print "  SL transactions analyzer version $main::VERSION.\n";
 	print '  Written by Thickbrick Sleaford <thickbrick.sleaford (at) gmail.com>.' . "\n";
 	print "  License: GPL v3 or later, or Creative Commons Attribution-Share Alike 3.0.\n";
}
sub main::HELP_MESSAGE {
	my $help = <<END;
  Run this program with a list of Transaction History files as arguments.
  It will extract transactions from the files, get rid of duplicates, remove
  transactions that aren't sales by you (Object Payment and Object Sale) and
  aggregate them by weeks.  The result is comma seperated values (csv).
  Supports LL XML files and slexchange CSV files.
  transactions.pl [-p <days>] [-c] [-o <output_file>] <input_file1> [<input_file2> <input_file3>..]
    -p <days>
      Aggregate sales in periods of this length days (integer).
      Default is 7 Days.
    -c
      Save the resulting CSV in a temporary file and open it with OpenOffice Calc
    -o <output_file>
      Save the resulting CSV to a file.
      Use caution not to overwrite an input file!
    --help
      This helpful messages.
END
	print $help;
}

End copy here