#!/usr/bin/perl -w
use strict;
use Math::Trig;

if (@ARGV < 1)
{
die <<EOL
Syntax: compose.pl 'key:eq1:eq2...' file1 file2 file2..

Loads N files and uses a set of equations to combine them
into columns of an output file. E.g. problems of the sort
divide column 1 in file 2 with column 3 in file 1 and output
the result.

eq1 is an equation of \$0,\$1,..,\$M which refers to the line
number and column one to column M in the current row, respectively.
eq1 is evaluated when files are loaded, and can only
refer to the columns in the current row of the current file.

eq2 .. eqQ are equations for the 2nd to the Q'th output column.

These are evaluated for sets of rows having a matching
key that occurs once in every file.

Equations can use variables \$n.m, which refers to the
m'th column of the n'th file. E.g.

  \$1.1 is the first column in the first file.
  \$2.1 is the first column in the second file.
  \$1.0 is the line number of the first file.
  \$2.2 is the second column of the second file.

Default equation (used if not stated) is '$1:$1.2' which
types out the first column and second column of the first file.
If used with multiple input files, it will type out the
first and second column, for keys occuring in all files, i.e.
act as a filter.

Equations can be any Perl statement that evaluates to a number.
e.g. sin($1.2)*($2.2)^2/$3.4

Line numbers \$0 are handled correctly for files with remarks # and
empty lines. Errors are printed if the target column does not exist
in the desired file.

  Example compose.pl '$1:$1.2:$2.2:$1.2/$2.2' file1 file2

file1    file2     evaluates                output
-----------------------------------------------------
 1 2      1 4      key 1: 2/4              1 2 4 0.5
 2 3      2 5      key 2: 3/5              2 3 5 0.6
 5 5      3 6      key 3: occurs once      5 5 5 1
 6 6      5 5      key 5: 5/5
                   key 6: occurs once
EOL
}

my $eqs;
if ($ARGV[0] =~ /:/)
   {
     $eqs=shift @ARGV;
   }
  else
   {
     $eqs='$1:$1.2';  #default output
   }


#print $ARGV[0];

my @files=@ARGV;
die "No files found" if ($#files==-1);

print "#compose.pl \'$eqs\' ".join(" ",@ARGV)."\n";

my @eqns = split /:/,$eqs;  $eqs=shift @eqns;
my $files= scalar(@files);
my $data;       #hash for data
my %chain;      #chain for keys
my $mykey;      #one key to rule them all

sub evalfunc
#evalfunc(sin($1)*$2+$0, ..)
#arg1 string equation to evaluate
#arg2 ref to array hash array with numbers
#arg3 key
{
  my $eq   =shift;
  my $data =shift;
  my $key  =shift;

  while ( $eq =~ /\$([0-9]+)\.([0-9]+)/cg )
    {
      if (not defined $$data[$1-1]{$key}[$2])
        {
          die "Error: Reference to column $2 in file $1\n";
        }
    }

  $eq=~s/\$([0-9]+)\.([0-9]+)/($$data[$1-1]{$key}[$2])/g;

  my $x=eval($eq);
  if (not defined $x) 
    {
       print STDERR "ERROR! evaluating eq=\"$eq\"\n";
    }
  return $x;
}


sub evalfunc2
#evalfunc(sin($1)*$2+$0, ..)
#evaluates equation using values stored in default array
{
  my $eq=shift;

  while ( $eq =~ /\$([0-9]+)/cg )
    {
      if (not defined $_[$1])
        {
          die "Error: Reference to column $1 missing in file\n";
        }
    }

  $eq=~s/\$([0-9]+)/($_[$1])/g;
  return eval($eq);
}



#process all the files:

my $f=0;
foreach (@files)
  {
     open FI,$_ or die "Not found $_\n";
     my $line=0;
     while (<FI>)  #load
        {
           if (/^\#/) {print $_; }   #header line
             elsif (/^\s*$/) { }     #empty line
             else
              {
                $line++;             #data line
                s/^\s+//;            #remove leading whitespace
                s/\s+^//;            #remove trailing whitespace
                @_=split /\s+/;      #break on space
                unshift @_,$line;    #add as first column

                my $key=evalfunc2($eqs,@_);      #evaluate row key                
                $chain{$key}.=chr($f+ord('a'));  #add key to chain
                $data->[$f]{$key} = [ @_ ];      #store row
              }
        }
        
     $mykey.=chr($f+ord('a'));       #magic
     close FI;
     $f++;
  }

#Evaluate equations for keys occuring in all files:

foreach my $key (grep $chain{$_}=~/$mykey/, sort{$a<=>$b} keys %chain)
{
#   print $key;
   foreach (@eqns)
      {
         my $x=evalfunc($_,$data,$key);
         if (defined $x)
           {  print "$x\t"; }
          else 
           { print "\tERR"; print STDERR "Error evaluating: $_\n";}
      }
   print "\n";
}

