#!/usr/bin/perl
use strict;

if (@ARGV < 1)
{
 die <<EOL
Syntax: average.pl 'key_eq:eq1:eq2:...' file1 ... fileN
generates output eq1(data) <eq2(data)> <eq3(data)> .. 

The equations can refer to $1,$2.. which are the columns in
the input files. $0 refers to the line number.

If equation not specified '$0:$1:$2' is assumed, i.e. output
the average of column 1 and 2, using the line number as the
key. The key should evaluate to a integer.

Alternatively functions avg(eq) and dev(eq) can be used to
plot the average of the equation (default behaviour) but also
the std. deviation. e.g.  '$0:avg($1):dev($1)' outputs the
average and the std. deviation of column 1.
EOL

}

my $eqs;

if ($ARGV[0] =~ /:/)
   {
     $eqs=shift @ARGV;
   }
  else
   {
     $eqs='$1:$1:$2';  #default output
   }

my @files=<@ARGV>;

die "No files found" if (scalar(@files)==0);
my @eqns=split /:/,$eqs;
my @proc;
my $cols=scalar(@eqns);

print "\#key:\n";
print "\#$eqns[0]\n\#\n";

print "\#column equations:\n";
for (my $i=1;$i<scalar(@eqns);$i++)
{
  if ($eqns[$i] =~ /^\s*(\w+)\s*\((.+)\)\s*$/)
    {
      $proc[$i]=$1;
      $eqns[$i]=$2;
    }
  else
    {
      $proc[$i]="avg";
    }
  
  print "\#$i $proc[$i]($eqns[$i])\n";
}

print "\#\n\#Files:\n";
foreach (@files)
{
  print "\#$_\n";
}
print "\#\n";


#columns are referred to as $1 $2 $3 $4 ..
#$0 refers to the line count (header discarded)

my @data;

sub evalfunc
#evalfunc(sin($1)*$2+$0, ..)
#evaluates equation using values stored in default array
{
  my $eq=shift;
  $eq=~s/\$([0-9]+)/(@_[$1])/g;
  return eval($eq);
}

sub avg
#takes array and calculates average
{
   my $s;
   foreach (@_) {$s+=$_};
   return $s/scalar(@_);
}


sub dev
#takes array and calculates standard deviation
{
   my $s;

   foreach (@_) {$s+=$_;}
   $a=$s/scalar(@_); $s=0;
   
   foreach (@_) {$s+=($_-$a)*($_-$a);}
   
   if (scalar(@_)==1) { return 0; }
   return sqrt($s/(scalar(@_)-1));
}

#process all the files:

foreach (@files)
  {
     open FI,$_ or die "Not found $_\n";
     print "#File: $_\n";
     my $line=0;
     while (<FI>)  #load
        {
           s/^\s+//;            #remove leading whitespace
           s/\s+^//;            #remove trailing whitespace

           if (/^\#/) {print $_; }   #header line
             elsif (/^$/) { }     #empty line
             else
              {
                $line++;             #data line
                split /\s+/;         #break on space
                unshift @_,$line;    #add as first column

                my $x=evalfunc($eqns[0],@_);

                for (my $i=1;$i<$cols;$i++)
                   {
                      push @{%{$data[$i-1]}->{$x}},evalfunc($eqns[$i],@_);
                   }
              }
        }
     close FI;
  }

#print the result:
my @t; #tmp

foreach (sort{$a<=>$b} keys %{$data[0]})
  {
#    print $_."\t";
#average of first column 
#    print avg(@{%{$data[0]}->{$_}})."\t";
    for (my $i=1;$i<$cols;$i++)
        {
          @t=@{%{$data[$i-1]}->{$_}};
          
          if ($proc[$i] eq "avg") { print avg(@t)."\t"; }
           elsif
             ($proc[$i] eq "dev") { print dev(@t)."\t\t"; }
        }
    print "\n";
  }

