#! /usr/bin/perl -w
# XcompF [-r | -s] <left> <right> <count> [-G]
#   -- a program to compare and plot the measured and calculated functions
#         for a component-based system.
#
#  Assumes that the SYN script has been run in the current directory without
#   the -X option, so that SystemCode is an executable file, and "system.scf"
#   has the name of the calculated file in its first line.
#   These are the functions compared.
#
#  If any of the non-switch parameters are present, all must be.  Otherwise the default
#    is the interval defined by the calculated file and a count of about 
#    three times the number of subdomains it has.
#    The -r option causes the sampling to be random
#    and default 3 times as frequent as the count described above.
#
#  The normal sampling is equispaced across the interval; with the -s option
#    the subdomains within the interval are instead sampled the number of
#    times in the system.scf file (which means that -s overrides <count>).
#
# -G causes the graph to be displayed on the screen instead of going to a
# PostScript file.

use samplingf;
#argument processing; it's a mess
$DISPoption = 0;
$useRand = 0;
$useSubd = 0;
if (defined($ARGV[0])) { 
  if ($ARGV[0] eq "-r") {
    $useRand = 1;
  }
  if ($ARGV[0] eq "-s") {
    $useSubd = 1;
  }
}
$maybeParm = $useRand + $useSubd;  #not quite OK, like most switch processing
$lastarg = @ARGV - 1;
if (defined($ARGV[$lastarg]) && $ARGV[$lastarg] eq "-G") { 
  $DISPoption = 1;
  $lastarg--;
}
if ($maybeParm  <= $lastarg) { #possibly some parameters
  unless ($maybeParm+2 <= $lastarg) { die "Parameters: <left> <right> <count> must all be given";}
  $left = $ARGV[0+$maybeParm];
  $right = $ARGV[1+$maybeParm];
  $count = $ARGV[2+$maybeParm];
}
#debug
#print "PARAM:  $left,$right,$count\n";

# Get the input range from the theory.ccf file
# and allow override from command line
open(ST,"system.scf") or die "must have a system.scf file";
chop($lastheory = <ST>);
if ($useSubd) { #get samples/subd from first line of system.scf (fix later?)
  $firstline = <ST>;
  @firstsub = split(" ",$firstline);
  $sampersub = $firstsub[2];
}
close(ST); #leave open if to be read later; see $subhits below
open(TCCF,$lastheory) or die "A calculated ", $lastheory, " file must be present";
chop($line = <TCCF>);
@codes = split(" ",$line);
if ($line !~ /^theory/) {die "doesn't seem to be a theory file";}
#debug
#print "$line \n";
$Rely = 0;
if (defined($codes[3]) && $codes[2] eq "r") { #reliability file
  $Rely = $codes[3]; #confidence %
}

open(B,">subd.plt"); #for marking boundaries
open(C,">pred.plt"); #calculation values
open(CR,">runpred.plt"); #calculated run times
$firstsubhere = -1;
$k = 0;
while ($line = <TCCF>) { #read and store boundaries, store & plot theory graph.
  @subd = split(" ",$line);
  $LB[$k] = $subd[0];
  $UB[$k] = $subd[1];
  if (!defined($right) || ($subd[1] >= $left && $subd[0] < $right)) {
    if ($firstsubhere < 0) { #record first 
      $firstsubhere = $k;
    }
    #debug
    #print "$k   ";
    print B "$subd[0] 0\n";
    $lastsubhere = $k;
    $slope[$k] = $subd[2];
    $intcpt[$k] = $subd[3];
    $v1 = $slope[$k]*$LB[$k] + $intcpt[$k];
    $v2 = $slope[$k]*$UB[$k] + $intcpt[$k];
    $runslope[$k] = $subd[4];
    $runintcpt[$k] = $subd[5];
    $v3 = $runslope[$k]*$LB[$k] + $runintcpt[$k];
    $v4 = $runslope[$k]*$UB[$k] + $runintcpt[$k];
    print C "$LB[$k] $v1\n";
    print C "$UB[$k] $v2\n";
    print CR "$LB[$k] $v3\n";
    print CR "$UB[$k] $v4\n";
    $verr[$k] = 0;
    $rerr[$k] = 0;
    $diverge[$k] = "";
  }
  $subhits[$k] = 0;
  $k++;
}
$numSubd = $k;
$numhere = $lastsubhere - $firstsubhere + 1;
close(C);
close(CR);
if (!defined($right) || ($UB[$lastsubhere] <= $right)) {
print B "$UB[$lastsubhere] 0\n";
}
close(B);
unless (defined($right)) { #no parameters; past here cannot test $right to see if params
  $left = $LB[0];
  $right = $UB[$numSubd-1];
  $count = 3*$numhere;
  if ($useRand) {  #increase default count
    $count *= 3;
  }
}
$rs = "";
if ($useRand) {
  $rs = " (random)";
}
$princt = $count;
if ($useSubd) {
  $rs = $rs." (by subdomain)";
  $princt = $numhere*$sampersub;
}
printf "Comparing on [%.2f, %.2f) for %d points %s\n",$left,$right,$princt,$rs;

unless (-x "SystemCode") {die "No code `SystemCode' to measure";}
unlink "trace_path"; unlink "tmp_hash"; #for trace option
open(M,">meas.plt");
open(MR,">runmeas.plt");
$sed = "sed 's/^/out:  /'";  # command to reproduce stdin to stdout with a leading "out:  "
if ($useRand) {
  $input = $left + rand($right-$left);
}
else {
  &samplingf::init($left,$right,$count);
  $input = &samplingf::next;
}

$vsqrs = 0;
$rsqrs = 0;
SUB:
for ($h=$firstsubhere; $h<=$lastsubhere; $h++) { #for all subdomains to be considered here 
  #must jump out immediately if not -s
  #set up samples/subdomain
  if ($useSubd) { #override the global sampling
    #debug
    #print "sub: $h; $LB[$h] $UB[$h] $sampersub\n"; if ($LB[$h] >= $UB[$h]) { next SUB; }
    &samplingf::init($LB[$h],$UB[$h],$sampersub);
    $input = &samplingf::next;
    $verr[$h] = 0;
    $rerr[$h] = 0;
    $count = $sampersub*$numhere;
  }
$K = $count;
while ($input ne "." && $K-- > 0) {
  $thdiverge = 0;
  $codiverge = 0;  #assume a functional value for both
  #find the subdomain it's in 
  if ($useSubd) { #easy! 
    $k = $h;
  } else { #(should be binary search or remember where we are!)
    ITER: for ($k=$firstsubhere; $k <= $lastsubhere; $k++) {
      if ($input >= $LB[$k] && $input < $UB[$k]) {
        last ITER;
      }
    }
  }
  if ($k > $lastsubhere) { #subdomain not found
    warn "tried to sample at ",$input,"-- in no theory subdomain";
  } else {
    if ($runslope[$k] == 0 && $runintcpt[$k] < 0) { #theory undefined
      $thdiverge = 1;
      $diverge[$k] = "theory^ ";
    }
  }

  $cmd = "(echo ".$input."| ./SystemCode |".$sed.") 2>&1|" ;
  open(PM, $cmd);
  $rmeas = 0;
  PLP:
  while ($line = <PM>) {  #not eof, all output from this system on this input
    # results all come out together, but stdout is preceeded by "out:  "
    @outstrm = split(' ',$line);
    #debug
    #print "$line \n";
    if ($outstrm[0] =~ /out:/) {
      unless (defined($outstrm[1])) { #code gave no output
        $codiverge = 1;
        $meas = -1;
        $rmeas = -1; #phony values
        if ($k <= $lastsubhere) { #a subdomain was found
          $diverge[$k] .= "code^ ";
        }
      } else {
        $meas = $outstrm[1];
      }
    }
    else {
      if ($outstrm[0] =~ /LOOPING/) {
        die "System may be looping on input ", $input, "; stopped";
      }
      unless ($outstrm[0] =~ /\d/ ) {#no digits, usually means error for code
        #debug
        #print "Code stderr:  $line\n";
        $codiverge = 1;
        $meas = -1;
        $rmeas = -1; #phony values
        if ($k <= $lastsubhere) { #a subdomain was found
          $diverge[$k] .= "code^ ";
        }
        last PLP;
      } else {
        $rmeas += $outstrm[0];
      }
    }
  }
  close(PM);
    print M "$input $meas \n";
    print MR "$input $rmeas \n";
  #compare with theory
  if ($k <= $lastsubhere) {
    #debug
    #print "$k	";
    $v = $slope[$k]*$input + $intcpt[$k];
    $r = $runslope[$k]*$input + $runintcpt[$k];
    $vsqrs += $meas*$meas;
    $rsqrs += $rmeas*$rmeas;
    $subhits[$k]++;
    $verr[$k] += ($meas - $v)**2;
    #debug
    #if ($verr[$k] > .1) {print "$k [$LB[$k], $UB[$k]) error $input @ $slope[$k]-$intcpt[$k]\n";}
    if ($Rely) {
      $rerr[$k] = $r;
    } else {
      $rerr[$k] += ($rmeas - $r)**2;
    }
  }
  if ($useRand) {
    $input = $left + rand($right-$left);
  }
  else {
    $input = &samplingf::next;
  }
} #end sampling loop
unless ($useSubd) {
  last SUB; #must exit after one "subdomain" iteration
}
} #end loop over subdomains
close(M);
close(MR);
$vrms = sqrt($vsqrs/$count);
$rrms = sqrt($rsqrs/$count);

if ($Rely) { #reliability
  print "normalized rms error    $Rely-% confidence\n";
  print "{count}	interval     func            reliability\n";
} else { #run time
  print "normalized rms errors\n";
  print "{count}	interval	func	run\n";
}

$wverr = 0;
$wrerr = 0;
$totalen = 0;
if ($Rely) {
  $fform = "%5.4f";
} else {
  $fform = "%3.2f";
}
for ($k=$firstsubhere; $k <= $lastsubhere; $k++) {
  if ($diverge[$k] ne "") {
    printf "{%d}	[%.2f, %.2f)    %s \n", $subhits[$k], $LB[$k], $UB[$k], $diverge[$k];
  } else {
    if ($subhits[$k] > 0) {
      $verr[$k] = 100*sqrt($verr[$k]/$subhits[$k])/(abs($vrms)+.0000001);
      unless ($Rely) { #don't fool with reliability value
        $rerr[$k] = 100*sqrt($rerr[$k]/$subhits[$k])/(abs($rrms)+.0000001);
      }
      $thislen = $UB[$k] - $LB[$k];
      $wverr += $thislen*$verr[$k];
      $wrerr += $thislen*$rerr[$k];
      $totalen += $thislen;
      printf "{%d}	[%.2f, %.2f)	%2.2f%%	$fform%% \n", $subhits[$k],$LB[$k], $UB[$k], $verr[$k], $rerr[$k];
    }
  }
} #end loop over subdomains
printf "Weighted average:	%2.2f%%	$fform%% \n", $wverr/$totalen, $wrerr/$totalen;

if ($useRand) { #measured files out of order
  `mv runmeas.plt /tmp/t`;
  `sort -g /tmp/t >runmeas.plt`;
  `mv meas.plt /tmp/t`;
  `sort -g /tmp/t >meas.plt`;
}

open(PF,">complot");
  print PF "set terminal postscript color\n";
  print PF qq(set output "plot.ps"\n);
  print PF qq(plot "meas.plt", "runmeas.plt", "subd.plt" with points pt 2 pointsize 3, "pred.plt" with lines, "runpred.plt" with lines\n);
  close(PF);
system("gnuplot complot"); #plot measured run time even if reliability case

if ($DISPoption) {
  open(PF,">complot");
  print PF "set terminal x11\n";
  print PF "set nokey\n";
  print PF qq(set nox2tics\n);
  print PF qq(set border 11\n);
  print PF qq(set xlabel "Input showing subdomains" font "Helvetica,16"\n);
  print PF qq(set ylabel "Functional output values" font "Helvetica,16"\n);
  print PF qq(set xtics nomirror font "Helvetica,14"\n);
  print PF qq(set ytics nomirror font "Helvetica,14"\n);
  print PF qq(set y2tics font "Helvetica,14"\n);
  if ($Rely) {
    print PF qq(set y2label "$Rely%-confidence reliability bound" font "Helvetica,16"\n);
    print PF qq(plot "runpred.plt" axes x1y2 with lines lt 3,"subd.plt" axes x1y1 with points pointsize 3, "pred.plt" axes x1y1 with lines lt -1, "meas.plt" axes x1y1 with lines lt 4\n);
  } else {
    print PF qq(set y2label "Run time values" font "Helvitica,16"\n);
    print PF qq(plot "runpred.plt" axes x1y2 with lines lt 3,"subd.plt" axes x1y1 with points pointsize 3, "pred.plt" axes x1y1 with lines lt -1, "meas.plt" axes x1y1 with lines lt 4, "runmeas.plt" axes x1y2 with lines lt 5\n);
  }
  close(PF);
}
system("gnuplot -persist complot");
