#!/usr/bin/perl -w

#  COMPF script (measure component outputs and run times)
#  Use .pscf file to process all components and create .ccft files
#  that contain their specs
#  graphs the run time and output with single component (-S) option

use samplingf;
use dec15;
$eps = &dec15::geteps();

$Moption = 1; #  assume measuring all components
$Goption = 0; # graph the result (if also Soption)
$Soption = 0; # single .ccf file follows; do just that one
$Coption = 0;
$Loption = 1; #  assume linear
$Boption = 1; # use best-fit for lines (default); maybe fit endpoints later
$LCoption= 0; # linear for function, step for runtime
$CLoption= 0; # step for function approximation peice-wise linear for runtime
$Voption = 0; # print too many messages
$Foption = 0; # confidence percentage follows; do reliability in place of run time

OPT: for ($i = 0;;$i++) { #over all options
  unless (defined($ARGV[$i])) {last OPT;}
  if ($ARGV[$i] eq "-C") {
    $Coption = 1; $Loption = 0;
    $LCoption= 0; $CLoption= 0;
    $currentflag = "C";
  }
  if ($ARGV[$i] eq "-L") {
    $Coption = 0; $Loption = 1;
    $LCoption= 0; $CLoption= 0;
    $currentflag = "L";
    $Boption = 1; #assume best fit; maybe do endpoints later
  }
  if ($ARGV[$i] eq "-CL") {
    $Coption = 0; $Loption = 0;
    $LCoption= 0; $CLoption= 1;
    $currentflag = "CL";
  }
  if ($ARGV[$i] eq "-LC") {
    $Coption = 0; $Loption = 0;
    $LCoption= 1; $CLoption= 0;
    $currentflag = "LC";
  }
  if ($ARGV[$i] eq "-B") {
    $Boption = 1;
  }
  if ($ARGV[$i] eq "-V") {
    $Voption = 1;
  }
  if ($ARGV[$i] eq "-M") {
    $Soption = 0; $Moption = 1;
  }
  if ($ARGV[$i] eq "-F") {
    $Foption = 1;
    unless(defined($ARGV[$i+1]) && $ARGV[$i+1] >0 && $ARGV[$i+1] < 100) {
    die "Reliability % impossible";
    }
    unless ($Coption || $LCoption) {
    $LCoption = 1;
    }
    $confidencepct = $ARGV[$i+1];
  }
  if ($ARGV[$i] eq "-S") {
    $Soption = 1; $Moption = 0;
    $Goption = 1;
    unless(defined($ARGV[$i+1]) && $ARGV[$i+1] =~ m/.ccf$/) {
      die "Error reading .ccf file\n";
  }
    @components = ($ARGV[$i+1]);
  }
}

$i = 0;
unless ($Soption) {
  #process system configuration file
  $sys_desc = "system.pscf" ;
  open(SYSTEM, $sys_desc ) || die "could not open the file ", $sys_desc ;
  $comp_name = <SYSTEM>; #discard polish line

  #store component .ccf names
  @components = ();
  while ($comp_name = <SYSTEM>) { #read to end
    chop($comp_name);
    $components[$i] = $comp_name;
    $i++
  }
  close SYSTEM ;
}

#debug
#foreach $comp (@components) {print $comp."\n"; }

########################################subroutine#######################
sub warnmess{ #print warning based on V option
  unless ($Voption && defined($_[0])) {return;}
  $mess = "";
  MLOOP: for ($wm=0;;$wm++) {
    unless (defined($_[$wm])) {last MLOOP;}
    $mess = $mess.$_[$wm];
  }
warn $mess, "\n";  #don't print line from which this came...
}
sub fitline {
# If -B option, best-fit line in the sense of least squares of differences
# Otherwise, fit line at interval ends (really the last and first points measured)
# parameters:  $_[0] - X-vector; $_[1] - Y-vector; $_[2] - X-ave; $_[3] - Y-ave; $_[4] - Count
my($xvec, $yvec, $xave, $yave, $N, $Slope, $Inter, $dev, $cross, $var, $i);
$xvec = $_[0];
$yvec = $_[1];
$xave = $_[2];
$yave = $_[3];
$N = $_[4];
if ($Boption) { #best-fit the line
  $cross = 0;
  $var = 0;
  for ($i=0; $i<$N; $i++) {
    $cross += @$xvec[$i] * @$yvec[$i];
    $var += @$xvec[$i] * @$xvec[$i];
  }
  if ($N == 1) {
    $Slope = 0;
  }
  else {
    $Slope = ($cross/$N - $yave*$xave)/($var/$N - $xave*$xave);
  }
  if (abs($cross/$N - $yave*$xave) <= $eps) {
  #debug
  #$a = $cross/$N;
  #$b = $yave*$xave;
  #$c = $var/$N;
  #$d = $xave*$xave;
  #print "$Slope $a - $b / $c - $d\n";
  #end debug
    $Slope = 0;
  }
  $Inter = $yave - $Slope*$xave;
} else { #not -B, fit just to first/last points; can't happen just now`
  $x0 = $sub_start;
  $y0 = $Lord;
  $deltaX = $sub_end - $x0;
  $deltaY = $Rord - $y0;
  if ($deltaY == 0 || $deltaX == 0) {
    $Slope = 0;
    $Inter = $yave;  #degenerate cases
  } else {
    $Slope = $deltaY/$deltaX;
    $Inter = $y0 - $Slope*$x0;
  }
} #end not -B
$dev = 0;
for ($i=0; $i<$N; $i++) {
  $dev += ($Slope*@$xvec[$i] + $Inter -@$yvec[$i])**2;
}
$dev = sqrt($dev/$N); #rms deviation from the line; normalize later
return $Slope, $Inter, $dev;
#  return (slope,intercept,deviation) of the fitted line
}
#########################################################################

sub printf15 { # print line from a ccf file (9 values)
  #first parameter is a file name to write to
  #needed to avoid loss of precision when a float is near an integer in Perl
  my($file, $t, $N);
  $file = shift; #file handle or 0 if none
  for ((1..2)) {
    $t = shift;
    printf $file "%s",&dec15::dump15s($t);
  }
  #$t = shift;
  #if ($t eq "L") {
  #  $N = 6;
  #}
  #else {
 $N = 6;
  #}
  #printf $file "%s ",$t;
  for ((1..$N)) {
    $t = shift;
    printf $file "%s",&dec15::dump15s($t);
  }
  printf $file "\n";
}
#########################################################################

#for each component process its file and execute test
CCFL: 
foreach $comp (@components) {
  open(COMP, $comp) || die "could not open the file ", $comp;
  # get the name of the executable
  chop($comp_name = <COMP>) ;
  if ($comp_name =~ /^theory/) { #not a real component, skip
    $ccftsExist = 0;
  }
  #real component, check
  #see if the executable is there
  die "component ", $comp_name, " can't be executed." unless (-x $comp_name);
  $comptheory = $comp."t";  #assuming name ends in .ccf
  $ccftsExist = 1; #assume there are already good .ccft files
  @statvec = stat($comp); #for ccf file
  $ccfileTime = $statvec[9];
  @statvec = stat($comp_name); #for executable file
  $binfileTime = $statvec[9];  
  if (-e $comptheory) {
    open (CCFT, $comptheory);
    $tmp = <CCFT>; #examine the code line
    @tmp = split(" ",$tmp);
    $ccftsExist = 0 unless (((defined($tmp[1]) && ($tmp[1] eq "L")) && $Loption) || (($tmp[1] eq "C") && $Coption) 
      || (($tmp[1] eq "CL") && $CLoption) || (($tmp[1] eq "LC") && $LCoption)); 
    if ($Foption) {
      $ccftsExist = 0 unless (defined($tmp[3]) && ($tmp[2] eq "r") && ($tmp[3] == $confidencepct));
    }
    @statvec = stat($comptheory); #for theory ccf file
    $ccftsExist = 0 unless ($statvec[9] > $ccfileTime && $statvec[9] > $binfileTime);
    close (CCFT);
  } else {
    $ccftsExist = 0;
  }
  close(COMP);
  if ($ccftsExist) { #there is an OK .ccft file
    warnmess $comptheory, " up-to-date...skipping this component..." ;
    next CCFL;
  }
  warnmess $comptheory, " not found or out-of-date...generating...";
  open(COMPTH, ">".$comptheory)  || die "could not create theory file ", $comptheory;
  $code = "theory ";
  if($Loption){
    $code = $code."L";
  }elsif($Coption){
    $code = $code."C";
  }elsif($LCoption){
    $code = $code."LC";
  }elsif($CLoption){
    $code = $code."CL";
  }
  if ($Foption) { #theory LC r confidence
    $code = "theory LC r ".$confidencepct; #mark that file is reliability with that %
    $LCoption = 1; #never mind what was said about LC
  }
  print COMPTH "$code \n";
  #read subdomain information
  open(COMP,$comp);
  $linearr = <COMP>;#eat the name
  while($linearr = <COMP>) {
    #read line from .ccf file
    @line = split(' ',$linearr);
    $sub_start = $line[0] ;
    $sub_end = $line[1] ;
    $test_cnt = $line[2] ;
    if ($Foption) { #reliability, fake subroutine size to 100 times
      $rely = exp(log(1-$confidencepct/100.0)/(100.0*$test_cnt));
    }
    &samplingf::init($sub_start, $sub_end, $test_cnt);
    $sed = "sed 's/^/out:  /'";  # command to reproduce stdin to stdout with a leading "out:  "
    #for !-B, get values at interval ends
    unless ($Boption) {
      $cmd = "(echo ".$sub_start."|./".$comp_name."|".$sed.") 2>&1|" ;
      open(PI,$cmd);
      $t = 0;
      while ($line = <PI>) { #read all stuff out of executable
        @outstrm = split(' ',$line);
        if ($outstrm[0] =~ /out:/) { #functional output, record
          $Ly = $outstrm[1];
        }
        else { #run time; record 
          $Lt = $outstrm[0];
        }
      } #end loop over one execution's output
      close(PI);
      $cmd = "(echo ".$sub_end."|./".$comp_name."|".$sed.") 2>&1|" ;
      open(PI,$cmd);
      $t = 0;
      while ($line = <PI>) { #read all stuff out of executable
        @outstrm = split(' ',$line);
        if ($outstrm[0] =~ /out:/) { #functional output, record
          $Ry = $outstrm[1];
        }
        else { #run time; record
          $Rt = $outstrm[0];
        }
      } #end loop over one execution's output
      close(PI);
    }
    $cnt = 0 ;
    $input = &samplingf::next ;
    #for each subdomain get equi-spaced input and call components with this
    @x_vals = ();  #inputs
    $x_bar = 0;  #average
    @y_vals = ();  #functional outputs
    $y_bar = 0;
    $y_sqrs = 0;
    @t_vals = ();  #run times
    $t_bar = 0;
    $t_sqrs = 0;
    while($input ne ".") {
      $x_vals[$cnt] = $input;
      $x_bar += $input;
      $cmd = "(echo ".$input."|./".$comp_name."|".$sed.") 2>&1|" ;
      open(PI,$cmd);
      $t = 0;
      while ($line = <PI>) { #read all stuff out of executable
        @outstrm = split(' ',$line);
        #The output stream is: 1) RT of component; 2) "out:  ".output of component
        # just to be safe, check for the "out" and don't use the order
        #debug
        #print "$outstrm[0] \n";
        #print "$outstrm[1] \n";
        if ($outstrm[0] =~ /out:/) { #functional output, record
          $y = $outstrm[1];
          $y_vals[$cnt] = $y;
          $y_bar += $y;
          $y_sqrs += $y*$y;
        }
        else { #run time; record 
          $t += $outstrm[0];
        }
      } #end loop over one execution's output
      $t_vals[$cnt] = $t;
      $t_bar += $t;
      $t_sqrs += $t*$t;
      close(PI);
      $input = &samplingf::next ;
      $cnt++ ;
    } #end loop over inputs
    $x_bar /= $cnt;
    $y_bar /= $cnt;
    $t_bar /= $cnt;
    #debug
    #print "$cnt $y_bar\n";
    if ($Loption || $LCoption) { #functional line
      $Lord = $Ly;
      $Rord = $Ry;
      ($V_slope, $V_inter, $V_dev) = fitline(\@x_vals, \@y_vals, $x_bar, $y_bar, $cnt);
    } else { #functional constant
      $V_slope = 0;
      $V_inter = $y_bar;
      $V_dev = sqrt(abs($y_sqrs - $cnt*$y_bar*$y_bar)/$cnt);
    }
    if ($Foption) { #reliability
      $R_slope = 0;
      $R_inter = $rely;
      $R_dev = $confidencepct;
    } else {
      if ($Loption || $CLoption) { #run line
        $Lord = $Lt;
        $Rord = $Rt;
        ($R_slope, $R_inter, $R_dev) = fitline(\@x_vals, \@t_vals, $x_bar, $t_bar, $cnt);
      } else { #run constant
        $R_slope = 0;
        $R_inter = $t_bar;
        $R_dev = sqrt(abs($t_sqrs - $cnt*$t_bar*$t_bar)/$cnt);
      }
    }
    printf15(COMPTH, $sub_start, $sub_end,$V_slope,$V_inter,$R_slope,$R_inter,$V_dev,$R_dev);
    # the _dev values are rms, not normalized    

#debug
#print "N: $cnt, xave: $x_bar, yave: $t_bar, stdev: $y_sdev\n";
    #debug
      #print "rms: $V_dev, Rrms: $R_dev";
    #print "\n";
    #end debug
  } #end loop over subdomains, one component
  #get the rms values of measurements

  close COMP;
} #end loop over components

#Create "ident" ccft file
  open (I, ">ident.ccft");
  if ($Foption) { #reliability
    print I "theory LC r 99\n";
    print I "-1000000 1000000 1 0 0 1\n";  #[-10^6,10^6), slope 1, intercept 0, reliability 1, no error
  } else { #run time
    print I "theory L \n";
    print I "-1000000 1000000 1 0 0 0\n";  #[-10^6,10^6), slope 1, intercept 0, 0 run time, no error
  }
  close(I);

# Display the graph of the component
if ($Soption && $Goption) {
   warnmess "Graphing component..."; 
   system "./XcuteF ".$components[0]." -G";
}
