#!/usr/cs/bin/perl -w
#
# File: evaluate.pl  (version 2)
#
# Purpose: Reads the rank comparisons for baseline_vs_estimate (a .mse file
#          and a .rap file) and for baseline_versus_random (a .cmp file
#          containing the avg and std dev of each score).  It then
#          calculates the statistical significance of the estimate's
#          scores relative to the random scores, as (est - avg) / std dev.
#
# Modifications:
#   18-FEB-1998  txe  Initial creation
#   19-FEB-1998  txe  Additional mods
#   20-FEB-1998  txe  Added standard deviation, totals
#   23-FEB-1998  txe  Minor output-related changes
#   26-FEB-1998  txe  Using new random files instead of N random files
#   27-FEB-1998  txe  Continued overhaul to .cmp instead of .mse/.rap
#   02-MAR-1998  txe  Added R&P for i=5, i=10
#   06-MAR-1998  txe  Additional debugging
#   11-MAY-1998  txe  Added column labels
#

  require "subs/ir_subs.pm";
  require "subs/merits.pm";

  # Run parameters: (argument position, prompt label, default value).
  # GetArgument comes from the required subs/ files.
  $data_dir  = GetArgument (0, "data directory", "test");
  $base_name = GetArgument (1, "baseline",       "opt");
  $rand_name = GetArgument (2, "random",         "pure");
  $est_name  = GetArgument (3, "estimate",       "sum.0");

  # The names become dot-separated components of the file names below,
  # so any dots inside a name are removed first (e.g. "sum.0" -> "sum0").
  s/\.//g foreach ($base_name, $rand_name, $est_name);

  # Every input and output file lives under <data_dir>/cmp/ and starts
  # with the baseline name.
  my $cmp_prefix = join ("/", $data_dir, "cmp", $base_name);

  $ecmp_file = join (".", $cmp_prefix, $est_name);
  $rcmp_file = join (".", $cmp_prefix, $rand_name, "cmp");
  $out_file  = join (".", $cmp_prefix, $rand_name, $est_name, "eval");

  EvaluateEstimate ($ecmp_file, $rcmp_file, $out_file);
  print "Done.\n";

##############################################################################

# EvaluateEstimate ($ecmp_file, $rcmp_file, $out_file)
#
# Runs the three evaluation steps in order: load the estimate's scores
# (ReadEstComparison), load the random averages and standard deviations
# (ReadRandComparison), then write the report (Evaluate).
#
#   $ecmp_file  path prefix of the estimate comparison files
#   $rcmp_file  path of the random comparison file
#   $out_file   path of the report to write
sub EvaluateEstimate {
  my ($est_prefix, $rand_path, $report_path) = @_;

  ReadEstComparison  ($est_prefix);
  ReadRandComparison ($rand_path);
  Evaluate ($report_path);
}

##############################################################################

# ReadEstComparison ($ecmp_file)
#
# Loads the estimate's per-query scores from "$ecmp_file.mse" and
# "$ecmp_file.rap" into the shared score tables, each keyed by
# (query id, "est"):
#
#   %ord_mses, %rank_mses                   from the .mse file
#   %r_5, %p_5, %r_10, %p_10, %r_n, %p_n    from the .rap file
#
# All of these tables are emptied first.  On return @query_ids holds every
# query id seen in either file, in ascending numeric order.
#
# Blank lines are skipped and leading whitespace on a line is ignored.
# Dies if either file cannot be opened.
sub ReadEstComparison {
  my ($ecmp_file) = @_;

  %ord_mses  = ();
  %rank_mses = ();
  %r_5       = ();
  %p_5       = ();
  %r_10      = ();
  %p_10      = ();
  %r_n       = ();
  %p_n       = ();
  %unsorted_query_ids = ();

  my $mse_file = "$ecmp_file.mse";
  my $rap_file = "$ecmp_file.rap";

  print "Reading MSEs from '$mse_file'...\n";

  open (my $mse_fh, '<', $mse_file)
    or die ("Error opening mse_file '$mse_file': $!\n");
  while (my $line = <$mse_fh>) {
    # split ' ' (unlike split /\s+/) does not produce an empty first field
    # when the line is indented.  The second column (N*) is not used here.
    my ($query_id, undef, $ord_mse, $rank_mse) = split (' ', $line);
    next unless defined $query_id;          # blank line

    $ord_mses           { $query_id, "est" } = $ord_mse;
    $rank_mses          { $query_id, "est" } = $rank_mse;
    $unsorted_query_ids { $query_id } = 1;
  }
  close ($mse_fh);

  print "Reading R&Ps from '$rap_file'...\n";

  open (my $rap_fh, '<', $rap_file)
    or die ("Error opening rap_file '$rap_file': $!\n");
  while (my $line = <$rap_fh>) {
    # Columns: query id, i, r1, p1, r2, p2, r2b, c.  Only i, r2, p2 and c
    # are used.  NOTE(review): r2/p2 are presumably the recall and
    # precision at cutoff i -- confirm against the .rap writer.
    my ($query_id, $i, undef, undef, $r2, $p2, undef, $c)
      = split (' ', $line);
    next unless defined $query_id;          # blank line

    # c eq "N" marks the row whose values are stored as the N* scores.
    my $at_nstar = (defined $c && $c eq "N");

    # The i=5 / i=10 slots take the row with exactly that i, and also the
    # N* row when its i does not exceed 5 / 10.
    if ($i == 5 || ($at_nstar && $i <= 5)) {
      $r_5  { $query_id, "est" } = $r2;
      $p_5  { $query_id, "est" } = $p2;
    }
    if ($i == 10 || ($at_nstar && $i <= 10)) {
      $r_10 { $query_id, "est" } = $r2;
      $p_10 { $query_id, "est" } = $p2;
    }
    if ($at_nstar) {
      $r_n  { $query_id, "est" } = $r2;
      $p_n  { $query_id, "est" } = $p2;
    }
    $unsorted_query_ids { $query_id } = 1;
  }
  close ($rap_fh);

  # Query ids are numbers (they are printed with %d), so order them
  # numerically; a plain sort would give 1, 10, 11, 2, ...
  @query_ids = sort { $a <=> $b || $a cmp $b } keys %unsorted_query_ids;
}

##############################################################################

# ReadRandComparison ($rcmp_file)
#
# Loads the per-query random averages and standard deviations from
# $rcmp_file.  Each line holds, whitespace-separated:
#
#   query id, N*, then an (avg, std) pair for each of
#   ord MSE, rank MSE, R(5), P(5), R(10), P(10), R(N*), P(N*)
#
# N* goes into %nstars (emptied first), keyed by query id.  Each pair goes
# into the matching shared score table under the keys (query id, "avg")
# and (query id, "std").  The score tables are not emptied, so "est"
# entries already loaded stay in place.
#
# Blank lines are skipped and leading whitespace on a line is ignored.
# Columns missing from a short line are stored as undef.  Dies if the
# file cannot be opened.
sub ReadRandComparison {
  my ($rcmp_file) = @_;

  # Score tables in the order their (avg, std) column pairs appear.
  my @tables = (\%ord_mses, \%rank_mses,
                \%r_5,  \%p_5,
                \%r_10, \%p_10,
                \%r_n,  \%p_n);

  %nstars    = ();

  print "Reading averages and std deviations from '$rcmp_file'...\n";

  open (my $rcmp_fh, '<', $rcmp_file)
    or die ("Error opening rcmp_file '$rcmp_file': $!\n");
  while (my $line = <$rcmp_fh>) {
    # split ' ' (unlike split /\s+/) does not produce an empty first field
    # when the line is indented.
    my ($query_id, $nstar, @stats) = split (' ', $line);
    next unless defined $query_id;          # blank line

    $nstars { $query_id } = $nstar;
    foreach my $table (@tables) {
      $table->{ $query_id, "avg" } = shift (@stats);
      $table->{ $query_id, "std" } = shift (@stats);
    }
  }
  close ($rcmp_fh);
}

##############################################################################

# Evaluate ($out_file)
#
# Writes the evaluation report to $out_file.  After two header lines there
# is one row per id in @query_ids containing N* (from %nstars), the eight
# quality scores that GetQuality returns for %ord_mses, %rank_mses, %r_5,
# %p_5, %r_10, %p_10, %r_n and %p_n, and the mean of those eight.  A final
# "AVG" row gives the average N* and the per-column averages.
#
# The two MSE qualities are negated (presumably because a lower MSE is the
# better result, so that positive always means "better than random").
#
# GetQuality returns the sentinel +/-99 when the random standard deviation
# is zero.  Sentinels are left out of the column averages, and a row that
# contains any sentinel gets a row mean forced to +/-99 as well (by the
# sign of the raw mean), since a mean over sentinels is not a real score.
#
# Dies if $out_file cannot be opened or closed.
sub Evaluate {
  my ($out_file) = @_;

  print "Evaluating estimate, writing results to '$out_file'...\n";

  # Report columns in output order: [ score table, negate the quality? ].
  my @columns = (
    [ \%ord_mses,  1 ],
    [ \%rank_mses, 1 ],
    [ \%r_5,       0 ],
    [ \%p_5,       0 ],
    [ \%r_10,      0 ],
    [ \%p_10,      0 ],
    [ \%r_n,       0 ],
    [ \%p_n,       0 ],
  );
  my $num_columns = scalar (@columns);

  # Per-column sum and count of the non-sentinel qualities.
  my @totals  = (0) x $num_columns;
  my @counts  = (0) x $num_columns;
  my $n_total = 0;

  open (my $out, '>', $out_file)
    or die ("Error opening out_file '$out_file': $!\n");

  printf $out "%3s %6s  %6s %6s %6s %6s %6s %6s %6s %6s  %8s\n",
    "Que",    "  N*  ", "  Ord ", " Rank ", " R(5) ", " P(5) ",
    " R(10)", " P(10)", " R(N*)", " P(N*)", "Avg Qual";

  printf $out "%3s %6s  %6s %6s %6s %6s %6s %6s %6s %6s  %8s\n",
    "---",    "------", "------", "------", "------", "------",
    "------", "------", "------", "------", "--------";

  foreach my $query_id (@query_ids) {
    print "  Calculating qualities for query $query_id\n";

    my @quals        = ();
    my $qual_sum     = 0;
    my $has_sentinel = 0;

    foreach my $col (0 .. $#columns) {
      my ($table, $negate) = @{ $columns[$col] };

      my $raw  = GetQuality ($query_id, %$table);
      # "0 - x" rather than "-1 * x" so a zero quality never prints "-0.00".
      my $qual = $negate ? 0 - $raw : 0 + $raw;

      push (@quals, $qual);
      $qual_sum += $qual;

      if ($qual == -99 || $qual == 99) {
        $has_sentinel = 1;
      }
      else {
        $totals[$col] += $qual;
        $counts[$col]++;
      }
    }

    my $avg_qual = $qual_sum / $num_columns;
    if ($has_sentinel) {
      $avg_qual = ($avg_qual > 0) * 198 - 99;
    }

    $n_total += $nstars {$query_id};

    printf $out "%-3d %6d  %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f   %6.2f\n",
       $query_id, $nstars{$query_id}, @quals, $avg_qual;
  }

  # SmartDiv comes from the required subs/ files; it is used here for
  # every average whose divisor (a count) may be zero.
  my $n_avg = SmartDiv ($n_total, scalar (@query_ids));

  my @col_avgs = ();
  my $avg_sum  = 0;
  foreach my $col (0 .. $#columns) {
    my $col_avg = SmartDiv ($totals[$col], $counts[$col]);
    push (@col_avgs, $col_avg);
    $avg_sum += $col_avg;
  }
  my $avg_avg = $avg_sum / $num_columns;

  printf $out "AVG %6.1f  %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f %6.2f   %6.2f\n",
       $n_avg, @col_avgs, $avg_avg;

  # Buffered write errors only surface at close, so check it.
  close ($out)
    or die ("Error closing out_file '$out_file': $!\n");
}

#########################################################################

# GetQuality ($query_id, %scores)
#
# Returns how far the estimate's score for $query_id lies from the random
# average, in units of the random standard deviation:
#
#   ($scores{$query_id,"est"} - $scores{$query_id,"avg"})
#       / $scores{$query_id,"std"}
#
# %scores is one of the score tables, passed flattened.  When the standard
# deviation is zero the ratio is undefined, so the result is forced to +99
# if the estimate is greater than the average and to -99 otherwise (an
# estimate equal to the average therefore gives -99).
#
# All working values are lexical, so no package variables are modified.
sub GetQuality {
  my ($query_id, %scores) = @_;

  my $est = $scores { $query_id, "est" };
  my $avg = $scores { $query_id, "avg" };
  my $std = $scores { $query_id, "std" };

  my $quality;
  if ($std == 0) {
    $quality = ($est > $avg) * 198 - 99;    # force +/- 99 for special case
  }
  else {
    $quality = ($est - $avg) / $std;
  }

#printf "$query_id [%6.2f, %6.2f, %6.2f -> %6.2f]\n", $avg, $std, $est, $quality;
  return $quality;
}

##############################################################################

