#!/usr/cs/bin/perl
#
# File: cmp_ranks.pl
#
# Purpose: Compares ranks, generates MSE and P vs R output files
#
# Modifications:
#   23-JAN-1998  txe  Initial creation
#   24-JAN-1998  txe  Various fixes, added third (n=5,10) comparison
#   25-JAN-1998  txe  Got rid of scheme B, added normalized ranking
#                     Put scheme B back in
#   27-JAN-1998  txe  Added DEBUG files, i goes to $num_coll_ids
#   18-FEB-1998  txe  Uses new subroutines in ranks.pm
#   19-FEB-1998  txe  Small mods
#   20-FEB-1998  txe  Added wild cards to .cmp file
#   06-MAR-1998  txe  Use command line instead of .cmp file
#   13-MAY-1998  txe  Added calls to ReadMaskFile(), MaskedOut()
#

  require "subs/ir_subs.pm";
  require "subs/merits.pm";
  require "subs/ranks.pm";

  # Main program: work out the input/output file names from the command line,
  # load the mask file and run the comparison.
  #
  # The second and third values handed to GetArgument look like a descriptive
  # label and a default value -- TODO confirm against subs/ir_subs.pm.
  $data_dir  = GetArgument (0, "data directory", "test");
  $base_name = GetArgument (1, "baseline", "opt");
  $est_name  = GetArgument (2, "estimate", "sum.0");

  # Input files are located using the names exactly as given.
  $mask_file = "$data_dir/mask.txt";
  $base_file = "$data_dir/ranks/$base_name.ranks";
  $est_file  = "$data_dir/ranks/$est_name.ranks";

  # Dots are stripped from both names before they are used in the output
  # path, so the only dot left there is the one separating baseline from
  # estimate (e.g. "opt" and "sum.0" give ".../cmp/opt.sum0").
  for ($base_name, $est_name) {
    s/\.//g;
  }
  $out_file = "$data_dir/cmp/$base_name.$est_name";

  ReadMaskFile ($mask_file);
  CompareRanks ($base_file, $est_file, $out_file);
  print "Done.\n";

##############################################################################

sub CompareRanks {
  # Compares a baseline ranking with an estimated ranking, query by query.
  #
  # Arguments:
  #   $in_file1 - ranks file holding the baseline
  #   $in_file2 - ranks file holding the estimate
  #   $out_file - output path prefix; results are appended to
  #               "$out_file.mse", "$out_file.rap" and "$out_file.bug"
  #
  # Everything computed here is stored in package globals (%ords1, %ranks1,
  # %merits1, %ords2, %ranks2, %merits2, @coll_ids1, @coll_ids2,
  # $num_valid_coll_ids, ...) because GetNandTotal, CalculateMSE and
  # CalculateRandP read them directly instead of taking them as arguments.
  local ($in_file1, $in_file2, $out_file) = @_;

  print "Comparing ranks in baseline '$in_file1' and estimate '$in_file2'...\n";

  $mse_file = "$out_file.mse";
  $rap_file = "$out_file.rap";
  $bug_file = "$out_file.bug";

  # The per-query routines open these files in append mode, so remove
  # whatever an earlier run left behind.
  unlink ($mse_file, $rap_file, $bug_file);

  # ReadRanks leaves its results in the globals %ords, %ranks and %merits;
  # take a copy after each call so that both data sets stay available.
  ReadRanks ($in_file1);
  %merits1 = %merits;
  %ranks1  = %ranks;
  %ords1   = %ords;

  ReadRanks ($in_file2);
  %merits2 = %merits;
  %ranks2  = %ranks;
  %ords2   = %ords;

  # The sets of query ids and collection ids are taken from the baseline.
  @query_ids    = GetSortedQueryIds (%merits1);
  @coll_ids     = GetSortedCollIds  (%merits1);
  $num_coll_ids = scalar (@coll_ids);

  # The loop runs on $_ (copied into the global $query_id) so that $_ still
  # holds the current query id while the helpers below execute.
  foreach (@query_ids) {
    $query_id = $_;

    # Keep only the collections that are not masked out for this query.
    @valid_cids = ();
    for ($i = 0, $j = 0; $i < $num_coll_ids; $i++) {
      next if MaskedOut ($query_id, $coll_ids[$i]);
      $valid_cids[$j++] = $coll_ids[$i];
    }
    $num_valid_coll_ids = scalar (@valid_cids);

    # Order the surviving collections by ascending ord value, once according
    # to the baseline and once according to the estimate.
    @coll_ids1 = sort { $ords1{$query_id,$a} <=> $ords1{$query_id,$b} } @valid_cids;
    @coll_ids2 = sort { $ords2{$query_id,$a} <=> $ords2{$query_id,$b} } @valid_cids;

    # GetNandTotal must run first: it sets $n and $total, which the two
    # Calculate* routines use.
    GetNandTotal   ($query_id);
    CalculateMSE   ($query_id, $mse_file);
    CalculateRandP ($query_id, $rap_file, $bug_file);
  }
}

##############################################################################

sub CalculateMSE {
  # Appends one line for $query_id to $out_file containing the query id, the
  # global $n, and the mean squared difference between baseline and estimate
  # for both the ord values and the rank values.
  #
  # Arguments:
  #   $query_id - query whose rankings are compared
  #   $out_file - file to append the result line to
  #
  # Reads the globals @coll_ids1, $num_valid_coll_ids, %ords1, %ords2,
  # %ranks1, %ranks2 and $n.  Dies if $out_file cannot be opened.
  local ($query_id, $out_file) = @_;

#  print "    Writing MSE to '$out_file'...\n";

  $ord_total  = 0;
  $rank_total = 0;

  # Accumulate the squared differences over every non-masked collection.
  for ($i = 0; $i < $num_valid_coll_ids; $i++) {
    $coll_id = $coll_ids1[$i];

    $diff        = $ords1{$query_id,$coll_id} - $ords2{$query_id,$coll_id};
    $ord_total  += $diff * $diff;

    $diff        = $ranks1{$query_id,$coll_id} - $ranks2{$query_id,$coll_id};
    $rank_total += $diff * $diff;
  }

  # SmartDiv is defined elsewhere; presumably it guards against a zero
  # divisor (a query with no valid collections) -- TODO confirm.
  $ord_mse  = SmartDiv ($ord_total,  $num_valid_coll_ids);
  $rank_mse = SmartDiv ($rank_total, $num_valid_coll_ids);

  unless (open (OUT_FP, ">>$out_file")) {
    die ("Error opening out_file '$out_file'\n");
  }
  printf OUT_FP "%s\t%d\t%20.4f %20.4f\n", $query_id, $n, $ord_mse, $rank_mse;
  close (OUT_FP);
}

##############################################################################

sub GetNandTotal {
  # Determines, for one query, how many of the top baseline collections have
  # a positive baseline merit and what those merits add up to.
  #
  # Argument:
  #   $query_id - query to examine
  #
  # Reads the globals @coll_ids1 (collections in baseline order),
  # $num_valid_coll_ids and %merits1.
  #
  # Results are returned through package globals:
  #   $n     - number of leading entries of @coll_ids1 whose baseline merit
  #            is greater than zero (counting stops at the first entry that
  #            is not)
  #   $total - sum of the baseline merits of those $n collections
  #
  # The argument is unpacked from @_.  Earlier code read $_ instead, which
  # only worked while the caller happened to be iterating with $_ set to the
  # same query id.
  my ($query_id) = @_;

  $total = 0;
  for ($n = 0; $n < $num_valid_coll_ids && $merits1{$query_id,$coll_ids1[$n]} > 0; $n++) {
    $total += $merits1{$query_id,$coll_ids1[$n]};
  }
}

##############################################################################

sub CalculateRandP {
  # Appends, for every position i in the rankings of $query_id, one line to
  # $out_file and one more detailed line to $bug_file.
  #
  # Arguments:
  #   $query_id - query whose rankings are compared
  #   $out_file - file receiving: query id, i, r1, p1, r2, p2, r2b, marker
  #   $bug_file - file receiving the same values plus the collection ids,
  #               merits and $n2 that produced them
  #
  # Values written per position i (1-based):
  #   r1  - baseline merit accumulated over the baseline's first i
  #         collections, divided by $total
  #   p1  - always 1 (it is set once and never recomputed)
  #   r2  - baseline merit accumulated over the estimate's first i
  #         collections, divided by $total; a collection only contributes
  #         when both its baseline merit and its estimated merit are > 0
  #   p2  - number of contributing collections so far ($n2) divided by i
  #   r2b - the estimate's accumulated merit divided by the baseline's
  #   marker - "N" when i equals $n, "-" otherwise, and "X" once r2 has
  #         reached 1 (from then on r2, p2 and r2b keep their last values)
  #
  # Reads the globals @coll_ids1, @coll_ids2, $num_valid_coll_ids, %merits1,
  # %merits2, $n and $total.  Dies if either file cannot be opened.
  local ($query_id, $out_file, $bug_file) = @_;

#  print "    Writing Rs and Ps to '$out_file'...\n";

  $tot1 = 0;
  $tot2 = 0;
  $n2   = 0;
  $r2   = 0;
  $p1   = 1;

  unless (open (OUT_FP, ">>$out_file")) {
    die ("Error opening out_file '$out_file'\n");
  }
  unless (open (BUG_FP, ">>$bug_file")) {
    die ("Error opening bug_file '$bug_file'\n");
  }

  for ($i = 1; $i <= $num_valid_coll_ids; $i++) {
    # Collection at position i in the baseline and in the estimated order.
    $coll_id1 = $coll_ids1[$i - 1];
    $coll_id2 = $coll_ids2[$i - 1];

    # Both merits are looked up in the baseline: the estimate's choice is
    # credited with the merit the baseline assigns to that collection.
    $merit1 = $merits1{$query_id,$coll_id1};
    $merit2 = $merits1{$query_id,$coll_id2};

    if ($merit2 > 0 && $merits2{$query_id,$coll_id2} > 0) {
      $n2++;
    }
    else {
      $merit2 = 0;
    }

    $tot1 += $merit1;
    $tot2 += $merit2;

    # SmartDiv is defined elsewhere; presumably a division that tolerates a
    # zero divisor -- TODO confirm.
    $r1 = SmartDiv ($tot1, $total);

    if ($r2 < 1) {
      $r2  = SmartDiv ($tot2, $total);
      $p2  = $n2 / $i;
      $r2b = SmartDiv ($tot2, $tot1);
      $c   = ($i == $n) ? "N" : "-";
    }
    else {
      # r2 already reached 1 at an earlier position: keep the previous
      # r2/p2/r2b and just flag the line.
      $c = "X";
    }

    printf OUT_FP "%s\t%d\t%9.7f %9.7f %9.7f %9.7f %9.7f %s\n",
        $query_id, $i,
        $r1, $p1,
        $r2, $p2, $r2b, $c;

    printf BUG_FP "%3d %3d %12s %12.4f %9.7f %9.7f %3d %12s %12.4f %9.7f %9.7f %9.7f %s\n",
        $query_id, $i,
        $coll_id1, $merit1, $r1, $p1,
        $n2, $coll_id2, $merit2, $r2, $p2, $r2b, $c;
  }

  close (OUT_FP);
  close (BUG_FP);
}

##############################################################################

