#!/usr/cs/bin/perl
#
# File: problems.pl
#
# Purpose: Lists problematic query_ids and coll_ids (obtained from
#          a diff on A.noord and B.noord).
#
# Modifications:
#   03-APR-1998  txe  Initial creation
#

  require "subs/ir_subs.pm";

  $data_dir  = GetArgument (0, "data directory",  "test");
  $diff_file = GetArgument (1, "diff file",       "diffs");

  $in_file  = "$data_dir/viles/$diff_file";
  $out_file = "$data_dir/viles/$diff_file.summary";

  &SummarizeProblems ($in_file, $out_file);
  print "Done.\n";

##############################################################################

sub SummarizeProblems {
  local ($in_file, $out_file) = @_;

  print "Writing summary of diffs in '$in_file' to '$out_file'...\n";

  %unsorted_query_ids = ();
  %unsorted_coll_ids  = ();
  %unsorted_merits    = ();
  %query_counts       = ();
  %coll_counts        = ();
  %merit_counts       = ();
  %total_counts       = ();

  open (IN, "$in_file")   || die ("Error opening in_file '$in_file'\n");
  while ($line = <IN>) {
    ($symbol, $query_id, $coll_id, $merit) = split (/\s+/, $line);

    if ($symbol eq "<" || $symbol eq ">") {
      if ($query_counts {$symbol, $query_id} == "") {
        $query_counts   {$symbol, $query_id} = 0;
      }
      if ($coll_counts  {$symbol, $coll_id}  == "") {
        $coll_counts    {$symbol, $coll_id}  = 0;
      }
      if ($merit_counts {$symbol, $merit}    == "") {
        $merit_counts   {$symbol, $merit}    = 0;
      }
      if ($total_counts {$symbol}            == "") {
        $total_counts   {$symbol}            = 0;
      }

      $query_counts {$symbol, $query_id} ++;
      $coll_counts  {$symbol, $coll_id}  ++;
      $merit_counts {$symbol, $merit}    ++;
      $total_counts {$symbol}            ++;      

      $unsorted_query_ids {$query_id} = 1;
      $unsorted_coll_ids  {$coll_id}  = 1;
      $unsorted_merits    {$merit}    = 1;
    }
  }
  close (IN);

  @query_ids = sort { $a <=> $b } (keys (%unsorted_query_ids));
  @coll_ids  = sort { $a cmp $b } (keys (%unsorted_coll_ids));
  @merits    = sort { $a <=> $b } (keys (%unsorted_merits));

  open (OUT, ">$out_file") || die ("Error opening out_file '$out_file'\n");

 ###############################################

  print OUT "Total number of differences:\n";
  $a = $total_counts {"<"};
  $b = $total_counts {">"};
  print OUT   "\n    cv < $a\n    te > $b"; 

 ###############################################

  print OUT "\n\nQueries:";
  foreach (@query_ids) {
    printf OUT " %3d", $_;
  }
  print OUT   "\n--------";
  foreach (@merits) {
    print OUT "+---";
  }
  print OUT   "\n    cv <";
  foreach (@query_ids) {
    $a = $query_counts {"<", $_};
    printf OUT " %3d", $a;
  }
  print OUT   "\n    te >";
  foreach (@query_ids) {
    $b = $query_counts {">", $_};
    printf OUT " %3d", $b;
  }

 ###############################################

  print OUT "\n\nColls:";
  foreach (@coll_ids) {
    printf OUT " %10s", $_;
  }
  print OUT   "\n--------";
  foreach (@merits) {
    print OUT "+----------";
  }
  print OUT   "\n    cv <";
  foreach (@coll_ids) {
    $a = $coll_counts {"<", $_};
    printf OUT " %10d", $a;
  }
  print OUT   "\n    te >";
  foreach (@coll_ids) {
    $b = $coll_counts {">", $_};
    printf OUT " %10d", $b;
  }

 ###############################################

  print OUT "\n\nMerits:";
  foreach (@merits) {
    printf OUT " %3d", $_;
  }
  print OUT   "\n--------";
  foreach (@merits) {
    print OUT "+---";
  }
  print OUT   "\n    cv <";
  foreach (@merits) {
    $a = $merit_counts {"<", $_};
    printf OUT " %3d", $a;
  }
  print OUT   "\n    te >";
  foreach (@merits) {
    $b = $merit_counts {">", $_};
    printf OUT " %3d", $b;
  }

 ###############################################

  print OUT "\n\n\t(The numbers in the parentheses are the number of times\n";
  print OUT "\t the term appears after a '<', then the number of times it\n";
  print OUT "\t appears after a '>'.  These roughly reflect the number of\n";
  print OUT "\t differences associated with these items.)\n";

  close (OUT);
}

############################################################################

