#!/usr/cs/bin/perl 
#
# File: matrix.pl
#
# Purpose: Generates a tab-delimited merit matrix (queries
#          versus collections) for the given merit file(s)
#
# Modifications:
#   25-MAR-1998  txe  Initial version (derived from rank.pl)
#   01-APR-1998  txe  Added summary info section after main matrix,
#                     Added biased weights file creation
#

  require "subs/ir_subs.pm";
  require "subs/maps.pm";
  require "subs/merits.pm";
  require "subs/ranks.pm";

  # Collect the four run parameters.  GetArgument is defined in the
  # required helper files; presumably its arguments are (position,
  # description, default value) -- confirm against subs/ir_subs.pm.
  $data_dir = GetArgument (0, "data directory", "test");
  $in_files = GetArgument (1, "merit files to matrix ('all' for all)", "sum.0");
  $start_id = GetArgument (2, "starting query id ('0' for all)", "51");
  $end_id   = GetArgument (3, "ending query id ('0' for all)", "150");

  # $in_files is later used as a glob pattern, so the keyword 'all'
  # becomes the match-everything wildcard.
  $in_files = "*" if ($in_files eq "all");

  BuildMatrix ($data_dir, $in_files, $start_id, $end_id);
  print "Done.\n";

#######################################################################

# BuildMatrix ($data_dir, $in_files, $start_id, $end_id)
#
# For every file matching the glob "$data_dir/merit/$in_files", reads the
# merits, fills in missing entries, and writes two outputs:
#   - the matrix file  "$data_dir/matrix/<name>.mat"  (via WriteMatrix)
#   - the weights file "$data_dir/ranks/<name>.wts"   (via WriteWtsFile)
#
# Arguments:
#   $data_dir  directory containing query_ids.txt, coll_ids.txt and merit/
#   $in_files  glob pattern, relative to "$data_dir/merit", of merit files
#   $start_id  lowest query id passed on to WriteMatrix
#   $end_id    highest query id passed on to WriteMatrix
#
# Globals written: %merits, @coll_ids, @query_ids (read by WriteMatrix and
# WriteWtsFile).  %coll_sizes is read here but never assigned in this
# file; presumably ReadMerits fills it -- confirm in subs/merits.pm.
#
# The parameters stay 'local' (not 'my') on purpose: the helper routines
# live in other files and may rely on seeing them through dynamic scope.
sub BuildMatrix {
  local ($data_dir, $in_files, $start_id, $end_id) = @_;
  local ($qids_file, $cids_file, $mat_file, $wts_file);
  local ($in_file, $num_files);

  $qids_file = "$data_dir/query_ids.txt";
  $cids_file = "$data_dir/coll_ids.txt";
  $num_files = 0;

  while ($in_file = <$data_dir/merit/$in_files>) {
    $num_files++;

    # Map ".../merit/<name>" to the sibling output directories.  The
    # anchored form rewrites only the "merit" directory that follows
    # $data_dir, so a data directory whose own name contains "merit" is
    # left intact.  If the glob result does not start with the literal
    # prefix, fall back to replacing the first "merit" in the path.
    $mat_file = "$in_file.mat";
    $mat_file =~ s{^\Q$data_dir\E/merit/}{$data_dir/matrix/}
      or $mat_file =~ s/merit/matrix/;
    $wts_file = "$in_file.wts";
    $wts_file =~ s{^\Q$data_dir\E/merit/}{$data_dir/ranks/}
      or $wts_file =~ s/merit/ranks/;

    print "Building matrix for '$in_file', writing to '$mat_file'...\n";

    # The second argument is true only for the file named "opt".
    &ReadMerits ($in_file, ($in_file eq "$data_dir/merit/opt"));

    %merits    = PatchHoles ("merits", $qids_file, $cids_file, %merits);
    @coll_ids  = GetSortedCollIds  (%merits);
    @query_ids = GetSortedQueryIds (%merits);

    &WriteMatrix  ($mat_file, $start_id, $end_id, %merits);
    &WriteWtsFile ($wts_file, %coll_sizes);
  }

  # Without this the script would just print "Done." and leave no hint
  # that the pattern selected nothing.
  warn "No merit files match '$data_dir/merit/$in_files'\n"
    unless ($num_files);
}
  
##############################################################################

# WriteMatrix ($out_file, $start_id, $end_id, %merits)
#
# Writes a tab-delimited merit matrix to $out_file: a header row of
# collection ids, then one row per selected query id, then a summary
# section with collection sizes and popularity counts.
#
# Arguments:
#   $out_file  path of the file to create (overwritten if it exists)
#   $start_id  lowest query id to include
#   $end_id    highest query id to include; if either bound is 0 every
#              query id is included
#   %merits    merit values keyed by the pair (query id, collection id)
#
# Globals read: @coll_ids, @query_ids, %coll_sizes, %popularity,
# $max_popularity.
#
# Dies if $out_file cannot be opened or closed.
sub WriteMatrix {
  my ($out_file, $start_id, $end_id, %merits) = @_;

  # Open the file named by the parameter; the message includes the OS
  # error so a failure can be diagnosed.
  open (my $out, '>', $out_file)
    || die ("Error opening mat_file '$out_file': $!\n");

  # Header row: corner label, then one column per collection.
  print $out join ("\t", "Que\\Col", @coll_ids);

  foreach my $query_id (@query_ids) {
    next unless ($start_id == 0 || $end_id == 0
                 || ($query_id >= $start_id && $query_id <= $end_id));

    # Each row starts with the newline that terminates the previous one.
    print $out "\n$query_id";
    foreach my $coll_id (@coll_ids) {
      print $out "\t$merits{$query_id,$coll_id}";
    }
  }

 ### summary info (designed to work with opt)

  print $out "\n\nSizes:";
  foreach my $coll_id (@coll_ids) {
    print $out "\t$coll_sizes{$coll_id}";
  }

  # One row per hit count from 0 up to and including $max_popularity.
  print $out "\n --Popularity--";
  for (my $hits = 0; $hits <= $max_popularity; $hits++) {
    print $out "\n$hits hits";
    foreach my $coll_id (@coll_ids) {
      print $out "\t$popularity{$coll_id,$hits}";
    }
  }
  print $out "\n";

  # Buffered write errors (e.g. a full disk) only surface at close.
  close ($out) || die ("Error closing mat_file '$out_file': $!\n");
}

##############################################################################

# WriteWtsFile ($wts_file, %coll_sizes)
#
# Builds the global %weights table, in which every (query id, collection
# id) pair taken from the globals @query_ids and @coll_ids maps to the
# %coll_sizes entry for that collection, then hands the table to
# WriteRanks together with $wts_file.
sub WriteWtsFile {
  local ($wts_file, %coll_sizes) = @_;

  # Discard whatever a previous call left behind.
  %weights = ();

  foreach (@query_ids) {
    $query_id = $_;
    # Fill one whole query row in a single slice assignment; each key is
    # the same $;-joined pair that $weights{$query_id, $coll} would use.
    @weights{map { join ($;, $query_id, $_) } @coll_ids}
      = @coll_sizes{@coll_ids};
  }

  # Remove any existing file first (presumably so WriteRanks starts from
  # a fresh file -- confirm in subs/ranks.pm).
  unlink ($wts_file);
  &WriteRanks ($wts_file, %weights);
}

##############################################################################


