#!/usr/bin/perl5
#
# File: compmap.pl
#
# Purpose: Compares our mappings (coll_map.txt) to Charlie Viles'
#          mappings.
#
# Modifications:
#   16-JAN-1997  txe  Initial creation
#

# Input-format switch read by LoadHisMaps: when it is 1, each collection id
# in his files is taken to be "mm:yy" and is rewritten as
# "<journal>.<yy>.<mm>", with <journal> derived from the file name.
# Any other value leaves his collection ids untouched.
$use_old_format = 0;

# Shared subroutine library.  Ticker_Init and Ticker_Run are not defined in
# this file, so they presumably come from here -- TODO confirm.
$src_dir = '/uf39/te3d/ir/pie';
require ($src_dir . '/ir_subs.pm');

# Compare our mapping file against every file in his directory and write
# the report to compmap.out.
CompareMaps ('coll_map.txt', './cv_maps', 'compmap.out');

###############################################################################

# CompareMaps ($our_file, $his_dir, $out_file)
#
# Loads our doc_id -> collection mappings from $our_file (LoadOurMaps) and
# his from the files under $his_dir (LoadHisMaps), then walks the union of
# all doc_ids in sorted order and writes a report to $out_file:
#
#   " mismatch: doc_id = '...',  our_coll= '...', his_coll = '...'"
#       for every doc_id that both sides map, but to different collections;
#   "'<coll>' misses: our/his/mix <a>/<b>/<c>"
#       once per run of consecutive doc_ids of the same collection that
#       contains at least one discrepancy (a = missing from our maps,
#       b = missing from his maps, c = mismatches).
#
# A doc_id is tallied under our collection id when we have one, otherwise
# under his.  This keeps a doc_id that is missing from our maps inside the
# collection it sits in, instead of splitting that collection's run and
# producing a separate '' entry.
#
# Finally prints the number of distinct collections with discrepancies to
# STDOUT.
#
# Dies if $out_file cannot be opened or closed.
#
# Side effects: clears the global %doc_ids; the loaders fill %doc_ids,
# %our_maps and %his_maps.  Any existing $out_file is removed first.
sub CompareMaps {
  my ($our_file, $his_dir, $out_file) = @_;

  unlink ($out_file);
  open (my $out_fp, '>', $out_file)
    or die ("Error opening out_file '$out_file': $!\n");

  %doc_ids = ();

  LoadOurMaps ($our_file);
  LoadHisMaps ($his_dir);

  my $old_coll_id = "FIRST";      # sentinel: no collection seen yet
  my ($our_miss, $his_miss, $mismatch) = (0, 0, 0);
  my %bad_coll_seen = ();         # collection ids already reported as bad

  # Writes the summary line for the current collection (only if it had any
  # discrepancy) and resets the counters for the next one.
  my $flush_tally = sub {
    return unless ($our_miss + $his_miss + $mismatch > 0);
    print $out_fp "'$old_coll_id' misses: our/his/mix $our_miss/$his_miss/$mismatch\n";
    $bad_coll_seen{$old_coll_id} = 1;
    ($our_miss, $his_miss, $mismatch) = (0, 0, 0);
  };

  Ticker_Init ("Making doc_id mapping comparisons", "comparisons", 20000);

  foreach my $doc_id (sort (keys (%doc_ids))) {
    # Treat "no mapping" and "empty mapping" alike, as the comparisons
    # below always did.
    my $ours = defined ($our_maps{$doc_id}) ? $our_maps{$doc_id} : "";
    my $his  = defined ($his_maps{$doc_id}) ? $his_maps{$doc_id} : "";

    # Collection this doc_id is tallied under: ours, falling back to his.
    my $coll_id = ($ours ne "") ? $ours : $his;

    if ($coll_id ne $old_coll_id) {
      $flush_tally->();
      $old_coll_id = $coll_id;
    }

    if ($ours ne $his) {
      if ($ours eq "") {
        $our_miss++;
      }
      elsif ($his eq "") {
        $his_miss++;
      }
      else {
        $mismatch++;
        print $out_fp " mismatch: doc_id = '$doc_id', ";
        print $out_fp " our_coll= '$ours', his_coll = '$his'\n";
      }
    }

    # Explicit empty argument list so the ticker does not inherit our @_.
    Ticker_Run ();
  }

  # Report the last collection of the walk.
  $flush_tally->();

  # Buffered write errors only surface at close time.
  close ($out_fp)
    or die ("Error closing out_file '$out_file': $!\n");

  my $bad_colls = scalar (keys (%bad_coll_seen));
  print "$bad_colls collections had erroneous or missing mappings\n";
}

###############################################################################

# LoadOurMaps ($our_file)
#
# Reads our mappings from $our_file.  Each line carries a doc_id and a
# collection id separated by whitespace; anything after the second field
# is ignored.  Blank lines are skipped and leading whitespace is ignored,
# so neither can register an empty doc_id.
#
# Side effects: clears and refills the global %our_maps (doc_id ->
# collection id) and marks every doc_id read in the global %doc_ids.
#
# Dies if $our_file cannot be opened.
sub LoadOurMaps {
  my ($our_file) = @_;

  Ticker_Init ("Reading our maps from $our_file...", "mappings", 20000);

  %our_maps = ();
  # Three-argument open: the file name is taken literally, never as a mode.
  open (my $fp, '<', $our_file)
    or die ("Error opening our_file '$our_file': $!\n");

  while (my $line = <$fp>) {
    # split ' ' drops leading whitespace before splitting on whitespace
    # runs; an all-blank line therefore yields no fields at all.
    my ($doc_id, $coll_id) = split (' ', $line);
    next unless defined ($doc_id);

    $our_maps{$doc_id} = $coll_id;
    $doc_ids{$doc_id} = 1;

    # Explicit empty argument list so the ticker does not inherit our @_.
    Ticker_Run ();
  }
  close ($fp);
}

###############################################################################

# LoadHisMaps ($root_dir)
#
# Reads his mappings from every non-hidden entry matched by "$root_dir/*"
# (sub-directories are skipped).  Each line carries a doc_id and a
# collection id separated by whitespace; blank lines are skipped and
# leading whitespace is ignored.
#
# When the global $use_old_format is 1, the collection id is taken to be
# "mm:yy" and is rewritten as "<journal>.<yy>.<mm>", where <journal> is the
# file's base name truncated at its first digit.
#
# Side effects: clears and refills the global %his_maps (doc_id ->
# collection id) and marks every doc_id read in the global %doc_ids.
#
# Dies if one of the files cannot be opened.
sub LoadHisMaps {
  my ($root_dir) = @_;

  %his_maps = ();

  foreach my $his_file (glob ("$root_dir/*")) {
    # Only files hold mappings; a directory has no lines to read.
    next if (-d $his_file);

    Ticker_Init ("Reading his maps from $his_file...", "mappings", 20000);

    # Three-argument open: names returned by glob are taken literally,
    # never as a mode or pipe specification.
    open (my $fp, '<', $his_file)
      or die ("Error opening his_file '$his_file': $!\n");

    # Journal name for the old format; it depends only on the file name,
    # so derive it once per file rather than once per line.
    my $journal = $his_file;
    $journal =~ s#.*/##;      # drop the directory part
    $journal =~ s#\d.*##;     # drop everything from the first digit on

    while (my $line = <$fp>) {
      # split ' ' drops leading whitespace before splitting on whitespace
      # runs; an all-blank line therefore yields no fields at all.
      my ($doc_id, $coll_id) = split (' ', $line);
      next unless defined ($doc_id);

      if ($use_old_format == 1) {
        my ($mm, $yy) = split (/:/, $coll_id);
        $coll_id = "$journal.$yy.$mm";
      }

      $his_maps{$doc_id} = $coll_id;
      $doc_ids{$doc_id} = 1;

      # Explicit empty argument list so the ticker does not inherit our @_.
      Ticker_Run ();
    }
    close ($fp);
  }
}

###############################################################################

