#!/usr/bin/perl5
#
# File: postproc.pl
#
# Purpose: Post-processing: writes a collection existence table and one table of non-zero frequency counts per query group
#
# Modifications:
#   13-JAN-1998  txe  Initial creation
#   14-JAN-1998  txe  Added graphs, round_up, root_dir
#   16-JAN-1998  txe  Moved table generation to gentable.pm, added nonzeroes
#   19-JAN-1998  txe  Can now handle compressed (.Z) file names in root_dir
#

  # Directory holding the helper library.  gentable.pm is loaded from here; it
  # presumably supplies GT_PrintTableToFile, which the subs below call but
  # which is not defined in this file.
  $src_dir = "/uf39/te3d/ir/pie";
  require "$src_dir/gentable.pm";

  # The root directory comes from the first command-line argument.  When that
  # is missing or empty, prompt on STDIN until a non-empty line is entered.
  $root_dir = defined ($ARGV[0]) ? $ARGV[0] : "";

  while ($root_dir eq "") {
    print "Please enter the root directory:\n";
    $root_dir = <STDIN>;

    # At end of input <STDIN> yields undef, which compares equal to "" and
    # would otherwise keep this loop prompting forever.
    die ("No root directory given (end of input)\n") unless defined ($root_dir);

    # chomp removes only a trailing newline; chop would also eat the last
    # real character of an unterminated final line.
    chomp ($root_dir);
  }

  print "[root_dir = $root_dir]\n";

  PostProc ("ranks.bug", "querybnd.txt", "tables.txt", $root_dir);

########################################################################

# PostProc -- run the complete post-processing sequence.
#
# Arguments:
#   $freq_file  file handed to LoadFreqs
#   $grp_file   file handed to LoadQueryGroups
#   $out_file   file that receives the existence table; it is removed first
#   $root_dir   directory handed to GenerateExistenceTable
#
# Side effects: deletes $out_file, sets the package global $num_groups, and
# performs whatever file output the called subs do.
sub PostProc {
  my ($freq_file, $grp_file, $out_file, $root_dir) = @_;

  # Start from a clean output file.
  # NOTE(review): presumably GT_PrintTableToFile appends -- confirm in gentable.pm.
  unlink ($out_file);
  GenerateExistenceTable ($root_dir, $out_file);

  LoadFreqs ($freq_file);

  # $num_groups is kept as a package global; nothing in this file reads it.
  $num_groups = LoadQueryGroups ($grp_file);

  # Explicit empty argument list: a bare "&Name;" call would silently forward
  # this sub's own @_ to the callee.
  SplitQueriesIntoGroups ();

  AnalyzeGroups ($out_file);
}

#########################################################################

# GenerateExistenceTable -- record which (journal, date) collections exist.
#
# Arguments:
#   $root_dir  directory to scan; every entry exactly three levels below it
#              (root/*/*/*) is treated as a collection file
#   $out_file  file name passed on to GT_PrintTableToFile
#
# Fills the package global %exists, keyed by journal and date as returned by
# ExtractJournalAndDate, with the marker "*****", then passes the table to
# GT_PrintTableToFile under the title "Existence only".
sub GenerateExistenceTable {
  my ($root_dir, $out_file) = @_;

  # bsd_glob treats the whole pattern as a single path, so a root directory
  # containing whitespace is scanned correctly.  The core glob operator would
  # split such a pattern into several separate patterns.
  require File::Glob;

  # Start empty so that a repeated call does not report collections found
  # under a previously scanned root.
  %exists = ();

  foreach my $coll_file (File::Glob::bsd_glob ("$root_dir/*/*/*")) {
    my ($journal, $date) = ExtractJournalAndDate ($coll_file);
    $exists{$journal,$date} = "*****";
  }

  GT_PrintTableToFile ($out_file, "Existence only", "COLLECTIONS", "YEAR", %exists);
}

#########################################################################

# LoadQueryGroups -- read the query group definitions.
#
# Argument:
#   $grp_file  text file with one group per line: a group name followed by
#              the group's limit, separated by whitespace
#
# Rebuilds the package globals @grp_name, @grp_limit and @grp_count (every
# count starts at 0), indexed by group number in file order.
#
# Returns the number of groups loaded.  Dies if the file cannot be opened.
sub LoadQueryGroups {
  my ($grp_file) = @_;

  open (my $grp_fp, '<', $grp_file)
    or die ("Error opening grp_file '$grp_file'\n");

  # Drop entries left over from an earlier load so that the arrays hold
  # exactly the groups counted in the return value.
  @grp_name  = ();
  @grp_limit = ();
  @grp_count = ();

  my $num_loaded = 0;
  while (my $line = <$grp_fp>) {
    # split ' ' ignores leading whitespace, so an indented line does not
    # shift the name into the limit column.
    my ($name, $limit) = split (' ', $line);

    # A blank line defines no group.
    next unless defined ($name);

    $grp_name[$num_loaded]  = $name;
    $grp_limit[$num_loaded] = $limit;
    $grp_count[$num_loaded] = 0;
    $num_loaded++;
  }
  close ($grp_fp);

  return $num_loaded;
}

#########################################################################

# LoadFreqs -- read the frequency file into the package global %freqs.
#
# Argument:
#   $freq_file  text file whose lines hold four whitespace-separated columns:
#               query id, collection id, a third column that is not used
#               here, and the frequency
#
# Each line is stored as $freqs{$query_id,$coll_id} = frequency, where
# $query_id is the query id formatted with "%05d".  Entries already present
# in %freqs are kept.
#
# Dies if the file cannot be opened.
sub LoadFreqs {
  my ($freq_file) = @_;

  open (my $freq_fp, '<', $freq_file)
    or die ("Error opening freq_file '$freq_file'\n");

  while (my $line = <$freq_fp>) {
    # split ' ' ignores leading whitespace, so an indented line does not
    # shift every column one place to the right.
    my ($qid, $coll_id, undef, $freq) = split (' ', $line);

    # A blank line carries no data; storing it would create a phantom
    # query "00000".
    next unless defined ($qid);

    my $query_id = sprintf ("%05d", $qid);
    $freqs{$query_id,$coll_id} = $freq;
  }
  close ($freq_fp);
}

#######################################################################

# SplitQueriesIntoGroups -- assign every loaded query to a query group.
#
# Takes no arguments.  Reads the package globals %freqs (keyed by query id
# and collection id) and @grp_limit; group number g holds the queries whose
# id is no greater than $grp_limit[g] and greater than every earlier limit.
#
# Results, both package globals:
#   %non_zeroes  rebuilt: $non_zeroes{$group,$coll_id} is the number of
#                %freqs entries of that group and collection whose frequency
#                is greater than zero
#   @grp_count   incremented once per distinct query id in each group
#
# Queries whose id exceeds every limit are collected under the group number
# one past the last defined group.
sub SplitQueriesIntoGroups {
  my $overflow_group = scalar (@grp_limit);
  my $group          = 0;
  my $old_query_id;

  %non_zeroes = ();

  # Order the keys by numeric query id.  A plain string sort is correct only
  # while every id fits the zero-padded five-digit form.
  my @ordered_keys = sort {
    (split (/\x1c/, $a))[0] <=> (split (/\x1c/, $b))[0] || $a cmp $b
  } keys (%freqs);

  foreach my $key (@ordered_keys) {
    my ($query_id, $coll_id) = split (/\x1c/, $key, 2);

    # Step over as many groups as necessary: a group may contain no queries
    # at all.  Stop at the overflow group rather than advancing once more
    # for every remaining key.
    while ($group < $overflow_group && $query_id > $grp_limit[$group]) {
      $group++;
    }

    if ($freqs{$key} > 0) {
      $non_zeroes{$group,$coll_id}++;
    }

    # Keys of one query are adjacent, so a change of id marks a new query.
    if (!defined ($old_query_id) || $query_id != $old_query_id) {
      $grp_count[$group]++;
      $old_query_id = $query_id;
    }
  }
}

#########################################################################

# ByQueryID -- sort comparator: orders keys of the form
# "<query id>\x1c<collection id>" by numeric query id.
#
# Use as:  sort ByQueryID @keys
# Reads the sort variables $a and $b.  Returns -1, 0 or 1 as <=> does; the
# collection id does not take part in the comparison.
sub ByQueryID {
  # Only the first field is needed, and lexicals keep the comparison from
  # overwriting package globals on every call.
  my ($query_a) = split (/\x1c/, $a);
  my ($query_b) = split (/\x1c/, $b);

  return $query_a <=> $query_b;
}

#########################################################################

# AnalyzeGroups -- build and print one table per query group.
#
# Argument:
#   $out_file  passed on unchanged to PrintGroupTable
#
# Walks the package global %non_zeroes (keyed by group number and collection
# id) group by group.  For each group it fills the package global %nz with
# the count for every (journal, date) pair, plus row, column and grand totals
# stored under the key "TOTAL", and then calls PrintGroupTable for that
# group.  Groups are visited in string order of their number.  Nothing is
# printed when %non_zeroes is empty.
sub AnalyzeGroups {
  my ($out_file) = @_;

  my $old_group;          # group currently being accumulated, if any
  %nz = ();

  # In a string sort all keys of one group are adjacent, which is all the
  # change detection below relies on.
  foreach my $key (sort (keys (%non_zeroes))) {
    my ($group, $coll_id) = split (/\x1c/, $key);

    # A new group starts: flush the table of the previous one.
    if (defined ($old_group) && $group != $old_group) {
      PrintGroupTable ($old_group, $out_file);
      %nz = ();
    }
    $old_group = $group;

    my ($journal, $date) = ExtractJournalAndDate ($coll_id);
    my $count            = $non_zeroes{$key};

    $nz{$journal,$date}    = $count;
    $nz{$journal,"TOTAL"} += $count;
    $nz{"TOTAL",$date}    += $count;
    $nz{"TOTAL","TOTAL"}  += $count;
  }

  # Flush the last group.  Without any data there is no group to print, and
  # printing anyway would create a table for a nonexistent group number.
  PrintGroupTable ($old_group, $out_file) if defined ($old_group);
}

#########################################################################

# PrintGroupTable -- write the table of one query group to its own file.
#
# Arguments:
#   $g         group number; indexes @grp_name, @grp_limit and @grp_count
#   $out_file  only its directory part is used: the table goes to
#              "table<g>.txt" in the same directory as $out_file
#
# Removes any existing file of that name, then hands the package global %nz
# to GT_PrintTableToFile together with a title describing the group.
sub PrintGroupTable {
  my ($g, $out_file) = @_;
  my $title = "$grp_name[$g], last query = $grp_limit[$g], # queries = $grp_count[$g]";

  # Everything up to and including the last "/" of $out_file, or "" when it
  # has no directory part.  A capture on the argument itself leaves the
  # caller's $_ untouched and avoids $&.
  my $dir_prefix = ($out_file =~ m#(.*/)#) ? $1 : "";
  my $indiv_file = $dir_prefix . "table$g.txt";

  unlink ($indiv_file);
  GT_PrintTableToFile ($indiv_file, $title, "COLLECTIONS", "YEAR", %nz);
}

#########################################################################

# ExtractJournalAndDate -- split a collection file name into journal and date.
#
# Argument:
#   $coll_file  collection file name, with or without a leading path; the
#               name is read as dot-separated fields "journal.yy.mm", and
#               any further fields (such as a trailing ".Z") are ignored
#
# Returns the two-element list (journal, "yy.mm").  A field missing from the
# name is treated as empty, so "cacm" yields the date ".".  The caller's
# variable is not modified.
sub ExtractJournalAndDate {
  my ($coll_file) = @_;

  # Keep only the base name.
  $coll_file =~ s#.*/##;

  # Lexicals: the pieces must not overwrite package globals of the same name.
  my ($journal, $yy, $mm) = split (/\./, $coll_file);
  $yy = "" unless defined ($yy);
  $mm = "" unless defined ($mm);

  return ($journal, "$yy.$mm");
}

###########################################################################

