#!/usr/cs/bin/perl
#
# File: al.pl
#
# Purpose: Aligns two neucleotide sequences, displaying global and
#          local matrixes and best possible alignments.
#
# Authors:
#   txe  Travis Emmitt  emmitt@virginia.edu
#
# Modifications:
#   04-FEB-1998  txe  Initial creation, got global alignments working
#   06-FEB-1998  txe  Overhauled alignment logic and got local working
#

  print "enter top sequence:\n";
  $str1 = <STDIN>;
  chop ($str1);

  print "enter bottom sequence:\n";
  $str2 = <STDIN>;
  chop ($str2);

  print "Aligning '$str1' and $str2'...\n";

  &Align ($str1, $str2);

#######################################################################

sub Align {
  local ($str1, $str2) = @_;

  @scores = (-2, -2, -2, -2, -1, +1);
  @mode_name = ("Global", "Local");

  @seq1 = GetSequence ($str1);
  @seq2 = GetSequence ($str2);
  $len1 = @seq1;
  $len2 = @seq2;

  $RIGHT = 0;
  $DOWN  = 2;
  $DIAG  = 4;
  $SKIP  = -9999;

  for ($mode = 0; $mode <= 1; $mode++) {
    %leads = ();
    %S = ();
    %ends = ();
    $end_score = 0;

    for ($i = 0; $i <= $len1; $i++) {
      for ($j = 0; $j <= $len2; $j++) {
        &EvaluateNode ($i, $j, $mode);
      }
    }
    &ComputePaths ($mode);
    &PrintMatrix ($mode);
    &PrintPaths ($mode);
  }
}

#######################################################################

sub EvaluateNode {
  local ($i, $j, $mode) = @_;

  $down_score  = $SKIP;
  $right_score = $SKIP;
  $diag_score  = $SKIP;
  $match       = 0;

  if ($i > 0) {
    $right_score = $S{$i-1,$j} + $scores[$RIGHT];
  }
  if ($j > 0) {
    $down_score = $S{$i,$j-1} + $scores[$DOWN];
  }
  if ($i > 0 && $j > 0) {
    if ($seq1[$i-1] eq $seq2[$j-1]) {
      $match = 1;
    }
    $diag_score = $S{$i-1,$j-1} + $scores[$DIAG + $match];
  }
  if ($i > 0 || $j > 0) {
    if ($right_score >= $down_score) {
      if ($right_score >= $diag_score) {
        $S{$i,$j} = $right_score;
        $leads{$i,$j,$RIGHT} = 1;
      }
      if ($right_score <= $diag_score) {
        $S{$i,$j} = $diag_score;
        $leads{$i,$j,$DIAG} = 1;
      }
    }
    if ($right_score <= $down_score) {
      if ($down_score >= $diag_score) {
        $S{$i,$j} = $down_score;
        $leads{$i,$j,$DOWN} = 1;
      }
      if ($down_score <= $diag_score) {
        $S{$i,$j} = $diag_score;
        $leads{$i,$j,$DIAG} = 1;
      }
    }
    if ($mode == 1) {
      if ($S{$i,$j} < 0) {
        $S{$i,$j} = 0;
        delete $leads{$i,$j,$RIGHT};
        delete $leads{$i,$j,$DOWN};
        delete $leads{$i,$j,$DIAG};
      }
      if ($S{$i,$j} > $end_score) {
        %ends = ();
        $ends{$i,$j} = 1;
        $end_score = $S{$i,$j};
      }
      elsif ($S{$i,$j} == $end_score) {
        $ends{$i,$j} = 1;
      }
    }
    else {
      %ends = ();
      $ends{$i,$j} = 1;
    }
  }
  else {
    $S{0,0} = 0;
  }
}

#######################################################################

sub ComputePaths {
  local ($mode) = @_;

  %paths = ();
  %arrows = ();

  foreach (keys (%ends)) {
    ($i, $j) = split (/\x1c/, $_);

    $paths{$i,$j,"",""} = 1;

    $found = 1;

    while ($found == 1) {
      foreach (keys (%paths)) {
        ($i, $j, $path1, $path2) = split (/\x1c/, $_);
        $found = 0;
        if ($leads{$i,$j,$RIGHT} == 1) {
          $paths{$i-1,$j,"$seq1[$i-1]$path1","-$path2"} = 1;
          if ($arrows{$i-1,$j} eq "") {
            $arrows{$i-1,$j} = "    ";
          }
          substr ($arrows{$i-1,$j}, 1, 3) = "```";
          $found = 1;
        }
        if ($leads{$i,$j,$DOWN} == 1) {
          $paths{$i,$j-1,"-$path1","$seq2[$j-1]$path2"} = 1;
          if ($arrows{$i,$j-1} eq "") {
            $arrows{$i,$j-1} = "    ";
          }
          substr ($arrows{$i,$j-1}, 0, 1) = "|";
          $found = 1;
        }
        if ($leads{$i,$j,$DIAG} == 1) {
          $paths{$i-1,$j-1,"$seq1[$i-1]$path1","$seq2[$j-1]$path2"} = 1;
          if ($arrows{$i-1,$j-1} eq "") {
            $arrows{$i-1,$j-1} = "    ";
          }
          substr ($arrows{$i-1,$j-1}, 2, 1) = "\\";
          $found = 1;
        }
        if ($found == 1) {
          delete $paths{$i,$j,$path1,$path2};
        }
      }
    }
  }
}

#######################################################################

sub PrintPaths {
  local ($mode) = @_;
  print "Best possible $mode_name[$mode] alignments:\n";
  foreach (keys (%paths)) {
    ($i, $j, $path1, $path2) = split (/\x1c/, $_);
    print " $path1\n";
    print " $path2\n\n";
  }
}

#######################################################################

sub PrintMatrix {
  local ($mode) = @_;

  print "\n$mode_name[$mode] Matrix:\n\n        ";
  for ($i = 0; $i <= $len1; $i++) {
    print "  $seq1[$i] ";
  }
  print "\n        0";
  for ($i = 1; $i <= $len1; $i++) {
    printf "  %2d", $i;
  }
  print "\n    +====";
  for ($i = 1; $i <= $len1; $i++) {
    print "====";
  }
  print "\n";

  for ($j = 0; $j <= $len2; $j++) {
    printf "%4d|", $j;
    for ($i = 0; $i <= $len1; $i++) {
      printf "%4d", $S{$i,$j};
    }
    print "\n $seq2[$j]";
    if ($j < $len2) {
      print "  |   ";
      for ($i = 0; $i <= $len1; $i++) {
        printf "%4s", $arrows{$i,$j};
      }
      print "\n";
    }
  }
  print "\n\n";
}

#######################################################################

sub GetSequence {
  local ($str) = @_;

  @seq = ();

  for ($i = 0; $i < length ($str); $i++) {
    $ch = substr ($str, $i, 1);
    if ($ch eq "a" || $ch eq "A") {
      $seq[$i] = "a";
    }
    elsif ($ch eq "c" || $ch eq "C") {
      $seq[$i] = "c";
    }
    elsif ($ch eq "g" || $ch eq "G") {
      $seq[$i] = "g";
    }
    elsif ($ch eq "t" || $ch eq "T") {
      $seq[$i] = "t";
    }
    else {
      print "ERROR: seq_str[$i] '$ch' not in 'agctAGCT'\n";
      @seq = ();
      last;
    }
  }
  @seq;
}




