#!/usr/bin/perl -w

# computes the word accuracy for strings, based on edit distance. the entries
# in the input file should have the following format:  
#
# SOURCE < source sentence >
# REF < reference translation >
# TRANS < translation >
#
# if several reference are given then the max score for each translation is
# given. several translations can also be given, and each will be scored.


use strict;
use Getopt::Std;

my %opts;
getopts('t', \%opts);
#t = WAFT (word accuracy for translation)

$| = 1;

my ($edits, $ref_n, $tst_n, @ref_words, $score);

my $source;
my @references;
my $translation;
my @match;

##word level edit distance. change "/ /" to "//" in the split to operate on
##string level.
sub levenshtein($$){
    my @A=split / /, lc shift; 
    my @B=split / /, lc shift;
    my @W=(0..@B);
    my ($i, $j, $cur, $next);
    for $i (0..$#A){
	$cur=$i+1;
	for $j (0..$#B){
            $next=min(
                      $W[$j+1]+1,
                      $cur+1,
                      ($A[$i] ne $B[$j])+$W[$j]
                      );
            $W[$j]=$cur;
            $cur=$next;
	}
	$W[@B]=$next;
    }
    return $next;
}

sub min_levenshtein {
    my $translation = shift;
    my $references = shift;
    my $edit;    
    my $min_edit;
    my $min_reference;
    my $reference;
    foreach $reference (@references) {
        $edit = levenshtein($translation, $reference);
        if (!$min_edit || ($edit < $min_edit)) {
            $min_edit = $edit;
            $min_reference = $reference;
        }
    }
    return ($min_edit, $min_reference);    
}

sub min($$$){
    if ($_[0] < $_[2]){ pop @_; } else { shift @_; }
    return $_[0] < $_[1]? $_[0]:$_[1];
}


#helper from bleu.pl:
sub prepare_sentence {
  my $sentence = shift;
  $sentence =~ s/^\s*REF\s*//;                
  $sentence =~ s/^\s*SOURCE\s*//;             
  $sentence =~ s/^\s*TRANS\s*//;              
  $sentence =~ s/,/ ,/;                       
  $sentence =~ s/\./ \./;                     
  $sentence =~ s/\!/ \!/;                     
  $sentence =~ s/\?/ \?/;                     

  return $sentence;
}

while(<>) {
    next if /^\s*\;/;
    next if /^\s*$/;         
    chomp;                          
    my $sentence = $_;              
    if ($sentence =~ /^\s*SOURCE/) {
        $source = $sentence;    
        $translation = $score = ''; 
        @references = @match = ();
        
    } elsif ($sentence =~ /^\s*REF.?\s+/) {      
        unshift(@references, prepare_sentence($sentence));
    } elsif ($sentence =~ /^\s*TRANS.?\s+/) {
        $translation = prepare_sentence($sentence);             
        @match = min_levenshtein($translation, \@references);  
        $edits = $match[0];
        $ref_n = @ref_words = split / /, $match[1];
        
        if ($opts{t}) {
            $tst_n = @ref_words = split / /, $translation;
            $score = 1 - ($edits / ($tst_n > $ref_n ? $tst_n : $ref_n));  
        } else { 
            $score = 1 - ($edits / $ref_n);
        }        
        print $score . "\n";
    }
}
