#!/usr/bin/gawk -f
#
# Reads a main file (the first file operand, or standard input) side by side
# with a second "oracle" file named by ARGV[2], and prints one summary block
# per record of the main file.
#
# Lines of the main file that are recognised:
#   |< |SOURCE| (ID) --- ...        starts a record
#   |@ ...                          appended verbatim to the record's references
#   |> |HYP| ... {N} <S> (T).       a hypothesis; S is kept in `bleu`
#                                   (presumably a BLEU score -- confirm)
#
# Lines of the oracle file that are recognised:
#   |< ...                          starts a record; must carry the same SOURCE
#   x|> |HYP| ... {N} <S> (T).      the hypothesis reported as `victoria`
#   (blank line)                    stops the scan for an `x|> ` line
#
# For each record the output is
#   |< |SOURCE| (ID/10)<references>
#   |= |victoria|
#   |> |first hypothesis|
#   |? |hypothesis with the highest S|
# followed by an empty line.  A source mismatch between the two files, or a
# hypothesis line seen before any usable `|< ` line, prints a diagnostic and
# exits with status 1.
#
# Requires gawk: the three-argument form of match() is a gawk extension.

# Print the summary block for one record.
#
#   id         record number as read from the `|< ` line; divided by ten
#              before printing (presumably ids advance in steps of ten --
#              confirm against the producer of the file)
#   source     source text of the record
#   reference  accumulated `|@ ` lines, each preceded by a newline
#   victoria   hypothesis taken from the oracle's `x|> ` line
#   top        first hypothesis listed in the main file
#   best       hypothesis with the highest score in the main file
#
# Nothing is printed when the id is zero or empty, when source or top is
# empty, or after an error has been recorded in the global `error`.
function output(id, source, reference, victoria, top, best) {
  id /= 10;
  # Compare against "" instead of relying on truthiness: a capture from
  # match() that looks numeric carries the strnum attribute, so a hypothesis
  # consisting of "0" would otherwise count as missing and the whole record
  # would be suppressed.
  if(id && source != "" && top != "" && !error)
    printf("|< |%s| (%d)%s\n|= |%s|\n|> |%s|\n|? |%s|\n\n",
           source, id, reference, victoria, top, best);
} # output

# Take the second operand as the oracle file and remove it from ARGV so that
# gawk does not read it as ordinary input.
BEGIN {
  oracle = ARGV[2];
  delete ARGV[2];
}

# Start of a record: flush the previous record, parse the new source and id,
# and check that the oracle's next `|< ` line carries the same source.
/^\|< / {
  output(id, source, reference, victoria, top, best);

  # `end` is the position of the "| (ID) --- " tail; the source text sits
  # between column 5 (just after "|< |") and that position.
  end = match($0, /\| \(([0-9]+)\) --- .*$/, foo);
  if(end) {
    source = substr($0, 5, end - 5);
    id = foo[1];
  } # if
  else {
    source = "";
    id = "";
  } # else

  # Advance the oracle to its next `|< ` line and cut the same span out of
  # it.  `expected` stays empty when the oracle is exhausted, so that the
  # diagnostic below does not show an unrelated, previously read line.
  expected = "";
  while((getline gold < oracle) > 0) {
    if(gold ~ /^\|< /) {
      if(end) expected = substr(gold, 5, end - 5);
      break;
    } # if
  } # while

  if(source == "" || source != expected) {
    printf("[%d] |%s| vs. |%s|.\n", NR, source, expected);
    error = NR;
    exit 1;
  } # if

  # Reset the per-record state.
  reference = "";
  top = "";
  bleu = 0;
  best = "";
  victoria = "";
}

# Reference line: keep it verbatim, each one on a line of its own.
/^\|@ / {
  reference = (reference "\n" $0);
}

# Hypothesis line: on the first one of a record, look up the oracle's `x|> `
# line; then keep track of the first and of the highest-scoring hypothesis.
/^\|> / {
  # NOTE(review): if the oracle record has no `x|> ` line, a later `|> ` line
  # of the same record resumes this scan and can consume the `|< ` header of
  # the next oracle record, which then surfaces as a source mismatch.
  # Confirm that every oracle record is expected to carry an `x|> ` line.
  while(victoria == "" && (getline gold < oracle) > 0) {
    if(gold ~ /^\|< /) break;
    if(gold ~ /^[ \t]*$/) break;
    if(match(gold,
             /^x\|> \|(.*)\|.+\{[0-9.-]+\} <[0-9.]+> \([0-9:]+\)\.$/,
             bar)) {
      victoria = bar[1];
    } # if
  } # while

  if(source == "") {
    printf("[%d] unexpected |%s|.\n", NR, $0);
    error = NR;
    exit 1;
  } # if

  match($0, /^\|> \|(.*)\|.+\{([0-9.-]+)\} <([0-9.]+)> \([0-9:]+\)\.$/, foo);

  # The first hypothesis of the record is the top one; test for the empty
  # string so that a hypothesis of "0" is not replaced by a later one.
  if(top == "") top = foo[1];

  # Adding zero forces a numeric comparison whatever type the capture has.
  # The comparison is strict, so `best` stays empty when no score exceeds 0.
  if(foo[3] + 0 > bleu) {
    bleu = foo[3] + 0;
    best = foo[1];
  } # if
}

# Flush the final record (output() prints nothing after an error).
END {
  output(id, source, reference, victoria, top, best);
}