use strict;
use locale;
use POSIX qw(locale_h);
setlocale(LC_ALL, "norwegian");

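# input: two vocabulary lists with frequencies (one "<frequency> <word>" entry
#        per line); first argument is the original list, second is the new one
# output: the words that occur in the new list but not in the original one,
#         printed one per line without their frequencies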

# Main program: load both vocabulary lists and print every word that is
# present in the second (new) list but absent from the first (original) one.

# Both file names are required; fail loudly instead of silently reading
# nothing from an unopened handle.
@ARGV >= 2
    or die "usage: $0 ORIGINAL_LIST NEW_LIST\n";

my %orig = %{ read_vocabulary($ARGV[0]) };
my %new  = %{ read_vocabulary($ARGV[1]) };

# Hash key order is not stable between runs, so sort to make the output
# reproducible. Because "use locale" is in effect for this file, the
# comparison used by sort follows the current collation locale.
foreach my $w (sort keys %new) {
    print $w, "\n" unless exists $orig{$w};
}

# read_vocabulary($path)
#
# Reads the vocabulary list stored in the file $path and returns a reference
# to a hash whose keys are the entries found (every value is 1).
#
# A line is accepted when it contains a run of digits followed by whitespace;
# everything after that whitespace, up to the end of the line, is taken as the
# entry. Lines that do not fit are reported on STDERR and skipped.
# NOTE(review): the pattern is not anchored to the start of the line, so the
# digits may be preceded by arbitrary text -- confirm whether that is intended.
#
# Dies if the file cannot be opened.
sub read_vocabulary {
    my ($path) = @_;

    # Three-argument open with a lexical handle: the file name can never be
    # interpreted as an open mode, and the handle cannot leak to other code.
    open(my $fh, '<', $path)
        or die "cannot open '$path' for reading: $!\n";

    my %seen;
    while (my $line = <$fh>) {
        chomp $line;
        if ($line =~ /\s*\d+\s+(.*)/) {
            $seen{$1} = 1;
        }
        else {
            print STDERR "line not matching template: $line\n";
        }
    }
    close($fh);

    return \%seen;
}
