package Edict;

#
# A package to read entries in edict
#
# Usage sketch:
#     require Edict;
#     $Edict::edictroot = "/path/to/dictionaries";   # optional override
#     @Edict::dicts     = ("edict");                 # optional override
#     Edict::Slurp();                                # prints "headword<TAB>gloss" lines
#

use strict;
use warnings;

# Directory that holds the dictionary files.
# FIXME - use env variable
our $edictroot = "/usr/share/edict";

# Dictionary file names (relative to $edictroot) that Slurp() reads, in order.
# FIXME choose with options
our @dicts = (
    # "edict",
    "compdic",
    # "enamdict",
);

#
# FIXME what about vn
# FIXME strip "to ", etc
#

# Slurp()
#
# Reads every dictionary listed in @dicts from $edictroot and prints one
# "headword<TAB>gloss" line to STDOUT for each gloss of each entry.
# Progress messages ("Slurping", "Slurping <dict>") also go to STDOUT.
#
# A dictionary that cannot be opened produces a warning and is skipped.
# Lines that do not look like an entry ("HEADWORD [READING] /gloss/gloss/")
# are skipped.  Takes no arguments and returns nothing useful.
sub Slurp {
    print "Slurping\n";

    for my $dict (@dicts) {
        my $fulldict = $edictroot . "/" . $dict;

        my $fh;
        unless (open($fh, '<', $fulldict)) {
            warn "Couldn't open $fulldict: $!";
            next;    # nothing to read from this dictionary
        }
        print "Slurping $dict\n";

        ### FIXME - the output includes the dictionary's attribution line
        ### (the original comment showed its headword as mis-encoded bytes)
        while (my $line = <$fh>) {
            chomp $line;

            # HEADWORD, optional "[READING]" (absent for all-kana
            # headwords; currently ignored), then "/"-delimited glosses.
            ### FIXME okurigana
            my ($ja, $english) =
                $line =~ m{^(\S+)\s+(?:\[\S+\]\s+)?/(.*)$};
            next unless defined $ja;    # not an entry line

            for my $en (ParseEnglish($english)) {
                print "$ja\t$en\n";
            }
        }

        close($fh);
    }

    return;
}

# ParseEnglish($english)
#
# Splits the "/"-separated gloss part of an entry into individual glosses.
# Parenthesised annotations such as "(n)" or "(P)" are stripped, surrounding
# whitespace is trimmed, and glosses that end up empty are dropped.
#
# Returns the list of cleaned glosses (empty for undefined or empty input).
#
###FIXME: add options
###   discard all
###   POS only
###   keep all
sub ParseEnglish {
    my ($english) = @_;
    return () unless defined $english;

    my @results;
    for my $bit (split /\//, $english) {
        ### strip stuff off for now
        ### FIXME parse POS, etc
        $bit =~ s/\([^)]+\)\s*//g;
        $bit =~ s/^\s+|\s+$//g;

        # A gloss made only of annotations (e.g. "(P)") carries no text.
        next if $bit eq '';

        push @results, $bit;
    }

    return @results;
}

1;