#! /usr/bin/perl -w
#
# Quick and dirty script to make a transfer lexicon.
#
# The basic strategy is to read a bilingual lexicon, and then
# see if we know how to translate the entries, based on
# knowledge in the ERG, JACY and JAEN.
#
# Inputs  (hard-coded paths, see the constants below):
#   - a tab-separated bilingual lexicon (edict.txt)
#   - the JACY and ERG lexical databases (lexdb.rev, tab-separated)
# Outputs (written to the current directory):
#   - lex-auto-je.mtr  transfer rules, Japanese predicate -> English predicate
#   - lex-auto-ej.mtr  transfer rules, English predicate -> Japanese predicate
#   - one progress line per generated rule on STDOUT
#
# NOTE: all non-ASCII literals in this file are written as \x byte escapes so
# that the script does not depend on the encoding the source file is saved in.
# The bytes are presumably EUC-JP (the emitted headers say "Coding: euc-jp");
# confirm against the encoding of the lexicon files.

use strict;
use warnings;
use POSIX qw(strftime);

### Input and output locations.
my $EDICT_FILE = "edict.txt";
my $JACY_LEXDB = "/home/eric-n/logon/dfki/jacy/lex/lexdb.rev";
my $ERG_LEXDB  = "/home/eric-n/logon/lingo/erg/lexdb.rev";
my $JE_FILE    = "lex-auto-je.mtr";
my $EJ_FILE    = "lex-auto-ej.mtr";

### Shared tables (kept as package globals, as in the original script).
# $transfer_rule{RULE_TYPE}{"ja"|"en"} = list of lexical types covered
# $transfer_rule{RULE_TYPE}{"template"} = rule text with $name/$jpred/$epred
our %transfer_rule;
# $japanesele{WORD}{LEXICAL_TYPE} = predicate name
our %japanesele;
# $englishle{WORD}{LEXICAL_TYPE} = predicate name
our %englishle;
# $transfer{JAPANESE_WORD}{ENGLISH_WORD} = name of the source lexicon
our %transfer;

# The rule-type table must be filled before make_rules runs: make_rules
# iterates over %transfer_rule and generates nothing when it is empty.
prepare_mtr();

prepare_japanese();
### in skip file (presumably why it is added by hand here)
# Key bytes: "taberu" (to eat).
$japanesele{"\xBF\xA9\xA4\xD9\xA4\xEB"}{"v1-v-stem-lex"} = "_taberu_v_rel";

prepare_english();
#$englishle{"monkey"}{"n_intr_le"} = "_monkey_n_rel";
#$englishle{"eat"}{"v_np*_trans_le"} = "_eat_v_1_rel";

prepare_lexicon();
# Key bytes in the example below: "saru" (monkey).
#$transfer{"\xB1\xEE"}{"monkey"} = "edict";

### weird format for Oe's extraction stuff
#output_kf();

make_rules();

# Print every Japanese entry of %transfer to STDOUT, followed by its English
# translations joined with " | ".  One line per Japanese entry.
sub output_kf {
    foreach my $japanese (keys %transfer) {
        my @ens = keys %{ $transfer{$japanese} };
        print "\t$japanese\t\t", join(" | ", @ens), "\n";
    }
}

# Build a rule name from the input and output predicates:
# "<in>-<out>-omtr" with every "_rel" removed, the leading "_" removed and
# the first "-_" collapsed to "-".
sub _rule_name {
    my ($input_pred, $output_pred) = @_;
    my $rule_name = $input_pred . "-" . $output_pred . "-omtr";
    $rule_name =~ s/_rel//g;
    $rule_name =~ s/^_//;
    $rule_name =~ s/-_/-/;
    return $rule_name;
}

# Fill the first $name, $jpred and $epred placeholder of a rule template.
# $jpred is the INPUT side of the template and $epred the OUTPUT side, so the
# English->Japanese direction passes the English predicate as $input_pred.
sub _fill_template {
    my ($template, $rule_name, $input_pred, $output_pred) = @_;
    $template =~ s/\$name/$rule_name/;
    $template =~ s/\$jpred/$input_pred/;
    $template =~ s/\$epred/$output_pred/;
    return $template;
}

# Write the two transfer-rule files.
#
# For every (Japanese, English) pair in %transfer and every rule type in
# %transfer_rule, a rule is emitted in both directions whenever the Japanese
# word has a predicate under one of the rule's "ja" lexical types AND the
# English word has one under one of its "en" lexical types.
# Dies if an output file cannot be opened or closed.
sub make_rules {
    open(my $je, ">:encoding(utf8)", $JE_FILE)
        or die "Couldn't open file $JE_FILE for output: $!\n";
    open(my $ej, ">:encoding(utf8)", $EJ_FILE)
        or die "Couldn't open file $EJ_FILE for output: $!\n";

    # NOTE(review): the header announces euc-jp but the handles use a utf8
    # layer; left as found -- confirm which one the consumer expects.
    my $date = strftime("%Y-%m-%d", localtime);
    foreach my $out ($je, $ej) {
        print {$out} "; -*- Mode: TDL; Package: LKB; Coding: euc-jp; -*-\n";
        print {$out} "; Automatically Constructed Transfer Lexicon:  $date\n\n";
    }

    # Keys are sorted so that repeated runs produce identical files.
    foreach my $japanese (sort keys %transfer) {
        # Look the word up once; this also avoids autovivifying an empty
        # entry in %japanesele for every word of the bilingual lexicon.
        my $ja_preds = $japanesele{$japanese} or next;

        foreach my $english (sort keys %{ $transfer{$japanese} }) {
            my $en_preds = $englishle{$english} or next;
            my $source   = $transfer{$japanese}{$english};

            foreach my $rtype (sort keys %transfer_rule) {
                my $rule = $transfer_rule{$rtype};

                foreach my $jlex (@{ $rule->{"ja"} }) {
                    my $jpred = $ja_preds->{$jlex};
                    next unless defined $jpred;

                    foreach my $elex (@{ $rule->{"en"} }) {
                        my $epred = $en_preds->{$elex};
                        next unless defined $epred;

                        # Japanese -> English.  "\xA2\xAA" is presumably the
                        # EUC-JP right-arrow character.
                        print "$japanese \xA2\xAA $english ($source)\n";
                        print {$je} _fill_template($rule->{"template"},
                            _rule_name($jpred, $epred), $jpred, $epred), "\n";

                        # English -> Japanese: same template, sides swapped.
                        print "$english \xA2\xAA $japanese ($source)\n";
                        print {$ej} _fill_template($rule->{"template"},
                            _rule_name($epred, $jpred), $epred, $jpred), "\n";
                    }
                }
            }
        }
    }

    # Buffered write errors only surface at close time.
    close($je) or die "Couldn't close file $JE_FILE: $!\n";
    close($ej) or die "Couldn't close file $EJ_FILE: $!\n";
}

# Return the rule template for a rule type.  The placeholders $name, $jpred
# and $epred are literal text here; make_rules substitutes them.
sub _omtr_template {
    my ($rule_type) = @_;
    return "\$name := $rule_type & \n"
         . "[ INPUT.RELS < [ PRED \"\$jpred\" ] >,\n"
         . " OUTPUT.RELS < [ PRED \"\$epred\" ] > ].\n";
}

# Fill %transfer_rule: for each rule type, the Japanese ("ja") and English
# ("en") lexical types it applies to, plus the rule template.
sub prepare_mtr {
    my %lexical_types_for = (
        "noun_omtr" => {
            "ja" => [ "ordinary-nohon-n-lex", "ordinary-honsubj-n-lex" ],
            "en" => [ "n_intr_le", "n_mass_count_le", "n_mass_le" ],
        },
        "adjective_omtr" => {
            "ja" => [ "na-adj-lex", "i-adj-stem-lex" ],
            "en" => [ "adj_intrans_le", "adv_int_vp_le" ],
        },
        "arg1_v_omtr" => {
            "ja" => [ "intrans-c-stem-lex", "intrans-v-stem-lex",
                      "vn-intrans-lex" ],
            "en" => [ "v_particle_le", "v_unacc_le", "v_unerg_le" ],
        },
        "arg12_v_omtr" => {
            "ja" => [ "v1-c-stem-lex", "v1-v-stem-lex", "vn-trans1-lex" ],
            "en" => [ "v_particle_np_le", "v_np_trans_le",
                      "v_empty_prep_intrans_le", "v_np*_trans_le" ],
        },
        # Disabled in the original script (no English types were listed):
        # "intersective_adverb_omtr" => { "ja" => [ "isect-adv-lex" ] },
    );

    foreach my $rule_type (keys %lexical_types_for) {
        $transfer_rule{$rule_type} = {
            "ja"       => [ @{ $lexical_types_for{$rule_type}{"ja"} } ],
            "en"       => [ @{ $lexical_types_for{$rule_type}{"en"} } ],
            "template" => _omtr_template($rule_type),
        };
    }

    # Disabled in the original script: reverse indexes from lexical type to
    # rule type.
    # ### index by English Lexical Type
    # foreach my $mtr (keys %transfer_rule) {
    #     foreach my $en_le (@{ $transfer_rule{$mtr}{"en"} }) {
    #         push @{ $en_idx{$en_le} }, $mtr;
    #     }
    # }
    # ### index by Japanese Lexical Type
    # foreach my $mtr (keys %transfer_rule) {
    #     foreach my $ja_le (@{ $transfer_rule{$mtr}{"ja"} }) {
    #         push @{ $ja_idx{$ja_le} }, $mtr;
    #     }
    # }
}

# Read the bilingual lexicon into %transfer.
#
# Each line is "<japanese>TAB<english>".  A leading "(...) " and a trailing
# " (...)" are stripped from the English side, as is a leading "to ".
# Entries whose English side is exactly "(P)" are ignored, and lines without
# a tab are skipped.  Dies if the file cannot be opened.
sub prepare_lexicon {
    # Key bytes in the example below: "saru" (monkey).
    #$transfer{"\xB1\xEE"}{"monkey"} = "edict";
    open(my $lex, "<", $EDICT_FILE)
        or die "Couldn't open file $EDICT_FILE for input: $!\n";

    while (my $line = <$lex>) {
        chomp $line;
        my ($japanese, $english) = split /\t/, $line;
        next unless defined $japanese && defined $english;

        $english =~ s/^\(.*\)\s+//;
        $english =~ s/\s+\(.*\)$//;
        ### edict does funny things with verbs (move into module)
        $english =~ s/^to\s+(\S+.*)/$1/;
        #print "$japanese\t\t$english\n"; ##debug

        unless ($english eq "(P)") {
            $transfer{$japanese}{$english} = "edict";
        }
    }
    close($lex);
}

# Load a tab-separated lexdb file into the hash referenced by $pred_for:
#   $pred_for->{ field 5 }{ field 4 } = field 6 with double quotes removed
# (fields counted from 0).  make_rules looks field 5 up by word and field 4
# up by lexical type name.  Rows with fewer than seven fields are skipped.
# Dies if the file cannot be opened.
sub _load_lexdb {
    my ($path, $pred_for) = @_;
    open(my $lex, "<", $path)
        or die "Couldn't open file $path for input: $!\n";

    while (my $line = <$lex>) {
        chomp $line;
        my @lexdb = split /\t/, $line;
        # print "$lexdb[5]\t$lexdb[4]\t$lexdb[6]\n";
        my $pred = $lexdb[6];
        next unless defined $pred;
        $pred =~ s/\"//g;
        $pred_for->{ $lexdb[5] }{ $lexdb[4] } = $pred;
    }
    close($lex);
}

# Fill %japanesele from the JACY lexical database.
sub prepare_japanese {
    # Key bytes in the example below: "saru" (monkey).
    #$japanesele{"\xB1\xEE"}{"ordinary-nohon-n-lex"} = "_saru_n_1_rel";
    _load_lexdb($JACY_LEXDB, \%japanesele);
}

# Fill %englishle from the ERG lexical database.
sub prepare_english {
    #$englishle{"monkey"}{"n_intr_le"} = "_monkey_n_rel";
    _load_lexdb($ERG_LEXDB, \%englishle);
}