import sys


def make_orth(orth):
    """Render a space-separated string as a bracketed list of quoted tokens.

    For example ``"hot dog"`` becomes ``< "hot", "dog" >``.  The string is
    split on single spaces only, so consecutive spaces produce empty quoted
    tokens, and embedded double quotes are not escaped.
    """
    quoted = ['"' + token + '"' for token in orth.split(" ")]
    return "< " + ", ".join(quoted) + " >"


def make_lex_entry(lex_entry_id, lex_type, pred, keys, orth):
    """Build the text of one lexical entry and return it as a string.

    All arguments are strings, except ``keys``, which is a list of 2-tuples
    ``(name, value)``; each value is rendered with :func:`make_orth`.  When
    ``keys`` is empty the LKEYS section is omitted entirely.
    """
    # NOTE(review): ``pred`` is accepted but never appears in the output, and
    # the RELS value below is empty.  The literal is kept exactly as found;
    # confirm against the grammar whether the predicate belongs here.
    # TODO: if len(orth.split(" ")) < 12 and "!" not in orth and "%" not in orth :
    parts = [
        lex_entry_id + " := " + lex_type + " &\n",
        '[ SYNSEM.LOCAL.CONT.RELS ,\n',
    ]
    if keys:
        # One "name value" item per key, separated by ",\n " and wrapped in
        # a single "LKEYS [ ... ]," section.
        rendered = [key[0] + " " + make_orth(key[1]) for key in keys]
        parts.append(" LKEYS [ " + ",\n ".join(rendered) + " ],\n")
    parts.append(' STEM ' + make_orth(orth) + ' ] .\n\n')
    return "".join(parts)


def _parse_line(line):
    """Split one tab-separated input line into its fields.

    Returns ``(pos, lt, syn_roles, lemma, keys)`` where ``keys`` is a list of
    ``(name, value)`` pairs built from the fields after the fourth one.

    Raises ``IndexError`` when fewer than four fields are present or when the
    key fields do not come in pairs.
    """
    # Strip only the line terminator; slicing off the last character would
    # eat real data when the final line has no trailing newline.
    fields = line.rstrip("\r\n").split("\t")
    pos, lt, syn_roles, lemma = fields[0], fields[1], fields[2], fields[3]
    keys = [
        (fields[i_arg], fields[i_arg + 1])
        for i_arg in range(4, len(fields), 2)
    ]
    return pos, lt, syn_roles, lemma, keys


def _unique_lex_id(prefix, start_i, used_ids):
    """Return ``prefix`` plus the first counter >= ``start_i`` not yet used.

    The chosen identifier is added to ``used_ids`` before returning.
    """
    counter = start_i
    while prefix + str(counter) in used_ids:
        counter += 1
    lex_id = prefix + str(counter)
    used_ids.add(lex_id)
    return lex_id


def convert_lines(lines, start_i):
    """Yield one lexical-entry string per tab-containing line in ``lines``.

    Lines without a tab character are skipped.  ``start_i`` is the first
    numeric suffix tried when making each entry identifier unique.

    If a line cannot be parsed, its 0-based index among the tab-containing
    lines is written to stderr and the exception is re-raised.
    """
    used_ids = set()
    tabbed = (line for line in lines if "\t" in line)
    for index, line in enumerate(tabbed):
        try:
            pos, lt, syn_roles, lemma, keys = _parse_line(line)
        except Exception:
            sys.stderr.write("Input line: " + str(index) + '\n')
            raise

        # Create the lex-id. Make sure no duplicates will be made.
        prefix = "-".join(["lex", pos, lt, lemma.replace(" ", "_")]) + '-'
        tdl_lex = _unique_lex_id(prefix, start_i, used_ids)

        # Create the lexical type.
        tdl_lt = "-".join(["lt", pos, lt])

        # Create the predicate.
        tdl_pred = '_' + "_".join(
            [pos, lemma.replace(" ", "+"), syn_roles, "rel"]
        )

        yield make_lex_entry(tdl_lex, tdl_lt, tdl_pred, keys, lemma)


def main():
    """Read tab-separated lines from stdin and write entries to stdout.

    The first command-line argument is the starting counter for entry ids.
    """
    start_i = int(sys.argv[1])
    for entry in convert_lines(sys.stdin, start_i):
        sys.stdout.write(entry)


if __name__ == "__main__":
    main()