#!/usr/bin/env python
from collections import defaultdict
from xml.etree import ElementTree as ET
from datetime import datetime
from contextlib import nested
from tempfile import gettempdir
import unicodedata
from os import path
import os
from SimpleXMLRPCServer import SimpleXMLRPCServer
from xmlrpclib import ServerProxy

# list of mappings courtesy of Jonathan Read
# see e.g. http://wiki.delph-in.net/moin/WeSearch/LexicalFiltering
# constructed on the basis of a confusion matrix between TnT (so not exactly
# PTB/GTB) and the ERG
GTB_TO_ERG_POS_MAP = {
    'NN': 'n', 'NNS': 'n', 'NNP': 'n', 'NNPS': 'n', 'EX': 'n',
    'VB': 'v', 'VBZ': 'v', 'VBP': 'v', 'VBN': 'v', 'VBD': 'v', 'VBG': 'v',
    'MD': 'v',
    'JJ': 'aj', 'JJR': 'aj', 'JJS': 'aj',
    'RB': 'av', 'RBR': 'av', 'RBS': 'av',
    'IN': 'p', 'RP': 'p',
}

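# Illustrative note (mine, not from the original source): GTBTreeBlazer.filter
# below compares these coarse prefixes against the prefix of an ERG
# lexical-type discriminant key, i.e. discrim.key.split('_')[0]. So a token
# tagged 'NNS' is taken to be compatible with a hypothetical key such as
# 'n_-_c_le' (prefix 'n') and incompatible with, say, 'v_np_le' (prefix 'v').
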
class GTBTreeBlazer(object):
    def __init__(self, match_le_types=False, incompatible_gold_action='ignore',
                 transformer=None, ignore_children_matching_parents=True):
        assert incompatible_gold_action in ('ignore', 'fail')
        self.incompatible_gold_action = incompatible_gold_action
        self.match_le_types = match_le_types
        self.tree_transformer = transformer
        # (if a parent and child have the same node label, ignore the child)
        self.ignore_children_matching_parents = ignore_children_matching_parents

    def filter(self, ext_tree, discrim_group):
        if self.tree_transformer:
            self.tree_transformer.transform(ext_tree)
        raw_text = discrim_group.raw_text
        charspan_to_discrims = defaultdict(set)
        discrim_encompassing_charspans = [set() for _ in raw_text]
        charspan_to_le_type_discrims = defaultdict(set)
        le_type_discrim_encompassing_charspans = [set() for _ in raw_text]
        for discrim in discrim_group.discriminants:
            charspan = (discrim.cfrom, discrim.cto)
            charspan_to_discrims[charspan].add(discrim)
            for i in range(discrim.cfrom, discrim.cto):
                discrim_encompassing_charspans[i].add(charspan)
            if self.match_le_types and discrim.type == 'type':
                charspan_to_le_type_discrims[charspan].add(discrim)
                for i in range(discrim.cfrom, discrim.cto):
                    le_type_discrim_encompassing_charspans[i].add(charspan)
        # XXX need to correct for punctuation diffs somewhere in here
        response = DiscriminantFilterResponse(discrim_group)
        for cons_path in ext_tree.all_paths():
            cons = cons_path[0]
            cons_parent = cons_path[1] if len(cons_path) > 1 else None
            if self.ignore_children_matching_parents:
                # if this is set, we ignore NPs that are immediately beneath
                # NPs, which may avoid some spurious mismatches
                if cons_parent and cons.cat == cons_parent.cat:
                    continue
            char_start, char_end = cons.char_start, cons.char_end
            if char_start is None or char_end is None:
                continue  # null constituent
            handled_enc_charspans = set()
            for char_idx in range(char_start, char_end):
                for enc_charspan in discrim_encompassing_charspans[char_idx]:
                    if enc_charspan in handled_enc_charspans:
                        continue  # we've already handled this char span for this cons
                    handled_enc_charspans.add(enc_charspan)
                    enc_start, enc_end = enc_charspan
                    if enc_start < char_start and enc_end < char_end:
                        disc_extra_head = raw_text[enc_start:char_start]
                        disc_missing_tail = raw_text[enc_end:char_end]
                        # if the only chars causing a conflict are punctuation,
                        # ignore them:
                        extra_head_is_punct = all(is_punctuation(c) for c in disc_extra_head)
                        missing_tail_is_punct = all(is_punctuation(c) for c in disc_missing_tail)
                        # (it's not a real conflict if either non-overlap is
                        # entirely punctuation)
                        is_conflict = not extra_head_is_punct and not missing_tail_is_punct
                    elif enc_start > char_start and enc_end > char_end:
                        disc_missing_head = raw_text[char_start:enc_start]
                        disc_extra_tail = raw_text[char_end:enc_end]
                        # if the only chars causing a conflict are punctuation,
                        # ignore them
                        missing_head_is_punct = all(is_punctuation(c) for c in disc_missing_head)
                        extra_tail_is_punct = all(is_punctuation(c) for c in disc_extra_tail)
                        is_conflict = not missing_head_is_punct and not extra_tail_is_punct
                    else:
                        is_conflict = False
                    if is_conflict:
                        for discrim in charspan_to_discrims[enc_charspan]:
                            response.reject(discrim, unicode(cons))
        if self.match_le_types:
            handled_enc_charspans = set()
            for tok in ext_tree.all_tokens():
                for char_idx in range(tok.char_start, tok.char_end):
                    for enc_charspan in le_type_discrim_encompassing_charspans[char_idx]:
                        if enc_charspan in handled_enc_charspans:
                            continue  # we've already handled this char span
                        handled_enc_charspans.add(enc_charspan)
                        enc_start, enc_end = enc_charspan
                        try:
                            mapped_pos_prefix = GTB_TO_ERG_POS_MAP[tok.cat]
                        except KeyError:
                            continue
                        prefixes_to_discrims = defaultdict(set)
                        # (everything in this mapping has discrim.type == 'type')
                        for discrim in charspan_to_le_type_discrims[enc_charspan]:
                            min_start = min(tok.char_start, enc_start)
                            max_start = max(tok.char_start, enc_start)
                            non_overlap_start = raw_text[min_start:max_start]
                            min_end = min(tok.char_end, enc_end)
                            max_end = max(tok.char_end, enc_end)
                            non_overlap_end = raw_text[min_end:max_end]
                            # make sure the only differences between the two
                            # spans are punctuation
                            exact_or_near_match = all(
                                is_punctuation(c)
                                for c in non_overlap_start + non_overlap_end)
                            if exact_or_near_match:
                                prefix = discrim.key.split('_')[0]
                                prefixes_to_discrims[prefix].add(discrim)
                        if (len(prefixes_to_discrims) > 1
                                and mapped_pos_prefix in prefixes_to_discrims):
                            # only reject if we have multiple prefixes here and
                            # at least one that agrees with the mapped POS
                            for prefix, discrims in prefixes_to_discrims.iteritems():
                                if prefix != mapped_pos_prefix:
                                    for discrim in discrims:
                                        response.reject(discrim, unicode(tok))
            # This may not handle some edge cases with the unary branching
            # conversion quite correctly - e.g. gerunds - but hopefully the
            # cases where we have choices and an exactly matching LE type (as
            # opposed to the one that gets converted) will not be too frequent.
        return response


def is_punctuation(unichr):
    return unicodedata.category(unichr).startswith('P')


class RecursiveTransformer(object):
    """Recursively transforms constituents (bottom-up)"""

    def transform(self, tree):
        self._recursive_transform_constituents(tree.root_constituent)

    def _recursive_transform_constituents(self, constit):
        for sub_constit in constit.sub_constituents:
            self._recursive_transform_constituents(sub_constit)
        self._transform_constituent(constit)

    def _transform_constituent(self, constit):
        raise NotImplementedError()


class NounCompoundDeepener(RecursiveTransformer):
    """Transform an NP-internal contiguous sequence of (JJ(R|S)?|NNS?)+ into an NP

    (It should probably be called something like an Nbar, but so should a few
    other structures, and we're not doing that, so we'll at least be
    consistent.)

    These can't be handled by BinarizePhraseTransformer, as we don't
    necessarily want to binarize, but simply (e.g.) split off the determiner,
    since we can't work out the bracketing for, say, 'massive bike sale', so
    we shouldn't make guesses about it. However, transforming with
    BinarizePhraseTransformer afterwards should create sane PSTs.

    We don't care about NPs with a single NN*, as these should be canonically
    decomposable using BinarizePhraseTransformer.
    """

    def _transform_constituent(self, constit):
        phrase_type = constit.cat
        if phrase_type != 'NP':
            return
        sub_units = constit.sub_units
        num_nns = sum(1 for su in sub_units if su.cat in ('NN', 'NNS'))
        if num_nns < 2:
            return
        sub_unit_iter = iter(sub_units)
        seq_idxs = []
        curr_start = None
        curr_end = None
        in_jj_nn_seq = False
        idx = 0
        while True:
            prev_in_jj_nn_seq = in_jj_nn_seq
            try:
                next_unit = sub_unit_iter.next()
            except StopIteration:
                if prev_in_jj_nn_seq:
                    # clean up the last one
                    seq_idxs.append((curr_start, curr_end))
                break
            if next_unit.kind == 'tok' and next_unit.cat in ('NN', 'NNS', 'JJ', 'JJR', 'JJS'):
                if not prev_in_jj_nn_seq:
                    in_jj_nn_seq = True
                    curr_start = idx
                curr_end = idx + 1
            else:
                if prev_in_jj_nn_seq:
                    seq_idxs.append((curr_start, curr_end))
                    curr_start = None
                    curr_end = None
                    in_jj_nn_seq = False
            idx += 1
        # take a copy, which we can manipulate and then give back
        # (splice right-to-left so earlier indexes stay valid)
        sub_units_new = sub_units[:]
        for (start, end) in reversed(seq_idxs):
            sub_units_new[start:end] = [TreeConstituent('NP', sub_units[start:end])]
        constit.replace_sub_units(sub_units_new)

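# Illustration (mine, not from the original source), using the docstring's
# example:
#   (NP (DT the) (JJ massive) (NN bike) (NN sale))
#     -> (NP (DT the) (NP (JJ massive) (NN bike) (NN sale)))
# i.e. the determiner is split off without guessing at the compound's internal
# bracketing.
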
class BinarizePhraseTransformer(RecursiveTransformer):
    """Converts a flat phrase with multiple children into a binary-branching
    nested phrase (where possible), leaving it untouched if there is
    uncertainty about the head phrase.
    """

    # these POSs, as well as the phrasal categories themselves, are
    # considered heads
    phrase_head_mapping = {
        'ADJP': set(['JJ', 'JJR', 'JJS']),
        'ADVP': set(['RB', 'RBR', 'RBS']),
        'NP': set(['NN', 'NNS', 'NNP', 'NNPS']),
        'VP': set(['VB', 'VBP', 'VBZ', 'VBD', 'VBG', 'VBN']),
    }

    def _transform_constituent(self, constit):
        phrase_type = constit.cat
        sub_units = constit.sub_units
        num_sub_units = len(sub_units)
        if num_sub_units <= 2 or phrase_type not in self.phrase_head_mapping:
            return
        poss_head_types = self.phrase_head_mapping[phrase_type] | set([phrase_type])
        poss_head_idxs = [idx for (idx, su) in enumerate(sub_units)
                          if su.cat in poss_head_types]
        if len(poss_head_idxs) != 1:
            # 0 or > 1 possible heads - return, as we can't binarize
            return
        head_idx = poss_head_idxs[0]
        # keep the nearest immediate head sibling, and binarise the rest
        # successively, preserving the phrase type
        # (in English this will usually be right-branching)
        if head_idx == 0:
            branch_right = True
        elif head_idx == num_sub_units - 1:
            branch_right = False
        else:
            # if it's not at an extreme, we can't work out the binarization
            # canonically, so be conservative and do nothing
            return
        if branch_right:
            new_constit = TreeConstituent(phrase_type, sub_units[:2])
            for i in range(2, num_sub_units - 1):
                new_constit = TreeConstituent(phrase_type, [new_constit, sub_units[i]])
            constit.replace_sub_units([new_constit, sub_units[-1]])
        else:
            new_constit = TreeConstituent(phrase_type, sub_units[-2:])
            for i in range(num_sub_units - 3, 0, -1):
                new_constit = TreeConstituent(phrase_type, [sub_units[i], new_constit])
            constit.replace_sub_units([sub_units[0], new_constit])

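# Illustration (mine, not from the original source): with a unique head at the
# left edge, binarization is right-branching, e.g. (hypothetical categories)
#   (VP (VBD gave) (NP ...) (PP ...) (ADVP ...))
#     -> (VP (VP (VP (VBD gave) (NP ...)) (PP ...)) (ADVP ...))
# With the head at the right edge, the mirror-image left-branching structure
# is built; a head anywhere else (or zero/multiple candidate heads) leaves the
# phrase untouched.
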
class RaiseNPPremodTransformer(RecursiveTransformer):
    """Rearrange NPs which have attachments both before and after the head N,
    so that the left attachments attach higher than the right ones (which
    follows ERG convention).
    """

    target_cats = None

    def _transform_constituent(self, constit):
        phrase_type = constit.cat
        if phrase_type != 'NP' or len(constit.sub_constituents) < 2:
            return
        first_subcons = constit.sub_constituents[0]
        if first_subcons.cat == 'NP' and len(first_subcons.sub_units) > 1:
            # rearrange the NP if the first constituent is an NP with a
            # leftmost member which is in self.target_cats
            leftmost = first_subcons.sub_units[0]
            if leftmost.cat in self.target_cats:
                # create a new constituent for the RHS with everything but
                # the first subunit
                new_right = TreeConstituent(
                    'NP', first_subcons.sub_units[1:] + constit.sub_units[1:])
                constit.replace_sub_units([leftmost, new_right])


class RaiseJJsTransformer(RaiseNPPremodTransformer):
    target_cats = set(['JJ', 'JJR', 'JJS', 'ADJP'])


class RaiseDetsTransformer(RaiseNPPremodTransformer):
    target_cats = set(['DT'])

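# Illustration (mine, not from the original source) for RaiseDetsTransformer:
#   (NP (NP (DT the) (NN dog)) (PP on the mat))
#     -> (NP (DT the) (NP (NN dog) (PP on the mat)))
# i.e. the determiner ends up attaching above the postmodifier, per ERG
# convention.
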
class CombinedTransformer(object):
    def __init__(self):
        self.intern_transformers = []

    def transform(self, tree):
        for tf in self.intern_transformers:
            tf.transform(tree)


class ERGMatchGTBTreeTransformer(CombinedTransformer):
    """Applies some transformations to the GTB tree to account for some
    systematic differences with the ERG"""

    def __init__(self, raise_dets=True, raise_jjs=True, binarize_phrases=True):
        """Initialise with the basic options. Set `raise_dets` to raise the
        determiner to the top left of an NP. Set `raise_jjs` to do the same
        thing with adjectives. Set `binarize_phrases` to make sure that nodes
        (e.g. NPs) have at most one sibling."""
        super(ERGMatchGTBTreeTransformer, self).__init__()
        self.binarize_phrases = binarize_phrases
        self.raise_dets = raise_dets
        self.raise_jjs = raise_jjs
        if self.binarize_phrases:
            self.intern_transformers.append(BinarizePhraseTransformer())
        if self.raise_dets:
            self.intern_transformers.append(RaiseDetsTransformer())
        if self.raise_jjs:
            self.intern_transformers.append(RaiseJJsTransformer())


class IncompatibleGoldDiscriminant(Exception):
    pass


class DiscriminantFilterResponse(object):
    def __init__(self, discriminant_group):
        self.discriminants_by_index = dict((disc.index, disc)
                                           for disc in discriminant_group.discriminants)
        self.rejected_indexes = set()
        self.required_indexes = set()
        self.rejection_reasons = defaultdict(list)
        self.requirement_reasons = defaultdict(list)

    def by_index(self):
        responses = {}
        for idx in self.discriminants_by_index:
            if idx in self.rejected_indexes:
                response = '-'
            elif idx in self.required_indexes:
                response = '+'
            else:
                response = '?'
            responses[idx] = response
        return responses

    def sequential(self):
        return [resp for _, resp in sorted(self.by_index().items())]

    def verbose(self, include_reasons=True, include_unknown=False):
        output = []
        for idx in sorted(self.rejected_indexes):
            reasons = (u' for ' + u', '.join(repr(r) for r in self.rejection_reasons[idx])
                       if include_reasons else u'')
            output.append(u"REJ: %r" % self.discriminants_by_index[idx] + reasons)
        for idx in sorted(self.required_indexes):
            reasons = (u' for ' + u', '.join(repr(r) for r in self.requirement_reasons[idx])
                       if include_reasons else u'')
            output.append(u"AFF: %r" % self.discriminants_by_index[idx] + reasons)
        if include_unknown:
            for idx in sorted(self.unknown_indexes):
                output.append(u"UNK: %r" % self.discriminants_by_index[idx])
        return u"\n".join(output)

    def reject(self, discriminant, reason):
        if discriminant.index in self.required_indexes:
            raise ConflictingConditionsException(
                "Discriminant at index %d was required then rejected" % discriminant.index)
        self.rejected_indexes.add(discriminant.index)
        self.rejection_reasons[discriminant.index].append(reason)

    def require(self, discriminant, reason=None):
        if discriminant.index in self.rejected_indexes:
            raise ConflictingConditionsException(
                "Discriminant at index %d was rejected then required" % discriminant.index)
        self.required_indexes.add(discriminant.index)
        self.requirement_reasons[discriminant.index].append(reason)

    @property
    def unknown_indexes(self):
        return (set(self.discriminants_by_index.iterkeys())
                - self.rejected_indexes - self.required_indexes)


class ConflictingConditionsException(Exception):
    """Indicates that conflicting conditions have been specified."""
    pass

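# Example (mine, not from the original source): with three discriminants of
# which only index 1 has been rejected,
#   response.by_index()   -> {0: '?', 1: '-', 2: '?'}
#   response.sequential() -> ['?', '-', '?']
# where '-' means rejected, '+' required, and '?' no decision.
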
class DiscriminantGroup(object):
    def __init__(self, raw_text, discriminant_xml_list):
        self.raw_text = raw_text
        self.discriminants = [Discriminant(idx, dxml, self.raw_text)
                              for (idx, dxml) in enumerate(discriminant_xml_list)]

    def __repr__(self):
        return "DiscriminantGroup(%r, %r)" % (self.raw_text, self.discriminants)


class Discriminant(object):
    def __init__(self, disc_idx, xml, all_text):
        self.index = disc_idx
        root = ET.fromstring(xml)
        self.supplied_id = root.get('id')  # for debugging only at this stage
        self.type = root.get('type')
        self.key = root.get('key')
        self.cfrom = int(root.get('from'))
        self.cto = int(root.get('to'))
        # should probably not use this, except for maybe debugging
        self.representation = root.text
        self.all_text = all_text

    def raw_text(self):
        return self.all_text[self.cfrom:self.cto]

    def __repr__(self):
        return (u'Discriminant(index=%(index)r, id=%(supplied_id)r, type=%(type)r, '
                u'key=%(key)r, from=%(cfrom)r, to=%(cto)r, repr=%(representation)r)'
                % self.__dict__)

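# The constructor above implies discriminant XML along these lines (the
# element name and attribute values are illustrative assumptions; only the
# attributes actually read - id, type, key, from, to - and the text content
# matter):
#   <discriminant id="d12" type="type" key="n_-_c_le" from="0" to="3">
#     the discriminant's display form
#   </discriminant>
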
class GTBTree(object):
    def __init__(self, raw_xml):
        root_elem = ET.fromstring(raw_xml)
        cons_elems = root_elem.findall('./cons')
        assert len(cons_elems) == 1, "Found invalid number of root constituents"
        root_cons_elem = cons_elems[0]
        self.root_constituent = GTBConstit(root_cons_elem, 0)

    def all_constituents(self):
        return self.root_constituent.all_sub_constituents

    def all_paths(self):
        return self.root_constituent.all_sub_paths

    def all_tokens(self):
        return self.root_constituent.tokens

    def __unicode__(self):
        return unicode(self.root_constituent)

    def get_text(self):
        # urgh - this may not work if we *do* have multiple root constituents
        # - maybe we should just disallow them (if they in fact occur)?
        return self.root_constituent.get_text()


class TreeConstituent(object):
    kind = 'cons'

    def __init__(self, cat, sub_units):
        self.cat = cat
        self.replace_sub_units(sub_units)

    @property
    def sub_units(self):
        return self._sub_units

    @property
    def sub_constituents(self):
        return self._sub_constituents

    @property
    def all_sub_constituents(self):
        all_subs = [self]
        for cons in self.sub_constituents:
            all_subs.extend(cons.all_sub_constituents)
        return all_subs

    @property
    def all_sub_paths(self):
        """The list of all (bottom-to-top) paths through this subtree.

        Each element in the returned list is a tuple going from a constituent
        at some level and tracing upwards through the tree, so the second
        element is the parent, the third the grandparent (if it exists), etc;
        [sp[0] for sp in tree.all_sub_paths] is equivalent to
        tree.all_sub_constituents.
        """
        all_subs = [(self,)]
        for cons in self.sub_constituents:
            for sub_path in cons.all_sub_paths:
                all_subs.append(sub_path + (self,))
        return all_subs

    @property
    def tokens(self):
        all_tokens = []
        for cons in self.sub_units:
            try:
                tokens = cons.tokens
            except AttributeError:
                tokens = [cons]  # it is actually a token itself
            all_tokens.extend(tokens)
        return all_tokens

    def replace_sub_units(self, new_sub_units):
        self._sub_constituents = [su for su in new_sub_units if su.kind == 'cons']
        self._sub_units = new_sub_units[:]  # do we need to copy here?

    def __unicode__(self):
        return u'(%s %s)' % (self.cat, u' '.join(unicode(u) for u in self.sub_units))

    def __repr__(self):
        return 'TreeConstituent(%r, %r)' % (self.cat, self.sub_units)

    def get_text(self, start_char_idx=0):
        text_comps = []
        char_idx = start_char_idx
        for sub_cons in self.sub_units:
            new_char_start = sub_cons.char_start if sub_cons.char_start is not None else char_idx
            if sub_cons.kind == 'tok':
                # write in the amount of whitespace, which we can calculate
                # from the char spans
                text_comps.append(u' ' * (new_char_start - char_idx))
                text_comps.append(sub_cons.text)
            else:
                text_comps.append(sub_cons.get_text(char_idx))
            if sub_cons.char_end is not None:  # don't change for null constituents
                char_idx = sub_cons.char_end
        # Do we need to handle the final token suffix here? We're possibly not
        # even picking it up, so it's irrelevant in any case.
        return u''.join(text_comps)

    @property
    def char_start(self):
        return self.tokens[0].char_start if self.tokens else None

    @property
    def char_end(self):
        return self.tokens[-1].char_end if self.tokens else None


class GTBConstit(TreeConstituent):
    def __init__(self, cons_elem, char_start=0):
        self.cat = cons_elem.get('cat')
        sub_char_start = char_start
        new_subs = []
        for sub_elem in cons_elem:
            if sub_elem.tag == 'cons':
                new_sub = GTBConstit(sub_elem, sub_char_start)
            elif sub_elem.tag == 'tok':
                new_sub = GTBToken(sub_elem, sub_char_start)
            else:
                continue  # ignore anything that isn't a constituent or token
            new_subs.append(new_sub)
            sub_char_start = new_sub.char_end + len(sub_elem.tail or '')
        if not new_subs:
            # empty constituent? add a null token for ease of char tracking
            new_subs.append(NullToken(char_start))
        self.replace_sub_units(new_subs)


class GTBToken(object):
    kind = 'tok'

    def __init__(self, tok_elem, char_start=0):
        self.cat = tok_elem.get('cat')
        self.text = tok_elem.text
        self._char_start = char_start

    @property
    def char_start(self):
        return self._char_start

    @property
    def char_end(self):
        return self._char_start + len(self.text)

    def __unicode__(self):
        return u'%s/%s[%d,%d]' % (self.text, self.cat, self.char_start, self.char_end)

    def __repr__(self):
        return 'Token(%r, %r, %d, %d)' % (self.cat, self.text, self.char_start, self.char_end)


class NullToken(object):
    """A zero-width empty token.

    Makes tracking characterization easier.
    """
    kind = 'tok'

    def __init__(self, char_start):
        self._char_start = char_start
        self.cat = None
        self.text = ''

    @property
    def char_start(self):
        return self._char_start

    @property
    def char_end(self):
        return self._char_start

    def __unicode__(self):
        return u''

    def __repr__(self):
        return 'NullToken(%d)' % self.char_start

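# GTBTree implies GENIA-treebank-style XML input along these lines (a sketch,
# not taken from the original source; only the 'cons'/'tok' tags and 'cat'
# attributes read above are assumed):
#   <sentence>
#     <cons cat="NP"><tok cat="DT">The</tok> <tok cat="NN">dog</tok></cons>
#   </sentence>
# Character offsets are recovered from the token text plus any inter-element
# tail whitespace, which is how get_text() can reconstruct the raw string.
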
class UnknownAggressionLevel(Exception):
    pass


class InputDumpDirNotWriteable(Exception):
    pass


class GTBTreeBlazingHandler(object):
    def __init__(self, aggression_levels=(20,), input_dump_dir=None):
        self.blazers = []
        for lvl in aggression_levels:
            if lvl == 50:
                self.blazers.append(GTBTreeBlazer(
                    match_le_types=True,
                    transformer=ERGMatchGTBTreeTransformer(),
                    ignore_children_matching_parents=False))
            elif lvl == 40:
                self.blazers.append(GTBTreeBlazer(
                    match_le_types=False,
                    transformer=ERGMatchGTBTreeTransformer(),
                    ignore_children_matching_parents=False))
            elif lvl == 20:
                self.blazers.append(GTBTreeBlazer(
                    match_le_types=False,
                    transformer=None,
                    ignore_children_matching_parents=True))
            else:
                raise UnknownAggressionLevel("Don't know about aggression level %r" % lvl)
        self.input_dump_dir = input_dump_dir
        if self.input_dump_dir is not None:
            # check up-front that the dump directory is writeable
            try:
                test_fname = path.join(self.input_dump_dir, 'TEST')
                with open(test_fname, 'w'):
                    pass
                os.unlink(test_fname)
            except (IOError, OSError):
                raise InputDumpDirNotWriteable("Can't write to directory '%s'"
                                               % self.input_dump_dir)

    def treeblaze(self, item_id, call_sequence, item_input, discriminants):
        encoding = 'utf-8'
        if call_sequence >= len(self.blazers):
            # fallback, currently to all trees, after we've run out of ways
            # to selectively prune
            returnable = ['?' for d in discriminants]
        else:
            blazer = self.blazers[call_sequence]
            tree = GTBTree(item_input)
            discriminant_group = DiscriminantGroup(tree.get_text(), discriminants)
            response = blazer.filter(tree, discriminant_group)
            returnable = response.sequential()
        assert len(returnable) == len(discriminants)
        if self.input_dump_dir:
            fname_stem = path.join(self.input_dump_dir, str(item_id))
            item_fname = fname_stem + '.input'
            disc_fname = fname_stem + '.discriminants'
            response_fname = fname_stem + '.%02d.response' % call_sequence
            with nested(open(item_fname, 'w'), open(disc_fname, 'w'),
                        open(response_fname, 'w')) as (item_f, disc_f, resp_f):
                item_f.write(item_input.encode(encoding) + '\n')
                for disc in discriminants:
                    disc_f.write(disc.encode(encoding) + '\n')
                resp_f.write(datetime.now().isoformat() + '\n')
                for val in returnable:
                    resp_f.write(val.encode(encoding) + '\n')
        return returnable


def run_xmlrpc_server(instance, host='localhost', port=8000):
    server = SimpleXMLRPCServer((host, port), encoding='utf-8')
    server.register_introspection_functions()
    server.register_instance(instance)
    print "initializing XMLRPC server for %r on %s:%d" % (instance, host, port)
    server.serve_forever()

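# A minimal client sketch (mine, not from the original source; the item id and
# the tree/discriminant XML strings are placeholders):
#   from xmlrpclib import ServerProxy
#   proxy = ServerProxy('http://localhost:8000/')
#   responses = proxy.treeblaze(42, 0, tree_xml, discriminant_xml_list)
#   # responses is a list like ['?', '-', '?'], parallel to the discriminants
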
def file_to_blazing_comps(fname):
    """Throwaway for debugging - we usually do this over XMLRPC."""
    with open(fname) as f:
        lines = [line.decode('utf-8').rstrip('\n') for line in f]
    ext_tree = GTBTree(lines[0])
    raw_text = ext_tree.get_text()
    discriminant_group = DiscriminantGroup(raw_text, lines[1:])
    return (ext_tree, discriminant_group)


def test_direct_from_files(fnames, levels=(50, 40, 20)):
    test_from_files(fnames, GTBTreeBlazingHandler(levels), len(levels))


def test_xmlrpc_from_files(fnames, max_seq=2, host='localhost', port=8000):
    sproxy = ServerProxy('http://%s:%d/' % (host, port))
    test_from_files(fnames, sproxy, max_seq)


def test_from_files(fnames, blazing_handler, max_seq=1):
    for fname in fnames:
        with open(fname) as f:
            lines = [line.decode('utf-8').rstrip('\n') for line in f]
        fname_base = path.basename(fname)
        item_id, _ = path.splitext(fname_base)
        item_id = int(item_id)
        item_input = lines[0]
        discriminants = lines[1:]
        print "item_id=%d, item_input=%r, discriminants=[..%d discriminants..]" % (
            item_id, item_input, len(discriminants))
        for seq in range(max_seq + 1):
            result = blazing_handler.treeblaze(item_id, seq, item_input, discriminants)
            print "seq=%d, rejected %d: %r" % (
                seq, sum(1 for r in result if r == '-'), result)


# ALSO allow this to be run as an executable for easy distribution to third parties
import optparse


def main():
    parser = optparse.OptionParser("Usage: %prog [options]")
    parser.add_option('-H', '--host', action='store', dest='host', default='localhost',
                      help="Host name or address to listen on (default: %default)")
    parser.add_option('-p', '--port', action='store', dest='port', type='int',
                      default=8000,
                      help="Port to listen on (default: %default)")
    parser.add_option('-L', '--agg-level', action='append', type='int', dest='levels',
                      help="Add the supplied level to the list of aggression levels that"
                      " will be attempted (default: [20] if none are supplied). Level 20"
                      " means avoid matching constituents where the parent has the same"
                      " label. Level 40 means rewrite GTB trees using some heuristics,"
                      " and try to match all nodes. Level 50 means the same as level 40,"
                      " but also attempt to reject or match discriminants at the type"
                      " level using some hastily-constructed rules. If multiple values"
                      " are supplied, they will be tried sequentially on successive"
                      " calls to the blazing module. They should usually be in"
                      " descending numeric order. There is always an unrestricted"
                      " fallback.")
    parser.add_option('--no-dump', action='store_false', dest='dump_input', default=True,
                      help="Don't dump input to a user-supplied or auto-selected directory")
    parser.add_option('--dump-dir', action='store', type='string', default=None,
                      dest='dump_dir')
    options, _ = parser.parse_args()
    levels = options.levels if options.levels else [20]
    if tuple(levels) != tuple(sorted(levels, reverse=True)):
        print "WARNING: list of levels %r is not in descending order." % levels
    if options.dump_input and options.dump_dir is None:
        options.dump_dir = path.join(
            gettempdir(),
            'treeblazing-%s' % datetime.now().strftime("%Y%m%d%H%M%S"))
        os.mkdir(options.dump_dir)
        print "Dumping data to '%s'" % options.dump_dir
    run_xmlrpc_server(GTBTreeBlazingHandler(levels, options.dump_dir),
                      options.host, options.port)


if __name__ == "__main__":
    main()
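# Example invocations (illustrative; the script name, paths and levels are
# placeholders). As a server:
#   python treeblazing.py -H 0.0.0.0 -p 8000 -L 50 -L 40 -L 20 --dump-dir /tmp/blaze
# For offline debugging, test_from_files and friends expect files named
# '<item_id>.<ext>' whose first line is the tree XML and whose remaining lines
# are discriminant XML, one per line:
#   test_direct_from_files(['/path/to/42.txt'], levels=(50, 40, 20))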