# -*- coding: utf-8 -*-


class Reduction(object):
    """
    a reduced copy of a source text, built one character at a time,
    together with a table mapping every reduction character back to
    an index in the source
    """

    # character appended to the reduction by _mask_char()
    _MASK = '_'

    def __init__(self, source):
        """
        source -- the original text; it is only stored here, the
        reduction itself is filled in through _append()/_mask_char()
        """
        self.source = source
        # u'' is a unicode string on Python 2 and a plain str on
        # Python 3, where the unicode() builtin no longer exists
        self.reduction = u''
        # pointers[i] is the source index recorded for reduction[i]
        self.pointers = []

    def __str__(self):
        """ return the reduction text built so far """
        return self.reduction

    def _append(self, char, source_index):
        """
        add a single character to the reduction and record the source
        index it came from; reduction and pointers always grow together
        """
        assert len(char) == 1
        self.reduction += char
        self.pointers.append(source_index)

    def _expand(self, x, y):
        """
        subclasses should override this method to recover elements of
        markup that lie on the border, but should be included in
        sentences; the base implementation returns the range unchanged
        """
        return x, y

    def _mask_char(self, offset):
        """ append the mask character, pointing at source index offset """
        self._append(Reduction._MASK, offset)

    def get_source(self, x, y):
        """
        return the substring of the source that corresponds to the
        specified range
        """
        return self.source[x:y]

    def get_source_range(self, reduction_x, reduction_y):
        """
        return the range in the source that corresponds to the specified
        range in the reduction

        reduction_y is looked up as-is (not reduction_y + 1), so it is
        treated as an exclusive end. A range that runs to the very end
        of the reduction (reduction_y == len(self.pointers)) has no
        pointer entry to look up; it is mapped to the end of the source
        instead of raising IndexError.
        """
        x = self.pointers[reduction_x]
        if reduction_y == len(self.pointers):
            # no reduction character follows the range: extend to the
            # end of the source
            y = len(self.source)
        else:
            y = self.pointers[reduction_y]
        return self._expand(x, y)