;;; -*- mode: fundamental; coding: utf-8; indent-tabs-mode: t; -*-

;;
;; a ‘micro’-tokenizer, i.e. mimicry of token splitting rules in chart mapping;
;; in late 2012, this code is used in ERG-external tokenization experiments.
;;
;; layout notes: every directive must start its own line, and the pattern and
;; replacement of a rewrite rule (`!') are separated by tab characters; the
;; spaces inside the replacements below are literal output.
;;

;;
;; group 1: numeric ranges.  an (optionally signed) number, a hyphen or en
;; dash, and another number are rewritten with a spaced en dash between them,
;; e.g. `10-20' becomes `10 – 20'.  the group is invoked by `>1' right below
;; its definition.
;;
#1
!([+-]?[0-9]+(?:\.[0-9]*)?)[–-]([0-9]+(?:\.[0-9]*)?)		\1 – \2
#
>1

;;
;; group 2: hyphenated and slashed forms.
;; - at a hyphen or en dash followed by an alphanumeric string (which may
;;   itself end in a hyphen), insert a space after the dash; the output always
;;   carries an ASCII hyphen attached to the left-hand part.
;; - at a slash followed by an alphanumeric string, put spaces around the
;;   slash.
;; the leading `(.+)' is greedy, so the right-most candidate is split first;
;; earlier ones are left for subsequent applications of the group (invoked by
;; `>2' below).
;;
#2
!(.+)[–-]([a-zA-Z0-9]+-?)		\1- \2
!(.+)/([a-zA-Z0-9]+)		\1 / \2
#
>2

;;
;; tokenizer pattern: token boundaries are runs of spaces and tabs.
;;
:[ \t]+