# -*- coding: utf-8 -*-
import re
# Title, definition-list and sentence-boundary patterns.
# NOTE(review): regneosfirst, regneosfinal, regneosremove, regeos and
# regeosTwoOrMore consist only of empty groups and whitespace classes; it looks
# as if marker tokens were lost from these literals -- confirm against the
# authoritative copy of this file before relying on them.

# Captures everything up to (not including) the next '<'.
regtitlepreprocess = re.compile(r'([^<]*)', re.M | re.S)

# Newline, ';', then the rest of that line up to and including its first ':'.
regdeflist = re.compile(r'(\n;[^:\n]*:)')

# A '.' whose two preceding characters are a non-'M' followed by a
# non-'r'/'s', then whitespace and a [[wiki link]] whose visible text starts
# with an ASCII capital letter.
regsentinitialbracket = re.compile(
    r'([^M][^rs]\.)\s+(\[\[([^|\]]+\|)?([A-Z])([^|\]]+)\]\])',
    re.M | re.S,
)

regneosfirst = re.compile(r'()+\s*()+\s*()*\s?', re.M | re.S)
regneosfinal = re.compile(r'()*\s*()+\s*()+\s?', re.M | re.S)
regneosremove = re.compile(r'', re.M | re.S)
regeos = re.compile(r'\s?', re.M | re.S)

# NOTE(review): '{3-10}' is not a repetition count in Python's re syntax; it
# matches the literal characters "{3-10}".  If "3 to 10 repetitions" was meant
# this should read '{3,10}'.  Left as found because the group body appears
# incomplete and the change would alter what the pattern matches.
regeosTwoOrMore = re.compile(r'((\s*)|(\s?\n)){3-10}', re.M | re.S)
# Redirects, {{template}} spans and table markup.
# Note: inside the classes written '[^}^{]' the second '^' is a literal caret,
# so those classes exclude '}', '{' and '^'.

# '#redirect [[Target]]' in any letter case; group 1 is the link target.
regredirect = re.compile(
    r'#[rR][eE][dD][iI][rR][eE][cC][tT]\s?\[\[([^\]]+)\]\]',
    re.M | re.S,
)

# A non-nested {{...}} span, either wrapped in parentheses or with optional
# surrounding apostrophes.
regcurly1 = re.compile(r'(\({{[^}^{]*?}}\)|\'*{{[^}^{]*?}}\'*)', re.M | re.S)

# Same shape, but only for template bodies of at least 500 characters.
reglongTemp = re.compile(
    r'{{[^}^{]{500,10000000}}}\)|\'*{{[^}^{]{500,10000000}}}',
    re.M | re.S,
)

# A non-nested {{...}} span whose body contains "box" or "table".
regboxtable = re.compile(r'{{[^}^{]*?(box|table)[^}^{]*}}', re.M | re.S)

# Brace-delimited spans that mention class=wikitable, or that open with '{|'.
regwikitable1 = re.compile(r'{[^}]*?class="?wikitable"?[^}]*?}', re.M | re.S)
regwikitable2 = re.compile(r'[^{]{\|[^\\][^}^{]*?}', re.M | re.S)
regwikitable3 = re.compile(r'([^{]|^){\|[^\\].{0,10000}?\|}', re.M | re.S)

# HTML tables, lower-case and upper-case tag spellings respectively.
regtable = re.compile(
    r'<\s?table[^/]{0,100}>.{0,30000}\s?table.{0,10}>',
    re.M | re.S,
)
regtableborder = re.compile(
    r'<\s?TABLE[^/]{0,100}>.{0,30000}\s?TABLE.{0,10}>',
    re.M | re.S,
)
# Wiki-table ('{| ... |}') matching.  Lines starting with '#' are earlier
# attempts kept for reference only.
#regtableConvertStart = re.compile(r'{\|', re.MULTILINE | re.DOTALL)
#regtableConvertEnd = re.compile(r'\|}', re.MULTILINE | re.DOTALL)
#regtableStephan = re.compile(r'\{\|(?:(?!\{\|)(?!\|\}).)+?\|\}')
# The three disabled patterns below used to live in a bare (non-raw)
# triple-quoted string; '\|' inside such a literal is an invalid escape
# sequence, so they are kept as ordinary comments instead.
#regtableStephanImproved = re.compile(r'{\|(?:(?!{\|)(?!\|}).)+?\|}', re.MULTILINE | re.DOTALL)
#regtableStephanImproved2 = re.compile(r'^[:*]{0,4}{\|(?:(?!^[:*]{0,4}\|}).)+^[:*]{0,4}\|}', re.MULTILINE | re.DOTALL)
#regtableStephanImproved3 = re.compile(r'^[:* ]{0,4}{\|(?:(?!^[:* ]{0,4}(?:{\||\|})).)+?^[:* ]{0,4}\|}', re.MULTILINE | re.DOTALL)
# A table that opens at the start of a line (after up to four of ':', '*',
# ' ', or after one or more tags) and runs to the first line-initial '|}',
# refusing to cross another line-initial '{|' or '|}' on the way.
regtableClean = re.compile(r'(^[:* ]{0,4}|^(<[^>]*?>)\s*(<[^>]*?>)*\s*){\|(?:(?!^[:* ]{0,4}(?:{\||\|})).)+?^[:* ]{0,4}\|}', re.MULTILINE | re.DOTALL)
# Same opening as regtableClean, but compiled without flags, so '^' anchors
# only at the very start of the string.
regtablestart = re.compile(r'(^[:* ]{0,4}|^(<[^>]*?>)\s*(<[^>]*?>)*\s*){\|')
#regtableGisleImproved = re.compile(r'^[:* ]{0,4}{\|(?:(?!^[:* ]{0,4}\|}).)(?:(?!^[:* ]{0,4}{\|).)+?^[:* ]{0,4}\|}', re.MULTILINE | re.DOTALL)
# Twenty or more consecutive '|'-introduced cells.
regmultiplePipes = re.compile(r'(\|.[^\|]*){20,100000}')
#regtableStephan = re.compile(r'{\|(?:(?!{\|).)+\|}')
#DEPRECATED
# Table "conversion" patterns: '{|' ... '|}' spans and their placeholder forms
# using the sentinel characters 'Ӂ' and 'ጣ'.
regtableConvertStart = re.compile(r'(^.)?{\|(?=([^\\].{0,400000}?\|}))', re.MULTILINE | re.DOTALL)
regtableConvertEnd = re.compile(r'(Ӂ.{0,400000}?)(\|})', re.MULTILINE | re.DOTALL)
regtableConvertRevertStart = re.compile(r'Ӂ', re.MULTILINE | re.DOTALL)
regtableConvertRevertEnd = re.compile(r'ጣ', re.MULTILINE | re.DOTALL)
regtableConverted = re.compile(r'Ӂ[^Ӂ]{0,400000}?ጣ', re.MULTILINE | re.DOTALL)
# One specific table, matched by its literal header row.  This is now a raw
# string: the previous plain literal relied on invalid escape sequences
# ('\|', '\s', '\(') being passed through unchanged.  The compiled pattern text
# is identical, since the old '\"' evaluated to a bare '"'.
regtablehardcode = re.compile(r'{\|\sclass="wikitable"\s!English\s\|\|\sFrench\s\|\|\sIPA pronunciation\s\(Canadian accent\).*?\|}', re.MULTILINE | re.DOTALL)
# NOTE(review): regeos is also bound earlier in this file with the same
# pattern and flags; this rebinding is redundant but kept for compatibility.
regeos = re.compile(r'\s?', re.MULTILINE | re.DOTALL)
# NOTE(review): empty pattern -- the literal may have lost its content;
# confirm against the authoritative copy of this file.
regeosmathnowiki = re.compile(r'', re.MULTILINE | re.DOTALL)
# HTML-like markup embedded in wiki text.
# NOTE(review): as found, regbr and regblockquote were raw string literals
# broken across physical lines (a SyntaxError) and regdiv2 began with a bare
# '?' (re.error: nothing to repeat), so the module could not be imported.
# The tag text in those three patterns is RECONSTRUCTED from the variable names
# and the surviving fragments -- confirm against the authoritative copy.
regbr = re.compile(r'<br\s*/?>', re.MULTILINE | re.DOTALL)
# <ref ...> ... /ref> pairs, and self-closing <ref ... /> tags.
regref = re.compile(r'<[Rr]ef.*?/[Rr]ef>', re.MULTILINE | re.DOTALL)
regref2 = re.compile(r'<[Rr]ef[^>]*?/>', re.MULTILINE | re.DOTALL)
# NOTE(review): empty pattern (matches the empty string everywhere); the
# literal has probably lost its content.  Left as found.
regcomment = re.compile(r'', re.MULTILINE | re.DOTALL)
# Group 1 is the quoted text between the reconstructed blockquote tags.
regblockquote = re.compile(r'<blockquote>(.*?)</blockquote>',re.MULTILINE | re.DOTALL)
# NOTE(review): empty pattern, see regcomment.
regsource = re.compile(r'',re.MULTILINE | re.DOTALL)
# Opening or closing div tag (leading '</' reconstructed, see note above).
regdiv2 = re.compile(r'</?div[^>]*?>',re.MULTILINE | re.DOTALL)
# NOTE(review): empty pattern, see regcomment.
reggallery = re.compile(r'',re.MULTILINE | re.DOTALL)
# A string consisting of a single '*' (no flags: '$' also matches before a
# trailing newline).
regsingleast = re.compile(r'^\*$')
# Three or four consecutive hyphens.
reghyphen = re.compile(r'----?',re.MULTILINE | re.DOTALL)
# NOTE(review): empty pattern, see regcomment.
regcenter = re.compile(r'',re.MULTILINE | re.DOTALL)
# Trailing-section handling.  With DOTALL the '.*' tail of each "section"
# pattern runs from the heading to the end of the text.

regsealso = re.compile(r'==+\s?See also\s?==.*', re.M | re.S)
regnotes = re.compile(r'==+\s?Notes\s?.*', re.M | re.S)
regreferences = re.compile(r'==+\s?References.*', re.M | re.S)
regsources = re.compile(r'==+\s?Sources.*', re.M | re.S)

# Heading markers ('==' plus optional space) of a section title that occurs
# again, as a heading, later in the text.  Only the marker is consumed; the
# title itself sits in the lookahead.
regsourcelookahead = re.compile(
    r'==+\s?(?=Sources.*?==+\s?Sources)', re.M | re.S)
regseealsolookahead = re.compile(
    r'==+\s?(?=See also.*?==+\s?See also)', re.M | re.S)
regnoteslookahead = re.compile(
    r'==+\s?(?=Notes.*?==+\s?Notes)', re.M | re.S)
regreflookahead = re.compile(
    r'==+\s?(?=References.*?==+\s?References)', re.M | re.S)

# '___<Title>' placeholders: group 1 is the title, group 2 the text up to and
# including the next run of '='.
regsourcelookaheadrestore = re.compile(r'___(Sources)(.*?==+)', re.M | re.S)
regnoteslookaheadrestore = re.compile(r'___(Notes)(.*?==+)', re.M | re.S)
regreflookaheadrestore = re.compile(r'___(References)(.*?==+)', re.M | re.S)
regseealsolookaheadrestore = re.compile(r'___(See also)(.*?==+)', re.M | re.S)

regbibliography = re.compile(r'==+\s?Bibliography\s?.*', re.M | re.S)
regfootnotes = re.compile(r'==+\s?Footnotes\s?.*', re.M | re.S)
regrelated = re.compile(r'==+\s?Related web sites\s?.*', re.M | re.S)
regexternal = re.compile(r'==+\s?External links\s?.*', re.M | re.S)
regmath = re.compile(r'',re.MULTILINE | re.DOTALL)
regremovenewline = re.compile(r'(?=\n)\n',re.MULTILINE | re.DOTALL)
regparagraph = re.compile(r'(\n\n)',re.MULTILINE | re.DOTALL)
regtitle = re.compile(r'(==+?\s?[^=]+?\s?==+\s?)',re.MULTILINE | re.DOTALL)
regbullets = re.compile(r'(^[#*].*?)\n', re.MULTILINE | re.DOTALL)
regbulletscolon = re.compile(r'(?<=([\.:]\s))([*#].*?)\n')
regbullets2 = re.compile(r'(^\*.*?)\n', re.MULTILINE | re.DOTALL)
regindentcolon = re.compile(r'(^[;:][:;]?.*?)\n', re.MULTILINE | re.DOTALL)
regcolon = re.compile(r'^:', re.MULTILINE | re.DOTALL)
regimage = re.compile(r'\[\[Image:[^\[\]]*(\[\[[^\]]*\]\][^\[\]]*)*?\]\]',re.MULTILINE | re.DOTALL)
regspan = re.compile(r'?\s*span.*?>',re.MULTILINE | re.DOTALL)
regletternumber = re.compile(r'[A-Za-z0-9]',re.MULTILINE | re.DOTALL)
regonlyXML = re.compile(r'^(<[^>]*?>)\s*(<[^>]*?>)*\s*$',re.MULTILINE | re.DOTALL)
regbracket = re.compile(r'(\[\[([^|\]]+\|)?([^|\]]+)\]\]s\.)',re.MULTILINE | re.DOTALL)
regjava = re.compile(r'{{(Javadoc:.*?)}}',re.MULTILINE | re.DOTALL)
regiast = re.compile(r'{{(IAST.*?)}}',re.MULTILINE | re.DOTALL)
regipa = re.compile(r'{{(IPA.*?)}}(?=[^}])', re.MULTILINE | re.DOTALL)
regyeareos = re.compile(r'([0-9]{2,4}s\.)', re.MULTILINE | re.DOTALL)
regorg = re.compile(r'(\.org\.\s)', re.MULTILINE | re.DOTALL)
##japanese
# The disabled patterns in this section used to be wrapped in bare (non-raw)
# triple-quoted strings.  Sequences such as '\|', '\s' and '\}' are invalid
# escapes in such literals, so the text is kept as ordinary comments instead.
#regtransjap = re.compile(r'{{transl\|ja\|([^}]*?)}}', re.MULTILINE | re.DOTALL)
#reglangjap = re.compile(r'{{lang\|ja\|([^}]*?)}}', re.MULTILINE | re.DOTALL)
#regnihongohardcode = re.compile(r'({{Nihongo\|[^|]+\|[^|]+\|)3=',re.MULTILINE | re.DOTALL)
#regnihongojap5 = re.compile(r'{{[Nn]ihongo\|([^|}]+)\|([^|}]+)\|([^|}]+)\|([^|}]+)\|([^|}]+)}}', re.MULTILINE | re.DOTALL)
#regnihongojap4 = re.compile(r'{{[Nn]ihongo\|([^|}]+)\|([^|}]+)\|([^|}]+)\|([^|}]+)}}', re.MULTILINE | re.DOTALL)
#regnihongojap3 = re.compile(r'{{[Nn]ihongo\|([^|}]*?)\|([^|}]*?)\|([^|}]+)}}', re.MULTILINE | re.DOTALL)
#regnihongojap2 = re.compile(r'{{[Nn]ihongo\|([^|}]+)\|([^|}]+)\|?}}', re.MULTILINE | re.DOTALL)
#general
#reglanggeneral = re.compile(r'{{lang\|[^|]+\|([^}]*?)}}', re.MULTILINE | re.DOTALL)
#regtransgeneral = re.compile(r'{{transl\|[^|]+\|([^}]*?)}}', re.MULTILINE | re.DOTALL)
#preservetemplate
# Each pattern captures (group 1) the body of the named template, i.e. the
# text between '{{' and the first following '}}'.
reglanggeneralpreserve = re.compile(r'{{(lang\|.*?)}}', re.MULTILINE | re.DOTALL)
regtransgeneralpreserve = re.compile(r'{{(transl\|.*?)}}', re.MULTILINE | re.DOTALL)
regnihongopreservere = re.compile(r'{{([Nn]ihongo\|.*?)}}', re.MULTILINE | re.DOTALL)
#harvard
regharv_general = re.compile(r'{{([Hh]arv.*?)}}', re.MULTILINE | re.DOTALL)
regaudio_general = re.compile(r'{{([Aa]udio.*?)}}', re.MULTILINE | re.DOTALL)
regflagtemplate = re.compile(r'{{([Ff]lag\s?\|.*?)}}', re.MULTILINE | re.DOTALL)
#USE IF TEMPLATE EXPANSION
#regharvtxt_aut_year_page = re.compile(r'{{[Hh]arvtxt\s?\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharvtxt_aut_year = re.compile(r'{{[Hh]arvtxt\s?\|\s?([^\|]+)\|\s?([0-9][^\|]+)}}', re.MULTILINE | re.DOTALL)
#regharv_aut_year_page = re.compile(r'{{[Hh]arv\s?\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharvtxt_aut_aut_year_page = re.compile(r'{{[Hh]arvtxt\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharv_aut_aut_year_page = re.compile(r'{{[Hh]arv\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharvtxt_aut_aut_year = re.compile(r'{{[Hh]arvtxt\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)}}', re.MULTILINE | re.DOTALL)
#regharv_aut_aut_year = re.compile(r'{{[Hh]arv\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)}}', re.MULTILINE | re.DOTALL)
#regharvnb_aut_year_page_nb = re.compile(r'{{[Hh]arvnb\s?\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharvnb_aut_aut_year_page_nb = re.compile(r'{{[Hh]arvnb\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharvnb_aut_aut_year_nb = re.compile(r'{{[Hh]arvnb\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)}}', re.MULTILINE | re.DOTALL)
#regharvnb_aut_year_nb = re.compile(r'{{[Hh]arvnb\s?\|\s?([^\|]+)\|\s?([0-9][^\|]+)}}', re.MULTILINE | re.DOTALL)
#regharvnb_aut_aut_aut_year_page_nb = re.compile(r'{{[Hh]arvnb\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
#regharvnb_aut_aut_aut_year_nb = re.compile(r'{{[Hh]arvnb\s?\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([^\|]+)\|\s?([0-9][^\|]+)}}', re.MULTILINE | re.DOTALL)
#regharvcoltxt_aut_year_page = re.compile(r'{{[Hh]arvcoltxt\s?\|\s?([^\|]+)\|\s?([0-9][^\|]+)\|\s?((pp?|loc)=[^\}]+)}}', re.MULTILINE | re.DOTALL)
# Placeholders '<___' and '___>' (the names suggest they stand in for curly
# braces -- presumably '{{' and '}}'; verify against the caller).
regbacktocurly1 = re.compile(r'<___', re.MULTILINE | re.DOTALL)
regbacktocurly2 = re.compile(r'___>', re.MULTILINE | re.DOTALL)
# NOTE(review): the "remove eos in ..." patterns below are empty or visibly
# truncated; their delimiting tokens appear to have been lost from the
# literals.  As found, three statements prevented the module from loading:
#   * regremoveeosinsource had an unclosed '(' (re.error),
#   * a commented-out regremoveeosincode2 line spilled onto an uncommented
#     continuation line (SyntaxError),
#   * regremoveeosincode was a raw string broken across two physical lines
#     with an unclosed '(' (SyntaxError).
# They are repaired minimally (closing parenthesis added, literal line break
# written as '\n', spilled comment rejoined) so the module imports again.  The
# intended patterns must still be restored from the authoritative copy.
#original: could become exponential
#regremoveeosinmath = re.compile(r'((.*)*?(.*?)', re.MULTILINE | re.DOTALL)
#regremoveeosinmath = re.compile(r'()', re.MULTILINE | re.DOTALL)
#TRY AGAIN:
#make greedy!
regremoveeosinmath = re.compile(r'()', re.MULTILINE | re.DOTALL)
# NOTE(review): closing ')' added at the end; its original position is unknown.
regremoveeosinsource = re.compile(r'((.*)*?(.*?))', re.MULTILINE | re.DOTALL)
#regremoveeosinsource2 = re.compile(r'(.{0,5000}?)', re.MULTILINE | re.DOTALL)
#regremoveeosincode2 = re.compile(r'(.{0,5000}?\n)', re.MULTILINE | re.DOTALL)
#TRYING THE OLD
regremoveeosinsource2 = re.compile(r'()', re.MULTILINE | re.DOTALL)
regremoveeosincode2 = re.compile(r'()', re.MULTILINE | re.DOTALL)
# NOTE(review): line break inside the literal rewritten as '\n' and a closing
# ')' added at the end; both are syntactic repairs only.
regremoveeosincode = re.compile(r'(\n(.*)*?(.*?))', re.MULTILINE | re.DOTALL)
regremoveeosinmath2 = re.compile(r'()', re.MULTILINE | re.DOTALL)
regremoveeosinnowiki2 = re.compile(r'(.{0,5000}?)', re.MULTILINE | re.DOTALL)
#ORIGINAL:
#regremoveeosinnowiki = re.compile(r'((.*)*?(.*?)', re.MULTILINE | re.DOTALL)
#not multiline/dotall
#regremoveeosinnowiki = re.compile(r'((.*)*?(.*?)')
#not multiline/dotall - not tag
#regremoveeosinnowiki = re.compile(r'(.*)(.*)*?(.*?)')
regremovenowiki = re.compile(r'')
#length constraint
#regremoveeosinnowiki = re.compile(r'((.{0,500})*?(.{0,500})', re.MULTILINE | re.DOTALL)
#length constraint - no multiline
#regremoveeosinnowiki = re.compile(r'((.{0,500})*?(.{0,500})')
#regremoveeosinnowiki = re.compile(r'(.{0,500})(.{0,500}){0,30}(.{0,500})')
#multiple: - greedy
regremoveeosinnowiki = re.compile(r'([^<]{0,500}?)()(.{0,500}?)', re.MULTILINE | re.DOTALL)
#regdigitandbold = re.compile(r'(^:"[0-9][0-9]\.\s+(?=\'))', re.MULTILINE | re.DOTALL)
#regcurly1 = re.compile(r'{{?[^}^{]*?}?}', re.MULTILINE | re.DOTALL)
#regsource1 = re.compile(r'