# (C) 2008  DFKI Language Technology Lab http://www.dfki.de/lt
#     Project HyLaP;  Author: Torsten Marek
# 
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU Lesser General Public
# License 2.1 as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
# Lesser General Public License for more details.
# 
# You should have received a copy of the GNU Lesser General Public
# License along with this library; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
# USA

from itertools import chain

import subprocess

from bibtex2xml import bibtexwasher, bibtexdecoder

from cStringIO import StringIO

from etree_utils import E, ET, write_tree

def to_author_name(a):
    """Turn ``"First Last"`` into ``"Last, First"``.

    The name is split at its last space.  A name that contains no space
    at all is returned unchanged.
    """
    first, separator, last = a.rpartition(" ")
    if not separator:
        # No space found: there is nothing to reorder.
        return a
    return "%s, %s" % (last, first)
    
def _get_complete_text(article_file):
    """Yield ``(text, page)`` tuples for the text-bearing parts of an article.

    The file is walked with ``ET.iterparse``.  Elements tagged ``abstract``
    are reported with page ``0``; elements tagged ``box`` are reported with
    their ``page`` attribute converted to ``int``.  Elements whose text is
    empty or missing are skipped, as are all other tags.
    """
    for _event, node in ET.iterparse(article_file):
        content = node.text
        if not content:
            continue
        if node.tag == "abstract":
            yield (content, 0)
        elif node.tag == "box":
            yield (content, int(node.get("page")))

def get_complete_text(article_file):
    """Return all ``(text, page)`` tuples of an article file as a list.

    This is the eager counterpart of the ``_get_complete_text`` generator.
    """
    collected = []
    collected.extend(_get_complete_text(article_file))
    return collected

def get_abstract_text(article_file):
    """Return ``(text, 0)`` for the first ``abstract`` element in the file.

    The element's text is passed through unchecked, so it may be ``None``.
    ``None`` is returned when the parser reports no ``abstract`` element.
    """
    for _event, node in ET.iterparse(article_file):
        if node.tag != "abstract":
            continue
        return (node.text, 0)
    return None

def from_bibtex(filepath):
    """Convert a BibTeX file into a BibTeXML document string.

    The lines of the file are passed through ``bibtexwasher`` and
    ``bibtexdecoder``; the result is piped through the external
    ``/usr/bin/recode`` tool (``Latex..UTF-8``) and the recoded output is
    wrapped in a ``bibxml:file`` root element.

    Returns the document as a unicode string.

    Raises IOError if the file cannot be opened or read; the failure is
    also reported on standard output before the exception propagates.
    """
    try:
        fd = open(filepath, 'r')
        try:
            filecontents_source = fd.readlines()
        finally:
            # Release the handle even when reading fails part-way.
            fd.close()
    except IOError:
        # Re-raise after reporting: carrying on without the file contents
        # would only fail later with an unrelated unbound-variable error.
        print('Could not open file: %s' % filepath)
        raise
    washeddata = bibtexwasher(filecontents_source)
    outdata = bibtexdecoder(washeddata)

    recoder = subprocess.Popen(["/usr/bin/recode", "Latex..UTF-8"], shell = False,
                               stdin = subprocess.PIPE, stdout = subprocess.PIPE)
    # communicate() feeds stdin and drains stdout at the same time, so a
    # large bibliography cannot deadlock on a full pipe, and it waits for
    # the child so no zombie process is left behind.
    # NOTE(review): assumes outdata is an iterable of byte strings, as the
    # original per-line stdin.write() calls did -- confirm in bibtex2xml.
    recoded, _unused_stderr = recoder.communicate("".join(outdata))

    return u"".join([
        u'<bibxml:file xmlns:bibxml="http://bibtexml.sf.net/">\n',
        unicode(recoded, "UTF-8"),
        u"</bibxml:file>",
    ])


class Article(object):
    """An article whose collected parts can be rendered as an XML tree.

    ``main_text``, ``abstract``, ``conclusion`` and ``references`` are
    plain lists filled by the caller.  The rendering methods read index 0
    of every entry as its text and, for ``main_text``, index 1 as its page.
    """

    def __init__(self, bibtex_file = None):
        """Create an empty article.

        If ``bibtex_file`` is given, it is converted with ``from_bibtex``
        and the UTF-8 encoded result is kept for the XML header.
        """
        self._bibtex = None
        if bibtex_file is not None:
            self._bibtex = from_bibtex(bibtex_file).encode("UTF-8")

        self.title = u""
        self.authors = []
        self.main_text = []
        self.abstract = []
        self.conclusion = []
        self.references = []
        # Keyword -> (list receiving the text, flag).  The flag is handed
        # back verbatim by get_target(); its meaning is up to the caller.
        self._targets = {
            "abstract" : (self.abstract, True),
            "references" : (self.references, True),
            }

    def _get_header(self):
        """Yield the single ``header`` element of the article.

        The header holds the bibliographic record followed by the
        abstract.  The record is parsed from the stored BibTeXML when
        available; otherwise a minimal ``inproceedings`` entry is built
        from ``title`` and ``authors``.
        """
        if self._bibtex is not None:
            bibliography = ET.fromstring(self._bibtex)
        else:
            namespace = "http://bibtexml.sf.net/"
            title_node = E(ET.QName(namespace, "title"), self.title)
            author_nodes = [
                E(ET.QName(namespace, "author"), to_author_name(name.strip()))
                for name in self.authors]
            record = E(ET.QName(namespace, "inproceedings"),
                       title_node, *author_nodes)
            entry = E(ET.QName(namespace, "entry"), record)
            bibliography = E(ET.QName(namespace, "file"), entry)

        abstract_node = E.abstract(*[part[0] for part in self.abstract])
        yield E.header(bibliography, abstract_node)

    def _get_article_body(self):
        """Yield the single ``body`` element of the article.

        Every ``main_text`` entry becomes a ``box`` carrying its page as a
        string attribute; the conclusion texts are joined with newlines.
        """
        boxes = [E.box(piece[0], page=str(piece[1]))
                 for piece in self.main_text]
        conclusion_text = "\n".join(part[0] for part in self.conclusion)
        yield E.body(E.main(*boxes), E.conclusion(conclusion_text))

    def get_target(self, text):
        """Find the list that text under the heading ``text`` belongs to.

        Returns ``(True, target_list, flag)`` for the first registered
        keyword contained in ``text``, and ``(False, self.main_text,
        False)`` when no keyword matches.
        """
        for keyword, destination in self._targets.items():
            if keyword in text:
                return (True, ) + destination

        return (False, self.main_text, False)

    def to_xml(self):
        """Return the ``article`` element holding the header and the body."""
        children = list(self._get_header())
        children.extend(self._get_article_body())
        return E.article(*children)