'''
Created on 2010-07-06

@author: lennyn
'''
import re
import sys
import os
import subprocess
import shlex
from lxml import etree
from lxml.builder import ElementMaker

# XML namespace URIs used by the helpers in this module.
teins = 'http://www.tei-c.org/ns/1.0'
xmlns = 'http://www.w3.org/XML/1998/namespace'
pmlns = 'http://ufal.mff.cuni.cz/pdt/pml/'
xins = 'http://www.w3.org/2001/XInclude'
nkjpns = 'http://www.nkjp.pl/ns/1.0'

# Prefix -> URI map handed to every query evaluated through xpath().
namespaces = {
    'tei': teins,
    'xml': xmlns,
    'pml': pmlns,
    'xi': xins,
    'nkjp': nkjpns,
}

# Element factory for the TEI namespace: TEI is mapped as the default
# (unprefixed) namespace, and the 'xi' and 'nkjp' prefixes are declared too.
E = ElementMaker(namespace=teins, nsmap={None:teins, 'xi':xins, 'nkjp':nkjpns})
# Element factory for the XInclude namespace, mapped as the default namespace.
xiE = ElementMaker(namespace=xins, nsmap={None:xins})

def XPath(qstr):
    """
    Compile qstr into a reusable etree.XPath evaluator.

    Only the 'pml' and 'xml' prefixes are bound for the compiled expression.
    """
    prefix_map = {
        'pml': pmlns,
        'xml': 'http://www.w3.org/XML/1998/namespace',
    }
    return etree.XPath(qstr, namespaces=prefix_map)

# Precompiled query: the 'id' attributes of all pml:seg elements found anywhere
# below a pml:children child of the context node.
xpathChildIds = XPath('pml:children//pml:seg/@id')

def emptyIfNone(str):
    """
    Return the argument unchanged, or '' when it is None.

    The parameter keeps its original name (which shadows the builtin) so that
    keyword callers keep working.
    """
    # Identity test: only the None singleton is replaced. A '!=' comparison
    # would dispatch to a user-defined __ne__ and could misclassify objects.
    return str if str is not None else ''

def getXmlId(node):
    """
    Return the first result of the '@xml:id' query on node.

    Indexing an empty result raises IndexError, so a node without an xml:id
    is an error for this helper rather than yielding ''.
    """
    ids = xpath(node, '@xml:id')
    return ids[0]

def xpath(node, qstr, **kwargs):
    """
    Evaluate the XPath expression qstr on node using the module's prefix map.

    Extra keyword arguments are forwarded unchanged to node.xpath().
    """
    evaluate = node.xpath
    return evaluate(qstr, namespaces=namespaces, **kwargs)

def xpath_first_attr(node, qstr, attr):
    """
    Return attribute attr of the first node matched by qstr, or '' if none.

    The attribute step is appended to the query itself ("<qstr>/@<attr>"), so
    matched elements that lack the attribute contribute nothing to the result.
    """
    values = xpath(node, '/@'.join((qstr, attr)))
    if len(values) != 0:
        return '' + values[0]
    return ''

def xpath_first_text(node, qstr):
    """
    Return the text of the first element matched by qstr.

    Returns '' when nothing matches and also when the first match carries no
    text. Previously the latter case raised TypeError, because the element's
    .text is None and cannot be concatenated to a string.
    """
    nodes = xpath(node, qstr)
    if len(nodes) == 0:
        return ''
    text = nodes[0].text
    if text is None:
        # Element without text content: treat it like "no match".
        return ''
    return '' + text

def xpath_first_elem(node, qstr):
    """
    Return the first result of evaluating qstr on node, or None if there is none.
    """
    matches = xpath(node, qstr)
    return matches[0] if len(matches) != 0 else None

def get_attr(node, attr):
    """
    Return the value of attribute attr from node.attrib, or '' when it is absent.
    """
    res = node.attrib.get(attr)
    # Identity test instead of '== None': only a genuinely missing value
    # (the None returned by .get) is mapped to ''.
    if res is None:
        return ''
    return '' + res

def get_pml_paths(root_path):
    """
    Collect the paths of PML files below root_path.

    A file qualifies when its name starts with 'NKJP' and ends with '.xml'.
    Any directory whose path contains the substring '.svn' is ignored.
    Results come in os.walk() traversal order.
    """
    found = []
    for dirpath, _subdirs, filenames in os.walk(root_path):
        if '.svn' in dirpath:
            continue
        found.extend(
            os.path.join(dirpath, name)
            for name in filenames
            if name.startswith('NKJP') and name.endswith('.xml')
        )
    return found

def get_header_id(path):
    """
    Return the xml:id of the root tei:teiHeader element of the file at path.

    The file is parsed with a recovering parser; '' is returned when the
    query finds no such attribute.
    """
    lenient_parser = etree.XMLParser(recover=True)
    document = etree.parse(path, lenient_parser)
    return xpath_first_attr(document, '/tei:teiHeader', 'xml:id')

def get_id2path_map(wypluwka_path):
    """
    return { teiHeader@id -> directory containing that header.xml }

    Walks wypluwka_path and, for every file named 'header.xml', maps the
    header id to the directory holding it. If two headers yield the same id,
    the one visited later wins.
    """
    id2path = {}
    for dirpath, _subdirs, filenames in os.walk(wypluwka_path):
        for filename in filenames:
            if filename != 'header.xml':
                continue
            header_id = get_header_id(os.path.join(dirpath, filename))
            id2path[header_id] = dirpath
    return id2path

def lxml_name(ns, name):
    """
    Build the namespace-qualified name '{ns}name' from a namespace URI and a local name.
    """
    braced_ns = '{%s}' % (ns,)
    return braced_ns + name

def morph2id(morphid, prefix):
    """
    Replace a leading 'morph' in morphid with prefix.

    Ids that do not start with 'morph' are returned unchanged. prefix is
    used as a regular-expression replacement template.
    """
    leading_morph = re.compile(r'^morph')
    return leading_morph.sub(prefix, morphid)

def write_tree(root, path_to_save):
    """
    Serialize root to path_to_save as UTF-8 XML and reformat it with xmllint.

    The tree is written with an XML declaration, then the file is replaced by
    the output of "xmllint --format". xmllint is invoked with an argument
    list rather than through a shell pipeline, so paths containing spaces or
    shell metacharacters are handled correctly.

    Raises subprocess.CalledProcessError when xmllint exits with an error; in
    that case the file keeps the unformatted serialization instead of being
    overwritten with partial output.
    """
    xmlstr = etree.tostring(root, pretty_print=True, encoding='UTF-8', xml_declaration=True)
    # tostring() with an explicit encoding produces encoded bytes, hence 'wb'.
    with open(path_to_save, 'wb') as out:
        out.write(xmlstr)
    # Capture the formatted document completely before reopening the file for
    # writing, so the input is never truncated while xmllint is still reading.
    formatted = subprocess.check_output(['xmllint', '--format', path_to_save])
    with open(path_to_save, 'wb') as out:
        out.write(formatted)

def unique_items(L):
    """
    Lazily yield the items of L whose first element has not been seen before.

    Only item[0] is used as the key; the first item for each key is kept and
    the input order is preserved.
    """
    seen_keys = set()
    for entry in L:
        key = entry[0]
        if key in seen_keys:
            continue
        seen_keys.add(key)
        yield entry

#def execute(args, input=None):
#    p = subprocess.Popen(
#                         args, 
#                         stdin=subprocess.PIPE, 
#                         stdout=subprocess.PIPE,
#                         stderr=subprocess.PIPE)
#    stdoutdata, stderrdata = p.communicate(input)
#    return (stdoutdata, stderrdata, p.returncode)
#
#def simpleExec(command, input=None):
#    args = shlex.split(command)
#    p = subprocess.Popen(
#                         args, 
#                         stdin=subprocess.PIPE, 
#                         stdout=subprocess.PIPE,
#                         stderr=subprocess.PIPE)
#    stdoutdata, _ = p.communicate(input)
#    return stdoutdata

def get_ord(node):
    """
    Return the integer 'ord' of node.

    When node has no 'ord' attribute, the value is taken from its preceding
    siblings that carry one; when there are none either, 0 is returned.
    """
    own_ord = node.attrib.get('ord', None)
    if own_ord is not None:
        return int(own_ord)
    sibling_ords = xpath(node, 'preceding-sibling::*[@ord]/@ord')
    if sibling_ords == []:
        return 0
    # NOTE(review): if the results come back in document order, index 0 is the
    # earliest preceding sibling, not the nearest one - confirm this is intended.
    return int(sibling_ords[0])

def execute(cmd):
    """
    Run cmd through the shell and return its non-empty stdout lines.

    Raises subprocess.CalledProcessError when the command exits non-zero.

    cmd is handed to the shell verbatim (shell=True), so callers must not
    build it from untrusted input.
    """
    # universal_newlines=True makes check_output return text rather than
    # bytes, so splitting on '\n' also works on Python 3.
    output = subprocess.check_output(cmd, shell=True, universal_newlines=True)
    return [line for line in output.strip().split('\n') if line]