
import os
import subprocess
import sys

from lxml import etree

from utils import *
from const import *

def execute(cmd):
    """Run *cmd* through the shell and return its non-empty output lines.

    The captured standard output is stripped of leading and trailing
    whitespace as a whole, split on newlines, and empty lines are dropped.

    cmd -- shell command line.  It is handed to the shell unmodified
           (``shell=True``), so pipelines work and callers are responsible
           for quoting any interpolated values.

    Returns a (possibly empty) list of text lines.
    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    # universal_newlines=True makes check_output return text instead of bytes
    # on Python 3, so the newline split below works on Python 2 and 3 alike.
    output = subprocess.check_output(cmd, shell=True, universal_newlines=True)
    return [line for line in output.strip().split('\n') if line]

def _getTeiGroupId(pg):
    """Return the 'id' attribute of *pg* with every 'morph' replaced by 'groups'.

    pg -- element-like object exposing an ``attrib`` mapping with an 'id' key.
    """
    morphId = pg.attrib['id']
    # Splitting on the marker and re-joining swaps every occurrence.
    return 'groups'.join(morphId.split('morph'))

def _getTeiWordId(pg):
    """Return the 'id' attribute of *pg* with every 'morph' replaced by 'words'.

    pg -- element-like object exposing an ``attrib`` mapping with an 'id' key.
    """
    morphId = pg.attrib['id']
    # Splitting on the marker and re-joining swaps every occurrence.
    return 'words'.join(morphId.split('morph'))

def _addHead(teiTree, groupId, target, headType):
    """Make sure the group's feature structure carries the given head.

    Looks up the ``tei:seg`` whose ``xml:id`` equals *groupId*; when there is
    none the tree is left untouched.  Otherwise a ``tei:f`` child with
    ``name=headType`` and ``fVal=target`` is appended to the segment's
    ``tei:fs`` unless an identical one already exists.  Finally, when the
    feature structure holds both a "synh" and a "semh" feature, the first of
    each is moved to the end so that "semh" comes directly before "synh".

    teiTree  -- parsed TEI document to modify in place.
    groupId  -- xml:id of the segment to update.
    target   -- value stored in the feature's fVal attribute.
    headType -- value stored in the feature's name attribute.

    NOTE(review): ``xpath`` and ``E`` are not defined in this file; they
    presumably come from the star imports at the top -- confirm.
    """
    if not xpath(teiTree, '//tei:seg[@xml:id=$id]', id=groupId):
        return

    featureStruct = xpath(teiTree, '//tei:seg[@xml:id=$id]/tei:fs', id=groupId)[0]

    existing = xpath(featureStruct, 'tei:f[@fVal=$target][@name=$headType]',
                     target=target, headType=headType)
    if not existing:
        featureStruct.append(E.f(name=headType, fVal=target))
        # Put a line break after the freshly appended feature.
        featureStruct[-1].tail = '\n'

    synhMatches = xpath(featureStruct, 'tei:f[@name="synh"]')
    if not synhMatches:
        return
    semhMatches = xpath(featureStruct, 'tei:f[@name="semh"]')
    if not semhMatches:
        return

    synhFeature = synhMatches[0]
    semhFeature = semhMatches[0]
    # Detach both, then re-attach at the end in the order semh, synh.
    for feature in (synhFeature, semhFeature):
        featureStruct.remove(feature)
    for feature in (semhFeature, synhFeature):
        featureStruct.append(feature)
    
def _doCorrectHeads(teiFile, pg2head, headType):
    """Apply the heads in *pg2head* to *teiFile* and write the file back.

    teiFile  -- path of the TEI XML file; it is parsed, updated through
                _addHead and then rewritten via write_tree.
    pg2head  -- dict mapping a group id to the id of its head word.
    headType -- feature name handed to _addHead (callers in this file pass
                'semh' or 'synh').

    One progress line is printed per mapping, regardless of whether
    _addHead actually changed the tree.
    """
    teiTree = etree.parse(teiFile)
    for groupId, headId in pg2head.items():
        target = 'ann_words.xml#' + headId
        _addHead(teiTree, groupId, target, headType)
        # A single pre-formatted argument prints the same space-separated
        # line under the Python 2 print statement and the Python 3 print
        # function, so the module no longer fails to compile on Python 3.
        print('%s %s %s %s' % (teiFile, headType, groupId, target))
    write_tree(teiTree, teiFile)

def _doCorrect(pmlFile, teiFile):
    """Collect nested heads from *pmlFile* and record them in *teiFile*.

    Every ``pml:pg`` that directly contains another ``pml:pg`` is examined.
    Its 'semantic_head' and 'syntactic_head' attributes name ``pml:pw``
    descendants; a head is collected only when it is not a direct child of
    the group.  Collected heads are keyed by the group's TEI id and passed
    to _doCorrectHeads, semantic heads ('semh') first, then syntactic heads
    ('synh'); an empty collection triggers no call.

    pmlFile -- path of the PML XML file to read.
    teiFile -- path of the TEI file forwarded to _doCorrectHeads.
    """
    semHeads = {}
    synHeads = {}
    pmlTree = etree.parse(pmlFile)

    for group in xpath(pmlTree, '//pml:pg[pml:pg]'):
        # Resolve both heads up front, semantic before syntactic.
        semHead, synHead = [
            xpath(group, './/pml:pw[@id=$id]', id=group.attrib[attrName])[0]
            for attrName in ('semantic_head', 'syntactic_head')
        ]
        for head, collected in ((semHead, semHeads), (synHead, synHeads)):
            # Heads sitting directly under the group are skipped.
            if head.getparent() != group:
                collected[_getTeiGroupId(group)] = _getTeiWordId(head)

    for headType, collected in (('semh', semHeads), ('synh', synHeads)):
        if collected:
            _doCorrectHeads(teiFile, collected, headType)

def correctHeads(teiRoot, pmlRoot):
    """Correct the heads of every ann_groups.xml found below *teiRoot*.

    For each ``ann_groups.xml`` (visited in the order produced by ``sort``)
    the header id is read from the ``header.xml`` in the same directory and
    translated through ``new2oldid`` when a mapping exists.  Every file
    below *pmlRoot* named ``<header id>-*.xml`` is then processed together
    with that TEI file by _doCorrect.

    teiRoot -- directory searched for ann_groups.xml files.
    pmlRoot -- directory searched for the matching PML files.
    """
    teiListing = 'find "%s" -name "ann_groups.xml" | sort' % teiRoot
    for teiFile in execute(teiListing):
        headerPath = os.path.join(os.path.dirname(teiFile), 'header.xml')
        headerId = get_header_id(headerPath)
        # Fall back to the id itself when no mapping is registered.
        headerId = new2oldid.get(headerId, headerId)
        pmlListing = 'find "%s" -name "%s-*.xml"' % (pmlRoot, headerId)
        for pmlFile in execute(pmlListing):
            _doCorrect(pmlFile, teiFile)

if __name__ == "__main__":
    # Command line: <script> TEI_ROOT PML_ROOT
    TEI_ROOT, PML_ROOT = sys.argv[1], sys.argv[2]
    correctHeads(TEI_ROOT, PML_ROOT)
