'''
Created on 08-03-2012

Corrects foreign edges with wrong direction.

@author: mlenart
'''
# -*- coding: utf-8 -*-

import os
import sys
import subprocess
from utils import *
from const import *
from lxml import etree
from lxml.builder import ElementMaker

# Element factory bound to the TEI namespace; the namespace is also registered
# as the default (prefix-less) one for the elements it creates.
_TEI_NAMESPACE = 'http://www.tei-c.org/ns/1.0'
E = ElementMaker(namespace=_TEI_NAMESPACE, nsmap={None: _TEI_NAMESPACE})

def _doCorrect(pmlFile, teiFile):
    """Propagate segment references from one PML file into a TEI words file.

    Parses ``pmlFile`` and visits every ``pml:pw`` element that has at least
    one ``pml:rf`` child containing text.  For each ``pml:rf`` child whose
    text ends with ``-seg``, ``_doCorrectOneWord`` is called for ``teiFile``
    with:

    * the word id, taken from the ``id`` attribute of the ``pml:pw`` element
      with ``morph_`` replaced by ``words_``;
    * the pointer target ``ann_morphosyntax.xml#<text of the rf element>``.

    ``pml:rf`` elements without leading text are skipped.
    """
    tree = etree.parse(pmlFile)
    for pw in xpath(tree, '//pml:pw[pml:rf/text()]'):
        for rf in xpath(pw, 'pml:rf'):
            # The outer query only guarantees that *some* rf child of this pw
            # has a text node; for the others (or when the text follows a
            # child element) ``rf.text`` is None and must not be dereferenced.
            if rf.text and rf.text.endswith('-seg'):
                _doCorrectOneWord(
                    teiFile,
                    pw.attrib['id'].replace('morph_', 'words_'),
                    'ann_morphosyntax.xml#' + rf.text)

def _doCorrectOneWord(teiFile, wordId, newPtrId):
    tree = etree.parse(teiFile)
    #~ print teiFile, wordId
    word = xpath(tree, '//tei:seg[@xml:id="%s"]' % wordId)[0]
    if not xpath(word, 'tei:ptr[@target="%s"]' % newPtrId):
        print teiFile, wordId, newPtrId
        word.append(E.ptr(target=newPtrId))
    writeTree(tree, teiFile)

def writeTree(tree, path):
    """Serialise ``tree`` to ``path`` and re-indent the file with xmllint.

    The tree is serialised as pretty-printed UTF-8 with an XML declaration,
    written to ``path``, and then reformatted in place by
    ``xmllint --format``.

    Raises:
        subprocess.CalledProcessError: if ``xmllint`` exits with a non-zero
            status.
        OSError: if ``xmllint`` cannot be started.
    """
    # Serialise before opening so a serialisation error cannot leave behind a
    # truncated file.
    xmlstr = etree.tostring(tree, pretty_print=True, encoding='UTF-8', xml_declaration=True)
    # tostring() with an explicit encoding returns bytes, hence binary mode.
    with open(path, 'wb') as f:
        f.write(xmlstr)
    # Argument list instead of a shell pipeline: paths with spaces or shell
    # metacharacters are passed through verbatim, and a failing xmllint is
    # reported instead of being masked by the exit status of the last pipeline
    # stage (which could also leave the file empty).
    subprocess.check_call(['xmllint', '--format', '--output', path, path])

def correctRefs(teiRoot, pmlRoot):
    """Apply ``_doCorrect`` to every matching (PML file, TEI words file) pair.

    For each ``ann_words.xml`` path that the ``find`` command reports under
    ``teiRoot``, the id returned by ``get_header_id`` for the sibling
    ``header.xml`` is translated through ``new2oldid`` (falling back to the id
    itself when there is no entry).  Every file under ``pmlRoot`` named
    ``<id>-*.xml`` is then passed to ``_doCorrect`` together with that
    ``ann_words.xml``.

    NOTE(review): ``execute`` is presumably a helper that runs a shell command
    and yields its output lines - confirm against ``utils``.
    """
    teiQuery = 'find "%s" -name "ann_words.xml" | sort' % teiRoot
    for teiFile in execute(teiQuery):
        textDir = os.path.dirname(teiFile)
        headerPath = os.path.join(textDir, 'header.xml')
        currentId = get_header_id(headerPath)
        lookupId = new2oldid.get(currentId, currentId)
        pmlQuery = 'find "%s" -name "%s-*.xml"' % (pmlRoot, lookupId)
        for pmlFile in execute(pmlQuery):
            _doCorrect(pmlFile, teiFile)

if __name__ == "__main__":
    # Command line: <script> TEI_ROOT PML_ROOT
    TEI_ROOT, PML_ROOT = sys.argv[1], sys.argv[2]
    correctRefs(TEI_ROOT, PML_ROOT)
