
from lxml.builder import ElementMaker

import brev_se_helper
from utils import *
import converter

# Element factory used to build the output nodes: elements are created in the
# `teins` namespace, which is also the default (prefix-less) namespace of the
# nsmap, and the `xi` prefix is bound to `xins`.
# NOTE(review): `teins` and `xins` are not defined in this file; presumably they
# come from the star import of `utils` -- confirm.
E = ElementMaker(namespace=teins, nsmap={None:teins, 'xi':xins})

class GroupsConverter(converter.Converter):
	"""Convert the ``pml:pg`` group nodes of PML sentences into TEI elements.

	Each sentence becomes an ``<s>`` element holding one ``<seg>`` per group;
	each ``<seg>`` carries a feature structure (``orth`` and ``type``) followed
	by ``<ptr>`` elements pointing at the group's direct children.

	NOTE(review): ``xpath``, ``get_attr``, ``xpath_first_text``, ``morph2id``,
	``lxml_name``, ``etree``, ``os``, ``teins`` and ``xmlns`` are not defined in
	this file; presumably they come from ``from utils import *`` -- confirm.
	"""

	# Values of a word's ``pos`` attribute tested by _last_descendant_is_se().
	_SE_POS = ('Brev-se', 'Adj-se')
	# Group labels for which a trailing "-se" word yields an extra pointer
	# (see _get_child_ptrs) and keeps the final period (see _get_orth).
	_SE_GROUP_LABELS = ('CG', 'KG')

	def __init__(self, pmlpaths, path_to_save):
		"""Forward both arguments unchanged to ``converter.Converter``."""
		super(GroupsConverter, self).__init__(pmlpaths, path_to_save)

	@property
	def out_path(self):
		"""Path of the output file: ``ann_groups.xml`` under ``self.wypluwka_path``.

		NOTE(review): ``wypluwka_path`` is not set in this class; presumably the
		base class provides it -- confirm.
		"""
		return os.path.join(self.wypluwka_path, 'ann_groups.xml')

	@property
	def what(self):
		"""Name of the annotation layer handled by this converter."""
		return 'groups'

	def convert_sent(self, pmlsent):
		"""Build the ``<s>`` element for one PML sentence.

		The element gets an ``id`` attribute (in the ``xmlns`` namespace) and a
		``corresp`` attribute referring to the matching entry of
		``ann_words.xml``. Every ``pml:pg`` descendant of ``pmlsent`` is
		converted with ``_convert_pg`` and appended in the order given by
		``self.get_pml_tag_sort_key``.

		Raises:
			KeyError: if ``pmlsent`` has no ``id`` attribute.
		"""
		sid = pmlsent.attrib['id']
		sent_attrs = {
			lxml_name(xmlns, 'id'): morph2id(sid, 'groups'),
			'corresp': 'ann_words.xml#' + morph2id(sid, 'words'),
		}
		teisent = etree.Element(lxml_name(teins, 's'), attrib=sent_attrs)
		# The bound method is passed directly; wrapping it in a lambda adds nothing.
		for pgnode in sorted(xpath(pmlsent, './/pml:pg'), key=self.get_pml_tag_sort_key):
			teisent.append(self._convert_pg(pgnode))
		return teisent

	def _last_descendant_is_se(self, pgnode):
		"""Tell whether the last ``pml:pw`` descendant has a ``pos`` in ``_SE_POS``.

		Returns False when the group has no ``pml:pw`` descendant at all, instead
		of failing with IndexError on the empty result list.
		"""
		words = xpath(pgnode, './/pml:pw')
		if not words:
			return False
		return get_attr(words[-1], 'pos') in self._SE_POS

	def _get_child_ptrs(self, pgnode):
		"""Return the list of ``<ptr>`` elements for the direct children of a group.

		Each direct ``pml:pw`` / ``pml:pg`` child yields one pointer whose
		``type`` is ``head`` (child is both syntactic and semantic head),
		``synh``, ``semh`` or ``nonhead``. Nested groups are targeted by their
		group id, words by a reference into ``ann_words.xml``.

		For groups labelled with one of ``_SE_GROUP_LABELS`` whose last word is
		an "-se" form, one more ``nonhead`` pointer is appended; its target id
		is obtained from ``brev_se_helper.get_interp_id``.

		Raises:
			KeyError: if ``pgnode`` has no ``label`` attribute.
		"""
		children = xpath(pgnode, 'pml:pw | pml:pg')
		semhead = pgnode.attrib.get('semantic_head')
		synhead = pgnode.attrib.get('syntactic_head')
		ptrs = []
		for node in children:
			# Named node_id rather than id so the builtin is not shadowed.
			node_id = get_attr(node, 'id')
			is_synh = synhead == node_id
			is_semh = semhead == node_id

			if is_synh and is_semh:
				gtype = 'head'
			elif is_synh:
				gtype = 'synh'
			elif is_semh:
				gtype = 'semh'
			else:
				gtype = 'nonhead'

			if node.tag.endswith('pg'):
				target = morph2id(node_id, 'groups')
			else:
				target = 'ann_words.xml#' + morph2id(node_id, 'words')

			ptrs.append(E.ptr(type=gtype, target=target))

		if pgnode.attrib['label'] in self._SE_GROUP_LABELS and self._last_descendant_is_se(pgnode):
			last_word_id = xpath(pgnode, './/pml:pw/@id')[-1]
			interp_id = brev_se_helper.get_interp_id(last_word_id)
			ptrs.append(E.ptr(type='nonhead', target='ann_words.xml#' + morph2id(interp_id, 'words')))

		return ptrs

	def _get_orth(self, pgnode):
		"""Return the surface text of a group, assembled from its ``pml:seg`` nodes.

		A single space is inserted before a segment whose ``nps`` attribute is
		``'false'``, except at the very beginning of the text. When the last
		word is an "-se" form and the text ends with a period, the period is
		dropped unless the group's label is one of ``_SE_GROUP_LABELS``.
		"""
		text = ''
		for segnode in xpath(pgnode, './/pml:seg'):
			orth = xpath_first_text(segnode, 'pml:orth')
			nps = get_attr(segnode, 'nps')
			if nps == 'false' and text != '':
				text += ' '
			text += orth
		# Conditions are evaluated in the same order as before; the label is
		# only looked up once the first two hold.
		if (self._last_descendant_is_se(pgnode)
				and text.endswith('.')
				and pgnode.attrib['label'] not in self._SE_GROUP_LABELS):
			text = text[:-1]
		return text

	def _convert_group_type(self, label):
		"""Map a detailed group label to the type written to the output.

		Labels starting with ``NG`` collapse to ``'NG'`` except ``NGgodz``,
		``NGdata`` and ``NGadres``, which are kept as they are; labels starting
		with ``AdjG``, ``NumG`` or ``PrepNG`` collapse to that prefix. Any other
		label is returned unchanged.
		"""
		if label.startswith('NG') and label not in ('NGgodz', 'NGdata', 'NGadres'):
			return 'NG'
		elif label.startswith('AdjG'):
			return 'AdjG'
		elif label.startswith('NumG'):
			return 'NumG'
		elif label.startswith('PrepNG'):
			return 'PrepNG'
		else:
			return label

	def _convert_pg(self, pgnode):
		"""Build the ``<seg>`` element for one ``pml:pg`` group node.

		The element carries an ``id`` attribute (in the ``xmlns`` namespace), a
		``<fs type="group">`` with the group's ``orth`` string and ``type``
		symbol (spaces removed from the converted label), and the pointers
		returned by ``_get_child_ptrs``.

		Raises:
			KeyError: if ``pgnode`` lacks an ``id`` or ``label`` attribute.
		"""
		pgid = pgnode.attrib['id']
		orth = self._get_orth(pgnode)
		label = self._convert_group_type(pgnode.attrib['label'])
		# The attribute name is a namespaced name, so it cannot be written as a
		# literal keyword argument and is passed through ** instead.
		attrs = {lxml_name(xmlns, 'id'): morph2id(pgid, 'groups')}
		features = E.fs(
			E.f(E.string(orth), name='orth'),
			E.f(E.symbol(value=label.replace(' ', '')), name='type'),
			type='group',
		)
		return E.seg(features, *self._get_child_ptrs(pgnode), **attrs)

