
from utils import *
import converter
import tagset
import brev_se_helper

# Module-wide tag validator, configured from 'words.cfg'; WordsConverter uses it
# to obtain the msd value written for each emitted segment.
validator = tagset.TagValidator('words.cfg')

# Element factory bound to the TEI namespace (the default prefix in the
# namespace map) with the 'xi' prefix mapped to `xins`.
E = ElementMaker(
	namespace=teins,
	nsmap={
		None: teins,
		'xi': xins,
	}
)

class WordsConverter(converter.Converter):
	"""Converter that turns the ``pml:pw`` nodes of a PML sentence into TEI
	``<seg>`` elements of type ``words`` (written to ``ann_words.xml``).

	Every ``pml:pw`` node becomes one ``<seg>`` holding a feature structure
	with ``orth``, ``base``, ``ctag`` and ``msd`` features, followed by the
	child pointers supplied by ``get_child_ptrs`` of the base class. Nodes
	whose ``pos`` is ``Brev-se`` or ``Adj-se`` are split into two segments:
	the word itself and a separate ``Interp`` segment for ``.``.
	"""

	def __init__(self, pmlpaths, path_to_save):
		"""Forward both arguments unchanged to ``converter.Converter``."""
		super(WordsConverter, self).__init__(pmlpaths, path_to_save)

	@property
	def out_path(self):
		"""Path of the output file: ``ann_words.xml`` under ``self.wypluwka_path``."""
		return os.path.join(self.wypluwka_path, 'ann_words.xml')

	@property
	def what(self):
		"""Name of the annotation layer handled by this converter (``'words'``)."""
		return 'words'

	def convert_sent(self, pmlsent):
		"""Build the TEI ``<s>`` element for one PML sentence.

		:param pmlsent: PML sentence element; must carry an ``id`` attribute.
		:returns: a new ``<s>`` element whose children are the converted
			``pml:pw`` nodes, ordered by ``get_pml_tag_sort_key``.
		:raises KeyError: if ``pmlsent`` has no ``id`` attribute.
		"""
		pid = pmlsent.attrib['id']
		sent_attrs = {
			lxml_name(xmlns, 'id'): morph2id(pid, 'words'),
			'corresp': 'ann_morphosyntax.xml#' + pid,
		}
		teisent = etree.Element(lxml_name(teins, 's'), attrib=sent_attrs)

		foreign_edges_map = self.get_foreign_edges_map(pmlsent)

		# The list of syntactic words is split into chunks, because that will
		# be needed for fixing up pieces such as Conj1, Conj2, ... and
		# Comp1, Comp2, ... (currently every chunk holds exactly one node).
		sorted_pws = sorted(xpath(pmlsent, './/pml:pw'), key=self.get_pml_tag_sort_key)
		for pw_chunk in self._get_chunks(sorted_pws):
			for teipw_chunk in self._convert_pw_nodes(pw_chunk, foreign_edges_map):
				for teipw in teipw_chunk:
					teisent.append(teipw)
		return teisent

	def _chunk_len(self, pwnodes):
		"""Measure the numbered chunk that starts at ``pwnodes[0]``.

		The ``pos`` of ``pwnodes[0]`` minus its last character is taken as the
		chunk prefix. The following ``pml:pw`` siblings are scanned for ``pos``
		values that start with that prefix and end with the next consecutive
		number. Scanning stops early when a sibling whose ``pos`` is the prefix
		followed by ``1`` is met (NOTE(review): presumably the start of the
		next chunk of the same kind -- confirm).

		:param pwnodes: non-empty sequence of ``pml:pw`` nodes; only the first
			one is inspected.
		:returns: the number of siblings spanned, counting ``pwnodes[0]`` as 1
			and including any unrelated siblings in between, or ``None`` when
			no continuation was found.
		"""
		first = pwnodes[0]
		chunk_pos = get_attr(first, 'pos')[:-1]
		chunk_len = None
		chunk_last_num = 1

		siblings = xpath(first, 'following-sibling::pml:pw')
		# `span` counts nodes from pwnodes[0] (== 1) up to and including the
		# current sibling, hence start=2.
		for span, pwnode in enumerate(siblings, start=2):
			curr_pos = get_attr(pwnode, 'pos')
			if curr_pos.startswith(chunk_pos) and curr_pos.endswith(str(chunk_last_num + 1)):
				chunk_len = span
				# Increment instead of re-parsing the last character, so the
				# counter does not fall back to 0 once the numbers reach 10.
				chunk_last_num += 1
			elif curr_pos == chunk_pos + '1':
				break
		return chunk_len

	def _pop_chunk_from_front(self, nodes):
		"""Remove the leading chunk from ``nodes`` (in place) and return it.

		:param nodes: non-empty mutable list of ``pml:pw`` nodes.
		:returns: list with the first ``_chunk_len(nodes)`` nodes; a single
			node when ``_chunk_len`` reports no continuation. At most
			``len(nodes)`` nodes are removed.
		"""
		# _chunk_len returns None for a node without continuation; such a node
		# forms a chunk of its own.
		chunk_len = self._chunk_len(nodes) or 1
		chunk = nodes[:chunk_len]
		del nodes[:chunk_len]
		return chunk

	def _debug_chunk(self, pwnodes):
		"""Print the ``(id, pos)`` pairs of the given nodes (debugging aid)."""
		# Parenthesised so that the line prints the same list under the
		# Python 2 print statement and the Python 3 print function.
		print([(get_attr(pw, 'id'), get_attr(pw, 'pos')) for pw in pwnodes])

	def _get_chunks(self, pwnodes):
		"""Yield the chunks of ``pwnodes``; every node is a one-element chunk."""
		for node in list(pwnodes):
			yield [node]

	def _convert_pw_nodes(self, pwnodes, foreign_edges_map):
		"""Convert one chunk of ``pml:pw`` nodes.

		:param pwnodes: chunk to convert; must contain exactly one node.
		:param foreign_edges_map: mapping from node id to its foreign edges;
			nodes missing from the mapping get an empty list.
		:returns: list of lists of TEI ``<seg>`` elements.
		:raises AssertionError: if the chunk does not hold exactly one node.
		"""
		if len(pwnodes) != 1:
			# should not get here!
			raise AssertionError('expected a chunk with exactly one node, got %d' % len(pwnodes))
		pwnode = pwnodes[0]
		needs_split = pwnode.attrib['pos'] in ('Brev-se', 'Adj-se')
		foreign_edges = foreign_edges_map.get(pwnode.attrib['id'], [])
		if needs_split:
			return self._convert_Brevse_Adjse(pwnode, foreign_edges)
		return self._convert_pw(pwnode, foreign_edges)

	def _build_seg(self, pwid, orth, base, ctag, msd, child_ptrs):
		"""Build one TEI ``<seg>`` with a ``words`` feature structure.

		:param pwid: source id, turned into the segment id by ``morph2id``.
		:param orth: text of the ``orth`` feature.
		:param base: text of the ``base`` feature.
		:param ctag: value of the ``ctag`` feature (spaces are removed).
		:param msd: value of the ``msd`` feature (spaces are removed); it is
			replaced by the msd returned by the validator when validation
			succeeds.
		:param child_ptrs: elements appended after the feature structure.
		:returns: the new ``<seg>`` element.
		"""
		attrs = {lxml_name(xmlns, 'id'): morph2id(pwid, 'words')}
		try:
			# Only the validated msd is used; base and ctag stay as given.
			_, _, msd = validator.validateTag(base, ctag, msd)
		except tagset.TagException:
			# Best effort: a tag rejected by the validator keeps its original msd.
			pass
		features = E.fs(
			E.f(E.string(orth), name='orth'),
			E.f(E.string(base), name='base'),
			E.f(E.symbol(value=ctag.replace(' ', '')), name='ctag'),
			E.f(E.symbol(value=msd.replace(' ', '')), name='msd'),
			type='words')
		return E.seg(features, *child_ptrs, **attrs)

	def _seg_from_pwnode(self, pwnode, foreign_edges, ctag):
		"""Build the ``<seg>`` for ``pwnode`` itself, using the given ``ctag``."""
		return self._build_seg(
			get_attr(pwnode, 'id'),
			xpath_first_text(pwnode, 'pml:orth'),
			xpath_first_text(pwnode, 'pml:base'),
			ctag,
			get_attr(pwnode, 'morph'),
			self.get_child_ptrs(pwnode, foreign_edges))

	def _convert_pw(self, pwnode, foreign_edges):
		"""Convert an ordinary ``pml:pw`` node.

		:returns: ``[[seg]]`` -- a single chunk with a single segment.
		"""
		teipw = self._seg_from_pwnode(pwnode, foreign_edges, get_attr(pwnode, 'pos'))
		return [[teipw]]

	def _convert_Brevse_Adjse(self, pwnode, foreign_edges):
		"""Convert a ``Brev-se`` / ``Adj-se`` node into two one-segment chunks:
		the word part followed by the ``Interp`` part."""
		return [[self._get_brev_part(pwnode, foreign_edges)], [self._get_interp_part(pwnode)]]

	def _get_brev_part(self, pwnode, foreign_edges):
		"""Build the word segment of a ``Brev-se`` / ``Adj-se`` node; its ctag
		is the node's ``pos`` with ``-se`` removed."""
		ctag = get_attr(pwnode, 'pos').replace('-se', '')
		return self._seg_from_pwnode(pwnode, foreign_edges, ctag)

	def _get_interp_part(self, pwnode):
		"""Build the ``Interp`` segment (orth and base ``.``) of a ``Brev-se`` /
		``Adj-se`` node.

		The segment id is derived through ``brev_se_helper.get_interp_id`` and
		the segment keeps only the last child pointer of ``pwnode``
		(NOTE(review): presumably the pointer to the full stop -- confirm).
		"""
		pwid = brev_se_helper.get_interp_id(get_attr(pwnode, 'id'))
		last_ptr = self.get_child_ptrs(pwnode, [])[-1:]
		return self._build_seg(pwid, '.', '.', 'Interp', '', last_ptr)
	
