
import sys
import os
import re
from threading import Thread
from optparse import OptionParser
from lxml import etree
import names
import groups, words
import utils
import const
import postprocess

def parseOptions(argv=None):
	"""
	Parses commandline args

	argv is the list of arguments to parse, without the program name.
	When it is None (the default) optparse falls back to sys.argv[1:].

	Returns the optparse values object with the attributes
	pml_names_root, pml_groups_root, wypluwka_root and old_version.

	Prints the help text and exits with status 1 when --nkjp-root is
	missing, or when neither --pml-names-root nor --pml-groups-root
	is given.
	"""
	parser = OptionParser()
	parser.add_option('--pml-names-root',
						dest='pml_names_root',
						metavar='FILE',
						help='path to PML names files')
	parser.add_option('--pml-groups-root',
						dest='pml_groups_root',
						metavar='FILE',
						help='path to PML groups files')
	parser.add_option('--nkjp-root',
						dest='wypluwka_root',
						metavar='FILE',
						help='path to NKJP TEI P5 root directory')
	parser.add_option('--old-version',
						action='store_true',
						dest='old_version',
						default=False,
						metavar='BOOL',
						help='parse old version of files from original NKJP annotation - don\'t use if you don\'t understand this option')

	# positional arguments are accepted but not used
	opts, _args = parser.parse_args(argv)

	if opts.wypluwka_root is None:
		parser.print_help()
		# sys.exit rather than the site-provided exit(), which is not
		# guaranteed to exist (e.g. python -S)
		sys.exit(1)
	if not opts.pml_names_root and not opts.pml_groups_root:
		# written directly to stderr so the same line works on Python 2 and 3
		sys.stderr.write('Must provide at least one of: --pml-names-root --pml-groups-root\n')
		parser.print_help()
		sys.exit(1)

	return opts

def _get_header_id(wypluwka_dir):
	"""
	Returns the xml:id of the TEI header stored in wypluwka_dir

	Reads <wypluwka_dir>/header.xml and returns the first result of the
	'/tei:teiHeader/@xml:id' query. Raises IndexError when the query
	yields nothing.
	"""
	header_path = os.path.join(wypluwka_dir, 'header.xml')
	# recover=True asks lxml to keep going on malformed XML instead of failing
	lenient_parser = etree.XMLParser(recover=True)
	header_tree = etree.parse(header_path, lenient_parser)
	# NOTE(review): utils.xpath presumably binds the tei/xml prefixes - confirm
	header_ids = utils.xpath(header_tree, '/tei:teiHeader/@xml:id')
	return header_ids[0]

def _get_pml_files_for(pml_root, id, old_version):
	"""
	Returns the PML files of one text, ordered by their numeric suffix

	Walks pml_root recursively and collects every file whose name matches
	"<base>-*.xml". <base> is the given id, or, when old_version is set,
	its entry in const.new2oldid (falling back to the id itself when
	there is no entry).

	The tree is walked in-process instead of building a `find` command
	line from the arguments, so a root or id containing spaces or shell
	metacharacters is taken literally. Glob characters in the id still
	act as wildcards, as they did in the -name pattern.

	The result is sorted by the integer found between the last '-' of
	the file name and the first '.' after it. A matching file whose
	suffix is not an integer raises ValueError. A missing pml_root
	yields an empty list.
	"""
	# local import: fnmatch is only needed by this function
	import fnmatch

	if old_version:
		fname_base = const.new2oldid.get(id, id)
	else:
		fname_base = id
	pattern = '%s-*.xml' % fname_base
	# fnmatchcase keeps the match case-sensitive on every platform
	paths = [os.path.join(dirpath, fname)
			for dirpath, _dirnames, fnames in os.walk(pml_root)
			for fname in fnames
			if fnmatch.fnmatchcase(fname, pattern)]

	def part_number(path):
		# "<base>-12.xml" -> 12
		return int(os.path.basename(path).split('-')[-1].split('.')[0])

	return sorted(paths, key=part_number)

def do_convert_one(wypluwka_dir, pml_root, what, old_version):
	"""
	Converts the PML annotation of a single text directory

	Looks up the text id in the directory's header, finds the matching
	PML files under pml_root and runs the converters over them:
	the names converter when what is 'names', otherwise the words
	converter followed by the groups converter.
	"""
	text_id = _get_header_id(wypluwka_dir)
	pml_paths = _get_pml_files_for(pml_root, text_id, old_version)

	if what == 'names':
		names_converter = names.NamesConverter(pml_paths, wypluwka_dir)
		names_converter.convert()
		return

	# any value other than 'names' is handled as a groups run;
	# words are converted before groups
	words_converter = words.WordsConverter(pml_paths, wypluwka_dir)
	words_converter.convert()
	groups_converter = groups.GroupsConverter(pml_paths, wypluwka_dir)
	groups_converter.convert()

def do_convert(wypluwka_root, pml_root, what, old_version):
	"""
	Converts every text found below wypluwka_root

	A text directory is any directory under wypluwka_root that contains
	a file named ann_morphosyntax.xml. The directories are processed in
	the sorted order of those file paths, each one through
	do_convert_one. When what is 'groups', postprocess.postprocess is
	run once afterwards on the two roots.

	The tree is walked in-process instead of building a `find` command
	line from wypluwka_root, so a root containing spaces or shell
	metacharacters is taken literally.
	"""
	morph_name = 'ann_morphosyntax.xml'
	morph_paths = [os.path.join(dirpath, morph_name)
			for dirpath, _dirnames, fnames in os.walk(wypluwka_root)
			if morph_name in fnames]
	# sort the full file paths (not the directories) to keep the
	# processing order the same as with the sorted find output
	for morph_path in sorted(morph_paths):
		do_convert_one(os.path.dirname(morph_path), pml_root, what, old_version)
	if what == 'groups':
		postprocess.postprocess(wypluwka_root, pml_root)
	

def main(argv):
	"""
	Runs the conversions requested on the command line

	Converts names when --pml-names-root was given and then groups when
	--pml-groups-root was given, both against the same NKJP root.
	The argv parameter is not used here; parseOptions is called without
	arguments.
	"""
	opts = parseOptions()

	# (PML root, kind of conversion), in the order they are run
	requested_runs = (
		(opts.pml_names_root, 'names'),
		(opts.pml_groups_root, 'groups'),
	)
	for pml_root, what in requested_runs:
		if pml_root:
			do_convert(opts.wypluwka_root, pml_root, what, opts.old_version)

# Script entry point: only runs when the file is executed directly, not
# when it is imported. The program name is stripped from the arguments.
if __name__ == "__main__":
	main(sys.argv[1:])

