package pl.waw.ipipan.zil.core.mmax2tei;

import ipipan.clarin.tei.api.entities.AnnotationLayer;
import ipipan.clarin.tei.api.entities.EntitiesFactory;
import ipipan.clarin.tei.api.entities.TEICoreference;
import ipipan.clarin.tei.api.entities.TEICorpusText;
import ipipan.clarin.tei.api.entities.TEILex;
import ipipan.clarin.tei.api.entities.TEIMention;
import ipipan.clarin.tei.api.entities.TEIMorph;
import ipipan.clarin.tei.api.entities.TEIParagraph;
import ipipan.clarin.tei.api.entities.TEIParagraph.ParagraphType;
import ipipan.clarin.tei.api.entities.TEISegment;
import ipipan.clarin.tei.api.entities.TEISentence;
import ipipan.clarin.tei.impl.entities.TEICorpusTextImpl;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.transform.TransformerException;

import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.SAXException;

import pl.waw.ipipan.zil.core.mmaxAPI.Reader;
import pl.waw.ipipan.zil.core.mmaxAPI.Writer;
import pl.waw.ipipan.zil.core.mmaxAPI.types.Coreference;
import pl.waw.ipipan.zil.core.mmaxAPI.types.Mention;
import pl.waw.ipipan.zil.core.mmaxAPI.types.Word;

/**
 * Converts a text stored as MMAX files (words, mentions and the .mmax
 * project file) into a TEI corpus text with segmentation, morphosyntax,
 * mention and coreference layers, and writes the result with
 * {@link TeiSaver}.
 */
public class Converter {

	private static final Logger logger = Logger.getLogger(Converter.class);

	private static final EntitiesFactory ef = EntitiesFactory.getInstance();

	/**
	 * Converts a single text and saves it into the target directory.
	 *
	 * @param mentions
	 *            file with mentions, read with {@link Reader#loadMentions}
	 * @param words
	 *            file with words, read with {@link Reader#loadWords}
	 * @param mmax
	 *            document from which the header title and category are read
	 * @param targetDir
	 *            directory receiving the TEI files and <code>header.xml</code>
	 * @throws Exception
	 *             when the input cannot be read, a mention cannot be mapped
	 *             onto exactly one sentence, or a coreference refers to an
	 *             unknown mention
	 */
	public static void convertText(File mentions, File words, File mmax,
			File targetDir) throws Exception {

		TEICorpusTextImpl resultText = new TEICorpusTextImpl();
		resultText.setCorpusHeader(ef.createCorpusHeader());

		List<Word> mmaxWords = Reader.loadWords(words);
		List<Mention> mmaxMentions = Reader.loadMentions(mentions);

		// Lookup tables filled while building the segmentation and reused
		// when mentions are attached to morphs and sentences.
		Map<String, TEISentence> wordId2sent = new HashMap<>();
		Map<String, TEIMorph> wordId2morph = new HashMap<>();
		setSegmentationAndMorphosyntax(resultText, mmaxWords, wordId2sent,
				wordId2morph);

		Map<Mention, TEIMention> mention2mention = new HashMap<>();
		setMentions(resultText, mmaxWords, mmaxMentions, wordId2sent,
				wordId2morph, mention2mention);
		setCoreferences(resultText, mention2mention, mmaxMentions);

		createHeader(mmax, targetDir);

		TeiSaver.save(resultText, targetDir);
	}

	/**
	 * Builds the TEI header from the title and category reference found in
	 * the given document and saves it as <code>header.xml</code> in the
	 * target directory.
	 *
	 * @throws IOException
	 *             when one of the required elements is missing (in addition
	 *             to whatever the underlying reader/writer reports)
	 */
	private static void createHeader(File mmax, File targetDir)
			throws TransformerException, ParserConfigurationException,
			SAXException, IOException {
		Document doc = Reader.loadDocument(mmax);
		String catRef = readHeaderValue(doc, Constants.TEIHEADER_CATREF, mmax);
		String title = readHeaderValue(doc, Constants.TEIHEADER_TITLE, mmax);

		Document header = TeiSaver.createHeader(title, catRef);
		Writer.saveDocument(header, new File(targetDir, "header.xml"));
	}

	/**
	 * Returns the <code>val</code> attribute of the first element with the
	 * given tag name.
	 *
	 * @throws IOException
	 *             when the document contains no such element
	 */
	private static String readHeaderValue(Document doc, String tagName,
			File mmax) throws IOException {
		NodeList nl = doc.getElementsByTagName(tagName);
		// NodeList.item(0) returns null for an empty list; fail with a
		// message naming the missing element instead of a bare NPE.
		if (nl.getLength() == 0)
			throw new IOException("Missing <" + tagName + "> element in "
					+ mmax);
		return ((Element) nl.item(0)).getAttribute("val");
	}

	/**
	 * Splits the words into paragraphs, adds them to the text and registers
	 * the segmentation and morphosyntax layers.
	 *
	 * @param wordId2sent
	 *            output map: word id to the sentence containing the word
	 * @param wordId2morph
	 *            output map: word id to the morph created for the word
	 */
	private static void setSegmentationAndMorphosyntax(
			TEICorpusTextImpl resultText, List<Word> mmaxWords,
			Map<String, TEISentence> wordId2sent,
			Map<String, TEIMorph> wordId2morph) {
		Iterator<Word> iter = mmaxWords.iterator();
		int parId = 1;
		// Segment and sentence counters run across paragraph boundaries.
		int segId = 1;
		int sentId = 1;
		while (iter.hasNext()) {
			TEIParagraph par = readNextPar(iter, parId++, segId, sentId,
					wordId2sent, wordId2morph);
			resultText.addParagraph(par);
			segId += par.getMorphs().size();
			sentId += par.getSentences().size();
		}

		resultText.addAnnotationLayer(AnnotationLayer.SEGMENTATION,
				ef.createHeader(AnnotationLayer.SEGMENTATION));

		resultText.addAnnotationLayer(AnnotationLayer.MORPHOSYNTAX,
				ef.createHeader(AnnotationLayer.MORPHOSYNTAX));
	}

	/**
	 * Consumes words from the iterator up to and including the first word
	 * marked as last in paragraph (or until the iterator is exhausted) and
	 * builds a paragraph with its sentences, segments and morphs.
	 *
	 * @param iter
	 *            shared iterator over all words of the text
	 * @param parId
	 *            number of the paragraph, used in all generated ids
	 * @param firstSegId
	 *            number given to the first segment of this paragraph
	 * @param firstSentIdMorph
	 *            number given to the first sentence of this paragraph in
	 *            morphosyntax ids
	 * @param wordId2sent
	 *            output map: word id to sentence
	 * @param wordId2morph
	 *            output map: word id to morph
	 * @return the created paragraph
	 */
	private static TEIParagraph readNextPar(Iterator<Word> iter, int parId,
			int firstSegId, int firstSentIdMorph,
			Map<String, TEISentence> wordId2sent,
			Map<String, TEIMorph> wordId2morph) {

		String pIdSegm = "segm_p-" + parId;
		String prefixPIdMorph = "morph_" + parId;

		// First pass: group words into sentences and assemble the paragraph
		// text; a word whose getHasNps() is false is preceded by a space.
		List<List<Word>> parsed = new ArrayList<>();
		StringBuilder sb = new StringBuilder();
		Word w = null;
		while (iter.hasNext()) {
			List<Word> currSent = new ArrayList<>();
			while (iter.hasNext()) {
				w = iter.next();
				currSent.add(w);
				if (!w.getHasNps())
					sb.append(" ");
				sb.append(w.getOrth());

				if (w.isLastInSent())
					break;
			}
			parsed.add(currSent);
			if (w == null || w.isLastInPar())
				break;
		}
		String fullText = sb.toString().trim();

		TEIParagraph par = ef.createParagraph("p-" + parId, ParagraphType.P,
				fullText);
		par.setId(AnnotationLayer.MORPHOSYNTAX, prefixPIdMorph + "-p");
		par.setId(AnnotationLayer.SEGMENTATION, pIdSegm);

		// Second pass: create TEI entities. Sentence numbers in segmentation
		// ids restart at 1 in every paragraph, whereas morphosyntax sentence
		// numbers and segment numbers continue from the values passed in.
		List<TEISentence> sentences = new ArrayList<>();
		int sentIdSegmNo = 1;
		int sentIdMorphNo = firstSentIdMorph;
		int offset = 0;
		int segIdSegmNo = firstSegId;
		for (List<Word> sent : parsed) {
			String sentIdSegm = pIdSegm + "." + sentIdSegmNo + "-s";
			String prefixSentIdMorph = prefixPIdMorph + "." + sentIdMorphNo;

			List<TEISegment> segments = new ArrayList<>();
			List<TEIMorph> morphs = new ArrayList<>();

			int segIdMorphNo = 1;
			for (Word word : sent) {
				String wordIdSegm = pIdSegm + "." + segIdSegmNo + "-seg";
				String prefixWordIdMorph = prefixSentIdMorph + "."
						+ segIdMorphNo;
				int length = word.getOrth().length();
				// Account for the separating space, except at offset 0 where
				// the leading space was removed by trim() above.
				if (!word.getHasNps() && offset != 0)
					offset++;
				TEISegment segment = ef.createSegment(par, wordIdSegm, offset,
						length, word.getHasNps());
				offset += length;

				// Exactly one lexeme with one msd is created per word and it
				// is marked as the chosen interpretation.
				List<TEILex> lexems = new ArrayList<>();
				String prefixLexId = prefixWordIdMorph + ".1";
				Map<String, String> msdMap = new HashMap<>();
				msdMap.put(prefixLexId + ".1-msd", word.getMsd());
				lexems.add(ef.createLex(prefixLexId + "-lex", word.getBase(),
						word.getCtag(), msdMap));
				TEIMorph morph = ef.createMorph(prefixWordIdMorph + "-seg",
						segment, lexems);
				morph.setChosenInterpretation(morph.getAllInterpretations()
						.get(0));

				wordId2morph.put(word.getId(), morph);

				segments.add(segment);
				morphs.add(morph);
				segIdSegmNo++;
				segIdMorphNo++;
			}

			TEISentence sentence = ef.createSentence(sentIdSegm, segments);
			sentence.setId(AnnotationLayer.MORPHOSYNTAX, prefixSentIdMorph
					+ "-s");
			sentence.setTaggingResult(morphs);

			for (Word wo : sent)
				wordId2sent.put(wo.getId(), sentence);

			sentences.add(sentence);
			sentIdSegmNo++;
			sentIdMorphNo++;
		}

		par.setSentences(sentences);

		return par;
	}

	/**
	 * Creates a TEI mention for every input mention, attaches it to the
	 * sentence containing its words and registers the mentions layer.
	 *
	 * @param wordId2sentence
	 *            word id to sentence, as filled during segmentation
	 * @param wordId2morph
	 *            word id to morph, as filled during segmentation
	 * @param mention2mention
	 *            output map: input mention to the created TEI mention
	 * @throws Exception
	 *             when a mention has no words, refers to a word without a
	 *             morph or sentence, or its words belong to more than one
	 *             sentence
	 */
	private static void setMentions(TEICorpusText teiManual,
			List<Word> mmaxWords, List<Mention> mmaxMentions,
			Map<String, TEISentence> wordId2sentence,
			Map<String, TEIMorph> wordId2morph,
			Map<Mention, TEIMention> mention2mention) throws Exception {

		Map<Mention, List<Word>> mention2wordList = Reader.parseAllSpans(
				mmaxWords, mmaxMentions);

		for (TEISentence s : teiManual.getAllSentences())
			s.setMentions(new ArrayList<TEIMention>());

		int mentionId = 1;

		for (Mention m : mmaxMentions) {
			// parse mention span
			List<Word> span = mention2wordList.get(m);
			if (span == null || span.isEmpty())
				throw new Exception("Mention with empty span! " + m.getId());

			List<TEIMorph> morphs = new ArrayList<TEIMorph>();
			Set<TEISentence> sents = new HashSet<>();
			for (Word w : span) {
				String wid = w.getId();
				TEIMorph morph = wordId2morph.get(wid);
				TEISentence s = wordId2sentence.get(wid);
				if (morph == null || s == null)
					throw new Exception("Unknown word " + wid
							+ " in mention " + m.getId());
				morphs.add(morph);
				sents.add(s);
			}

			if (sents.size() != 1)
				throw new Exception("Mention spanning across two sentences! "
						+ m.getId());

			// The head is the first morph whose orth equals the head string
			// (ignoring case); the list stays empty when nothing matches.
			String headStr = m.getHead();
			List<TEIMorph> heads = new ArrayList<TEIMorph>();
			for (TEIMorph morph : morphs)
				if (morph.getOrth().equalsIgnoreCase(headStr)) {
					heads.add(morph);
					break;
				}
			TEIMention mention = ef.createMention("mention_" + mentionId++,
					morphs, heads, false);
			mention2mention.put(m, mention);

			TEISentence s = sents.iterator().next();
			s.getAllMentions().add(mention);
		}

		teiManual.addAnnotationLayer(AnnotationLayer.MENTIONS,
				ef.createHeader(AnnotationLayer.MENTIONS));
		logger.debug((mentionId - 1) + " mentions added");
	}

	/**
	 * Groups the mentions into coreferences, converts them to TEI
	 * coreferences and registers the coreference layer.
	 *
	 * @param mention2mention
	 *            input mention to TEI mention, as filled by
	 *            {@link #setMentions}
	 * @throws Exception
	 *             when a coreference refers to a mention that has no TEI
	 *             counterpart
	 */
	private static void setCoreferences(TEICorpusText teiManual,
			Map<Mention, TEIMention> mention2mention, List<Mention> mmaxMentions)
			throws Exception {

		int corefId = 1;

		List<Coreference> mmaxCoreferences = loadCoreferences(mmaxMentions);
		List<TEICoreference> coreferences = new ArrayList<TEICoreference>();

		for (Coreference coref : mmaxCoreferences) {
			String id = "coreference_" + corefId++;

			// The source mention is optional.
			TEIMention sourceMention = null;
			if (coref.getSourceMention() != null) {
				sourceMention = mention2mention.get(coref.getSourceMention());
				if (sourceMention == null)
					throw new Exception("Problem matching mention "
							+ coref.getSourceMention().toString());
			}

			List<TEIMention> groupMentions = new ArrayList<TEIMention>();
			for (Mention m : coref.getMentions()) {
				TEIMention ment = mention2mention.get(m);
				if (ment == null)
					throw new Exception("Problem matching mention "
							+ m.toString());
				groupMentions.add(ment);
			}

			TEICoreference cor = ef.createCoreference(id, coref.getType(),
					coref.getDominant(), groupMentions, sourceMention);
			coreferences.add(cor);
		}

		teiManual.setCoreferences(coreferences);
		teiManual.addAnnotationLayer(AnnotationLayer.COREFERENCE,
				ef.createHeader(AnnotationLayer.COREFERENCE));

		logger.debug((corefId - 1) + " coreferences added");
	}

	/**
	 * Derives coreferences from mention attributes: one "ident" coreference
	 * per mention group and one "near-ident" coreference per near-identity
	 * link. The result keeps the order in which the coreferences were first
	 * encountered.
	 *
	 * @param mmaxMentions
	 *            all mentions of the text
	 * @return the list of coreferences
	 */
	private static List<Coreference> loadCoreferences(List<Mention> mmaxMentions) {

		Map<String, Mention> mentionId2mention = new HashMap<String, Mention>();
		for (Mention m : mmaxMentions)
			mentionId2mention.put(m.getId(), m);

		LinkedHashMap<String, Coreference> corefId2coref = new LinkedHashMap<String, Coreference>();

		for (Mention m : mmaxMentions) {
			String mg = m.getMentionGroup();
			if (mg != null) {
				Coreference group = corefId2coref.get(mg);
				if (group == null) {
					// The dominant is taken from the first mention seen in
					// the group.
					group = new Coreference(mg, "ident");
					group.setDominant(m.getDominant());
					corefId2coref.put(mg, group);
				}
				group.add(m);
			}

			String near = m.getNearIdentity();
			if (near == null)
				continue;

			// Near-identity links are stored under "<source id>-><target id>",
			// so that is the key that has to be checked for duplicates.
			String name = m.getId() + "->" + near;
			if (corefId2coref.containsKey(name)) {
				logger.error("Duplicate near identity in mention "
						+ m.getId());
				continue;
			}

			Mention another = mentionId2mention.get(near);
			if (another == null) {
				logger.error("No mention to target (" + near
						+ ") in near identity in mention " + m.getId());
				continue;
			}

			Coreference ni = new Coreference(name, "near-ident");
			ni.add(m);
			ni.add(another);
			ni.setSource(m);

			corefId2coref.put(name, ni);
		}

		return new ArrayList<Coreference>(corefId2coref.values());
	}

}
